1 // SPDX-License-Identifier: GPL-3.0-or-later
2 /* Subroutines used for code generation on IBM RS/6000.
3 Copyright (C) 1991-2023 Free Software Foundation, Inc.
4 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #define IN_TARGET_CODE 1
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "cfgloop.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "expmed.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "ira.h"
41 #include "recog.h"
42 #include "cgraph.h"
43 #include "diagnostic-core.h"
44 #include "insn-attr.h"
45 #include "flags.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "attribs.h"
49 #include "stor-layout.h"
50 #include "calls.h"
51 #include "print-tree.h"
52 #include "varasm.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "output.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "sched-int.h"
60 #include "gimplify.h"
61 #include "gimple-iterator.h"
62 #include "gimple-fold.h"
63 #include "gimple-walk.h"
64 #include "ssa.h"
65 #include "tree-vectorizer.h"
66 #include "tree-ssa-propagate.h"
67 #include "intl.h"
68 #include "tm-constrs.h"
69 #include "target-globals.h"
70 #include "builtins.h"
71 #include "tree-vector-builder.h"
72 #include "context.h"
73 #include "tree-pass.h"
74 #include "symbol-summary.h"
75 #include "ipa-prop.h"
76 #include "ipa-fnsummary.h"
77 #include "except.h"
78 #include "case-cfn-macros.h"
79 #include "ppc-auxv.h"
80 #include "rs6000-internal.h"
81 #include "opts.h"
82
83 /* This file should be included last. */
84 #include "target-def.h"
85
86 extern tree rs6000_builtin_mask_for_load (void);
87 extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
88 extern tree rs6000_builtin_reciprocal (tree);
89
90 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
91 systems will also set long double to be IEEE 128-bit. AIX and Darwin
92 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
93 those systems will not pick up this default. This needs to be after all
94 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
95 properly defined. */
96 #ifndef TARGET_IEEEQUAD_DEFAULT
97 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
98 #define TARGET_IEEEQUAD_DEFAULT 1
99 #else
100 #define TARGET_IEEEQUAD_DEFAULT 0
101 #endif
102 #endif
103
104 /* Don't enable PC-relative addressing if the target does not support it. */
105 #ifndef PCREL_SUPPORTED_BY_OS
106 #define PCREL_SUPPORTED_BY_OS 0
107 #endif
108
109 #ifdef USING_ELFOS_H
110 /* Counter for labels which are to be placed in .fixup. */
111 int fixuplabelno = 0;
112 #endif
113
114 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
115 int dot_symbols;
116
117 /* Specify the machine mode that pointers have. After generation of rtl, the
118 compiler makes no further distinction between pointers and any other objects
119 of this machine mode. */
120 scalar_int_mode rs6000_pmode;
121
122 /* Track use of r13 in 64bit AIX TLS. */
123 static bool xcoff_tls_exec_model_detected = false;
124
125 /* Width in bits of a pointer. */
126 unsigned rs6000_pointer_size;
127
128 #ifdef HAVE_AS_GNU_ATTRIBUTE
129 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
130 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
131 # endif
132 /* Flag whether floating point values have been passed/returned.
133 Note that this doesn't say whether fprs are used, since the
134 Tag_GNU_Power_ABI_FP .gnu.attributes value that this flag controls
135 should be set for soft-float values passed in gprs and ieee128
136 values passed in vsx registers. */
137 bool rs6000_passes_float = false;
138 bool rs6000_passes_long_double = false;
139 /* Flag whether vector values have been passed/returned. */
140 bool rs6000_passes_vector = false;
141 /* Flag whether small (<= 8 byte) structures have been returned. */
142 bool rs6000_returns_struct = false;
143 #endif
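/* For illustration: these flags are typically consumed when the assembly
   stream is finalized.  A minimal sketch of that step, assuming the usual
   .gnu_attribute tag numbers 4 (Tag_GNU_Power_ABI_FP) and 8
   (Tag_GNU_Power_ABI_Vector); the real code in this file also encodes
   which long-double format was used:

     static void
     emit_gnu_attributes_sketch (FILE *file)
     {
       if (rs6000_passes_float)
	 fprintf (file, "\t.gnu_attribute 4, %d\n",
		  TARGET_HARD_FLOAT ? 1 : 2);
       if (rs6000_passes_vector)
	 fprintf (file, "\t.gnu_attribute 8, %d\n",
		  TARGET_ALTIVEC_ABI ? 2 : 1);
     }
*/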
144
145 /* Value is TRUE if register/mode pair is acceptable. */
146 static bool rs6000_hard_regno_mode_ok_p
147 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
148
149 /* Maximum number of registers needed for a given register class and mode. */
150 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
151
152 /* How many registers are needed for a given register and mode. */
153 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
154
155 /* Map register number to register class. */
156 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
157
158 static int dbg_cost_ctrl;
159
160 /* Flag to say the TOC is initialized.  */
161 int toc_initialized, need_toc_init;
162 char toc_label_name[10];
163
164 /* Cached value of rs6000_variable_issue. This is cached in
165 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
166 static short cached_can_issue_more;
167
168 static GTY(()) section *read_only_data_section;
169 static GTY(()) section *private_data_section;
170 static GTY(()) section *tls_data_section;
171 static GTY(()) section *tls_private_data_section;
172 static GTY(()) section *read_only_private_data_section;
173 static GTY(()) section *sdata2_section;
174
175 section *toc_section = 0;
176
177 /* Describe the vector unit used for modes. */
178 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
179 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
180
181 /* Register classes for various constraints that are based on the target
182 switches. */
183 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
184
185 /* Describe the alignment of a vector. */
186 int rs6000_vector_align[NUM_MACHINE_MODES];
187
188 /* What modes to automatically generate reciprocal divide estimate (fre) and
189 reciprocal sqrt (frsqrte) for. */
190 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
191
192 /* Masks to determine which reciprocal estimate instructions to generate
193 automatically. */
194 enum rs6000_recip_mask {
195 RECIP_SF_DIV = 0x001, /* Use divide estimate */
196 RECIP_DF_DIV = 0x002,
197 RECIP_V4SF_DIV = 0x004,
198 RECIP_V2DF_DIV = 0x008,
199
200 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
201 RECIP_DF_RSQRT = 0x020,
202 RECIP_V4SF_RSQRT = 0x040,
203 RECIP_V2DF_RSQRT = 0x080,
204
205 /* Various combination of flags for -mrecip=xxx. */
206 RECIP_NONE = 0,
207 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
208 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
209 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
210
211 RECIP_HIGH_PRECISION = RECIP_ALL,
212
213 /* On low precision machines like the power5, don't enable double precision
214 reciprocal square root estimate, since it isn't accurate enough. */
215 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
216 };
217
218 /* -mrecip options. */
219 static struct
220 {
221 const char *string; /* option name */
222 unsigned int mask; /* mask bits to set */
223 } recip_options[] = {
224 { "all", RECIP_ALL },
225 { "none", RECIP_NONE },
226 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
227 | RECIP_V2DF_DIV) },
228 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
229 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
230 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
231 | RECIP_V2DF_RSQRT) },
232 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
233 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
234 };
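/* A minimal sketch, not the exact parsing code used later in this file, of
   how one token of a comma-separated -mrecip=opt1,!opt2 string maps onto
   the table above ("!" requests clearing the bits instead of setting
   them):

     static unsigned int
     recip_token_to_mask_sketch (const char *p, bool *invert)
     {
       *invert = (*p == '!');
       if (*invert)
	 p++;
       for (size_t i = 0; i < ARRAY_SIZE (recip_options); i++)
	 if (strcmp (p, recip_options[i].string) == 0)
	   return recip_options[i].mask;
       return 0;	/* Unknown option; the real code diagnoses this.  */
     }
*/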
235
236 /* On PowerPC, we have a limited number of target clones that we care about,
237 which means we can use an array to hold the options, rather than having more
238 elaborate data structures to identify each possible variation. Order the
239 clones from the default to the highest ISA. */
240 enum {
241 CLONE_DEFAULT = 0, /* default clone. */
242 CLONE_ISA_2_05, /* ISA 2.05 (power6). */
243 CLONE_ISA_2_06, /* ISA 2.06 (power7). */
244 CLONE_ISA_2_07, /* ISA 2.07 (power8). */
245 CLONE_ISA_3_00, /* ISA 3.0 (power9). */
246 CLONE_ISA_3_1, /* ISA 3.1 (power10). */
247 CLONE_MAX
248 };
249
250 /* Map compiler ISA bits into HWCAP names. */
251 struct clone_map {
252 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
253 const char *name; /* name to use in __builtin_cpu_supports. */
254 };
255
256 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
257 { 0, "" }, /* Default options. */
258 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
259 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
260 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
261 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.0 (power9). */
262 { OPTION_MASK_POWER10, "arch_3_1" }, /* ISA 3.1 (power10). */
263 };
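/* The strings above are exactly what user code can test; for example
   (user-level code, with a hypothetical helper name):

     if (__builtin_cpu_supports ("arch_3_1"))
       use_power10_path ();

   Function multi-versioning via __attribute__ ((target_clones (...)))
   relies on the same ordering, dispatching at run time to the highest
   clone the CPU supports.  */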
264
265
266 /* Newer LIBCs explicitly export this symbol to declare that they provide
267 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
268 reference to this symbol whenever we expand a CPU builtin, so that
269 we never link against an old LIBC. */
270 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
271
272 /* True if we have expanded a CPU builtin. */
273 bool cpu_builtin_p = false;
274
275 /* Pointer to function (in rs6000-c.cc) that can define or undefine target
276 macros that have changed. Languages that don't support the preprocessor
277 don't link in rs6000-c.cc, so we can't call it directly. */
278 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT);
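/* Callers guard the pointer before use; a sketch of the idiom, where
   DEFINE_P stands for the boolean the caller passes:

     if (rs6000_target_modify_macros_ptr)
       rs6000_target_modify_macros_ptr (DEFINE_P, rs6000_isa_flags);
*/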
279
280 /* Simplify register classes into simpler classifications. We assume
281 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
282 check for standard register classes (gpr/floating/altivec/vsx) and
283 floating/vector classes (float/altivec/vsx). */
284
285 enum rs6000_reg_type {
286 NO_REG_TYPE,
287 PSEUDO_REG_TYPE,
288 GPR_REG_TYPE,
289 VSX_REG_TYPE,
290 ALTIVEC_REG_TYPE,
291 FPR_REG_TYPE,
292 SPR_REG_TYPE,
293 CR_REG_TYPE
294 };
295
296 /* Map register class to register type. */
297 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
298
299 /* First/last register type for the 'normal' register types (i.e. general
300 purpose, floating point, altivec, and VSX registers). */
301 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
302
303 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
304
305
306 /* Register classes we care about in secondary reload or when checking for a
307 legitimate address. We only need to worry about GPR, FPR, and Altivec
308 registers here, along with an ANY field that is the OR of the 3 classes. */
309
310 enum rs6000_reload_reg_type {
311 RELOAD_REG_GPR, /* General purpose registers. */
312 RELOAD_REG_FPR, /* Traditional floating point regs. */
313 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
314 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
315 N_RELOAD_REG
316 };
317
318 /* For setting up register classes, loop through the 3 register classes mapping
319 into real registers, and skip the ANY class, which is just an OR of the
320 bits. */
321 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
322 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
323
324 /* Map reload register type to a register in the register class. */
325 struct reload_reg_map_type {
326 const char *name; /* Register class name. */
327 int reg; /* Register in the register class. */
328 };
329
330 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
331 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
332 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
333 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
334 { "Any", -1 }, /* RELOAD_REG_ANY. */
335 };
336
337 /* Mask bits for each register class, indexed per mode. Historically the
338 compiler has been more restrictive about which types can do PRE_MODIFY
339 instead of PRE_INC and PRE_DEC, so keep track of separate bits for these two. */
340 typedef unsigned char addr_mask_type;
341
342 #define RELOAD_REG_VALID 0x01 /* Mode valid in register. */
343 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
344 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
345 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
346 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
347 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
348 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
349 #define RELOAD_REG_QUAD_OFFSET 0x80 /* quad offset is limited. */
350
351 /* Masks of valid addressing modes for each register type, plus reload insns. */
352 struct rs6000_reg_addr {
353 enum insn_code reload_load; /* INSN to reload for loading. */
354 enum insn_code reload_store; /* INSN to reload for storing. */
355 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
356 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
357 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
358 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
359 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
360 };
361
362 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
363
364 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
365 static inline bool
366 mode_supports_pre_incdec_p (machine_mode mode)
367 {
368 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
369 != 0);
370 }
371
372 /* Helper function to say whether a mode supports PRE_MODIFY. */
373 static inline bool
374 mode_supports_pre_modify_p (machine_mode mode)
375 {
376 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
377 != 0);
378 }
379
380 /* Return true if we have D-form addressing in altivec registers. */
381 static inline bool
382 mode_supports_vmx_dform (machine_mode mode)
383 {
384 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
385 }
386
387 /* Return true if we have D-form addressing in VSX registers. This addressing
388 is more limited than normal d-form addressing in that the offset must be
389 aligned on a 16-byte boundary. */
390 static inline bool
391 mode_supports_dq_form (machine_mode mode)
392 {
393 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
394 != 0);
395 }
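/* For illustration, a DQ-form offset (used by lxv/stxv style accesses)
   must fit in the 16-bit displacement field with its low four bits clear.
   A sketch of such a check, with a hypothetical function name:

     static bool
     dq_offset_ok_p (HOST_WIDE_INT offset)
     {
       return IN_RANGE (offset, -32768, 32767) && (offset & 0xf) == 0;
     }
*/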
396
397 /* Given that there exists at least one variable that is set (produced)
398 by OUT_INSN and read (consumed) by IN_INSN, return true iff
399 IN_INSN represents one or more memory store operations and none of
400 the variables set by OUT_INSN is used by IN_INSN as the address of a
401 store operation. If either IN_INSN or OUT_INSN does not represent
402 a "single" RTL SET expression (as loosely defined by the
403 implementation of the single_set function) or a PARALLEL with only
404 SETs, CLOBBERs, and USEs inside, this function returns false.
405
406 This rs6000-specific version of store_data_bypass_p checks for
407 certain conditions that result in assertion failures (and internal
408 compiler errors) in the generic store_data_bypass_p function and
409 returns false rather than calling store_data_bypass_p if one of the
410 problematic conditions is detected. */
411
412 int
413 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
414 {
415 rtx out_set, in_set;
416 rtx out_pat, in_pat;
417 rtx out_exp, in_exp;
418 int i, j;
419
420 in_set = single_set (in_insn);
421 if (in_set)
422 {
423 if (MEM_P (SET_DEST (in_set)))
424 {
425 out_set = single_set (out_insn);
426 if (!out_set)
427 {
428 out_pat = PATTERN (out_insn);
429 if (GET_CODE (out_pat) == PARALLEL)
430 {
431 for (i = 0; i < XVECLEN (out_pat, 0); i++)
432 {
433 out_exp = XVECEXP (out_pat, 0, i);
434 if ((GET_CODE (out_exp) == CLOBBER)
435 || (GET_CODE (out_exp) == USE))
436 continue;
437 else if (GET_CODE (out_exp) != SET)
438 return false;
439 }
440 }
441 }
442 }
443 }
444 else
445 {
446 in_pat = PATTERN (in_insn);
447 if (GET_CODE (in_pat) != PARALLEL)
448 return false;
449
450 for (i = 0; i < XVECLEN (in_pat, 0); i++)
451 {
452 in_exp = XVECEXP (in_pat, 0, i);
453 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
454 continue;
455 else if (GET_CODE (in_exp) != SET)
456 return false;
457
458 if (MEM_P (SET_DEST (in_exp)))
459 {
460 out_set = single_set (out_insn);
461 if (!out_set)
462 {
463 out_pat = PATTERN (out_insn);
464 if (GET_CODE (out_pat) != PARALLEL)
465 return false;
466 for (j = 0; j < XVECLEN (out_pat, 0); j++)
467 {
468 out_exp = XVECEXP (out_pat, 0, j);
469 if ((GET_CODE (out_exp) == CLOBBER)
470 || (GET_CODE (out_exp) == USE))
471 continue;
472 else if (GET_CODE (out_exp) != SET)
473 return false;
474 }
475 }
476 }
477 }
478 }
479 return store_data_bypass_p (out_insn, in_insn);
480 }
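/* This predicate is meant to be used as a guard in the scheduling
   descriptions; a representative (assumed) form, following the power*.md
   define_bypass conventions, where the insn reservation names are
   placeholders:

     (define_bypass 2 "store-producer" "store-consumer"
		    "!rs6000_store_data_bypass_p")

   i.e. the shorter bypass latency applies only when the dependence is on
   the stored data rather than on the store's address.  */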
481
482 \f
483 /* Processor costs (relative to an add) */
484
485 const struct processor_costs *rs6000_cost;
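/* As a reference point for the tables below: rtl.h defines
   COSTS_N_INSNS (N) as ((N) * 4), so COSTS_N_INSNS (1) models one add and,
   e.g., a divsi cost of COSTS_N_INSNS (18) models an operation worth
   eighteen adds (18 * 4 == 72 cost units).  */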
486
487 /* Instruction size costs on 32bit processors. */
488 static const
489 struct processor_costs size32_cost = {
490 COSTS_N_INSNS (1), /* mulsi */
491 COSTS_N_INSNS (1), /* mulsi_const */
492 COSTS_N_INSNS (1), /* mulsi_const9 */
493 COSTS_N_INSNS (1), /* muldi */
494 COSTS_N_INSNS (1), /* divsi */
495 COSTS_N_INSNS (1), /* divdi */
496 COSTS_N_INSNS (1), /* fp */
497 COSTS_N_INSNS (1), /* dmul */
498 COSTS_N_INSNS (1), /* sdiv */
499 COSTS_N_INSNS (1), /* ddiv */
500 32, /* cache line size */
501 0, /* l1 cache */
502 0, /* l2 cache */
503 0, /* streams */
504 0, /* SF->DF convert */
505 };
506
507 /* Instruction size costs on 64bit processors. */
508 static const
509 struct processor_costs size64_cost = {
510 COSTS_N_INSNS (1), /* mulsi */
511 COSTS_N_INSNS (1), /* mulsi_const */
512 COSTS_N_INSNS (1), /* mulsi_const9 */
513 COSTS_N_INSNS (1), /* muldi */
514 COSTS_N_INSNS (1), /* divsi */
515 COSTS_N_INSNS (1), /* divdi */
516 COSTS_N_INSNS (1), /* fp */
517 COSTS_N_INSNS (1), /* dmul */
518 COSTS_N_INSNS (1), /* sdiv */
519 COSTS_N_INSNS (1), /* ddiv */
520 128, /* cache line size */
521 0, /* l1 cache */
522 0, /* l2 cache */
523 0, /* streams */
524 0, /* SF->DF convert */
525 };
526
527 /* Instruction costs on RS64A processors. */
528 static const
529 struct processor_costs rs64a_cost = {
530 COSTS_N_INSNS (20), /* mulsi */
531 COSTS_N_INSNS (12), /* mulsi_const */
532 COSTS_N_INSNS (8), /* mulsi_const9 */
533 COSTS_N_INSNS (34), /* muldi */
534 COSTS_N_INSNS (65), /* divsi */
535 COSTS_N_INSNS (67), /* divdi */
536 COSTS_N_INSNS (4), /* fp */
537 COSTS_N_INSNS (4), /* dmul */
538 COSTS_N_INSNS (31), /* sdiv */
539 COSTS_N_INSNS (31), /* ddiv */
540 128, /* cache line size */
541 128, /* l1 cache */
542 2048, /* l2 cache */
543 1, /* streams */
544 0, /* SF->DF convert */
545 };
546
547 /* Instruction costs on MPCCORE processors. */
548 static const
549 struct processor_costs mpccore_cost = {
550 COSTS_N_INSNS (2), /* mulsi */
551 COSTS_N_INSNS (2), /* mulsi_const */
552 COSTS_N_INSNS (2), /* mulsi_const9 */
553 COSTS_N_INSNS (2), /* muldi */
554 COSTS_N_INSNS (6), /* divsi */
555 COSTS_N_INSNS (6), /* divdi */
556 COSTS_N_INSNS (4), /* fp */
557 COSTS_N_INSNS (5), /* dmul */
558 COSTS_N_INSNS (10), /* sdiv */
559 COSTS_N_INSNS (17), /* ddiv */
560 32, /* cache line size */
561 4, /* l1 cache */
562 16, /* l2 cache */
563 1, /* streams */
564 0, /* SF->DF convert */
565 };
566
567 /* Instruction costs on PPC403 processors. */
568 static const
569 struct processor_costs ppc403_cost = {
570 COSTS_N_INSNS (4), /* mulsi */
571 COSTS_N_INSNS (4), /* mulsi_const */
572 COSTS_N_INSNS (4), /* mulsi_const9 */
573 COSTS_N_INSNS (4), /* muldi */
574 COSTS_N_INSNS (33), /* divsi */
575 COSTS_N_INSNS (33), /* divdi */
576 COSTS_N_INSNS (11), /* fp */
577 COSTS_N_INSNS (11), /* dmul */
578 COSTS_N_INSNS (11), /* sdiv */
579 COSTS_N_INSNS (11), /* ddiv */
580 32, /* cache line size */
581 4, /* l1 cache */
582 16, /* l2 cache */
583 1, /* streams */
584 0, /* SF->DF convert */
585 };
586
587 /* Instruction costs on PPC405 processors. */
588 static const
589 struct processor_costs ppc405_cost = {
590 COSTS_N_INSNS (5), /* mulsi */
591 COSTS_N_INSNS (4), /* mulsi_const */
592 COSTS_N_INSNS (3), /* mulsi_const9 */
593 COSTS_N_INSNS (5), /* muldi */
594 COSTS_N_INSNS (35), /* divsi */
595 COSTS_N_INSNS (35), /* divdi */
596 COSTS_N_INSNS (11), /* fp */
597 COSTS_N_INSNS (11), /* dmul */
598 COSTS_N_INSNS (11), /* sdiv */
599 COSTS_N_INSNS (11), /* ddiv */
600 32, /* cache line size */
601 16, /* l1 cache */
602 128, /* l2 cache */
603 1, /* streams */
604 0, /* SF->DF convert */
605 };
606
607 /* Instruction costs on PPC440 processors. */
608 static const
609 struct processor_costs ppc440_cost = {
610 COSTS_N_INSNS (3), /* mulsi */
611 COSTS_N_INSNS (2), /* mulsi_const */
612 COSTS_N_INSNS (2), /* mulsi_const9 */
613 COSTS_N_INSNS (3), /* muldi */
614 COSTS_N_INSNS (34), /* divsi */
615 COSTS_N_INSNS (34), /* divdi */
616 COSTS_N_INSNS (5), /* fp */
617 COSTS_N_INSNS (5), /* dmul */
618 COSTS_N_INSNS (19), /* sdiv */
619 COSTS_N_INSNS (33), /* ddiv */
620 32, /* cache line size */
621 32, /* l1 cache */
622 256, /* l2 cache */
623 1, /* streams */
624 0, /* SF->DF convert */
625 };
626
627 /* Instruction costs on PPC476 processors. */
628 static const
629 struct processor_costs ppc476_cost = {
630 COSTS_N_INSNS (4), /* mulsi */
631 COSTS_N_INSNS (4), /* mulsi_const */
632 COSTS_N_INSNS (4), /* mulsi_const9 */
633 COSTS_N_INSNS (4), /* muldi */
634 COSTS_N_INSNS (11), /* divsi */
635 COSTS_N_INSNS (11), /* divdi */
636 COSTS_N_INSNS (6), /* fp */
637 COSTS_N_INSNS (6), /* dmul */
638 COSTS_N_INSNS (19), /* sdiv */
639 COSTS_N_INSNS (33), /* ddiv */
640 32, /* l1 cache line size */
641 32, /* l1 cache */
642 512, /* l2 cache */
643 1, /* streams */
644 0, /* SF->DF convert */
645 };
646
647 /* Instruction costs on PPC601 processors. */
648 static const
649 struct processor_costs ppc601_cost = {
650 COSTS_N_INSNS (5), /* mulsi */
651 COSTS_N_INSNS (5), /* mulsi_const */
652 COSTS_N_INSNS (5), /* mulsi_const9 */
653 COSTS_N_INSNS (5), /* muldi */
654 COSTS_N_INSNS (36), /* divsi */
655 COSTS_N_INSNS (36), /* divdi */
656 COSTS_N_INSNS (4), /* fp */
657 COSTS_N_INSNS (5), /* dmul */
658 COSTS_N_INSNS (17), /* sdiv */
659 COSTS_N_INSNS (31), /* ddiv */
660 32, /* cache line size */
661 32, /* l1 cache */
662 256, /* l2 cache */
663 1, /* streams */
664 0, /* SF->DF convert */
665 };
666
667 /* Instruction costs on PPC603 processors. */
668 static const
669 struct processor_costs ppc603_cost = {
670 COSTS_N_INSNS (5), /* mulsi */
671 COSTS_N_INSNS (3), /* mulsi_const */
672 COSTS_N_INSNS (2), /* mulsi_const9 */
673 COSTS_N_INSNS (5), /* muldi */
674 COSTS_N_INSNS (37), /* divsi */
675 COSTS_N_INSNS (37), /* divdi */
676 COSTS_N_INSNS (3), /* fp */
677 COSTS_N_INSNS (4), /* dmul */
678 COSTS_N_INSNS (18), /* sdiv */
679 COSTS_N_INSNS (33), /* ddiv */
680 32, /* cache line size */
681 8, /* l1 cache */
682 64, /* l2 cache */
683 1, /* streams */
684 0, /* SF->DF convert */
685 };
686
687 /* Instruction costs on PPC604 processors. */
688 static const
689 struct processor_costs ppc604_cost = {
690 COSTS_N_INSNS (4), /* mulsi */
691 COSTS_N_INSNS (4), /* mulsi_const */
692 COSTS_N_INSNS (4), /* mulsi_const9 */
693 COSTS_N_INSNS (4), /* muldi */
694 COSTS_N_INSNS (20), /* divsi */
695 COSTS_N_INSNS (20), /* divdi */
696 COSTS_N_INSNS (3), /* fp */
697 COSTS_N_INSNS (3), /* dmul */
698 COSTS_N_INSNS (18), /* sdiv */
699 COSTS_N_INSNS (32), /* ddiv */
700 32, /* cache line size */
701 16, /* l1 cache */
702 512, /* l2 cache */
703 1, /* streams */
704 0, /* SF->DF convert */
705 };
706
707 /* Instruction costs on PPC604e processors. */
708 static const
709 struct processor_costs ppc604e_cost = {
710 COSTS_N_INSNS (2), /* mulsi */
711 COSTS_N_INSNS (2), /* mulsi_const */
712 COSTS_N_INSNS (2), /* mulsi_const9 */
713 COSTS_N_INSNS (2), /* muldi */
714 COSTS_N_INSNS (20), /* divsi */
715 COSTS_N_INSNS (20), /* divdi */
716 COSTS_N_INSNS (3), /* fp */
717 COSTS_N_INSNS (3), /* dmul */
718 COSTS_N_INSNS (18), /* sdiv */
719 COSTS_N_INSNS (32), /* ddiv */
720 32, /* cache line size */
721 32, /* l1 cache */
722 1024, /* l2 cache */
723 1, /* streams */
724 0, /* SF->DF convert */
725 };
726
727 /* Instruction costs on PPC620 processors. */
728 static const
729 struct processor_costs ppc620_cost = {
730 COSTS_N_INSNS (5), /* mulsi */
731 COSTS_N_INSNS (4), /* mulsi_const */
732 COSTS_N_INSNS (3), /* mulsi_const9 */
733 COSTS_N_INSNS (7), /* muldi */
734 COSTS_N_INSNS (21), /* divsi */
735 COSTS_N_INSNS (37), /* divdi */
736 COSTS_N_INSNS (3), /* fp */
737 COSTS_N_INSNS (3), /* dmul */
738 COSTS_N_INSNS (18), /* sdiv */
739 COSTS_N_INSNS (32), /* ddiv */
740 128, /* cache line size */
741 32, /* l1 cache */
742 1024, /* l2 cache */
743 1, /* streams */
744 0, /* SF->DF convert */
745 };
746
747 /* Instruction costs on PPC630 processors. */
748 static const
749 struct processor_costs ppc630_cost = {
750 COSTS_N_INSNS (5), /* mulsi */
751 COSTS_N_INSNS (4), /* mulsi_const */
752 COSTS_N_INSNS (3), /* mulsi_const9 */
753 COSTS_N_INSNS (7), /* muldi */
754 COSTS_N_INSNS (21), /* divsi */
755 COSTS_N_INSNS (37), /* divdi */
756 COSTS_N_INSNS (3), /* fp */
757 COSTS_N_INSNS (3), /* dmul */
758 COSTS_N_INSNS (17), /* sdiv */
759 COSTS_N_INSNS (21), /* ddiv */
760 128, /* cache line size */
761 64, /* l1 cache */
762 1024, /* l2 cache */
763 1, /* streams */
764 0, /* SF->DF convert */
765 };
766
767 /* Instruction costs on Cell processor. */
768 /* COSTS_N_INSNS (1) ~ one add. */
769 static const
770 struct processor_costs ppccell_cost = {
771 COSTS_N_INSNS (9/2)+2, /* mulsi */
772 COSTS_N_INSNS (6/2), /* mulsi_const */
773 COSTS_N_INSNS (6/2), /* mulsi_const9 */
774 COSTS_N_INSNS (15/2)+2, /* muldi */
775 COSTS_N_INSNS (38/2), /* divsi */
776 COSTS_N_INSNS (70/2), /* divdi */
777 COSTS_N_INSNS (10/2), /* fp */
778 COSTS_N_INSNS (10/2), /* dmul */
779 COSTS_N_INSNS (74/2), /* sdiv */
780 COSTS_N_INSNS (74/2), /* ddiv */
781 128, /* cache line size */
782 32, /* l1 cache */
783 512, /* l2 cache */
784 6, /* streams */
785 0, /* SF->DF convert */
786 };
787
788 /* Instruction costs on PPC750 and PPC7400 processors. */
789 static const
790 struct processor_costs ppc750_cost = {
791 COSTS_N_INSNS (5), /* mulsi */
792 COSTS_N_INSNS (3), /* mulsi_const */
793 COSTS_N_INSNS (2), /* mulsi_const9 */
794 COSTS_N_INSNS (5), /* muldi */
795 COSTS_N_INSNS (17), /* divsi */
796 COSTS_N_INSNS (17), /* divdi */
797 COSTS_N_INSNS (3), /* fp */
798 COSTS_N_INSNS (3), /* dmul */
799 COSTS_N_INSNS (17), /* sdiv */
800 COSTS_N_INSNS (31), /* ddiv */
801 32, /* cache line size */
802 32, /* l1 cache */
803 512, /* l2 cache */
804 1, /* streams */
805 0, /* SF->DF convert */
806 };
807
808 /* Instruction costs on PPC7450 processors. */
809 static const
810 struct processor_costs ppc7450_cost = {
811 COSTS_N_INSNS (4), /* mulsi */
812 COSTS_N_INSNS (3), /* mulsi_const */
813 COSTS_N_INSNS (3), /* mulsi_const9 */
814 COSTS_N_INSNS (4), /* muldi */
815 COSTS_N_INSNS (23), /* divsi */
816 COSTS_N_INSNS (23), /* divdi */
817 COSTS_N_INSNS (5), /* fp */
818 COSTS_N_INSNS (5), /* dmul */
819 COSTS_N_INSNS (21), /* sdiv */
820 COSTS_N_INSNS (35), /* ddiv */
821 32, /* cache line size */
822 32, /* l1 cache */
823 1024, /* l2 cache */
824 1, /* streams */
825 0, /* SF->DF convert */
826 };
827
828 /* Instruction costs on PPC8540 processors. */
829 static const
830 struct processor_costs ppc8540_cost = {
831 COSTS_N_INSNS (4), /* mulsi */
832 COSTS_N_INSNS (4), /* mulsi_const */
833 COSTS_N_INSNS (4), /* mulsi_const9 */
834 COSTS_N_INSNS (4), /* muldi */
835 COSTS_N_INSNS (19), /* divsi */
836 COSTS_N_INSNS (19), /* divdi */
837 COSTS_N_INSNS (4), /* fp */
838 COSTS_N_INSNS (4), /* dmul */
839 COSTS_N_INSNS (29), /* sdiv */
840 COSTS_N_INSNS (29), /* ddiv */
841 32, /* cache line size */
842 32, /* l1 cache */
843 256, /* l2 cache */
844 1, /* prefetch streams */
845 0, /* SF->DF convert */
846 };
847
848 /* Instruction costs on E300C2 and E300C3 cores. */
849 static const
850 struct processor_costs ppce300c2c3_cost = {
851 COSTS_N_INSNS (4), /* mulsi */
852 COSTS_N_INSNS (4), /* mulsi_const */
853 COSTS_N_INSNS (4), /* mulsi_const9 */
854 COSTS_N_INSNS (4), /* muldi */
855 COSTS_N_INSNS (19), /* divsi */
856 COSTS_N_INSNS (19), /* divdi */
857 COSTS_N_INSNS (3), /* fp */
858 COSTS_N_INSNS (4), /* dmul */
859 COSTS_N_INSNS (18), /* sdiv */
860 COSTS_N_INSNS (33), /* ddiv */
861 32, /* cache line size */
862 16, /* l1 cache */
863 16, /* l2 cache */
864 1, /* prefetch streams */
865 0, /* SF->DF convert */
866 };
867
868 /* Instruction costs on PPCE500MC processors. */
869 static const
870 struct processor_costs ppce500mc_cost = {
871 COSTS_N_INSNS (4), /* mulsi */
872 COSTS_N_INSNS (4), /* mulsi_const */
873 COSTS_N_INSNS (4), /* mulsi_const9 */
874 COSTS_N_INSNS (4), /* muldi */
875 COSTS_N_INSNS (14), /* divsi */
876 COSTS_N_INSNS (14), /* divdi */
877 COSTS_N_INSNS (8), /* fp */
878 COSTS_N_INSNS (10), /* dmul */
879 COSTS_N_INSNS (36), /* sdiv */
880 COSTS_N_INSNS (66), /* ddiv */
881 64, /* cache line size */
882 32, /* l1 cache */
883 128, /* l2 cache */
884 1, /* prefetch streams */
885 0, /* SF->DF convert */
886 };
887
888 /* Instruction costs on PPCE500MC64 processors. */
889 static const
890 struct processor_costs ppce500mc64_cost = {
891 COSTS_N_INSNS (4), /* mulsi */
892 COSTS_N_INSNS (4), /* mulsi_const */
893 COSTS_N_INSNS (4), /* mulsi_const9 */
894 COSTS_N_INSNS (4), /* muldi */
895 COSTS_N_INSNS (14), /* divsi */
896 COSTS_N_INSNS (14), /* divdi */
897 COSTS_N_INSNS (4), /* fp */
898 COSTS_N_INSNS (10), /* dmul */
899 COSTS_N_INSNS (36), /* sdiv */
900 COSTS_N_INSNS (66), /* ddiv */
901 64, /* cache line size */
902 32, /* l1 cache */
903 128, /* l2 cache */
904 1, /* prefetch streams */
905 0, /* SF->DF convert */
906 };
907
908 /* Instruction costs on PPCE5500 processors. */
909 static const
910 struct processor_costs ppce5500_cost = {
911 COSTS_N_INSNS (5), /* mulsi */
912 COSTS_N_INSNS (5), /* mulsi_const */
913 COSTS_N_INSNS (4), /* mulsi_const9 */
914 COSTS_N_INSNS (5), /* muldi */
915 COSTS_N_INSNS (14), /* divsi */
916 COSTS_N_INSNS (14), /* divdi */
917 COSTS_N_INSNS (7), /* fp */
918 COSTS_N_INSNS (10), /* dmul */
919 COSTS_N_INSNS (36), /* sdiv */
920 COSTS_N_INSNS (66), /* ddiv */
921 64, /* cache line size */
922 32, /* l1 cache */
923 128, /* l2 cache */
924 1, /* prefetch streams */
925 0, /* SF->DF convert */
926 };
927
928 /* Instruction costs on PPCE6500 processors. */
929 static const
930 struct processor_costs ppce6500_cost = {
931 COSTS_N_INSNS (5), /* mulsi */
932 COSTS_N_INSNS (5), /* mulsi_const */
933 COSTS_N_INSNS (4), /* mulsi_const9 */
934 COSTS_N_INSNS (5), /* muldi */
935 COSTS_N_INSNS (14), /* divsi */
936 COSTS_N_INSNS (14), /* divdi */
937 COSTS_N_INSNS (7), /* fp */
938 COSTS_N_INSNS (10), /* dmul */
939 COSTS_N_INSNS (36), /* sdiv */
940 COSTS_N_INSNS (66), /* ddiv */
941 64, /* cache line size */
942 32, /* l1 cache */
943 128, /* l2 cache */
944 1, /* prefetch streams */
945 0, /* SF->DF convert */
946 };
947
948 /* Instruction costs on AppliedMicro Titan processors. */
949 static const
950 struct processor_costs titan_cost = {
951 COSTS_N_INSNS (5), /* mulsi */
952 COSTS_N_INSNS (5), /* mulsi_const */
953 COSTS_N_INSNS (5), /* mulsi_const9 */
954 COSTS_N_INSNS (5), /* muldi */
955 COSTS_N_INSNS (18), /* divsi */
956 COSTS_N_INSNS (18), /* divdi */
957 COSTS_N_INSNS (10), /* fp */
958 COSTS_N_INSNS (10), /* dmul */
959 COSTS_N_INSNS (46), /* sdiv */
960 COSTS_N_INSNS (72), /* ddiv */
961 32, /* cache line size */
962 32, /* l1 cache */
963 512, /* l2 cache */
964 1, /* prefetch streams */
965 0, /* SF->DF convert */
966 };
967
968 /* Instruction costs on POWER4 and POWER5 processors. */
969 static const
970 struct processor_costs power4_cost = {
971 COSTS_N_INSNS (3), /* mulsi */
972 COSTS_N_INSNS (2), /* mulsi_const */
973 COSTS_N_INSNS (2), /* mulsi_const9 */
974 COSTS_N_INSNS (4), /* muldi */
975 COSTS_N_INSNS (18), /* divsi */
976 COSTS_N_INSNS (34), /* divdi */
977 COSTS_N_INSNS (3), /* fp */
978 COSTS_N_INSNS (3), /* dmul */
979 COSTS_N_INSNS (17), /* sdiv */
980 COSTS_N_INSNS (17), /* ddiv */
981 128, /* cache line size */
982 32, /* l1 cache */
983 1024, /* l2 cache */
984 8, /* prefetch streams */
985 0, /* SF->DF convert */
986 };
987
988 /* Instruction costs on POWER6 processors. */
989 static const
990 struct processor_costs power6_cost = {
991 COSTS_N_INSNS (8), /* mulsi */
992 COSTS_N_INSNS (8), /* mulsi_const */
993 COSTS_N_INSNS (8), /* mulsi_const9 */
994 COSTS_N_INSNS (8), /* muldi */
995 COSTS_N_INSNS (22), /* divsi */
996 COSTS_N_INSNS (28), /* divdi */
997 COSTS_N_INSNS (3), /* fp */
998 COSTS_N_INSNS (3), /* dmul */
999 COSTS_N_INSNS (13), /* sdiv */
1000 COSTS_N_INSNS (16), /* ddiv */
1001 128, /* cache line size */
1002 64, /* l1 cache */
1003 2048, /* l2 cache */
1004 16, /* prefetch streams */
1005 0, /* SF->DF convert */
1006 };
1007
1008 /* Instruction costs on POWER7 processors. */
1009 static const
1010 struct processor_costs power7_cost = {
1011 COSTS_N_INSNS (2), /* mulsi */
1012 COSTS_N_INSNS (2), /* mulsi_const */
1013 COSTS_N_INSNS (2), /* mulsi_const9 */
1014 COSTS_N_INSNS (2), /* muldi */
1015 COSTS_N_INSNS (18), /* divsi */
1016 COSTS_N_INSNS (34), /* divdi */
1017 COSTS_N_INSNS (3), /* fp */
1018 COSTS_N_INSNS (3), /* dmul */
1019 COSTS_N_INSNS (13), /* sdiv */
1020 COSTS_N_INSNS (16), /* ddiv */
1021 128, /* cache line size */
1022 32, /* l1 cache */
1023 256, /* l2 cache */
1024 12, /* prefetch streams */
1025 COSTS_N_INSNS (3), /* SF->DF convert */
1026 };
1027
1028 /* Instruction costs on POWER8 processors. */
1029 static const
1030 struct processor_costs power8_cost = {
1031 COSTS_N_INSNS (3), /* mulsi */
1032 COSTS_N_INSNS (3), /* mulsi_const */
1033 COSTS_N_INSNS (3), /* mulsi_const9 */
1034 COSTS_N_INSNS (3), /* muldi */
1035 COSTS_N_INSNS (19), /* divsi */
1036 COSTS_N_INSNS (35), /* divdi */
1037 COSTS_N_INSNS (3), /* fp */
1038 COSTS_N_INSNS (3), /* dmul */
1039 COSTS_N_INSNS (14), /* sdiv */
1040 COSTS_N_INSNS (17), /* ddiv */
1041 128, /* cache line size */
1042 32, /* l1 cache */
1043 512, /* l2 cache */
1044 12, /* prefetch streams */
1045 COSTS_N_INSNS (3), /* SF->DF convert */
1046 };
1047
1048 /* Instruction costs on POWER9 processors. */
1049 static const
1050 struct processor_costs power9_cost = {
1051 COSTS_N_INSNS (3), /* mulsi */
1052 COSTS_N_INSNS (3), /* mulsi_const */
1053 COSTS_N_INSNS (3), /* mulsi_const9 */
1054 COSTS_N_INSNS (3), /* muldi */
1055 COSTS_N_INSNS (8), /* divsi */
1056 COSTS_N_INSNS (12), /* divdi */
1057 COSTS_N_INSNS (3), /* fp */
1058 COSTS_N_INSNS (3), /* dmul */
1059 COSTS_N_INSNS (13), /* sdiv */
1060 COSTS_N_INSNS (18), /* ddiv */
1061 128, /* cache line size */
1062 32, /* l1 cache */
1063 512, /* l2 cache */
1064 8, /* prefetch streams */
1065 COSTS_N_INSNS (3), /* SF->DF convert */
1066 };
1067
1068 /* Instruction costs on POWER10 processors. */
1069 static const
1070 struct processor_costs power10_cost = {
1071 COSTS_N_INSNS (2), /* mulsi */
1072 COSTS_N_INSNS (2), /* mulsi_const */
1073 COSTS_N_INSNS (2), /* mulsi_const9 */
1074 COSTS_N_INSNS (2), /* muldi */
1075 COSTS_N_INSNS (6), /* divsi */
1076 COSTS_N_INSNS (6), /* divdi */
1077 COSTS_N_INSNS (2), /* fp */
1078 COSTS_N_INSNS (2), /* dmul */
1079 COSTS_N_INSNS (11), /* sdiv */
1080 COSTS_N_INSNS (13), /* ddiv */
1081 128, /* cache line size */
1082 32, /* l1 cache */
1083 512, /* l2 cache */
1084 16, /* prefetch streams */
1085 COSTS_N_INSNS (2), /* SF->DF convert */
1086 };
1087
1088 /* Instruction costs on POWER A2 processors. */
1089 static const
1090 struct processor_costs ppca2_cost = {
1091 COSTS_N_INSNS (16), /* mulsi */
1092 COSTS_N_INSNS (16), /* mulsi_const */
1093 COSTS_N_INSNS (16), /* mulsi_const9 */
1094 COSTS_N_INSNS (16), /* muldi */
1095 COSTS_N_INSNS (22), /* divsi */
1096 COSTS_N_INSNS (28), /* divdi */
1097 COSTS_N_INSNS (3), /* fp */
1098 COSTS_N_INSNS (3), /* dmul */
1099 COSTS_N_INSNS (59), /* sdiv */
1100 COSTS_N_INSNS (72), /* ddiv */
1101 64, /* cache line size */
1102 16, /* l1 cache */
1103 2048, /* l2 cache */
1104 16, /* prefetch streams */
1105 0, /* SF->DF convert */
1106 };
1107
1108 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1109 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
1110
1111 \f
1112 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool,
1113 code_helper = ERROR_MARK);
1114 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1115 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1116 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1117 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1118 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
1119 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1120 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1121 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1122 bool);
1123 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
1124 unsigned int);
1125 static bool is_microcoded_insn (rtx_insn *);
1126 static bool is_nonpipeline_insn (rtx_insn *);
1127 static bool is_cracked_insn (rtx_insn *);
1128 static bool is_load_insn (rtx, rtx *);
1129 static bool is_store_insn (rtx, rtx *);
1130 static bool set_to_load_agen (rtx_insn *, rtx_insn *);
1131 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1132 static bool insn_must_be_first_in_group (rtx_insn *);
1133 static bool insn_must_be_last_in_group (rtx_insn *);
1134 bool easy_vector_constant (rtx, machine_mode);
1135 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1136 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1137 #if TARGET_MACHO
1138 static tree get_prev_label (tree);
1139 #endif
1140 static bool rs6000_mode_dependent_address (const_rtx);
1141 static bool rs6000_debug_mode_dependent_address (const_rtx);
1142 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
1143 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1144 machine_mode, rtx);
1145 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1146 machine_mode,
1147 rtx);
1148 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1149 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1150 enum reg_class);
1151 static bool rs6000_debug_secondary_memory_needed (machine_mode,
1152 reg_class_t,
1153 reg_class_t);
1154 static bool rs6000_debug_can_change_mode_class (machine_mode,
1155 machine_mode,
1156 reg_class_t);
1157
1158 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1159 = rs6000_mode_dependent_address;
1160
1161 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1162 machine_mode, rtx)
1163 = rs6000_secondary_reload_class;
1164
1165 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1166 = rs6000_preferred_reload_class;
1167
1168 const int INSN_NOT_AVAILABLE = -1;
1169
1170 static void rs6000_print_isa_options (FILE *, int, const char *,
1171 HOST_WIDE_INT);
1172 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
1173
1174 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1175 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1176 enum rs6000_reg_type,
1177 machine_mode,
1178 secondary_reload_info *,
1179 bool);
1180 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1181
1182 /* Hash table stuff for keeping track of TOC entries. */
1183
1184 struct GTY((for_user)) toc_hash_struct
1185 {
1186 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1187 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1188 rtx key;
1189 machine_mode key_mode;
1190 int labelno;
1191 };
1192
1193 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1194 {
1195 static hashval_t hash (toc_hash_struct *);
1196 static bool equal (toc_hash_struct *, toc_hash_struct *);
1197 };
1198
1199 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
1200
1201
1202 \f
1203 /* Default register names. */
1204 char rs6000_reg_names[][8] =
1205 {
1206 /* GPRs */
1207 "0", "1", "2", "3", "4", "5", "6", "7",
1208 "8", "9", "10", "11", "12", "13", "14", "15",
1209 "16", "17", "18", "19", "20", "21", "22", "23",
1210 "24", "25", "26", "27", "28", "29", "30", "31",
1211 /* FPRs */
1212 "0", "1", "2", "3", "4", "5", "6", "7",
1213 "8", "9", "10", "11", "12", "13", "14", "15",
1214 "16", "17", "18", "19", "20", "21", "22", "23",
1215 "24", "25", "26", "27", "28", "29", "30", "31",
1216 /* VRs */
1217 "0", "1", "2", "3", "4", "5", "6", "7",
1218 "8", "9", "10", "11", "12", "13", "14", "15",
1219 "16", "17", "18", "19", "20", "21", "22", "23",
1220 "24", "25", "26", "27", "28", "29", "30", "31",
1221 /* lr ctr ca ap */
1222 "lr", "ctr", "ca", "ap",
1223 /* cr0..cr7 */
1224 "0", "1", "2", "3", "4", "5", "6", "7",
1225 /* vrsave vscr sfp */
1226 "vrsave", "vscr", "sfp",
1227 };
1228
1229 #ifdef TARGET_REGNAMES
1230 static const char alt_reg_names[][8] =
1231 {
1232 /* GPRs */
1233 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1234 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1235 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1236 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1237 /* FPRs */
1238 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1239 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1240 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1241 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1242 /* VRs */
1243 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1244 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1245 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1246 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1247 /* lr ctr ca ap */
1248 "lr", "ctr", "ca", "ap",
1249 /* cr0..cr7 */
1250 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1251 /* vrsave vscr sfp */
1252 "vrsave", "vscr", "sfp",
1253 };
1254 #endif
1255
1256 /* Table of valid machine attributes. */
1257
1258 static const struct attribute_spec rs6000_attribute_table[] =
1259 {
1260 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1261 affects_type_identity, handler, exclude } */
1262 { "altivec", 1, 1, false, true, false, false,
1263 rs6000_handle_altivec_attribute, NULL },
1264 { "longcall", 0, 0, false, true, true, false,
1265 rs6000_handle_longcall_attribute, NULL },
1266 { "shortcall", 0, 0, false, true, true, false,
1267 rs6000_handle_longcall_attribute, NULL },
1268 { "ms_struct", 0, 0, false, false, false, false,
1269 rs6000_handle_struct_attribute, NULL },
1270 { "gcc_struct", 0, 0, false, false, false, false,
1271 rs6000_handle_struct_attribute, NULL },
1272 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1273 SUBTARGET_ATTRIBUTE_TABLE,
1274 #endif
1275 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1276 };
1277 \f
1278 #ifndef TARGET_PROFILE_KERNEL
1279 #define TARGET_PROFILE_KERNEL 0
1280 #endif
1281 \f
1282 /* Initialize the GCC target structure. */
1283 #undef TARGET_ATTRIBUTE_TABLE
1284 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1285 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1286 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1287 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1288 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1289
1290 #undef TARGET_ASM_ALIGNED_DI_OP
1291 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1292
1293 /* Default unaligned ops are only provided for ELF. Find the ops needed
1294 for non-ELF systems. */
1295 #ifndef OBJECT_FORMAT_ELF
1296 #if TARGET_XCOFF
1297 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1298 64-bit targets. */
1299 #undef TARGET_ASM_UNALIGNED_HI_OP
1300 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1301 #undef TARGET_ASM_UNALIGNED_SI_OP
1302 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1303 #undef TARGET_ASM_UNALIGNED_DI_OP
1304 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1305 #else
1306 /* For Darwin. */
1307 #undef TARGET_ASM_UNALIGNED_HI_OP
1308 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1309 #undef TARGET_ASM_UNALIGNED_SI_OP
1310 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1311 #undef TARGET_ASM_UNALIGNED_DI_OP
1312 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1313 #undef TARGET_ASM_ALIGNED_DI_OP
1314 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1315 #endif
1316 #endif
1317
1318 /* This hook deals with fixups for relocatable code and DI-mode objects
1319 in 64-bit code. */
1320 #undef TARGET_ASM_INTEGER
1321 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1322
1323 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1324 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1325 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1326 #endif
1327
1328 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
1329 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
1330 rs6000_print_patchable_function_entry
1331
1332 #undef TARGET_SET_UP_BY_PROLOGUE
1333 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1334
1335 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1336 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1337 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1338 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1339 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1340 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1341 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1342 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1343 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1344 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1345 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1346 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1347
1348 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1349 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1350
1351 #undef TARGET_INTERNAL_ARG_POINTER
1352 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1353
1354 #undef TARGET_HAVE_TLS
1355 #define TARGET_HAVE_TLS HAVE_AS_TLS
1356
1357 #undef TARGET_CANNOT_FORCE_CONST_MEM
1358 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1359
1360 #undef TARGET_DELEGITIMIZE_ADDRESS
1361 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1362
1363 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1364 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1365
1366 #undef TARGET_LEGITIMATE_COMBINED_INSN
1367 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1368
1369 #undef TARGET_ASM_FUNCTION_PROLOGUE
1370 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1371 #undef TARGET_ASM_FUNCTION_EPILOGUE
1372 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1373
1374 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1375 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1376
1377 #undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
1378 #define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
1379
1380 #undef TARGET_LEGITIMIZE_ADDRESS
1381 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1382
1383 #undef TARGET_SCHED_VARIABLE_ISSUE
1384 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1385
1386 #undef TARGET_SCHED_ISSUE_RATE
1387 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1388 #undef TARGET_SCHED_ADJUST_COST
1389 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1390 #undef TARGET_SCHED_ADJUST_PRIORITY
1391 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1392 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1393 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1394 #undef TARGET_SCHED_INIT
1395 #define TARGET_SCHED_INIT rs6000_sched_init
1396 #undef TARGET_SCHED_FINISH
1397 #define TARGET_SCHED_FINISH rs6000_sched_finish
1398 #undef TARGET_SCHED_REORDER
1399 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1400 #undef TARGET_SCHED_REORDER2
1401 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1402
1403 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1404 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1405
1406 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1407 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1408
1409 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1410 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1411 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1412 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1413 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1414 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1415 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1416 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1417
1418 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1419 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1420
1421 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1422 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1423 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1424 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1425 rs6000_builtin_support_vector_misalignment
1426 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1427 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1428 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1429 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1430 rs6000_builtin_vectorization_cost
1431 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1432 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1433 rs6000_preferred_simd_mode
1434 #undef TARGET_VECTORIZE_CREATE_COSTS
1435 #define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs
1436
1437 #undef TARGET_LOOP_UNROLL_ADJUST
1438 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1439
1440 #undef TARGET_INIT_BUILTINS
1441 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1442 #undef TARGET_BUILTIN_DECL
1443 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1444
1445 #undef TARGET_FOLD_BUILTIN
1446 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1447 #undef TARGET_GIMPLE_FOLD_BUILTIN
1448 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1449
1450 #undef TARGET_EXPAND_BUILTIN
1451 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1452
1453 #undef TARGET_MANGLE_TYPE
1454 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1455
1456 #undef TARGET_INIT_LIBFUNCS
1457 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1458
1459 #if TARGET_MACHO
1460 #undef TARGET_BINDS_LOCAL_P
1461 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1462 #endif
1463
1464 #undef TARGET_MS_BITFIELD_LAYOUT_P
1465 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1466
1467 #undef TARGET_ASM_OUTPUT_MI_THUNK
1468 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1469
1470 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1471 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1472
1473 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1474 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1475
1476 #undef TARGET_REGISTER_MOVE_COST
1477 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1478 #undef TARGET_MEMORY_MOVE_COST
1479 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1480 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1481 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1482 rs6000_ira_change_pseudo_allocno_class
1483 #undef TARGET_CANNOT_COPY_INSN_P
1484 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1485 #undef TARGET_RTX_COSTS
1486 #define TARGET_RTX_COSTS rs6000_rtx_costs
1487 #undef TARGET_ADDRESS_COST
1488 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1489 #undef TARGET_INSN_COST
1490 #define TARGET_INSN_COST rs6000_insn_cost
1491
1492 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1493 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1494
1495 #undef TARGET_PROMOTE_FUNCTION_MODE
1496 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1497
1498 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1499 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1500
1501 #undef TARGET_RETURN_IN_MEMORY
1502 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1503
1504 #undef TARGET_RETURN_IN_MSB
1505 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1506
1507 #undef TARGET_SETUP_INCOMING_VARARGS
1508 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1509
1510 /* Always strict argument naming on rs6000. */
1511 #undef TARGET_STRICT_ARGUMENT_NAMING
1512 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1513 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1514 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1515 #undef TARGET_SPLIT_COMPLEX_ARG
1516 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1517 #undef TARGET_MUST_PASS_IN_STACK
1518 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1519 #undef TARGET_PASS_BY_REFERENCE
1520 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1521 #undef TARGET_ARG_PARTIAL_BYTES
1522 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1523 #undef TARGET_FUNCTION_ARG_ADVANCE
1524 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1525 #undef TARGET_FUNCTION_ARG
1526 #define TARGET_FUNCTION_ARG rs6000_function_arg
1527 #undef TARGET_FUNCTION_ARG_PADDING
1528 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1529 #undef TARGET_FUNCTION_ARG_BOUNDARY
1530 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1531
1532 #undef TARGET_BUILD_BUILTIN_VA_LIST
1533 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1534
1535 #undef TARGET_EXPAND_BUILTIN_VA_START
1536 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1537
1538 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1539 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1540
1541 #undef TARGET_EH_RETURN_FILTER_MODE
1542 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1543
1544 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1545 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1546
1547 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1548 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1549
1550 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
1551 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
1552 rs6000_libgcc_floating_mode_supported_p
1553
1554 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1555 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1556
1557 #undef TARGET_FLOATN_MODE
1558 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1559
1560 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1561 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1562
1563 #undef TARGET_MD_ASM_ADJUST
1564 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1565
1566 #undef TARGET_OPTION_OVERRIDE
1567 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1568
1569 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1570 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1571 rs6000_builtin_vectorized_function
1572
1573 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1574 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1575 rs6000_builtin_md_vectorized_function
1576
1577 #undef TARGET_STACK_PROTECT_GUARD
1578 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1579
1580 #if !TARGET_MACHO
1581 #undef TARGET_STACK_PROTECT_FAIL
1582 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1583 #endif
1584
1585 #ifdef HAVE_AS_TLS
1586 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1587 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1588 #endif
1589
1590 /* Use a 32-bit anchor range. This leads to sequences like:
1591
1592 addis tmp,anchor,high
1593 addi dest,tmp,low
1594
1595 where tmp itself acts as an anchor, and can be shared between
1596 accesses to the same 64k page. */
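/* As a worked example of the high/low split above (editorial sketch): an
   anchor offset of 0x12348765 would be materialized as
   high = (0x12348765 + 0x8000) >> 16 = 0x1235 and low = 0x8765
   (sign-extended to -0x789b), since addis shifts its 16-bit immediate left
   by 16 and the low part is a signed 16-bit displacement:
   0x12350000 - 0x789b = 0x12348765.  */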
1597 #undef TARGET_MIN_ANCHOR_OFFSET
1598 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1599 #undef TARGET_MAX_ANCHOR_OFFSET
1600 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1601 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1602 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1603 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1604 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1605
1606 #undef TARGET_BUILTIN_RECIPROCAL
1607 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1608
1609 #undef TARGET_SECONDARY_RELOAD
1610 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1611 #undef TARGET_SECONDARY_MEMORY_NEEDED
1612 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1613 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1614 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1615
1616 #undef TARGET_LEGITIMATE_ADDRESS_P
1617 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1618
1619 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1620 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1621
1622 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1623 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1624
1625 #undef TARGET_CAN_ELIMINATE
1626 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1627
1628 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1629 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1630
1631 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1632 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1633
1634 #undef TARGET_TRAMPOLINE_INIT
1635 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1636
1637 #undef TARGET_FUNCTION_VALUE
1638 #define TARGET_FUNCTION_VALUE rs6000_function_value
1639
1640 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1641 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1642
1643 #undef TARGET_OPTION_SAVE
1644 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1645
1646 #undef TARGET_OPTION_RESTORE
1647 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1648
1649 #undef TARGET_OPTION_PRINT
1650 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1651
1652 #undef TARGET_CAN_INLINE_P
1653 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1654
1655 #undef TARGET_SET_CURRENT_FUNCTION
1656 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1657
1658 #undef TARGET_LEGITIMATE_CONSTANT_P
1659 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1660
1661 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1662 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1663
1664 #undef TARGET_CAN_USE_DOLOOP_P
1665 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1666
1667 #undef TARGET_PREDICT_DOLOOP_P
1668 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1669
1670 #undef TARGET_HAVE_COUNT_REG_DECR_P
1671 #define TARGET_HAVE_COUNT_REG_DECR_P true
1672
1673 /* 1000000000 is treated as infinite cost in IVOPTs. */
1674 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1675 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1676
1677 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1678 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1679
1680 #undef TARGET_PREFERRED_DOLOOP_MODE
1681 #define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1682
1683 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1684 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1685
1686 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1687 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1688 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1689 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1690 #undef TARGET_UNWIND_WORD_MODE
1691 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1692
1693 #undef TARGET_OFFLOAD_OPTIONS
1694 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1695
1696 #undef TARGET_C_MODE_FOR_SUFFIX
1697 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1698
1699 #undef TARGET_INVALID_BINARY_OP
1700 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1701
1702 #undef TARGET_OPTAB_SUPPORTED_P
1703 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1704
1705 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1706 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1707
1708 #undef TARGET_COMPARE_VERSION_PRIORITY
1709 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1710
1711 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1712 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1713 rs6000_generate_version_dispatcher_body
1714
1715 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1716 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1717 rs6000_get_function_versions_dispatcher
1718
1719 #undef TARGET_OPTION_FUNCTION_VERSIONS
1720 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1721
1722 #undef TARGET_HARD_REGNO_NREGS
1723 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1724 #undef TARGET_HARD_REGNO_MODE_OK
1725 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1726
1727 #undef TARGET_MODES_TIEABLE_P
1728 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1729
1730 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1731 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1732 rs6000_hard_regno_call_part_clobbered
1733
1734 #undef TARGET_SLOW_UNALIGNED_ACCESS
1735 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1736
1737 #undef TARGET_CAN_CHANGE_MODE_CLASS
1738 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1739
1740 #undef TARGET_CONSTANT_ALIGNMENT
1741 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1742
1743 #undef TARGET_STARTING_FRAME_OFFSET
1744 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1745
1746 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1747 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1748
1749 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1750 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1751
1752 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1753 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1754 rs6000_cannot_substitute_mem_equiv_p
1755
1756 #undef TARGET_INVALID_CONVERSION
1757 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1758
1759 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1760 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1761
1762 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1763 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
1764
1765 #undef TARGET_CONST_ANCHOR
1766 #define TARGET_CONST_ANCHOR 0x8000
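/* Editorial note: with a const anchor of 0x8000, a constant already live in
   a register can be reused to synthesize any constant within a signed
   16-bit addi immediate of it, e.g. (illustrative, not from this file):

        li   r9,0x7000          # r9 = 0x7000
        addi r10,r9,0x123       # r10 = 0x7123, derived from the anchor

   instead of materializing the second constant from scratch.  */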
1767
1768 \f
1769
1770 /* Processor table. */
1771 struct rs6000_ptt
1772 {
1773 const char *const name; /* Canonical processor name. */
1774 const enum processor_type processor; /* Processor type enum value. */
1775 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1776 };
1777
1778 static struct rs6000_ptt const processor_target_table[] =
1779 {
1780 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1781 #include "rs6000-cpus.def"
1782 #undef RS6000_CPU
1783 };
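/* For illustration, an RS6000_CPU entry in rs6000-cpus.def has the shape
   (flags shown are indicative only; the .def file is authoritative):

     RS6000_CPU ("power9", PROCESSOR_POWER9,
                 MASK_POWERPC64 | ISA_3_0_MASKS_SERVER)

   which the X-macro above turns into one initializer of this table.  */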
1784
1785 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1786 name is invalid. */
1787
1788 static int
1789 rs6000_cpu_name_lookup (const char *name)
1790 {
1791 size_t i;
1792
1793 if (name != NULL)
1794 {
1795 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1796 if (! strcmp (name, processor_target_table[i].name))
1797 return (int)i;
1798 }
1799
1800 return -1;
1801 }
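/* Example use (hypothetical caller): map a -mcpu= string to its table
   entry, relying on the negative index for unknown names:

     int idx = rs6000_cpu_name_lookup ("power9");
     if (idx >= 0)
       {
         enum processor_type cpu = processor_target_table[idx].processor;
         ...
       }
 */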
1802
1803 \f
1804 /* Return number of consecutive hard regs needed starting at reg REGNO
1805 to hold something of mode MODE.
1806 This is ordinarily the length in words of a value of mode MODE
1807 but can be less for certain modes in special long registers.
1808
1809 POWER and PowerPC GPRs hold 32 bits worth;
1810 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1811
1812 static int
1813 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1814 {
1815 unsigned HOST_WIDE_INT reg_size;
1816
1817 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1818 128-bit floating point that can go in vector registers, which use VSX
1819 memory addressing. */
1820 if (FP_REGNO_P (regno))
1821 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1822 ? UNITS_PER_VSX_WORD
1823 : UNITS_PER_FP_WORD);
1824
1825 else if (ALTIVEC_REGNO_P (regno))
1826 reg_size = UNITS_PER_ALTIVEC_WORD;
1827
1828 else
1829 reg_size = UNITS_PER_WORD;
1830
1831 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1832 }
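/* Worked example of the ceiling division above: a 16-byte vector mode in a
   VSX register (reg_size == UNITS_PER_VSX_WORD == 16) needs
   (16 + 16 - 1) / 16 == 1 register, while the same 16 bytes in 64-bit GPRs
   (reg_size == 8) need (16 + 8 - 1) / 8 == 2 consecutive registers.  */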
1833
1834 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1835 MODE. */
1836 static int
1837 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1838 {
1839 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1840
1841 if (COMPLEX_MODE_P (mode))
1842 mode = GET_MODE_INNER (mode);
1843
1844 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1845 registers. */
1846 if (mode == OOmode)
1847 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1848
1849 /* MMA accumulator modes need FPR registers divisible by 4. */
1850 if (mode == XOmode)
1851 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
1852
1853 /* PTImode can only go in GPRs. Quad word memory operations require
1854 even/odd register pairs, and we use PTImode to deal with quad word
1855 memory operations. Don't allow quad words in the argument or frame
1856 pointer registers, just registers 0..31. */
1857 if (mode == PTImode)
1858 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1859 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1860 && ((regno & 1) == 0));
1861
1862 /* The VSX registers that overlap the FPRs are wider than the FPRs on non-VSX
1863 implementations. Don't allow an item to be split between an FP register
1864 and an Altivec register. Allow TImode in all VSX registers if the user
1865 asked for it. */
1866 if (TARGET_VSX && VSX_REGNO_P (regno)
1867 && (VECTOR_MEM_VSX_P (mode)
1868 || VECTOR_ALIGNMENT_P (mode)
1869 || reg_addr[mode].scalar_in_vmx_p
1870 || mode == TImode
1871 || (TARGET_VADDUQM && mode == V1TImode)))
1872 {
1873 if (FP_REGNO_P (regno))
1874 return FP_REGNO_P (last_regno);
1875
1876 if (ALTIVEC_REGNO_P (regno))
1877 {
1878 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1879 return 0;
1880
1881 return ALTIVEC_REGNO_P (last_regno);
1882 }
1883 }
1884
1885 /* The GPRs can hold any mode, but values bigger than one register
1886 cannot go past R31. */
1887 if (INT_REGNO_P (regno))
1888 return INT_REGNO_P (last_regno);
1889
1890 /* The float registers (except for VSX vector modes) can only hold floating
1891 modes and DImode. */
1892 if (FP_REGNO_P (regno))
1893 {
1894 if (VECTOR_ALIGNMENT_P (mode))
1895 return false;
1896
1897 if (SCALAR_FLOAT_MODE_P (mode)
1898 && (mode != TDmode || (regno % 2) == 0)
1899 && FP_REGNO_P (last_regno))
1900 return 1;
1901
1902 if (GET_MODE_CLASS (mode) == MODE_INT)
1903 {
1904 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1905 return 1;
1906
1907 if (TARGET_P8_VECTOR && (mode == SImode))
1908 return 1;
1909
1910 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1911 return 1;
1912 }
1913
1914 return 0;
1915 }
1916
1917 /* The CR register can only hold CC modes. */
1918 if (CR_REGNO_P (regno))
1919 return GET_MODE_CLASS (mode) == MODE_CC;
1920
1921 if (CA_REGNO_P (regno))
1922 return mode == Pmode || mode == SImode;
1923
1924 /* AltiVec modes can only go in AltiVec registers. */
1925 if (ALTIVEC_REGNO_P (regno))
1926 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1927 || mode == V1TImode);
1928
1929 /* We cannot put non-VSX TImode or PTImode anywhere except a general register,
1930 and the value must fit within the register set. */
1931
1932 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1933 }
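/* A few concrete consequences of the checks above: OOmode is rejected for
   any odd-numbered VSX register by the (regno & 1) test; XOmode requires
   an FPR number that is a multiple of 4; and TDmode must start on an even
   FPR because of the (regno % 2) == 0 test.  */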
1934
1935 /* Implement TARGET_HARD_REGNO_NREGS. */
1936
1937 static unsigned int
1938 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1939 {
1940 return rs6000_hard_regno_nregs[mode][regno];
1941 }
1942
1943 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1944
1945 static bool
1946 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1947 {
1948 return rs6000_hard_regno_mode_ok_p[mode][regno];
1949 }
1950
1951 /* Implement TARGET_MODES_TIEABLE_P.
1952
1953 PTImode cannot tie with other modes because PTImode is restricted to even
1954 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1955 57744).
1956
1957 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1958 registers) or XOmode (vector quad, restricted to FPR registers divisible
1959 by 4) to tie with other modes.
1960
1961 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1962 128-bit floating point on VSX systems ties with other vectors. */
1963
1964 static bool
1965 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1966 {
1967 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
1968 || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
1969 return mode1 == mode2;
1970
1971 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1972 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1973 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1974 return false;
1975
1976 if (SCALAR_FLOAT_MODE_P (mode1))
1977 return SCALAR_FLOAT_MODE_P (mode2);
1978 if (SCALAR_FLOAT_MODE_P (mode2))
1979 return false;
1980
1981 if (GET_MODE_CLASS (mode1) == MODE_CC)
1982 return GET_MODE_CLASS (mode2) == MODE_CC;
1983 if (GET_MODE_CLASS (mode2) == MODE_CC)
1984 return false;
1985
1986 return true;
1987 }
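/* Examples of the resulting relation: V4SImode ties with V2DFmode (both
   Altivec/VSX vector modes) and SFmode ties with DFmode (both scalar
   float), but DFmode does not tie with V2DFmode, and PTImode, OOmode and
   XOmode each tie only with themselves.  */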
1988
1989 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1990
1991 static bool
1992 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
1993 machine_mode mode)
1994 {
1995 if (TARGET_32BIT
1996 && TARGET_POWERPC64
1997 && GET_MODE_SIZE (mode) > 4
1998 && INT_REGNO_P (regno))
1999 return true;
2000
2001 if (TARGET_VSX
2002 && FP_REGNO_P (regno)
2003 && GET_MODE_SIZE (mode) > 8
2004 && !FLOAT128_2REG_P (mode))
2005 return true;
2006
2007 return false;
2008 }
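/* Concrete reading of the tests above: on -m32 with 64-bit instructions
   (TARGET_32BIT && TARGET_POWERPC64), only the low 32 bits of a GPR are
   preserved across calls, so an 8-byte value in a GPR is partially
   clobbered; likewise only the 64-bit FPR half of a VSX register is
   preserved, so a 16-byte vector living there is partially clobbered.  */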
2009
2010 /* Print interesting facts about registers. */
2011 static void
2012 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2013 {
2014 int r, m;
2015
2016 for (r = first_regno; r <= last_regno; ++r)
2017 {
2018 const char *comma = "";
2019 int len;
2020
2021 if (first_regno == last_regno)
2022 fprintf (stderr, "%s:\t", reg_name);
2023 else
2024 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2025
2026 len = 8;
2027 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2028 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2029 {
2030 if (len > 70)
2031 {
2032 fprintf (stderr, ",\n\t");
2033 len = 8;
2034 comma = "";
2035 }
2036
2037 if (rs6000_hard_regno_nregs[m][r] > 1)
2038 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2039 rs6000_hard_regno_nregs[m][r]);
2040 else
2041 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2042
2043 comma = ", ";
2044 }
2045
2046 if (call_used_or_fixed_reg_p (r))
2047 {
2048 if (len > 70)
2049 {
2050 fprintf (stderr, ",\n\t");
2051 len = 8;
2052 comma = "";
2053 }
2054
2055 len += fprintf (stderr, "%s%s", comma, "call-used");
2056 comma = ", ";
2057 }
2058
2059 if (fixed_regs[r])
2060 {
2061 if (len > 70)
2062 {
2063 fprintf (stderr, ",\n\t");
2064 len = 8;
2065 comma = "";
2066 }
2067
2068 len += fprintf (stderr, "%s%s", comma, "fixed");
2069 comma = ", ";
2070 }
2071
2072 if (len > 70)
2073 {
2074 fprintf (stderr, ",\n\t");
2075 comma = "";
2076 }
2077
2078 len += fprintf (stderr, "%sreg-class = %s", comma,
2079 reg_class_names[(int)rs6000_regno_regclass[r]]);
2080 comma = ", ";
2081
2082 if (len > 70)
2083 {
2084 fprintf (stderr, ",\n\t");
2085 comma = "";
2086 }
2087
2088 fprintf (stderr, "%sregno = %d\n", comma, r);
2089 }
2090 }
2091
2092 static const char *
2093 rs6000_debug_vector_unit (enum rs6000_vector v)
2094 {
2095 const char *ret;
2096
2097 switch (v)
2098 {
2099 case VECTOR_NONE: ret = "none"; break;
2100 case VECTOR_ALTIVEC: ret = "altivec"; break;
2101 case VECTOR_VSX: ret = "vsx"; break;
2102 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2103 default: ret = "unknown"; break;
2104 }
2105
2106 return ret;
2107 }
2108
2109 /* Inner function printing just the address mask for a particular reload
2110 register class. */
2111 DEBUG_FUNCTION char *
2112 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2113 {
2114 static char ret[8];
2115 char *p = ret;
2116
2117 if ((mask & RELOAD_REG_VALID) != 0)
2118 *p++ = 'v';
2119 else if (keep_spaces)
2120 *p++ = ' ';
2121
2122 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2123 *p++ = 'm';
2124 else if (keep_spaces)
2125 *p++ = ' ';
2126
2127 if ((mask & RELOAD_REG_INDEXED) != 0)
2128 *p++ = 'i';
2129 else if (keep_spaces)
2130 *p++ = ' ';
2131
2132 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2133 *p++ = 'O';
2134 else if ((mask & RELOAD_REG_OFFSET) != 0)
2135 *p++ = 'o';
2136 else if (keep_spaces)
2137 *p++ = ' ';
2138
2139 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2140 *p++ = '+';
2141 else if (keep_spaces)
2142 *p++ = ' ';
2143
2144 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2145 *p++ = '+';
2146 else if (keep_spaces)
2147 *p++ = ' ';
2148
2149 if ((mask & RELOAD_REG_AND_M16) != 0)
2150 *p++ = '&';
2151 else if (keep_spaces)
2152 *p++ = ' ';
2153
2154 *p = '\0';
2155
2156 return ret;
2157 }
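/* For example, a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED and
   RELOAD_REG_OFFSET set decodes to "vio", or to "v io   " with KEEP_SPACES,
   where each absent flag keeps its column as a blank.  */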
2158
2159 /* Print the address masks in a human readable fashion. */
2160 DEBUG_FUNCTION void
2161 rs6000_debug_print_mode (ssize_t m)
2162 {
2163 ssize_t rc;
2164 int spaces = 0;
2165
2166 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2167 for (rc = 0; rc < N_RELOAD_REG; rc++)
2168 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2169 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2170
2171 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2172 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2173 {
2174 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2175 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2176 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2177 spaces = 0;
2178 }
2179 else
2180 spaces += strlen (" Reload=sl");
2181
2182 if (reg_addr[m].scalar_in_vmx_p)
2183 {
2184 fprintf (stderr, "%*s Upper=y", spaces, "");
2185 spaces = 0;
2186 }
2187 else
2188 spaces += strlen (" Upper=y");
2189
2190 if (rs6000_vector_unit[m] != VECTOR_NONE
2191 || rs6000_vector_mem[m] != VECTOR_NONE)
2192 {
2193 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2194 spaces, "",
2195 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2196 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2197 }
2198
2199 fputs ("\n", stderr);
2200 }
2201
2202 #define DEBUG_FMT_ID "%-32s= "
2203 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2204 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2205 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2206
2207 /* Print various interesting information with -mdebug=reg. */
2208 static void
2209 rs6000_debug_reg_global (void)
2210 {
2211 static const char *const tf[2] = { "false", "true" };
2212 const char *nl = (const char *)0;
2213 int m;
2214 size_t m1, m2, v;
2215 char costly_num[20];
2216 char nop_num[20];
2217 char flags_buffer[40];
2218 const char *costly_str;
2219 const char *nop_str;
2220 const char *trace_str;
2221 const char *abi_str;
2222 const char *cmodel_str;
2223 struct cl_target_option cl_opts;
2224
2225 /* Modes we want tieable information on. */
2226 static const machine_mode print_tieable_modes[] = {
2227 QImode,
2228 HImode,
2229 SImode,
2230 DImode,
2231 TImode,
2232 PTImode,
2233 SFmode,
2234 DFmode,
2235 TFmode,
2236 IFmode,
2237 KFmode,
2238 SDmode,
2239 DDmode,
2240 TDmode,
2241 V2SImode,
2242 V2SFmode,
2243 V16QImode,
2244 V8HImode,
2245 V4SImode,
2246 V2DImode,
2247 V1TImode,
2248 V32QImode,
2249 V16HImode,
2250 V8SImode,
2251 V4DImode,
2252 V2TImode,
2253 V4SFmode,
2254 V2DFmode,
2255 V8SFmode,
2256 V4DFmode,
2257 OOmode,
2258 XOmode,
2259 CCmode,
2260 CCUNSmode,
2261 CCEQmode,
2262 CCFPmode,
2263 };
2264
2265 /* Virtual regs we are interested in. */
2266 static const struct {
2267 int regno; /* register number. */
2268 const char *name; /* register name. */
2269 } virtual_regs[] = {
2270 { STACK_POINTER_REGNUM, "stack pointer:" },
2271 { TOC_REGNUM, "toc: " },
2272 { STATIC_CHAIN_REGNUM, "static chain: " },
2273 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2274 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2275 { ARG_POINTER_REGNUM, "arg pointer: " },
2276 { FRAME_POINTER_REGNUM, "frame pointer:" },
2277 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2278 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2279 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2280 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2281 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2282 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2283 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2284 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2285 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2286 };
2287
2288 fputs ("\nHard register information:\n", stderr);
2289 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2290 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2291 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2292 LAST_ALTIVEC_REGNO,
2293 "vs");
2294 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2295 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2296 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2297 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2298 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2299 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2300
2301 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2302 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2303 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2304
2305 fprintf (stderr,
2306 "\n"
2307 "d reg_class = %s\n"
2308 "v reg_class = %s\n"
2309 "wa reg_class = %s\n"
2310 "we reg_class = %s\n"
2311 "wr reg_class = %s\n"
2312 "wx reg_class = %s\n"
2313 "wA reg_class = %s\n"
2314 "\n",
2315 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2316 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2317 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2318 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2319 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2320 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2321 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2322
2323 nl = "\n";
2324 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2325 rs6000_debug_print_mode (m);
2326
2327 fputs ("\n", stderr);
2328
2329 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2330 {
2331 machine_mode mode1 = print_tieable_modes[m1];
2332 bool first_time = true;
2333
2334 nl = (const char *)0;
2335 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2336 {
2337 machine_mode mode2 = print_tieable_modes[m2];
2338 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2339 {
2340 if (first_time)
2341 {
2342 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2343 nl = "\n";
2344 first_time = false;
2345 }
2346
2347 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2348 }
2349 }
2350
2351 if (!first_time)
2352 fputs ("\n", stderr);
2353 }
2354
2355 if (nl)
2356 fputs (nl, stderr);
2357
2358 if (rs6000_recip_control)
2359 {
2360 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2361
2362 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2363 if (rs6000_recip_bits[m])
2364 {
2365 fprintf (stderr,
2366 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2367 GET_MODE_NAME (m),
2368 (RS6000_RECIP_AUTO_RE_P (m)
2369 ? "auto"
2370 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2371 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2372 ? "auto"
2373 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2374 }
2375
2376 fputs ("\n", stderr);
2377 }
2378
2379 if (rs6000_cpu_index >= 0)
2380 {
2381 const char *name = processor_target_table[rs6000_cpu_index].name;
2382 HOST_WIDE_INT flags
2383 = processor_target_table[rs6000_cpu_index].target_enable;
2384
2385 sprintf (flags_buffer, "-mcpu=%s flags", name);
2386 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2387 }
2388 else
2389 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2390
2391 if (rs6000_tune_index >= 0)
2392 {
2393 const char *name = processor_target_table[rs6000_tune_index].name;
2394 HOST_WIDE_INT flags
2395 = processor_target_table[rs6000_tune_index].target_enable;
2396
2397 sprintf (flags_buffer, "-mtune=%s flags", name);
2398 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2399 }
2400 else
2401 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2402
2403 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2404 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2405 rs6000_isa_flags);
2406
2407 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2408 rs6000_isa_flags_explicit);
2409
2410 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2411
2412 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2413 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2414
2415 switch (rs6000_sched_costly_dep)
2416 {
2417 case max_dep_latency:
2418 costly_str = "max_dep_latency";
2419 break;
2420
2421 case no_dep_costly:
2422 costly_str = "no_dep_costly";
2423 break;
2424
2425 case all_deps_costly:
2426 costly_str = "all_deps_costly";
2427 break;
2428
2429 case true_store_to_load_dep_costly:
2430 costly_str = "true_store_to_load_dep_costly";
2431 break;
2432
2433 case store_to_load_dep_costly:
2434 costly_str = "store_to_load_dep_costly";
2435 break;
2436
2437 default:
2438 costly_str = costly_num;
2439 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2440 break;
2441 }
2442
2443 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2444
2445 switch (rs6000_sched_insert_nops)
2446 {
2447 case sched_finish_regroup_exact:
2448 nop_str = "sched_finish_regroup_exact";
2449 break;
2450
2451 case sched_finish_pad_groups:
2452 nop_str = "sched_finish_pad_groups";
2453 break;
2454
2455 case sched_finish_none:
2456 nop_str = "sched_finish_none";
2457 break;
2458
2459 default:
2460 nop_str = nop_num;
2461 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2462 break;
2463 }
2464
2465 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2466
2467 switch (rs6000_sdata)
2468 {
2469 default:
2470 case SDATA_NONE:
2471 break;
2472
2473 case SDATA_DATA:
2474 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2475 break;
2476
2477 case SDATA_SYSV:
2478 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2479 break;
2480
2481 case SDATA_EABI:
2482 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2483 break;
2484
2485 }
2486
2487 switch (rs6000_traceback)
2488 {
2489 case traceback_default: trace_str = "default"; break;
2490 case traceback_none: trace_str = "none"; break;
2491 case traceback_part: trace_str = "part"; break;
2492 case traceback_full: trace_str = "full"; break;
2493 default: trace_str = "unknown"; break;
2494 }
2495
2496 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2497
2498 switch (rs6000_current_cmodel)
2499 {
2500 case CMODEL_SMALL: cmodel_str = "small"; break;
2501 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2502 case CMODEL_LARGE: cmodel_str = "large"; break;
2503 default: cmodel_str = "unknown"; break;
2504 }
2505
2506 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2507
2508 switch (rs6000_current_abi)
2509 {
2510 case ABI_NONE: abi_str = "none"; break;
2511 case ABI_AIX: abi_str = "aix"; break;
2512 case ABI_ELFv2: abi_str = "ELFv2"; break;
2513 case ABI_V4: abi_str = "V4"; break;
2514 case ABI_DARWIN: abi_str = "darwin"; break;
2515 default: abi_str = "unknown"; break;
2516 }
2517
2518 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2519
2520 if (rs6000_altivec_abi)
2521 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2522
2523 if (rs6000_aix_extabi)
2524 fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true");
2525
2526 if (rs6000_darwin64_abi)
2527 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2528
2529 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2530 (TARGET_SOFT_FLOAT ? "true" : "false"));
2531
2532 if (TARGET_LINK_STACK)
2533 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2534
2535 if (TARGET_P8_FUSION)
2536 {
2537 char options[80];
2538
2539 strcpy (options, "power8");
2540 if (TARGET_P8_FUSION_SIGN)
2541 strcat (options, ", sign");
2542
2543 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2544 }
2545
2546 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2547 TARGET_SECURE_PLT ? "secure" : "bss");
2548 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2549 aix_struct_return ? "aix" : "sysv");
2550 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2551 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2552 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2553 tf[!!rs6000_align_branch_targets]);
2554 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2555 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2556 rs6000_long_double_type_size);
2557 if (rs6000_long_double_type_size > 64)
2558 {
2559 fprintf (stderr, DEBUG_FMT_S, "long double type",
2560 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2561 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2562 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2563 }
2564 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2565 (int)rs6000_sched_restricted_insns_priority);
2566 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2567 (int)END_BUILTINS);
2568
2569 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2570 (int)TARGET_FLOAT128_ENABLE_TYPE);
2571
2572 if (TARGET_VSX)
2573 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2574 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2575
2576 if (TARGET_DIRECT_MOVE_128)
2577 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2578 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2579 }
2580
2581 \f
2582 /* Update the addr mask bits in reg_addr to help secondary reload and the
2583 legitimate address support figure out the appropriate addressing to
2584 use. */
2585
2586 static void
2587 rs6000_setup_reg_addr_masks (void)
2588 {
2589 ssize_t rc, reg, m, nregs;
2590 addr_mask_type any_addr_mask, addr_mask;
2591
2592 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2593 {
2594 machine_mode m2 = (machine_mode) m;
2595 bool complex_p = false;
2596 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2597 size_t msize;
2598
2599 if (COMPLEX_MODE_P (m2))
2600 {
2601 complex_p = true;
2602 m2 = GET_MODE_INNER (m2);
2603 }
2604
2605 msize = GET_MODE_SIZE (m2);
2606
2607 /* SDmode is special in that we want to access it only via REG+REG
2608 addressing on power7 and above, since we want to use the LFIWZX and
2609 STFIWZX instructions to load it. */
2610 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2611
2612 any_addr_mask = 0;
2613 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2614 {
2615 addr_mask = 0;
2616 reg = reload_reg_map[rc].reg;
2617
2618 /* Can mode values go in the GPR/FPR/Altivec registers? */
2619 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2620 {
2621 bool small_int_vsx_p = (small_int_p
2622 && (rc == RELOAD_REG_FPR
2623 || rc == RELOAD_REG_VMX));
2624
2625 nregs = rs6000_hard_regno_nregs[m][reg];
2626 addr_mask |= RELOAD_REG_VALID;
2627
2628 /* Indicate if the mode takes more than 1 physical register. If
2629 it takes a single register, indicate it can do REG+REG
2630 addressing. Small integers in VSX registers can only do
2631 REG+REG addressing. */
2632 if (small_int_vsx_p)
2633 addr_mask |= RELOAD_REG_INDEXED;
2634 else if (nregs > 1 || m == BLKmode || complex_p)
2635 addr_mask |= RELOAD_REG_MULTIPLE;
2636 else
2637 addr_mask |= RELOAD_REG_INDEXED;
2638
2639 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2640 addressing. If we allow scalars into Altivec registers,
2641 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2642
2643 For VSX systems, we don't allow update addressing for
2644 DFmode/SFmode if those registers can go in both the
2645 traditional floating point registers and Altivec registers.
2646 The load/store instructions for the Altivec registers do not
2647 have update forms. If we allowed update addressing, it seems
2648 to break IV-OPT code using floating point if the index type is
2649 int instead of long (PR target/81550 and target/84042). */
2650
2651 if (TARGET_UPDATE
2652 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2653 && msize <= 8
2654 && !VECTOR_MODE_P (m2)
2655 && !VECTOR_ALIGNMENT_P (m2)
2656 && !complex_p
2657 && (m != E_DFmode || !TARGET_VSX)
2658 && (m != E_SFmode || !TARGET_P8_VECTOR)
2659 && !small_int_vsx_p)
2660 {
2661 addr_mask |= RELOAD_REG_PRE_INCDEC;
2662
2663 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2664 we don't allow PRE_MODIFY for some multi-register
2665 operations. */
2666 switch (m)
2667 {
2668 default:
2669 addr_mask |= RELOAD_REG_PRE_MODIFY;
2670 break;
2671
2672 case E_DImode:
2673 if (TARGET_POWERPC64)
2674 addr_mask |= RELOAD_REG_PRE_MODIFY;
2675 break;
2676
2677 case E_DFmode:
2678 case E_DDmode:
2679 if (TARGET_HARD_FLOAT)
2680 addr_mask |= RELOAD_REG_PRE_MODIFY;
2681 break;
2682 }
2683 }
2684 }
2685
2686 /* GPR and FPR registers can do REG+OFFSET addressing, except
2687 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2688 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2689 if ((addr_mask != 0) && !indexed_only_p
2690 && msize <= 8
2691 && (rc == RELOAD_REG_GPR
2692 || ((msize == 8 || m2 == SFmode)
2693 && (rc == RELOAD_REG_FPR
2694 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2695 addr_mask |= RELOAD_REG_OFFSET;
2696
2697 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2698 instructions are enabled. The offset for 128-bit VSX registers is
2699 only 12 bits. While GPRs can handle the full offset range, VSX
2700 registers can only handle the restricted range. */
2701 else if ((addr_mask != 0) && !indexed_only_p
2702 && msize == 16 && TARGET_P9_VECTOR
2703 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2704 || (m2 == TImode && TARGET_VSX)))
2705 {
2706 addr_mask |= RELOAD_REG_OFFSET;
2707 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2708 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2709 }
2710
2711 /* Vector pairs can do both indexed and offset loads if the
2712 instructions are enabled, otherwise they can only do offset loads
2713 since it will be broken into two vector moves. Vector quads can
2714 only do offset loads. */
2715 else if ((addr_mask != 0) && TARGET_MMA
2716 && (m2 == OOmode || m2 == XOmode))
2717 {
2718 addr_mask |= RELOAD_REG_OFFSET;
2719 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2720 {
2721 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2722 if (m2 == OOmode)
2723 addr_mask |= RELOAD_REG_INDEXED;
2724 }
2725 }
2726
2727 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2728 addressing on 128-bit types. */
2729 if (rc == RELOAD_REG_VMX && msize == 16
2730 && (addr_mask & RELOAD_REG_VALID) != 0)
2731 addr_mask |= RELOAD_REG_AND_M16;
2732
2733 reg_addr[m].addr_mask[rc] = addr_mask;
2734 any_addr_mask |= addr_mask;
2735 }
2736
2737 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2738 }
2739 }
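/* To make the flow above concrete (assuming a typical 64-bit power9
   target): for DFmode, the GPR and FPR classes end up with
   RELOAD_REG_VALID, RELOAD_REG_INDEXED and RELOAD_REG_OFFSET; the VMX
   class also gains RELOAD_REG_OFFSET since msize == 8 and TARGET_P9_VECTOR
   holds; and none of them get RELOAD_REG_PRE_INCDEC, because update forms
   are disabled for DFmode when TARGET_VSX is set.  */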
2740
2741 \f
2742 /* Initialize the various global tables that are based on register size. */
2743 static void
2744 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2745 {
2746 ssize_t r, m, c;
2747 int align64;
2748 int align32;
2749
2750 /* Precalculate REGNO_REG_CLASS. */
2751 rs6000_regno_regclass[0] = GENERAL_REGS;
2752 for (r = 1; r < 32; ++r)
2753 rs6000_regno_regclass[r] = BASE_REGS;
2754
2755 for (r = 32; r < 64; ++r)
2756 rs6000_regno_regclass[r] = FLOAT_REGS;
2757
2758 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2759 rs6000_regno_regclass[r] = NO_REGS;
2760
2761 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2762 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2763
2764 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2765 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2766 rs6000_regno_regclass[r] = CR_REGS;
2767
2768 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2769 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2770 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2771 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2772 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2773 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2774 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2775
2776 /* Precalculate register class to simpler reload register class. We don't
2777 need all of the register classes that are combinations of different
2778 classes, just the simple ones that have constraint letters. */
2779 for (c = 0; c < N_REG_CLASSES; c++)
2780 reg_class_to_reg_type[c] = NO_REG_TYPE;
2781
2782 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2783 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2784 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2785 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2786 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2787 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2788 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2789 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2790 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2791 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2792
2793 if (TARGET_VSX)
2794 {
2795 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2796 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2797 }
2798 else
2799 {
2800 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2801 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2802 }
2803
2804 /* Precalculate the valid memory formats as well as the vector information;
2805 this must be set up before the rs6000_hard_regno_nregs_internal calls
2806 below. */
2807 gcc_assert ((int)VECTOR_NONE == 0);
2808 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2809 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2810
2811 gcc_assert ((int)CODE_FOR_nothing == 0);
2812 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2813
2814 gcc_assert ((int)NO_REGS == 0);
2815 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2816
2817 /* The VSX hardware allows native alignment for vectors; control whether the
2818 compiler believes it can use that or must still assume 128-bit alignment. */
2819 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2820 {
2821 align64 = 64;
2822 align32 = 32;
2823 }
2824 else
2825 {
2826 align64 = 128;
2827 align32 = 128;
2828 }
2829
2830 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2831 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2832 if (TARGET_FLOAT128_TYPE)
2833 {
2834 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2835 rs6000_vector_align[KFmode] = 128;
2836
2837 if (FLOAT128_IEEE_P (TFmode))
2838 {
2839 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2840 rs6000_vector_align[TFmode] = 128;
2841 }
2842 }
2843
2844 /* V2DF mode, VSX only. */
2845 if (TARGET_VSX)
2846 {
2847 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2848 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2849 rs6000_vector_align[V2DFmode] = align64;
2850 }
2851
2852 /* V4SF mode, either VSX or Altivec. */
2853 if (TARGET_VSX)
2854 {
2855 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2856 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2857 rs6000_vector_align[V4SFmode] = align32;
2858 }
2859 else if (TARGET_ALTIVEC)
2860 {
2861 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2862 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2863 rs6000_vector_align[V4SFmode] = align32;
2864 }
2865
2866 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2867 and stores. */
2868 if (TARGET_ALTIVEC)
2869 {
2870 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2871 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2872 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2873 rs6000_vector_align[V4SImode] = align32;
2874 rs6000_vector_align[V8HImode] = align32;
2875 rs6000_vector_align[V16QImode] = align32;
2876
2877 if (TARGET_VSX)
2878 {
2879 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2880 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2881 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2882 }
2883 else
2884 {
2885 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2886 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2887 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2888 }
2889 }
2890
2891 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2892 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2893 if (TARGET_VSX)
2894 {
2895 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2896 rs6000_vector_unit[V2DImode]
2897 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2898 rs6000_vector_align[V2DImode] = align64;
2899
2900 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2901 rs6000_vector_unit[V1TImode]
2902 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2903 rs6000_vector_align[V1TImode] = 128;
2904 }
2905
2906 /* DFmode, see if we want to use the VSX unit. Memory is handled
2907 differently, so don't set rs6000_vector_mem. */
2908 if (TARGET_VSX)
2909 {
2910 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2911 rs6000_vector_align[DFmode] = 64;
2912 }
2913
2914 /* SFmode, see if we want to use the VSX unit. */
2915 if (TARGET_P8_VECTOR)
2916 {
2917 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2918 rs6000_vector_align[SFmode] = 32;
2919 }
2920
2921 /* Allow TImode in VSX register and set the VSX memory macros. */
2922 if (TARGET_VSX)
2923 {
2924 rs6000_vector_mem[TImode] = VECTOR_VSX;
2925 rs6000_vector_align[TImode] = align64;
2926 }
2927
2928 /* Add support for vector pairs and vector quad registers. */
2929 if (TARGET_MMA)
2930 {
2931 rs6000_vector_unit[OOmode] = VECTOR_NONE;
2932 rs6000_vector_mem[OOmode] = VECTOR_VSX;
2933 rs6000_vector_align[OOmode] = 256;
2934
2935 rs6000_vector_unit[XOmode] = VECTOR_NONE;
2936 rs6000_vector_mem[XOmode] = VECTOR_VSX;
2937 rs6000_vector_align[XOmode] = 512;
2938 }
2939
2940 /* Register class constraints for the constraints that depend on compile
2941 switches. When the VSX code was added, different constraints were added
2942 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2943 of the VSX registers are used. The register classes for scalar floating
2944 point types are set based on whether we allow that type into the upper
2945 (Altivec) registers. GCC has register classes to target the Altivec
2946 registers for load/store operations, to select using a VSX memory
2947 operation instead of the traditional floating point operation. The
2948 constraints are:
2949
2950 d - Register class to use with traditional DFmode instructions.
2951 v - Altivec register.
2952 wa - Any VSX register.
2953 wc - Reserved to represent individual CR bits (used in LLVM).
2954 wn - always NO_REGS.
2955 wr - GPR if 64-bit mode is permitted.
2956 wx - Float register if we can do 32-bit int stores. */
2957
2958 if (TARGET_HARD_FLOAT)
2959 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;
2960 if (TARGET_ALTIVEC)
2961 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2962 if (TARGET_VSX)
2963 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2964
2965 if (TARGET_POWERPC64)
2966 {
2967 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2968 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2969 }
2970
2971 if (TARGET_STFIWX)
2972 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2973
2974 /* Support for new direct moves (ISA 3.0 + 64-bit). */
2975 if (TARGET_DIRECT_MOVE_128)
2976 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2977
2978 /* Set up the reload helper and direct move functions. */
2979 if (TARGET_VSX || TARGET_ALTIVEC)
2980 {
2981 if (TARGET_64BIT)
2982 {
2983 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2984 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2985 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2986 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2987 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2988 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2989 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2990 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2991 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2992 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2993 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2994 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2995 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2996 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2997 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2998 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2999 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3000 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3001 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3002 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3003
3004 if (FLOAT128_VECTOR_P (KFmode))
3005 {
3006 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3007 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3008 }
3009
3010 if (FLOAT128_VECTOR_P (TFmode))
3011 {
3012 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3013 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3014 }
3015
3016 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3017 available. */
3018 if (TARGET_NO_SDMODE_STACK)
3019 {
3020 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3021 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3022 }
3023
3024 if (TARGET_VSX)
3025 {
3026 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3027 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3028 }
3029
3030 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3031 {
3032 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3033 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3034 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3035 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3036 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3037 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3038 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3039 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3040 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3041
3042 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3043 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3044 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3045 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3046 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3047 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3048 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3049 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3050 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3051
3052 if (FLOAT128_VECTOR_P (KFmode))
3053 {
3054 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3055 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3056 }
3057
3058 if (FLOAT128_VECTOR_P (TFmode))
3059 {
3060 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3061 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3062 }
3063
3064 if (TARGET_MMA)
3065 {
3066 reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
3067 reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
3068 reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
3069 reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
3070 }
3071 }
3072 }
3073 else
3074 {
3075 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3076 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3077 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3078 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3079 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3080 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3081 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3082 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3083 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3084 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3085 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3086 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3087 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3088 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3089 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3090 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3091 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3092 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3093 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3094 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3095
3096 if (FLOAT128_VECTOR_P (KFmode))
3097 {
3098 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3099 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3100 }
3101
3102 if (FLOAT128_IEEE_P (TFmode))
3103 {
3104 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3105 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3106 }
3107
3108 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3109 available. */
3110 if (TARGET_NO_SDMODE_STACK)
3111 {
3112 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3113 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3114 }
3115
3116 if (TARGET_VSX)
3117 {
3118 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3119 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3120 }
3121
3122 if (TARGET_DIRECT_MOVE)
3123 {
3124 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3125 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3126 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3127 }
3128 }
3129
3130 reg_addr[DFmode].scalar_in_vmx_p = true;
3131 reg_addr[DImode].scalar_in_vmx_p = true;
3132
3133 if (TARGET_P8_VECTOR)
3134 {
3135 reg_addr[SFmode].scalar_in_vmx_p = true;
3136 reg_addr[SImode].scalar_in_vmx_p = true;
3137
3138 if (TARGET_P9_VECTOR)
3139 {
3140 reg_addr[HImode].scalar_in_vmx_p = true;
3141 reg_addr[QImode].scalar_in_vmx_p = true;
3142 }
3143 }
3144 }
3145
3146 /* Precalculate HARD_REGNO_NREGS. */
3147 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3148 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3149 rs6000_hard_regno_nregs[m][r]
3150 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3151
3152 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3153 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3154 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3155 rs6000_hard_regno_mode_ok_p[m][r]
3156 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3157
3158 /* Precalculate CLASS_MAX_NREGS sizes. */
3159 for (c = 0; c < LIM_REG_CLASSES; ++c)
3160 {
3161 int reg_size;
3162
3163 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3164 reg_size = UNITS_PER_VSX_WORD;
3165
3166 else if (c == ALTIVEC_REGS)
3167 reg_size = UNITS_PER_ALTIVEC_WORD;
3168
3169 else if (c == FLOAT_REGS)
3170 reg_size = UNITS_PER_FP_WORD;
3171
3172 else
3173 reg_size = UNITS_PER_WORD;
3174
3175 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3176 {
3177 machine_mode m2 = (machine_mode)m;
3178 int reg_size2 = reg_size;
3179
3180 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3181 in VSX. */
3182 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3183 reg_size2 = UNITS_PER_FP_WORD;
3184
3185 rs6000_class_max_nregs[m][c]
3186 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3187 }
3188 }
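/* Worked example: IFmode (IBM 128-bit floating point) is FLOAT128_2REG_P,
   so even in a VSX class its register size is forced back to
   UNITS_PER_FP_WORD (8), giving (16 + 8 - 1) / 8 == 2 registers, whereas
   KFmode stays at a single 16-byte VSX register.  */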
3189
3190 /* Calculate which modes we should automatically generate code for using the
3191 reciprocal divide and square root instructions. In the future, possibly
3192 automatically generate the instructions even if the user did not specify
3193 -mrecip. The double precision reciprocal sqrt estimate on older machines
3194 is not accurate enough. */
3195 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3196 if (TARGET_FRES)
3197 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3198 if (TARGET_FRE)
3199 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3200 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3201 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3202 if (VECTOR_UNIT_VSX_P (V2DFmode))
3203 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3204
3205 if (TARGET_FRSQRTES)
3206 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3207 if (TARGET_FRSQRTE)
3208 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3209 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3210 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3211 if (VECTOR_UNIT_VSX_P (V2DFmode))
3212 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3213
3214 if (rs6000_recip_control)
3215 {
3216 if (!flag_finite_math_only)
3217 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math-only",
3218 "-ffast-math");
3219 if (flag_trapping_math)
3220 warning (0, "%qs requires %qs or %qs", "-mrecip",
3221 "-fno-trapping-math", "-ffast-math");
3222 if (!flag_reciprocal_math)
3223 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3224 "-ffast-math");
3225 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3226 {
3227 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3228 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3229 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3230
3231 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3232 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3233 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3234
3235 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3236 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3237 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3238
3239 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3240 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3241 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3242
3243 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3244 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3245 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3246
3247 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3248 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3249 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3250
3251 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3252 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3253 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3254
3255 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3256 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3257 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3258 }
3259 }
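/* E.g. with -mrecip and -ffast-math on a machine with fres/frsqrtes,
   x / y can be expanded as x * estimate(1/y) refined by Newton-Raphson
   steps, which is why the AUTO bits above are only set once all of the
   relaxed floating-point flags are in effect.  */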
3260
3261 /* Update the addr mask bits in reg_addr to help secondary reload and the
3262 legitimate address support figure out the appropriate addressing to
3263 use. */
3264 rs6000_setup_reg_addr_masks ();
3265
3266 if (global_init_p || TARGET_DEBUG_TARGET)
3267 {
3268 if (TARGET_DEBUG_REG)
3269 rs6000_debug_reg_global ();
3270
3271 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3272 fprintf (stderr,
3273 "SImode variable mult cost = %d\n"
3274 "SImode constant mult cost = %d\n"
3275 "SImode short constant mult cost = %d\n"
3276 "DImode multipliciation cost = %d\n"
3277 "SImode division cost = %d\n"
3278 "DImode division cost = %d\n"
3279 "Simple fp operation cost = %d\n"
3280 "DFmode multiplication cost = %d\n"
3281 "SFmode division cost = %d\n"
3282 "DFmode division cost = %d\n"
3283 "cache line size = %d\n"
3284 "l1 cache size = %d\n"
3285 "l2 cache size = %d\n"
3286 "simultaneous prefetches = %d\n"
3287 "\n",
3288 rs6000_cost->mulsi,
3289 rs6000_cost->mulsi_const,
3290 rs6000_cost->mulsi_const9,
3291 rs6000_cost->muldi,
3292 rs6000_cost->divsi,
3293 rs6000_cost->divdi,
3294 rs6000_cost->fp,
3295 rs6000_cost->dmul,
3296 rs6000_cost->sdiv,
3297 rs6000_cost->ddiv,
3298 rs6000_cost->cache_line_size,
3299 rs6000_cost->l1_cache_size,
3300 rs6000_cost->l2_cache_size,
3301 rs6000_cost->simultaneous_prefetches);
3302 }
3303 }
3304
3305 #if TARGET_MACHO
3306 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3307
3308 static void
3309 darwin_rs6000_override_options (void)
3310 {
3311   /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3312      off.  */
3313 rs6000_altivec_abi = 1;
3314 TARGET_ALTIVEC_VRSAVE = 1;
3315 rs6000_current_abi = ABI_DARWIN;
3316
3317 if (DEFAULT_ABI == ABI_DARWIN
3318 && TARGET_64BIT)
3319 darwin_one_byte_bool = 1;
3320
3321 if (TARGET_64BIT && ! TARGET_POWERPC64)
3322 {
3323 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3324 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3325 }
3326
3327   /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3328      optimisation, and will not work with the most generic case (where the
3329      symbol is undefined external, but there is no symbol stub).  */
3330 if (TARGET_64BIT)
3331 rs6000_default_long_calls = 0;
3332
3333 /* ld_classic is (so far) still used for kernel (static) code, and supports
3334 the JBSR longcall / branch islands. */
3335 if (flag_mkernel)
3336 {
3337 rs6000_default_long_calls = 1;
3338
3339 /* Allow a kext author to do -mkernel -mhard-float. */
3340 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3341 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3342 }
3343
3344 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3345 Altivec. */
3346 if (!flag_mkernel && !flag_apple_kext
3347 && TARGET_64BIT
3348 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3349 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3350
3351   /* Unless the user (not the configurer) has explicitly overridden
3352      it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3353      G4 unless targeting the kernel.  */
3354 if (!flag_mkernel
3355 && !flag_apple_kext
3356 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3357 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3358 && ! OPTION_SET_P (rs6000_cpu_index))
3359 {
3360 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3361 }
3362 }
3363 #endif
3364
3365 /* If not otherwise specified by a target, make 'long double' equivalent to
3366 'double'. */
3367
3368 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3369 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3370 #endif
3371
3372 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3373 to clobber the XER[CA] bit because clobbering that bit without telling
3374 the compiler worked just fine with versions of GCC before GCC 5, and
3375 breaking a lot of older code in ways that are hard to track down is
3376 not such a great idea. */
3377
3378 static rtx_insn *
3379 rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
3380 vec<machine_mode> & /*input_modes*/,
3381 vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
3382 HARD_REG_SET &clobbered_regs, location_t /*loc*/)
3383 {
3384 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3385 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3386 return NULL;
3387 }
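
/* As an illustrative example (not from the original source), an asm
   statement such as

     asm ("addic %0,%1,-1" : "=r" (x) : "r" (y));

   modifies XER[CA] without declaring it, since addic is a carrying add;
   the hook above keeps such code working by treating every asm as if it
   clobbered the carry bit.  */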
3388
3389 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3390 but is called when the optimize level is changed via an attribute or
3391 pragma or when it is reset at the end of the code affected by the
3392 attribute or pragma. It is not called at the beginning of compilation
3393 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3394 actions then, you should have TARGET_OPTION_OVERRIDE call
3395 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3396
3397 static void
3398 rs6000_override_options_after_change (void)
3399 {
3400 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3401 turns -frename-registers on. */
3402 if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
3403 || (OPTION_SET_P (flag_unroll_all_loops)
3404 && flag_unroll_all_loops))
3405 {
3406 if (!OPTION_SET_P (unroll_only_small_loops))
3407 unroll_only_small_loops = 0;
3408 if (!OPTION_SET_P (flag_rename_registers))
3409 flag_rename_registers = 1;
3410 if (!OPTION_SET_P (flag_cunroll_grow_size))
3411 flag_cunroll_grow_size = 1;
3412 }
3413 else if (!OPTION_SET_P (flag_cunroll_grow_size))
3414 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
3415
3416 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3417 if (rs6000_rop_protect)
3418 flag_shrink_wrap = 0;
3419 }
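
/* For instance (illustrative), this hook re-runs when compilation enters
   and leaves a region such as

     #pragma GCC optimize ("unroll-loops")
     void hot (double *a, int n) { for (int i = 0; i < n; i++) a[i] *= 2.0; }
     #pragma GCC reset_options

   so the unrolling-related flags above are recomputed for that region.  */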
3420
3421 #ifdef TARGET_USES_LINUX64_OPT
3422 static void
3423 rs6000_linux64_override_options ()
3424 {
3425 if (!OPTION_SET_P (rs6000_alignment_flags))
3426 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3427 if (rs6000_isa_flags & OPTION_MASK_64BIT)
3428 {
3429 if (DEFAULT_ABI != ABI_AIX)
3430 {
3431 rs6000_current_abi = ABI_AIX;
3432 error (INVALID_64BIT, "call");
3433 }
3434 dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
3435 if (ELFv2_ABI_CHECK)
3436 {
3437 rs6000_current_abi = ABI_ELFv2;
3438 if (dot_symbols)
3439 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3440 }
3441 if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
3442 {
3443 rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
3444 error (INVALID_64BIT, "relocatable");
3445 }
3446 if (rs6000_isa_flags & OPTION_MASK_EABI)
3447 {
3448 rs6000_isa_flags &= ~OPTION_MASK_EABI;
3449 error (INVALID_64BIT, "eabi");
3450 }
3451 if (TARGET_PROTOTYPE)
3452 {
3453 target_prototype = 0;
3454 error (INVALID_64BIT, "prototype");
3455 }
3456 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
3457 {
3458 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3459 error ("%<-m64%> requires a PowerPC64 cpu");
3460 }
3461 if (!OPTION_SET_P (rs6000_current_cmodel))
3462 SET_CMODEL (CMODEL_MEDIUM);
3463 if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
3464 {
3465 if (OPTION_SET_P (rs6000_current_cmodel)
3466 && rs6000_current_cmodel != CMODEL_SMALL)
3467 error ("%<-mcmodel%> incompatible with other toc options");
3468 if (TARGET_MINIMAL_TOC)
3469 SET_CMODEL (CMODEL_SMALL);
3470 else if (TARGET_PCREL
3471 || (PCREL_SUPPORTED_BY_OS
3472 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
3473 /* Ignore -mno-minimal-toc. */
3474 ;
3475 else
3476 SET_CMODEL (CMODEL_SMALL);
3477 }
3478 if (rs6000_current_cmodel != CMODEL_SMALL)
3479 {
3480 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC))
3481 TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
3482 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC))
3483 TARGET_NO_SUM_IN_TOC = 0;
3484 }
3485 if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
3486 {
3487 if (OPTION_SET_P (rs6000_pltseq))
3488 warning (0, "%qs unsupported for this ABI",
3489 "-mpltseq");
3490 rs6000_pltseq = false;
3491 }
3492 }
3493 else if (TARGET_64BIT)
3494 error (INVALID_32BIT, "32");
3495 else
3496 {
3497 if (TARGET_PROFILE_KERNEL)
3498 {
3499 profile_kernel = 0;
3500 error (INVALID_32BIT, "profile-kernel");
3501 }
3502 if (OPTION_SET_P (rs6000_current_cmodel))
3503 {
3504 SET_CMODEL (CMODEL_SMALL);
3505 error (INVALID_32BIT, "cmodel");
3506 }
3507 }
3508 }
3509 #endif
3510
3511 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3512 This support is only in little endian GLIBC 2.32 or newer. */
3513 static bool
3514 glibc_supports_ieee_128bit (void)
3515 {
3516 #ifdef OPTION_GLIBC
3517 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3518 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
3519 return true;
3520 #endif /* OPTION_GLIBC. */
3521
3522 return false;
3523 }
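
/* For example, glibc 2.31 encodes as 2 * 1000 + 31 = 2031 and fails the
   test above, while glibc 2.32 encodes as 2032 and passes.  */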
3524
3525 /* Override command line options.
3526
3527 Combine build-specific configuration information with options
3528 specified on the command line to set various state variables which
3529 influence code generation, optimization, and expansion of built-in
3530 functions. Assure that command-line configuration preferences are
3531 compatible with each other and with the build configuration; issue
3532 warnings while adjusting configuration or error messages while
3533 rejecting configuration.
3534
3535 Upon entry to this function:
3536
3537 This function is called once at the beginning of
3538 compilation, and then again at the start and end of compiling
3539 each section of code that has a different configuration, as
3540 indicated, for example, by adding the
3541
3542 __attribute__((__target__("cpu=power9")))
3543
3544 qualifier to a function definition or, for example, by bracketing
3545 code between
3546
3547 #pragma GCC target("altivec")
3548
3549 and
3550
3551 #pragma GCC reset_options
3552
3553 directives. Parameter global_init_p is true for the initial
3554 invocation, which initializes global variables, and false for all
3555 subsequent invocations.
3556
3557
3558 Various global state information is assumed to be valid. This
3559 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3560 default CPU specified at build configure time, TARGET_DEFAULT,
3561 representing the default set of option flags for the default
3562 target, and OPTION_SET_P (rs6000_isa_flags), representing
3563 which options were requested on the command line.
3564
3565 Upon return from this function:
3566
3567 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3568 was set by name on the command line. Additionally, if certain
3569 attributes are automatically enabled or disabled by this function
3570 in order to assure compatibility between options and
3571 configuration, the flags associated with those attributes are
3572 also set. By setting these "explicit bits", we avoid the risk
3573 that other code might accidentally overwrite these particular
3574 attributes with "default values".
3575
3576 The various bits of rs6000_isa_flags are set to indicate the
3577 target options that have been selected for the most current
3578 compilation efforts. This has the effect of also turning on the
3579 associated TARGET_XXX values since these are macros which are
3580 generally defined to test the corresponding bit of the
3581 rs6000_isa_flags variable.
3582
3583 Various other global variables and fields of global structures
3584 (over 50 in all) are initialized to reflect the desired options
3585 for the most current compilation efforts. */
3586
3587 static bool
3588 rs6000_option_override_internal (bool global_init_p)
3589 {
3590 bool ret = true;
3591
3592 HOST_WIDE_INT set_masks;
3593 HOST_WIDE_INT ignore_masks;
3594 int cpu_index = -1;
3595 int tune_index;
3596 struct cl_target_option *main_target_opt
3597 = ((global_init_p || target_option_default_node == NULL)
3598 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3599
3600 /* Print defaults. */
3601 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3602 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3603
3604 /* Remember the explicit arguments. */
3605 if (global_init_p)
3606 rs6000_isa_flags_explicit = OPTION_SET_P (rs6000_isa_flags);
3607
3608 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3609 library functions, so warn about it. The flag may be useful for
3610 performance studies from time to time though, so don't disable it
3611 entirely. */
3612 if (OPTION_SET_P (rs6000_alignment_flags)
3613 && rs6000_alignment_flags == MASK_ALIGN_POWER
3614 && DEFAULT_ABI == ABI_DARWIN
3615 && TARGET_64BIT)
3616 warning (0, "%qs is not supported for 64-bit Darwin;"
3617 " it is incompatible with the installed C and C++ libraries",
3618 "-malign-power");
3619
3620   /* Numerous experiments show that IRA-based loop pressure
3621 calculation works better for RTL loop invariant motion on targets
3622 with enough (>= 32) registers. It is an expensive optimization.
3623 So it is on only for peak performance. */
3624 if (optimize >= 3 && global_init_p
3625 && !OPTION_SET_P (flag_ira_loop_pressure))
3626 flag_ira_loop_pressure = 1;
3627
3628 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3629 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3630 options were already specified. */
3631 if (flag_sanitize & SANITIZE_USER_ADDRESS
3632 && !OPTION_SET_P (flag_asynchronous_unwind_tables))
3633 flag_asynchronous_unwind_tables = 1;
3634
3635 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3636 loop unroller is active. It is only checked during unrolling, so
3637 we can just set it on by default. */
3638 if (!OPTION_SET_P (flag_variable_expansion_in_unroller))
3639 flag_variable_expansion_in_unroller = 1;
3640
3641 /* Set the pointer size. */
3642 if (TARGET_64BIT)
3643 {
3644 rs6000_pmode = DImode;
3645 rs6000_pointer_size = 64;
3646 }
3647 else
3648 {
3649 rs6000_pmode = SImode;
3650 rs6000_pointer_size = 32;
3651 }
3652
3653 /* Some OSs don't support saving Altivec registers. On those OSs, we don't
3654 touch the OPTION_MASK_ALTIVEC settings; if the user wants it, the user
3655 must explicitly specify it and we won't interfere with the user's
3656 specification. */
3657
3658 set_masks = POWERPC_MASKS;
3659 #ifdef OS_MISSING_ALTIVEC
3660 if (OS_MISSING_ALTIVEC)
3661 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3662 | OTHER_VSX_VECTOR_MASKS);
3663 #endif
3664
3665 /* Don't override by the processor default if given explicitly. */
3666 set_masks &= ~rs6000_isa_flags_explicit;
3667
3668   /* If powerpc64 is not specified explicitly, we need to ensure it is
3669      always enabled for 64-bit here; otherwise some following checks can
3670      use an unexpected TARGET_POWERPC64 value.  */
3671 if (!(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64)
3672 && TARGET_64BIT)
3673 {
3674 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3675       /* Need to stop powerpc64 from being unset in later processing,
3676 	 so clear it in set_masks.  But as PR108240 shows, to keep it
3677 	 consistent with before, we want to do this only if 64-bit
3678 	 is enabled explicitly.  This is a hack; revisit it later.  */
3679 if (rs6000_isa_flags_explicit & OPTION_MASK_64BIT)
3680 set_masks &= ~OPTION_MASK_POWERPC64;
3681 }
3682
3683   /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments.  If the user changed
3684 the cpu in a target attribute or pragma, but did not specify a tuning
3685 option, use the cpu for the tuning option rather than the option specified
3686 with -mtune on the command line. Process a '--with-cpu' configuration
3687 request as an implicit --cpu. */
3688 if (rs6000_cpu_index >= 0)
3689 cpu_index = rs6000_cpu_index;
3690 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3691 cpu_index = main_target_opt->x_rs6000_cpu_index;
3692 else if (OPTION_TARGET_CPU_DEFAULT)
3693 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3694
3695 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3696 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3697 with those from the cpu, except for options that were explicitly set. If
3698 we don't have a cpu, do not override the target bits set in
3699 TARGET_DEFAULT. */
3700 if (cpu_index >= 0)
3701 {
3702 rs6000_cpu_index = cpu_index;
3703 rs6000_isa_flags &= ~set_masks;
3704 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3705 & set_masks);
3706 }
3707 else
3708 {
3709 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3710 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3711 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3712 to using rs6000_isa_flags, we need to do the initialization here.
3713
3714 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3715 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3716 HOST_WIDE_INT flags;
3717 if (TARGET_DEFAULT)
3718 flags = TARGET_DEFAULT;
3719 else
3720 {
3721 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3722 const char *default_cpu = (!TARGET_POWERPC64
3723 ? "powerpc"
3724 : (BYTES_BIG_ENDIAN
3725 ? "powerpc64"
3726 : "powerpc64le"));
3727 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3728 flags = processor_target_table[default_cpu_index].target_enable;
3729 }
3730 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3731 }
3732
3733   /* Don't expect powerpc64 to be enabled on OSes with OS_MISSING_POWERPC64,
3734 since they do not save and restore the high half of the GPRs correctly
3735 in all cases. If the user explicitly specifies it, we won't interfere
3736 with the user's specification. */
3737 #ifdef OS_MISSING_POWERPC64
3738 if (OS_MISSING_POWERPC64
3739 && TARGET_32BIT
3740 && TARGET_POWERPC64
3741 && !(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64))
3742 rs6000_isa_flags &= ~OPTION_MASK_POWERPC64;
3743 #endif
3744
3745 if (rs6000_tune_index >= 0)
3746 tune_index = rs6000_tune_index;
3747 else if (cpu_index >= 0)
3748 rs6000_tune_index = tune_index = cpu_index;
3749 else
3750 {
3751 size_t i;
3752 enum processor_type tune_proc
3753 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3754
3755 tune_index = -1;
3756 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3757 if (processor_target_table[i].processor == tune_proc)
3758 {
3759 tune_index = i;
3760 break;
3761 }
3762 }
3763
3764 if (cpu_index >= 0)
3765 rs6000_cpu = processor_target_table[cpu_index].processor;
3766 else
3767 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3768
3769 gcc_assert (tune_index >= 0);
3770 rs6000_tune = processor_target_table[tune_index].processor;
3771
3772 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3773 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3774 || rs6000_cpu == PROCESSOR_PPCE5500)
3775 {
3776 if (TARGET_ALTIVEC)
3777 error ("AltiVec not supported in this target");
3778 }
3779
3780 /* If we are optimizing big endian systems for space, use the load/store
3781 multiple instructions. */
3782 if (BYTES_BIG_ENDIAN && optimize_size)
3783 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
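
/* (The load/store multiple instructions are lmw and stmw, which can
   replace runs of individual word loads and stores, e.g. in prologue and
   epilogue register save/restore sequences.)  */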
3784
3785   /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3786      because the hardware doesn't support the instructions used in little
3787      endian mode, and they cause an alignment trap.  The 750 does not cause
3788      an alignment trap (except when the target is unaligned).  */
3789
3790 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3791 {
3792 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3793 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3794 warning (0, "%qs is not supported on little endian systems",
3795 "-mmultiple");
3796 }
3797
3798 /* If little-endian, default to -mstrict-align on older processors.
3799 Testing for direct_move matches power8 and later. */
3800 if (!BYTES_BIG_ENDIAN
3801 && !(processor_target_table[tune_index].target_enable
3802 & OPTION_MASK_DIRECT_MOVE))
3803 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3804
3805 /* Add some warnings for VSX. */
3806 if (TARGET_VSX)
3807 {
3808 const char *msg = NULL;
3809 if (!TARGET_HARD_FLOAT)
3810 {
3811 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3812 msg = N_("%<-mvsx%> requires hardware floating point");
3813 else
3814 {
3815 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3816 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3817 }
3818 }
3819 else if (TARGET_AVOID_XFORM > 0)
3820 msg = N_("%<-mvsx%> needs indexed addressing");
3821 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3822 & OPTION_MASK_ALTIVEC))
3823 {
3824 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3825 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3826 else
3827 msg = N_("%<-mno-altivec%> disables vsx");
3828 }
3829
3830 if (msg)
3831 {
3832 warning (0, msg);
3833 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3834 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3835 }
3836 }
3837
3838 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3839 the -mcpu setting to enable options that conflict. */
3840 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3841 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3842 | OPTION_MASK_ALTIVEC
3843 | OPTION_MASK_VSX)) != 0)
3844 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3845 | OPTION_MASK_DIRECT_MOVE)
3846 & ~rs6000_isa_flags_explicit);
3847
3848 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3849 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3850
3851 #ifdef XCOFF_DEBUGGING_INFO
3852 /* For AIX default to 64-bit DWARF. */
3853 if (!OPTION_SET_P (dwarf_offset_size))
3854 dwarf_offset_size = POINTER_SIZE_UNITS;
3855 #endif
3856
3857 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3858 off all of the options that depend on those flags. */
3859 ignore_masks = rs6000_disable_incompatible_switches ();
3860
3861 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3862 unless the user explicitly used the -mno-<option> to disable the code. */
3863 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3864 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3865 else if (TARGET_P9_MINMAX)
3866 {
3867 if (cpu_index >= 0)
3868 {
3869 if (cpu_index == PROCESSOR_POWER9)
3870 {
3871 /* legacy behavior: allow -mcpu=power9 with certain
3872 capabilities explicitly disabled. */
3873 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3874 }
3875 else
3876 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3877 "for <xxx> less than power9", "-mcpu");
3878 }
3879 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3880 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3881 & rs6000_isa_flags_explicit))
3882 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3883 were explicitly cleared. */
3884 error ("%qs incompatible with explicitly disabled options",
3885 "-mpower9-minmax");
3886 else
3887 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3888 }
3889 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3890 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3891 else if (TARGET_VSX)
3892 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3893 else if (TARGET_POPCNTD)
3894 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3895 else if (TARGET_DFP)
3896 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3897 else if (TARGET_CMPB)
3898 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3899 else if (TARGET_FPRND)
3900 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3901 else if (TARGET_POPCNTB)
3902 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3903 else if (TARGET_ALTIVEC)
3904 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3905
3906 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3907 target attribute or pragma which automatically enables both options,
3908 unless the altivec ABI was set. This is set by default for 64-bit, but
3909 not for 32-bit. Don't move this before the above code using ignore_masks,
3910 since it can reset the cleared VSX/ALTIVEC flag again. */
3911 if (main_target_opt && !main_target_opt->x_rs6000_altivec_abi)
3912 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3913 & ~rs6000_isa_flags_explicit);
3914
3915 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3916 {
3917 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3918 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3919 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3920 }
3921
3922 if (!TARGET_FPRND && TARGET_VSX)
3923 {
3924 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3925 /* TARGET_VSX = 1 implies Power 7 and newer */
3926 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3927 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3928 }
3929
3930 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3931 {
3932 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3933 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3934 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3935 }
3936
3937 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3938 {
3939 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3940 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3941 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3942 }
3943
3944 if (TARGET_P8_VECTOR && !TARGET_VSX)
3945 {
3946 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3947 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3948 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3949 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3950 {
3951 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3952 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3953 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3954 }
3955 else
3956 {
3957 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3958 not explicit. */
3959 rs6000_isa_flags |= OPTION_MASK_VSX;
3960 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3961 }
3962 }
3963
3964 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3965 {
3966 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3967 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3968 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3969 }
3970
3971   /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
3972      silently turn off quad memory mode.  */
3973 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3974 {
3975 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3976 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3977
3978 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3979 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3980
3981 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3982 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3983 }
3984
3985   /* Non-atomic quad memory loads/stores are disabled for little endian, since
3986      the words are reversed, but atomic operations can still be done by
3987      swapping the words.  */
3988 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3989 {
3990 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3991 warning (0, N_("%<-mquad-memory%> is not available in little endian "
3992 "mode"));
3993
3994 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3995 }
3996
3997 /* Assume if the user asked for normal quad memory instructions, they want
3998      the atomic versions as well, unless they explicitly told us not to use quad
3999 word atomic instructions. */
4000 if (TARGET_QUAD_MEMORY
4001 && !TARGET_QUAD_MEMORY_ATOMIC
4002 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4003 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
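
/* (The non-atomic quad memory operations are the lq/stq instructions;
   the atomic forms are lqarx/stqcx., which stay usable even on little
   endian, as noted above.)  */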
4004
4005 /* If we can shrink-wrap the TOC register save separately, then use
4006 -msave-toc-indirect unless explicitly disabled. */
4007 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4008 && flag_shrink_wrap_separate
4009 && optimize_function_for_speed_p (cfun))
4010 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4011
4012 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4013 generating power8 instructions. Power9 does not optimize power8 fusion
4014 cases. */
4015 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4016 {
4017 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4018 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4019 else
4020 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4021 }
4022
4023 /* Setting additional fusion flags turns on base fusion. */
4024 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4025 {
4026 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4027 {
4028 if (TARGET_P8_FUSION_SIGN)
4029 error ("%qs requires %qs", "-mpower8-fusion-sign",
4030 "-mpower8-fusion");
4031
4032 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4033 }
4034 else
4035 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4036 }
4037
4038 /* Power8 does not fuse sign extended loads with the addis. If we are
4039 optimizing at high levels for speed, convert a sign extended load into a
4040 zero extending load, and an explicit sign extension. */
4041 if (TARGET_P8_FUSION
4042 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4043 && optimize_function_for_speed_p (cfun)
4044 && optimize >= 3)
4045 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
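
/* E.g., a fused addis/lha pair is instead emitted as addis/lhz followed
   by an explicit extsh (instruction choice illustrative), since power8
   only fuses addis with zero-extending loads.  */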
4046
4047 /* ISA 3.0 vector instructions include ISA 2.07. */
4048 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4049 {
4050 /* We prefer to not mention undocumented options in
4051 error messages. However, if users have managed to select
4052 power9-vector without selecting power8-vector, they
4053 already know about undocumented flags. */
4054       if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4055 	  && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4056 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4057 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4058 {
4059 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4060 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4061 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4062 }
4063 else
4064 {
4065 /* OPTION_MASK_P9_VECTOR is explicit and
4066 OPTION_MASK_P8_VECTOR is not explicit. */
4067 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4068 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4069 }
4070 }
4071
4072   /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4073      support.  If we only have ISA 2.06 support and the user did not specify
4074      the switch, leave it set to -1 so the movmisalign patterns are enabled,
4075      but we don't enable the full vectorization support.  */
4076 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4077 TARGET_ALLOW_MOVMISALIGN = 1;
4078
4079 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4080 {
4081 if (TARGET_ALLOW_MOVMISALIGN > 0
4082 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN))
4083 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4084
4085 TARGET_ALLOW_MOVMISALIGN = 0;
4086 }
4087
4088 /* Determine when unaligned vector accesses are permitted, and when
4089 they are preferred over masked Altivec loads. Note that if
4090 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4091 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4092 not true. */
4093 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4094 {
4095 if (!TARGET_VSX)
4096 {
4097 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4098 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4099
4100 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4101 }
4102
4103 else if (!TARGET_ALLOW_MOVMISALIGN)
4104 {
4105 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4106 	    error ("%qs requires %qs", "-mefficient-unaligned-vsx",
4107 "-mallow-movmisalign");
4108
4109 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4110 }
4111 }
4112
4113 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4114 {
4115 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4116 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4117 else
4118 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4119 }
4120
4121 /* Use long double size to select the appropriate long double. We use
4122 TYPE_PRECISION to differentiate the 3 different long double types. We map
4123 128 into the precision used for TFmode. */
4124 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4125 ? 64
4126 : FLOAT_PRECISION_TFmode);
4127
4128 /* Set long double size before the IEEE 128-bit tests. */
4129 if (!OPTION_SET_P (rs6000_long_double_type_size))
4130 {
4131 if (main_target_opt != NULL
4132 && (main_target_opt->x_rs6000_long_double_type_size
4133 != default_long_double_size))
4134 error ("target attribute or pragma changes %<long double%> size");
4135 else
4136 rs6000_long_double_type_size = default_long_double_size;
4137 }
4138 else if (rs6000_long_double_type_size == FLOAT_PRECISION_TFmode)
4139 ; /* The option value can be seen when cl_target_option_restore is called. */
4140 else if (rs6000_long_double_type_size == 128)
4141 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4142
4143 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4144 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4145 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4146 those systems will not pick up this default. Warn if the user changes the
4147 default unless -Wno-psabi. */
4148 if (!OPTION_SET_P (rs6000_ieeequad))
4149 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4150
4151 else if (TARGET_LONG_DOUBLE_128)
4152 {
4153 if (global_options.x_rs6000_ieeequad
4154 && (!TARGET_POPCNTD || !TARGET_VSX))
4155 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4156
4157 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT)
4158 {
4159 /* Determine if the user can change the default long double type at
4160 compilation time. You need GLIBC 2.32 or newer to be able to
4161 change the long double type. Only issue one warning. */
4162 static bool warned_change_long_double;
4163
4164 if (!warned_change_long_double && !glibc_supports_ieee_128bit ())
4165 {
4166 warned_change_long_double = true;
4167 if (TARGET_IEEEQUAD)
4168 warning (OPT_Wpsabi, "Using IEEE extended precision "
4169 "%<long double%>");
4170 else
4171 warning (OPT_Wpsabi, "Using IBM extended precision "
4172 "%<long double%>");
4173 }
4174 }
4175 }
4176
4177 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4178      systems.  In GCC 7, we would enable the IEEE 128-bit floating point
4179 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4180 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4181 the keyword as well as the type. */
4182 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4183
4184 /* IEEE 128-bit floating point requires VSX support. */
4185 if (TARGET_FLOAT128_KEYWORD)
4186 {
4187 if (!TARGET_VSX)
4188 {
4189 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4190 error ("%qs requires VSX support", "-mfloat128");
4191
4192 TARGET_FLOAT128_TYPE = 0;
4193 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4194 | OPTION_MASK_FLOAT128_HW);
4195 }
4196 else if (!TARGET_FLOAT128_TYPE)
4197 {
4198 TARGET_FLOAT128_TYPE = 1;
4199 warning (0, "The %<-mfloat128%> option may not be fully supported");
4200 }
4201 }
4202
4203 /* Enable the __float128 keyword under Linux by default. */
4204 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4205 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4206 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4207
4208   /* If we are supporting the float128 type and have full ISA 3.0 support,
4209 enable -mfloat128-hardware by default. However, don't enable the
4210 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4211 because sometimes the compiler wants to put things in an integer
4212 container, and if we don't have __int128 support, it is impossible. */
4213 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4214 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4215 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4216 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
4217
4218 if (TARGET_FLOAT128_HW
4219 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4220 {
4221 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4222 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4223
4224 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4225 }
4226
4227 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4228 {
4229 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4230 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4231
4232 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4233 }
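
/* As an illustrative example, once -mfloat128-hardware is in effect (for
   instance with -mcpu=power9 -m64),

     __float128 qadd (__float128 a, __float128 b) { return a + b; }

   can use the ISA 3.0 xsaddqp instruction instead of the __addkf3
   software routine.  */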
4234
4235 /* Enable -mprefixed by default on power10 systems. */
4236 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4237 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4238
4239 /* -mprefixed requires -mcpu=power10 (or later). */
4240 else if (TARGET_PREFIXED && !TARGET_POWER10)
4241 {
4242 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4243 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4244
4245 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4246 }
4247
4248 /* -mpcrel requires prefixed load/store addressing. */
4249 if (TARGET_PCREL && !TARGET_PREFIXED)
4250 {
4251 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4252 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4253
4254 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4255 }
4256
4257 /* Print the options after updating the defaults. */
4258 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4259 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4260
4261 /* E500mc does "better" if we inline more aggressively. Respect the
4262 user's opinion, though. */
4263 if (rs6000_block_move_inline_limit == 0
4264 && (rs6000_tune == PROCESSOR_PPCE500MC
4265 || rs6000_tune == PROCESSOR_PPCE500MC64
4266 || rs6000_tune == PROCESSOR_PPCE5500
4267 || rs6000_tune == PROCESSOR_PPCE6500))
4268 rs6000_block_move_inline_limit = 128;
4269
4270 /* store_one_arg depends on expand_block_move to handle at least the
4271 size of reg_parm_stack_space. */
4272 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4273 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4274
4275 if (global_init_p)
4276 {
4277 /* If the appropriate debug option is enabled, replace the target hooks
4278 with debug versions that call the real version and then prints
4279 debugging information. */
4280 if (TARGET_DEBUG_COST)
4281 {
4282 targetm.rtx_costs = rs6000_debug_rtx_costs;
4283 targetm.address_cost = rs6000_debug_address_cost;
4284 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4285 }
4286
4287 if (TARGET_DEBUG_ADDR)
4288 {
4289 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4290 targetm.legitimize_address = rs6000_debug_legitimize_address;
4291 rs6000_secondary_reload_class_ptr
4292 = rs6000_debug_secondary_reload_class;
4293 targetm.secondary_memory_needed
4294 = rs6000_debug_secondary_memory_needed;
4295 targetm.can_change_mode_class
4296 = rs6000_debug_can_change_mode_class;
4297 rs6000_preferred_reload_class_ptr
4298 = rs6000_debug_preferred_reload_class;
4299 rs6000_mode_dependent_address_ptr
4300 = rs6000_debug_mode_dependent_address;
4301 }
4302
4303 if (rs6000_veclibabi_name)
4304 {
4305 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4306 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4307 else
4308 {
4309 error ("unknown vectorization library ABI type in "
4310 "%<-mveclibabi=%s%>", rs6000_veclibabi_name);
4311 ret = false;
4312 }
4313 }
4314 }
4315
4316 /* Enable Altivec ABI for AIX -maltivec. */
4317 if (TARGET_XCOFF
4318 && (TARGET_ALTIVEC || TARGET_VSX)
4319 && !OPTION_SET_P (rs6000_altivec_abi))
4320 {
4321 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4322 error ("target attribute or pragma changes AltiVec ABI");
4323 else
4324 rs6000_altivec_abi = 1;
4325 }
4326
4327 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4328 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4329 be explicitly overridden in either case. */
4330 if (TARGET_ELF)
4331 {
4332 if (!OPTION_SET_P (rs6000_altivec_abi)
4333 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4334 {
4335 if (main_target_opt != NULL &&
4336 !main_target_opt->x_rs6000_altivec_abi)
4337 error ("target attribute or pragma changes AltiVec ABI");
4338 else
4339 rs6000_altivec_abi = 1;
4340 }
4341 }
4342
4343 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4344 So far, the only darwin64 targets are also MACH-O. */
4345 if (TARGET_MACHO
4346 && DEFAULT_ABI == ABI_DARWIN
4347 && TARGET_64BIT)
4348 {
4349 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4350 error ("target attribute or pragma changes darwin64 ABI");
4351 else
4352 {
4353 rs6000_darwin64_abi = 1;
4354 /* Default to natural alignment, for better performance. */
4355 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4356 }
4357 }
4358
4359 /* Place FP constants in the constant pool instead of TOC
4360 if section anchors enabled. */
4361 if (flag_section_anchors
4362 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC))
4363 TARGET_NO_FP_IN_TOC = 1;
4364
4365 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4366 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4367
4368 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4369 SUBTARGET_OVERRIDE_OPTIONS;
4370 #endif
4371 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4372 SUBSUBTARGET_OVERRIDE_OPTIONS;
4373 #endif
4374 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4375 SUB3TARGET_OVERRIDE_OPTIONS;
4376 #endif
4377
4378 /* If the ABI has support for PC-relative relocations, enable it by default.
4379 This test depends on the sub-target tests above setting the code model to
4380 medium for ELF v2 systems. */
4381 if (PCREL_SUPPORTED_BY_OS
4382 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4383 rs6000_isa_flags |= OPTION_MASK_PCREL;
4384
4385 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4386 after the subtarget override options are done. */
4387 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4388 {
4389 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4390 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4391
4392 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4393 }
4394
4395 /* Enable -mmma by default on power10 systems. */
4396 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4397 rs6000_isa_flags |= OPTION_MASK_MMA;
4398
4399 /* Turn off vector pair/mma options on non-power10 systems. */
4400 else if (!TARGET_POWER10 && TARGET_MMA)
4401 {
4402 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4403 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4404
4405 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4406 }
4407
4408 /* Enable power10 fusion if we are tuning for power10, even if we aren't
4409 generating power10 instructions. */
4410 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION))
4411 {
4412 if (rs6000_tune == PROCESSOR_POWER10)
4413 rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
4414 else
4415 rs6000_isa_flags &= ~OPTION_MASK_P10_FUSION;
4416 }
4417
4418   /* MMA requires SIMD support, as ISA 3.1 states, and our implementation
4419      (such as the "*movoo" pattern) uses vector pair accesses, which use VSX
4420      registers.  So make MMA require VSX support here.  */
4421 if (TARGET_MMA && !TARGET_VSX)
4422 {
4423 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4424 error ("%qs requires %qs", "-mmma", "-mvsx");
4425 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4426 }
4427
4428 if (!TARGET_PCREL && TARGET_PCREL_OPT)
4429 rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
4430
4431 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4432 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4433
4434 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4435 && rs6000_tune != PROCESSOR_POWER5
4436 && rs6000_tune != PROCESSOR_POWER6
4437 && rs6000_tune != PROCESSOR_POWER7
4438 && rs6000_tune != PROCESSOR_POWER8
4439 && rs6000_tune != PROCESSOR_POWER9
4440 && rs6000_tune != PROCESSOR_POWER10
4441 && rs6000_tune != PROCESSOR_PPCA2
4442 && rs6000_tune != PROCESSOR_CELL
4443 && rs6000_tune != PROCESSOR_PPC476);
4444 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4445 || rs6000_tune == PROCESSOR_POWER5
4446 || rs6000_tune == PROCESSOR_POWER7
4447 || rs6000_tune == PROCESSOR_POWER8);
4448 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4449 || rs6000_tune == PROCESSOR_POWER5
4450 || rs6000_tune == PROCESSOR_POWER6
4451 || rs6000_tune == PROCESSOR_POWER7
4452 || rs6000_tune == PROCESSOR_POWER8
4453 || rs6000_tune == PROCESSOR_POWER9
4454 || rs6000_tune == PROCESSOR_POWER10
4455 || rs6000_tune == PROCESSOR_PPCE500MC
4456 || rs6000_tune == PROCESSOR_PPCE500MC64
4457 || rs6000_tune == PROCESSOR_PPCE5500
4458 || rs6000_tune == PROCESSOR_PPCE6500);
4459
4460 /* Allow debug switches to override the above settings. These are set to -1
4461 in rs6000.opt to indicate the user hasn't directly set the switch. */
4462 if (TARGET_ALWAYS_HINT >= 0)
4463 rs6000_always_hint = TARGET_ALWAYS_HINT;
4464
4465 if (TARGET_SCHED_GROUPS >= 0)
4466 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4467
4468 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4469 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4470
4471 rs6000_sched_restricted_insns_priority
4472 = (rs6000_sched_groups ? 1 : 0);
4473
4474 /* Handle -msched-costly-dep option. */
4475 rs6000_sched_costly_dep
4476 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4477
4478 if (rs6000_sched_costly_dep_str)
4479 {
4480 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4481 rs6000_sched_costly_dep = no_dep_costly;
4482 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4483 rs6000_sched_costly_dep = all_deps_costly;
4484 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4485 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4486 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4487 rs6000_sched_costly_dep = store_to_load_dep_costly;
4488 else
4489 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4490 atoi (rs6000_sched_costly_dep_str));
4491 }
4492
4493 /* Handle -minsert-sched-nops option. */
4494 rs6000_sched_insert_nops
4495 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4496
4497 if (rs6000_sched_insert_nops_str)
4498 {
4499 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4500 rs6000_sched_insert_nops = sched_finish_none;
4501 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4502 rs6000_sched_insert_nops = sched_finish_pad_groups;
4503 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4504 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4505 else
4506 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4507 atoi (rs6000_sched_insert_nops_str));
4508 }
4509
4510 /* Handle stack protector */
4511 if (!OPTION_SET_P (rs6000_stack_protector_guard))
4512 #ifdef TARGET_THREAD_SSP_OFFSET
4513 rs6000_stack_protector_guard = SSP_TLS;
4514 #else
4515 rs6000_stack_protector_guard = SSP_GLOBAL;
4516 #endif
4517
4518 #ifdef TARGET_THREAD_SSP_OFFSET
4519 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4520 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4521 #endif
4522
4523 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str))
4524 {
4525 char *endp;
4526 const char *str = rs6000_stack_protector_guard_offset_str;
4527
4528 errno = 0;
4529 long offset = strtol (str, &endp, 0);
4530 if (!*str || *endp || errno)
4531 error ("%qs is not a valid number in %qs", str,
4532 "-mstack-protector-guard-offset=");
4533
4534 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4535 || (TARGET_64BIT && (offset & 3)))
4536 error ("%qs is not a valid offset in %qs", str,
4537 "-mstack-protector-guard-offset=");
4538
4539 rs6000_stack_protector_guard_offset = offset;
4540 }
4541
4542 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str))
4543 {
4544 const char *str = rs6000_stack_protector_guard_reg_str;
4545 int reg = decode_reg_name (str);
4546
4547 if (!IN_RANGE (reg, 1, 31))
4548 error ("%qs is not a valid base register in %qs", str,
4549 "-mstack-protector-guard-reg=");
4550
4551 rs6000_stack_protector_guard_reg = reg;
4552 }
4553
4554 if (rs6000_stack_protector_guard == SSP_TLS
4555 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4556 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
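
/* An explicit TLS-based guard configuration might look like (values
   illustrative):

     -mstack-protector-guard=tls -mstack-protector-guard-reg=r13
     -mstack-protector-guard-offset=0x28

   which reads the canary from offset 0x28 past the TLS register r13.  */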
4557
4558 if (global_init_p)
4559 {
4560 #ifdef TARGET_REGNAMES
4561 /* If the user desires alternate register names, copy in the
4562 alternate names now. */
4563 if (TARGET_REGNAMES)
4564 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4565 #endif
4566
4567 /* Set aix_struct_return last, after the ABI is determined.
4568 If -maix-struct-return or -msvr4-struct-return was explicitly
4569 used, don't override with the ABI default. */
4570 if (!OPTION_SET_P (aix_struct_return))
4571 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4572
4573 #if 0
4574 /* IBM XL compiler defaults to unsigned bitfields. */
4575 if (TARGET_XL_COMPAT)
4576 flag_signed_bitfields = 0;
4577 #endif
4578
4579 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4580 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4581
4582 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4583
4584 /* We can only guarantee the availability of DI pseudo-ops when
4585 assembling for 64-bit targets. */
4586 if (!TARGET_64BIT)
4587 {
4588 targetm.asm_out.aligned_op.di = NULL;
4589 targetm.asm_out.unaligned_op.di = NULL;
4590 }
4591
4592
4593 /* Set branch target alignment, if not optimizing for size. */
4594 if (!optimize_size)
4595 {
4596 	  /* Cell wants to be 8-byte aligned for dual issue.  Titan wants to be
4597 	     8-byte aligned to avoid misprediction by the branch predictor.  */
4598 if (rs6000_tune == PROCESSOR_TITAN
4599 || rs6000_tune == PROCESSOR_CELL)
4600 {
4601 if (flag_align_functions && !str_align_functions)
4602 str_align_functions = "8";
4603 if (flag_align_jumps && !str_align_jumps)
4604 str_align_jumps = "8";
4605 if (flag_align_loops && !str_align_loops)
4606 str_align_loops = "8";
4607 }
4608 if (rs6000_align_branch_targets)
4609 {
4610 if (flag_align_functions && !str_align_functions)
4611 str_align_functions = "16";
4612 if (flag_align_jumps && !str_align_jumps)
4613 str_align_jumps = "16";
4614 if (flag_align_loops && !str_align_loops)
4615 {
4616 can_override_loop_align = 1;
4617 str_align_loops = "16";
4618 }
4619 }
4620 }
4621
4622 /* Arrange to save and restore machine status around nested functions. */
4623 init_machine_status = rs6000_init_machine_status;
4624
4625 /* We should always be splitting complex arguments, but we can't break
4626 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4627 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4628 targetm.calls.split_complex_arg = NULL;
4629
4630 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4631 if (DEFAULT_ABI == ABI_AIX)
4632 targetm.calls.custom_function_descriptors = 0;
4633 }
4634
4635 /* Initialize rs6000_cost with the appropriate target costs. */
4636 if (optimize_size)
4637 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4638 else
4639 switch (rs6000_tune)
4640 {
4641 case PROCESSOR_RS64A:
4642 rs6000_cost = &rs64a_cost;
4643 break;
4644
4645 case PROCESSOR_MPCCORE:
4646 rs6000_cost = &mpccore_cost;
4647 break;
4648
4649 case PROCESSOR_PPC403:
4650 rs6000_cost = &ppc403_cost;
4651 break;
4652
4653 case PROCESSOR_PPC405:
4654 rs6000_cost = &ppc405_cost;
4655 break;
4656
4657 case PROCESSOR_PPC440:
4658 rs6000_cost = &ppc440_cost;
4659 break;
4660
4661 case PROCESSOR_PPC476:
4662 rs6000_cost = &ppc476_cost;
4663 break;
4664
4665 case PROCESSOR_PPC601:
4666 rs6000_cost = &ppc601_cost;
4667 break;
4668
4669 case PROCESSOR_PPC603:
4670 rs6000_cost = &ppc603_cost;
4671 break;
4672
4673 case PROCESSOR_PPC604:
4674 rs6000_cost = &ppc604_cost;
4675 break;
4676
4677 case PROCESSOR_PPC604e:
4678 rs6000_cost = &ppc604e_cost;
4679 break;
4680
4681 case PROCESSOR_PPC620:
4682 rs6000_cost = &ppc620_cost;
4683 break;
4684
4685 case PROCESSOR_PPC630:
4686 rs6000_cost = &ppc630_cost;
4687 break;
4688
4689 case PROCESSOR_CELL:
4690 rs6000_cost = &ppccell_cost;
4691 break;
4692
4693 case PROCESSOR_PPC750:
4694 case PROCESSOR_PPC7400:
4695 rs6000_cost = &ppc750_cost;
4696 break;
4697
4698 case PROCESSOR_PPC7450:
4699 rs6000_cost = &ppc7450_cost;
4700 break;
4701
4702 case PROCESSOR_PPC8540:
4703 case PROCESSOR_PPC8548:
4704 rs6000_cost = &ppc8540_cost;
4705 break;
4706
4707 case PROCESSOR_PPCE300C2:
4708 case PROCESSOR_PPCE300C3:
4709 rs6000_cost = &ppce300c2c3_cost;
4710 break;
4711
4712 case PROCESSOR_PPCE500MC:
4713 rs6000_cost = &ppce500mc_cost;
4714 break;
4715
4716 case PROCESSOR_PPCE500MC64:
4717 rs6000_cost = &ppce500mc64_cost;
4718 break;
4719
4720 case PROCESSOR_PPCE5500:
4721 rs6000_cost = &ppce5500_cost;
4722 break;
4723
4724 case PROCESSOR_PPCE6500:
4725 rs6000_cost = &ppce6500_cost;
4726 break;
4727
4728 case PROCESSOR_TITAN:
4729 rs6000_cost = &titan_cost;
4730 break;
4731
4732 case PROCESSOR_POWER4:
4733 case PROCESSOR_POWER5:
4734 rs6000_cost = &power4_cost;
4735 break;
4736
4737 case PROCESSOR_POWER6:
4738 rs6000_cost = &power6_cost;
4739 break;
4740
4741 case PROCESSOR_POWER7:
4742 rs6000_cost = &power7_cost;
4743 break;
4744
4745 case PROCESSOR_POWER8:
4746 rs6000_cost = &power8_cost;
4747 break;
4748
4749 case PROCESSOR_POWER9:
4750 rs6000_cost = &power9_cost;
4751 break;
4752
4753 case PROCESSOR_POWER10:
4754 rs6000_cost = &power10_cost;
4755 break;
4756
4757 case PROCESSOR_PPCA2:
4758 rs6000_cost = &ppca2_cost;
4759 break;
4760
4761 default:
4762 gcc_unreachable ();
4763 }
4764
4765 if (global_init_p)
4766 {
4767 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4768 param_simultaneous_prefetches,
4769 rs6000_cost->simultaneous_prefetches);
4770 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4771 param_l1_cache_size,
4772 rs6000_cost->l1_cache_size);
4773 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4774 param_l1_cache_line_size,
4775 rs6000_cost->cache_line_size);
4776 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4777 param_l2_cache_size,
4778 rs6000_cost->l2_cache_size);
4779
4780 /* Increase loop peeling limits based on performance analysis. */
4781 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4782 param_max_peeled_insns, 400);
4783 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4784 param_max_completely_peeled_insns, 400);
4785
4786 /* The lxvl/stxvl instructions don't perform well before Power10. */
4787 if (TARGET_POWER10)
4788 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4789 param_vect_partial_vector_usage, 1);
4790 else
4791 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4792 param_vect_partial_vector_usage, 0);
4793
4794 /* Use the 'model' -fsched-pressure algorithm by default. */
4795 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4796 param_sched_pressure_algorithm,
4797 SCHED_PRESSURE_MODEL);
4798
4799 /* If using typedef char *va_list, signal that
4800 __builtin_va_start (&ap, 0) can be optimized to
4801 ap = __builtin_next_arg (0). */
4802 if (DEFAULT_ABI != ABI_V4)
4803 targetm.expand_builtin_va_start = NULL;
4804 }
4805
4806 rs6000_override_options_after_change ();
4807
4808 /* If not explicitly specified via option, decide whether to generate indexed
4809 load/store instructions. A value of -1 indicates that the
4810 initial value of this variable has not been overwritten. During
4811 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4812 if (TARGET_AVOID_XFORM == -1)
4813 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4814 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4815 need indexed accesses and the type used is the scalar type of the element
4816 being loaded or stored. */
4817 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4818 && !TARGET_ALTIVEC);
4819
4820 /* Set the -mrecip options. */
4821 if (rs6000_recip_name)
4822 {
4823 char *p = ASTRDUP (rs6000_recip_name);
4824 char *q;
4825 unsigned int mask, i;
4826 bool invert;
4827
4828 while ((q = strtok (p, ",")) != NULL)
4829 {
4830 p = NULL;
4831 if (*q == '!')
4832 {
4833 invert = true;
4834 q++;
4835 }
4836 else
4837 invert = false;
4838
4839 if (!strcmp (q, "default"))
4840 mask = ((TARGET_RECIP_PRECISION)
4841 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4842 else
4843 {
4844 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4845 if (!strcmp (q, recip_options[i].string))
4846 {
4847 mask = recip_options[i].mask;
4848 break;
4849 }
4850
4851 if (i == ARRAY_SIZE (recip_options))
4852 {
4853 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4854 invert = false;
4855 mask = 0;
4856 ret = false;
4857 }
4858 }
4859
4860 if (invert)
4861 rs6000_recip_control &= ~mask;
4862 else
4863 rs6000_recip_control |= mask;
4864 }
4865 }
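
/* For example, -mrecip=all,!rsqrtd turns on every reciprocal estimate
   except the double-precision reciprocal square root: the loop above sets
   all of the mask bits and the '!'-prefixed entry then clears the rsqrtd
   bit.  */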
4866
4867 /* Initialize all of the registers. */
4868 rs6000_init_hard_regno_mode_ok (global_init_p);
4869
4870   /* Save the initial options in case the user uses function-specific options.  */
4871 if (global_init_p)
4872 target_option_default_node = target_option_current_node
4873 = build_target_option_node (&global_options, &global_options_set);
4874
4875 /* If not explicitly specified via option, decide whether to generate the
4876 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4877 if (TARGET_LINK_STACK == -1)
4878 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4879
4880 /* Deprecate use of -mno-speculate-indirect-jumps. */
4881 if (!rs6000_speculate_indirect_jumps)
4882 warning (0, "%qs is deprecated and not recommended in any circumstances",
4883 "-mno-speculate-indirect-jumps");
4884
4885 return ret;
4886 }
4887
4888 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4889 define the target cpu type. */
4890
4891 static void
4892 rs6000_option_override (void)
4893 {
4894 (void) rs6000_option_override_internal (true);
4895 }
4896
4897 \f
4898 /* Implement LOOP_ALIGN. */
4899 align_flags
4900 rs6000_loop_align (rtx label)
4901 {
4902 basic_block bb;
4903 int ninsns;
4904
4905 /* Don't override loop alignment if -falign-loops was specified. */
4906 if (!can_override_loop_align)
4907 return align_loops;
4908
4909 bb = BLOCK_FOR_INSN (label);
4910 ninsns = num_loop_insns(bb->loop_father);
4911
4912 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4913 if (ninsns > 4 && ninsns <= 8
4914 && (rs6000_tune == PROCESSOR_POWER4
4915 || rs6000_tune == PROCESSOR_POWER5
4916 || rs6000_tune == PROCESSOR_POWER6
4917 || rs6000_tune == PROCESSOR_POWER7
4918 || rs6000_tune == PROCESSOR_POWER8))
4919 return align_flags (5);
4920 else
4921 return align_loops;
4922 }
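/* Editorial note: align_flags (5) requests 2**5 == 32-byte alignment,
   i.e. one icache sector on the Power4..Power8 cores listed above.  */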
4923
4924 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4925 after applying N iterations. This routine does not determine how many
4926 iterations are required to reach the desired alignment. */
4927
4928 static bool
4929 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4930 {
4931 if (is_packed)
4932 return false;
4933
4934 if (TARGET_32BIT)
4935 {
4936 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4937 return true;
4938
4939 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4940 return true;
4941
4942 return false;
4943 }
4944 else
4945 {
4946 if (TARGET_MACHO)
4947 return false;
4948
4949 /* Assuming that all other types are naturally aligned. CHECKME! */
4950 return true;
4951 }
4952 }
4953
4954 /* Return true if the vector misalignment factor is supported by the
4955 target. */
4956 static bool
4957 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4958 const_tree type,
4959 int misalignment,
4960 bool is_packed)
4961 {
4962 if (TARGET_VSX)
4963 {
4964 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4965 return true;
4966
4967 /* Return if movmisalign pattern is not supported for this mode. */
4968 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4969 return false;
4970
4971 if (misalignment == -1)
4972 {
4973 /* Misalignment factor is unknown at compile time but we know
4974 it's word aligned. */
4975 if (rs6000_vector_alignment_reachable (type, is_packed))
4976 {
4977 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4978
4979 if (element_size == 64 || element_size == 32)
4980 return true;
4981 }
4982
4983 return false;
4984 }
4985
4986 /* VSX supports word-aligned vector. */
4987 if (misalignment % 4 == 0)
4988 return true;
4989 }
4990 return false;
4991 }
4992
4993 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4994 static int
4995 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4996 tree vectype, int misalign)
4997 {
4998 unsigned elements;
4999 tree elem_type;
5000
5001 switch (type_of_cost)
5002 {
5003 case scalar_stmt:
5004 case scalar_store:
5005 case vector_stmt:
5006 case vector_store:
5007 case vec_to_scalar:
5008 case scalar_to_vec:
5009 case cond_branch_not_taken:
5010 return 1;
5011 case scalar_load:
5012 case vector_load:
5013 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5014 return 2;
5015
5016 case vec_perm:
5017 /* Power7 has only one permute unit, make it a bit expensive. */
5018 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5019 return 3;
5020 else
5021 return 1;
5022
5023 case vec_promote_demote:
5024 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5025 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5026 return 4;
5027 else
5028 return 1;
5029
5030 case cond_branch_taken:
5031 return 3;
5032
5033 case unaligned_load:
5034 case vector_gather_load:
5035 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5036 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5037 return 2;
5038
5039 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5040 {
5041 elements = TYPE_VECTOR_SUBPARTS (vectype);
5042 /* See PR102767, consider V1TI to keep consistency. */
5043 if (elements == 2 || elements == 1)
5044 /* Double word aligned. */
5045 return 4;
5046
5047 if (elements == 4)
5048 {
5049 switch (misalign)
5050 {
5051 case 8:
5052 /* Double word aligned. */
5053 return 4;
5054
5055 case -1:
5056 /* Unknown misalignment. */
5057 case 4:
5058 case 12:
5059 /* Word aligned. */
5060 return 33;
5061
5062 default:
5063 gcc_unreachable ();
5064 }
5065 }
5066 }
5067
5068 if (TARGET_ALTIVEC)
5069 /* Misaligned loads are not supported. */
5070 gcc_unreachable ();
5071
5072 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5073 return 4;
5074
5075 case unaligned_store:
5076 case vector_scatter_store:
5077 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5078 return 1;
5079
5080 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5081 {
5082 elements = TYPE_VECTOR_SUBPARTS (vectype);
5083 /* See PR102767, consider V1TI to keep consistency. */
5084 if (elements == 2 || elements == 1)
5085 /* Double word aligned. */
5086 return 2;
5087
5088 if (elements == 4)
5089 {
5090 switch (misalign)
5091 {
5092 case 8:
5093 /* Double word aligned. */
5094 return 2;
5095
5096 case -1:
5097 /* Unknown misalignment. */
5098 case 4:
5099 case 12:
5100 /* Word aligned. */
5101 return 23;
5102
5103 default:
5104 gcc_unreachable ();
5105 }
5106 }
5107 }
5108
5109 if (TARGET_ALTIVEC)
5110 /* Misaligned stores are not supported. */
5111 gcc_unreachable ();
5112
5113 return 2;
5114
5115 case vec_construct:
5116 /* This is a rough approximation assuming non-constant elements
5117 constructed into a vector via element insertion. FIXME:
5118 vec_construct is not granular enough for uniformly good
5119 decisions. If the initialization is a splat, this is
5120 cheaper than we estimate. Improve this someday. */
5121 elem_type = TREE_TYPE (vectype);
5122 /* 32-bit vectors loaded into registers are stored as double
5123 precision, so we need 2 permutes, 2 converts, and 1 merge
5124 to construct a vector of short floats from them. */
5125 if (SCALAR_FLOAT_TYPE_P (elem_type)
5126 && TYPE_PRECISION (elem_type) == 32)
5127 return 5;
5128 /* On POWER9, integer vector types are built up in GPRs and then
5129 use a direct move (2 cycles). For POWER8 this is even worse,
5130 as we need two direct moves and a merge, and the direct moves
5131 are five cycles. */
5132 else if (INTEGRAL_TYPE_P (elem_type))
5133 {
5134 if (TARGET_P9_VECTOR)
5135 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5136 else
5137 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5138 }
5139 else
5140 /* V2DFmode doesn't need a direct move. */
5141 return 2;
5142
5143 default:
5144 gcc_unreachable ();
5145 }
5146 }
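/* Illustrative reading of the table above (editorial sketch, not used by
   the compiler): with TARGET_EFFICIENT_UNALIGNED_VSX an unaligned V4SF
   load costs 2, while on an older VSX target with TARGET_ALLOW_MOVMISALIGN
   a word-aligned V4SF load (misalign == 4) costs 33, so the vectorizer is
   strongly discouraged from word-aligned vector accesses there.  */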
5147
5148 /* Implement targetm.vectorize.preferred_simd_mode. */
5149
5150 static machine_mode
5151 rs6000_preferred_simd_mode (scalar_mode mode)
5152 {
5153 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5154
5155 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5156 return vmode.require ();
5157
5158 return word_mode;
5159 }
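/* Editorial example: with 16-byte vectors, SFmode prefers V4SFmode
   (16 / 4 == 4 subparts) and DImode prefers V2DImode, provided the
   resulting vector mode's memory model is not VECTOR_MEM_NONE.  */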
5160
5161 class rs6000_cost_data : public vector_costs
5162 {
5163 public:
5164 using vector_costs::vector_costs;
5165
5166 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
5167 stmt_vec_info stmt_info, slp_tree, tree vectype,
5168 int misalign,
5169 vect_cost_model_location where) override;
5170 void finish_cost (const vector_costs *) override;
5171
5172 protected:
5173 void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
5174 vect_cost_model_location, unsigned int);
5175 void density_test (loop_vec_info);
5176 void adjust_vect_cost_per_loop (loop_vec_info);
5177 unsigned int determine_suggested_unroll_factor (loop_vec_info);
5178
5179 /* Total number of vectorized stmts (loop only). */
5180 unsigned m_nstmts = 0;
5181 /* Total number of loads (loop only). */
5182 unsigned m_nloads = 0;
5183 /* Total number of stores (loop only). */
5184 unsigned m_nstores = 0;
5185 /* Reduction factor for suggesting unroll factor (loop only). */
5186 unsigned m_reduc_factor = 0;
5187 /* Possible extra penalized cost on vector construction (loop only). */
5188 unsigned m_extra_ctor_cost = 0;
5189 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5190 instruction is needed by the vectorization. */
5191 bool m_vect_nonmem = false;
5192 /* If this loop gets vectorized with emulated gather load. */
5193 bool m_gather_load = false;
5194 };
5195
5196 /* Test for likely overcommitment of vector hardware resources. If a
5197 loop iteration is relatively large, and too large a percentage of
5198 instructions in the loop are vectorized, the cost model may not
5199 adequately reflect delays from unavailable vector resources.
5200 Penalize the loop body cost for this case. */
5201
5202 void
5203 rs6000_cost_data::density_test (loop_vec_info loop_vinfo)
5204 {
5205 /* This density test only cares about the cost of the vector version of the
5206 loop, so return immediately if we are costing the scalar version (namely
5207 computing the single scalar iteration cost). */
5208 if (m_costing_for_scalar)
5209 return;
5210
5211 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5212 basic_block *bbs = get_loop_body (loop);
5213 int nbbs = loop->num_nodes;
5214 int vec_cost = m_costs[vect_body], not_vec_cost = 0;
5215
5216 for (int i = 0; i < nbbs; i++)
5217 {
5218 basic_block bb = bbs[i];
5219 gimple_stmt_iterator gsi;
5220
5221 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5222 {
5223 gimple *stmt = gsi_stmt (gsi);
5224 if (is_gimple_debug (stmt))
5225 continue;
5226
5227 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5228
5229 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5230 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5231 not_vec_cost++;
5232 }
5233 }
5234
5235 free (bbs);
5236 int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5237
5238 if (density_pct > rs6000_density_pct_threshold
5239 && vec_cost + not_vec_cost > rs6000_density_size_threshold)
5240 {
5241 m_costs[vect_body] = vec_cost * (100 + rs6000_density_penalty) / 100;
5242 if (dump_enabled_p ())
5243 dump_printf_loc (MSG_NOTE, vect_location,
5244 "density %d%%, cost %d exceeds threshold, penalizing "
5245 "loop body cost by %u%%\n", density_pct,
5246 vec_cost + not_vec_cost, rs6000_density_penalty);
5247 }
5248
5249 /* Check whether we need to penalize the body cost to account
5250 for excess strided or elementwise loads. */
5251 if (m_extra_ctor_cost > 0)
5252 {
5253 gcc_assert (m_nloads <= m_nstmts);
5254 unsigned int load_pct = (m_nloads * 100) / m_nstmts;
5255
5256 /* Performance is likely to be bounded by the latency and execution
5257 resources of the many scalar loads (strided or elementwise loads
5258 into a vector) if both conditions below hold:
5259 1. there are many loads, so a long wait on the load units is
5260 likely;
5261 2. loads make up a big proportion of all vectorized statements,
5262 so it is hard to schedule other statements to spread among
5263 the loads.
5264 One typical case is the innermost loop of the hotspot of SPEC2017
5265 503.bwaves_r without loop interchange. */
5266 if (m_nloads > (unsigned int) rs6000_density_load_num_threshold
5267 && load_pct > (unsigned int) rs6000_density_load_pct_threshold)
5268 {
5269 m_costs[vect_body] += m_extra_ctor_cost;
5270 if (dump_enabled_p ())
5271 dump_printf_loc (MSG_NOTE, vect_location,
5272 "Found %u loads and "
5273 "load pct. %u%% exceed "
5274 "the threshold, "
5275 "penalizing loop body "
5276 "cost by extra cost %u "
5277 "for ctor.\n",
5278 m_nloads, load_pct,
5279 m_extra_ctor_cost);
5280 }
5281 }
5282 }
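/* Worked example (editorial sketch): with vec_cost == 90 and
   not_vec_cost == 5, density_pct is 90 * 100 / 95 == 94.  If 94 exceeds
   rs6000_density_pct_threshold and the total cost 95 exceeds
   rs6000_density_size_threshold, the body cost becomes
   90 * (100 + rs6000_density_penalty) / 100.  */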
5283
5284 /* Implement targetm.vectorize.create_costs. */
5285
5286 static vector_costs *
5287 rs6000_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
5288 {
5289 return new rs6000_cost_data (vinfo, costing_for_scalar);
5290 }
5291
5292 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5293 For some statements, we want to tweak the cost further on top of the
5294 rs6000_builtin_vectorization_cost handling, which doesn't have any
5295 information on statement operation codes etc. One typical case here is
5296 COND_EXPR: it costs the same as a simple FXU instruction when evaluating
5297 the scalar cost, but it should be priced higher because it is transformed
5298 into either compare + branch or compare + isel instructions. */
5299
5300 static unsigned
5301 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5302 struct _stmt_vec_info *stmt_info)
5303 {
5304 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5305 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5306 {
5307 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5308 if (subcode == COND_EXPR)
5309 return 2;
5310 }
5311
5312 return 0;
5313 }
5314
5315 /* Helper function for add_stmt_cost. Check each statement cost
5316 entry, gather information and update the target_cost fields
5317 accordingly. */
5318 void
5319 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
5320 stmt_vec_info stmt_info,
5321 vect_cost_model_location where,
5322 unsigned int orig_count)
5323 {
5325 /* Check whether we're doing something other than just a copy loop.
5326 Not all such loops may be profitably vectorized; see
5327 rs6000_finish_cost. */
5328 if (kind == vec_to_scalar
5329 || kind == vec_perm
5330 || kind == vec_promote_demote
5331 || kind == vec_construct
5332 || kind == scalar_to_vec
5333 || (where == vect_body && kind == vector_stmt))
5334 m_vect_nonmem = true;
5335
5336 /* Gather some information when we are costing the vectorized instruction
5337 for the statements located in a loop body. */
5338 if (!m_costing_for_scalar
5339 && is_a<loop_vec_info> (m_vinfo)
5340 && where == vect_body)
5341 {
5342 m_nstmts += orig_count;
5343
5344 if (kind == scalar_load
5345 || kind == vector_load
5346 || kind == unaligned_load
5347 || kind == vector_gather_load)
5348 {
5349 m_nloads += orig_count;
5350 if (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5351 m_gather_load = true;
5352 }
5353 else if (kind == scalar_store
5354 || kind == vector_store
5355 || kind == unaligned_store
5356 || kind == vector_scatter_store)
5357 m_nstores += orig_count;
5358 else if ((kind == scalar_stmt
5359 || kind == vector_stmt
5360 || kind == vec_to_scalar)
5361 && stmt_info
5362 && vect_is_reduction (stmt_info))
5363 {
5364 /* Loop body contains normal int or fp operations and epilogue
5365 contains vector reduction. For simplicity, we assume int
5366 operation takes one cycle and fp operation takes one more. */
5367 tree lhs = gimple_get_lhs (stmt_info->stmt);
5368 bool is_float = FLOAT_TYPE_P (TREE_TYPE (lhs));
5369 unsigned int basic_cost = is_float ? 2 : 1;
5370 m_reduc_factor = MAX (basic_cost * orig_count, m_reduc_factor);
5371 }
5372
5373 /* Power processors do not currently have instructions for strided
5374 and elementwise loads, and instead we must generate multiple
5375 scalar loads. This leads to undercounting of the cost. We
5376 account for this by scaling the construction cost by the number
5377 of elements involved, and saving this as extra cost that we may
5378 or may not need to apply. When finalizing the cost of the loop,
5379 the extra penalty is applied when the load density heuristics
5380 are satisfied. */
5381 if (kind == vec_construct && stmt_info
5382 && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
5383 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
5384 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
5385 {
5386 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5387 unsigned int nunits = vect_nunits_for_cost (vectype);
5388 /* As PR103702 shows, it's possible that the vectorizer wants to cost
5389 only one unit here; there is no need to penalize that case, so
5390 simply return early. */
5391 if (nunits == 1)
5392 return;
5393 /* The i386 port uses nunits * stmt_cost as the penalized cost
5394 for this kind of penalization. We used to follow it, but
5395 found it could result in an unreliable body cost, especially
5396 for V16QI/V8HI modes. To improve on this, we use a new
5397 heuristic: for each scalar load, use 2 as the penalized cost
5398 for the case with 2 nunits and use 1 for the other cases.
5399 This has little supporting theory and is mainly concluded
5400 from broad performance evaluations on Power8, Power9 and
5401 Power10. One possibly related point: vector construction
5402 for more units uses more insns, which have more chances to
5403 be scheduled well (even run in parallel when enough units
5404 are available at the time), so it seems reasonable not to
5405 penalize them as much. */
5406 unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
5407 unsigned int extra_cost = nunits * adjusted_cost;
5408 m_extra_ctor_cost += extra_cost;
5409 }
5410 }
5411 }
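/* Worked example (editorial sketch) of the ctor penalty above: an
   elementwise V16QImode load has nunits == 16, so it adds 16 * 1 == 16
   to m_extra_ctor_cost, while a V2DImode one has nunits == 2 and adds
   2 * 2 == 4.  The penalty only lands on the body cost if the load
   density thresholds in density_test are later met.  */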
5412
5413 unsigned
5414 rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
5415 stmt_vec_info stmt_info, slp_tree,
5416 tree vectype, int misalign,
5417 vect_cost_model_location where)
5418 {
5419 unsigned retval = 0;
5420
5421 if (flag_vect_cost_model)
5422 {
5423 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5424 misalign);
5425 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5426 /* Statements in an inner loop relative to the loop being
5427 vectorized are weighted more heavily. The value here is
5428 arbitrary and could potentially be improved with analysis. */
5429 unsigned int orig_count = count;
5430 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
5431 m_costs[where] += retval;
5432
5433 update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
5434 }
5435
5436 return retval;
5437 }
5438
5439 /* For target-specific vectorization costs that can't be handled per stmt,
5440 we check the requisite conditions and adjust the vectorization cost
5441 accordingly if they are satisfied. One typical example is modeling the
5442 shift cost for vectors with length by counting the number of required
5443 lengths when LOOP_VINFO_FULLY_WITH_LENGTH_P holds. */
5444
5445 void
5446 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
5447 {
5448 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5449 {
5450 rgroup_controls *rgc;
5451 unsigned int num_vectors_m1;
5452 unsigned int shift_cnt = 0;
5453 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5454 if (rgc->type)
5455 /* Each length needs one shift to fill into bits 0-7. */
5456 shift_cnt += num_vectors_m1 + 1;
5457
5458 add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL,
5459 NULL_TREE, 0, vect_body);
5460 }
5461 }
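/* Worked example (editorial sketch): in LOOP_VINFO_LENS the rgroup at
   index i controls i + 1 vectors, so with rgroups at indexes 0 and 1
   (both with a set type) shift_cnt is (0 + 1) + (1 + 1) == 3, and three
   scalar_stmt costs are added to the body cost to model the length
   setup shifts.  */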
5462
5463 /* Determine the suggested unroll factor by considering the factors below:
5464
5465 - the unroll option/pragma, which can disable unrolling for this loop;
5466 - a simple hardware resource model for non-memory vector insns;
5467 - aggressive heuristics when the iteration count is unknown:
5468 - the reduction case, to break cross-iteration dependencies;
5469 - emulated gather loads;
5470 - the estimated iteration count when the iteration count is unknown.
5471 */
5472
5474 unsigned int
5475 rs6000_cost_data::determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
5476 {
5477 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5478
5479 /* Don't unroll if it's specified explicitly not to be unrolled. */
5480 if (loop->unroll == 1
5481 || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops)
5482 || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops))
5483 return 1;
5484
5485 unsigned int nstmts_nonldst = m_nstmts - m_nloads - m_nstores;
5486 /* Don't unroll if there are no vector instructions other than memory accesses. */
5487 if (nstmts_nonldst == 0)
5488 return 1;
5489
5490 /* Consider breaking cross iteration dependency for reduction. */
5491 unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1;
5492
5493 /* Use a simple hardware resource model based on how many non-ld/st
5494 vector instructions can be issued per cycle. */
5495 unsigned int issue_width = rs6000_vect_unroll_issue;
5496 unsigned int uf = CEIL (reduc_factor * issue_width, nstmts_nonldst);
5497 uf = MIN ((unsigned int) rs6000_vect_unroll_limit, uf);
5498 /* Make sure it is power of 2. */
5499 uf = 1 << ceil_log2 (uf);
5500
5501 /* If the iteration count is known, the costing is exact enough;
5502 don't worry that unrolling could make things worse. */
5503 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5504 return uf;
5505
5506 /* Inspired by SPEC2017 parest_r, we want to aggressively unroll the
5507 loop if either condition is satisfied:
5508 - reduction factor exceeds the threshold;
5509 - emulated gather load adopted. */
5510 if (reduc_factor > (unsigned int) rs6000_vect_unroll_reduc_threshold
5511 || m_gather_load)
5512 return uf;
5513
5514 /* Check if we can conclude it's good to unroll from the estimated
5515 iteration count. */
5516 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
5517 unsigned int vf = vect_vf_for_cost (loop_vinfo);
5518 unsigned int unrolled_vf = vf * uf;
5519 if (est_niter == -1 || est_niter < unrolled_vf)
5520 /* When the estimated iteration count of this loop is unknown, it's
5521 possible that we can vectorize it with the original VF but would
5522 fail to vectorize it with the unrolled VF if the actual iteration
5523 count falls in between. */
5524 return 1;
5525 else
5526 {
5527 unsigned int epil_niter_unr = est_niter % unrolled_vf;
5528 unsigned int epil_niter = est_niter % vf;
5529 /* Even with partial vector support, it can still be inefficient
5530 to calculate the length when the iteration count is unknown, so
5531 only expect unrolling to be good when the epilogue iteration count
5532 is not bigger than VF (only one length calculation). */
5533 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5534 && epil_niter_unr <= vf)
5535 return uf;
5536 /* Without partial vector support, conservatively unroll this when
5537 the epilogue iteration count is less than the original one
5538 (epilogue execution time wouldn't be longer than before). */
5539 else if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5540 && epil_niter_unr <= epil_niter)
5541 return uf;
5542 }
5543
5544 return 1;
5545 }
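/* Worked example (editorial sketch, assuming the default params
   rs6000-vect-unroll-issue == 4 and rs6000-vect-unroll-limit == 4):
   with reduc_factor == 4 and nstmts_nonldst == 6,
   uf = CEIL (4 * 4, 6) == 3, then MIN (4, 3) == 3, and rounding up
   to a power of two gives 1 << ceil_log2 (3) == 4.  */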
5546
5547 void
5548 rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
5549 {
5550 if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
5551 {
5552 adjust_vect_cost_per_loop (loop_vinfo);
5553 density_test (loop_vinfo);
5554
5555 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5556 that require versioning for any reason. The vectorization is at
5557 best a wash inside the loop, and the versioning checks make
5558 profitability highly unlikely and potentially quite harmful. */
5559 if (!m_vect_nonmem
5560 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
5561 && LOOP_REQUIRES_VERSIONING (loop_vinfo))
5562 m_costs[vect_body] += 10000;
5563
5564 m_suggested_unroll_factor
5565 = determine_suggested_unroll_factor (loop_vinfo);
5566 }
5567
5568 vector_costs::finish_cost (scalar_costs);
5569 }
5570
5571 /* Implement targetm.loop_unroll_adjust. */
5572
5573 static unsigned
5574 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5575 {
5576 if (unroll_only_small_loops)
5577 {
5578 /* TODO: These are hardcoded values right now. We probably should use
5579 a PARAM here. */
5580 if (loop->ninsns <= 6)
5581 return MIN (4, nunroll);
5582 if (loop->ninsns <= 10)
5583 return MIN (2, nunroll);
5584
5585 return 0;
5586 }
5587
5588 return nunroll;
5589 }
5590
5591 /* Returns a function decl for a vectorized version of the builtin function
5592 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5593 if it is not available.
5594
5595 Implement targetm.vectorize.builtin_vectorized_function. */
5596
5597 static tree
5598 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5599 tree type_in)
5600 {
5601 machine_mode in_mode, out_mode;
5602 int in_n, out_n;
5603
5604 if (TARGET_DEBUG_BUILTIN)
5605 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5606 combined_fn_name (combined_fn (fn)),
5607 GET_MODE_NAME (TYPE_MODE (type_out)),
5608 GET_MODE_NAME (TYPE_MODE (type_in)));
5609
5610 /* TODO: Should this be gcc_assert? */
5611 if (TREE_CODE (type_out) != VECTOR_TYPE
5612 || TREE_CODE (type_in) != VECTOR_TYPE)
5613 return NULL_TREE;
5614
5615 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5616 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5617 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5618 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5619
5620 switch (fn)
5621 {
5622 CASE_CFN_COPYSIGN:
5623 if (VECTOR_UNIT_VSX_P (V2DFmode)
5624 && out_mode == DFmode && out_n == 2
5625 && in_mode == DFmode && in_n == 2)
5626 return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
5627 if (VECTOR_UNIT_VSX_P (V4SFmode)
5628 && out_mode == SFmode && out_n == 4
5629 && in_mode == SFmode && in_n == 4)
5630 return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
5631 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5632 && out_mode == SFmode && out_n == 4
5633 && in_mode == SFmode && in_n == 4)
5634 return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
5635 break;
5636 CASE_CFN_CEIL:
5637 if (VECTOR_UNIT_VSX_P (V2DFmode)
5638 && out_mode == DFmode && out_n == 2
5639 && in_mode == DFmode && in_n == 2)
5640 return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
5641 if (VECTOR_UNIT_VSX_P (V4SFmode)
5642 && out_mode == SFmode && out_n == 4
5643 && in_mode == SFmode && in_n == 4)
5644 return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
5645 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5646 && out_mode == SFmode && out_n == 4
5647 && in_mode == SFmode && in_n == 4)
5648 return rs6000_builtin_decls[RS6000_BIF_VRFIP];
5649 break;
5650 CASE_CFN_FLOOR:
5651 if (VECTOR_UNIT_VSX_P (V2DFmode)
5652 && out_mode == DFmode && out_n == 2
5653 && in_mode == DFmode && in_n == 2)
5654 return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
5655 if (VECTOR_UNIT_VSX_P (V4SFmode)
5656 && out_mode == SFmode && out_n == 4
5657 && in_mode == SFmode && in_n == 4)
5658 return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
5659 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5660 && out_mode == SFmode && out_n == 4
5661 && in_mode == SFmode && in_n == 4)
5662 return rs6000_builtin_decls[RS6000_BIF_VRFIM];
5663 break;
5664 CASE_CFN_FMA:
5665 if (VECTOR_UNIT_VSX_P (V2DFmode)
5666 && out_mode == DFmode && out_n == 2
5667 && in_mode == DFmode && in_n == 2)
5668 return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
5669 if (VECTOR_UNIT_VSX_P (V4SFmode)
5670 && out_mode == SFmode && out_n == 4
5671 && in_mode == SFmode && in_n == 4)
5672 return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
5673 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5674 && out_mode == SFmode && out_n == 4
5675 && in_mode == SFmode && in_n == 4)
5676 return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
5677 break;
5678 CASE_CFN_TRUNC:
5679 if (VECTOR_UNIT_VSX_P (V2DFmode)
5680 && out_mode == DFmode && out_n == 2
5681 && in_mode == DFmode && in_n == 2)
5682 return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
5683 if (VECTOR_UNIT_VSX_P (V4SFmode)
5684 && out_mode == SFmode && out_n == 4
5685 && in_mode == SFmode && in_n == 4)
5686 return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
5687 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5688 && out_mode == SFmode && out_n == 4
5689 && in_mode == SFmode && in_n == 4)
5690 return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
5691 break;
5692 CASE_CFN_NEARBYINT:
5693 if (VECTOR_UNIT_VSX_P (V2DFmode)
5694 && flag_unsafe_math_optimizations
5695 && out_mode == DFmode && out_n == 2
5696 && in_mode == DFmode && in_n == 2)
5697 return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
5698 if (VECTOR_UNIT_VSX_P (V4SFmode)
5699 && flag_unsafe_math_optimizations
5700 && out_mode == SFmode && out_n == 4
5701 && in_mode == SFmode && in_n == 4)
5702 return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
5703 break;
5704 CASE_CFN_RINT:
5705 if (VECTOR_UNIT_VSX_P (V2DFmode)
5706 && !flag_trapping_math
5707 && out_mode == DFmode && out_n == 2
5708 && in_mode == DFmode && in_n == 2)
5709 return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
5710 if (VECTOR_UNIT_VSX_P (V4SFmode)
5711 && !flag_trapping_math
5712 && out_mode == SFmode && out_n == 4
5713 && in_mode == SFmode && in_n == 4)
5714 return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
5715 break;
5716 default:
5717 break;
5718 }
5719
5720 /* Generate calls to libmass if appropriate. */
5721 if (rs6000_veclib_handler)
5722 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5723
5724 return NULL_TREE;
5725 }
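/* Editorial example: a loop calling ceil on doubles that is vectorized
   to V2DFmode on a VSX target maps to
   rs6000_builtin_decls[RS6000_BIF_XVRDPIP], i.e. the xvrdpip
   round-toward-plus-infinity instruction.  */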
5726
5727 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5728 library with vectorized intrinsics. */
5729
5730 static tree
5731 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5732 tree type_in)
5733 {
5734 char name[32];
5735 const char *suffix = NULL;
5736 tree fntype, new_fndecl, bdecl = NULL_TREE;
5737 int n_args = 1;
5738 const char *bname;
5739 machine_mode el_mode, in_mode;
5740 int n, in_n;
5741
5742 /* Libmass is suitable only for unsafe math, as it does not correctly
5743 support parts of IEEE semantics (such as denormals) with the required
5744 precision. Only support it if we have VSX to use the simd d2 or f4
5745 functions. XXX: Add variable-length support. */
5746 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5747 return NULL_TREE;
5748
5749 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5750 n = TYPE_VECTOR_SUBPARTS (type_out);
5751 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5752 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5753 if (el_mode != in_mode
5754 || n != in_n)
5755 return NULL_TREE;
5756
5757 switch (fn)
5758 {
5759 CASE_CFN_ATAN2:
5760 CASE_CFN_HYPOT:
5761 CASE_CFN_POW:
5762 n_args = 2;
5763 gcc_fallthrough ();
5764
5765 CASE_CFN_ACOS:
5766 CASE_CFN_ACOSH:
5767 CASE_CFN_ASIN:
5768 CASE_CFN_ASINH:
5769 CASE_CFN_ATAN:
5770 CASE_CFN_ATANH:
5771 CASE_CFN_CBRT:
5772 CASE_CFN_COS:
5773 CASE_CFN_COSH:
5774 CASE_CFN_ERF:
5775 CASE_CFN_ERFC:
5776 CASE_CFN_EXP2:
5777 CASE_CFN_EXP:
5778 CASE_CFN_EXPM1:
5779 CASE_CFN_LGAMMA:
5780 CASE_CFN_LOG10:
5781 CASE_CFN_LOG1P:
5782 CASE_CFN_LOG2:
5783 CASE_CFN_LOG:
5784 CASE_CFN_SIN:
5785 CASE_CFN_SINH:
5786 CASE_CFN_SQRT:
5787 CASE_CFN_TAN:
5788 CASE_CFN_TANH:
5789 if (el_mode == DFmode && n == 2)
5790 {
5791 bdecl = mathfn_built_in (double_type_node, fn);
5792 suffix = "d2"; /* pow -> powd2 */
5793 }
5794 else if (el_mode == SFmode && n == 4)
5795 {
5796 bdecl = mathfn_built_in (float_type_node, fn);
5797 suffix = "4"; /* powf -> powf4 */
5798 }
5799 else
5800 return NULL_TREE;
5801 if (!bdecl)
5802 return NULL_TREE;
5803 break;
5804
5805 default:
5806 return NULL_TREE;
5807 }
5808
5809 gcc_assert (suffix != NULL);
5810 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5811 if (!bname)
5812 return NULL_TREE;
5813
5814 strcpy (name, bname + strlen ("__builtin_"));
5815 strcat (name, suffix);
5816
5817 if (n_args == 1)
5818 fntype = build_function_type_list (type_out, type_in, NULL);
5819 else if (n_args == 2)
5820 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5821 else
5822 gcc_unreachable ();
5823
5824 /* Build a function declaration for the vectorized function. */
5825 new_fndecl = build_decl (BUILTINS_LOCATION,
5826 FUNCTION_DECL, get_identifier (name), fntype);
5827 TREE_PUBLIC (new_fndecl) = 1;
5828 DECL_EXTERNAL (new_fndecl) = 1;
5829 DECL_IS_NOVOPS (new_fndecl) = 1;
5830 TREE_READONLY (new_fndecl) = 1;
5831
5832 return new_fndecl;
5833 }
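/* Editorial example: with -funsafe-math-optimizations on a VSX target,
   a V2DFmode pow maps __builtin_pow to an external "powd2" declaration,
   and a V4SFmode powf maps to "powf4", following the MASS simd naming
   scheme implemented above.  */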
5834
5835 \f
5836 /* Default CPU string for rs6000*_file_start functions. */
5837 static const char *rs6000_default_cpu;
5838
5839 #ifdef USING_ELFOS_H
5840 const char *rs6000_machine;
5841
5842 const char *
5843 rs6000_machine_from_flags (void)
5844 {
5845 /* e300 and e500 */
5846 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3)
5847 return "e300";
5848 if (rs6000_cpu == PROCESSOR_PPC8540 || rs6000_cpu == PROCESSOR_PPC8548)
5849 return "e500";
5850 if (rs6000_cpu == PROCESSOR_PPCE500MC)
5851 return "e500mc";
5852 if (rs6000_cpu == PROCESSOR_PPCE500MC64)
5853 return "e500mc64";
5854 if (rs6000_cpu == PROCESSOR_PPCE5500)
5855 return "e5500";
5856 if (rs6000_cpu == PROCESSOR_PPCE6500)
5857 return "e6500";
5858
5859 /* 400 series */
5860 if (rs6000_cpu == PROCESSOR_PPC403)
5861 return "\"403\"";
5862 if (rs6000_cpu == PROCESSOR_PPC405)
5863 return "\"405\"";
5864 if (rs6000_cpu == PROCESSOR_PPC440)
5865 return "\"440\"";
5866 if (rs6000_cpu == PROCESSOR_PPC476)
5867 return "\"476\"";
5868
5869 /* A2 */
5870 if (rs6000_cpu == PROCESSOR_PPCA2)
5871 return "a2";
5872
5873 /* Cell BE */
5874 if (rs6000_cpu == PROCESSOR_CELL)
5875 return "cell";
5876
5877 /* Titan */
5878 if (rs6000_cpu == PROCESSOR_TITAN)
5879 return "titan";
5880
5881 /* 500 series and 800 series */
5882 if (rs6000_cpu == PROCESSOR_MPCCORE)
5883 return "\"821\"";
5884
5885 #if 0
5886 /* This (and ppc64 below) are disabled here (for now at least) because
5887 PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
5888 are #define'd as some of these. Untangling that is a job for later. */
5889
5890 /* 600 series and 700 series, "classic" */
5891 if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603
5892 || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e
5893 || rs6000_cpu == PROCESSOR_PPC750)
5894 return "ppc";
5895 #endif
5896
5897 /* Classic with AltiVec, "G4" */
5898 if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450)
5899 return "\"7450\"";
5900
5901 #if 0
5902 /* The older 64-bit CPUs */
5903 if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630
5904 || rs6000_cpu == PROCESSOR_RS64A)
5905 return "ppc64";
5906 #endif
5907
5908 HOST_WIDE_INT flags = rs6000_isa_flags;
5909
5910 /* Disable the flags that should never influence the .machine selection. */
5911 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL);
5912
5913 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5914 return "power10";
5915 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5916 return "power9";
5917 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5918 return "power8";
5919 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5920 return "power7";
5921 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5922 return "power6";
5923 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5924 return "power5";
5925 if ((flags & ISA_2_1_MASKS) != 0)
5926 return "power4";
5927 if ((flags & OPTION_MASK_POWERPC64) != 0)
5928 return "ppc64";
5929 return "ppc";
5930 }
5931
5932 void
5933 emit_asm_machine (void)
5934 {
5935 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5936 }
5937 #endif
5938
5939 /* Do anything needed at the start of the asm file. */
5940
5941 static void
5942 rs6000_file_start (void)
5943 {
5944 char buffer[80];
5945 const char *start = buffer;
5946 FILE *file = asm_out_file;
5947
5948 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5949
5950 default_file_start ();
5951
5952 if (flag_verbose_asm)
5953 {
5954 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5955
5956 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5957 {
5958 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5959 start = "";
5960 }
5961
5962 if (OPTION_SET_P (rs6000_cpu_index))
5963 {
5964 fprintf (file, "%s -mcpu=%s", start,
5965 processor_target_table[rs6000_cpu_index].name);
5966 start = "";
5967 }
5968
5969 if (OPTION_SET_P (rs6000_tune_index))
5970 {
5971 fprintf (file, "%s -mtune=%s", start,
5972 processor_target_table[rs6000_tune_index].name);
5973 start = "";
5974 }
5975
5976 if (PPC405_ERRATUM77)
5977 {
5978 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5979 start = "";
5980 }
5981
5982 #ifdef USING_ELFOS_H
5983 switch (rs6000_sdata)
5984 {
5985 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5986 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5987 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5988 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5989 }
5990
5991 if (rs6000_sdata && g_switch_value)
5992 {
5993 fprintf (file, "%s -G %d", start,
5994 g_switch_value);
5995 start = "";
5996 }
5997 #endif
5998
5999 if (*start == '\0')
6000 putc ('\n', file);
6001 }
6002
6003 #ifdef USING_ELFOS_H
6004 rs6000_machine = rs6000_machine_from_flags ();
6005 emit_asm_machine ();
6006 #endif
6007
6008 if (DEFAULT_ABI == ABI_ELFv2)
6009 fprintf (file, "\t.abiversion 2\n");
6010 }
6011
6012 \f
6013 /* Return nonzero if this function is known to have a null epilogue. */
6014
6015 int
6016 direct_return (void)
6017 {
6018 if (reload_completed)
6019 {
6020 rs6000_stack_t *info = rs6000_stack_info ();
6021
6022 if (info->first_gp_reg_save == 32
6023 && info->first_fp_reg_save == 64
6024 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6025 && ! info->lr_save_p
6026 && ! info->cr_save_p
6027 && info->vrsave_size == 0
6028 && ! info->push_p)
6029 return 1;
6030 }
6031
6032 return 0;
6033 }
6034
6035 /* Helper for num_insns_constant. Calculate number of instructions to
6036 load VALUE to a single gpr using combinations of addi, addis, ori,
6037 oris, sldi and rldimi instructions. */
6038
6039 static int
6040 num_insns_constant_gpr (HOST_WIDE_INT value)
6041 {
6042 /* signed constant loadable with addi */
6043 if (SIGNED_INTEGER_16BIT_P (value))
6044 return 1;
6045
6046 /* constant loadable with addis */
6047 else if ((value & 0xffff) == 0
6048 && (value >> 31 == -1 || value >> 31 == 0))
6049 return 1;
6050
6051 /* PADDI can support up to 34 bit signed integers. */
6052 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
6053 return 1;
6054
6055 else if (TARGET_POWERPC64)
6056 {
6057 HOST_WIDE_INT low = sext_hwi (value, 32);
6058 HOST_WIDE_INT high = value >> 31;
6059
6060 if (high == 0 || high == -1)
6061 return 2;
6062
6063 high >>= 1;
6064
6065 if (low == 0 || low == high)
6066 return num_insns_constant_gpr (high) + 1;
6067 else if (high == 0)
6068 return num_insns_constant_gpr (low) + 1;
6069 else
6070 return (num_insns_constant_gpr (high)
6071 + num_insns_constant_gpr (low) + 1);
6072 }
6073
6074 else
6075 return 2;
6076 }
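/* Worked example (editorial sketch): the 64-bit constant
   0x1234567800000000 has a zero low word, so its cost is
   num_insns_constant_gpr (0x12345678) + 1 == 3, matching a sequence like

       lis  r9,0x1234      # addis
       ori  r9,r9,0x5678
       sldi r9,r9,32

   Register choice and exact mnemonics are illustrative only.  */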
6077
6078 /* Helper for num_insns_constant. Allow constants formed by the
6079 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
6080 and handle modes that require multiple gprs. */
6081
6082 static int
6083 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
6084 {
6085 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6086 int total = 0;
6087 while (nregs-- > 0)
6088 {
6089 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
6090 int insns = num_insns_constant_gpr (low);
6091 if (insns > 2
6092 /* We won't get more than 2 from num_insns_constant_gpr
6093 except when TARGET_POWERPC64 and mode is DImode or
6094 wider, so the register mode must be DImode. */
6095 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
6096 insns = 2;
6097 total += insns;
6098 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
6099 it all at once would be UB. */
6100 value >>= (BITS_PER_WORD - 1);
6101 value >>= 1;
6102 }
6103 return total;
6104 }
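/* Worked example (editorial sketch): on a 64-bit target the contiguous
   mask 0x000000ffffffffff would cost 3 by the gpr sequences above, but
   it is a valid rldicl-style and-mask, so it is capped at 2:

       li     r9,-1
       rldicl r9,r9,0,24   # keep the low 40 bits

   Register choice is illustrative only.  */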
6105
6106 /* Return the number of instructions it takes to form a constant in as
6107 many gprs as are needed for MODE. */
6108
6109 int
6110 num_insns_constant (rtx op, machine_mode mode)
6111 {
6112 HOST_WIDE_INT val;
6113
6114 switch (GET_CODE (op))
6115 {
6116 case CONST_INT:
6117 val = INTVAL (op);
6118 break;
6119
6120 case CONST_WIDE_INT:
6121 {
6122 int insns = 0;
6123 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6124 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
6125 DImode);
6126 return insns;
6127 }
6128
6129 case CONST_DOUBLE:
6130 {
6131 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
6132
6133 if (mode == SFmode || mode == SDmode)
6134 {
6135 long l;
6136
6137 if (mode == SDmode)
6138 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
6139 else
6140 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
6141 /* See the first define_split in rs6000.md handling a
6142 const_double_operand. */
6143 val = l;
6144 mode = SImode;
6145 }
6146 else if (mode == DFmode || mode == DDmode)
6147 {
6148 long l[2];
6149
6150 if (mode == DDmode)
6151 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
6152 else
6153 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
6154
6155 /* See the second (32-bit) and third (64-bit) define_split
6156 in rs6000.md handling a const_double_operand. */
6157 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
6158 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
6159 mode = DImode;
6160 }
6161 else if (mode == TFmode || mode == TDmode
6162 || mode == KFmode || mode == IFmode)
6163 {
6164 long l[4];
6165 int insns;
6166
6167 if (mode == TDmode)
6168 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
6169 else
6170 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
6171
6172 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
6173 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
6174 insns = num_insns_constant_multi (val, DImode);
6175 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
6176 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
6177 insns += num_insns_constant_multi (val, DImode);
6178 return insns;
6179 }
6180 else
6181 gcc_unreachable ();
6182 }
6183 break;
6184
6185 default:
6186 gcc_unreachable ();
6187 }
6188
6189 return num_insns_constant_multi (val, mode);
6190 }
6191
6192 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6193 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6194 corresponding element of the vector, but for V4SFmode, the
6195 corresponding "float" is interpreted as an SImode integer. */
6196
6197 HOST_WIDE_INT
6198 const_vector_elt_as_int (rtx op, unsigned int elt)
6199 {
6200 rtx tmp;
6201
6202 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6203 gcc_assert (GET_MODE (op) != V2DImode
6204 && GET_MODE (op) != V2DFmode);
6205
6206 tmp = CONST_VECTOR_ELT (op, elt);
6207 if (GET_MODE (op) == V4SFmode)
6208 tmp = gen_lowpart (SImode, tmp);
6209 return INTVAL (tmp);
6210 }
6211
6212 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6213 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6214 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6215 all items are set to the same value and contain COPIES replicas of the
6215 vsplt's operand; if STEP > 1, one in every STEP elements is set to the vsplt's
6217 operand and the others are set to the value of the operand's msb. */
6218
6219 static bool
6220 vspltis_constant (rtx op, unsigned step, unsigned copies)
6221 {
6222 machine_mode mode = GET_MODE (op);
6223 machine_mode inner = GET_MODE_INNER (mode);
6224
6225 unsigned i;
6226 unsigned nunits;
6227 unsigned bitsize;
6228 unsigned mask;
6229
6230 HOST_WIDE_INT val;
6231 HOST_WIDE_INT splat_val;
6232 HOST_WIDE_INT msb_val;
6233
6234 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6235 return false;
6236
6237 nunits = GET_MODE_NUNITS (mode);
6238 bitsize = GET_MODE_BITSIZE (inner);
6239 mask = GET_MODE_MASK (inner);
6240
6241 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6242 splat_val = val;
6243 msb_val = val >= 0 ? 0 : -1;
6244
6245 if (val == 0 && step > 1)
6246 {
6247 /* Special case for loading the most significant bit with step > 1.
6248 In that case, match 0 in all elements except every (step-1)'th one,
6249 which must match EASY_VECTOR_MSB. */
6250 for (i = 1; i < nunits; ++i)
6251 {
6252 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6253 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6254 if ((i & (step - 1)) == step - 1)
6255 {
6256 if (!EASY_VECTOR_MSB (elt_val, inner))
6257 break;
6258 }
6259 else if (elt_val)
6260 break;
6261 }
6262 if (i == nunits)
6263 return true;
6264 }
6265
6266 /* Construct the value to be splatted, if possible. If not, return false. */
6267 for (i = 2; i <= copies; i *= 2)
6268 {
6269 HOST_WIDE_INT small_val;
6270 bitsize /= 2;
6271 small_val = splat_val >> bitsize;
6272 mask >>= bitsize;
6273 if (splat_val != ((HOST_WIDE_INT)
6274 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6275 | (small_val & mask)))
6276 return false;
6277 splat_val = small_val;
6278 inner = smallest_int_mode_for_size (bitsize);
6279 }
6280
6281 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6282 if (EASY_VECTOR_15 (splat_val))
6283 ;
6284
6285 /* Also check if we can splat, and then add the result to itself. Do so if
6286 the value is positive, or if the splat instruction is using OP's mode;
6287 for splat_val < 0, the splat and the add should use the same mode. */
6288 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6289 && (splat_val >= 0 || (step == 1 && copies == 1)))
6290 ;
6291
6292 /* Also check if we are loading up the most significant bit, which can be
6293 done by loading up -1 and shifting the value left by -1. Only do this
6294 for step 1 here; for larger steps it is done earlier. */
6295 else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
6296 ;
6297
6298 else
6299 return false;
6300
6301 /* Check if VAL is present in every STEP-th element, and the
6302 other elements are filled with its most significant bit. */
6303 for (i = 1; i < nunits; ++i)
6304 {
6305 HOST_WIDE_INT desired_val;
6306 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6307 if ((i & (step - 1)) == 0)
6308 desired_val = val;
6309 else
6310 desired_val = msb_val;
6311
6312 if (desired_val != const_vector_elt_as_int (op, elt))
6313 return false;
6314 }
6315
6316 return true;
6317 }
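/* Worked example (editorial sketch): for a V8HImode constant whose
   elements are all 0x0303, easy_altivec_constant eventually tries
   step == 1, copies == 2; one halving step folds 0x0303 into the byte
   value 3, which satisfies EASY_VECTOR_15, so the constant can be
   generated as "vspltisb %0,3".  */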
6318
6319 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6320 instruction, filling in the bottom elements with 0 or -1.
6321
6322 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6323 for the number of zeroes to shift in, or negative for the number of 0xff
6324 bytes to shift in.
6325
6326 OP is a CONST_VECTOR. */
6327
6328 int
6329 vspltis_shifted (rtx op)
6330 {
6331 machine_mode mode = GET_MODE (op);
6332 machine_mode inner = GET_MODE_INNER (mode);
6333
6334 unsigned i, j;
6335 unsigned nunits;
6336 unsigned mask;
6337
6338 HOST_WIDE_INT val;
6339
6340 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6341 return 0;
6342
6343 /* We need to create pseudo registers to do the shift, so don't recognize
6344 shift vector constants after reload. Don't match it even before RA
6345 after split1 is done, because there won't be further splitting pass
6346 before RA to do the splitting. */
6347 if (!can_create_pseudo_p ()
6348 || (cfun->curr_properties & PROP_rtl_split_insns))
6349 return 0;
6350
6351 nunits = GET_MODE_NUNITS (mode);
6352 mask = GET_MODE_MASK (inner);
6353
6354 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6355
6356 /* Check if the value can really be the operand of a vspltis[bhw]. */
6357 if (EASY_VECTOR_15 (val))
6358 ;
6359
6360 /* Also check if we are loading up the most significant bit which can be done
6361 by loading up -1 and shifting the value left by -1. */
6362 else if (EASY_VECTOR_MSB (val, inner))
6363 ;
6364
6365 else
6366 return 0;
6367
6368 /* Check if VAL is present in every element until we find elements
6369 that are 0 or all 1 bits. */
6370 for (i = 1; i < nunits; ++i)
6371 {
6372 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6373 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6374
6375 /* If the value isn't the splat value, check for the remaining elements
6376 being 0/-1. */
6377 if (val != elt_val)
6378 {
6379 if (elt_val == 0)
6380 {
6381 for (j = i+1; j < nunits; ++j)
6382 {
6383 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6384 if (const_vector_elt_as_int (op, elt2) != 0)
6385 return 0;
6386 }
6387
6388 return (nunits - i) * GET_MODE_SIZE (inner);
6389 }
6390
6391 else if ((elt_val & mask) == mask)
6392 {
6393 for (j = i+1; j < nunits; ++j)
6394 {
6395 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6396 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6397 return 0;
6398 }
6399
6400 return -((nunits - i) * GET_MODE_SIZE (inner));
6401 }
6402
6403 else
6404 return 0;
6405 }
6406 }
6407
6408 /* If all elements are equal, we don't need to do VSLDOI. */
6409 return 0;
6410 }
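/* Worked example (editorial sketch): on a big-endian target the
   V4SImode constant { 5, 0, 0, 0 } has val == 5 and zeros in the
   remaining elements, so the function returns (4 - 1) * 4 == 12,
   meaning "vspltisw 5" followed by a VSLDOI that shifts in 12 zero
   bytes.  */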
6411
6412
6413 /* Return non-zero (element mode byte size) if OP is of the given MODE
6414 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6415
6416 int
6417 easy_altivec_constant (rtx op, machine_mode mode)
6418 {
6419 unsigned step, copies;
6420
6421 if (mode == VOIDmode)
6422 mode = GET_MODE (op);
6423 else if (mode != GET_MODE (op))
6424 return 0;
6425
6426 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6427 constants. */
6428 if (mode == V2DFmode)
6429 return zero_constant (op, mode) ? 8 : 0;
6430
6431 else if (mode == V2DImode)
6432 {
6433 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6434 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6435 return 0;
6436
6437 if (zero_constant (op, mode))
6438 return 8;
6439
6440 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6441 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6442 return 8;
6443
6444 return 0;
6445 }
6446
6447 /* V1TImode is a special container for TImode. Ignore for now. */
6448 else if (mode == V1TImode)
6449 return 0;
6450
6451 /* Start with a vspltisw. */
6452 step = GET_MODE_NUNITS (mode) / 4;
6453 copies = 1;
6454
6455 if (vspltis_constant (op, step, copies))
6456 return 4;
6457
6458 /* Then try with a vspltish. */
6459 if (step == 1)
6460 copies <<= 1;
6461 else
6462 step >>= 1;
6463
6464 if (vspltis_constant (op, step, copies))
6465 return 2;
6466
6467 /* And finally a vspltisb. */
6468 if (step == 1)
6469 copies <<= 1;
6470 else
6471 step >>= 1;
6472
6473 if (vspltis_constant (op, step, copies))
6474 return 1;
6475
6476 if (vspltis_shifted (op) != 0)
6477 return GET_MODE_SIZE (GET_MODE_INNER (mode));
6478
6479 return 0;
6480 }
6481
6482 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6483 result is OP. Abort if it is not possible. */
6484
6485 rtx
6486 gen_easy_altivec_constant (rtx op)
6487 {
6488 machine_mode mode = GET_MODE (op);
6489 int nunits = GET_MODE_NUNITS (mode);
6490 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6491 unsigned step = nunits / 4;
6492 unsigned copies = 1;
6493
6494 /* Start with a vspltisw. */
6495 if (vspltis_constant (op, step, copies))
6496 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6497
6498 /* Then try with a vspltish. */
6499 if (step == 1)
6500 copies <<= 1;
6501 else
6502 step >>= 1;
6503
6504 if (vspltis_constant (op, step, copies))
6505 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6506
6507 /* And finally a vspltisb. */
6508 if (step == 1)
6509 copies <<= 1;
6510 else
6511 step >>= 1;
6512
6513 if (vspltis_constant (op, step, copies))
6514 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6515
6516 gcc_unreachable ();
6517 }
6518
6519 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6520 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6521
6522 Return the number of instructions needed (1 or 2) via the address
6523 pointed to by NUM_INSNS_PTR.
6524
6525 Return the constant that is being split via CONSTANT_PTR. */
6526
6527 bool
6528 xxspltib_constant_p (rtx op,
6529 machine_mode mode,
6530 int *num_insns_ptr,
6531 int *constant_ptr)
6532 {
6533 size_t nunits = GET_MODE_NUNITS (mode);
6534 size_t i;
6535 HOST_WIDE_INT value;
6536 rtx element;
6537
6538 /* Set the returned values to out of bound values. */
6539 *num_insns_ptr = -1;
6540 *constant_ptr = 256;
6541
6542 if (!TARGET_P9_VECTOR)
6543 return false;
6544
6545 if (mode == VOIDmode)
6546 mode = GET_MODE (op);
6547
6548 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6549 return false;
6550
6551 /* Handle (vec_duplicate <constant>). */
6552 if (GET_CODE (op) == VEC_DUPLICATE)
6553 {
6554 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6555 && mode != V2DImode)
6556 return false;
6557
6558 element = XEXP (op, 0);
6559 if (!CONST_INT_P (element))
6560 return false;
6561
6562 value = INTVAL (element);
6563 if (!IN_RANGE (value, -128, 127))
6564 return false;
6565 }
6566
6567 /* Handle (const_vector [...]). */
6568 else if (GET_CODE (op) == CONST_VECTOR)
6569 {
6570 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6571 && mode != V2DImode)
6572 return false;
6573
6574 element = CONST_VECTOR_ELT (op, 0);
6575 if (!CONST_INT_P (element))
6576 return false;
6577
6578 value = INTVAL (element);
6579 if (!IN_RANGE (value, -128, 127))
6580 return false;
6581
6582 for (i = 1; i < nunits; i++)
6583 {
6584 element = CONST_VECTOR_ELT (op, i);
6585 if (!CONST_INT_P (element))
6586 return false;
6587
6588 if (value != INTVAL (element))
6589 return false;
6590 }
6591 }
6592
6593 /* Handle integer constants being loaded into the upper part of the VSX
6594 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6595 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6596 else if (CONST_INT_P (op))
6597 {
6598 if (!SCALAR_INT_MODE_P (mode))
6599 return false;
6600
6601 value = INTVAL (op);
6602 if (!IN_RANGE (value, -128, 127))
6603 return false;
6604
6605 if (!IN_RANGE (value, -1, 0))
6606 {
6607 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6608 return false;
6609
6610 if (EASY_VECTOR_15 (value))
6611 return false;
6612 }
6613 }
6614
6615 else
6616 return false;
6617
6618 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6619 sign extend. Special case 0/-1 to allow getting any VSX register instead
6620 of an Altivec register. */
6621 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6622 && EASY_VECTOR_15 (value))
6623 return false;
6624
6625 /* Return # of instructions and the constant byte for XXSPLTIB. */
6626 if (mode == V16QImode)
6627 *num_insns_ptr = 1;
6628
6629 else if (IN_RANGE (value, -1, 0))
6630 *num_insns_ptr = 1;
6631
6632 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6633 single XXSPLTIW or XXSPLTIDP instruction. */
6634 else if (vsx_prefixed_constant (op, mode))
6635 return false;
6636
6637 /* Return XXSPLTIB followed by a sign extend operation to convert the
6638 constant to V8HImode or V4SImode. */
6639 else
6640 *num_insns_ptr = 2;
6641
6642 *constant_ptr = (int) value;
6643 return true;
6644 }
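/* Worked example (editorial sketch, assuming a Power9 target without
   prefixed constants): splatting 42 across V8HImode fails
   EASY_VECTOR_15, so the function returns true with *num_insns_ptr == 2
   (xxspltib plus a vupkhsb sign extension), whereas splatting 5 returns
   false because a single vspltish is preferable.  */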
6645
6646 /* Return true if OP mode is V2DI and can be synthesized with ISA 2.07
6647 instructions vupkhsw and vspltisw.
6648
6649 Return the constant that is being split via CONSTANT_PTR. */
6650
6651 bool
6652 vspltisw_vupkhsw_constant_p (rtx op, machine_mode mode, int *constant_ptr)
6653 {
6654 HOST_WIDE_INT value;
6655 rtx elt;
6656
6657 if (!TARGET_P8_VECTOR)
6658 return false;
6659
6660 if (mode != V2DImode)
6661 return false;
6662
6663 if (!const_vec_duplicate_p (op, &elt))
6664 return false;
6665
6666 value = INTVAL (elt);
6667 if (value == 0 || value == 1
6668 || !EASY_VECTOR_15 (value))
6669 return false;
6670
6671 if (constant_ptr)
6672 *constant_ptr = (int) value;
6673 return true;
6674 }
6675
6676 const char *
6677 output_vec_const_move (rtx *operands)
6678 {
6679 int shift;
6680 machine_mode mode;
6681 rtx dest, vec;
6682
6683 dest = operands[0];
6684 vec = operands[1];
6685 mode = GET_MODE (dest);
6686
6687 if (TARGET_VSX)
6688 {
6689 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6690 int xxspltib_value = 256;
6691 int num_insns = -1;
6692
6693 if (zero_constant (vec, mode))
6694 {
6695 if (TARGET_P9_VECTOR)
6696 return "xxspltib %x0,0";
6697
6698 else if (dest_vmx_p)
6699 return "vspltisw %0,0";
6700
6701 else
6702 return "xxlxor %x0,%x0,%x0";
6703 }
6704
6705 if (all_ones_constant (vec, mode))
6706 {
6707 if (TARGET_P9_VECTOR)
6708 return "xxspltib %x0,255";
6709
6710 else if (dest_vmx_p)
6711 return "vspltisw %0,-1";
6712
6713 else if (TARGET_P8_VECTOR)
6714 return "xxlorc %x0,%x0,%x0";
6715
6716 else
6717 gcc_unreachable ();
6718 }
6719
6720 vec_const_128bit_type vsx_const;
6721 if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
6722 {
6723 unsigned imm = constant_generates_lxvkq (&vsx_const);
6724 if (imm)
6725 {
6726 operands[2] = GEN_INT (imm);
6727 return "lxvkq %x0,%2";
6728 }
6729
6730 imm = constant_generates_xxspltiw (&vsx_const);
6731 if (imm)
6732 {
6733 operands[2] = GEN_INT (imm);
6734 return "xxspltiw %x0,%2";
6735 }
6736
6737 imm = constant_generates_xxspltidp (&vsx_const);
6738 if (imm)
6739 {
6740 operands[2] = GEN_INT (imm);
6741 return "xxspltidp %x0,%2";
6742 }
6743 }
6744
6745 if (TARGET_P9_VECTOR
6746 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6747 {
6748 if (num_insns == 1)
6749 {
6750 operands[2] = GEN_INT (xxspltib_value & 0xff);
6751 return "xxspltib %x0,%2";
6752 }
6753
6754 return "#";
6755 }
6756 }
6757
6758 if (TARGET_ALTIVEC)
6759 {
6760 rtx splat_vec;
6761
6762 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6763 if (zero_constant (vec, mode))
6764 return "vspltisw %0,0";
6765
6766 if (all_ones_constant (vec, mode))
6767 return "vspltisw %0,-1";
6768
6769 /* Do we need to construct a value using VSLDOI? */
6770 shift = vspltis_shifted (vec);
6771 if (shift != 0)
6772 return "#";
6773
6774 splat_vec = gen_easy_altivec_constant (vec);
6775 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6776 operands[1] = XEXP (splat_vec, 0);
6777 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6778 return "#";
6779
6780 switch (GET_MODE (splat_vec))
6781 {
6782 case E_V4SImode:
6783 return "vspltisw %0,%1";
6784
6785 case E_V8HImode:
6786 return "vspltish %0,%1";
6787
6788 case E_V16QImode:
6789 return "vspltisb %0,%1";
6790
6791 default:
6792 gcc_unreachable ();
6793 }
6794 }
6795
6796 gcc_unreachable ();
6797 }
6798
6799 /* Initialize vector TARGET to VALS. */
6800
6801 void
6802 rs6000_expand_vector_init (rtx target, rtx vals)
6803 {
6804 machine_mode mode = GET_MODE (target);
6805 machine_mode inner_mode = GET_MODE_INNER (mode);
6806 unsigned int n_elts = GET_MODE_NUNITS (mode);
6807 int n_var = 0, one_var = -1;
6808 bool all_same = true, all_const_zero = true;
6809 rtx x, mem;
6810 unsigned int i;
6811
6812 for (i = 0; i < n_elts; ++i)
6813 {
6814 x = XVECEXP (vals, 0, i);
6815 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6816 ++n_var, one_var = i;
6817 else if (x != CONST0_RTX (inner_mode))
6818 all_const_zero = false;
6819
6820 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6821 all_same = false;
6822 }
6823
6824 if (n_var == 0)
6825 {
6826 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6827 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6828 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6829 {
6830 /* Zero register. */
6831 emit_move_insn (target, CONST0_RTX (mode));
6832 return;
6833 }
6834 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6835 {
6836 /* Splat immediate. */
6837 emit_insn (gen_rtx_SET (target, const_vec));
6838 return;
6839 }
6840 else
6841 {
6842 /* Load from constant pool. */
6843 emit_move_insn (target, const_vec);
6844 return;
6845 }
6846 }
6847
6848 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6849 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6850 {
6851 rtx op[2];
6852 size_t i;
6853 size_t num_elements = all_same ? 1 : 2;
6854 for (i = 0; i < num_elements; i++)
6855 {
6856 op[i] = XVECEXP (vals, 0, i);
6857 /* Just in case there is a SUBREG with a smaller mode, do a
6858 conversion. */
6859 if (GET_MODE (op[i]) != inner_mode)
6860 {
6861 rtx tmp = gen_reg_rtx (inner_mode);
6862 convert_move (tmp, op[i], 0);
6863 op[i] = tmp;
6864 }
6865 /* Allow load with splat double word. */
6866 else if (MEM_P (op[i]))
6867 {
6868 if (!all_same)
6869 op[i] = force_reg (inner_mode, op[i]);
6870 }
6871 else if (!REG_P (op[i]))
6872 op[i] = force_reg (inner_mode, op[i]);
6873 }
6874
6875 if (all_same)
6876 {
6877 if (mode == V2DFmode)
6878 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6879 else
6880 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6881 }
6882 else
6883 {
6884 if (mode == V2DFmode)
6885 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6886 else
6887 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6888 }
6889 return;
6890 }
6891
6892 /* Special case initializing vector int if we are on 64-bit systems with
6893 direct move or we have the ISA 3.0 instructions. */
6894 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6895 && TARGET_DIRECT_MOVE_64BIT)
6896 {
6897 if (all_same)
6898 {
6899 rtx element0 = XVECEXP (vals, 0, 0);
6900 if (MEM_P (element0))
6901 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6902 else
6903 element0 = force_reg (SImode, element0);
6904
6905 if (TARGET_P9_VECTOR)
6906 emit_insn (gen_vsx_splat_v4si (target, element0));
6907 else
6908 {
6909 rtx tmp = gen_reg_rtx (DImode);
6910 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6911 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6912 }
6913 return;
6914 }
6915 else
6916 {
6917 rtx elements[4];
6918 size_t i;
6919
6920 for (i = 0; i < 4; i++)
6921 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6922
6923 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6924 elements[2], elements[3]));
6925 return;
6926 }
6927 }
6928
6929 /* With single precision floating point on VSX, we know that internally
6930 single precision is actually represented as a double. Either make two
6931 V2DF vectors and convert those vectors to single precision, or do one
6932 conversion and splat the result to the other elements. */
6933 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6934 {
6935 if (all_same)
6936 {
6937 rtx element0 = XVECEXP (vals, 0, 0);
6938
6939 if (TARGET_P9_VECTOR)
6940 {
6941 if (MEM_P (element0))
6942 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6943
6944 emit_insn (gen_vsx_splat_v4sf (target, element0));
6945 }
6946
6947 else
6948 {
6949 rtx freg = gen_reg_rtx (V4SFmode);
6950 rtx sreg = force_reg (SFmode, element0);
6951 rtx cvt = (TARGET_XSCVDPSPN
6952 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6953 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6954
6955 emit_insn (cvt);
6956 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6957 const0_rtx));
6958 }
6959 }
6960 else
6961 {
6962 if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6963 {
6964 rtx tmp_sf[4];
6965 rtx tmp_si[4];
6966 rtx tmp_di[4];
6967 rtx mrg_di[4];
6968 for (i = 0; i < 4; i++)
6969 {
6970 tmp_si[i] = gen_reg_rtx (SImode);
6971 tmp_di[i] = gen_reg_rtx (DImode);
6972 mrg_di[i] = gen_reg_rtx (DImode);
6973 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6974 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
6975 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
6976 }
6977
6978 if (!BYTES_BIG_ENDIAN)
6979 {
6980 std::swap (tmp_di[0], tmp_di[1]);
6981 std::swap (tmp_di[2], tmp_di[3]);
6982 }
6983
6984 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
6985 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
6986 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
6987 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
6988
6989 rtx tmp_v2di = gen_reg_rtx (V2DImode);
6990 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
6991 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
6992 }
6993 else
6994 {
6995 rtx dbl_even = gen_reg_rtx (V2DFmode);
6996 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6997 rtx flt_even = gen_reg_rtx (V4SFmode);
6998 rtx flt_odd = gen_reg_rtx (V4SFmode);
6999 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
7000 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
7001 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
7002 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
7003
7004 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
7005 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
7006 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7007 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7008 rs6000_expand_extract_even (target, flt_even, flt_odd);
7009 }
7010 }
7011 return;
7012 }
7013
7014 /* Special case initializing vectors of short/char that are splats, if we
7015 are on 64-bit systems with direct move. */
7016 if (all_same && TARGET_DIRECT_MOVE_64BIT
7017 && (mode == V16QImode || mode == V8HImode))
7018 {
7019 rtx op0 = XVECEXP (vals, 0, 0);
7020 rtx di_tmp = gen_reg_rtx (DImode);
7021
7022 if (!REG_P (op0))
7023 op0 = force_reg (GET_MODE_INNER (mode), op0);
7024
7025 if (mode == V16QImode)
7026 {
7027 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7028 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7029 return;
7030 }
7031
7032 if (mode == V8HImode)
7033 {
7034 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7035 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7036 return;
7037 }
7038 }
7039
7040 /* Store value to stack temp. Load vector element. Splat. However, splat
7041 of 64-bit items is not supported on Altivec. */
7042 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7043 {
7044 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7045 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7046 XVECEXP (vals, 0, 0));
7047 x = gen_rtx_UNSPEC (VOIDmode,
7048 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7049 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7050 gen_rtvec (2,
7051 gen_rtx_SET (target, mem),
7052 x)));
7053 x = gen_rtx_VEC_SELECT (inner_mode, target,
7054 gen_rtx_PARALLEL (VOIDmode,
7055 gen_rtvec (1, const0_rtx)));
7056 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7057 return;
7058 }
7059
7060 /* One field is non-constant. Load constant then overwrite
7061 varying field. */
7062 if (n_var == 1)
7063 {
7064 rtx copy = copy_rtx (vals);
7065
7066 /* Load constant part of vector, substitute neighboring value for
7067 varying element. */
7068 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7069 rs6000_expand_vector_init (target, copy);
7070
7071 /* Insert variable. */
7072 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
7073 GEN_INT (one_var));
7074 return;
7075 }
7076
7077 if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
7078 {
7079 rtx op[16];
7080 /* Force the values into word_mode registers. */
7081 for (i = 0; i < n_elts; i++)
7082 {
7083 rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
7084 machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
7085 op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
7086 }
7087
7088 /* Take unsigned char on 64-bit big endian as an example for the
7089 construction below; the input values are: A, B, C, D, ..., O, P. */
7090
7091 if (TARGET_DIRECT_MOVE_128)
7092 {
7093 /* Move to VSX register with vec_concat, each has 2 values.
7094 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
7095 vr1[1] = { xxxxxxxC, xxxxxxxD };
7096 ...
7097 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
7098 rtx vr1[8];
7099 for (i = 0; i < n_elts / 2; i++)
7100 {
7101 vr1[i] = gen_reg_rtx (V2DImode);
7102 emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
7103 op[i * 2 + 1]));
7104 }
7105
7106 /* Pack vectors with 2 values into vectors with 4 values.
7107 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
7108 vr2[1] = { xxxExxxF, xxxGxxxH };
7109 vr2[2] = { xxxIxxxJ, xxxKxxxL };
7110 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
7111 rtx vr2[4];
7112 for (i = 0; i < n_elts / 4; i++)
7113 {
7114 vr2[i] = gen_reg_rtx (V4SImode);
7115 emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
7116 vr1[i * 2 + 1]));
7117 }
7118
7119 /* Pack vectors with 4 values into vectors with 8 values.
7120 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7121 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
7122 rtx vr3[2];
7123 for (i = 0; i < n_elts / 8; i++)
7124 {
7125 vr3[i] = gen_reg_rtx (V8HImode);
7126 emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
7127 vr2[i * 2 + 1]));
7128 }
7129
7130 /* If it's V8HImode, we're done; set the target and return. */
7131 if (mode == V8HImode)
7132 {
7133 emit_insn (gen_rtx_SET (target, vr3[0]));
7134 return;
7135 }
7136
7137 /* Pack vectors with 8 values into 16 values. */
7138 rtx res = gen_reg_rtx (V16QImode);
7139 emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
7140 emit_insn (gen_rtx_SET (target, res));
7141 }
7142 else
7143 {
7144 rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
7145 rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
7146 rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
7147 rtx perm_idx;
7148
7149 /* Set up some common gen routines and values. */
7150 if (BYTES_BIG_ENDIAN)
7151 {
7152 if (mode == V16QImode)
7153 {
7154 merge_v16qi = gen_altivec_vmrghb;
7155 merge_v8hi = gen_altivec_vmrglh;
7156 }
7157 else
7158 merge_v8hi = gen_altivec_vmrghh;
7159
7160 merge_v4si = gen_altivec_vmrglw;
7161 perm_idx = GEN_INT (3);
7162 }
7163 else
7164 {
7165 if (mode == V16QImode)
7166 {
7167 merge_v16qi = gen_altivec_vmrglb;
7168 merge_v8hi = gen_altivec_vmrghh;
7169 }
7170 else
7171 merge_v8hi = gen_altivec_vmrglh;
7172
7173 merge_v4si = gen_altivec_vmrghw;
7174 perm_idx = GEN_INT (0);
7175 }
7176
7177 /* Move to VSX register with direct move.
7178 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7179 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7180 ...
7181 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7182 rtx vr_qi[16];
7183 for (i = 0; i < n_elts; i++)
7184 {
7185 vr_qi[i] = gen_reg_rtx (V16QImode);
7186 if (TARGET_POWERPC64)
7187 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
7188 else
7189 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
7190 }
7191
7192 /* Merge/move to vector short.
7193 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7194 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7195 ...
7196 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7197 rtx vr_hi[8];
7198 for (i = 0; i < 8; i++)
7199 {
7200 rtx tmp = vr_qi[i];
7201 if (mode == V16QImode)
7202 {
7203 tmp = gen_reg_rtx (V16QImode);
7204 emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
7205 }
7206 vr_hi[i] = gen_reg_rtx (V8HImode);
7207 emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
7208 }
7209
7210 /* Merge vector short to vector int.
7211 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7212 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7213 ...
7214 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7215 rtx vr_si[4];
7216 for (i = 0; i < 4; i++)
7217 {
7218 rtx tmp = gen_reg_rtx (V8HImode);
7219 emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
7220 vr_si[i] = gen_reg_rtx (V4SImode);
7221 emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
7222 }
7223
7224 /* Merge vector int to vector long.
7225 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7226 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7227 rtx vr_di[2];
7228 for (i = 0; i < 2; i++)
7229 {
7230 rtx tmp = gen_reg_rtx (V4SImode);
7231 emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
7232 vr_di[i] = gen_reg_rtx (V2DImode);
7233 emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
7234 }
7235
7236 rtx res = gen_reg_rtx (V2DImode);
7237 emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
7238 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
7239 }
7240
7241 return;
7242 }
7243
7244 /* Construct the vector in memory one field at a time
7245 and load the whole vector. */
7246 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7247 for (i = 0; i < n_elts; i++)
7248 emit_move_insn (adjust_address_nv (mem, inner_mode,
7249 i * GET_MODE_SIZE (inner_mode)),
7250 XVECEXP (vals, 0, i));
7251 emit_move_insn (target, mem);
7252 }
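/* Editor's sketch of a typical use from a vec_init expander (operand
   names are illustrative):

       rtx target = operands[0];   -- e.g. a V4SImode register
       rtx vals   = operands[1];   -- a PARALLEL of n_elts elements
       rs6000_expand_vector_init (target, vals);

   The routine picks the cheapest strategy above: constant pool, splat,
   concat/direct moves, or a stack temporary.  */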
7253
7254 /* Insert VAL into element IDX of TARGET; VAL has the same size as a vector
7255 element. The variable IDX counts in vector-element units. P9 and above. */
7256
7257 static void
7258 rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
7259 {
7260 machine_mode mode = GET_MODE (target);
7261
7262 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7263
7264 machine_mode inner_mode = GET_MODE (val);
7265
7266 int width = GET_MODE_SIZE (inner_mode);
7267
7268 gcc_assert (width >= 1 && width <= 8);
7269
7270 int shift = exact_log2 (width);
7271
7272 machine_mode idx_mode = GET_MODE (idx);
7273
7274 machine_mode shift_mode;
7275 /* Gen function pointers for shifting left and generation of permutation
7276 control vectors. */
7277 rtx (*gen_ashl) (rtx, rtx, rtx);
7278 rtx (*gen_pcvr1) (rtx, rtx);
7279 rtx (*gen_pcvr2) (rtx, rtx);
7280
7281 if (TARGET_POWERPC64)
7282 {
7283 shift_mode = DImode;
7284 gen_ashl = gen_ashldi3;
7285 gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_di
7286 : gen_altivec_lvsr_reg_di;
7287 gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_di
7288 : gen_altivec_lvsl_reg_di;
7289 }
7290 else
7291 {
7292 shift_mode = SImode;
7293 gen_ashl = gen_ashlsi3;
7294 gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_si
7295 : gen_altivec_lvsr_reg_si;
7296 gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_si
7297 : gen_altivec_lvsl_reg_si;
7298 }
7299 /* Generate the IDX for permute shift, width is the vector element size.
7300 idx = idx * width. */
7301 rtx tmp = gen_reg_rtx (shift_mode);
7302 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7303
7304 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7305
7306 /* Generate one permutation control vector used to rotate the element at
7307 the to-insert position to element zero of the target vector. lvsl is
7308 used for big endian while lvsr is used for little endian:
7309 lvs[lr] v1,0,idx. */
7310 rtx pcvr1 = gen_reg_rtx (V16QImode);
7311 emit_insn (gen_pcvr1 (pcvr1, tmp));
7312
7313 rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
7314 rtx perm1 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
7315 pcvr1);
7316 emit_insn (perm1);
7317
7318 /* Insert val into element 0 of target vector. */
7319 rs6000_expand_vector_set (target, val, const0_rtx);
7320
7321 /* Rotate back with a reversed permutation control vector generated from:
7322 lvs[rl] v2,0,idx. */
7323 rtx pcvr2 = gen_reg_rtx (V16QImode);
7324 emit_insn (gen_pcvr2 (pcvr2, tmp));
7325
7326 rtx perm2 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
7327 pcvr2);
7328 emit_insn (perm2);
7329 }
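/* Editor's illustration of the sequence above for a V4SImode target on a
   64-bit big-endian system (register numbers are hypothetical):

       sldi  9,IDX,2        ; idx *= element width (4)
       lvsl  1,0,9          ; pcvr1
       vperm 2,2,2,1        ; rotate element idx to element 0
       ...insert VAL into element 0 of v2...
       lvsr  3,0,9          ; pcvr2
       vperm 2,2,2,3        ; rotate back  */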
7330
7331 /* Insert VAL into element IDX of TARGET; VAL has the same size as a vector
7332 element. The variable IDX counts in vector-element units. P7 and p8. */
7333
7334 static void
7335 rs6000_expand_vector_set_var_p7 (rtx target, rtx val, rtx idx)
7336 {
7337 machine_mode mode = GET_MODE (target);
7338
7339 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7340
7341 machine_mode inner_mode = GET_MODE (val);
7342 HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
7343
7344 int width = GET_MODE_SIZE (inner_mode);
7345 gcc_assert (width >= 1 && width <= 4);
7346
7347 int shift = exact_log2 (width);
7348
7349 machine_mode idx_mode = GET_MODE (idx);
7350
7351 machine_mode shift_mode;
7352 rtx (*gen_ashl)(rtx, rtx, rtx);
7353 rtx (*gen_add)(rtx, rtx, rtx);
7354 rtx (*gen_sub)(rtx, rtx, rtx);
7355 rtx (*gen_lvsl)(rtx, rtx);
7356
7357 if (TARGET_POWERPC64)
7358 {
7359 shift_mode = DImode;
7360 gen_ashl = gen_ashldi3;
7361 gen_add = gen_adddi3;
7362 gen_sub = gen_subdi3;
7363 gen_lvsl = gen_altivec_lvsl_reg_di;
7364 }
7365 else
7366 {
7367 shift_mode = SImode;
7368 gen_ashl = gen_ashlsi3;
7369 gen_add = gen_addsi3;
7370 gen_sub = gen_subsi3;
7371 gen_lvsl = gen_altivec_lvsl_reg_si;
7372 }
7373
7374 /* idx = idx * width. */
7375 rtx tmp = gen_reg_rtx (shift_mode);
7376 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7377
7378 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7379
7380 /* For LE: idx = idx + 8; for BE: idx = (24 - width) - idx. */
7381 if (!BYTES_BIG_ENDIAN)
7382 emit_insn (gen_add (tmp, tmp, GEN_INT (8)));
7383 else
7384 emit_insn (gen_sub (tmp, GEN_INT (24 - width), tmp));
7385
7386 /* lxv vs33, mask.
7387 DImode: 0xffffffffffffffff0000000000000000
7388 SImode: 0x00000000ffffffff0000000000000000
7389 HImode: 0x000000000000ffff0000000000000000
7390 QImode: 0x00000000000000ff0000000000000000. */
7391 rtx mask = gen_reg_rtx (V16QImode);
7392 rtx mask_v2di = gen_reg_rtx (V2DImode);
7393 rtvec v = rtvec_alloc (2);
7394 if (!BYTES_BIG_ENDIAN)
7395 {
7396 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
7397 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
7398 }
7399 else
7400 {
7401 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
7402 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
7403 }
7404 emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
7405 rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
7406 emit_insn (gen_rtx_SET (mask, sub_mask));
7407
7408 /* mtvsrd[wz] f0,tmp_val. */
7409 rtx tmp_val = gen_reg_rtx (SImode);
7410 if (inner_mode == E_SFmode)
7411 if (TARGET_DIRECT_MOVE_64BIT)
7412 emit_insn (gen_movsi_from_sf (tmp_val, val));
7413 else
7414 {
7415 rtx stack = rs6000_allocate_stack_temp (SFmode, false, true);
7416 emit_insn (gen_movsf_hardfloat (stack, val));
7417 rtx stack2 = copy_rtx (stack);
7418 PUT_MODE (stack2, SImode);
7419 emit_move_insn (tmp_val, stack2);
7420 }
7421 else
7422 tmp_val = force_reg (SImode, val);
7423
7424 rtx val_v16qi = gen_reg_rtx (V16QImode);
7425 rtx val_v2di = gen_reg_rtx (V2DImode);
7426 rtvec vec_val = rtvec_alloc (2);
7427 if (!BYTES_BIG_ENDIAN)
7428 {
7429 RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
7430 RTVEC_ELT (vec_val, 1) = tmp_val;
7431 }
7432 else
7433 {
7434 RTVEC_ELT (vec_val, 0) = tmp_val;
7435 RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
7436 }
7437 emit_insn (
7438 gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
7439 rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
7440 emit_insn (gen_rtx_SET (val_v16qi, sub_val));
7441
7442 /* lvsl 13,0,idx. */
7443 rtx pcv = gen_reg_rtx (V16QImode);
7444 emit_insn (gen_lvsl (pcv, tmp));
7445
7446 /* vperm 1,1,1,13. */
7447 /* vperm 0,0,0,13. */
7448 rtx val_perm = gen_reg_rtx (V16QImode);
7449 rtx mask_perm = gen_reg_rtx (V16QImode);
7450 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
7451 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
7452
7453 rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
7454
7455 /* xxsel 34,34,32,33. */
7456 emit_insn (
7457 gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
7458 }
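/* Editor's summary of the sequence above (pre-P9 path):

       sldi/addi ...        ; scale and bias the element index
       lvsl  13,0,IDX       ; permutation control vector
       vperm 0,0,0,13       ; rotate the splatted VAL into place
       vperm 1,1,1,13       ; rotate the mask the same way
       xxsel 34,34,32,33    ; select VAL into the target lane  */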
7459
7460 /* Set field ELT_RTX of TARGET to VAL. */
7461
7462 void
7463 rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
7464 {
7465 machine_mode mode = GET_MODE (target);
7466 machine_mode inner_mode = GET_MODE_INNER (mode);
7467 rtx reg = gen_reg_rtx (mode);
7468 rtx mask, mem, x;
7469 int width = GET_MODE_SIZE (inner_mode);
7470 int i;
7471
7472 val = force_reg (GET_MODE (val), val);
7473
7474 if (VECTOR_MEM_VSX_P (mode))
7475 {
7476 if (!CONST_INT_P (elt_rtx))
7477 {
7478 /* For V2DI/V2DF, could leverage the P9 version to generate xxpermdi
7479 when elt_rtx is variable. */
7480 if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
7481 {
7482 rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
7483 return;
7484 }
7485 else if (TARGET_VSX)
7486 {
7487 rs6000_expand_vector_set_var_p7 (target, val, elt_rtx);
7488 return;
7489 }
7490 else
7491 gcc_assert (CONST_INT_P (elt_rtx));
7492 }
7493
7494 rtx insn = NULL_RTX;
7495
7496 if (mode == V2DFmode)
7497 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7498
7499 else if (mode == V2DImode)
7500 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7501
7502 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7503 {
7504 if (mode == V4SImode)
7505 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7506 else if (mode == V8HImode)
7507 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7508 else if (mode == V16QImode)
7509 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7510 else if (mode == V4SFmode)
7511 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7512 }
7513
7514 if (insn)
7515 {
7516 emit_insn (insn);
7517 return;
7518 }
7519 }
7520
7521 /* Simplify setting single element vectors like V1TImode. */
7522 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7523 && INTVAL (elt_rtx) == 0)
7524 {
7525 emit_move_insn (target, gen_lowpart (mode, val));
7526 return;
7527 }
7528
7529 /* Load single variable value. */
7530 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7531 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7532 x = gen_rtx_UNSPEC (VOIDmode,
7533 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7534 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7535 gen_rtvec (2,
7536 gen_rtx_SET (reg, mem),
7537 x)));
7538
7539 /* Linear sequence. */
7540 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7541 for (i = 0; i < 16; ++i)
7542 XVECEXP (mask, 0, i) = GEN_INT (i);
7543
7544 /* Set permute mask to insert element into target. */
7545 for (i = 0; i < width; ++i)
7546 XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
7547 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7548
7549 if (BYTES_BIG_ENDIAN)
7550 x = gen_rtx_UNSPEC (mode,
7551 gen_rtvec (3, target, reg,
7552 force_reg (V16QImode, x)),
7553 UNSPEC_VPERM);
7554 else
7555 {
7556 if (TARGET_P9_VECTOR)
7557 x = gen_rtx_UNSPEC (mode,
7558 gen_rtvec (3, reg, target,
7559 force_reg (V16QImode, x)),
7560 UNSPEC_VPERMR);
7561 else
7562 {
7563 /* Invert selector. We prefer to generate VNAND on P8 so
7564 that future fusion opportunities can kick in, but must
7565 generate VNOR elsewhere. */
7566 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7567 rtx iorx = (TARGET_P8_VECTOR
7568 ? gen_rtx_IOR (V16QImode, notx, notx)
7569 : gen_rtx_AND (V16QImode, notx, notx));
7570 rtx tmp = gen_reg_rtx (V16QImode);
7571 emit_insn (gen_rtx_SET (tmp, iorx));
7572
7573 /* Permute with operands reversed and adjusted selector. */
7574 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7575 UNSPEC_VPERM);
7576 }
7577 }
7578
7579 emit_insn (gen_rtx_SET (target, x));
7580 }
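/* Editor's example: on a 64-bit ISA 3.0 (P9) target,

       rs6000_expand_vector_set (v4si_reg, si_val, GEN_INT (2));

   emits a single vsx_set_v4si_p9 insn; on earlier VSX targets the generic
   store/permute sequence above is emitted instead.  */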
7581
7582 /* Extract field ELT from VEC into TARGET. */
7583
7584 void
7585 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7586 {
7587 machine_mode mode = GET_MODE (vec);
7588 machine_mode inner_mode = GET_MODE_INNER (mode);
7589 rtx mem;
7590
7591 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7592 {
7593 switch (mode)
7594 {
7595 default:
7596 break;
7597 case E_V1TImode:
7598 emit_move_insn (target, gen_lowpart (TImode, vec));
7599 break;
7600 case E_V2DFmode:
7601 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7602 return;
7603 case E_V2DImode:
7604 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7605 return;
7606 case E_V4SFmode:
7607 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7608 return;
7609 case E_V16QImode:
7610 if (TARGET_DIRECT_MOVE_64BIT)
7611 {
7612 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7613 return;
7614 }
7615 else
7616 break;
7617 case E_V8HImode:
7618 if (TARGET_DIRECT_MOVE_64BIT)
7619 {
7620 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7621 return;
7622 }
7623 else
7624 break;
7625 case E_V4SImode:
7626 if (TARGET_DIRECT_MOVE_64BIT)
7627 {
7628 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7629 return;
7630 }
7631 break;
7632 }
7633 }
7634 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7635 && TARGET_DIRECT_MOVE_64BIT)
7636 {
7637 if (GET_MODE (elt) != DImode)
7638 {
7639 rtx tmp = gen_reg_rtx (DImode);
7640 convert_move (tmp, elt, 0);
7641 elt = tmp;
7642 }
7643 else if (!REG_P (elt))
7644 elt = force_reg (DImode, elt);
7645
7646 switch (mode)
7647 {
7648 case E_V1TImode:
7649 emit_move_insn (target, gen_lowpart (TImode, vec));
7650 return;
7651
7652 case E_V2DFmode:
7653 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7654 return;
7655
7656 case E_V2DImode:
7657 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7658 return;
7659
7660 case E_V4SFmode:
7661 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7662 return;
7663
7664 case E_V4SImode:
7665 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7666 return;
7667
7668 case E_V8HImode:
7669 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7670 return;
7671
7672 case E_V16QImode:
7673 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7674 return;
7675
7676 default:
7677 gcc_unreachable ();
7678 }
7679 }
7680
7681 /* Allocate mode-sized buffer. */
7682 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7683
7684 emit_move_insn (mem, vec);
7685 if (CONST_INT_P (elt))
7686 {
7687 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7688
7689 /* Add offset to field within buffer matching vector element. */
7690 mem = adjust_address_nv (mem, inner_mode,
7691 modulo_elt * GET_MODE_SIZE (inner_mode));
7692 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7693 }
7694 else
7695 {
7696 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7697 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7698
7699 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7700 if (ele_size > 1)
7701 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7702 rtx new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7703 new_addr = change_address (mem, inner_mode, new_addr);
7704 emit_move_insn (target, new_addr);
7705 }
7706 }
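/* Editor's example: with a constant element on VSX,

       rs6000_expand_vector_extract (di_reg, v2di_reg, const1_rtx);

   emits a single vsx_extract_v2di insn; with a variable element and no
   64-bit direct move, the vector is spilled to a stack temporary and the
   element is reloaded from a masked, scaled offset as above.  */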
7707
7708 /* Return the offset within a memory object (MEM) of a vector type to a given
7709 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7710 the element is constant, we return a constant integer.
7711
7712 Otherwise, we use a base register temporary to calculate the offset after
7713 masking it to fit within the bounds of the vector and scaling it. The
7714 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7715 built-in function. */
7716
7717 static rtx
7718 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7719 {
7720 if (CONST_INT_P (element))
7721 return GEN_INT (INTVAL (element) * scalar_size);
7722
7723 /* All insns should use the 'Q' constraint (address is a single register) if
7724 the element number is not a constant. */
7725 gcc_assert (satisfies_constraint_Q (mem));
7726
7727 /* Mask the element to make sure the element number is between 0 and the
7728 maximum number of elements - 1 so that we don't generate an address
7729 outside the vector. */
7730 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7731 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7732 emit_insn (gen_rtx_SET (base_tmp, and_op));
7733
7734 /* Shift the element to get the byte offset from the element number. */
7735 int shift = exact_log2 (scalar_size);
7736 gcc_assert (shift >= 0);
7737
7738 if (shift > 0)
7739 {
7740 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7741 emit_insn (gen_rtx_SET (base_tmp, shift_op));
7742 }
7743
7744 return base_tmp;
7745 }
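/* Editor's examples for a V4SImode vector (4-byte elements): element 3
   yields GEN_INT (12), while a variable element yields RTL equivalent to

       base_tmp = element & 3     -- clamp per the ELFv2 vec_extract rules
       base_tmp = base_tmp << 2   -- scale to a byte offset

   with BASE_TMP returned as the offset.  */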
7746
7747 /* Helper function to update a PC-relative memory address (ADDR) of a vector
7748 so that it points to a scalar field within the vector at a constant offset
7749 (ELEMENT_OFFSET). If the resulting address is not valid, we can use the
7750 base register temporary (BASE_TMP) to form the address. */
7751
7752 static rtx
7753 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7754 {
7755 rtx new_addr = NULL;
7756
7757 gcc_assert (CONST_INT_P (element_offset));
7758
7759 if (GET_CODE (addr) == CONST)
7760 addr = XEXP (addr, 0);
7761
7762 if (GET_CODE (addr) == PLUS)
7763 {
7764 rtx op0 = XEXP (addr, 0);
7765 rtx op1 = XEXP (addr, 1);
7766
7767 if (CONST_INT_P (op1))
7768 {
7769 HOST_WIDE_INT offset
7770 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7771
7772 if (offset == 0)
7773 new_addr = op0;
7774
7775 else
7776 {
7777 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7778 new_addr = gen_rtx_CONST (Pmode, plus);
7779 }
7780 }
7781
7782 else
7783 {
7784 emit_move_insn (base_tmp, addr);
7785 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7786 }
7787 }
7788
7789 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7790 {
7791 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7792 new_addr = gen_rtx_CONST (Pmode, plus);
7793 }
7794
7795 else
7796 gcc_unreachable ();
7797
7798 return new_addr;
7799 }
7800
7801 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7802 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7803 temporary (BASE_TMP) to fixup the address. Return the new memory address
7804 that is valid for reads or writes to a given register (SCALAR_REG).
7805
7806 This function is expected to be called after reload is completed when we are
7807 splitting insns. The temporary BASE_TMP might be set multiple times with
7808 this code. */
7809
7810 rtx
7811 rs6000_adjust_vec_address (rtx scalar_reg,
7812 rtx mem,
7813 rtx element,
7814 rtx base_tmp,
7815 machine_mode scalar_mode)
7816 {
7817 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7818 rtx addr = XEXP (mem, 0);
7819 rtx new_addr;
7820
7821 gcc_assert (!reg_mentioned_p (base_tmp, addr));
7822 gcc_assert (!reg_mentioned_p (base_tmp, element));
7823
7824 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7825 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7826
7827 /* Calculate what we need to add to the address to get the element
7828 address. */
7829 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7830
7831 /* Create the new address pointing to the element within the vector. If we
7832 are adding 0, we don't have to change the address. */
7833 if (element_offset == const0_rtx)
7834 new_addr = addr;
7835
7836 /* A simple indirect address can be converted into a reg + offset
7837 address. */
7838 else if (REG_P (addr) || SUBREG_P (addr))
7839 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7840
7841 /* For references to local static variables, fold a constant offset into the
7842 address. */
7843 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7844 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7845
7846 /* Optimize D-FORM addresses with constant offset with a constant element, to
7847 include the element offset in the address directly. */
7848 else if (GET_CODE (addr) == PLUS)
7849 {
7850 rtx op0 = XEXP (addr, 0);
7851 rtx op1 = XEXP (addr, 1);
7852
7853 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7854 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7855 {
7856 /* op0 should never be r0, because r0+offset is not valid. But it
7857 doesn't hurt to make sure it is not r0. */
7858 gcc_assert (reg_or_subregno (op0) != 0);
7859
7860 /* D-FORM address with constant element number. */
7861 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7862 rtx offset_rtx = GEN_INT (offset);
7863 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7864 }
7865 else
7866 {
7867 /* If we don't have a D-FORM address with a constant element number,
7868 add the two elements in the current address. Then add the offset.
7869
7870 Previously, we tried to add the offset to OP1 and change the
7871 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7872 complicated because we had to verify that op1 was not GPR0 and we
7873 had a constant element offset (due to the way ADDI is defined).
7874 By doing the add of OP0 and OP1 first, and then adding in the
7875 offset, it has the benefit that if D-FORM instructions are
7876 allowed, the offset is part of the memory access to the vector
7877 element. */
7878 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7879 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7880 }
7881 }
7882
7883 else
7884 {
7885 emit_move_insn (base_tmp, addr);
7886 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7887 }
7888
7889 /* If the address isn't valid, move the address into the temporary base
7890 register. Some reasons it could not be valid include:
7891
7892 The address offset overflowed the 16 or 34 bit offset size;
7893 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7894 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7895 Only X_FORM loads can be done, and the address is D_FORM. */
7896
7897 enum insn_form iform
7898 = address_to_insn_form (new_addr, scalar_mode,
7899 reg_to_non_prefixed (scalar_reg, scalar_mode));
7900
7901 if (iform == INSN_FORM_BAD)
7902 {
7903 emit_move_insn (base_tmp, new_addr);
7904 new_addr = base_tmp;
7905 }
7906
7907 return change_address (mem, scalar_mode, new_addr);
7908 }
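/* Editor's example: extracting DImode element 1 of a V2DImode vector
   stored at (plus rB (const_int 32)) folds the element offset into the
   displacement, giving (mem:DI (plus rB (const_int 40))); if that address
   were invalid for the scalar access, it would be moved into BASE_TMP
   instead.  */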
7909
7910 /* Split a variable vec_extract operation into the component instructions. */
7911
7912 void
7913 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7914 rtx tmp_altivec)
7915 {
7916 machine_mode mode = GET_MODE (src);
7917 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7918 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7919 int byte_shift = exact_log2 (scalar_size);
7920
7921 gcc_assert (byte_shift >= 0);
7922
7923 /* If we are given a memory address, optimize to load just the element. We
7924 don't have to adjust the vector element number on little endian
7925 systems. */
7926 if (MEM_P (src))
7927 {
7928 emit_move_insn (dest,
7929 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
7930 scalar_mode));
7931 return;
7932 }
7933
7934 else if (REG_P (src) || SUBREG_P (src))
7935 {
7936 int num_elements = GET_MODE_NUNITS (mode);
7937 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7938 int bit_shift = 7 - exact_log2 (num_elements);
7939 rtx element2;
7940 unsigned int dest_regno = reg_or_subregno (dest);
7941 unsigned int src_regno = reg_or_subregno (src);
7942 unsigned int element_regno = reg_or_subregno (element);
7943
7944 gcc_assert (REG_P (tmp_gpr));
7945
7946 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7947 a general purpose register. */
7948 if (TARGET_P9_VECTOR
7949 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7950 && INT_REGNO_P (dest_regno)
7951 && ALTIVEC_REGNO_P (src_regno)
7952 && INT_REGNO_P (element_regno))
7953 {
7954 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7955 rtx element_si = gen_rtx_REG (SImode, element_regno);
7956
7957 if (mode == V16QImode)
7958 emit_insn (BYTES_BIG_ENDIAN
7959 ? gen_vextublx (dest_si, element_si, src)
7960 : gen_vextubrx (dest_si, element_si, src));
7961
7962 else if (mode == V8HImode)
7963 {
7964 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7965 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7966 emit_insn (BYTES_BIG_ENDIAN
7967 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7968 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7969 }
7970
7971
7972 else
7973 {
7974 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7975 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7976 emit_insn (BYTES_BIG_ENDIAN
7977 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7978 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7979 }
7980
7981 return;
7982 }
7983
7984
7985 gcc_assert (REG_P (tmp_altivec));
7986
7987 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7988 an XOR, otherwise we need to subtract. The shift amount is chosen so
7989 that VSLO will shift the element into the upper position (adding 3
7990 converts a byte shift into a bit shift). */
7991 if (scalar_size == 8)
7992 {
7993 if (!BYTES_BIG_ENDIAN)
7994 {
7995 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7996 element2 = tmp_gpr;
7997 }
7998 else
7999 element2 = element;
8000
8001 /* Generate RLDIC directly to shift left 6 bits and retrieve 1
8002 bit. */
8003 emit_insn (gen_rtx_SET (tmp_gpr,
8004 gen_rtx_AND (DImode,
8005 gen_rtx_ASHIFT (DImode,
8006 element2,
8007 GEN_INT (6)),
8008 GEN_INT (64))));
8009 }
8010 else
8011 {
8012 if (!BYTES_BIG_ENDIAN)
8013 {
8014 rtx num_ele_m1 = GEN_INT (num_elements - 1);
8015
8016 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
8017 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
8018 element2 = tmp_gpr;
8019 }
8020 else
8021 element2 = element;
8022
8023 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
8024 }
8025
8026 /* Get the value into the lower byte of the Altivec register where VSLO
8027 expects it. */
8028 if (TARGET_P9_VECTOR)
8029 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
8030 else if (can_create_pseudo_p ())
8031 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
8032 else
8033 {
8034 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8035 emit_move_insn (tmp_di, tmp_gpr);
8036 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
8037 }
8038
8039 /* Do the VSLO to get the value into the final location. */
8040 switch (mode)
8041 {
8042 case E_V2DFmode:
8043 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
8044 return;
8045
8046 case E_V2DImode:
8047 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
8048 return;
8049
8050 case E_V4SFmode:
8051 {
8052 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8053 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
8054 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8055 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8056 tmp_altivec));
8057
8058 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
8059 return;
8060 }
8061
8062 case E_V4SImode:
8063 case E_V8HImode:
8064 case E_V16QImode:
8065 {
8066 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8067 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8068 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
8069 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8070 tmp_altivec));
8071 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
8072 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
8073 GEN_INT (64 - bits_in_element)));
8074 return;
8075 }
8076
8077 default:
8078 gcc_unreachable ();
8079 }
8080
8081 return;
8082 }
8083 else
8084 gcc_unreachable ();
8085 }
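/* Editor's illustration: on P9, a variable extract of element r5 from a
   V4SImode vector in v2, with the result landing in a GPR, becomes

       slwi     9,5,2       ; scale the element number by 4
       vextuwlx 3,9,2       ; big endian (vextuwrx on little endian)  */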
8086
8087 /* Return the alignment of TYPE. The existing alignment is ALIGN. HOW
8088 selects whether the alignment is ABI-mandated, optional, or both
8089 ABI-mandated and optional alignment. */
8090
8091 unsigned int
8092 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
8093 {
8094 if (how != align_opt)
8095 {
8096 if (VECTOR_TYPE_P (type) && align < 128)
8097 align = 128;
8098 }
8099
8100 if (how != align_abi)
8101 {
8102 if (TREE_CODE (type) == ARRAY_TYPE
8103 && TYPE_MODE (TREE_TYPE (type)) == QImode)
8104 {
8105 if (align < BITS_PER_WORD)
8106 align = BITS_PER_WORD;
8107 }
8108 }
8109
8110 return align;
8111 }
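/* Editor's examples: a vector type below 128-bit alignment is raised to
   128 when ABI alignment is requested; a plain char array is raised to
   word alignment when optional (performance) alignment is requested.  */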
8112
8113 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
8114 instructions simply ignore the low bits; VSX memory instructions
8115 are aligned to 4 or 8 bytes. */
8116
8117 static bool
8118 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
8119 {
8120 return (STRICT_ALIGNMENT
8121 || (!TARGET_EFFICIENT_UNALIGNED_VSX
8122 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
8123 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
8124 && (int) align < VECTOR_ALIGN (mode)))));
8125 }
8126
8127 /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
8128
8129 unsigned int
8130 rs6000_special_adjust_field_align (tree type, unsigned int computed)
8131 {
8132 if (computed <= 32 || TYPE_PACKED (type))
8133 return computed;
8134
8135 /* Strip initial arrays. */
8136 while (TREE_CODE (type) == ARRAY_TYPE)
8137 type = TREE_TYPE (type);
8138
8139 /* If RECORD or UNION, recursively find the first field. */
8140 while (AGGREGATE_TYPE_P (type))
8141 {
8142 tree field = TYPE_FIELDS (type);
8143
8144 /* Skip all non-field decls. */
8145 while (field != NULL
8146 && (TREE_CODE (field) != FIELD_DECL
8147 || DECL_FIELD_ABI_IGNORED (field)))
8148 field = DECL_CHAIN (field);
8149
8150 if (! field)
8151 break;
8152
8153 /* A packed field does not contribute any extra alignment. */
8154 if (DECL_PACKED (field))
8155 return computed;
8156
8157 type = TREE_TYPE (field);
8158
8159 /* Strip arrays. */
8160 while (TREE_CODE (type) == ARRAY_TYPE)
8161 type = TREE_TYPE (type);
8162 }
8163
8164 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8165 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8166 computed = MIN (computed, 32);
8167
8168 return computed;
8169 }
8170
8171 /* AIX increases natural record alignment to doubleword if the first
8172 (innermost) field is an FP double, while the FP fields themselves remain
8173 word aligned. Only called if TYPE initially is a RECORD or UNION. */
8174
8175 unsigned int
8176 rs6000_special_round_type_align (tree type, unsigned int computed,
8177 unsigned int specified)
8178 {
8179 unsigned int align = MAX (computed, specified);
8180
8181 if (TYPE_PACKED (type) || align >= 64)
8182 return align;
8183
8184 /* If RECORD or UNION, recursively find the first field. */
8185 do
8186 {
8187 tree field = TYPE_FIELDS (type);
8188
8189 /* Skip all non-field decls. */
8190 while (field != NULL
8191 && (TREE_CODE (field) != FIELD_DECL
8192 || DECL_FIELD_ABI_IGNORED (field)))
8193 field = DECL_CHAIN (field);
8194
8195 if (! field)
8196 break;
8197
8198 /* A packed field does not contribute any extra alignment. */
8199 if (DECL_PACKED (field))
8200 return align;
8201
8202 type = TREE_TYPE (field);
8203
8204 /* Strip arrays. */
8205 while (TREE_CODE (type) == ARRAY_TYPE)
8206 type = TREE_TYPE (type);
8207 } while (AGGREGATE_TYPE_P (type));
8208
8209 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8210 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8211 align = MAX (align, 64);
8212
8213 return align;
8214 }
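/* Editor's example: on AIX,

       struct s { double d; int i; };

   computes a natural alignment of 32 (FP doubles are word-aligned in
   fields), but this hook raises the record alignment to 64 because the
   innermost first field is an FP double.  */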
8215
8216 /* Darwin increases record alignment to the natural alignment of
8217 the first field. */
8218
8219 unsigned int
8220 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8221 unsigned int specified)
8222 {
8223 unsigned int align = MAX (computed, specified);
8224
8225 if (TYPE_PACKED (type))
8226 return align;
8227
8228 /* Find the first field, looking down into aggregates. */
8229 do {
8230 tree field = TYPE_FIELDS (type);
8231 /* Skip all non-field decls. */
8232 while (field != NULL
8233 && (TREE_CODE (field) != FIELD_DECL
8234 || DECL_FIELD_ABI_IGNORED (field)))
8235 field = DECL_CHAIN (field);
8236 if (! field)
8237 break;
8238 /* A packed field does not contribute any extra alignment. */
8239 if (DECL_PACKED (field))
8240 return align;
8241 type = TREE_TYPE (field);
8242 while (TREE_CODE (type) == ARRAY_TYPE)
8243 type = TREE_TYPE (type);
8244 } while (AGGREGATE_TYPE_P (type));
8245
8246 if (type != error_mark_node && ! AGGREGATE_TYPE_P (type)
8247 && ! TYPE_PACKED (type) && maximum_field_alignment == 0)
8248 align = MAX (align, TYPE_ALIGN (type));
8249
8250 return align;
8251 }
8252
8253 /* Return 1 for an operand in small memory on V.4/eabi. */
8254
8255 int
8256 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8257 machine_mode mode ATTRIBUTE_UNUSED)
8258 {
8259 #if TARGET_ELF
8260 rtx sym_ref;
8261
8262 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8263 return 0;
8264
8265 if (DEFAULT_ABI != ABI_V4)
8266 return 0;
8267
8268 if (SYMBOL_REF_P (op))
8269 sym_ref = op;
8270
8271 else if (GET_CODE (op) != CONST
8272 || GET_CODE (XEXP (op, 0)) != PLUS
8273 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
8274 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
8275 return 0;
8276
8277 else
8278 {
8279 rtx sum = XEXP (op, 0);
8280 HOST_WIDE_INT summand;
8281
8282 /* We have to be careful here, because it is the referenced address
8283 that must be 32k from _SDA_BASE_, not just the symbol. */
8284 summand = INTVAL (XEXP (sum, 1));
8285 if (summand < 0 || summand > g_switch_value)
8286 return 0;
8287
8288 sym_ref = XEXP (sum, 0);
8289 }
8290
8291 return SYMBOL_REF_SMALL_P (sym_ref);
8292 #else
8293 return 0;
8294 #endif
8295 }
8296
8297 /* Return true if either operand is a general purpose register. */
8298
8299 bool
8300 gpr_or_gpr_p (rtx op0, rtx op1)
8301 {
8302 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8303 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8304 }
8305
8306 /* Return true if this is a move direct operation between GPR registers and
8307 floating point/VSX registers. */
8308
8309 bool
8310 direct_move_p (rtx op0, rtx op1)
8311 {
8312 if (!REG_P (op0) || !REG_P (op1))
8313 return false;
8314
8315 if (!TARGET_DIRECT_MOVE)
8316 return false;
8317
8318 int regno0 = REGNO (op0);
8319 int regno1 = REGNO (op1);
8320 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
8321 return false;
8322
8323 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
8324 return true;
8325
8326 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
8327 return true;
8328
8329 return false;
8330 }
8331
8332 /* Return true if ADDR is an acceptable address for a quad memory
8333 operation of mode MODE (either LQ/STQ for general purpose registers, or
8334 LXV/STXV for vector registers under ISA 3.0). STRICT is true if the
8335 address must satisfy the strict register checking used after reload;
8336 otherwise non-strict (pre-reload) checking is used. */
8337
8338 bool
8339 quad_address_p (rtx addr, machine_mode mode, bool strict)
8340 {
8341 rtx op0, op1;
8342
8343 if (GET_MODE_SIZE (mode) < 16)
8344 return false;
8345
8346 if (legitimate_indirect_address_p (addr, strict))
8347 return true;
8348
8349 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
8350 return false;
8351
8352 /* Is this a valid prefixed address? If the bottom four bits of the offset
8353 are non-zero, we could use a prefixed instruction (which does not have the
8354 DQ-form constraint that the traditional instruction had) instead of
8355 forcing the unaligned offset to a GPR. */
8356 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
8357 return true;
8358
8359 if (GET_CODE (addr) != PLUS)
8360 return false;
8361
8362 op0 = XEXP (addr, 0);
8363 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8364 return false;
8365
8366 op1 = XEXP (addr, 1);
8367 if (!CONST_INT_P (op1))
8368 return false;
8369
8370 return quad_address_offset_p (INTVAL (op1));
8371 }
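/* Editor's examples: (reg rA) is always acceptable; (plus rA (const_int
   48)) is acceptable for LXV/STXV since 48 is a quad-aligned offset (see
   quad_address_offset_p); (plus rA (const_int 50)) is only usable via a
   prefixed instruction, per the address_is_prefixed check above.  */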
8372
8373 /* Return true if this is a load or store quad operation. This function does
8374 not handle the atomic quad memory instructions. */
8375
8376 bool
8377 quad_load_store_p (rtx op0, rtx op1)
8378 {
8379 bool ret;
8380
8381 if (!TARGET_QUAD_MEMORY)
8382 ret = false;
8383
8384 else if (REG_P (op0) && MEM_P (op1))
8385 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8386 && quad_memory_operand (op1, GET_MODE (op1))
8387 && !reg_overlap_mentioned_p (op0, op1));
8388
8389 else if (MEM_P (op0) && REG_P (op1))
8390 ret = (quad_memory_operand (op0, GET_MODE (op0))
8391 && quad_int_reg_operand (op1, GET_MODE (op1)));
8392
8393 else
8394 ret = false;
8395
8396 if (TARGET_DEBUG_ADDR)
8397 {
8398 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8399 ret ? "true" : "false");
8400 debug_rtx (gen_rtx_SET (op0, op1));
8401 }
8402
8403 return ret;
8404 }
8405
8406 /* Given an address, return a constant offset term if one exists. */
8407
8408 static rtx
8409 address_offset (rtx op)
8410 {
8411 if (GET_CODE (op) == PRE_INC
8412 || GET_CODE (op) == PRE_DEC)
8413 op = XEXP (op, 0);
8414 else if (GET_CODE (op) == PRE_MODIFY
8415 || GET_CODE (op) == LO_SUM)
8416 op = XEXP (op, 1);
8417
8418 if (GET_CODE (op) == CONST)
8419 op = XEXP (op, 0);
8420
8421 if (GET_CODE (op) == PLUS)
8422 op = XEXP (op, 1);
8423
8424 if (CONST_INT_P (op))
8425 return op;
8426
8427 return NULL_RTX;
8428 }
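/* Editor's examples: (plus (reg rB) (const_int 16)) yields (const_int 16);
   (lo_sum (reg rB) (const (plus (symbol_ref s) (const_int 8)))) yields
   (const_int 8); a bare (reg rB) yields NULL_RTX.  */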
8429
8430 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8431 the mode. If we can't find (or don't know) the alignment of the symbol
8432 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8433 should be pessimistic]. Offsets are validated in the same way as for
8434 reg + offset. */
8435 static bool
8436 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
8437 {
8438 /* We should not get here with this. */
8439 gcc_checking_assert (! mode_supports_dq_form (mode));
8440
8441 if (GET_CODE (x) == CONST)
8442 x = XEXP (x, 0);
8443
8444 /* If we are building PIC code, then any symbol must be wrapped in an
8445 UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */
8446 bool machopic_offs_p = false;
8447 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
8448 {
8449 x = XVECEXP (x, 0, 0);
8450 machopic_offs_p = true;
8451 }
8452
8453 rtx sym = NULL_RTX;
8454 unsigned HOST_WIDE_INT offset = 0;
8455
8456 if (GET_CODE (x) == PLUS)
8457 {
8458 sym = XEXP (x, 0);
8459 if (! SYMBOL_REF_P (sym))
8460 return false;
8461 if (!CONST_INT_P (XEXP (x, 1)))
8462 return false;
8463 offset = INTVAL (XEXP (x, 1));
8464 }
8465 else if (SYMBOL_REF_P (x))
8466 sym = x;
8467 else if (CONST_INT_P (x))
8468 offset = INTVAL (x);
8469 else if (GET_CODE (x) == LABEL_REF)
8470 offset = 0; // We assume code labels are Pmode aligned
8471 else
8472 return false; // not sure what we have here.
8473
8474 /* If we don't know the alignment of the thing to which the symbol refers,
8475 we assume optimistically it is "enough".
8476 ??? maybe we should be pessimistic instead. */
8477 unsigned align = 0;
8478
8479 if (sym)
8480 {
8481 tree decl = SYMBOL_REF_DECL (sym);
8482 /* As noted above, PIC code cannot use a bare SYMBOL_REF. */
8483 if (TARGET_MACHO && flag_pic && !machopic_offs_p)
8484 return false;
8485 #if TARGET_MACHO
8486 if (MACHO_SYMBOL_INDIRECTION_P (sym))
8487 /* The decl in an indirection symbol is the original one, which might
8488 be less aligned than the indirection. Our indirections are always
8489 pointer-aligned. */
8490 ;
8491 else
8492 #endif
8493 if (decl && DECL_ALIGN (decl))
8494 align = DECL_ALIGN_UNIT (decl);
8495 }
8496
8497 unsigned int extra = 0;
8498 switch (mode)
8499 {
8500 case E_DFmode:
8501 case E_DDmode:
8502 case E_DImode:
8503 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8504 addressing. */
8505 if (VECTOR_MEM_VSX_P (mode))
8506 return false;
8507
8508 if (!TARGET_POWERPC64)
8509 extra = 4;
8510 else if ((offset & 3) || (align & 3))
8511 return false;
8512 break;
8513
8514 case E_TFmode:
8515 case E_IFmode:
8516 case E_KFmode:
8517 case E_TDmode:
8518 case E_TImode:
8519 case E_PTImode:
8520 extra = 8;
8521 if (!TARGET_POWERPC64)
8522 extra = 12;
8523 else if ((offset & 3) || (align & 3))
8524 return false;
8525 break;
8526
8527 default:
8528 break;
8529 }
8530
8531 /* We only care if the access(es) would cause a change to the high part. */
8532 offset = sext_hwi (offset, 16);
8533 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8534 }
8535
8536 /* Return true if the MEM operand is a memory operand suitable for use
8537 with a (full width, possibly multiple) gpr load/store. On
8538 powerpc64 this means the offset must be divisible by 4.
8539 Implements 'Y' constraint.
8540
8541 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8542 a constraint function we know the operand has satisfied a suitable
8543 memory predicate.
8544
8545 Offsetting a lo_sum should not be allowed, except where we know by
8546 alignment that a 32k boundary is not crossed. Note that by
8547 "offsetting" here we mean a further offset to access parts of the
8548 MEM. It's fine to have a lo_sum where the inner address is offset
8549 from a sym, since the same sym+offset will appear in the high part
8550 of the address calculation. */
8551
8552 bool
8553 mem_operand_gpr (rtx op, machine_mode mode)
8554 {
8555 unsigned HOST_WIDE_INT offset;
8556 int extra;
8557 rtx addr = XEXP (op, 0);
8558
8559 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8560 if (TARGET_UPDATE
8561 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
8562 && mode_supports_pre_incdec_p (mode)
8563 && legitimate_indirect_address_p (XEXP (addr, 0), false))
8564 return true;
8565
8566 /* Allow prefixed instructions if supported. If the bottom two bits of the
8567 offset are non-zero, we could use a prefixed instruction (which does not
8568 have the DS-form constraint that the traditional instruction had) instead
8569 of forcing the unaligned offset to a GPR. */
8570 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8571 return true;
8572
8573 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8574 really OK. Doing this early avoids teaching all the other machinery
8575 about them. */
8576 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
8577 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
8578
8579 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8580 if (!rs6000_offsettable_memref_p (op, mode, false))
8581 return false;
8582
8583 op = address_offset (addr);
8584 if (op == NULL_RTX)
8585 return true;
8586
8587 offset = INTVAL (op);
8588 if (TARGET_POWERPC64 && (offset & 3) != 0)
8589 return false;
8590
8591 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8592 if (extra < 0)
8593 extra = 0;
8594
8595 if (GET_CODE (addr) == LO_SUM)
8596 /* For lo_sum addresses, we must allow any offset except one that
8597 causes a wrap, so test only the low 16 bits. */
8598 offset = sext_hwi (offset, 16);
8599
8600 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8601 }
8602
8603 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8604 enforce an offset divisible by 4 even for 32-bit. */
8605
8606 bool
8607 mem_operand_ds_form (rtx op, machine_mode mode)
8608 {
8609 unsigned HOST_WIDE_INT offset;
8610 int extra;
8611 rtx addr = XEXP (op, 0);
8612
8613 /* Allow prefixed instructions if supported. If the bottom two bits of the
8614 offset are non-zero, we could use a prefixed instruction (which does not
8615 have the DS-form constraint that the traditional instruction had) instead
8616 of forcing the unaligned offset to a GPR. */
8617 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8618 return true;
8619
8620 if (!offsettable_address_p (false, mode, addr))
8621 return false;
8622
8623 op = address_offset (addr);
8624 if (op == NULL_RTX)
8625 return true;
8626
8627 offset = INTVAL (op);
8628 if ((offset & 3) != 0)
8629 return false;
8630
8631 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8632 if (extra < 0)
8633 extra = 0;
8634
8635 if (GET_CODE (addr) == LO_SUM)
8636 /* For lo_sum addresses, we must allow any offset except one that
8637 causes a wrap, so test only the low 16 bits. */
8638 offset = sext_hwi (offset, 16);
8639
8640 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8641 }
8642 \f
8643 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8644
8645 static bool
8646 reg_offset_addressing_ok_p (machine_mode mode)
8647 {
8648 switch (mode)
8649 {
8650 case E_V16QImode:
8651 case E_V8HImode:
8652 case E_V4SFmode:
8653 case E_V4SImode:
8654 case E_V2DFmode:
8655 case E_V2DImode:
8656 case E_V1TImode:
8657 case E_TImode:
8658 case E_TFmode:
8659 case E_KFmode:
8660 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8661 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8662 a vector mode, if we want to use the VSX registers to move it around,
8663 we need to restrict ourselves to reg+reg addressing. Similarly for
8664 IEEE 128-bit floating point that is passed in a single vector
8665 register. */
8666 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8667 return mode_supports_dq_form (mode);
8668 break;
8669
8670 /* The vector pair/quad types support offset addressing if the
8671 underlying vectors support offset addressing. */
8672 case E_OOmode:
8673 case E_XOmode:
8674 return TARGET_MMA;
8675
8676 case E_SDmode:
8677 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8678 addressing for the LFIWZX and STFIWX instructions. */
8679 if (TARGET_NO_SDMODE_STACK)
8680 return false;
8681 break;
8682
8683 default:
8684 break;
8685 }
8686
8687 return true;
8688 }
8689
8690 static bool
8691 virtual_stack_registers_memory_p (rtx op)
8692 {
8693 int regnum;
8694
8695 if (REG_P (op))
8696 regnum = REGNO (op);
8697
8698 else if (GET_CODE (op) == PLUS
8699 && REG_P (XEXP (op, 0))
8700 && CONST_INT_P (XEXP (op, 1)))
8701 regnum = REGNO (XEXP (op, 0));
8702
8703 else
8704 return false;
8705
8706 return (regnum >= FIRST_VIRTUAL_REGISTER
8707 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8708 }
8709
8710 /* Return true if a MODE sized memory access to OP plus OFFSET
8711 is known to not straddle a 32k boundary. This function is used
8712 to determine whether -mcmodel=medium code can use TOC pointer
8713 relative addressing for OP. This means the alignment of the TOC
8714 pointer must also be taken into account, and unfortunately that is
8715 only 8 bytes. */
8716
8717 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8718 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8719 #endif
8720
8721 static bool
8722 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8723 machine_mode mode)
8724 {
8725 tree decl;
8726 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8727
8728 if (!SYMBOL_REF_P (op))
8729 return false;
8730
8731 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8732 SYMBOL_REF. */
8733 if (mode_supports_dq_form (mode))
8734 return false;
8735
8736 dsize = GET_MODE_SIZE (mode);
8737 decl = SYMBOL_REF_DECL (op);
8738 if (!decl)
8739 {
8740 if (dsize == 0)
8741 return false;
8742
8743 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8744 replacing memory addresses with an anchor plus offset. We
8745 could find the decl by rummaging around in the block->objects
8746 VEC for the given offset but that seems like too much work. */
8747 dalign = BITS_PER_UNIT;
8748 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8749 && SYMBOL_REF_ANCHOR_P (op)
8750 && SYMBOL_REF_BLOCK (op) != NULL)
8751 {
8752 struct object_block *block = SYMBOL_REF_BLOCK (op);
8753
8754 dalign = block->alignment;
8755 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8756 }
8757 else if (CONSTANT_POOL_ADDRESS_P (op))
8758 {
8759 /* It would be nice to have get_pool_align()... */
8760 machine_mode cmode = get_pool_mode (op);
8761
8762 dalign = GET_MODE_ALIGNMENT (cmode);
8763 }
8764 }
8765 else if (DECL_P (decl))
8766 {
8767 dalign = DECL_ALIGN (decl);
8768
8769 if (dsize == 0)
8770 {
8771 /* Allow BLKmode when the entire object is known to not
8772 cross a 32k boundary. */
8773 if (!DECL_SIZE_UNIT (decl))
8774 return false;
8775
8776 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8777 return false;
8778
8779 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8780 if (dsize > 32768)
8781 return false;
8782
8783 dalign /= BITS_PER_UNIT;
8784 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8785 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8786 return dalign >= dsize;
8787 }
8788 }
8789 else
8790 gcc_unreachable ();
8791
8792 /* Find how many bits of the alignment we know for this access. */
8793 dalign /= BITS_PER_UNIT;
8794 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8795 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8796 mask = dalign - 1;
8797 lsb = offset & -offset;
8798 mask &= lsb - 1;
8799 dalign = mask + 1;
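  /* Worked example (illustrative): with dalign = 8 and offset = 4 we get
     lsb = 4 and mask = 7 & 3 = 3, so dalign becomes 4: the access is only
     known to be 4-byte aligned.  */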
8800
8801 return dalign >= dsize;
8802 }
8803
8804 static bool
8805 constant_pool_expr_p (rtx op)
8806 {
8807 rtx base, offset;
8808
8809 split_const (op, &base, &offset);
8810 return (SYMBOL_REF_P (base)
8811 && CONSTANT_POOL_ADDRESS_P (base)
8812 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8813 }
8814
8815 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8816 use that as the register to put the HIGH value into when register
8817 allocation has already been done. */
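/* Sketch of the RTL produced below (shapes derived from this function,
   not a verbatim dump): for -mcmodel=small the result is
	(unspec [(symbol_ref SYM) (reg TOC_REGISTER)] UNSPEC_TOCREL)
   and for medium/large, once pseudos can no longer be created, that
   unspec is split into
	(lo_sum (high (unspec ...)) (unspec ...)).  */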
8818
8819 rtx
8820 create_TOC_reference (rtx symbol, rtx largetoc_reg)
8821 {
8822 rtx tocrel, tocreg, hi;
8823
8824 gcc_assert (TARGET_TOC);
8825
8826 if (TARGET_DEBUG_ADDR)
8827 {
8828 if (SYMBOL_REF_P (symbol))
8829 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8830 XSTR (symbol, 0));
8831 else
8832 {
8833 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
8834 GET_RTX_NAME (GET_CODE (symbol)));
8835 debug_rtx (symbol);
8836 }
8837 }
8838
8839 if (!can_create_pseudo_p ())
8840 df_set_regs_ever_live (TOC_REGISTER, true);
8841
8842 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
8843 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
8844 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
8845 return tocrel;
8846
8847 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
8848 if (largetoc_reg != NULL)
8849 {
8850 emit_move_insn (largetoc_reg, hi);
8851 hi = largetoc_reg;
8852 }
8853 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
8854 }
8855
8856 /* These are only used to pass through from print_operand/print_operand_address
8857 to rs6000_output_addr_const_extra over the intervening function
8858 output_addr_const which is not target code. */
8859 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8860
8861 /* Return true if OP is a toc pointer relative address (the output
8862 of create_TOC_reference). If STRICT, do not match non-split
8863 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8864 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8865 TOCREL_OFFSET_RET respectively. */
8866
8867 bool
8868 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8869 const_rtx *tocrel_offset_ret)
8870 {
8871 if (!TARGET_TOC)
8872 return false;
8873
8874 if (TARGET_CMODEL != CMODEL_SMALL)
8875 {
8876 /* When strict ensure we have everything tidy. */
8877 if (strict
8878 && !(GET_CODE (op) == LO_SUM
8879 && REG_P (XEXP (op, 0))
8880 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8881 return false;
8882
8883 /* When not strict, allow non-split TOC addresses and also allow
8884 (lo_sum (high ..)) TOC addresses created during reload. */
8885 if (GET_CODE (op) == LO_SUM)
8886 op = XEXP (op, 1);
8887 }
8888
8889 const_rtx tocrel_base = op;
8890 const_rtx tocrel_offset = const0_rtx;
8891
8892 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8893 {
8894 tocrel_base = XEXP (op, 0);
8895 tocrel_offset = XEXP (op, 1);
8896 }
8897
8898 if (tocrel_base_ret)
8899 *tocrel_base_ret = tocrel_base;
8900 if (tocrel_offset_ret)
8901 *tocrel_offset_ret = tocrel_offset;
8902
8903 return (GET_CODE (tocrel_base) == UNSPEC
8904 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
8905 && REG_P (XVECEXP (tocrel_base, 0, 1))
8906 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
8907 }
8908
8909 /* Return true if X is a constant pool address, and also for cmodel=medium
8910 if X is a toc-relative address known to be offsettable within MODE. */
8911
8912 bool
8913 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8914 bool strict)
8915 {
8916 const_rtx tocrel_base, tocrel_offset;
8917 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8918 && (TARGET_CMODEL != CMODEL_MEDIUM
8919 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8920 || mode == QImode
8921 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8922 INTVAL (tocrel_offset), mode)));
8923 }
8924
8925 static bool
8926 legitimate_small_data_p (machine_mode mode, rtx x)
8927 {
8928 return (DEFAULT_ABI == ABI_V4
8929 && !flag_pic && !TARGET_TOC
8930 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
8931 && small_data_operand (x, mode));
8932 }
8933
8934 bool
8935 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8936 bool strict, bool worst_case)
8937 {
8938 unsigned HOST_WIDE_INT offset;
8939 unsigned int extra;
8940
8941 if (GET_CODE (x) != PLUS)
8942 return false;
8943 if (!REG_P (XEXP (x, 0)))
8944 return false;
8945 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8946 return false;
8947 if (mode_supports_dq_form (mode))
8948 return quad_address_p (x, mode, strict);
8949 if (!reg_offset_addressing_ok_p (mode))
8950 return virtual_stack_registers_memory_p (x);
8951 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8952 return true;
8953 if (!CONST_INT_P (XEXP (x, 1)))
8954 return false;
8955
8956 offset = INTVAL (XEXP (x, 1));
8957 extra = 0;
8958 switch (mode)
8959 {
8960 case E_DFmode:
8961 case E_DDmode:
8962 case E_DImode:
8963 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8964 addressing. */
8965 if (VECTOR_MEM_VSX_P (mode))
8966 return false;
8967
8968 if (!worst_case)
8969 break;
8970 if (!TARGET_POWERPC64)
8971 extra = 4;
8972 else if (offset & 3)
8973 return false;
8974 break;
8975
8976 case E_TFmode:
8977 case E_IFmode:
8978 case E_KFmode:
8979 case E_TDmode:
8980 case E_TImode:
8981 case E_PTImode:
8982 extra = 8;
8983 if (!worst_case)
8984 break;
8985 if (!TARGET_POWERPC64)
8986 extra = 12;
8987 else if (offset & 3)
8988 return false;
8989 break;
8990
8991 default:
8992 break;
8993 }
8994
8995 if (TARGET_PREFIXED)
8996 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
8997 else
8998 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8999 }
9000
9001 bool
9002 legitimate_indexed_address_p (rtx x, int strict)
9003 {
9004 rtx op0, op1;
9005
9006 if (GET_CODE (x) != PLUS)
9007 return false;
9008
9009 op0 = XEXP (x, 0);
9010 op1 = XEXP (x, 1);
9011
9012 return (REG_P (op0) && REG_P (op1)
9013 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
9014 && INT_REG_OK_FOR_INDEX_P (op1, strict))
9015 || (INT_REG_OK_FOR_BASE_P (op1, strict)
9016 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
9017 }
9018
9019 bool
9020 avoiding_indexed_address_p (machine_mode mode)
9021 {
9022 unsigned int msize = GET_MODE_SIZE (mode);
9023
9024 /* Avoid indexed addressing for modes that have non-indexed load/store
9025 instruction forms. On power10, vector pairs have an indexed
9026 form, but vector quads don't. */
9027 if (msize > 16)
9028 return msize != 32;
9029
9030 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
9031 }
9032
9033 bool
9034 legitimate_indirect_address_p (rtx x, int strict)
9035 {
9036 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
9037 }
9038
9039 bool
9040 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
9041 {
9042 if (!TARGET_MACHO || !flag_pic
9043 || mode != SImode || !MEM_P (x))
9044 return false;
9045 x = XEXP (x, 0);
9046
9047 if (GET_CODE (x) != LO_SUM)
9048 return false;
9049 if (!REG_P (XEXP (x, 0)))
9050 return false;
9051 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
9052 return false;
9053 x = XEXP (x, 1);
9054
9055 return CONSTANT_P (x);
9056 }
9057
9058 static bool
9059 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
9060 {
9061 if (GET_CODE (x) != LO_SUM)
9062 return false;
9063 if (!REG_P (XEXP (x, 0)))
9064 return false;
9065 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9066 return false;
9067 /* Quad word addresses are restricted, and we can't use LO_SUM. */
9068 if (mode_supports_dq_form (mode))
9069 return false;
9070 x = XEXP (x, 1);
9071
9072 if (TARGET_ELF)
9073 {
9074 bool large_toc_ok;
9075
9076 if (DEFAULT_ABI == ABI_V4 && flag_pic)
9077 return false;
9078 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, since that usually
9079 calls push_reload from old reload pass code. LEGITIMIZE_RELOAD_ADDRESS
9080 recognizes some LO_SUM addresses as valid although this
9081 function says the opposite. In most cases LRA can generate
9082 correct code for address reloads through its own transformations,
9083 but it cannot manage some LO_SUM cases. So we need to add
9084 code here saying that those addresses are still valid. */
9085 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
9086 && small_toc_ref (x, VOIDmode));
9087 if (TARGET_TOC && ! large_toc_ok)
9088 return false;
9089 if (GET_MODE_NUNITS (mode) != 1)
9090 return false;
9091 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9092 && !(/* ??? Assume floating point reg based on mode? */
9093 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9094 return false;
9095
9096 return CONSTANT_P (x) || large_toc_ok;
9097 }
9098 else if (TARGET_MACHO)
9099 {
9100 if (GET_MODE_NUNITS (mode) != 1)
9101 return false;
9102 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9103 && !(/* see above */
9104 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9105 return false;
9106 #if TARGET_MACHO
9107 if (MACHO_DYNAMIC_NO_PIC_P || !flag_pic)
9108 return CONSTANT_P (x);
9109 #endif
9110 /* Mach-O PIC code from here. */
9111 if (GET_CODE (x) == CONST)
9112 x = XEXP (x, 0);
9113
9114 /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */
9115 if (SYMBOL_REF_P (x))
9116 return false;
9117
9118 /* So this is OK if the wrapped object is const. */
9119 if (GET_CODE (x) == UNSPEC
9120 && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
9121 return CONSTANT_P (XVECEXP (x, 0, 0));
9122 return CONSTANT_P (x);
9123 }
9124 return false;
9125 }
9126
9127
9128 /* Try machine-dependent ways of modifying an illegitimate address
9129 to be legitimate. If we find one, return the new, valid address.
9130 This is used from only one place: `memory_address' in explow.cc.
9131
9132 OLDX is the address as it was before break_out_memory_refs was
9133 called. In some cases it is useful to look at this to decide what
9134 needs to be done.
9135
9136 It is always safe for this function to do nothing. It exists to
9137 recognize opportunities to optimize the output.
9138
9139 On RS/6000, first check for the sum of a register with a constant
9140 integer that is out of range. If so, generate code to add the
9141 constant with the low-order 16 bits masked to the register and force
9142 this result into another register (this can be done with `cau').
9143 Then generate an address of REG+(CONST&0xffff), allowing for the
9144 possibility of bit 16 being a one.
9145
9146 Then check for the sum of a register and something not constant, try to
9147 load the other things into a register and return the sum. */
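/* Worked example (illustrative values): legitimizing r3 + 0x12345 gives
   low_int = 0x2345 and high_int = 0x10000, so we emit
	addis tmp,r3,1
   and return the address tmp + 0x2345, whose offset now fits in the
   signed 16-bit displacement.  */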
9148
9149 static rtx
9150 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
9151 machine_mode mode)
9152 {
9153 unsigned int extra;
9154
9155 if (!reg_offset_addressing_ok_p (mode)
9156 || mode_supports_dq_form (mode))
9157 {
9158 if (virtual_stack_registers_memory_p (x))
9159 return x;
9160
9161 /* In theory we should not be seeing addresses of the form reg+0,
9162 but just in case it is generated, optimize it away. */
9163 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
9164 return force_reg (Pmode, XEXP (x, 0));
9165
9166 /* For TImode with load/store quad, restrict addresses to just a single
9167 pointer, so it works with both GPRs and VSX registers. */
9168 /* Make sure both operands are registers. */
9169 else if (GET_CODE (x) == PLUS
9170 && (mode != TImode || !TARGET_VSX))
9171 return gen_rtx_PLUS (Pmode,
9172 force_reg (Pmode, XEXP (x, 0)),
9173 force_reg (Pmode, XEXP (x, 1)));
9174 else
9175 return force_reg (Pmode, x);
9176 }
9177 if (SYMBOL_REF_P (x) && !TARGET_MACHO)
9178 {
9179 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
9180 if (model != 0)
9181 return rs6000_legitimize_tls_address (x, model);
9182 }
9183
9184 extra = 0;
9185 switch (mode)
9186 {
9187 case E_TFmode:
9188 case E_TDmode:
9189 case E_TImode:
9190 case E_PTImode:
9191 case E_IFmode:
9192 case E_KFmode:
9193 /* As in legitimate_offset_address_p we do not assume
9194 worst-case. The mode here is just a hint as to the registers
9195 used. A TImode is usually in gprs, but may actually be in
9196 fprs. Leave worst-case scenario for reload to handle via
9197 insn constraints. PTImode is only GPRs. */
9198 extra = 8;
9199 break;
9200 default:
9201 break;
9202 }
9203
9204 if (GET_CODE (x) == PLUS
9205 && REG_P (XEXP (x, 0))
9206 && CONST_INT_P (XEXP (x, 1))
9207 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9208 >= 0x10000 - extra))
9209 {
9210 HOST_WIDE_INT high_int, low_int;
9211 rtx sum;
9212 low_int = sext_hwi (INTVAL (XEXP (x, 1)), 16);
9213 if (low_int >= 0x8000 - extra)
9214 low_int = 0;
9215 high_int = INTVAL (XEXP (x, 1)) - low_int;
9216 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9217 gen_int_mode (high_int, Pmode)), 0);
9218 return plus_constant (Pmode, sum, low_int);
9219 }
9220 else if (GET_CODE (x) == PLUS
9221 && REG_P (XEXP (x, 0))
9222 && !CONST_INT_P (XEXP (x, 1))
9223 && GET_MODE_NUNITS (mode) == 1
9224 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9225 || (/* ??? Assume floating point reg based on mode? */
9226 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9227 && !avoiding_indexed_address_p (mode))
9228 {
9229 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9230 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9231 }
9232 else if ((TARGET_ELF
9233 #if TARGET_MACHO
9234 || !MACHO_DYNAMIC_NO_PIC_P
9235 #endif
9236 )
9237 && TARGET_32BIT
9238 && TARGET_NO_TOC_OR_PCREL
9239 && !flag_pic
9240 && !CONST_INT_P (x)
9241 && !CONST_WIDE_INT_P (x)
9242 && !CONST_DOUBLE_P (x)
9243 && CONSTANT_P (x)
9244 && GET_MODE_NUNITS (mode) == 1
9245 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9246 || (/* ??? Assume floating point reg based on mode? */
9247 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
9248 {
9249 rtx reg = gen_reg_rtx (Pmode);
9250 if (TARGET_ELF)
9251 emit_insn (gen_elf_high (reg, x));
9252 else
9253 emit_insn (gen_macho_high (Pmode, reg, x));
9254 return gen_rtx_LO_SUM (Pmode, reg, x);
9255 }
9256 else if (TARGET_TOC
9257 && SYMBOL_REF_P (x)
9258 && constant_pool_expr_p (x)
9259 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9260 return create_TOC_reference (x, NULL_RTX);
9261 else
9262 return x;
9263 }
9264
9265 /* Debug version of rs6000_legitimize_address. */
9266 static rtx
9267 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9268 {
9269 rtx ret;
9270 rtx_insn *insns;
9271
9272 start_sequence ();
9273 ret = rs6000_legitimize_address (x, oldx, mode);
9274 insns = get_insns ();
9275 end_sequence ();
9276
9277 if (ret != x)
9278 {
9279 fprintf (stderr,
9280 "\nrs6000_legitimize_address: mode %s, old code %s, "
9281 "new code %s, modified\n",
9282 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9283 GET_RTX_NAME (GET_CODE (ret)));
9284
9285 fprintf (stderr, "Original address:\n");
9286 debug_rtx (x);
9287
9288 fprintf (stderr, "oldx:\n");
9289 debug_rtx (oldx);
9290
9291 fprintf (stderr, "New address:\n");
9292 debug_rtx (ret);
9293
9294 if (insns)
9295 {
9296 fprintf (stderr, "Insns added:\n");
9297 debug_rtx_list (insns, 20);
9298 }
9299 }
9300 else
9301 {
9302 fprintf (stderr,
9303 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9304 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9305
9306 debug_rtx (x);
9307 }
9308
9309 if (insns)
9310 emit_insn (insns);
9311
9312 return ret;
9313 }
9314
9315 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9316 We need to emit DTP-relative relocations. */
9317
9318 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9319 static void
9320 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9321 {
9322 switch (size)
9323 {
9324 case 4:
9325 fputs ("\t.long\t", file);
9326 break;
9327 case 8:
9328 fputs (DOUBLE_INT_ASM_OP, file);
9329 break;
9330 default:
9331 gcc_unreachable ();
9332 }
9333 output_addr_const (file, x);
9334 if (TARGET_ELF)
9335 fputs ("@dtprel+0x8000", file);
9336 }
9337
9338 /* Return true if X is a symbol that refers to real (rather than emulated)
9339 TLS. */
9340
9341 static bool
9342 rs6000_real_tls_symbol_ref_p (rtx x)
9343 {
9344 return (SYMBOL_REF_P (x)
9345 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9346 }
9347
9348 /* In the name of slightly smaller debug output, and to cater to
9349 general assembler lossage, recognize various UNSPEC sequences
9350 and turn them back into a direct symbol reference. */
9351
9352 static rtx
9353 rs6000_delegitimize_address (rtx orig_x)
9354 {
9355 rtx x, y, offset;
9356
9357 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9358 encodes loading up the high part of the address of a TOC reference along
9359 with a load of a GPR using the same base register used for the load. We
9360 return the original SYMBOL_REF.
9361
9362 (set (reg:INT1 <reg>
9363 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR)))
9364
9365 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9366 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9367 We return the original SYMBOL_REF.
9368
9369 (parallel [(set (reg:DI <base-reg>)
9370 (unspec:DI [(symbol_ref <symbol>)
9371 (const_int <marker>)]
9372 UNSPEC_PCREL_OPT_LD_ADDR))
9373 (set (reg:DI <load-reg>)
9374 (unspec:DI [(const_int 0)]
9375 UNSPEC_PCREL_OPT_LD_DATA))])
9376
9377 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9378 GPR being loaded is the same as the GPR used to hold the external address.
9379
9380 (set (reg:DI <base-reg>)
9381 (unspec:DI [(symbol_ref <symbol>)
9382 (const_int <marker>)]
9383 UNSPEC_PCREL_OPT_LD_SAME_REG))
9384
9385 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9386 UNSPEC includes the external SYMBOL_REF along with the value being stored.
9387 We return the original SYMBOL_REF.
9388
9389 (parallel [(set (reg:DI <base-reg>)
9390 (unspec:DI [(symbol_ref <symbol>)
9391 (const_int <marker>)]
9392 UNSPEC_PCREL_OPT_ST_ADDR))
9393 (use (reg <store-reg>))]) */
9394
9395 if (GET_CODE (orig_x) == UNSPEC)
9396 switch (XINT (orig_x, 1))
9397 {
9398 case UNSPEC_FUSION_GPR:
9399 case UNSPEC_PCREL_OPT_LD_ADDR:
9400 case UNSPEC_PCREL_OPT_LD_SAME_REG:
9401 case UNSPEC_PCREL_OPT_ST_ADDR:
9402 orig_x = XVECEXP (orig_x, 0, 0);
9403 break;
9404
9405 default:
9406 break;
9407 }
9408
9409 orig_x = delegitimize_mem_from_attrs (orig_x);
9410
9411 x = orig_x;
9412 if (MEM_P (x))
9413 x = XEXP (x, 0);
9414
9415 y = x;
9416 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
9417 y = XEXP (y, 1);
9418
9419 offset = NULL_RTX;
9420 if (GET_CODE (y) == PLUS
9421 && GET_MODE (y) == Pmode
9422 && CONST_INT_P (XEXP (y, 1)))
9423 {
9424 offset = XEXP (y, 1);
9425 y = XEXP (y, 0);
9426 }
9427
9428 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
9429 {
9430 y = XVECEXP (y, 0, 0);
9431
9432 #ifdef HAVE_AS_TLS
9433 /* Do not associate thread-local symbols with the original
9434 constant pool symbol. */
9435 if (TARGET_XCOFF
9436 && SYMBOL_REF_P (y)
9437 && CONSTANT_POOL_ADDRESS_P (y)
9438 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9439 return orig_x;
9440 #endif
9441
9442 if (offset != NULL_RTX)
9443 y = gen_rtx_PLUS (Pmode, y, offset);
9444 if (!MEM_P (orig_x))
9445 return y;
9446 else
9447 return replace_equiv_address_nv (orig_x, y);
9448 }
9449
9450 if (TARGET_MACHO
9451 && GET_CODE (orig_x) == LO_SUM
9452 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9453 {
9454 y = XEXP (XEXP (orig_x, 1), 0);
9455 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9456 return XVECEXP (y, 0, 0);
9457 }
9458
9459 return orig_x;
9460 }
9461
9462 /* Return true if X shouldn't be emitted into the debug info.
9463 The linker doesn't like .toc section references from
9464 .debug_* sections, so reject .toc section symbols. */
9465
9466 static bool
9467 rs6000_const_not_ok_for_debug_p (rtx x)
9468 {
9469 if (GET_CODE (x) == UNSPEC)
9470 return true;
9471 if (SYMBOL_REF_P (x)
9472 && CONSTANT_POOL_ADDRESS_P (x))
9473 {
9474 rtx c = get_pool_constant (x);
9475 machine_mode cmode = get_pool_mode (x);
9476 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9477 return true;
9478 }
9479
9480 return false;
9481 }
9482
9483 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9484
9485 static bool
9486 rs6000_legitimate_combined_insn (rtx_insn *insn)
9487 {
9488 int icode = INSN_CODE (insn);
9489
9490 /* Reject creating doloop insns. Combine should not be allowed
9491 to create these for a number of reasons:
9492 1) In a nested loop, if combine creates one of these in an
9493 outer loop and the register allocator happens to allocate ctr
9494 to the outer loop insn, then the inner loop can't use ctr.
9495 Inner loops ought to be more highly optimized.
9496 2) Combine often wants to create one of these from what was
9497 originally a three insn sequence, first combining the three
9498 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9499 allocated ctr, the splitter takes us back to the three insn
9500 sequence. It's better to stop combine at the two insn
9501 sequence.
9502 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9503 insns, the register allocator sometimes uses floating point
9504 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9505 jump insn and output reloads are not implemented for jumps,
9506 the ctrsi/ctrdi splitters need to handle all possible cases.
9507 That's a pain, and it gets to be seriously difficult when a
9508 splitter that runs after reload needs memory to transfer from
9509 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9510 for the difficult case. It's better to not create problems
9511 in the first place. */
9512 if (icode != CODE_FOR_nothing
9513 && (icode == CODE_FOR_bdz_si
9514 || icode == CODE_FOR_bdz_di
9515 || icode == CODE_FOR_bdnz_si
9516 || icode == CODE_FOR_bdnz_di
9517 || icode == CODE_FOR_bdztf_si
9518 || icode == CODE_FOR_bdztf_di
9519 || icode == CODE_FOR_bdnztf_si
9520 || icode == CODE_FOR_bdnztf_di))
9521 return false;
9522
9523 return true;
9524 }
9525
9526 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9527
9528 static GTY(()) rtx rs6000_tls_symbol;
9529 static rtx
9530 rs6000_tls_get_addr (void)
9531 {
9532 if (!rs6000_tls_symbol)
9533 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9534
9535 return rs6000_tls_symbol;
9536 }
9537
9538 /* Construct the SYMBOL_REF for TLS GOT references. */
9539
9540 static GTY(()) rtx rs6000_got_symbol;
9541 rtx
9542 rs6000_got_sym (void)
9543 {
9544 if (!rs6000_got_symbol)
9545 {
9546 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9547 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9548 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9549 }
9550
9551 return rs6000_got_symbol;
9552 }
9553
9554 /* AIX Thread-Local Address support. */
9555
9556 static rtx
9557 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9558 {
9559 rtx sym, mem, tocref, tlsreg, tmpreg, dest;
9560 const char *name;
9561 char *tlsname;
9562
9563 /* Place addr into TOC constant pool. */
9564 sym = force_const_mem (GET_MODE (addr), addr);
9565
9566 /* Output the TOC entry and create the MEM referencing the value. */
9567 if (constant_pool_expr_p (XEXP (sym, 0))
9568 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9569 {
9570 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9571 mem = gen_const_mem (Pmode, tocref);
9572 set_mem_alias_set (mem, get_TOC_alias_set ());
9573 }
9574 else
9575 return sym;
9576
9577 /* Use global-dynamic for local-dynamic. */
9578 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9579 || model == TLS_MODEL_LOCAL_DYNAMIC)
9580 {
9581 /* Create new TOC reference for @m symbol. */
9582 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9583 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9584 strcpy (tlsname, "*LCM");
9585 strcat (tlsname, name + 3);
9586 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9587 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9588 tocref = create_TOC_reference (modaddr, NULL_RTX);
9589 rtx modmem = gen_const_mem (Pmode, tocref);
9590 set_mem_alias_set (modmem, get_TOC_alias_set ());
9591
9592 rtx modreg = gen_reg_rtx (Pmode);
9593 emit_insn (gen_rtx_SET (modreg, modmem));
9594
9595 tmpreg = gen_reg_rtx (Pmode);
9596 emit_insn (gen_rtx_SET (tmpreg, mem));
9597
9598 dest = gen_reg_rtx (Pmode);
9599 if (TARGET_32BIT)
9600 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9601 else
9602 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9603 return dest;
9604 }
9605 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
9606 else if (TARGET_32BIT)
9607 {
9608 tlsreg = gen_reg_rtx (SImode);
9609 emit_insn (gen_tls_get_tpointer (tlsreg));
9610 }
9611 else
9612 {
9613 tlsreg = gen_rtx_REG (DImode, 13);
9614 xcoff_tls_exec_model_detected = true;
9615 }
9616
9617 /* Load the TOC value into temporary register. */
9618 tmpreg = gen_reg_rtx (Pmode);
9619 emit_insn (gen_rtx_SET (tmpreg, mem));
9620 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9621 gen_rtx_MINUS (Pmode, addr, tlsreg));
9622
9623 /* Add TOC symbol value to TLS pointer. */
9624 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9625
9626 return dest;
9627 }
9628
9629 /* Passes the tls arg value for the global-dynamic and local-dynamic
9630 emit_library_call_value calls in rs6000_legitimize_tls_address down
9631 to rs6000_call_aix and rs6000_call_sysv, which emit the marker
9632 relocs placed on __tls_get_addr calls. */
9633 static rtx global_tlsarg;
9634
9635 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9636 this (thread-local) address. */
9637
9638 static rtx
9639 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9640 {
9641 rtx dest, insn;
9642
9643 if (TARGET_XCOFF)
9644 return rs6000_legitimize_tls_address_aix (addr, model);
9645
9646 dest = gen_reg_rtx (Pmode);
9647 if (model == TLS_MODEL_LOCAL_EXEC
9648 && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
9649 {
9650 rtx tlsreg;
9651
9652 if (TARGET_64BIT)
9653 {
9654 tlsreg = gen_rtx_REG (Pmode, 13);
9655 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9656 }
9657 else
9658 {
9659 tlsreg = gen_rtx_REG (Pmode, 2);
9660 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9661 }
9662 emit_insn (insn);
9663 }
9664 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9665 {
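      /* This is the medium-offset local-exec sequence; on 64-bit it
	 should look like (illustrative register names):
	     addis TMP,r13,SYM@tprel@ha
	     addi  DEST,TMP,SYM@tprel@l
	 where r13 holds the thread pointer.  */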
9666 rtx tlsreg, tmp;
9667
9668 tmp = gen_reg_rtx (Pmode);
9669 if (TARGET_64BIT)
9670 {
9671 tlsreg = gen_rtx_REG (Pmode, 13);
9672 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9673 }
9674 else
9675 {
9676 tlsreg = gen_rtx_REG (Pmode, 2);
9677 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9678 }
9679 emit_insn (insn);
9680 if (TARGET_64BIT)
9681 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9682 else
9683 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9684 emit_insn (insn);
9685 }
9686 else
9687 {
9688 rtx got, tga, tmp1, tmp2;
9689
9690 /* We currently use relocations like @got@tlsgd for tls, which
9691 means the linker will handle allocation of tls entries, placing
9692 them in the .got section. So use a pointer to the .got section,
9693 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9694 or to secondary GOT sections used by 32-bit -fPIC. */
9695 if (rs6000_pcrel_p ())
9696 got = const0_rtx;
9697 else if (TARGET_64BIT)
9698 got = gen_rtx_REG (Pmode, 2);
9699 else
9700 {
9701 if (flag_pic == 1)
9702 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9703 else
9704 {
9705 rtx gsym = rs6000_got_sym ();
9706 got = gen_reg_rtx (Pmode);
9707 if (flag_pic == 0)
9708 rs6000_emit_move (got, gsym, Pmode);
9709 else
9710 {
9711 rtx mem, lab;
9712
9713 tmp1 = gen_reg_rtx (Pmode);
9714 tmp2 = gen_reg_rtx (Pmode);
9715 mem = gen_const_mem (Pmode, tmp1);
9716 lab = gen_label_rtx ();
9717 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9718 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9719 if (TARGET_LINK_STACK)
9720 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9721 emit_move_insn (tmp2, mem);
9722 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9723 set_unique_reg_note (last, REG_EQUAL, gsym);
9724 }
9725 }
9726 }
9727
9728 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9729 {
9730 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
9731 UNSPEC_TLSGD);
9732 tga = rs6000_tls_get_addr ();
9733 rtx argreg = gen_rtx_REG (Pmode, 3);
9734 emit_insn (gen_rtx_SET (argreg, arg));
9735 global_tlsarg = arg;
9736 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
9737 global_tlsarg = NULL_RTX;
9738
9739 /* Make a note so that the result of this call can be CSEd. */
9740 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9741 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9742 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9743 }
9744 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9745 {
9746 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
9747 tga = rs6000_tls_get_addr ();
9748 tmp1 = gen_reg_rtx (Pmode);
9749 rtx argreg = gen_rtx_REG (Pmode, 3);
9750 emit_insn (gen_rtx_SET (argreg, arg));
9751 global_tlsarg = arg;
9752 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
9753 global_tlsarg = NULL_RTX;
9754
9755 /* Make a note so that the result of this call can be CSEd. */
9756 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9757 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9758 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9759
9760 if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
9761 {
9762 if (TARGET_64BIT)
9763 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9764 else
9765 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9766 }
9767 else if (rs6000_tls_size == 32)
9768 {
9769 tmp2 = gen_reg_rtx (Pmode);
9770 if (TARGET_64BIT)
9771 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9772 else
9773 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9774 emit_insn (insn);
9775 if (TARGET_64BIT)
9776 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9777 else
9778 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9779 }
9780 else
9781 {
9782 tmp2 = gen_reg_rtx (Pmode);
9783 if (TARGET_64BIT)
9784 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9785 else
9786 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9787 emit_insn (insn);
9788 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9789 }
9790 emit_insn (insn);
9791 }
9792 else
9793 {
9794 /* IE, or 64-bit offset LE. */
9795 tmp2 = gen_reg_rtx (Pmode);
9796 if (TARGET_64BIT)
9797 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9798 else
9799 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9800 emit_insn (insn);
9801 if (rs6000_pcrel_p ())
9802 {
9803 if (TARGET_64BIT)
9804 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
9805 else
9806 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
9807 }
9808 else if (TARGET_64BIT)
9809 insn = gen_tls_tls_64 (dest, tmp2, addr);
9810 else
9811 insn = gen_tls_tls_32 (dest, tmp2, addr);
9812 emit_insn (insn);
9813 }
9814 }
9815
9816 return dest;
9817 }
9818
9819 /* Only create the global variable for the stack protect guard if we are using
9820 the global flavor of that guard. */
9821 static tree
9822 rs6000_init_stack_protect_guard (void)
9823 {
9824 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9825 return default_stack_protect_guard ();
9826
9827 return NULL_TREE;
9828 }
9829
9830 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9831
9832 static bool
9833 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9834 {
9835 /* If GET_CODE (x) is HIGH, the 'X' represents the high part of a symbol_ref.
9836 It cannot be put into a constant pool. e.g.
9837 (high:DI (unspec:DI [(symbol_ref/u:DI ("*.LC0")..)
9838 (high:DI (symbol_ref:DI ("var")..)). */
9839 if (GET_CODE (x) == HIGH)
9840 return true;
9841
9842 /* A TLS symbol in the TOC cannot contain a sum. */
9843 if (GET_CODE (x) == CONST
9844 && GET_CODE (XEXP (x, 0)) == PLUS
9845 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
9846 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9847 return true;
9848
9849 /* Allow AIX TOC TLS symbols in the constant pool,
9850 but not ELF TLS symbols. */
9851 return TARGET_ELF && tls_referenced_p (x);
9852 }
9853
9854 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9855 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9856 can be addressed relative to the toc pointer. */
9857
9858 static bool
9859 use_toc_relative_ref (rtx sym, machine_mode mode)
9860 {
9861 return ((constant_pool_expr_p (sym)
9862 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9863 get_pool_mode (sym)))
9864 || (TARGET_CMODEL == CMODEL_MEDIUM
9865 && SYMBOL_REF_LOCAL_P (sym)
9866 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9867 }
9868
9869 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9870 that is a valid memory address for an instruction.
9871 The MODE argument is the machine mode for the MEM expression
9872 that wants to use this address.
9873
9874 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9875 refers to a constant pool entry of an address (or the sum of it
9876 plus a constant), a short (16-bit signed) constant plus a register,
9877 the sum of two registers, or a register indirect, possibly with an
9878 auto-increment. For DFmode, DDmode and DImode with a constant plus
9879 register, we must ensure that both words are addressable, or on
9880 PowerPC64 that the offset is word-aligned.
9881
9882 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9883 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9884 because adjacent memory cells are accessed by adding word-sized offsets
9885 during assembly output. */
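/* Illustrative instances of the valid forms (register numbers made up):
   'lwz r3,8(r9)' is constant plus register, 'lwzx r3,r9,r10' the sum of
   two registers, 'lwz r3,0(r9)' register indirect, and 'lwzu r3,4(r9)'
   the auto-increment variant.  */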
9886 static bool
9887 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict,
9888 code_helper ch = ERROR_MARK)
9889 {
9890 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9891 bool quad_offset_p = mode_supports_dq_form (mode);
9892
9893 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9894 return 0;
9895
9896 /* lxvl and stxvl don't support any addressing modes with PLUS. */
9897 if (ch.is_internal_fn ()
9898 && (ch == IFN_LEN_LOAD || ch == IFN_LEN_STORE)
9899 && GET_CODE (x) == PLUS)
9900 return 0;
9901
9902 /* Handle unaligned altivec lvx/stvx type addresses. */
9903 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9904 && GET_CODE (x) == AND
9905 && CONST_INT_P (XEXP (x, 1))
9906 && INTVAL (XEXP (x, 1)) == -16)
9907 {
9908 x = XEXP (x, 0);
9909 return (legitimate_indirect_address_p (x, reg_ok_strict)
9910 || legitimate_indexed_address_p (x, reg_ok_strict)
9911 || virtual_stack_registers_memory_p (x));
9912 }
9913
9914 if (legitimate_indirect_address_p (x, reg_ok_strict))
9915 return 1;
9916 if (TARGET_UPDATE
9917 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9918 && mode_supports_pre_incdec_p (mode)
9919 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9920 return 1;
9921
9922 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9923 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9924 return 1;
9925
9926 /* Handle restricted vector d-form offsets in ISA 3.0. */
9927 if (quad_offset_p)
9928 {
9929 if (quad_address_p (x, mode, reg_ok_strict))
9930 return 1;
9931 }
9932 else if (virtual_stack_registers_memory_p (x))
9933 return 1;
9934
9935 else if (reg_offset_p)
9936 {
9937 if (legitimate_small_data_p (mode, x))
9938 return 1;
9939 if (legitimate_constant_pool_address_p (x, mode,
9940 reg_ok_strict || lra_in_progress))
9941 return 1;
9942 }
9943
9944 /* For TImode, if we have TImode in VSX registers, only allow register
9945 indirect addresses. This will allow the values to go in either GPRs
9946 or VSX registers without reloading. The vector types would tend to
9947 go into VSX registers, so we allow REG+REG, while TImode seems
9948 somewhat split, in that some uses are GPR based, and some VSX based. */
9949 /* FIXME: We could loosen this by changing the following to
9950 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9951 but currently we cannot allow REG+REG addressing for TImode. See
9952 PR72827 for complete details on how this ends up hoodwinking DSE. */
9953 if (mode == TImode && TARGET_VSX)
9954 return 0;
9955 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
9956 if (! reg_ok_strict
9957 && reg_offset_p
9958 && GET_CODE (x) == PLUS
9959 && REG_P (XEXP (x, 0))
9960 && (XEXP (x, 0) == virtual_stack_vars_rtx
9961 || XEXP (x, 0) == arg_pointer_rtx)
9962 && CONST_INT_P (XEXP (x, 1)))
9963 return 1;
9964 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9965 return 1;
9966 if (!FLOAT128_2REG_P (mode)
9967 && (TARGET_HARD_FLOAT
9968 || TARGET_POWERPC64
9969 || (mode != DFmode && mode != DDmode))
9970 && (TARGET_POWERPC64 || mode != DImode)
9971 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9972 && mode != PTImode
9973 && !avoiding_indexed_address_p (mode)
9974 && legitimate_indexed_address_p (x, reg_ok_strict))
9975 return 1;
9976 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9977 && mode_supports_pre_modify_p (mode)
9978 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9979 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9980 reg_ok_strict, false)
9981 || (!avoiding_indexed_address_p (mode)
9982 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9983 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9984 {
9985 /* There is no prefixed version of the load/store with update. */
9986 rtx addr = XEXP (x, 1);
9987 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9988 }
9989 if (reg_offset_p && !quad_offset_p
9990 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9991 return 1;
9992 return 0;
9993 }
9994
9995 /* Debug version of rs6000_legitimate_address_p. */
9996 static bool
9997 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict,
9998 code_helper ch)
9999 {
10000 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict, ch);
10001 fprintf (stderr,
10002 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
10003 "strict = %d, reload = %s, code = %s\n",
10004 ret ? "true" : "false",
10005 GET_MODE_NAME (mode),
10006 reg_ok_strict,
10007 (reload_completed ? "after" : "before"),
10008 GET_RTX_NAME (GET_CODE (x)));
10009 debug_rtx (x);
10010
10011 return ret;
10012 }
10013
10014 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10015
10016 static bool
10017 rs6000_mode_dependent_address_p (const_rtx addr,
10018 addr_space_t as ATTRIBUTE_UNUSED)
10019 {
10020 return rs6000_mode_dependent_address_ptr (addr);
10021 }
10022
10023 /* Return true if ADDR (a legitimate address expression)
10024 has an effect that depends on the machine mode it is used for.
10025
10026 On the RS/6000 this is true of all integral offsets (since AltiVec
10027 and VSX modes don't allow them) and of pre-increment and pre-decrement addresses.
10028
10029 ??? Except that due to conceptual problems in offsettable_address_p
10030 we can't really report the problems of integral offsets. So leave
10031 this assuming that the adjustable offset must be valid for the
10032 sub-words of a TFmode operand, which is what we had before. */
10033
10034 static bool
10035 rs6000_mode_dependent_address (const_rtx addr)
10036 {
10037 switch (GET_CODE (addr))
10038 {
10039 case PLUS:
10040 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10041 is considered a legitimate address before reload, so there
10042 are no offset restrictions in that case. Note that this
10043 condition is safe in strict mode because any address involving
10044 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10045 been rejected as illegitimate. */
10046 if (XEXP (addr, 0) != virtual_stack_vars_rtx
10047 && XEXP (addr, 0) != arg_pointer_rtx
10048 && CONST_INT_P (XEXP (addr, 1)))
10049 {
10050 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
10051 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
10052 if (TARGET_PREFIXED)
10053 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
10054 else
10055 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
10056 }
10057 break;
10058
10059 case LO_SUM:
10060 /* Anything in the constant pool is sufficiently aligned that
10061 all bytes have the same high part address. */
10062 return !legitimate_constant_pool_address_p (addr, QImode, false);
10063
10064 /* Auto-increment cases are now treated generically in recog.cc. */
10065 case PRE_MODIFY:
10066 return TARGET_UPDATE;
10067
10068 /* AND is only allowed in Altivec loads. */
10069 case AND:
10070 return true;
10071
10072 default:
10073 break;
10074 }
10075
10076 return false;
10077 }
10078
10079 /* Debug version of rs6000_mode_dependent_address. */
10080 static bool
10081 rs6000_debug_mode_dependent_address (const_rtx addr)
10082 {
10083 bool ret = rs6000_mode_dependent_address (addr);
10084
10085 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
10086 ret ? "true" : "false");
10087 debug_rtx (addr);
10088
10089 return ret;
10090 }
10091
10092 /* Implement FIND_BASE_TERM. */
10093
10094 rtx
10095 rs6000_find_base_term (rtx op)
10096 {
10097 rtx base;
10098
10099 base = op;
10100 if (GET_CODE (base) == CONST)
10101 base = XEXP (base, 0);
10102 if (GET_CODE (base) == PLUS)
10103 base = XEXP (base, 0);
10104 if (GET_CODE (base) == UNSPEC)
10105 switch (XINT (base, 1))
10106 {
10107 case UNSPEC_TOCREL:
10108 case UNSPEC_MACHOPIC_OFFSET:
10109 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10110 for aliasing purposes. */
10111 return XVECEXP (base, 0, 0);
10112 }
10113
10114 return op;
10115 }
10116
10117 /* More elaborate version of recog's offsettable_memref_p predicate
10118 that works around the ??? note of rs6000_mode_dependent_address.
10119 In particular it accepts
10120
10121 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10122
10123 in 32-bit mode, which the recog predicate rejects. */
10124
10125 static bool
10126 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
10127 {
10128 bool worst_case;
10129
10130 if (!MEM_P (op))
10131 return false;
10132
10133 /* First mimic offsettable_memref_p. */
10134 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
10135 return true;
10136
10137 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10138 the latter predicate knows nothing about the mode of the memory
10139 reference and, therefore, assumes that it is the largest supported
10140 mode (TFmode). As a consequence, legitimate offsettable memory
10141 references are rejected. rs6000_legitimate_offset_address_p contains
10142 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10143 at least with a little bit of help here given that we know the
10144 actual registers used. */
10145 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10146 || GET_MODE_SIZE (reg_mode) == 4);
10147 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10148 strict, worst_case);
10149 }
10150
10151 /* Determine the reassociation width to be used in reassociate_bb.
10152 This takes into account how many parallel operations we
10153 can actually do of a given type, and also the latency.
10154 P8:
10155 int add/sub 6/cycle
10156 mul 2/cycle
10157 vect add/sub/mul 2/cycle
10158 fp add/sub/mul 2/cycle
10159 dfp 1/cycle
10160 */
10161
10162 static int
10163 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10164 machine_mode mode)
10165 {
10166 switch (rs6000_tune)
10167 {
10168 case PROCESSOR_POWER8:
10169 case PROCESSOR_POWER9:
10170 case PROCESSOR_POWER10:
10171 if (DECIMAL_FLOAT_MODE_P (mode))
10172 return 1;
10173 if (VECTOR_MODE_P (mode))
10174 return 4;
10175 if (INTEGRAL_MODE_P (mode))
10176 return 1;
10177 if (FLOAT_MODE_P (mode))
10178 return 4;
10179 break;
10180 default:
10181 break;
10182 }
10183 return 1;
10184 }
10185
10186 /* Change register usage conditional on target flags. */
10187 static void
10188 rs6000_conditional_register_usage (void)
10189 {
10190 int i;
10191
10192 if (TARGET_DEBUG_TARGET)
10193 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10194
10195 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10196 if (TARGET_64BIT)
10197 fixed_regs[13] = call_used_regs[13] = 1;
10198
10199 /* Conditionally disable FPRs. */
10200 if (TARGET_SOFT_FLOAT)
10201 for (i = 32; i < 64; i++)
10202 fixed_regs[i] = call_used_regs[i] = 1;
10203
10204 /* The TOC register is not killed across calls in a way that is
10205 visible to the compiler. */
10206 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10207 call_used_regs[2] = 0;
10208
10209 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10210 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10211
10212 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10213 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10214 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10215
10216 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10217 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10218 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10219
10220 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10221 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10222
10223 if (!TARGET_ALTIVEC && !TARGET_VSX)
10224 {
10225 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10226 fixed_regs[i] = call_used_regs[i] = 1;
10227 call_used_regs[VRSAVE_REGNO] = 1;
10228 }
10229
10230 if (TARGET_ALTIVEC || TARGET_VSX)
10231 global_regs[VSCR_REGNO] = 1;
10232
10233 if (TARGET_ALTIVEC_ABI)
10234 {
10235 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10236 call_used_regs[i] = 1;
10237
10238 /* AIX reserves VR20:31 in non-extended ABI mode. */
10239 if (TARGET_XCOFF && !rs6000_aix_extabi)
10240 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10241 fixed_regs[i] = call_used_regs[i] = 1;
10242 }
10243 }
10244
10245 \f
10246 /* Output insns to set DEST equal to the constant SOURCE as a series of
10247 lis, ori and shl instructions and return TRUE. */
10248
10249 bool
10250 rs6000_emit_set_const (rtx dest, rtx source)
10251 {
10252 machine_mode mode = GET_MODE (dest);
10253 rtx temp, set;
10254 rtx_insn *insn;
10255 HOST_WIDE_INT c;
10256
10257 gcc_checking_assert (CONST_INT_P (source));
10258 c = INTVAL (source);
10259 switch (mode)
10260 {
10261 case E_QImode:
10262 case E_HImode:
10263 emit_insn (gen_rtx_SET (dest, source));
10264 return true;
10265
10266 case E_SImode:
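    /* lis TEMP,HIGH16 ; ori DEST,TEMP,LOW16.  E.g. (illustrative)
       0x12345678 becomes lis TEMP,0x1234 ; ori DEST,TEMP,0x5678.  */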
10267 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10268
10269 emit_insn (gen_rtx_SET (temp, GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10270 emit_insn (gen_rtx_SET (dest,
10271 gen_rtx_IOR (SImode, temp,
10272 GEN_INT (c & 0xffff))));
10273 break;
10274
10275 case E_DImode:
10276 if (!TARGET_POWERPC64)
10277 {
10278 rtx hi, lo;
10279
10280 hi = operand_subword_force (dest, WORDS_BIG_ENDIAN == 0, DImode);
10281 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0, DImode);
10282 emit_move_insn (hi, GEN_INT (c >> 32));
10283 c = sext_hwi (c, 32);
10284 emit_move_insn (lo, GEN_INT (c));
10285 }
10286 else
10287 rs6000_emit_set_long_const (dest, c);
10288 break;
10289
10290 default:
10291 gcc_unreachable ();
10292 }
10293
10294 insn = get_last_insn ();
10295 set = single_set (insn);
10296 if (! CONSTANT_P (SET_SRC (set)))
10297 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10298
10299 return true;
10300 }
10301
10302 /* Check whether the value C can be built with two instructions: a 'li'
10303 followed by a 'rotldi'.
10304
10305 If so, set *SHIFT to the shift operand of the rotldi (rldicl), set
10306 *MASK to its mask operand, and return true.
10307 Return false otherwise. */
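/* Worked example (illustrative): 0x5000000000000000 can be built as
	li rD,0x5000
	rotldi rD,rD,48
   since rotating the constant left by 16 bits leaves 0x5000, which fits
   in the positive range of a 'li' immediate.  */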
10308
10309 static bool
10310 can_be_built_by_li_and_rotldi (HOST_WIDE_INT c, int *shift,
10311 HOST_WIDE_INT *mask)
10312 {
10313 /* If C or ~C contains at least 49 successive zeros, then C can be rotated
10314 to/from a positive or negative value that 'li' is able to load. */
10315 int n;
10316 if (can_be_rotated_to_lowbits (c, 15, &n)
10317 || can_be_rotated_to_lowbits (~c, 15, &n))
10318 {
10319 *mask = HOST_WIDE_INT_M1;
10320 *shift = HOST_BITS_PER_WIDE_INT - n;
10321 return true;
10322 }
10323
10324 return false;
10325 }
10326
10327 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10328 Output insns to set DEST equal to the constant C as a series of
10329 lis, ori and shl instructions. */
10330
10331 static void
10332 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
10333 {
10334 rtx temp;
10335 int shift;
10336 HOST_WIDE_INT mask;
10337 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10338
10339 ud1 = c & 0xffff;
10340 ud2 = (c >> 16) & 0xffff;
10341 ud3 = (c >> 32) & 0xffff;
10342 ud4 = (c >> 48) & 0xffff;
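  /* E.g. (illustrative) c = 0x123456789abcdef0 splits into ud4 = 0x1234,
     ud3 = 0x5678, ud2 = 0x9abc and ud1 = 0xdef0, ud1 being the least
     significant halfword.  */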
10343
10344 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10345 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
10346 emit_move_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
10347
10348 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10349 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
10350 {
10351 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10352
10353 emit_move_insn (ud1 != 0 ? temp : dest,
10354 GEN_INT (sext_hwi (ud2 << 16, 32)));
10355 if (ud1 != 0)
10356 emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10357 }
10358 else if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
10359 {
10360 /* lis; xoris */
10361 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10362 emit_move_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32)));
10363 emit_move_insn (dest, gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000)));
10364 }
10365 else if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
10366 {
10367 /* li; xoris */
10368 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10369 emit_move_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
10370 emit_move_insn (dest, gen_rtx_XOR (DImode, temp,
10371 GEN_INT ((ud2 ^ 0xffff) << 16)));
10372 }
10373 else if (can_be_built_by_li_and_rotldi (c, &shift, &mask))
10374 {
10375 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10376 unsigned HOST_WIDE_INT imm = (c | ~mask);
10377 imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
10378
10379 emit_move_insn (temp, GEN_INT (imm));
10380 if (shift != 0)
10381 temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift));
10382 emit_move_insn (dest, temp);
10383 }
10384 else if (ud3 == 0 && ud4 == 0)
10385 {
10386 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10387
10388 gcc_assert (ud2 & 0x8000);
10389
10390 if (ud1 == 0)
10391 {
10392 /* lis; rldicl */
10393 emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
10394 emit_move_insn (dest,
10395 gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
10396 }
10397 else if (!(ud1 & 0x8000))
10398 {
10399 /* li; oris */
10400 emit_move_insn (temp, GEN_INT (ud1));
10401 emit_move_insn (dest,
10402 gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
10403 }
10404 else
10405 {
10406 /* lis; ori; rldicl */
10407 emit_move_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
10408 emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10409 emit_move_insn (dest,
10410 gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
10411 }
10412 }
10413 else if (ud1 == ud3 && ud2 == ud4)
10414 {
10415 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10416 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10417 rs6000_emit_set_long_const (temp, sext_hwi (num, 32));
10418 rtx one = gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff));
10419 rtx two = gen_rtx_ASHIFT (DImode, temp, GEN_INT (32));
10420 emit_move_insn (dest, gen_rtx_IOR (DImode, one, two));
10421 }
10422 else if ((ud4 == 0xffff && (ud3 & 0x8000))
10423 || (ud4 == 0 && ! (ud3 & 0x8000)))
10424 {
10425 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
10426
10427 emit_move_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
10428 if (ud2 != 0)
10429 emit_move_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2)));
10430 emit_move_insn (ud1 != 0 ? temp : dest,
10431 gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
10432 if (ud1 != 0)
10433 emit_move_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10434 }
10435 else if (TARGET_PREFIXED)
10436 {
10437 if (can_create_pseudo_p ())
10438 {
10439 /* pli A,L + pli B,H + rldimi A,B,32,0. */
10440 temp = gen_reg_rtx (DImode);
10441 rtx temp1 = gen_reg_rtx (DImode);
10442 emit_move_insn (temp, GEN_INT ((ud4 << 16) | ud3));
10443 emit_move_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
10444
10445 emit_insn (gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1,
10446 GEN_INT (0xffffffff)));
10447 }
10448 else
10449 {
10450 /* pli A,H + sldi A,32 + paddi A,A,L. */
10451 emit_move_insn (dest, GEN_INT ((ud4 << 16) | ud3));
10452
10453 emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
10454
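/* paddi, like addi, treats RA == r0 as the constant zero rather than
as a register, so the accumulating form below cannot use r0. */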
10455 bool can_use_paddi = REGNO (dest) != FIRST_GPR_REGNO;
10456
10457 /* Use paddi for the low 32 bits. */
10458 if (ud2 != 0 && ud1 != 0 && can_use_paddi)
10459 emit_move_insn (dest, gen_rtx_PLUS (DImode, dest,
10460 GEN_INT ((ud2 << 16) | ud1)));
10461
10462 /* Use oris, ori for low 32 bits. */
10463 if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
10464 emit_move_insn (dest,
10465 gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
10466 if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
10467 emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
10468 }
10469 }
10470 else
10471 {
10472 if (can_create_pseudo_p ())
10473 {
10474 /* lis HIGH,UD4 ; ori HIGH,UD3 ;
10475 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */
10476 rtx high = gen_reg_rtx (DImode);
10477 rtx low = gen_reg_rtx (DImode);
10478 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10479 rs6000_emit_set_long_const (low, sext_hwi (num, 32));
10480 num = (ud4 << 16) | ud3;
10481 rs6000_emit_set_long_const (high, sext_hwi (num, 32));
10482 emit_insn (gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
10483 GEN_INT (0xffffffff)));
10484 }
10485 else
10486 {
10487 /* lis DEST,UD4 ; ori DEST,UD3 ; rotl DEST,32 ;
10488 oris DEST,UD2 ; ori DEST,UD1. */
10489 emit_move_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
10490 if (ud3 != 0)
10491 emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
10492
10493 emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
10494 if (ud2 != 0)
10495 emit_move_insn (dest,
10496 gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
10497 if (ud1 != 0)
10498 emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
10499 }
10500 }
10501 }
10502
10503 /* Helper for the following. Get rid of [r+r] memory refs
10504 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
10505
10506 static void
10507 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10508 {
10509 if (MEM_P (operands[0])
10510 && !REG_P (XEXP (operands[0], 0))
10511 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10512 GET_MODE (operands[0]), false))
10513 operands[0]
10514 = replace_equiv_address (operands[0],
10515 copy_addr_to_reg (XEXP (operands[0], 0)));
10516
10517 if (MEM_P (operands[1])
10518 && !REG_P (XEXP (operands[1], 0))
10519 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10520 GET_MODE (operands[1]), false))
10521 operands[1]
10522 = replace_equiv_address (operands[1],
10523 copy_addr_to_reg (XEXP (operands[1], 0)));
10524 }
10525
10526 /* Generate a vector of constants to permute MODE for a little-endian
10527 storage operation by swapping the two halves of a vector. */
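/* (Illustration: the permutation is { 1, 0 } for V2DImode and
{ 2, 3, 0, 1 } for V4SImode.) */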
10528 static rtvec
10529 rs6000_const_vec (machine_mode mode)
10530 {
10531 int i, subparts;
10532 rtvec v;
10533
10534 switch (mode)
10535 {
10536 case E_V1TImode:
10537 subparts = 1;
10538 break;
10539 case E_V2DFmode:
10540 case E_V2DImode:
10541 subparts = 2;
10542 break;
10543 case E_V4SFmode:
10544 case E_V4SImode:
10545 subparts = 4;
10546 break;
10547 case E_V8HImode:
10548 subparts = 8;
10549 break;
10550 case E_V16QImode:
10551 subparts = 16;
10552 break;
10553 default:
10554 gcc_unreachable ();
10555 }
10556
10557 v = rtvec_alloc (subparts);
10558
10559 for (i = 0; i < subparts / 2; ++i)
10560 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10561 for (i = subparts / 2; i < subparts; ++i)
10562 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10563
10564 return v;
10565 }
10566
10567 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10568 store operation. */
10569 void
10570 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10571 {
10572 gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
10573 gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));
10574
10575 /* Scalar permutations are easier to express in integer modes than in
10576 floating-point modes, so cast them here. We use V1TImode instead
10577 of TImode to ensure that the values don't go through GPRs. */
10578 if (FLOAT128_VECTOR_P (mode))
10579 {
10580 dest = gen_lowpart (V1TImode, dest);
10581 source = gen_lowpart (V1TImode, source);
10582 mode = V1TImode;
10583 }
10584
10585 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10586 scalar. */
10587 if (mode == TImode || mode == V1TImode)
10588 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10589 GEN_INT (64))));
10590 else
10591 {
10592 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10593 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10594 }
10595 }
10596
10597 /* Emit a little-endian load from vector memory location SOURCE to VSX
10598 register DEST in mode MODE. The load is done with two permuting
10599 insns that represent an lxvd2x and an xxpermdi. */
10600 void
10601 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10602 {
10603 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10604 V1TImode). */
10605 if (mode == TImode || mode == V1TImode)
10606 {
10607 mode = V2DImode;
10608 dest = gen_lowpart (V2DImode, dest);
10609 source = adjust_address (source, V2DImode, 0);
10610 }
10611
10612 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10613 rs6000_emit_le_vsx_permute (tmp, source, mode);
10614 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10615 }
10616
10617 /* Emit a little-endian store to vector memory location DEST from VSX
10618 register SOURCE in mode MODE. The store is done with two permuting
10619 insns that represent an xxpermdi and an stxvd2x. */
10620 void
10621 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10622 {
10623 /* This should never be called after LRA. */
10624 gcc_assert (can_create_pseudo_p ());
10625
10626 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10627 V1TImode). */
10628 if (mode == TImode || mode == V1TImode)
10629 {
10630 mode = V2DImode;
10631 dest = adjust_address (dest, V2DImode, 0);
10632 source = gen_lowpart (V2DImode, source);
10633 }
10634
10635 rtx tmp = gen_reg_rtx_and_attrs (source);
10636 rs6000_emit_le_vsx_permute (tmp, source, mode);
10637 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10638 }
10639
10640 /* Emit a sequence representing a little-endian VSX load or store,
10641 moving data from SOURCE to DEST in mode MODE. This is done
10642 separately from rs6000_emit_move to ensure it is called only
10643 during expand. LE VSX loads and stores introduced later are
10644 handled with a split. The expand-time RTL generation allows
10645 us to optimize away redundant pairs of register-permutes. */
10646 void
10647 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10648 {
10649 gcc_assert (!BYTES_BIG_ENDIAN
10650 && VECTOR_MEM_VSX_P (mode)
10651 && !TARGET_P9_VECTOR
10652 && !gpr_or_gpr_p (dest, source)
10653 && (MEM_P (source) ^ MEM_P (dest)));
10654
10655 if (MEM_P (source))
10656 {
10657 gcc_assert (REG_P (dest) || SUBREG_P (dest));
10658 rs6000_emit_le_vsx_load (dest, source, mode);
10659 }
10660 else
10661 {
10662 if (!REG_P (source))
10663 source = force_reg (mode, source);
10664 rs6000_emit_le_vsx_store (dest, source, mode);
10665 }
10666 }
10667
10668 /* Return whether an SFmode or SImode move can be done without converting one
10669 mode to another. This arises when we have:
10670
10671 (SUBREG:SF (REG:SI ...))
10672 (SUBREG:SI (REG:SF ...))
10673
10674 and one of the values is in a floating point/vector register, where SFmode
10675 scalars are stored in DFmode format. */
10676
10677 bool
10678 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10679 {
10680 if (TARGET_ALLOW_SF_SUBREG)
10681 return true;
10682
10683 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10684 return true;
10685
10686 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10687 return true;
10688
10689 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10690 if (SUBREG_P (dest))
10691 {
10692 rtx dest_subreg = SUBREG_REG (dest);
10693 rtx src_subreg = SUBREG_REG (src);
10694 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10695 }
10696
10697 return false;
10698 }
10699
10700
10701 /* Helper function to change moves with:
10702
10703 (SUBREG:SF (REG:SI)) and
10704 (SUBREG:SI (REG:SF))
10705
10706 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10707 values are stored as DFmode values in the VSX registers. We need to convert
10708 the bits before we can use a direct move or operate on the bits in the
10709 vector register as an integer type.
10710
10711 Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */
10712
10713 static bool
10714 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10715 {
10716 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10717 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10718 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10719 {
10720 rtx inner_source = SUBREG_REG (source);
10721 machine_mode inner_mode = GET_MODE (inner_source);
10722
10723 if (mode == SImode && inner_mode == SFmode)
10724 {
10725 emit_insn (gen_movsi_from_sf (dest, inner_source));
10726 return true;
10727 }
10728
10729 if (mode == SFmode && inner_mode == SImode)
10730 {
10731 emit_insn (gen_movsf_from_si (dest, inner_source));
10732 return true;
10733 }
10734 }
10735
10736 return false;
10737 }
10738
10739 /* Emit a move from SOURCE to DEST in mode MODE. */
10740 void
10741 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10742 {
10743 rtx operands[2];
10744 operands[0] = dest;
10745 operands[1] = source;
10746
10747 if (TARGET_DEBUG_ADDR)
10748 {
10749 fprintf (stderr,
10750 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10751 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10752 GET_MODE_NAME (mode),
10753 lra_in_progress,
10754 reload_completed,
10755 can_create_pseudo_p ());
10756 debug_rtx (dest);
10757 fprintf (stderr, "source:\n");
10758 debug_rtx (source);
10759 }
10760
10761 /* Check that we get CONST_WIDE_INT only when we should. */
10762 if (CONST_WIDE_INT_P (operands[1])
10763 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10764 gcc_unreachable ();
10765
10766 #ifdef HAVE_AS_GNU_ATTRIBUTE
10767 /* If we use a long double type, set the flags in .gnu_attribute that say
10768 what the long double type is. This is to allow the linker's warning
10769 message for the wrong long double to be useful, even if the function does
10770 not do a call (for example, doing a 128-bit add on power9 if the long
10771 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128 is
10772 used and it isn't the default long double type. */
10773 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10774 {
10775 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10776 rs6000_passes_float = rs6000_passes_long_double = true;
10777
10778 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10779 rs6000_passes_float = rs6000_passes_long_double = true;
10780 }
10781 #endif
10782
10783 /* See if we need to special case SImode/SFmode SUBREG moves. */
10784 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10785 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10786 return;
10787
10788 /* Check if GCC is setting up a block move that will end up using FP
10789 registers as temporaries. We must make sure this is acceptable. */
10790 if (MEM_P (operands[0])
10791 && MEM_P (operands[1])
10792 && mode == DImode
10793 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10794 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10795 && ! (rs6000_slow_unaligned_access (SImode,
10796 (MEM_ALIGN (operands[0]) > 32
10797 ? 32 : MEM_ALIGN (operands[0])))
10798 || rs6000_slow_unaligned_access (SImode,
10799 (MEM_ALIGN (operands[1]) > 32
10800 ? 32 : MEM_ALIGN (operands[1]))))
10801 && ! MEM_VOLATILE_P (operands [0])
10802 && ! MEM_VOLATILE_P (operands [1]))
10803 {
10804 emit_move_insn (adjust_address (operands[0], SImode, 0),
10805 adjust_address (operands[1], SImode, 0));
10806 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10807 adjust_address (copy_rtx (operands[1]), SImode, 4));
10808 return;
10809 }
10810
10811 if (can_create_pseudo_p () && MEM_P (operands[0])
10812 && !gpc_reg_operand (operands[1], mode))
10813 operands[1] = force_reg (mode, operands[1]);
10814
10815 /* Recognize the case where operand[1] is a reference to thread-local
10816 data and load its address to a register. */
10817 if (tls_referenced_p (operands[1]))
10818 {
10819 enum tls_model model;
10820 rtx tmp = operands[1];
10821 rtx addend = NULL;
10822
10823 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
10824 {
10825 addend = XEXP (XEXP (tmp, 0), 1);
10826 tmp = XEXP (XEXP (tmp, 0), 0);
10827 }
10828
10829 gcc_assert (SYMBOL_REF_P (tmp));
10830 model = SYMBOL_REF_TLS_MODEL (tmp);
10831 gcc_assert (model != 0);
10832
10833 tmp = rs6000_legitimize_tls_address (tmp, model);
10834 if (addend)
10835 {
10836 tmp = gen_rtx_PLUS (mode, tmp, addend);
10837 tmp = force_operand (tmp, operands[0]);
10838 }
10839 operands[1] = tmp;
10840 }
10841
10842 /* 128-bit constant floating-point values on Darwin should really be loaded
10843 as two parts. However, this premature splitting is a problem when DFmode
10844 values can go into Altivec registers. */
10845 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
10846 && !reg_addr[DFmode].scalar_in_vmx_p)
10847 {
10848 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
10849 simplify_gen_subreg (DFmode, operands[1], mode, 0),
10850 DFmode);
10851 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
10852 GET_MODE_SIZE (DFmode)),
10853 simplify_gen_subreg (DFmode, operands[1], mode,
10854 GET_MODE_SIZE (DFmode)),
10855 DFmode);
10856 return;
10857 }
10858
10859 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
10860 p1:SD) if p1 is not of floating point class and p0 is spilled as
10861 we can have no analogous movsd_store for this. */
10862 if (lra_in_progress && mode == DDmode
10863 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10864 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10865 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
10866 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
10867 {
10868 enum reg_class cl;
10869 int regno = REGNO (SUBREG_REG (operands[1]));
10870
10871 if (!HARD_REGISTER_NUM_P (regno))
10872 {
10873 cl = reg_preferred_class (regno);
10874 regno = reg_renumber[regno];
10875 if (regno < 0)
10876 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
10877 }
10878 if (regno >= 0 && ! FP_REGNO_P (regno))
10879 {
10880 mode = SDmode;
10881 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
10882 operands[1] = SUBREG_REG (operands[1]);
10883 }
10884 }
10885 if (lra_in_progress
10886 && mode == SDmode
10887 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
10888 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
10889 && (REG_P (operands[1])
10890 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
10891 {
10892 int regno = reg_or_subregno (operands[1]);
10893 enum reg_class cl;
10894
10895 if (!HARD_REGISTER_NUM_P (regno))
10896 {
10897 cl = reg_preferred_class (regno);
10898 gcc_assert (cl != NO_REGS);
10899 regno = reg_renumber[regno];
10900 if (regno < 0)
10901 regno = ira_class_hard_regs[cl][0];
10902 }
10903 if (FP_REGNO_P (regno))
10904 {
10905 if (GET_MODE (operands[0]) != DDmode)
10906 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
10907 emit_insn (gen_movsd_store (operands[0], operands[1]));
10908 }
10909 else if (INT_REGNO_P (regno))
10910 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10911 else
10912 gcc_unreachable ();
10913 return;
10914 }
10915 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
10916 p:DD)) if p0 is not of floating point class and p1 is spilled as
10917 we can have no analogous movsd_load for this. */
10918 if (lra_in_progress && mode == DDmode
10919 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
10920 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
10921 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10922 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10923 {
10924 enum reg_class cl;
10925 int regno = REGNO (SUBREG_REG (operands[0]));
10926
10927 if (!HARD_REGISTER_NUM_P (regno))
10928 {
10929 cl = reg_preferred_class (regno);
10930 regno = reg_renumber[regno];
10931 if (regno < 0)
10932 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
10933 }
10934 if (regno >= 0 && ! FP_REGNO_P (regno))
10935 {
10936 mode = SDmode;
10937 operands[0] = SUBREG_REG (operands[0]);
10938 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
10939 }
10940 }
10941 if (lra_in_progress
10942 && mode == SDmode
10943 && (REG_P (operands[0])
10944 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
10945 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
10946 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
10947 {
10948 int regno = reg_or_subregno (operands[0]);
10949 enum reg_class cl;
10950
10951 if (!HARD_REGISTER_NUM_P (regno))
10952 {
10953 cl = reg_preferred_class (regno);
10954 gcc_assert (cl != NO_REGS);
10955 regno = reg_renumber[regno];
10956 if (regno < 0)
10957 regno = ira_class_hard_regs[cl][0];
10958 }
10959 if (FP_REGNO_P (regno))
10960 {
10961 if (GET_MODE (operands[1]) != DDmode)
10962 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
10963 emit_insn (gen_movsd_load (operands[0], operands[1]));
10964 }
10965 else if (INT_REGNO_P (regno))
10966 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
10967 else
10968 gcc_unreachable ();
10969 return;
10970 }
10971
10972 /* FIXME: In the long term, this switch statement should go away
10973 and be replaced by a sequence of tests based on things like
10974 mode == Pmode. */
10975 switch (mode)
10976 {
10977 case E_HImode:
10978 case E_QImode:
10979 if (CONSTANT_P (operands[1])
10980 && !CONST_INT_P (operands[1]))
10981 operands[1] = force_const_mem (mode, operands[1]);
10982 break;
10983
10984 case E_TFmode:
10985 case E_TDmode:
10986 case E_IFmode:
10987 case E_KFmode:
10988 if (FLOAT128_2REG_P (mode))
10989 rs6000_eliminate_indexed_memrefs (operands);
10990 /* fall through */
10991
10992 case E_DFmode:
10993 case E_DDmode:
10994 case E_SFmode:
10995 case E_SDmode:
10996 if (CONSTANT_P (operands[1])
10997 && ! easy_fp_constant (operands[1], mode))
10998 operands[1] = force_const_mem (mode, operands[1]);
10999 break;
11000
11001 case E_V16QImode:
11002 case E_V8HImode:
11003 case E_V4SFmode:
11004 case E_V4SImode:
11005 case E_V2DFmode:
11006 case E_V2DImode:
11007 case E_V1TImode:
11008 if (CONSTANT_P (operands[1])
11009 && !easy_vector_constant (operands[1], mode))
11010 operands[1] = force_const_mem (mode, operands[1]);
11011 break;
11012
11013 case E_OOmode:
11014 case E_XOmode:
11015 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
11016 error ("%qs is an opaque type, and you cannot set it to other values",
11017 (mode == OOmode) ? "__vector_pair" : "__vector_quad");
11018 break;
11019
11020 case E_SImode:
11021 case E_DImode:
11022 /* Use the default pattern for the address of ELF small data. */
11023 if (TARGET_ELF
11024 && mode == Pmode
11025 && DEFAULT_ABI == ABI_V4
11026 && (SYMBOL_REF_P (operands[1])
11027 || GET_CODE (operands[1]) == CONST)
11028 && small_data_operand (operands[1], mode))
11029 {
11030 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11031 return;
11032 }
11033
11034 /* Use the default pattern for loading up PC-relative addresses. */
11035 if (TARGET_PCREL && mode == Pmode
11036 && pcrel_local_or_external_address (operands[1], Pmode))
11037 {
11038 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11039 return;
11040 }
11041
11042 if (DEFAULT_ABI == ABI_V4
11043 && mode == Pmode && mode == SImode
11044 && flag_pic == 1 && got_operand (operands[1], mode))
11045 {
11046 emit_insn (gen_movsi_got (operands[0], operands[1]));
11047 return;
11048 }
11049
11050 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
11051 && TARGET_NO_TOC_OR_PCREL
11052 && ! flag_pic
11053 && mode == Pmode
11054 && CONSTANT_P (operands[1])
11055 && GET_CODE (operands[1]) != HIGH
11056 && !CONST_INT_P (operands[1]))
11057 {
11058 rtx target = (!can_create_pseudo_p ()
11059 ? operands[0]
11060 : gen_reg_rtx (mode));
11061
11062 /* If this is a function address on -mcall-aixdesc,
11063 convert it to the address of the descriptor. */
11064 if (DEFAULT_ABI == ABI_AIX
11065 && SYMBOL_REF_P (operands[1])
11066 && XSTR (operands[1], 0)[0] == '.')
11067 {
11068 const char *name = XSTR (operands[1], 0);
11069 rtx new_ref;
11070 while (*name == '.')
11071 name++;
11072 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
11073 CONSTANT_POOL_ADDRESS_P (new_ref)
11074 = CONSTANT_POOL_ADDRESS_P (operands[1]);
11075 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
11076 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
11077 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
11078 operands[1] = new_ref;
11079 }
11080
11081 if (DEFAULT_ABI == ABI_DARWIN)
11082 {
11083 #if TARGET_MACHO
11084 /* This is not PIC code, but could require the subset of
11085 indirections used by mdynamic-no-pic. */
11086 if (MACHO_DYNAMIC_NO_PIC_P)
11087 {
11088 /* Take care of any required data indirection. */
11089 operands[1] = rs6000_machopic_legitimize_pic_address (
11090 operands[1], mode, operands[0]);
11091 if (operands[0] != operands[1])
11092 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11093 return;
11094 }
11095 #endif
11096 emit_insn (gen_macho_high (Pmode, target, operands[1]));
11097 emit_insn (gen_macho_low (Pmode, operands[0],
11098 target, operands[1]));
11099 return;
11100 }
11101
11102 emit_insn (gen_elf_high (target, operands[1]));
11103 emit_insn (gen_elf_low (operands[0], target, operands[1]));
11104 return;
11105 }
11106
11107 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11108 and we have put it in the TOC, we just need to make a TOC-relative
11109 reference to it. */
11110 if (TARGET_TOC
11111 && SYMBOL_REF_P (operands[1])
11112 && use_toc_relative_ref (operands[1], mode))
11113 operands[1] = create_TOC_reference (operands[1], operands[0]);
11114 else if (mode == Pmode
11115 && CONSTANT_P (operands[1])
11116 && GET_CODE (operands[1]) != HIGH
11117 && ((REG_P (operands[0])
11118 && FP_REGNO_P (REGNO (operands[0])))
11119 || !CONST_INT_P (operands[1])
11120 || (num_insns_constant (operands[1], mode)
11121 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
11122 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
11123 && (TARGET_CMODEL == CMODEL_SMALL
11124 || can_create_pseudo_p ()
11125 || (REG_P (operands[0])
11126 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
11127 {
11128
11129 #if TARGET_MACHO
11130 /* Darwin uses a special PIC legitimizer. */
11131 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
11132 {
11133 operands[1] =
11134 rs6000_machopic_legitimize_pic_address (operands[1], mode,
11135 operands[0]);
11136 if (operands[0] != operands[1])
11137 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11138 return;
11139 }
11140 #endif
11141
11142 /* If we are to limit the number of things we put in the TOC and
11143 this is a symbol plus a constant we can add in one insn,
11144 just put the symbol in the TOC and add the constant. */
11145 if (GET_CODE (operands[1]) == CONST
11146 && TARGET_NO_SUM_IN_TOC
11147 && GET_CODE (XEXP (operands[1], 0)) == PLUS
11148 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
11149 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
11150 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
11151 && ! side_effects_p (operands[0]))
11152 {
11153 rtx sym =
11154 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
11155 rtx other = XEXP (XEXP (operands[1], 0), 1);
11156
11157 sym = force_reg (mode, sym);
11158 emit_insn (gen_add3_insn (operands[0], sym, other));
11159 return;
11160 }
11161
11162 operands[1] = force_const_mem (mode, operands[1]);
11163
11164 if (TARGET_TOC
11165 && SYMBOL_REF_P (XEXP (operands[1], 0))
11166 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
11167 {
11168 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
11169 operands[0]);
11170 operands[1] = gen_const_mem (mode, tocref);
11171 set_mem_alias_set (operands[1], get_TOC_alias_set ());
11172 }
11173 }
11174 break;
11175
11176 case E_TImode:
11177 if (!VECTOR_MEM_VSX_P (TImode))
11178 rs6000_eliminate_indexed_memrefs (operands);
11179 break;
11180
11181 case E_PTImode:
11182 rs6000_eliminate_indexed_memrefs (operands);
11183 break;
11184
11185 default:
11186 fatal_insn ("bad move", gen_rtx_SET (dest, source));
11187 }
11188
11189 /* Above, we may have called force_const_mem which may have returned
11190 an invalid address. If we can, fix this up; otherwise, reload will
11191 have to deal with it. */
11192 if (MEM_P (operands[1]))
11193 operands[1] = validize_mem (operands[1]);
11194
11195 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11196 }
11197 \f
11198
11199 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
11200 static void
11201 init_float128_ibm (machine_mode mode)
11202 {
11203 if (!TARGET_XL_COMPAT)
11204 {
11205 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
11206 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
11207 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
11208 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
11209
11210 if (!TARGET_HARD_FLOAT)
11211 {
11212 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
11213 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
11214 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
11215 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
11216 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
11217 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
11218 set_optab_libfunc (le_optab, mode, "__gcc_qle");
11219 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
11220
11221 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
11222 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
11223 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
11224 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
11225 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
11226 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
11227 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
11228 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
11229 }
11230 }
11231 else
11232 {
11233 set_optab_libfunc (add_optab, mode, "_xlqadd");
11234 set_optab_libfunc (sub_optab, mode, "_xlqsub");
11235 set_optab_libfunc (smul_optab, mode, "_xlqmul");
11236 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
11237 }
11238
11239 /* Add various conversions for IFmode to use the traditional TFmode
11240 names. */
11241 if (mode == IFmode)
11242 {
11243 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
11244 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
11245 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
11246 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
11247 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
11248 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
11249
11250 set_conv_libfunc (sfix_optab, DImode, mode, "__fixtfdi");
11251 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunstfdi");
11252
11253 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatditf");
11254 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatunditf");
11255
11256 if (TARGET_POWERPC64)
11257 {
11258 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
11259 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
11260 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
11261 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
11262 }
11263 }
11264 }
11265
11266 /* Set up IEEE 128-bit floating point routines. Use different names if the
11267 arguments can be passed in a vector register. The historical PowerPC
11268 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11269 continue to use that if we aren't using vector registers to pass IEEE
11270 128-bit floating point. */
11271
11272 static void
11273 init_float128_ieee (machine_mode mode)
11274 {
11275 if (FLOAT128_VECTOR_P (mode))
11276 {
11277 set_optab_libfunc (add_optab, mode, "__addkf3");
11278 set_optab_libfunc (sub_optab, mode, "__subkf3");
11279 set_optab_libfunc (neg_optab, mode, "__negkf2");
11280 set_optab_libfunc (smul_optab, mode, "__mulkf3");
11281 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
11282 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
11283 set_optab_libfunc (abs_optab, mode, "__abskf2");
11284 set_optab_libfunc (powi_optab, mode, "__powikf2");
11285
11286 set_optab_libfunc (eq_optab, mode, "__eqkf2");
11287 set_optab_libfunc (ne_optab, mode, "__nekf2");
11288 set_optab_libfunc (gt_optab, mode, "__gtkf2");
11289 set_optab_libfunc (ge_optab, mode, "__gekf2");
11290 set_optab_libfunc (lt_optab, mode, "__ltkf2");
11291 set_optab_libfunc (le_optab, mode, "__lekf2");
11292 set_optab_libfunc (unord_optab, mode, "__unordkf2");
11293
11294 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
11295 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
11296 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
11297 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
11298
11299 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
11300 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11301 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
11302
11303 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
11304 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11305 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
11306
11307 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
11308 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
11309 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
11310 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
11311 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
11312 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
11313
11314 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
11315 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
11316 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
11317 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
11318
11319 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
11320 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
11321 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
11322 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
11323
11324 if (TARGET_POWERPC64)
11325 {
11326 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw");
11327 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw");
11328 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw");
11329 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw");
11330 }
11331 }
11332
11333 else
11334 {
11335 set_optab_libfunc (add_optab, mode, "_q_add");
11336 set_optab_libfunc (sub_optab, mode, "_q_sub");
11337 set_optab_libfunc (neg_optab, mode, "_q_neg");
11338 set_optab_libfunc (smul_optab, mode, "_q_mul");
11339 set_optab_libfunc (sdiv_optab, mode, "_q_div");
11340 if (TARGET_PPC_GPOPT)
11341 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
11342
11343 set_optab_libfunc (eq_optab, mode, "_q_feq");
11344 set_optab_libfunc (ne_optab, mode, "_q_fne");
11345 set_optab_libfunc (gt_optab, mode, "_q_fgt");
11346 set_optab_libfunc (ge_optab, mode, "_q_fge");
11347 set_optab_libfunc (lt_optab, mode, "_q_flt");
11348 set_optab_libfunc (le_optab, mode, "_q_fle");
11349
11350 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
11351 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
11352 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
11353 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
11354 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
11355 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
11356 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
11357 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
11358 }
11359 }
11360
11361 static void
11362 rs6000_init_libfuncs (void)
11363 {
11364 /* __float128 support. */
11365 if (TARGET_FLOAT128_TYPE)
11366 {
11367 init_float128_ibm (IFmode);
11368 init_float128_ieee (KFmode);
11369 }
11370
11371 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11372 if (TARGET_LONG_DOUBLE_128)
11373 {
11374 if (!TARGET_IEEEQUAD)
11375 init_float128_ibm (TFmode);
11376
11377 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11378 else
11379 init_float128_ieee (TFmode);
11380 }
11381 }
11382
11383 /* Emit a potentially record-form instruction, setting DST from SRC.
11384 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11385 signed comparison of DST with zero. If DOT is 1, the generated RTL
11386 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11387 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
11388 a separate COMPARE. */
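/* (Schematically, for DOT == 1 with CCREG being CR0, the emitted insn
is (parallel [(set CCREG (compare SRC 0)) (clobber DST)]).) */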
11389
11390 void
11391 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
11392 {
11393 if (dot == 0)
11394 {
11395 emit_move_insn (dst, src);
11396 return;
11397 }
11398
11399 if (cc_reg_not_cr0_operand (ccreg, CCmode))
11400 {
11401 emit_move_insn (dst, src);
11402 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
11403 return;
11404 }
11405
11406 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
11407 if (dot == 1)
11408 {
11409 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
11410 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
11411 }
11412 else
11413 {
11414 rtx set = gen_rtx_SET (dst, src);
11415 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
11416 }
11417 }
11418
11419 \f
11420 /* A validation routine: say whether CODE, a condition code, and MODE
11421 match. The other alternatives either don't make sense or should
11422 never be generated. */
11423
11424 void
11425 validate_condition_mode (enum rtx_code code, machine_mode mode)
11426 {
11427 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
11428 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
11429 && GET_MODE_CLASS (mode) == MODE_CC);
11430
11431 /* These don't make sense. */
11432 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
11433 || mode != CCUNSmode);
11434
11435 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
11436 || mode == CCUNSmode);
11437
11438 gcc_assert (mode == CCFPmode
11439 || (code != ORDERED && code != UNORDERED
11440 && code != UNEQ && code != LTGT
11441 && code != UNGT && code != UNLT
11442 && code != UNGE && code != UNLE));
11443
11444 /* These are invalid; the information is not there. */
11445 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
11446 }
11447
11448 \f
11449 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11450 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
11451 not zero, store there the bit offset (counted from the right) where
11452 the single stretch of 1 bits begins; and similarly for B, the bit
11453 offset where it ends. */
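/* (Worked examples: MASK 0x00ffff00 gives *E = 8 and *B = 23; the
wrap-around SImode mask 0xff0000ff gives *E = 24 and *B = 7.) */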
11454
11455 bool
11456 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
11457 {
11458 unsigned HOST_WIDE_INT val = INTVAL (mask);
11459 unsigned HOST_WIDE_INT bit;
11460 int nb, ne;
11461 int n = GET_MODE_PRECISION (mode);
11462
11463 if (mode != DImode && mode != SImode)
11464 return false;
11465
11466 if (INTVAL (mask) >= 0)
11467 {
11468 bit = val & -val;
11469 ne = exact_log2 (bit);
11470 nb = exact_log2 (val + bit);
11471 }
11472 else if (val + 1 == 0)
11473 {
11474 nb = n;
11475 ne = 0;
11476 }
11477 else if (val & 1)
11478 {
11479 val = ~val;
11480 bit = val & -val;
11481 nb = exact_log2 (bit);
11482 ne = exact_log2 (val + bit);
11483 }
11484 else
11485 {
11486 bit = val & -val;
11487 ne = exact_log2 (bit);
11488 if (val + bit == 0)
11489 nb = n;
11490 else
11491 nb = 0;
11492 }
11493
11494 nb--;
11495
11496 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
11497 return false;
11498
11499 if (b)
11500 *b = nb;
11501 if (e)
11502 *e = ne;
11503
11504 return true;
11505 }
11506
11507 bool
11508 rs6000_is_valid_rotate_dot_mask (rtx mask, machine_mode mode)
11509 {
11510 int nb, ne;
11511 if (rs6000_is_valid_mask (mask, &nb, &ne, mode) && nb >= ne && ne > 0)
11512 {
11513 if (TARGET_64BIT)
11514 return true;
11515 /* *rotldi3_mask_dot requires for -m32 -mpowerpc64 that the mask is
11516 <= 0x7fffffff. */
11517 return (UINTVAL (mask) << (63 - nb)) <= 0x7fffffff;
11518 }
11519
11520 return false;
11521 }
11522
11523 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11524 or rldicr instruction, to implement an AND with it in mode MODE. */
11525
11526 bool
11527 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
11528 {
11529 int nb, ne;
11530
11531 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11532 return false;
11533
11534 /* For DImode, we need a rldicl, rldicr, or a rlwinm with a mask that
11535 does not wrap. */
11536 if (mode == DImode)
11537 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
11538
11539 /* For SImode, rlwinm can do everything. */
11540 if (mode == SImode)
11541 return (nb < 32 && ne < 32);
11542
11543 return false;
11544 }
11545
11546 /* Return the instruction template for an AND with mask in mode MODE, with
11547 operands OPERANDS. If DOT is true, make it a record-form instruction. */
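/* (E.g. the DImode mask 0xfffffffffffffff0 has ne = 4 and nb = 63, and
so produces "rldicr %0,%1,0,59".) */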
11548
11549 const char *
11550 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
11551 {
11552 int nb, ne;
11553
11554 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
11555 gcc_unreachable ();
11556
11557 if (mode == DImode && ne == 0)
11558 {
11559 operands[3] = GEN_INT (63 - nb);
11560 if (dot)
11561 return "rldicl. %0,%1,0,%3";
11562 return "rldicl %0,%1,0,%3";
11563 }
11564
11565 if (mode == DImode && nb == 63)
11566 {
11567 operands[3] = GEN_INT (63 - ne);
11568 if (dot)
11569 return "rldicr. %0,%1,0,%3";
11570 return "rldicr %0,%1,0,%3";
11571 }
11572
11573 if (nb < 32 && ne < 32)
11574 {
11575 operands[3] = GEN_INT (31 - nb);
11576 operands[4] = GEN_INT (31 - ne);
11577 if (dot)
11578 return "rlwinm. %0,%1,0,%3,%4";
11579 return "rlwinm %0,%1,0,%3,%4";
11580 }
11581
11582 gcc_unreachable ();
11583 }
11584
11585 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11586 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11587 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
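/* (E.g. (ashift:SI x 16) under the mask 0xffff0000 is valid; it can be
done as the single insn "rlwinm %0,%1,16,0,15".) */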
11588
11589 bool
11590 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
11591 {
11592 int nb, ne;
11593
11594 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11595 return false;
11596
11597 int n = GET_MODE_PRECISION (mode);
11598 int sh = -1;
11599
11600 if (CONST_INT_P (XEXP (shift, 1)))
11601 {
11602 sh = INTVAL (XEXP (shift, 1));
11603 if (sh < 0 || sh >= n)
11604 return false;
11605 }
11606
11607 rtx_code code = GET_CODE (shift);
11608
11609 /* Convert any shift by 0 to a rotate, to simplify the code below. */
11610 if (sh == 0)
11611 code = ROTATE;
11612
11613 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11614 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11615 code = ASHIFT;
11616 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11617 {
11618 code = LSHIFTRT;
11619 sh = n - sh;
11620 }
11621
11622 /* DImode rotates need rld*. */
11623 if (mode == DImode && code == ROTATE)
11624 return (nb == 63 || ne == 0 || ne == sh);
11625
11626 /* SImode rotates need rlw*. */
11627 if (mode == SImode && code == ROTATE)
11628 return (nb < 32 && ne < 32 && sh < 32);
11629
11630 /* Wrap-around masks are only okay for rotates. */
11631 if (ne > nb)
11632 return false;
11633
11634 /* Variable shifts are only okay for rotates. */
11635 if (sh < 0)
11636 return false;
11637
11638 /* Don't allow ASHIFT if the mask is wrong for that. */
11639 if (code == ASHIFT && ne < sh)
11640 return false;
11641
11642 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11643 if the mask is wrong for that. */
11644 if (nb < 32 && ne < 32 && sh < 32
11645 && !(code == LSHIFTRT && nb >= 32 - sh))
11646 return true;
11647
11648 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11649 if the mask is wrong for that. */
11650 if (code == LSHIFTRT)
11651 sh = 64 - sh;
11652 if (nb == 63 || ne == 0 || ne == sh)
11653 return !(code == LSHIFTRT && nb >= sh);
11654
11655 return false;
11656 }
11657
11658 /* Return the instruction template for a shift with mask in mode MODE, with
11659 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11660
11661 const char *
11662 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
11663 {
11664 int nb, ne;
11665
11666 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11667 gcc_unreachable ();
11668
11669 if (mode == DImode && ne == 0)
11670 {
11671 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11672 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
11673 operands[3] = GEN_INT (63 - nb);
11674 if (dot)
11675 return "rld%I2cl. %0,%1,%2,%3";
11676 return "rld%I2cl %0,%1,%2,%3";
11677 }
11678
11679 if (mode == DImode && nb == 63)
11680 {
11681 operands[3] = GEN_INT (63 - ne);
11682 if (dot)
11683 return "rld%I2cr. %0,%1,%2,%3";
11684 return "rld%I2cr %0,%1,%2,%3";
11685 }
11686
11687 if (mode == DImode
11688 && GET_CODE (operands[4]) != LSHIFTRT
11689 && CONST_INT_P (operands[2])
11690 && ne == INTVAL (operands[2]))
11691 {
11692 operands[3] = GEN_INT (63 - nb);
11693 if (dot)
11694 return "rld%I2c. %0,%1,%2,%3";
11695 return "rld%I2c %0,%1,%2,%3";
11696 }
11697
11698 if (nb < 32 && ne < 32)
11699 {
11700 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11701 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11702 operands[3] = GEN_INT (31 - nb);
11703 operands[4] = GEN_INT (31 - ne);
11704 /* This insn can also be a 64-bit rotate with mask that really makes
11705 it just a shift right (with mask); the %h below adjusts for
11706 that situation (shift count is >= 32 in that case). */
11707 if (dot)
11708 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11709 return "rlw%I2nm %0,%1,%h2,%3,%4";
11710 }
11711
11712 gcc_unreachable ();
11713 }
11714
11715 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11716 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11717 ASHIFT, or LSHIFTRT) in mode MODE. */
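/* (E.g. (rotate:DI x 32) under the mask 0xffffffff00000000 is valid,
since ne == sh == 32; it corresponds to "rldimi %0,%1,32,0".) */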
11718
11719 bool
11720 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
11721 {
11722 int nb, ne;
11723
11724 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11725 return false;
11726
11727 int n = GET_MODE_PRECISION (mode);
11728
11729 int sh = INTVAL (XEXP (shift, 1));
11730 if (sh < 0 || sh >= n)
11731 return false;
11732
11733 rtx_code code = GET_CODE (shift);
11734
11735 /* Convert any shift by 0 to a rotate, to simplify the code below. */
11736 if (sh == 0)
11737 code = ROTATE;
11738
11739 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11740 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11741 code = ASHIFT;
11742 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11743 {
11744 code = LSHIFTRT;
11745 sh = n - sh;
11746 }
11747
11748 /* DImode rotates need rldimi. */
11749 if (mode == DImode && code == ROTATE)
11750 return (ne == sh);
11751
11752 /* SImode rotates need rlwimi. */
11753 if (mode == SImode && code == ROTATE)
11754 return (nb < 32 && ne < 32 && sh < 32);
11755
11756 /* Wrap-around masks are only okay for rotates. */
11757 if (ne > nb)
11758 return false;
11759
11760 /* Don't allow ASHIFT if the mask is wrong for that. */
11761 if (code == ASHIFT && ne < sh)
11762 return false;
11763
11764 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11765 if the mask is wrong for that. */
11766 if (nb < 32 && ne < 32 && sh < 32
11767 && !(code == LSHIFTRT && nb >= 32 - sh))
11768 return true;
11769
11770 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11771 if the mask is wrong for that. */
11772 if (code == LSHIFTRT)
11773 sh = 64 - sh;
11774 if (ne == sh)
11775 return !(code == LSHIFTRT && nb >= sh);
11776
11777 return false;
11778 }
11779
11780 /* Return the instruction template for an insert with mask in mode MODE, with
11781 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11782
11783 const char *
11784 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
11785 {
11786 int nb, ne;
11787
11788 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11789 gcc_unreachable ();
11790
11791 /* Prefer rldimi because rlwimi is cracked. */
11792 if (TARGET_POWERPC64
11793 && (!dot || mode == DImode)
11794 && GET_CODE (operands[4]) != LSHIFTRT
11795 && ne == INTVAL (operands[2]))
11796 {
11797 operands[3] = GEN_INT (63 - nb);
11798 if (dot)
11799 return "rldimi. %0,%1,%2,%3";
11800 return "rldimi %0,%1,%2,%3";
11801 }
11802
11803 if (nb < 32 && ne < 32)
11804 {
11805 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11806 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11807 operands[3] = GEN_INT (31 - nb);
11808 operands[4] = GEN_INT (31 - ne);
11809 if (dot)
11810 return "rlwimi. %0,%1,%2,%3,%4";
11811 return "rlwimi %0,%1,%2,%3,%4";
11812 }
11813
11814 gcc_unreachable ();
11815 }
11816
11817 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
11818 using two machine instructions. */
11819
11820 bool
11821 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
11822 {
11823 /* There are two kinds of AND we can handle with two insns:
11824 1) those we can do with two rl* insn;
11825 2) ori[s];xori[s].
11826
11827 We do not handle that last case yet. */
11828
11829 /* If there is just one stretch of ones, we can do it. */
11830 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
11831 return true;
11832
11833 /* Otherwise, fill in the lowest "hole"; if we can do the result with
11834 one insn, we can do the whole thing with two. */
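/* E.g. for C = 0xf0f: bit1 = 0x001, bit2 = 0x010 (bottom of the hole),
bit3 = 0x100 (bottom of the ones above the hole), and
val + bit3 - bit2 = 0xfff, a single stretch of ones. */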
11835 unsigned HOST_WIDE_INT val = INTVAL (c);
11836 unsigned HOST_WIDE_INT bit1 = val & -val;
11837 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11838 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11839 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11840 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
11841 }
11842
11843 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
11844 If EXPAND is true, split rotate-and-mask instructions we generate to
11845 their constituent parts as well (this is used during expand); if DOT
11846 is 1, make the last insn a record-form instruction clobbering the
11847 destination GPR and setting the CC reg (from operands[3]); if 2, set
11848 that GPR as well as the CC reg. */
11849
11850 void
11851 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
11852 {
11853 gcc_assert (!(expand && dot));
11854
11855 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
11856
11857 /* If it is one stretch of ones, it is DImode; shift left, mask, then
11858 shift right. This generates better code than doing the masks without
11859 shifts, or shifting first right and then left. */
11860 int nb, ne;
11861 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
11862 {
11863 gcc_assert (mode == DImode);
11864
11865 int shift = 63 - nb;
11866 if (expand)
11867 {
11868 rtx tmp1 = gen_reg_rtx (DImode);
11869 rtx tmp2 = gen_reg_rtx (DImode);
11870 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
11871 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
11872 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
11873 }
11874 else
11875 {
11876 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
11877 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
11878 emit_move_insn (operands[0], tmp);
11879 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
11880 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11881 }
11882 return;
11883 }
11884
11885 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
11886 that does the rest. */
11887 unsigned HOST_WIDE_INT bit1 = val & -val;
11888 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
11889 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
11890 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
11891
11892 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
11893 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
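/* E.g. for val = 0xf0f: mask1 == ~(unsigned HOST_WIDE_INT) 0xf0 (it
clears the hole), mask2 == 0xfff (it clears the bits above the ones),
and (mask1 & mask2) == val. */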
11894
11895 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
11896
11897 /* Two "no-rotate"-and-mask instructions, for SImode. */
11898 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
11899 {
11900 gcc_assert (mode == SImode);
11901
11902 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11903 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
11904 emit_move_insn (reg, tmp);
11905 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11906 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11907 return;
11908 }
11909
11910 gcc_assert (mode == DImode);
11911
11912 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
11913 insns; we have to do the first in SImode, because it wraps. */
11914 if (mask2 <= 0xffffffff
11915 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
11916 {
11917 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
11918 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
11919 GEN_INT (mask1));
11920 rtx reg_low = gen_lowpart (SImode, reg);
11921 emit_move_insn (reg_low, tmp);
11922 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
11923 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11924 return;
11925 }
11926
11927 /* Two rld* insns: rotate, clear the hole in the middle (which now is
11928 at the top end), rotate back and clear the other hole. */
11929 int right = exact_log2 (bit3);
11930 int left = 64 - right;
11931
11932 /* Rotate the mask too. */
11933 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
11934
11935 if (expand)
11936 {
11937 rtx tmp1 = gen_reg_rtx (DImode);
11938 rtx tmp2 = gen_reg_rtx (DImode);
11939 rtx tmp3 = gen_reg_rtx (DImode);
11940 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
11941 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
11942 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
11943 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
11944 }
11945 else
11946 {
11947 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
11948 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
11949 emit_move_insn (operands[0], tmp);
11950 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
11951 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
11952 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
11953 }
11954 }
11955 \f
11956 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
11957 for lfq and stfq insns iff the registers are hard registers. */
11958
11959 int
11960 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
11961 {
11962 /* We might have been passed a SUBREG. */
11963 if (!REG_P (reg1) || !REG_P (reg2))
11964 return 0;
11965
11966 /* We might have been passed non floating point registers. */
11967 if (!FP_REGNO_P (REGNO (reg1))
11968 || !FP_REGNO_P (REGNO (reg2)))
11969 return 0;
11970
11971 return (REGNO (reg1) == REGNO (reg2) - 1);
11972 }
11973
11974 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
11975 addr1 and addr2 must be in consecutive memory locations
11976 (addr2 == addr1 + 8). */
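/* (E.g. (mem (plus (reg 3) (const_int 8))) followed by
(mem (plus (reg 3) (const_int 16))) qualifies.) */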
11977
11978 int
11979 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
11980 {
11981 rtx addr1, addr2;
11982 unsigned int reg1, reg2;
11983 int offset1, offset2;
11984
11985 /* The mems cannot be volatile. */
11986 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
11987 return 0;
11988
11989 addr1 = XEXP (mem1, 0);
11990 addr2 = XEXP (mem2, 0);
11991
11992 /* Extract an offset (if used) from the first addr. */
11993 if (GET_CODE (addr1) == PLUS)
11994 {
11995 /* If not a REG, return zero. */
11996 if (!REG_P (XEXP (addr1, 0)))
11997 return 0;
11998 else
11999 {
12000 reg1 = REGNO (XEXP (addr1, 0));
12001 /* The offset must be constant! */
12002 if (!CONST_INT_P (XEXP (addr1, 1)))
12003 return 0;
12004 offset1 = INTVAL (XEXP (addr1, 1));
12005 }
12006 }
12007 else if (!REG_P (addr1))
12008 return 0;
12009 else
12010 {
12011 reg1 = REGNO (addr1);
12012 /* This was a simple (mem (reg)) expression. Offset is 0. */
12013 offset1 = 0;
12014 }
12015
12016 /* And now for the second addr. */
12017 if (GET_CODE (addr2) == PLUS)
12018 {
12019 /* If not a REG, return zero. */
12020 if (!REG_P (XEXP (addr2, 0)))
12021 return 0;
12022 else
12023 {
12024 reg2 = REGNO (XEXP (addr2, 0));
12025 /* The offset must be constant. */
12026 if (!CONST_INT_P (XEXP (addr2, 1)))
12027 return 0;
12028 offset2 = INTVAL (XEXP (addr2, 1));
12029 }
12030 }
12031 else if (!REG_P (addr2))
12032 return 0;
12033 else
12034 {
12035 reg2 = REGNO (addr2);
12036 /* This was a simple (mem (reg)) expression. Offset is 0. */
12037 offset2 = 0;
12038 }
12039
12040 /* Both of these must have the same base register. */
12041 if (reg1 != reg2)
12042 return 0;
12043
12044 /* The offset for the second addr must be 8 more than the first addr. */
12045 if (offset2 != offset1 + 8)
12046 return 0;
12047
12048 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
12049 instructions. */
12050 return 1;
12051 }
12052 \f
12053 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
12054 need to use DDmode, in all other cases we can use the same mode. */
12055 static machine_mode
12056 rs6000_secondary_memory_needed_mode (machine_mode mode)
12057 {
12058 if (lra_in_progress && mode == SDmode)
12059 return DDmode;
12060 return mode;
12061 }
12062
12063 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
12064 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
12065 only work on the traditional altivec registers, note if an altivec register
12066 was chosen. */
12067
12068 static enum rs6000_reg_type
12069 register_to_reg_type (rtx reg, bool *is_altivec)
12070 {
12071 HOST_WIDE_INT regno;
12072 enum reg_class rclass;
12073
12074 if (SUBREG_P (reg))
12075 reg = SUBREG_REG (reg);
12076
12077 if (!REG_P (reg))
12078 return NO_REG_TYPE;
12079
12080 regno = REGNO (reg);
12081 if (!HARD_REGISTER_NUM_P (regno))
12082 {
12083 if (!lra_in_progress && !reload_completed)
12084 return PSEUDO_REG_TYPE;
12085
12086 regno = true_regnum (reg);
12087 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
12088 return PSEUDO_REG_TYPE;
12089 }
12090
12091 gcc_assert (regno >= 0);
12092
12093 if (is_altivec && ALTIVEC_REGNO_P (regno))
12094 *is_altivec = true;
12095
12096 rclass = rs6000_regno_regclass[regno];
12097 return reg_class_to_reg_type[(int)rclass];
12098 }
12099
12100 /* Helper function to return the cost of adding a TOC entry address. */
12101
12102 static inline int
12103 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
12104 {
12105 int ret;
12106
12107 if (TARGET_CMODEL != CMODEL_SMALL)
12108 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
12109
12110 else
12111 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
12112
12113 return ret;
12114 }
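/* Worked example (read straight off the code above): with -mcmodel=medium
   or -mcmodel=large the cost is 1 when the reload register class lacks
   reg+offset addressing and 2 when it has it; with -mcmodel=small the
   cost is 3, or 6 under -mminimal-toc. */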
12115
12116 /* Helper function for rs6000_secondary_reload to determine whether the memory
12117 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
12118 needs reloading. Return negative if the memory is not handled by the memory
12119 helper functions (so a different reload method should be tried), 0 if no
12120 additional instructions are needed, and positive to give the extra cost for the
12121 memory. */
12122
12123 static int
12124 rs6000_secondary_reload_memory (rtx addr,
12125 enum reg_class rclass,
12126 machine_mode mode)
12127 {
12128 int extra_cost = 0;
12129 rtx reg, and_arg, plus_arg0, plus_arg1;
12130 addr_mask_type addr_mask;
12131 const char *type = NULL;
12132 const char *fail_msg = NULL;
12133
12134 if (GPR_REG_CLASS_P (rclass))
12135 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12136
12137 else if (rclass == FLOAT_REGS)
12138 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12139
12140 else if (rclass == ALTIVEC_REGS)
12141 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12142
12143 /* For the combined VSX_REGS, turn off Altivec AND -16. */
12144 else if (rclass == VSX_REGS)
12145 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
12146 & ~RELOAD_REG_AND_M16);
12147
12148 /* If the register allocator hasn't made up its mind yet on the register
12149 class to use, settle on sensible defaults. */
12150 else if (rclass == NO_REGS)
12151 {
12152 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
12153 & ~RELOAD_REG_AND_M16);
12154
12155 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
12156 addr_mask &= ~(RELOAD_REG_INDEXED
12157 | RELOAD_REG_PRE_INCDEC
12158 | RELOAD_REG_PRE_MODIFY);
12159 }
12160
12161 else
12162 addr_mask = 0;
12163
12164 /* If the register isn't valid in this register class, just return now. */
12165 if ((addr_mask & RELOAD_REG_VALID) == 0)
12166 {
12167 if (TARGET_DEBUG_ADDR)
12168 {
12169 fprintf (stderr,
12170 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12171 "not valid in class\n",
12172 GET_MODE_NAME (mode), reg_class_names[rclass]);
12173 debug_rtx (addr);
12174 }
12175
12176 return -1;
12177 }
12178
12179 switch (GET_CODE (addr))
12180 {
12181 /* Does the register class support auto update forms for this mode? We
12182 don't need a scratch register, since PowerPC only supports
12183 PRE_INC, PRE_DEC, and PRE_MODIFY. */
12184 case PRE_INC:
12185 case PRE_DEC:
12186 reg = XEXP (addr, 0);
12187 if (!base_reg_operand (reg, GET_MODE (reg)))
12188 {
12189 fail_msg = "no base register #1";
12190 extra_cost = -1;
12191 }
12192
12193 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12194 {
12195 extra_cost = 1;
12196 type = "update";
12197 }
12198 break;
12199
12200 case PRE_MODIFY:
12201 reg = XEXP (addr, 0);
12202 plus_arg1 = XEXP (addr, 1);
12203 if (!base_reg_operand (reg, GET_MODE (reg))
12204 || GET_CODE (plus_arg1) != PLUS
12205 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
12206 {
12207 fail_msg = "bad PRE_MODIFY";
12208 extra_cost = -1;
12209 }
12210
12211 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12212 {
12213 extra_cost = 1;
12214 type = "update";
12215 }
12216 break;
12217
12218 /* Do we need to simulate AND -16 to clear the bottom address bits used
12219 in VMX load/stores? Only allow the AND for vector sizes. */
12220 case AND:
12221 and_arg = XEXP (addr, 0);
12222 if (GET_MODE_SIZE (mode) != 16
12223 || !CONST_INT_P (XEXP (addr, 1))
12224 || INTVAL (XEXP (addr, 1)) != -16)
12225 {
12226 fail_msg = "bad Altivec AND #1";
12227 extra_cost = -1;
12228 }
12229
12230 if (rclass != ALTIVEC_REGS)
12231 {
12232 if (legitimate_indirect_address_p (and_arg, false))
12233 extra_cost = 1;
12234
12235 else if (legitimate_indexed_address_p (and_arg, false))
12236 extra_cost = 2;
12237
12238 else
12239 {
12240 fail_msg = "bad Altivec AND #2";
12241 extra_cost = -1;
12242 }
12243
12244 type = "and";
12245 }
12246 break;
12247
12248 /* If this is an indirect address, make sure it is a base register. */
12249 case REG:
12250 case SUBREG:
12251 if (!legitimate_indirect_address_p (addr, false))
12252 {
12253 extra_cost = 1;
12254 type = "move";
12255 }
12256 break;
12257
12258 /* If this is an indexed address, make sure the register class can handle
12259 indexed addresses for this mode. */
12260 case PLUS:
12261 plus_arg0 = XEXP (addr, 0);
12262 plus_arg1 = XEXP (addr, 1);
12263
12264 /* (plus (plus (reg) (constant)) (constant)) is generated during
12265 push_reload processing, so handle it now. */
12266 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
12267 {
12268 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12269 {
12270 extra_cost = 1;
12271 type = "offset";
12272 }
12273 }
12274
12275 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12276 push_reload processing, so handle it now. */
12277 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
12278 {
12279 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12280 {
12281 extra_cost = 1;
12282 type = "indexed #2";
12283 }
12284 }
12285
12286 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
12287 {
12288 fail_msg = "no base register #2";
12289 extra_cost = -1;
12290 }
12291
12292 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
12293 {
12294 if ((addr_mask & RELOAD_REG_INDEXED) == 0
12295 || !legitimate_indexed_address_p (addr, false))
12296 {
12297 extra_cost = 1;
12298 type = "indexed";
12299 }
12300 }
12301
12302 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
12303 && CONST_INT_P (plus_arg1))
12304 {
12305 if (!quad_address_offset_p (INTVAL (plus_arg1)))
12306 {
12307 extra_cost = 1;
12308 type = "vector d-form offset";
12309 }
12310 }
12311
12312 /* Make sure the register class can handle offset addresses. */
12313 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12314 {
12315 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12316 {
12317 extra_cost = 1;
12318 type = "offset #2";
12319 }
12320 }
12321
12322 else
12323 {
12324 fail_msg = "bad PLUS";
12325 extra_cost = -1;
12326 }
12327
12328 break;
12329
12330 case LO_SUM:
12331 /* Quad offsets are restricted and can't handle normal addresses. */
12332 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12333 {
12334 extra_cost = -1;
12335 type = "vector d-form lo_sum";
12336 }
12337
12338 else if (!legitimate_lo_sum_address_p (mode, addr, false))
12339 {
12340 fail_msg = "bad LO_SUM";
12341 extra_cost = -1;
12342 }
12343
12344 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12345 {
12346 extra_cost = 1;
12347 type = "lo_sum";
12348 }
12349 break;
12350
12351 /* Static addresses need to create a TOC entry. */
12352 case CONST:
12353 case SYMBOL_REF:
12354 case LABEL_REF:
12355 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12356 {
12357 extra_cost = -1;
12358 type = "vector d-form lo_sum #2";
12359 }
12360
12361 else
12362 {
12363 type = "address";
12364 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
12365 }
12366 break;
12367
12368 /* TOC references look like offsettable memory. */
12369 case UNSPEC:
12370 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
12371 {
12372 fail_msg = "bad UNSPEC";
12373 extra_cost = -1;
12374 }
12375
12376 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12377 {
12378 extra_cost = -1;
12379 type = "vector d-form lo_sum #3";
12380 }
12381
12382 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12383 {
12384 extra_cost = 1;
12385 type = "toc reference";
12386 }
12387 break;
12388
12389 default:
12390 {
12391 fail_msg = "bad address";
12392 extra_cost = -1;
12393 }
12394 }
12395
12396 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
12397 {
12398 if (extra_cost < 0)
12399 fprintf (stderr,
12400 "rs6000_secondary_reload_memory error: mode = %s, "
12401 "class = %s, addr_mask = '%s', %s\n",
12402 GET_MODE_NAME (mode),
12403 reg_class_names[rclass],
12404 rs6000_debug_addr_mask (addr_mask, false),
12405 (fail_msg != NULL) ? fail_msg : "<bad address>");
12406
12407 else
12408 fprintf (stderr,
12409 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12410 "addr_mask = '%s', extra cost = %d, %s\n",
12411 GET_MODE_NAME (mode),
12412 reg_class_names[rclass],
12413 rs6000_debug_addr_mask (addr_mask, false),
12414 extra_cost,
12415 (type) ? type : "<none>");
12416
12417 debug_rtx (addr);
12418 }
12419
12420 return extra_cost;
12421 }
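/* Illustrative example (hypothetical RTL): reloading a V4SI value into
   FLOAT_REGS from

     (mem:V4SI (and:DI (plus:DI (reg:DI 3) (reg:DI 4)) (const_int -16)))

   takes the AND case above; FLOAT_REGS cannot mask the address itself and
   the inner address is indexed, so the function returns an extra cost of 2
   with type "and". A PRE_INC address for a class without update forms
   would instead return 1 with type "update". */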
12422
12423 /* Helper function for rs6000_secondary_reload to return true if a move to a
12424 different register class is really a simple move. */
12425
12426 static bool
12427 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
12428 enum rs6000_reg_type from_type,
12429 machine_mode mode)
12430 {
12431 int size = GET_MODE_SIZE (mode);
12432
12433 /* Add support for various direct moves available. In this function, we only
12434 look at cases where we don't need any extra registers, and one or more
12435 simple move insns are issued. Originally, small integers were not allowed
12436 in FPR/VSX registers. Single precision binary floating point is not a
12437 simple move because we need to convert to the single precision memory layout.
12438 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12439 need special direct move handling, which we do not support yet. */
12440 if (TARGET_DIRECT_MOVE
12441 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12442 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
12443 {
12444 if (TARGET_POWERPC64)
12445 {
12446 /* ISA 2.07: MTVSRD or MFVSRD. */
12447 if (size == 8)
12448 return true;
12449
12450 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12451 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
12452 return true;
12453 }
12454
12455 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12456 if (TARGET_P8_VECTOR)
12457 {
12458 if (mode == SImode)
12459 return true;
12460
12461 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
12462 return true;
12463 }
12464
12465 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12466 if (mode == SDmode)
12467 return true;
12468 }
12469
12470 /* Move to/from SPR. */
12471 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
12472 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
12473 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
12474 return true;
12475
12476 return false;
12477 }
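/* Illustrative outcomes (assumed 64-bit target with ISA 3.0 enabled): a
   DImode move between a GPR and a VSX register is "simple" (a single
   mtvsrd/mfvsrd), an SImode move is simple on ISA 2.07 (mtvsrwz/mfvsrwz),
   a 128-bit move needs ISA 3.0 (mtvsrdd or mfvsrd+mfvsrld), and TDmode is
   never simple. */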
12478
12479 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
12480 special direct moves that involve allocating an extra register. Return
12481 true if such a helper pattern exists, recording its insn code and extra
12482 cost in SRI; return false otherwise. */
12483
12484 static bool
12485 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
12486 enum rs6000_reg_type from_type,
12487 machine_mode mode,
12488 secondary_reload_info *sri,
12489 bool altivec_p)
12490 {
12491 bool ret = false;
12492 enum insn_code icode = CODE_FOR_nothing;
12493 int cost = 0;
12494 int size = GET_MODE_SIZE (mode);
12495
12496 if (TARGET_POWERPC64 && size == 16)
12497 {
12498 /* Handle moving 128-bit values from GPRs to VSX registers on
12499 ISA 2.07 (power8, power9) when running in 64-bit mode using
12500 XXPERMDI to glue the two 64-bit values back together. */
12501 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12502 {
12503 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
12504 icode = reg_addr[mode].reload_vsx_gpr;
12505 }
12506
12507 /* Handle moving 128-bit values from VSX registers to GPRs on
12508 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
12509 bottom 64-bit value. */
12510 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12511 {
12512 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
12513 icode = reg_addr[mode].reload_gpr_vsx;
12514 }
12515 }
12516
12517 else if (TARGET_POWERPC64 && mode == SFmode)
12518 {
12519 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12520 {
12521 cost = 3; /* xscvdpspn, mfvsrd, and. */
12522 icode = reg_addr[mode].reload_gpr_vsx;
12523 }
12524
12525 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12526 {
12527 cost = 2; /* mtvsrwz, xscvspdpn. */
12528 icode = reg_addr[mode].reload_vsx_gpr;
12529 }
12530 }
12531
12532 else if (!TARGET_POWERPC64 && size == 8)
12533 {
12534 /* Handle moving 64-bit values from GPRs to floating point registers on
12535 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12536 32-bit values back together. Altivec register classes must be handled
12537 specially since a different instruction is used, and the secondary
12538 reload support requires a single instruction class in the scratch
12539 register constraint. However, right now TFmode is not allowed in
12540 Altivec registers, so the pattern will never match. */
12541 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
12542 {
12543 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
12544 icode = reg_addr[mode].reload_fpr_gpr;
12545 }
12546 }
12547
12548 if (icode != CODE_FOR_nothing)
12549 {
12550 ret = true;
12551 if (sri)
12552 {
12553 sri->icode = icode;
12554 sri->extra_cost = cost;
12555 }
12556 }
12557
12558 return ret;
12559 }
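/* Illustrative sequence (assumed 64-bit ISA 2.07 target, register numbers
   hypothetical): the reload_vsx_gpr pattern costed above turns a TImode
   GPR-to-VSX move into roughly

     mtvsrd   vs0,r3       # high doubleword
     mtvsrd   vs1,r4       # low doubleword
     xxpermdi vs34,vs0,vs1,0

   i.e. two mtvsrd plus one xxpermdi, matching cost = 3. */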
12560
12561 /* Return whether a move between two register classes can be done either
12562 directly (simple move) or via a pattern that uses a single extra temporary
12563 (using ISA 2.07's direct move in this case). */
12564
12565 static bool
12566 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
12567 enum rs6000_reg_type from_type,
12568 machine_mode mode,
12569 secondary_reload_info *sri,
12570 bool altivec_p)
12571 {
12572 /* Fall back to load/store reloads if either type is not a register. */
12573 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
12574 return false;
12575
12576 /* If we haven't allocated registers yet, assume the move can be done for the
12577 standard register types. */
12578 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
12579 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
12580 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
12581 return true;
12582
12583 /* A move within the same set of registers is a simple move for
12584 non-specialized registers. */
12585 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
12586 return true;
12587
12588 /* Check whether a simple move can be done directly. */
12589 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
12590 {
12591 if (sri)
12592 {
12593 sri->icode = CODE_FOR_nothing;
12594 sri->extra_cost = 0;
12595 }
12596 return true;
12597 }
12598
12599 /* Now check if we can do it in a few steps. */
12600 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
12601 altivec_p);
12602 }
12603
12604 /* Inform reload about cases where moving X with a mode MODE to a register in
12605 RCLASS requires an extra scratch or immediate register. Return the class
12606 needed for the immediate register.
12607
12608 For VSX and Altivec, we may need a register to convert sp+offset into
12609 reg+sp.
12610
12611 For misaligned 64-bit gpr loads and stores we need a register to
12612 convert an offset address to indirect. */
12613
12614 static reg_class_t
12615 rs6000_secondary_reload (bool in_p,
12616 rtx x,
12617 reg_class_t rclass_i,
12618 machine_mode mode,
12619 secondary_reload_info *sri)
12620 {
12621 enum reg_class rclass = (enum reg_class) rclass_i;
12622 reg_class_t ret = ALL_REGS;
12623 enum insn_code icode;
12624 bool default_p = false;
12625 bool done_p = false;
12626
12627 /* Allow subreg of memory before/during reload. */
12628 bool memory_p = (MEM_P (x)
12629 || (!reload_completed && SUBREG_P (x)
12630 && MEM_P (SUBREG_REG (x))));
12631
12632 sri->icode = CODE_FOR_nothing;
12633 sri->t_icode = CODE_FOR_nothing;
12634 sri->extra_cost = 0;
12635 icode = ((in_p)
12636 ? reg_addr[mode].reload_load
12637 : reg_addr[mode].reload_store);
12638
12639 if (REG_P (x) || register_operand (x, mode))
12640 {
12641 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
12642 bool altivec_p = (rclass == ALTIVEC_REGS);
12643 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
12644
12645 if (!in_p)
12646 std::swap (to_type, from_type);
12647
12648 /* Can we do a direct move of some sort? */
12649 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
12650 altivec_p))
12651 {
12652 icode = (enum insn_code)sri->icode;
12653 default_p = false;
12654 done_p = true;
12655 ret = NO_REGS;
12656 }
12657 }
12658
12659 /* Make sure 0.0 is not reloaded or forced into memory. */
12660 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
12661 {
12662 ret = NO_REGS;
12663 default_p = false;
12664 done_p = true;
12665 }
12666
12667 /* If this is a scalar floating point value and we want to load it into the
12668 traditional Altivec registers, move it via a traditional floating
12669 point register, unless we have D-form addressing. Also make sure that
12670 non-zero constants use an FPR. */
12671 if (!done_p && reg_addr[mode].scalar_in_vmx_p
12672 && !mode_supports_vmx_dform (mode)
12673 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12674 && (memory_p || CONST_DOUBLE_P (x)))
12675 {
12676 ret = FLOAT_REGS;
12677 default_p = false;
12678 done_p = true;
12679 }
12680
12681 /* Handle reload of load/stores if we have reload helper functions. */
12682 if (!done_p && icode != CODE_FOR_nothing && memory_p)
12683 {
12684 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
12685 mode);
12686
12687 if (extra_cost >= 0)
12688 {
12689 done_p = true;
12690 ret = NO_REGS;
12691 if (extra_cost > 0)
12692 {
12693 sri->extra_cost = extra_cost;
12694 sri->icode = icode;
12695 }
12696 }
12697 }
12698
12699 /* Handle unaligned loads and stores of integer registers. */
12700 if (!done_p && TARGET_POWERPC64
12701 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12702 && memory_p
12703 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
12704 {
12705 rtx addr = XEXP (x, 0);
12706 rtx off = address_offset (addr);
12707
12708 if (off != NULL_RTX)
12709 {
12710 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12711 unsigned HOST_WIDE_INT offset = INTVAL (off);
12712
12713 /* We need a secondary reload when our legitimate_address_p
12714 says the address is good (as otherwise the entire address
12715 will be reloaded), and the offset is not a multiple of
12716 four or we have an address wrap. Address wrap will only
12717 occur for LO_SUMs since legitimate_offset_address_p
12718 rejects addresses for 16-byte mems that will wrap. */
12719 if (GET_CODE (addr) == LO_SUM
12720 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12721 && ((offset & 3) != 0
12722 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
12723 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
12724 && (offset & 3) != 0))
12725 {
12726 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12727 if (in_p)
12728 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
12729 : CODE_FOR_reload_di_load);
12730 else
12731 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
12732 : CODE_FOR_reload_di_store);
12733 sri->extra_cost = 2;
12734 ret = NO_REGS;
12735 done_p = true;
12736 }
12737 else
12738 default_p = true;
12739 }
12740 else
12741 default_p = true;
12742 }
12743
12744 if (!done_p && !TARGET_POWERPC64
12745 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12746 && memory_p
12747 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
12748 {
12749 rtx addr = XEXP (x, 0);
12750 rtx off = address_offset (addr);
12751
12752 if (off != NULL_RTX)
12753 {
12754 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12755 unsigned HOST_WIDE_INT offset = INTVAL (off);
12756
12757 /* We need a secondary reload when our legitimate_address_p
12758 says the address is good (as otherwise the entire address
12759 will be reloaded), and we have a wrap.
12760
12761 legitimate_lo_sum_address_p allows LO_SUM addresses to
12762 have any offset so test for wrap in the low 16 bits.
12763
12764 legitimate_offset_address_p checks for the range
12765 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12766 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12767 [0x7ff4,0x7fff] respectively, so test for the
12768 intersection of these ranges, [0x7ffc,0x7fff] and
12769 [0x7ff4,0x7ff7] respectively.
12770
12771 Note that the address we see here may have been
12772 manipulated by legitimize_reload_address. */
12773 if (GET_CODE (addr) == LO_SUM
12774 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
12775 : offset - (0x8000 - extra) < UNITS_PER_WORD)
12776 {
12777 if (in_p)
12778 sri->icode = CODE_FOR_reload_si_load;
12779 else
12780 sri->icode = CODE_FOR_reload_si_store;
12781 sri->extra_cost = 2;
12782 ret = NO_REGS;
12783 done_p = true;
12784 }
12785 else
12786 default_p = true;
12787 }
12788 else
12789 default_p = true;
12790 }
12791
12792 if (!done_p)
12793 default_p = true;
12794
12795 if (default_p)
12796 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
12797
12798 gcc_assert (ret != ALL_REGS);
12799
12800 if (TARGET_DEBUG_ADDR)
12801 {
12802 fprintf (stderr,
12803 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12804 "mode = %s",
12805 reg_class_names[ret],
12806 in_p ? "true" : "false",
12807 reg_class_names[rclass],
12808 GET_MODE_NAME (mode));
12809
12810 if (reload_completed)
12811 fputs (", after reload", stderr);
12812
12813 if (!done_p)
12814 fputs (", done_p not set", stderr);
12815
12816 if (default_p)
12817 fputs (", default secondary reload", stderr);
12818
12819 if (sri->icode != CODE_FOR_nothing)
12820 fprintf (stderr, ", reload func = %s, extra cost = %d",
12821 insn_data[sri->icode].name, sri->extra_cost);
12822
12823 else if (sri->extra_cost > 0)
12824 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
12825
12826 fputs ("\n", stderr);
12827 debug_rtx (x);
12828 }
12829
12830 return ret;
12831 }
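/* Worked example of the misaligned-offset test above (hypothetical
   values): a DImode access at (plus (reg) (const_int 0x7ffe)) on a 64-bit
   target has extra = 0, so offset + 0x8000 = 0xfffe < 0x10000 holds and
   (offset & 3) == 2 is nonzero; the address is otherwise legitimate but
   ld/std cannot encode it, so reload_di_load or reload_di_store is chosen
   with extra_cost = 2. */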
12832
12833 /* Better tracing for rs6000_secondary_reload_inner. */
12834
12835 static void
12836 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
12837 bool store_p)
12838 {
12839 rtx set, clobber;
12840
12841 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
12842
12843 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
12844 store_p ? "store" : "load");
12845
12846 if (store_p)
12847 set = gen_rtx_SET (mem, reg);
12848 else
12849 set = gen_rtx_SET (reg, mem);
12850
12851 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
12852 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
12853 }
12854
12855 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
12856 ATTRIBUTE_NORETURN;
12857
12858 static void
12859 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
12860 bool store_p)
12861 {
12862 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
12863 gcc_unreachable ();
12864 }
12865
12866 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
12867 reload helper functions. These were identified in
12868 rs6000_secondary_reload_memory, and if reload decided to use the secondary
12869 reload, it calls the insns:
12870 reload_<RELOAD:mode>_<P:mptrsize>_store
12871 reload_<RELOAD:mode>_<P:mptrsize>_load
12872
12873 which in turn call this function to do whatever is necessary to create
12874 valid addresses. */
12875
12876 void
12877 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
12878 {
12879 int regno = true_regnum (reg);
12880 machine_mode mode = GET_MODE (reg);
12881 addr_mask_type addr_mask;
12882 rtx addr;
12883 rtx new_addr;
12884 rtx op_reg, op0, op1;
12885 rtx and_op;
12886 rtx cc_clobber;
12887 rtvec rv;
12888
12889 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
12890 || !base_reg_operand (scratch, GET_MODE (scratch)))
12891 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12892
12893 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
12894 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12895
12896 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
12897 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12898
12899 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
12900 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12901
12902 else
12903 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12904
12905 /* Make sure the mode is valid in this register class. */
12906 if ((addr_mask & RELOAD_REG_VALID) == 0)
12907 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12908
12909 if (TARGET_DEBUG_ADDR)
12910 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
12911
12912 new_addr = addr = XEXP (mem, 0);
12913 switch (GET_CODE (addr))
12914 {
12915 /* Does the register class support auto update forms for this mode? If
12916 not, do the update now. We don't need a scratch register, since the
12917 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
12918 case PRE_INC:
12919 case PRE_DEC:
12920 op_reg = XEXP (addr, 0);
12921 if (!base_reg_operand (op_reg, Pmode))
12922 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12923
12924 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12925 {
12926 int delta = GET_MODE_SIZE (mode);
12927 if (GET_CODE (addr) == PRE_DEC)
12928 delta = -delta;
12929 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
12930 new_addr = op_reg;
12931 }
12932 break;
12933
12934 case PRE_MODIFY:
12935 op0 = XEXP (addr, 0);
12936 op1 = XEXP (addr, 1);
12937 if (!base_reg_operand (op0, Pmode)
12938 || GET_CODE (op1) != PLUS
12939 || !rtx_equal_p (op0, XEXP (op1, 0)))
12940 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12941
12942 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12943 {
12944 emit_insn (gen_rtx_SET (op0, op1));
12945 new_addr = op0;
12946 }
12947 break;
12948
12949 /* Do we need to simulate AND -16 to clear the bottom address bits used
12950 in VMX load/stores? */
12951 case AND:
12952 op0 = XEXP (addr, 0);
12953 op1 = XEXP (addr, 1);
12954 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
12955 {
12956 if (REG_P (op0) || SUBREG_P (op0))
12957 op_reg = op0;
12958
12959 else if (GET_CODE (op1) == PLUS)
12960 {
12961 emit_insn (gen_rtx_SET (scratch, op1));
12962 op_reg = scratch;
12963 }
12964
12965 else
12966 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12967
12968 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
12969 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
12970 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
12971 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
12972 new_addr = scratch;
12973 }
12974 break;
12975
12976 /* If this is an indirect address, make sure it is a base register. */
12977 case REG:
12978 case SUBREG:
12979 if (!base_reg_operand (addr, GET_MODE (addr)))
12980 {
12981 emit_insn (gen_rtx_SET (scratch, addr));
12982 new_addr = scratch;
12983 }
12984 break;
12985
12986 /* If this is an indexed address, make sure the register class can handle
12987 indexed addresses for this mode. */
12988 case PLUS:
12989 op0 = XEXP (addr, 0);
12990 op1 = XEXP (addr, 1);
12991 if (!base_reg_operand (op0, Pmode))
12992 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
12993
12994 else if (int_reg_operand (op1, Pmode))
12995 {
12996 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12997 {
12998 emit_insn (gen_rtx_SET (scratch, addr));
12999 new_addr = scratch;
13000 }
13001 }
13002
13003 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
13004 {
13005 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
13006 || !quad_address_p (addr, mode, false))
13007 {
13008 emit_insn (gen_rtx_SET (scratch, addr));
13009 new_addr = scratch;
13010 }
13011 }
13012
13013 /* Make sure the register class can handle offset addresses. */
13014 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
13015 {
13016 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
13017 {
13018 emit_insn (gen_rtx_SET (scratch, addr));
13019 new_addr = scratch;
13020 }
13021 }
13022
13023 else
13024 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13025
13026 break;
13027
13028 case LO_SUM:
13029 op0 = XEXP (addr, 0);
13030 op1 = XEXP (addr, 1);
13031 if (!base_reg_operand (op0, Pmode))
13032 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13033
13034 else if (int_reg_operand (op1, Pmode))
13035 {
13036 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
13037 {
13038 emit_insn (gen_rtx_SET (scratch, addr));
13039 new_addr = scratch;
13040 }
13041 }
13042
13043 /* Quad offsets are restricted and can't handle normal addresses. */
13044 else if (mode_supports_dq_form (mode))
13045 {
13046 emit_insn (gen_rtx_SET (scratch, addr));
13047 new_addr = scratch;
13048 }
13049
13050 /* Make sure the register class can handle offset addresses. */
13051 else if (legitimate_lo_sum_address_p (mode, addr, false))
13052 {
13053 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
13054 {
13055 emit_insn (gen_rtx_SET (scratch, addr));
13056 new_addr = scratch;
13057 }
13058 }
13059
13060 else
13061 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13062
13063 break;
13064
13065 case SYMBOL_REF:
13066 case CONST:
13067 case LABEL_REF:
13068 rs6000_emit_move (scratch, addr, Pmode);
13069 new_addr = scratch;
13070 break;
13071
13072 default:
13073 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13074 }
13075
13076 /* Adjust the address if it changed. */
13077 if (addr != new_addr)
13078 {
13079 mem = replace_equiv_address_nv (mem, new_addr);
13080 if (TARGET_DEBUG_ADDR)
13081 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
13082 }
13083
13084 /* Now create the move. */
13085 if (store_p)
13086 emit_insn (gen_rtx_SET (mem, reg));
13087 else
13088 emit_insn (gen_rtx_SET (reg, mem));
13089
13090 return;
13091 }
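/* Illustrative fixup (hypothetical RTL): for a register class without
   update forms, a PRE_INC load such as

     (set (reg:V4SI 66) (mem:V4SI (pre_inc:DI (reg:DI 9))))

   is rewritten by the PRE_INC case above into an explicit add followed by
   a plain indirect access:

     (set (reg:DI 9) (plus:DI (reg:DI 9) (const_int 16)))
     (set (reg:V4SI 66) (mem:V4SI (reg:DI 9)))  */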
13092
13093 /* Convert reloads involving 64-bit gprs and misaligned offset
13094 addressing, or multiple 32-bit gprs and offsets that are too large,
13095 to use indirect addressing. */
13096
13097 void
13098 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
13099 {
13100 int regno = true_regnum (reg);
13101 enum reg_class rclass;
13102 rtx addr;
13103 rtx scratch_or_premodify = scratch;
13104
13105 if (TARGET_DEBUG_ADDR)
13106 {
13107 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
13108 store_p ? "store" : "load");
13109 fprintf (stderr, "reg:\n");
13110 debug_rtx (reg);
13111 fprintf (stderr, "mem:\n");
13112 debug_rtx (mem);
13113 fprintf (stderr, "scratch:\n");
13114 debug_rtx (scratch);
13115 }
13116
13117 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
13118 gcc_assert (MEM_P (mem));
13119 rclass = REGNO_REG_CLASS (regno);
13120 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
13121 addr = XEXP (mem, 0);
13122
13123 if (GET_CODE (addr) == PRE_MODIFY)
13124 {
13125 gcc_assert (REG_P (XEXP (addr, 0))
13126 && GET_CODE (XEXP (addr, 1)) == PLUS
13127 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
13128 scratch_or_premodify = XEXP (addr, 0);
13129 addr = XEXP (addr, 1);
13130 }
13131 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
13132
13133 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
13134
13135 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
13136
13137 /* Now create the move. */
13138 if (store_p)
13139 emit_insn (gen_rtx_SET (mem, reg));
13140 else
13141 emit_insn (gen_rtx_SET (reg, mem));
13142
13143 return;
13144 }
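/* Illustrative rewrite (hypothetical RTL): a misaligned 64-bit access

     (set (reg:DI 5) (mem:DI (plus:DI (reg:DI 3) (const_int 6))))

   becomes, with r11 as the scratch,

     (set (reg:DI 11) (plus:DI (reg:DI 3) (const_int 6)))
     (set (reg:DI 5) (mem:DI (reg:DI 11)))

   so the access no longer needs an offset that ld/std cannot encode. */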
13145
13146 /* Given an rtx X being reloaded into a reg required to be
13147 in class CLASS, return the class of reg to actually use.
13148 In general this is just CLASS; but on some machines
13149 in some cases it is preferable to use a more restrictive class.
13150
13151 On the RS/6000, we have to return NO_REGS when we want to reload a
13152 floating-point CONST_DOUBLE to force it to be copied to memory.
13153
13154 We also don't want to reload integer values into floating-point
13155 registers if we can at all help it. In fact, this can
13156 cause reload to die, if it tries to generate a reload of CTR
13157 into a FP register and discovers it doesn't have the memory location
13158 required.
13159
13160 ??? Would it be a good idea to have reload do the converse, that is
13161 try to reload floating modes into FP registers if possible?
13162 */
13163
13164 static enum reg_class
13165 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
13166 {
13167 machine_mode mode = GET_MODE (x);
13168 bool is_constant = CONSTANT_P (x);
13169
13170 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
13171 reload class for it. */
13172 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13173 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
13174 return NO_REGS;
13175
13176 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
13177 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
13178 return NO_REGS;
13179
13180 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
13181 the reloading of address expressions using PLUS into floating point
13182 registers. */
13183 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
13184 {
13185 if (is_constant)
13186 {
13187 /* Zero is always allowed in all VSX registers. */
13188 if (x == CONST0_RTX (mode))
13189 return rclass;
13190
13191 /* If this is a vector constant that can be formed with a few Altivec
13192 instructions, we want altivec registers. */
13193 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
13194 return ALTIVEC_REGS;
13195
13196 /* If this is an integer constant that can easily be loaded into
13197 vector registers, allow it. */
13198 if (CONST_INT_P (x))
13199 {
13200 HOST_WIDE_INT value = INTVAL (x);
13201
13202 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
13203 2.06 can generate it in the Altivec registers with
13204 VSPLTI<x>. */
13205 if (value == -1)
13206 {
13207 if (TARGET_P8_VECTOR)
13208 return rclass;
13209 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13210 return ALTIVEC_REGS;
13211 else
13212 return NO_REGS;
13213 }
13214
13215 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
13216 a sign extend in the Altivec registers. */
13217 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
13218 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
13219 return ALTIVEC_REGS;
13220 }
13221
13222 /* Force constant to memory. */
13223 return NO_REGS;
13224 }
13225
13226 /* D-form addressing can easily reload the value. */
13227 if (mode_supports_vmx_dform (mode)
13228 || mode_supports_dq_form (mode))
13229 return rclass;
13230
13231 /* If this is a scalar floating point value and we don't have D-form
13232 addressing, prefer the traditional floating point registers so that we
13233 can use D-form (register+offset) addressing. */
13234 if (rclass == VSX_REGS
13235 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
13236 return FLOAT_REGS;
13237
13238 /* Prefer the Altivec registers if Altivec is handling the vector
13239 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13240 loads. */
13241 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
13242 || mode == V1TImode)
13243 return ALTIVEC_REGS;
13244
13245 return rclass;
13246 }
13247
13248 if (is_constant || GET_CODE (x) == PLUS)
13249 {
13250 if (reg_class_subset_p (GENERAL_REGS, rclass))
13251 return GENERAL_REGS;
13252 if (reg_class_subset_p (BASE_REGS, rclass))
13253 return BASE_REGS;
13254 return NO_REGS;
13255 }
13256
13257 /* For the vector pair and vector quad modes, prefer their natural register
13258 class (VSX or FPR) rather than GPRs. For other integer types, prefer
13259 the GPRs. */
13260 if (rclass == GEN_OR_FLOAT_REGS)
13261 {
13262 if (mode == OOmode)
13263 return VSX_REGS;
13264
13265 if (mode == XOmode)
13266 return FLOAT_REGS;
13267
13268 if (GET_MODE_CLASS (mode) == MODE_INT)
13269 return GENERAL_REGS;
13270 }
13271
13272 return rclass;
13273 }
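/* Illustrative outcomes (assuming a power9-style target with
   TARGET_P9_VECTOR): a CONST_INT of 5 requested in VSX_REGS prefers
   ALTIVEC_REGS, since XXSPLTIB covers -128..127; a CONST_INT of 1000
   falls through to the force-to-memory path and returns NO_REGS; an
   SFmode value without VMX D-form addressing prefers FLOAT_REGS so that
   reg+offset loads remain usable. */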
13274
13275 /* Debug version of rs6000_preferred_reload_class. */
13276 static enum reg_class
13277 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
13278 {
13279 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
13280
13281 fprintf (stderr,
13282 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13283 "mode = %s, x:\n",
13284 reg_class_names[ret], reg_class_names[rclass],
13285 GET_MODE_NAME (GET_MODE (x)));
13286 debug_rtx (x);
13287
13288 return ret;
13289 }
13290
13291 /* If we are copying between FP or AltiVec registers and anything else, we need
13292 a memory location. The exception is when we are targeting ppc64 and the
13293 move to/from fpr to gpr instructions are available. Also, under VSX, you
13294 can copy vector registers from the FP register set to the Altivec register
13295 set and vice versa. */
13296
13297 static bool
13298 rs6000_secondary_memory_needed (machine_mode mode,
13299 reg_class_t from_class,
13300 reg_class_t to_class)
13301 {
13302 enum rs6000_reg_type from_type, to_type;
13303 bool altivec_p = ((from_class == ALTIVEC_REGS)
13304 || (to_class == ALTIVEC_REGS));
13305
13306 /* If a simple/direct move is available, we don't need secondary memory. */
13307 from_type = reg_class_to_reg_type[(int)from_class];
13308 to_type = reg_class_to_reg_type[(int)to_class];
13309
13310 if (rs6000_secondary_reload_move (to_type, from_type, mode,
13311 (secondary_reload_info *)0, altivec_p))
13312 return false;
13313
13314 /* If we have a floating point or vector register class, we need to use
13315 memory to transfer the data. */
13316 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
13317 return true;
13318
13319 return false;
13320 }
13321
13322 /* Debug version of rs6000_secondary_memory_needed. */
13323 static bool
13324 rs6000_debug_secondary_memory_needed (machine_mode mode,
13325 reg_class_t from_class,
13326 reg_class_t to_class)
13327 {
13328 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
13329
13330 fprintf (stderr,
13331 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13332 "to_class = %s, mode = %s\n",
13333 ret ? "true" : "false",
13334 reg_class_names[from_class],
13335 reg_class_names[to_class],
13336 GET_MODE_NAME (mode));
13337
13338 return ret;
13339 }
13340
13341 /* Return the register class of a scratch register needed to copy IN into
13342 or out of a register in RCLASS in MODE. If it can be done directly,
13343 NO_REGS is returned. */
13344
13345 static enum reg_class
13346 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
13347 rtx in)
13348 {
13349 int regno;
13350
13351 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
13352 #if TARGET_MACHO
13353 && MACHOPIC_INDIRECT
13354 #endif
13355 ))
13356 {
13357 /* We cannot copy a symbolic operand directly into anything
13358 other than BASE_REGS for TARGET_ELF. So indicate that a
13359 register from BASE_REGS is needed as an intermediate
13360 register.
13361
13362 On Darwin, pic addresses require a load from memory, which
13363 needs a base register. */
13364 if (rclass != BASE_REGS
13365 && (SYMBOL_REF_P (in)
13366 || GET_CODE (in) == HIGH
13367 || GET_CODE (in) == LABEL_REF
13368 || GET_CODE (in) == CONST))
13369 return BASE_REGS;
13370 }
13371
13372 if (REG_P (in))
13373 {
13374 regno = REGNO (in);
13375 if (!HARD_REGISTER_NUM_P (regno))
13376 {
13377 regno = true_regnum (in);
13378 if (!HARD_REGISTER_NUM_P (regno))
13379 regno = -1;
13380 }
13381 }
13382 else if (SUBREG_P (in))
13383 {
13384 regno = true_regnum (in);
13385 if (!HARD_REGISTER_NUM_P (regno))
13386 regno = -1;
13387 }
13388 else
13389 regno = -1;
13390
13391 /* If we have VSX register moves, prefer moving scalar values between
13392 Altivec registers and GPRs by going via an FPR (and then via memory)
13393 instead of reloading the secondary memory address for Altivec moves. */
13394 if (TARGET_VSX
13395 && GET_MODE_SIZE (mode) < 16
13396 && !mode_supports_vmx_dform (mode)
13397 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
13398 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
13399 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
13400 && (regno >= 0 && INT_REGNO_P (regno)))))
13401 return FLOAT_REGS;
13402
13403 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13404 into anything. */
13405 if (rclass == GENERAL_REGS || rclass == BASE_REGS
13406 || (regno >= 0 && INT_REGNO_P (regno)))
13407 return NO_REGS;
13408
13409 /* Constants, memory, and VSX registers can go into VSX registers (both the
13410 traditional floating point and the altivec registers). */
13411 if (rclass == VSX_REGS
13412 && (regno == -1 || VSX_REGNO_P (regno)))
13413 return NO_REGS;
13414
13415 /* Constants, memory, and FP registers can go into FP registers. */
13416 if ((regno == -1 || FP_REGNO_P (regno))
13417 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
13418 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
13419
13420 /* Memory and AltiVec registers can go into AltiVec registers. */
13421 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
13422 && rclass == ALTIVEC_REGS)
13423 return NO_REGS;
13424
13425 /* We can copy among the CR registers. */
13426 if ((rclass == CR_REGS || rclass == CR0_REGS)
13427 && regno >= 0 && CR_REGNO_P (regno))
13428 return NO_REGS;
13429
13430 /* Otherwise, we need GENERAL_REGS. */
13431 return GENERAL_REGS;
13432 }
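/* Illustrative outcomes (assumed VSX target): copying an SImode value out
   of an Altivec register into GENERAL_REGS reports FLOAT_REGS as the
   scratch class (the via-an-FPR path above); copying a SYMBOL_REF into
   FLOAT_REGS on ELF reports BASE_REGS; and a GPR-to-GPR copy returns
   NO_REGS, i.e. no scratch at all. */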
13433
13434 /* Debug version of rs6000_secondary_reload_class. */
13435 static enum reg_class
13436 rs6000_debug_secondary_reload_class (enum reg_class rclass,
13437 machine_mode mode, rtx in)
13438 {
13439 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
13440 fprintf (stderr,
13441 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13442 "mode = %s, input rtx:\n",
13443 reg_class_names[ret], reg_class_names[rclass],
13444 GET_MODE_NAME (mode));
13445 debug_rtx (in);
13446
13447 return ret;
13448 }
13449
13450 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13451
13452 static bool
13453 rs6000_can_change_mode_class (machine_mode from,
13454 machine_mode to,
13455 reg_class_t rclass)
13456 {
13457 unsigned from_size = GET_MODE_SIZE (from);
13458 unsigned to_size = GET_MODE_SIZE (to);
13459
13460 if (from_size != to_size)
13461 {
13462 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
13463
13464 if (reg_classes_intersect_p (xclass, rclass))
13465 {
13466 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
13467 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
13468 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
13469 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
13470
13471 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13472 single register under VSX because the scalar part of the register
13473 is in the upper 64-bits, and not the lower 64-bits. Types like
13474 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
13475 IEEE floating point can't overlap, and neither can small
13476 values. */
13477
13478 if (to_float128_vector_p && from_float128_vector_p)
13479 return true;
13480
13481 else if (to_float128_vector_p || from_float128_vector_p)
13482 return false;
13483
13484 /* TDmode in floating-point registers must always go into a register
13485 pair with the most significant word in the even-numbered register
13486 to match ISA requirements. In little-endian mode, this does not
13487 match subreg numbering, so we cannot allow subregs. */
13488 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
13489 return false;
13490
13491 /* Allow SD<->DD changes, since SDmode values are stored in
13492 the low half of the DDmode, just like target-independent
13493 code expects. We need to allow at least SD->DD since
13494 rs6000_secondary_memory_needed_mode asks for that change
13495 to be made for SD reloads. */
13496 if ((to == DDmode && from == SDmode)
13497 || (to == SDmode && from == DDmode))
13498 return true;
13499
13500 if (from_size < 8 || to_size < 8)
13501 return false;
13502
13503 if (from_size == 8 && (8 * to_nregs) != to_size)
13504 return false;
13505
13506 if (to_size == 8 && (8 * from_nregs) != from_size)
13507 return false;
13508
13509 return true;
13510 }
13511 else
13512 return true;
13513 }
13514
13515 /* Since the VSX register set includes traditional floating point registers
13516 and altivec registers, just check for the size being different instead of
13517 trying to check whether the modes are vector modes. Otherwise it won't
13518 allow, say, DF and DI to change classes. For types like TFmode and TDmode
13519 that take 2 64-bit registers, rather than a single 128-bit register, don't
13520 allow subregs of those types to other 128-bit types. */
13521 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
13522 {
13523 unsigned num_regs = (from_size + 15) / 16;
13524 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
13525 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
13526 return false;
13527
13528 return (from_size == 8 || from_size == 16);
13529 }
13530
13531 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
13532 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
13533 return false;
13534
13535 return true;
13536 }
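/* Illustrative outcomes (assumed VSX target): a DFmode <-> DImode subreg
   in VSX_REGS is allowed (both are 8 bytes); DImode <-> V2DImode is
   rejected because the scalar lives in the upper half of the 128-bit
   register; and SDmode <-> DDmode is explicitly allowed so the SD reload
   handling above keeps working. */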
13537
13538 /* Debug version of rs6000_can_change_mode_class. */
13539 static bool
13540 rs6000_debug_can_change_mode_class (machine_mode from,
13541 machine_mode to,
13542 reg_class_t rclass)
13543 {
13544 bool ret = rs6000_can_change_mode_class (from, to, rclass);
13545
13546 fprintf (stderr,
13547 "rs6000_can_change_mode_class, return %s, from = %s, "
13548 "to = %s, rclass = %s\n",
13549 ret ? "true" : "false",
13550 GET_MODE_NAME (from), GET_MODE_NAME (to),
13551 reg_class_names[rclass]);
13552
13553 return ret;
13554 }
13555 \f
13556 /* Return a string to do a move operation of 128 bits of data. */
13557
13558 const char *
13559 rs6000_output_move_128bit (rtx operands[])
13560 {
13561 rtx dest = operands[0];
13562 rtx src = operands[1];
13563 machine_mode mode = GET_MODE (dest);
13564 int dest_regno;
13565 int src_regno;
13566 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
13567 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
13568
13569 if (REG_P (dest))
13570 {
13571 dest_regno = REGNO (dest);
13572 dest_gpr_p = INT_REGNO_P (dest_regno);
13573 dest_fp_p = FP_REGNO_P (dest_regno);
13574 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
13575 dest_vsx_p = dest_fp_p | dest_vmx_p;
13576 }
13577 else
13578 {
13579 dest_regno = -1;
13580 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
13581 }
13582
13583 if (REG_P (src))
13584 {
13585 src_regno = REGNO (src);
13586 src_gpr_p = INT_REGNO_P (src_regno);
13587 src_fp_p = FP_REGNO_P (src_regno);
13588 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
13589 src_vsx_p = src_fp_p | src_vmx_p;
13590 }
13591 else
13592 {
13593 src_regno = -1;
13594 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
13595 }
13596
13597 /* Register moves. */
13598 if (dest_regno >= 0 && src_regno >= 0)
13599 {
13600 if (dest_gpr_p)
13601 {
13602 if (src_gpr_p)
13603 return "#";
13604
13605 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
13606 return (WORDS_BIG_ENDIAN
13607 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13608 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13609
13610 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
13611 return "#";
13612 }
13613
13614 else if (TARGET_VSX && dest_vsx_p)
13615 {
13616 if (src_vsx_p)
13617 return "xxlor %x0,%x1,%x1";
13618
13619 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
13620 return (WORDS_BIG_ENDIAN
13621 ? "mtvsrdd %x0,%1,%L1"
13622 : "mtvsrdd %x0,%L1,%1");
13623
13624 else if (TARGET_DIRECT_MOVE && src_gpr_p)
13625 return "#";
13626 }
13627
13628 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
13629 return "vor %0,%1,%1";
13630
13631 else if (dest_fp_p && src_fp_p)
13632 return "#";
13633 }
13634
13635 /* Loads. */
13636 else if (dest_regno >= 0 && MEM_P (src))
13637 {
13638 if (dest_gpr_p)
13639 {
13640 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13641 return "lq %0,%1";
13642 else
13643 return "#";
13644 }
13645
13646 else if (TARGET_ALTIVEC && dest_vmx_p
13647 && altivec_indexed_or_indirect_operand (src, mode))
13648 return "lvx %0,%y1";
13649
13650 else if (TARGET_VSX && dest_vsx_p)
13651 {
13652 if (mode_supports_dq_form (mode)
13653 && quad_address_p (XEXP (src, 0), mode, true))
13654 return "lxv %x0,%1";
13655
13656 else if (TARGET_P9_VECTOR)
13657 return "lxvx %x0,%y1";
13658
13659 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13660 return "lxvw4x %x0,%y1";
13661
13662 else
13663 return "lxvd2x %x0,%y1";
13664 }
13665
13666 else if (TARGET_ALTIVEC && dest_vmx_p)
13667 return "lvx %0,%y1";
13668
13669 else if (dest_fp_p)
13670 return "#";
13671 }
13672
13673 /* Stores. */
13674 else if (src_regno >= 0 && MEM_P (dest))
13675 {
13676 if (src_gpr_p)
13677 {
13678 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13679 return "stq %1,%0";
13680 else
13681 return "#";
13682 }
13683
13684 else if (TARGET_ALTIVEC && src_vmx_p
13685 && altivec_indexed_or_indirect_operand (dest, mode))
13686 return "stvx %1,%y0";
13687
13688 else if (TARGET_VSX && src_vsx_p)
13689 {
13690 if (mode_supports_dq_form (mode)
13691 && quad_address_p (XEXP (dest, 0), mode, true))
13692 return "stxv %x1,%0";
13693
13694 else if (TARGET_P9_VECTOR)
13695 return "stxvx %x1,%y0";
13696
13697 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13698 return "stxvw4x %x1,%y0";
13699
13700 else
13701 return "stxvd2x %x1,%y0";
13702 }
13703
13704 else if (TARGET_ALTIVEC && src_vmx_p)
13705 return "stvx %1,%y0";
13706
13707 else if (src_fp_p)
13708 return "#";
13709 }
13710
13711 /* Constants. */
13712 else if (dest_regno >= 0
13713 && (CONST_INT_P (src)
13714 || CONST_WIDE_INT_P (src)
13715 || CONST_DOUBLE_P (src)
13716 || GET_CODE (src) == CONST_VECTOR))
13717 {
13718 if (dest_gpr_p)
13719 return "#";
13720
13721 else if ((dest_vmx_p && TARGET_ALTIVEC)
13722 || (dest_vsx_p && TARGET_VSX))
13723 return output_vec_const_move (operands);
13724 }
13725
13726 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
13727 }
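/* Illustrative outputs (operands assumed already validated by the
   caller): a TImode copy from a GPR pair into a VSX register on a
   direct-move-128 target prints "mtvsrdd %x0,%1,%L1" (big endian); a V4SI
   load on a pre-ISA-3.0 VSX target prints "lxvw4x %x0,%y1"; and "#" tells
   the splitter to break the move into word-sized pieces. */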
13728
13729 /* Validate a 128-bit move. */
13730 bool
13731 rs6000_move_128bit_ok_p (rtx operands[])
13732 {
13733 machine_mode mode = GET_MODE (operands[0]);
13734 return (gpc_reg_operand (operands[0], mode)
13735 || gpc_reg_operand (operands[1], mode));
13736 }
13737
13738 /* Return true if a 128-bit move needs to be split. */
13739 bool
13740 rs6000_split_128bit_ok_p (rtx operands[])
13741 {
13742 if (!reload_completed)
13743 return false;
13744
13745 if (!gpr_or_gpr_p (operands[0], operands[1]))
13746 return false;
13747
13748 if (quad_load_store_p (operands[0], operands[1]))
13749 return false;
13750
13751 return true;
13752 }
13753
13754 \f
13755 /* Given a comparison operation, return the bit number in CCR to test. We
13756 know this is a valid comparison.
13757
13758 SCC_P is 1 if this is for an scc. That means that %D will have been
13759 used instead of %C, so the bits will be in different places.
13760
13761 Return -1 if OP isn't a valid comparison for some reason. */
13762
13763 int
13764 ccr_bit (rtx op, int scc_p)
13765 {
13766 enum rtx_code code = GET_CODE (op);
13767 machine_mode cc_mode;
13768 int cc_regnum;
13769 int base_bit;
13770 rtx reg;
13771
13772 if (!COMPARISON_P (op))
13773 return -1;
13774
13775 reg = XEXP (op, 0);
13776
13777 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
13778 return -1;
13779
13780 cc_mode = GET_MODE (reg);
13781 cc_regnum = REGNO (reg);
13782 base_bit = 4 * (cc_regnum - CR0_REGNO);
13783
13784 validate_condition_mode (code, cc_mode);
13785
13786 /* When generating a sCOND operation, only positive conditions are
13787 allowed. */
13788 if (scc_p)
13789 switch (code)
13790 {
13791 case EQ:
13792 case GT:
13793 case LT:
13794 case UNORDERED:
13795 case GTU:
13796 case LTU:
13797 break;
13798 default:
13799 return -1;
13800 }
13801
13802 switch (code)
13803 {
13804 case NE:
13805 return scc_p ? base_bit + 3 : base_bit + 2;
13806 case EQ:
13807 return base_bit + 2;
13808 case GT: case GTU: case UNLE:
13809 return base_bit + 1;
13810 case LT: case LTU: case UNGE:
13811 return base_bit;
13812 case ORDERED: case UNORDERED:
13813 return base_bit + 3;
13814
13815 case GE: case GEU:
13816 /* If scc, we will have done a cror to put the bit in the
13817 unordered position. So test that bit. For integer, this is ! LT
13818 unless this is an scc insn. */
13819 return scc_p ? base_bit + 3 : base_bit;
13820
13821 case LE: case LEU:
13822 return scc_p ? base_bit + 3 : base_bit + 1;
13823
13824 default:
13825 return -1;
13826 }
13827 }
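/* Worked example (symbolic, not tied to actual register numbers): for
   (gt (reg:CCFP cr1) (const_int 0)) where cr1 is CR0_REGNO + 1,
   base_bit = 4 * 1 = 4 and the GT case returns base_bit + 1 = 5. With
   SCC_P set, GE instead returns base_bit + 3 = 7, the bit the preceding
   cror moved into the unordered slot. */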
13828 \f
13829 /* Return the GOT register. */
13830
13831 rtx
13832 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
13833 {
13834 /* The second flow pass currently (June 1999) can't update
13835 regs_ever_live without disturbing other parts of the compiler, so
13836 update it here to make the prolog/epilogue code happy. */
13837 if (!can_create_pseudo_p ()
13838 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
13839 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
13840
13841 crtl->uses_pic_offset_table = 1;
13842
13843 return pic_offset_table_rtx;
13844 }
13845 \f
13846 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
13847
13848 /* Write out a function code label. */
13849
13850 void
13851 rs6000_output_function_entry (FILE *file, const char *fname)
13852 {
13853 if (fname[0] != '.')
13854 {
13855 switch (DEFAULT_ABI)
13856 {
13857 default:
13858 gcc_unreachable ();
13859
13860 case ABI_AIX:
13861 if (DOT_SYMBOLS)
13862 putc ('.', file);
13863 else
13864 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
13865 break;
13866
13867 case ABI_ELFv2:
13868 case ABI_V4:
13869 case ABI_DARWIN:
13870 break;
13871 }
13872 }
13873
13874 RS6000_OUTPUT_BASENAME (file, fname);
13875 }
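/* Illustrative output (hypothetical name): for a function "foo" under the
   AIX ABI with dot symbols this prints ".foo", while under the ELFv2, V4,
   or Darwin ABIs it prints plain "foo". */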
13876
13877 /* Print an operand. Recognize special options, documented below. */
13878
13879 #if TARGET_ELF
13880 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
13881 only introduced by the linker, when applying the sda21
13882 relocation. */
13883 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
13884 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
13885 #else
13886 #define SMALL_DATA_RELOC "sda21"
13887 #define SMALL_DATA_REG 0
13888 #endif
13889
13890 void
13891 print_operand (FILE *file, rtx x, int code)
13892 {
13893 int i;
13894 unsigned HOST_WIDE_INT uval;
13895
13896 switch (code)
13897 {
13898 /* %a is output_address. */
13899
13900 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
13901 output_operand. */
13902
13903 case 'A':
13904 /* Write the MMA accumulator number associated with VSX register X. */
13905 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
13906 output_operand_lossage ("invalid %%A value");
13907 else
13908 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
13909 return;
13910
13911 case 'D':
13912 /* Like 'J' but get to the GT bit only. */
13913 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13914 {
13915 output_operand_lossage ("invalid %%D value");
13916 return;
13917 }
13918
13919 /* Bit 1 is the GT bit. */
13920 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
13921
13922 /* Add one for shift count in rlinm for scc. */
13923 fprintf (file, "%d", i + 1);
13924 return;
13925
13926 case 'e':
13927 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
13928 if (! INT_P (x))
13929 {
13930 output_operand_lossage ("invalid %%e value");
13931 return;
13932 }
13933
13934 uval = INTVAL (x);
13935 if ((uval & 0xffff) == 0 && uval != 0)
13936 putc ('s', file);
13937 return;
13938
13939 case 'E':
13940 /* X is a CR register. Print the number of the EQ bit of the CR. */
13941 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13942 output_operand_lossage ("invalid %%E value");
13943 else
13944 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
13945 return;
13946
13947 case 'f':
13948 /* X is a CR register. Print the shift count needed to move it
13949 to the high-order four bits. */
13950 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13951 output_operand_lossage ("invalid %%f value");
13952 else
13953 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
13954 return;
13955
13956 case 'F':
13957 /* Similar, but print the count for the rotate in the opposite
13958 direction. */
13959 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
13960 output_operand_lossage ("invalid %%F value");
13961 else
13962 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
13963 return;
13964
13965 case 'G':
13966 /* X is a constant integer. If it is negative, print "m",
13967 otherwise print "z". This is to make an aze or ame insn. */
13968 if (!CONST_INT_P (x))
13969 output_operand_lossage ("invalid %%G value");
13970 else if (INTVAL (x) >= 0)
13971 putc ('z', file);
13972 else
13973 putc ('m', file);
13974 return;
13975
13976 case 'h':
13977 /* If constant, output low-order five bits. Otherwise, write
13978 normally. */
13979 if (INT_P (x))
13980 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
13981 else
13982 print_operand (file, x, 0);
13983 return;
13984
13985 case 'H':
13986 /* If constant, output low-order six bits. Otherwise, write
13987 normally. */
13988 if (INT_P (x))
13989 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
13990 else
13991 print_operand (file, x, 0);
13992 return;
13993
13994 case 'I':
13995 /* Print `i' if this is a constant, else nothing. */
13996 if (INT_P (x))
13997 putc ('i', file);
13998 return;
13999
14000 case 'j':
14001 /* Write the bit number in CCR for jump. */
14002 i = ccr_bit (x, 0);
14003 if (i == -1)
14004 output_operand_lossage ("invalid %%j code");
14005 else
14006 fprintf (file, "%d", i);
14007 return;
14008
14009 case 'J':
14010 /* Similar, but add one for shift count in rlinm for scc and pass
14011 scc flag to `ccr_bit'. */
14012 i = ccr_bit (x, 1);
14013 if (i == -1)
14014 output_operand_lossage ("invalid %%J code");
14015 else
14016 /* If we want bit 31, write a shift count of zero, not 32. */
14017 fprintf (file, "%d", i == 31 ? 0 : i + 1);
14018 return;
14019
14020 case 'k':
14021 /* X must be a constant. Write the 1's complement of the
14022 constant. */
14023 if (! INT_P (x))
14024 output_operand_lossage ("invalid %%k value");
14025 else
14026 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
14027 return;
14028
14029 case 'K':
14030 /* X must be a symbolic constant on ELF. Write an
14031 expression suitable for an 'addi' that adds in the low 16
14032 bits of the address. */
14033 if (GET_CODE (x) == CONST)
14034 {
14035 if (GET_CODE (XEXP (x, 0)) != PLUS
14036 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
14037 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
14038 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
14039 output_operand_lossage ("invalid %%K value");
14040 }
14041 print_operand_address (file, x);
14042 fputs ("@l", file);
14043 return;
14044
14045 /* %l is output_asm_label. */
14046
14047 case 'L':
14048 /* Write second word of DImode or DFmode reference. Works on register
14049 or non-indexed memory only. */
14050 if (REG_P (x))
14051 fputs (reg_names[REGNO (x) + 1], file);
14052 else if (MEM_P (x))
14053 {
14054 machine_mode mode = GET_MODE (x);
14055 /* Handle possible auto-increment. Since it is pre-increment and
14056 we have already done it, we can just use an offset of UNITS_PER_WORD. */
14057 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14058 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14059 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
14060 UNITS_PER_WORD));
14061 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14062 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
14063 UNITS_PER_WORD));
14064 else
14065 output_address (mode, XEXP (adjust_address_nv (x, SImode,
14066 UNITS_PER_WORD),
14067 0));
14068
14069 if (small_data_operand (x, GET_MODE (x)))
14070 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14071 reg_names[SMALL_DATA_REG]);
14072 }
14073 return;
14074
14075 case 'N': /* Unused */
14076 /* Write the number of elements in the vector times 4. */
14077 if (GET_CODE (x) != PARALLEL)
14078 output_operand_lossage ("invalid %%N value");
14079 else
14080 fprintf (file, "%d", XVECLEN (x, 0) * 4);
14081 return;
14082
14083 case 'O': /* Unused */
14084 /* Similar, but subtract 1 first. */
14085 if (GET_CODE (x) != PARALLEL)
14086 output_operand_lossage ("invalid %%O value");
14087 else
14088 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
14089 return;
14090
14091 case 'p':
14092 /* X is a CONST_INT that is a power of two. Output the logarithm. */
14093 if (! INT_P (x)
14094 || INTVAL (x) < 0
14095 || (i = exact_log2 (INTVAL (x))) < 0)
14096 output_operand_lossage ("invalid %%p value");
14097 else
14098 fprintf (file, "%d", i);
14099 return;
14100
14101 case 'P':
14102 /* The operand must be an indirect memory reference. The result
14103 is the register name. */
14104 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
14105 || REGNO (XEXP (x, 0)) >= 32)
14106 output_operand_lossage ("invalid %%P value");
14107 else
14108 fputs (reg_names[REGNO (XEXP (x, 0))], file);
14109 return;
14110
14111 case 'q':
14112 /* This outputs the logical code corresponding to a boolean
14113 expression. The expression may have one or both operands
14114 negated (if one, only the first one). For condition register
14115 logical operations, it will also treat the negated
14116 CR codes as NOTs, but not handle NOTs of them. */
14117 {
14118 const char *const *t = 0;
14119 const char *s;
14120 enum rtx_code code = GET_CODE (x);
14121 static const char * const tbl[3][3] = {
14122 { "and", "andc", "nor" },
14123 { "or", "orc", "nand" },
14124 { "xor", "eqv", "xor" } };
14125
14126 if (code == AND)
14127 t = tbl[0];
14128 else if (code == IOR)
14129 t = tbl[1];
14130 else if (code == XOR)
14131 t = tbl[2];
14132 else
14133 { output_operand_lossage ("invalid %%q value"); return; }
14134
14135 if (GET_CODE (XEXP (x, 0)) != NOT)
14136 s = t[0];
14137 else
14138 {
14139 if (GET_CODE (XEXP (x, 1)) == NOT)
14140 s = t[2];
14141 else
14142 s = t[1];
14143 }
14144
14145 fputs (s, file);
14146 }
14147 return;
14148
14149 case 'Q':
14150 if (! TARGET_MFCRF)
14151 return;
14152 fputc (',', file);
14153 /* FALLTHRU */
14154
14155 case 'R':
14156 /* X is a CR register. Print the mask for `mtcrf'. */
14157 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14158 output_operand_lossage ("invalid %%R value");
14159 else
14160 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
14161 return;
14162
14163 case 's':
14164 /* Low 5 bits of 32 - value */
14165 if (! INT_P (x))
14166 output_operand_lossage ("invalid %%s value");
14167 else
14168 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
14169 return;
14170
14171 case 't':
14172 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
14173 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14174 {
14175 output_operand_lossage ("invalid %%t value");
14176 return;
14177 }
14178
14179 /* Bit 3 is OV bit. */
14180 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
14181
14182 /* If we want bit 31, write a shift count of zero, not 32. */
14183 fprintf (file, "%d", i == 31 ? 0 : i + 1);
14184 return;
14185
14186 case 'T':
14187 /* Print the symbolic name of a branch target register. */
14188 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14189 x = XVECEXP (x, 0, 0);
14190 if (!REG_P (x) || (REGNO (x) != LR_REGNO
14191 && REGNO (x) != CTR_REGNO))
14192 output_operand_lossage ("invalid %%T value");
14193 else if (REGNO (x) == LR_REGNO)
14194 fputs ("lr", file);
14195 else
14196 fputs ("ctr", file);
14197 return;
14198
14199 case 'u':
14200 /* High-order or low-order 16 bits of constant, whichever is non-zero,
14201 for use in unsigned operand. */
14202 if (! INT_P (x))
14203 {
14204 output_operand_lossage ("invalid %%u value");
14205 return;
14206 }
14207
14208 uval = INTVAL (x);
14209 if ((uval & 0xffff) == 0)
14210 uval >>= 16;
14211
14212 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
14213 return;
14214
14215 case 'v':
14216 /* High-order 16 bits of constant for use in signed operand. */
14217 if (! INT_P (x))
14218 output_operand_lossage ("invalid %%v value");
14219 else
14220 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
14221 (INTVAL (x) >> 16) & 0xffff);
14222 return;
14223
14224 case 'U':
14225 /* Print `u' if this has an auto-increment or auto-decrement. */
14226 if (MEM_P (x)
14227 && (GET_CODE (XEXP (x, 0)) == PRE_INC
14228 || GET_CODE (XEXP (x, 0)) == PRE_DEC
14229 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
14230 putc ('u', file);
14231 return;
14232
14233 case 'V':
14234 /* Print the trap code for this operand. */
14235 switch (GET_CODE (x))
14236 {
14237 case EQ:
14238 fputs ("eq", file); /* 4 */
14239 break;
14240 case NE:
14241 fputs ("ne", file); /* 24 */
14242 break;
14243 case LT:
14244 fputs ("lt", file); /* 16 */
14245 break;
14246 case LE:
14247 fputs ("le", file); /* 20 */
14248 break;
14249 case GT:
14250 fputs ("gt", file); /* 8 */
14251 break;
14252 case GE:
14253 fputs ("ge", file); /* 12 */
14254 break;
14255 case LTU:
14256 fputs ("llt", file); /* 2 */
14257 break;
14258 case LEU:
14259 fputs ("lle", file); /* 6 */
14260 break;
14261 case GTU:
14262 fputs ("lgt", file); /* 1 */
14263 break;
14264 case GEU:
14265 fputs ("lge", file); /* 5 */
14266 break;
14267 default:
14268 output_operand_lossage ("invalid %%V value");
14269 }
14270 break;
14271
14272 case 'w':
14273 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14274 normally. */
14275 if (INT_P (x))
14276 fprintf (file, HOST_WIDE_INT_PRINT_DEC, sext_hwi (INTVAL (x), 16));
14277 else
14278 print_operand (file, x, 0);
14279 return;
14280
14281 case 'x':
14282 /* X is a FPR or Altivec register used in a VSX context. */
14283 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
14284 output_operand_lossage ("invalid %%x value");
14285 else
14286 {
14287 int reg = REGNO (x);
14288 int vsx_reg = (FP_REGNO_P (reg)
14289 ? reg - 32
14290 : reg - FIRST_ALTIVEC_REGNO + 32);
14291
14292 #ifdef TARGET_REGNAMES
14293 if (TARGET_REGNAMES)
14294 fprintf (file, "%%vs%d", vsx_reg);
14295 else
14296 #endif
14297 fprintf (file, "%d", vsx_reg);
14298 }
14299 return;
14300
14301 case 'X':
14302 if (MEM_P (x)
14303 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
14304 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
14305 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
14306 putc ('x', file);
14307 return;
14308
14309 case 'Y':
14310 /* Like 'L', for third word of TImode/PTImode */
14311 if (REG_P (x))
14312 fputs (reg_names[REGNO (x) + 2], file);
14313 else if (MEM_P (x))
14314 {
14315 machine_mode mode = GET_MODE (x);
14316 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14317 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14318 output_address (mode, plus_constant (Pmode,
14319 XEXP (XEXP (x, 0), 0), 8));
14320 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14321 output_address (mode, plus_constant (Pmode,
14322 XEXP (XEXP (x, 0), 0), 8));
14323 else
14324 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
14325 if (small_data_operand (x, GET_MODE (x)))
14326 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14327 reg_names[SMALL_DATA_REG]);
14328 }
14329 return;
14330
14331 case 'z':
14332 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14333 x = XVECEXP (x, 0, 1);
14334 /* X is a SYMBOL_REF. Write out the name preceded by a
14335 period and without any trailing data in brackets. Used for function
14336 names. If we are configured for System V (or the embedded ABI) on
14337 the PowerPC, do not emit the period, since those systems do not use
14338 TOCs and the like. */
14339 if (!SYMBOL_REF_P (x))
14340 {
14341 output_operand_lossage ("invalid %%z value");
14342 return;
14343 }
14344
14345 /* For macho, check to see if we need a stub. */
14346 if (TARGET_MACHO)
14347 {
14348 const char *name = XSTR (x, 0);
14349 #if TARGET_MACHO
14350 if (darwin_symbol_stubs
14351 && MACHOPIC_INDIRECT
14352 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14353 name = machopic_indirection_name (x, /*stub_p=*/true);
14354 #endif
14355 assemble_name (file, name);
14356 }
14357 else if (!DOT_SYMBOLS)
14358 assemble_name (file, XSTR (x, 0));
14359 else
14360 rs6000_output_function_entry (file, XSTR (x, 0));
14361 return;
14362
14363 case 'Z':
14364 /* Like 'L', for last word of TImode/PTImode. */
14365 if (REG_P (x))
14366 fputs (reg_names[REGNO (x) + 3], file);
14367 else if (MEM_P (x))
14368 {
14369 machine_mode mode = GET_MODE (x);
14370 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14371 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14372 output_address (mode, plus_constant (Pmode,
14373 XEXP (XEXP (x, 0), 0), 12));
14374 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14375 output_address (mode, plus_constant (Pmode,
14376 XEXP (XEXP (x, 0), 0), 12));
14377 else
14378 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
14379 if (small_data_operand (x, GET_MODE (x)))
14380 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14381 reg_names[SMALL_DATA_REG]);
14382 }
14383 return;
14384
14385 /* Print AltiVec memory operand. */
14386 case 'y':
14387 {
14388 rtx tmp;
14389
14390 gcc_assert (MEM_P (x));
14391
14392 tmp = XEXP (x, 0);
14393
14394 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
14395 && GET_CODE (tmp) == AND
14396 && CONST_INT_P (XEXP (tmp, 1))
14397 && INTVAL (XEXP (tmp, 1)) == -16)
14398 tmp = XEXP (tmp, 0);
14399 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
14400 && GET_CODE (tmp) == PRE_MODIFY)
14401 tmp = XEXP (tmp, 1);
14402 if (REG_P (tmp))
14403 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
14404 else
14405 {
14406 if (GET_CODE (tmp) != PLUS
14407 || !REG_P (XEXP (tmp, 0))
14408 || !REG_P (XEXP (tmp, 1)))
14409 {
14410 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14411 break;
14412 }
14413
14414 if (REGNO (XEXP (tmp, 0)) == 0)
14415 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
14416 reg_names[ REGNO (XEXP (tmp, 0)) ]);
14417 else
14418 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
14419 reg_names[ REGNO (XEXP (tmp, 1)) ]);
14420 }
14421 break;
14422 }
14423
14424 case 0:
14425 if (REG_P (x))
14426 fprintf (file, "%s", reg_names[REGNO (x)]);
14427 else if (MEM_P (x))
14428 {
14429 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14430 know the width from the mode. */
14431 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
14432 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
14433 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14434 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
14435 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
14436 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14437 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14438 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
14439 else
14440 output_address (GET_MODE (x), XEXP (x, 0));
14441 }
14442 else if (toc_relative_expr_p (x, false,
14443 &tocrel_base_oac, &tocrel_offset_oac))
14444 /* This hack along with a corresponding hack in
14445 rs6000_output_addr_const_extra arranges to output addends
14446 where the assembler expects to find them. E.g.
14447 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14448 without this hack would be output as "x@toc+4". We
14449 want "x+4@toc". */
14450 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14451 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
14452 output_addr_const (file, XVECEXP (x, 0, 0));
14453 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14454 output_addr_const (file, XVECEXP (x, 0, 1));
14455 else
14456 output_addr_const (file, x);
14457 return;
14458
14459 case '&':
14460 if (const char *name = get_some_local_dynamic_name ())
14461 assemble_name (file, name);
14462 else
14463 output_operand_lossage ("'%%&' used without any "
14464 "local dynamic TLS references");
14465 return;
14466
14467 default:
14468 output_operand_lossage ("invalid %%xn code");
14469 }
14470 }
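
/* Illustrative uses of the codes above, as they might appear in insn
   templates (the operand numbers and patterns here are hypothetical, not
   taken from the port's .md files):

     "addi %0,%1,%w2"  - %w prints the low 16 bits of a constant, signed.
     "slwi %0,%1,%h2"  - %h prints the low 5 bits, as a 32-bit shift count.
     "mtcrf %R0,%1"    - %R prints the field mask for a CR register.  */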
14471 \f
14472 /* Print the address of an operand. */
14473
14474 void
14475 print_operand_address (FILE *file, rtx x)
14476 {
14477 if (REG_P (x))
14478 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
14479
14480 /* Is it a PC-relative address? */
14481 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
14482 {
14483 HOST_WIDE_INT offset;
14484
14485 if (GET_CODE (x) == CONST)
14486 x = XEXP (x, 0);
14487
14488 if (GET_CODE (x) == PLUS)
14489 {
14490 offset = INTVAL (XEXP (x, 1));
14491 x = XEXP (x, 0);
14492 }
14493 else
14494 offset = 0;
14495
14496 output_addr_const (file, x);
14497
14498 if (offset)
14499 fprintf (file, "%+" PRId64, offset);
14500
14501 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
14502 fprintf (file, "@got");
14503
14504 fprintf (file, "@pcrel");
14505 }
14506 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
14507 || GET_CODE (x) == LABEL_REF)
14508 {
14509 output_addr_const (file, x);
14510 if (small_data_operand (x, GET_MODE (x)))
14511 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14512 reg_names[SMALL_DATA_REG]);
14513 else
14514 gcc_assert (!TARGET_TOC);
14515 }
14516 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14517 && REG_P (XEXP (x, 1)))
14518 {
14519 if (REGNO (XEXP (x, 0)) == 0)
14520 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
14521 reg_names[ REGNO (XEXP (x, 0)) ]);
14522 else
14523 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
14524 reg_names[ REGNO (XEXP (x, 1)) ]);
14525 }
14526 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14527 && CONST_INT_P (XEXP (x, 1)))
14528 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
14529 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
14530 #if TARGET_MACHO
14531 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14532 && CONSTANT_P (XEXP (x, 1)))
14533 {
14534 fprintf (file, "lo16(");
14535 output_addr_const (file, XEXP (x, 1));
14536 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14537 }
14538 #endif
14539 #if TARGET_ELF
14540 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14541 && CONSTANT_P (XEXP (x, 1)))
14542 {
14543 output_addr_const (file, XEXP (x, 1));
14544 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14545 }
14546 #endif
14547 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
14548 {
14549 /* This hack along with a corresponding hack in
14550 rs6000_output_addr_const_extra arranges to output addends
14551 where the assembler expects to find them. E.g.
14552 (lo_sum (reg 9)
14553 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14554 without this hack would be output as "x@toc+8@l(9)". We
14555 want "x+8@toc@l(9)". */
14556 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14557 if (GET_CODE (x) == LO_SUM)
14558 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
14559 else
14560 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
14561 }
14562 else
14563 output_addr_const (file, x);
14564 }
14565 \f
14566 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14567
14568 bool
14569 rs6000_output_addr_const_extra (FILE *file, rtx x)
14570 {
14571 if (GET_CODE (x) == UNSPEC)
14572 switch (XINT (x, 1))
14573 {
14574 case UNSPEC_TOCREL:
14575 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
14576 && REG_P (XVECEXP (x, 0, 1))
14577 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
14578 output_addr_const (file, XVECEXP (x, 0, 0));
14579 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
14580 {
14581 if (INTVAL (tocrel_offset_oac) >= 0)
14582 fprintf (file, "+");
14583 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
14584 }
14585 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
14586 {
14587 putc ('-', file);
14588 assemble_name (file, toc_label_name);
14589 need_toc_init = 1;
14590 }
14591 else if (TARGET_ELF)
14592 fputs ("@toc", file);
14593 return true;
14594
14595 #if TARGET_MACHO
14596 case UNSPEC_MACHOPIC_OFFSET:
14597 output_addr_const (file, XVECEXP (x, 0, 0));
14598 putc ('-', file);
14599 machopic_output_function_base_name (file);
14600 return true;
14601 #endif
14602 }
14603 return false;
14604 }
14605 \f
14606 /* Target hook for assembling integer objects. The PowerPC version has
14607 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14608 is defined. It also needs to handle DI-mode objects on 64-bit
14609 targets. */
14610
14611 static bool
14612 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
14613 {
14614 #ifdef RELOCATABLE_NEEDS_FIXUP
14615 /* Special handling for SI values. */
14616 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
14617 {
14618 static int recurse = 0;
14619
14620 /* For -mrelocatable, we mark all addresses that need to be fixed up in
14621 the .fixup section. Since the TOC section is already relocated, we
14622 don't need to mark it here. We used to skip the text section, but it
14623 should never be valid for relocated addresses to be placed in the text
14624 section. */
14625 if (DEFAULT_ABI == ABI_V4
14626 && (TARGET_RELOCATABLE || flag_pic > 1)
14627 && in_section != toc_section
14628 && !recurse
14629 && !CONST_SCALAR_INT_P (x)
14630 && CONSTANT_P (x))
14631 {
14632 char buf[256];
14633
14634 recurse = 1;
14635 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
14636 fixuplabelno++;
14637 ASM_OUTPUT_LABEL (asm_out_file, buf);
14638 fprintf (asm_out_file, "\t.long\t(");
14639 output_addr_const (asm_out_file, x);
14640 fprintf (asm_out_file, ")@fixup\n");
14641 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
14642 ASM_OUTPUT_ALIGN (asm_out_file, 2);
14643 fprintf (asm_out_file, "\t.long\t");
14644 assemble_name (asm_out_file, buf);
14645 fprintf (asm_out_file, "\n\t.previous\n");
14646 recurse = 0;
14647 return true;
14648 }
14649 /* Remove initial .'s to turn a -mcall-aixdesc function
14650 address into the address of the descriptor, not the function
14651 itself. */
14652 else if (SYMBOL_REF_P (x)
14653 && XSTR (x, 0)[0] == '.'
14654 && DEFAULT_ABI == ABI_AIX)
14655 {
14656 const char *name = XSTR (x, 0);
14657 while (*name == '.')
14658 name++;
14659
14660 fprintf (asm_out_file, "\t.long\t%s\n", name);
14661 return true;
14662 }
14663 }
14664 #endif /* RELOCATABLE_NEEDS_FIXUP */
14665 return default_assemble_integer (x, size, aligned_p);
14666 }
14667
14668 /* Return a template string for assembly to emit when making an
14669 external call. FUNOP is the call mem argument operand number. */
14670
14671 static const char *
14672 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
14673 {
14674 /* -Wformat-overflow workaround, without which gcc thinks that %u
14675 might produce 10 digits. */
14676 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14677
14678 char arg[12];
14679 arg[0] = 0;
14680 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14681 {
14682 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14683 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
14684 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14685 sprintf (arg, "(%%&@tlsld)");
14686 }
14687
14688 /* The magic 32768 offset here corresponds to the offset of
14689 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14690 char z[11];
14691 sprintf (z, "%%z%u%s", funop,
14692 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
14693 ? "+32768" : ""));
14694
14695 static char str[32]; /* 1 spare */
14696 if (rs6000_pcrel_p ())
14697 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
14698 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14699 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14700 sibcall ? "" : "\n\tnop");
14701 else if (DEFAULT_ABI == ABI_V4)
14702 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14703 flag_pic ? "@plt" : "");
14704 #if TARGET_MACHO
14705 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
14706 else if (DEFAULT_ABI == ABI_DARWIN)
14707 {
14708 /* The cookie is in operand func+2. */
14709 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
14710 int cookie = INTVAL (operands[funop + 2]);
14711 if (cookie & CALL_LONG)
14712 {
14713 tree funname = get_identifier (XSTR (operands[funop], 0));
14714 tree labelname = get_prev_label (funname);
14715 gcc_checking_assert (labelname && !sibcall);
14716
14717 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14718 instruction will reach 'foo', otherwise link as 'bl L42'".
14719 "L42" should be a 'branch island', that will do a far jump to
14720 'foo'. Branch islands are generated in
14721 macho_branch_islands(). */
14722 sprintf (str, "jbsr %%z%u,%.10s", funop,
14723 IDENTIFIER_POINTER (labelname));
14724 }
14725 else
14726 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14727 after the call. */
14728 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
14729 }
14730 #endif
14731 else
14732 gcc_unreachable ();
14733 return str;
14734 }
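
/* A sketch of the strings built above, assuming a hypothetical callee
   "foo" after operand substitution:

     pcrel:                  "bl foo@notoc"
     AIX/ELFv2:              "bl foo" followed by "nop" (the TOC restore slot)
     V4 with -fPIC:          "bl foo@plt"
     V4 secure PLT, pic==2:  "bl foo+32768@plt"

   Sibling calls use "b" instead of "bl" and omit the trailing nop.  */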
14735
14736 const char *
14737 rs6000_call_template (rtx *operands, unsigned int funop)
14738 {
14739 return rs6000_call_template_1 (operands, funop, false);
14740 }
14741
14742 const char *
14743 rs6000_sibcall_template (rtx *operands, unsigned int funop)
14744 {
14745 return rs6000_call_template_1 (operands, funop, true);
14746 }
14747
14748 /* As above, for indirect calls. */
14749
14750 static const char *
14751 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
14752 bool sibcall)
14753 {
14754 /* -Wformat-overflow workaround, without which gcc thinks that %u
14755 might produce 10 digits. Note that -Wformat-overflow will not
14756 currently warn here for str[], so do not rely on a warning to
14757 ensure str[] is correctly sized. */
14758 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14759
14760 /* Currently, funop is either 0 or 1. The maximum string is always
14761 a !speculate 64-bit __tls_get_addr call.
14762
14763 ABI_ELFv2, pcrel:
14764 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14765 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14766 . 9 crset 2\n\t
14767 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14768 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14769 . 8 beq%T1l-
14770 .---
14771 .142
14772
14773 ABI_AIX:
14774 . 9 ld 2,%3\n\t
14775 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14776 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14777 . 9 crset 2\n\t
14778 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14779 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14780 . 10 beq%T1l-\n\t
14781 . 10 ld 2,%4(1)
14782 .---
14783 .151
14784
14785 ABI_ELFv2:
14786 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14787 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14788 . 9 crset 2\n\t
14789 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14790 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14791 . 10 beq%T1l-\n\t
14792 . 10 ld 2,%3(1)
14793 .---
14794 .142
14795
14796 ABI_V4:
14797 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14798 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14799 . 9 crset 2\n\t
14800 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14801 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14802 . 8 beq%T1l-
14803 .---
14804 .141 */
14805 static char str[160]; /* 8 spare */
14806 char *s = str;
14807 const char *ptrload = TARGET_64BIT ? "d" : "wz";
14808
14809 if (DEFAULT_ABI == ABI_AIX)
14810 s += sprintf (s,
14811 "l%s 2,%%%u\n\t",
14812 ptrload, funop + 3);
14813
14814 /* We don't need the extra code to stop indirect call speculation if
14815 calling via LR. */
14816 bool speculate = (TARGET_MACHO
14817 || rs6000_speculate_indirect_jumps
14818 || (REG_P (operands[funop])
14819 && REGNO (operands[funop]) == LR_REGNO));
14820
14821 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
14822 {
14823 const char *rel64 = TARGET_64BIT ? "64" : "";
14824 char tls[29];
14825 tls[0] = 0;
14826 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14827 {
14828 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14829 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
14830 rel64, funop + 1);
14831 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14832 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
14833 rel64);
14834 }
14835
14836 const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
14837 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14838 && flag_pic == 2 ? "+32768" : "");
14839 if (!speculate)
14840 {
14841 s += sprintf (s,
14842 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
14843 tls, rel64, notoc, funop, addend);
14844 s += sprintf (s, "crset 2\n\t");
14845 }
14846 s += sprintf (s,
14847 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
14848 tls, rel64, notoc, funop, addend);
14849 }
14850 else if (!speculate)
14851 s += sprintf (s, "crset 2\n\t");
14852
14853 if (rs6000_pcrel_p ())
14854 {
14855 if (speculate)
14856 sprintf (s, "b%%T%ul", funop);
14857 else
14858 sprintf (s, "beq%%T%ul-", funop);
14859 }
14860 else if (DEFAULT_ABI == ABI_AIX)
14861 {
14862 if (speculate)
14863 sprintf (s,
14864 "b%%T%ul\n\t"
14865 "l%s 2,%%%u(1)",
14866 funop, ptrload, funop + 4);
14867 else
14868 sprintf (s,
14869 "beq%%T%ul-\n\t"
14870 "l%s 2,%%%u(1)",
14871 funop, ptrload, funop + 4);
14872 }
14873 else if (DEFAULT_ABI == ABI_ELFv2)
14874 {
14875 if (speculate)
14876 sprintf (s,
14877 "b%%T%ul\n\t"
14878 "l%s 2,%%%u(1)",
14879 funop, ptrload, funop + 3);
14880 else
14881 sprintf (s,
14882 "beq%%T%ul-\n\t"
14883 "l%s 2,%%%u(1)",
14884 funop, ptrload, funop + 3);
14885 }
14886 else
14887 {
14888 if (speculate)
14889 sprintf (s,
14890 "b%%T%u%s",
14891 funop, sibcall ? "" : "l");
14892 else
14893 sprintf (s,
14894 "beq%%T%u%s-%s",
14895 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
14896 }
14897 return str;
14898 }
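
/* For instance (illustrative), an ELFv2 indirect call through CTR with
   speculation allowed comes out roughly as

     bctrl
     ld 2,24(1)

   (the operand after the function supplying the TOC save offset), while
   with -mno-speculate-indirect-jumps the branch becomes "crset 2"
   followed by "beqctrl-", so it only executes once the CR bit has
   resolved.  */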
14899
14900 const char *
14901 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
14902 {
14903 return rs6000_indirect_call_template_1 (operands, funop, false);
14904 }
14905
14906 const char *
14907 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
14908 {
14909 return rs6000_indirect_call_template_1 (operands, funop, true);
14910 }
14911
14912 #if HAVE_AS_PLTSEQ
14913 /* Output indirect call insns. WHICH identifies the type of sequence. */
14914 const char *
14915 rs6000_pltseq_template (rtx *operands, int which)
14916 {
14917 const char *rel64 = TARGET_64BIT ? "64" : "";
14918 char tls[30];
14919 tls[0] = 0;
14920 if (GET_CODE (operands[3]) == UNSPEC)
14921 {
14922 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
14923 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
14924 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
14925 off, rel64);
14926 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
14927 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
14928 off, rel64);
14929 }
14930
14931 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
14932 static char str[96]; /* 10 spare */
14933 char off = WORDS_BIG_ENDIAN ? '2' : '4';
14934 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
14935 && flag_pic == 2 ? "+32768" : "");
14936 switch (which)
14937 {
14938 case RS6000_PLTSEQ_TOCSAVE:
14939 sprintf (str,
14940 "st%s\n\t"
14941 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
14942 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
14943 tls, rel64);
14944 break;
14945 case RS6000_PLTSEQ_PLT16_HA:
14946 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
14947 sprintf (str,
14948 "lis %%0,0\n\t"
14949 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
14950 tls, off, rel64);
14951 else
14952 sprintf (str,
14953 "addis %%0,%%1,0\n\t"
14954 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
14955 tls, off, rel64, addend);
14956 break;
14957 case RS6000_PLTSEQ_PLT16_LO:
14958 sprintf (str,
14959 "l%s %%0,0(%%1)\n\t"
14960 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
14961 TARGET_64BIT ? "d" : "wz",
14962 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
14963 break;
14964 case RS6000_PLTSEQ_MTCTR:
14965 sprintf (str,
14966 "mtctr %%1\n\t"
14967 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
14968 tls, rel64, addend);
14969 break;
14970 case RS6000_PLTSEQ_PLT_PCREL34:
14971 sprintf (str,
14972 "pl%s %%0,0(0),1\n\t"
14973 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
14974 TARGET_64BIT ? "d" : "wz",
14975 tls, rel64);
14976 break;
14977 default:
14978 gcc_unreachable ();
14979 }
14980 return str;
14981 }
14982 #endif
14983 \f
14984 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
14985 /* Emit an assembler directive to set symbol visibility for DECL to
14986 VISIBILITY_TYPE. */
14987
14988 static void
14989 rs6000_assemble_visibility (tree decl, int vis)
14990 {
14991 if (TARGET_XCOFF)
14992 return;
14993
14994 /* Functions need to have their entry point symbol visibility set as
14995 well as their descriptor symbol visibility. */
14996 if (DEFAULT_ABI == ABI_AIX
14997 && DOT_SYMBOLS
14998 && TREE_CODE (decl) == FUNCTION_DECL)
14999 {
15000 static const char * const visibility_types[] = {
15001 NULL, "protected", "hidden", "internal"
15002 };
15003
15004 const char *name, *type;
15005
15006 name = ((* targetm.strip_name_encoding)
15007 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
15008 type = visibility_types[vis];
15009
15010 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
15011 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
15012 }
15013 else
15014 default_assemble_visibility (decl, vis);
15015 }
15016 #endif
15017 \f
15018 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
15019 entry. If RECORD_P is true and the target supports named sections,
15020 the location of the NOPs will be recorded in a special object section
15021 called "__patchable_function_entries". This routine may be called
15022 twice per function to put NOPs before and after the function
15023 entry. */
15024
15025 void
15026 rs6000_print_patchable_function_entry (FILE *file,
15027 unsigned HOST_WIDE_INT patch_area_size,
15028 bool record_p)
15029 {
15030 bool global_entry_needed_p = rs6000_global_entry_point_prologue_needed_p ();
15031 /* For a function which needs a global entry point, we will emit the
15032 patchable area before and after the local entry point under the control
15033 of cfun->machine->global_entry_emitted; see the handling in
15034 rs6000_output_function_prologue. */
15035 if (!global_entry_needed_p || cfun->machine->global_entry_emitted)
15036 default_print_patchable_function_entry (file, patch_area_size, record_p);
15037 }
15038 \f
15039 enum rtx_code
15040 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
15041 {
15042 /* Reversal of FP compares needs care -- an ordered compare
15043 becomes an unordered compare and vice versa. */
15044 if (mode == CCFPmode
15045 && (!flag_finite_math_only
15046 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
15047 || code == UNEQ || code == LTGT))
15048 return reverse_condition_maybe_unordered (code);
15049 else
15050 return reverse_condition (code);
15051 }
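
/* E.g. when NaNs are honored, reversing GT in CCFPmode yields UNLE rather
   than LE, so that a NaN operand still takes the reversed branch; for an
   integer compare in CCmode, plain LE is returned.  */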
15052
15053 /* Check if C (as a 64-bit integer) can be rotated to a constant that contains
15054 nonzero bits in the LOWBITS low bits only.
15055
15056 Return true if C can be rotated to such a constant; if so, the number of
15057 bits by which C must be rotated left is stored in *ROT.
15058 Return false otherwise. */
15059
15060 bool
15061 can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
15062 {
15063 int clz = HOST_BITS_PER_WIDE_INT - lowbits;
15064
15065 /* case a. 0..0xxx: already at least clz zeros. */
15066 int lz = clz_hwi (c);
15067 if (lz >= clz)
15068 {
15069 *rot = 0;
15070 return true;
15071 }
15072
15073 /* case b. 0..0xxx0..0: at least clz zeros. */
15074 int tz = ctz_hwi (c);
15075 if (lz + tz >= clz)
15076 {
15077 *rot = HOST_BITS_PER_WIDE_INT - tz;
15078 return true;
15079 }
15080
15081 /* case c. xx10.....0xx: the nonzero bits wrap around. Rotate left by
15082 'clz - 1' bits first, which gathers the zeros at the head or the tail
15083 as 00...00xxx100 (possible when 'clz - 1' >= 'bits of xxxx'), then
check as in case b. */
15084 const int rot_bits = lowbits + 1;
15085 unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
15086 tz = ctz_hwi (rc);
15087 if (clz_hwi (rc) + tz >= clz)
15088 {
15089 *rot = HOST_BITS_PER_WIDE_INT - (tz + rot_bits);
15090 return true;
15091 }
15092
15093 return false;
15094 }
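
/* Worked example: c = 0x8000000000000001 with lowbits = 16 falls under
   case c above: rc becomes 0x0000c00000000000, so *rot = 64 - (46 + 17)
   = 1, and indeed rotating c left by one bit gives 0x3.  The sketch below
   is an illustrative brute-force cross-check of the same predicate; it is
   not part of the build.

     static int
     brute_force_rotate_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits)
     {
       // Try every left-rotation; return the first that leaves nonzero
       // bits in the low LOWBITS bits only (assumes 0 < lowbits < 64).
       for (int r = 0; r < HOST_BITS_PER_WIDE_INT; r++)
         {
           unsigned HOST_WIDE_INT v
             = r ? (c << r) | (c >> (HOST_BITS_PER_WIDE_INT - r)) : c;
           if ((v >> lowbits) == 0)
             return r;
         }
       return -1;
     }  */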
15095
15096 /* Check if C (as a 64-bit integer) can be rotated to a positive 16-bit constant,
15097 i.e. one with 48 leading zeros and any low 16 bits; the rotation must be nonzero. */
15098
15099 bool
15100 can_be_rotated_to_positive_16bits (HOST_WIDE_INT c)
15101 {
15102 int rot = 0;
15103 bool res = can_be_rotated_to_lowbits (c, 16, &rot);
15104 return res && rot > 0;
15105 }
15106
15107 /* Check if C (as a 64-bit integer) can be rotated to a negative 15-bit constant,
15108 i.e. one with 49 leading ones and any low 15 bits; the rotation must be nonzero. */
15109
15110 bool
15111 can_be_rotated_to_negative_15bits (HOST_WIDE_INT c)
15112 {
15113 int rot = 0;
15114 bool res = can_be_rotated_to_lowbits (~c, 15, &rot);
15115 return res && rot > 0;
15116 }
15117
15118 /* Generate a compare for CODE. Return a brand-new rtx that
15119 represents the result of the compare. */
15120
15121 static rtx
15122 rs6000_generate_compare (rtx cmp, machine_mode mode)
15123 {
15124 machine_mode comp_mode;
15125 rtx compare_result;
15126 enum rtx_code code = GET_CODE (cmp);
15127 rtx op0 = XEXP (cmp, 0);
15128 rtx op1 = XEXP (cmp, 1);
15129
15130 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
15131 comp_mode = CCmode;
15132 else if (FLOAT_MODE_P (mode))
15133 comp_mode = CCFPmode;
15134 else if (code == GTU || code == LTU
15135 || code == GEU || code == LEU)
15136 comp_mode = CCUNSmode;
15137 else if ((code == EQ || code == NE)
15138 && unsigned_reg_p (op0)
15139 && (unsigned_reg_p (op1)
15140 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
15141 /* These are unsigned values, perhaps there will be a later
15142 ordering compare that can be shared with this one. */
15143 comp_mode = CCUNSmode;
15144 else
15145 comp_mode = CCmode;
15146
15147 /* If we have an unsigned compare, make sure we don't have a signed value as
15148 an immediate. */
15149 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
15150 && INTVAL (op1) < 0)
15151 {
15152 op0 = copy_rtx_if_shared (op0);
15153 op1 = force_reg (GET_MODE (op0), op1);
15154 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
15155 }
15156
15157 /* First, the compare. */
15158 compare_result = gen_reg_rtx (comp_mode);
15159
15160 /* IEEE 128-bit support in VSX registers when we do not have hardware
15161 support. */
15162 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
15163 {
15164 rtx libfunc = NULL_RTX;
15165 bool check_nan = false;
15166 rtx dest;
15167
15168 switch (code)
15169 {
15170 case EQ:
15171 case NE:
15172 libfunc = optab_libfunc (eq_optab, mode);
15173 break;
15174
15175 case GT:
15176 case GE:
15177 libfunc = optab_libfunc (ge_optab, mode);
15178 break;
15179
15180 case LT:
15181 case LE:
15182 libfunc = optab_libfunc (le_optab, mode);
15183 break;
15184
15185 case UNORDERED:
15186 case ORDERED:
15187 libfunc = optab_libfunc (unord_optab, mode);
15188 code = (code == UNORDERED) ? NE : EQ;
15189 break;
15190
15191 case UNGE:
15192 case UNGT:
15193 check_nan = true;
15194 libfunc = optab_libfunc (ge_optab, mode);
15195 code = (code == UNGE) ? GE : GT;
15196 break;
15197
15198 case UNLE:
15199 case UNLT:
15200 check_nan = true;
15201 libfunc = optab_libfunc (le_optab, mode);
15202 code = (code == UNLE) ? LE : LT;
15203 break;
15204
15205 case UNEQ:
15206 case LTGT:
15207 check_nan = true;
15208 libfunc = optab_libfunc (eq_optab, mode);
15209 code = (code == UNEQ) ? EQ : NE;
15210 break;
15211
15212 default:
15213 gcc_unreachable ();
15214 }
15215
15216 gcc_assert (libfunc);
15217
15218 if (!check_nan)
15219 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
15220 SImode, op0, mode, op1, mode);
15221
15222 /* The library signals an exception for signalling NaNs, so we need to
15223 handle isgreater, etc. by first checking isordered. */
15224 else
15225 {
15226 rtx ne_rtx, normal_dest, unord_dest;
15227 rtx unord_func = optab_libfunc (unord_optab, mode);
15228 rtx join_label = gen_label_rtx ();
15229 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
15230 rtx unord_cmp = gen_reg_rtx (comp_mode);
15231
15232
15233 /* Test for either value being a NaN. */
15234 gcc_assert (unord_func);
15235 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
15236 SImode, op0, mode, op1, mode);
15237
15238 /* Set the result to 1 if either value is a NaN, and jump to the join
15239 label. */
15240 dest = gen_reg_rtx (SImode);
15241 emit_move_insn (dest, const1_rtx);
15242 emit_insn (gen_rtx_SET (unord_cmp,
15243 gen_rtx_COMPARE (comp_mode, unord_dest,
15244 const0_rtx)));
15245
15246 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
15247 emit_jump_insn (gen_rtx_SET (pc_rtx,
15248 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
15249 join_ref,
15250 pc_rtx)));
15251
15252 /* Do the normal comparison, knowing that the values are not
15253 NaNs. */
15254 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
15255 SImode, op0, mode, op1, mode);
15256
15257 emit_insn (gen_cstoresi4 (dest,
15258 gen_rtx_fmt_ee (code, SImode, normal_dest,
15259 const0_rtx),
15260 normal_dest, const0_rtx));
15261
15262 /* Join the NaN and non-NaN paths. Compare dest against 0. */
15263 emit_label (join_label);
15264 code = NE;
15265 }
15266
15267 emit_insn (gen_rtx_SET (compare_result,
15268 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
15269 }
15270
15271 else
15272 {
15273 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
15274 CLOBBERs to match cmptf_internal2 pattern. */
15275 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
15276 && FLOAT128_IBM_P (GET_MODE (op0))
15277 && TARGET_HARD_FLOAT)
15278 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15279 gen_rtvec (10,
15280 gen_rtx_SET (compare_result,
15281 gen_rtx_COMPARE (comp_mode, op0, op1)),
15282 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15283 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15284 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15285 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15286 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15287 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15288 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15289 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15290 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
15291 else if (GET_CODE (op1) == UNSPEC
15292 && XINT (op1, 1) == UNSPEC_SP_TEST)
15293 {
15294 rtx op1b = XVECEXP (op1, 0, 0);
15295 comp_mode = CCEQmode;
15296 compare_result = gen_reg_rtx (CCEQmode);
15297 if (TARGET_64BIT)
15298 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
15299 else
15300 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
15301 }
15302 else
15303 emit_insn (gen_rtx_SET (compare_result,
15304 gen_rtx_COMPARE (comp_mode, op0, op1)));
15305 }
15306
15307 validate_condition_mode (code, GET_MODE (compare_result));
15308
15309 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
15310 }
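
/* E.g. an unsigned SImode comparison (GTU) selects CCUNSmode and emits a
   cmplw, while a signed GT uses CCmode and cmpw; a soft-float __float128
   equality instead calls the eq_optab libfunc (__eqkf2 in libgcc) and
   compares its SImode result against zero.  */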
15311
15312 \f
15313 /* Return the diagnostic message string if the binary operation OP is
15314 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15315
15316 static const char*
15317 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
15318 const_tree type1,
15319 const_tree type2)
15320 {
15321 machine_mode mode1 = TYPE_MODE (type1);
15322 machine_mode mode2 = TYPE_MODE (type2);
15323
15324 /* For complex modes, use the inner type. */
15325 if (COMPLEX_MODE_P (mode1))
15326 mode1 = GET_MODE_INNER (mode1);
15327
15328 if (COMPLEX_MODE_P (mode2))
15329 mode2 = GET_MODE_INNER (mode2);
15330
15331 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
15332 double to intermix unless -mfloat128-convert. */
15333 if (mode1 == mode2)
15334 return NULL;
15335
15336 if (!TARGET_FLOAT128_CVT)
15337 {
15338 if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
15339 || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
15340 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15341 "point types");
15342 }
15343
15344 return NULL;
15345 }
15346
15347 \f
15348 /* Expand floating point conversion to/from __float128 and __ibm128. */
15349
15350 void
15351 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
15352 {
15353 machine_mode dest_mode = GET_MODE (dest);
15354 machine_mode src_mode = GET_MODE (src);
15355 convert_optab cvt = unknown_optab;
15356 bool do_move = false;
15357 rtx libfunc = NULL_RTX;
15358 rtx dest2;
15359 typedef rtx (*rtx_2func_t) (rtx, rtx);
15360 rtx_2func_t hw_convert = (rtx_2func_t)0;
15361 size_t kf_or_tf;
15362
15363 struct hw_conv_t {
15364 rtx_2func_t from_df;
15365 rtx_2func_t from_sf;
15366 rtx_2func_t from_si_sign;
15367 rtx_2func_t from_si_uns;
15368 rtx_2func_t from_di_sign;
15369 rtx_2func_t from_di_uns;
15370 rtx_2func_t to_df;
15371 rtx_2func_t to_sf;
15372 rtx_2func_t to_si_sign;
15373 rtx_2func_t to_si_uns;
15374 rtx_2func_t to_di_sign;
15375 rtx_2func_t to_di_uns;
15376 } hw_conversions[2] = {
15377 /* conversions to/from KFmode. */
15378 {
15379 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
15380 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
15381 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
15382 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
15383 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
15384 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
15385 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
15386 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
15387 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
15388 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
15389 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
15390 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
15391 },
15392
15393 /* conversions to/from TFmode. */
15394 {
15395 gen_extenddftf2_hw, /* TFmode <- DFmode. */
15396 gen_extendsftf2_hw, /* TFmode <- SFmode. */
15397 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
15398 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
15399 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
15400 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
15401 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
15402 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
15403 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
15404 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
15405 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
15406 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
15407 },
15408 };
15409
15410 if (dest_mode == src_mode)
15411 gcc_unreachable ();
15412
15413 /* Eliminate memory operations. */
15414 if (MEM_P (src))
15415 src = force_reg (src_mode, src);
15416
15417 if (MEM_P (dest))
15418 {
15419 rtx tmp = gen_reg_rtx (dest_mode);
15420 rs6000_expand_float128_convert (tmp, src, unsigned_p);
15421 rs6000_emit_move (dest, tmp, dest_mode);
15422 return;
15423 }
15424
15425 /* Convert to IEEE 128-bit floating point. */
15426 if (FLOAT128_IEEE_P (dest_mode))
15427 {
15428 if (dest_mode == KFmode)
15429 kf_or_tf = 0;
15430 else if (dest_mode == TFmode)
15431 kf_or_tf = 1;
15432 else
15433 gcc_unreachable ();
15434
15435 switch (src_mode)
15436 {
15437 case E_DFmode:
15438 cvt = sext_optab;
15439 hw_convert = hw_conversions[kf_or_tf].from_df;
15440 break;
15441
15442 case E_SFmode:
15443 cvt = sext_optab;
15444 hw_convert = hw_conversions[kf_or_tf].from_sf;
15445 break;
15446
15447 case E_KFmode:
15448 case E_IFmode:
15449 case E_TFmode:
15450 if (FLOAT128_IBM_P (src_mode))
15451 cvt = sext_optab;
15452 else
15453 do_move = true;
15454 break;
15455
15456 case E_SImode:
15457 if (unsigned_p)
15458 {
15459 cvt = ufloat_optab;
15460 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
15461 }
15462 else
15463 {
15464 cvt = sfloat_optab;
15465 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
15466 }
15467 break;
15468
15469 case E_DImode:
15470 if (unsigned_p)
15471 {
15472 cvt = ufloat_optab;
15473 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
15474 }
15475 else
15476 {
15477 cvt = sfloat_optab;
15478 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
15479 }
15480 break;
15481
15482 default:
15483 gcc_unreachable ();
15484 }
15485 }
15486
15487 /* Convert from IEEE 128-bit floating point. */
15488 else if (FLOAT128_IEEE_P (src_mode))
15489 {
15490 if (src_mode == KFmode)
15491 kf_or_tf = 0;
15492 else if (src_mode == TFmode)
15493 kf_or_tf = 1;
15494 else
15495 gcc_unreachable ();
15496
15497 switch (dest_mode)
15498 {
15499 case E_DFmode:
15500 cvt = trunc_optab;
15501 hw_convert = hw_conversions[kf_or_tf].to_df;
15502 break;
15503
15504 case E_SFmode:
15505 cvt = trunc_optab;
15506 hw_convert = hw_conversions[kf_or_tf].to_sf;
15507 break;
15508
15509 case E_KFmode:
15510 case E_IFmode:
15511 case E_TFmode:
15512 if (FLOAT128_IBM_P (dest_mode))
15513 cvt = trunc_optab;
15514 else
15515 do_move = true;
15516 break;
15517
15518 case E_SImode:
15519 if (unsigned_p)
15520 {
15521 cvt = ufix_optab;
15522 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
15523 }
15524 else
15525 {
15526 cvt = sfix_optab;
15527 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
15528 }
15529 break;
15530
15531 case E_DImode:
15532 if (unsigned_p)
15533 {
15534 cvt = ufix_optab;
15535 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
15536 }
15537 else
15538 {
15539 cvt = sfix_optab;
15540 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
15541 }
15542 break;
15543
15544 default:
15545 gcc_unreachable ();
15546 }
15547 }
15548
15549 /* Both IBM format. */
15550 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
15551 do_move = true;
15552
15553 else
15554 gcc_unreachable ();
15555
15556 /* Handle conversion between TFmode/KFmode/IFmode. */
15557 if (do_move)
15558 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
15559
15560 /* Handle conversion if we have hardware support. */
15561 else if (TARGET_FLOAT128_HW && hw_convert)
15562 emit_insn ((hw_convert) (dest, src));
15563
15564 /* Call an external function to do the conversion. */
15565 else if (cvt != unknown_optab)
15566 {
15567 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
15568 gcc_assert (libfunc != NULL_RTX);
15569
15570 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
15571 src, src_mode);
15572
15573 gcc_assert (dest2 != NULL_RTX);
15574 if (!rtx_equal_p (dest, dest2))
15575 emit_move_insn (dest, dest2);
15576 }
15577
15578 else
15579 gcc_unreachable ();
15580
15581 return;
15582 }
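
/* For example (illustrative): converting DFmode to KFmode with
   TARGET_FLOAT128_HW selects hw_conversions[0].from_df, i.e.
   gen_extenddfkf2_hw; without hardware support the same request goes
   through convert_optab_libfunc (sext_optab, KFmode, DFmode) and becomes
   a call to the libgcc routine (__extenddfkf2).  */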
15583
15584 \f
15585 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
15586 can be used as that dest register. Return the dest register. */
15587
15588 rtx
15589 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
15590 {
15591 if (op2 == const0_rtx)
15592 return op1;
15593
15594 if (GET_CODE (scratch) == SCRATCH)
15595 scratch = gen_reg_rtx (mode);
15596
15597 if (logical_operand (op2, mode))
15598 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
15599 else
15600 emit_insn (gen_rtx_SET (scratch,
15601 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
15602
15603 return scratch;
15604 }
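
/* E.g. comparing OP1 against the hypothetical constant 0x5432, which is a
   logical_operand, emits an XOR (xori), so SCRATCH is zero exactly when
   the operands are equal; a constant outside that range is negated and
   added instead.  */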
15605
15606 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15607 requires this. The result is mode MODE. */
15608 rtx
15609 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
15610 {
15611 rtx cond[2];
15612 int n = 0;
15613 if (code == LTGT || code == LE || code == UNLT)
15614 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
15615 if (code == LTGT || code == GE || code == UNGT)
15616 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
15617 if (code == LE || code == GE || code == UNEQ)
15618 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
15619 if (code == UNLT || code == UNGT || code == UNEQ)
15620 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
15621
15622 gcc_assert (n == 2);
15623
15624 rtx cc = gen_reg_rtx (CCEQmode);
15625 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
15626 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
15627
15628 return cc;
15629 }
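
/* E.g. for code == LE the two conditions are LT and EQ, so this emits a
   cror that ORs the LT and EQ bits of X into the new CCEQ register,
   which a subsequent beq can test directly.  */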
15630
15631 void
15632 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
15633 {
15634 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
15635 rtx_code cond_code = GET_CODE (condition_rtx);
15636
15637 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
15638 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
15639 ;
15640 else if (cond_code == NE
15641 || cond_code == GE || cond_code == LE
15642 || cond_code == GEU || cond_code == LEU
15643 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
15644 {
15645 rtx not_result = gen_reg_rtx (CCEQmode);
15646 rtx not_op, rev_cond_rtx;
15647 machine_mode cc_mode;
15648
15649 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
15650
15651 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
15652 SImode, XEXP (condition_rtx, 0), const0_rtx);
15653 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
15654 emit_insn (gen_rtx_SET (not_result, not_op));
15655 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
15656 }
15657
15658 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
15659 if (op_mode == VOIDmode)
15660 op_mode = GET_MODE (XEXP (operands[1], 1));
15661
15662 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
15663 {
15664 PUT_MODE (condition_rtx, DImode);
15665 convert_move (operands[0], condition_rtx, 0);
15666 }
15667 else
15668 {
15669 PUT_MODE (condition_rtx, SImode);
15670 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
15671 }
15672 }
15673
15674 /* Emit a conditional branch: test the comparison OPERANDS[0] in mode MODE and branch to the label in OPERANDS[3]. */
15675
15676 void
15677 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
15678 {
15679 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
15680 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
15681 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
15682 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
15683 }
15684
15685 /* Return the string to output a conditional branch to LABEL, which is
15686 the operand template of the label, or NULL if the branch is really a
15687 conditional return.
15688
15689 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15690 condition code register and its mode specifies what kind of
15691 comparison we made.
15692
15693 REVERSED is nonzero if we should reverse the sense of the comparison.
15694
15695 INSN is the insn. */
15696
15697 char *
15698 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
15699 {
15700 static char string[64];
15701 enum rtx_code code = GET_CODE (op);
15702 rtx cc_reg = XEXP (op, 0);
15703 machine_mode mode = GET_MODE (cc_reg);
15704 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
15705 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
15706 int really_reversed = reversed ^ need_longbranch;
15707 char *s = string;
15708 const char *ccode;
15709 const char *pred;
15710 rtx note;
15711
15712 validate_condition_mode (code, mode);
15713
15714 /* Work out which way this really branches. We could always use
15715 reverse_condition_maybe_unordered here, but distinguishing the cases
15716 makes the resulting assembler clearer. */
15717 if (really_reversed)
15718 {
15719 /* Reversal of FP compares needs care -- an ordered compare
15720 becomes an unordered compare and vice versa. */
15721 if (mode == CCFPmode)
15722 code = reverse_condition_maybe_unordered (code);
15723 else
15724 code = reverse_condition (code);
15725 }
15726
15727 switch (code)
15728 {
15729 /* Not all of these are actually distinct opcodes, but
15730 we distinguish them for clarity of the resulting assembler. */
15731 case NE: case LTGT:
15732 ccode = "ne"; break;
15733 case EQ: case UNEQ:
15734 ccode = "eq"; break;
15735 case GE: case GEU:
15736 ccode = "ge"; break;
15737 case GT: case GTU: case UNGT:
15738 ccode = "gt"; break;
15739 case LE: case LEU:
15740 ccode = "le"; break;
15741 case LT: case LTU: case UNLT:
15742 ccode = "lt"; break;
15743 case UNORDERED: ccode = "un"; break;
15744 case ORDERED: ccode = "nu"; break;
15745 case UNGE: ccode = "nl"; break;
15746 case UNLE: ccode = "ng"; break;
15747 default:
15748 gcc_unreachable ();
15749 }
15750
15751 /* Maybe we have a guess as to how likely the branch is. */
15752 pred = "";
15753 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15754 if (note != NULL_RTX)
15755 {
15756 /* PROB is the difference from 50%. */
15757 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15758 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15759
15760 /* Only hint for highly probable/improbable branches on newer cpus when
15761 we have real profile data, as static prediction overrides processor
15762 dynamic prediction. For older cpus we may as well always hint, but
15763 assume not taken for branches that are very close to 50% as a
15764 mispredicted taken branch is more expensive than a
15765 mispredicted not-taken branch. */
15766 if (rs6000_always_hint
15767 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15768 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15769 && br_prob_note_reliable_p (note)))
15770 {
15771 if (abs (prob) > REG_BR_PROB_BASE / 20
15772 && ((prob > 0) ^ need_longbranch))
15773 pred = "+";
15774 else
15775 pred = "-";
15776 }
15777 }
15778
15779 if (label == NULL)
15780 s += sprintf (s, "b%slr%s ", ccode, pred);
15781 else
15782 s += sprintf (s, "b%s%s ", ccode, pred);
15783
15784 /* We need to escape any '%' characters in the reg_names string.
15785 Assume they'd only be the first character.... */
15786 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15787 *s++ = '%';
15788 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
15789
15790 if (label != NULL)
15791 {
15792 /* If the branch distance was too far, we may have to use an
15793 unconditional branch to go the distance. */
15794 if (need_longbranch)
15795 s += sprintf (s, ",$+8\n\tb %s", label);
15796 else
15797 s += sprintf (s, ",%s", label);
15798 }
15799
15800 return string;
15801 }
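
/* Illustrative outputs of the function above (label names hypothetical;
   CR fields print as bare numbers unless -mregnames is in effect):
     "beq+ 0,.L5"          EQ on cr0, hinted likely taken
     "bgelr 7"             conditional return (LABEL == NULL), GE on cr7
     "bne 0,$+8\n\tb .L5"  long-branch form: the reversed condition
                           skips over an unconditional branch.  */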
15802
15803 /* Return insn for VSX or Altivec comparisons. */
15804
15805 static rtx
15806 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
15807 {
15808 rtx mask;
15809 machine_mode mode = GET_MODE (op0);
15810
15811 switch (code)
15812 {
15813 default:
15814 break;
15815
15816 case GE:
15817 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
15818 return NULL_RTX;
15819 /* FALLTHRU */
15820
15821 case EQ:
15822 case GT:
15823 case GTU:
15824 case ORDERED:
15825 case UNORDERED:
15826 case UNEQ:
15827 case LTGT:
15828 mask = gen_reg_rtx (mode);
15829 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
15830 return mask;
15831 }
15832
15833 return NULL_RTX;
15834 }
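
/* For reference (illustrative, not exhaustive), the directly supported
   codes above map onto instructions such as:
     EQ  -> vcmpequ[bhwd] / xvcmpeq[sd]p
     GT  -> vcmpgts[bhwd] / xvcmpgt[sd]p
     GTU -> vcmpgtu[bhwd]
     GE  -> xvcmpge[sd]p (FP only; integer GE is rejected above)
   while ORDERED/UNORDERED/UNEQ/LTGT expand to combinations of these in
   the machine description.  */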
15835
15836 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
15837 DMODE is the expected destination mode. This is a recursive function. */
15838
15839 static rtx
15840 rs6000_emit_vector_compare (enum rtx_code rcode,
15841 rtx op0, rtx op1,
15842 machine_mode dmode)
15843 {
15844 rtx mask;
15845 bool swap_operands = false;
15846 bool try_again = false;
15847
15848 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
15849 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
15850
15851 /* See if the comparison works as is. */
15852 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15853 if (mask)
15854 return mask;
15855
15856 switch (rcode)
15857 {
15858 case LT:
15859 rcode = GT;
15860 swap_operands = true;
15861 try_again = true;
15862 break;
15863 case LTU:
15864 rcode = GTU;
15865 swap_operands = true;
15866 try_again = true;
15867 break;
15868 case NE:
15869 case UNLE:
15870 case UNLT:
15871 case UNGE:
15872 case UNGT:
15873 /* Invert condition and try again.
15874 e.g., A != B becomes ~(A==B). */
15875 {
15876 enum rtx_code rev_code;
15877 enum insn_code nor_code;
15878 rtx mask2;
15879
15880 rev_code = reverse_condition_maybe_unordered (rcode);
15881 if (rev_code == UNKNOWN)
15882 return NULL_RTX;
15883
15884 nor_code = optab_handler (one_cmpl_optab, dmode);
15885 if (nor_code == CODE_FOR_nothing)
15886 return NULL_RTX;
15887
15888 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
15889 if (!mask2)
15890 return NULL_RTX;
15891
15892 mask = gen_reg_rtx (dmode);
15893 emit_insn (GEN_FCN (nor_code) (mask, mask2));
15894 return mask;
15895 }
15896 break;
15897 case GE:
15898 case GEU:
15899 case LE:
15900 case LEU:
15901 /* Try GT/GTU/LT/LTU OR EQ */
15902 {
15903 rtx c_rtx, eq_rtx;
15904 enum insn_code ior_code;
15905 enum rtx_code new_code;
15906
15907 switch (rcode)
15908 {
15909 case GE:
15910 new_code = GT;
15911 break;
15912
15913 case GEU:
15914 new_code = GTU;
15915 break;
15916
15917 case LE:
15918 new_code = LT;
15919 break;
15920
15921 case LEU:
15922 new_code = LTU;
15923 break;
15924
15925 default:
15926 gcc_unreachable ();
15927 }
15928
15929 ior_code = optab_handler (ior_optab, dmode);
15930 if (ior_code == CODE_FOR_nothing)
15931 return NULL_RTX;
15932
15933 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
15934 if (!c_rtx)
15935 return NULL_RTX;
15936
15937 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
15938 if (!eq_rtx)
15939 return NULL_RTX;
15940
15941 mask = gen_reg_rtx (dmode);
15942 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
15943 return mask;
15944 }
15945 break;
15946 default:
15947 return NULL_RTX;
15948 }
15949
15950 if (try_again)
15951 {
15952 if (swap_operands)
15953 std::swap (op0, op1);
15954
15955 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
15956 if (mask)
15957 return mask;
15958 }
15959
15960 /* You only get two chances. */
15961 return NULL_RTX;
15962 }
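
/* Worked example of the recursion above (illustrative): for a V4SI
   "a <= b", LE is split into LT | EQ; LT in turn has no direct
   instruction, so it becomes GT with swapped operands, giving the
   final form (b > a) | (a == b).  */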
15963
15964 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
15965 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
15966 operands for the relation operation COND. */
15967
15968 int
15969 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
15970 rtx cond, rtx cc_op0, rtx cc_op1)
15971 {
15972 machine_mode dest_mode = GET_MODE (dest);
15973 machine_mode mask_mode = GET_MODE (cc_op0);
15974 enum rtx_code rcode = GET_CODE (cond);
15975 rtx mask;
15976 bool invert_move = false;
15977
15978 if (VECTOR_UNIT_NONE_P (dest_mode))
15979 return 0;
15980
15981 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
15982 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
15983
15984 switch (rcode)
15985 {
15986 /* Swap operands if we can, and fall back to doing the operation as
15987 specified, and doing a NOR to invert the test. */
15988 case NE:
15989 case UNLE:
15990 case UNLT:
15991 case UNGE:
15992 case UNGT:
15993 /* Invert condition and try again.
15994 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
15995 invert_move = true;
15996 rcode = reverse_condition_maybe_unordered (rcode);
15997 if (rcode == UNKNOWN)
15998 return 0;
15999 break;
16000
16001 case GE:
16002 case LE:
16003 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
16004 {
16005 /* Invert condition to avoid compound test. */
16006 invert_move = true;
16007 rcode = reverse_condition (rcode);
16008 }
16009 break;
16010
16011 case GTU:
16012 case GEU:
16013 case LTU:
16014 case LEU:
16015
16016 /* Invert condition to avoid compound test if necessary. */
16017 if (rcode == GEU || rcode == LEU)
16018 {
16019 invert_move = true;
16020 rcode = reverse_condition (rcode);
16021 }
16022 break;
16023
16024 default:
16025 break;
16026 }
16027
16028 /* Get the vector mask for the given relational operations. */
16029 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
16030
16031 if (!mask)
16032 return 0;
16033
16034 if (mask_mode != dest_mode)
16035 mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);
16036
16037 if (invert_move)
16038 std::swap (op_true, op_false);
16039
16040 /* The comparison mask is known to be all-ones/all-zeros (-1/0) per lane; exploit that when the arms are those constants. */
16041 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
16042 && (GET_CODE (op_true) == CONST_VECTOR
16043 || GET_CODE (op_false) == CONST_VECTOR))
16044 {
16045 rtx constant_0 = CONST0_RTX (dest_mode);
16046 rtx constant_m1 = CONSTM1_RTX (dest_mode);
16047
16048 if (op_true == constant_m1 && op_false == constant_0)
16049 {
16050 emit_move_insn (dest, mask);
16051 return 1;
16052 }
16053
16054 else if (op_true == constant_0 && op_false == constant_m1)
16055 {
16056 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
16057 return 1;
16058 }
16059
16060 /* If we can't use the vector comparison directly, perhaps we can use
16061 the mask for the true or false fields, instead of loading up a
16062 constant. */
16063 if (op_true == constant_m1)
16064 op_true = mask;
16065
16066 if (op_false == constant_0)
16067 op_false = mask;
16068 }
16069
16070 if (!REG_P (op_true) && !SUBREG_P (op_true))
16071 op_true = force_reg (dest_mode, op_true);
16072
16073 if (!REG_P (op_false) && !SUBREG_P (op_false))
16074 op_false = force_reg (dest_mode, op_false);
16075
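
/* Emit the bitwise select: dest = (~mask & op_false) | (mask & op_true).
   On VSX this typically matches the xxsel pattern; on Altivec, vsel.  */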
16076 rtx tmp = gen_rtx_IOR (dest_mode,
16077 gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
16078 op_false),
16079 gen_rtx_AND (dest_mode, mask, op_true));
16080 emit_insn (gen_rtx_SET (dest, tmp));
16081 return 1;
16082 }
16083
16084 /* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to
16085 implement a maximum or minimum with "C" semantics.
16086
16087 Unless -ffast-math is in effect, you can't use these instructions for
16088 forms that implicitly reverse the condition, because the comparison
16089 might generate a NaN or a signed zero.
16090
16091 I.e. the following can always be replaced:
16092 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
16093 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
16094 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
16095 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
16096
16097 The following can be replaced only if -ffast-math is used:
16098 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
16099 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
16100 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
16101 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
16102
16103 Move TRUE_COND to DEST if OP of the operands of the last comparison is
16104 nonzero/true, FALSE_COND if it is zero/false.
16105
16106 Return false if we can't generate the appropriate minimum or maximum, and
16107 true if we did. */
16108
16109 static bool
16110 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16111 {
16112 enum rtx_code code = GET_CODE (op);
16113 rtx op0 = XEXP (op, 0);
16114 rtx op1 = XEXP (op, 1);
16115 machine_mode compare_mode = GET_MODE (op0);
16116 machine_mode result_mode = GET_MODE (dest);
16117
16118 if (result_mode != compare_mode)
16119 return false;
16120
16121 /* Per the function comment above, the checks below expect GE/GT/LE/LT,
16122 but for their reversible equivalents UNLT/UNLE/UNGT/UNGE we do the
16123 reversal first so that the following checks need to support fewer
16124 cases, like:
16125
16126 (a UNLT b) ? op1 : op2 => (a >= b) ? op2 : op1;
16127 (a UNLE b) ? op1 : op2 => (a > b) ? op2 : op1;
16128 (a UNGT b) ? op1 : op2 => (a <= b) ? op2 : op1;
16129 (a UNGE b) ? op1 : op2 => (a < b) ? op2 : op1;
16130
16131 By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
16132 that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
16133 have to check for fast-math or the like. */
16134 if (code == UNGE || code == UNGT || code == UNLE || code == UNLT)
16135 {
16136 code = reverse_condition_maybe_unordered (code);
16137 std::swap (true_cond, false_cond);
16138 }
16139
16140 bool max_p;
16141 if (code == GE || code == GT)
16142 max_p = true;
16143 else if (code == LE || code == LT)
16144 max_p = false;
16145 else
16146 return false;
16147
16148 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
16149 ;
16150
16151 /* Only when NaNs and signed-zeros are not in effect, smax could be
16152 used for `op0 < op1 ? op1 : op0`, and smin could be used for
16153 `op0 > op1 ? op1 : op0`. */
16154 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
16155 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
16156 max_p = !max_p;
16157
16158 else
16159 return false;
16160
16161 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
16162 return true;
16163 }
16164
16165 /* Possibly emit a floating point conditional move by generating a
16166 compare-and-set-mask instruction and an XXSEL select instruction.
16167
16168 Move TRUE_COND to DEST if OP of the operands of the last comparison is
16169 nonzero/true, FALSE_COND if it is zero/false.
16170
16171 Return false if the operation cannot be generated, and true if we could
16172 generate the instruction. */
16173
16174 static bool
16175 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16176 {
16177 enum rtx_code code = GET_CODE (op);
16178 rtx op0 = XEXP (op, 0);
16179 rtx op1 = XEXP (op, 1);
16180 machine_mode compare_mode = GET_MODE (op0);
16181 machine_mode result_mode = GET_MODE (dest);
16182 rtx compare_rtx;
16183 rtx cmove_rtx;
16184 rtx clobber_rtx;
16185
16186 if (!can_create_pseudo_p ())
16187 return false;
16188
16189 /* We allow the comparison to be in either SFmode or DFmode, and likewise
16190 the true/false arms. I.e. we allow:
16191
16192 float a, b;
16193 double c, d, r;
16194
16195 r = (a == b) ? c : d;
16196
16197 and:
16198
16199 double a, b;
16200 float c, d, r;
16201
16202 r = (a == b) ? c : d;
16203
16204 but we don't allow intermixing the IEEE 128-bit floating point types with
16205 the 32/64-bit scalar types. */
16206
16207 if (!(compare_mode == result_mode
16208 || (compare_mode == SFmode && result_mode == DFmode)
16209 || (compare_mode == DFmode && result_mode == SFmode)))
16210 return false;
16211
16212 switch (code)
16213 {
16214 case EQ:
16215 case GE:
16216 case GT:
16217 break;
16218
16219 case NE:
16220 case LT:
16221 case LE:
16222 code = swap_condition (code);
16223 std::swap (op0, op1);
16224 break;
16225
16226 default:
16227 return false;
16228 }
16229
16230 /* Generate: [(parallel [(set (dest)
16231 (if_then_else (op (cmp1) (cmp2))
16232 (true)
16233 (false)))
16234 (clobber (scratch))])]. */
16235
16236 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
16237 cmove_rtx = gen_rtx_SET (dest,
16238 gen_rtx_IF_THEN_ELSE (result_mode,
16239 compare_rtx,
16240 true_cond,
16241 false_cond));
16242
16243 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
16244 emit_insn (gen_rtx_PARALLEL (VOIDmode,
16245 gen_rtvec (2, cmove_rtx, clobber_rtx)));
16246
16247 return true;
16248 }
16249
16250 /* Helper function to return true if the target has a compare-and-set-mask
16251 instruction that can be used with XXSEL to implement a conditional move.
16252 It is also assumed that such a target also supports the "C" minimum and
16253 maximum instructions. */
16254
16255 static bool
16256 have_compare_and_set_mask (machine_mode mode)
16257 {
16258 switch (mode)
16259 {
16260 case E_SFmode:
16261 case E_DFmode:
16262 return TARGET_P9_MINMAX;
16263
16264 case E_KFmode:
16265 case E_TFmode:
16266 return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
16267
16268 default:
16269 break;
16270 }
16271
16272 return false;
16273 }
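
/* Illustratively: for SF/DF this relies on the ISA 3.0 (power9)
   xscmp{eq,gt,ge}dp instructions, which set an all-ones/all-zeros mask
   in a vector-scalar register, while KF/TF use the ISA 3.1 (power10)
   xscmp{eq,gt,ge}qp forms; XXSEL then selects per the mask.  */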
16274
16275 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
16276 operands of the last comparison is nonzero/true, FALSE_COND if it
16277 is zero/false. Return false if the hardware has no such operation. */
16278
16279 bool
16280 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16281 {
16282 enum rtx_code code = GET_CODE (op);
16283 rtx op0 = XEXP (op, 0);
16284 rtx op1 = XEXP (op, 1);
16285 machine_mode compare_mode = GET_MODE (op0);
16286 machine_mode result_mode = GET_MODE (dest);
16287 rtx temp;
16288 bool is_against_zero;
16289
16290 /* These modes should always match. */
16291 if (GET_MODE (op1) != compare_mode
16292 /* In the isel case however, we can use a compare immediate, so
16293 op1 may be a small constant. */
16294 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
16295 return false;
16296 if (GET_MODE (true_cond) != result_mode)
16297 return false;
16298 if (GET_MODE (false_cond) != result_mode)
16299 return false;
16300
16301 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16302 instructions. */
16303 if (have_compare_and_set_mask (compare_mode)
16304 && have_compare_and_set_mask (result_mode))
16305 {
16306 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
16307 return true;
16308
16309 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
16310 return true;
16311 }
16312
16313 /* Don't allow using floating point comparisons for integer results for
16314 now. */
16315 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
16316 return false;
16317
16318 /* First, work out if the hardware can do this at all, or
16319 if it's too slow.... */
16320 if (!FLOAT_MODE_P (compare_mode))
16321 {
16322 if (TARGET_ISEL)
16323 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
16324 return false;
16325 }
16326
16327 is_against_zero = op1 == CONST0_RTX (compare_mode);
16328
16329 /* A floating-point subtract might overflow, underflow, or produce
16330 an inexact result, thus changing the floating-point flags, so it
16331 can't be generated if we care about that. It's safe if one side
16332 of the construct is zero, since then no subtract will be
16333 generated. */
16334 if (SCALAR_FLOAT_MODE_P (compare_mode)
16335 && flag_trapping_math && ! is_against_zero)
16336 return false;
16337
16338 /* Eliminate half of the comparisons by switching operands; this
16339 makes the remaining code simpler. */
16340 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
16341 || code == LTGT || code == LT || code == UNLE)
16342 {
16343 code = reverse_condition_maybe_unordered (code);
16344 temp = true_cond;
16345 true_cond = false_cond;
16346 false_cond = temp;
16347 }
16348
16349 /* UNEQ and LTGT take four instructions for a comparison with zero,
16350 so it'll probably be faster to use a branch here too. */
16351 if (code == UNEQ && HONOR_NANS (compare_mode))
16352 return false;
16353
16354 /* We're going to try to implement comparisons by performing
16355 a subtract, then comparing against zero. Unfortunately,
16356 Inf - Inf is NaN, which is not zero, and so if we don't
16357 know that the operand is finite and the comparison
16358 would treat EQ differently from UNORDERED, we can't do it. */
16359 if (HONOR_INFINITIES (compare_mode)
16360 && code != GT && code != UNGE
16361 && (!CONST_DOUBLE_P (op1)
16362 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
16363 /* Constructs of the form (a OP b ? a : b) are safe. */
16364 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
16365 || (! rtx_equal_p (op0, true_cond)
16366 && ! rtx_equal_p (op1, true_cond))))
16367 return false;
16368
16369 /* At this point we know we can use fsel. */
16370
16371 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16372 is no fsel instruction. */
16373 if (compare_mode != SFmode && compare_mode != DFmode)
16374 return false;
16375
16376 /* Reduce the comparison to a comparison against zero. */
16377 if (! is_against_zero)
16378 {
16379 temp = gen_reg_rtx (compare_mode);
16380 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
16381 op0 = temp;
16382 op1 = CONST0_RTX (compare_mode);
16383 }
16384
16385 /* If we don't care about NaNs we can reduce some of the comparisons
16386 down to faster ones. */
16387 if (! HONOR_NANS (compare_mode))
16388 switch (code)
16389 {
16390 case GT:
16391 code = LE;
16392 temp = true_cond;
16393 true_cond = false_cond;
16394 false_cond = temp;
16395 break;
16396 case UNGE:
16397 code = GE;
16398 break;
16399 case UNEQ:
16400 code = EQ;
16401 break;
16402 default:
16403 break;
16404 }
16405
16406 /* Now, reduce everything down to a GE. */
16407 switch (code)
16408 {
16409 case GE:
16410 break;
16411
16412 case LE:
16413 temp = gen_reg_rtx (compare_mode);
16414 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16415 op0 = temp;
16416 break;
16417
16418 case ORDERED:
16419 temp = gen_reg_rtx (compare_mode);
16420 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
16421 op0 = temp;
16422 break;
16423
16424 case EQ:
16425 temp = gen_reg_rtx (compare_mode);
16426 emit_insn (gen_rtx_SET (temp,
16427 gen_rtx_NEG (compare_mode,
16428 gen_rtx_ABS (compare_mode, op0))));
16429 op0 = temp;
16430 break;
16431
16432 case UNGE:
16433 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16434 temp = gen_reg_rtx (result_mode);
16435 emit_insn (gen_rtx_SET (temp,
16436 gen_rtx_IF_THEN_ELSE (result_mode,
16437 gen_rtx_GE (VOIDmode,
16438 op0, op1),
16439 true_cond, false_cond)));
16440 false_cond = true_cond;
16441 true_cond = temp;
16442
16443 temp = gen_reg_rtx (compare_mode);
16444 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16445 op0 = temp;
16446 break;
16447
16448 case GT:
16449 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16450 temp = gen_reg_rtx (result_mode);
16451 emit_insn (gen_rtx_SET (temp,
16452 gen_rtx_IF_THEN_ELSE (result_mode,
16453 gen_rtx_GE (VOIDmode,
16454 op0, op1),
16455 true_cond, false_cond)));
16456 true_cond = false_cond;
16457 false_cond = temp;
16458
16459 temp = gen_reg_rtx (compare_mode);
16460 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16461 op0 = temp;
16462 break;
16463
16464 default:
16465 gcc_unreachable ();
16466 }
16467
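/* Everything has been reduced to a GE-against-zero test, which is the
   fsel shape: "fsel dest,op0,true,false" computes
   dest = (op0 >= 0.0) ? true : false, with a NaN op0 selecting the
   false arm.  */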
16468 emit_insn (gen_rtx_SET (dest,
16469 gen_rtx_IF_THEN_ELSE (result_mode,
16470 gen_rtx_GE (VOIDmode,
16471 op0, op1),
16472 true_cond, false_cond)));
16473 return true;
16474 }
16475
16476 /* Same as above, but for ints (isel). */
16477
16478 bool
16479 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16480 {
16481 rtx condition_rtx, cr;
16482 machine_mode mode = GET_MODE (dest);
16483 enum rtx_code cond_code;
16484 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
16485 bool signedp;
16486
16487 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
16488 return false;
16489
16490 /* PR104335: We now need to expect CC-mode "comparisons"
16491 coming from ifcvt. The following code expects proper
16492 comparisons, so it is better to bail out here. */
16493 if (GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC)
16494 return false;
16495
16496 /* We still have to do the compare, because isel doesn't do a
16497 compare, it just looks at the CRx bits set by a previous compare
16498 instruction. */
16499 condition_rtx = rs6000_generate_compare (op, mode);
16500 cond_code = GET_CODE (condition_rtx);
16501 cr = XEXP (condition_rtx, 0);
16502 signedp = GET_MODE (cr) == CCmode;
16503
16504 isel_func = (mode == SImode
16505 ? (signedp ? gen_isel_cc_si : gen_isel_ccuns_si)
16506 : (signedp ? gen_isel_cc_di : gen_isel_ccuns_di));
16507
16508 switch (cond_code)
16509 {
16510 case LT: case GT: case LTU: case GTU: case EQ:
16511 /* isel handles these directly. */
16512 break;
16513
16514 default:
16515 /* We need to swap the sense of the comparison. */
16516 {
16517 std::swap (false_cond, true_cond);
16518 PUT_CODE (condition_rtx, reverse_condition (cond_code));
16519 }
16520 break;
16521 }
16522
16523 false_cond = force_reg (mode, false_cond);
16524 if (true_cond != const0_rtx)
16525 true_cond = force_reg (mode, true_cond);
16526
16527 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
16528
16529 return true;
16530 }
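
/* Illustrative sequence for "d = (a == b) ? t : f" in SImode (register
   numbers hypothetical):
       cmpw 0,rA,rB
       isel rD,rT,rF,2      # CR0 bit 2 is the EQ bit
   Conditions isel cannot test directly (GE, LE, NE, ...) are handled
   above by reversing the condition and swapping the arms.  */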
16531
16532 void
16533 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16534 {
16535 machine_mode mode = GET_MODE (op0);
16536 enum rtx_code c;
16537 rtx target;
16538
16539 /* VSX/altivec have direct min/max insns. */
16540 if ((code == SMAX || code == SMIN)
16541 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
16542 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
16543 || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
16544 {
16545 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
16546 return;
16547 }
16548
16549 if (code == SMAX || code == SMIN)
16550 c = GE;
16551 else
16552 c = GEU;
16553
16554 if (code == SMAX || code == UMAX)
16555 target = emit_conditional_move (dest, { c, op0, op1, mode },
16556 op0, op1, mode, 0);
16557 else
16558 target = emit_conditional_move (dest, { c, op0, op1, mode },
16559 op1, op0, mode, 0);
16560 gcc_assert (target);
16561 if (target != dest)
16562 emit_move_insn (dest, target);
16563 }
16564
16565 /* A subroutine of the atomic operation splitters. Jump to LABEL if
16566 COND is true. Mark the jump as unlikely to be taken. */
16567
16568 static void
16569 emit_unlikely_jump (rtx cond, rtx label)
16570 {
16571 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
16572 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
16573 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
16574 }
16575
16576 /* A subroutine of the atomic operation splitters. Emit a load-locked
16577 instruction in MODE. For QI/HImode, possibly use a pattern that includes
16578 the zero_extend operation. */
16579
16580 static void
16581 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
16582 {
16583 rtx (*fn) (rtx, rtx) = NULL;
16584
16585 switch (mode)
16586 {
16587 case E_QImode:
16588 fn = gen_load_lockedqi;
16589 break;
16590 case E_HImode:
16591 fn = gen_load_lockedhi;
16592 break;
16593 case E_SImode:
16594 if (GET_MODE (mem) == QImode)
16595 fn = gen_load_lockedqi_si;
16596 else if (GET_MODE (mem) == HImode)
16597 fn = gen_load_lockedhi_si;
16598 else
16599 fn = gen_load_lockedsi;
16600 break;
16601 case E_DImode:
16602 fn = gen_load_lockeddi;
16603 break;
16604 case E_TImode:
16605 fn = gen_load_lockedti;
16606 break;
16607 default:
16608 gcc_unreachable ();
16609 }
16610 emit_insn (fn (reg, mem));
16611 }
16612
16613 /* A subroutine of the atomic operation splitters. Emit a store-conditional
16614 instruction in MODE. */
16615
16616 static void
16617 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
16618 {
16619 rtx (*fn) (rtx, rtx, rtx) = NULL;
16620
16621 switch (mode)
16622 {
16623 case E_QImode:
16624 fn = gen_store_conditionalqi;
16625 break;
16626 case E_HImode:
16627 fn = gen_store_conditionalhi;
16628 break;
16629 case E_SImode:
16630 fn = gen_store_conditionalsi;
16631 break;
16632 case E_DImode:
16633 fn = gen_store_conditionaldi;
16634 break;
16635 case E_TImode:
16636 fn = gen_store_conditionalti;
16637 break;
16638 default:
16639 gcc_unreachable ();
16640 }
16641
16642 /* Emit sync before stwcx. to address PPC405 Erratum. */
16643 if (PPC405_ERRATUM77)
16644 emit_insn (gen_hwsync ());
16645
16646 emit_insn (fn (res, mem, val));
16647 }
16648
16649 /* Expand barriers before and after a load_locked/store_cond sequence. */
16650
16651 static rtx
16652 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
16653 {
16654 rtx addr = XEXP (mem, 0);
16655
16656 if (!legitimate_indirect_address_p (addr, reload_completed)
16657 && !legitimate_indexed_address_p (addr, reload_completed))
16658 {
16659 addr = force_reg (Pmode, addr);
16660 mem = replace_equiv_address_nv (mem, addr);
16661 }
16662
16663 switch (model)
16664 {
16665 case MEMMODEL_RELAXED:
16666 case MEMMODEL_CONSUME:
16667 case MEMMODEL_ACQUIRE:
16668 break;
16669 case MEMMODEL_RELEASE:
16670 case MEMMODEL_ACQ_REL:
16671 emit_insn (gen_lwsync ());
16672 break;
16673 case MEMMODEL_SEQ_CST:
16674 emit_insn (gen_hwsync ());
16675 break;
16676 default:
16677 gcc_unreachable ();
16678 }
16679 return mem;
16680 }
16681
16682 static void
16683 rs6000_post_atomic_barrier (enum memmodel model)
16684 {
16685 switch (model)
16686 {
16687 case MEMMODEL_RELAXED:
16688 case MEMMODEL_CONSUME:
16689 case MEMMODEL_RELEASE:
16690 break;
16691 case MEMMODEL_ACQUIRE:
16692 case MEMMODEL_ACQ_REL:
16693 case MEMMODEL_SEQ_CST:
16694 emit_insn (gen_isync ());
16695 break;
16696 default:
16697 gcc_unreachable ();
16698 }
16699 }
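
/* Summary of the barrier placement implemented by the two functions
   above:
     model     before ll/sc   after ll/sc
     relaxed   (none)         (none)
     consume   (none)         (none)
     acquire   (none)         isync
     release   lwsync         (none)
     acq_rel   lwsync         isync
     seq_cst   hwsync         isync  */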
16700
16701 /* A subroutine of the various atomic expanders. For sub-word operations,
16702 we must adjust things to operate on SImode. Given the original MEM,
16703 return a new aligned memory. Also build and return the quantities by
16704 which to shift and mask. */
16705
16706 static rtx
16707 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
16708 {
16709 rtx addr, align, shift, mask, mem;
16710 HOST_WIDE_INT shift_mask;
16711 machine_mode mode = GET_MODE (orig_mem);
16712
16713 /* For smaller modes, we have to implement this via SImode. */
16714 shift_mask = (mode == QImode ? 0x18 : 0x10);
16715
16716 addr = XEXP (orig_mem, 0);
16717 addr = force_reg (GET_MODE (addr), addr);
16718
16719 /* Aligned memory containing subword. Generate a new memory. We
16720 do not want any of the existing MEM_ATTR data, as we're now
16721 accessing memory outside the original object. */
16722 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
16723 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16724 mem = gen_rtx_MEM (SImode, align);
16725 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
16726 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
16727 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
16728
16729 /* Shift amount for subword relative to aligned word. */
16730 shift = gen_reg_rtx (SImode);
16731 addr = gen_lowpart (SImode, addr);
16732 rtx tmp = gen_reg_rtx (SImode);
16733 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
16734 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
16735 if (BYTES_BIG_ENDIAN)
16736 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
16737 shift, 1, OPTAB_LIB_WIDEN);
16738 *pshift = shift;
16739
16740 /* Mask for insertion. */
16741 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
16742 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
16743 *pmask = mask;
16744
16745 return mem;
16746 }
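
/* Worked example (illustrative): a QImode access at address 0x1003
   yields align = 0x1003 & -4 = 0x1000, shift = (0x1003 << 3) & 0x18 = 24
   on little-endian (XORed with 0x18, giving 0, on big-endian), and
   mask = 0xff << shift.  */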
16747
16748 /* A subroutine of the various atomic expanders. For sub-word operands,
16749 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
16750
16751 static rtx
16752 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
16753 {
16754 rtx x;
16755
16756 x = gen_reg_rtx (SImode);
16757 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
16758 gen_rtx_NOT (SImode, mask),
16759 oldval)));
16760
16761 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
16762
16763 return x;
16764 }
16765
16766 /* A subroutine of the various atomic expanders. For sub-word operands,
16767 extract WIDE to NARROW via SHIFT. */
16768
16769 static void
16770 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
16771 {
16772 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
16773 wide, 1, OPTAB_LIB_WIDEN);
16774 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
16775 }
16776
16777 /* Expand an atomic compare and swap operation. */
16778
16779 void
16780 rs6000_expand_atomic_compare_and_swap (rtx operands[])
16781 {
16782 rtx boolval, retval, mem, oldval, newval, cond;
16783 rtx label1, label2, x, mask, shift;
16784 machine_mode mode, orig_mode;
16785 enum memmodel mod_s, mod_f;
16786 bool is_weak;
16787
16788 boolval = operands[0];
16789 retval = operands[1];
16790 mem = operands[2];
16791 oldval = operands[3];
16792 newval = operands[4];
16793 is_weak = (INTVAL (operands[5]) != 0);
16794 mod_s = memmodel_base (INTVAL (operands[6]));
16795 mod_f = memmodel_base (INTVAL (operands[7]));
16796 orig_mode = mode = GET_MODE (mem);
16797
16798 mask = shift = NULL_RTX;
16799 if (mode == QImode || mode == HImode)
16800 {
16801 /* Before power8, we didn't have access to lbarx/lharx, so generate a
16802 lwarx plus shift/mask operations. With power8, we need to do the
16803 comparison in SImode, but the store is still done in QI/HImode. */
16804 oldval = convert_modes (SImode, mode, oldval, 1);
16805
16806 if (!TARGET_SYNC_HI_QI)
16807 {
16808 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16809
16810 /* Shift and mask OLDVAL into position within the word. */
16811 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
16812 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16813
16814 /* Shift and mask NEWVAL into position within the word. */
16815 newval = convert_modes (SImode, mode, newval, 1);
16816 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
16817 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16818 }
16819
16820 /* Prepare to adjust the return value. */
16821 retval = gen_reg_rtx (SImode);
16822 mode = SImode;
16823 }
16824 else if (reg_overlap_mentioned_p (retval, oldval))
16825 oldval = copy_to_reg (oldval);
16826
16827 if (mode != TImode && !reg_or_short_operand (oldval, mode))
16828 oldval = copy_to_mode_reg (mode, oldval);
16829
16830 if (reg_overlap_mentioned_p (retval, newval))
16831 newval = copy_to_reg (newval);
16832
16833 mem = rs6000_pre_atomic_barrier (mem, mod_s);
16834
16835 label1 = NULL_RTX;
16836 if (!is_weak)
16837 {
16838 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16839 emit_label (XEXP (label1, 0));
16840 }
16841 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16842
16843 emit_load_locked (mode, retval, mem);
16844
16845 x = retval;
16846 if (mask)
16847 x = expand_simple_binop (SImode, AND, retval, mask,
16848 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16849
16850 cond = gen_reg_rtx (CCmode);
16851 /* If we have TImode, synthesize a comparison. */
16852 if (mode != TImode)
16853 x = gen_rtx_COMPARE (CCmode, x, oldval);
16854 else
16855 {
16856 rtx xor1_result = gen_reg_rtx (DImode);
16857 rtx xor2_result = gen_reg_rtx (DImode);
16858 rtx or_result = gen_reg_rtx (DImode);
16859 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
16860 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
16861 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
16862 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
16863
16864 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
16865 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
16866 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
16867 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
16868 }
16869
16870 emit_insn (gen_rtx_SET (cond, x));
16871
16872 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16873 emit_unlikely_jump (x, label2);
16874
16875 x = newval;
16876 if (mask)
16877 x = rs6000_mask_atomic_subword (retval, newval, mask);
16878
16879 emit_store_conditional (orig_mode, cond, mem, x);
16880
16881 if (!is_weak)
16882 {
16883 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16884 emit_unlikely_jump (x, label1);
16885 }
16886
16887 if (!is_mm_relaxed (mod_f))
16888 emit_label (XEXP (label2, 0));
16889
16890 rs6000_post_atomic_barrier (mod_s);
16891
16892 if (is_mm_relaxed (mod_f))
16893 emit_label (XEXP (label2, 0));
16894
16895 if (shift)
16896 rs6000_finish_atomic_subword (operands[1], retval, shift);
16897 else if (mode != GET_MODE (operands[1]))
16898 convert_move (operands[1], retval, 1);
16899
16900 /* In all cases, CR0 contains EQ on success, and NE on failure. */
16901 x = gen_rtx_EQ (SImode, cond, const0_rtx);
16902 emit_insn (gen_rtx_SET (boolval, x));
16903 }
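
/* Illustrative expansion of a strong seq_cst SImode compare-and-swap
   (register names hypothetical; subword modes add the shift/mask
   handling set up above):
         hwsync
     1:  lwarx   rRET,0,rMEM
         cmpw    0,rRET,rOLD
         bne-    0,2f
         stwcx.  rNEW,0,rMEM
         bne-    0,1b
     2:  isync
   CR0.EQ then holds the success flag that is copied into BOOLVAL.  */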
16904
16905 /* Expand an atomic exchange operation. */
16906
16907 void
16908 rs6000_expand_atomic_exchange (rtx operands[])
16909 {
16910 rtx retval, mem, val, cond;
16911 machine_mode mode;
16912 enum memmodel model;
16913 rtx label, x, mask, shift;
16914
16915 retval = operands[0];
16916 mem = operands[1];
16917 val = operands[2];
16918 model = memmodel_base (INTVAL (operands[3]));
16919 mode = GET_MODE (mem);
16920
16921 mask = shift = NULL_RTX;
16922 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
16923 {
16924 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16925
16926 /* Shift and mask VAL into position within the word. */
16927 val = convert_modes (SImode, mode, val, 1);
16928 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16929 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16930
16931 /* Prepare to adjust the return value. */
16932 retval = gen_reg_rtx (SImode);
16933 mode = SImode;
16934 }
16935
16936 mem = rs6000_pre_atomic_barrier (mem, model);
16937
16938 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
16939 emit_label (XEXP (label, 0));
16940
16941 emit_load_locked (mode, retval, mem);
16942
16943 x = val;
16944 if (mask)
16945 x = rs6000_mask_atomic_subword (retval, val, mask);
16946
16947 cond = gen_reg_rtx (CCmode);
16948 emit_store_conditional (mode, cond, mem, x);
16949
16950 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
16951 emit_unlikely_jump (x, label);
16952
16953 rs6000_post_atomic_barrier (model);
16954
16955 if (shift)
16956 rs6000_finish_atomic_subword (operands[0], retval, shift);
16957 }
16958
16959 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
16960 to perform. MEM is the memory on which to operate. VAL is the second
16961 operand of the binary operator. BEFORE and AFTER are optional locations to
16962 return the value of MEM either before or after the operation. MODEL_RTX
16963 is a CONST_INT containing the memory model to use. */
16964
16965 void
16966 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
16967 rtx orig_before, rtx orig_after, rtx model_rtx)
16968 {
16969 enum memmodel model = memmodel_base (INTVAL (model_rtx));
16970 machine_mode mode = GET_MODE (mem);
16971 machine_mode store_mode = mode;
16972 rtx label, x, cond, mask, shift;
16973 rtx before = orig_before, after = orig_after;
16974
16975 mask = shift = NULL_RTX;
16976 /* On power8, do the arithmetic in SImode but keep the native QI/HImode
16977 load and store. On earlier systems, operate on the containing aligned
16978 word and shift/mask to get the proper byte or halfword. */
16979 if (mode == QImode || mode == HImode)
16980 {
16981 if (TARGET_SYNC_HI_QI)
16982 {
16983 val = convert_modes (SImode, mode, val, 1);
16984
16985 /* Prepare to adjust the return value. */
16986 before = gen_reg_rtx (SImode);
16987 if (after)
16988 after = gen_reg_rtx (SImode);
16989 mode = SImode;
16990 }
16991 else
16992 {
16993 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
16994
16995 /* Shift and mask VAL into position with the word. */
16996 val = convert_modes (SImode, mode, val, 1);
16997 val = expand_simple_binop (SImode, ASHIFT, val, shift,
16998 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16999
17000 switch (code)
17001 {
17002 case IOR:
17003 case XOR:
17004 /* We've already zero-extended VAL. That is sufficient to
17005 make certain that it does not affect other bits. */
17006 mask = NULL;
17007 break;
17008
17009 case AND:
17010 /* If we make certain that all of the other bits in VAL are
17011 set, that will be sufficient to not affect other bits. */
17012 x = gen_rtx_NOT (SImode, mask);
17013 x = gen_rtx_IOR (SImode, x, val);
17014 emit_insn (gen_rtx_SET (val, x));
17015 mask = NULL;
17016 break;
17017
17018 case NOT:
17019 case PLUS:
17020 case MINUS:
17021 /* These will all affect bits outside the field and need
17022 adjustment via MASK within the loop. */
17023 break;
17024
17025 default:
17026 gcc_unreachable ();
17027 }
17028
17029 /* Prepare to adjust the return value. */
17030 before = gen_reg_rtx (SImode);
17031 if (after)
17032 after = gen_reg_rtx (SImode);
17033 store_mode = mode = SImode;
17034 }
17035 }
17036
17037 mem = rs6000_pre_atomic_barrier (mem, model);
17038
17039 label = gen_label_rtx ();
17040 emit_label (label);
17041 label = gen_rtx_LABEL_REF (VOIDmode, label);
17042
17043 if (before == NULL_RTX)
17044 before = gen_reg_rtx (mode);
17045
17046 emit_load_locked (mode, before, mem);
17047
17048 if (code == NOT)
17049 {
17050 x = expand_simple_binop (mode, AND, before, val,
17051 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17052 after = expand_simple_unop (mode, NOT, x, after, 1);
17053 }
17054 else
17055 {
17056 after = expand_simple_binop (mode, code, before, val,
17057 after, 1, OPTAB_LIB_WIDEN);
17058 }
17059
17060 x = after;
17061 if (mask)
17062 {
17063 x = expand_simple_binop (SImode, AND, after, mask,
17064 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17065 x = rs6000_mask_atomic_subword (before, x, mask);
17066 }
17067 else if (store_mode != mode)
17068 x = convert_modes (store_mode, mode, x, 1);
17069
17070 cond = gen_reg_rtx (CCmode);
17071 emit_store_conditional (store_mode, cond, mem, x);
17072
17073 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17074 emit_unlikely_jump (x, label);
17075
17076 rs6000_post_atomic_barrier (model);
17077
17078 if (shift)
17079 {
17080 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
17081 then do the calculations in an SImode register. */
17082 if (orig_before)
17083 rs6000_finish_atomic_subword (orig_before, before, shift);
17084 if (orig_after)
17085 rs6000_finish_atomic_subword (orig_after, after, shift);
17086 }
17087 else if (store_mode != mode)
17088 {
17089 /* QImode/HImode on machines with lbarx/lharx where we do the native
17090 operation and then do the calculations in an SImode register. */
17091 if (orig_before)
17092 convert_move (orig_before, before, 1);
17093 if (orig_after)
17094 convert_move (orig_after, after, 1);
17095 }
17096 else if (orig_after && after != orig_after)
17097 emit_move_insn (orig_after, after);
17098 }
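
/* Illustrative expansion of a seq_cst SImode fetch_add (register names
   hypothetical):
         hwsync
     1:  lwarx   rBEFORE,0,rMEM
         add     rAFTER,rBEFORE,rVAL
         stwcx.  rAFTER,0,rMEM
         bne-    0,1b
         isync
   For NOT the loop body is an AND followed by a complement (i.e. NAND),
   and for subword modes the loop runs on the containing aligned word as
   set up above.  */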
17099
17100 static GTY(()) alias_set_type TOC_alias_set = -1;
17101
17102 alias_set_type
17103 get_TOC_alias_set (void)
17104 {
17105 if (TOC_alias_set == -1)
17106 TOC_alias_set = new_alias_set ();
17107 return TOC_alias_set;
17108 }
17109
17110 /* The mode the ABI uses for a word. This is not the same as word_mode
17111 for -m32 -mpowerpc64. This is used to implement various target hooks. */
17112
17113 static scalar_int_mode
17114 rs6000_abi_word_mode (void)
17115 {
17116 return TARGET_32BIT ? SImode : DImode;
17117 }
17118
17119 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
17120 static char *
17121 rs6000_offload_options (void)
17122 {
17123 if (TARGET_64BIT)
17124 return xstrdup ("-foffload-abi=lp64");
17125 else
17126 return xstrdup ("-foffload-abi=ilp32");
17127 }
17128
17129 \f
17130 /* A quick summary of the various types of 'constant-pool tables'
17131 under PowerPC:
17132
17133 Target Flags Name One table per
17134 AIX (none) AIX TOC object file
17135 AIX -mfull-toc AIX TOC object file
17136 AIX -mminimal-toc AIX minimal TOC translation unit
17137 SVR4/EABI (none) SVR4 SDATA object file
17138 SVR4/EABI -fpic SVR4 pic object file
17139 SVR4/EABI -fPIC SVR4 PIC translation unit
17140 SVR4/EABI -mrelocatable EABI TOC function
17141 SVR4/EABI -maix AIX TOC object file
17142 SVR4/EABI -maix -mminimal-toc
17143 AIX minimal TOC translation unit
17144
17145 Name Reg. Set by entries contains:
17146 made by addrs? fp? sum?
17147
17148 AIX TOC 2 crt0 as Y option option
17149 AIX minimal TOC 30 prolog gcc Y Y option
17150 SVR4 SDATA 13 crt0 gcc N Y N
17151 SVR4 pic 30 prolog ld Y not yet N
17152 SVR4 PIC 30 prolog gcc Y option option
17153 EABI TOC 30 prolog gcc Y option option
17154
17155 */
17156
17157 /* Hash functions for the hash table. */
17158
17159 static unsigned
17160 rs6000_hash_constant (rtx k)
17161 {
17162 enum rtx_code code = GET_CODE (k);
17163 machine_mode mode = GET_MODE (k);
17164 unsigned result = (code << 3) ^ mode;
17165 const char *format;
17166 int flen, fidx;
17167
17168 format = GET_RTX_FORMAT (code);
17169 flen = strlen (format);
17170 fidx = 0;
17171
17172 switch (code)
17173 {
17174 case LABEL_REF:
17175 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
17176
17177 case CONST_WIDE_INT:
17178 {
17179 int i;
17180 flen = CONST_WIDE_INT_NUNITS (k);
17181 for (i = 0; i < flen; i++)
17182 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
17183 return result;
17184 }
17185
17186 case CONST_DOUBLE:
17187 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
17188
17189 case CODE_LABEL:
17190 fidx = 3;
17191 break;
17192
17193 default:
17194 break;
17195 }
17196
17197 for (; fidx < flen; fidx++)
17198 switch (format[fidx])
17199 {
17200 case 's':
17201 {
17202 unsigned i, len;
17203 const char *str = XSTR (k, fidx);
17204 len = strlen (str);
17205 result = result * 613 + len;
17206 for (i = 0; i < len; i++)
17207 result = result * 613 + (unsigned) str[i];
17208 break;
17209 }
17210 case 'u':
17211 case 'e':
17212 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
17213 break;
17214 case 'i':
17215 case 'n':
17216 result = result * 613 + (unsigned) XINT (k, fidx);
17217 break;
17218 case 'w':
17219 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
17220 result = result * 613 + (unsigned) XWINT (k, fidx);
17221 else
17222 {
17223 size_t i;
17224 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
17225 result = result * 613 + (unsigned) (XWINT (k, fidx)
17226 >> CHAR_BIT * i);
17227 }
17228 break;
17229 case '0':
17230 break;
17231 default:
17232 gcc_unreachable ();
17233 }
17234
17235 return result;
17236 }
17237
17238 hashval_t
17239 toc_hasher::hash (toc_hash_struct *thc)
17240 {
17241 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
17242 }
17243
17244 /* Compare H1 and H2 for equivalence. */
17245
17246 bool
17247 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
17248 {
17249 rtx r1 = h1->key;
17250 rtx r2 = h2->key;
17251
17252 if (h1->key_mode != h2->key_mode)
17253 return 0;
17254
17255 return rtx_equal_p (r1, r2);
17256 }
17257
17258 /* These are the names given by the C++ front-end to vtables, and
17259 vtable-like objects. Ideally, this logic should not be here;
17260 instead, there should be some programmatic way of inquiring as
17261 to whether or not an object is a vtable. */
17262
17263 #define VTABLE_NAME_P(NAME) \
17264 (startswith (name, "_vt.") \
17265 || startswith (name, "_ZTV") \
17266 || startswith (name, "_ZTT") \
17267 || startswith (name, "_ZTI") \
17268 || startswith (name, "_ZTC"))
17269
17270 #ifdef NO_DOLLAR_IN_LABEL
17271 /* Return a GGC-allocated character string translating dollar signs in
17272 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
17273
17274 const char *
17275 rs6000_xcoff_strip_dollar (const char *name)
17276 {
17277 char *strip, *p;
17278 const char *q;
17279 size_t len;
17280
17281 q = (const char *) strchr (name, '$');
17282
17283 if (q == 0 || q == name)
17284 return name;
17285
17286 len = strlen (name);
17287 strip = XALLOCAVEC (char, len + 1);
17288 strcpy (strip, name);
17289 p = strip + (q - name);
17290 while (p)
17291 {
17292 *p = '_';
17293 p = strchr (p + 1, '$');
17294 }
17295
17296 return ggc_alloc_string (strip, len);
17297 }
17298 #endif
17299
17300 void
17301 rs6000_output_symbol_ref (FILE *file, rtx x)
17302 {
17303 const char *name = XSTR (x, 0);
17304
17305 /* Currently C++ toc references to vtables can be emitted before it
17306 is decided whether the vtable is public or private. If this is
17307 the case, then the linker will eventually complain that there is
17308 a reference to an unknown section. Thus, for vtables only,
17309 we emit the TOC reference to reference the identifier and not the
17310 symbol. */
17311 if (VTABLE_NAME_P (name))
17312 {
17313 RS6000_OUTPUT_BASENAME (file, name);
17314 }
17315 else
17316 assemble_name (file, name);
17317 }
17318
17319 /* Output a TOC entry. We derive the entry name from what is being
17320 written. */
17321
17322 void
17323 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
17324 {
17325 char buf[256];
17326 const char *name = buf;
17327 rtx base = x;
17328 HOST_WIDE_INT offset = 0;
17329
17330 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
17331
17332 /* When the linker won't eliminate them, don't output duplicate
17333 TOC entries (this happens on AIX if there is any kind of TOC,
17334 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17335 CODE_LABELs. */
17336 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
17337 {
17338 struct toc_hash_struct *h;
17339
17340 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17341 time because GGC is not initialized at that point. */
17342 if (toc_hash_table == NULL)
17343 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
17344
17345 h = ggc_alloc<toc_hash_struct> ();
17346 h->key = x;
17347 h->key_mode = mode;
17348 h->labelno = labelno;
17349
17350 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
17351 if (*found == NULL)
17352 *found = h;
17353 else /* This is indeed a duplicate.
17354 Set this label equal to that label. */
17355 {
17356 fputs ("\t.set ", file);
17357 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17358 fprintf (file, "%d,", labelno);
17359 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17360 fprintf (file, "%d\n", ((*found)->labelno));
17361
17362 #ifdef HAVE_AS_TLS
17363 if (TARGET_XCOFF && SYMBOL_REF_P (x)
17364 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
17365 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
17366 {
17367 fputs ("\t.set ", file);
17368 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17369 fprintf (file, "%d,", labelno);
17370 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17371 fprintf (file, "%d\n", ((*found)->labelno));
17372 }
17373 #endif
17374 return;
17375 }
17376 }
17377
17378 /* If we're going to put a double constant in the TOC, make sure it's
17379 aligned properly when strict alignment is on. */
17380 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
17381 && STRICT_ALIGNMENT
17382 && GET_MODE_BITSIZE (mode) >= 64
17383 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
17384 ASM_OUTPUT_ALIGN (file, 3);
17386
17387 (*targetm.asm_out.internal_label) (file, "LC", labelno);
17388
17389 /* Handle FP constants specially. Note that if we have a minimal
17390 TOC, things we put here aren't actually in the TOC, so we can allow
17391 FP constants. */
17392 if (CONST_DOUBLE_P (x)
17393 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
17394 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
17395 {
17396 long k[4];
17397
17398 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17399 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
17400 else
17401 real_to_target (k, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
17402
17403 if (TARGET_64BIT)
17404 {
17405 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17406 fputs (DOUBLE_INT_ASM_OP, file);
17407 else
17408 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17409 k[0] & 0xffffffff, k[1] & 0xffffffff,
17410 k[2] & 0xffffffff, k[3] & 0xffffffff);
17411 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
17412 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17413 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
17414 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
17415 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
17416 return;
17417 }
17418 else
17419 {
17420 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17421 fputs ("\t.long ", file);
17422 else
17423 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17424 k[0] & 0xffffffff, k[1] & 0xffffffff,
17425 k[2] & 0xffffffff, k[3] & 0xffffffff);
17426 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17427 k[0] & 0xffffffff, k[1] & 0xffffffff,
17428 k[2] & 0xffffffff, k[3] & 0xffffffff);
17429 return;
17430 }
17431 }
17432 else if (CONST_DOUBLE_P (x)
17433 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
17434 {
17435 long k[2];
17436
17437 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17438 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
17439 else
17440 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17441
17442 if (TARGET_64BIT)
17443 {
17444 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17445 fputs (DOUBLE_INT_ASM_OP, file);
17446 else
17447 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17448 k[0] & 0xffffffff, k[1] & 0xffffffff);
17449 fprintf (file, "0x%lx%08lx\n",
17450 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17451 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
17452 return;
17453 }
17454 else
17455 {
17456 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17457 fputs ("\t.long ", file);
17458 else
17459 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17460 k[0] & 0xffffffff, k[1] & 0xffffffff);
17461 fprintf (file, "0x%lx,0x%lx\n",
17462 k[0] & 0xffffffff, k[1] & 0xffffffff);
17463 return;
17464 }
17465 }
17466 else if (CONST_DOUBLE_P (x)
17467 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
17468 {
17469 long l;
17470
17471 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17472 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
17473 else
17474 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17475
17476 if (TARGET_64BIT)
17477 {
17478 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17479 fputs (DOUBLE_INT_ASM_OP, file);
17480 else
17481 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17482 if (WORDS_BIG_ENDIAN)
17483 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
17484 else
17485 fprintf (file, "0x%lx\n", l & 0xffffffff);
17486 return;
17487 }
17488 else
17489 {
17490 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17491 fputs ("\t.long ", file);
17492 else
17493 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17494 fprintf (file, "0x%lx\n", l & 0xffffffff);
17495 return;
17496 }
17497 }
17498 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
17499 {
17500 unsigned HOST_WIDE_INT low;
17501 HOST_WIDE_INT high;
17502
17503 low = INTVAL (x) & 0xffffffff;
17504 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
17505
17506 /* TOC entries are always Pmode-sized, so when big-endian
17507 smaller integer constants in the TOC need to be padded.
17508 (This is still a win over putting the constants in
17509 a separate constant pool, because then we'd have
17510 to have both a TOC entry _and_ the actual constant.)
17511
17512 For a 32-bit target, CONST_INT values are loaded and shifted
17513 entirely within `low' and can be stored in one TOC entry. */
17514
17515 /* It would be easy to make this work, but it doesn't now. */
17516 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
17517
17518 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
17519 {
17520 low |= high << 32;
17521 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
17522 high = (HOST_WIDE_INT) low >> 32;
17523 low &= 0xffffffff;
17524 }
17525
17526 if (TARGET_64BIT)
17527 {
17528 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17529 fputs (DOUBLE_INT_ASM_OP, file);
17530 else
17531 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17532 (long) high & 0xffffffff, (long) low & 0xffffffff);
17533 fprintf (file, "0x%lx%08lx\n",
17534 (long) high & 0xffffffff, (long) low & 0xffffffff);
17535 return;
17536 }
17537 else
17538 {
17539 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
17540 {
17541 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17542 fputs ("\t.long ", file);
17543 else
17544 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17545 (long) high & 0xffffffff, (long) low & 0xffffffff);
17546 fprintf (file, "0x%lx,0x%lx\n",
17547 (long) high & 0xffffffff, (long) low & 0xffffffff);
17548 }
17549 else
17550 {
17551 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17552 fputs ("\t.long ", file);
17553 else
17554 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
17555 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
17556 }
17557 return;
17558 }
17559 }
17560
17561 if (GET_CODE (x) == CONST)
17562 {
17563 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
17564 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
17565
17566 base = XEXP (XEXP (x, 0), 0);
17567 offset = INTVAL (XEXP (XEXP (x, 0), 1));
17568 }
17569
17570 switch (GET_CODE (base))
17571 {
17572 case SYMBOL_REF:
17573 name = XSTR (base, 0);
17574 break;
17575
17576 case LABEL_REF:
17577 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
17578 CODE_LABEL_NUMBER (XEXP (base, 0)));
17579 break;
17580
17581 case CODE_LABEL:
17582 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
17583 break;
17584
17585 default:
17586 gcc_unreachable ();
17587 }
17588
17589 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17590 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
17591 else
17592 {
17593 fputs ("\t.tc ", file);
17594 RS6000_OUTPUT_BASENAME (file, name);
17595
17596 if (offset < 0)
17597 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
17598 else if (offset)
17599 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
17600
17601 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17602 after other TOC symbols, reducing overflow of small TOC access
17603 to [TC] symbols. */
17604 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
17605 ? "[TE]," : "[TC],", file);
17606 }
17607
17608 /* Currently C++ toc references to vtables can be emitted before it
17609 is decided whether the vtable is public or private. If this is
17610 the case, then the linker will eventually complain that there is
17611 a TOC reference to an unknown section. Thus, for vtables only,
17612 we emit the TOC reference to reference the symbol and not the
17613 section. */
17614 if (VTABLE_NAME_P (name))
17615 {
17616 RS6000_OUTPUT_BASENAME (file, name);
17617 if (offset < 0)
17618 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
17619 else if (offset > 0)
17620 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
17621 }
17622 else
17623 output_addr_const (file, x);
17624
17625 #if HAVE_AS_TLS
17626 if (TARGET_XCOFF && SYMBOL_REF_P (base))
17627 {
17628 switch (SYMBOL_REF_TLS_MODEL (base))
17629 {
17630 case 0:
17631 break;
17632 case TLS_MODEL_LOCAL_EXEC:
17633 fputs ("@le", file);
17634 break;
17635 case TLS_MODEL_INITIAL_EXEC:
17636 fputs ("@ie", file);
17637 break;
17638 /* Use global-dynamic for local-dynamic. */
17639 case TLS_MODEL_GLOBAL_DYNAMIC:
17640 case TLS_MODEL_LOCAL_DYNAMIC:
17641 putc ('\n', file);
17642 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
17643 fputs ("\t.tc .", file);
17644 RS6000_OUTPUT_BASENAME (file, name);
17645 fputs ("[TC],", file);
17646 output_addr_const (file, x);
17647 fputs ("@m", file);
17648 break;
17649 default:
17650 gcc_unreachable ();
17651 }
17652 }
17653 #endif
17654
17655 putc ('\n', file);
17656 }
17657 \f
17658 /* Output an assembler pseudo-op to write an ASCII string of N characters
17659 starting at P to FILE.
17660
17661 On the RS/6000, we have to do this using the .byte operation and
17662 write out special characters outside the quoted string.
17663 Also, the assembler is broken; very long strings are truncated,
17664 so we must artificially break them up early. */
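/* As an illustration, for the three input bytes 'H', 'i', '\n' this
   routine emits
       .byte "Hi"
       .byte 10
   printable characters are collected into one quoted string, and any
   other byte is written out as a decimal value.  */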
17665
17666 void
17667 output_ascii (FILE *file, const char *p, int n)
17668 {
17669 char c;
17670 int i, count_string;
17671 const char *for_string = "\t.byte \"";
17672 const char *for_decimal = "\t.byte ";
17673 const char *to_close = NULL;
17674
17675 count_string = 0;
17676 for (i = 0; i < n; i++)
17677 {
17678 c = *p++;
17679 if (c >= ' ' && c < 0177)
17680 {
17681 if (for_string)
17682 fputs (for_string, file);
17683 putc (c, file);
17684
17685 /* Write two quotes to get one. */
17686 if (c == '"')
17687 {
17688 putc (c, file);
17689 ++count_string;
17690 }
17691
17692 for_string = NULL;
17693 for_decimal = "\"\n\t.byte ";
17694 to_close = "\"\n";
17695 ++count_string;
17696
17697 if (count_string >= 512)
17698 {
17699 fputs (to_close, file);
17700
17701 for_string = "\t.byte \"";
17702 for_decimal = "\t.byte ";
17703 to_close = NULL;
17704 count_string = 0;
17705 }
17706 }
17707 else
17708 {
17709 if (for_decimal)
17710 fputs (for_decimal, file);
17711 fprintf (file, "%d", c);
17712
17713 for_string = "\n\t.byte \"";
17714 for_decimal = ", ";
17715 to_close = "\n";
17716 count_string = 0;
17717 }
17718 }
17719
17720 /* Now close the string if we have written one. Then end the line. */
17721 if (to_close)
17722 fputs (to_close, file);
17723 }
17724 \f
17725 /* Generate a unique section name for FILENAME for a section type
17726 represented by SECTION_DESC. Output goes into BUF.
17727
17728 SECTION_DESC can be any string, as long as it is different for each
17729 possible section type.
17730
17731 We name the section in the same manner as xlc. The name begins with an
17732 underscore followed by the filename (after stripping any leading directory
17733 names) with the last period replaced by the string SECTION_DESC. If
17734 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17735 the name. */
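/* As a hypothetical example, FILENAME "src/foo.c" with SECTION_DESC
   "bss_" yields "_foobss_": the directory prefix is dropped, the
   alphanumeric characters are copied, and the last period together
   with everything after it is replaced by SECTION_DESC.  */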
17736
17737 void
17738 rs6000_gen_section_name (char **buf, const char *filename,
17739 const char *section_desc)
17740 {
17741 const char *q, *after_last_slash, *last_period = 0;
17742 char *p;
17743 int len;
17744
17745 after_last_slash = filename;
17746 for (q = filename; *q; q++)
17747 {
17748 if (*q == '/')
17749 after_last_slash = q + 1;
17750 else if (*q == '.')
17751 last_period = q;
17752 }
17753
17754 len = strlen (after_last_slash) + strlen (section_desc) + 2;
17755 *buf = (char *) xmalloc (len);
17756
17757 p = *buf;
17758 *p++ = '_';
17759
17760 for (q = after_last_slash; *q; q++)
17761 {
17762 if (q == last_period)
17763 {
17764 strcpy (p, section_desc);
17765 p += strlen (section_desc);
17766 break;
17767 }
17768
17769 else if (ISALNUM (*q))
17770 *p++ = *q;
17771 }
17772
17773 if (last_period == 0)
17774 strcpy (p, section_desc);
17775 else
17776 *p = '\0';
17777 }
17778 \f
17779 /* Emit profile function. */
17780
17781 void
17782 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17783 {
17784 /* Non-standard profiling for kernels, which just saves LR then calls
17785 _mcount without worrying about arg saves. The idea is to change
17786 the function prologue as little as possible as it isn't easy to
17787 account for arg save/restore code added just for _mcount. */
17788 if (TARGET_PROFILE_KERNEL)
17789 return;
17790
17791 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17792 {
17793 #ifndef NO_PROFILE_COUNTERS
17794 # define NO_PROFILE_COUNTERS 0
17795 #endif
17796 if (NO_PROFILE_COUNTERS)
17797 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17798 LCT_NORMAL, VOIDmode);
17799 else
17800 {
17801 char buf[30];
17802 const char *label_name;
17803 rtx fun;
17804
17805 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17806 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
17807 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
17808
17809 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
17810 LCT_NORMAL, VOIDmode, fun, Pmode);
17811 }
17812 }
17813 else if (DEFAULT_ABI == ABI_DARWIN)
17814 {
17815 const char *mcount_name = RS6000_MCOUNT;
17816 int caller_addr_regno = LR_REGNO;
17817
17818 /* Be conservative and always set this, at least for now. */
17819 crtl->uses_pic_offset_table = 1;
17820
17821 #if TARGET_MACHO
17822 /* For PIC code, set up a stub and collect the caller's address
17823 from r0, which is where the prologue puts it. */
17824 if (MACHOPIC_INDIRECT
17825 && crtl->uses_pic_offset_table)
17826 caller_addr_regno = 0;
17827 #endif
17828 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
17829 LCT_NORMAL, VOIDmode,
17830 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
17831 }
17832 }
17833
17834 /* Write function profiler code. */
17835
17836 void
17837 output_function_profiler (FILE *file, int labelno)
17838 {
17839 char buf[100];
17840
17841 switch (DEFAULT_ABI)
17842 {
17843 default:
17844 gcc_unreachable ();
17845
17846 case ABI_V4:
17847 if (!TARGET_32BIT)
17848 {
17849 warning (0, "no profiling of 64-bit code for this ABI");
17850 return;
17851 }
17852 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
17853 fprintf (file, "\tmflr %s\n", reg_names[0]);
17854 if (NO_PROFILE_COUNTERS)
17855 {
17856 asm_fprintf (file, "\tstw %s,4(%s)\n",
17857 reg_names[0], reg_names[1]);
17858 }
17859 else if (TARGET_SECURE_PLT && flag_pic)
17860 {
17861 if (TARGET_LINK_STACK)
17862 {
17863 char name[32];
17864 get_ppc476_thunk_name (name);
17865 asm_fprintf (file, "\tbl %s\n", name);
17866 }
17867 else
17868 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
17869 asm_fprintf (file, "\tstw %s,4(%s)\n",
17870 reg_names[0], reg_names[1]);
17871 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17872 asm_fprintf (file, "\taddis %s,%s,",
17873 reg_names[12], reg_names[12]);
17874 assemble_name (file, buf);
17875 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
17876 assemble_name (file, buf);
17877 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
17878 }
17879 else if (flag_pic == 1)
17880 {
17881 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
17882 asm_fprintf (file, "\tstw %s,4(%s)\n",
17883 reg_names[0], reg_names[1]);
17884 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
17885 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
17886 assemble_name (file, buf);
17887 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
17888 }
17889 else if (flag_pic > 1)
17890 {
17891 asm_fprintf (file, "\tstw %s,4(%s)\n",
17892 reg_names[0], reg_names[1]);
17893 /* Now, we need to get the address of the label. */
17894 if (TARGET_LINK_STACK)
17895 {
17896 char name[32];
17897 get_ppc476_thunk_name (name);
17898 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
17899 assemble_name (file, buf);
17900 fputs ("-.\n1:", file);
17901 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17902 asm_fprintf (file, "\taddi %s,%s,4\n",
17903 reg_names[11], reg_names[11]);
17904 }
17905 else
17906 {
17907 fputs ("\tbcl 20,31,1f\n\t.long ", file);
17908 assemble_name (file, buf);
17909 fputs ("-.\n1:", file);
17910 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
17911 }
17912 asm_fprintf (file, "\tlwz %s,0(%s)\n",
17913 reg_names[0], reg_names[11]);
17914 asm_fprintf (file, "\tadd %s,%s,%s\n",
17915 reg_names[0], reg_names[0], reg_names[11]);
17916 }
17917 else
17918 {
17919 asm_fprintf (file, "\tlis %s,", reg_names[12]);
17920 assemble_name (file, buf);
17921 fputs ("@ha\n", file);
17922 asm_fprintf (file, "\tstw %s,4(%s)\n",
17923 reg_names[0], reg_names[1]);
17924 asm_fprintf (file, "\tla %s,", reg_names[0]);
17925 assemble_name (file, buf);
17926 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
17927 }
17928
17929 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
17930 fprintf (file, "\tbl %s%s\n",
17931 RS6000_MCOUNT, flag_pic ? "@plt" : "");
17932 break;
17933
17934 case ABI_AIX:
17935 case ABI_ELFv2:
17936 case ABI_DARWIN:
17937 /* Don't do anything, done in output_profile_hook (). */
17938 break;
17939 }
17940 }
17941
17942 \f
17943
17944 /* The following variable holds the last issued insn.  */
17945
17946 static rtx_insn *last_scheduled_insn;
17947
17948 /* The following variable helps to balance issuing of load and
17949 store instructions.  */
17950
17951 static int load_store_pendulum;
17952
17953 /* The following variable helps pair divide insns during scheduling. */
17954 static int divide_cnt;
17955 /* The following variable helps pair and alternate vector and vector load
17956 insns during scheduling. */
17957 static int vec_pairing;
17958
17959
17960 /* Power4 load update and store update instructions are cracked into a
17961 load or store and an integer insn which are executed in the same cycle.
17962 Branches have their own dispatch slot which does not count against the
17963 GCC issue rate, but it changes the program flow so there are no other
17964 instructions to issue in this cycle. */
17965
17966 static int
17967 rs6000_variable_issue_1 (rtx_insn *insn, int more)
17968 {
17969 last_scheduled_insn = insn;
17970 if (GET_CODE (PATTERN (insn)) == USE
17971 || GET_CODE (PATTERN (insn)) == CLOBBER)
17972 {
17973 cached_can_issue_more = more;
17974 return cached_can_issue_more;
17975 }
17976
17977 if (insn_terminates_group_p (insn, current_group))
17978 {
17979 cached_can_issue_more = 0;
17980 return cached_can_issue_more;
17981 }
17982
17983 /* If the insn isn't recognized (has no reservation) but we reach here, leave MORE unchanged.  */
17984 if (recog_memoized (insn) < 0)
17985 return more;
17986
17987 if (rs6000_sched_groups)
17988 {
17989 if (is_microcoded_insn (insn))
17990 cached_can_issue_more = 0;
17991 else if (is_cracked_insn (insn))
17992 cached_can_issue_more = more > 2 ? more - 2 : 0;
17993 else
17994 cached_can_issue_more = more - 1;
17995
17996 return cached_can_issue_more;
17997 }
17998
17999 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
18000 return 0;
18001
18002 cached_can_issue_more = more - 1;
18003 return cached_can_issue_more;
18004 }
18005
18006 static int
18007 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
18008 {
18009 int r = rs6000_variable_issue_1 (insn, more);
18010 if (verbose)
18011 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
18012 return r;
18013 }
18014
18015 /* Adjust the cost of a scheduling dependency. Return the new cost of
18016 a dependency of kind DEP_TYPE of INSN on DEP_INSN. COST is the current cost. */
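/* For example, on the Power4-Power10 cores handled below, a branch
   that truly depends on a compare is charged COST + 2 (see the
   TYPE_BRANCH case), keeping the compare and its dependent branch
   a little further apart.  */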
18017
18018 static int
18019 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
18020 unsigned int)
18021 {
18022 enum attr_type attr_type;
18023
18024 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
18025 return cost;
18026
18027 switch (dep_type)
18028 {
18029 case REG_DEP_TRUE:
18030 {
18031 /* Data dependency; DEP_INSN writes a register that INSN reads
18032 some cycles later. */
18033
18034 /* Separate a load from a narrower, dependent store. */
18035 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
18036 || rs6000_tune == PROCESSOR_POWER10)
18037 && GET_CODE (PATTERN (insn)) == SET
18038 && GET_CODE (PATTERN (dep_insn)) == SET
18039 && MEM_P (XEXP (PATTERN (insn), 1))
18040 && MEM_P (XEXP (PATTERN (dep_insn), 0))
18041 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
18042 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
18043 return cost + 14;
18044
18045 attr_type = get_attr_type (insn);
18046
18047 switch (attr_type)
18048 {
18049 case TYPE_JMPREG:
18050 /* Tell the first scheduling pass about the latency between
18051 a mtctr and bctr (and mtlr and br/blr). The first
18052 scheduling pass will not know about this latency since
18053 the mtctr instruction, which has the latency associated
18054 to it, will be generated by reload. */
18055 return 4;
18056 case TYPE_BRANCH:
18057 /* Leave some extra cycles between a compare and its
18058 dependent branch, to inhibit expensive mispredicts. */
18059 if ((rs6000_tune == PROCESSOR_PPC603
18060 || rs6000_tune == PROCESSOR_PPC604
18061 || rs6000_tune == PROCESSOR_PPC604e
18062 || rs6000_tune == PROCESSOR_PPC620
18063 || rs6000_tune == PROCESSOR_PPC630
18064 || rs6000_tune == PROCESSOR_PPC750
18065 || rs6000_tune == PROCESSOR_PPC7400
18066 || rs6000_tune == PROCESSOR_PPC7450
18067 || rs6000_tune == PROCESSOR_PPCE5500
18068 || rs6000_tune == PROCESSOR_PPCE6500
18069 || rs6000_tune == PROCESSOR_POWER4
18070 || rs6000_tune == PROCESSOR_POWER5
18071 || rs6000_tune == PROCESSOR_POWER7
18072 || rs6000_tune == PROCESSOR_POWER8
18073 || rs6000_tune == PROCESSOR_POWER9
18074 || rs6000_tune == PROCESSOR_POWER10
18075 || rs6000_tune == PROCESSOR_CELL)
18076 && recog_memoized (dep_insn)
18077 && (INSN_CODE (dep_insn) >= 0))
18078
18079 switch (get_attr_type (dep_insn))
18080 {
18081 case TYPE_CMP:
18082 case TYPE_FPCOMPARE:
18083 case TYPE_CR_LOGICAL:
18084 return cost + 2;
18085 case TYPE_EXTS:
18086 case TYPE_MUL:
18087 if (get_attr_dot (dep_insn) == DOT_YES)
18088 return cost + 2;
18089 else
18090 break;
18091 case TYPE_SHIFT:
18092 if (get_attr_dot (dep_insn) == DOT_YES
18093 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
18094 return cost + 2;
18095 else
18096 break;
18097 default:
18098 break;
18099 }
18100 break;
18101
18102 case TYPE_STORE:
18103 case TYPE_FPSTORE:
18104 if ((rs6000_tune == PROCESSOR_POWER6)
18105 && recog_memoized (dep_insn)
18106 && (INSN_CODE (dep_insn) >= 0))
18107 {
18108
18109 if (GET_CODE (PATTERN (insn)) != SET)
18110 /* If this happens, we have to extend this to schedule
18111 optimally. Return default for now. */
18112 return cost;
18113
18114 /* Adjust the cost for the case where the value written
18115 by a fixed point operation is used as the address
18116 gen value on a store. */
18117 switch (get_attr_type (dep_insn))
18118 {
18119 case TYPE_LOAD:
18120 case TYPE_CNTLZ:
18121 {
18122 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18123 return get_attr_sign_extend (dep_insn)
18124 == SIGN_EXTEND_YES ? 6 : 4;
18125 break;
18126 }
18127 case TYPE_SHIFT:
18128 {
18129 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18130 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
18131 6 : 3;
18132 break;
18133 }
18134 case TYPE_INTEGER:
18135 case TYPE_ADD:
18136 case TYPE_LOGICAL:
18137 case TYPE_EXTS:
18138 case TYPE_INSERT:
18139 {
18140 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18141 return 3;
18142 break;
18143 }
18144 case TYPE_STORE:
18145 case TYPE_FPLOAD:
18146 case TYPE_FPSTORE:
18147 {
18148 if (get_attr_update (dep_insn) == UPDATE_YES
18149 && ! rs6000_store_data_bypass_p (dep_insn, insn))
18150 return 3;
18151 break;
18152 }
18153 case TYPE_MUL:
18154 {
18155 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18156 return 17;
18157 break;
18158 }
18159 case TYPE_DIV:
18160 {
18161 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18162 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
18163 break;
18164 }
18165 default:
18166 break;
18167 }
18168 }
18169 break;
18170
18171 case TYPE_LOAD:
18172 if ((rs6000_tune == PROCESSOR_POWER6)
18173 && recog_memoized (dep_insn)
18174 && (INSN_CODE (dep_insn) >= 0))
18175 {
18176
18177 /* Adjust the cost for the case where the value written
18178 by a fixed point instruction is used within the address
18179 gen portion of a subsequent load(u)(x).  */
18180 switch (get_attr_type (dep_insn))
18181 {
18182 case TYPE_LOAD:
18183 case TYPE_CNTLZ:
18184 {
18185 if (set_to_load_agen (dep_insn, insn))
18186 return get_attr_sign_extend (dep_insn)
18187 == SIGN_EXTEND_YES ? 6 : 4;
18188 break;
18189 }
18190 case TYPE_SHIFT:
18191 {
18192 if (set_to_load_agen (dep_insn, insn))
18193 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
18194 6 : 3;
18195 break;
18196 }
18197 case TYPE_INTEGER:
18198 case TYPE_ADD:
18199 case TYPE_LOGICAL:
18200 case TYPE_EXTS:
18201 case TYPE_INSERT:
18202 {
18203 if (set_to_load_agen (dep_insn, insn))
18204 return 3;
18205 break;
18206 }
18207 case TYPE_STORE:
18208 case TYPE_FPLOAD:
18209 case TYPE_FPSTORE:
18210 {
18211 if (get_attr_update (dep_insn) == UPDATE_YES
18212 && set_to_load_agen (dep_insn, insn))
18213 return 3;
18214 break;
18215 }
18216 case TYPE_MUL:
18217 {
18218 if (set_to_load_agen (dep_insn, insn))
18219 return 17;
18220 break;
18221 }
18222 case TYPE_DIV:
18223 {
18224 if (set_to_load_agen (dep_insn, insn))
18225 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
18226 break;
18227 }
18228 default:
18229 break;
18230 }
18231 }
18232 break;
18233
18234 default:
18235 break;
18236 }
18237
18238 /* Fall out to return default cost. */
18239 }
18240 break;
18241
18242 case REG_DEP_OUTPUT:
18243 /* Output dependency; DEP_INSN writes a register that INSN writes some
18244 cycles later. */
18245 if ((rs6000_tune == PROCESSOR_POWER6)
18246 && recog_memoized (dep_insn)
18247 && (INSN_CODE (dep_insn) >= 0))
18248 {
18249 attr_type = get_attr_type (insn);
18250
18251 switch (attr_type)
18252 {
18253 case TYPE_FP:
18254 case TYPE_FPSIMPLE:
18255 if (get_attr_type (dep_insn) == TYPE_FP
18256 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
18257 return 1;
18258 break;
18259 default:
18260 break;
18261 }
18262 }
18263 /* Fall through, no cost for output dependency. */
18264 /* FALLTHRU */
18265
18266 case REG_DEP_ANTI:
18267 /* Anti dependency; DEP_INSN reads a register that INSN writes some
18268 cycles later. */
18269 return 0;
18270
18271 default:
18272 gcc_unreachable ();
18273 }
18274
18275 return cost;
18276 }
18277
18278 /* Debug version of rs6000_adjust_cost. */
18279
18280 static int
18281 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
18282 int cost, unsigned int dw)
18283 {
18284 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
18285
18286 if (ret != cost)
18287 {
18288 const char *dep;
18289
18290 switch (dep_type)
18291 {
18292 default: dep = "unknown dependency"; break;
18293 case REG_DEP_TRUE: dep = "data dependency"; break;
18294 case REG_DEP_OUTPUT: dep = "output dependency"; break;
18295 case REG_DEP_ANTI: dep = "anti dependency"; break;
18296 }
18297
18298 fprintf (stderr,
18299 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
18300 "%s, insn:\n", ret, cost, dep);
18301
18302 debug_rtx (insn);
18303 }
18304
18305 return ret;
18306 }
18307
18308 /* Return true if INSN is microcoded, and false otherwise. */
18310
18311 static bool
18312 is_microcoded_insn (rtx_insn *insn)
18313 {
18314 if (!insn || !NONDEBUG_INSN_P (insn)
18315 || GET_CODE (PATTERN (insn)) == USE
18316 || GET_CODE (PATTERN (insn)) == CLOBBER)
18317 return false;
18318
18319 if (rs6000_tune == PROCESSOR_CELL)
18320 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
18321
18322 if (rs6000_sched_groups
18323 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18324 {
18325 enum attr_type type = get_attr_type (insn);
18326 if ((type == TYPE_LOAD
18327 && get_attr_update (insn) == UPDATE_YES
18328 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
18329 || ((type == TYPE_LOAD || type == TYPE_STORE)
18330 && get_attr_update (insn) == UPDATE_YES
18331 && get_attr_indexed (insn) == INDEXED_YES)
18332 || type == TYPE_MFCR)
18333 return true;
18334 }
18335
18336 return false;
18337 }
18338
18339 /* The function returns true if INSN is cracked into 2 instructions
18340 by the processor (and therefore occupies 2 issue slots). */
18341
18342 static bool
18343 is_cracked_insn (rtx_insn *insn)
18344 {
18345 if (!insn || !NONDEBUG_INSN_P (insn)
18346 || GET_CODE (PATTERN (insn)) == USE
18347 || GET_CODE (PATTERN (insn)) == CLOBBER)
18348 return false;
18349
18350 if (rs6000_sched_groups
18351 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18352 {
18353 enum attr_type type = get_attr_type (insn);
18354 if ((type == TYPE_LOAD
18355 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18356 && get_attr_update (insn) == UPDATE_NO)
18357 || (type == TYPE_LOAD
18358 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
18359 && get_attr_update (insn) == UPDATE_YES
18360 && get_attr_indexed (insn) == INDEXED_NO)
18361 || (type == TYPE_STORE
18362 && get_attr_update (insn) == UPDATE_YES
18363 && get_attr_indexed (insn) == INDEXED_NO)
18364 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
18365 && get_attr_update (insn) == UPDATE_YES)
18366 || (type == TYPE_CR_LOGICAL
18367 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
18368 || (type == TYPE_EXTS
18369 && get_attr_dot (insn) == DOT_YES)
18370 || (type == TYPE_SHIFT
18371 && get_attr_dot (insn) == DOT_YES
18372 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
18373 || (type == TYPE_MUL
18374 && get_attr_dot (insn) == DOT_YES)
18375 || type == TYPE_DIV
18376 || (type == TYPE_INSERT
18377 && get_attr_size (insn) == SIZE_32))
18378 return true;
18379 }
18380
18381 return false;
18382 }
18383
18384 /* The function returns true if INSN can be issued only from
18385 the branch slot. */
18386
18387 static bool
18388 is_branch_slot_insn (rtx_insn *insn)
18389 {
18390 if (!insn || !NONDEBUG_INSN_P (insn)
18391 || GET_CODE (PATTERN (insn)) == USE
18392 || GET_CODE (PATTERN (insn)) == CLOBBER)
18393 return false;
18394
18395 if (rs6000_sched_groups)
18396 {
18397 enum attr_type type = get_attr_type (insn);
18398 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
18399 return true;
18400 return false;
18401 }
18402
18403 return false;
18404 }
18405
18406 /* Return true if OUT_INSN sets a value that is used in the address
18407 generation computation of IN_INSN. */
18408 static bool
18409 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
18410 {
18411 rtx out_set, in_set;
18412
18413 /* For performance reasons, only handle the simple case where
18414 both loads are a single_set. */
18415 out_set = single_set (out_insn);
18416 if (out_set)
18417 {
18418 in_set = single_set (in_insn);
18419 if (in_set)
18420 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
18421 }
18422
18423 return false;
18424 }
18425
18426 /* Try to determine base/offset/size parts of the given MEM.
18427 Return true if successful, false if any of the values couldn't
18428 be determined.
18429
18430 This function only looks for REG or REG+CONST address forms.
18431 REG+REG address form will return false. */
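/* As an illustration, given a MEM of known size 8 whose address is
       (plus:DI (plus:DI (reg:DI 9) (const_int 32)) (const_int 8))
   this returns *BASE = (reg:DI 9), *OFFSET = 40 and *SIZE = 8, while
   a (plus (reg) (reg)) address makes it return false.  */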
18432
18433 static bool
18434 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
18435 HOST_WIDE_INT *size)
18436 {
18437 rtx addr_rtx;
18438 if (MEM_SIZE_KNOWN_P (mem))
18439 *size = MEM_SIZE (mem);
18440 else
18441 return false;
18442
18443 addr_rtx = (XEXP (mem, 0));
18444 if (GET_CODE (addr_rtx) == PRE_MODIFY)
18445 addr_rtx = XEXP (addr_rtx, 1);
18446
18447 *offset = 0;
18448 while (GET_CODE (addr_rtx) == PLUS
18449 && CONST_INT_P (XEXP (addr_rtx, 1)))
18450 {
18451 *offset += INTVAL (XEXP (addr_rtx, 1));
18452 addr_rtx = XEXP (addr_rtx, 0);
18453 }
18454 if (!REG_P (addr_rtx))
18455 return false;
18456
18457 *base = addr_rtx;
18458 return true;
18459 }
18460
18461 /* If the target storage locations of arguments MEM1 and MEM2 are
18462 adjacent, then return the argument that has the lower address.
18463 Otherwise, return NULL_RTX. */
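/* For example, two 8-byte accesses at 0(r9) and 8(r9) are adjacent,
   and the access at offset 0 is returned; 8-byte accesses at 0(r9)
   and 16(r9) leave a gap, so NULL_RTX is returned.  */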
18464
18465 static rtx
18466 adjacent_mem_locations (rtx mem1, rtx mem2)
18467 {
18468 rtx reg1, reg2;
18469 HOST_WIDE_INT off1, size1, off2, size2;
18470
18471 if (MEM_P (mem1)
18472 && MEM_P (mem2)
18473 && get_memref_parts (mem1, &reg1, &off1, &size1)
18474 && get_memref_parts (mem2, &reg2, &off2, &size2)
18475 && REGNO (reg1) == REGNO (reg2))
18476 {
18477 if (off1 + size1 == off2)
18478 return mem1;
18479 else if (off2 + size2 == off1)
18480 return mem2;
18481 }
18482
18483 return NULL_RTX;
18484 }
18485
18486 /* This function returns true if it can be determined that the two MEM
18487 locations overlap by at least 1 byte based on base reg/offset/size. */
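/* For example, an 8-byte access at 0(r9) overlaps a 4-byte access at
   4(r9), but not one at 8(r9).  */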
18488
18489 static bool
18490 mem_locations_overlap (rtx mem1, rtx mem2)
18491 {
18492 rtx reg1, reg2;
18493 HOST_WIDE_INT off1, size1, off2, size2;
18494
18495 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18496 && get_memref_parts (mem2, &reg2, &off2, &size2))
18497 return ((REGNO (reg1) == REGNO (reg2))
18498 && (((off1 <= off2) && (off1 + size1 > off2))
18499 || ((off2 <= off1) && (off2 + size2 > off1))));
18500
18501 return false;
18502 }
18503
18504 /* Update the integer scheduling priority INSN_PRIORITY (INSN).
18505 Increase the priority to execute INSN earlier, reduce it to
18506 execute INSN later. */
18509
18510 static int
18511 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18512 {
18513 rtx load_mem, str_mem;
18514 /* On machines (like the 750) which have asymmetric integer units,
18515 where one integer unit can do multiplies and divides and the other
18516 can't, lower the priority of multiply/divide insns so they are
18517 scheduled after the other integer operations. */
18518
18519 #if 0
18520 if (! INSN_P (insn))
18521 return priority;
18522
18523 if (GET_CODE (PATTERN (insn)) == USE)
18524 return priority;
18525
18526 switch (rs6000_tune) {
18527 case PROCESSOR_PPC750:
18528 switch (get_attr_type (insn))
18529 {
18530 default:
18531 break;
18532
18533 case TYPE_MUL:
18534 case TYPE_DIV:
18535 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18536 priority, priority);
18537 if (priority >= 0 && priority < 0x01000000)
18538 priority >>= 3;
18539 break;
18540 }
18541 }
18542 #endif
18543
18544 if (insn_must_be_first_in_group (insn)
18545 && reload_completed
18546 && current_sched_info->sched_max_insns_priority
18547 && rs6000_sched_restricted_insns_priority)
18548 {
18549
18550 /* Prioritize insns that can be dispatched only in the first
18551 dispatch slot. */
18552 if (rs6000_sched_restricted_insns_priority == 1)
18553 /* Attach highest priority to insn. This means that in
18554 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18555 precede 'priority' (critical path) considerations. */
18556 return current_sched_info->sched_max_insns_priority;
18557 else if (rs6000_sched_restricted_insns_priority == 2)
18558 /* Increase priority of insn by a minimal amount. This means that in
18559 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18560 considerations precede dispatch-slot restriction considerations. */
18561 return (priority + 1);
18562 }
18563
18564 if (rs6000_tune == PROCESSOR_POWER6
18565 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18566 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
18567 /* Attach highest priority to insn if the scheduler has just issued two
18568 stores and this instruction is a load, or two loads and this instruction
18569 is a store. Power6 wants loads and stores scheduled alternately
18570 when possible.  */
18571 return current_sched_info->sched_max_insns_priority;
18572
18573 return priority;
18574 }
18575
18576 /* Return true if the instruction is nonpipelined on the Cell. */
18577 static bool
18578 is_nonpipeline_insn (rtx_insn *insn)
18579 {
18580 enum attr_type type;
18581 if (!insn || !NONDEBUG_INSN_P (insn)
18582 || GET_CODE (PATTERN (insn)) == USE
18583 || GET_CODE (PATTERN (insn)) == CLOBBER)
18584 return false;
18585
18586 type = get_attr_type (insn);
18587 if (type == TYPE_MUL
18588 || type == TYPE_DIV
18589 || type == TYPE_SDIV
18590 || type == TYPE_DDIV
18591 || type == TYPE_SSQRT
18592 || type == TYPE_DSQRT
18593 || type == TYPE_MFCR
18594 || type == TYPE_MFCRF
18595 || type == TYPE_MFJMPR)
18596 {
18597 return true;
18598 }
18599 return false;
18600 }
18601
18602
18603 /* Return how many instructions the machine can issue per cycle. */
18604
18605 static int
18606 rs6000_issue_rate (void)
18607 {
18608 /* Unless scheduling for register pressure, use issue rate of 1 for
18609 first scheduling pass to decrease degradation. */
18610 if (!reload_completed && !flag_sched_pressure)
18611 return 1;
18612
18613 switch (rs6000_tune) {
18614 case PROCESSOR_RS64A:
18615 case PROCESSOR_PPC601: /* ? */
18616 case PROCESSOR_PPC7450:
18617 return 3;
18618 case PROCESSOR_PPC440:
18619 case PROCESSOR_PPC603:
18620 case PROCESSOR_PPC750:
18621 case PROCESSOR_PPC7400:
18622 case PROCESSOR_PPC8540:
18623 case PROCESSOR_PPC8548:
18624 case PROCESSOR_CELL:
18625 case PROCESSOR_PPCE300C2:
18626 case PROCESSOR_PPCE300C3:
18627 case PROCESSOR_PPCE500MC:
18628 case PROCESSOR_PPCE500MC64:
18629 case PROCESSOR_PPCE5500:
18630 case PROCESSOR_PPCE6500:
18631 case PROCESSOR_TITAN:
18632 return 2;
18633 case PROCESSOR_PPC476:
18634 case PROCESSOR_PPC604:
18635 case PROCESSOR_PPC604e:
18636 case PROCESSOR_PPC620:
18637 case PROCESSOR_PPC630:
18638 return 4;
18639 case PROCESSOR_POWER4:
18640 case PROCESSOR_POWER5:
18641 case PROCESSOR_POWER6:
18642 case PROCESSOR_POWER7:
18643 return 5;
18644 case PROCESSOR_POWER8:
18645 return 7;
18646 case PROCESSOR_POWER9:
18647 return 6;
18648 case PROCESSOR_POWER10:
18649 return 8;
18650 default:
18651 return 1;
18652 }
18653 }
18654
18655 /* Return how many instructions to look ahead for better insn
18656 scheduling. */
18657
18658 static int
18659 rs6000_use_sched_lookahead (void)
18660 {
18661 switch (rs6000_tune)
18662 {
18663 case PROCESSOR_PPC8540:
18664 case PROCESSOR_PPC8548:
18665 return 4;
18666
18667 case PROCESSOR_CELL:
18668 return (reload_completed ? 8 : 0);
18669
18670 default:
18671 return 0;
18672 }
18673 }
18674
18675 /* We are choosing insn from the ready queue. Return zero if INSN can be
18676 chosen. */
18677 static int
18678 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18679 {
18680 if (ready_index == 0)
18681 return 0;
18682
18683 if (rs6000_tune != PROCESSOR_CELL)
18684 return 0;
18685
18686 gcc_assert (insn != NULL_RTX && INSN_P (insn));
18687
18688 if (!reload_completed
18689 || is_nonpipeline_insn (insn)
18690 || is_microcoded_insn (insn))
18691 return 1;
18692
18693 return 0;
18694 }
18695
18696 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18697 and return true. */
18698
18699 static bool
18700 find_mem_ref (rtx pat, rtx *mem_ref)
18701 {
18702 const char * fmt;
18703 int i, j;
18704
18705 /* stack_tie does not produce any real memory traffic. */
18706 if (tie_operand (pat, VOIDmode))
18707 return false;
18708
18709 if (MEM_P (pat))
18710 {
18711 *mem_ref = pat;
18712 return true;
18713 }
18714
18715 /* Recursively process the pattern. */
18716 fmt = GET_RTX_FORMAT (GET_CODE (pat));
18717
18718 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18719 {
18720 if (fmt[i] == 'e')
18721 {
18722 if (find_mem_ref (XEXP (pat, i), mem_ref))
18723 return true;
18724 }
18725 else if (fmt[i] == 'E')
18726 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18727 {
18728 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18729 return true;
18730 }
18731 }
18732
18733 return false;
18734 }
18735
18736 /* Determine if PAT is a PATTERN of a load insn. */
18737
18738 static bool
18739 is_load_insn1 (rtx pat, rtx *load_mem)
18740 {
18741 if (!pat)
18742 return false;
18743
18744 if (GET_CODE (pat) == SET)
18745 {
18746 if (REG_P (SET_DEST (pat)))
18747 return find_mem_ref (SET_SRC (pat), load_mem);
18748 else
18749 return false;
18750 }
18751
18752 if (GET_CODE (pat) == PARALLEL)
18753 {
18754 int i;
18755
18756 for (i = 0; i < XVECLEN (pat, 0); i++)
18757 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18758 return true;
18759 }
18760
18761 return false;
18762 }
18763
18764 /* Determine if INSN loads from memory. */
18765
18766 static bool
18767 is_load_insn (rtx insn, rtx *load_mem)
18768 {
18769 if (!insn || !INSN_P (insn))
18770 return false;
18771
18772 if (CALL_P (insn))
18773 return false;
18774
18775 return is_load_insn1 (PATTERN (insn), load_mem);
18776 }
18777
18778 /* Determine if PAT is a PATTERN of a store insn. */
18779
18780 static bool
18781 is_store_insn1 (rtx pat, rtx *str_mem)
18782 {
18783 if (!pat)
18784 return false;
18785
18786 if (GET_CODE (pat) == SET)
18787 {
18788 if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
18789 return find_mem_ref (SET_DEST (pat), str_mem);
18790 else
18791 return false;
18792 }
18793
18794 if (GET_CODE (pat) == PARALLEL)
18795 {
18796 int i;
18797
18798 for (i = 0; i < XVECLEN (pat, 0); i++)
18799 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
18800 return true;
18801 }
18802
18803 return false;
18804 }
18805
18806 /* Determine if INSN stores to memory. */
18807
18808 static bool
18809 is_store_insn (rtx insn, rtx *str_mem)
18810 {
18811 if (!insn || !INSN_P (insn))
18812 return false;
18813
18814 return is_store_insn1 (PATTERN (insn), str_mem);
18815 }
18816
18817 /* Return whether TYPE is a Power9 pairable vector instruction type. */
18818
18819 static bool
18820 is_power9_pairable_vec_type (enum attr_type type)
18821 {
18822 switch (type)
18823 {
18824 case TYPE_VECSIMPLE:
18825 case TYPE_VECCOMPLEX:
18826 case TYPE_VECDIV:
18827 case TYPE_VECCMP:
18828 case TYPE_VECPERM:
18829 case TYPE_VECFLOAT:
18830 case TYPE_VECFDIV:
18831 case TYPE_VECDOUBLE:
18832 return true;
18833 default:
18834 break;
18835 }
18836 return false;
18837 }
18838
18839 /* Returns whether the dependence between INSN and NEXT is considered
18840 costly by the given target. */
18841
18842 static bool
18843 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
18844 {
18845 rtx insn;
18846 rtx next;
18847 rtx load_mem, str_mem;
18848
18849 /* If the flag is not enabled, no dependence is considered costly;
18850 allow all dependent insns in the same group.
18851 This is the most aggressive option. */
18852 if (rs6000_sched_costly_dep == no_dep_costly)
18853 return false;
18854
18855 /* If the flag is set to 1, a dependence is always considered costly;
18856 do not allow dependent instructions in the same group.
18857 This is the most conservative option. */
18858 if (rs6000_sched_costly_dep == all_deps_costly)
18859 return true;
18860
18861 insn = DEP_PRO (dep);
18862 next = DEP_CON (dep);
18863
18864 if (rs6000_sched_costly_dep == store_to_load_dep_costly
18865 && is_load_insn (next, &load_mem)
18866 && is_store_insn (insn, &str_mem))
18867 /* Prevent load after store in the same group. */
18868 return true;
18869
18870 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
18871 && is_load_insn (next, &load_mem)
18872 && is_store_insn (insn, &str_mem)
18873 && DEP_TYPE (dep) == REG_DEP_TRUE
18874 && mem_locations_overlap (str_mem, load_mem))
18875 /* Prevent load after store in the same group if it is a true
18876 dependence. */
18877 return true;
18878
18879 /* The flag is set to X; dependences with latency >= X are considered costly,
18880 and will not be scheduled in the same group. */
18881 if (rs6000_sched_costly_dep <= max_dep_latency
18882 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
18883 return true;
18884
18885 return false;
18886 }
18887
18888 /* Return the next insn after INSN that is found before TAIL is reached,
18889 skipping any "non-active" insns - insns that will not actually occupy
18890 an issue slot. Return NULL_RTX if such an insn is not found. */
18891
18892 static rtx_insn *
18893 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
18894 {
18895 if (insn == NULL_RTX || insn == tail)
18896 return NULL;
18897
18898 while (1)
18899 {
18900 insn = NEXT_INSN (insn);
18901 if (insn == NULL_RTX || insn == tail)
18902 return NULL;
18903
18904 if (CALL_P (insn)
18905 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
18906 || (NONJUMP_INSN_P (insn)
18907 && GET_CODE (PATTERN (insn)) != USE
18908 && GET_CODE (PATTERN (insn)) != CLOBBER
18909 && INSN_CODE (insn) != CODE_FOR_stack_tie))
18910 break;
18911 }
18912 return insn;
18913 }
18914
18915 /* Move instruction at POS to the end of the READY list. */
18916
18917 static void
18918 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
18919 {
18920 rtx_insn *tmp;
18921 int i;
18922
18923 tmp = ready[pos];
18924 for (i = pos; i < lastpos; i++)
18925 ready[i] = ready[i + 1];
18926 ready[lastpos] = tmp;
18927 }
18928
18929 /* Do Power6 specific sched_reorder2 reordering of ready list. */
18930
18931 static int
18932 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
18933 {
18934 /* For Power6, we need to handle some special cases to try and keep the
18935 store queue from overflowing and triggering expensive flushes.
18936
18937 This code monitors how load and store instructions are being issued
18938 and skews the ready list one way or the other to increase the likelihood
18939 that a desired instruction is issued at the proper time.
18940
18941 A couple of things are done. First, we maintain a "load_store_pendulum"
18942 to track the current state of load/store issue.
18943
18944 - If the pendulum is at zero, then no loads or stores have been
18945 issued in the current cycle so we do nothing.
18946
18947 - If the pendulum is 1, then a single load has been issued in this
18948 cycle and we attempt to locate another load in the ready list to
18949 issue with it.
18950
18951 - If the pendulum is -2, then two stores have already been
18952 issued in this cycle, so we increase the priority of the first load
18953 in the ready list to increase its likelihood of being chosen first
18954 in the next cycle.
18955
18956 - If the pendulum is -1, then a single store has been issued in this
18957 cycle and we attempt to locate another store in the ready list to
18958 issue with it, preferring a store to an adjacent memory location to
18959 facilitate store pairing in the store queue.
18960
18961 - If the pendulum is 2, then two loads have already been
18962 issued in this cycle, so we increase the priority of the first store
18963 in the ready list to increase its likelihood of being chosen first
18964 in the next cycle.
18965
18966 - If the pendulum < -2 or > 2, then do nothing.
18967
18968 Note: This code covers the most common scenarios. There exist non-
18969 load/store instructions which make use of the LSU and which
18970 would need to be accounted for to strictly model the behavior
18971 of the machine. Those instructions are currently unaccounted
18972 for to help minimize compile time overhead of this code.
18973 */
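/* An illustrative sequence: a load issues (pendulum becomes 1), so
   another load from the ready list is moved to the head to issue
   with it; that load issues (pendulum becomes 2), so the first store
   on the list gets a priority bump to favor it on the next cycle.  */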
18974 int pos;
18975 rtx load_mem, str_mem;
18976
18977 if (is_store_insn (last_scheduled_insn, &str_mem))
18978 /* Issuing a store, swing the load_store_pendulum to the left */
18979 load_store_pendulum--;
18980 else if (is_load_insn (last_scheduled_insn, &load_mem))
18981 /* Issuing a load, swing the load_store_pendulum to the right */
18982 load_store_pendulum++;
18983 else
18984 return cached_can_issue_more;
18985
18986 /* If the pendulum is balanced, or there is only one instruction on
18987 the ready list, then all is well, so return. */
18988 if ((load_store_pendulum == 0) || (lastpos <= 0))
18989 return cached_can_issue_more;
18990
18991 if (load_store_pendulum == 1)
18992 {
18993 /* A load has been issued in this cycle. Scan the ready list
18994 for another load to issue with it.  */
18995 pos = lastpos;
18996
18997 while (pos >= 0)
18998 {
18999 if (is_load_insn (ready[pos], &load_mem))
19000 {
19001 /* Found a load. Move it to the head of the ready list,
19002 and adjust its priority so that it is more likely to
19003 stay there.  */
19004 move_to_end_of_ready (ready, pos, lastpos);
19005
19006 if (!sel_sched_p ()
19007 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19008 INSN_PRIORITY (ready[lastpos])++;
19009 break;
19010 }
19011 pos--;
19012 }
19013 }
19014 else if (load_store_pendulum == -2)
19015 {
19016 /* Two stores have been issued in this cycle. Increase the
19017 priority of the first load in the ready list to favor it for
19018 issuing in the next cycle. */
19019 pos = lastpos;
19020
19021 while (pos >= 0)
19022 {
19023 if (is_load_insn (ready[pos], &load_mem)
19024 && !sel_sched_p ()
19025 && INSN_PRIORITY_KNOWN (ready[pos]))
19026 {
19027 INSN_PRIORITY (ready[pos])++;
19028
19029 /* Adjust the pendulum to account for the fact that a load
19030 was found and increased in priority. This is to prevent
19031 increasing the priority of multiple loads.  */
19032 load_store_pendulum--;
19033
19034 break;
19035 }
19036 pos--;
19037 }
19038 }
19039 else if (load_store_pendulum == -1)
19040 {
19041 /* A store has been issued in this cycle. Scan the ready list for
19042 another store to issue with it, preferring a store to an adjacent
19043 memory location.  */
19044 int first_store_pos = -1;
19045
19046 pos = lastpos;
19047
19048 while (pos >= 0)
19049 {
19050 if (is_store_insn (ready[pos], &str_mem))
19051 {
19052 rtx str_mem2;
19053 /* Maintain the index of the first store found on the
19054 list */
19055 if (first_store_pos == -1)
19056 first_store_pos = pos;
19057
19058 if (is_store_insn (last_scheduled_insn, &str_mem2)
19059 && adjacent_mem_locations (str_mem, str_mem2))
19060 {
19061 /* Found an adjacent store. Move it to the head of the
19062 ready list, and adjust its priority so that it is
19063 more likely to stay there.  */
19064 move_to_end_of_ready (ready, pos, lastpos);
19065
19066 if (!sel_sched_p ()
19067 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19068 INSN_PRIORITY (ready[lastpos])++;
19069
19070 first_store_pos = -1;
19071
19072 break;
19073 }
19074 }
19075 pos--;
19076 }
19077
19078 if (first_store_pos >= 0)
19079 {
19080 /* An adjacent store wasn't found, but a non-adjacent store was,
19081 so move the non-adjacent store to the front of the ready
19082 list, and adjust its priority so that it is more likely to
19083 stay there. */
19084 move_to_end_of_ready (ready, first_store_pos, lastpos);
19085 if (!sel_sched_p ()
19086 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19087 INSN_PRIORITY (ready[lastpos])++;
19088 }
19089 }
19090 else if (load_store_pendulum == 2)
19091 {
19092 /* Two loads have been issued in this cycle. Increase the priority
19093 of the first store in the ready list to favor it for issuing in
19094 the next cycle. */
19095 pos = lastpos;
19096
19097 while (pos >= 0)
19098 {
19099 if (is_store_insn (ready[pos], &str_mem)
19100 && !sel_sched_p ()
19101 && INSN_PRIORITY_KNOWN (ready[pos]))
19102 {
19103 INSN_PRIORITY (ready[pos])++;
19104
19105 /* Adjust the pendulum to account for the fact that a store
19106 was found and increased in priority. This is to prevent
19107 increasing the priority of multiple stores.  */
19108 load_store_pendulum++;
19109
19110 break;
19111 }
19112 pos--;
19113 }
19114 }
19115
19116 return cached_can_issue_more;
19117 }
19118
19119 /* Do Power9 specific sched_reorder2 reordering of ready list. */
19120
19121 static int
19122 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
19123 {
19124 int pos;
19125 enum attr_type type, type2;
19126
19127 type = get_attr_type (last_scheduled_insn);
19128
19129 /* Try to issue fixed point divides back-to-back in pairs so they will be
19130 routed to separate execution units and execute in parallel. */
19131 if (type == TYPE_DIV && divide_cnt == 0)
19132 {
19133 /* First divide has been scheduled. */
19134 divide_cnt = 1;
19135
19136 /* Scan the ready list looking for another divide; if found, move it
19137 to the end of the list so it is chosen next. */
19138 pos = lastpos;
19139 while (pos >= 0)
19140 {
19141 if (recog_memoized (ready[pos]) >= 0
19142 && get_attr_type (ready[pos]) == TYPE_DIV)
19143 {
19144 move_to_end_of_ready (ready, pos, lastpos);
19145 break;
19146 }
19147 pos--;
19148 }
19149 }
19150 else
19151 {
19152 /* Last insn was the 2nd divide or not a divide; reset the counter. */
19153 divide_cnt = 0;
19154
19155 /* The best dispatch throughput for vector and vector load insns can be
19156 achieved by interleaving a vector and vector load such that they'll
19157 dispatch to the same superslice. If this pairing cannot be achieved
19158 then it is best to pair vector insns together and vector load insns
19159 together.
19160
19161 To aid in this pairing, vec_pairing maintains the current state with
19162 the following values:
19163
19164 0 : Initial state, no vecload/vector pairing has been started.
19165
19166 1 : A vecload or vector insn has been issued and a candidate for
19167 pairing has been found and moved to the end of the ready
19168 list. */
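/* For example: a vecload issues while vec_pairing is 0; a pairable
   vector insn found on the ready list is moved up to issue next and
   vec_pairing becomes 1; once that insn has issued, the state is
   reset and a new pair can be started.  */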
19169 if (type == TYPE_VECLOAD)
19170 {
19171 /* Issued a vecload. */
19172 if (vec_pairing == 0)
19173 {
19174 int vecload_pos = -1;
19175 /* We issued a single vecload, look for a vector insn to pair it
19176 with. If one isn't found, try to pair another vecload. */
19177 pos = lastpos;
19178 while (pos >= 0)
19179 {
19180 if (recog_memoized (ready[pos]) >= 0)
19181 {
19182 type2 = get_attr_type (ready[pos]);
19183 if (is_power9_pairable_vec_type (type2))
19184 {
19185 /* Found a vector insn to pair with, move it to the
19186 end of the ready list so it is scheduled next. */
19187 move_to_end_of_ready (ready, pos, lastpos);
19188 vec_pairing = 1;
19189 return cached_can_issue_more;
19190 }
19191 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
19192 /* Remember position of first vecload seen. */
19193 vecload_pos = pos;
19194 }
19195 pos--;
19196 }
19197 if (vecload_pos >= 0)
19198 {
19199 /* Didn't find a vector to pair with but did find a vecload,
19200 move it to the end of the ready list. */
19201 move_to_end_of_ready (ready, vecload_pos, lastpos);
19202 vec_pairing = 1;
19203 return cached_can_issue_more;
19204 }
19205 }
19206 }
19207 else if (is_power9_pairable_vec_type (type))
19208 {
19209 /* Issued a vector operation. */
19210 if (vec_pairing == 0)
19211 {
19212 int vec_pos = -1;
19213 /* We issued a single vector insn, look for a vecload to pair it
19214 with. If one isn't found, try to pair another vector. */
19215 pos = lastpos;
19216 while (pos >= 0)
19217 {
19218 if (recog_memoized (ready[pos]) >= 0)
19219 {
19220 type2 = get_attr_type (ready[pos]);
19221 if (type2 == TYPE_VECLOAD)
19222 {
19223 /* Found a vecload insn to pair with, move it to the
19224 end of the ready list so it is scheduled next. */
19225 move_to_end_of_ready (ready, pos, lastpos);
19226 vec_pairing = 1;
19227 return cached_can_issue_more;
19228 }
19229 else if (is_power9_pairable_vec_type (type2)
19230 && vec_pos == -1)
19231 /* Remember position of first vector insn seen. */
19232 vec_pos = pos;
19233 }
19234 pos--;
19235 }
19236 if (vec_pos >= 0)
19237 {
19238 /* Didn't find a vecload to pair with but did find a vector
19239 insn, move it to the end of the ready list. */
19240 move_to_end_of_ready (ready, vec_pos, lastpos);
19241 vec_pairing = 1;
19242 return cached_can_issue_more;
19243 }
19244 }
19245 }
19246
19247 /* We've either finished a vec/vecload pair, couldn't find an insn to
19248 continue the current pair, or the last insn had nothing to do
19249 with pairing. In any case, reset the state. */
19250 vec_pairing = 0;
19251 }
19252
19253 return cached_can_issue_more;
19254 }
19255
19256 /* Determine if INSN is a store to memory that can be fused with a similar
19257 adjacent store. */
19258
19259 static bool
19260 is_fusable_store (rtx_insn *insn, rtx *str_mem)
19261 {
19262 /* Insn must be a non-prefixed base+disp form store. */
19263 if (is_store_insn (insn, str_mem)
19264 && get_attr_prefixed (insn) == PREFIXED_NO
19265 && get_attr_update (insn) == UPDATE_NO
19266 && get_attr_indexed (insn) == INDEXED_NO)
19267 {
19268 /* Further restrictions by mode and size. */
19269 if (!MEM_SIZE_KNOWN_P (*str_mem))
19270 return false;
19271
19272 machine_mode mode = GET_MODE (*str_mem);
19273 HOST_WIDE_INT size = MEM_SIZE (*str_mem);
19274
19275 if (INTEGRAL_MODE_P (mode))
19276 /* Must be word or dword size. */
19277 return (size == 4 || size == 8);
19278 else if (FLOAT_MODE_P (mode))
19279 /* Must be dword size. */
19280 return (size == 8);
19281 }
19282
19283 return false;
19284 }
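/* As a hypothetical example of what this enables on Power10, two
   dword stores such as
       std 4,8(9)
       std 5,16(9)
   (non-update, non-indexed, adjacent offsets off one base register)
   are candidates the hardware can fuse into a single operation.  */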
19285
19286 /* Do Power10 specific reordering of the ready list. */
19287
19288 static int
19289 power10_sched_reorder (rtx_insn **ready, int lastpos)
19290 {
19291 rtx mem1;
19292
19293 /* Do store fusion during sched2 only. */
19294 if (!reload_completed)
19295 return cached_can_issue_more;
19296
19297 /* If the prior insn finished off a store fusion pair then simply
19298 reset the counter and return; there is nothing more to do. */
19299 if (load_store_pendulum != 0)
19300 {
19301 load_store_pendulum = 0;
19302 return cached_can_issue_more;
19303 }
19304
19305 /* Try to pair certain store insns to adjacent memory locations
19306 so that the hardware will fuse them to a single operation. */
19307 if (TARGET_P10_FUSION && is_fusable_store (last_scheduled_insn, &mem1))
19308 {
19309
19310 /* A fusable store was just scheduled. Scan the ready list for another
19311 store that it can fuse with. */
19312 int pos = lastpos;
19313 while (pos >= 0)
19314 {
19315 rtx mem2;
19316 /* GPR stores can be ascending or descending offsets; FPR/VSR stores
19317 must be ascending only. */
19318 if (is_fusable_store (ready[pos], &mem2)
19319 && ((INTEGRAL_MODE_P (GET_MODE (mem1))
19320 && adjacent_mem_locations (mem1, mem2))
19321 || (FLOAT_MODE_P (GET_MODE (mem1))
19322 && (adjacent_mem_locations (mem1, mem2) == mem1))))
19323 {
19324 /* Found a fusable store. Move it to the end of the ready list
19325 so it is scheduled next. */
19326 move_to_end_of_ready (ready, pos, lastpos);
19327
19328 load_store_pendulum = -1;
19329 break;
19330 }
19331 pos--;
19332 }
19333 }
19334
19335 return cached_can_issue_more;
19336 }
19337
19338 /* We are about to begin issuing insns for this clock cycle. */
19339
19340 static int
19341 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
19342 rtx_insn **ready ATTRIBUTE_UNUSED,
19343 int *pn_ready ATTRIBUTE_UNUSED,
19344 int clock_var ATTRIBUTE_UNUSED)
19345 {
19346 int n_ready = *pn_ready;
19347
19348 if (sched_verbose)
19349 fprintf (dump, "// rs6000_sched_reorder :\n");
19350
19351 /* Reorder the ready list if the insn at its tail (the one chosen
19352 to issue next) is a nonpipelined insn. */
19353 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
19354 {
19355 if (is_nonpipeline_insn (ready[n_ready - 1])
19356 && (recog_memoized (ready[n_ready - 2]) > 0))
19357 /* Simply swap first two insns. */
19358 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
19359 }
19360
19361 if (rs6000_tune == PROCESSOR_POWER6)
19362 load_store_pendulum = 0;
19363
19364 /* Do Power10 dependent reordering. */
19365 if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
19366 power10_sched_reorder (ready, n_ready - 1);
19367
19368 return rs6000_issue_rate ();
19369 }
19370
19371 /* Like rs6000_sched_reorder, but called after issuing each insn. */
19372
19373 static int
19374 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
19375 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
19376 {
19377 if (sched_verbose)
19378 fprintf (dump, "// rs6000_sched_reorder2 :\n");
19379
19380 /* Do Power6 dependent reordering if necessary. */
19381 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
19382 return power6_sched_reorder2 (ready, *pn_ready - 1);
19383
19384 /* Do Power9 dependent reordering if necessary. */
19385 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
19386 && recog_memoized (last_scheduled_insn) >= 0)
19387 return power9_sched_reorder2 (ready, *pn_ready - 1);
19388
19389 /* Do Power10 dependent reordering. */
19390 if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
19391 return power10_sched_reorder (ready, *pn_ready - 1);
19392
19393 return cached_can_issue_more;
19394 }
19395
19396 /* Return whether the presence of INSN causes a dispatch group termination
19397 of group WHICH_GROUP.
19398
19399 If WHICH_GROUP == current_group, this function will return true if INSN
19400 causes the termination of the current group (i.e., the dispatch group to
19401 which INSN belongs). This means that INSN will be the last insn in the
19402 group it belongs to.
19403
19404 If WHICH_GROUP == previous_group, this function will return true if INSN
19405 causes the termination of the previous group (i.e., the dispatch group that
19406 precedes the group to which INSN belongs). This means that INSN will be
19407 the first insn in the group it belongs to. */
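/* For example, on Power4/Power5 a microcoded insn must begin a new
   dispatch group (see insn_must_be_first_in_group below), so its
   presence terminates the previous group.  */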
19408
19409 static bool
19410 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
19411 {
19412 bool first, last;
19413
19414 if (! insn)
19415 return false;
19416
19417 first = insn_must_be_first_in_group (insn);
19418 last = insn_must_be_last_in_group (insn);
19419
19420 if (first && last)
19421 return true;
19422
19423 if (which_group == current_group)
19424 return last;
19425 else if (which_group == previous_group)
19426 return first;
19427
19428 return false;
19429 }
19430
19431
19432 static bool
19433 insn_must_be_first_in_group (rtx_insn *insn)
19434 {
19435 enum attr_type type;
19436
19437 if (!insn
19438 || NOTE_P (insn)
19439 || DEBUG_INSN_P (insn)
19440 || GET_CODE (PATTERN (insn)) == USE
19441 || GET_CODE (PATTERN (insn)) == CLOBBER)
19442 return false;
19443
19444 switch (rs6000_tune)
19445 {
19446 case PROCESSOR_POWER5:
19447 if (is_cracked_insn (insn))
19448 return true;
19449 /* FALLTHRU */
19450 case PROCESSOR_POWER4:
19451 if (is_microcoded_insn (insn))
19452 return true;
19453
19454 if (!rs6000_sched_groups)
19455 return false;
19456
19457 type = get_attr_type (insn);
19458
19459 switch (type)
19460 {
19461 case TYPE_MFCR:
19462 case TYPE_MFCRF:
19463 case TYPE_MTCR:
19464 case TYPE_CR_LOGICAL:
19465 case TYPE_MTJMPR:
19466 case TYPE_MFJMPR:
19467 case TYPE_DIV:
19468 case TYPE_LOAD_L:
19469 case TYPE_STORE_C:
19470 case TYPE_ISYNC:
19471 case TYPE_SYNC:
19472 return true;
19473 default:
19474 break;
19475 }
19476 break;
19477 case PROCESSOR_POWER6:
19478 type = get_attr_type (insn);
19479
19480 switch (type)
19481 {
19482 case TYPE_EXTS:
19483 case TYPE_CNTLZ:
19484 case TYPE_TRAP:
19485 case TYPE_MUL:
19486 case TYPE_INSERT:
19487 case TYPE_FPCOMPARE:
19488 case TYPE_MFCR:
19489 case TYPE_MTCR:
19490 case TYPE_MFJMPR:
19491 case TYPE_MTJMPR:
19492 case TYPE_ISYNC:
19493 case TYPE_SYNC:
19494 case TYPE_LOAD_L:
19495 case TYPE_STORE_C:
19496 return true;
19497 case TYPE_SHIFT:
19498 if (get_attr_dot (insn) == DOT_NO
19499 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19500 return true;
19501 else
19502 break;
19503 case TYPE_DIV:
19504 if (get_attr_size (insn) == SIZE_32)
19505 return true;
19506 else
19507 break;
19508 case TYPE_LOAD:
19509 case TYPE_STORE:
19510 case TYPE_FPLOAD:
19511 case TYPE_FPSTORE:
19512 if (get_attr_update (insn) == UPDATE_YES)
19513 return true;
19514 else
19515 break;
19516 default:
19517 break;
19518 }
19519 break;
19520 case PROCESSOR_POWER7:
19521 type = get_attr_type (insn);
19522
19523 switch (type)
19524 {
19525 case TYPE_CR_LOGICAL:
19526 case TYPE_MFCR:
19527 case TYPE_MFCRF:
19528 case TYPE_MTCR:
19529 case TYPE_DIV:
19530 case TYPE_ISYNC:
19531 case TYPE_LOAD_L:
19532 case TYPE_STORE_C:
19533 case TYPE_MFJMPR:
19534 case TYPE_MTJMPR:
19535 return true;
19536 case TYPE_MUL:
19537 case TYPE_SHIFT:
19538 case TYPE_EXTS:
19539 if (get_attr_dot (insn) == DOT_YES)
19540 return true;
19541 else
19542 break;
19543 case TYPE_LOAD:
19544 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19545 || get_attr_update (insn) == UPDATE_YES)
19546 return true;
19547 else
19548 break;
19549 case TYPE_STORE:
19550 case TYPE_FPLOAD:
19551 case TYPE_FPSTORE:
19552 if (get_attr_update (insn) == UPDATE_YES)
19553 return true;
19554 else
19555 break;
19556 default:
19557 break;
19558 }
19559 break;
19560 case PROCESSOR_POWER8:
19561 type = get_attr_type (insn);
19562
19563 switch (type)
19564 {
19565 case TYPE_CR_LOGICAL:
19566 case TYPE_MFCR:
19567 case TYPE_MFCRF:
19568 case TYPE_MTCR:
19569 case TYPE_SYNC:
19570 case TYPE_ISYNC:
19571 case TYPE_LOAD_L:
19572 case TYPE_STORE_C:
19573 case TYPE_VECSTORE:
19574 case TYPE_MFJMPR:
19575 case TYPE_MTJMPR:
19576 return true;
19577 case TYPE_SHIFT:
19578 case TYPE_EXTS:
19579 case TYPE_MUL:
19580 if (get_attr_dot (insn) == DOT_YES)
19581 return true;
19582 else
19583 break;
19584 case TYPE_LOAD:
19585 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19586 || get_attr_update (insn) == UPDATE_YES)
19587 return true;
19588 else
19589 break;
19590 case TYPE_STORE:
19591 if (get_attr_update (insn) == UPDATE_YES
19592 && get_attr_indexed (insn) == INDEXED_YES)
19593 return true;
19594 else
19595 break;
19596 default:
19597 break;
19598 }
19599 break;
19600 default:
19601 break;
19602 }
19603
19604 return false;
19605 }
19606
19607 static bool
19608 insn_must_be_last_in_group (rtx_insn *insn)
19609 {
19610 enum attr_type type;
19611
19612 if (!insn
19613 || NOTE_P (insn)
19614 || DEBUG_INSN_P (insn)
19615 || GET_CODE (PATTERN (insn)) == USE
19616 || GET_CODE (PATTERN (insn)) == CLOBBER)
19617 return false;
19618
19619 switch (rs6000_tune) {
19620 case PROCESSOR_POWER4:
19621 case PROCESSOR_POWER5:
19622 if (is_microcoded_insn (insn))
19623 return true;
19624
19625 if (is_branch_slot_insn (insn))
19626 return true;
19627
19628 break;
19629 case PROCESSOR_POWER6:
19630 type = get_attr_type (insn);
19631
19632 switch (type)
19633 {
19634 case TYPE_EXTS:
19635 case TYPE_CNTLZ:
19636 case TYPE_TRAP:
19637 case TYPE_MUL:
19638 case TYPE_FPCOMPARE:
19639 case TYPE_MFCR:
19640 case TYPE_MTCR:
19641 case TYPE_MFJMPR:
19642 case TYPE_MTJMPR:
19643 case TYPE_ISYNC:
19644 case TYPE_SYNC:
19645 case TYPE_LOAD_L:
19646 case TYPE_STORE_C:
19647 return true;
19648 case TYPE_SHIFT:
19649 if (get_attr_dot (insn) == DOT_NO
19650 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19651 return true;
19652 else
19653 break;
19654 case TYPE_DIV:
19655 if (get_attr_size (insn) == SIZE_32)
19656 return true;
19657 else
19658 break;
19659 default:
19660 break;
19661 }
19662 break;
19663 case PROCESSOR_POWER7:
19664 type = get_attr_type (insn);
19665
19666 switch (type)
19667 {
19668 case TYPE_ISYNC:
19669 case TYPE_SYNC:
19670 case TYPE_LOAD_L:
19671 case TYPE_STORE_C:
19672 return true;
19673 case TYPE_LOAD:
19674 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19675 && get_attr_update (insn) == UPDATE_YES)
19676 return true;
19677 else
19678 break;
19679 case TYPE_STORE:
19680 if (get_attr_update (insn) == UPDATE_YES
19681 && get_attr_indexed (insn) == INDEXED_YES)
19682 return true;
19683 else
19684 break;
19685 default:
19686 break;
19687 }
19688 break;
19689 case PROCESSOR_POWER8:
19690 type = get_attr_type (insn);
19691
19692 switch (type)
19693 {
19694 case TYPE_MFCR:
19695 case TYPE_MTCR:
19696 case TYPE_ISYNC:
19697 case TYPE_SYNC:
19698 case TYPE_LOAD_L:
19699 case TYPE_STORE_C:
19700 return true;
19701 case TYPE_LOAD:
19702 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19703 && get_attr_update (insn) == UPDATE_YES)
19704 return true;
19705 else
19706 break;
19707 case TYPE_STORE:
19708 if (get_attr_update (insn) == UPDATE_YES
19709 && get_attr_indexed (insn) == INDEXED_YES)
19710 return true;
19711 else
19712 break;
19713 default:
19714 break;
19715 }
19716 break;
19717 default:
19718 break;
19719 }
19720
19721 return false;
19722 }
19723
19724 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19725 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19726
19727 static bool
19728 is_costly_group (rtx *group_insns, rtx next_insn)
19729 {
19730 int i;
19731 int issue_rate = rs6000_issue_rate ();
19732
19733 for (i = 0; i < issue_rate; i++)
19734 {
19735 sd_iterator_def sd_it;
19736 dep_t dep;
19737 rtx insn = group_insns[i];
19738
19739 if (!insn)
19740 continue;
19741
19742 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
19743 {
19744 rtx next = DEP_CON (dep);
19745
19746 if (next == next_insn
19747 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
19748 return true;
19749 }
19750 }
19751
19752 return false;
19753 }
19754
19755 /* A utility function for redefine_groups.
19756 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19757 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19758 to keep it "far" (in a separate group) from GROUP_INSNS, following
19759 one of the following schemes, depending on the value of the flag
19760 -minsert-sched-nops = X:
19761 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19762 in order to force NEXT_INSN into a separate group.
19763 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19764 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19765 insertion (has a group just ended, how many vacant issue slots remain in the
19766 last group, and how many dispatch groups were encountered so far). */
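/* For example, with two vacant slots left in the current group, scheme
   (1) emits a single nop before a non-branch NEXT_INSN: only a branch
   may occupy the final (branch) slot, so one nop fills the remaining
   slot and NEXT_INSN is forced into a new group.  (On POWER6 and later
   a single special group-ending nop is emitted instead.)  */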
19767
19768 static int
19769 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
19770 rtx_insn *next_insn, bool *group_end, int can_issue_more,
19771 int *group_count)
19772 {
19773 rtx nop;
19774 bool force;
19775 int issue_rate = rs6000_issue_rate ();
19776 bool end = *group_end;
19777 int i;
19778
19779 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
19780 return can_issue_more;
19781
19782 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
19783 return can_issue_more;
19784
19785 force = is_costly_group (group_insns, next_insn);
19786 if (!force)
19787 return can_issue_more;
19788
19789 if (sched_verbose > 6)
19790 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
19791 *group_count, can_issue_more);
19792
19793 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
19794 {
19795 if (*group_end)
19796 can_issue_more = 0;
19797
19798 /* Since only a branch can be issued in the last issue_slot, it is
19799 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
19800 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
19801 in this case the last nop will start a new group and the branch
19802 will be forced to the new group. */
19803 if (can_issue_more && !is_branch_slot_insn (next_insn))
19804 can_issue_more--;
19805
19806 /* Do we have a special group ending nop? */
19807 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
19808 || rs6000_tune == PROCESSOR_POWER8)
19809 {
19810 nop = gen_group_ending_nop ();
19811 emit_insn_before (nop, next_insn);
19812 can_issue_more = 0;
19813 }
19814 else
19815 while (can_issue_more > 0)
19816 {
19817 nop = gen_nop ();
19818 emit_insn_before (nop, next_insn);
19819 can_issue_more--;
19820 }
19821
19822 *group_end = true;
19823 return 0;
19824 }
19825
19826 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
19827 {
19828 int n_nops = rs6000_sched_insert_nops;
19829
19830 /* Nops can't be issued from the branch slot, so the effective
19831 issue_rate for nops is 'issue_rate - 1'. */
19832 if (can_issue_more == 0)
19833 can_issue_more = issue_rate;
19834 can_issue_more--;
19835 if (can_issue_more == 0)
19836 {
19837 can_issue_more = issue_rate - 1;
19838 (*group_count)++;
19839 end = true;
19840 for (i = 0; i < issue_rate; i++)
19841 {
19842 group_insns[i] = 0;
19843 }
19844 }
19845
19846 while (n_nops > 0)
19847 {
19848 nop = gen_nop ();
19849 emit_insn_before (nop, next_insn);
19850 if (can_issue_more == issue_rate - 1) /* new group begins */
19851 end = false;
19852 can_issue_more--;
19853 if (can_issue_more == 0)
19854 {
19855 can_issue_more = issue_rate - 1;
19856 (*group_count)++;
19857 end = true;
19858 for (i = 0; i < issue_rate; i++)
19859 {
19860 group_insns[i] = 0;
19861 }
19862 }
19863 n_nops--;
19864 }
19865
19866 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
19867 can_issue_more++;
19868
19869 /* Is next_insn going to start a new group? */
19870 *group_end
19871 = (end
19872 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19873 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19874 || (can_issue_more < issue_rate
19875 && insn_terminates_group_p (next_insn, previous_group)));
19876 if (*group_end && end)
19877 (*group_count)--;
19878
19879 if (sched_verbose > 6)
19880 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
19881 *group_count, can_issue_more);
19882 return can_issue_more;
19883 }
19884
19885 return can_issue_more;
19886 }
19887
19888 /* This function tries to sync the dispatch groups that the compiler "sees"
19889 with the dispatch groups that the processor dispatcher is expected to
19890 form in practice. It tries to achieve this synchronization by forcing the
19891 estimated processor grouping on the compiler (as opposed to the function
19892 'pad_groups', which tries to force the scheduler's grouping on the processor).
19893
19894 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
19895 examines the (estimated) dispatch groups that will be formed by the processor
19896 dispatcher. It marks these group boundaries to reflect the estimated
19897 processor grouping, overriding the grouping that the scheduler had marked.
19898 Depending on the value of the flag '-minsert-sched-nops' this function can
19899 force certain insns into separate groups or force a certain distance between
19900 them by inserting nops, for example, if there exists a "costly dependence"
19901 between the insns.
19902
19903 The function estimates the group boundaries that the processor will form as
19904 follows: It keeps track of how many vacant issue slots are available after
19905 each insn. A subsequent insn will start a new group if one of the following
19906 4 cases applies:
19907 - no more vacant issue slots remain in the current dispatch group.
19908 - only the last issue slot, which is the branch slot, is vacant, but the next
19909 insn is not a branch.
19910 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
19911 which means that a cracked insn (which occupies two issue slots) can't be
19912 issued in this group.
19913 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
19914 start a new group. */
19915
19916 static int
19917 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19918 rtx_insn *tail)
19919 {
19920 rtx_insn *insn, *next_insn;
19921 int issue_rate;
19922 int can_issue_more;
19923 int slot, i;
19924 bool group_end;
19925 int group_count = 0;
19926 rtx *group_insns;
19927
19928 /* Initialize. */
19929 issue_rate = rs6000_issue_rate ();
19930 group_insns = XALLOCAVEC (rtx, issue_rate);
19931 for (i = 0; i < issue_rate; i++)
19932 {
19933 group_insns[i] = 0;
19934 }
19935 can_issue_more = issue_rate;
19936 slot = 0;
19937 insn = get_next_active_insn (prev_head_insn, tail);
19938 group_end = false;
19939
19940 while (insn != NULL_RTX)
19941 {
19942 slot = (issue_rate - can_issue_more);
19943 group_insns[slot] = insn;
19944 can_issue_more
19945 = rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
19946 if (insn_terminates_group_p (insn, current_group))
19947 can_issue_more = 0;
19948
19949 next_insn = get_next_active_insn (insn, tail);
19950 if (next_insn == NULL_RTX)
19951 return group_count + 1;
19952
19953 /* Is next_insn going to start a new group? */
19954 group_end
19955 = (can_issue_more == 0
19956 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
19957 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
19958 || (can_issue_more < issue_rate
19959 && insn_terminates_group_p (next_insn, previous_group)));
19960
19961 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
19962 next_insn, &group_end, can_issue_more,
19963 &group_count);
19964
19965 if (group_end)
19966 {
19967 group_count++;
19968 can_issue_more = 0;
19969 for (i = 0; i < issue_rate; i++)
19970 {
19971 group_insns[i] = 0;
19972 }
19973 }
19974
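/* The scheduler marks the insn that begins a dispatch group with
   TImode; update that marking to match the recomputed boundaries.  */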
19975 if (GET_MODE (next_insn) == TImode && can_issue_more)
19976 PUT_MODE (next_insn, VOIDmode);
19977 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
19978 PUT_MODE (next_insn, TImode);
19979
19980 insn = next_insn;
19981 if (can_issue_more == 0)
19982 can_issue_more = issue_rate;
19983 } /* while */
19984
19985 return group_count;
19986 }
19987
19988 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
19989 dispatch group boundaries that the scheduler had marked. Pad with nops
19990 any dispatch groups which have vacant issue slots, in order to force the
19991 scheduler's grouping on the processor dispatcher. The function
19992 returns the number of dispatch groups found. */
19993
19994 static int
19995 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
19996 rtx_insn *tail)
19997 {
19998 rtx_insn *insn, *next_insn;
19999 rtx nop;
20000 int issue_rate;
20001 int can_issue_more;
20002 bool group_end;
20003 int group_count = 0;
20004
20005 /* Initialize issue_rate. */
20006 issue_rate = rs6000_issue_rate ();
20007 can_issue_more = issue_rate;
20008
20009 insn = get_next_active_insn (prev_head_insn, tail);
20010 next_insn = get_next_active_insn (insn, tail);
20011
20012 while (insn != NULL_RTX)
20013 {
20014 can_issue_more
20015 = rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
20016
20017 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
20018
20019 if (next_insn == NULL_RTX)
20020 break;
20021
20022 if (group_end)
20023 {
20024 /* If the scheduler had marked group termination at this location
20025 (between insn and next_insn), and neither insn nor next_insn will
20026 force group termination, pad the group with nops to force group
20027 termination. */
20028 if (can_issue_more
20029 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
20030 && !insn_terminates_group_p (insn, current_group)
20031 && !insn_terminates_group_p (next_insn, previous_group))
20032 {
20033 if (!is_branch_slot_insn (next_insn))
20034 can_issue_more--;
20035
20036 while (can_issue_more)
20037 {
20038 nop = gen_nop ();
20039 emit_insn_before (nop, next_insn);
20040 can_issue_more--;
20041 }
20042 }
20043
20044 can_issue_more = issue_rate;
20045 group_count++;
20046 }
20047
20048 insn = next_insn;
20049 next_insn = get_next_active_insn (insn, tail);
20050 }
20051
20052 return group_count;
20053 }
20054
20055 /* We're beginning a new block. Initialize data structures as necessary. */
20056
20057 static void
20058 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
20059 int sched_verbose ATTRIBUTE_UNUSED,
20060 int max_ready ATTRIBUTE_UNUSED)
20061 {
20062 last_scheduled_insn = NULL;
20063 load_store_pendulum = 0;
20064 divide_cnt = 0;
20065 vec_pairing = 0;
20066 }
20067
20068 /* The following function is called at the end of scheduling BB.
20069 After reload, it inserts nops to enforce insn group bundling. */
20070
20071 static void
20072 rs6000_sched_finish (FILE *dump, int sched_verbose)
20073 {
20074 int n_groups;
20075
20076 if (sched_verbose)
20077 fprintf (dump, "=== Finishing schedule.\n");
20078
20079 if (reload_completed && rs6000_sched_groups)
20080 {
20081 /* Do not run the sched_finish hook when selective scheduling is enabled. */
20082 if (sel_sched_p ())
20083 return;
20084
20085 if (rs6000_sched_insert_nops == sched_finish_none)
20086 return;
20087
20088 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
20089 n_groups = pad_groups (dump, sched_verbose,
20090 current_sched_info->prev_head,
20091 current_sched_info->next_tail);
20092 else
20093 n_groups = redefine_groups (dump, sched_verbose,
20094 current_sched_info->prev_head,
20095 current_sched_info->next_tail);
20096
20097 if (sched_verbose >= 6)
20098 {
20099 fprintf (dump, "ngroups = %d\n", n_groups);
20100 print_rtl (dump, current_sched_info->prev_head);
20101 fprintf (dump, "Done finish_sched\n");
20102 }
20103 }
20104 }
20105
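/* Scheduler state saved and restored by the sched context hooks below,
   so that selective scheduling can switch between scheduling contexts.  */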
20106 struct rs6000_sched_context
20107 {
20108 short cached_can_issue_more;
20109 rtx_insn *last_scheduled_insn;
20110 int load_store_pendulum;
20111 int divide_cnt;
20112 int vec_pairing;
20113 };
20114
20115 typedef struct rs6000_sched_context rs6000_sched_context_def;
20116 typedef rs6000_sched_context_def *rs6000_sched_context_t;
20117
20118 /* Allocate storage for a new scheduling context. */
20119 static void *
20120 rs6000_alloc_sched_context (void)
20121 {
20122 return xmalloc (sizeof (rs6000_sched_context_def));
20123 }
20124
20125 /* If CLEAN_P is true then initialize _SC with clean data;
20126 otherwise initialize it from the global context. */
20127 static void
20128 rs6000_init_sched_context (void *_sc, bool clean_p)
20129 {
20130 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
20131
20132 if (clean_p)
20133 {
20134 sc->cached_can_issue_more = 0;
20135 sc->last_scheduled_insn = NULL;
20136 sc->load_store_pendulum = 0;
20137 sc->divide_cnt = 0;
20138 sc->vec_pairing = 0;
20139 }
20140 else
20141 {
20142 sc->cached_can_issue_more = cached_can_issue_more;
20143 sc->last_scheduled_insn = last_scheduled_insn;
20144 sc->load_store_pendulum = load_store_pendulum;
20145 sc->divide_cnt = divide_cnt;
20146 sc->vec_pairing = vec_pairing;
20147 }
20148 }
20149
20150 /* Sets the global scheduling context to the one pointed to by _SC. */
20151 static void
20152 rs6000_set_sched_context (void *_sc)
20153 {
20154 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
20155
20156 gcc_assert (sc != NULL);
20157
20158 cached_can_issue_more = sc->cached_can_issue_more;
20159 last_scheduled_insn = sc->last_scheduled_insn;
20160 load_store_pendulum = sc->load_store_pendulum;
20161 divide_cnt = sc->divide_cnt;
20162 vec_pairing = sc->vec_pairing;
20163 }
20164
20165 /* Free _SC. */
20166 static void
20167 rs6000_free_sched_context (void *_sc)
20168 {
20169 gcc_assert (_sc != NULL);
20170
20171 free (_sc);
20172 }
20173
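/* Return true if the scheduler may speculatively move INSN; division
   and square-root insns are never speculated.  */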
20174 static bool
20175 rs6000_sched_can_speculate_insn (rtx_insn *insn)
20176 {
20177 switch (get_attr_type (insn))
20178 {
20179 case TYPE_DIV:
20180 case TYPE_SDIV:
20181 case TYPE_DDIV:
20182 case TYPE_VECDIV:
20183 case TYPE_SSQRT:
20184 case TYPE_DSQRT:
20185 return false;
20186
20187 default:
20188 return true;
20189 }
20190 }
20191 \f
20192 /* Length in units of the trampoline for entering a nested function. */
20193
20194 int
20195 rs6000_trampoline_size (void)
20196 {
20197 int ret = 0;
20198
20199 switch (DEFAULT_ABI)
20200 {
20201 default:
20202 gcc_unreachable ();
20203
20204 case ABI_AIX:
20205 ret = (TARGET_32BIT) ? 12 : 24;
20206 break;
20207
20208 case ABI_ELFv2:
20209 gcc_assert (!TARGET_32BIT);
20210 ret = 32;
20211 break;
20212
20213 case ABI_DARWIN:
20214 case ABI_V4:
20215 ret = (TARGET_32BIT) ? 40 : 48;
20216 break;
20217 }
20218
20219 return ret;
20220 }
20221
20222 /* Emit RTL insns to initialize the variable parts of a trampoline.
20223 FNADDR is an RTX for the address of the function's pure code.
20224 CXT is an RTX for the static chain value for the function. */
20225
20226 static void
20227 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
20228 {
20229 int regsize = (TARGET_32BIT) ? 4 : 8;
20230 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
20231 rtx ctx_reg = force_reg (Pmode, cxt);
20232 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
20233
20234 switch (DEFAULT_ABI)
20235 {
20236 default:
20237 gcc_unreachable ();
20238
20239 /* Under AIX, just build the three-word function descriptor. */
20240 case ABI_AIX:
20241 {
20242 rtx fnmem, fn_reg, toc_reg;
20243
20244 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
20245 error ("you cannot take the address of a nested function if you use "
20246 "the %qs option", "-mno-pointers-to-nested-functions");
20247
20248 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
20249 fn_reg = gen_reg_rtx (Pmode);
20250 toc_reg = gen_reg_rtx (Pmode);
20251
20252 /* Macro to shorten the code expansions below. */
20253 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
20254
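/* Build the trampoline's own three-word descriptor: copy the target's
   entry point (offset 0) and TOC pointer (offset REGSIZE) from FNMEM,
   then store the static chain at offset 2*REGSIZE.  */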
20255 m_tramp = replace_equiv_address (m_tramp, addr);
20256
20257 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
20258 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
20259 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
20260 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
20261 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
20262
20263 # undef MEM_PLUS
20264 }
20265 break;
20266
20267 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
20268 case ABI_ELFv2:
20269 case ABI_DARWIN:
20270 case ABI_V4:
20271 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
20272 LCT_NORMAL, VOIDmode,
20273 addr, Pmode,
20274 GEN_INT (rs6000_trampoline_size ()), SImode,
20275 fnaddr, Pmode,
20276 ctx_reg, Pmode);
20277 break;
20278 }
20279 }
20280
20281 \f
20282 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
20283 identifier as an argument, so the front end shouldn't look it up. */
20284
20285 static bool
20286 rs6000_attribute_takes_identifier_p (const_tree attr_id)
20287 {
20288 return is_attribute_p ("altivec", attr_id);
20289 }
20290
20291 /* Handle the "altivec" attribute. The attribute may have
20292 arguments as follows:
20293
20294 __attribute__((altivec(vector__)))
20295 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
20296 __attribute__((altivec(bool__))) (always followed by 'unsigned')
20297
20298 and may appear more than once (e.g., 'vector bool char') in a
20299 given declaration. */
20300
20301 static tree
20302 rs6000_handle_altivec_attribute (tree *node,
20303 tree name ATTRIBUTE_UNUSED,
20304 tree args,
20305 int flags ATTRIBUTE_UNUSED,
20306 bool *no_add_attrs)
20307 {
20308 tree type = *node, result = NULL_TREE;
20309 machine_mode mode;
20310 int unsigned_p;
20311 char altivec_type
20312 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
20313 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
20314 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
20315 : '?');
20316
20317 while (POINTER_TYPE_P (type)
20318 || TREE_CODE (type) == FUNCTION_TYPE
20319 || TREE_CODE (type) == METHOD_TYPE
20320 || TREE_CODE (type) == ARRAY_TYPE)
20321 type = TREE_TYPE (type);
20322
20323 mode = TYPE_MODE (type);
20324
20325 /* Check for invalid AltiVec type qualifiers. */
20326 if (type == long_double_type_node)
20327 error ("use of %<long double%> in AltiVec types is invalid");
20328 else if (type == boolean_type_node)
20329 error ("use of boolean types in AltiVec types is invalid");
20330 else if (TREE_CODE (type) == COMPLEX_TYPE)
20331 error ("use of %<complex%> in AltiVec types is invalid");
20332 else if (DECIMAL_FLOAT_MODE_P (mode))
20333 error ("use of decimal floating-point types in AltiVec types is invalid");
20334 else if (!TARGET_VSX)
20335 {
20336 if (type == long_unsigned_type_node || type == long_integer_type_node)
20337 {
20338 if (TARGET_64BIT)
20339 error ("use of %<long%> in AltiVec types is invalid for "
20340 "64-bit code without %qs", "-mvsx");
20341 else if (rs6000_warn_altivec_long)
20342 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20343 "use %<int%>");
20344 }
20345 else if (type == long_long_unsigned_type_node
20346 || type == long_long_integer_type_node)
20347 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20348 "-mvsx");
20349 else if (type == double_type_node)
20350 error ("use of %<double%> in AltiVec types is invalid without %qs",
20351 "-mvsx");
20352 }
20353
20354 switch (altivec_type)
20355 {
20356 case 'v':
20357 unsigned_p = TYPE_UNSIGNED (type);
20358 switch (mode)
20359 {
20360 case E_TImode:
20361 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
20362 break;
20363 case E_DImode:
20364 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
20365 break;
20366 case E_SImode:
20367 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
20368 break;
20369 case E_HImode:
20370 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
20371 break;
20372 case E_QImode:
20373 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
20374 break;
20375 case E_SFmode: result = V4SF_type_node; break;
20376 case E_DFmode: result = V2DF_type_node; break;
20377 /* If the user says 'vector int bool', we may be handed the 'bool'
20378 attribute _before_ the 'vector' attribute, and so select the
20379 proper type in the 'b' case below. */
20380 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
20381 case E_V2DImode: case E_V2DFmode:
20382 result = type;
20383 default: break;
20384 }
20385 break;
20386 case 'b':
20387 switch (mode)
20388 {
20389 case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break;
20390 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
20391 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
20392 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
20393 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
20394 default: break;
20395 }
20396 break;
20397 case 'p':
20398 switch (mode)
20399 {
20400 case E_V8HImode: result = pixel_V8HI_type_node;
20401 default: break;
20402 }
20403 default: break;
20404 }
20405
20406 /* Propagate qualifiers attached to the element type
20407 onto the vector type. */
20408 if (result && result != type && TYPE_QUALS (type))
20409 result = build_qualified_type (result, TYPE_QUALS (type));
20410
20411 *no_add_attrs = true; /* No need to hang on to the attribute. */
20412
20413 if (result)
20414 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
20415
20416 return NULL_TREE;
20417 }
20418
20419 /* AltiVec defines five built-in scalar types that serve as vector
20420 elements; we must teach the compiler how to mangle them. The 128-bit
20421 floating point mangling is target-specific as well. MMA defines
20422 two built-in types to be used as opaque vector types. */
20423
20424 static const char *
20425 rs6000_mangle_type (const_tree type)
20426 {
20427 type = TYPE_MAIN_VARIANT (type);
20428
20429 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
20430 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
20431 && TREE_CODE (type) != OPAQUE_TYPE)
20432 return NULL;
20433
20434 if (type == bool_char_type_node) return "U6__boolc";
20435 if (type == bool_short_type_node) return "U6__bools";
20436 if (type == pixel_type_node) return "u7__pixel";
20437 if (type == bool_int_type_node) return "U6__booli";
20438 if (type == bool_long_long_type_node) return "U6__boolx";
20439
20440 if (type == float128_type_node || type == float64x_type_node)
20441 return NULL;
20442
20443 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
20444 return "g";
20445 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
20446 return "u9__ieee128";
20447
20448 if (type == vector_pair_type_node)
20449 return "u13__vector_pair";
20450 if (type == vector_quad_type_node)
20451 return "u13__vector_quad";
20452
20453 /* For all other types, use the default mangling. */
20454 return NULL;
20455 }
20456
20457 /* Handle a "longcall" or "shortcall" attribute; arguments as in
20458 struct attribute_spec.handler. */
20459
20460 static tree
20461 rs6000_handle_longcall_attribute (tree *node, tree name,
20462 tree args ATTRIBUTE_UNUSED,
20463 int flags ATTRIBUTE_UNUSED,
20464 bool *no_add_attrs)
20465 {
20466 if (TREE_CODE (*node) != FUNCTION_TYPE
20467 && TREE_CODE (*node) != FIELD_DECL
20468 && TREE_CODE (*node) != TYPE_DECL)
20469 {
20470 warning (OPT_Wattributes, "%qE attribute only applies to functions",
20471 name);
20472 *no_add_attrs = true;
20473 }
20474
20475 return NULL_TREE;
20476 }
20477
20478 /* Set longcall attributes on all functions declared when
20479 rs6000_default_long_calls is true. */
20480 static void
20481 rs6000_set_default_type_attributes (tree type)
20482 {
20483 if (rs6000_default_long_calls
20484 && FUNC_OR_METHOD_TYPE_P (type))
20485 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
20486 NULL_TREE,
20487 TYPE_ATTRIBUTES (type));
20488
20489 #if TARGET_MACHO
20490 darwin_set_default_type_attributes (type);
20491 #endif
20492 }
20493
20494 /* Return a reference suitable for calling a function with the
20495 longcall attribute. */
20496
20497 static rtx
20498 rs6000_longcall_ref (rtx call_ref, rtx arg)
20499 {
20500 /* System V adds '.' to the internal name, so skip any leading dots. */
20501 const char *call_name = XSTR (call_ref, 0);
20502 if (*call_name == '.')
20503 {
20504 while (*call_name == '.')
20505 call_name++;
20506
20507 tree node = get_identifier (call_name);
20508 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
20509 }
20510
20511 if (TARGET_PLTSEQ)
20512 {
20513 rtx base = const0_rtx;
20514 int regno = 12;
20515 if (rs6000_pcrel_p ())
20516 {
20517 rtx reg = gen_rtx_REG (Pmode, regno);
20518 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
20519 gen_rtvec (3, base, call_ref, arg),
20520 UNSPECV_PLT_PCREL);
20521 emit_insn (gen_rtx_SET (reg, u));
20522 return reg;
20523 }
20524
20525 if (DEFAULT_ABI == ABI_ELFv2)
20526 base = gen_rtx_REG (Pmode, TOC_REGISTER);
20527 else
20528 {
20529 if (flag_pic)
20530 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
20531 regno = 11;
20532 }
20533 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20534 may be used by a function global entry point. For SysV4, r11
20535 is used by __glink_PLTresolve lazy resolver entry. */
20536 rtx reg = gen_rtx_REG (Pmode, regno);
20537 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
20538 UNSPEC_PLT16_HA);
20539 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
20540 gen_rtvec (3, reg, call_ref, arg),
20541 UNSPECV_PLT16_LO);
20542 emit_insn (gen_rtx_SET (reg, hi));
20543 emit_insn (gen_rtx_SET (reg, lo));
20544 return reg;
20545 }
20546
20547 return force_reg (Pmode, call_ref);
20548 }
20549 \f
20550 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20551 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20552 #endif
20553
20554 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20555 struct attribute_spec.handler. */
20556 static tree
20557 rs6000_handle_struct_attribute (tree *node, tree name,
20558 tree args ATTRIBUTE_UNUSED,
20559 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20560 {
20561 tree *type = NULL;
20562 if (DECL_P (*node))
20563 {
20564 if (TREE_CODE (*node) == TYPE_DECL)
20565 type = &TREE_TYPE (*node);
20566 }
20567 else
20568 type = node;
20569
20570 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20571 || TREE_CODE (*type) == UNION_TYPE)))
20572 {
20573 warning (OPT_Wattributes, "%qE attribute ignored", name);
20574 *no_add_attrs = true;
20575 }
20576
20577 else if ((is_attribute_p ("ms_struct", name)
20578 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20579 || ((is_attribute_p ("gcc_struct", name)
20580 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20581 {
20582 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
20583 name);
20584 *no_add_attrs = true;
20585 }
20586
20587 return NULL_TREE;
20588 }
20589
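/* Return true if bit-fields in RECORD_TYPE should be laid out the
   Microsoft way: either MS layout is the target default and has not
   been disabled with "gcc_struct", or it was requested with
   "ms_struct".  */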
20590 static bool
20591 rs6000_ms_bitfield_layout_p (const_tree record_type)
20592 {
20593 return ((TARGET_USE_MS_BITFIELD_LAYOUT
20594 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20595 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
20596 }
20597 \f
20598 #ifdef USING_ELFOS_H
20599
20600 /* A get_unnamed_section callback, used for switching to toc_section. */
20601
20602 static void
20603 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
20604 {
20605 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20606 && TARGET_MINIMAL_TOC)
20607 {
20608 if (!toc_initialized)
20609 {
20610 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20611 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20612 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
20613 fprintf (asm_out_file, "\t.tc ");
20614 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
20615 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20616 fprintf (asm_out_file, "\n");
20617
20618 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20619 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20620 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20621 fprintf (asm_out_file, " = .+32768\n");
20622 toc_initialized = 1;
20623 }
20624 else
20625 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20626 }
20627 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20628 {
20629 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20630 if (!toc_initialized)
20631 {
20632 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20633 toc_initialized = 1;
20634 }
20635 }
20636 else
20637 {
20638 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20639 if (!toc_initialized)
20640 {
20641 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20642 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20643 fprintf (asm_out_file, " = .+32768\n");
20644 toc_initialized = 1;
20645 }
20646 }
20647 }
20648
20649 /* Implement TARGET_ASM_INIT_SECTIONS. */
20650
20651 static void
20652 rs6000_elf_asm_init_sections (void)
20653 {
20654 toc_section
20655 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
20656
20657 sdata2_section
20658 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
20659 SDATA2_SECTION_ASM_OP);
20660 }
20661
20662 /* Implement TARGET_SELECT_RTX_SECTION. */
20663
20664 static section *
20665 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
20666 unsigned HOST_WIDE_INT align)
20667 {
20668 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20669 return toc_section;
20670 else
20671 return default_elf_select_rtx_section (mode, x, align);
20672 }
20673 \f
20674 /* For a SYMBOL_REF, set generic flags and then perform some
20675 target-specific processing.
20676
20677 When the AIX ABI is requested on a non-AIX system, replace the
20678 function name with the real name (with a leading .) rather than the
20679 function descriptor name. This saves a lot of overriding code to
20680 read the prefixes. */
20681
20682 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
20683 static void
20684 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
20685 {
20686 default_encode_section_info (decl, rtl, first);
20687
20688 if (first
20689 && TREE_CODE (decl) == FUNCTION_DECL
20690 && !TARGET_AIX
20691 && DEFAULT_ABI == ABI_AIX)
20692 {
20693 rtx sym_ref = XEXP (rtl, 0);
20694 size_t len = strlen (XSTR (sym_ref, 0));
20695 char *str = XALLOCAVEC (char, len + 2);
20696 str[0] = '.';
20697 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
20698 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
20699 }
20700 }
20701
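/* Return true if SECTION is TEMPL itself or TEMPL followed by '.',
   i.e. if SECTION names TEMPL or one of its subsections.  */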
20702 static inline bool
20703 compare_section_name (const char *section, const char *templ)
20704 {
20705 int len;
20706
20707 len = strlen (templ);
20708 return (strncmp (section, templ, len) == 0
20709 && (section[len] == 0 || section[len] == '.'));
20710 }
20711
20712 bool
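/* Return true if DECL is a candidate for the small data area
   (.sdata/.sbss and related sections).  */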
20713 rs6000_elf_in_small_data_p (const_tree decl)
20714 {
20715 if (rs6000_sdata == SDATA_NONE)
20716 return false;
20717
20718 /* We want to merge strings, so we never consider them small data. */
20719 if (TREE_CODE (decl) == STRING_CST)
20720 return false;
20721
20722 /* Functions are never in the small data area. */
20723 if (TREE_CODE (decl) == FUNCTION_DECL)
20724 return false;
20725
20726 if (VAR_P (decl) && DECL_SECTION_NAME (decl))
20727 {
20728 const char *section = DECL_SECTION_NAME (decl);
20729 if (compare_section_name (section, ".sdata")
20730 || compare_section_name (section, ".sdata2")
20731 || compare_section_name (section, ".gnu.linkonce.s")
20732 || compare_section_name (section, ".sbss")
20733 || compare_section_name (section, ".sbss2")
20734 || compare_section_name (section, ".gnu.linkonce.sb")
20735 || strcmp (section, ".PPC.EMB.sdata0") == 0
20736 || strcmp (section, ".PPC.EMB.sbss0") == 0)
20737 return true;
20738 }
20739 else
20740 {
20741 /* If we are told not to put readonly data in sdata, then don't. */
20742 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
20743 && !rs6000_readonly_in_sdata)
20744 return false;
20745
20746 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
20747
20748 if (size > 0
20749 && size <= g_switch_value
20750 /* If it's not public, and we're not going to reference it there,
20751 there's no need to put it in the small data section. */
20752 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
20753 return true;
20754 }
20755
20756 return false;
20757 }
20758
20759 #endif /* USING_ELFOS_H */
20760 \f
20761 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20762
20763 static bool
20764 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
20765 {
20766 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
20767 }
20768
20769 /* Do not place thread-local symbols refs in the object blocks. */
20770
20771 static bool
20772 rs6000_use_blocks_for_decl_p (const_tree decl)
20773 {
20774 return !DECL_THREAD_LOCAL_P (decl);
20775 }
20776 \f
20777 /* Return a REG that occurs in ADDR with coefficient 1.
20778 ADDR can be effectively incremented by incrementing REG.
20779
20780 r0 is special and we must not select it as an address
20781 register by this routine since our caller will try to
20782 increment the returned register via an "la" instruction. */
20783
20784 rtx
20785 find_addr_reg (rtx addr)
20786 {
20787 while (GET_CODE (addr) == PLUS)
20788 {
20789 if (REG_P (XEXP (addr, 0))
20790 && REGNO (XEXP (addr, 0)) != 0)
20791 addr = XEXP (addr, 0);
20792 else if (REG_P (XEXP (addr, 1))
20793 && REGNO (XEXP (addr, 1)) != 0)
20794 addr = XEXP (addr, 1);
20795 else if (CONSTANT_P (XEXP (addr, 0)))
20796 addr = XEXP (addr, 1);
20797 else if (CONSTANT_P (XEXP (addr, 1)))
20798 addr = XEXP (addr, 0);
20799 else
20800 gcc_unreachable ();
20801 }
20802 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
20803 return addr;
20804 }
20805
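/* Report a fatal error for insn OP, which has a bad address.  */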
20806 void
20807 rs6000_fatal_bad_address (rtx op)
20808 {
20809 fatal_insn ("bad address", op);
20810 }
20811
20812 #if TARGET_MACHO
20813
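/* Branch islands queued for output; see add_compiler_branch_island
   below.  */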
20814 vec<branch_island, va_gc> *branch_islands;
20815
20816 /* Remember to generate a branch island for far calls to the given
20817 function. */
20818
20819 static void
20820 add_compiler_branch_island (tree label_name, tree function_name,
20821 int line_number)
20822 {
20823 branch_island bi = {function_name, label_name, line_number};
20824 vec_safe_push (branch_islands, bi);
20825 }
20826
20827 /* NO_PREVIOUS_DEF checks whether the function name is already in the
20828 branch island list. */
20829
20830 static int
20831 no_previous_def (tree function_name)
20832 {
20833 branch_island *bi;
20834 unsigned ix;
20835
20836 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20837 if (function_name == bi->function_name)
20838 return 0;
20839 return 1;
20840 }
20841
20842 /* GET_PREV_LABEL gets the label name from the previous definition of
20843 the function. */
20844
20845 static tree
20846 get_prev_label (tree function_name)
20847 {
20848 branch_island *bi;
20849 unsigned ix;
20850
20851 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
20852 if (function_name == bi->function_name)
20853 return bi->label_name;
20854 return NULL_TREE;
20855 }
20856
20857 /* Generate external symbol indirection stubs (PIC and non-PIC). */
20858
20859 void
20860 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20861 {
20862 unsigned int length;
20863 char *symbol_name, *lazy_ptr_name;
20864 char *local_label_0;
20865 static unsigned label = 0;
20866
20867 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20868 symb = (*targetm.strip_name_encoding) (symb);
20869
20870 length = strlen (symb);
20871 symbol_name = XALLOCAVEC (char, length + 32);
20872 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20873
20874 lazy_ptr_name = XALLOCAVEC (char, length + 32);
20875 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
20876
20877 if (MACHOPIC_PURE)
20878 {
20879 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
20880 fprintf (file, "\t.align 5\n");
20881
20882 fprintf (file, "%s:\n", stub);
20883 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20884
20885 label++;
20886 local_label_0 = XALLOCAVEC (char, 16);
20887 sprintf (local_label_0, "L%u$spb", label);
20888
20889 fprintf (file, "\tmflr r0\n");
20890 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
20891 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
20892 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
20893 lazy_ptr_name, local_label_0);
20894 fprintf (file, "\tmtlr r0\n");
20895 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
20896 (TARGET_64BIT ? "ldu" : "lwzu"),
20897 lazy_ptr_name, local_label_0);
20898 fprintf (file, "\tmtctr r12\n");
20899 fprintf (file, "\tbctr\n");
20900 }
20901 else /* mdynamic-no-pic or mkernel. */
20902 {
20903 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
20904 fprintf (file, "\t.align 4\n");
20905
20906 fprintf (file, "%s:\n", stub);
20907 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20908
20909 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
20910 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
20911 (TARGET_64BIT ? "ldu" : "lwzu"),
20912 lazy_ptr_name);
20913 fprintf (file, "\tmtctr r12\n");
20914 fprintf (file, "\tbctr\n");
20915 }
20916
20917 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20918 fprintf (file, "%s:\n", lazy_ptr_name);
20919 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20920 fprintf (file, "%sdyld_stub_binding_helper\n",
20921 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
20922 }
20923
20924 /* Legitimize PIC addresses. If the address is already
20925 position-independent, we return ORIG. Newly generated
20926 position-independent addresses go into a reg. This is REG if
20927 nonzero, otherwise we allocate register(s) as necessary. */
20928
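/* True iff the constant X fits in a signed 16-bit immediate field.  */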
20929 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
20930
20931 rtx
20932 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
20933 rtx reg)
20934 {
20935 rtx base, offset;
20936
20937 if (reg == NULL && !reload_completed)
20938 reg = gen_reg_rtx (Pmode);
20939
20940 if (GET_CODE (orig) == CONST)
20941 {
20942 rtx reg_temp;
20943
20944 if (GET_CODE (XEXP (orig, 0)) == PLUS
20945 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
20946 return orig;
20947
20948 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
20949
20950 /* Use a different reg for the intermediate value, as
20951 it will be marked UNCHANGING. */
20952 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
20953 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
20954 Pmode, reg_temp);
20955 offset
20956 = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
20957 Pmode, reg);
20958
20959 if (CONST_INT_P (offset))
20960 {
20961 if (SMALL_INT (offset))
20962 return plus_constant (Pmode, base, INTVAL (offset));
20963 else if (!reload_completed)
20964 offset = force_reg (Pmode, offset);
20965 else
20966 {
20967 rtx mem = force_const_mem (Pmode, orig);
20968 return machopic_legitimize_pic_address (mem, Pmode, reg);
20969 }
20970 }
20971 return gen_rtx_PLUS (Pmode, base, offset);
20972 }
20973
20974 /* Fall back on generic machopic code. */
20975 return machopic_legitimize_pic_address (orig, mode, reg);
20976 }
20977
20978 /* Output a .machine directive for the Darwin assembler, and call
20979 the generic start_file routine. */
20980
20981 static void
20982 rs6000_darwin_file_start (void)
20983 {
20984 static const struct
20985 {
20986 const char *arg;
20987 const char *name;
20988 HOST_WIDE_INT if_set;
20989 } mapping[] = {
20990 { "ppc64", "ppc64", MASK_64BIT },
20991 { "970", "ppc970", OPTION_MASK_PPC_GPOPT | OPTION_MASK_MFCRF \
20992 | MASK_POWERPC64 },
20993 { "power4", "ppc970", 0 },
20994 { "G5", "ppc970", 0 },
20995 { "7450", "ppc7450", 0 },
20996 { "7400", "ppc7400", OPTION_MASK_ALTIVEC },
20997 { "G4", "ppc7400", 0 },
20998 { "750", "ppc750", 0 },
20999 { "740", "ppc750", 0 },
21000 { "G3", "ppc750", 0 },
21001 { "604e", "ppc604e", 0 },
21002 { "604", "ppc604", 0 },
21003 { "603e", "ppc603", 0 },
21004 { "603", "ppc603", 0 },
21005 { "601", "ppc601", 0 },
21006 { NULL, "ppc", 0 } };
21007 const char *cpu_id = "";
21008 size_t i;
21009
21010 rs6000_file_start ();
21011 darwin_file_start ();
21012
21013 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
21014
21015 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
21016 cpu_id = rs6000_default_cpu;
21017
21018 if (OPTION_SET_P (rs6000_cpu_index))
21019 cpu_id = processor_target_table[rs6000_cpu_index].name;
21020
21021 /* Look through the mapping array. Pick the first name that either
21022 matches the argument, has a bit set in IF_SET that is also set
21023 in the target flags, or has a NULL name. */
21024
21025 i = 0;
21026 while (mapping[i].arg != NULL
21027 && strcmp (mapping[i].arg, cpu_id) != 0
21028 && (mapping[i].if_set & rs6000_isa_flags) == 0)
21029 i++;
21030
21031 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
21032 }
21033
21034 #endif /* TARGET_MACHO */
21035
21036 #if TARGET_ELF
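/* Implement TARGET_ASM_RELOC_RW_MASK: return which classes of
   relocations force otherwise read-only data into a writable
   section.  */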
21037 static int
21038 rs6000_elf_reloc_rw_mask (void)
21039 {
21040 if (flag_pic)
21041 return 3;
21042 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21043 return 2;
21044 else
21045 return 0;
21046 }
21047
21048 /* Record an element in the table of global constructors. SYMBOL is
21049 a SYMBOL_REF of the function to be called; PRIORITY is a number
21050 between 0 and MAX_INIT_PRIORITY.
21051
21052 This differs from default_named_section_asm_out_constructor in
21053 that we have special handling for -mrelocatable. */
21054
21055 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
21056 static void
21057 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
21058 {
21059 const char *section = ".ctors";
21060 char buf[18];
21061
21062 if (priority != DEFAULT_INIT_PRIORITY)
21063 {
21064 sprintf (buf, ".ctors.%.5u",
21065 /* Invert the numbering so the linker puts us in the proper
21066 order; constructors are run from right to left, and the
21067 linker sorts in increasing order. */
21068 MAX_INIT_PRIORITY - priority);
21069 section = buf;
21070 }
21071
21072 switch_to_section (get_section (section, SECTION_WRITE, NULL));
21073 assemble_align (POINTER_SIZE);
21074
21075 if (DEFAULT_ABI == ABI_V4
21076 && (TARGET_RELOCATABLE || flag_pic > 1))
21077 {
21078 fputs ("\t.long (", asm_out_file);
21079 output_addr_const (asm_out_file, symbol);
21080 fputs (")@fixup\n", asm_out_file);
21081 }
21082 else
21083 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
21084 }
21085
21086 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
21087 static void
21088 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
21089 {
21090 const char *section = ".dtors";
21091 char buf[18];
21092
21093 if (priority != DEFAULT_INIT_PRIORITY)
21094 {
21095 sprintf (buf, ".dtors.%.5u",
21096 /* Invert the numbering so the linker puts us in the proper
21097 order; constructors are run from right to left, and the
21098 linker sorts in increasing order. */
21099 MAX_INIT_PRIORITY - priority);
21100 section = buf;
21101 }
21102
21103 switch_to_section (get_section (section, SECTION_WRITE, NULL));
21104 assemble_align (POINTER_SIZE);
21105
21106 if (DEFAULT_ABI == ABI_V4
21107 && (TARGET_RELOCATABLE || flag_pic > 1))
21108 {
21109 fputs ("\t.long (", asm_out_file);
21110 output_addr_const (asm_out_file, symbol);
21111 fputs (")@fixup\n", asm_out_file);
21112 }
21113 else
21114 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
21115 }
21116
21117 void
21118 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
21119 {
21120 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
21121 {
21122 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
21123 ASM_OUTPUT_LABEL (file, name);
21124 fputs (DOUBLE_INT_ASM_OP, file);
21125 rs6000_output_function_entry (file, name);
21126 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
21127 if (DOT_SYMBOLS)
21128 {
21129 fputs ("\t.size\t", file);
21130 assemble_name (file, name);
21131 fputs (",24\n\t.type\t.", file);
21132 assemble_name (file, name);
21133 fputs (",@function\n", file);
21134 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
21135 {
21136 fputs ("\t.globl\t.", file);
21137 assemble_name (file, name);
21138 putc ('\n', file);
21139 }
21140 }
21141 else
21142 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21143 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21144 rs6000_output_function_entry (file, name);
21145 fputs (":\n", file);
21146 return;
21147 }
21148
21149 int uses_toc;
21150 if (DEFAULT_ABI == ABI_V4
21151 && (TARGET_RELOCATABLE || flag_pic > 1)
21152 && !TARGET_SECURE_PLT
21153 && (!constant_pool_empty_p () || crtl->profile)
21154 && (uses_toc = uses_TOC ()))
21155 {
21156 char buf[256];
21157
21158 if (uses_toc == 2)
21159 switch_to_other_text_partition ();
21160 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
21161
21162 fprintf (file, "\t.long ");
21163 assemble_name (file, toc_label_name);
21164 need_toc_init = 1;
21165 putc ('-', file);
21166 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21167 assemble_name (file, buf);
21168 putc ('\n', file);
21169 if (uses_toc == 2)
21170 switch_to_other_text_partition ();
21171 }
21172
21173 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21174 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21175
21176 if (TARGET_CMODEL == CMODEL_LARGE
21177 && rs6000_global_entry_point_prologue_needed_p ())
21178 {
21179 char buf[256];
21180
21181 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
21182
21183 fprintf (file, "\t.quad .TOC.-");
21184 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21185 assemble_name (file, buf);
21186 putc ('\n', file);
21187 }
21188
21189 if (DEFAULT_ABI == ABI_AIX)
21190 {
21191 const char *desc_name, *orig_name;
21192
21193 orig_name = (*targetm.strip_name_encoding) (name);
21194 desc_name = orig_name;
21195 while (*desc_name == '.')
21196 desc_name++;
21197
21198 if (TREE_PUBLIC (decl))
21199 fprintf (file, "\t.globl %s\n", desc_name);
21200
21201 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
21202 fprintf (file, "%s:\n", desc_name);
21203 fprintf (file, "\t.long %s\n", orig_name);
21204 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
21205 fputs ("\t.long 0\n", file);
21206 fprintf (file, "\t.previous\n");
21207 }
21208 ASM_OUTPUT_LABEL (file, name);
21209 }
21210
21211 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
21212 static void
21213 rs6000_elf_file_end (void)
21214 {
21215 #ifdef HAVE_AS_GNU_ATTRIBUTE
21216 /* ??? The value emitted depends on options active at file end.
21217 Assume anyone using #pragma or attributes that might change
21218 options knows what they are doing. */
21219 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
21220 && rs6000_passes_float)
21221 {
21222 int fp;
21223
21224 if (TARGET_HARD_FLOAT)
21225 fp = 1;
21226 else
21227 fp = 2;
21228 if (rs6000_passes_long_double)
21229 {
21230 if (!TARGET_LONG_DOUBLE_128)
21231 fp |= 2 * 4;
21232 else if (TARGET_IEEEQUAD)
21233 fp |= 3 * 4;
21234 else
21235 fp |= 1 * 4;
21236 }
21237 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
21238 }
21239 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
21240 {
21241 if (rs6000_passes_vector)
21242 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
21243 (TARGET_ALTIVEC_ABI ? 2 : 1));
21244 if (rs6000_returns_struct)
21245 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
21246 aix_struct_return ? 2 : 1);
21247 }
21248 #endif
21249 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
21250 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
21251 file_end_indicate_exec_stack ();
21252 #endif
21253
21254 if (flag_split_stack)
21255 file_end_indicate_split_stack ();
21256
21257 if (cpu_builtin_p)
21258 {
21259 /* We have expanded a CPU builtin, so we need to emit a reference to
21260 the special symbol that LIBC uses to declare it supports the
21261 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature. */
21262 switch_to_section (data_section);
21263 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
21264 fprintf (asm_out_file, "\t%s %s\n",
21265 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
21266 }
21267 }
21268 #endif
21269
21270 #if TARGET_XCOFF
21271
21272 #ifndef HAVE_XCOFF_DWARF_EXTRAS
21273 #define HAVE_XCOFF_DWARF_EXTRAS 0
21274 #endif
21275
21276
21277 /* Names of bss and data sections. These should be unique names for each
21278 compilation unit. */
21279
21280 char *xcoff_bss_section_name;
21281 char *xcoff_private_data_section_name;
21282 char *xcoff_private_rodata_section_name;
21283 char *xcoff_tls_data_section_name;
21284 char *xcoff_read_only_section_name;
21285
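/* Implement TARGET_DEBUG_UNWIND_INFO: XCOFF does not use the generic
   debug unwind info formats.  */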
21286 static enum unwind_info_type
21287 rs6000_xcoff_debug_unwind_info (void)
21288 {
21289 return UI_NONE;
21290 }
21291
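/* Output a .set directive defining the section anchor SYMBOL as the
   current location plus its offset within the object block.  */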
21292 static void
21293 rs6000_xcoff_asm_output_anchor (rtx symbol)
21294 {
21295 char buffer[100];
21296
21297 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
21298 SYMBOL_REF_BLOCK_OFFSET (symbol));
21299 fprintf (asm_out_file, "%s", SET_ASM_OP);
21300 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
21301 fprintf (asm_out_file, ",");
21302 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
21303 fprintf (asm_out_file, "\n");
21304 }
21305
21306 static void
21307 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
21308 {
21309 fputs (GLOBAL_ASM_OP, stream);
21310 RS6000_OUTPUT_BASENAME (stream, name);
21311 putc ('\n', stream);
21312 }
21313
21314 /* A get_unnamed_section callback, used for read-only sections. A
21315 non-null DIRECTIVE selects the private read-only data section name. */
21316
21317 static void
21318 rs6000_xcoff_output_readonly_section_asm_op (const char *directive)
21319 {
21320 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
21321 directive
21322 ? xcoff_private_rodata_section_name
21323 : xcoff_read_only_section_name,
21324 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21325 }
21326
21327 /* Likewise for read-write sections. */
21328
21329 static void
21330 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21331 {
21332 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
21333 xcoff_private_data_section_name,
21334 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21335 }
21336
21337 static void
21338 rs6000_xcoff_output_tls_section_asm_op (const char *directive)
21339 {
21340 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
21341 directive
21342 ? xcoff_private_data_section_name
21343 : xcoff_tls_data_section_name,
21344 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21345 }
21346
21347 /* A get_unnamed_section callback, used for switching to toc_section. */
21348
21349 static void
21350 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
21351 {
21352 if (TARGET_MINIMAL_TOC)
21353 {
21354 /* toc_section is always selected at least once from
21355 rs6000_xcoff_file_start, so this is guaranteed to
21356 always be defined once and only once in each file. */
21357 if (!toc_initialized)
21358 {
21359 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
21360 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
21361 toc_initialized = 1;
21362 }
21363 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
21364 (TARGET_32BIT ? "" : ",3"));
21365 }
21366 else
21367 fputs ("\t.toc\n", asm_out_file);
21368 }
21369
21370 /* Implement TARGET_ASM_INIT_SECTIONS. */
21371
21372 static void
21373 rs6000_xcoff_asm_init_sections (void)
21374 {
21375 read_only_data_section
21376 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21377 NULL);
21378
21379 private_data_section
21380 = get_unnamed_section (SECTION_WRITE,
21381 rs6000_xcoff_output_readwrite_section_asm_op,
21382 NULL);
21383
21384 read_only_private_data_section
21385 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21386 "");
21387
21388 tls_data_section
21389 = get_unnamed_section (SECTION_TLS,
21390 rs6000_xcoff_output_tls_section_asm_op,
21391 NULL);
21392
21393 tls_private_data_section
21394 = get_unnamed_section (SECTION_TLS,
21395 rs6000_xcoff_output_tls_section_asm_op,
21396 "");
21397
21398 toc_section
21399 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
21400
21401 readonly_data_section = read_only_data_section;
21402 }
21403
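/* Implement TARGET_ASM_RELOC_RW_MASK.  On XCOFF, data with any kind of
   relocation must be placed in a writable section.  */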
21404 static int
21405 rs6000_xcoff_reloc_rw_mask (void)
21406 {
21407 return 3;
21408 }
21409
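/* Emit the .csect directive (or .dwsect for debug sections) for NAME,
   with the XCOFF storage-mapping class implied by FLAGS.  */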
21410 static void
21411 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
21412 tree decl ATTRIBUTE_UNUSED)
21413 {
21414 int smclass;
21415 static const char * const suffix[7]
21416 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21417
21418 if (flags & SECTION_EXCLUDE)
21419 smclass = 6;
21420 else if (flags & SECTION_DEBUG)
21421 {
21422 fprintf (asm_out_file, "\t.dwsect %s\n", name);
21423 return;
21424 }
21425 else if (flags & SECTION_CODE)
21426 smclass = 0;
21427 else if (flags & SECTION_TLS)
21428 {
21429 if (flags & SECTION_BSS)
21430 smclass = 5;
21431 else
21432 smclass = 4;
21433 }
21434 else if (flags & SECTION_WRITE)
21435 {
21436 if (flags & SECTION_BSS)
21437 smclass = 3;
21438 else
21439 smclass = 2;
21440 }
21441 else
21442 smclass = 1;
21443
21444 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
21445 (flags & SECTION_CODE) ? "." : "",
21446 name, suffix[smclass], flags & SECTION_ENTSIZE);
21447 }
21448
21449 #define IN_NAMED_SECTION(DECL) \
21450 ((TREE_CODE (DECL) == FUNCTION_DECL || VAR_P (DECL)) \
21451 && DECL_SECTION_NAME (DECL) != NULL)
21452
21453 static section *
21454 rs6000_xcoff_select_section (tree decl, int reloc,
21455 unsigned HOST_WIDE_INT align)
21456 {
21457 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21458 named section. */
21459 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
21460 {
21461 resolve_unique_section (decl, reloc, true);
21462 if (IN_NAMED_SECTION (decl))
21463 return get_named_section (decl, NULL, reloc);
21464 }
21465
21466 if (decl_readonly_section (decl, reloc))
21467 {
21468 if (TREE_PUBLIC (decl))
21469 return read_only_data_section;
21470 else
21471 return read_only_private_data_section;
21472 }
21473 else
21474 {
21475 #if HAVE_AS_TLS
21476 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21477 {
21478 if (bss_initializer_p (decl))
21479 return tls_comm_section;
21480 else if (TREE_PUBLIC (decl))
21481 return tls_data_section;
21482 else
21483 return tls_private_data_section;
21484 }
21485 else
21486 #endif
21487 if (TREE_PUBLIC (decl))
21488 return data_section;
21489 else
21490 return private_data_section;
21491 }
21492 }
21493
21494 static void
21495 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
21496 {
21497 const char *name;
21498
21499 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
21500 name = (*targetm.strip_name_encoding) (name);
21501 set_decl_section_name (decl, name);
21502 }
21503
21504 /* Select section for constant in constant pool.
21505
21506 On RS/6000, all constants are in the private read-only data area.
21507 However, if this is being placed in the TOC it must be output as a
21508 toc entry. */
21509
21510 static section *
21511 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
21512 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
21513 {
21514 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
21515 return toc_section;
21516 else
21517 return read_only_private_data_section;
21518 }
21519
21520 /* Remove any trailing [DS] or the like from the symbol name. */
21521
21522 static const char *
21523 rs6000_xcoff_strip_name_encoding (const char *name)
21524 {
21525 size_t len;
21526 if (*name == '*')
21527 name++;
21528 len = strlen (name);
21529 if (name[len - 1] == ']')
21530 return ggc_alloc_string (name, len - 4);
21531 else
21532 return name;
21533 }
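
/* For example, "*foo[DS]" is returned as "foo" and a plain "bar" comes
   back unchanged.  Note the qualifier is assumed to be exactly four
   characters: '[', two letters, ']'.  */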
21534
21535 /* Section attributes. AIX is always PIC. */
21536
21537 static unsigned int
21538 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
21539 {
21540 unsigned int align;
21541 unsigned int flags = default_section_type_flags (decl, name, reloc);
21542
21543 if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
21544 flags |= SECTION_BSS;
21545
21546 /* Align to at least UNIT size. */
21547 if (!decl || !DECL_P (decl))
21548 align = MIN_UNITS_PER_WORD;
21549 /* Align code CSECT to at least 32 bytes. */
21550 else if ((flags & SECTION_CODE) != 0)
21551 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
21552 else
21553 /* Increase alignment of large objects if not already stricter. */
21554 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21555 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21556 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21557
21558 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
21559 }
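
/* Worked example: a function decl with DECL_ALIGN of 64 bits (8 bytes)
   is raised to the 32-byte code minimum, and exact_log2 (32) = 5 lands
   in the low SECTION_ENTSIZE bits of the returned flags; the .csect
   output above prints that value as the alignment operand.  */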
21560
21561 /* Output at beginning of assembler file.
21562
21563 Initialize the section names for the RS/6000 at this point.
21564
21565 Specify filename, including full path, to assembler.
21566
21567 We want to go into the TOC section so at least one .toc will be emitted.
21568 Also, in order to output proper .bs/.es pairs, we need at least one static
21569 [RW] section emitted.
21570
21571 Finally, declare mcount when profiling to make the assembler happy. */
21572
21573 static void
21574 rs6000_xcoff_file_start (void)
21575 {
21576 rs6000_gen_section_name (&xcoff_bss_section_name,
21577 main_input_filename, ".bss_");
21578 rs6000_gen_section_name (&xcoff_private_data_section_name,
21579 main_input_filename, ".rw_");
21580 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21581 main_input_filename, ".rop_");
21582 rs6000_gen_section_name (&xcoff_read_only_section_name,
21583 main_input_filename, ".ro_");
21584 rs6000_gen_section_name (&xcoff_tls_data_section_name,
21585 main_input_filename, ".tls_");
21586
21587 fputs ("\t.file\t", asm_out_file);
21588 output_quoted_string (asm_out_file, main_input_filename);
21589 fputc ('\n', asm_out_file);
21590 if (write_symbols != NO_DEBUG)
21591 switch_to_section (private_data_section);
21592 switch_to_section (toc_section);
21593 switch_to_section (text_section);
21594 if (profile_flag)
21595 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21596 rs6000_file_start ();
21597 }
21598
21599 /* Output at end of assembler file.
21600 On the RS/6000, referencing data should automatically pull in text. */
21601
21602 static void
21603 rs6000_xcoff_file_end (void)
21604 {
21605 switch_to_section (text_section);
21606 if (xcoff_tls_exec_model_detected)
21607 {
21608 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21609 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file);
21610 }
21611 fputs ("_section_.text:\n", asm_out_file);
21612 switch_to_section (data_section);
21613 fputs (TARGET_32BIT
21614 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21615 asm_out_file);
21616
21617 }
21618
21619 struct declare_alias_data
21620 {
21621 FILE *file;
21622 bool function_descriptor;
21623 };
21624
21625 /* Declare alias N.  A helper function for symtab_node::call_for_symbol_and_aliases. */
21626
21627 static bool
21628 rs6000_declare_alias (struct symtab_node *n, void *d)
21629 {
21630 struct declare_alias_data *data = (struct declare_alias_data *)d;
21631 /* Main symbol is output specially, because varasm machinery does part of
21632 the job for us - we do not need to declare .globl/.lglobl and such. */
21633 if (!n->alias || n->weakref)
21634 return false;
21635
21636 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21637 return false;
21638
21639 /* Prevent assemble_alias from trying to use .set pseudo operation
21640 that does not behave as expected by the middle-end. */
21641 TREE_ASM_WRITTEN (n->decl) = true;
21642
21643 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21644 char *buffer = (char *) alloca (strlen (name) + 2);
21645 char *p;
21646 int dollar_inside = 0;
21647
21648 strcpy (buffer, name);
21649 p = strchr (buffer, '$');
21650 while (p) {
21651 *p = '_';
21652 dollar_inside++;
21653 p = strchr (p + 1, '$');
21654 }
21655 if (TREE_PUBLIC (n->decl))
21656 {
21657 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21658 {
21659 if (dollar_inside) {
21660 if (data->function_descriptor)
21661 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21662 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21663 }
21664 if (data->function_descriptor)
21665 {
21666 fputs ("\t.globl .", data->file);
21667 RS6000_OUTPUT_BASENAME (data->file, buffer);
21668 putc ('\n', data->file);
21669 }
21670 fputs ("\t.globl ", data->file);
21671 assemble_name (data->file, buffer);
21672 putc ('\n', data->file);
21673 }
21674 #ifdef ASM_WEAKEN_DECL
21675 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21676 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21677 #endif
21678 }
21679 else
21680 {
21681 if (dollar_inside)
21682 {
21683 if (data->function_descriptor)
21684 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21685 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21686 }
21687 if (data->function_descriptor)
21688 {
21689 fputs ("\t.lglobl .", data->file);
21690 RS6000_OUTPUT_BASENAME (data->file, buffer);
21691 putc ('\n', data->file);
21692 }
21693 fputs ("\t.lglobl ", data->file);
21694 assemble_name (data->file, buffer);
21695 putc ('\n', data->file);
21696 }
21697 if (data->function_descriptor)
21698 putc ('.', data->file);
21699 ASM_OUTPUT_LABEL (data->file, buffer);
21700 return false;
21701 }
21702
21703
21704 #ifdef HAVE_GAS_HIDDEN
21705 /* Helper function to calculate visibility of a DECL
21706 and return the value as a const string. */
21707
21708 static const char *
21709 rs6000_xcoff_visibility (tree decl)
21710 {
21711 static const char * const visibility_types[] = {
21712 "", ",protected", ",hidden", ",internal"
21713 };
21714
21715 enum symbol_visibility vis = DECL_VISIBILITY (decl);
21716 return visibility_types[vis];
21717 }
21718 #endif
21719
21720
21721 /* This macro produces the initial definition of a function name.
21722 On the RS/6000, we need to place an extra '.' in the function name and
21723 output the function descriptor.
21724 Dollar signs are converted to underscores.
21725
21726 The csect for the function will have already been created when
21727 text_section was selected. We do have to go back to that csect, however.
21728
21729 The third and fourth parameters to the .function pseudo-op (16 and 044)
21730 are placeholders which no longer have any use.
21731
21732 Because AIX assembler's .set command has unexpected semantics, we output
21733 all aliases as alternative labels in front of the definition. */
21734
21735 void
21736 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21737 {
21738 char *buffer = (char *) alloca (strlen (name) + 1);
21739 char *p;
21740 int dollar_inside = 0;
21741 struct declare_alias_data data = {file, false};
21742
21743 strcpy (buffer, name);
21744 p = strchr (buffer, '$');
21745 while (p) {
21746 *p = '_';
21747 dollar_inside++;
21748 p = strchr (p + 1, '$');
21749 }
21750 if (TREE_PUBLIC (decl))
21751 {
21752 if (!RS6000_WEAK || !DECL_WEAK (decl))
21753 {
21754 if (dollar_inside) {
21755 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21756 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21757 }
21758 fputs ("\t.globl .", file);
21759 RS6000_OUTPUT_BASENAME (file, buffer);
21760 #ifdef HAVE_GAS_HIDDEN
21761 fputs (rs6000_xcoff_visibility (decl), file);
21762 #endif
21763 putc ('\n', file);
21764 }
21765 }
21766 else
21767 {
21768 if (dollar_inside) {
21769 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21770 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21771 }
21772 fputs ("\t.lglobl .", file);
21773 RS6000_OUTPUT_BASENAME (file, buffer);
21774 putc ('\n', file);
21775 }
21776
21777 fputs ("\t.csect ", file);
21778 assemble_name (file, buffer);
21779 fputs (TARGET_32BIT ? "\n" : ",3\n", file);
21780
21781 ASM_OUTPUT_LABEL (file, buffer);
21782
21783 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21784 &data, true);
21785 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
21786 RS6000_OUTPUT_BASENAME (file, buffer);
21787 fputs (", TOC[tc0], 0\n", file);
21788
21789 in_section = NULL;
21790 switch_to_section (function_section (decl));
21791 putc ('.', file);
21792 ASM_OUTPUT_LABEL (file, buffer);
21793
21794 data.function_descriptor = true;
21795 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21796 &data, true);
21797 if (!DECL_IGNORED_P (decl))
21798 {
21799 if (dwarf_debuginfo_p ())
21800 {
21801 name = (*targetm.strip_name_encoding) (name);
21802 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
21803 }
21804 }
21805 return;
21806 }
21807
21808
21809 /* Output assembly language to globalize a symbol from a DECL,
21810 possibly with visibility. */
21811
21812 void
21813 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
21814 {
21815 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
21816 fputs (GLOBAL_ASM_OP, stream);
21817 assemble_name (stream, name);
21818 #ifdef HAVE_GAS_HIDDEN
21819 fputs (rs6000_xcoff_visibility (decl), stream);
21820 #endif
21821 putc ('\n', stream);
21822 }
21823
21824 /* Output assembly language to define a symbol as COMMON from a DECL,
21825 possibly with visibility. */
21826
21827 void
21828 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
21829 tree decl ATTRIBUTE_UNUSED,
21830 const char *name,
21831 unsigned HOST_WIDE_INT size,
21832 unsigned int align)
21833 {
21834 unsigned int align2 = 2;
21835
21836 if (align == 0)
21837 align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), DECL_ALIGN (decl));
21838
21839 if (align > 32)
21840 align2 = floor_log2 (align / BITS_PER_UNIT);
21841 else if (size > 4)
21842 align2 = 3;
21843
21844 if (! DECL_COMMON (decl))
21845 {
21846 /* Forget section. */
21847 in_section = NULL;
21848
21849 /* Globalize TLS BSS. */
21850 if (TREE_PUBLIC (decl) && DECL_THREAD_LOCAL_P (decl))
21851 {
21852 fputs (GLOBAL_ASM_OP, stream);
21853 assemble_name (stream, name);
21854 fputc ('\n', stream);
21855 }
21856
21857 /* Switch to section and skip space. */
21858 fputs ("\t.csect ", stream);
21859 assemble_name (stream, name);
21860 fprintf (stream, ",%u\n", align2);
21861 ASM_DECLARE_OBJECT_NAME (stream, name, decl);
21862 ASM_OUTPUT_SKIP (stream, size ? size : 1);
21863 return;
21864 }
21865
21866 if (TREE_PUBLIC (decl))
21867 {
21868 fprintf (stream,
21869 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%u" ,
21870 name, size, align2);
21871
21872 #ifdef HAVE_GAS_HIDDEN
21873 if (decl != NULL)
21874 fputs (rs6000_xcoff_visibility (decl), stream);
21875 #endif
21876 putc ('\n', stream);
21877 }
21878 else
21879 fprintf (stream,
21880 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%s,%u\n",
21881 (*targetm.strip_name_encoding) (name), size, name, align2);
21882 }
21883
21884 /* This macro produces the initial definition of an object (variable) name.
21885 Because AIX assembler's .set command has unexpected semantics, we output
21886 all aliases as alternative labels in front of the definition. */
21887
21888 void
21889 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
21890 {
21891 struct declare_alias_data data = {file, false};
21892 ASM_OUTPUT_LABEL (file, name);
21893 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21894 &data, true);
21895 }
21896
21897 /* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
21898
21899 void
21900 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
21901 {
21902 fputs (integer_asm_op (size, FALSE), file);
21903 assemble_name (file, label);
21904 fputs ("-$", file);
21905 }
21906
21907 /* Output a symbol offset relative to the dbase for the current object.
21908 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
21909 signed offsets.
21910
21911 __gcc_unwind_dbase is embedded in all executables/libraries through
21912 libgcc/config/rs6000/crtdbase.S. */
21913
21914 void
21915 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
21916 {
21917 fputs (integer_asm_op (size, FALSE), file);
21918 assemble_name (file, label);
21919 fputs("-__gcc_unwind_dbase", file);
21920 }
21921
21922 #ifdef HAVE_AS_TLS
21923 static void
21924 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
21925 {
21926 rtx symbol;
21927 int flags;
21928 const char *symname;
21929
21930 default_encode_section_info (decl, rtl, first);
21931
21932 /* Careful not to prod global register variables. */
21933 if (!MEM_P (rtl))
21934 return;
21935 symbol = XEXP (rtl, 0);
21936 if (!SYMBOL_REF_P (symbol))
21937 return;
21938
21939 flags = SYMBOL_REF_FLAGS (symbol);
21940
21941 if (VAR_P (decl) && DECL_THREAD_LOCAL_P (decl))
21942 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
21943
21944 SYMBOL_REF_FLAGS (symbol) = flags;
21945
21946 symname = XSTR (symbol, 0);
21947
21948 /* Append CSECT mapping class, unless the symbol already is qualified.
21949 Aliases are implemented as labels, so the symbol name should not add
21950 a mapping class. */
21951 if (decl
21952 && DECL_P (decl)
21953 && VAR_OR_FUNCTION_DECL_P (decl)
21954 && (symtab_node::get (decl) == NULL
21955 || symtab_node::get (decl)->alias == 0)
21956 && symname[strlen (symname) - 1] != ']')
21957 {
21958 const char *smclass = NULL;
21959
21960 if (TREE_CODE (decl) == FUNCTION_DECL)
21961 smclass = "[DS]";
21962 else if (DECL_THREAD_LOCAL_P (decl))
21963 {
21964 if (bss_initializer_p (decl))
21965 smclass = "[UL]";
21966 else if (flag_data_sections)
21967 smclass = "[TL]";
21968 }
21969 else if (DECL_EXTERNAL (decl))
21970 smclass = "[UA]";
21971 else if (bss_initializer_p (decl))
21972 smclass = "[BS]";
21973 else if (flag_data_sections)
21974 {
21975 /* This must exactly match the logic of select section. */
21976 if (decl_readonly_section (decl, compute_reloc_for_var (decl)))
21977 smclass = "[RO]";
21978 else
21979 smclass = "[RW]";
21980 }
21981
21982 if (smclass != NULL)
21983 {
21984 char *newname = XALLOCAVEC (char, strlen (symname) + 5);
21985
21986 strcpy (newname, symname);
21987 strcat (newname, smclass);
21988 XSTR (symbol, 0) = ggc_strdup (newname);
21989 }
21990 }
21991 }
21992 #endif /* HAVE_AS_TLS */
21993 #endif /* TARGET_XCOFF */
21994
21995 void
21996 rs6000_asm_weaken_decl (FILE *stream, tree decl,
21997 const char *name, const char *val)
21998 {
21999 fputs ("\t.weak\t", stream);
22000 assemble_name (stream, name);
22001 if (decl && TREE_CODE (decl) == FUNCTION_DECL
22002 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
22003 {
22004 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22005 if (TARGET_XCOFF)
22006 fputs (rs6000_xcoff_visibility (decl), stream);
22007 #endif
22008 fputs ("\n\t.weak\t.", stream);
22009 RS6000_OUTPUT_BASENAME (stream, name);
22010 }
22011 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22012 if (TARGET_XCOFF)
22013 fputs (rs6000_xcoff_visibility (decl), stream);
22014 #endif
22015 fputc ('\n', stream);
22016
22017 if (val)
22018 {
22019 #ifdef ASM_OUTPUT_DEF
22020 ASM_OUTPUT_DEF (stream, name, val);
22021 #endif
22022 if (decl && TREE_CODE (decl) == FUNCTION_DECL
22023 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
22024 {
22025 fputs ("\t.set\t.", stream);
22026 RS6000_OUTPUT_BASENAME (stream, name);
22027 fputs (",.", stream);
22028 RS6000_OUTPUT_BASENAME (stream, val);
22029 fputc ('\n', stream);
22030 }
22031 }
22032 }
22033
22034
22035 /* Return true if INSN should not be copied. */
22036
22037 static bool
22038 rs6000_cannot_copy_insn_p (rtx_insn *insn)
22039 {
22040 return recog_memoized (insn) >= 0
22041 && get_attr_cannot_copy (insn);
22042 }
22043
22044 /* Compute a (partial) cost for rtx X. Return true if the complete
22045 cost has been computed, and false if subexpressions should be
22046 scanned. In either case, *TOTAL contains the cost result. */
22047
22048 static bool
22049 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
22050 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
22051 {
22052 int code = GET_CODE (x);
22053
22054 switch (code)
22055 {
22056 /* On the RS/6000, if it is valid in the insn, it is free. */
22057 case CONST_INT:
22058 if (((outer_code == SET
22059 || outer_code == PLUS
22060 || outer_code == MINUS)
22061 && (satisfies_constraint_I (x)
22062 || satisfies_constraint_L (x)))
22063 || (outer_code == AND
22064 && (satisfies_constraint_K (x)
22065 || (mode == SImode
22066 ? satisfies_constraint_L (x)
22067 : satisfies_constraint_J (x))))
22068 || ((outer_code == IOR || outer_code == XOR)
22069 && (satisfies_constraint_K (x)
22070 || (mode == SImode
22071 ? satisfies_constraint_L (x)
22072 : satisfies_constraint_J (x))))
22073 || outer_code == ASHIFT
22074 || outer_code == ASHIFTRT
22075 || outer_code == LSHIFTRT
22076 || outer_code == ROTATE
22077 || outer_code == ROTATERT
22078 || outer_code == ZERO_EXTRACT
22079 || (outer_code == MULT
22080 && satisfies_constraint_I (x))
22081 || ((outer_code == DIV || outer_code == UDIV
22082 || outer_code == MOD || outer_code == UMOD)
22083 && exact_log2 (INTVAL (x)) >= 0)
22084 || (outer_code == COMPARE
22085 && (satisfies_constraint_I (x)
22086 || satisfies_constraint_K (x)))
22087 || ((outer_code == EQ || outer_code == NE)
22088 && (satisfies_constraint_I (x)
22089 || satisfies_constraint_K (x)
22090 || (mode == SImode
22091 ? satisfies_constraint_L (x)
22092 : satisfies_constraint_J (x))))
22093 || (outer_code == GTU
22094 && satisfies_constraint_I (x))
22095 || (outer_code == LTU
22096 && satisfies_constraint_P (x)))
22097 {
22098 *total = 0;
22099 return true;
22100 }
22101 else if ((outer_code == PLUS
22102 && reg_or_add_cint_operand (x, mode))
22103 || (outer_code == MINUS
22104 && reg_or_sub_cint_operand (x, mode))
22105 || ((outer_code == SET
22106 || outer_code == IOR
22107 || outer_code == XOR)
22108 && (INTVAL (x)
22109 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
22110 {
22111 *total = COSTS_N_INSNS (1);
22112 return true;
22113 }
22114 /* FALLTHRU */
22115
22116 case CONST_DOUBLE:
22117 case CONST_WIDE_INT:
22118 case CONST:
22119 case HIGH:
22120 case SYMBOL_REF:
22121 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22122 return true;
22123
22124 case MEM:
22125 /* When optimizing for size, MEM should be slightly more expensive
22126 than generating the address, e.g., (plus (reg) (const)).
22127 L1 cache latency is about two instructions. */
22128 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22129 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
22130 *total += COSTS_N_INSNS (100);
22131 return true;
22132
22133 case LABEL_REF:
22134 *total = 0;
22135 return true;
22136
22137 case PLUS:
22138 case MINUS:
22139 if (FLOAT_MODE_P (mode))
22140 *total = rs6000_cost->fp;
22141 else
22142 *total = COSTS_N_INSNS (1);
22143 return false;
22144
22145 case MULT:
22146 if (CONST_INT_P (XEXP (x, 1))
22147 && satisfies_constraint_I (XEXP (x, 1)))
22148 {
22149 if (INTVAL (XEXP (x, 1)) >= -256
22150 && INTVAL (XEXP (x, 1)) <= 255)
22151 *total = rs6000_cost->mulsi_const9;
22152 else
22153 *total = rs6000_cost->mulsi_const;
22154 }
22155 else if (mode == SFmode)
22156 *total = rs6000_cost->fp;
22157 else if (FLOAT_MODE_P (mode))
22158 *total = rs6000_cost->dmul;
22159 else if (mode == DImode)
22160 *total = rs6000_cost->muldi;
22161 else
22162 *total = rs6000_cost->mulsi;
22163 return false;
22164
22165 case FMA:
22166 if (mode == SFmode)
22167 *total = rs6000_cost->fp;
22168 else
22169 *total = rs6000_cost->dmul;
22170 break;
22171
22172 case DIV:
22173 case MOD:
22174 if (FLOAT_MODE_P (mode))
22175 {
22176 *total = mode == DFmode ? rs6000_cost->ddiv
22177 : rs6000_cost->sdiv;
22178 return false;
22179 }
22180 /* FALLTHRU */
22181
22182 case UDIV:
22183 case UMOD:
22184 if (CONST_INT_P (XEXP (x, 1))
22185 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
22186 {
22187 if (code == DIV || code == MOD)
22188 /* Shift, addze */
22189 *total = COSTS_N_INSNS (2);
22190 else
22191 /* Shift */
22192 *total = COSTS_N_INSNS (1);
22193 }
22194 else
22195 {
22196 if (GET_MODE (XEXP (x, 1)) == DImode)
22197 *total = rs6000_cost->divdi;
22198 else
22199 *total = rs6000_cost->divsi;
22200 }
22201 /* Add in shift and subtract for MOD unless we have a mod instruction. */
22202 if ((!TARGET_MODULO
22203 || (RS6000_DISABLE_SCALAR_MODULO && SCALAR_INT_MODE_P (mode)))
22204 && (code == MOD || code == UMOD))
22205 *total += COSTS_N_INSNS (2);
22206 return false;
22207
22208 case CTZ:
22209 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
22210 return false;
22211
22212 case FFS:
22213 *total = COSTS_N_INSNS (4);
22214 return false;
22215
22216 case POPCOUNT:
22217 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
22218 return false;
22219
22220 case PARITY:
22221 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
22222 return false;
22223
22224 case NOT:
22225 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
22226 *total = 0;
22227 else
22228 *total = COSTS_N_INSNS (1);
22229 return false;
22230
22231 case AND:
22232 if (CONST_INT_P (XEXP (x, 1)))
22233 {
22234 rtx left = XEXP (x, 0);
22235 rtx_code left_code = GET_CODE (left);
22236
22237 /* rotate-and-mask: 1 insn. */
22238 if ((left_code == ROTATE
22239 || left_code == ASHIFT
22240 || left_code == LSHIFTRT)
22241 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
22242 {
22243 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
22244 if (!CONST_INT_P (XEXP (left, 1)))
22245 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
22246 *total += COSTS_N_INSNS (1);
22247 return true;
22248 }
22249
22250 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
22251 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
22252 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
22253 || (val & 0xffff) == val
22254 || (val & 0xffff0000) == val
22255 || ((val & 0xffff) == 0 && mode == SImode))
22256 {
22257 *total = rtx_cost (left, mode, AND, 0, speed);
22258 *total += COSTS_N_INSNS (1);
22259 return true;
22260 }
22261
22262 /* 2 insns. */
22263 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
22264 {
22265 *total = rtx_cost (left, mode, AND, 0, speed);
22266 *total += COSTS_N_INSNS (2);
22267 return true;
22268 }
22269 }
22270
22271 *total = COSTS_N_INSNS (1);
22272 return false;
22273
22274 case IOR:
22275 /* FIXME */
22276 *total = COSTS_N_INSNS (1);
22277 return true;
22278
22279 case CLZ:
22280 case XOR:
22281 case ZERO_EXTRACT:
22282 *total = COSTS_N_INSNS (1);
22283 return false;
22284
22285 case ASHIFT:
22286 /* The EXTSWSLI instruction is a combined instruction. Don't count both
22287 the sign extend and shift separately within the insn. */
22288 if (TARGET_EXTSWSLI && mode == DImode
22289 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
22290 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
22291 {
22292 *total = 0;
22293 return false;
22294 }
22295 /* fall through */
22296
22297 case ASHIFTRT:
22298 case LSHIFTRT:
22299 case ROTATE:
22300 case ROTATERT:
22301 /* Handle mul_highpart. */
22302 if (outer_code == TRUNCATE
22303 && GET_CODE (XEXP (x, 0)) == MULT)
22304 {
22305 if (mode == DImode)
22306 *total = rs6000_cost->muldi;
22307 else
22308 *total = rs6000_cost->mulsi;
22309 return true;
22310 }
22311 else if (outer_code == AND)
22312 *total = 0;
22313 else
22314 *total = COSTS_N_INSNS (1);
22315 return false;
22316
22317 case SIGN_EXTEND:
22318 case ZERO_EXTEND:
22319 if (MEM_P (XEXP (x, 0)))
22320 *total = 0;
22321 else
22322 *total = COSTS_N_INSNS (1);
22323 return false;
22324
22325 case COMPARE:
22326 case NEG:
22327 case ABS:
22328 if (!FLOAT_MODE_P (mode))
22329 {
22330 *total = COSTS_N_INSNS (1);
22331 return false;
22332 }
22333 /* FALLTHRU */
22334
22335 case FLOAT:
22336 case UNSIGNED_FLOAT:
22337 case FIX:
22338 case UNSIGNED_FIX:
22339 case FLOAT_TRUNCATE:
22340 *total = rs6000_cost->fp;
22341 return false;
22342
22343 case FLOAT_EXTEND:
22344 if (mode == DFmode)
22345 *total = rs6000_cost->sfdf_convert;
22346 else
22347 *total = rs6000_cost->fp;
22348 return false;
22349
22350 case CALL:
22351 case IF_THEN_ELSE:
22352 if (!speed)
22353 {
22354 *total = COSTS_N_INSNS (1);
22355 return true;
22356 }
22357 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
22358 {
22359 *total = rs6000_cost->fp;
22360 return false;
22361 }
22362 break;
22363
22364 case NE:
22365 case EQ:
22366 case GTU:
22367 case LTU:
22368 /* Carry bit requires mode == Pmode.
22369 NEG or PLUS already counted so only add one. */
22370 if (mode == Pmode
22371 && (outer_code == NEG || outer_code == PLUS))
22372 {
22373 *total = COSTS_N_INSNS (1);
22374 return true;
22375 }
22376 /* FALLTHRU */
22377
22378 case GT:
22379 case LT:
22380 case UNORDERED:
22381 if (outer_code == SET)
22382 {
22383 if (XEXP (x, 1) == const0_rtx)
22384 {
22385 *total = COSTS_N_INSNS (2);
22386 return true;
22387 }
22388 else
22389 {
22390 *total = COSTS_N_INSNS (3);
22391 return false;
22392 }
22393 }
22394 /* CC COMPARE. */
22395 if (outer_code == COMPARE)
22396 {
22397 *total = 0;
22398 return true;
22399 }
22400 break;
22401
22402 case UNSPEC:
22403 if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ)
22404 {
22405 *total = 0;
22406 return true;
22407 }
22408 break;
22409
22410 default:
22411 break;
22412 }
22413
22414 return false;
22415 }
22416
22417 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
22418
22419 static bool
22420 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
22421 int opno, int *total, bool speed)
22422 {
22423 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
22424
22425 fprintf (stderr,
22426 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22427 "opno = %d, total = %d, speed = %s, x:\n",
22428 ret ? "complete" : "scan inner",
22429 GET_MODE_NAME (mode),
22430 GET_RTX_NAME (outer_code),
22431 opno,
22432 *total,
22433 speed ? "true" : "false");
22434
22435 debug_rtx (x);
22436
22437 return ret;
22438 }
22439
22440 static int
22441 rs6000_insn_cost (rtx_insn *insn, bool speed)
22442 {
22443 if (recog_memoized (insn) < 0)
22444 return 0;
22445
22446 /* If we are optimizing for size, just use the length. */
22447 if (!speed)
22448 return get_attr_length (insn);
22449
22450 /* Use the cost if provided. */
22451 int cost = get_attr_cost (insn);
22452 if (cost > 0)
22453 return cost;
22454
22455 /* If the insn tells us how many insns there are, use that. Otherwise use
22456 the length/4. Adjust the insn length to remove the extra size that
22457 prefixed instructions take. */
22458 int n = get_attr_num_insns (insn);
22459 if (n == 0)
22460 {
22461 int length = get_attr_length (insn);
22462 if (get_attr_prefixed (insn) == PREFIXED_YES)
22463 {
22464 int adjust = 0;
22465 ADJUST_INSN_LENGTH (insn, adjust);
22466 length -= adjust;
22467 }
22468
22469 n = length / 4;
22470 }
22471
22472 enum attr_type type = get_attr_type (insn);
22473
22474 switch (type)
22475 {
22476 case TYPE_LOAD:
22477 case TYPE_FPLOAD:
22478 case TYPE_VECLOAD:
22479 cost = COSTS_N_INSNS (n + 1);
22480 break;
22481
22482 case TYPE_MUL:
22483 switch (get_attr_size (insn))
22484 {
22485 case SIZE_8:
22486 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
22487 break;
22488 case SIZE_16:
22489 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
22490 break;
22491 case SIZE_32:
22492 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
22493 break;
22494 case SIZE_64:
22495 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
22496 break;
22497 default:
22498 gcc_unreachable ();
22499 }
22500 break;
22501 case TYPE_DIV:
22502 switch (get_attr_size (insn))
22503 {
22504 case SIZE_32:
22505 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
22506 break;
22507 case SIZE_64:
22508 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
22509 break;
22510 default:
22511 gcc_unreachable ();
22512 }
22513 break;
22514
22515 case TYPE_FP:
22516 cost = n * rs6000_cost->fp;
22517 break;
22518 case TYPE_DMUL:
22519 cost = n * rs6000_cost->dmul;
22520 break;
22521 case TYPE_SDIV:
22522 cost = n * rs6000_cost->sdiv;
22523 break;
22524 case TYPE_DDIV:
22525 cost = n * rs6000_cost->ddiv;
22526 break;
22527
22528 case TYPE_SYNC:
22529 case TYPE_LOAD_L:
22530 case TYPE_MFCR:
22531 case TYPE_MFCRF:
22532 cost = COSTS_N_INSNS (n + 2);
22533 break;
22534
22535 default:
22536 cost = COSTS_N_INSNS (n);
22537 }
22538
22539 return cost;
22540 }
22541
22542 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
22543
22544 static int
22545 rs6000_debug_address_cost (rtx x, machine_mode mode,
22546 addr_space_t as, bool speed)
22547 {
22548 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
22549
22550 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22551 ret, speed ? "true" : "false");
22552 debug_rtx (x);
22553
22554 return ret;
22555 }
22556
22557
22558 /* A C expression returning the cost of moving data from a register of class
22559 CLASS1 to one of CLASS2. */
22560
22561 static int
22562 rs6000_register_move_cost (machine_mode mode,
22563 reg_class_t from, reg_class_t to)
22564 {
22565 int ret;
22566 reg_class_t rclass;
22567
22568 if (TARGET_DEBUG_COST)
22569 dbg_cost_ctrl++;
22570
22571 /* If we have VSX, we can easily move between FPR or Altivec registers,
22572 otherwise we can only easily move within classes.
22573 Do this first so we give best-case answers for union classes
22574 containing both gprs and vsx regs. */
22575 HARD_REG_SET to_vsx, from_vsx;
22576 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
22577 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
22578 if (!hard_reg_set_empty_p (to_vsx)
22579 && !hard_reg_set_empty_p (from_vsx)
22580 && (TARGET_VSX
22581 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
22582 {
22583 int reg = FIRST_FPR_REGNO;
22584 if (TARGET_VSX
22585 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
22586 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
22587 reg = FIRST_ALTIVEC_REGNO;
22588 ret = 2 * hard_regno_nregs (reg, mode);
22589 }
22590
22591 /* Moves from/to GENERAL_REGS. */
22592 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
22593 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
22594 {
22595 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22596 {
22597 if (TARGET_DIRECT_MOVE)
22598 {
22599 /* Keep the cost for direct moves above that for within
22600 a register class even if the actual processor cost is
22601 comparable. We do this because a direct move insn
22602 can't be a nop, whereas with ideal register
22603 allocation a move within the same class might turn
22604 out to be a nop. */
22605 if (rs6000_tune == PROCESSOR_POWER9
22606 || rs6000_tune == PROCESSOR_POWER10)
22607 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22608 else
22609 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22610 /* SFmode requires a conversion when moving between gprs
22611 and vsx. */
22612 if (mode == SFmode)
22613 ret += 2;
22614 }
22615 else
22616 ret = (rs6000_memory_move_cost (mode, rclass, false)
22617 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22618 }
22619
22620 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22621 shift. */
22622 else if (rclass == CR_REGS)
22623 ret = 4;
22624
22625 /* For those processors that have slow LR/CTR moves, make them more
22626 expensive than memory in order to bias spills to memory.  */
22627 else if ((rs6000_tune == PROCESSOR_POWER6
22628 || rs6000_tune == PROCESSOR_POWER7
22629 || rs6000_tune == PROCESSOR_POWER8
22630 || rs6000_tune == PROCESSOR_POWER9)
22631 && reg_class_subset_p (rclass, SPECIAL_REGS))
22632 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22633
22634 else
22635 /* A move will cost one instruction per GPR moved. */
22636 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22637 }
22638
22639 /* Everything else has to go through GENERAL_REGS. */
22640 else
22641 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22642 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22643
22644 if (TARGET_DEBUG_COST)
22645 {
22646 if (dbg_cost_ctrl == 1)
22647 fprintf (stderr,
22648 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22649 ret, GET_MODE_NAME (mode), reg_class_names[from],
22650 reg_class_names[to]);
22651 dbg_cost_ctrl--;
22652 }
22653
22654 return ret;
22655 }
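
/* For example, on a 64-bit target tuned for power9 with direct moves, a
   DImode copy between a GPR and a VSX register costs
   3 * hard_regno_nregs = 3, deliberately above the within-class cost of
   2, so a direct move never looks cheaper than a same-class copy that
   the allocator may be able to elide entirely.  */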
22656
22657 /* A C expression returning the cost of moving data of MODE from a register to
22658 or from memory. */
22659
22660 static int
22661 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22662 bool in ATTRIBUTE_UNUSED)
22663 {
22664 int ret;
22665
22666 if (TARGET_DEBUG_COST)
22667 dbg_cost_ctrl++;
22668
22669 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22670 ret = 4 * hard_regno_nregs (0, mode);
22671 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
22672 || reg_classes_intersect_p (rclass, VSX_REGS)))
22673 ret = 4 * hard_regno_nregs (32, mode);
22674 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22675 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22676 else
22677 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22678
22679 if (TARGET_DEBUG_COST)
22680 {
22681 if (dbg_cost_ctrl == 1)
22682 fprintf (stderr,
22683 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22684 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22685 dbg_cost_ctrl--;
22686 }
22687
22688 return ret;
22689 }
22690
22691 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22692
22693 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22694 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22695 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22696 move cost between GENERAL_REGS and VSX_REGS low.
22697
22698 It might seem reasonable to use a union class. After all, if usage
22699 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22700 rather than memory. However, in cases where register pressure of
22701 both is high, like the cactus_adm spec test, allowing
22702 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22703 the first scheduling pass. This is partly due to an allocno of
22704 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22705 class, which gives too high a pressure for GENERAL_REGS and too low
22706 for VSX_REGS. So, force a choice of the subclass here.
22707
22708 The best class is also the union if GENERAL_REGS and VSX_REGS have
22709 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22710 allocno class, since trying to narrow down the class by regno mode
22711 is prone to error. For example, SImode is allowed in VSX regs and
22712 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22713 it would be wrong to choose an allocno of GENERAL_REGS based on
22714 SImode. */
22715
22716 static reg_class_t
22717 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22718 reg_class_t allocno_class,
22719 reg_class_t best_class)
22720 {
22721 switch (allocno_class)
22722 {
22723 case GEN_OR_VSX_REGS:
22724 /* best_class must be a subset of allocno_class. */
22725 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22726 || best_class == GEN_OR_FLOAT_REGS
22727 || best_class == VSX_REGS
22728 || best_class == ALTIVEC_REGS
22729 || best_class == FLOAT_REGS
22730 || best_class == GENERAL_REGS
22731 || best_class == BASE_REGS);
22732 /* Use best_class but choose wider classes when copying from the
22733 wider class to best_class is cheap. This mimics IRA choice
22734 of allocno class. */
22735 if (best_class == BASE_REGS)
22736 return GENERAL_REGS;
22737 if (TARGET_VSX && best_class == FLOAT_REGS)
22738 return VSX_REGS;
22739 return best_class;
22740
22741 case VSX_REGS:
22742 if (best_class == ALTIVEC_REGS)
22743 return ALTIVEC_REGS;
22744
22745 default:
22746 break;
22747 }
22748
22749 return allocno_class;
22750 }
22751
22752 /* Load up a constant. If the mode is a vector mode, splat the value across
22753 all of the vector elements. */
22754
22755 static rtx
22756 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22757 {
22758 rtx reg;
22759
22760 if (mode == SFmode || mode == DFmode)
22761 {
22762 rtx d = const_double_from_real_value (dconst, mode);
22763 reg = force_reg (mode, d);
22764 }
22765 else if (mode == V4SFmode)
22766 {
22767 rtx d = const_double_from_real_value (dconst, SFmode);
22768 rtvec v = gen_rtvec (4, d, d, d, d);
22769 reg = gen_reg_rtx (mode);
22770 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22771 }
22772 else if (mode == V2DFmode)
22773 {
22774 rtx d = const_double_from_real_value (dconst, DFmode);
22775 rtvec v = gen_rtvec (2, d, d);
22776 reg = gen_reg_rtx (mode);
22777 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22778 }
22779 else
22780 gcc_unreachable ();
22781
22782 return reg;
22783 }
22784
22785 /* Generate an FMA instruction. */
22786
22787 static void
22788 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
22789 {
22790 machine_mode mode = GET_MODE (target);
22791 rtx dst;
22792
22793 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
22794 gcc_assert (dst != NULL);
22795
22796 if (dst != target)
22797 emit_move_insn (target, dst);
22798 }
22799
22800 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
22801
22802 static void
22803 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
22804 {
22805 machine_mode mode = GET_MODE (dst);
22806 rtx r;
22807
22808 /* This is a tad more complicated, since the fnma_optab is for
22809 a different expression: fma(-m1, m2, a), which is the same
22810 thing except in the case of signed zeros.
22811
22812 Fortunately we know that if FMA is supported that FNMSUB is
22813 also supported in the ISA. Just expand it directly. */
22814
22815 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
22816
22817 r = gen_rtx_NEG (mode, a);
22818 r = gen_rtx_FMA (mode, m1, m2, r);
22819 r = gen_rtx_NEG (mode, r);
22820 emit_insn (gen_rtx_SET (dst, r));
22821 }
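
/* Algebraically, -fma (m1, m2, -a) = -(m1 * m2 - a) = a - m1 * m2, the
   fused negative multiply-subtract.  It agrees with fma (-m1, m2, a)
   everywhere except the sign of an exact-zero result, which is why the
   fnma_optab form is not usable here.  */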
22822
22823 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
22824 add a reg_note saying that this was a division. Support both scalar and
22825 vector divide. Assumes no trapping math and finite arguments. */
22826
22827 void
22828 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
22829 {
22830 machine_mode mode = GET_MODE (dst);
22831 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
22832 int i;
22833
22834 /* Low precision estimates guarantee 5 bits of accuracy. High
22835 precision estimates guarantee 14 bits of accuracy. SFmode
22836 requires 23 bits of accuracy. DFmode requires 52 bits of
22837 accuracy. Each pass at least doubles the accuracy, leading
22838 to the following. */
22839 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22840 if (mode == DFmode || mode == V2DFmode)
22841 passes++;
22842
22843 enum insn_code code = optab_handler (smul_optab, mode);
22844 insn_gen_fn gen_mul = GEN_FCN (code);
22845
22846 gcc_assert (code != CODE_FOR_nothing);
22847
22848 one = rs6000_load_constant_and_splat (mode, dconst1);
22849
22850 /* x0 = 1./d estimate */
22851 x0 = gen_reg_rtx (mode);
22852 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
22853 UNSPEC_FRES)));
22854
22855 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
22856 if (passes > 1) {
22857
22858 /* e0 = 1. - d * x0 */
22859 e0 = gen_reg_rtx (mode);
22860 rs6000_emit_nmsub (e0, d, x0, one);
22861
22862 /* x1 = x0 + e0 * x0 */
22863 x1 = gen_reg_rtx (mode);
22864 rs6000_emit_madd (x1, e0, x0, x0);
22865
22866 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
22867 ++i, xprev = xnext, eprev = enext) {
22868
22869 /* enext = eprev * eprev */
22870 enext = gen_reg_rtx (mode);
22871 emit_insn (gen_mul (enext, eprev, eprev));
22872
22873 /* xnext = xprev + enext * xprev */
22874 xnext = gen_reg_rtx (mode);
22875 rs6000_emit_madd (xnext, enext, xprev, xprev);
22876 }
22877
22878 } else
22879 xprev = x0;
22880
22881 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
22882
22883 /* u = n * xprev */
22884 u = gen_reg_rtx (mode);
22885 emit_insn (gen_mul (u, n, xprev));
22886
22887 /* v = n - (d * u) */
22888 v = gen_reg_rtx (mode);
22889 rs6000_emit_nmsub (v, d, u, n);
22890
22891 /* dst = (v * xprev) + u */
22892 rs6000_emit_madd (dst, v, xprev, u);
22893
22894 if (note_p)
22895 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
22896 }
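
/* A minimal scalar sketch of the three-pass (SFmode, low-precision
   estimate) refinement above in plain C, where ESTIMATE stands in for
   the fres result; illustration only, not part of the compiler.  */
#if 0
static double
swdiv_sketch (double n, double d, double estimate)
{
  double x = estimate;          /* x0 ~= 1/d, from fres */
  double e = 1.0 - d * x;       /* e0 = 1 - d*x0 */
  x = x + e * x;                /* x1 = x0 * (2 - d*x0) */
  e = e * e;                    /* e1 = e0 * e0 */
  x = x + e * x;                /* x2 = x1 * (2 - d*x1) */
  double u = n * x;             /* u ~= n/d */
  double v = n - d * u;         /* remaining error */
  return u + v * x;             /* final correction folded into quotient */
}
#endif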
22897
22898 /* Goldschmidt's Algorithm for single/double-precision floating point
22899 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
22900
22901 void
22902 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
22903 {
22904 machine_mode mode = GET_MODE (src);
22905 rtx e = gen_reg_rtx (mode);
22906 rtx g = gen_reg_rtx (mode);
22907 rtx h = gen_reg_rtx (mode);
22908
22909 /* Low precision estimates guarantee 5 bits of accuracy. High
22910 precision estimates guarantee 14 bits of accuracy. SFmode
22911 requires 23 bits of accuracy. DFmode requires 52 bits of
22912 accuracy. Each pass at least doubles the accuracy, leading
22913 to the following. */
22914 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
22915 if (mode == DFmode || mode == V2DFmode)
22916 passes++;
22917
22918 int i;
22919 rtx mhalf;
22920 enum insn_code code = optab_handler (smul_optab, mode);
22921 insn_gen_fn gen_mul = GEN_FCN (code);
22922
22923 gcc_assert (code != CODE_FOR_nothing);
22924
22925 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
22926
22927 /* e = rsqrt estimate */
22928 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
22929 UNSPEC_RSQRT)));
22930
22931 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
22932 if (!recip)
22933 {
22934 rtx zero = force_reg (mode, CONST0_RTX (mode));
22935
22936 if (mode == SFmode)
22937 {
22938 rtx target = emit_conditional_move (e, { GT, src, zero, mode },
22939 e, zero, mode, 0);
22940 if (target != e)
22941 emit_move_insn (e, target);
22942 }
22943 else
22944 {
22945 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
22946 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
22947 }
22948 }
22949
22950 /* g = sqrt estimate. */
22951 emit_insn (gen_mul (g, e, src));
22952 /* h = 1/(2*sqrt) estimate. */
22953 emit_insn (gen_mul (h, e, mhalf));
22954
22955 if (recip)
22956 {
22957 if (passes == 1)
22958 {
22959 rtx t = gen_reg_rtx (mode);
22960 rs6000_emit_nmsub (t, g, h, mhalf);
22961 /* Apply correction directly to 1/rsqrt estimate. */
22962 rs6000_emit_madd (dst, e, t, e);
22963 }
22964 else
22965 {
22966 for (i = 0; i < passes; i++)
22967 {
22968 rtx t1 = gen_reg_rtx (mode);
22969 rtx g1 = gen_reg_rtx (mode);
22970 rtx h1 = gen_reg_rtx (mode);
22971
22972 rs6000_emit_nmsub (t1, g, h, mhalf);
22973 rs6000_emit_madd (g1, g, t1, g);
22974 rs6000_emit_madd (h1, h, t1, h);
22975
22976 g = g1;
22977 h = h1;
22978 }
22979 /* Multiply by 2 for 1/rsqrt. */
22980 emit_insn (gen_add3_insn (dst, h, h));
22981 }
22982 }
22983 else
22984 {
22985 rtx t = gen_reg_rtx (mode);
22986 rs6000_emit_nmsub (t, g, h, mhalf);
22987 rs6000_emit_madd (dst, g, t, g);
22988 }
22989
22990 return;
22991 }
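
/* A matching scalar sketch of the Goldschmidt iteration above in plain
   C, where ESTIMATE stands in for the hardware rsqrt estimate and the
   single-pass rsqrt special case is omitted; illustration only.  G
   converges to sqrt(src) and H to 1/(2*sqrt(src)).  */
#if 0
static double
swsqrt_sketch (double src, double estimate, int passes, int recip)
{
  double g = src * estimate;    /* sqrt estimate */
  double h = 0.5 * estimate;    /* 1/(2*sqrt) estimate */
  if (recip)
    {
      for (int i = 0; i < passes; i++)
        {
          double t = 0.5 - g * h;   /* joint correction term */
          g = g + g * t;
          h = h + h * t;
        }
      return h + h;             /* 2*h recovers 1/sqrt(src) */
    }
  double t = 0.5 - g * h;       /* single correction for sqrt */
  return g + g * t;
}
#endif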
22992
22993 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
22994 (Power7) targets. DST is the target, and SRC is the argument operand. */
22995
22996 void
22997 rs6000_emit_popcount (rtx dst, rtx src)
22998 {
22999 machine_mode mode = GET_MODE (dst);
23000 rtx tmp1, tmp2;
23001
23002 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
23003 if (TARGET_POPCNTD)
23004 {
23005 if (mode == SImode)
23006 emit_insn (gen_popcntdsi2 (dst, src));
23007 else
23008 emit_insn (gen_popcntddi2 (dst, src));
23009 return;
23010 }
23011
23012 tmp1 = gen_reg_rtx (mode);
23013
23014 if (mode == SImode)
23015 {
23016 emit_insn (gen_popcntbsi2 (tmp1, src));
23017 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
23018 NULL_RTX, 0);
23019 tmp2 = force_reg (SImode, tmp2);
23020 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
23021 }
23022 else
23023 {
23024 emit_insn (gen_popcntbdi2 (tmp1, src));
23025 tmp2 = expand_mult (DImode, tmp1,
23026 GEN_INT ((HOST_WIDE_INT)
23027 0x01010101 << 32 | 0x01010101),
23028 NULL_RTX, 0);
23029 tmp2 = force_reg (DImode, tmp2);
23030 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
23031 }
23032 }
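
/* A plain C sketch of the SImode fallback path above, assuming
   PER_BYTE_COUNTS holds the popcntb result (a population count in each
   byte); the multiply accumulates the four byte counts into the top
   byte and the shift extracts it.  Illustration only.  */
#if 0
static unsigned int
popcount_sketch (unsigned int per_byte_counts)
{
  /* Each byte count is at most 8 and the total at most 32, so the
     partial sums never carry between bytes.  */
  return (per_byte_counts * 0x01010101u) >> 24;
}
#endif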
23033
23034
23035 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
23036 target, and SRC is the argument operand. */
23037
23038 void
23039 rs6000_emit_parity (rtx dst, rtx src)
23040 {
23041 machine_mode mode = GET_MODE (dst);
23042 rtx tmp;
23043
23044 tmp = gen_reg_rtx (mode);
23045
23046 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
23047 if (TARGET_CMPB)
23048 {
23049 if (mode == SImode)
23050 {
23051 emit_insn (gen_popcntbsi2 (tmp, src));
23052 emit_insn (gen_paritysi2_cmpb (dst, tmp));
23053 }
23054 else
23055 {
23056 emit_insn (gen_popcntbdi2 (tmp, src));
23057 emit_insn (gen_paritydi2_cmpb (dst, tmp));
23058 }
23059 return;
23060 }
23061
23062 if (mode == SImode)
23063 {
23064 /* Is mult+shift >= shift+xor+shift+xor? */
23065 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
23066 {
23067 rtx tmp1, tmp2, tmp3, tmp4;
23068
23069 tmp1 = gen_reg_rtx (SImode);
23070 emit_insn (gen_popcntbsi2 (tmp1, src));
23071
23072 tmp2 = gen_reg_rtx (SImode);
23073 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
23074 tmp3 = gen_reg_rtx (SImode);
23075 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
23076
23077 tmp4 = gen_reg_rtx (SImode);
23078 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
23079 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
23080 }
23081 else
23082 rs6000_emit_popcount (tmp, src);
23083 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
23084 }
23085 else
23086 {
23087 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
23088 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
23089 {
23090 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
23091
23092 tmp1 = gen_reg_rtx (DImode);
23093 emit_insn (gen_popcntbdi2 (tmp1, src));
23094
23095 tmp2 = gen_reg_rtx (DImode);
23096 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
23097 tmp3 = gen_reg_rtx (DImode);
23098 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
23099
23100 tmp4 = gen_reg_rtx (DImode);
23101 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
23102 tmp5 = gen_reg_rtx (DImode);
23103 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
23104
23105 tmp6 = gen_reg_rtx (DImode);
23106 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
23107 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
23108 }
23109 else
23110 rs6000_emit_popcount (tmp, src);
23111 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
23112 }
23113 }
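
/* A plain C sketch of the SImode shift/xor fallback above, again
   starting from the popcntb per-byte counts; parity is preserved by
   each fold, so only the low bit of the final xor matters.
   Illustration only.  */
#if 0
static unsigned int
parity_sketch (unsigned int per_byte_counts)
{
  unsigned int t = per_byte_counts ^ (per_byte_counts >> 16);
  t ^= t >> 8;
  return t & 1;
}
#endif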
23114
23115 /* Expand an Altivec constant permutation for little endian mode.
23116 OP0 and OP1 are the input vectors and TARGET is the output vector.
23117 SEL specifies the constant permutation vector.
23118
23119 There are two issues: First, the two input operands must be
23120 swapped so that together they form a double-wide array in LE
23121 order. Second, the vperm instruction has surprising behavior
23122 in LE mode: it interprets the elements of the source vectors
23123 in BE mode ("left to right") and interprets the elements of
23124 the destination vector in LE mode ("right to left"). To
23125 correct for this, we must subtract each element of the permute
23126 control vector from 31.
23127
23128 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
23129 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
23130 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
23131 serve as the permute control vector. Then, in BE mode,
23132
23133 vperm 9,10,11,12
23134
23135 places the desired result in vr9. However, in LE mode the
23136 vector contents will be
23137
23138 vr10 = 00000003 00000002 00000001 00000000
23139 vr11 = 00000007 00000006 00000005 00000004
23140
23141 The result of the vperm using the same permute control vector is
23142
23143 vr9 = 05000000 07000000 01000000 03000000
23144
23145 That is, the leftmost 4 bytes of vr10 are interpreted as the
23146 source for the rightmost 4 bytes of vr9, and so on.
23147
23148 If we change the permute control vector to
23149
23150 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
23151
23152 and issue
23153
23154 vperm 9,11,10,12
23155
23156 we get the desired
23157
23158 vr9 = 00000006 00000004 00000002 00000000. */
23159
23160 static void
23161 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
23162 const vec_perm_indices &sel)
23163 {
23164 unsigned int i;
23165 rtx perm[16];
23166 rtx constv, unspec;
23167
23168 /* Unpack and adjust the constant selector. */
23169 for (i = 0; i < 16; ++i)
23170 {
23171 unsigned int elt = 31 - (sel[i] & 31);
23172 perm[i] = GEN_INT (elt);
23173 }
23174
23175 /* Expand to a permute, swapping the inputs and using the
23176 adjusted selector. */
23177 if (!REG_P (op0))
23178 op0 = force_reg (V16QImode, op0);
23179 if (!REG_P (op1))
23180 op1 = force_reg (V16QImode, op1);
23181
23182 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
23183 constv = force_reg (V16QImode, constv);
23184 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
23185 UNSPEC_VPERM);
23186 if (!REG_P (target))
23187 {
23188 rtx tmp = gen_reg_rtx (V16QImode);
23189 emit_move_insn (tmp, unspec);
23190 unspec = tmp;
23191 }
23192
23193 emit_move_insn (target, unspec);
23194 }
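
/* A small sketch of the selector fixup performed above: every element
   is reflected around 31 and the inputs are handed to vperm in swapped
   order, which turns the BE-semantics vperm into the desired LE
   permute.  Illustration only.  */
#if 0
static void
adjust_selector_sketch (const unsigned char sel[16], unsigned char out[16])
{
  for (int i = 0; i < 16; i++)
    out[i] = 31 - (sel[i] & 31);    /* then emit vperm (op1, op0, out) */
}
#endif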
23195
23196 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
23197 permute control vector. But here it's not a constant, so we must
23198 generate a vector NAND or NOR to do the adjustment. */
23199
23200 void
23201 altivec_expand_vec_perm_le (rtx operands[4])
23202 {
23203 rtx notx, iorx, unspec;
23204 rtx target = operands[0];
23205 rtx op0 = operands[1];
23206 rtx op1 = operands[2];
23207 rtx sel = operands[3];
23208 rtx tmp = target;
23209 rtx norreg = gen_reg_rtx (V16QImode);
23210 machine_mode mode = GET_MODE (target);
23211
23212 /* Get everything in regs so the pattern matches. */
23213 if (!REG_P (op0))
23214 op0 = force_reg (mode, op0);
23215 if (!REG_P (op1))
23216 op1 = force_reg (mode, op1);
23217 if (!REG_P (sel))
23218 sel = force_reg (V16QImode, sel);
23219 if (!REG_P (target))
23220 tmp = gen_reg_rtx (mode);
23221
23222 if (TARGET_P9_VECTOR)
23223 {
23224 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
23225 UNSPEC_VPERMR);
23226 }
23227 else
23228 {
23229 /* Invert the selector with a VNAND if available, else a VNOR.
23230 The VNAND is preferred for future fusion opportunities. */
23231 notx = gen_rtx_NOT (V16QImode, sel);
23232 iorx = (TARGET_P8_VECTOR
23233 ? gen_rtx_IOR (V16QImode, notx, notx)
23234 : gen_rtx_AND (V16QImode, notx, notx));
23235 emit_insn (gen_rtx_SET (norreg, iorx));
23236
23237 /* Permute with operands reversed and adjusted selector. */
23238 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
23239 UNSPEC_VPERM);
23240 }
23241
23242 /* Copy into target, possibly by way of a register. */
23243 if (!REG_P (target))
23244 {
23245 emit_move_insn (tmp, unspec);
23246 unspec = tmp;
23247 }
23248
23249 emit_move_insn (target, unspec);
23250 }
23251
23252 /* Expand an Altivec constant permutation. Return true if we match
23253 an efficient implementation; false to fall back to VPERM.
23254
23255 OP0 and OP1 are the input vectors and TARGET is the output vector.
23256 SEL specifies the constant permutation vector. */
23257
23258 static bool
23259 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
23260 const vec_perm_indices &sel)
23261 {
23262 struct altivec_perm_insn {
23263 HOST_WIDE_INT mask;
23264 enum insn_code impl;
23265 unsigned char perm[16];
23266 };
23267 static const struct altivec_perm_insn patterns[] = {
23268 {OPTION_MASK_ALTIVEC,
23269 CODE_FOR_altivec_vpkuhum_direct,
23270 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
23271 {OPTION_MASK_ALTIVEC,
23272 CODE_FOR_altivec_vpkuwum_direct,
23273 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
23274 {OPTION_MASK_ALTIVEC,
23275 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
23276 : CODE_FOR_altivec_vmrglb_direct,
23277 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
23278 {OPTION_MASK_ALTIVEC,
23279 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
23280 : CODE_FOR_altivec_vmrglh_direct,
23281 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
23282 {OPTION_MASK_ALTIVEC,
23283 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si
23284 : CODE_FOR_altivec_vmrglw_direct_v4si,
23285 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
23286 {OPTION_MASK_ALTIVEC,
23287 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
23288 : CODE_FOR_altivec_vmrghb_direct,
23289 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
23290 {OPTION_MASK_ALTIVEC,
23291 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
23292 : CODE_FOR_altivec_vmrghh_direct,
23293 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
23294 {OPTION_MASK_ALTIVEC,
23295 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si
23296 : CODE_FOR_altivec_vmrghw_direct_v4si,
23297 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
23298 {OPTION_MASK_P8_VECTOR,
23299 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
23300 : CODE_FOR_p8_vmrgow_v4sf_direct,
23301 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
23302 {OPTION_MASK_P8_VECTOR,
23303 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
23304 : CODE_FOR_p8_vmrgew_v4sf_direct,
23305 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
23306 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23307 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
23308 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23309 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
23310 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23311 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
23312 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23313 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
23314
23315 unsigned int i, j, elt, which;
23316 unsigned char perm[16];
23317 rtx x;
23318 bool one_vec;
23319
23320 /* Unpack the constant selector. */
23321 for (i = which = 0; i < 16; ++i)
23322 {
23323 elt = sel[i] & 31;
23324 which |= (elt < 16 ? 1 : 2);
23325 perm[i] = elt;
23326 }
23327
23328 /* Simplify the constant selector based on operands. */
23329 switch (which)
23330 {
23331 default:
23332 gcc_unreachable ();
23333
23334 case 3:
23335 one_vec = false;
23336 if (!rtx_equal_p (op0, op1))
23337 break;
23338 /* FALLTHRU */
23339
23340 case 2:
23341 for (i = 0; i < 16; ++i)
23342 perm[i] &= 15;
23343 op0 = op1;
23344 one_vec = true;
23345 break;
23346
23347 case 1:
23348 op1 = op0;
23349 one_vec = true;
23350 break;
23351 }
23352
23353 /* Look for splat patterns. */
23354 if (one_vec)
23355 {
23356 elt = perm[0];
23357
23358 for (i = 0; i < 16; ++i)
23359 if (perm[i] != elt)
23360 break;
23361 if (i == 16)
23362 {
23363 if (!BYTES_BIG_ENDIAN)
23364 elt = 15 - elt;
23365 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
23366 return true;
23367 }
23368
23369 if (elt % 2 == 0)
23370 {
23371 for (i = 0; i < 16; i += 2)
23372 if (perm[i] != elt || perm[i + 1] != elt + 1)
23373 break;
23374 if (i == 16)
23375 {
23376 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
23377 x = gen_reg_rtx (V8HImode);
23378 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
23379 GEN_INT (field)));
23380 emit_move_insn (target, gen_lowpart (V16QImode, x));
23381 return true;
23382 }
23383 }
23384
23385 if (elt % 4 == 0)
23386 {
23387 for (i = 0; i < 16; i += 4)
23388 if (perm[i] != elt
23389 || perm[i + 1] != elt + 1
23390 || perm[i + 2] != elt + 2
23391 || perm[i + 3] != elt + 3)
23392 break;
23393 if (i == 16)
23394 {
23395 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
23396 x = gen_reg_rtx (V4SImode);
23397 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
23398 GEN_INT (field)));
23399 emit_move_insn (target, gen_lowpart (V16QImode, x));
23400 return true;
23401 }
23402 }
23403 }
23404
23405 /* Look for merge and pack patterns. */
23406 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
23407 {
23408 bool swapped;
23409
23410 if ((patterns[j].mask & rs6000_isa_flags) == 0)
23411 continue;
23412
23413 elt = patterns[j].perm[0];
23414 if (perm[0] == elt)
23415 swapped = false;
23416 else if (perm[0] == elt + 16)
23417 swapped = true;
23418 else
23419 continue;
23420 for (i = 1; i < 16; ++i)
23421 {
23422 elt = patterns[j].perm[i];
23423 if (swapped)
23424 elt = (elt >= 16 ? elt - 16 : elt + 16);
23425 else if (one_vec && elt >= 16)
23426 elt -= 16;
23427 if (perm[i] != elt)
23428 break;
23429 }
23430 if (i == 16)
23431 {
23432 enum insn_code icode = patterns[j].impl;
23433 machine_mode omode = insn_data[icode].operand[0].mode;
23434 machine_mode imode = insn_data[icode].operand[1].mode;
23435
23436 rtx perm_idx = GEN_INT (0);
23437 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23438 {
23439 int perm_val = 0;
23440 if (one_vec)
23441 {
23442 if (perm[0] == 8)
23443 perm_val |= 2;
23444 if (perm[8] == 8)
23445 perm_val |= 1;
23446 }
23447 else
23448 {
23449 if (perm[0] != 0)
23450 perm_val |= 2;
23451 if (perm[8] != 16)
23452 perm_val |= 1;
23453 }
23454 perm_idx = GEN_INT (perm_val);
23455 }
23456
23457 /* For little-endian, don't use vpkuwum and vpkuhum if the
23458 underlying vector type is not V4SI or V8HI, respectively.
23459 For example, using vpkuwum with a V8HI picks up the even
23460 halfwords (BE numbering) when the even halfwords (LE
23461 numbering) are what we need. */
23462 if (!BYTES_BIG_ENDIAN
23463 && icode == CODE_FOR_altivec_vpkuwum_direct
23464 && ((REG_P (op0)
23465 && GET_MODE (op0) != V4SImode)
23466 || (SUBREG_P (op0)
23467 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
23468 continue;
23469 if (!BYTES_BIG_ENDIAN
23470 && icode == CODE_FOR_altivec_vpkuhum_direct
23471 && ((REG_P (op0)
23472 && GET_MODE (op0) != V8HImode)
23473 || (SUBREG_P (op0)
23474 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
23475 continue;
23476
23477 /* For little-endian, the two input operands must be swapped
23478 (or swapped back) to ensure proper right-to-left numbering
23479 from 0 to 2N-1. */
23480 if ((swapped ^ !BYTES_BIG_ENDIAN)
23481 && icode != CODE_FOR_vsx_xxpermdi_v16qi)
23482 std::swap (op0, op1);
23483 if (imode != V16QImode)
23484 {
23485 op0 = gen_lowpart (imode, op0);
23486 op1 = gen_lowpart (imode, op1);
23487 }
23488 if (omode == V16QImode)
23489 x = target;
23490 else
23491 x = gen_reg_rtx (omode);
23492 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23493 emit_insn (GEN_FCN (icode) (x, op0, op1, perm_idx));
23494 else
23495 emit_insn (GEN_FCN (icode) (x, op0, op1));
23496 if (omode != V16QImode)
23497 emit_move_insn (target, gen_lowpart (V16QImode, x));
23498 return true;
23499 }
23500 }
23501
23502 if (!BYTES_BIG_ENDIAN)
23503 {
23504 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
23505 return true;
23506 }
23507
23508 return false;
23509 }
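
/* For example, the V16QI selector {1, 3, 5, ..., 31} matches the first
   pattern row above (vpkuhum), so on a suitable target the permutation
   is emitted as a single pack instruction instead of a vperm with a
   loaded control vector. */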
23510
23511 /* Expand a VSX Permute Doubleword constant permutation.
23512 Return true if we match an efficient implementation. */
23513
23514 static bool
23515 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
23516 unsigned char perm0, unsigned char perm1)
23517 {
23518 rtx x;
23519
23520 /* If both selectors come from the same operand, fold to a single operand. */
23521 if ((perm0 & 2) == (perm1 & 2))
23522 {
23523 if (perm0 & 2)
23524 op0 = op1;
23525 else
23526 op1 = op0;
23527 }
23528 /* If both operands are equal, fold to simpler permutation. */
23529 if (rtx_equal_p (op0, op1))
23530 {
23531 perm0 = perm0 & 1;
23532 perm1 = (perm1 & 1) + 2;
23533 }
23534 /* If the first selector comes from the second operand, swap. */
23535 else if (perm0 & 2)
23536 {
23537 if (perm1 & 2)
23538 return false;
23539 perm0 -= 2;
23540 perm1 += 2;
23541 std::swap (op0, op1);
23542 }
23543 /* If the second selector does not come from the second operand, fail. */
23544 else if ((perm1 & 2) == 0)
23545 return false;
23546
23547 /* Success! */
23548 if (target != NULL)
23549 {
23550 machine_mode vmode, dmode;
23551 rtvec v;
23552
23553 vmode = GET_MODE (target);
23554 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
23555 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
23556 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
23557 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
23558 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
23559 emit_insn (gen_rtx_SET (target, x));
23560 }
23561 return true;
23562 }
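
/* Worked example: PERM0 == 0 and PERM1 == 3 select element 0 of OP0 and
   element 1 of OP1. Assuming OP0 and OP1 are distinct, none of the folds
   above apply, so the function builds a VEC_SELECT of the VEC_CONCAT
   with selector {0, 3}, which the xxpermdi pattern matches directly. */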
23563
23564 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23565
23566 static bool
23567 rs6000_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
23568 rtx target, rtx op0, rtx op1,
23569 const vec_perm_indices &sel)
23570 {
23571 if (vmode != op_mode)
23572 return false;
23573
23574 bool testing_p = !target;
23575
23576 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23577 if (TARGET_ALTIVEC && testing_p)
23578 return true;
23579
23580 if (op0)
23581 {
23582 rtx nop0 = force_reg (vmode, op0);
23583 if (op0 == op1)
23584 op1 = nop0;
23585 op0 = nop0;
23586 }
23587 if (op1)
23588 op1 = force_reg (vmode, op1);
23589
23590 /* Check for ps_merge* or xxpermdi insns. */
23591 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
23592 {
23593 if (testing_p)
23594 {
23595 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
23596 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
23597 }
23598 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
23599 return true;
23600 }
23601
23602 if (TARGET_ALTIVEC)
23603 {
23604 /* Force the target-independent code to lower to V16QImode. */
23605 if (vmode != V16QImode)
23606 return false;
23607 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
23608 return true;
23609 }
23610
23611 return false;
23612 }
23613
23614 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23615 OP0 and OP1 are the input vectors and TARGET is the output vector.
23616 PERM specifies the constant permutation vector. */
23617
23618 static void
23619 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
23620 machine_mode vmode, const vec_perm_builder &perm)
23621 {
23622 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
23623 if (x != target)
23624 emit_move_insn (target, x);
23625 }
23626
23627 /* Expand an extract even operation. */
23628
23629 void
23630 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
23631 {
23632 machine_mode vmode = GET_MODE (target);
23633 unsigned i, nelt = GET_MODE_NUNITS (vmode);
23634 vec_perm_builder perm (nelt, nelt, 1);
23635
23636 for (i = 0; i < nelt; i++)
23637 perm.quick_push (i * 2);
23638
23639 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23640 }
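
/* For example, with V4SImode operands NELT is 4 and the selector pushed
   above is {0, 2, 4, 6}: elements 0 and 2 of OP0 followed by elements
   0 and 2 of OP1. */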
23641
23642 /* Expand a vector interleave operation. */
23643
23644 void
23645 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23646 {
23647 machine_mode vmode = GET_MODE (target);
23648 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23649 vec_perm_builder perm (nelt, nelt, 1);
23650
23651 high = (highp ? 0 : nelt / 2);
23652 for (i = 0; i < nelt / 2; i++)
23653 {
23654 perm.quick_push (i + high);
23655 perm.quick_push (i + nelt + high);
23656 }
23657
23658 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23659 }
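
/* For example, for V4SImode with HIGHP true the selector built above is
   {0, 4, 1, 5}, interleaving the first half of OP0 with the first half
   of OP1. */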
23660
23661 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT. */
23662 void
23663 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23664 {
23665 HOST_WIDE_INT hwi_scale (scale);
23666 REAL_VALUE_TYPE r_pow;
23667 rtvec v = rtvec_alloc (2);
23668 rtx elt;
23669 rtx scale_vec = gen_reg_rtx (V2DFmode);
23670 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23671 elt = const_double_from_real_value (r_pow, DFmode);
23672 RTVEC_ELT (v, 0) = elt;
23673 RTVEC_ELT (v, 1) = elt;
23674 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23675 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
23676 }
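
/* For example, SCALE == 4 splats the constant 16.0 into both lanes of
   SCALE_VEC and emits one V2DF multiply, scaling each lane of SRC by
   2**4. */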
23677
23678 /* Return an RTX representing where to find the function value of a
23679 function returning MODE. */
23680 static rtx
23681 rs6000_complex_function_value (machine_mode mode)
23682 {
23683 unsigned int regno;
23684 rtx r1, r2;
23685 machine_mode inner = GET_MODE_INNER (mode);
23686 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23687
23688 if (TARGET_FLOAT128_TYPE
23689 && (mode == KCmode
23690 || (mode == TCmode && TARGET_IEEEQUAD)))
23691 regno = ALTIVEC_ARG_RETURN;
23692
23693 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23694 regno = FP_ARG_RETURN;
23695
23696 else
23697 {
23698 regno = GP_ARG_RETURN;
23699
23700 /* 32-bit is OK since it'll go in r3/r4. */
23701 if (TARGET_32BIT && inner_bytes >= 4)
23702 return gen_rtx_REG (mode, regno);
23703 }
23704
23705 if (inner_bytes >= 8)
23706 return gen_rtx_REG (mode, regno);
23707
23708 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23709 const0_rtx);
23710 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23711 GEN_INT (inner_bytes));
23712 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23713 }
23714
23715 /* Return an rtx describing a return value of MODE as a PARALLEL
23716 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23717 stride REG_STRIDE. */
23718
23719 static rtx
23720 rs6000_parallel_return (machine_mode mode,
23721 int n_elts, machine_mode elt_mode,
23722 unsigned int regno, unsigned int reg_stride)
23723 {
23724 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23725
23726 int i;
23727 for (i = 0; i < n_elts; i++)
23728 {
23729 rtx r = gen_rtx_REG (elt_mode, regno);
23730 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23731 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23732 regno += reg_stride;
23733 }
23734
23735 return par;
23736 }
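
/* Illustrative result: DImode split into two SImode GPRs starting at r3
   with stride 1 yields
     (parallel [(expr_list (reg:SI 3) (const_int 0))
                (expr_list (reg:SI 4) (const_int 4))])
   i.e. each register piece is tagged with its byte offset. */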
23737
23738 /* Target hook for TARGET_FUNCTION_VALUE.
23739
23740 An integer value is in r3 and a floating-point value is in fp1,
23741 unless -msoft-float. */
23742
23743 static rtx
23744 rs6000_function_value (const_tree valtype,
23745 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23746 bool outgoing ATTRIBUTE_UNUSED)
23747 {
23748 machine_mode mode;
23749 unsigned int regno;
23750 machine_mode elt_mode;
23751 int n_elts;
23752
23753 /* Special handling for structs in darwin64. */
23754 if (TARGET_MACHO
23755 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23756 {
23757 CUMULATIVE_ARGS valcum;
23758 rtx valret;
23759
23760 valcum.words = 0;
23761 valcum.fregno = FP_ARG_MIN_REG;
23762 valcum.vregno = ALTIVEC_ARG_MIN_REG;
23763 /* Do a trial code generation as if this were going to be passed as
23764 an argument; if any part goes in memory, we return NULL. */
23765 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23766 if (valret)
23767 return valret;
23768 /* Otherwise fall through to standard ABI rules. */
23769 }
23770
23771 mode = TYPE_MODE (valtype);
23772
23773 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23774 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23775 {
23776 int first_reg, n_regs;
23777
23778 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
23779 {
23780 /* _Decimal128 must use even/odd register pairs. */
23781 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23782 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
23783 }
23784 else
23785 {
23786 first_reg = ALTIVEC_ARG_RETURN;
23787 n_regs = 1;
23788 }
23789
23790 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
23791 }
23792
23793 /* Some return value types need to be split for -mpowerpc64 with the 32-bit ABI. */
23794 if (TARGET_32BIT && TARGET_POWERPC64)
23795 switch (mode)
23796 {
23797 default:
23798 break;
23799 case E_DImode:
23800 case E_SCmode:
23801 case E_DCmode:
23802 case E_TCmode:
23803 int count = GET_MODE_SIZE (mode) / 4;
23804 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
23805 }
23806
23807 if ((INTEGRAL_TYPE_P (valtype)
23808 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
23809 || POINTER_TYPE_P (valtype))
23810 mode = TARGET_32BIT ? SImode : DImode;
23811
23812 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23813 /* _Decimal128 must use an even/odd register pair. */
23814 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23815 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
23816 && !FLOAT128_VECTOR_P (mode))
23817 regno = FP_ARG_RETURN;
23818 else if (TREE_CODE (valtype) == COMPLEX_TYPE
23819 && targetm.calls.split_complex_arg)
23820 return rs6000_complex_function_value (mode);
23821 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23822 return register is used in both cases, and we won't see V2DImode/V2DFmode
23823 for pure altivec, combine the two cases. */
23824 else if ((VECTOR_TYPE_P (valtype) || VECTOR_ALIGNMENT_P (mode))
23825 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
23826 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
23827 regno = ALTIVEC_ARG_RETURN;
23828 else
23829 regno = GP_ARG_RETURN;
23830
23831 return gen_rtx_REG (mode, regno);
23832 }
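
/* For example, a plain double is returned in FP_ARG_RETURN (f1) when
   hard float is enabled, while a vector float under the AltiVec ABI is
   returned in ALTIVEC_ARG_RETURN (v2). */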
23833
23834 /* Define how to find the value returned by a library function
23835 assuming the value has mode MODE. */
23836 rtx
23837 rs6000_libcall_value (machine_mode mode)
23838 {
23839 unsigned int regno;
23840
23841 /* A long long return value needs to be split for -mpowerpc64 with the 32-bit ABI. */
23842 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
23843 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
23844
23845 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23846 /* _Decimal128 must use an even/odd register pair. */
23847 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23848 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
23849 regno = FP_ARG_RETURN;
23850 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
23851 return register is used in both cases, and we won't see V2DImode/V2DFmode
23852 for pure altivec, combine the two cases. */
23853 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
23854 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
23855 regno = ALTIVEC_ARG_RETURN;
23856 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
23857 return rs6000_complex_function_value (mode);
23858 else
23859 regno = GP_ARG_RETURN;
23860
23861 return gen_rtx_REG (mode, regno);
23862 }
23863
23864 /* Compute register pressure classes. We implement the target hook to avoid
23865 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
23866 lead to incorrect estimates of the number of available registers and
23867 therefore increased register pressure and spilling. */
23868 static int
23869 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
23870 {
23871 int n;
23872
23873 n = 0;
23874 pressure_classes[n++] = GENERAL_REGS;
23875 if (TARGET_ALTIVEC)
23876 pressure_classes[n++] = ALTIVEC_REGS;
23877 if (TARGET_VSX)
23878 pressure_classes[n++] = VSX_REGS;
23879 else
23880 {
23881 if (TARGET_HARD_FLOAT)
23882 pressure_classes[n++] = FLOAT_REGS;
23883 }
23884 pressure_classes[n++] = CR_REGS;
23885 pressure_classes[n++] = SPECIAL_REGS;
23886
23887 return n;
23888 }
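
/* For example, on a typical VSX target this reports GENERAL_REGS,
   ALTIVEC_REGS, VSX_REGS, CR_REGS and SPECIAL_REGS, deliberately
   omitting union classes such as GEN_OR_FLOAT_REGS. */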
23889
23890 /* Given FROM and TO register numbers, say whether this elimination is allowed.
23891 Frame pointer elimination is automatically handled.
23892
23893 For the RS/6000, if frame pointer elimination is being done, we would like
23894 to convert ap into fp, not sp.
23895
23896 We need r30 if -mminimal-toc was specified and there are constant pool
23897 references. */
23898
23899 static bool
23900 rs6000_can_eliminate (const int from, const int to)
23901 {
23902 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
23903 ? ! frame_pointer_needed
23904 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
23905 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
23906 || constant_pool_empty_p ()
23907 : true);
23908 }
23909
23910 /* Define the offset between two registers, FROM to be eliminated and its
23911 replacement TO, at the start of a routine. */
23912 HOST_WIDE_INT
23913 rs6000_initial_elimination_offset (int from, int to)
23914 {
23915 rs6000_stack_t *info = rs6000_stack_info ();
23916 HOST_WIDE_INT offset;
23917
23918 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23919 offset = info->push_p ? 0 : -info->total_size;
23920 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23921 {
23922 offset = info->push_p ? 0 : -info->total_size;
23923 if (FRAME_GROWS_DOWNWARD)
23924 offset += info->fixed_size + info->vars_size + info->parm_size;
23925 }
23926 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23927 offset = FRAME_GROWS_DOWNWARD
23928 ? info->fixed_size + info->vars_size + info->parm_size
23929 : 0;
23930 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
23931 offset = info->total_size;
23932 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
23933 offset = info->push_p ? info->total_size : 0;
23934 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
23935 offset = 0;
23936 else
23937 gcc_unreachable ();
23938
23939 return offset;
23940 }
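
/* For example, eliminating the argument pointer into the stack pointer
   when the frame has been pushed yields OFFSET == total_size, so an
   ap-relative address of an incoming argument becomes sp + total_size
   plus the original offset. */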
23941
23942 /* Fill in the sizes of the registers used by the unwinder. */
23943
23944 static void
23945 rs6000_init_dwarf_reg_sizes_extra (tree address)
23946 {
23947 if (TARGET_MACHO && ! TARGET_ALTIVEC)
23948 {
23949 int i;
23950 machine_mode mode = TYPE_MODE (char_type_node);
23951 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
23952 rtx mem = gen_rtx_MEM (BLKmode, addr);
23953 rtx value = gen_int_mode (16, mode);
23954
23955 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
23956 The unwinder still needs to know the size of Altivec registers. */
23957
23958 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
23959 {
23960 int column = DWARF_REG_TO_UNWIND_COLUMN
23961 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
23962 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
23963
23964 emit_move_insn (adjust_address (mem, mode, offset), value);
23965 }
23966 }
23967 }
23968
23969 /* Map internal gcc register numbers to debug format register numbers.
23970 FORMAT specifies the type of debug register number to use:
23971 0 -- debug information, except for frame-related sections
23972 1 -- DWARF .debug_frame section
23973 2 -- DWARF .eh_frame section */
23974
23975 unsigned int
23976 rs6000_debugger_regno (unsigned int regno, unsigned int format)
23977 {
23978 /* On some platforms, we use the standard DWARF register
23979 numbering for .debug_info and .debug_frame. */
23980 if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
23981 {
23982 #ifdef RS6000_USE_DWARF_NUMBERING
23983 if (regno <= 31)
23984 return regno;
23985 if (FP_REGNO_P (regno))
23986 return regno - FIRST_FPR_REGNO + 32;
23987 if (ALTIVEC_REGNO_P (regno))
23988 return regno - FIRST_ALTIVEC_REGNO + 1124;
23989 if (regno == LR_REGNO)
23990 return 108;
23991 if (regno == CTR_REGNO)
23992 return 109;
23993 if (regno == CA_REGNO)
23994 return 101; /* XER */
23995 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
23996 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
23997 The actual code emitted saves the whole of CR, so we map CR2_REGNO
23998 to the DWARF reg for CR. */
23999 if (format == 1 && regno == CR2_REGNO)
24000 return 64;
24001 if (CR_REGNO_P (regno))
24002 return regno - CR0_REGNO + 86;
24003 if (regno == VRSAVE_REGNO)
24004 return 356;
24005 if (regno == VSCR_REGNO)
24006 return 67;
24007
24008 /* These do not make much sense. */
24009 if (regno == FRAME_POINTER_REGNUM)
24010 return 111;
24011 if (regno == ARG_POINTER_REGNUM)
24012 return 67;
24013 if (regno == 64)
24014 return 100;
24015
24016 gcc_unreachable ();
24017 #endif
24018 }
24019
24020 /* We use the GCC 7 (and before) internal number for non-DWARF debug
24021 information, and also for .eh_frame. */
24022 /* Translate the regnos to their numbers in GCC 7 (and before). */
24023 if (regno <= 31)
24024 return regno;
24025 if (FP_REGNO_P (regno))
24026 return regno - FIRST_FPR_REGNO + 32;
24027 if (ALTIVEC_REGNO_P (regno))
24028 return regno - FIRST_ALTIVEC_REGNO + 77;
24029 if (regno == LR_REGNO)
24030 return 65;
24031 if (regno == CTR_REGNO)
24032 return 66;
24033 if (regno == CA_REGNO)
24034 return 76; /* XER */
24035 if (CR_REGNO_P (regno))
24036 return regno - CR0_REGNO + 68;
24037 if (regno == VRSAVE_REGNO)
24038 return 109;
24039 if (regno == VSCR_REGNO)
24040 return 110;
24041
24042 if (regno == FRAME_POINTER_REGNUM)
24043 return 111;
24044 if (regno == ARG_POINTER_REGNUM)
24045 return 67;
24046 if (regno == 64)
24047 return 64;
24048
24049 gcc_unreachable ();
24050 }
24051
24052 /* target hook eh_return_filter_mode */
24053 static scalar_int_mode
24054 rs6000_eh_return_filter_mode (void)
24055 {
24056 return TARGET_32BIT ? SImode : word_mode;
24057 }
24058
24059 /* Target hook for translate_mode_attribute. */
24060 static machine_mode
24061 rs6000_translate_mode_attribute (machine_mode mode)
24062 {
24063 if ((FLOAT128_IEEE_P (mode)
24064 && ieee128_float_type_node == long_double_type_node)
24065 || (FLOAT128_IBM_P (mode)
24066 && ibm128_float_type_node == long_double_type_node))
24067 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
24068 return mode;
24069 }
24070
24071 /* Target hook for scalar_mode_supported_p. */
24072 static bool
24073 rs6000_scalar_mode_supported_p (scalar_mode mode)
24074 {
24075 /* -m32 does not support TImode. This is the default, from
24076 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
24077 same ABI as for -m32. But default_scalar_mode_supported_p allows
24078 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
24079 for -mpowerpc64. */
24080 if (TARGET_32BIT && mode == TImode)
24081 return false;
24082
24083 if (DECIMAL_FLOAT_MODE_P (mode))
24084 return default_decimal_float_supported_p ();
24085 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
24086 return true;
24087 else
24088 return default_scalar_mode_supported_p (mode);
24089 }
24090
24091 /* Target hook for libgcc_floating_mode_supported_p. */
24092
24093 static bool
24094 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
24095 {
24096 switch (mode)
24097 {
24098 case E_SFmode:
24099 case E_DFmode:
24100 case E_TFmode:
24101 return true;
24102
24103 /* We only return true for KFmode if IEEE 128-bit types are supported, and
24104 if long double does not use the IEEE 128-bit format. If long double
24105 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
24106 Because the code will not use KFmode in that case, returning true would
24107 cause aborts when KFmode cannot be found among the _FloatN types. */
24108 case E_KFmode:
24109 return TARGET_FLOAT128_TYPE && !TARGET_IEEEQUAD;
24110
24111 default:
24112 return false;
24113 }
24114 }
24115
24116 /* Target hook for vector_mode_supported_p. */
24117 static bool
24118 rs6000_vector_mode_supported_p (machine_mode mode)
24119 {
24120 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
24121 128-bit, the compiler might try to widen IEEE 128-bit to IBM
24122 double-double. */
24123 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
24124 return true;
24125
24126 else
24127 return false;
24128 }
24129
24130 /* Target hook for floatn_mode. */
24131 static opt_scalar_float_mode
24132 rs6000_floatn_mode (int n, bool extended)
24133 {
24134 if (extended)
24135 {
24136 switch (n)
24137 {
24138 case 32:
24139 return DFmode;
24140
24141 case 64:
24142 if (TARGET_FLOAT128_TYPE)
24143 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24144 else
24145 return opt_scalar_float_mode ();
24146
24147 case 128:
24148 return opt_scalar_float_mode ();
24149
24150 default:
24151 /* Those are the only valid _FloatNx types. */
24152 gcc_unreachable ();
24153 }
24154 }
24155 else
24156 {
24157 switch (n)
24158 {
24159 case 32:
24160 return SFmode;
24161
24162 case 64:
24163 return DFmode;
24164
24165 case 128:
24166 if (TARGET_FLOAT128_TYPE)
24167 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24168 else
24169 return opt_scalar_float_mode ();
24170
24171 default:
24172 return opt_scalar_float_mode ();
24173 }
24174 }
24175
24176 }
24177
24178 /* Target hook for c_mode_for_suffix. */
24179 static machine_mode
24180 rs6000_c_mode_for_suffix (char suffix)
24181 {
24182 if (TARGET_FLOAT128_TYPE)
24183 {
24184 if (suffix == 'q' || suffix == 'Q')
24185 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24186
24187 /* At the moment, we are not defining a suffix for IBM extended double.
24188 If/when the default for -mabi=ieeelongdouble is changed, and we want
24189 to support __ibm128 constants in legacy library code, we may need to
24190 re-evaluate this decision. Currently, c-lex.cc only supports 'w' and
24191 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
24192 __float80 constants. */
24193 }
24194
24195 return VOIDmode;
24196 }
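
/* For example, when __float128 support is enabled, the constant 1.0q is
   given KFmode while long double is IBM extended double (TFmode is not
   IEEE), and TFmode when -mabi=ieeelongdouble makes TFmode the IEEE
   128-bit format. */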
24197
24198 /* Target hook for invalid_arg_for_unprototyped_fn. */
24199 static const char *
24200 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
24201 {
24202 return (!rs6000_darwin64_abi
24203 && typelist == 0
24204 && VECTOR_TYPE_P (TREE_TYPE (val))
24205 && (funcdecl == NULL_TREE
24206 || (TREE_CODE (funcdecl) == FUNCTION_DECL
24207 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
24208 ? N_("AltiVec argument passed to unprototyped function")
24209 : NULL;
24210 }
24211
24212 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
24213 setup by using the __stack_chk_fail_local hidden function instead of
24214 calling __stack_chk_fail directly. Otherwise it is better to call
24215 __stack_chk_fail directly. */
24216
24217 static tree ATTRIBUTE_UNUSED
24218 rs6000_stack_protect_fail (void)
24219 {
24220 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
24221 ? default_hidden_stack_protect_fail ()
24222 : default_external_stack_protect_fail ();
24223 }
24224
24225 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
24226
24227 #if TARGET_ELF
24228 static unsigned HOST_WIDE_INT
24229 rs6000_asan_shadow_offset (void)
24230 {
24231 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
24232 }
24233 #endif
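
/* Illustrative mapping, assuming ASan's usual Shadow = (Mem >> 3) + Offset
   scheme: with the 64-bit offset of 1 << 41 (0x20000000000), application
   address 0x1000 is shadowed at 0x20000000200. */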
24234 \f
24235 /* Mask options that we want to support inside of attribute((target)) and
24236 #pragma GCC target operations. Note, we do not include things like
24237 64/32-bit, endianness, hard/soft floating point, etc. that would have
24238 different calling sequences. */
24239
24240 struct rs6000_opt_mask {
24241 const char *name; /* option name */
24242 HOST_WIDE_INT mask; /* mask to set */
24243 bool invert; /* invert sense of mask */
24244 bool valid_target; /* option is a target option */
24245 };
24246
24247 static struct rs6000_opt_mask const rs6000_opt_masks[] =
24248 {
24249 { "altivec", OPTION_MASK_ALTIVEC, false, true },
24250 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
24251 false, true },
24252 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
24253 false, true },
24254 { "cmpb", OPTION_MASK_CMPB, false, true },
24255 { "crypto", OPTION_MASK_CRYPTO, false, true },
24256 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
24257 { "dlmzb", OPTION_MASK_DLMZB, false, true },
24258 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
24259 false, true },
24260 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
24261 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
24262 { "fprnd", OPTION_MASK_FPRND, false, true },
24263 { "power10", OPTION_MASK_POWER10, false, true },
24264 { "hard-dfp", OPTION_MASK_DFP, false, true },
24265 { "htm", OPTION_MASK_HTM, false, true },
24266 { "isel", OPTION_MASK_ISEL, false, true },
24267 { "mfcrf", OPTION_MASK_MFCRF, false, true },
24268 { "mfpgpr", 0, false, true },
24269 { "mma", OPTION_MASK_MMA, false, true },
24270 { "modulo", OPTION_MASK_MODULO, false, true },
24271 { "mulhw", OPTION_MASK_MULHW, false, true },
24272 { "multiple", OPTION_MASK_MULTIPLE, false, true },
24273 { "pcrel", OPTION_MASK_PCREL, false, true },
24274 { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true },
24275 { "popcntb", OPTION_MASK_POPCNTB, false, true },
24276 { "popcntd", OPTION_MASK_POPCNTD, false, true },
24277 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
24278 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
24279 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
24280 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
24281 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
24282 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
24283 { "power10-fusion", OPTION_MASK_P10_FUSION, false, true },
24284 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
24285 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
24286 { "prefixed", OPTION_MASK_PREFIXED, false, true },
24287 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
24288 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
24289 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
24290 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
24291 { "string", 0, false, true },
24292 { "update", OPTION_MASK_NO_UPDATE, true, true },
24293 { "vsx", OPTION_MASK_VSX, false, true },
24294 #ifdef OPTION_MASK_64BIT
24295 #if TARGET_AIX_OS
24296 { "aix64", OPTION_MASK_64BIT, false, false },
24297 { "aix32", OPTION_MASK_64BIT, true, false },
24298 #else
24299 { "64", OPTION_MASK_64BIT, false, false },
24300 { "32", OPTION_MASK_64BIT, true, false },
24301 #endif
24302 #endif
24303 #ifdef OPTION_MASK_EABI
24304 { "eabi", OPTION_MASK_EABI, false, false },
24305 #endif
24306 #ifdef OPTION_MASK_LITTLE_ENDIAN
24307 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
24308 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
24309 #endif
24310 #ifdef OPTION_MASK_RELOCATABLE
24311 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
24312 #endif
24313 #ifdef OPTION_MASK_STRICT_ALIGN
24314 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
24315 #endif
24316 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
24317 { "string", 0, false, false },
24318 };
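
/* For example, the strings "vsx" and "no-htm" in a target attribute are
   looked up in this table; "vsx" sets OPTION_MASK_VSX, and the "no-"
   prefix (stripped by rs6000_inner_target_options below) inverts the
   sense so that OPTION_MASK_HTM is cleared. */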
24319
24320 /* Option variables that we want to support inside attribute((target)) and
24321 #pragma GCC target operations. */
24322
24323 struct rs6000_opt_var {
24324 const char *name; /* option name */
24325 size_t global_offset; /* offset of the option in global_options. */
24326 size_t target_offset; /* offset of the option in target options. */
24327 };
24328
24329 static struct rs6000_opt_var const rs6000_opt_vars[] =
24330 {
24331 { "friz",
24332 offsetof (struct gcc_options, x_TARGET_FRIZ),
24333 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
24334 { "avoid-indexed-addresses",
24335 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
24336 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
24337 { "longcall",
24338 offsetof (struct gcc_options, x_rs6000_default_long_calls),
24339 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
24340 { "optimize-swaps",
24341 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
24342 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
24343 { "allow-movmisalign",
24344 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
24345 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
24346 { "sched-groups",
24347 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
24348 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
24349 { "always-hint",
24350 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
24351 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
24352 { "align-branch-targets",
24353 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
24354 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
24355 { "sched-prolog",
24356 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24357 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24358 { "sched-epilog",
24359 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24360 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24361 { "speculate-indirect-jumps",
24362 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
24363 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
24364 };
24365
24366 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24367 parsing. Return true if there were no errors. */
24368
24369 static bool
24370 rs6000_inner_target_options (tree args, bool attr_p)
24371 {
24372 bool ret = true;
24373
24374 if (args == NULL_TREE)
24375 ;
24376
24377 else if (TREE_CODE (args) == STRING_CST)
24378 {
24379 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24380 char *q;
24381
24382 while ((q = strtok (p, ",")) != NULL)
24383 {
24384 bool error_p = false;
24385 bool not_valid_p = false;
24386 const char *cpu_opt = NULL;
24387
24388 p = NULL;
24389 if (startswith (q, "cpu="))
24390 {
24391 int cpu_index = rs6000_cpu_name_lookup (q+4);
24392 if (cpu_index >= 0)
24393 rs6000_cpu_index = cpu_index;
24394 else
24395 {
24396 error_p = true;
24397 cpu_opt = q+4;
24398 }
24399 }
24400 else if (startswith (q, "tune="))
24401 {
24402 int tune_index = rs6000_cpu_name_lookup (q+5);
24403 if (tune_index >= 0)
24404 rs6000_tune_index = tune_index;
24405 else
24406 {
24407 error_p = true;
24408 cpu_opt = q+5;
24409 }
24410 }
24411 else
24412 {
24413 size_t i;
24414 bool invert = false;
24415 char *r = q;
24416
24417 error_p = true;
24418 if (startswith (r, "no-"))
24419 {
24420 invert = true;
24421 r += 3;
24422 }
24423
24424 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
24425 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
24426 {
24427 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
24428
24429 if (!rs6000_opt_masks[i].valid_target)
24430 not_valid_p = true;
24431 else
24432 {
24433 error_p = false;
24434 rs6000_isa_flags_explicit |= mask;
24435
24436 /* VSX needs altivec, so -mvsx automagically sets
24437 altivec and disables -mavoid-indexed-addresses. */
24438 if (!invert)
24439 {
24440 if (mask == OPTION_MASK_VSX)
24441 {
24442 mask |= OPTION_MASK_ALTIVEC;
24443 TARGET_AVOID_XFORM = 0;
24444 }
24445 }
24446
24447 if (rs6000_opt_masks[i].invert)
24448 invert = !invert;
24449
24450 if (invert)
24451 rs6000_isa_flags &= ~mask;
24452 else
24453 rs6000_isa_flags |= mask;
24454 }
24455 break;
24456 }
24457
24458 if (error_p && !not_valid_p)
24459 {
24460 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
24461 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
24462 {
24463 size_t j = rs6000_opt_vars[i].global_offset;
24464 *((int *) ((char *)&global_options + j)) = !invert;
24465 error_p = false;
24466 not_valid_p = false;
24467 break;
24468 }
24469 }
24470 }
24471
24472 if (error_p)
24473 {
24474 const char *eprefix, *esuffix;
24475
24476 ret = false;
24477 if (attr_p)
24478 {
24479 eprefix = "__attribute__((__target__(";
24480 esuffix = ")))";
24481 }
24482 else
24483 {
24484 eprefix = "#pragma GCC target ";
24485 esuffix = "";
24486 }
24487
24488 if (cpu_opt)
24489 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
24490 q, esuffix);
24491 else if (not_valid_p)
24492 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
24493 else
24494 error ("%s%qs%s is invalid", eprefix, q, esuffix);
24495 }
24496 }
24497 }
24498
24499 else if (TREE_CODE (args) == TREE_LIST)
24500 {
24501 do
24502 {
24503 tree value = TREE_VALUE (args);
24504 if (value)
24505 {
24506 bool ret2 = rs6000_inner_target_options (value, attr_p);
24507 if (!ret2)
24508 ret = false;
24509 }
24510 args = TREE_CHAIN (args);
24511 }
24512 while (args != NULL_TREE);
24513 }
24514
24515 else
24516 {
24517 error ("attribute %<target%> argument not a string");
24518 return false;
24519 }
24520
24521 return ret;
24522 }
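
/* Illustrative usage (hypothetical user code):

     #pragma GCC target ("cpu=power9,power9-vector")
     int foo (void);

   The argument string is split on commas; "cpu=power9" sets
   rs6000_cpu_index via rs6000_cpu_name_lookup, while "power9-vector" is
   found in rs6000_opt_masks and ORed into rs6000_isa_flags. */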
24523
24524 /* Print out the target options as a list for -mdebug=target. */
24525
24526 static void
24527 rs6000_debug_target_options (tree args, const char *prefix)
24528 {
24529 if (args == NULL_TREE)
24530 fprintf (stderr, "%s<NULL>", prefix);
24531
24532 else if (TREE_CODE (args) == STRING_CST)
24533 {
24534 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24535 char *q;
24536
24537 while ((q = strtok (p, ",")) != NULL)
24538 {
24539 p = NULL;
24540 fprintf (stderr, "%s\"%s\"", prefix, q);
24541 prefix = ", ";
24542 }
24543 }
24544
24545 else if (TREE_CODE (args) == TREE_LIST)
24546 {
24547 do
24548 {
24549 tree value = TREE_VALUE (args);
24550 if (value)
24551 {
24552 rs6000_debug_target_options (value, prefix);
24553 prefix = ", ";
24554 }
24555 args = TREE_CHAIN (args);
24556 }
24557 while (args != NULL_TREE);
24558 }
24559
24560 else
24561 gcc_unreachable ();
24562
24563 return;
24564 }
24565
24566 \f
24567 /* Hook to validate attribute((target("..."))). */
24568
24569 static bool
24570 rs6000_valid_attribute_p (tree fndecl,
24571 tree ARG_UNUSED (name),
24572 tree args,
24573 int flags)
24574 {
24575 struct cl_target_option cur_target;
24576 bool ret;
24577 tree old_optimize;
24578 tree new_target, new_optimize;
24579 tree func_optimize;
24580
24581 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
24582
24583 if (TARGET_DEBUG_TARGET)
24584 {
24585 tree tname = DECL_NAME (fndecl);
24586 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
24587 if (tname)
24588 fprintf (stderr, "function: %.*s\n",
24589 (int) IDENTIFIER_LENGTH (tname),
24590 IDENTIFIER_POINTER (tname));
24591 else
24592 fprintf (stderr, "function: unknown\n");
24593
24594 fprintf (stderr, "args:");
24595 rs6000_debug_target_options (args, " ");
24596 fprintf (stderr, "\n");
24597
24598 if (flags)
24599 fprintf (stderr, "flags: 0x%x\n", flags);
24600
24601 fprintf (stderr, "--------------------\n");
24602 }
24603
24604 /* attribute((target("default"))) does nothing, beyond
24605 affecting multi-versioning. */
24606 if (TREE_VALUE (args)
24607 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
24608 && TREE_CHAIN (args) == NULL_TREE
24609 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
24610 return true;
24611
24612 old_optimize = build_optimization_node (&global_options,
24613 &global_options_set);
24614 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
24615
24616 /* If the function changed the optimization levels as well as setting target
24617 options, start with the optimizations specified. */
24618 if (func_optimize && func_optimize != old_optimize)
24619 cl_optimization_restore (&global_options, &global_options_set,
24620 TREE_OPTIMIZATION (func_optimize));
24621
24622 /* The target attributes may also change some optimization flags, so update
24623 the optimization options if necessary. */
24624 cl_target_option_save (&cur_target, &global_options, &global_options_set);
24625 rs6000_cpu_index = rs6000_tune_index = -1;
24626 ret = rs6000_inner_target_options (args, true);
24627
24628 /* Set up any additional state. */
24629 if (ret)
24630 {
24631 ret = rs6000_option_override_internal (false);
24632 new_target = build_target_option_node (&global_options,
24633 &global_options_set);
24634 }
24635 else
24636 new_target = NULL;
24637
24638 new_optimize = build_optimization_node (&global_options,
24639 &global_options_set);
24640
24641 if (!new_target)
24642 ret = false;
24643
24644 else if (fndecl)
24645 {
24646 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24647
24648 if (old_optimize != new_optimize)
24649 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24650 }
24651
24652 cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24653
24654 if (old_optimize != new_optimize)
24655 cl_optimization_restore (&global_options, &global_options_set,
24656 TREE_OPTIMIZATION (old_optimize));
24657
24658 return ret;
24659 }
24660
24661 \f
24662 /* Hook to validate the current #pragma GCC target and set the state, and
24663 update the macros based on what was changed. If ARGS is NULL, then
24664 POP_TARGET is used to reset the options. */
24665
24666 bool
24667 rs6000_pragma_target_parse (tree args, tree pop_target)
24668 {
24669 tree prev_tree = build_target_option_node (&global_options,
24670 &global_options_set);
24671 tree cur_tree;
24672 struct cl_target_option *prev_opt, *cur_opt;
24673 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24674
24675 if (TARGET_DEBUG_TARGET)
24676 {
24677 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24678 fprintf (stderr, "args:");
24679 rs6000_debug_target_options (args, " ");
24680 fprintf (stderr, "\n");
24681
24682 if (pop_target)
24683 {
24684 fprintf (stderr, "pop_target:\n");
24685 debug_tree (pop_target);
24686 }
24687 else
24688 fprintf (stderr, "pop_target: <NULL>\n");
24689
24690 fprintf (stderr, "--------------------\n");
24691 }
24692
24693 if (! args)
24694 {
24695 cur_tree = ((pop_target)
24696 ? pop_target
24697 : target_option_default_node);
24698 cl_target_option_restore (&global_options, &global_options_set,
24699 TREE_TARGET_OPTION (cur_tree));
24700 }
24701 else
24702 {
24703 rs6000_cpu_index = rs6000_tune_index = -1;
24704 if (!rs6000_inner_target_options (args, false)
24705 || !rs6000_option_override_internal (false)
24706 || (cur_tree = build_target_option_node (&global_options,
24707 &global_options_set))
24708 == NULL_TREE)
24709 {
24710 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24711 fprintf (stderr, "invalid pragma\n");
24712
24713 return false;
24714 }
24715 }
24716
24717 target_option_current_node = cur_tree;
24718 rs6000_activate_target_options (target_option_current_node);
24719
24720 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24721 change the macros that are defined. */
24722 if (rs6000_target_modify_macros_ptr)
24723 {
24724 prev_opt = TREE_TARGET_OPTION (prev_tree);
24725 prev_flags = prev_opt->x_rs6000_isa_flags;
24726
24727 cur_opt = TREE_TARGET_OPTION (cur_tree);
24728 cur_flags = cur_opt->x_rs6000_isa_flags;
24729
24730 diff_flags = (prev_flags ^ cur_flags);
24731
24732 if (diff_flags != 0)
24733 {
24734 /* Delete old macros. */
24735 rs6000_target_modify_macros_ptr (false,
24736 prev_flags & diff_flags);
24737
24738 /* Define new macros. */
24739 rs6000_target_modify_macros_ptr (true,
24740 cur_flags & diff_flags);
24741 }
24742 }
24743
24744 return true;
24745 }
24746
24747 \f
24748 /* Remember the last target of rs6000_set_current_function. */
24749 static GTY(()) tree rs6000_previous_fndecl;
24750
24751 /* Restore target's globals from NEW_TREE and invalidate the
24752 rs6000_previous_fndecl cache. */
24753
24754 void
24755 rs6000_activate_target_options (tree new_tree)
24756 {
24757 cl_target_option_restore (&global_options, &global_options_set,
24758 TREE_TARGET_OPTION (new_tree));
24759 if (TREE_TARGET_GLOBALS (new_tree))
24760 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
24761 else if (new_tree == target_option_default_node)
24762 restore_target_globals (&default_target_globals);
24763 else
24764 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
24765 rs6000_previous_fndecl = NULL_TREE;
24766 }
24767
24768 /* Establish appropriate back-end context for processing the function
24769 FNDECL. The argument might be NULL to indicate processing at top
24770 level, outside of any function scope. */
24771 static void
24772 rs6000_set_current_function (tree fndecl)
24773 {
24774 if (TARGET_DEBUG_TARGET)
24775 {
24776 fprintf (stderr, "\n==================== rs6000_set_current_function");
24777
24778 if (fndecl)
24779 fprintf (stderr, ", fndecl %s (%p)",
24780 (DECL_NAME (fndecl)
24781 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
24782 : "<unknown>"), (void *)fndecl);
24783
24784 if (rs6000_previous_fndecl)
24785 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
24786
24787 fprintf (stderr, "\n");
24788 }
24789
24790 /* Only change the context if the function changes. This hook is called
24791 several times in the course of compiling a function, and we don't want to
24792 slow things down too much or call target_reinit when it isn't safe. */
24793 if (fndecl == rs6000_previous_fndecl)
24794 return;
24795
24796 tree old_tree;
24797 if (rs6000_previous_fndecl == NULL_TREE)
24798 old_tree = target_option_current_node;
24799 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
24800 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
24801 else
24802 old_tree = target_option_default_node;
24803
24804 tree new_tree;
24805 if (fndecl == NULL_TREE)
24806 {
24807 if (old_tree != target_option_current_node)
24808 new_tree = target_option_current_node;
24809 else
24810 new_tree = NULL_TREE;
24811 }
24812 else
24813 {
24814 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
24815 if (new_tree == NULL_TREE)
24816 new_tree = target_option_default_node;
24817 }
24818
24819 if (TARGET_DEBUG_TARGET)
24820 {
24821 if (new_tree)
24822 {
24823 fprintf (stderr, "\nnew fndecl target specific options:\n");
24824 debug_tree (new_tree);
24825 }
24826
24827 if (old_tree)
24828 {
24829 fprintf (stderr, "\nold fndecl target specific options:\n");
24830 debug_tree (old_tree);
24831 }
24832
24833 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
24834 fprintf (stderr, "--------------------\n");
24835 }
24836
24837 if (new_tree && old_tree != new_tree)
24838 rs6000_activate_target_options (new_tree);
24839
24840 if (fndecl)
24841 rs6000_previous_fndecl = fndecl;
24842 }
24843
24844 \f
24845 /* Save the current options */
24846
24847 static void
24848 rs6000_function_specific_save (struct cl_target_option *ptr,
24849 struct gcc_options *opts,
24850 struct gcc_options */* opts_set */)
24851 {
24852 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
24853 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
24854 }
24855
24856 /* Restore the current options */
24857
24858 static void
24859 rs6000_function_specific_restore (struct gcc_options *opts,
24860 struct gcc_options */* opts_set */,
24861 struct cl_target_option *ptr)
24862
24863 {
24864 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
24865 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
24866 (void) rs6000_option_override_internal (false);
24867 }
24868
24869 /* Print the current options */
24870
24871 static void
24872 rs6000_function_specific_print (FILE *file, int indent,
24873 struct cl_target_option *ptr)
24874 {
24875 rs6000_print_isa_options (file, indent, "Isa options set",
24876 ptr->x_rs6000_isa_flags);
24877
24878 rs6000_print_isa_options (file, indent, "Isa options explicit",
24879 ptr->x_rs6000_isa_flags_explicit);
24880 }
24881
24882 /* Helper function to print the current isa or misc options on a line. */
24883
24884 static void
24885 rs6000_print_options_internal (FILE *file,
24886 int indent,
24887 const char *string,
24888 HOST_WIDE_INT flags,
24889 const char *prefix,
24890 const struct rs6000_opt_mask *opts,
24891 size_t num_elements)
24892 {
24893 size_t i;
24894 size_t start_column = 0;
24895 size_t cur_column;
24896 size_t max_column = 120;
24897 size_t prefix_len = strlen (prefix);
24898 size_t comma_len = 0;
24899 const char *comma = "";
24900
24901 if (indent)
24902 start_column += fprintf (file, "%*s", indent, "");
24903
24904 if (!flags)
24905 {
24906 fprintf (stderr, DEBUG_FMT_S, string, "<none>");
24907 return;
24908 }
24909
24910 start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags);
24911
24912 /* Print the various mask options. */
24913 cur_column = start_column;
24914 for (i = 0; i < num_elements; i++)
24915 {
24916 bool invert = opts[i].invert;
24917 const char *name = opts[i].name;
24918 const char *no_str = "";
24919 HOST_WIDE_INT mask = opts[i].mask;
24920 size_t len = comma_len + prefix_len + strlen (name);
24921
24922 if (!invert)
24923 {
24924 if ((flags & mask) == 0)
24925 {
24926 no_str = "no-";
24927 len += strlen ("no-");
24928 }
24929
24930 flags &= ~mask;
24931 }
24932
24933 else
24934 {
24935 if ((flags & mask) != 0)
24936 {
24937 no_str = "no-";
24938 len += strlen ("no-");
24939 }
24940
24941 flags |= mask;
24942 }
24943
24944 cur_column += len;
24945 if (cur_column > max_column)
24946 {
24947 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
24948 cur_column = start_column + len;
24949 comma = "";
24950 }
24951
24952 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
24953 comma = ", ";
24954 comma_len = strlen (", ");
24955 }
24956
24957 fputs ("\n", file);
24958 }
24959
24960 /* Helper function to print the current isa options on a line. */
24961
24962 static void
24963 rs6000_print_isa_options (FILE *file, int indent, const char *string,
24964 HOST_WIDE_INT flags)
24965 {
24966 rs6000_print_options_internal (file, indent, string, flags, "-m",
24967 &rs6000_opt_masks[0],
24968 ARRAY_SIZE (rs6000_opt_masks));
24969 }
24970
24971 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
24972 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
24973 -mupper-regs-df, etc.).
24974
24975 If the user used -mno-power8-vector, we need to turn off all of the implicit
24976 ISA 2.07 and 3.0 options that relate to the vector unit.
24977
24978 If the user used -mno-power9-vector, we need to turn off all of the implicit
24979 ISA 3.0 options that relate to the vector unit.
24980
24981 This function does not handle explicit options such as the user specifying
24982 -mdirect-move. These are handled in rs6000_option_override_internal, and
24983 the appropriate error is given if needed.
24984
24985 We return a mask of all of the implicit options that should not be enabled
24986 by default. */
24987
24988 static HOST_WIDE_INT
24989 rs6000_disable_incompatible_switches (void)
24990 {
24991 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
24992 size_t i, j;
24993
24994 static const struct {
24995 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
24996 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
24997 const char *const name; /* name of the switch. */
24998 } flags[] = {
24999 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
25000 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
25001 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
25002 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
25003 };
25004
25005 for (i = 0; i < ARRAY_SIZE (flags); i++)
25006 {
25007 HOST_WIDE_INT no_flag = flags[i].no_flag;
25008
25009 if ((rs6000_isa_flags & no_flag) == 0
25010 && (rs6000_isa_flags_explicit & no_flag) != 0)
25011 {
25012 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
25013 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
25014 & rs6000_isa_flags
25015 & dep_flags);
25016
25017 if (set_flags)
25018 {
25019 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
25020 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
25021 {
25022 set_flags &= ~rs6000_opt_masks[j].mask;
25023 error ("%<-mno-%s%> turns off %<-m%s%>",
25024 flags[i].name,
25025 rs6000_opt_masks[j].name);
25026 }
25027
25028 gcc_assert (!set_flags);
25029 }
25030
25031 rs6000_isa_flags &= ~dep_flags;
25032 ignore_masks |= no_flag | dep_flags;
25033 }
25034 }
25035
25036 return ignore_masks;
25037 }
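
/* For example, "-mno-vsx -mpower8-vector" trips the error above, since
   power8-vector depends on VSX: the explicit -mpower8-vector is
   diagnosed with "-mno-vsx turns off -mpower8-vector" and the dependent
   flags are cleared. */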
25038
25039 \f
25040 /* Helper function for printing the function name when debugging. */
25041
25042 static const char *
25043 get_decl_name (tree fn)
25044 {
25045 tree name;
25046
25047 if (!fn)
25048 return "<null>";
25049
25050 name = DECL_NAME (fn);
25051 if (!name)
25052 return "<no-name>";
25053
25054 return IDENTIFIER_POINTER (name);
25055 }
25056
25057 /* Return the clone id of the target we are compiling code for in a target
25058 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
25059 the priority list for the target clones (ordered from lowest to
25060 highest). */
25061
25062 static int
25063 rs6000_clone_priority (tree fndecl)
25064 {
25065 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
25066 HOST_WIDE_INT isa_masks;
25067 int ret = CLONE_DEFAULT;
25068 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
25069 const char *attrs_str = NULL;
25070
25071 attrs = TREE_VALUE (TREE_VALUE (attrs));
25072 attrs_str = TREE_STRING_POINTER (attrs);
25073
25074 /* Return priority zero for default function. Return the ISA needed for the
25075 function if it is not the default. */
25076 if (strcmp (attrs_str, "default") != 0)
25077 {
25078 if (fn_opts == NULL_TREE)
25079 fn_opts = target_option_default_node;
25080
25081 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
25082 isa_masks = rs6000_isa_flags;
25083 else
25084 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
25085
25086 for (ret = CLONE_MAX - 1; ret != 0; ret--)
25087 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
25088 break;
25089 }
25090
25091 if (TARGET_DEBUG_TARGET)
25092 fprintf (stderr, "rs6000_get_function_version_priority (%s) => %d\n",
25093 get_decl_name (fndecl), ret);
25094
25095 return ret;
25096 }
25097
25098 /* This compares the priority of target features in function DECL1 and DECL2.
25099 It returns a positive value if DECL1 has higher priority, a negative value
25100 if DECL2 has higher priority, and 0 if they are the same. Note, priorities are
25101 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
25102
25103 static int
25104 rs6000_compare_version_priority (tree decl1, tree decl2)
25105 {
25106 int priority1 = rs6000_clone_priority (decl1);
25107 int priority2 = rs6000_clone_priority (decl2);
25108 int ret = priority1 - priority2;
25109
25110 if (TARGET_DEBUG_TARGET)
25111 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
25112 get_decl_name (decl1), get_decl_name (decl2), ret);
25113
25114 return ret;
25115 }
25116
25117 /* Make a dispatcher declaration for the multi-versioned function DECL.
25118 Calls to the DECL function will be replaced with calls to the dispatcher
25119 by the front-end. Returns the decl of the dispatcher function. */
25120
25121 static tree
25122 rs6000_get_function_versions_dispatcher (void *decl)
25123 {
25124 tree fn = (tree) decl;
25125 struct cgraph_node *node = NULL;
25126 struct cgraph_node *default_node = NULL;
25127 struct cgraph_function_version_info *node_v = NULL;
25128 struct cgraph_function_version_info *first_v = NULL;
25129
25130 tree dispatch_decl = NULL;
25131
25132 struct cgraph_function_version_info *default_version_info = NULL;
25133 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
25134
25135 if (TARGET_DEBUG_TARGET)
25136 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
25137 get_decl_name (fn));
25138
25139 node = cgraph_node::get (fn);
25140 gcc_assert (node != NULL);
25141
25142 node_v = node->function_version ();
25143 gcc_assert (node_v != NULL);
25144
25145 if (node_v->dispatcher_resolver != NULL)
25146 return node_v->dispatcher_resolver;
25147
25148 /* Find the default version and make it the first node. */
25149 first_v = node_v;
25150 /* Go to the beginning of the chain. */
25151 while (first_v->prev != NULL)
25152 first_v = first_v->prev;
25153
25154 default_version_info = first_v;
25155 while (default_version_info != NULL)
25156 {
25157 const tree decl2 = default_version_info->this_node->decl;
25158 if (is_function_default_version (decl2))
25159 break;
25160 default_version_info = default_version_info->next;
25161 }
25162
25163 /* If there is no default node, just return NULL. */
25164 if (default_version_info == NULL)
25165 return NULL;
25166
25167 /* Make default info the first node. */
25168 if (first_v != default_version_info)
25169 {
25170 default_version_info->prev->next = default_version_info->next;
25171 if (default_version_info->next)
25172 default_version_info->next->prev = default_version_info->prev;
25173 first_v->prev = default_version_info;
25174 default_version_info->next = first_v;
25175 default_version_info->prev = NULL;
25176 }
25177
25178 default_node = default_version_info->this_node;
25179
25180 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
25181 error_at (DECL_SOURCE_LOCATION (default_node->decl),
25182 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
25183 "exports hardware capability bits");
25184 #else
25185
25186 if (targetm.has_ifunc_p ())
25187 {
25188 struct cgraph_function_version_info *it_v = NULL;
25189 struct cgraph_node *dispatcher_node = NULL;
25190 struct cgraph_function_version_info *dispatcher_version_info = NULL;
25191
25192 /* Right now, the dispatching is done via ifunc. */
25193 dispatch_decl = make_dispatcher_decl (default_node->decl);
25194 TREE_NOTHROW (dispatch_decl) = TREE_NOTHROW (fn);
25195
25196 dispatcher_node = cgraph_node::get_create (dispatch_decl);
25197 gcc_assert (dispatcher_node != NULL);
25198 dispatcher_node->dispatcher_function = 1;
25199 dispatcher_version_info
25200 = dispatcher_node->insert_new_function_version ();
25201 dispatcher_version_info->next = default_version_info;
25202 dispatcher_node->definition = 1;
25203
25204 /* Set the dispatcher for all the versions. */
25205 it_v = default_version_info;
25206 while (it_v != NULL)
25207 {
25208 it_v->dispatcher_resolver = dispatch_decl;
25209 it_v = it_v->next;
25210 }
25211 }
25212 else
25213 {
25214 error_at (DECL_SOURCE_LOCATION (default_node->decl),
25215 "multiversioning needs %<ifunc%> which is not supported "
25216 "on this target");
25217 }
25218 #endif
25219
25220 return dispatch_decl;
25221 }
25222
25223 /* Make the resolver function decl to dispatch the versions of a multi-
25224 versioned function, DEFAULT_DECL. Create an empty basic block in the
25225 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
25226 function. */
25227
25228 static tree
25229 make_resolver_func (const tree default_decl,
25230 const tree dispatch_decl,
25231 basic_block *empty_bb)
25232 {
25233 /* Make the resolver function static. The resolver function returns
25234 void *. */
25235 tree decl_name = clone_function_name (default_decl, "resolver");
25236 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
25237 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
25238 tree decl = build_fn_decl (resolver_name, type);
25239 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
25240
25241 DECL_NAME (decl) = decl_name;
25242 TREE_USED (decl) = 1;
25243 DECL_ARTIFICIAL (decl) = 1;
25244 DECL_IGNORED_P (decl) = 0;
25245 TREE_PUBLIC (decl) = 0;
25246 DECL_UNINLINABLE (decl) = 1;
25247
25248 /* Resolver is not external, body is generated. */
25249 DECL_EXTERNAL (decl) = 0;
25250 DECL_EXTERNAL (dispatch_decl) = 0;
25251
25252 DECL_CONTEXT (decl) = NULL_TREE;
25253 DECL_INITIAL (decl) = make_node (BLOCK);
25254 DECL_STATIC_CONSTRUCTOR (decl) = 0;
25255
25256 if (DECL_COMDAT_GROUP (default_decl)
25257 || TREE_PUBLIC (default_decl))
25258 {
25259 /* In this case, each translation unit with a call to this
25260 versioned function will put out a resolver. Ensure it
25261 is comdat to keep just one copy. */
25262 DECL_COMDAT (decl) = 1;
25263 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
25264 }
25265 else
25266 TREE_PUBLIC (dispatch_decl) = 0;
25267
25268 /* Build result decl and add to function_decl. */
25269 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
25270 DECL_CONTEXT (t) = decl;
25271 DECL_ARTIFICIAL (t) = 1;
25272 DECL_IGNORED_P (t) = 1;
25273 DECL_RESULT (decl) = t;
25274
25275 gimplify_function_tree (decl);
25276 push_cfun (DECL_STRUCT_FUNCTION (decl));
25277 *empty_bb = init_lowered_empty_function (decl, false,
25278 profile_count::uninitialized ());
25279
25280 cgraph_node::add_new_function (decl, true);
25281 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
25282
25283 pop_cfun ();
25284
25285 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
25286 DECL_ATTRIBUTES (dispatch_decl)
25287 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
25288
25289 cgraph_node::create_same_body_alias (dispatch_decl, decl);
25290
25291 return decl;
25292 }
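/* Conceptually, the resolver built here behaves like the following C sketch
   (illustrative only; the real body is constructed in GIMPLE by the
   functions below, and the clone and symbol names are assumptions):

     static void *
     foo_resolver (void)
     {
       if (__builtin_cpu_supports ("arch_3_00"))
         return (void *) &foo_power9_clone;
       return (void *) &foo_default_clone;
     }

   The dispatcher symbol itself is emitted as an ifunc whose resolver is
   this function.  */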
25293
25294 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
25295 return a pointer to VERSION_DECL if we are running on a machine that
25296 supports the hardware architecture bits indexed by CLONE_ISA. This function
25297 will be called during version dispatch to decide which function version to
25298 execute. It returns the basic block at the end, to which more conditions
25299 can be added. */
25300
25301 static basic_block
25302 add_condition_to_bb (tree function_decl, tree version_decl,
25303 int clone_isa, basic_block new_bb)
25304 {
25305 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
25306
25307 gcc_assert (new_bb != NULL);
25308 gimple_seq gseq = bb_seq (new_bb);
25309
25310
25311 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
25312 build_fold_addr_expr (version_decl));
25313 tree result_var = create_tmp_var (ptr_type_node);
25314 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
25315 gimple *return_stmt = gimple_build_return (result_var);
25316
25317 if (clone_isa == CLONE_DEFAULT)
25318 {
25319 gimple_seq_add_stmt (&gseq, convert_stmt);
25320 gimple_seq_add_stmt (&gseq, return_stmt);
25321 set_bb_seq (new_bb, gseq);
25322 gimple_set_bb (convert_stmt, new_bb);
25323 gimple_set_bb (return_stmt, new_bb);
25324 pop_cfun ();
25325 return new_bb;
25326 }
25327
25328 tree bool_zero = build_int_cst (bool_int_type_node, 0);
25329 tree cond_var = create_tmp_var (bool_int_type_node);
25330 tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BIF_CPU_SUPPORTS];
25331 const char *arg_str = rs6000_clone_map[clone_isa].name;
25332 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
25333 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
25334 gimple_call_set_lhs (call_cond_stmt, cond_var);
25335
25336 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
25337 gimple_set_bb (call_cond_stmt, new_bb);
25338 gimple_seq_add_stmt (&gseq, call_cond_stmt);
25339
25340 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
25341 NULL_TREE, NULL_TREE);
25342 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
25343 gimple_set_bb (if_else_stmt, new_bb);
25344 gimple_seq_add_stmt (&gseq, if_else_stmt);
25345
25346 gimple_seq_add_stmt (&gseq, convert_stmt);
25347 gimple_seq_add_stmt (&gseq, return_stmt);
25348 set_bb_seq (new_bb, gseq);
25349
25350 basic_block bb1 = new_bb;
25351 edge e12 = split_block (bb1, if_else_stmt);
25352 basic_block bb2 = e12->dest;
25353 e12->flags &= ~EDGE_FALLTHRU;
25354 e12->flags |= EDGE_TRUE_VALUE;
25355
25356 edge e23 = split_block (bb2, return_stmt);
25357 gimple_set_bb (convert_stmt, bb2);
25358 gimple_set_bb (return_stmt, bb2);
25359
25360 basic_block bb3 = e23->dest;
25361 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
25362
25363 remove_edge (e23);
25364 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
25365
25366 pop_cfun ();
25367 return bb3;
25368 }
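/* For a non-default CLONE_ISA, the control flow constructed above looks
   roughly like this (a sketch of the generated GIMPLE; the clone name is
   an assumption):

     bb1:  cond = __builtin_cpu_supports ("arch_3_00");
           if (cond != 0) goto bb2; else goto bb3;
     bb2:  return (void *) &foo_power9_clone;
     bb3:  <returned, so the next condition or the default is added here>  */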
25369
25370 /* This function generates the dispatch function for multi-versioned functions.
25371 DISPATCH_DECL is the function which will contain the dispatch logic.
25372 FNDECLS are the function choices for dispatch, and is a tree chain.
25373 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25374 code is generated. */
25375
25376 static int
25377 dispatch_function_versions (tree dispatch_decl,
25378 void *fndecls_p,
25379 basic_block *empty_bb)
25380 {
25381 int ix;
25382 tree ele;
25383 vec<tree> *fndecls;
25384 tree clones[CLONE_MAX];
25385
25386 if (TARGET_DEBUG_TARGET)
25387 fputs ("dispatch_function_versions, top\n", stderr);
25388
25389 gcc_assert (dispatch_decl != NULL
25390 && fndecls_p != NULL
25391 && empty_bb != NULL);
25392
25393 /* fndecls_p is actually a vector. */
25394 fndecls = static_cast<vec<tree> *> (fndecls_p);
25395
25396 /* At least one more version other than the default. */
25397 gcc_assert (fndecls->length () >= 2);
25398
25399 /* The first version in the vector is the default decl. */
25400 memset ((void *) clones, '\0', sizeof (clones));
25401 clones[CLONE_DEFAULT] = (*fndecls)[0];
25402
25403 /* On the PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25404 on the PowerPC (on x86_64, it is not a NOP). The builtin function
25405 __builtin_cpu_supports ensures that the TOC fields are set up by requiring
25406 a recent glibc. If we ever need to call __builtin_cpu_init, we would need
25407 to insert the code here to do the call. */
25408
25409 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
25410 {
25411 int priority = rs6000_clone_priority (ele);
25412 if (!clones[priority])
25413 clones[priority] = ele;
25414 }
25415
25416 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
25417 if (clones[ix])
25418 {
25419 if (TARGET_DEBUG_TARGET)
25420 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
25421 ix, get_decl_name (clones[ix]));
25422
25423 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
25424 *empty_bb);
25425 }
25426
25427 return 0;
25428 }
25429
25430 /* Generate the dispatching code body to dispatch multi-versioned function
25431 DECL. The target hook is called to process the "target" attributes and
25432 provide the code to dispatch the right function at run-time. NODE points
25433 to the dispatcher decl whose body will be created. */
25434
25435 static tree
25436 rs6000_generate_version_dispatcher_body (void *node_p)
25437 {
25438 tree resolver;
25439 basic_block empty_bb;
25440 struct cgraph_node *node = (cgraph_node *) node_p;
25441 struct cgraph_function_version_info *ninfo = node->function_version ();
25442
25443 if (ninfo->dispatcher_resolver)
25444 return ninfo->dispatcher_resolver;
25445
25446 /* node is going to be an alias, so remove the finalized bit. */
25447 node->definition = false;
25448
25449 /* The first version in the chain corresponds to the default version. */
25450 ninfo->dispatcher_resolver = resolver
25451 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
25452
25453 if (TARGET_DEBUG_TARGET)
25454 fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
25455 get_decl_name (resolver));
25456
25457 push_cfun (DECL_STRUCT_FUNCTION (resolver));
25458 auto_vec<tree, 2> fn_ver_vec;
25459
25460 for (struct cgraph_function_version_info *vinfo = ninfo->next;
25461 vinfo;
25462 vinfo = vinfo->next)
25463 {
25464 struct cgraph_node *version = vinfo->this_node;
25465 /* Check for virtual functions here again, as by this time it should
25466 have been determined if this function needs a vtable index or
25467 not. This happens for methods in derived classes that override
25468 virtual methods in base classes but are not explicitly marked as
25469 virtual. */
25470 if (DECL_VINDEX (version->decl))
25471 sorry ("virtual function multiversioning not supported");
25472
25473 fn_ver_vec.safe_push (version->decl);
25474 }
25475
25476 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
25477 cgraph_edge::rebuild_edges ();
25478 pop_cfun ();
25479 return resolver;
25480 }
25481
25482 /* Hook to decide if we need to scan function gimple statements to
25483 collect target specific information for inlining, and update the
25484 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25485 to predict which ISA feature is used at this time. Return true
25486 if we need to scan, otherwise return false. */
25487
25488 static bool
25489 rs6000_need_ipa_fn_target_info (const_tree decl,
25490 unsigned int &info ATTRIBUTE_UNUSED)
25491 {
25492 tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl);
25493 if (!target)
25494 target = target_option_default_node;
25495 struct cl_target_option *opts = TREE_TARGET_OPTION (target);
25496
25497 /* See PR102059; we only handle HTM for now, so we will only do
25498 the subsequent scanning when the HTM feature is enabled. */
25499 if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM)
25500 return true;
25501
25502 return false;
25503 }
25504
25505 /* Hook to update target specific information INFO for inlining by
25506 checking the given STMT. Return false if we don't need to scan
25507 any more, otherwise return true. */
25508
25509 static bool
25510 rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
25511 {
25512 /* Assume inline asm can use any instruction features. */
25513 if (gimple_code (stmt) == GIMPLE_ASM)
25514 {
25515 const char *asm_str = gimple_asm_string (as_a<const gasm *> (stmt));
25516 /* Ignore empty inline asm string. */
25517 if (strlen (asm_str) > 0)
25518 /* We should set any bits we are concerned with here; for now,
25519 OPTION_MASK_HTM is the only bit we care about. */
25520 info |= RS6000_FN_TARGET_INFO_HTM;
25521 return false;
25522 }
25523 else if (gimple_code (stmt) == GIMPLE_CALL)
25524 {
25525 tree fndecl = gimple_call_fndecl (stmt);
25526 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD))
25527 {
25528 enum rs6000_gen_builtins fcode
25529 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
25530 /* HTM bifs definitely exploit HTM insns. */
25531 if (bif_is_htm (rs6000_builtin_info[fcode]))
25532 {
25533 info |= RS6000_FN_TARGET_INFO_HTM;
25534 return false;
25535 }
25536 }
25537 }
25538
25539 return true;
25540 }
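/* For instance (an illustrative sketch): scanning a callee that calls
   __builtin_tbegin (0) sets RS6000_FN_TARGET_INFO_HTM and stops the scan,
   while a callee containing only plain arithmetic leaves INFO clear, which
   lets rs6000_can_inline_p below disregard the callee's HTM flag.  */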
25541
25542 /* Hook to determine if one function can safely inline another. */
25543
25544 static bool
25545 rs6000_can_inline_p (tree caller, tree callee)
25546 {
25547 bool ret = false;
25548 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
25549 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
25550
25551 /* If the caller/callee has option attributes, then use them.
25552 Otherwise, use the command line options. */
25553 if (!callee_tree)
25554 callee_tree = target_option_default_node;
25555 if (!caller_tree)
25556 caller_tree = target_option_default_node;
25557
25558 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25559 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
25560
25561 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
25562 HOST_WIDE_INT caller_isa = caller_opts->x_rs6000_isa_flags;
25563 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
25564
25565 cgraph_node *callee_node = cgraph_node::get (callee);
25566 if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL)
25567 {
25568 unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
25569 if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
25570 {
25571 callee_isa &= ~OPTION_MASK_HTM;
25572 explicit_isa &= ~OPTION_MASK_HTM;
25573 }
25574 }
25575
25576 /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
25577 purposes. */
25578 callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25579 explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25580
25581 /* The callee's options must be a subset of the caller's options, i.e.
25582 a vsx function may inline an altivec function, but a no-vsx function
25583 must not inline a vsx function. However, for those options that the
25584 callee has explicitly enabled or disabled, then we must enforce that
25585 the callee's and caller's options match exactly; see PR70010. */
25586 if (((caller_isa & callee_isa) == callee_isa)
25587 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
25588 ret = true;
25589
25590 if (TARGET_DEBUG_TARGET)
25591 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
25592 get_decl_name (caller), get_decl_name (callee),
25593 (ret ? "can" : "cannot"));
25594
25595 return ret;
25596 }
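/* An illustrative example of the subset rule above: a caller compiled with
   -mcpu=power9 (which enables VSX) may inline a callee compiled with just
   -maltivec, but a caller compiled with -mno-vsx must not inline a callee
   explicitly given -mvsx, because the explicitly set options no longer
   match exactly.  */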
25597 \f
25598 /* Allocate a stack temp and fix up the address so it meets the particular
25599 memory requirements (either offsettable or REG+REG addressing). */
25600
25601 rtx
25602 rs6000_allocate_stack_temp (machine_mode mode,
25603 bool offsettable_p,
25604 bool reg_reg_p)
25605 {
25606 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
25607 rtx addr = XEXP (stack, 0);
25608 int strict_p = reload_completed;
25609
25610 if (!legitimate_indirect_address_p (addr, strict_p))
25611 {
25612 if (offsettable_p
25613 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
25614 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25615
25616 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
25617 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25618 }
25619
25620 return stack;
25621 }
25622
25623 /* Given a memory reference, if it is not a reg or reg+reg addressing,
25624 convert to such a form to deal with memory reference instructions
25625 like STFIWX and LDBRX that only take reg+reg addressing. */
25626
25627 rtx
25628 rs6000_force_indexed_or_indirect_mem (rtx x)
25629 {
25630 machine_mode mode = GET_MODE (x);
25631
25632 gcc_assert (MEM_P (x));
25633 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
25634 {
25635 rtx addr = XEXP (x, 0);
25636 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
25637 {
25638 rtx reg = XEXP (addr, 0);
25639 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
25640 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
25641 gcc_assert (REG_P (reg));
25642 emit_insn (gen_add3_insn (reg, reg, size_rtx));
25643 addr = reg;
25644 }
25645 else if (GET_CODE (addr) == PRE_MODIFY)
25646 {
25647 rtx reg = XEXP (addr, 0);
25648 rtx expr = XEXP (addr, 1);
25649 gcc_assert (REG_P (reg));
25650 gcc_assert (GET_CODE (expr) == PLUS);
25651 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
25652 addr = reg;
25653 }
25654
25655 if (GET_CODE (addr) == PLUS)
25656 {
25657 rtx op0 = XEXP (addr, 0);
25658 rtx op1 = XEXP (addr, 1);
25659 op0 = force_reg (Pmode, op0);
25660 op1 = force_reg (Pmode, op1);
25661 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
25662 }
25663 else
25664 x = replace_equiv_address (x, force_reg (Pmode, addr));
25665 }
25666
25667 return x;
25668 }
25669
25670 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25671
25672 On the RS/6000, all integer constants are acceptable, though most won't
25673 be valid for particular insns. Only easy FP constants are acceptable. */
25674
25675 static bool
25676 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
25677 {
25678 if (TARGET_ELF && tls_referenced_p (x))
25679 return false;
25680
25681 if (CONST_DOUBLE_P (x))
25682 return easy_fp_constant (x, mode);
25683
25684 if (GET_CODE (x) == CONST_VECTOR)
25685 return easy_vector_constant (x, mode);
25686
25687 return true;
25688 }
25689
25690 #if TARGET_AIX_OS
25691 /* Implement TARGET_PRECOMPUTE_TLS_P.
25692
25693 On AIX, TLS symbols are in the TOC, which is maintained in the
25694 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25695 must be considered legitimate constants. */
25696
25697 static bool
25698 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
25699 {
25700 return tls_referenced_p (x);
25701 }
25702 #endif
25703
25704 \f
25705 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25706
25707 static bool
25708 chain_already_loaded (rtx_insn *last)
25709 {
25710 for (; last != NULL; last = PREV_INSN (last))
25711 {
25712 if (NONJUMP_INSN_P (last))
25713 {
25714 rtx patt = PATTERN (last);
25715
25716 if (GET_CODE (patt) == SET)
25717 {
25718 rtx lhs = XEXP (patt, 0);
25719
25720 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25721 return true;
25722 }
25723 }
25724 }
25725 return false;
25726 }
25727
25728 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25729
25730 void
25731 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25732 {
25733 rtx func = func_desc;
25734 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25735 rtx toc_load = NULL_RTX;
25736 rtx toc_restore = NULL_RTX;
25737 rtx func_addr;
25738 rtx abi_reg = NULL_RTX;
25739 rtx call[5];
25740 int n_call;
25741 rtx insn;
25742 bool is_pltseq_longcall;
25743
25744 if (global_tlsarg)
25745 tlsarg = global_tlsarg;
25746
25747 /* Handle longcall attributes. */
25748 is_pltseq_longcall = false;
25749 if ((INTVAL (cookie) & CALL_LONG) != 0
25750 && GET_CODE (func_desc) == SYMBOL_REF)
25751 {
25752 func = rs6000_longcall_ref (func_desc, tlsarg);
25753 if (TARGET_PLTSEQ)
25754 is_pltseq_longcall = true;
25755 }
25756
25757 /* Handle indirect calls. */
25758 if (!SYMBOL_REF_P (func)
25759 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25760 {
25761 if (!rs6000_pcrel_p ())
25762 {
25763 /* Save the TOC into its reserved slot before the call,
25764 and prepare to restore it after the call. */
25765 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
25766 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
25767 gen_rtvec (1, stack_toc_offset),
25768 UNSPEC_TOCSLOT);
25769 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
25770
25771 /* Can we optimize saving the TOC in the prologue or
25772 do we need to do it at every call? */
25773 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
25774 cfun->machine->save_toc_in_prologue = true;
25775 else
25776 {
25777 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25778 rtx stack_toc_mem = gen_frame_mem (Pmode,
25779 gen_rtx_PLUS (Pmode, stack_ptr,
25780 stack_toc_offset));
25781 MEM_VOLATILE_P (stack_toc_mem) = 1;
25782 if (is_pltseq_longcall)
25783 {
25784 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
25785 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25786 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
25787 }
25788 else
25789 emit_move_insn (stack_toc_mem, toc_reg);
25790 }
25791 }
25792
25793 if (DEFAULT_ABI == ABI_ELFv2)
25794 {
25795 /* A function pointer in the ELFv2 ABI is just a plain address, but
25796 the ABI requires it to be loaded into r12 before the call. */
25797 func_addr = gen_rtx_REG (Pmode, 12);
25798 emit_move_insn (func_addr, func);
25799 abi_reg = func_addr;
25800 /* Indirect calls via CTR are strongly preferred over indirect
25801 calls via LR, so move the address there. Needed to mark
25802 this insn for linker plt sequence editing too. */
25803 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25804 if (is_pltseq_longcall)
25805 {
25806 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
25807 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25808 emit_insn (gen_rtx_SET (func_addr, mark_func));
25809 v = gen_rtvec (2, func_addr, func_desc);
25810 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25811 }
25812 else
25813 emit_move_insn (func_addr, abi_reg);
25814 }
25815 else
25816 {
25817 /* A function pointer under AIX is a pointer to a data area whose
25818 first word contains the actual address of the function, whose
25819 second word contains a pointer to its TOC, and whose third word
25820 contains a value to place in the static chain register (r11).
25821 Note that if we load the static chain, our "trampoline" need
25822 not have any executable code. */
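/* A sketch of the 64-bit AIX function descriptor layout assumed by
   the code below:
     word 0: address of the function's actual code
     word 1: TOC pointer for the function's module
     word 2: static chain (environment) value  */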
25823
25824 /* Load up address of the actual function. */
25825 func = force_reg (Pmode, func);
25826 func_addr = gen_reg_rtx (Pmode);
25827 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
25828
25829 /* Indirect calls via CTR are strongly preferred over indirect
25830 calls via LR, so move the address there. */
25831 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
25832 emit_move_insn (ctr_reg, func_addr);
25833 func_addr = ctr_reg;
25834
25835 /* Prepare to load the TOC of the called function. Note that the
25836 TOC load must happen immediately before the actual call so
25837 that unwinding the TOC registers works correctly. See the
25838 comment in frob_update_context. */
25839 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
25840 rtx func_toc_mem = gen_rtx_MEM (Pmode,
25841 gen_rtx_PLUS (Pmode, func,
25842 func_toc_offset));
25843 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
25844
25845 /* If we have a static chain, load it up. But, if the call was
25846 originally direct, the 3rd word has not been written since no
25847 trampoline has been built, so we ought not to load it, lest we
25848 override a static chain value. */
25849 if (!(GET_CODE (func_desc) == SYMBOL_REF
25850 && SYMBOL_REF_FUNCTION_P (func_desc))
25851 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
25852 && !chain_already_loaded (get_current_sequence ()->next->last))
25853 {
25854 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
25855 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
25856 rtx func_sc_mem = gen_rtx_MEM (Pmode,
25857 gen_rtx_PLUS (Pmode, func,
25858 func_sc_offset));
25859 emit_move_insn (sc_reg, func_sc_mem);
25860 abi_reg = sc_reg;
25861 }
25862 }
25863 }
25864 else
25865 {
25866 /* No TOC register needed for calls from PC-relative callers. */
25867 if (!rs6000_pcrel_p ())
25868 /* Direct calls use the TOC: for local calls, the callee will
25869 assume the TOC register is set; for non-local calls, the
25870 PLT stub needs the TOC register. */
25871 abi_reg = toc_reg;
25872 func_addr = func;
25873 }
25874
25875 /* Create the call. */
25876 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25877 if (value != NULL_RTX)
25878 call[0] = gen_rtx_SET (value, call[0]);
25879 call[1] = gen_rtx_USE (VOIDmode, cookie);
25880 n_call = 2;
25881
25882 if (toc_load)
25883 call[n_call++] = toc_load;
25884 if (toc_restore)
25885 call[n_call++] = toc_restore;
25886
25887 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
25888
25889 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
25890 insn = emit_call_insn (insn);
25891
25892 /* Mention all registers defined by the ABI to hold information
25893 as uses in CALL_INSN_FUNCTION_USAGE. */
25894 if (abi_reg)
25895 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
25896 }
25897
25898 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
25899
25900 void
25901 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25902 {
25903 rtx call[2];
25904 rtx insn;
25905 rtx r12 = NULL_RTX;
25906 rtx func_addr = func_desc;
25907
25908 if (global_tlsarg)
25909 tlsarg = global_tlsarg;
25910
25911 /* Handle longcall attributes. */
25912 if (INTVAL (cookie) & CALL_LONG && SYMBOL_REF_P (func_desc))
25913 {
25914 /* PCREL can do a sibling call to a longcall function
25915 because we don't need to restore the TOC register. */
25916 gcc_assert (rs6000_pcrel_p ());
25917 func_desc = rs6000_longcall_ref (func_desc, tlsarg);
25918 }
25919 else
25920 gcc_assert (INTVAL (cookie) == 0);
25921
25922 /* For ELFv2, r12 and CTR need to hold the function address
25923 for an indirect call. */
25924 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
25925 {
25926 r12 = gen_rtx_REG (Pmode, 12);
25927 emit_move_insn (r12, func_desc);
25928 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25929 emit_move_insn (func_addr, r12);
25930 }
25931
25932 /* Create the call. */
25933 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
25934 if (value != NULL_RTX)
25935 call[0] = gen_rtx_SET (value, call[0]);
25936
25937 call[1] = simple_return_rtx;
25938
25939 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
25940 insn = emit_call_insn (insn);
25941
25942 /* Note use of the TOC register. */
25943 if (!rs6000_pcrel_p ())
25944 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
25945 gen_rtx_REG (Pmode, TOC_REGNUM));
25946
25947 /* Note use of r12. */
25948 if (r12)
25949 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
25950 }
25951
25952 /* Expand code to perform a call under the SYSV4 ABI. */
25953
25954 void
25955 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25956 {
25957 rtx func = func_desc;
25958 rtx func_addr;
25959 rtx call[4];
25960 rtx insn;
25961 rtx abi_reg = NULL_RTX;
25962 int n;
25963
25964 if (global_tlsarg)
25965 tlsarg = global_tlsarg;
25966
25967 /* Handle longcall attributes. */
25968 if ((INTVAL (cookie) & CALL_LONG) != 0
25969 && GET_CODE (func_desc) == SYMBOL_REF)
25970 {
25971 func = rs6000_longcall_ref (func_desc, tlsarg);
25972 /* If the longcall was implemented as an inline PLT call using
25973 PLT unspecs then func will be REG:r11. If not, func will be
25974 a pseudo reg. The inline PLT call sequence supports lazy
25975 linking (and longcalls to functions in dlopen'd libraries).
25976 The other style of longcalls don't. The lazy linking entry
25977 to the dynamic symbol resolver requires r11 be the function
25978 address (as it is for linker generated PLT stubs). Ensure
25979 r11 stays valid to the bctrl by marking r11 used by the call. */
25980 if (TARGET_PLTSEQ)
25981 abi_reg = func;
25982 }
25983
25984 /* Handle indirect calls. */
25985 if (GET_CODE (func) != SYMBOL_REF)
25986 {
25987 func = force_reg (Pmode, func);
25988
25989 /* Indirect calls via CTR are strongly preferred over indirect
25990 calls via LR, so move the address there. That can't be left
25991 to reload because we want to mark every instruction in an
25992 inline PLT call sequence with a reloc, enabling the linker to
25993 edit the sequence back to a direct call when that makes sense. */
25994 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
25995 if (abi_reg)
25996 {
25997 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
25998 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25999 emit_insn (gen_rtx_SET (func_addr, mark_func));
26000 v = gen_rtvec (2, func_addr, func_desc);
26001 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26002 }
26003 else
26004 emit_move_insn (func_addr, func);
26005 }
26006 else
26007 func_addr = func;
26008
26009 /* Create the call. */
26010 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26011 if (value != NULL_RTX)
26012 call[0] = gen_rtx_SET (value, call[0]);
26013
26014 call[1] = gen_rtx_USE (VOIDmode, cookie);
26015 n = 2;
26016 if (TARGET_SECURE_PLT
26017 && flag_pic
26018 && GET_CODE (func_addr) == SYMBOL_REF
26019 && !SYMBOL_REF_LOCAL_P (func_addr))
26020 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
26021
26022 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26023
26024 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
26025 insn = emit_call_insn (insn);
26026 if (abi_reg)
26027 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26028 }
26029
26030 /* Expand code to perform a sibling call under the SysV4 ABI. */
26031
26032 void
26033 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
26034 {
26035 rtx func = func_desc;
26036 rtx func_addr;
26037 rtx call[3];
26038 rtx insn;
26039 rtx abi_reg = NULL_RTX;
26040
26041 if (global_tlsarg)
26042 tlsarg = global_tlsarg;
26043
26044 /* Handle longcall attributes. */
26045 if ((INTVAL (cookie) & CALL_LONG) != 0
26046 && GET_CODE (func_desc) == SYMBOL_REF)
26047 {
26048 func = rs6000_longcall_ref (func_desc, tlsarg);
26049 /* If the longcall was implemented as an inline PLT call using
26050 PLT unspecs then func will be REG:r11. If not, func will be
26051 a pseudo reg. The inline PLT call sequence supports lazy
26052 linking (and longcalls to functions in dlopen'd libraries).
26053 The other style of longcalls don't. The lazy linking entry
26054 to the dynamic symbol resolver requires r11 be the function
26055 address (as it is for linker generated PLT stubs). Ensure
26056 r11 stays valid to the bctr by marking r11 used by the call. */
26057 if (TARGET_PLTSEQ)
26058 abi_reg = func;
26059 }
26060
26061 /* Handle indirect calls. */
26062 if (GET_CODE (func) != SYMBOL_REF)
26063 {
26064 func = force_reg (Pmode, func);
26065
26066 /* Indirect sibcalls must go via CTR. That can't be left to
26067 reload because we want to mark every instruction in an inline
26068 PLT call sequence with a reloc, enabling the linker to edit
26069 the sequence back to a direct call when that makes sense. */
26070 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26071 if (abi_reg)
26072 {
26073 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
26074 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26075 emit_insn (gen_rtx_SET (func_addr, mark_func));
26076 v = gen_rtvec (2, func_addr, func_desc);
26077 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26078 }
26079 else
26080 emit_move_insn (func_addr, func);
26081 }
26082 else
26083 func_addr = func;
26084
26085 /* Create the call. */
26086 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26087 if (value != NULL_RTX)
26088 call[0] = gen_rtx_SET (value, call[0]);
26089
26090 call[1] = gen_rtx_USE (VOIDmode, cookie);
26091 call[2] = simple_return_rtx;
26092
26093 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
26094 insn = emit_call_insn (insn);
26095 if (abi_reg)
26096 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26097 }
26098
26099 #if TARGET_MACHO
26100
26101 /* Expand code to perform a call under the Darwin ABI.
26102 Modulo handling of mlongcall, this is much the same as sysv.
26103 If/when the longcall optimisation is removed, we could drop this
26104 code and use the sysv case (taking care to avoid the TLS stuff).
26105
26106 We can use this for sibcalls too, if needed. */
26107
26108 void
26109 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
26110 rtx cookie, bool sibcall)
26111 {
26112 rtx func = func_desc;
26113 rtx func_addr;
26114 rtx call[3];
26115 rtx insn;
26116 int cookie_val = INTVAL (cookie);
26117 bool make_island = false;
26118
26119 /* Handle longcall attributes; there are two cases for Darwin:
26120 1) Newer linkers are capable of synthesising any branch islands needed.
26121 2) We need a helper branch island synthesised by the compiler.
26122 The second case has mostly been retired and we don't use it for m64.
26123 In fact, it is only an optimisation; we could just indirect as sysv
26124 does, but we keep it for backwards compatibility for now.
26125 If we're going to use this, then we need to keep the CALL_LONG bit set,
26126 so that we can pick up the special insn form later. */
26127 if ((cookie_val & CALL_LONG) != 0
26128 && GET_CODE (func_desc) == SYMBOL_REF)
26129 {
26130 /* FIXME: the longcall opt should not hang off this flag, it is most
26131 likely incorrect for kernel-mode code-generation. */
26132 if (darwin_symbol_stubs && TARGET_32BIT)
26133 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
26134 else
26135 {
26136 /* The linker is capable of doing this, but the user explicitly
26137 asked for -mlongcall, so we'll do the 'normal' version. */
26138 func = rs6000_longcall_ref (func_desc, NULL_RTX);
26139 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
26140 }
26141 }
26142
26143 /* Handle indirect calls. */
26144 if (GET_CODE (func) != SYMBOL_REF)
26145 {
26146 func = force_reg (Pmode, func);
26147
26148 /* Indirect calls via CTR are strongly preferred over indirect
26149 calls via LR, and are required for indirect sibcalls, so move
26150 the address there. */
26151 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26152 emit_move_insn (func_addr, func);
26153 }
26154 else
26155 func_addr = func;
26156
26157 /* Create the call. */
26158 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26159 if (value != NULL_RTX)
26160 call[0] = gen_rtx_SET (value, call[0]);
26161
26162 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
26163
26164 if (sibcall)
26165 call[2] = simple_return_rtx;
26166 else
26167 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26168
26169 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
26170 insn = emit_call_insn (insn);
26171 /* Now we have the debug info in the insn, we can set up the branch island
26172 if we're using one. */
26173 if (make_island)
26174 {
26175 tree funname = get_identifier (XSTR (func_desc, 0));
26176
26177 if (no_previous_def (funname))
26178 {
26179 rtx label_rtx = gen_label_rtx ();
26180 char *label_buf, temp_buf[256];
26181 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
26182 CODE_LABEL_NUMBER (label_rtx));
26183 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
26184 tree labelname = get_identifier (label_buf);
26185 add_compiler_branch_island (labelname, funname,
26186 insn_line ((const rtx_insn*)insn));
26187 }
26188 }
26189 }
26190 #endif
26191
26192 void
26193 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
26194 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
26195 {
26196 #if TARGET_MACHO
26197 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
26198 #else
26199 gcc_unreachable ();
26200 #endif
26201 }
26202
26203
26204 void
26205 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
26206 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
26207 {
26208 #if TARGET_MACHO
26209 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
26210 #else
26211 gcc_unreachable ();
26212 #endif
26213 }
26214
26215 /* Return whether we should generate PC-relative code for FNDECL. */
26216 bool
26217 rs6000_fndecl_pcrel_p (const_tree fndecl)
26218 {
26219 if (DEFAULT_ABI != ABI_ELFv2)
26220 return false;
26221
26222 struct cl_target_option *opts = target_opts_for_fn (fndecl);
26223
26224 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26225 && TARGET_CMODEL == CMODEL_MEDIUM);
26226 }
26227
26228 /* Return whether we should generate PC-relative code for *FN. */
26229 bool
26230 rs6000_function_pcrel_p (struct function *fn)
26231 {
26232 if (DEFAULT_ABI != ABI_ELFv2)
26233 return false;
26234
26235 /* Optimize the usual case. */
26236 if (fn == cfun)
26237 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26238 && TARGET_CMODEL == CMODEL_MEDIUM);
26239
26240 return rs6000_fndecl_pcrel_p (fn->decl);
26241 }
26242
26243 /* Return whether we should generate PC-relative code for the current
26244 function. */
26245 bool
26246 rs6000_pcrel_p ()
26247 {
26248 return (DEFAULT_ABI == ABI_ELFv2
26249 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26250 && TARGET_CMODEL == CMODEL_MEDIUM);
26251 }
26252
26253 \f
26254 /* Given an address (ADDR), a mode (MODE), and what the format of the
26255 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
26256 for the address. */
26257
26258 enum insn_form
26259 address_to_insn_form (rtx addr,
26260 machine_mode mode,
26261 enum non_prefixed_form non_prefixed_format)
26262 {
26263 /* Single register is easy. */
26264 if (REG_P (addr) || SUBREG_P (addr))
26265 return INSN_FORM_BASE_REG;
26266
26267 /* If the non-prefixed instruction format doesn't support offset addressing,
26268 make sure only indexed addressing is allowed.
26269
26270 We special case SDmode so that the register allocator does not try to move
26271 SDmode through GPR registers, but instead uses the 32-bit integer load and
26272 store instructions for the floating point registers. */
26273 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
26274 {
26275 if (GET_CODE (addr) != PLUS)
26276 return INSN_FORM_BAD;
26277
26278 rtx op0 = XEXP (addr, 0);
26279 rtx op1 = XEXP (addr, 1);
26280 if (!REG_P (op0) && !SUBREG_P (op0))
26281 return INSN_FORM_BAD;
26282
26283 if (!REG_P (op1) && !SUBREG_P (op1))
26284 return INSN_FORM_BAD;
26285
26286 return INSN_FORM_X;
26287 }
26288
26289 /* Deal with update forms. */
26290 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
26291 return INSN_FORM_UPDATE;
26292
26293 /* Handle PC-relative symbols and labels. Check for both local and
26294 external symbols. Assume labels are always local. TLS symbols
26295 are not PC-relative for rs6000. */
26296 if (TARGET_PCREL)
26297 {
26298 if (LABEL_REF_P (addr))
26299 return INSN_FORM_PCREL_LOCAL;
26300
26301 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
26302 {
26303 if (!SYMBOL_REF_LOCAL_P (addr))
26304 return INSN_FORM_PCREL_EXTERNAL;
26305 else
26306 return INSN_FORM_PCREL_LOCAL;
26307 }
26308 }
26309
26310 if (GET_CODE (addr) == CONST)
26311 addr = XEXP (addr, 0);
26312
26313 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
26314 if (GET_CODE (addr) == LO_SUM)
26315 return INSN_FORM_LO_SUM;
26316
26317 /* Everything below must be an offset address of some form. */
26318 if (GET_CODE (addr) != PLUS)
26319 return INSN_FORM_BAD;
26320
26321 rtx op0 = XEXP (addr, 0);
26322 rtx op1 = XEXP (addr, 1);
26323
26324 /* Check for indexed addresses. */
26325 if (REG_P (op1) || SUBREG_P (op1))
26326 {
26327 if (REG_P (op0) || SUBREG_P (op0))
26328 return INSN_FORM_X;
26329
26330 return INSN_FORM_BAD;
26331 }
26332
26333 if (!CONST_INT_P (op1))
26334 return INSN_FORM_BAD;
26335
26336 HOST_WIDE_INT offset = INTVAL (op1);
26337 if (!SIGNED_INTEGER_34BIT_P (offset))
26338 return INSN_FORM_BAD;
26339
26340 /* Check for local and external PC-relative addresses. Labels are always
26341 local. TLS symbols are not PC-relative for rs6000. */
26342 if (TARGET_PCREL)
26343 {
26344 if (LABEL_REF_P (op0))
26345 return INSN_FORM_PCREL_LOCAL;
26346
26347 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
26348 {
26349 if (!SYMBOL_REF_LOCAL_P (op0))
26350 return INSN_FORM_PCREL_EXTERNAL;
26351 else
26352 return INSN_FORM_PCREL_LOCAL;
26353 }
26354 }
26355
26356 /* If it isn't PC-relative, the address must use a base register. */
26357 if (!REG_P (op0) && !SUBREG_P (op0))
26358 return INSN_FORM_BAD;
26359
26360 /* Large offsets must be prefixed. */
26361 if (!SIGNED_INTEGER_16BIT_P (offset))
26362 {
26363 if (TARGET_PREFIXED)
26364 return INSN_FORM_PREFIXED_NUMERIC;
26365
26366 return INSN_FORM_BAD;
26367 }
26368
26369 /* We have a 16-bit offset, see what default instruction format to use. */
26370 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
26371 {
26372 unsigned size = GET_MODE_SIZE (mode);
26373
26374 /* On 64-bit systems, assume 64-bit integers need to use DS form
26375 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26376 (for LXV and STXV). TImode is problematical in that its normal usage
26377 is expected to be GPRs where it wants a DS instruction format, but if
26378 it goes into the vector registers, it wants a DQ instruction
26379 format. */
26380 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
26381 non_prefixed_format = NON_PREFIXED_DS;
26382
26383 else if (TARGET_VSX && size >= 16
26384 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
26385 non_prefixed_format = NON_PREFIXED_DQ;
26386
26387 else
26388 non_prefixed_format = NON_PREFIXED_D;
26389 }
26390
26391 /* Classify the D/DS/DQ-form addresses. */
26392 switch (non_prefixed_format)
26393 {
26394 /* Instruction format D, all 16 bits are valid. */
26395 case NON_PREFIXED_D:
26396 return INSN_FORM_D;
26397
26398 /* Instruction format DS, bottom 2 bits must be 0. */
26399 case NON_PREFIXED_DS:
26400 if ((offset & 3) == 0)
26401 return INSN_FORM_DS;
26402
26403 else if (TARGET_PREFIXED)
26404 return INSN_FORM_PREFIXED_NUMERIC;
26405
26406 else
26407 return INSN_FORM_BAD;
26408
26409 /* Instruction format DQ, bottom 4 bits must be 0. */
26410 case NON_PREFIXED_DQ:
26411 if ((offset & 15) == 0)
26412 return INSN_FORM_DQ;
26413
26414 else if (TARGET_PREFIXED)
26415 return INSN_FORM_PREFIXED_NUMERIC;
26416
26417 else
26418 return INSN_FORM_BAD;
26419
26420 default:
26421 break;
26422 }
26423
26424 return INSN_FORM_BAD;
26425 }
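/* Illustrative classifications (a sketch; assumes a 64-bit target with
   TARGET_PREFIXED and a DImode access, so the default format is
   NON_PREFIXED_DS):

     (reg r3)                           -> INSN_FORM_BASE_REG
     (plus (reg r3) (reg r4))           -> INSN_FORM_X
     (plus (reg r3) (const_int 16))     -> INSN_FORM_DS
     (plus (reg r3) (const_int 18))     -> INSN_FORM_PREFIXED_NUMERIC
     (plus (reg r3) (const_int 100000)) -> INSN_FORM_PREFIXED_NUMERIC  */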
26426
26427 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
26428 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is
26429 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26430 a D-form or DS-form instruction. X-form and base_reg are always
26431 allowed. */
26432 bool
26433 address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
26434 enum non_prefixed_form non_prefixed_format)
26435 {
26436 enum insn_form result_form;
26437
26438 result_form = address_to_insn_form (addr, mode, non_prefixed_format);
26439
26440 switch (non_prefixed_format)
26441 {
26442 case NON_PREFIXED_D:
26443 switch (result_form)
26444 {
26445 case INSN_FORM_X:
26446 case INSN_FORM_D:
26447 case INSN_FORM_DS:
26448 case INSN_FORM_BASE_REG:
26449 return true;
26450 default:
26451 return false;
26452 }
26453 break;
26454 case NON_PREFIXED_DS:
26455 switch (result_form)
26456 {
26457 case INSN_FORM_X:
26458 case INSN_FORM_DS:
26459 case INSN_FORM_BASE_REG:
26460 return true;
26461 default:
26462 return false;
26463 }
26464 break;
26465 default:
26466 break;
26467 }
26468 return false;
26469 }
26470
26471 /* Return true if a REG with a given MODE that is loaded from or stored into
26472 a MEM location uses a non-prefixed D/DS/DQ-form address. This is used to
26473 validate the load or store with the PCREL_OPT optimization to make sure it
26474 is an instruction that can be optimized.
26475
26476 We need to specify the MODE separately from the REG to allow for loads that
26477 include zero/sign/float extension. */
26478
26479 bool
26480 pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
26481 {
26482 /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
26483 PCREL_OPT optimization. */
26484 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
26485 if (non_prefixed == NON_PREFIXED_X)
26486 return false;
26487
26488 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
26489 rtx addr = XEXP (mem, 0);
26490 enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
26491 return (iform == INSN_FORM_BASE_REG
26492 || iform == INSN_FORM_D
26493 || iform == INSN_FORM_DS
26494 || iform == INSN_FORM_DQ);
26495 }
26496
26497 /* Helper function to see if we're potentially looking at lfs/stfs.
26498 - PARALLEL containing a SET and a CLOBBER
26499 - stfs:
26500 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26501 - CLOBBER is a V4SF
26502 - lfs:
26503 - SET is from UNSPEC_SF_FROM_SI to REG:SF
26504 - CLOBBER is a DI
26505 */
26506
26507 static bool
26508 is_lfs_stfs_insn (rtx_insn *insn)
26509 {
26510 rtx pattern = PATTERN (insn);
26511 if (GET_CODE (pattern) != PARALLEL)
26512 return false;
26513
26514 /* This should be a parallel with exactly one set and one clobber. */
26515 if (XVECLEN (pattern, 0) != 2)
26516 return false;
26517
26518 rtx set = XVECEXP (pattern, 0, 0);
26519 if (GET_CODE (set) != SET)
26520 return false;
26521
26522 rtx clobber = XVECEXP (pattern, 0, 1);
26523 if (GET_CODE (clobber) != CLOBBER)
26524 return false;
26525
26526 /* All we care about is that the destination of the SET is a mem:SI,
26527 the source is an UNSPEC_SI_FROM_SF, and the clobber
26528 is a scratch:V4SF. */
26529
26530 rtx dest = SET_DEST (set);
26531 rtx src = SET_SRC (set);
26532 rtx scratch = SET_DEST (clobber);
26533
26534 if (GET_CODE (src) != UNSPEC)
26535 return false;
26536
26537 /* stfs case. */
26538 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
26539 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
26540 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
26541 return true;
26542
26543 /* lfs case. */
26544 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
26545 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
26546 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
26547 return true;
26548
26549 return false;
26550 }
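/* A sketch of the stfs-style pattern recognized above:

     (parallel [(set (mem:SI ...)
                     (unspec:SI [(reg:SF ...)] UNSPEC_SI_FROM_SF))
                (clobber (scratch:V4SF))])

   The lfs case is the mirror image: an UNSPEC_SF_FROM_SI into a reg:SF
   with a scratch:DI clobber.  */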
26551
26552 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26553 instruction format (D/DS/DQ) used for offset memory. */
26554
26555 enum non_prefixed_form
26556 reg_to_non_prefixed (rtx reg, machine_mode mode)
26557 {
26558 /* If it isn't a register, use the defaults. */
26559 if (!REG_P (reg) && !SUBREG_P (reg))
26560 return NON_PREFIXED_DEFAULT;
26561
26562 unsigned int r = reg_or_subregno (reg);
26563
26564 /* If we have a pseudo, use the default instruction format. */
26565 if (!HARD_REGISTER_NUM_P (r))
26566 return NON_PREFIXED_DEFAULT;
26567
26568 unsigned size = GET_MODE_SIZE (mode);
26569
26570 /* FPR registers use the D format for scalars, and the DQ format for vectors,
26571 IEEE 128-bit floating point, and 128-bit integers. Before power9, only
26572 indexed addressing was available for vectors. */
26573 if (FP_REGNO_P (r))
26574 {
26575 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26576 return NON_PREFIXED_D;
26577
26578 else if (size < 8)
26579 return NON_PREFIXED_X;
26580
26581 else if (TARGET_VSX && size >= 16
26582 && (VECTOR_MODE_P (mode)
26583 || VECTOR_ALIGNMENT_P (mode)
26584 || mode == TImode || mode == CTImode))
26585 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
26586
26587 else
26588 return NON_PREFIXED_DEFAULT;
26589 }
26590
26591 /* Altivec registers use the DS format for scalars, and the DQ format for
26592 vectors, IEEE 128-bit floating point, and 128-bit integers. Before power9,
26593 only indexed addressing was available. */
26594 else if (ALTIVEC_REGNO_P (r))
26595 {
26596 if (!TARGET_P9_VECTOR)
26597 return NON_PREFIXED_X;
26598
26599 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26600 return NON_PREFIXED_DS;
26601
26602 else if (size < 8)
26603 return NON_PREFIXED_X;
26604
26605 else if (TARGET_VSX && size >= 16
26606 && (VECTOR_MODE_P (mode)
26607 || VECTOR_ALIGNMENT_P (mode)
26608 || mode == TImode || mode == CTImode))
26609 return NON_PREFIXED_DQ;
26610
26611 else
26612 return NON_PREFIXED_DEFAULT;
26613 }
26614
26615 /* GPR registers use the DS format for 64-bit items on 64-bit systems, and
26616 the D format otherwise. Assume that any other register, such as LR, CRs,
26617 etc., will go through the GPR registers for memory operations. */
26618 else if (TARGET_POWERPC64 && size >= 8)
26619 return NON_PREFIXED_DS;
26620
26621 return NON_PREFIXED_D;
26622 }
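/* Examples of the mapping above (illustrative; assumes power9 with VSX):

     (reg:DF fr1),  DFmode   -> NON_PREFIXED_D   (lfd/stfd)
     (reg:DI r3),   DImode   -> NON_PREFIXED_DS  (ld/std)
     (reg:V4SI v2), V4SImode -> NON_PREFIXED_DQ  (lxv/stxv)  */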
26623
26624 \f
26625 /* Whether a load instruction is a prefixed instruction. This is called from
26626 the prefixed attribute processing. */
26627
26628 bool
26629 prefixed_load_p (rtx_insn *insn)
26630 {
26631 /* Validate the insn to make sure it is a normal load insn. */
26632 extract_insn_cached (insn);
26633 if (recog_data.n_operands < 2)
26634 return false;
26635
26636 rtx reg = recog_data.operand[0];
26637 rtx mem = recog_data.operand[1];
26638
26639 if (!REG_P (reg) && !SUBREG_P (reg))
26640 return false;
26641
26642 if (!MEM_P (mem))
26643 return false;
26644
26645 /* Prefixed load instructions do not support update or indexed forms. */
26646 if (get_attr_indexed (insn) == INDEXED_YES
26647 || get_attr_update (insn) == UPDATE_YES)
26648 return false;
26649
26650 /* LWA uses the DS format instead of the D format that LWZ uses. */
26651 enum non_prefixed_form non_prefixed;
26652 machine_mode reg_mode = GET_MODE (reg);
26653 machine_mode mem_mode = GET_MODE (mem);
26654
26655 if (mem_mode == SImode && reg_mode == DImode
26656 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26657 non_prefixed = NON_PREFIXED_DS;
26658
26659 else
26660 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26661
26662 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26663 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
26664 else
26665 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
26666 }
26667
26668 /* Whether a store instruction is a prefixed instruction. This is called from
26669 the prefixed attribute processing. */
26670
26671 bool
26672 prefixed_store_p (rtx_insn *insn)
26673 {
26674 /* Validate the insn to make sure it is a normal store insn. */
26675 extract_insn_cached (insn);
26676 if (recog_data.n_operands < 2)
26677 return false;
26678
26679 rtx mem = recog_data.operand[0];
26680 rtx reg = recog_data.operand[1];
26681
26682 if (!REG_P (reg) && !SUBREG_P (reg))
26683 return false;
26684
26685 if (!MEM_P (mem))
26686 return false;
26687
26688 /* Prefixed store instructions do not support update or indexed forms. */
26689 if (get_attr_indexed (insn) == INDEXED_YES
26690 || get_attr_update (insn) == UPDATE_YES)
26691 return false;
26692
26693 machine_mode mem_mode = GET_MODE (mem);
26694 rtx addr = XEXP (mem, 0);
26695 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26696
26697 /* Need to make sure we aren't looking at a stfs which doesn't look
26698 like the other things reg_to_non_prefixed/address_is_prefixed
26699 looks for. */
26700 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26701 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
26702 else
26703 return address_is_prefixed (addr, mem_mode, non_prefixed);
26704 }
26705
26706 /* Whether a load immediate or add instruction is a prefixed instruction. This
26707 is called from the prefixed attribute processing. */
26708
26709 bool
26710 prefixed_paddi_p (rtx_insn *insn)
26711 {
26712 rtx set = single_set (insn);
26713 if (!set)
26714 return false;
26715
26716 rtx dest = SET_DEST (set);
26717 rtx src = SET_SRC (set);
26718
26719 if (!REG_P (dest) && !SUBREG_P (dest))
26720 return false;
26721
26722 /* Is this a load immediate that can't be done with a simple ADDI or
26723 ADDIS? */
26724 if (CONST_INT_P (src))
26725 return (satisfies_constraint_eI (src)
26726 && !satisfies_constraint_I (src)
26727 && !satisfies_constraint_L (src));
26728
26729 /* Is this a PADDI instruction that can't be done with a simple ADDI or
26730 ADDIS? */
26731 if (GET_CODE (src) == PLUS)
26732 {
26733 rtx op1 = XEXP (src, 1);
26734
26735 return (CONST_INT_P (op1)
26736 && satisfies_constraint_eI (op1)
26737 && !satisfies_constraint_I (op1)
26738 && !satisfies_constraint_L (op1));
26739 }
26740
26741 /* If not, is it a load of a PC-relative address? */
26742 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
26743 return false;
26744
26745 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
26746 return false;
26747
26748 enum insn_form iform = address_to_insn_form (src, Pmode,
26749 NON_PREFIXED_DEFAULT);
26750
26751 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
26752 }
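/* Worked examples (illustrative): a constant such as 100 satisfies
   constraint I (plain ADDI) and is not prefixed; 0x12340000 satisfies L
   (ADDIS); a value such as 0x123456 fits only the 34-bit eI constraint
   and therefore needs the prefixed PADDI (PLI) form.  */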
26753
26754 /* Whether the next instruction needs a 'p' prefix issued before the
26755 instruction is printed out. */
26756 static bool prepend_p_to_next_insn;
26757
26758 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26759 outputting the assembler code. On the PowerPC, we remember if the current
26760 insn is a prefixed insn where we need to emit a 'p' before the insn.
26761
26762 In addition, if the insn is part of the PC-relative reference to an
26763 external label (PCREL_OPT) optimization, this is also recorded. */
26764 void
26765 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
26766 {
26767 prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
26768 == MAYBE_PREFIXED_YES
26769 && get_attr_prefixed (insn) == PREFIXED_YES);
26770 return;
26771 }
26772
26773 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
26774 We use it to emit a 'p' for prefixed insns that is set in
26775 FINAL_PRESCAN_INSN. */
26776 void
26777 rs6000_asm_output_opcode (FILE *stream)
26778 {
26779 if (prepend_p_to_next_insn)
26780 {
26781 fprintf (stream, "p");
26782
26783 /* Reset the flag in the case where there are separate insn lines in the
26784 sequence, so the 'p' is only emitted for the first line. This shows up
26785 when we are doing the PCREL_OPT optimization, in that the label created
26786 with %r<n> would have a leading 'p' printed. */
26787 prepend_p_to_next_insn = false;
26788 }
26789
26790 return;
26791 }
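/* E.g. (illustrative): for a load whose template prints "ld 3,0(9)", when
   FINAL_PRESCAN_INSN above decided the insn is prefixed, the 'p' emitted
   here turns the opcode into "pld 3,0(9)"; only the leading character
   comes from this hook, the rest from the insn template.  */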
26792
26793 /* Emit the relocation to tie the next instruction to a previous instruction
26794 that loads up an external address. This is used to do the PCREL_OPT
26795 optimization. Note, the label is generated after the PLD of the got
26796 pc-relative address to allow for the assembler to insert NOPs before the PLD
26797 instruction. The operand is a constant integer that is the label
26798 number. */
26799
26800 void
26801 output_pcrel_opt_reloc (rtx label_num)
26802 {
26803 rtx operands[1] = { label_num };
26804 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
26805 operands);
26806 }
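/* With LABEL_NUM 42 (an arbitrary illustrative value), this emits:

     .reloc .Lpcrel42-8,R_PPC64_PCREL_OPT,.-(.Lpcrel42-8)

   where .Lpcrel42 was placed just after the PLD that loaded the external
   address.  */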
26807
26808 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
26809 should be adjusted to reflect any required changes. This macro is used when
26810 there is some systematic length adjustment required that would be difficult
26811 to express in the length attribute.
26812
26813 On the PowerPC, we use this to adjust the length of an instruction if one or
26814 more prefixed instructions are generated, using the attribute
26815 num_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
26816 hardware requires that a prefixed instruction does not cross a 64-byte
26817 boundary. This means the compiler has to assume the length of the first
26818 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
26819 already set for the non-prefixed instruction, we just need to update for the
26820 difference. */
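
/* A worked example (illustrative): a single prefixed load such as PLD is
   8 bytes, but since it must not cross a 64-byte boundary the assembler may
   have to emit a 4-byte NOP first, so its worst-case length is 12 bytes.
   The base length was computed as 4, and with max_prefixed_insns == 1 the
   code below adds 4 * (1 + 1) == 8, giving 12.  */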
26821
26822 int
26823 rs6000_adjust_insn_length (rtx_insn *insn, int length)
26824 {
26825 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
26826 {
26827 rtx pattern = PATTERN (insn);
26828 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
26829 && get_attr_prefixed (insn) == PREFIXED_YES)
26830 {
26831 int num_prefixed = get_attr_max_prefixed_insns (insn);
26832 length += 4 * (num_prefixed + 1);
26833 }
26834 }
26835
26836 return length;
26837 }
26838
26839 \f
26840 #ifdef HAVE_GAS_HIDDEN
26841 # define USE_HIDDEN_LINKONCE 1
26842 #else
26843 # define USE_HIDDEN_LINKONCE 0
26844 #endif
26845
26846 /* Fills in the label name that should be used for a 476 link stack thunk. */
26847
26848 void
26849 get_ppc476_thunk_name (char name[32])
26850 {
26851 gcc_assert (TARGET_LINK_STACK);
26852
26853 if (USE_HIDDEN_LINKONCE)
26854 sprintf (name, "__ppc476.get_thunk");
26855 else
26856 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
26857 }
26858
26859 /* This function emits the simple thunk routine that is used to preserve
26860 the link stack on the 476 cpu. */
26861
26862 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
26863 static void
26864 rs6000_code_end (void)
26865 {
26866 char name[32];
26867 tree decl;
26868
26869 if (!TARGET_LINK_STACK)
26870 return;
26871
26872 get_ppc476_thunk_name (name);
26873
26874 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
26875 build_function_type_list (void_type_node, NULL_TREE));
26876 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
26877 NULL_TREE, void_type_node);
26878 TREE_PUBLIC (decl) = 1;
26879 TREE_STATIC (decl) = 1;
26880
26881 #if RS6000_WEAK
26882 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
26883 {
26884 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
26885 targetm.asm_out.unique_section (decl, 0);
26886 switch_to_section (get_named_section (decl, NULL, 0));
26887 DECL_WEAK (decl) = 1;
26888 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
26889 targetm.asm_out.globalize_label (asm_out_file, name);
26890 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
26891 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
26892 }
26893 else
26894 #endif
26895 {
26896 switch_to_section (text_section);
26897 ASM_OUTPUT_LABEL (asm_out_file, name);
26898 }
26899
26900 DECL_INITIAL (decl) = make_node (BLOCK);
26901 current_function_decl = decl;
26902 allocate_struct_function (decl, false);
26903 init_function_start (decl);
26904 first_function_block_is_cold = false;
26905 /* Make sure unwind info is emitted for the thunk if needed. */
26906 final_start_function (emit_barrier (), asm_out_file, 1);
26907
26908 fputs ("\tblr\n", asm_out_file);
26909
26910 final_end_function ();
26911 init_insn_lengths ();
26912 free_after_compilation (cfun);
26913 set_cfun (NULL);
26914 current_function_decl = NULL;
26915 }
26916
26917 /* Add r30 to hard reg set if the prologue sets it up and it is not
26918 pic_offset_table_rtx. */
26919
26920 static void
26921 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
26922 {
26923 if (!TARGET_SINGLE_PIC_BASE
26924 && TARGET_TOC
26925 && TARGET_MINIMAL_TOC
26926 && !constant_pool_empty_p ())
26927 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
26928 if (cfun->machine->split_stack_argp_used)
26929 add_to_hard_reg_set (&set->set, Pmode, 12);
26930
26931 /* Make sure the hard reg set doesn't include r2, which was possibly added
26932 via PIC_OFFSET_TABLE_REGNUM. */
26933 if (TARGET_TOC)
26934 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
26935 }
26936
26937 \f
26938 /* Helper function for rs6000_split_logical to emit a logical instruction after
26939 splitting the operation into individual GPR registers.
26940
26941 DEST is the destination register.
26942 OP1 and OP2 are the input source registers.
26943 CODE is the base operation (AND, IOR, XOR, NOT).
26944 MODE is the machine mode.
26945 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
26946 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
26947 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
26948
26949 static void
26950 rs6000_split_logical_inner (rtx dest,
26951 rtx op1,
26952 rtx op2,
26953 enum rtx_code code,
26954 machine_mode mode,
26955 bool complement_final_p,
26956 bool complement_op1_p,
26957 bool complement_op2_p)
26958 {
26959 rtx bool_rtx;
26960
26961 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
26962 if (op2 && CONST_INT_P (op2)
26963 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
26964 && !complement_final_p && !complement_op1_p && !complement_op2_p)
26965 {
26966 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
26967 HOST_WIDE_INT value = INTVAL (op2) & mask;
26968
26969 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
26970 if (code == AND)
26971 {
26972 if (value == 0)
26973 {
26974 emit_insn (gen_rtx_SET (dest, const0_rtx));
26975 return;
26976 }
26977
26978 else if (value == mask)
26979 {
26980 if (!rtx_equal_p (dest, op1))
26981 emit_insn (gen_rtx_SET (dest, op1));
26982 return;
26983 }
26984 }
26985
26986 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
26987 into separate ORI/ORIS or XORI/XORIS instructions. */
26988 else if (code == IOR || code == XOR)
26989 {
26990 if (value == 0)
26991 {
26992 if (!rtx_equal_p (dest, op1))
26993 emit_insn (gen_rtx_SET (dest, op1));
26994 return;
26995 }
26996 }
26997 }
26998
26999 if (code == AND && mode == SImode
27000 && !complement_final_p && !complement_op1_p && !complement_op2_p)
27001 {
27002 emit_insn (gen_andsi3 (dest, op1, op2));
27003 return;
27004 }
27005
27006 if (complement_op1_p)
27007 op1 = gen_rtx_NOT (mode, op1);
27008
27009 if (complement_op2_p)
27010 op2 = gen_rtx_NOT (mode, op2);
27011
27012 /* For canonical RTL, if only one arm is inverted it is the first. */
27013 if (!complement_op1_p && complement_op2_p)
27014 std::swap (op1, op2);
27015
27016 bool_rtx = ((code == NOT)
27017 ? gen_rtx_NOT (mode, op1)
27018 : gen_rtx_fmt_ee (code, mode, op1, op2));
27019
27020 if (complement_final_p)
27021 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
27022
27023 emit_insn (gen_rtx_SET (dest, bool_rtx));
27024 }
27025
27026 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
27027 operations are split immediately during RTL generation to allow for more
27028 optimizations of the AND/IOR/XOR.
27029
27030 OPERANDS is an array containing the destination and two input operands.
27031 CODE is the base operation (AND, IOR, XOR, NOT).
27033 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27034 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27035 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
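
/* An illustrative example: on a 32-bit target, a DImode XOR such as

	x ^= 0x123456789abcdef0

   is split into SImode operations on the two halves, and each half whose
   constant is not a 16-bit logical immediate is split again, roughly:

	xoris 3,3,0x1234	xoris 4,4,0x9abc
	xori  3,3,0x5678	xori  4,4,0xdef0

   (register numbers are hypothetical).  */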
27038
27039 static void
27040 rs6000_split_logical_di (rtx operands[3],
27041 enum rtx_code code,
27042 bool complement_final_p,
27043 bool complement_op1_p,
27044 bool complement_op2_p)
27045 {
27046 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
27047 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
27048 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
27049 enum hi_lo { hi = 0, lo = 1 };
27050 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
27051 size_t i;
27052
27053 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
27054 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
27055 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
27056 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
27057
27058 if (code == NOT)
27059 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
27060 else
27061 {
27062 if (!CONST_INT_P (operands[2]))
27063 {
27064 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
27065 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
27066 }
27067 else
27068 {
27069 HOST_WIDE_INT value = INTVAL (operands[2]);
27070 HOST_WIDE_INT value_hi_lo[2];
27071
27072 gcc_assert (!complement_final_p);
27073 gcc_assert (!complement_op1_p);
27074 gcc_assert (!complement_op2_p);
27075
27076 value_hi_lo[hi] = value >> 32;
27077 value_hi_lo[lo] = value & lower_32bits;
27078
27079 for (i = 0; i < 2; i++)
27080 {
27081 HOST_WIDE_INT sub_value = value_hi_lo[i];
27082
27083 if (sub_value & sign_bit)
27084 sub_value |= upper_32bits;
27085
27086 op2_hi_lo[i] = GEN_INT (sub_value);
27087
27088 /* If this is an AND instruction, check to see if we need to load
27089 the value in a register. */
27090 if (code == AND && sub_value != -1 && sub_value != 0
27091 && !and_operand (op2_hi_lo[i], SImode))
27092 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
27093 }
27094 }
27095 }
27096
27097 for (i = 0; i < 2; i++)
27098 {
27099 /* Split large IOR/XOR operations. */
27100 if ((code == IOR || code == XOR)
27101 && CONST_INT_P (op2_hi_lo[i])
27102 && !complement_final_p
27103 && !complement_op1_p
27104 && !complement_op2_p
27105 && !logical_const_operand (op2_hi_lo[i], SImode))
27106 {
27107 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
27108 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
27109 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
27110 rtx tmp = gen_reg_rtx (SImode);
27111
27112 /* Make sure the constant is sign extended. */
27113 if ((hi_16bits & sign_bit) != 0)
27114 hi_16bits |= upper_32bits;
27115
27116 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
27117 code, SImode, false, false, false);
27118
27119 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
27120 code, SImode, false, false, false);
27121 }
27122 else
27123 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
27124 code, SImode, complement_final_p,
27125 complement_op1_p, complement_op2_p);
27126 }
27127
27128 return;
27129 }
27130
27131 /* Split the insns that make up boolean operations operating on multiple GPR
27132 registers. The boolean MD patterns ensure that the inputs either are
27133 exactly the same as the output registers, or there is no overlap.
27134
27135 OPERANDS is an array containing the destination and two input operands.
27136 CODE is the base operation (AND, IOR, XOR, NOT).
27137 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27138 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27139 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
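
/* An illustrative example: after reload on a 64-bit target, a TImode AND
   held in GPRs is split into one DImode AND per constituent register,
   e.g. (with hypothetical registers):

	and 3,5,7
	and 4,6,8  */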
27140
27141 void
27142 rs6000_split_logical (rtx operands[3],
27143 enum rtx_code code,
27144 bool complement_final_p,
27145 bool complement_op1_p,
27146 bool complement_op2_p)
27147 {
27148 machine_mode mode = GET_MODE (operands[0]);
27149 machine_mode sub_mode;
27150 rtx op0, op1, op2;
27151 int sub_size, regno0, regno1, nregs, i;
27152
27153 /* If this is DImode, use the specialized version that can run before
27154 register allocation. */
27155 if (mode == DImode && !TARGET_POWERPC64)
27156 {
27157 rs6000_split_logical_di (operands, code, complement_final_p,
27158 complement_op1_p, complement_op2_p);
27159 return;
27160 }
27161
27162 op0 = operands[0];
27163 op1 = operands[1];
27164 op2 = (code == NOT) ? NULL_RTX : operands[2];
27165 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
27166 sub_size = GET_MODE_SIZE (sub_mode);
27167 regno0 = REGNO (op0);
27168 regno1 = REGNO (op1);
27169
27170 gcc_assert (reload_completed);
27171 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
27172 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
27173
27174 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
27175 gcc_assert (nregs > 1);
27176
27177 if (op2 && REG_P (op2))
27178 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
27179
27180 for (i = 0; i < nregs; i++)
27181 {
27182 int offset = i * sub_size;
27183 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
27184 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
27185 rtx sub_op2 = ((code == NOT)
27186 ? NULL_RTX
27187 : simplify_subreg (sub_mode, op2, mode, offset));
27188
27189 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
27190 complement_final_p, complement_op1_p,
27191 complement_op2_p);
27192 }
27193
27194 return;
27195 }
27196
27197 /* Emit instructions to move SRC to DST. Called by splitters for
27198 multi-register moves. It will emit at most one instruction for
27199 each register that is accessed; that is, it won't emit li/lis pairs
27200 (or equivalent for 64-bit code). One of SRC or DST must be a hard
27201 register. */
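
/* An illustrative example: copying a TImode value from r5:r6 to r6:r7 must
   be done backwards (r7 = r6 first, then r6 = r5) so that the overlapping
   register r6 is not clobbered before it has been read.  */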
27202
27203 void
27204 rs6000_split_multireg_move (rtx dst, rtx src)
27205 {
27206 /* The register number of the first register being moved. */
27207 int reg;
27208 /* The mode that is to be moved. */
27209 machine_mode mode;
27210 /* The mode that the move is being done in, and its size. */
27211 machine_mode reg_mode;
27212 int reg_mode_size;
27213 /* The number of registers that will be moved. */
27214 int nregs;
27215
27216 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
27217 mode = GET_MODE (dst);
27218 nregs = hard_regno_nregs (reg, mode);
27219
27220 /* If we have a vector quad register for MMA, and this is a load or store,
27221 see if we can use vector paired load/stores. */
27222 if (mode == XOmode && TARGET_MMA
27223 && (MEM_P (dst) || MEM_P (src)))
27224 {
27225 reg_mode = OOmode;
27226 nregs /= 2;
27227 }
27228 /* If we have a vector pair/quad mode, split it into two/four separate
27229 vectors. */
27230 else if (mode == OOmode || mode == XOmode)
27231 reg_mode = V1TImode;
27232 else if (FP_REGNO_P (reg))
27233 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
27234 (TARGET_HARD_FLOAT ? DFmode : SFmode);
27235 else if (ALTIVEC_REGNO_P (reg))
27236 reg_mode = V16QImode;
27237 else
27238 reg_mode = word_mode;
27239 reg_mode_size = GET_MODE_SIZE (reg_mode);
27240
27241 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
27242
27243 /* TDmode residing in FP registers is special, since the ISA requires that
27244 the lower-numbered word of a register pair is always the most significant
27245 word, even in little-endian mode. This does not match the usual subreg
27246 semantics, so we cannot use simplify_gen_subreg in those cases. Access
27247 the appropriate constituent registers "by hand" in little-endian mode.
27248
27249 Note we do not need to check for destructive overlap here since TDmode
27250 can only reside in even/odd register pairs. */
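
/* For example (illustrative): a TDmode value in the pair f2:f3 keeps its
   most significant word in f2 even on little-endian, so we index the
   registers by hand (REGNO + nregs - 1 - i below) instead of relying on
   subreg ordering.  */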
27251 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
27252 {
27253 rtx p_src, p_dst;
27254 int i;
27255
27256 for (i = 0; i < nregs; i++)
27257 {
27258 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
27259 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
27260 else
27261 p_src = simplify_gen_subreg (reg_mode, src, mode,
27262 i * reg_mode_size);
27263
27264 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
27265 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
27266 else
27267 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
27268 i * reg_mode_size);
27269
27270 emit_insn (gen_rtx_SET (p_dst, p_src));
27271 }
27272
27273 return;
27274 }
27275
27276 /* The __vector_pair and __vector_quad modes are multi-register
27277 modes, so if we have to load or store the registers, we have to be
27278 careful to properly swap them if we're in little endian mode
27279 below. This means the last register gets the first memory
27280 location. We also need to be careful of using the right register
27281 numbers if we are splitting XO to OO. */
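
/* An illustrative example: storing an XOmode accumulator held in vs0..vs3
   on a little-endian target stores the pair vs2:vs3 to the first 32 bytes
   and vs0:vs1 to the next 32, i.e. the last register pair goes to the
   lowest memory address.  */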
27282 if (mode == OOmode || mode == XOmode)
27283 {
27284 nregs = hard_regno_nregs (reg, mode);
27285 int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
27286 if (MEM_P (dst))
27287 {
27288 unsigned offset = 0;
27289 unsigned size = GET_MODE_SIZE (reg_mode);
27290
27291 /* If we are reading an accumulator register, we have to
27292 deprime it before we can access it. */
27293 if (TARGET_MMA
27294 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27295 emit_insn (gen_mma_xxmfacc (src, src));
27296
27297 for (int i = 0; i < nregs; i += reg_mode_nregs)
27298 {
27299 unsigned subreg
27300 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27301 rtx dst2 = adjust_address (dst, reg_mode, offset);
27302 rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
27303 offset += size;
27304 emit_insn (gen_rtx_SET (dst2, src2));
27305 }
27306
27307 return;
27308 }
27309
27310 if (MEM_P (src))
27311 {
27312 unsigned offset = 0;
27313 unsigned size = GET_MODE_SIZE (reg_mode);
27314
27315 for (int i = 0; i < nregs; i += reg_mode_nregs)
27316 {
27317 unsigned subreg
27318 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27319 rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
27320 rtx src2 = adjust_address (src, reg_mode, offset);
27321 offset += size;
27322 emit_insn (gen_rtx_SET (dst2, src2));
27323 }
27324
27325 /* If we are writing an accumulator register, we have to
27326 prime it after we've written it. */
27327 if (TARGET_MMA
27328 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27329 emit_insn (gen_mma_xxmtacc (dst, dst));
27330
27331 return;
27332 }
27333
27334 if (GET_CODE (src) == UNSPEC
27335 || GET_CODE (src) == UNSPEC_VOLATILE)
27336 {
27337 gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
27338 || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
27339 gcc_assert (REG_P (dst));
27340 if (GET_MODE (src) == XOmode)
27341 gcc_assert (FP_REGNO_P (REGNO (dst)));
27342 if (GET_MODE (src) == OOmode)
27343 gcc_assert (VSX_REGNO_P (REGNO (dst)));
27344
27345 int nvecs = XVECLEN (src, 0);
27346 for (int i = 0; i < nvecs; i++)
27347 {
27348 rtx op;
27349 int regno = reg + i;
27350
27351 if (WORDS_BIG_ENDIAN)
27352 {
27353 op = XVECEXP (src, 0, i);
27354
27355 /* If we are loading an even VSX register and the memory location
27356 is adjacent to the next register's memory location (if any),
27357 then we can load them both with one LXVP instruction. */
27358 if ((regno & 1) == 0)
27359 {
27360 rtx op2 = XVECEXP (src, 0, i + 1);
27361 if (adjacent_mem_locations (op, op2) == op)
27362 {
27363 op = adjust_address (op, OOmode, 0);
27364 /* Skip the next register, since we're going to
27365 load it together with this register. */
27366 i++;
27367 }
27368 }
27369 }
27370 else
27371 {
27372 op = XVECEXP (src, 0, nvecs - i - 1);
27373
27374 /* If we are loading an even VSX register and the memory location
27375 is adjacent to the next register's memory location (if any),
27376 then we can load them both with one LXVP instruction. */
27377 if ((regno & 1) == 0)
27378 {
27379 rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
27380 if (adjacent_mem_locations (op2, op) == op2)
27381 {
27382 op = adjust_address (op2, OOmode, 0);
27383 /* Skip the next register, since we're going to
27384 load it together with this register. */
27385 i++;
27386 }
27387 }
27388 }
27389
27390 rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
27391 emit_insn (gen_rtx_SET (dst_i, op));
27392 }
27393
27394 /* We are writing an accumulator register, so we have to
27395 prime it after we've written it. */
27396 if (GET_MODE (src) == XOmode)
27397 emit_insn (gen_mma_xxmtacc (dst, dst));
27398
27399 return;
27400 }
27401
27402 /* Register -> register moves can use common code. */
27403 }
27404
27405 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
27406 {
27407 /* If we are reading an accumulator register, we have to
27408 deprime it before we can access it. */
27409 if (TARGET_MMA
27410 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27411 emit_insn (gen_mma_xxmfacc (src, src));
27412
27413 /* Move register range backwards, if we might have destructive
27414 overlap. */
27415 int i;
27416 /* XO/OO are opaque so cannot use subregs. */
27417 if (mode == OOmode || mode == XOmode)
27418 {
27419 for (i = nregs - 1; i >= 0; i--)
27420 {
27421 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
27422 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
27423 emit_insn (gen_rtx_SET (dst_i, src_i));
27424 }
27425 }
27426 else
27427 {
27428 for (i = nregs - 1; i >= 0; i--)
27429 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27430 i * reg_mode_size),
27431 simplify_gen_subreg (reg_mode, src, mode,
27432 i * reg_mode_size)));
27433 }
27434
27435 /* If we are writing an accumulator register, we have to
27436 prime it after we've written it. */
27437 if (TARGET_MMA
27438 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27439 emit_insn (gen_mma_xxmtacc (dst, dst));
27440 }
27441 else
27442 {
27443 int i;
27444 int j = -1;
27445 bool used_update = false;
27446 rtx restore_basereg = NULL_RTX;
27447
27448 if (MEM_P (src) && INT_REGNO_P (reg))
27449 {
27450 rtx breg;
27451
27452 if (GET_CODE (XEXP (src, 0)) == PRE_INC
27453 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
27454 {
27455 rtx delta_rtx;
27456 breg = XEXP (XEXP (src, 0), 0);
27457 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
27458 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
27459 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
27460 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27461 src = replace_equiv_address (src, breg);
27462 }
27463 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
27464 {
27465 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
27466 {
27467 rtx basereg = XEXP (XEXP (src, 0), 0);
27468 if (TARGET_UPDATE)
27469 {
27470 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
27471 emit_insn (gen_rtx_SET (ndst,
27472 gen_rtx_MEM (reg_mode,
27473 XEXP (src, 0))));
27474 used_update = true;
27475 }
27476 else
27477 emit_insn (gen_rtx_SET (basereg,
27478 XEXP (XEXP (src, 0), 1)));
27479 src = replace_equiv_address (src, basereg);
27480 }
27481 else
27482 {
27483 rtx basereg = gen_rtx_REG (Pmode, reg);
27484 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
27485 src = replace_equiv_address (src, basereg);
27486 }
27487 }
27488
27489 breg = XEXP (src, 0);
27490 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
27491 breg = XEXP (breg, 0);
27492
27493 /* If the base register we are using to address memory is
27494 also a destination reg, then change that register last. */
27495 if (REG_P (breg)
27496 && REGNO (breg) >= REGNO (dst)
27497 && REGNO (breg) < REGNO (dst) + nregs)
27498 j = REGNO (breg) - REGNO (dst);
27499 }
27500 else if (MEM_P (dst) && INT_REGNO_P (reg))
27501 {
27502 rtx breg;
27503
27504 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
27505 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
27506 {
27507 rtx delta_rtx;
27508 breg = XEXP (XEXP (dst, 0), 0);
27509 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
27510 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
27511 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
27512
27513 /* We have to update the breg before doing the store.
27514 Use store with update, if available. */
27515
27516 if (TARGET_UPDATE)
27517 {
27518 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27519 emit_insn (TARGET_32BIT
27520 ? (TARGET_POWERPC64
27521 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
27522 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
27523 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
27524 used_update = true;
27525 }
27526 else
27527 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27528 dst = replace_equiv_address (dst, breg);
27529 }
27530 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
27531 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
27532 {
27533 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
27534 {
27535 rtx basereg = XEXP (XEXP (dst, 0), 0);
27536 if (TARGET_UPDATE)
27537 {
27538 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27539 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
27540 XEXP (dst, 0)),
27541 nsrc));
27542 used_update = true;
27543 }
27544 else
27545 emit_insn (gen_rtx_SET (basereg,
27546 XEXP (XEXP (dst, 0), 1)));
27547 dst = replace_equiv_address (dst, basereg);
27548 }
27549 else
27550 {
27551 rtx basereg = XEXP (XEXP (dst, 0), 0);
27552 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
27553 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
27554 && REG_P (basereg)
27555 && REG_P (offsetreg)
27556 && REGNO (basereg) != REGNO (offsetreg));
27557 if (REGNO (basereg) == 0)
27558 {
27559 rtx tmp = offsetreg;
27560 offsetreg = basereg;
27561 basereg = tmp;
27562 }
27563 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
27564 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
27565 dst = replace_equiv_address (dst, basereg);
27566 }
27567 }
27568 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
27569 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
27570 }
27571
27572 /* If we are reading an accumulator register, we have to
27573 deprime it before we can access it. */
27574 if (TARGET_MMA && REG_P (src)
27575 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27576 emit_insn (gen_mma_xxmfacc (src, src));
27577
27578 for (i = 0; i < nregs; i++)
27579 {
27580 /* Calculate index to next subword. */
27581 ++j;
27582 if (j == nregs)
27583 j = 0;
27584
27585 /* If the compiler already emitted the move of the first word via a
27586 store with update, there is no need to do anything. */
27587 if (j == 0 && used_update)
27588 continue;
27589
27590 /* XO/OO are opaque so cannot use subregs. */
27591 if (mode == OOmode || mode == XOmode)
27592 {
27593 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
27594 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
27595 emit_insn (gen_rtx_SET (dst_i, src_i));
27596 }
27597 else
27598 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27599 j * reg_mode_size),
27600 simplify_gen_subreg (reg_mode, src, mode,
27601 j * reg_mode_size)));
27602 }
27603
27604 /* If we are writing an accumulator register, we have to
27605 prime it after we've written it. */
27606 if (TARGET_MMA && REG_P (dst)
27607 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27608 emit_insn (gen_mma_xxmtacc (dst, dst));
27609
27610 if (restore_basereg != NULL_RTX)
27611 emit_insn (restore_basereg);
27612 }
27613 }
27614 \f
27615 /* Return true if the peephole2 can combine a load involving a combination of
27616 an addis instruction and a load with an offset that can be fused together on
27617 a power8. */
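
/* An illustrative example of a fusible pair (registers hypothetical):

	addis 9,2,var@toc@ha
	lwz 9,var@toc@l(9)

   The addis target is also the register being loaded, so the pair can be
   fused on a power8.  */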
27618
27619 bool
27620 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
27621 rtx addis_value, /* addis value. */
27622 rtx target, /* target register that is loaded. */
27623 rtx mem) /* bottom part of the memory addr. */
27624 {
27625 rtx addr;
27626 rtx base_reg;
27627
27628 /* Validate arguments. */
27629 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
27630 return false;
27631
27632 if (!base_reg_operand (target, GET_MODE (target)))
27633 return false;
27634
27635 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
27636 return false;
27637
27638 /* Allow sign/zero extension. */
27639 if (GET_CODE (mem) == ZERO_EXTEND
27640 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
27641 mem = XEXP (mem, 0);
27642
27643 if (!MEM_P (mem))
27644 return false;
27645
27646 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
27647 return false;
27648
27649 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
27650 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
27651 return false;
27652
27653 /* Validate that the register used to load the high value is either the
27654 register being loaded, or we can safely replace its use.
27655
27656 This function is only called from the peephole2 pass and we assume that
27657 there are 2 instructions in the peephole (addis and load), so we want to
27658 check that the target register is not used in the memory address and that
27659 the register holding the addis result is dead after the peephole.
27660 if (REGNO (addis_reg) != REGNO (target))
27661 {
27662 if (reg_mentioned_p (target, mem))
27663 return false;
27664
27665 if (!peep2_reg_dead_p (2, addis_reg))
27666 return false;
27667
27668 /* If the target register being loaded is the stack pointer, we must
27669 avoid loading any other value into it, even temporarily. */
27670 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
27671 return false;
27672 }
27673
27674 base_reg = XEXP (addr, 0);
27675 return REGNO (addis_reg) == REGNO (base_reg);
27676 }
27677
27678 /* During the peephole2 pass, adjust and expand the insns for a load fusion
27679 sequence. We adjust the addis register to use the target register. If the
27680 load sign extends, we adjust the code to do a zero-extending load followed
27681 by an explicit sign extension, since the fusion only covers zero-extending
27682 loads.
27683
27684 The operands are:
27685 operands[0] register set with addis (to be replaced with target)
27686 operands[1] value set via addis
27687 operands[2] target register being loaded
27688 operands[3] D-form memory reference using operands[0]. */
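
/* For instance (an illustrative sketch): a sign-extending fused load of a
   halfword is emitted as the zero-extending form plus an explicit sign
   extension afterwards, roughly:

	addis 9,2,var@toc@ha
	lhz 9,var@toc@l(9)
	extsh 9,9  */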
27689
27690 void
27691 expand_fusion_gpr_load (rtx *operands)
27692 {
27693 rtx addis_value = operands[1];
27694 rtx target = operands[2];
27695 rtx orig_mem = operands[3];
27696 rtx new_addr, new_mem, orig_addr, offset;
27697 enum rtx_code plus_or_lo_sum;
27698 machine_mode target_mode = GET_MODE (target);
27699 machine_mode extend_mode = target_mode;
27700 machine_mode ptr_mode = Pmode;
27701 enum rtx_code extend = UNKNOWN;
27702
27703 if (GET_CODE (orig_mem) == ZERO_EXTEND
27704 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
27705 {
27706 extend = GET_CODE (orig_mem);
27707 orig_mem = XEXP (orig_mem, 0);
27708 target_mode = GET_MODE (orig_mem);
27709 }
27710
27711 gcc_assert (MEM_P (orig_mem));
27712
27713 orig_addr = XEXP (orig_mem, 0);
27714 plus_or_lo_sum = GET_CODE (orig_addr);
27715 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
27716
27717 offset = XEXP (orig_addr, 1);
27718 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
27719 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
27720
27721 if (extend != UNKNOWN)
27722 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
27723
27724 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
27725 UNSPEC_FUSION_GPR);
27726 emit_insn (gen_rtx_SET (target, new_mem));
27727
27728 if (extend == SIGN_EXTEND)
27729 {
27730 int sub_off = ((BYTES_BIG_ENDIAN)
27731 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
27732 : 0);
27733 rtx sign_reg
27734 = simplify_subreg (target_mode, target, extend_mode, sub_off);
27735
27736 emit_insn (gen_rtx_SET (target,
27737 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
27738 }
27739
27740 return;
27741 }
27742
27743 /* Emit the addis instruction that will be part of a fused instruction
27744 sequence. */
27745
27746 void
27747 emit_fusion_addis (rtx target, rtx addis_value)
27748 {
27749 rtx fuse_ops[10];
27750 const char *addis_str = NULL;
27751
27752 /* Emit the addis instruction. */
27753 fuse_ops[0] = target;
27754 if (satisfies_constraint_L (addis_value))
27755 {
27756 fuse_ops[1] = addis_value;
27757 addis_str = "lis %0,%v1";
27758 }
27759
27760 else if (GET_CODE (addis_value) == PLUS)
27761 {
27762 rtx op0 = XEXP (addis_value, 0);
27763 rtx op1 = XEXP (addis_value, 1);
27764
27765 if (REG_P (op0) && CONST_INT_P (op1)
27766 && satisfies_constraint_L (op1))
27767 {
27768 fuse_ops[1] = op0;
27769 fuse_ops[2] = op1;
27770 addis_str = "addis %0,%1,%v2";
27771 }
27772 }
27773
27774 else if (GET_CODE (addis_value) == HIGH)
27775 {
27776 rtx value = XEXP (addis_value, 0);
27777 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
27778 {
27779 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
27780 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
27781 if (TARGET_ELF)
27782 addis_str = "addis %0,%2,%1@toc@ha";
27783
27784 else if (TARGET_XCOFF)
27785 addis_str = "addis %0,%1@u(%2)";
27786
27787 else
27788 gcc_unreachable ();
27789 }
27790
27791 else if (GET_CODE (value) == PLUS)
27792 {
27793 rtx op0 = XEXP (value, 0);
27794 rtx op1 = XEXP (value, 1);
27795
27796 if (GET_CODE (op0) == UNSPEC
27797 && XINT (op0, 1) == UNSPEC_TOCREL
27798 && CONST_INT_P (op1))
27799 {
27800 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
27801 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
27802 fuse_ops[3] = op1;
27803 if (TARGET_ELF)
27804 addis_str = "addis %0,%2,%1+%3@toc@ha";
27805
27806 else if (TARGET_XCOFF)
27807 addis_str = "addis %0,%1+%3@u(%2)";
27808
27809 else
27810 gcc_unreachable ();
27811 }
27812 }
27813
27814 else if (satisfies_constraint_L (value))
27815 {
27816 fuse_ops[1] = value;
27817 addis_str = "lis %0,%v1";
27818 }
27819
27820 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
27821 {
27822 fuse_ops[1] = value;
27823 addis_str = "lis %0,%1@ha";
27824 }
27825 }
27826
27827 if (!addis_str)
27828 fatal_insn ("Could not generate addis value for fusion", addis_value);
27829
27830 output_asm_insn (addis_str, fuse_ops);
27831 }
27832
27833 /* Emit a D-form load or store instruction that is the second instruction
27834 of a fusion sequence. */
27835
27836 static void
27837 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
27838 {
27839 rtx fuse_ops[10];
27840 char insn_template[80];
27841
27842 fuse_ops[0] = load_reg;
27843 fuse_ops[1] = addis_reg;
27844
27845 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
27846 {
27847 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
27848 fuse_ops[2] = offset;
27849 output_asm_insn (insn_template, fuse_ops);
27850 }
27851
27852 else if (GET_CODE (offset) == UNSPEC
27853 && XINT (offset, 1) == UNSPEC_TOCREL)
27854 {
27855 if (TARGET_ELF)
27856 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
27857
27858 else if (TARGET_XCOFF)
27859 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27860
27861 else
27862 gcc_unreachable ();
27863
27864 fuse_ops[2] = XVECEXP (offset, 0, 0);
27865 output_asm_insn (insn_template, fuse_ops);
27866 }
27867
27868 else if (GET_CODE (offset) == PLUS
27869 && GET_CODE (XEXP (offset, 0)) == UNSPEC
27870 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
27871 && CONST_INT_P (XEXP (offset, 1)))
27872 {
27873 rtx tocrel_unspec = XEXP (offset, 0);
27874 if (TARGET_ELF)
27875 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
27876
27877 else if (TARGET_XCOFF)
27878 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
27879
27880 else
27881 gcc_unreachable ();
27882
27883 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
27884 fuse_ops[3] = XEXP (offset, 1);
27885 output_asm_insn (insn_template, fuse_ops);
27886 }
27887
27888 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
27889 {
27890 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
27891
27892 fuse_ops[2] = offset;
27893 output_asm_insn (insn_template, fuse_ops);
27894 }
27895
27896 else
27897 fatal_insn ("Unable to generate load/store offset for fusion", offset);
27898
27899 return;
27900 }
27901
27902 /* Given an address, convert it into the addis and load offset parts. Addresses
27903 created during the peephole2 process look like:
27904 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
27905 (unspec [(...)] UNSPEC_TOCREL)) */
27906
27907 static void
27908 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
27909 {
27910 rtx hi, lo;
27911
27912 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
27913 {
27914 hi = XEXP (addr, 0);
27915 lo = XEXP (addr, 1);
27916 }
27917 else
27918 gcc_unreachable ();
27919
27920 *p_hi = hi;
27921 *p_lo = lo;
27922 }
27923
27924 /* Return a string to fuse an addis instruction with a GPR load into the same
27925 register that was the target of the addis instruction. The address that is
27926 used is the logical address that was formed during peephole2:
27927 (lo_sum (high) (low-part))
27928
27929 The code is complicated, so we call output_asm_insn directly, and just
27930 return "". */
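
/* For example (illustrative, with a hypothetical symbol and register),
   a fused DImode TOC-relative load comes out as:

	addis 10,2,var@toc@ha
	ld 10,var@toc@l(10)  */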
27931
27932 const char *
27933 emit_fusion_gpr_load (rtx target, rtx mem)
27934 {
27935 rtx addis_value;
27936 rtx addr;
27937 rtx load_offset;
27938 const char *load_str = NULL;
27939 machine_mode mode;
27940
27941 if (GET_CODE (mem) == ZERO_EXTEND)
27942 mem = XEXP (mem, 0);
27943
27944 gcc_assert (REG_P (target) && MEM_P (mem));
27945
27946 addr = XEXP (mem, 0);
27947 fusion_split_address (addr, &addis_value, &load_offset);
27948
27949 /* Now emit the load instruction to the same register. */
27950 mode = GET_MODE (mem);
27951 switch (mode)
27952 {
27953 case E_QImode:
27954 load_str = "lbz";
27955 break;
27956
27957 case E_HImode:
27958 load_str = "lhz";
27959 break;
27960
27961 case E_SImode:
27962 case E_SFmode:
27963 load_str = "lwz";
27964 break;
27965
27966 case E_DImode:
27967 case E_DFmode:
27968 gcc_assert (TARGET_POWERPC64);
27969 load_str = "ld";
27970 break;
27971
27972 default:
27973 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
27974 }
27975
27976 /* Emit the addis instruction. */
27977 emit_fusion_addis (target, addis_value);
27978
27979 /* Emit the D-form load instruction. */
27980 emit_fusion_load (target, target, load_offset, load_str);
27981
27982 return "";
27983 }
27984 \f
27985 /* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
27986 ignores it then. */
27987 static GTY(()) tree atomic_hold_decl;
27988 static GTY(()) tree atomic_clear_decl;
27989 static GTY(()) tree atomic_update_decl;
27990
27991 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
27992 static void
27993 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
27994 {
27995 if (!TARGET_HARD_FLOAT)
27996 {
27997 #ifdef RS6000_GLIBC_ATOMIC_FENV
27998 if (atomic_hold_decl == NULL_TREE)
27999 {
28000 atomic_hold_decl
28001 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28002 get_identifier ("__atomic_feholdexcept"),
28003 build_function_type_list (void_type_node,
28004 double_ptr_type_node,
28005 NULL_TREE));
28006 TREE_PUBLIC (atomic_hold_decl) = 1;
28007 DECL_EXTERNAL (atomic_hold_decl) = 1;
28008 }
28009
28010 if (atomic_clear_decl == NULL_TREE)
28011 {
28012 atomic_clear_decl
28013 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28014 get_identifier ("__atomic_feclearexcept"),
28015 build_function_type_list (void_type_node,
28016 NULL_TREE));
28017 TREE_PUBLIC (atomic_clear_decl) = 1;
28018 DECL_EXTERNAL (atomic_clear_decl) = 1;
28019 }
28020
28021 tree const_double = build_qualified_type (double_type_node,
28022 TYPE_QUAL_CONST);
28023 tree const_double_ptr = build_pointer_type (const_double);
28024 if (atomic_update_decl == NULL_TREE)
28025 {
28026 atomic_update_decl
28027 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28028 get_identifier ("__atomic_feupdateenv"),
28029 build_function_type_list (void_type_node,
28030 const_double_ptr,
28031 NULL_TREE));
28032 TREE_PUBLIC (atomic_update_decl) = 1;
28033 DECL_EXTERNAL (atomic_update_decl) = 1;
28034 }
28035
28036 tree fenv_var = create_tmp_var_raw (double_type_node);
28037 TREE_ADDRESSABLE (fenv_var) = 1;
28038 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
28039 build4 (TARGET_EXPR, double_type_node, fenv_var,
28040 void_node, NULL_TREE, NULL_TREE));
28041
28042 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
28043 *clear = build_call_expr (atomic_clear_decl, 0);
28044 *update = build_call_expr (atomic_update_decl, 1,
28045 fold_convert (const_double_ptr, fenv_addr));
28046 #endif
28047 return;
28048 }
28049
28050 tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
28051 tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
28052 tree call_mffs = build_call_expr (mffs, 0);
28053
28054 /* Generates the equivalent of feholdexcept (&fenv_var)
28055
28056 *fenv_var = __builtin_mffs ();
28057 double fenv_hold;
28058 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
28059 __builtin_mtfsf (0xff, fenv_hold); */
28060
28061 /* Mask to clear everything except for the rounding modes and non-IEEE
28062 arithmetic flag. */
28063 const unsigned HOST_WIDE_INT hold_exception_mask
28064 = HOST_WIDE_INT_C (0xffffffff00000007);
28065
28066 tree fenv_var = create_tmp_var_raw (double_type_node);
28067
28068 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
28069 NULL_TREE, NULL_TREE);
28070
28071 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
28072 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
28073 build_int_cst (uint64_type_node,
28074 hold_exception_mask));
28075
28076 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28077 fenv_llu_and);
28078
28079 tree hold_mtfsf = build_call_expr (mtfsf, 2,
28080 build_int_cst (unsigned_type_node, 0xff),
28081 fenv_hold_mtfsf);
28082
28083 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
28084
28085 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
28086
28087 double fenv_clear = __builtin_mffs ();
28088 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
28089 __builtin_mtfsf (0xff, fenv_clear); */
28090
28091 /* Mask that keeps only the upper word, clearing all of the exception,
28092 enable, rounding mode and non-IEEE bits in the lower word. */
28093 const unsigned HOST_WIDE_INT clear_exception_mask
28094 = HOST_WIDE_INT_C (0xffffffff00000000);
28095
28096 tree fenv_clear = create_tmp_var_raw (double_type_node);
28097
28098 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
28099 call_mffs, NULL_TREE, NULL_TREE);
28100
28101 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
28102 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
28103 fenv_clean_llu,
28104 build_int_cst (uint64_type_node,
28105 clear_exception_mask));
28106
28107 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28108 fenv_clear_llu_and);
28109
28110 tree clear_mtfsf = build_call_expr (mtfsf, 2,
28111 build_int_cst (unsigned_type_node, 0xff),
28112 fenv_clear_mtfsf);
28113
28114 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
28115
28116 /* Generates the equivalent of feupdateenv (&fenv_var)
28117
28118 double old_fenv = __builtin_mffs ();
28119 double fenv_update;
28120 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
28121 (*(uint64_t*)fenv_var & 0x1ff80fff);
28122 __builtin_mtfsf (0xff, fenv_update); */
28123
28124 const unsigned HOST_WIDE_INT update_exception_mask
28125 = HOST_WIDE_INT_C (0xffffffff1fffff00);
28126 const unsigned HOST_WIDE_INT new_exception_mask
28127 = HOST_WIDE_INT_C (0x1ff80fff);
28128
28129 tree old_fenv = create_tmp_var_raw (double_type_node);
28130 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
28131 call_mffs, NULL_TREE, NULL_TREE);
28132
28133 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
28134 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
28135 build_int_cst (uint64_type_node,
28136 update_exception_mask));
28137
28138 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
28139 build_int_cst (uint64_type_node,
28140 new_exception_mask));
28141
28142 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
28143 old_llu_and, new_llu_and);
28144
28145 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28146 new_llu_mask);
28147
28148 tree update_mtfsf = build_call_expr (mtfsf, 2,
28149 build_int_cst (unsigned_type_node, 0xff),
28150 fenv_update_mtfsf);
28151
28152 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
28153 }
28154
28155 void
28156 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
28157 {
28158 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28159
28160 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28161 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28162
28163 /* The destination of the vmrgew instruction layout is:
28164 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
28165 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28166 vmrgew instruction will be correct. */
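
/* A worked example (illustrative), big-endian: with src1 = {a0, a1} and
   src2 = {b0, b1}, rtx_tmp0 = {a0, b0} and rtx_tmp1 = {a1, b1}. After the
   conversions, vmrgew merges the even words, so dst = {a0, a1, b0, b1}
   converted to single precision.  */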
28167 if (BYTES_BIG_ENDIAN)
28168 {
28169 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
28170 GEN_INT (0)));
28171 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
28172 GEN_INT (3)));
28173 }
28174 else
28175 {
28176 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
28177 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
28178 }
28179
28180 rtx_tmp2 = gen_reg_rtx (V4SFmode);
28181 rtx_tmp3 = gen_reg_rtx (V4SFmode);
28182
28183 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
28184 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
28185
28186 if (BYTES_BIG_ENDIAN)
28187 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28188 else
28189 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
28190 }
28191
28192 void
28193 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
28194 {
28195 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28196
28197 rtx_tmp0 = gen_reg_rtx (V2DImode);
28198 rtx_tmp1 = gen_reg_rtx (V2DImode);
28199
28200 /* The destination of the vmrgew instruction layout is:
28201 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
28202 Setup rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28203 vmrgew instruction will be correct. */
28204 if (BYTES_BIG_ENDIAN)
28205 {
28206 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
28207 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
28208 }
28209 else
28210 {
28211 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
28212 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
28213 }
28214
28215 rtx_tmp2 = gen_reg_rtx (V4SFmode);
28216 rtx_tmp3 = gen_reg_rtx (V4SFmode);
28217
28218 if (signed_convert)
28219 {
28220 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
28221 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
28222 }
28223 else
28224 {
28225 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
28226 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
28227 }
28228
28229 if (BYTES_BIG_ENDIAN)
28230 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28231 else
28232 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
28233 }
28234
28235 void
28236 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
28237 rtx src2)
28238 {
28239 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28240
28241 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28242 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28243
28244 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
28245 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
28246
28247 rtx_tmp2 = gen_reg_rtx (V4SImode);
28248 rtx_tmp3 = gen_reg_rtx (V4SImode);
28249
28250 if (signed_convert)
28251 {
28252 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
28253 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
28254 }
28255 else
28256 {
28257 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
28258 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
28259 }
28260
28261 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
28262 }
28263
28264 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
28265
28266 static bool
28267 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
28268 optimization_type opt_type)
28269 {
28270 switch (op)
28271 {
28272 case rsqrt_optab:
28273 return (opt_type == OPTIMIZE_FOR_SPEED
28274 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
28275
28276 default:
28277 return true;
28278 }
28279 }
28280
28281 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28282
28283 static HOST_WIDE_INT
28284 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
28285 {
28286 if (TREE_CODE (exp) == STRING_CST
28287 && (STRICT_ALIGNMENT || !optimize_size))
28288 return MAX (align, BITS_PER_WORD);
28289 return align;
28290 }
28291
28292 /* Implement TARGET_STARTING_FRAME_OFFSET. */
28293
28294 static HOST_WIDE_INT
28295 rs6000_starting_frame_offset (void)
28296 {
28297 if (FRAME_GROWS_DOWNWARD)
28298 return 0;
28299 return RS6000_STARTING_FRAME_OFFSET;
28300 }
28301 \f
28302 /* Internal function to return the built-in function id for the complex
28303 multiply operation for a given mode. */
28304
28305 static inline built_in_function
28306 complex_multiply_builtin_code (machine_mode mode)
28307 {
28308 gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
28309 int func = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
28310 return (built_in_function) func;
28311 }
28312
28313 /* Internal function to return the built-in function id for the complex divide
28314 operation for a given mode. */
28315
28316 static inline built_in_function
28317 complex_divide_builtin_code (machine_mode mode)
28318 {
28319 gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
28320 int func = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
28321 return (built_in_function) func;
28322 }
28323
28324 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
28325 function names from <foo>l to <foo>f128 if the default long double type is
28326 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
28327 include file switches the names on systems that support long double as IEEE
28328 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
28329 In the future, glibc will export names like __ieee128_sinf128 and we can
28330 switch to using those instead of using sinf128, which pollutes the user's
28331 namespace.
28332
28333 This will switch the names for Fortran math functions as well (which doesn't
28334 use math.h). However, Fortran needs other changes to the compiler and
28335 library before you can switch the real*16 type at compile time.
28336
28337 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
28338 only do this transformation if the __float128 type is enabled. This
28339 prevents us from doing the transformation on older 32-bit ports that might
28340 have enabled using IEEE 128-bit floating point as the default long double
28341 type.
28342
28343 We also use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the
28344 function names used for complex multiply and divide to the appropriate
28345 names. */
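
/* Some illustrative examples of the mapping: when long double is IEEE
   128-bit, __builtin_sinl becomes __sinieee128, scanf-family names gain an
   __isoc99_*ieee128 form, and complex long double multiply uses __mulkc3
   instead of __multc3.  */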
28346
28347 static tree
28348 rs6000_mangle_decl_assembler_name (tree decl, tree id)
28349 {
28350 /* Handle complex multiply/divide. For IEEE 128-bit, use __mulkc3 or
28351 __divkc3 and for IBM 128-bit use __multc3 and __divtc3. */
28352 if (TARGET_FLOAT128_TYPE
28353 && TREE_CODE (decl) == FUNCTION_DECL
28354 && DECL_IS_UNDECLARED_BUILTIN (decl)
28355 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28356 {
28357 built_in_function id = DECL_FUNCTION_CODE (decl);
28358 const char *newname = NULL;
28359
28360 if (id == complex_multiply_builtin_code (KCmode))
28361 newname = "__mulkc3";
28362
28363 else if (id == complex_multiply_builtin_code (ICmode))
28364 newname = "__multc3";
28365
28366 else if (id == complex_multiply_builtin_code (TCmode))
28367 newname = (TARGET_IEEEQUAD) ? "__mulkc3" : "__multc3";
28368
28369 else if (id == complex_divide_builtin_code (KCmode))
28370 newname = "__divkc3";
28371
28372 else if (id == complex_divide_builtin_code (ICmode))
28373 newname = "__divtc3";
28374
28375 else if (id == complex_divide_builtin_code (TCmode))
28376 newname = (TARGET_IEEEQUAD) ? "__divkc3" : "__divtc3";
28377
28378 if (newname)
28379 {
28380 if (TARGET_DEBUG_BUILTIN)
28381 fprintf (stderr, "Map complex mul/div => %s\n", newname);
28382
28383 return get_identifier (newname);
28384 }
28385 }
28386
28387 /* Map long double built-in functions if long double is IEEE 128-bit. */
28388 if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
28389 && TREE_CODE (decl) == FUNCTION_DECL
28390 && DECL_IS_UNDECLARED_BUILTIN (decl)
28391 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28392 {
28393 size_t len = IDENTIFIER_LENGTH (id);
28394 const char *name = IDENTIFIER_POINTER (id);
28395 char *newname = NULL;
28396
28397 /* See if it is one of the built-in functions with an unusual name. */
28398 switch (DECL_FUNCTION_CODE (decl))
28399 {
28400 case BUILT_IN_DREML:
28401 newname = xstrdup ("__remainderieee128");
28402 break;
28403
28404 case BUILT_IN_GAMMAL:
28405 newname = xstrdup ("__lgammaieee128");
28406 break;
28407
28408 case BUILT_IN_GAMMAL_R:
28409 case BUILT_IN_LGAMMAL_R:
28410 newname = xstrdup ("__lgammaieee128_r");
28411 break;
28412
28413 case BUILT_IN_NEXTTOWARD:
28414 newname = xstrdup ("__nexttoward_to_ieee128");
28415 break;
28416
28417 case BUILT_IN_NEXTTOWARDF:
28418 newname = xstrdup ("__nexttowardf_to_ieee128");
28419 break;
28420
28421 case BUILT_IN_NEXTTOWARDL:
28422 newname = xstrdup ("__nexttowardieee128");
28423 break;
28424
28425 case BUILT_IN_POW10L:
28426 newname = xstrdup ("__exp10ieee128");
28427 break;
28428
28429 case BUILT_IN_SCALBL:
28430 newname = xstrdup ("__scalbieee128");
28431 break;
28432
28433 case BUILT_IN_SIGNIFICANDL:
28434 newname = xstrdup ("__significandieee128");
28435 break;
28436
28437 case BUILT_IN_SINCOSL:
28438 newname = xstrdup ("__sincosieee128");
28439 break;
28440
28441 default:
28442 break;
28443 }
28444
28445 /* Update the __builtin_*printf and __builtin_*scanf functions. */
28446 if (!newname)
28447 {
28448 size_t printf_len = strlen ("printf");
28449 size_t scanf_len = strlen ("scanf");
28450 size_t printf_chk_len = strlen ("printf_chk");
28451
28452 if (len >= printf_len
28453 && strcmp (name + len - printf_len, "printf") == 0)
28454 newname = xasprintf ("__%sieee128", name);
28455
28456 else if (len >= scanf_len
28457 && strcmp (name + len - scanf_len, "scanf") == 0)
28458 newname = xasprintf ("__isoc99_%sieee128", name);
28459
28460 else if (len >= printf_chk_len
28461 && strcmp (name + len - printf_chk_len, "printf_chk") == 0)
28462 newname = xasprintf ("%sieee128", name);
28463
28464 else if (name[len - 1] == 'l')
28465 {
28466 bool uses_ieee128_p = false;
28467 tree type = TREE_TYPE (decl);
28468 machine_mode ret_mode = TYPE_MODE (type);
28469
28470 /* See if the function returns an IEEE 128-bit floating point type or
28471 complex type. */
28472 if (ret_mode == TFmode || ret_mode == TCmode)
28473 uses_ieee128_p = true;
28474 else
28475 {
28476 function_args_iterator args_iter;
28477 tree arg;
28478
28479 /* See if the function passes an IEEE 128-bit floating point type
28480 or complex type. */
28481 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
28482 {
28483 machine_mode arg_mode = TYPE_MODE (arg);
28484 if (arg_mode == TFmode || arg_mode == TCmode)
28485 {
28486 uses_ieee128_p = true;
28487 break;
28488 }
28489 }
28490 }
28491
28492 /* If we passed or returned an IEEE 128-bit floating point type,
28493 change the name. Use __<name>ieee128, instead of <name>l. */
28494 if (uses_ieee128_p)
28495 newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
28496 }
28497 }
28498
28499 if (newname)
28500 {
28501 if (TARGET_DEBUG_BUILTIN)
28502 fprintf (stderr, "Map %s => %s\n", name, newname);
28503
28504 id = get_identifier (newname);
28505 free (newname);
28506 }
28507 }
28508
28509 return id;
28510 }
28511
28512 /* Predict whether the given loop in gimple will be transformed in the RTL
28513 doloop_optimize pass. */
28514
28515 static bool
28516 rs6000_predict_doloop_p (struct loop *loop)
28517 {
28518 gcc_assert (loop);
28519
28520 /* On rs6000, targetm.can_use_doloop_p is actually
28521 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
28522 if (loop->inner != NULL)
28523 {
28524 if (dump_file && (dump_flags & TDF_DETAILS))
28525 fprintf (dump_file, "Predict doloop failure due to"
28526 " loop nesting.\n");
28527 return false;
28528 }
28529
28530 return true;
28531 }
28532
28533 /* Implement TARGET_PREFERRED_DOLOOP_MODE. */
28534
28535 static machine_mode
28536 rs6000_preferred_doloop_mode (machine_mode)
28537 {
28538 return word_mode;
28539 }
28540
28541 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
28542
28543 static bool
28544 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
28545 {
28546 gcc_assert (MEM_P (mem));
28547
28548 /* curr_insn_transform()'s handling of subregs cannot handle Altivec
28549 AND-style addresses, so don't allow MEMs with those address types to be
28550 substituted as an equivalent expression. See PR93974 for details. */
28551 if (GET_CODE (XEXP (mem, 0)) == AND)
28552 return true;
28553
28554 return false;
28555 }
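
/* A sketch of the address form rejected above: Altivec memory accesses
   ignore the low 4 bits of the address, which RTL expresses as an AND of
   the address with -16, e.g. (hypothetical RTL, for illustration only):

     (mem:V4SI (and:DI (reg:DI 9) (const_int -16)))  */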

/* Implement TARGET_INVALID_CONVERSION.  */

static const char *
rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
{
  /* Make sure we're working with the canonical types.  */
  if (TYPE_CANONICAL (fromtype) != NULL_TREE)
    fromtype = TYPE_CANONICAL (fromtype);
  if (TYPE_CANONICAL (totype) != NULL_TREE)
    totype = TYPE_CANONICAL (totype);

  machine_mode frommode = TYPE_MODE (fromtype);
  machine_mode tomode = TYPE_MODE (totype);

  if (frommode != tomode)
    {
      /* Do not allow conversions to/from XOmode and OOmode types.  */
      if (frommode == XOmode)
        return N_("invalid conversion from type %<__vector_quad%>");
      if (tomode == XOmode)
        return N_("invalid conversion to type %<__vector_quad%>");
      if (frommode == OOmode)
        return N_("invalid conversion from type %<__vector_pair%>");
      if (tomode == OOmode)
        return N_("invalid conversion to type %<__vector_pair%>");
    }

  /* Conversion allowed.  */
  return NULL;
}
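
/* An illustrative source-level sketch of what gets rejected: given

     __vector_pair p;
     __vector_quad q = (__vector_quad) p;

   OOmode (256 bits) and XOmode (512 bits) differ, so the hook above
   reports "invalid conversion to type '__vector_quad'".  */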

/* Convert an SFmode constant to its integer bit pattern.  */

long
rs6000_const_f32_to_i32 (rtx operand)
{
  long value;
  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);

  gcc_assert (GET_MODE (operand) == SFmode);
  REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
  return value;
}

void
rs6000_emit_xxspltidp_v2df (rtx dst, long value)
{
  if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
    inform (input_location,
            "the result for the xxspltidp instruction "
            "is undefined for subnormal input values");
  emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
}
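
/* Worked example (IEEE 754 single-precision encodings): 1.0f is
   0x3F800000 (sign 0, biased exponent 127, mantissa 0).  An input such as
   0x00000001 has a zero exponent and a non-zero mantissa, i.e. it is
   subnormal, so the inform () above fires and the instruction's result is
   undefined.  */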

/* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC.  */

static bool
rs6000_gen_pic_addr_diff_vec (void)
{
  return rs6000_relative_jumptables;
}

void
rs6000_output_addr_vec_elt (FILE *file, int value)
{
  const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
  char buf[100];

  fprintf (file, "%s", directive);
  ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
  assemble_name (file, buf);
  fprintf (file, "\n");
}
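
/* For instance (a sketch; the exact directive and local label prefix are
   target dependent via DOUBLE_INT_ASM_OP and ASM_GENERATE_INTERNAL_LABEL),
   VALUE == 5 might produce:

       .long .L5          (32-bit)
       .quad .L5          (a typical 64-bit DOUBLE_INT_ASM_OP)  */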

\f
/* Copy an integer constant to the vector constant structure.  */

static void
constant_int_to_128bit_vector (rtx op,
                               machine_mode mode,
                               size_t byte_num,
                               vec_const_128bit_type *info)
{
  unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
  unsigned bitsize = GET_MODE_BITSIZE (mode);

  for (int shift = bitsize - 8; shift >= 0; shift -= 8)
    info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
}
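
/* Worked example: an HImode constant 0x1234 copied at BYTE_NUM 0 stores
   bytes[0] = 0x12 and bytes[1] = 0x34; the bytes are always laid out in
   big-endian order within the structure.  */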

/* Copy a floating point constant to the vector constant structure.  */

static void
constant_fp_to_128bit_vector (rtx op,
                              machine_mode mode,
                              size_t byte_num,
                              vec_const_128bit_type *info)
{
  unsigned bitsize = GET_MODE_BITSIZE (mode);
  unsigned num_words = bitsize / 32;
  const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
  long real_words[VECTOR_128BIT_WORDS];

  /* Make sure we don't overflow the real_words array and that it is
     filled completely.  */
  gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);

  real_to_target (real_words, rtype, mode);

  /* Iterate over each 32-bit word in the floating point constant.
     real_to_target writes the words out in target-endian order, so arrange
     them here so that the bytes are stored in big-endian order.  */
  for (unsigned num = 0; num < num_words; num++)
    {
      unsigned endian_num = (BYTES_BIG_ENDIAN
                             ? num
                             : num_words - 1 - num);

      unsigned uvalue = real_words[endian_num];
      for (int shift = 32 - 8; shift >= 0; shift -= 8)
        info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
    }

  /* Mark that this constant involves floating point.  */
  info->fp_constant_p = true;
}
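
/* Worked example (IEEE 754 double encoding): DFmode 1.0 has the bit
   pattern 0x3FF0000000000000, so at BYTE_NUM 0 this stores
   bytes[0..7] = { 0x3F, 0xF0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }.  */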

/* Convert a vector constant OP with mode MODE to a vector 128-bit constant
   structure INFO.

   Break the constant out into bytes, half words, words, and double words.
   Return true if we have successfully converted the constant.

   We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
   constants.  Integer and floating point scalar constants are splatted to
   fill out the vector.  */

bool
vec_const_128bit_to_bytes (rtx op,
                           machine_mode mode,
                           vec_const_128bit_type *info)
{
  /* Initialize the constant structure.  */
  memset ((void *) info, 0, sizeof (vec_const_128bit_type));

  /* Assume CONST_INTs are DImode.  */
  if (mode == VOIDmode)
    mode = CONST_INT_P (op) ? DImode : GET_MODE (op);

  if (mode == VOIDmode)
    return false;

  unsigned size = GET_MODE_SIZE (mode);
  bool splat_p = false;

  if (size > VECTOR_128BIT_BYTES)
    return false;

  /* Set up the bits.  */
  switch (GET_CODE (op))
    {
      /* Integer constants, default to double word.  */
    case CONST_INT:
      {
        constant_int_to_128bit_vector (op, mode, 0, info);
        splat_p = true;
        break;
      }

      /* Floating point constants.  */
    case CONST_DOUBLE:
      {
        /* Fail if the floating point constant is the wrong mode.  */
        if (GET_MODE (op) != mode)
          return false;

        /* SFmode scalars are stored in DFmode format.  */
        if (mode == SFmode)
          {
            mode = DFmode;
            size = GET_MODE_SIZE (DFmode);
          }

        constant_fp_to_128bit_vector (op, mode, 0, info);
        splat_p = true;
        break;
      }

      /* Vector constants, iterate over each element.  On little endian
         systems, we have to reverse the element numbers.  */
    case CONST_VECTOR:
      {
        /* Fail if the vector constant is the wrong mode or size.  */
        if (GET_MODE (op) != mode
            || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
          return false;

        machine_mode ele_mode = GET_MODE_INNER (mode);
        size_t ele_size = GET_MODE_SIZE (ele_mode);
        size_t nunits = GET_MODE_NUNITS (mode);

        for (size_t num = 0; num < nunits; num++)
          {
            rtx ele = CONST_VECTOR_ELT (op, num);
            size_t byte_num = (BYTES_BIG_ENDIAN
                               ? num
                               : nunits - 1 - num) * ele_size;

            if (CONST_INT_P (ele))
              constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
            else if (CONST_DOUBLE_P (ele))
              constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
            else
              return false;
          }

        break;
      }

      /* Treat VEC_DUPLICATE of a constant just like a vector constant.
         Since we are duplicating the element, we don't have to worry about
         endian issues.  */
    case VEC_DUPLICATE:
      {
        /* Fail if the vector duplicate is the wrong mode or size.  */
        if (GET_MODE (op) != mode
            || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
          return false;

        machine_mode ele_mode = GET_MODE_INNER (mode);
        size_t ele_size = GET_MODE_SIZE (ele_mode);
        rtx ele = XEXP (op, 0);
        size_t nunits = GET_MODE_NUNITS (mode);

        if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele))
          return false;

        for (size_t num = 0; num < nunits; num++)
          {
            size_t byte_num = num * ele_size;

            if (CONST_INT_P (ele))
              constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
            else
              constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
          }

        break;
      }

      /* Anything else, just return failure.  */
    default:
      return false;
    }

  /* Splat the constant to fill 128 bits if desired.  */
  if (splat_p && size < VECTOR_128BIT_BYTES)
    {
      if ((VECTOR_128BIT_BYTES % size) != 0)
        return false;

      for (size_t offset = size;
           offset < VECTOR_128BIT_BYTES;
           offset += size)
        memcpy ((void *) &info->bytes[offset],
                (void *) &info->bytes[0],
                size);
    }

  /* Remember the original size.  */
  info->original_size = size;

  /* Determine if the bytes are all the same.  */
  unsigned char first_byte = info->bytes[0];
  info->all_bytes_same = true;
  for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++)
    if (first_byte != info->bytes[i])
      {
        info->all_bytes_same = false;
        break;
      }

  /* Pack half words together & determine if all of the half words are the
     same.  */
  for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++)
    info->half_words[i] = ((info->bytes[i * 2] << 8)
                           | info->bytes[(i * 2) + 1]);

  unsigned short first_hword = info->half_words[0];
  info->all_half_words_same = true;
  for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++)
    if (first_hword != info->half_words[i])
      {
        info->all_half_words_same = false;
        break;
      }

  /* Pack words together & determine if all of the words are the same.  */
  for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++)
    info->words[i] = ((info->bytes[i * 4] << 24)
                      | (info->bytes[(i * 4) + 1] << 16)
                      | (info->bytes[(i * 4) + 2] << 8)
                      | info->bytes[(i * 4) + 3]);

  info->all_words_same
    = (info->words[0] == info->words[1]
       && info->words[0] == info->words[2]
       && info->words[0] == info->words[3]);

  /* Pack double words together & determine if all of the double words are
     the same.  */
  for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++)
    {
      unsigned HOST_WIDE_INT d_word = 0;
      for (size_t j = 0; j < 8; j++)
        d_word = (d_word << 8) | info->bytes[(i * 8) + j];

      info->double_words[i] = d_word;
    }

  info->all_double_words_same
    = (info->double_words[0] == info->double_words[1]);

  return true;
}
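
/* Worked example of the conversion above: (const_int 1), taken as DImode,
   fills the first double word with 0x0000000000000001 and is then splatted
   to 128 bits, giving:

     bytes[]         = 00 ... 00 01 00 ... 00 01   (16 bytes)
     all_bytes_same  = false  (0x00 vs. 0x01)
     words[]         = { 0, 1, 0, 1 },  all_words_same = false
     double_words[]  = { 1, 1 },  all_double_words_same = true  */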

/* Determine if an IEEE 128-bit constant can be loaded with LXVKQ.  Return
   zero if the LXVKQ instruction cannot be used.  Otherwise return the
   immediate value to be used with the LXVKQ instruction.  */

unsigned
constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
{
  /* The instruction is only supported with power10 code generation when
     IEEE 128-bit floating point hardware and VSX registers are
     available.  */
  if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
      || !TARGET_VSX)
    return 0;

  /* All of the constants that can be generated by LXVKQ have zeros in the
     bottom 3 words.  */
  if (vsx_const->words[1] != 0
      || vsx_const->words[2] != 0
      || vsx_const->words[3] != 0)
    return 0;

  /* See if we have a match for the first word.  */
  switch (vsx_const->words[0])
    {
    case 0x3FFF0000U: return 1;   /* IEEE 128-bit +1.0.  */
    case 0x40000000U: return 2;   /* IEEE 128-bit +2.0.  */
    case 0x40008000U: return 3;   /* IEEE 128-bit +3.0.  */
    case 0x40010000U: return 4;   /* IEEE 128-bit +4.0.  */
    case 0x40014000U: return 5;   /* IEEE 128-bit +5.0.  */
    case 0x40018000U: return 6;   /* IEEE 128-bit +6.0.  */
    case 0x4001C000U: return 7;   /* IEEE 128-bit +7.0.  */
    case 0x7FFF0000U: return 8;   /* IEEE 128-bit +Infinity.  */
    case 0x7FFF8000U: return 9;   /* IEEE 128-bit quiet NaN.  */
    case 0x80000000U: return 16;  /* IEEE 128-bit -0.0.  */
    case 0xBFFF0000U: return 17;  /* IEEE 128-bit -1.0.  */
    case 0xC0000000U: return 18;  /* IEEE 128-bit -2.0.  */
    case 0xC0008000U: return 19;  /* IEEE 128-bit -3.0.  */
    case 0xC0010000U: return 20;  /* IEEE 128-bit -4.0.  */
    case 0xC0014000U: return 21;  /* IEEE 128-bit -5.0.  */
    case 0xC0018000U: return 22;  /* IEEE 128-bit -6.0.  */
    case 0xC001C000U: return 23;  /* IEEE 128-bit -7.0.  */
    case 0xFFFF0000U: return 24;  /* IEEE 128-bit -Infinity.  */

      /* Anything else cannot be loaded.  */
    default:
      break;
    }

  return 0;
}
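
/* Worked example: IEEE 128-bit +1.0 has sign 0, biased exponent 0x3FFF and
   an all-zero mantissa, so words[0] == 0x3FFF0000 and words[1..3] == 0;
   the switch above maps it to the LXVKQ immediate 1.  */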

/* Determine if a vector constant can be loaded with XXSPLTIW.  Return zero
   if the XXSPLTIW instruction cannot be used.  Otherwise return the
   immediate value to be used with the XXSPLTIW instruction.  */

unsigned
constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
{
  if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
    return 0;

  if (!vsx_const->all_words_same)
    return 0;

  /* If we can use XXSPLTIB, don't generate XXSPLTIW.  */
  if (vsx_const->all_bytes_same)
    return 0;

  /* See if we can use VSPLTISH or VSPLTISW.  */
  if (vsx_const->all_half_words_same)
    {
      short sign_h_word = vsx_const->half_words[0];
      if (EASY_VECTOR_15 (sign_h_word))
        return 0;
    }

  int sign_word = vsx_const->words[0];
  if (EASY_VECTOR_15 (sign_word))
    return 0;

  return vsx_const->words[0];
}
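
/* Worked example: a V4SI splat of 0x12345678 has differing bytes
   (0x12 vs. 0x34) and differing half words (0x1234 vs. 0x5678), and
   0x12345678 is not a 5-bit signed immediate, so 0x12345678 is returned
   for XXSPLTIW.  By contrast, a splat of 0xFFFFFFFF has all bytes the
   same and is left to XXSPLTIB.  */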

/* Determine if a vector constant can be loaded with XXSPLTIDP.  Return zero
   if the XXSPLTIDP instruction cannot be used.  Otherwise return the
   immediate value to be used with the XXSPLTIDP instruction.  */

unsigned
constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
{
  if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
    return 0;

  /* Reject if the two 64-bit segments are not the same.  */
  if (!vsx_const->all_double_words_same)
    return 0;

  /* If the bytes, half words, or words are all the same, don't use
     XXSPLTIDP.  Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or
     VSPLTISW).  */
  if (vsx_const->all_bytes_same
      || vsx_const->all_half_words_same
      || vsx_const->all_words_same)
    return 0;

  unsigned HOST_WIDE_INT value = vsx_const->double_words[0];

  /* Avoid values that look like DFmode NaNs, except for the normal NaN bit
     pattern and the signaling NaN bit pattern.  Recognize infinity and
     negative infinity.  */

  /* Bit representation of DFmode normal quiet NaN.  */
#define RS6000_CONST_DF_NAN	HOST_WIDE_INT_UC (0x7ff8000000000000)

  /* Bit representation of DFmode normal signaling NaN.  */
#define RS6000_CONST_DF_NANS	HOST_WIDE_INT_UC (0x7ff4000000000000)

  /* Bit representation of DFmode positive infinity.  */
#define RS6000_CONST_DF_INF	HOST_WIDE_INT_UC (0x7ff0000000000000)

  /* Bit representation of DFmode negative infinity.  */
#define RS6000_CONST_DF_NEG_INF	HOST_WIDE_INT_UC (0xfff0000000000000)

  if (value != RS6000_CONST_DF_NAN
      && value != RS6000_CONST_DF_NANS
      && value != RS6000_CONST_DF_INF
      && value != RS6000_CONST_DF_NEG_INF)
    {
      /* The IEEE 754 64-bit floating format has 1 bit for the sign, 11 bits
         for the exponent, and 52 bits for the mantissa (not counting the
         hidden bit used for normal numbers).  NaN values have the exponent
         set to all 1 bits and the mantissa non-zero (mantissa == 0 is
         infinity).  */

      int df_exponent = (value >> 52) & 0x7ff;
      unsigned HOST_WIDE_INT
        df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);

      if (df_exponent == 0x7ff && df_mantissa != 0)  /* Other NaNs.  */
        return 0;

      /* Avoid values that are DFmode subnormal values.  Subnormal numbers
         have the exponent all 0 bits and the mantissa non-zero.  If the
         value is subnormal, then the hidden bit in the mantissa is not
         set.  */
      if (df_exponent == 0 && df_mantissa != 0)  /* Subnormal.  */
        return 0;
    }

  /* Change the representation to a DFmode constant.  */
  long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };

  /* real_from_target takes the target words in target order.  */
  if (!BYTES_BIG_ENDIAN)
    std::swap (df_words[0], df_words[1]);

  REAL_VALUE_TYPE rv_type;
  real_from_target (&rv_type, df_words, DFmode);

  const REAL_VALUE_TYPE *rv = &rv_type;

  /* Validate that the number can be stored as an SFmode value.  */
  if (!exact_real_truncate (SFmode, rv))
    return 0;

  /* Validate that the number is not an SFmode subnormal value (exponent is
     0, mantissa field is non-zero), which is undefined for the XXSPLTIDP
     instruction.  */
  long sf_value;
  real_to_target (&sf_value, rv, SFmode);

  /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the
     exponent, and 23 bits for the mantissa.  Subnormal numbers have the
     exponent all 0 bits and the mantissa non-zero.  */
  long sf_exponent = (sf_value >> 23) & 0xFF;
  long sf_mantissa = sf_value & 0x7FFFFF;

  if (sf_exponent == 0 && sf_mantissa != 0)
    return 0;

  /* Return the immediate to be used.  */
  return sf_value;
}
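
/* Worked example (IEEE 754 encodings): a V2DF splat of 0.5 has both double
   words equal to 0x3FE0000000000000.  0.5 truncates exactly to SFmode as
   0x3F000000 (biased exponent 126, mantissa 0), which is neither a NaN nor
   subnormal, so 0x3F000000 is returned as the XXSPLTIDP immediate.  */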

/* Now we have only two opaque types, the __vector_quad and __vector_pair
   built-in types.  They are target specific and only available when MMA is
   supported.  When MMA is supported, this function simply returns false.
   Otherwise, it checks whether the given gimple STMT is an assignment, asm
   or call statement that unexpectedly uses either of these two opaque
   types; if so, it raises an error message and returns true, otherwise it
   returns false.  */

bool
rs6000_opaque_type_invalid_use_p (gimple *stmt)
{
  if (TARGET_MMA)
    return false;

  /* If the given TYPE is one MMA opaque type, emit the corresponding
     error messages and return true, otherwise return false.  */
  auto check_and_error_invalid_use = [](tree type)
  {
    tree mv = TYPE_MAIN_VARIANT (type);
    if (mv == vector_quad_type_node)
      {
        error ("type %<__vector_quad%> requires the %qs option", "-mmma");
        return true;
      }
    else if (mv == vector_pair_type_node)
      {
        error ("type %<__vector_pair%> requires the %qs option", "-mmma");
        return true;
      }
    return false;
  };

  if (stmt)
    {
      /* The usage of MMA opaque types is very limited for now; checking
         gassign, gasm and gcall statements is enough so far.  */
      if (gassign *ga = dyn_cast<gassign *> (stmt))
        {
          tree lhs = gimple_assign_lhs (ga);
          tree type = TREE_TYPE (lhs);
          if (check_and_error_invalid_use (type))
            return true;
        }
      else if (gasm *gs = dyn_cast<gasm *> (stmt))
        {
          unsigned ninputs = gimple_asm_ninputs (gs);
          for (unsigned i = 0; i < ninputs; i++)
            {
              tree op = gimple_asm_input_op (gs, i);
              tree val = TREE_VALUE (op);
              tree type = TREE_TYPE (val);
              if (check_and_error_invalid_use (type))
                return true;
            }
          unsigned noutputs = gimple_asm_noutputs (gs);
          for (unsigned i = 0; i < noutputs; i++)
            {
              tree op = gimple_asm_output_op (gs, i);
              tree val = TREE_VALUE (op);
              tree type = TREE_TYPE (val);
              if (check_and_error_invalid_use (type))
                return true;
            }
        }
      else if (gcall *gc = dyn_cast<gcall *> (stmt))
        {
          unsigned nargs = gimple_call_num_args (gc);
          for (unsigned i = 0; i < nargs; i++)
            {
              tree arg = gimple_call_arg (gc, i);
              tree type = TREE_TYPE (arg);
              if (check_and_error_invalid_use (type))
                return true;
            }
        }
    }

  return false;
}
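
/* A hypothetical source-level sketch of what the check above catches:
   compiled without -mmma,

     __vector_quad a, b;
     void f (void) { b = a; }

   the assignment is a gassign whose LHS type is __vector_quad, so it is
   reported as "type '__vector_quad' requires the '-mmma' option".  */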

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"