1 // SPDX-License-Identifier: GPL-3.0-or-later
2 /* Subroutines used for code generation on IBM RS/6000.
3 Copyright (C) 1991-2023 Free Software Foundation, Inc.
4 Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it
9 under the terms of the GNU General Public License as published
10 by the Free Software Foundation; either version 3, or (at your
11 option) any later version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT
14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #define IN_TARGET_CODE 1
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "cfgloop.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "expmed.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "ira.h"
41 #include "recog.h"
42 #include "cgraph.h"
43 #include "diagnostic-core.h"
44 #include "insn-attr.h"
45 #include "flags.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "attribs.h"
49 #include "stor-layout.h"
50 #include "calls.h"
51 #include "print-tree.h"
52 #include "varasm.h"
53 #include "explow.h"
54 #include "expr.h"
55 #include "output.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "reload.h"
59 #include "sched-int.h"
60 #include "gimplify.h"
61 #include "gimple-iterator.h"
62 #include "gimple-fold.h"
63 #include "gimple-walk.h"
64 #include "ssa.h"
65 #include "tree-vectorizer.h"
66 #include "tree-ssa-propagate.h"
67 #include "intl.h"
68 #include "tm-constrs.h"
69 #include "target-globals.h"
70 #include "builtins.h"
71 #include "tree-vector-builder.h"
72 #include "context.h"
73 #include "tree-pass.h"
74 #include "symbol-summary.h"
75 #include "ipa-prop.h"
76 #include "ipa-fnsummary.h"
77 #include "except.h"
78 #include "case-cfn-macros.h"
79 #include "ppc-auxv.h"
80 #include "rs6000-internal.h"
81 #include "opts.h"
82
83 /* This file should be included last. */
84 #include "target-def.h"
85
86 extern tree rs6000_builtin_mask_for_load (void);
87 extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree);
88 extern tree rs6000_builtin_reciprocal (tree);
89
90 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
91 systems will also set long double to be IEEE 128-bit. AIX and Darwin
92 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
93 those systems will not pick up this default. This needs to be after all
94 of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
95 properly defined. */
96 #ifndef TARGET_IEEEQUAD_DEFAULT
97 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
98 #define TARGET_IEEEQUAD_DEFAULT 1
99 #else
100 #define TARGET_IEEEQUAD_DEFAULT 0
101 #endif
102 #endif
103
104 /* Don't enable PC-relative addressing if the target does not support it. */
105 #ifndef PCREL_SUPPORTED_BY_OS
106 #define PCREL_SUPPORTED_BY_OS 0
107 #endif
108
109 #ifdef USING_ELFOS_H
110 /* Counter for labels which are to be placed in .fixup. */
111 int fixuplabelno = 0;
112 #endif
113
114 /* Whether to use variant of AIX ABI for PowerPC64 Linux. */
115 int dot_symbols;
116
117 /* Specify the machine mode that pointers have. After generation of rtl, the
118 compiler makes no further distinction between pointers and any other objects
119 of this machine mode. */
120 scalar_int_mode rs6000_pmode;
121
122 /* Track use of r13 in 64-bit AIX TLS. */
123 static bool xcoff_tls_exec_model_detected = false;
124
125 /* Width in bits of a pointer. */
126 unsigned rs6000_pointer_size;
127
128 #ifdef HAVE_AS_GNU_ATTRIBUTE
129 # ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
130 # define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
131 # endif
132 /* Flag whether floating point values have been passed/returned.
133 Note that this doesn't say whether fprs are used, since the
134 Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
135 should be set for soft-float values passed in gprs and ieee128
136 values passed in vsx registers. */
137 bool rs6000_passes_float = false;
138 bool rs6000_passes_long_double = false;
139 /* Flag whether vector values have been passed/returned. */
140 bool rs6000_passes_vector = false;
141 /* Flag whether small (<= 8 byte) structures have been returned. */
142 bool rs6000_returns_struct = false;
143 #endif
144
145 /* Value is TRUE if register/mode pair is acceptable. */
146 static bool rs6000_hard_regno_mode_ok_p
147 [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
148
149 /* Maximum number of registers needed for a given register class and mode. */
150 unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];
151
152 /* How many registers are needed for a given register and mode. */
153 unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
154
155 /* Map register number to register class. */
156 enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
157
158 static int dbg_cost_ctrl;
159
160 /* Flag to say the TOC is initialized. */
161 int toc_initialized, need_toc_init;
162 char toc_label_name[10];
163
164 /* Cached value of rs6000_variable_issue. This is cached in
165 rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
166 static short cached_can_issue_more;
167
168 static GTY(()) section *read_only_data_section;
169 static GTY(()) section *private_data_section;
170 static GTY(()) section *tls_data_section;
171 static GTY(()) section *tls_private_data_section;
172 static GTY(()) section *read_only_private_data_section;
173 static GTY(()) section *sdata2_section;
174
175 section *toc_section = 0;
176
177 /* Describe the vector unit used for modes. */
178 enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
179 enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];
180
181 /* Register classes for various constraints that are based on the target
182 switches. */
183 enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
184
185 /* Describe the alignment of a vector. */
186 int rs6000_vector_align[NUM_MACHINE_MODES];
187
188 /* What modes to automatically generate reciprocal divide estimate (fre) and
189 reciprocal sqrt (frsqrte) for. */
190 unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
191
192 /* Masks to determine which reciprocal estimate instructions to generate
193 automatically. */
194 enum rs6000_recip_mask {
195 RECIP_SF_DIV = 0x001, /* Use divide estimate */
196 RECIP_DF_DIV = 0x002,
197 RECIP_V4SF_DIV = 0x004,
198 RECIP_V2DF_DIV = 0x008,
199
200 RECIP_SF_RSQRT = 0x010, /* Use reciprocal sqrt estimate. */
201 RECIP_DF_RSQRT = 0x020,
202 RECIP_V4SF_RSQRT = 0x040,
203 RECIP_V2DF_RSQRT = 0x080,
204
205 /* Various combination of flags for -mrecip=xxx. */
206 RECIP_NONE = 0,
207 RECIP_ALL = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
208 | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
209 | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),
210
211 RECIP_HIGH_PRECISION = RECIP_ALL,
212
213 /* On low precision machines like the power5, don't enable double precision
214 reciprocal square root estimate, since it isn't accurate enough. */
215 RECIP_LOW_PRECISION = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
216 };
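/* For reference, arithmetic implied by the values above: RECIP_ALL is 0x0ff,
   so RECIP_LOW_PRECISION is 0x0ff & ~(0x020 | 0x080) = 0x05f, i.e. the four
   divide-estimate bits plus the single-precision scalar and vector rsqrt
   bits.  */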
217
218 /* -mrecip options. */
219 static struct
220 {
221 const char *string; /* option name */
222 unsigned int mask; /* mask bits to set */
223 } recip_options[] = {
224 { "all", RECIP_ALL },
225 { "none", RECIP_NONE },
226 { "div", (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
227 | RECIP_V2DF_DIV) },
228 { "divf", (RECIP_SF_DIV | RECIP_V4SF_DIV) },
229 { "divd", (RECIP_DF_DIV | RECIP_V2DF_DIV) },
230 { "rsqrt", (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
231 | RECIP_V2DF_RSQRT) },
232 { "rsqrtf", (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
233 { "rsqrtd", (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
234 };
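/* A minimal sketch (not the actual option handling, which also supports a
   "!" negation prefix) of how a single -mrecip=<name> argument would map
   onto mask bits via the table above; ARG is a hypothetical NUL-terminated
   option string:

     static unsigned int
     recip_mask_for_name (const char *arg)
     {
       for (size_t i = 0; i < ARRAY_SIZE (recip_options); i++)
         if (strcmp (arg, recip_options[i].string) == 0)
           return recip_options[i].mask;
       return RECIP_NONE;
     }  */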
235
236 /* On PowerPC, we have a limited number of target clones that we care about
237 which means we can use an array to hold the options, rather than having more
238 elaborate data structures to identify each possible variation. Order the
239 clones from the default to the highest ISA. */
240 enum {
241 CLONE_DEFAULT = 0, /* default clone. */
242 CLONE_ISA_2_05, /* ISA 2.05 (power6). */
243 CLONE_ISA_2_06, /* ISA 2.06 (power7). */
244 CLONE_ISA_2_07, /* ISA 2.07 (power8). */
245 CLONE_ISA_3_00, /* ISA 3.0 (power9). */
246 CLONE_ISA_3_1, /* ISA 3.1 (power10). */
247 CLONE_MAX
248 };
249
250 /* Map compiler ISA bits into HWCAP names. */
251 struct clone_map {
252 HOST_WIDE_INT isa_mask; /* rs6000_isa mask */
253 const char *name; /* name to use in __builtin_cpu_supports. */
254 };
255
256 static const struct clone_map rs6000_clone_map[CLONE_MAX] = {
257 { 0, "" }, /* Default options. */
258 { OPTION_MASK_CMPB, "arch_2_05" }, /* ISA 2.05 (power6). */
259 { OPTION_MASK_POPCNTD, "arch_2_06" }, /* ISA 2.06 (power7). */
260 { OPTION_MASK_P8_VECTOR, "arch_2_07" }, /* ISA 2.07 (power8). */
261 { OPTION_MASK_P9_VECTOR, "arch_3_00" }, /* ISA 3.0 (power9). */
262 { OPTION_MASK_POWER10, "arch_3_1" }, /* ISA 3.1 (power10). */
263 };
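/* Illustrative only: the NAME strings above are what user code passes to
   __builtin_cpu_supports when selecting among the clones, e.g.
   (power10_path and generic_path are hypothetical helpers):

     if (__builtin_cpu_supports ("arch_3_1"))
       power10_path ();
     else
       generic_path ();  */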
264
265
266 /* Newer LIBCs explicitly export this symbol to declare that they provide
267 the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB. We emit a
268 reference to this symbol whenever we expand a CPU builtin, so that
269 we never link against an old LIBC. */
270 const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";
271
272 /* True if we have expanded a CPU builtin. */
273 bool cpu_builtin_p = false;
274
275 /* Pointer to function (in rs6000-c.cc) that can define or undefine target
276 macros that have changed. Languages that don't support the preprocessor
277 don't link in rs6000-c.cc, so we can't call it directly. */
278 void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT);
279
280 /* Simplify register classes into simpler classifications. We assume
281 GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
282 check for standard register classes (gpr/floating/altivec/vsx) and
283 floating/vector classes (float/altivec/vsx). */
284
285 enum rs6000_reg_type {
286 NO_REG_TYPE,
287 PSEUDO_REG_TYPE,
288 GPR_REG_TYPE,
289 VSX_REG_TYPE,
290 ALTIVEC_REG_TYPE,
291 FPR_REG_TYPE,
292 SPR_REG_TYPE,
293 CR_REG_TYPE
294 };
295
296 /* Map register class to register type. */
297 static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
298
299 /* First/last register type for the 'normal' register types (i.e. general
300 purpose, floating point, altivec, and VSX registers). */
301 #define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
302
303 #define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
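/* Compile-time restatement (a sanity-check sketch, not in the original
   source) of the ordering assumption the two range checks above rely on.  */
static_assert (GPR_REG_TYPE < VSX_REG_TYPE
	       && VSX_REG_TYPE < ALTIVEC_REG_TYPE
	       && ALTIVEC_REG_TYPE < FPR_REG_TYPE,
	       "rs6000_reg_type range checks assume this enum order");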
304
305
306 /* Register classes we care about in secondary reload or when checking for
307 a legitimate address. We only need to worry about GPR, FPR, and Altivec
308 registers here, along with an ANY field that is the OR of the 3 classes. */
309
310 enum rs6000_reload_reg_type {
311 RELOAD_REG_GPR, /* General purpose registers. */
312 RELOAD_REG_FPR, /* Traditional floating point regs. */
313 RELOAD_REG_VMX, /* Altivec (VMX) registers. */
314 RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
315 N_RELOAD_REG
316 };
317
318 /* For setting up register classes, loop through the 3 register classes mapping
319 into real registers, and skip the ANY class, which is just an OR of the
320 bits. */
321 #define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
322 #define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
323
324 /* Map reload register type to a register in the register class. */
325 struct reload_reg_map_type {
326 const char *name; /* Register class name. */
327 int reg; /* Register in the register class. */
328 };
329
330 static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
331 { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
332 { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
333 { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
334 { "Any", -1 }, /* RELOAD_REG_ANY. */
335 };
336
337 /* Mask bits for each register class, indexed per mode. Historically the
338 compiler has been more restrictive about which types can do PRE_MODIFY
339 instead of PRE_INC and PRE_DEC, so keep track of separate bits for these two. */
340 typedef unsigned char addr_mask_type;
341
342 #define RELOAD_REG_VALID 0x01 /* Mode valid in register. */
343 #define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
344 #define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
345 #define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
346 #define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
347 #define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
348 #define RELOAD_REG_AND_M16 0x40 /* AND -16 addressing. */
349 #define RELOAD_REG_QUAD_OFFSET 0x80 /* Offset restricted to quad-aligned (multiple of 16). */
350
351 /* Masks of valid addressing modes, indexed by register type. */
352 struct rs6000_reg_addr {
353 enum insn_code reload_load; /* INSN to reload for loading. */
354 enum insn_code reload_store; /* INSN to reload for storing. */
355 enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
356 enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
357 enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
358 addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
359 bool scalar_in_vmx_p; /* Scalar value can go in VMX. */
360 };
361
362 static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
363
364 /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
365 static inline bool
366 mode_supports_pre_incdec_p (machine_mode mode)
367 {
368 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
369 != 0);
370 }
371
372 /* Helper function to say whether a mode supports PRE_MODIFY. */
373 static inline bool
374 mode_supports_pre_modify_p (machine_mode mode)
375 {
376 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
377 != 0);
378 }
379
380 /* Return true if we have D-form addressing in altivec registers. */
381 static inline bool
382 mode_supports_vmx_dform (machine_mode mode)
383 {
384 return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0);
385 }
386
387 /* Return true if we have D-form addressing in VSX registers. This addressing
388 is more limited than normal d-form addressing in that the offset must be
389 aligned on a 16-byte boundary. */
390 static inline bool
391 mode_supports_dq_form (machine_mode mode)
392 {
393 return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET)
394 != 0);
395 }
396
397 /* Given that there exists at least one variable that is set (produced)
398 by OUT_INSN and read (consumed) by IN_INSN, return true iff
399 IN_INSN represents one or more memory store operations and none of
400 the variables set by OUT_INSN is used by IN_INSN as the address of a
401 store operation. If either IN_INSN or OUT_INSN does not represent
402 a "single" RTL SET expression (as loosely defined by the
403 implementation of the single_set function) or a PARALLEL with only
404 SETs, CLOBBERs, and USEs inside, this function returns false.
405
406 This rs6000-specific version of store_data_bypass_p checks for
407 certain conditions that result in assertion failures (and internal
408 compiler errors) in the generic store_data_bypass_p function and
409 returns false rather than calling store_data_bypass_p if one of the
410 problematic conditions is detected. */
411
412 int
413 rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
414 {
415 rtx out_set, in_set;
416 rtx out_pat, in_pat;
417 rtx out_exp, in_exp;
418 int i, j;
419
420 in_set = single_set (in_insn);
421 if (in_set)
422 {
423 if (MEM_P (SET_DEST (in_set)))
424 {
425 out_set = single_set (out_insn);
426 if (!out_set)
427 {
428 out_pat = PATTERN (out_insn);
429 if (GET_CODE (out_pat) == PARALLEL)
430 {
431 for (i = 0; i < XVECLEN (out_pat, 0); i++)
432 {
433 out_exp = XVECEXP (out_pat, 0, i);
434 if ((GET_CODE (out_exp) == CLOBBER)
435 || (GET_CODE (out_exp) == USE))
436 continue;
437 else if (GET_CODE (out_exp) != SET)
438 return false;
439 }
440 }
441 }
442 }
443 }
444 else
445 {
446 in_pat = PATTERN (in_insn);
447 if (GET_CODE (in_pat) != PARALLEL)
448 return false;
449
450 for (i = 0; i < XVECLEN (in_pat, 0); i++)
451 {
452 in_exp = XVECEXP (in_pat, 0, i);
453 if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
454 continue;
455 else if (GET_CODE (in_exp) != SET)
456 return false;
457
458 if (MEM_P (SET_DEST (in_exp)))
459 {
460 out_set = single_set (out_insn);
461 if (!out_set)
462 {
463 out_pat = PATTERN (out_insn);
464 if (GET_CODE (out_pat) != PARALLEL)
465 return false;
466 for (j = 0; j < XVECLEN (out_pat, 0); j++)
467 {
468 out_exp = XVECEXP (out_pat, 0, j);
469 if ((GET_CODE (out_exp) == CLOBBER)
470 || (GET_CODE (out_exp) == USE))
471 continue;
472 else if (GET_CODE (out_exp) != SET)
473 return false;
474 }
475 }
476 }
477 }
478 }
479 return store_data_bypass_p (out_insn, in_insn);
480 }
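/* The predicate above is intended as a guard for define_bypass conditions
   in the power scheduling descriptions, along the lines of (illustrative
   example, not a real pattern):

     (define_bypass 2 "producer-insn" "consumer-store"
       "rs6000_store_data_bypass_p")

   which is why it must return false, rather than assert, on insn shapes
   the generic store_data_bypass_p cannot handle.  */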
481
482 \f
483 /* Processor costs (relative to an add) */
484
485 const struct processor_costs *rs6000_cost;
486
487 /* Instruction size costs on 32-bit processors. */
488 static const
489 struct processor_costs size32_cost = {
490 COSTS_N_INSNS (1), /* mulsi */
491 COSTS_N_INSNS (1), /* mulsi_const */
492 COSTS_N_INSNS (1), /* mulsi_const9 */
493 COSTS_N_INSNS (1), /* muldi */
494 COSTS_N_INSNS (1), /* divsi */
495 COSTS_N_INSNS (1), /* divdi */
496 COSTS_N_INSNS (1), /* fp */
497 COSTS_N_INSNS (1), /* dmul */
498 COSTS_N_INSNS (1), /* sdiv */
499 COSTS_N_INSNS (1), /* ddiv */
500 32, /* cache line size */
501 0, /* l1 cache */
502 0, /* l2 cache */
503 0, /* streams */
504 0, /* SF->DF convert */
505 };
506
507 /* Instruction size costs on 64-bit processors. */
508 static const
509 struct processor_costs size64_cost = {
510 COSTS_N_INSNS (1), /* mulsi */
511 COSTS_N_INSNS (1), /* mulsi_const */
512 COSTS_N_INSNS (1), /* mulsi_const9 */
513 COSTS_N_INSNS (1), /* muldi */
514 COSTS_N_INSNS (1), /* divsi */
515 COSTS_N_INSNS (1), /* divdi */
516 COSTS_N_INSNS (1), /* fp */
517 COSTS_N_INSNS (1), /* dmul */
518 COSTS_N_INSNS (1), /* sdiv */
519 COSTS_N_INSNS (1), /* ddiv */
520 128, /* cache line size */
521 0, /* l1 cache */
522 0, /* l2 cache */
523 0, /* streams */
524 0, /* SF->DF convert */
525 };
526
527 /* Instruction costs on RS64A processors. */
528 static const
529 struct processor_costs rs64a_cost = {
530 COSTS_N_INSNS (20), /* mulsi */
531 COSTS_N_INSNS (12), /* mulsi_const */
532 COSTS_N_INSNS (8), /* mulsi_const9 */
533 COSTS_N_INSNS (34), /* muldi */
534 COSTS_N_INSNS (65), /* divsi */
535 COSTS_N_INSNS (67), /* divdi */
536 COSTS_N_INSNS (4), /* fp */
537 COSTS_N_INSNS (4), /* dmul */
538 COSTS_N_INSNS (31), /* sdiv */
539 COSTS_N_INSNS (31), /* ddiv */
540 128, /* cache line size */
541 128, /* l1 cache */
542 2048, /* l2 cache */
543 1, /* streams */
544 0, /* SF->DF convert */
545 };
546
547 /* Instruction costs on MPCCORE processors. */
548 static const
549 struct processor_costs mpccore_cost = {
550 COSTS_N_INSNS (2), /* mulsi */
551 COSTS_N_INSNS (2), /* mulsi_const */
552 COSTS_N_INSNS (2), /* mulsi_const9 */
553 COSTS_N_INSNS (2), /* muldi */
554 COSTS_N_INSNS (6), /* divsi */
555 COSTS_N_INSNS (6), /* divdi */
556 COSTS_N_INSNS (4), /* fp */
557 COSTS_N_INSNS (5), /* dmul */
558 COSTS_N_INSNS (10), /* sdiv */
559 COSTS_N_INSNS (17), /* ddiv */
560 32, /* cache line size */
561 4, /* l1 cache */
562 16, /* l2 cache */
563 1, /* streams */
564 0, /* SF->DF convert */
565 };
566
567 /* Instruction costs on PPC403 processors. */
568 static const
569 struct processor_costs ppc403_cost = {
570 COSTS_N_INSNS (4), /* mulsi */
571 COSTS_N_INSNS (4), /* mulsi_const */
572 COSTS_N_INSNS (4), /* mulsi_const9 */
573 COSTS_N_INSNS (4), /* muldi */
574 COSTS_N_INSNS (33), /* divsi */
575 COSTS_N_INSNS (33), /* divdi */
576 COSTS_N_INSNS (11), /* fp */
577 COSTS_N_INSNS (11), /* dmul */
578 COSTS_N_INSNS (11), /* sdiv */
579 COSTS_N_INSNS (11), /* ddiv */
580 32, /* cache line size */
581 4, /* l1 cache */
582 16, /* l2 cache */
583 1, /* streams */
584 0, /* SF->DF convert */
585 };
586
587 /* Instruction costs on PPC405 processors. */
588 static const
589 struct processor_costs ppc405_cost = {
590 COSTS_N_INSNS (5), /* mulsi */
591 COSTS_N_INSNS (4), /* mulsi_const */
592 COSTS_N_INSNS (3), /* mulsi_const9 */
593 COSTS_N_INSNS (5), /* muldi */
594 COSTS_N_INSNS (35), /* divsi */
595 COSTS_N_INSNS (35), /* divdi */
596 COSTS_N_INSNS (11), /* fp */
597 COSTS_N_INSNS (11), /* dmul */
598 COSTS_N_INSNS (11), /* sdiv */
599 COSTS_N_INSNS (11), /* ddiv */
600 32, /* cache line size */
601 16, /* l1 cache */
602 128, /* l2 cache */
603 1, /* streams */
604 0, /* SF->DF convert */
605 };
606
607 /* Instruction costs on PPC440 processors. */
608 static const
609 struct processor_costs ppc440_cost = {
610 COSTS_N_INSNS (3), /* mulsi */
611 COSTS_N_INSNS (2), /* mulsi_const */
612 COSTS_N_INSNS (2), /* mulsi_const9 */
613 COSTS_N_INSNS (3), /* muldi */
614 COSTS_N_INSNS (34), /* divsi */
615 COSTS_N_INSNS (34), /* divdi */
616 COSTS_N_INSNS (5), /* fp */
617 COSTS_N_INSNS (5), /* dmul */
618 COSTS_N_INSNS (19), /* sdiv */
619 COSTS_N_INSNS (33), /* ddiv */
620 32, /* cache line size */
621 32, /* l1 cache */
622 256, /* l2 cache */
623 1, /* streams */
624 0, /* SF->DF convert */
625 };
626
627 /* Instruction costs on PPC476 processors. */
628 static const
629 struct processor_costs ppc476_cost = {
630 COSTS_N_INSNS (4), /* mulsi */
631 COSTS_N_INSNS (4), /* mulsi_const */
632 COSTS_N_INSNS (4), /* mulsi_const9 */
633 COSTS_N_INSNS (4), /* muldi */
634 COSTS_N_INSNS (11), /* divsi */
635 COSTS_N_INSNS (11), /* divdi */
636 COSTS_N_INSNS (6), /* fp */
637 COSTS_N_INSNS (6), /* dmul */
638 COSTS_N_INSNS (19), /* sdiv */
639 COSTS_N_INSNS (33), /* ddiv */
640 32, /* l1 cache line size */
641 32, /* l1 cache */
642 512, /* l2 cache */
643 1, /* streams */
644 0, /* SF->DF convert */
645 };
646
647 /* Instruction costs on PPC601 processors. */
648 static const
649 struct processor_costs ppc601_cost = {
650 COSTS_N_INSNS (5), /* mulsi */
651 COSTS_N_INSNS (5), /* mulsi_const */
652 COSTS_N_INSNS (5), /* mulsi_const9 */
653 COSTS_N_INSNS (5), /* muldi */
654 COSTS_N_INSNS (36), /* divsi */
655 COSTS_N_INSNS (36), /* divdi */
656 COSTS_N_INSNS (4), /* fp */
657 COSTS_N_INSNS (5), /* dmul */
658 COSTS_N_INSNS (17), /* sdiv */
659 COSTS_N_INSNS (31), /* ddiv */
660 32, /* cache line size */
661 32, /* l1 cache */
662 256, /* l2 cache */
663 1, /* streams */
664 0, /* SF->DF convert */
665 };
666
667 /* Instruction costs on PPC603 processors. */
668 static const
669 struct processor_costs ppc603_cost = {
670 COSTS_N_INSNS (5), /* mulsi */
671 COSTS_N_INSNS (3), /* mulsi_const */
672 COSTS_N_INSNS (2), /* mulsi_const9 */
673 COSTS_N_INSNS (5), /* muldi */
674 COSTS_N_INSNS (37), /* divsi */
675 COSTS_N_INSNS (37), /* divdi */
676 COSTS_N_INSNS (3), /* fp */
677 COSTS_N_INSNS (4), /* dmul */
678 COSTS_N_INSNS (18), /* sdiv */
679 COSTS_N_INSNS (33), /* ddiv */
680 32, /* cache line size */
681 8, /* l1 cache */
682 64, /* l2 cache */
683 1, /* streams */
684 0, /* SF->DF convert */
685 };
686
687 /* Instruction costs on PPC604 processors. */
688 static const
689 struct processor_costs ppc604_cost = {
690 COSTS_N_INSNS (4), /* mulsi */
691 COSTS_N_INSNS (4), /* mulsi_const */
692 COSTS_N_INSNS (4), /* mulsi_const9 */
693 COSTS_N_INSNS (4), /* muldi */
694 COSTS_N_INSNS (20), /* divsi */
695 COSTS_N_INSNS (20), /* divdi */
696 COSTS_N_INSNS (3), /* fp */
697 COSTS_N_INSNS (3), /* dmul */
698 COSTS_N_INSNS (18), /* sdiv */
699 COSTS_N_INSNS (32), /* ddiv */
700 32, /* cache line size */
701 16, /* l1 cache */
702 512, /* l2 cache */
703 1, /* streams */
704 0, /* SF->DF convert */
705 };
706
707 /* Instruction costs on PPC604e processors. */
708 static const
709 struct processor_costs ppc604e_cost = {
710 COSTS_N_INSNS (2), /* mulsi */
711 COSTS_N_INSNS (2), /* mulsi_const */
712 COSTS_N_INSNS (2), /* mulsi_const9 */
713 COSTS_N_INSNS (2), /* muldi */
714 COSTS_N_INSNS (20), /* divsi */
715 COSTS_N_INSNS (20), /* divdi */
716 COSTS_N_INSNS (3), /* fp */
717 COSTS_N_INSNS (3), /* dmul */
718 COSTS_N_INSNS (18), /* sdiv */
719 COSTS_N_INSNS (32), /* ddiv */
720 32, /* cache line size */
721 32, /* l1 cache */
722 1024, /* l2 cache */
723 1, /* streams */
724 0, /* SF->DF convert */
725 };
726
727 /* Instruction costs on PPC620 processors. */
728 static const
729 struct processor_costs ppc620_cost = {
730 COSTS_N_INSNS (5), /* mulsi */
731 COSTS_N_INSNS (4), /* mulsi_const */
732 COSTS_N_INSNS (3), /* mulsi_const9 */
733 COSTS_N_INSNS (7), /* muldi */
734 COSTS_N_INSNS (21), /* divsi */
735 COSTS_N_INSNS (37), /* divdi */
736 COSTS_N_INSNS (3), /* fp */
737 COSTS_N_INSNS (3), /* dmul */
738 COSTS_N_INSNS (18), /* sdiv */
739 COSTS_N_INSNS (32), /* ddiv */
740 128, /* cache line size */
741 32, /* l1 cache */
742 1024, /* l2 cache */
743 1, /* streams */
744 0, /* SF->DF convert */
745 };
746
747 /* Instruction costs on PPC630 processors. */
748 static const
749 struct processor_costs ppc630_cost = {
750 COSTS_N_INSNS (5), /* mulsi */
751 COSTS_N_INSNS (4), /* mulsi_const */
752 COSTS_N_INSNS (3), /* mulsi_const9 */
753 COSTS_N_INSNS (7), /* muldi */
754 COSTS_N_INSNS (21), /* divsi */
755 COSTS_N_INSNS (37), /* divdi */
756 COSTS_N_INSNS (3), /* fp */
757 COSTS_N_INSNS (3), /* dmul */
758 COSTS_N_INSNS (17), /* sdiv */
759 COSTS_N_INSNS (21), /* ddiv */
760 128, /* cache line size */
761 64, /* l1 cache */
762 1024, /* l2 cache */
763 1, /* streams */
764 0, /* SF->DF convert */
765 };
766
767 /* Instruction costs on Cell processor. */
768 /* COSTS_N_INSNS (1) ~ one add. */
769 static const
770 struct processor_costs ppccell_cost = {
771 COSTS_N_INSNS (9/2)+2, /* mulsi */
772 COSTS_N_INSNS (6/2), /* mulsi_const */
773 COSTS_N_INSNS (6/2), /* mulsi_const9 */
774 COSTS_N_INSNS (15/2)+2, /* muldi */
775 COSTS_N_INSNS (38/2), /* divsi */
776 COSTS_N_INSNS (70/2), /* divdi */
777 COSTS_N_INSNS (10/2), /* fp */
778 COSTS_N_INSNS (10/2), /* dmul */
779 COSTS_N_INSNS (74/2), /* sdiv */
780 COSTS_N_INSNS (74/2), /* ddiv */
781 128, /* cache line size */
782 32, /* l1 cache */
783 512, /* l2 cache */
784 6, /* streams */
785 0, /* SF->DF convert */
786 };
787
788 /* Instruction costs on PPC750 and PPC7400 processors. */
789 static const
790 struct processor_costs ppc750_cost = {
791 COSTS_N_INSNS (5), /* mulsi */
792 COSTS_N_INSNS (3), /* mulsi_const */
793 COSTS_N_INSNS (2), /* mulsi_const9 */
794 COSTS_N_INSNS (5), /* muldi */
795 COSTS_N_INSNS (17), /* divsi */
796 COSTS_N_INSNS (17), /* divdi */
797 COSTS_N_INSNS (3), /* fp */
798 COSTS_N_INSNS (3), /* dmul */
799 COSTS_N_INSNS (17), /* sdiv */
800 COSTS_N_INSNS (31), /* ddiv */
801 32, /* cache line size */
802 32, /* l1 cache */
803 512, /* l2 cache */
804 1, /* streams */
805 0, /* SF->DF convert */
806 };
807
808 /* Instruction costs on PPC7450 processors. */
809 static const
810 struct processor_costs ppc7450_cost = {
811 COSTS_N_INSNS (4), /* mulsi */
812 COSTS_N_INSNS (3), /* mulsi_const */
813 COSTS_N_INSNS (3), /* mulsi_const9 */
814 COSTS_N_INSNS (4), /* muldi */
815 COSTS_N_INSNS (23), /* divsi */
816 COSTS_N_INSNS (23), /* divdi */
817 COSTS_N_INSNS (5), /* fp */
818 COSTS_N_INSNS (5), /* dmul */
819 COSTS_N_INSNS (21), /* sdiv */
820 COSTS_N_INSNS (35), /* ddiv */
821 32, /* cache line size */
822 32, /* l1 cache */
823 1024, /* l2 cache */
824 1, /* streams */
825 0, /* SF->DF convert */
826 };
827
828 /* Instruction costs on PPC8540 processors. */
829 static const
830 struct processor_costs ppc8540_cost = {
831 COSTS_N_INSNS (4), /* mulsi */
832 COSTS_N_INSNS (4), /* mulsi_const */
833 COSTS_N_INSNS (4), /* mulsi_const9 */
834 COSTS_N_INSNS (4), /* muldi */
835 COSTS_N_INSNS (19), /* divsi */
836 COSTS_N_INSNS (19), /* divdi */
837 COSTS_N_INSNS (4), /* fp */
838 COSTS_N_INSNS (4), /* dmul */
839 COSTS_N_INSNS (29), /* sdiv */
840 COSTS_N_INSNS (29), /* ddiv */
841 32, /* cache line size */
842 32, /* l1 cache */
843 256, /* l2 cache */
844 1, /* prefetch streams */
845 0, /* SF->DF convert */
846 };
847
848 /* Instruction costs on E300C2 and E300C3 cores. */
849 static const
850 struct processor_costs ppce300c2c3_cost = {
851 COSTS_N_INSNS (4), /* mulsi */
852 COSTS_N_INSNS (4), /* mulsi_const */
853 COSTS_N_INSNS (4), /* mulsi_const9 */
854 COSTS_N_INSNS (4), /* muldi */
855 COSTS_N_INSNS (19), /* divsi */
856 COSTS_N_INSNS (19), /* divdi */
857 COSTS_N_INSNS (3), /* fp */
858 COSTS_N_INSNS (4), /* dmul */
859 COSTS_N_INSNS (18), /* sdiv */
860 COSTS_N_INSNS (33), /* ddiv */
861 32, /* cache line size */
862 16, /* l1 cache */
863 16, /* l2 cache */
864 1, /* prefetch streams */
865 0, /* SF->DF convert */
866 };
867
868 /* Instruction costs on PPCE500MC processors. */
869 static const
870 struct processor_costs ppce500mc_cost = {
871 COSTS_N_INSNS (4), /* mulsi */
872 COSTS_N_INSNS (4), /* mulsi_const */
873 COSTS_N_INSNS (4), /* mulsi_const9 */
874 COSTS_N_INSNS (4), /* muldi */
875 COSTS_N_INSNS (14), /* divsi */
876 COSTS_N_INSNS (14), /* divdi */
877 COSTS_N_INSNS (8), /* fp */
878 COSTS_N_INSNS (10), /* dmul */
879 COSTS_N_INSNS (36), /* sdiv */
880 COSTS_N_INSNS (66), /* ddiv */
881 64, /* cache line size */
882 32, /* l1 cache */
883 128, /* l2 cache */
884 1, /* prefetch streams */
885 0, /* SF->DF convert */
886 };
887
888 /* Instruction costs on PPCE500MC64 processors. */
889 static const
890 struct processor_costs ppce500mc64_cost = {
891 COSTS_N_INSNS (4), /* mulsi */
892 COSTS_N_INSNS (4), /* mulsi_const */
893 COSTS_N_INSNS (4), /* mulsi_const9 */
894 COSTS_N_INSNS (4), /* muldi */
895 COSTS_N_INSNS (14), /* divsi */
896 COSTS_N_INSNS (14), /* divdi */
897 COSTS_N_INSNS (4), /* fp */
898 COSTS_N_INSNS (10), /* dmul */
899 COSTS_N_INSNS (36), /* sdiv */
900 COSTS_N_INSNS (66), /* ddiv */
901 64, /* cache line size */
902 32, /* l1 cache */
903 128, /* l2 cache */
904 1, /* prefetch streams */
905 0, /* SF->DF convert */
906 };
907
908 /* Instruction costs on PPCE5500 processors. */
909 static const
910 struct processor_costs ppce5500_cost = {
911 COSTS_N_INSNS (5), /* mulsi */
912 COSTS_N_INSNS (5), /* mulsi_const */
913 COSTS_N_INSNS (4), /* mulsi_const9 */
914 COSTS_N_INSNS (5), /* muldi */
915 COSTS_N_INSNS (14), /* divsi */
916 COSTS_N_INSNS (14), /* divdi */
917 COSTS_N_INSNS (7), /* fp */
918 COSTS_N_INSNS (10), /* dmul */
919 COSTS_N_INSNS (36), /* sdiv */
920 COSTS_N_INSNS (66), /* ddiv */
921 64, /* cache line size */
922 32, /* l1 cache */
923 128, /* l2 cache */
924 1, /* prefetch streams */
925 0, /* SF->DF convert */
926 };
927
928 /* Instruction costs on PPCE6500 processors. */
929 static const
930 struct processor_costs ppce6500_cost = {
931 COSTS_N_INSNS (5), /* mulsi */
932 COSTS_N_INSNS (5), /* mulsi_const */
933 COSTS_N_INSNS (4), /* mulsi_const9 */
934 COSTS_N_INSNS (5), /* muldi */
935 COSTS_N_INSNS (14), /* divsi */
936 COSTS_N_INSNS (14), /* divdi */
937 COSTS_N_INSNS (7), /* fp */
938 COSTS_N_INSNS (10), /* dmul */
939 COSTS_N_INSNS (36), /* sdiv */
940 COSTS_N_INSNS (66), /* ddiv */
941 64, /* cache line size */
942 32, /* l1 cache */
943 128, /* l2 cache */
944 1, /* prefetch streams */
945 0, /* SF->DF convert */
946 };
947
948 /* Instruction costs on AppliedMicro Titan processors. */
949 static const
950 struct processor_costs titan_cost = {
951 COSTS_N_INSNS (5), /* mulsi */
952 COSTS_N_INSNS (5), /* mulsi_const */
953 COSTS_N_INSNS (5), /* mulsi_const9 */
954 COSTS_N_INSNS (5), /* muldi */
955 COSTS_N_INSNS (18), /* divsi */
956 COSTS_N_INSNS (18), /* divdi */
957 COSTS_N_INSNS (10), /* fp */
958 COSTS_N_INSNS (10), /* dmul */
959 COSTS_N_INSNS (46), /* sdiv */
960 COSTS_N_INSNS (72), /* ddiv */
961 32, /* cache line size */
962 32, /* l1 cache */
963 512, /* l2 cache */
964 1, /* prefetch streams */
965 0, /* SF->DF convert */
966 };
967
968 /* Instruction costs on POWER4 and POWER5 processors. */
969 static const
970 struct processor_costs power4_cost = {
971 COSTS_N_INSNS (3), /* mulsi */
972 COSTS_N_INSNS (2), /* mulsi_const */
973 COSTS_N_INSNS (2), /* mulsi_const9 */
974 COSTS_N_INSNS (4), /* muldi */
975 COSTS_N_INSNS (18), /* divsi */
976 COSTS_N_INSNS (34), /* divdi */
977 COSTS_N_INSNS (3), /* fp */
978 COSTS_N_INSNS (3), /* dmul */
979 COSTS_N_INSNS (17), /* sdiv */
980 COSTS_N_INSNS (17), /* ddiv */
981 128, /* cache line size */
982 32, /* l1 cache */
983 1024, /* l2 cache */
984 8, /* prefetch streams /*/
985 0, /* SF->DF convert */
986 };
987
988 /* Instruction costs on POWER6 processors. */
989 static const
990 struct processor_costs power6_cost = {
991 COSTS_N_INSNS (8), /* mulsi */
992 COSTS_N_INSNS (8), /* mulsi_const */
993 COSTS_N_INSNS (8), /* mulsi_const9 */
994 COSTS_N_INSNS (8), /* muldi */
995 COSTS_N_INSNS (22), /* divsi */
996 COSTS_N_INSNS (28), /* divdi */
997 COSTS_N_INSNS (3), /* fp */
998 COSTS_N_INSNS (3), /* dmul */
999 COSTS_N_INSNS (13), /* sdiv */
1000 COSTS_N_INSNS (16), /* ddiv */
1001 128, /* cache line size */
1002 64, /* l1 cache */
1003 2048, /* l2 cache */
1004 16, /* prefetch streams */
1005 0, /* SF->DF convert */
1006 };
1007
1008 /* Instruction costs on POWER7 processors. */
1009 static const
1010 struct processor_costs power7_cost = {
1011 COSTS_N_INSNS (2), /* mulsi */
1012 COSTS_N_INSNS (2), /* mulsi_const */
1013 COSTS_N_INSNS (2), /* mulsi_const9 */
1014 COSTS_N_INSNS (2), /* muldi */
1015 COSTS_N_INSNS (18), /* divsi */
1016 COSTS_N_INSNS (34), /* divdi */
1017 COSTS_N_INSNS (3), /* fp */
1018 COSTS_N_INSNS (3), /* dmul */
1019 COSTS_N_INSNS (13), /* sdiv */
1020 COSTS_N_INSNS (16), /* ddiv */
1021 128, /* cache line size */
1022 32, /* l1 cache */
1023 256, /* l2 cache */
1024 12, /* prefetch streams */
1025 COSTS_N_INSNS (3), /* SF->DF convert */
1026 };
1027
1028 /* Instruction costs on POWER8 processors. */
1029 static const
1030 struct processor_costs power8_cost = {
1031 COSTS_N_INSNS (3), /* mulsi */
1032 COSTS_N_INSNS (3), /* mulsi_const */
1033 COSTS_N_INSNS (3), /* mulsi_const9 */
1034 COSTS_N_INSNS (3), /* muldi */
1035 COSTS_N_INSNS (19), /* divsi */
1036 COSTS_N_INSNS (35), /* divdi */
1037 COSTS_N_INSNS (3), /* fp */
1038 COSTS_N_INSNS (3), /* dmul */
1039 COSTS_N_INSNS (14), /* sdiv */
1040 COSTS_N_INSNS (17), /* ddiv */
1041 128, /* cache line size */
1042 32, /* l1 cache */
1043 512, /* l2 cache */
1044 12, /* prefetch streams */
1045 COSTS_N_INSNS (3), /* SF->DF convert */
1046 };
1047
1048 /* Instruction costs on POWER9 processors. */
1049 static const
1050 struct processor_costs power9_cost = {
1051 COSTS_N_INSNS (3), /* mulsi */
1052 COSTS_N_INSNS (3), /* mulsi_const */
1053 COSTS_N_INSNS (3), /* mulsi_const9 */
1054 COSTS_N_INSNS (3), /* muldi */
1055 COSTS_N_INSNS (8), /* divsi */
1056 COSTS_N_INSNS (12), /* divdi */
1057 COSTS_N_INSNS (3), /* fp */
1058 COSTS_N_INSNS (3), /* dmul */
1059 COSTS_N_INSNS (13), /* sdiv */
1060 COSTS_N_INSNS (18), /* ddiv */
1061 128, /* cache line size */
1062 32, /* l1 cache */
1063 512, /* l2 cache */
1064 8, /* prefetch streams */
1065 COSTS_N_INSNS (3), /* SF->DF convert */
1066 };
1067
1068 /* Instruction costs on POWER10 processors. */
1069 static const
1070 struct processor_costs power10_cost = {
1071 COSTS_N_INSNS (2), /* mulsi */
1072 COSTS_N_INSNS (2), /* mulsi_const */
1073 COSTS_N_INSNS (2), /* mulsi_const9 */
1074 COSTS_N_INSNS (2), /* muldi */
1075 COSTS_N_INSNS (6), /* divsi */
1076 COSTS_N_INSNS (6), /* divdi */
1077 COSTS_N_INSNS (2), /* fp */
1078 COSTS_N_INSNS (2), /* dmul */
1079 COSTS_N_INSNS (11), /* sdiv */
1080 COSTS_N_INSNS (13), /* ddiv */
1081 128, /* cache line size */
1082 32, /* l1 cache */
1083 512, /* l2 cache */
1084 16, /* prefetch streams */
1085 COSTS_N_INSNS (2), /* SF->DF convert */
1086 };
1087
1088 /* Instruction costs on POWER A2 processors. */
1089 static const
1090 struct processor_costs ppca2_cost = {
1091 COSTS_N_INSNS (16), /* mulsi */
1092 COSTS_N_INSNS (16), /* mulsi_const */
1093 COSTS_N_INSNS (16), /* mulsi_const9 */
1094 COSTS_N_INSNS (16), /* muldi */
1095 COSTS_N_INSNS (22), /* divsi */
1096 COSTS_N_INSNS (28), /* divdi */
1097 COSTS_N_INSNS (3), /* fp */
1098 COSTS_N_INSNS (3), /* dmul */
1099 COSTS_N_INSNS (59), /* sdiv */
1100 COSTS_N_INSNS (72), /* ddiv */
1101 64, /* cache line size */
1102 16, /* l1 cache */
1103 2048, /* l2 cache */
1104 16, /* prefetch streams */
1105 0, /* SF->DF convert */
1106 };
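/* The cost tables above are reached through the rs6000_cost pointer; a
   typical consumer (a simplified sketch of the rtx-cost handling later in
   this file) looks like:

     case MULT:
       *total = rs6000_cost->mulsi;
       break;  */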
1107
1108 /* Support for -mveclibabi=<xxx> to control which vector library to use. */
1109 static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
1110
1111 \f
1112 static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool,
1113 code_helper = ERROR_MARK);
1114 static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
1115 static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
1116 static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
1117 static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
1118 static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, int * = nullptr);
1119 static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
1120 static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
1121 static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
1122 bool);
1123 static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
1124 unsigned int);
1125 static bool is_microcoded_insn (rtx_insn *);
1126 static bool is_nonpipeline_insn (rtx_insn *);
1127 static bool is_cracked_insn (rtx_insn *);
1128 static bool is_load_insn (rtx, rtx *);
1129 static bool is_store_insn (rtx, rtx *);
1130 static bool set_to_load_agen (rtx_insn *, rtx_insn *);
1131 static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
1132 static bool insn_must_be_first_in_group (rtx_insn *);
1133 static bool insn_must_be_last_in_group (rtx_insn *);
1134 bool easy_vector_constant (rtx, machine_mode);
1135 static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
1136 static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
1137 #if TARGET_MACHO
1138 static tree get_prev_label (tree);
1139 #endif
1140 static bool rs6000_mode_dependent_address (const_rtx);
1141 static bool rs6000_debug_mode_dependent_address (const_rtx);
1142 static bool rs6000_offsettable_memref_p (rtx, machine_mode, bool);
1143 static enum reg_class rs6000_secondary_reload_class (enum reg_class,
1144 machine_mode, rtx);
1145 static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
1146 machine_mode,
1147 rtx);
1148 static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
1149 static enum reg_class rs6000_debug_preferred_reload_class (rtx,
1150 enum reg_class);
1151 static bool rs6000_debug_secondary_memory_needed (machine_mode,
1152 reg_class_t,
1153 reg_class_t);
1154 static bool rs6000_debug_can_change_mode_class (machine_mode,
1155 machine_mode,
1156 reg_class_t);
1157
1158 static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
1159 = rs6000_mode_dependent_address;
1160
1161 enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
1162 machine_mode, rtx)
1163 = rs6000_secondary_reload_class;
1164
1165 enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
1166 = rs6000_preferred_reload_class;
1167
1168 const int INSN_NOT_AVAILABLE = -1;
1169
1170 static void rs6000_print_isa_options (FILE *, int, const char *,
1171 HOST_WIDE_INT);
1172 static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);
1173
1174 static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
1175 static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
1176 enum rs6000_reg_type,
1177 machine_mode,
1178 secondary_reload_info *,
1179 bool);
1180 rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
1181
1182 /* Hash table stuff for keeping track of TOC entries. */
1183
1184 struct GTY((for_user)) toc_hash_struct
1185 {
1186 /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
1187 ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
1188 rtx key;
1189 machine_mode key_mode;
1190 int labelno;
1191 };
1192
1193 struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
1194 {
1195 static hashval_t hash (toc_hash_struct *);
1196 static bool equal (toc_hash_struct *, toc_hash_struct *);
1197 };
1198
1199 static GTY (()) hash_table<toc_hasher> *toc_hash_table;
1200
1201
1202 \f
1203 /* Default register names. */
1204 char rs6000_reg_names[][8] =
1205 {
1206 /* GPRs */
1207 "0", "1", "2", "3", "4", "5", "6", "7",
1208 "8", "9", "10", "11", "12", "13", "14", "15",
1209 "16", "17", "18", "19", "20", "21", "22", "23",
1210 "24", "25", "26", "27", "28", "29", "30", "31",
1211 /* FPRs */
1212 "0", "1", "2", "3", "4", "5", "6", "7",
1213 "8", "9", "10", "11", "12", "13", "14", "15",
1214 "16", "17", "18", "19", "20", "21", "22", "23",
1215 "24", "25", "26", "27", "28", "29", "30", "31",
1216 /* VRs */
1217 "0", "1", "2", "3", "4", "5", "6", "7",
1218 "8", "9", "10", "11", "12", "13", "14", "15",
1219 "16", "17", "18", "19", "20", "21", "22", "23",
1220 "24", "25", "26", "27", "28", "29", "30", "31",
1221 /* lr ctr ca ap */
1222 "lr", "ctr", "ca", "ap",
1223 /* cr0..cr7 */
1224 "0", "1", "2", "3", "4", "5", "6", "7",
1225 /* vrsave vscr sfp */
1226 "vrsave", "vscr", "sfp",
1227 };
1228
1229 #ifdef TARGET_REGNAMES
1230 static const char alt_reg_names[][8] =
1231 {
1232 /* GPRs */
1233 "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
1234 "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
1235 "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
1236 "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
1237 /* FPRs */
1238 "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
1239 "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
1240 "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
1241 "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
1242 /* VRs */
1243 "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
1244 "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
1245 "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
1246 "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
1247 /* lr ctr ca ap */
1248 "lr", "ctr", "ca", "ap",
1249 /* cr0..cr7 */
1250 "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
1251 /* vrsave vscr sfp */
1252 "vrsave", "vscr", "sfp",
1253 };
1254 #endif
1255
1256 /* Table of valid machine attributes. */
1257
1258 static const attribute_spec rs6000_gnu_attributes[] =
1259 {
1260 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
1261 affects_type_identity, handler, exclude } */
1262 { "altivec", 1, 1, false, true, false, false,
1263 rs6000_handle_altivec_attribute, NULL },
1264 { "longcall", 0, 0, false, true, true, false,
1265 rs6000_handle_longcall_attribute, NULL },
1266 { "shortcall", 0, 0, false, true, true, false,
1267 rs6000_handle_longcall_attribute, NULL },
1268 { "ms_struct", 0, 0, false, false, false, false,
1269 rs6000_handle_struct_attribute, NULL },
1270 { "gcc_struct", 0, 0, false, false, false, false,
1271 rs6000_handle_struct_attribute, NULL },
1272 #ifdef SUBTARGET_ATTRIBUTE_TABLE
1273 SUBTARGET_ATTRIBUTE_TABLE,
1274 #endif
1275 };
1276
1277 static const scoped_attribute_specs rs6000_gnu_attribute_table =
1278 {
1279 "gnu", { rs6000_gnu_attributes }
1280 };
1281
1282 static const scoped_attribute_specs *const rs6000_attribute_table[] =
1283 {
1284 &rs6000_gnu_attribute_table
1285 };
1286 \f
1287 #ifndef TARGET_PROFILE_KERNEL
1288 #define TARGET_PROFILE_KERNEL 0
1289 #endif
1290 \f
1291 /* Initialize the GCC target structure. */
1292 #undef TARGET_ATTRIBUTE_TABLE
1293 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1294 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1295 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1296 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1297 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1298
1299 #undef TARGET_ASM_ALIGNED_DI_OP
1300 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1301
1302 /* Default unaligned ops are only provided for ELF. Find the ops needed
1303 for non-ELF systems. */
1304 #ifndef OBJECT_FORMAT_ELF
1305 #if TARGET_XCOFF
1306 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1307 64-bit targets. */
1308 #undef TARGET_ASM_UNALIGNED_HI_OP
1309 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1310 #undef TARGET_ASM_UNALIGNED_SI_OP
1311 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1312 #undef TARGET_ASM_UNALIGNED_DI_OP
1313 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1314 #else
1315 /* For Darwin. */
1316 #undef TARGET_ASM_UNALIGNED_HI_OP
1317 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1318 #undef TARGET_ASM_UNALIGNED_SI_OP
1319 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1320 #undef TARGET_ASM_UNALIGNED_DI_OP
1321 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1322 #undef TARGET_ASM_ALIGNED_DI_OP
1323 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1324 #endif
1325 #endif
1326
1327 /* This hook deals with fixups for relocatable code and DI-mode objects
1328 in 64-bit code. */
1329 #undef TARGET_ASM_INTEGER
1330 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1331
1332 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1333 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1334 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1335 #endif
1336
1337 #undef TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY
1338 #define TARGET_ASM_PRINT_PATCHABLE_FUNCTION_ENTRY \
1339 rs6000_print_patchable_function_entry
1340
1341 #undef TARGET_SET_UP_BY_PROLOGUE
1342 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1343
1344 #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
1345 #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components
1346 #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
1347 #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb
1348 #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
1349 #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components
1350 #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
1351 #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components
1352 #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
1353 #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components
1354 #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
1355 #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components
1356
1357 #undef TARGET_EXTRA_LIVE_ON_ENTRY
1358 #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry
1359
1360 #undef TARGET_INTERNAL_ARG_POINTER
1361 #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer
1362
1363 #undef TARGET_HAVE_TLS
1364 #define TARGET_HAVE_TLS HAVE_AS_TLS
1365
1366 #undef TARGET_CANNOT_FORCE_CONST_MEM
1367 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1368
1369 #undef TARGET_DELEGITIMIZE_ADDRESS
1370 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1371
1372 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1373 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1374
1375 #undef TARGET_LEGITIMATE_COMBINED_INSN
1376 #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn
1377
1378 #undef TARGET_ASM_FUNCTION_PROLOGUE
1379 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1380 #undef TARGET_ASM_FUNCTION_EPILOGUE
1381 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1382
1383 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1384 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1385
1386 #undef TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC
1387 #define TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC rs6000_gen_pic_addr_diff_vec
1388
1389 #undef TARGET_LEGITIMIZE_ADDRESS
1390 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1391
1392 #undef TARGET_SCHED_VARIABLE_ISSUE
1393 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1394
1395 #undef TARGET_SCHED_ISSUE_RATE
1396 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1397 #undef TARGET_SCHED_ADJUST_COST
1398 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1399 #undef TARGET_SCHED_ADJUST_PRIORITY
1400 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1401 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1402 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1403 #undef TARGET_SCHED_INIT
1404 #define TARGET_SCHED_INIT rs6000_sched_init
1405 #undef TARGET_SCHED_FINISH
1406 #define TARGET_SCHED_FINISH rs6000_sched_finish
1407 #undef TARGET_SCHED_REORDER
1408 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1409 #undef TARGET_SCHED_REORDER2
1410 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1411
1412 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1413 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1414
1415 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1416 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1417
1418 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1419 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1420 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1421 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1422 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1423 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1424 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1425 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1426
1427 #undef TARGET_SCHED_CAN_SPECULATE_INSN
1428 #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn
1429
1430 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1431 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1432 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1433 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1434 rs6000_builtin_support_vector_misalignment
1435 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1436 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1437 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1438 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1439 rs6000_builtin_vectorization_cost
1440 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1441 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1442 rs6000_preferred_simd_mode
1443 #undef TARGET_VECTORIZE_CREATE_COSTS
1444 #define TARGET_VECTORIZE_CREATE_COSTS rs6000_vectorize_create_costs
1445
1446 #undef TARGET_LOOP_UNROLL_ADJUST
1447 #define TARGET_LOOP_UNROLL_ADJUST rs6000_loop_unroll_adjust
1448
1449 #undef TARGET_INIT_BUILTINS
1450 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1451 #undef TARGET_BUILTIN_DECL
1452 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1453
1454 #undef TARGET_FOLD_BUILTIN
1455 #define TARGET_FOLD_BUILTIN rs6000_fold_builtin
1456 #undef TARGET_GIMPLE_FOLD_BUILTIN
1457 #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin
1458
1459 #undef TARGET_EXPAND_BUILTIN
1460 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1461
1462 #undef TARGET_MANGLE_TYPE
1463 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1464
1465 #undef TARGET_INIT_LIBFUNCS
1466 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1467
1468 #if TARGET_MACHO
1469 #undef TARGET_BINDS_LOCAL_P
1470 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1471 #endif
1472
1473 #undef TARGET_MS_BITFIELD_LAYOUT_P
1474 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1475
1476 #undef TARGET_ASM_OUTPUT_MI_THUNK
1477 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1478
1479 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1480 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1481
1482 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1483 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1484
1485 #undef TARGET_REGISTER_MOVE_COST
1486 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1487 #undef TARGET_MEMORY_MOVE_COST
1488 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1489 #undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
1490 #define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS \
1491 rs6000_ira_change_pseudo_allocno_class
1492 #undef TARGET_CANNOT_COPY_INSN_P
1493 #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p
1494 #undef TARGET_RTX_COSTS
1495 #define TARGET_RTX_COSTS rs6000_rtx_costs
1496 #undef TARGET_ADDRESS_COST
1497 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1498 #undef TARGET_INSN_COST
1499 #define TARGET_INSN_COST rs6000_insn_cost
1500
1501 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1502 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1503
1504 #undef TARGET_PROMOTE_FUNCTION_MODE
1505 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1506
1507 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
1508 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE rs6000_override_options_after_change
1509
1510 #undef TARGET_RETURN_IN_MEMORY
1511 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1512
1513 #undef TARGET_RETURN_IN_MSB
1514 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1515
1516 #undef TARGET_SETUP_INCOMING_VARARGS
1517 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1518
1519 /* Always strict argument naming on rs6000. */
1520 #undef TARGET_STRICT_ARGUMENT_NAMING
1521 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1522 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1523 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1524 #undef TARGET_SPLIT_COMPLEX_ARG
1525 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1526 #undef TARGET_MUST_PASS_IN_STACK
1527 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1528 #undef TARGET_PASS_BY_REFERENCE
1529 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1530 #undef TARGET_ARG_PARTIAL_BYTES
1531 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1532 #undef TARGET_FUNCTION_ARG_ADVANCE
1533 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1534 #undef TARGET_FUNCTION_ARG
1535 #define TARGET_FUNCTION_ARG rs6000_function_arg
1536 #undef TARGET_FUNCTION_ARG_PADDING
1537 #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding
1538 #undef TARGET_FUNCTION_ARG_BOUNDARY
1539 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1540
1541 #undef TARGET_BUILD_BUILTIN_VA_LIST
1542 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1543
1544 #undef TARGET_EXPAND_BUILTIN_VA_START
1545 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1546
1547 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1548 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1549
1550 #undef TARGET_EH_RETURN_FILTER_MODE
1551 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1552
1553 #undef TARGET_TRANSLATE_MODE_ATTRIBUTE
1554 #define TARGET_TRANSLATE_MODE_ATTRIBUTE rs6000_translate_mode_attribute
1555
1556 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1557 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1558
1559 #undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
1560 #define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
1561 rs6000_libgcc_floating_mode_supported_p
1562
1563 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1564 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1565
1566 #undef TARGET_FLOATN_MODE
1567 #define TARGET_FLOATN_MODE rs6000_floatn_mode
1568
1569 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1570 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1571
1572 #undef TARGET_MD_ASM_ADJUST
1573 #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust
1574
1575 #undef TARGET_OPTION_OVERRIDE
1576 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1577
1578 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1579 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1580 rs6000_builtin_vectorized_function
1581
1582 #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION
1583 #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \
1584 rs6000_builtin_md_vectorized_function
1585
1586 #undef TARGET_STACK_PROTECT_GUARD
1587 #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard
1588
1589 #if !TARGET_MACHO
1590 #undef TARGET_STACK_PROTECT_FAIL
1591 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1592 #endif
1593
1594 #ifdef HAVE_AS_TLS
1595 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1596 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1597 #endif
1598
1599 /* Use a 32-bit anchor range. This leads to sequences like:
1600
1601 addis tmp,anchor,high
1602 addi dest,tmp,low
1603
1604 where tmp itself acts as an anchor, and can be shared between
1605 accesses to the same 64k page. */
1606 #undef TARGET_MIN_ANCHOR_OFFSET
1607 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1608 #undef TARGET_MAX_ANCHOR_OFFSET
1609 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1610 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1611 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1612 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1613 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1614
1615 #undef TARGET_BUILTIN_RECIPROCAL
1616 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1617
1618 #undef TARGET_SECONDARY_RELOAD
1619 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1620 #undef TARGET_SECONDARY_MEMORY_NEEDED
1621 #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed
1622 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
1623 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode
1624
1625 #undef TARGET_LEGITIMATE_ADDRESS_P
1626 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1627
1628 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1629 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1630
1631 #undef TARGET_COMPUTE_PRESSURE_CLASSES
1632 #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes
1633
1634 #undef TARGET_CAN_ELIMINATE
1635 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1636
1637 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1638 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1639
1640 #undef TARGET_SCHED_REASSOCIATION_WIDTH
1641 #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width
1642
1643 #undef TARGET_TRAMPOLINE_INIT
1644 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1645
1646 #undef TARGET_FUNCTION_VALUE
1647 #define TARGET_FUNCTION_VALUE rs6000_function_value
1648
1649 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1650 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1651
1652 #undef TARGET_OPTION_SAVE
1653 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1654
1655 #undef TARGET_OPTION_RESTORE
1656 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1657
1658 #undef TARGET_OPTION_PRINT
1659 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1660
1661 #undef TARGET_CAN_INLINE_P
1662 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1663
1664 #undef TARGET_SET_CURRENT_FUNCTION
1665 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1666
1667 #undef TARGET_LEGITIMATE_CONSTANT_P
1668 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1669
1670 #undef TARGET_VECTORIZE_VEC_PERM_CONST
1671 #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const
1672
1673 #undef TARGET_CAN_USE_DOLOOP_P
1674 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1675
1676 #undef TARGET_PREDICT_DOLOOP_P
1677 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
1678
1679 #undef TARGET_HAVE_COUNT_REG_DECR_P
1680 #define TARGET_HAVE_COUNT_REG_DECR_P true
1681
1682 /* 1000000000 is infinite cost in IVOPTs. */
1683 #undef TARGET_DOLOOP_COST_FOR_GENERIC
1684 #define TARGET_DOLOOP_COST_FOR_GENERIC 1000000000
1685
1686 #undef TARGET_DOLOOP_COST_FOR_ADDRESS
1687 #define TARGET_DOLOOP_COST_FOR_ADDRESS 1000000000
1688
1689 #undef TARGET_PREFERRED_DOLOOP_MODE
1690 #define TARGET_PREFERRED_DOLOOP_MODE rs6000_preferred_doloop_mode
1691
1692 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1693 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1694
1695 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1696 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1697 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1698 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1699 #undef TARGET_UNWIND_WORD_MODE
1700 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1701
1702 #undef TARGET_OFFLOAD_OPTIONS
1703 #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options
1704
1705 #undef TARGET_C_MODE_FOR_SUFFIX
1706 #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix
1707
1708 #undef TARGET_INVALID_BINARY_OP
1709 #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op
1710
1711 #undef TARGET_OPTAB_SUPPORTED_P
1712 #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p
1713
1714 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
1715 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
1716
1717 #undef TARGET_COMPARE_VERSION_PRIORITY
1718 #define TARGET_COMPARE_VERSION_PRIORITY rs6000_compare_version_priority
1719
1720 #undef TARGET_GENERATE_VERSION_DISPATCHER_BODY
1721 #define TARGET_GENERATE_VERSION_DISPATCHER_BODY \
1722 rs6000_generate_version_dispatcher_body
1723
1724 #undef TARGET_GET_FUNCTION_VERSIONS_DISPATCHER
1725 #define TARGET_GET_FUNCTION_VERSIONS_DISPATCHER \
1726 rs6000_get_function_versions_dispatcher
1727
1728 #undef TARGET_OPTION_FUNCTION_VERSIONS
1729 #define TARGET_OPTION_FUNCTION_VERSIONS common_function_versions
1730
1731 #undef TARGET_HARD_REGNO_NREGS
1732 #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook
1733 #undef TARGET_HARD_REGNO_MODE_OK
1734 #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok
1735
1736 #undef TARGET_MODES_TIEABLE_P
1737 #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p
1738
1739 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
1740 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
1741 rs6000_hard_regno_call_part_clobbered
1742
1743 #undef TARGET_SLOW_UNALIGNED_ACCESS
1744 #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access
1745
1746 #undef TARGET_CAN_CHANGE_MODE_CLASS
1747 #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class
1748
1749 #undef TARGET_CONSTANT_ALIGNMENT
1750 #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
1751
1752 #undef TARGET_STARTING_FRAME_OFFSET
1753 #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
1754
1755 #undef TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P
1756 #define TARGET_SETJMP_PRESERVES_NONVOLATILE_REGS_P hook_bool_void_true
1757
1758 #undef TARGET_MANGLE_DECL_ASSEMBLER_NAME
1759 #define TARGET_MANGLE_DECL_ASSEMBLER_NAME rs6000_mangle_decl_assembler_name
1760
1761 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
1762 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P \
1763 rs6000_cannot_substitute_mem_equiv_p
1764
1765 #undef TARGET_INVALID_CONVERSION
1766 #define TARGET_INVALID_CONVERSION rs6000_invalid_conversion
1767
1768 #undef TARGET_NEED_IPA_FN_TARGET_INFO
1769 #define TARGET_NEED_IPA_FN_TARGET_INFO rs6000_need_ipa_fn_target_info
1770
1771 #undef TARGET_UPDATE_IPA_FN_TARGET_INFO
1772 #define TARGET_UPDATE_IPA_FN_TARGET_INFO rs6000_update_ipa_fn_target_info
1773
1774 #undef TARGET_CONST_ANCHOR
1775 #define TARGET_CONST_ANCHOR 0x8000
1776
1777 \f
1778
1779 /* Processor table. */
1780 struct rs6000_ptt
1781 {
1782 const char *const name; /* Canonical processor name. */
1783 const enum processor_type processor; /* Processor type enum value. */
1784 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1785 };
1786
1787 static struct rs6000_ptt const processor_target_table[] =
1788 {
1789 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1790 #include "rs6000-cpus.def"
1791 #undef RS6000_CPU
1792 };
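
/* Illustrative note: each RS6000_CPU line in rs6000-cpus.def expands to one
   initializer in the table above.  A hypothetical entry such as

	RS6000_CPU ("power9", PROCESSOR_POWER9, MASK_POWERPC64 | OTHER_MASKS)

   becomes { "power9", PROCESSOR_POWER9, MASK_POWERPC64 | OTHER_MASKS },
   where OTHER_MASKS stands in for the real ISA flag set.  */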
1793
1794 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1795 name is invalid. */
1796
1797 static int
1798 rs6000_cpu_name_lookup (const char *name)
1799 {
1800 size_t i;
1801
1802 if (name != NULL)
1803 {
1804 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1805 if (! strcmp (name, processor_target_table[i].name))
1806 return (int)i;
1807 }
1808
1809 return -1;
1810 }
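
/* Usage sketch (hypothetical calls): rs6000_cpu_name_lookup ("power9")
   returns the index of that entry in processor_target_table, while an
   unrecognized name such as "power99" returns -1.  */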
1811
1812 \f
1813 /* Return number of consecutive hard regs needed starting at reg REGNO
1814 to hold something of mode MODE.
1815 This is ordinarily the length in words of a value of mode MODE
1816 but can be less for certain modes in special long registers.
1817
1818 POWER and PowerPC GPRs hold 32 bits worth;
1819 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1820
1821 static int
1822 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1823 {
1824 unsigned HOST_WIDE_INT reg_size;
1825
1826 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1827 128-bit floating point that can go in vector registers, which has VSX
1828 memory addressing. */
1829 if (FP_REGNO_P (regno))
1830 reg_size = (VECTOR_MEM_VSX_P (mode) || VECTOR_ALIGNMENT_P (mode)
1831 ? UNITS_PER_VSX_WORD
1832 : UNITS_PER_FP_WORD);
1833
1834 else if (ALTIVEC_REGNO_P (regno))
1835 reg_size = UNITS_PER_ALTIVEC_WORD;
1836
1837 else
1838 reg_size = UNITS_PER_WORD;
1839
1840 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1841 }
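
/* Worked example of the expression above: with UNITS_PER_FP_WORD == 8, a
   16-byte IBM long double held in FPRs needs (16 + 8 - 1) / 8 == 2
   consecutive registers, i.e. the mode size divided by the register size,
   rounded up.  */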
1842
1843 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1844 MODE. */
1845 static int
1846 rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode)
1847 {
1848 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1849
1850 if (COMPLEX_MODE_P (mode))
1851 mode = GET_MODE_INNER (mode);
1852
1853 /* Vector pair modes need even/odd VSX register pairs. Only allow vector
1854 registers. */
1855 if (mode == OOmode)
1856 return (TARGET_MMA && VSX_REGNO_P (regno) && (regno & 1) == 0);
1857
1858 /* MMA accumulator modes need FPR registers divisible by 4. */
1859 if (mode == XOmode)
1860 return (TARGET_MMA && FP_REGNO_P (regno) && (regno & 3) == 0);
1861
1862 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1863 register combinations, and we use PTImode for the places where we need to
1864 deal with quad word memory operations. Don't allow quad words in the
1865 argument or frame pointer registers, just registers 0..31. */
1866 if (mode == PTImode)
1867 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1868 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1869 && ((regno & 1) == 0));
1870
1871 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1872 implementations. Don't allow an item to be split between a FP register
1873 and an Altivec register. Allow TImode in all VSX registers if the user
1874 asked for it. */
1875 if (TARGET_VSX && VSX_REGNO_P (regno)
1876 && (VECTOR_MEM_VSX_P (mode)
1877 || VECTOR_ALIGNMENT_P (mode)
1878 || reg_addr[mode].scalar_in_vmx_p
1879 || mode == TImode
1880 || (TARGET_VADDUQM && mode == V1TImode)))
1881 {
1882 if (FP_REGNO_P (regno))
1883 return FP_REGNO_P (last_regno);
1884
1885 if (ALTIVEC_REGNO_P (regno))
1886 {
1887 if (GET_MODE_SIZE (mode) < 16 && !reg_addr[mode].scalar_in_vmx_p)
1888 return 0;
1889
1890 return ALTIVEC_REGNO_P (last_regno);
1891 }
1892 }
1893
1894 /* The GPRs can hold any mode, but values bigger than one register
1895 cannot go past R31. */
1896 if (INT_REGNO_P (regno))
1897 return INT_REGNO_P (last_regno);
1898
1899 /* The float registers (except for VSX vector modes) can only hold floating
1900 modes and DImode. */
1901 if (FP_REGNO_P (regno))
1902 {
1903 if (VECTOR_ALIGNMENT_P (mode))
1904 return false;
1905
1906 if (SCALAR_FLOAT_MODE_P (mode)
1907 && (mode != TDmode || (regno % 2) == 0)
1908 && FP_REGNO_P (last_regno))
1909 return 1;
1910
1911 if (GET_MODE_CLASS (mode) == MODE_INT)
1912 {
1913 if (GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1914 return 1;
1915
1916 if (TARGET_POPCNTD && mode == SImode)
1917 return 1;
1918
1919 if (TARGET_P9_VECTOR && (mode == QImode || mode == HImode))
1920 return 1;
1921 }
1922
1923 return 0;
1924 }
1925
1926 /* The CR register can only hold CC modes. */
1927 if (CR_REGNO_P (regno))
1928 return GET_MODE_CLASS (mode) == MODE_CC;
1929
1930 if (CA_REGNO_P (regno))
1931 return mode == Pmode || mode == SImode;
1932
1933 /* AltiVec only in AltiVec registers. */
1934 if (ALTIVEC_REGNO_P (regno))
1935 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1936 || mode == V1TImode);
1937
1938 /* We cannot put non-VSX TImode or PTImode anywhere except general registers,
1939 and it must be able to fit within the register set. */
1940
1941 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1942 }
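
/* A couple of examples of the rules above (register numbers assume FPRs
   start at 32): a TDmode value is accepted at the even FPR 32 but rejected
   at the odd FPR 33, and an OOmode vector pair is accepted only at an even
   VSX register number when MMA is enabled.  */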
1943
1944 /* Implement TARGET_HARD_REGNO_NREGS. */
1945
1946 static unsigned int
1947 rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
1948 {
1949 return rs6000_hard_regno_nregs[mode][regno];
1950 }
1951
1952 /* Implement TARGET_HARD_REGNO_MODE_OK. */
1953
1954 static bool
1955 rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
1956 {
1957 return rs6000_hard_regno_mode_ok_p[mode][regno];
1958 }
1959
1960 /* Implement TARGET_MODES_TIEABLE_P.
1961
1962 PTImode cannot tie with other modes because PTImode is restricted to even
1963 GPR registers, and TImode can go in any GPR as well as VSX registers (PR
1964 57744).
1965
1966 Similarly, don't allow OOmode (vector pair, restricted to even VSX
1967 registers) or XOmode (vector quad, restricted to FPR registers divisible
1968 by 4) to tie with other modes.
1969
1970 Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE
1971 128-bit floating point on VSX systems ties with other vectors. */
1972
1973 static bool
1974 rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2)
1975 {
1976 if (mode1 == PTImode || mode1 == OOmode || mode1 == XOmode
1977 || mode2 == PTImode || mode2 == OOmode || mode2 == XOmode)
1978 return mode1 == mode2;
1979
1980 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1))
1981 return ALTIVEC_OR_VSX_VECTOR_MODE (mode2);
1982 if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2))
1983 return false;
1984
1985 if (SCALAR_FLOAT_MODE_P (mode1))
1986 return SCALAR_FLOAT_MODE_P (mode2);
1987 if (SCALAR_FLOAT_MODE_P (mode2))
1988 return false;
1989
1990 if (GET_MODE_CLASS (mode1) == MODE_CC)
1991 return GET_MODE_CLASS (mode2) == MODE_CC;
1992 if (GET_MODE_CLASS (mode2) == MODE_CC)
1993 return false;
1994
1995 return true;
1996 }
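
/* For instance, SFmode and DFmode tie (both scalar float), and V4SImode and
   V2DFmode tie (both Altivec/VSX vectors), but TImode and PTImode do not tie
   even though they have the same size, because PTImode is limited to even
   GPR pairs.  */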
1997
1998 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */
1999
2000 static bool
2001 rs6000_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
2002 machine_mode mode)
2003 {
2004 if (TARGET_32BIT
2005 && TARGET_POWERPC64
2006 && GET_MODE_SIZE (mode) > 4
2007 && INT_REGNO_P (regno))
2008 return true;
2009
2010 if (TARGET_VSX
2011 && FP_REGNO_P (regno)
2012 && GET_MODE_SIZE (mode) > 8
2013 && !FLOAT128_2REG_P (mode))
2014 return true;
2015
2016 return false;
2017 }
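
/* Example: for -m32 -mpowerpc64, a DImode value fits in a single 64-bit
   GPR, but the 32-bit calling convention only preserves the low 32 bits of
   that register, so it is reported as part-clobbered across calls.  */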
2018
2019 /* Print interesting facts about registers. */
2020 static void
2021 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
2022 {
2023 int r, m;
2024
2025 for (r = first_regno; r <= last_regno; ++r)
2026 {
2027 const char *comma = "";
2028 int len;
2029
2030 if (first_regno == last_regno)
2031 fprintf (stderr, "%s:\t", reg_name);
2032 else
2033 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
2034
2035 len = 8;
2036 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2037 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
2038 {
2039 if (len > 70)
2040 {
2041 fprintf (stderr, ",\n\t");
2042 len = 8;
2043 comma = "";
2044 }
2045
2046 if (rs6000_hard_regno_nregs[m][r] > 1)
2047 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
2048 rs6000_hard_regno_nregs[m][r]);
2049 else
2050 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
2051
2052 comma = ", ";
2053 }
2054
2055 if (call_used_or_fixed_reg_p (r))
2056 {
2057 if (len > 70)
2058 {
2059 fprintf (stderr, ",\n\t");
2060 len = 8;
2061 comma = "";
2062 }
2063
2064 len += fprintf (stderr, "%s%s", comma, "call-used");
2065 comma = ", ";
2066 }
2067
2068 if (fixed_regs[r])
2069 {
2070 if (len > 70)
2071 {
2072 fprintf (stderr, ",\n\t");
2073 len = 8;
2074 comma = "";
2075 }
2076
2077 len += fprintf (stderr, "%s%s", comma, "fixed");
2078 comma = ", ";
2079 }
2080
2081 if (len > 70)
2082 {
2083 fprintf (stderr, ",\n\t");
2084 comma = "";
2085 }
2086
2087 len += fprintf (stderr, "%sreg-class = %s", comma,
2088 reg_class_names[(int)rs6000_regno_regclass[r]]);
2089 comma = ", ";
2090
2091 if (len > 70)
2092 {
2093 fprintf (stderr, ",\n\t");
2094 comma = "";
2095 }
2096
2097 fprintf (stderr, "%sregno = %d\n", comma, r);
2098 }
2099 }
2100
2101 static const char *
2102 rs6000_debug_vector_unit (enum rs6000_vector v)
2103 {
2104 const char *ret;
2105
2106 switch (v)
2107 {
2108 case VECTOR_NONE: ret = "none"; break;
2109 case VECTOR_ALTIVEC: ret = "altivec"; break;
2110 case VECTOR_VSX: ret = "vsx"; break;
2111 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
2112 default: ret = "unknown"; break;
2113 }
2114
2115 return ret;
2116 }
2117
2118 /* Inner function printing just the address mask for a particular reload
2119 register class. */
2120 DEBUG_FUNCTION char *
2121 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
2122 {
2123 static char ret[8];
2124 char *p = ret;
2125
2126 if ((mask & RELOAD_REG_VALID) != 0)
2127 *p++ = 'v';
2128 else if (keep_spaces)
2129 *p++ = ' ';
2130
2131 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2132 *p++ = 'm';
2133 else if (keep_spaces)
2134 *p++ = ' ';
2135
2136 if ((mask & RELOAD_REG_INDEXED) != 0)
2137 *p++ = 'i';
2138 else if (keep_spaces)
2139 *p++ = ' ';
2140
2141 if ((mask & RELOAD_REG_QUAD_OFFSET) != 0)
2142 *p++ = 'O';
2143 else if ((mask & RELOAD_REG_OFFSET) != 0)
2144 *p++ = 'o';
2145 else if (keep_spaces)
2146 *p++ = ' ';
2147
2148 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2149 *p++ = '+';
2150 else if (keep_spaces)
2151 *p++ = ' ';
2152
2153 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2154 *p++ = '+';
2155 else if (keep_spaces)
2156 *p++ = ' ';
2157
2158 if ((mask & RELOAD_REG_AND_M16) != 0)
2159 *p++ = '&';
2160 else if (keep_spaces)
2161 *p++ = ' ';
2162
2163 *p = '\0';
2164
2165 return ret;
2166 }
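
/* For example, a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED and
   RELOAD_REG_OFFSET prints as the letters 'v', 'i' and 'o'; with
   KEEP_SPACES the missing capabilities become blanks so the columns line
   up across register classes.  */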
2167
2168 /* Print the address masks in a human readable fashion. */
2169 DEBUG_FUNCTION void
2170 rs6000_debug_print_mode (ssize_t m)
2171 {
2172 ssize_t rc;
2173 int spaces = 0;
2174
2175 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2176 for (rc = 0; rc < N_RELOAD_REG; rc++)
2177 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2178 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2179
2180 if ((reg_addr[m].reload_store != CODE_FOR_nothing)
2181 || (reg_addr[m].reload_load != CODE_FOR_nothing))
2182 {
2183 fprintf (stderr, "%*s Reload=%c%c", spaces, "",
2184 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2185 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
2186 spaces = 0;
2187 }
2188 else
2189 spaces += strlen (" Reload=sl");
2190
2191 if (reg_addr[m].scalar_in_vmx_p)
2192 {
2193 fprintf (stderr, "%*s Upper=y", spaces, "");
2194 spaces = 0;
2195 }
2196 else
2197 spaces += strlen (" Upper=y");
2198
2199 if (rs6000_vector_unit[m] != VECTOR_NONE
2200 || rs6000_vector_mem[m] != VECTOR_NONE)
2201 {
2202 fprintf (stderr, "%*s vector: arith=%-10s mem=%s",
2203 spaces, "",
2204 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2205 rs6000_debug_vector_unit (rs6000_vector_mem[m]));
2206 }
2207
2208 fputs ("\n", stderr);
2209 }
2210
2211 #define DEBUG_FMT_ID "%-32s= "
2212 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2213 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2214 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
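
/* These rely on string-literal concatenation; DEBUG_FMT_D, for example,
   expands to "%-32s= " "%d\n", so a call like

	fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);

   prints a left-justified 32-character label followed by the value.  */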
2215
2216 /* Print various interesting information with -mdebug=reg. */
2217 static void
2218 rs6000_debug_reg_global (void)
2219 {
2220 static const char *const tf[2] = { "false", "true" };
2221 const char *nl = (const char *)0;
2222 int m;
2223 size_t m1, m2, v;
2224 char costly_num[20];
2225 char nop_num[20];
2226 char flags_buffer[40];
2227 const char *costly_str;
2228 const char *nop_str;
2229 const char *trace_str;
2230 const char *abi_str;
2231 const char *cmodel_str;
2232 struct cl_target_option cl_opts;
2233
2234 /* Modes we want tieable information on. */
2235 static const machine_mode print_tieable_modes[] = {
2236 QImode,
2237 HImode,
2238 SImode,
2239 DImode,
2240 TImode,
2241 PTImode,
2242 SFmode,
2243 DFmode,
2244 TFmode,
2245 IFmode,
2246 KFmode,
2247 SDmode,
2248 DDmode,
2249 TDmode,
2250 V2SImode,
2251 V2SFmode,
2252 V16QImode,
2253 V8HImode,
2254 V4SImode,
2255 V2DImode,
2256 V1TImode,
2257 V32QImode,
2258 V16HImode,
2259 V8SImode,
2260 V4DImode,
2261 V2TImode,
2262 V4SFmode,
2263 V2DFmode,
2264 V8SFmode,
2265 V4DFmode,
2266 OOmode,
2267 XOmode,
2268 CCmode,
2269 CCUNSmode,
2270 CCEQmode,
2271 CCFPmode,
2272 };
2273
2274 /* Virtual regs we are interested in. */
2275 const static struct {
2276 int regno; /* register number. */
2277 const char *name; /* register name. */
2278 } virtual_regs[] = {
2279 { STACK_POINTER_REGNUM, "stack pointer:" },
2280 { TOC_REGNUM, "toc: " },
2281 { STATIC_CHAIN_REGNUM, "static chain: " },
2282 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2283 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2284 { ARG_POINTER_REGNUM, "arg pointer: " },
2285 { FRAME_POINTER_REGNUM, "frame pointer:" },
2286 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2287 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2288 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2289 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2290 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2291 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2292 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2293 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2294 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2295 };
2296
2297 fputs ("\nHard register information:\n", stderr);
2298 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2299 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2300 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2301 LAST_ALTIVEC_REGNO,
2302 "vs");
2303 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2304 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2305 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2306 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2307 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2308 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2309
2310 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2311 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2312 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2313
2314 fprintf (stderr,
2315 "\n"
2316 "d reg_class = %s\n"
2317 "v reg_class = %s\n"
2318 "wa reg_class = %s\n"
2319 "we reg_class = %s\n"
2320 "wr reg_class = %s\n"
2321 "wx reg_class = %s\n"
2322 "wA reg_class = %s\n"
2323 "\n",
2324 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2325 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2326 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2327 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
2328 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2329 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2330 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]]);
2331
2332 nl = "\n";
2333 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2334 rs6000_debug_print_mode (m);
2335
2336 fputs ("\n", stderr);
2337
2338 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2339 {
2340 machine_mode mode1 = print_tieable_modes[m1];
2341 bool first_time = true;
2342
2343 nl = (const char *)0;
2344 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2345 {
2346 machine_mode mode2 = print_tieable_modes[m2];
2347 if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
2348 {
2349 if (first_time)
2350 {
2351 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2352 nl = "\n";
2353 first_time = false;
2354 }
2355
2356 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2357 }
2358 }
2359
2360 if (!first_time)
2361 fputs ("\n", stderr);
2362 }
2363
2364 if (nl)
2365 fputs (nl, stderr);
2366
2367 if (rs6000_recip_control)
2368 {
2369 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2370
2371 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2372 if (rs6000_recip_bits[m])
2373 {
2374 fprintf (stderr,
2375 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2376 GET_MODE_NAME (m),
2377 (RS6000_RECIP_AUTO_RE_P (m)
2378 ? "auto"
2379 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2380 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2381 ? "auto"
2382 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2383 }
2384
2385 fputs ("\n", stderr);
2386 }
2387
2388 if (rs6000_cpu_index >= 0)
2389 {
2390 const char *name = processor_target_table[rs6000_cpu_index].name;
2391 HOST_WIDE_INT flags
2392 = processor_target_table[rs6000_cpu_index].target_enable;
2393
2394 sprintf (flags_buffer, "-mcpu=%s flags", name);
2395 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2396 }
2397 else
2398 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2399
2400 if (rs6000_tune_index >= 0)
2401 {
2402 const char *name = processor_target_table[rs6000_tune_index].name;
2403 HOST_WIDE_INT flags
2404 = processor_target_table[rs6000_tune_index].target_enable;
2405
2406 sprintf (flags_buffer, "-mtune=%s flags", name);
2407 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2408 }
2409 else
2410 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2411
2412 cl_target_option_save (&cl_opts, &global_options, &global_options_set);
2413 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2414 rs6000_isa_flags);
2415
2416 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2417 rs6000_isa_flags_explicit);
2418
2419 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2420
2421 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2422 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2423
2424 switch (rs6000_sched_costly_dep)
2425 {
2426 case max_dep_latency:
2427 costly_str = "max_dep_latency";
2428 break;
2429
2430 case no_dep_costly:
2431 costly_str = "no_dep_costly";
2432 break;
2433
2434 case all_deps_costly:
2435 costly_str = "all_deps_costly";
2436 break;
2437
2438 case true_store_to_load_dep_costly:
2439 costly_str = "true_store_to_load_dep_costly";
2440 break;
2441
2442 case store_to_load_dep_costly:
2443 costly_str = "store_to_load_dep_costly";
2444 break;
2445
2446 default:
2447 costly_str = costly_num;
2448 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2449 break;
2450 }
2451
2452 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2453
2454 switch (rs6000_sched_insert_nops)
2455 {
2456 case sched_finish_regroup_exact:
2457 nop_str = "sched_finish_regroup_exact";
2458 break;
2459
2460 case sched_finish_pad_groups:
2461 nop_str = "sched_finish_pad_groups";
2462 break;
2463
2464 case sched_finish_none:
2465 nop_str = "sched_finish_none";
2466 break;
2467
2468 default:
2469 nop_str = nop_num;
2470 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2471 break;
2472 }
2473
2474 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2475
2476 switch (rs6000_sdata)
2477 {
2478 default:
2479 case SDATA_NONE:
2480 break;
2481
2482 case SDATA_DATA:
2483 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2484 break;
2485
2486 case SDATA_SYSV:
2487 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2488 break;
2489
2490 case SDATA_EABI:
2491 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2492 break;
2493
2494 }
2495
2496 switch (rs6000_traceback)
2497 {
2498 case traceback_default: trace_str = "default"; break;
2499 case traceback_none: trace_str = "none"; break;
2500 case traceback_part: trace_str = "part"; break;
2501 case traceback_full: trace_str = "full"; break;
2502 default: trace_str = "unknown"; break;
2503 }
2504
2505 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2506
2507 switch (rs6000_current_cmodel)
2508 {
2509 case CMODEL_SMALL: cmodel_str = "small"; break;
2510 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2511 case CMODEL_LARGE: cmodel_str = "large"; break;
2512 default: cmodel_str = "unknown"; break;
2513 }
2514
2515 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2516
2517 switch (rs6000_current_abi)
2518 {
2519 case ABI_NONE: abi_str = "none"; break;
2520 case ABI_AIX: abi_str = "aix"; break;
2521 case ABI_ELFv2: abi_str = "ELFv2"; break;
2522 case ABI_V4: abi_str = "V4"; break;
2523 case ABI_DARWIN: abi_str = "darwin"; break;
2524 default: abi_str = "unknown"; break;
2525 }
2526
2527 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2528
2529 if (rs6000_altivec_abi)
2530 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2531
2532 if (rs6000_aix_extabi)
2533 fprintf (stderr, DEBUG_FMT_S, "AIX vec-extabi", "true");
2534
2535 if (rs6000_darwin64_abi)
2536 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2537
2538 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2539 (TARGET_SOFT_FLOAT ? "true" : "false"));
2540
2541 if (TARGET_LINK_STACK)
2542 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2543
2544 if (TARGET_P8_FUSION)
2545 {
2546 char options[80];
2547
2548 strcpy (options, "power8");
2549 if (TARGET_P8_FUSION_SIGN)
2550 strcat (options, ", sign");
2551
2552 fprintf (stderr, DEBUG_FMT_S, "fusion", options);
2553 }
2554
2555 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2556 TARGET_SECURE_PLT ? "secure" : "bss");
2557 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2558 aix_struct_return ? "aix" : "sysv");
2559 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2560 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2561 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2562 tf[!!rs6000_align_branch_targets]);
2563 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2564 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2565 rs6000_long_double_type_size);
2566 if (rs6000_long_double_type_size > 64)
2567 {
2568 fprintf (stderr, DEBUG_FMT_S, "long double type",
2569 TARGET_IEEEQUAD ? "IEEE" : "IBM");
2570 fprintf (stderr, DEBUG_FMT_S, "default long double type",
2571 TARGET_IEEEQUAD_DEFAULT ? "IEEE" : "IBM");
2572 }
2573 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2574 (int)rs6000_sched_restricted_insns_priority);
2575 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2576 (int)END_BUILTINS);
2577
2578 fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
2579 (int)TARGET_FLOAT128_ENABLE_TYPE);
2580
2581 if (TARGET_VSX)
2582 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2583 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2584
2585 if (TARGET_DIRECT_MOVE_128)
2586 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
2587 (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
2588 }
2589
2590 \f
2591 /* Update the addr mask bits in reg_addr to help secondary reload and the
2592 legitimate address support figure out the appropriate addressing to
2593 use. */
2594
2595 static void
2596 rs6000_setup_reg_addr_masks (void)
2597 {
2598 ssize_t rc, reg, m, nregs;
2599 addr_mask_type any_addr_mask, addr_mask;
2600
2601 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2602 {
2603 machine_mode m2 = (machine_mode) m;
2604 bool complex_p = false;
2605 bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
2606 size_t msize;
2607
2608 if (COMPLEX_MODE_P (m2))
2609 {
2610 complex_p = true;
2611 m2 = GET_MODE_INNER (m2);
2612 }
2613
2614 msize = GET_MODE_SIZE (m2);
2615
2616 /* SDmode is special in that we want to access it only via REG+REG
2617 addressing on power7 and above, since we want to use the LFIWZX and
2618 STFIWX instructions to load it. */
2619 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2620
2621 any_addr_mask = 0;
2622 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2623 {
2624 addr_mask = 0;
2625 reg = reload_reg_map[rc].reg;
2626
2627 /* Can mode values go in the GPR/FPR/Altivec registers? */
2628 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2629 {
2630 bool small_int_vsx_p = (small_int_p
2631 && (rc == RELOAD_REG_FPR
2632 || rc == RELOAD_REG_VMX));
2633
2634 nregs = rs6000_hard_regno_nregs[m][reg];
2635 addr_mask |= RELOAD_REG_VALID;
2636
2637 /* Indicate if the mode takes more than 1 physical register. If
2638 it takes a single register, indicate it can do REG+REG
2639 addressing. Small integers in VSX registers can only do
2640 REG+REG addressing. */
2641 if (small_int_vsx_p)
2642 addr_mask |= RELOAD_REG_INDEXED;
2643 else if (nregs > 1 || m == BLKmode || complex_p)
2644 addr_mask |= RELOAD_REG_MULTIPLE;
2645 else
2646 addr_mask |= RELOAD_REG_INDEXED;
2647
2648 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2649 addressing. If we allow scalars into Altivec registers,
2650 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.
2651
2652 For VSX systems, we don't allow update addressing for
2653 DFmode/SFmode if those registers can go in both the
2654 traditional floating point registers and Altivec registers.
2655 The load/store instructions for the Altivec registers do not
2656 have update forms. If we allowed update addressing, it seems
2657 to break IV-OPT code using floating point if the index type is
2658 int instead of long (PR target/81550 and target/84042). */
2659
2660 if (TARGET_UPDATE
2661 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2662 && msize <= 8
2663 && !VECTOR_MODE_P (m2)
2664 && !VECTOR_ALIGNMENT_P (m2)
2665 && !complex_p
2666 && (m != E_DFmode || !TARGET_VSX)
2667 && (m != E_SFmode || !TARGET_P8_VECTOR)
2668 && !small_int_vsx_p)
2669 {
2670 addr_mask |= RELOAD_REG_PRE_INCDEC;
2671
2672 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2673 we don't allow PRE_MODIFY for some multi-register
2674 operations. */
2675 switch (m)
2676 {
2677 default:
2678 addr_mask |= RELOAD_REG_PRE_MODIFY;
2679 break;
2680
2681 case E_DImode:
2682 if (TARGET_POWERPC64)
2683 addr_mask |= RELOAD_REG_PRE_MODIFY;
2684 break;
2685
2686 case E_DFmode:
2687 case E_DDmode:
2688 if (TARGET_HARD_FLOAT)
2689 addr_mask |= RELOAD_REG_PRE_MODIFY;
2690 break;
2691 }
2692 }
2693 }
2694
2695 /* GPR and FPR registers can do REG+OFFSET addressing, except
2696 possibly for SDmode. ISA 3.0 (i.e. power9) adds D-form addressing
2697 for 64-bit scalars and 32-bit SFmode to altivec registers. */
2698 if ((addr_mask != 0) && !indexed_only_p
2699 && msize <= 8
2700 && (rc == RELOAD_REG_GPR
2701 || ((msize == 8 || m2 == SFmode)
2702 && (rc == RELOAD_REG_FPR
2703 || (rc == RELOAD_REG_VMX && TARGET_P9_VECTOR)))))
2704 addr_mask |= RELOAD_REG_OFFSET;
2705
2706 /* VSX registers can do REG+OFFSET addressing if ISA 3.0
2707 instructions are enabled. The offset for 128-bit VSX registers is
2708 only 12 bits. While GPRs can handle the full offset range, VSX
2709 registers can only handle the restricted range. */
2710 else if ((addr_mask != 0) && !indexed_only_p
2711 && msize == 16 && TARGET_P9_VECTOR
2712 && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
2713 || (m2 == TImode && TARGET_VSX)))
2714 {
2715 addr_mask |= RELOAD_REG_OFFSET;
2716 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2717 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2718 }
2719
2720 /* Vector pairs can do both indexed and offset loads if the
2721 instructions are enabled, otherwise they can only do offset loads
2722 since it will be broken into two vector moves. Vector quads can
2723 only do offset loads. */
2724 else if ((addr_mask != 0) && TARGET_MMA
2725 && (m2 == OOmode || m2 == XOmode))
2726 {
2727 addr_mask |= RELOAD_REG_OFFSET;
2728 if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
2729 {
2730 addr_mask |= RELOAD_REG_QUAD_OFFSET;
2731 if (m2 == OOmode)
2732 addr_mask |= RELOAD_REG_INDEXED;
2733 }
2734 }
2735
2736 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2737 addressing on 128-bit types. */
2738 if (rc == RELOAD_REG_VMX && msize == 16
2739 && (addr_mask & RELOAD_REG_VALID) != 0)
2740 addr_mask |= RELOAD_REG_AND_M16;
2741
2742 reg_addr[m].addr_mask[rc] = addr_mask;
2743 any_addr_mask |= addr_mask;
2744 }
2745
2746 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2747 }
2748 }
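
/* Worked example of the loop above, assuming a power8-style configuration
   where SImode is allowed in VSX registers: for SImode in a GPR the mask
   becomes VALID | INDEXED | OFFSET | PRE_INCDEC | PRE_MODIFY, while for
   SImode in an FPR or Altivec register only VALID | INDEXED is set,
   matching the rule that small integers in VSX registers are REG+REG
   only.  */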
2749
2750 \f
2751 /* Initialize the various global tables that are based on register size. */
2752 static void
2753 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2754 {
2755 ssize_t r, m, c;
2756 int align64;
2757 int align32;
2758
2759 /* Precalculate REGNO_REG_CLASS. */
2760 rs6000_regno_regclass[0] = GENERAL_REGS;
2761 for (r = 1; r < 32; ++r)
2762 rs6000_regno_regclass[r] = BASE_REGS;
2763
2764 for (r = 32; r < 64; ++r)
2765 rs6000_regno_regclass[r] = FLOAT_REGS;
2766
2767 for (r = 64; HARD_REGISTER_NUM_P (r); ++r)
2768 rs6000_regno_regclass[r] = NO_REGS;
2769
2770 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2771 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2772
2773 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2774 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2775 rs6000_regno_regclass[r] = CR_REGS;
2776
2777 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2778 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2779 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2780 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2781 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2782 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2783 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2784
2785 /* Precalculate register class to simpler reload register class. We don't
2786 need all of the register classes that are combinations of different
2787 classes, just the simple ones that have constraint letters. */
2788 for (c = 0; c < N_REG_CLASSES; c++)
2789 reg_class_to_reg_type[c] = NO_REG_TYPE;
2790
2791 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2792 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2793 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2794 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2795 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2796 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2797 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2798 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2799 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2800 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2801
2802 if (TARGET_VSX)
2803 {
2804 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2805 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2806 }
2807 else
2808 {
2809 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2810 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2811 }
2812
2813 /* Precalculate the valid memory formats as well as the vector information;
2814 this must be set up before the rs6000_hard_regno_nregs_internal calls
2815 below. */
2816 gcc_assert ((int)VECTOR_NONE == 0);
2817 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2818 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2819
2820 gcc_assert ((int)CODE_FOR_nothing == 0);
2821 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2822
2823 gcc_assert ((int)NO_REGS == 0);
2824 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2825
2826 /* The VSX hardware allows native alignment for vectors, but TARGET_VSX_ALIGN_128
2827 controls whether the compiler believes it can use native alignment or must still use 128-bit alignment. */
2828 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2829 {
2830 align64 = 64;
2831 align32 = 32;
2832 }
2833 else
2834 {
2835 align64 = 128;
2836 align32 = 128;
2837 }
2838
2839 /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so
2840 only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */
2841 if (TARGET_FLOAT128_TYPE)
2842 {
2843 rs6000_vector_mem[KFmode] = VECTOR_VSX;
2844 rs6000_vector_align[KFmode] = 128;
2845
2846 if (FLOAT128_IEEE_P (TFmode))
2847 {
2848 rs6000_vector_mem[TFmode] = VECTOR_VSX;
2849 rs6000_vector_align[TFmode] = 128;
2850 }
2851 }
2852
2853 /* V2DF mode, VSX only. */
2854 if (TARGET_VSX)
2855 {
2856 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2857 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2858 rs6000_vector_align[V2DFmode] = align64;
2859 }
2860
2861 /* V4SF mode, either VSX or Altivec. */
2862 if (TARGET_VSX)
2863 {
2864 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2865 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2866 rs6000_vector_align[V4SFmode] = align32;
2867 }
2868 else if (TARGET_ALTIVEC)
2869 {
2870 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2871 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2872 rs6000_vector_align[V4SFmode] = align32;
2873 }
2874
2875 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2876 and stores. */
2877 if (TARGET_ALTIVEC)
2878 {
2879 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2880 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2881 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2882 rs6000_vector_align[V4SImode] = align32;
2883 rs6000_vector_align[V8HImode] = align32;
2884 rs6000_vector_align[V16QImode] = align32;
2885
2886 if (TARGET_VSX)
2887 {
2888 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2889 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2890 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2891 }
2892 else
2893 {
2894 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2895 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2896 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2897 }
2898 }
2899
2900 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2901 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2902 if (TARGET_VSX)
2903 {
2904 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2905 rs6000_vector_unit[V2DImode]
2906 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2907 rs6000_vector_align[V2DImode] = align64;
2908
2909 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2910 rs6000_vector_unit[V1TImode]
2911 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2912 rs6000_vector_align[V1TImode] = 128;
2913 }
2914
2915 /* DFmode, see if we want to use the VSX unit. Memory is handled
2916 differently, so don't set rs6000_vector_mem. */
2917 if (TARGET_VSX)
2918 {
2919 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2920 rs6000_vector_align[DFmode] = 64;
2921 }
2922
2923 /* SFmode, see if we want to use the VSX unit. */
2924 if (TARGET_P8_VECTOR)
2925 {
2926 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2927 rs6000_vector_align[SFmode] = 32;
2928 }
2929
2930 /* Allow TImode in VSX registers and set the VSX memory macros. */
2931 if (TARGET_VSX)
2932 {
2933 rs6000_vector_mem[TImode] = VECTOR_VSX;
2934 rs6000_vector_align[TImode] = align64;
2935 }
2936
2937 /* Add support for vector pairs and vector quad registers. */
2938 if (TARGET_MMA)
2939 {
2940 rs6000_vector_unit[OOmode] = VECTOR_NONE;
2941 rs6000_vector_mem[OOmode] = VECTOR_VSX;
2942 rs6000_vector_align[OOmode] = 256;
2943
2944 rs6000_vector_unit[XOmode] = VECTOR_NONE;
2945 rs6000_vector_mem[XOmode] = VECTOR_VSX;
2946 rs6000_vector_align[XOmode] = 512;
2947 }
2948
2949 /* Register class constraints for the constraints that depend on compile
2950 switches. When the VSX code was added, different constraints were added
2951 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2952 of the VSX registers are used. The register classes for scalar floating
2953 point types are set, based on whether we allow that type into the upper
2954 (Altivec) registers. GCC has register classes to target the Altivec
2955 registers for load/store operations, to select using a VSX memory
2956 operation instead of the traditional floating point operation. The
2957 constraints are:
2958
2959 d - Register class to use with traditional DFmode instructions.
2960 v - Altivec register.
2961 wa - Any VSX register.
2962 wc - Reserved to represent individual CR bits (used in LLVM).
2963 wn - always NO_REGS.
2964 wr - GPR if 64-bit mode is permitted.
2965 wx - Float register if we can do 32-bit int stores. */
2966
2967 if (TARGET_HARD_FLOAT)
2968 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;
2969 if (TARGET_ALTIVEC)
2970 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2971 if (TARGET_VSX)
2972 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2973
2974 if (TARGET_POWERPC64)
2975 {
2976 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2977 rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS;
2978 }
2979
2980 if (TARGET_STFIWX)
2981 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2982
2983 /* Support for new direct moves (ISA 3.0 + 64-bit). */
2984 if (TARGET_DIRECT_MOVE_128)
2985 rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
2986
2987 /* Set up the reload helper and direct move functions. */
2988 if (TARGET_VSX || TARGET_ALTIVEC)
2989 {
2990 if (TARGET_64BIT)
2991 {
2992 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2993 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2994 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2995 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2996 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2997 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2998 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2999 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
3000 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
3001 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
3002 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
3003 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
3004 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
3005 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
3006 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
3007 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
3008 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
3009 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
3010 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
3011 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
3012
3013 if (FLOAT128_VECTOR_P (KFmode))
3014 {
3015 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store;
3016 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load;
3017 }
3018
3019 if (FLOAT128_VECTOR_P (TFmode))
3020 {
3021 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store;
3022 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load;
3023 }
3024
3025 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3026 available. */
3027 if (TARGET_NO_SDMODE_STACK)
3028 {
3029 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
3030 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
3031 }
3032
3033 if (TARGET_VSX)
3034 {
3035 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
3036 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
3037 }
3038
3039 if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
3040 {
3041 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
3042 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
3043 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
3044 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
3045 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
3046 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
3047 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
3048 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
3049 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
3050
3051 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
3052 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
3053 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
3054 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
3055 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
3056 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
3057 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
3058 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
3059 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
3060
3061 if (FLOAT128_VECTOR_P (KFmode))
3062 {
3063 reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf;
3064 reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf;
3065 }
3066
3067 if (FLOAT128_VECTOR_P (TFmode))
3068 {
3069 reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf;
3070 reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf;
3071 }
3072
3073 if (TARGET_MMA)
3074 {
3075 reg_addr[OOmode].reload_store = CODE_FOR_reload_oo_di_store;
3076 reg_addr[OOmode].reload_load = CODE_FOR_reload_oo_di_load;
3077 reg_addr[XOmode].reload_store = CODE_FOR_reload_xo_di_store;
3078 reg_addr[XOmode].reload_load = CODE_FOR_reload_xo_di_load;
3079 }
3080 }
3081 }
3082 else
3083 {
3084 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
3085 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
3086 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
3087 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
3088 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
3089 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
3090 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
3091 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
3092 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
3093 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
3094 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
3095 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
3096 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
3097 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
3098 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
3099 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
3100 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
3101 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
3102 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
3103 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
3104
3105 if (FLOAT128_VECTOR_P (KFmode))
3106 {
3107 reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store;
3108 reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load;
3109 }
3110
3111 if (FLOAT128_IEEE_P (TFmode))
3112 {
3113 reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store;
3114 reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load;
3115 }
3116
3117 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
3118 available. */
3119 if (TARGET_NO_SDMODE_STACK)
3120 {
3121 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
3122 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
3123 }
3124
3125 if (TARGET_VSX)
3126 {
3127 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
3128 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
3129 }
3130
3131 if (TARGET_DIRECT_MOVE)
3132 {
3133 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
3134 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
3135 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
3136 }
3137 }
3138
3139 reg_addr[DFmode].scalar_in_vmx_p = true;
3140 reg_addr[DImode].scalar_in_vmx_p = true;
3141
3142 if (TARGET_P8_VECTOR)
3143 {
3144 reg_addr[SFmode].scalar_in_vmx_p = true;
3145 reg_addr[SImode].scalar_in_vmx_p = true;
3146
3147 if (TARGET_P9_VECTOR)
3148 {
3149 reg_addr[HImode].scalar_in_vmx_p = true;
3150 reg_addr[QImode].scalar_in_vmx_p = true;
3151 }
3152 }
3153 }
3154
3155 /* Precalculate HARD_REGNO_NREGS. */
3156 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3157 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3158 rs6000_hard_regno_nregs[m][r]
3159 = rs6000_hard_regno_nregs_internal (r, (machine_mode) m);
3160
3161 /* Precalculate TARGET_HARD_REGNO_MODE_OK. */
3162 for (r = 0; HARD_REGISTER_NUM_P (r); ++r)
3163 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3164 rs6000_hard_regno_mode_ok_p[m][r]
3165 = rs6000_hard_regno_mode_ok_uncached (r, (machine_mode) m);
3166
3167 /* Precalculate CLASS_MAX_NREGS sizes. */
3168 for (c = 0; c < LIM_REG_CLASSES; ++c)
3169 {
3170 int reg_size;
3171
3172 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3173 reg_size = UNITS_PER_VSX_WORD;
3174
3175 else if (c == ALTIVEC_REGS)
3176 reg_size = UNITS_PER_ALTIVEC_WORD;
3177
3178 else if (c == FLOAT_REGS)
3179 reg_size = UNITS_PER_FP_WORD;
3180
3181 else
3182 reg_size = UNITS_PER_WORD;
3183
3184 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3185 {
3186 machine_mode m2 = (machine_mode)m;
3187 int reg_size2 = reg_size;
3188
3189 /* TDmode & IBM 128-bit floating point always takes 2 registers, even
3190 in VSX. */
3191 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3192 reg_size2 = UNITS_PER_FP_WORD;
3193
3194 rs6000_class_max_nregs[m][c]
3195 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3196 }
3197 }
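
/* Example: IBM 128-bit floating point (FLOAT128_2REG_P) stays at
   UNITS_PER_FP_WORD granularity even for VSX register classes, so
   rs6000_class_max_nregs[IFmode][FLOAT_REGS] is (16 + 8 - 1) / 8 == 2,
   i.e. a pair of FPRs rather than a single 16-byte VSX register.  */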
3198
3199 /* Calculate which modes to automatically generate code to use the
3200 reciprocal divide and square root instructions. In the future, possibly
3201 automatically generate the instructions even if the user did not specify
3202 -mrecip. The older machines' double precision reciprocal sqrt estimate is
3203 not accurate enough. */
3204 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3205 if (TARGET_FRES)
3206 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3207 if (TARGET_FRE)
3208 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3209 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3210 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3211 if (VECTOR_UNIT_VSX_P (V2DFmode))
3212 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3213
3214 if (TARGET_FRSQRTES)
3215 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3216 if (TARGET_FRSQRTE)
3217 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3218 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3219 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3220 if (VECTOR_UNIT_VSX_P (V2DFmode))
3221 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3222
3223 if (rs6000_recip_control)
3224 {
3225 if (!flag_finite_math_only)
3226 warning (0, "%qs requires %qs or %qs", "-mrecip", "-ffinite-math",
3227 "-ffast-math");
3228 if (flag_trapping_math)
3229 warning (0, "%qs requires %qs or %qs", "-mrecip",
3230 "-fno-trapping-math", "-ffast-math");
3231 if (!flag_reciprocal_math)
3232 warning (0, "%qs requires %qs or %qs", "-mrecip", "-freciprocal-math",
3233 "-ffast-math");
3234 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3235 {
3236 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3237 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3238 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3239
3240 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3241 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3242 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3243
3244 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3245 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3246 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3247
3248 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3249 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3250 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3251
3252 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3253 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3254 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3255
3256 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3257 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3258 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3259
3260 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3261 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3262 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3263
3264 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3265 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3266 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3267 }
3268 }
3269
3270 /* Update the addr mask bits in reg_addr to help secondary reload and the
3271 legitimate address support figure out the appropriate addressing to
3272 use. */
3273 rs6000_setup_reg_addr_masks ();
3274
3275 if (global_init_p || TARGET_DEBUG_TARGET)
3276 {
3277 if (TARGET_DEBUG_REG)
3278 rs6000_debug_reg_global ();
3279
3280 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3281 fprintf (stderr,
3282 "SImode variable mult cost = %d\n"
3283 "SImode constant mult cost = %d\n"
3284 "SImode short constant mult cost = %d\n"
3285 "DImode multipliciation cost = %d\n"
3286 "SImode division cost = %d\n"
3287 "DImode division cost = %d\n"
3288 "Simple fp operation cost = %d\n"
3289 "DFmode multiplication cost = %d\n"
3290 "SFmode division cost = %d\n"
3291 "DFmode division cost = %d\n"
3292 "cache line size = %d\n"
3293 "l1 cache size = %d\n"
3294 "l2 cache size = %d\n"
3295 "simultaneous prefetches = %d\n"
3296 "\n",
3297 rs6000_cost->mulsi,
3298 rs6000_cost->mulsi_const,
3299 rs6000_cost->mulsi_const9,
3300 rs6000_cost->muldi,
3301 rs6000_cost->divsi,
3302 rs6000_cost->divdi,
3303 rs6000_cost->fp,
3304 rs6000_cost->dmul,
3305 rs6000_cost->sdiv,
3306 rs6000_cost->ddiv,
3307 rs6000_cost->cache_line_size,
3308 rs6000_cost->l1_cache_size,
3309 rs6000_cost->l2_cache_size,
3310 rs6000_cost->simultaneous_prefetches);
3311 }
3312 }
3313
3314 #if TARGET_MACHO
3315 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3316
3317 static void
3318 darwin_rs6000_override_options (void)
3319 {
3320 /* The Darwin ABI always includes AltiVec, so it can't be (validly)
3321 turned off. */
3322 rs6000_altivec_abi = 1;
3323 TARGET_ALTIVEC_VRSAVE = 1;
3324 rs6000_current_abi = ABI_DARWIN;
3325
3326 if (DEFAULT_ABI == ABI_DARWIN
3327 && TARGET_64BIT)
3328 darwin_one_byte_bool = 1;
3329
3330 if (TARGET_64BIT && ! TARGET_POWERPC64)
3331 {
3332 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3333 warning (0, "%qs requires PowerPC64 architecture, enabling", "-m64");
3334 }
3335
3336 /* The linkers [ld64] that support 64-bit do not need the JBSR longcall
3337 optimisation, and it will not work with the most generic case (where the
3338 symbol is undefined external, but there is no symbol stub). */
3339 if (TARGET_64BIT)
3340 rs6000_default_long_calls = 0;
3341
3342 /* ld_classic is (so far) still used for kernel (static) code, and supports
3343 the JBSR longcall / branch islands. */
3344 if (flag_mkernel)
3345 {
3346 rs6000_default_long_calls = 1;
3347
3348 /* Allow a kext author to do -mkernel -mhard-float. */
3349 if (! (rs6000_isa_flags_explicit & OPTION_MASK_SOFT_FLOAT))
3350 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3351 }
3352
3353 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3354 Altivec. */
3355 if (!flag_mkernel && !flag_apple_kext
3356 && TARGET_64BIT
3357 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3358 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3359
3360 /* Unless the user (not the configurer) has explicitly overridden
3361 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to G4
3362 unless we are targeting the kernel. */
3363 if (!flag_mkernel
3364 && !flag_apple_kext
3365 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3366 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3367 && ! OPTION_SET_P (rs6000_cpu_index))
3368 {
3369 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3370 }
3371 }
3372 #endif
3373
3374 /* If not otherwise specified by a target, make 'long double' equivalent to
3375 'double'. */
3376
3377 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3378 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3379 #endif
3380
3381 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3382 to clobber the XER[CA] bit because clobbering that bit without telling
3383 the compiler worked just fine with versions of GCC before GCC 5, and
3384 breaking a lot of older code in ways that are hard to track down is
3385 not such a great idea. */
3386
3387 static rtx_insn *
3388 rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
3389 vec<machine_mode> & /*input_modes*/,
3390 vec<const char *> & /*constraints*/,
3391 vec<rtx> &/*uses*/, vec<rtx> &clobbers,
3392 HARD_REG_SET &clobbered_regs, location_t /*loc*/)
3393 {
3394 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3395 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3396 return NULL;
3397 }
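
/* A minimal illustration (hypothetical user code, not from the GCC
   sources): the asm below modifies XER[CA] via addc/addze yet declares
   no clobber for it, and the hook above keeps such code correct by
   recording the clobber on the asm's behalf.

     unsigned long a = 1, b = 2, c = 3, lo, hi;
     __asm__ ("addc %0,%2,%3\n\taddze %1,%4"
	      : "=&r" (lo), "=r" (hi)
	      : "r" (a), "r" (b), "r" (c));
*/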
3398
3399 /* This target function is similar to the hook TARGET_OPTION_OVERRIDE
3400 but is called when the optimize level is changed via an attribute or
3401 pragma or when it is reset at the end of the code affected by the
3402 attribute or pragma. It is not called at the beginning of compilation
3403 when TARGET_OPTION_OVERRIDE is called so if you want to perform these
3404 actions then, you should have TARGET_OPTION_OVERRIDE call
3405 TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE. */
3406
3407 static void
3408 rs6000_override_options_after_change (void)
3409 {
3410 /* Explicit -funroll-loops turns -munroll-only-small-loops off, and
3411 turns -frename-registers on. */
3412 if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
3413 || (OPTION_SET_P (flag_unroll_all_loops)
3414 && flag_unroll_all_loops))
3415 {
3416 if (!OPTION_SET_P (unroll_only_small_loops))
3417 unroll_only_small_loops = 0;
3418 if (!OPTION_SET_P (flag_rename_registers))
3419 flag_rename_registers = 1;
3420 if (!OPTION_SET_P (flag_cunroll_grow_size))
3421 flag_cunroll_grow_size = 1;
3422 }
3423 else if (!OPTION_SET_P (flag_cunroll_grow_size))
3424 flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
3425
3426 /* If we are inserting ROP-protect instructions, disable shrink wrap. */
3427 if (rs6000_rop_protect)
3428 flag_shrink_wrap = 0;
3429 }
3430
3431 #ifdef TARGET_USES_LINUX64_OPT
3432 static void
3433 rs6000_linux64_override_options ()
3434 {
3435 if (!OPTION_SET_P (rs6000_alignment_flags))
3436 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3437 if (rs6000_isa_flags & OPTION_MASK_64BIT)
3438 {
3439 if (DEFAULT_ABI != ABI_AIX)
3440 {
3441 rs6000_current_abi = ABI_AIX;
3442 error (INVALID_64BIT, "call");
3443 }
3444 dot_symbols = !strcmp (rs6000_abi_name, "aixdesc");
3445 if (ELFv2_ABI_CHECK)
3446 {
3447 rs6000_current_abi = ABI_ELFv2;
3448 if (dot_symbols)
3449 error ("%<-mcall-aixdesc%> incompatible with %<-mabi=elfv2%>");
3450 }
3451 if (rs6000_isa_flags & OPTION_MASK_RELOCATABLE)
3452 {
3453 rs6000_isa_flags &= ~OPTION_MASK_RELOCATABLE;
3454 error (INVALID_64BIT, "relocatable");
3455 }
3456 if (rs6000_isa_flags & OPTION_MASK_EABI)
3457 {
3458 rs6000_isa_flags &= ~OPTION_MASK_EABI;
3459 error (INVALID_64BIT, "eabi");
3460 }
3461 if (TARGET_PROTOTYPE)
3462 {
3463 target_prototype = 0;
3464 error (INVALID_64BIT, "prototype");
3465 }
3466 if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) == 0)
3467 {
3468 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3469 error ("%<-m64%> requires a PowerPC64 cpu");
3470 }
3471 if (!OPTION_SET_P (rs6000_current_cmodel))
3472 SET_CMODEL (CMODEL_MEDIUM);
3473 if ((rs6000_isa_flags_explicit & OPTION_MASK_MINIMAL_TOC) != 0)
3474 {
3475 if (OPTION_SET_P (rs6000_current_cmodel)
3476 && rs6000_current_cmodel != CMODEL_SMALL)
3477 error ("%<-mcmodel%> incompatible with other toc options");
3478 if (TARGET_MINIMAL_TOC)
3479 SET_CMODEL (CMODEL_SMALL);
3480 else if (TARGET_PCREL
3481 || (PCREL_SUPPORTED_BY_OS
3482 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0))
3483 /* Ignore -mno-minimal-toc. */
3484 ;
3485 else
3486 SET_CMODEL (CMODEL_SMALL);
3487 }
3488 if (rs6000_current_cmodel != CMODEL_SMALL)
3489 {
3490 if (!OPTION_SET_P (TARGET_NO_FP_IN_TOC))
3491 TARGET_NO_FP_IN_TOC = rs6000_current_cmodel == CMODEL_MEDIUM;
3492 if (!OPTION_SET_P (TARGET_NO_SUM_IN_TOC))
3493 TARGET_NO_SUM_IN_TOC = 0;
3494 }
3495 if (TARGET_PLTSEQ && DEFAULT_ABI != ABI_ELFv2)
3496 {
3497 if (OPTION_SET_P (rs6000_pltseq))
3498 warning (0, "%qs unsupported for this ABI",
3499 "-mpltseq");
3500 rs6000_pltseq = false;
3501 }
3502 }
3503 else if (TARGET_64BIT)
3504 error (INVALID_32BIT, "32");
3505 else
3506 {
3507 if (TARGET_PROFILE_KERNEL)
3508 {
3509 profile_kernel = 0;
3510 error (INVALID_32BIT, "profile-kernel");
3511 }
3512 if (OPTION_SET_P (rs6000_current_cmodel))
3513 {
3514 SET_CMODEL (CMODEL_SMALL);
3515 error (INVALID_32BIT, "cmodel");
3516 }
3517 }
3518 }
3519 #endif
3520
3521 /* Return true if we are using GLIBC, and it supports IEEE 128-bit long double.
3522 This support is only in little endian GLIBC 2.32 or newer. */
3523 static bool
3524 glibc_supports_ieee_128bit (void)
3525 {
3526 #ifdef OPTION_GLIBC
3527 if (OPTION_GLIBC && !BYTES_BIG_ENDIAN
3528 && ((TARGET_GLIBC_MAJOR * 1000) + TARGET_GLIBC_MINOR) >= 2032)
3529 return true;
3530 #endif /* OPTION_GLIBC. */
3531
3532 return false;
3533 }
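
/* Worked example of the version encoding above: glibc 2.32 encodes as
   2 * 1000 + 32 = 2032, the minimum accepted, while glibc 2.31 encodes
   as 2031 and is rejected.  */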
3534
3535 /* Override command line options.
3536
3537 Combine build-specific configuration information with options
3538 specified on the command line to set various state variables which
3539 influence code generation, optimization, and expansion of built-in
3540 functions.  Ensure that command-line configuration preferences are
3541 compatible with each other and with the build configuration; issue
3542 warnings while adjusting configuration or error messages while
3543 rejecting configuration.
3544
3545 Upon entry to this function:
3546
3547 This function is called once at the beginning of
3548 compilation, and then again at the start and end of compiling
3549 each section of code that has a different configuration, as
3550 indicated, for example, by adding the
3551
3552 __attribute__((__target__("cpu=power9")))
3553
3554 qualifier to a function definition or, for example, by bracketing
3555 code between
3556
3557 #pragma GCC target("altivec")
3558
3559 and
3560
3561 #pragma GCC reset_options
3562
3563 directives. Parameter global_init_p is true for the initial
3564 invocation, which initializes global variables, and false for all
3565 subsequent invocations.
3566
3567
3568 Various global state information is assumed to be valid. This
3569 includes OPTION_TARGET_CPU_DEFAULT, representing the name of the
3570 default CPU specified at build configure time, TARGET_DEFAULT,
3571 representing the default set of option flags for the default
3572 target, and OPTION_SET_P (rs6000_isa_flags), representing
3573 which options were requested on the command line.
3574
3575 Upon return from this function:
3576
3577 rs6000_isa_flags_explicit has a non-zero bit for each flag that
3578 was set by name on the command line. Additionally, if certain
3579 attributes are automatically enabled or disabled by this function
3580 in order to ensure compatibility between options and
3581 configuration, the flags associated with those attributes are
3582 also set. By setting these "explicit bits", we avoid the risk
3583 that other code might accidentally overwrite these particular
3584 attributes with "default values".
3585
3586 The various bits of rs6000_isa_flags are set to indicate the
3587 target options that have been selected for the most current
3588 compilation efforts. This has the effect of also turning on the
3589 associated TARGET_XXX values since these are macros which are
3590 generally defined to test the corresponding bit of the
3591 rs6000_isa_flags variable.
3592
3593 Various other global variables and fields of global structures
3594 (over 50 in all) are initialized to reflect the desired options
3595 for the most current compilation efforts. */
3596
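/* A sketch of such configuration boundaries (hypothetical user source,
   not part of GCC):

     __attribute__((__target__("cpu=power9")))
     void fast_path (void);

     #pragma GCC target("altivec")
     void vec_code (void);
     #pragma GCC reset_options

   The first call here has global_init_p == true; each later boundary
   like the ones above re-enters with global_init_p == false.  */
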
3597 static bool
3598 rs6000_option_override_internal (bool global_init_p)
3599 {
3600 bool ret = true;
3601
3602 HOST_WIDE_INT set_masks;
3603 HOST_WIDE_INT ignore_masks;
3604 int cpu_index = -1;
3605 int tune_index;
3606 struct cl_target_option *main_target_opt
3607 = ((global_init_p || target_option_default_node == NULL)
3608 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3609
3610 /* Print defaults. */
3611 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3612 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3613
3614 /* Remember the explicit arguments. */
3615 if (global_init_p)
3616 rs6000_isa_flags_explicit = OPTION_SET_P (rs6000_isa_flags);
3617
3618 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3619 library functions, so warn about it. The flag may be useful for
3620 performance studies from time to time though, so don't disable it
3621 entirely. */
3622 if (OPTION_SET_P (rs6000_alignment_flags)
3623 && rs6000_alignment_flags == MASK_ALIGN_POWER
3624 && DEFAULT_ABI == ABI_DARWIN
3625 && TARGET_64BIT)
3626 warning (0, "%qs is not supported for 64-bit Darwin;"
3627 " it is incompatible with the installed C and C++ libraries",
3628 "-malign-power");
3629
3630 /* Numerous experiments show that IRA-based loop pressure
3631 calculation works better for RTL loop invariant motion on targets
3632 with enough (>= 32) registers.  It is an expensive optimization,
3633 so it is enabled only for peak performance. */
3634 if (optimize >= 3 && global_init_p
3635 && !OPTION_SET_P (flag_ira_loop_pressure))
3636 flag_ira_loop_pressure = 1;
3637
3638 /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order
3639 for tracebacks to be complete but not if any -fasynchronous-unwind-tables
3640 options were already specified. */
3641 if (flag_sanitize & SANITIZE_USER_ADDRESS
3642 && !OPTION_SET_P (flag_asynchronous_unwind_tables))
3643 flag_asynchronous_unwind_tables = 1;
3644
3645 /* -fvariable-expansion-in-unroller is a win for POWER whenever the
3646 loop unroller is active. It is only checked during unrolling, so
3647 we can just set it on by default. */
3648 if (!OPTION_SET_P (flag_variable_expansion_in_unroller))
3649 flag_variable_expansion_in_unroller = 1;
3650
3651 /* Set the pointer size. */
3652 if (TARGET_64BIT)
3653 {
3654 rs6000_pmode = DImode;
3655 rs6000_pointer_size = 64;
3656 }
3657 else
3658 {
3659 rs6000_pmode = SImode;
3660 rs6000_pointer_size = 32;
3661 }
3662
3663 /* Some OSs don't support saving Altivec registers. On those OSs, we don't
3664 touch the OPTION_MASK_ALTIVEC settings; if the user wants it, the user
3665 must explicitly specify it and we won't interfere with the user's
3666 specification. */
3667
3668 set_masks = POWERPC_MASKS;
3669 #ifdef OS_MISSING_ALTIVEC
3670 if (OS_MISSING_ALTIVEC)
3671 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX
3672 | OTHER_VSX_VECTOR_MASKS);
3673 #endif
3674
3675 /* Don't let the processor default override options given explicitly. */
3676 set_masks &= ~rs6000_isa_flags_explicit;
3677
3678 /* If option powerpc64 was not specified explicitly, we need to ensure
3679 powerpc64 is always enabled for 64-bit here; otherwise some later
3680 checks would use an unexpected TARGET_POWERPC64 value. */
3681 if (!(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64)
3682 && TARGET_64BIT)
3683 {
3684 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3685 /* Need to stop powerpc64 from being unset in later processing,
3686 so clear it in set_masks.  But as PR108240 shows, to keep this
3687 consistent with previous behavior, we want to do it only if 64-bit
3688 is enabled explicitly.  This is a hack; revisit this later. */
3689 if (rs6000_isa_flags_explicit & OPTION_MASK_64BIT)
3690 set_masks &= ~OPTION_MASK_POWERPC64;
3691 }
3692
3693 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments.  If the user changed
3694 the cpu in a target attribute or pragma, but did not specify a tuning
3695 option, use the cpu for the tuning option rather than the option specified
3696 with -mtune on the command line. Process a '--with-cpu' configuration
3697 request as an implicit --cpu. */
3698 if (rs6000_cpu_index >= 0)
3699 cpu_index = rs6000_cpu_index;
3700 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3701 cpu_index = main_target_opt->x_rs6000_cpu_index;
3702 else if (OPTION_TARGET_CPU_DEFAULT)
3703 cpu_index = rs6000_cpu_name_lookup (OPTION_TARGET_CPU_DEFAULT);
3704
3705 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3706 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3707 with those from the cpu, except for options that were explicitly set. If
3708 we don't have a cpu, do not override the target bits set in
3709 TARGET_DEFAULT. */
3710 if (cpu_index >= 0)
3711 {
3712 rs6000_cpu_index = cpu_index;
3713 rs6000_isa_flags &= ~set_masks;
3714 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3715 & set_masks);
3716 }
3717 else
3718 {
3719 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3720 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3721 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3722 to using rs6000_isa_flags, we need to do the initialization here.
3723
3724 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3725 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3726 HOST_WIDE_INT flags;
3727 if (TARGET_DEFAULT)
3728 flags = TARGET_DEFAULT;
3729 else
3730 {
3731 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3732 const char *default_cpu = (!TARGET_POWERPC64
3733 ? "powerpc"
3734 : (BYTES_BIG_ENDIAN
3735 ? "powerpc64"
3736 : "powerpc64le"));
3737 int default_cpu_index = rs6000_cpu_name_lookup (default_cpu);
3738 flags = processor_target_table[default_cpu_index].target_enable;
3739 }
3740 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3741 }
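
/* For example, a 64-bit little-endian build configured without a
   TARGET_DEFAULT falls back to the "powerpc64le" table entry here, which
   guarantees at least ISA 2.07 as the comment above notes.  */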
3742
3743 /* Don't expect powerpc64 to be enabled on OSes with OS_MISSING_POWERPC64,
3744 since they do not save and restore the high half of the GPRs correctly
3745 in all cases. If the user explicitly specifies it, we won't interfere
3746 with the user's specification. */
3747 #ifdef OS_MISSING_POWERPC64
3748 if (OS_MISSING_POWERPC64
3749 && TARGET_32BIT
3750 && TARGET_POWERPC64
3751 && !(rs6000_isa_flags_explicit & OPTION_MASK_POWERPC64))
3752 rs6000_isa_flags &= ~OPTION_MASK_POWERPC64;
3753 #endif
3754
3755 if (rs6000_tune_index >= 0)
3756 tune_index = rs6000_tune_index;
3757 else if (cpu_index >= 0)
3758 rs6000_tune_index = tune_index = cpu_index;
3759 else
3760 {
3761 size_t i;
3762 enum processor_type tune_proc
3763 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3764
3765 tune_index = -1;
3766 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3767 if (processor_target_table[i].processor == tune_proc)
3768 {
3769 tune_index = i;
3770 break;
3771 }
3772 }
3773
3774 if (cpu_index >= 0)
3775 rs6000_cpu = processor_target_table[cpu_index].processor;
3776 else
3777 rs6000_cpu = TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT;
3778
3779 gcc_assert (tune_index >= 0);
3780 rs6000_tune = processor_target_table[tune_index].processor;
3781
3782 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3783 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3784 || rs6000_cpu == PROCESSOR_PPCE5500)
3785 {
3786 if (TARGET_ALTIVEC)
3787 error ("AltiVec not supported in this target");
3788 }
3789
3790 /* If we are optimizing big endian systems for space, use the load/store
3791 multiple instructions. */
3792 if (BYTES_BIG_ENDIAN && optimize_size)
3793 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE;
3794
3795 /* Don't allow -mmultiple on little endian systems unless the cpu is a 750,
3796 because the hardware doesn't support the instructions used in little
3797 endian mode, and they cause an alignment trap.  The 750 does not cause
3798 an alignment trap (except when the target is unaligned). */
3799
3800 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750 && TARGET_MULTIPLE)
3801 {
3802 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3803 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3804 warning (0, "%qs is not supported on little endian systems",
3805 "-mmultiple");
3806 }
3807
3808 /* If little-endian, default to -mstrict-align on older processors.
3809 Testing for direct_move matches power8 and later. */
3810 if (!BYTES_BIG_ENDIAN
3811 && !(processor_target_table[tune_index].target_enable
3812 & OPTION_MASK_DIRECT_MOVE))
3813 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3814
3815 /* Add some warnings for VSX. */
3816 if (TARGET_VSX)
3817 {
3818 const char *msg = NULL;
3819 if (!TARGET_HARD_FLOAT)
3820 {
3821 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3822 msg = N_("%<-mvsx%> requires hardware floating point");
3823 else
3824 {
3825 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3826 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3827 }
3828 }
3829 else if (TARGET_AVOID_XFORM > 0)
3830 msg = N_("%<-mvsx%> needs indexed addressing");
3831 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3832 & OPTION_MASK_ALTIVEC))
3833 {
3834 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3835 msg = N_("%<-mvsx%> and %<-mno-altivec%> are incompatible");
3836 else
3837 msg = N_("%<-mno-altivec%> disables vsx");
3838 }
3839
3840 if (msg)
3841 {
3842 warning (0, msg);
3843 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3844 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3845 }
3846 }
3847
3848 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3849 the -mcpu setting to enable options that conflict. */
3850 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3851 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3852 | OPTION_MASK_ALTIVEC
3853 | OPTION_MASK_VSX)) != 0)
3854 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3855 | OPTION_MASK_DIRECT_MOVE)
3856 & ~rs6000_isa_flags_explicit);
3857
3858 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3859 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3860
3861 #ifdef XCOFF_DEBUGGING_INFO
3862 /* For AIX default to 64-bit DWARF. */
3863 if (!OPTION_SET_P (dwarf_offset_size))
3864 dwarf_offset_size = POINTER_SIZE_UNITS;
3865 #endif
3866
3867 /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn
3868 off all of the options that depend on those flags. */
3869 ignore_masks = rs6000_disable_incompatible_switches ();
3870
3871 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3872 unless the user explicitly used -mno-<option> to disable the code. */
3873 if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_MISC)
3874 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3875 else if (TARGET_P9_MINMAX)
3876 {
3877 if (cpu_index >= 0)
3878 {
3879 if (cpu_index == PROCESSOR_POWER9)
3880 {
3881 /* Legacy behavior: allow -mcpu=power9 with certain
3882 capabilities explicitly disabled. */
3883 rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks);
3884 }
3885 else
3886 error ("power9 target option is incompatible with %<%s=<xxx>%> "
3887 "for <xxx> less than power9", "-mcpu");
3888 }
3889 else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit)
3890 != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags
3891 & rs6000_isa_flags_explicit))
3892 /* Enforce that none of the ISA_3_0_MASKS_SERVER flags
3893 were explicitly cleared. */
3894 error ("%qs incompatible with explicitly disabled options",
3895 "-mpower9-minmax");
3896 else
3897 rs6000_isa_flags |= ISA_3_0_MASKS_SERVER;
3898 }
3899 else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3900 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks);
3901 else if (TARGET_VSX)
3902 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks);
3903 else if (TARGET_POPCNTD)
3904 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks);
3905 else if (TARGET_DFP)
3906 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks);
3907 else if (TARGET_CMPB)
3908 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks);
3909 else if (TARGET_FPRND)
3910 rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks);
3911 else if (TARGET_POPCNTB)
3912 rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks);
3913 else if (TARGET_ALTIVEC)
3914 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks);
3915
3916 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3917 target attribute or pragma which automatically enables both options,
3918 unless the altivec ABI was set. This is set by default for 64-bit, but
3919 not for 32-bit. Don't move this before the above code using ignore_masks,
3920 since it can reset the cleared VSX/ALTIVEC flag again. */
3921 if (main_target_opt && !main_target_opt->x_rs6000_altivec_abi)
3922 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3923 & ~rs6000_isa_flags_explicit);
3924
3925 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3926 {
3927 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3928 error ("%qs requires %qs", "-mcrypto", "-maltivec");
3929 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3930 }
3931
3932 if (!TARGET_FPRND && TARGET_VSX)
3933 {
3934 if (rs6000_isa_flags_explicit & OPTION_MASK_FPRND)
3935 /* TARGET_VSX = 1 implies Power7 and newer.  */
3936 error ("%qs requires %qs", "-mvsx", "-mfprnd");
3937 rs6000_isa_flags &= ~OPTION_MASK_FPRND;
3938 }
3939
3940 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3941 {
3942 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3943 error ("%qs requires %qs", "-mdirect-move", "-mvsx");
3944 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3945 }
3946
3947 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3948 {
3949 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3950 error ("%qs requires %qs", "-mpower8-vector", "-maltivec");
3951 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3952 }
3953
3954 if (TARGET_P8_VECTOR && !TARGET_VSX)
3955 {
3956 if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3957 && (rs6000_isa_flags_explicit & OPTION_MASK_VSX))
3958 error ("%qs requires %qs", "-mpower8-vector", "-mvsx");
3959 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0)
3960 {
3961 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3962 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3963 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
3964 }
3965 else
3966 {
3967 /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is
3968 not explicit. */
3969 rs6000_isa_flags |= OPTION_MASK_VSX;
3970 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3971 }
3972 }
3973
3974 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3975 {
3976 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3977 error ("%qs requires %qs", "-mhard-dfp", "-mhard-float");
3978 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3979 }
3980
3981 /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
3982 silently turn off quad memory mode. */
3983 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3984 {
3985 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3986 warning (0, N_("%<-mquad-memory%> requires 64-bit mode"));
3987
3988 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3989 warning (0, N_("%<-mquad-memory-atomic%> requires 64-bit mode"));
3990
3991 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3992 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3993 }
3994
3995 /* Non-atomic quad memory load/store are disabled for little endian, since
3996 the words are reversed, but atomic operations can still be done by
3997 swapping the words. */
3998 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3999 {
4000 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
4001 warning (0, N_("%<-mquad-memory%> is not available in little endian "
4002 "mode"));
4003
4004 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
4005 }
4006
4007 /* Assume if the user asked for normal quad memory instructions, they want
4008 the atomic versions as well, unless they explicitly told us not to use quad
4009 word atomic instructions. */
4010 if (TARGET_QUAD_MEMORY
4011 && !TARGET_QUAD_MEMORY_ATOMIC
4012 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
4013 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
4014
4015 /* If we can shrink-wrap the TOC register save separately, then use
4016 -msave-toc-indirect unless explicitly disabled. */
4017 if ((rs6000_isa_flags_explicit & OPTION_MASK_SAVE_TOC_INDIRECT) == 0
4018 && flag_shrink_wrap_separate
4019 && optimize_function_for_speed_p (cfun))
4020 rs6000_isa_flags |= OPTION_MASK_SAVE_TOC_INDIRECT;
4021
4022 /* Enable power8 fusion if we are tuning for power8, even if we aren't
4023 generating power8 instructions. Power9 does not optimize power8 fusion
4024 cases. */
4025 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
4026 {
4027 if (processor_target_table[tune_index].processor == PROCESSOR_POWER8)
4028 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4029 else
4030 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4031 }
4032
4033 /* Setting additional fusion flags turns on base fusion. */
4034 if (!TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN)
4035 {
4036 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)
4037 {
4038 if (TARGET_P8_FUSION_SIGN)
4039 error ("%qs requires %qs", "-mpower8-fusion-sign",
4040 "-mpower8-fusion");
4041
4042 rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION;
4043 }
4044 else
4045 rs6000_isa_flags |= OPTION_MASK_P8_FUSION;
4046 }
4047
4048 /* Power8 does not fuse sign-extended loads with the addis.  If we are
4049 optimizing at high levels for speed, convert a sign-extended load into
4050 a zero-extending load and an explicit sign extension. */
4051 if (TARGET_P8_FUSION
4052 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
4053 && optimize_function_for_speed_p (cfun)
4054 && optimize >= 3)
4055 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
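
/* Concretely (an illustrative instruction choice, not a statement about
   the exact md patterns): a sign-extended word load that would use lwa
   can instead be emitted as the fusable zero-extending lwz followed by
   an explicit extsw.  */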
4056
4057 /* ISA 3.0 vector instructions include ISA 2.07. */
4058 if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR)
4059 {
4060 /* We prefer to not mention undocumented options in
4061 error messages. However, if users have managed to select
4062 power9-vector without selecting power8-vector, they
4063 already know about undocumented flags. */
4064 if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR)
4065 && (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR))
4066 error ("%qs requires %qs", "-mpower9-vector", "-mpower8-vector");
4067 else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0)
4068 {
4069 rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR;
4070 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
4071 rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR;
4072 }
4073 else
4074 {
4075 /* OPTION_MASK_P9_VECTOR is explicit and
4076 OPTION_MASK_P8_VECTOR is not explicit. */
4077 rs6000_isa_flags |= OPTION_MASK_P8_VECTOR;
4078 rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR;
4079 }
4080 }
4081
4082 /* Explicitly set -mallow-movmisalign on if we have full ISA 2.07
4083 support.  If we only have ISA 2.06 support, and the user did not specify
4084 the switch, leave it set to -1 so the movmisalign patterns are enabled,
4085 but we don't enable the full vectorization support.  */
4086 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
4087 TARGET_ALLOW_MOVMISALIGN = 1;
4088
4089 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
4090 {
4091 if (TARGET_ALLOW_MOVMISALIGN > 0
4092 && OPTION_SET_P (TARGET_ALLOW_MOVMISALIGN))
4093 error ("%qs requires %qs", "-mallow-movmisalign", "-mvsx");
4094
4095 TARGET_ALLOW_MOVMISALIGN = 0;
4096 }
4097
4098 /* Determine when unaligned vector accesses are permitted, and when
4099 they are preferred over masked Altivec loads. Note that if
4100 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4101 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4102 not true. */
4103 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4104 {
4105 if (!TARGET_VSX)
4106 {
4107 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4108 error ("%qs requires %qs", "-mefficient-unaligned-vsx", "-mvsx");
4109
4110 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4111 }
4112
4113 else if (!TARGET_ALLOW_MOVMISALIGN)
4114 {
4115 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
4116 error ("%qs requires %qs", "-munefficient-unaligned-vsx",
4117 "-mallow-movmisalign");
4118
4119 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
4120 }
4121 }
4122
4123 if (!(rs6000_isa_flags_explicit & OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX))
4124 {
4125 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4126 rs6000_isa_flags |= OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4127 else
4128 rs6000_isa_flags &= ~OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX;
4129 }
4130
4131 /* Use long double size to select the appropriate long double. We use
4132 TYPE_PRECISION to differentiate the 3 different long double types. We map
4133 128 into the precision used for TFmode. */
4134 int default_long_double_size = (RS6000_DEFAULT_LONG_DOUBLE_SIZE == 64
4135 ? 64
4136 : FLOAT_PRECISION_TFmode);
4137
4138 /* Set long double size before the IEEE 128-bit tests. */
4139 if (!OPTION_SET_P (rs6000_long_double_type_size))
4140 {
4141 if (main_target_opt != NULL
4142 && (main_target_opt->x_rs6000_long_double_type_size
4143 != default_long_double_size))
4144 error ("target attribute or pragma changes %<long double%> size");
4145 else
4146 rs6000_long_double_type_size = default_long_double_size;
4147 }
4148 else if (rs6000_long_double_type_size == FLOAT_PRECISION_TFmode)
4149 ; /* The option value can be seen when cl_target_option_restore is called. */
4150 else if (rs6000_long_double_type_size == 128)
4151 rs6000_long_double_type_size = FLOAT_PRECISION_TFmode;
4152
4153 /* Set -mabi=ieeelongdouble on some old targets. In the future, power server
4154 systems will also set long double to be IEEE 128-bit. AIX and Darwin
4155 explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
4156 those systems will not pick up this default. Warn if the user changes the
4157 default unless -Wno-psabi. */
4158 if (!OPTION_SET_P (rs6000_ieeequad))
4159 rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT;
4160
4161 else if (TARGET_LONG_DOUBLE_128)
4162 {
4163 if (global_options.x_rs6000_ieeequad
4164 && (!TARGET_POPCNTD || !TARGET_VSX))
4165 error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble");
4166
4167 if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT)
4168 {
4169 /* Determine if the user can change the default long double type at
4170 compilation time. You need GLIBC 2.32 or newer to be able to
4171 change the long double type. Only issue one warning. */
4172 static bool warned_change_long_double;
4173
4174 if (!warned_change_long_double && !glibc_supports_ieee_128bit ())
4175 {
4176 warned_change_long_double = true;
4177 if (TARGET_IEEEQUAD)
4178 warning (OPT_Wpsabi, "using IEEE extended precision "
4179 "%<long double%>");
4180 else
4181 warning (OPT_Wpsabi, "using IBM extended precision "
4182 "%<long double%>");
4183 }
4184 }
4185 }
4186
4187 /* Enable the default support for IEEE 128-bit floating point on Linux VSX
4188 systems.  In GCC 7, we would enable the IEEE 128-bit floating point
4189 infrastructure (-mfloat128-type) but not enable the actual __float128 type
4190 unless the user used the explicit -mfloat128. In GCC 8, we enable both
4191 the keyword as well as the type. */
4192 TARGET_FLOAT128_TYPE = TARGET_FLOAT128_ENABLE_TYPE && TARGET_VSX;
4193
4194 /* IEEE 128-bit floating point requires VSX support. */
4195 if (TARGET_FLOAT128_KEYWORD)
4196 {
4197 if (!TARGET_VSX)
4198 {
4199 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0)
4200 error ("%qs requires VSX support", "-mfloat128");
4201
4202 TARGET_FLOAT128_TYPE = 0;
4203 rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_KEYWORD
4204 | OPTION_MASK_FLOAT128_HW);
4205 }
4206 else if (!TARGET_FLOAT128_TYPE)
4207 {
4208 TARGET_FLOAT128_TYPE = 1;
4209 warning (0, "The %<-mfloat128%> option may not be fully supported");
4210 }
4211 }
4212
4213 /* Enable the __float128 keyword under Linux by default. */
4214 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_KEYWORD
4215 && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0)
4216 rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD;
4217
4218 /* If we are supporting the float128 type and have full ISA 3.0 support,
4219 enable -mfloat128-hardware by default. However, don't enable the
4220 __float128 keyword if it was explicitly turned off. 64-bit mode is needed
4221 because sometimes the compiler wants to put things in an integer
4222 container, and if we don't have __int128 support, it is impossible. */
4223 if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW && TARGET_64BIT
4224 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE
4225 && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW))
4226 rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW;
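
/* For example, -mcpu=power9 -m64 on a Linux VSX system gets
   -mfloat128-hardware by default here, while -mcpu=power9 -m32 does not,
   because of the TARGET_64BIT requirement explained above.  */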
4227
4228 if (TARGET_FLOAT128_HW
4229 && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE)
4230 {
4231 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4232 error ("%qs requires full ISA 3.0 support", "%<-mfloat128-hardware%>");
4233
4234 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4235 }
4236
4237 if (TARGET_FLOAT128_HW && !TARGET_64BIT)
4238 {
4239 if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0)
4240 error ("%qs requires %qs", "%<-mfloat128-hardware%>", "-m64");
4241
4242 rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW;
4243 }
4244
4245 /* Enable -mprefixed by default on power10 systems. */
4246 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) == 0)
4247 rs6000_isa_flags |= OPTION_MASK_PREFIXED;
4248
4249 /* -mprefixed requires -mcpu=power10 (or later). */
4250 else if (TARGET_PREFIXED && !TARGET_POWER10)
4251 {
4252 if ((rs6000_isa_flags_explicit & OPTION_MASK_PREFIXED) != 0)
4253 error ("%qs requires %qs", "-mprefixed", "-mcpu=power10");
4254
4255 rs6000_isa_flags &= ~OPTION_MASK_PREFIXED;
4256 }
4257
4258 /* -mpcrel requires prefixed load/store addressing. */
4259 if (TARGET_PCREL && !TARGET_PREFIXED)
4260 {
4261 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4262 error ("%qs requires %qs", "-mpcrel", "-mprefixed");
4263
4264 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4265 }
4266
4267 /* Print the options after updating the defaults. */
4268 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4269 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
4270
4271 /* E500mc does "better" if we inline more aggressively. Respect the
4272 user's opinion, though. */
4273 if (rs6000_block_move_inline_limit == 0
4274 && (rs6000_tune == PROCESSOR_PPCE500MC
4275 || rs6000_tune == PROCESSOR_PPCE500MC64
4276 || rs6000_tune == PROCESSOR_PPCE5500
4277 || rs6000_tune == PROCESSOR_PPCE6500))
4278 rs6000_block_move_inline_limit = 128;
4279
4280 /* store_one_arg depends on expand_block_move to handle at least the
4281 size of reg_parm_stack_space. */
4282 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
4283 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
4284
4285 if (global_init_p)
4286 {
4287 /* If the appropriate debug option is enabled, replace the target hooks
4288 with debug versions that call the real version and then prints
4289 debugging information. */
4290 if (TARGET_DEBUG_COST)
4291 {
4292 targetm.rtx_costs = rs6000_debug_rtx_costs;
4293 targetm.address_cost = rs6000_debug_address_cost;
4294 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
4295 }
4296
4297 if (TARGET_DEBUG_ADDR)
4298 {
4299 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
4300 targetm.legitimize_address = rs6000_debug_legitimize_address;
4301 rs6000_secondary_reload_class_ptr
4302 = rs6000_debug_secondary_reload_class;
4303 targetm.secondary_memory_needed
4304 = rs6000_debug_secondary_memory_needed;
4305 targetm.can_change_mode_class
4306 = rs6000_debug_can_change_mode_class;
4307 rs6000_preferred_reload_class_ptr
4308 = rs6000_debug_preferred_reload_class;
4309 rs6000_mode_dependent_address_ptr
4310 = rs6000_debug_mode_dependent_address;
4311 }
4312
4313 if (rs6000_veclibabi_name)
4314 {
4315 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
4316 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
4317 else
4318 {
4319 error ("unknown vectorization library ABI type in "
4320 "%<-mveclibabi=%s%>", rs6000_veclibabi_name);
4321 ret = false;
4322 }
4323 }
4324 }
4325
4326 /* Enable Altivec ABI for AIX -maltivec. */
4327 if (TARGET_XCOFF
4328 && (TARGET_ALTIVEC || TARGET_VSX)
4329 && !OPTION_SET_P (rs6000_altivec_abi))
4330 {
4331 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
4332 error ("target attribute or pragma changes AltiVec ABI");
4333 else
4334 rs6000_altivec_abi = 1;
4335 }
4336
4337 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
4338 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
4339 be explicitly overridden in either case. */
4340 if (TARGET_ELF)
4341 {
4342 if (!OPTION_SET_P (rs6000_altivec_abi)
4343 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
4344 {
4345 if (main_target_opt != NULL
4346 && !main_target_opt->x_rs6000_altivec_abi)
4347 error ("target attribute or pragma changes AltiVec ABI");
4348 else
4349 rs6000_altivec_abi = 1;
4350 }
4351 }
4352
4353 /* Set the Darwin64 ABI as default for 64-bit Darwin.
4354 So far, the only darwin64 targets are also Mach-O. */
4355 if (TARGET_MACHO
4356 && DEFAULT_ABI == ABI_DARWIN
4357 && TARGET_64BIT)
4358 {
4359 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
4360 error ("target attribute or pragma changes darwin64 ABI");
4361 else
4362 {
4363 rs6000_darwin64_abi = 1;
4364 /* Default to natural alignment, for better performance. */
4365 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
4366 }
4367 }
4368
4369 /* Place FP constants in the constant pool instead of TOC
4370 if section anchors are enabled. */
4371 if (flag_section_anchors
4372 && !OPTION_SET_P (TARGET_NO_FP_IN_TOC))
4373 TARGET_NO_FP_IN_TOC = 1;
4374
4375 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4376 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
4377
4378 #ifdef SUBTARGET_OVERRIDE_OPTIONS
4379 SUBTARGET_OVERRIDE_OPTIONS;
4380 #endif
4381 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
4382 SUBSUBTARGET_OVERRIDE_OPTIONS;
4383 #endif
4384 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
4385 SUB3TARGET_OVERRIDE_OPTIONS;
4386 #endif
4387
4388 /* If the ABI has support for PC-relative relocations, enable it by default.
4389 This test depends on the sub-target tests above setting the code model to
4390 medium for ELFv2 systems. */
4391 if (PCREL_SUPPORTED_BY_OS
4392 && (rs6000_isa_flags_explicit & OPTION_MASK_PCREL) == 0)
4393 rs6000_isa_flags |= OPTION_MASK_PCREL;
4394
4395 /* -mpcrel requires -mcmodel=medium, but we can't check TARGET_CMODEL until
4396 after the subtarget override options are done. */
4397 else if (TARGET_PCREL && TARGET_CMODEL != CMODEL_MEDIUM)
4398 {
4399 if ((rs6000_isa_flags_explicit & OPTION_MASK_PCREL) != 0)
4400 error ("%qs requires %qs", "-mpcrel", "-mcmodel=medium");
4401
4402 rs6000_isa_flags &= ~OPTION_MASK_PCREL;
4403 }
4404
4405 /* Enable -mmma by default on power10 systems. */
4406 if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
4407 rs6000_isa_flags |= OPTION_MASK_MMA;
4408
4409 /* Turn off vector pair/mma options on non-power10 systems. */
4410 else if (!TARGET_POWER10 && TARGET_MMA)
4411 {
4412 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4413 error ("%qs requires %qs", "-mmma", "-mcpu=power10");
4414
4415 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4416 }
4417
4418 /* Enable power10 fusion if we are tuning for power10, even if we aren't
4419 generating power10 instructions. */
4420 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION))
4421 {
4422 if (rs6000_tune == PROCESSOR_POWER10)
4423 rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
4424 else
4425 rs6000_isa_flags &= ~OPTION_MASK_P10_FUSION;
4426 }
4427
4428 /* ISA 3.1 states that MMA requires SIMD support, and our implementation
4429 (for example, the "*movoo" pattern) uses vector pair accesses, which
4430 use VSX registers.  So make MMA require VSX support here. */
4431 if (TARGET_MMA && !TARGET_VSX)
4432 {
4433 if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0)
4434 error ("%qs requires %qs", "-mmma", "-mvsx");
4435 rs6000_isa_flags &= ~OPTION_MASK_MMA;
4436 }
4437
4438 if (!TARGET_PCREL && TARGET_PCREL_OPT)
4439 rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT;
4440
4441 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
4442 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
4443
4444 rs6000_always_hint = (rs6000_tune != PROCESSOR_POWER4
4445 && rs6000_tune != PROCESSOR_POWER5
4446 && rs6000_tune != PROCESSOR_POWER6
4447 && rs6000_tune != PROCESSOR_POWER7
4448 && rs6000_tune != PROCESSOR_POWER8
4449 && rs6000_tune != PROCESSOR_POWER9
4450 && rs6000_tune != PROCESSOR_POWER10
4451 && rs6000_tune != PROCESSOR_PPCA2
4452 && rs6000_tune != PROCESSOR_CELL
4453 && rs6000_tune != PROCESSOR_PPC476);
4454 rs6000_sched_groups = (rs6000_tune == PROCESSOR_POWER4
4455 || rs6000_tune == PROCESSOR_POWER5
4456 || rs6000_tune == PROCESSOR_POWER7
4457 || rs6000_tune == PROCESSOR_POWER8);
4458 rs6000_align_branch_targets = (rs6000_tune == PROCESSOR_POWER4
4459 || rs6000_tune == PROCESSOR_POWER5
4460 || rs6000_tune == PROCESSOR_POWER6
4461 || rs6000_tune == PROCESSOR_POWER7
4462 || rs6000_tune == PROCESSOR_POWER8
4463 || rs6000_tune == PROCESSOR_POWER9
4464 || rs6000_tune == PROCESSOR_POWER10
4465 || rs6000_tune == PROCESSOR_PPCE500MC
4466 || rs6000_tune == PROCESSOR_PPCE500MC64
4467 || rs6000_tune == PROCESSOR_PPCE5500
4468 || rs6000_tune == PROCESSOR_PPCE6500);
4469
4470 /* Allow debug switches to override the above settings. These are set to -1
4471 in rs6000.opt to indicate the user hasn't directly set the switch. */
4472 if (TARGET_ALWAYS_HINT >= 0)
4473 rs6000_always_hint = TARGET_ALWAYS_HINT;
4474
4475 if (TARGET_SCHED_GROUPS >= 0)
4476 rs6000_sched_groups = TARGET_SCHED_GROUPS;
4477
4478 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
4479 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
4480
4481 rs6000_sched_restricted_insns_priority
4482 = (rs6000_sched_groups ? 1 : 0);
4483
4484 /* Handle -msched-costly-dep option. */
4485 rs6000_sched_costly_dep
4486 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
4487
4488 if (rs6000_sched_costly_dep_str)
4489 {
4490 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
4491 rs6000_sched_costly_dep = no_dep_costly;
4492 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
4493 rs6000_sched_costly_dep = all_deps_costly;
4494 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
4495 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
4496 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
4497 rs6000_sched_costly_dep = store_to_load_dep_costly;
4498 else
4499 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4500 atoi (rs6000_sched_costly_dep_str));
4501 }
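
/* For example, -msched-costly-dep=store_to_load treats every
   store-to-load dependence as costly, while a numeric argument such as
   -msched-costly-dep=2 treats a dependence as costly when its latency is
   at least that value (per the documented option semantics).  */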
4502
4503 /* Handle -minsert-sched-nops option. */
4504 rs6000_sched_insert_nops
4505 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4506
4507 if (rs6000_sched_insert_nops_str)
4508 {
4509 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4510 rs6000_sched_insert_nops = sched_finish_none;
4511 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4512 rs6000_sched_insert_nops = sched_finish_pad_groups;
4513 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4514 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4515 else
4516 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4517 atoi (rs6000_sched_insert_nops_str));
4518 }
4519
4520 /* Handle stack protector */
4521 if (!OPTION_SET_P (rs6000_stack_protector_guard))
4522 #ifdef TARGET_THREAD_SSP_OFFSET
4523 rs6000_stack_protector_guard = SSP_TLS;
4524 #else
4525 rs6000_stack_protector_guard = SSP_GLOBAL;
4526 #endif
4527
4528 #ifdef TARGET_THREAD_SSP_OFFSET
4529 rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET;
4530 rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2;
4531 #endif
4532
4533 if (OPTION_SET_P (rs6000_stack_protector_guard_offset_str))
4534 {
4535 char *endp;
4536 const char *str = rs6000_stack_protector_guard_offset_str;
4537
4538 errno = 0;
4539 long offset = strtol (str, &endp, 0);
4540 if (!*str || *endp || errno)
4541 error ("%qs is not a valid number in %qs", str,
4542 "-mstack-protector-guard-offset=");
4543
4544 if (!IN_RANGE (offset, -0x8000, 0x7fff)
4545 || (TARGET_64BIT && (offset & 3)))
4546 error ("%qs is not a valid offset in %qs", str,
4547 "-mstack-protector-guard-offset=");
4548
4549 rs6000_stack_protector_guard_offset = offset;
4550 }
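
/* For example, -mstack-protector-guard-offset=0x28 parses to 40, which
   is within [-0x8000, 0x7fff] and, being a multiple of 4, also passes
   the -m64 alignment check above.  */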
4551
4552 if (OPTION_SET_P (rs6000_stack_protector_guard_reg_str))
4553 {
4554 const char *str = rs6000_stack_protector_guard_reg_str;
4555 int reg = decode_reg_name (str);
4556
4557 if (!IN_RANGE (reg, 1, 31))
4558 error ("%qs is not a valid base register in %qs", str,
4559 "-mstack-protector-guard-reg=");
4560
4561 rs6000_stack_protector_guard_reg = reg;
4562 }
4563
4564 if (rs6000_stack_protector_guard == SSP_TLS
4565 && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31))
4566 error ("%qs needs a valid base register", "-mstack-protector-guard=tls");
4567
4568 if (global_init_p)
4569 {
4570 #ifdef TARGET_REGNAMES
4571 /* If the user desires alternate register names, copy in the
4572 alternate names now. */
4573 if (TARGET_REGNAMES)
4574 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4575 #endif
4576
4577 /* Set aix_struct_return last, after the ABI is determined.
4578 If -maix-struct-return or -msvr4-struct-return was explicitly
4579 used, don't override with the ABI default. */
4580 if (!OPTION_SET_P (aix_struct_return))
4581 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4582
4583 #if 0
4584 /* IBM XL compiler defaults to unsigned bitfields. */
4585 if (TARGET_XL_COMPAT)
4586 flag_signed_bitfields = 0;
4587 #endif
4588
4589 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4590 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4591
4592 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4593
4594 /* We can only guarantee the availability of DI pseudo-ops when
4595 assembling for 64-bit targets. */
4596 if (!TARGET_64BIT)
4597 {
4598 targetm.asm_out.aligned_op.di = NULL;
4599 targetm.asm_out.unaligned_op.di = NULL;
4600 }
4601
4602
4603 /* Set branch target alignment, if not optimizing for size. */
4604 if (!optimize_size)
4605 {
4606 /* Cell wants to be 8-byte aligned for dual issue.  Titan wants to be
4607 8-byte aligned to avoid misprediction by the branch predictor. */
4608 if (rs6000_tune == PROCESSOR_TITAN
4609 || rs6000_tune == PROCESSOR_CELL)
4610 {
4611 if (flag_align_functions && !str_align_functions)
4612 str_align_functions = "8";
4613 if (flag_align_jumps && !str_align_jumps)
4614 str_align_jumps = "8";
4615 if (flag_align_loops && !str_align_loops)
4616 str_align_loops = "8";
4617 }
4618 if (rs6000_align_branch_targets)
4619 {
4620 if (flag_align_functions && !str_align_functions)
4621 str_align_functions = "16";
4622 if (flag_align_jumps && !str_align_jumps)
4623 str_align_jumps = "16";
4624 if (flag_align_loops && !str_align_loops)
4625 {
4626 can_override_loop_align = 1;
4627 str_align_loops = "16";
4628 }
4629 }
4630 }
4631
4632 /* Arrange to save and restore machine status around nested functions. */
4633 init_machine_status = rs6000_init_machine_status;
4634
4635 /* We should always be splitting complex arguments, but we can't break
4636 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4637 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4638 targetm.calls.split_complex_arg = NULL;
4639
4640 /* The AIX and ELFv1 ABIs define standard function descriptors. */
4641 if (DEFAULT_ABI == ABI_AIX)
4642 targetm.calls.custom_function_descriptors = 0;
4643 }
4644
4645 /* Initialize rs6000_cost with the appropriate target costs. */
4646 if (optimize_size)
4647 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4648 else
4649 switch (rs6000_tune)
4650 {
4651 case PROCESSOR_RS64A:
4652 rs6000_cost = &rs64a_cost;
4653 break;
4654
4655 case PROCESSOR_MPCCORE:
4656 rs6000_cost = &mpccore_cost;
4657 break;
4658
4659 case PROCESSOR_PPC403:
4660 rs6000_cost = &ppc403_cost;
4661 break;
4662
4663 case PROCESSOR_PPC405:
4664 rs6000_cost = &ppc405_cost;
4665 break;
4666
4667 case PROCESSOR_PPC440:
4668 rs6000_cost = &ppc440_cost;
4669 break;
4670
4671 case PROCESSOR_PPC476:
4672 rs6000_cost = &ppc476_cost;
4673 break;
4674
4675 case PROCESSOR_PPC601:
4676 rs6000_cost = &ppc601_cost;
4677 break;
4678
4679 case PROCESSOR_PPC603:
4680 rs6000_cost = &ppc603_cost;
4681 break;
4682
4683 case PROCESSOR_PPC604:
4684 rs6000_cost = &ppc604_cost;
4685 break;
4686
4687 case PROCESSOR_PPC604e:
4688 rs6000_cost = &ppc604e_cost;
4689 break;
4690
4691 case PROCESSOR_PPC620:
4692 rs6000_cost = &ppc620_cost;
4693 break;
4694
4695 case PROCESSOR_PPC630:
4696 rs6000_cost = &ppc630_cost;
4697 break;
4698
4699 case PROCESSOR_CELL:
4700 rs6000_cost = &ppccell_cost;
4701 break;
4702
4703 case PROCESSOR_PPC750:
4704 case PROCESSOR_PPC7400:
4705 rs6000_cost = &ppc750_cost;
4706 break;
4707
4708 case PROCESSOR_PPC7450:
4709 rs6000_cost = &ppc7450_cost;
4710 break;
4711
4712 case PROCESSOR_PPC8540:
4713 case PROCESSOR_PPC8548:
4714 rs6000_cost = &ppc8540_cost;
4715 break;
4716
4717 case PROCESSOR_PPCE300C2:
4718 case PROCESSOR_PPCE300C3:
4719 rs6000_cost = &ppce300c2c3_cost;
4720 break;
4721
4722 case PROCESSOR_PPCE500MC:
4723 rs6000_cost = &ppce500mc_cost;
4724 break;
4725
4726 case PROCESSOR_PPCE500MC64:
4727 rs6000_cost = &ppce500mc64_cost;
4728 break;
4729
4730 case PROCESSOR_PPCE5500:
4731 rs6000_cost = &ppce5500_cost;
4732 break;
4733
4734 case PROCESSOR_PPCE6500:
4735 rs6000_cost = &ppce6500_cost;
4736 break;
4737
4738 case PROCESSOR_TITAN:
4739 rs6000_cost = &titan_cost;
4740 break;
4741
4742 case PROCESSOR_POWER4:
4743 case PROCESSOR_POWER5:
4744 rs6000_cost = &power4_cost;
4745 break;
4746
4747 case PROCESSOR_POWER6:
4748 rs6000_cost = &power6_cost;
4749 break;
4750
4751 case PROCESSOR_POWER7:
4752 rs6000_cost = &power7_cost;
4753 break;
4754
4755 case PROCESSOR_POWER8:
4756 rs6000_cost = &power8_cost;
4757 break;
4758
4759 case PROCESSOR_POWER9:
4760 rs6000_cost = &power9_cost;
4761 break;
4762
4763 case PROCESSOR_POWER10:
4764 rs6000_cost = &power10_cost;
4765 break;
4766
4767 case PROCESSOR_PPCA2:
4768 rs6000_cost = &ppca2_cost;
4769 break;
4770
4771 default:
4772 gcc_unreachable ();
4773 }
4774
4775 if (global_init_p)
4776 {
4777 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4778 param_simultaneous_prefetches,
4779 rs6000_cost->simultaneous_prefetches);
4780 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4781 param_l1_cache_size,
4782 rs6000_cost->l1_cache_size);
4783 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4784 param_l1_cache_line_size,
4785 rs6000_cost->cache_line_size);
4786 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4787 param_l2_cache_size,
4788 rs6000_cost->l2_cache_size);
4789
4790 /* Increase loop peeling limits based on performance analysis. */
4791 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4792 param_max_peeled_insns, 400);
4793 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4794 param_max_completely_peeled_insns, 400);
4795
4796 /* The lxvl/stxvl instructions don't perform well before Power10. */
4797 if (TARGET_POWER10)
4798 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4799 param_vect_partial_vector_usage, 1);
4800 else
4801 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4802 param_vect_partial_vector_usage, 0);
4803
4804 /* Use the 'model' -fsched-pressure algorithm by default. */
4805 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
4806 param_sched_pressure_algorithm,
4807 SCHED_PRESSURE_MODEL);
4808
4809 /* If using typedef char *va_list, signal that
4810 __builtin_va_start (&ap, 0) can be optimized to
4811 ap = __builtin_next_arg (0). */
4812 if (DEFAULT_ABI != ABI_V4)
4813 targetm.expand_builtin_va_start = NULL;
4814 }
4815
4816 rs6000_override_options_after_change ();
4817
4818 /* If not explicitly specified via option, decide whether to generate indexed
4819 load/store instructions. A value of -1 indicates that the
4820 initial value of this variable has not been overwritten. During
4821 compilation, TARGET_AVOID_XFORM is either 0 or 1. */
4822 if (TARGET_AVOID_XFORM == -1)
4823 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4824 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4825 need indexed accesses and the type used is the scalar type of the element
4826 being loaded or stored. */
4827 TARGET_AVOID_XFORM = (rs6000_tune == PROCESSOR_POWER6 && TARGET_CMPB
4828 && !TARGET_ALTIVEC);
4829
4830 /* Set the -mrecip options. */
4831 if (rs6000_recip_name)
4832 {
4833 char *p = ASTRDUP (rs6000_recip_name);
4834 char *q;
4835 unsigned int mask, i;
4836 bool invert;
4837
4838 while ((q = strtok (p, ",")) != NULL)
4839 {
4840 p = NULL;
4841 if (*q == '!')
4842 {
4843 invert = true;
4844 q++;
4845 }
4846 else
4847 invert = false;
4848
4849 if (!strcmp (q, "default"))
4850 mask = ((TARGET_RECIP_PRECISION)
4851 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4852 else
4853 {
4854 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4855 if (!strcmp (q, recip_options[i].string))
4856 {
4857 mask = recip_options[i].mask;
4858 break;
4859 }
4860
4861 if (i == ARRAY_SIZE (recip_options))
4862 {
4863 error ("unknown option for %<%s=%s%>", "-mrecip", q);
4864 invert = false;
4865 mask = 0;
4866 ret = false;
4867 }
4868 }
4869
4870 if (invert)
4871 rs6000_recip_control &= ~mask;
4872 else
4873 rs6000_recip_control |= mask;
4874 }
4875 }
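
/* For example, -mrecip=all,!rsqrtd first enables every estimate the
   target supports and then clears the double-precision reciprocal
   square root bit, since a leading '!' inverts the named mask.  */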
4876
4877 /* Initialize all of the registers. */
4878 rs6000_init_hard_regno_mode_ok (global_init_p);
4879
4880 /* Save the initial options in case the user uses function-specific options.  */
4881 if (global_init_p)
4882 target_option_default_node = target_option_current_node
4883 = build_target_option_node (&global_options, &global_options_set);
4884
4885 /* If not explicitly specified via option, decide whether to generate the
4886 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4887 if (TARGET_LINK_STACK == -1)
4888 SET_TARGET_LINK_STACK (rs6000_tune == PROCESSOR_PPC476 && flag_pic);
4889
4890 /* Deprecate use of -mno-speculate-indirect-jumps. */
4891 if (!rs6000_speculate_indirect_jumps)
4892 warning (0, "%qs is deprecated and not recommended in any circumstances",
4893 "-mno-speculate-indirect-jumps");
4894
4895 return ret;
4896 }
4897
4898 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4899 define the target cpu type. */
4900
4901 static void
4902 rs6000_option_override (void)
4903 {
4904 (void) rs6000_option_override_internal (true);
4905 }
4906
4907 \f
4908 /* Implement LOOP_ALIGN. */
4909 align_flags
4910 rs6000_loop_align (rtx label)
4911 {
4912 basic_block bb;
4913 int ninsns;
4914
4915 /* Don't override loop alignment if -falign-loops was specified. */
4916 if (!can_override_loop_align)
4917 return align_loops;
4918
4919 bb = BLOCK_FOR_INSN (label);
4920 ninsns = num_loop_insns (bb->loop_father);
4921
4922 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
4923 if (ninsns > 4 && ninsns <= 8
4924 && (rs6000_tune == PROCESSOR_POWER4
4925 || rs6000_tune == PROCESSOR_POWER5
4926 || rs6000_tune == PROCESSOR_POWER6
4927 || rs6000_tune == PROCESSOR_POWER7
4928 || rs6000_tune == PROCESSOR_POWER8))
4929 return align_flags (5);
4930 else
4931 return align_loops;
4932 }
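/* For example (illustrative only): when tuning for one of the processors
   listed above, a 6-insn loop whose alignment was not fixed with
   -falign-loops gets align_flags (5), i.e. a 2^5 = 32-byte alignment so
   that it fits in one icache sector, while a 12-insn loop simply keeps
   the default align_loops value.  */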
4933
4934 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4935 after applying N iterations. This routine does not determine
4936 how many iterations are required to reach the desired alignment. */
4937
4938 static bool
4939 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4940 {
4941 if (is_packed)
4942 return false;
4943
4944 if (TARGET_32BIT)
4945 {
4946 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4947 return true;
4948
4949 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4950 return true;
4951
4952 return false;
4953 }
4954 else
4955 {
4956 if (TARGET_MACHO)
4957 return false;
4958
4959 /* Assume that all other types are naturally aligned. CHECKME! */
4960 return true;
4961 }
4962 }
4963
4964 /* Return true if the vector misalignment factor is supported by the
4965 target. */
4966 static bool
4967 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4968 const_tree type,
4969 int misalignment,
4970 bool is_packed)
4971 {
4972 if (TARGET_VSX)
4973 {
4974 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4975 return true;
4976
4977 /* Return false if the movmisalign pattern is not supported for this mode. */
4978 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4979 return false;
4980
4981 if (misalignment == -1)
4982 {
4983 /* Misalignment factor is unknown at compile time but we know
4984 it's word aligned. */
4985 if (rs6000_vector_alignment_reachable (type, is_packed))
4986 {
4987 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4988
4989 if (element_size == 64 || element_size == 32)
4990 return true;
4991 }
4992
4993 return false;
4994 }
4995
4996 /* VSX supports word-aligned vectors. */
4997 if (misalignment % 4 == 0)
4998 return true;
4999 }
5000 return false;
5001 }
5002
5003 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5004 static int
5005 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
5006 tree vectype, int misalign)
5007 {
5008 unsigned elements;
5009 tree elem_type;
5010
5011 switch (type_of_cost)
5012 {
5013 case scalar_stmt:
5014 case scalar_store:
5015 case vector_stmt:
5016 case vector_store:
5017 case vec_to_scalar:
5018 case scalar_to_vec:
5019 case cond_branch_not_taken:
5020 return 1;
5021 case scalar_load:
5022 case vector_load:
5023 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5024 return 2;
5025
5026 case vec_perm:
5027 /* Power7 has only one permute unit, make it a bit expensive. */
5028 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5029 return 3;
5030 else
5031 return 1;
5032
5033 case vec_promote_demote:
5034 /* Power7 has only one permute/pack unit, make it a bit expensive. */
5035 if (TARGET_VSX && rs6000_tune == PROCESSOR_POWER7)
5036 return 4;
5037 else
5038 return 1;
5039
5040 case cond_branch_taken:
5041 return 3;
5042
5043 case unaligned_load:
5044 case vector_gather_load:
5045 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5046 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5047 return 2;
5048
5049 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5050 {
5051 elements = TYPE_VECTOR_SUBPARTS (vectype);
5052 /* See PR102767, consider V1TI to keep consistency. */
5053 if (elements == 2 || elements == 1)
5054 /* Double word aligned. */
5055 return 4;
5056
5057 if (elements == 4)
5058 {
5059 switch (misalign)
5060 {
5061 case 8:
5062 /* Double word aligned. */
5063 return 4;
5064
5065 case -1:
5066 /* Unknown misalignment. */
5067 case 4:
5068 case 12:
5069 /* Word aligned. */
5070 return 33;
5071
5072 default:
5073 gcc_unreachable ();
5074 }
5075 }
5076 }
5077
5078 if (TARGET_ALTIVEC)
5079 /* Misaligned loads are not supported. */
5080 gcc_unreachable ();
5081
5082 /* Like rs6000_insn_cost, make load insns cost a bit more. */
5083 return 4;
5084
5085 case unaligned_store:
5086 case vector_scatter_store:
5087 if (TARGET_EFFICIENT_UNALIGNED_VSX)
5088 return 1;
5089
5090 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
5091 {
5092 elements = TYPE_VECTOR_SUBPARTS (vectype);
5093 /* See PR102767, consider V1TI to keep consistency. */
5094 if (elements == 2 || elements == 1)
5095 /* Double word aligned. */
5096 return 2;
5097
5098 if (elements == 4)
5099 {
5100 switch (misalign)
5101 {
5102 case 8:
5103 /* Double word aligned. */
5104 return 2;
5105
5106 case -1:
5107 /* Unknown misalignment. */
5108 case 4:
5109 case 12:
5110 /* Word aligned. */
5111 return 23;
5112
5113 default:
5114 gcc_unreachable ();
5115 }
5116 }
5117 }
5118
5119 if (TARGET_ALTIVEC)
5120 /* Misaligned stores are not supported. */
5121 gcc_unreachable ();
5122
5123 return 2;
5124
5125 case vec_construct:
5126 /* This is a rough approximation assuming non-constant elements
5127 constructed into a vector via element insertion. FIXME:
5128 vec_construct is not granular enough for uniformly good
5129 decisions. If the initialization is a splat, this is
5130 cheaper than we estimate. Improve this someday. */
5131 elem_type = TREE_TYPE (vectype);
5132 /* 32-bit vectors loaded into registers are stored as double
5133 precision, so we need 2 permutes, 2 converts, and 1 merge
5134 to construct a vector of short floats from them. */
5135 if (SCALAR_FLOAT_TYPE_P (elem_type)
5136 && TYPE_PRECISION (elem_type) == 32)
5137 return 5;
5138 /* On POWER9, integer vector types are built up in GPRs and then
5139 use a direct move (2 cycles). For POWER8 this is even worse,
5140 as we need two direct moves and a merge, and the direct moves
5141 are five cycles. */
5142 else if (INTEGRAL_TYPE_P (elem_type))
5143 {
5144 if (TARGET_P9_VECTOR)
5145 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
5146 else
5147 return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 5;
5148 }
5149 else
5150 /* V2DFmode doesn't need a direct move. */
5151 return 2;
5152
5153 default:
5154 gcc_unreachable ();
5155 }
5156 }
5157
5158 /* Implement targetm.vectorize.preferred_simd_mode. */
5159
5160 static machine_mode
5161 rs6000_preferred_simd_mode (scalar_mode mode)
5162 {
5163 opt_machine_mode vmode = mode_for_vector (mode, 16 / GET_MODE_SIZE (mode));
5164
5165 if (vmode.exists () && !VECTOR_MEM_NONE_P (vmode.require ()))
5166 return vmode.require ();
5167
5168 return word_mode;
5169 }
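/* E.g. DFmode asks for 16 / 8 = 2 units and yields V2DFmode, and QImode
   yields V16QImode; when no 16-byte vector mode is supported for the
   scalar mode, word_mode is returned instead.  */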
5170
5171 class rs6000_cost_data : public vector_costs
5172 {
5173 public:
5174 using vector_costs::vector_costs;
5175
5176 unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
5177 stmt_vec_info stmt_info, slp_tree, tree vectype,
5178 int misalign,
5179 vect_cost_model_location where) override;
5180 void finish_cost (const vector_costs *) override;
5181
5182 protected:
5183 void update_target_cost_per_stmt (vect_cost_for_stmt, stmt_vec_info,
5184 vect_cost_model_location, unsigned int);
5185 void density_test (loop_vec_info);
5186 void adjust_vect_cost_per_loop (loop_vec_info);
5187 unsigned int determine_suggested_unroll_factor (loop_vec_info);
5188
5189 /* Total number of vectorized stmts (loop only). */
5190 unsigned m_nstmts = 0;
5191 /* Total number of loads (loop only). */
5192 unsigned m_nloads = 0;
5193 /* Total number of stores (loop only). */
5194 unsigned m_nstores = 0;
5195 /* Reduction factor for suggesting unroll factor (loop only). */
5196 unsigned m_reduc_factor = 0;
5197 /* Possible extra penalized cost on vector construction (loop only). */
5198 unsigned m_extra_ctor_cost = 0;
5199 /* For each vectorized loop, this var holds TRUE iff a non-memory vector
5200 instruction is needed by the vectorization. */
5201 bool m_vect_nonmem = false;
5202 /* If this loop gets vectorized with emulated gather load. */
5203 bool m_gather_load = false;
5204 };
5205
5206 /* Test for likely overcommitment of vector hardware resources. If a
5207 loop iteration is relatively large, and too large a percentage of
5208 instructions in the loop are vectorized, the cost model may not
5209 adequately reflect delays from unavailable vector resources.
5210 Penalize the loop body cost for this case. */
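/* A worked example of the test below, assuming the default parameter
   values rs6000-density-pct-threshold=85, rs6000-density-size-threshold=70
   and rs6000-density-penalty=10: with vec_cost = 90 and not_vec_cost = 10,
   density_pct = 90 * 100 / 100 = 90 > 85 and the total cost 100 > 70, so
   the body cost is scaled to 90 * (100 + 10) / 100 = 99.  */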
5211
5212 void
5213 rs6000_cost_data::density_test (loop_vec_info loop_vinfo)
5214 {
5215 /* This density test only cares about the cost of the vector version of the
5216 loop, so return immediately if we are costing the scalar
5217 version (namely computing the single scalar iteration cost). */
5218 if (m_costing_for_scalar)
5219 return;
5220
5221 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5222 basic_block *bbs = get_loop_body (loop);
5223 int nbbs = loop->num_nodes;
5224 int vec_cost = m_costs[vect_body], not_vec_cost = 0;
5225
5226 for (int i = 0; i < nbbs; i++)
5227 {
5228 basic_block bb = bbs[i];
5229 gimple_stmt_iterator gsi;
5230
5231 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5232 {
5233 gimple *stmt = gsi_stmt (gsi);
5234 if (is_gimple_debug (stmt))
5235 continue;
5236
5237 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
5238
5239 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5240 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
5241 not_vec_cost++;
5242 }
5243 }
5244
5245 free (bbs);
5246 int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
5247
5248 if (density_pct > rs6000_density_pct_threshold
5249 && vec_cost + not_vec_cost > rs6000_density_size_threshold)
5250 {
5251 m_costs[vect_body] = vec_cost * (100 + rs6000_density_penalty) / 100;
5252 if (dump_enabled_p ())
5253 dump_printf_loc (MSG_NOTE, vect_location,
5254 "density %d%%, cost %d exceeds threshold, penalizing "
5255 "loop body cost by %u%%\n", density_pct,
5256 vec_cost + not_vec_cost, rs6000_density_penalty);
5257 }
5258
5259 /* Check whether we need to penalize the body cost to account
5260 for excess strided or elementwise loads. */
5261 if (m_extra_ctor_cost > 0)
5262 {
5263 gcc_assert (m_nloads <= m_nstmts);
5264 unsigned int load_pct = (m_nloads * 100) / m_nstmts;
5265
5266 /* The loop is likely to be bound by the latency and execution resources
5267 of many scalar loads which are strided or elementwise loads
5268 into a vector, if both conditions below hold:
5269 1. there are many loads, so it is easy to end up waiting a long
5270 time on the load units;
5271 2. loads make up a big proportion of all vectorized statements,
5272 so it is not easy to schedule other statements to spread among
5273 the loads.
5274 One typical case is the innermost loop of the hotspot of SPEC2017
5275 503.bwaves_r without loop interchange. */
5276 if (m_nloads > (unsigned int) rs6000_density_load_num_threshold
5277 && load_pct > (unsigned int) rs6000_density_load_pct_threshold)
5278 {
5279 m_costs[vect_body] += m_extra_ctor_cost;
5280 if (dump_enabled_p ())
5281 dump_printf_loc (MSG_NOTE, vect_location,
5282 "Found %u loads and "
5283 "load pct. %u%% exceed "
5284 "the threshold, "
5285 "penalizing loop body "
5286 "cost by extra cost %u "
5287 "for ctor.\n",
5288 m_nloads, load_pct,
5289 m_extra_ctor_cost);
5290 }
5291 }
5292 }
5293
5294 /* Implement targetm.vectorize.create_costs. */
5295
5296 static vector_costs *
5297 rs6000_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
5298 {
5299 return new rs6000_cost_data (vinfo, costing_for_scalar);
5300 }
5301
5302 /* Adjust vectorization cost after calling rs6000_builtin_vectorization_cost.
5303 For some statements, we would like to further fine-tune the cost on
5304 top of the rs6000_builtin_vectorization_cost handling, which has no
5305 information on statement operation codes and the like. One typical case is
5306 COND_EXPR; it takes the same cost as a simple FXU instruction when evaluated
5307 for scalar cost, but it should be priced higher since it is transformed into
5308 either compare + branch or compare + isel instructions. */
5309
5310 static unsigned
5311 rs6000_adjust_vect_cost_per_stmt (enum vect_cost_for_stmt kind,
5312 struct _stmt_vec_info *stmt_info)
5313 {
5314 if (kind == scalar_stmt && stmt_info && stmt_info->stmt
5315 && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
5316 {
5317 tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt);
5318 if (subcode == COND_EXPR)
5319 return 2;
5320 }
5321
5322 return 0;
5323 }
5324
5325 /* Helper function for add_stmt_cost. Check each statement cost
5326 entry, gather information and update the target_cost fields
5327 accordingly. */
5328 void
5329 rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind,
5330 stmt_vec_info stmt_info,
5331 vect_cost_model_location where,
5332 unsigned int orig_count)
5333 {
5334
5335 /* Check whether we're doing something other than just a copy loop.
5336 Not all such loops may be profitably vectorized; see
5337 rs6000_finish_cost. */
5338 if (kind == vec_to_scalar
5339 || kind == vec_perm
5340 || kind == vec_promote_demote
5341 || kind == vec_construct
5342 || kind == scalar_to_vec
5343 || (where == vect_body && kind == vector_stmt))
5344 m_vect_nonmem = true;
5345
5346 /* Gather some information when we are costing the vectorized instruction
5347 for the statements located in a loop body. */
5348 if (!m_costing_for_scalar
5349 && is_a<loop_vec_info> (m_vinfo)
5350 && where == vect_body)
5351 {
5352 m_nstmts += orig_count;
5353
5354 if (kind == scalar_load
5355 || kind == vector_load
5356 || kind == unaligned_load
5357 || kind == vector_gather_load)
5358 {
5359 m_nloads += orig_count;
5360 if (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info))
5361 m_gather_load = true;
5362 }
5363 else if (kind == scalar_store
5364 || kind == vector_store
5365 || kind == unaligned_store
5366 || kind == vector_scatter_store)
5367 m_nstores += orig_count;
5368 else if ((kind == scalar_stmt
5369 || kind == vector_stmt
5370 || kind == vec_to_scalar)
5371 && stmt_info
5372 && vect_is_reduction (stmt_info))
5373 {
5374 /* Loop body contains normal int or fp operations and epilogue
5375 contains vector reduction. For simplicity, we assume int
5376 operation takes one cycle and fp operation takes one more. */
5377 tree lhs = gimple_get_lhs (stmt_info->stmt);
5378 bool is_float = FLOAT_TYPE_P (TREE_TYPE (lhs));
5379 unsigned int basic_cost = is_float ? 2 : 1;
5380 m_reduc_factor = MAX (basic_cost * orig_count, m_reduc_factor);
5381 }
5382
5383 /* Power processors do not currently have instructions for strided
5384 and elementwise loads, and instead we must generate multiple
5385 scalar loads. This leads to undercounting of the cost. We
5386 account for this by scaling the construction cost by the number
5387 of elements involved, and saving this as extra cost that we may
5388 or may not need to apply. When finalizing the cost of the loop,
5389 the extra penalty is applied when the load density heuristics
5390 are satisfied. */
5391 if (kind == vec_construct && stmt_info
5392 && STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
5393 && (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
5394 || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_STRIDED_SLP))
5395 {
5396 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5397 unsigned int nunits = vect_nunits_for_cost (vectype);
5398 /* As PR103702 shows, it's possible that the vectorizer wants to
5399 cost only one unit here; there is no need to do any
5400 penalization for it, so simply return early here. */
5401 if (nunits == 1)
5402 return;
5403 /* The i386 port adopts nunits * stmt_cost as the penalized cost
5404 for this kind of penalization; we used to follow it but
5405 found it could result in an unreliable body cost, especially
5406 for V16QI/V8HI modes. To improve on that, we use this
5407 new heuristic: for each scalar load, we use 2 as the penalized
5408 cost for the case with 2 nunits and use 1 for the other
5409 cases. It has little supporting theory and is mainly
5410 concluded from broad performance evaluations on Power8,
5411 Power9 and Power10. One possibly related point is that
5412 constructing a vector from more units takes more insns,
5413 giving more chances to schedule them better (even to run in
5414 parallel when enough units are available at that time), so
5415 it seems reasonable not to penalize them that much. */
5416 unsigned int adjusted_cost = (nunits == 2) ? 2 : 1;
5417 unsigned int extra_cost = nunits * adjusted_cost;
5418 m_extra_ctor_cost += extra_cost;
5419 }
5420 }
5421 }
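/* A sketch of the vec_construct penalty above: an elementwise V2DI load
   has nunits = 2, so it adds 2 * 2 = 4 of extra cost, while a strided
   V16QI load has nunits = 16 and adds 16 * 1 = 16.  Whether this saved
   extra cost is actually charged is decided later by the load density
   heuristics in density_test.  */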
5422
5423 unsigned
5424 rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind,
5425 stmt_vec_info stmt_info, slp_tree,
5426 tree vectype, int misalign,
5427 vect_cost_model_location where)
5428 {
5429 unsigned retval = 0;
5430
5431 if (flag_vect_cost_model)
5432 {
5433 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
5434 misalign);
5435 stmt_cost += rs6000_adjust_vect_cost_per_stmt (kind, stmt_info);
5436 /* Statements in an inner loop relative to the loop being
5437 vectorized are weighted more heavily. The value here is
5438 arbitrary and could potentially be improved with analysis. */
5439 unsigned int orig_count = count;
5440 retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
5441 m_costs[where] += retval;
5442
5443 update_target_cost_per_stmt (kind, stmt_info, where, orig_count);
5444 }
5445
5446 return retval;
5447 }
5448
5449 /* For target-specific vectorization costs which can't be handled per stmt,
5450 we check the requisite conditions and adjust the vectorization cost
5451 accordingly if they are satisfied. One typical example is to model the
5452 shift cost for vectors with length by counting the number of required
5453 lengths when LOOP_VINFO_FULLY_WITH_LENGTH_P holds. */
5454
5455 void
5456 rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo)
5457 {
5458 if (LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
5459 {
5460 rgroup_controls *rgc;
5461 unsigned int num_vectors_m1;
5462 unsigned int shift_cnt = 0;
5463 FOR_EACH_VEC_ELT (LOOP_VINFO_LENS (loop_vinfo), num_vectors_m1, rgc)
5464 if (rgc->type)
5465 /* Each length needs one shift to fill into bits 0-7. */
5466 shift_cnt += num_vectors_m1 + 1;
5467
5468 add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL,
5469 NULL_TREE, 0, vect_body);
5470 }
5471 }
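/* For instance, if LOOP_VINFO_LENS records controls for a one-vector
   rgroup and a two-vector rgroup, shift_cnt = 1 + 2 = 3, charged as
   three scalar_stmt entries in the body cost.  */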
5472
5473 /* Determine the suggested unroll factor by considering the factors below:
5474 
5475 - unroll option/pragma which can disable unrolling for this loop;
5476 - simple hardware resource model for non-memory vector insns;
5477 - aggressive heuristics when iteration count is unknown:
5478 - reduction case to break cross-iteration dependency;
5479 - emulated gather load;
5480 - estimated iteration count when iteration count is unknown;
5481 */
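/* A worked example of the computation below, assuming the default
   parameters rs6000-vect-unroll-issue=4 and rs6000-vect-unroll-limit=4:
   a loop body with 3 non-load/store vector stmts and a float reduction
   (m_reduc_factor = 2) gets uf = CEIL (2 * 4, 3) = 3, which is within
   the limit of 4 and is then rounded up to the next power of two, 4.  */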
5482
5483
5484 unsigned int
5485 rs6000_cost_data::determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
5486 {
5487 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5488
5489 /* Don't unroll if it's specified explicitly not to be unrolled. */
5490 if (loop->unroll == 1
5491 || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops)
5492 || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops))
5493 return 1;
5494
5495 unsigned int nstmts_nonldst = m_nstmts - m_nloads - m_nstores;
5496 /* Don't unroll if there are no vector instructions except for memory accesses. */
5497 if (nstmts_nonldst == 0)
5498 return 1;
5499
5500 /* Consider breaking cross iteration dependency for reduction. */
5501 unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1;
5502
5503 /* Use a simple hardware resource model of how many non-ld/st
5504 vector instructions can be issued per cycle. */
5505 unsigned int issue_width = rs6000_vect_unroll_issue;
5506 unsigned int uf = CEIL (reduc_factor * issue_width, nstmts_nonldst);
5507 uf = MIN ((unsigned int) rs6000_vect_unroll_limit, uf);
5508 /* Make sure it is power of 2. */
5509 uf = 1 << ceil_log2 (uf);
5510
5511 /* If the iteration count is known, the costing is exact enough;
5512 don't worry that it could be worse. */
5513 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5514 return uf;
5515
5516 /* Inspired by SPEC2017 parest_r, we want to aggressively unroll the
5517 loop if either condition is satisfied:
5518 - reduction factor exceeds the threshold;
5519 - emulated gather load adopted. */
5520 if (reduc_factor > (unsigned int) rs6000_vect_unroll_reduc_threshold
5521 || m_gather_load)
5522 return uf;
5523
5524 /* Check if we can conclude it's good to unroll from the estimated
5525 iteration count. */
5526 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
5527 unsigned int vf = vect_vf_for_cost (loop_vinfo);
5528 unsigned int unrolled_vf = vf * uf;
5529 if (est_niter == -1 || est_niter < unrolled_vf)
5530 /* When the estimated iteration count of this loop is unknown, it's
5531 possible that we are able to vectorize this loop with the original VF
5532 but can no longer vectorize it with the unrolled VF if the actual
5533 iteration count falls in between. */
5534 return 1;
5535 else
5536 {
5537 unsigned int epil_niter_unr = est_niter % unrolled_vf;
5538 unsigned int epil_niter = est_niter % vf;
5539 /* Even if we have partial vector support, it can still be inefficient
5540 to calculate the length when the iteration count is unknown, so
5541 only expect unrolling to be good when the epilogue iteration count
5542 is not bigger than VF (only one length calculation). */
5543 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5544 && epil_niter_unr <= vf)
5545 return uf;
5546 /* Without partial vector support, conservatively unroll this when
5547 the epilogue iteration count is less than the original one
5548 (epilogue execution time wouldn't be longer than before). */
5549 else if (!LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
5550 && epil_niter_unr <= epil_niter)
5551 return uf;
5552 }
5553
5554 return 1;
5555 }
5556
5557 void
5558 rs6000_cost_data::finish_cost (const vector_costs *scalar_costs)
5559 {
5560 if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo))
5561 {
5562 adjust_vect_cost_per_loop (loop_vinfo);
5563 density_test (loop_vinfo);
5564
5565 /* Don't vectorize minimum-vectorization-factor, simple copy loops
5566 that require versioning for any reason. The vectorization is at
5567 best a wash inside the loop, and the versioning checks make
5568 profitability highly unlikely and potentially quite harmful. */
5569 if (!m_vect_nonmem
5570 && LOOP_VINFO_VECT_FACTOR (loop_vinfo) == 2
5571 && LOOP_REQUIRES_VERSIONING (loop_vinfo))
5572 m_costs[vect_body] += 10000;
5573
5574 m_suggested_unroll_factor
5575 = determine_suggested_unroll_factor (loop_vinfo);
5576 }
5577
5578 vector_costs::finish_cost (scalar_costs);
5579 }
5580
5581 /* Implement targetm.loop_unroll_adjust. */
5582
5583 static unsigned
5584 rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
5585 {
5586 if (unroll_only_small_loops)
5587 {
5588 /* TODO: These are hardcoded values right now. We probably should use
5589 a PARAM here. */
5590 if (loop->ninsns <= 6)
5591 return MIN (4, nunroll);
5592 if (loop->ninsns <= 10)
5593 return MIN (2, nunroll);
5594
5595 return 0;
5596 }
5597
5598 return nunroll;
5599 }
5600
5601 /* Returns a function decl for a vectorized version of the builtin function
5602 with builtin function code FN and the result vector type TYPE, or NULL_TREE
5603 if it is not available.
5604
5605 Implement targetm.vectorize.builtin_vectorized_function. */
5606
5607 static tree
5608 rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
5609 tree type_in)
5610 {
5611 machine_mode in_mode, out_mode;
5612 int in_n, out_n;
5613
5614 if (TARGET_DEBUG_BUILTIN)
5615 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
5616 combined_fn_name (combined_fn (fn)),
5617 GET_MODE_NAME (TYPE_MODE (type_out)),
5618 GET_MODE_NAME (TYPE_MODE (type_in)));
5619
5620 /* TODO: Should this be gcc_assert? */
5621 if (TREE_CODE (type_out) != VECTOR_TYPE
5622 || TREE_CODE (type_in) != VECTOR_TYPE)
5623 return NULL_TREE;
5624
5625 out_mode = TYPE_MODE (TREE_TYPE (type_out));
5626 out_n = TYPE_VECTOR_SUBPARTS (type_out);
5627 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5628 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5629
5630 switch (fn)
5631 {
5632 CASE_CFN_COPYSIGN:
5633 if (VECTOR_UNIT_VSX_P (V2DFmode)
5634 && out_mode == DFmode && out_n == 2
5635 && in_mode == DFmode && in_n == 2)
5636 return rs6000_builtin_decls[RS6000_BIF_CPSGNDP];
5637 if (VECTOR_UNIT_VSX_P (V4SFmode)
5638 && out_mode == SFmode && out_n == 4
5639 && in_mode == SFmode && in_n == 4)
5640 return rs6000_builtin_decls[RS6000_BIF_CPSGNSP];
5641 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5642 && out_mode == SFmode && out_n == 4
5643 && in_mode == SFmode && in_n == 4)
5644 return rs6000_builtin_decls[RS6000_BIF_COPYSIGN_V4SF];
5645 break;
5646 CASE_CFN_CEIL:
5647 if (VECTOR_UNIT_VSX_P (V2DFmode)
5648 && out_mode == DFmode && out_n == 2
5649 && in_mode == DFmode && in_n == 2)
5650 return rs6000_builtin_decls[RS6000_BIF_XVRDPIP];
5651 if (VECTOR_UNIT_VSX_P (V4SFmode)
5652 && out_mode == SFmode && out_n == 4
5653 && in_mode == SFmode && in_n == 4)
5654 return rs6000_builtin_decls[RS6000_BIF_XVRSPIP];
5655 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5656 && out_mode == SFmode && out_n == 4
5657 && in_mode == SFmode && in_n == 4)
5658 return rs6000_builtin_decls[RS6000_BIF_VRFIP];
5659 break;
5660 CASE_CFN_FLOOR:
5661 if (VECTOR_UNIT_VSX_P (V2DFmode)
5662 && out_mode == DFmode && out_n == 2
5663 && in_mode == DFmode && in_n == 2)
5664 return rs6000_builtin_decls[RS6000_BIF_XVRDPIM];
5665 if (VECTOR_UNIT_VSX_P (V4SFmode)
5666 && out_mode == SFmode && out_n == 4
5667 && in_mode == SFmode && in_n == 4)
5668 return rs6000_builtin_decls[RS6000_BIF_XVRSPIM];
5669 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5670 && out_mode == SFmode && out_n == 4
5671 && in_mode == SFmode && in_n == 4)
5672 return rs6000_builtin_decls[RS6000_BIF_VRFIM];
5673 break;
5674 CASE_CFN_FMA:
5675 if (VECTOR_UNIT_VSX_P (V2DFmode)
5676 && out_mode == DFmode && out_n == 2
5677 && in_mode == DFmode && in_n == 2)
5678 return rs6000_builtin_decls[RS6000_BIF_XVMADDDP];
5679 if (VECTOR_UNIT_VSX_P (V4SFmode)
5680 && out_mode == SFmode && out_n == 4
5681 && in_mode == SFmode && in_n == 4)
5682 return rs6000_builtin_decls[RS6000_BIF_XVMADDSP];
5683 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5684 && out_mode == SFmode && out_n == 4
5685 && in_mode == SFmode && in_n == 4)
5686 return rs6000_builtin_decls[RS6000_BIF_VMADDFP];
5687 break;
5688 CASE_CFN_TRUNC:
5689 if (VECTOR_UNIT_VSX_P (V2DFmode)
5690 && out_mode == DFmode && out_n == 2
5691 && in_mode == DFmode && in_n == 2)
5692 return rs6000_builtin_decls[RS6000_BIF_XVRDPIZ];
5693 if (VECTOR_UNIT_VSX_P (V4SFmode)
5694 && out_mode == SFmode && out_n == 4
5695 && in_mode == SFmode && in_n == 4)
5696 return rs6000_builtin_decls[RS6000_BIF_XVRSPIZ];
5697 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5698 && out_mode == SFmode && out_n == 4
5699 && in_mode == SFmode && in_n == 4)
5700 return rs6000_builtin_decls[RS6000_BIF_VRFIZ];
5701 break;
5702 CASE_CFN_NEARBYINT:
5703 if (VECTOR_UNIT_VSX_P (V2DFmode)
5704 && flag_unsafe_math_optimizations
5705 && out_mode == DFmode && out_n == 2
5706 && in_mode == DFmode && in_n == 2)
5707 return rs6000_builtin_decls[RS6000_BIF_XVRDPI];
5708 if (VECTOR_UNIT_VSX_P (V4SFmode)
5709 && flag_unsafe_math_optimizations
5710 && out_mode == SFmode && out_n == 4
5711 && in_mode == SFmode && in_n == 4)
5712 return rs6000_builtin_decls[RS6000_BIF_XVRSPI];
5713 break;
5714 CASE_CFN_RINT:
5715 if (VECTOR_UNIT_VSX_P (V2DFmode)
5716 && !flag_trapping_math
5717 && out_mode == DFmode && out_n == 2
5718 && in_mode == DFmode && in_n == 2)
5719 return rs6000_builtin_decls[RS6000_BIF_XVRDPIC];
5720 if (VECTOR_UNIT_VSX_P (V4SFmode)
5721 && !flag_trapping_math
5722 && out_mode == SFmode && out_n == 4
5723 && in_mode == SFmode && in_n == 4)
5724 return rs6000_builtin_decls[RS6000_BIF_XVRSPIC];
5725 break;
5726 default:
5727 break;
5728 }
5729
5730 /* Generate calls to libmass if appropriate. */
5731 if (rs6000_veclib_handler)
5732 return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
5733
5734 return NULL_TREE;
5735 }
5736
5737 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
5738 library with vectorized intrinsics. */
5739
5740 static tree
5741 rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
5742 tree type_in)
5743 {
5744 char name[32];
5745 const char *suffix = NULL;
5746 tree fntype, new_fndecl, bdecl = NULL_TREE;
5747 int n_args = 1;
5748 const char *bname;
5749 machine_mode el_mode, in_mode;
5750 int n, in_n;
5751
5752 /* Libmass is suitable only for unsafe math, as it does not correctly support
5753 parts of IEEE (such as denormals) with the required precision. Only support
5754 it if we have VSX, so we can use the simd d2 or f4 functions.
5755 XXX: Add variable length support. */
5756 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
5757 return NULL_TREE;
5758
5759 el_mode = TYPE_MODE (TREE_TYPE (type_out));
5760 n = TYPE_VECTOR_SUBPARTS (type_out);
5761 in_mode = TYPE_MODE (TREE_TYPE (type_in));
5762 in_n = TYPE_VECTOR_SUBPARTS (type_in);
5763 if (el_mode != in_mode
5764 || n != in_n)
5765 return NULL_TREE;
5766
5767 switch (fn)
5768 {
5769 CASE_CFN_ATAN2:
5770 CASE_CFN_HYPOT:
5771 CASE_CFN_POW:
5772 n_args = 2;
5773 gcc_fallthrough ();
5774
5775 CASE_CFN_ACOS:
5776 CASE_CFN_ACOSH:
5777 CASE_CFN_ASIN:
5778 CASE_CFN_ASINH:
5779 CASE_CFN_ATAN:
5780 CASE_CFN_ATANH:
5781 CASE_CFN_CBRT:
5782 CASE_CFN_COS:
5783 CASE_CFN_COSH:
5784 CASE_CFN_ERF:
5785 CASE_CFN_ERFC:
5786 CASE_CFN_EXP2:
5787 CASE_CFN_EXP:
5788 CASE_CFN_EXPM1:
5789 CASE_CFN_LGAMMA:
5790 CASE_CFN_LOG10:
5791 CASE_CFN_LOG1P:
5792 CASE_CFN_LOG2:
5793 CASE_CFN_LOG:
5794 CASE_CFN_SIN:
5795 CASE_CFN_SINH:
5796 CASE_CFN_SQRT:
5797 CASE_CFN_TAN:
5798 CASE_CFN_TANH:
5799 if (el_mode == DFmode && n == 2)
5800 {
5801 bdecl = mathfn_built_in (double_type_node, fn);
5802 suffix = "d2"; /* pow -> powd2 */
5803 }
5804 else if (el_mode == SFmode && n == 4)
5805 {
5806 bdecl = mathfn_built_in (float_type_node, fn);
5807 suffix = "4"; /* powf -> powf4 */
5808 }
5809 else
5810 return NULL_TREE;
5811 if (!bdecl)
5812 return NULL_TREE;
5813 break;
5814
5815 default:
5816 return NULL_TREE;
5817 }
5818
5819 gcc_assert (suffix != NULL);
5820 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
5821 if (!bname)
5822 return NULL_TREE;
5823
5824 strcpy (name, bname + strlen ("__builtin_"));
5825 strcat (name, suffix);
5826
5827 if (n_args == 1)
5828 fntype = build_function_type_list (type_out, type_in, NULL);
5829 else if (n_args == 2)
5830 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
5831 else
5832 gcc_unreachable ();
5833
5834 /* Build a function declaration for the vectorized function. */
5835 new_fndecl = build_decl (BUILTINS_LOCATION,
5836 FUNCTION_DECL, get_identifier (name), fntype);
5837 TREE_PUBLIC (new_fndecl) = 1;
5838 DECL_EXTERNAL (new_fndecl) = 1;
5839 DECL_IS_NOVOPS (new_fndecl) = 1;
5840 TREE_READONLY (new_fndecl) = 1;
5841
5842 return new_fndecl;
5843 }
5844
5845 \f
5846 /* Default CPU string for rs6000*_file_start functions. */
5847 static const char *rs6000_default_cpu;
5848
5849 #ifdef USING_ELFOS_H
5850 const char *rs6000_machine;
5851
5852 const char *
5853 rs6000_machine_from_flags (void)
5854 {
5855 /* e300 and e500 */
5856 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3)
5857 return "e300";
5858 if (rs6000_cpu == PROCESSOR_PPC8540 || rs6000_cpu == PROCESSOR_PPC8548)
5859 return "e500";
5860 if (rs6000_cpu == PROCESSOR_PPCE500MC)
5861 return "e500mc";
5862 if (rs6000_cpu == PROCESSOR_PPCE500MC64)
5863 return "e500mc64";
5864 if (rs6000_cpu == PROCESSOR_PPCE5500)
5865 return "e5500";
5866 if (rs6000_cpu == PROCESSOR_PPCE6500)
5867 return "e6500";
5868
5869 /* 400 series */
5870 if (rs6000_cpu == PROCESSOR_PPC403)
5871 return "\"403\"";
5872 if (rs6000_cpu == PROCESSOR_PPC405)
5873 return "\"405\"";
5874 if (rs6000_cpu == PROCESSOR_PPC440)
5875 return "\"440\"";
5876 if (rs6000_cpu == PROCESSOR_PPC476)
5877 return "\"476\"";
5878
5879 /* A2 */
5880 if (rs6000_cpu == PROCESSOR_PPCA2)
5881 return "a2";
5882
5883 /* Cell BE */
5884 if (rs6000_cpu == PROCESSOR_CELL)
5885 return "cell";
5886
5887 /* Titan */
5888 if (rs6000_cpu == PROCESSOR_TITAN)
5889 return "titan";
5890
5891 /* 500 series and 800 series */
5892 if (rs6000_cpu == PROCESSOR_MPCCORE)
5893 return "\"821\"";
5894
5895 #if 0
5896 /* This (and ppc64 below) is disabled here (for now at least) because
5897 PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
5898 are #define'd as some of these. Untangling that is a job for later. */
5899
5900 /* 600 series and 700 series, "classic" */
5901 if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603
5902 || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e
5903 || rs6000_cpu == PROCESSOR_PPC750)
5904 return "ppc";
5905 #endif
5906
5907 /* Classic with AltiVec, "G4" */
5908 if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450)
5909 return "\"7450\"";
5910
5911 #if 0
5912 /* The older 64-bit CPUs */
5913 if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630
5914 || rs6000_cpu == PROCESSOR_RS64A)
5915 return "ppc64";
5916 #endif
5917
5918 HOST_WIDE_INT flags = rs6000_isa_flags;
5919
5920 /* Disable the flags that should never influence the .machine selection. */
5921 flags &= ~(OPTION_MASK_PPC_GFXOPT | OPTION_MASK_PPC_GPOPT | OPTION_MASK_ISEL);
5922
5923 if ((flags & (ISA_3_1_MASKS_SERVER & ~ISA_3_0_MASKS_SERVER)) != 0)
5924 return "power10";
5925 if ((flags & (ISA_3_0_MASKS_SERVER & ~ISA_2_7_MASKS_SERVER)) != 0)
5926 return "power9";
5927 if ((flags & (ISA_2_7_MASKS_SERVER & ~ISA_2_6_MASKS_SERVER)) != 0)
5928 return "power8";
5929 if ((flags & (ISA_2_6_MASKS_SERVER & ~ISA_2_5_MASKS_SERVER)) != 0)
5930 return "power7";
5931 if ((flags & (ISA_2_5_MASKS_SERVER & ~ISA_2_4_MASKS)) != 0)
5932 return "power6";
5933 if ((flags & (ISA_2_4_MASKS & ~ISA_2_1_MASKS)) != 0)
5934 return "power5";
5935 if ((flags & ISA_2_1_MASKS) != 0)
5936 return "power4";
5937 if ((flags & OPTION_MASK_POWERPC64) != 0)
5938 return "ppc64";
5939 return "ppc";
5940 }
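/* Two illustrative cases for the cascade above: -mcpu=power9 enables ISA
   3.0 option bits that are not in ISA_2_7_MASKS_SERVER, so ".machine
   power9" is selected; plain -mcpu=powerpc64 leaves only
   OPTION_MASK_POWERPC64 among the bits tested, so ".machine ppc64" is
   selected.  */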
5941
5942 void
5943 emit_asm_machine (void)
5944 {
5945 fprintf (asm_out_file, "\t.machine %s\n", rs6000_machine);
5946 }
5947 #endif
5948
5949 /* Do anything needed at the start of the asm file. */
5950
5951 static void
5952 rs6000_file_start (void)
5953 {
5954 char buffer[80];
5955 const char *start = buffer;
5956 FILE *file = asm_out_file;
5957
5958 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5959
5960 default_file_start ();
5961
5962 if (flag_verbose_asm)
5963 {
5964 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5965
5966 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5967 {
5968 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5969 start = "";
5970 }
5971
5972 if (OPTION_SET_P (rs6000_cpu_index))
5973 {
5974 fprintf (file, "%s -mcpu=%s", start,
5975 processor_target_table[rs6000_cpu_index].name);
5976 start = "";
5977 }
5978
5979 if (OPTION_SET_P (rs6000_tune_index))
5980 {
5981 fprintf (file, "%s -mtune=%s", start,
5982 processor_target_table[rs6000_tune_index].name);
5983 start = "";
5984 }
5985
5986 if (PPC405_ERRATUM77)
5987 {
5988 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5989 start = "";
5990 }
5991
5992 #ifdef USING_ELFOS_H
5993 switch (rs6000_sdata)
5994 {
5995 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5996 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5997 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5998 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5999 }
6000
6001 if (rs6000_sdata && g_switch_value)
6002 {
6003 fprintf (file, "%s -G %d", start,
6004 g_switch_value);
6005 start = "";
6006 }
6007 #endif
6008
6009 if (*start == '\0')
6010 putc ('\n', file);
6011 }
6012
6013 #ifdef USING_ELFOS_H
6014 rs6000_machine = rs6000_machine_from_flags ();
6015 emit_asm_machine ();
6016 #endif
6017
6018 if (DEFAULT_ABI == ABI_ELFv2)
6019 fprintf (file, "\t.abiversion 2\n");
6020 }
6021
6022 \f
6023 /* Return nonzero if this function is known to have a null epilogue. */
6024
6025 int
6026 direct_return (void)
6027 {
6028 if (reload_completed)
6029 {
6030 rs6000_stack_t *info = rs6000_stack_info ();
6031
6032 if (info->first_gp_reg_save == 32
6033 && info->first_fp_reg_save == 64
6034 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
6035 && ! info->lr_save_p
6036 && ! info->cr_save_p
6037 && info->vrsave_size == 0
6038 && ! info->push_p)
6039 return 1;
6040 }
6041
6042 return 0;
6043 }
6044
6045 /* Helper for num_insns_constant. Calculate number of instructions to
6046 load VALUE to a single gpr using combinations of addi, addis, ori,
6047 oris, sldi and rldimi instructions. */
6048
6049 static int
6050 num_insns_constant_gpr (HOST_WIDE_INT value)
6051 {
6052 /* signed constant loadable with addi */
6053 if (SIGNED_INTEGER_16BIT_P (value))
6054 return 1;
6055
6056 /* constant loadable with addis */
6057 else if ((value & 0xffff) == 0
6058 && (value >> 31 == -1 || value >> 31 == 0))
6059 return 1;
6060
6061 /* PADDI can support up to 34 bit signed integers. */
6062 else if (TARGET_PREFIXED && SIGNED_INTEGER_34BIT_P (value))
6063 return 1;
6064
6065 else if (TARGET_POWERPC64)
6066 {
6067 int num_insns = 0;
6068 rs6000_emit_set_long_const (nullptr, value, &num_insns);
6069 return num_insns;
6070 }
6071
6072 else
6073 return 2;
6074 }
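/* Illustrative cases for the checks above: 0x7fff is a signed 16-bit
   value, so a single li suffices; 0x12340000 has a zero low halfword and
   sign-extends from bit 31, so a single lis suffices; 0x12345678 is
   neither, so without prefixed paddi it takes an lis/ori pair, which is
   what rs6000_emit_set_long_const counts on 64-bit targets.  */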
6075
6076 /* Helper for num_insns_constant. Allow constants formed by the
6077 num_insns_constant_gpr sequences, plus li -1, rldicl/rldicr/rlwinm,
6078 and handle modes that require multiple gprs. */
6079
6080 static int
6081 num_insns_constant_multi (HOST_WIDE_INT value, machine_mode mode)
6082 {
6083 int nregs = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6084 int total = 0;
6085 while (nregs-- > 0)
6086 {
6087 HOST_WIDE_INT low = sext_hwi (value, BITS_PER_WORD);
6088 int insns = num_insns_constant_gpr (low);
6089 if (insns > 2
6090 /* We won't get more than 2 from num_insns_constant_gpr
6091 except when TARGET_POWERPC64 and mode is DImode or
6092 wider, so the register mode must be DImode. */
6093 && rs6000_is_valid_and_mask (GEN_INT (low), DImode))
6094 insns = 2;
6095 total += insns;
6096 /* If BITS_PER_WORD is the number of bits in HOST_WIDE_INT, doing
6097 it all at once would be UB. */
6098 value >>= (BITS_PER_WORD - 1);
6099 value >>= 1;
6100 }
6101 return total;
6102 }
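/* For instance, the DImode constant 0x100000002 on a 32-bit target is
   split into the word-sized halves 0x2 and 0x1, each loadable with one
   li, for a total of 2.  The two-step shift at the end of the loop only
   exists to avoid undefined behaviour when BITS_PER_WORD equals the
   width of HOST_WIDE_INT.  */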
6103
6104 /* Return the number of instructions it takes to form a constant in as
6105 many gprs as are needed for MODE. */
6106
6107 int
6108 num_insns_constant (rtx op, machine_mode mode)
6109 {
6110 HOST_WIDE_INT val;
6111
6112 switch (GET_CODE (op))
6113 {
6114 case CONST_INT:
6115 val = INTVAL (op);
6116 break;
6117
6118 case CONST_WIDE_INT:
6119 {
6120 int insns = 0;
6121 for (int i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
6122 insns += num_insns_constant_multi (CONST_WIDE_INT_ELT (op, i),
6123 DImode);
6124 return insns;
6125 }
6126
6127 case CONST_DOUBLE:
6128 {
6129 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
6130
6131 if (mode == SFmode || mode == SDmode)
6132 {
6133 long l;
6134
6135 if (mode == SDmode)
6136 REAL_VALUE_TO_TARGET_DECIMAL32 (*rv, l);
6137 else
6138 REAL_VALUE_TO_TARGET_SINGLE (*rv, l);
6139 /* See the first define_split in rs6000.md handling a
6140 const_double_operand. */
6141 val = l;
6142 mode = SImode;
6143 }
6144 else if (mode == DFmode || mode == DDmode)
6145 {
6146 long l[2];
6147
6148 if (mode == DDmode)
6149 REAL_VALUE_TO_TARGET_DECIMAL64 (*rv, l);
6150 else
6151 REAL_VALUE_TO_TARGET_DOUBLE (*rv, l);
6152
6153 /* See the second (32-bit) and third (64-bit) define_split
6154 in rs6000.md handling a const_double_operand. */
6155 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 1] << 32;
6156 val |= l[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffffUL;
6157 mode = DImode;
6158 }
6159 else if (mode == TFmode || mode == TDmode
6160 || mode == KFmode || mode == IFmode)
6161 {
6162 long l[4];
6163 int insns;
6164
6165 if (mode == TDmode)
6166 REAL_VALUE_TO_TARGET_DECIMAL128 (*rv, l);
6167 else
6168 REAL_VALUE_TO_TARGET_LONG_DOUBLE (*rv, l);
6169
6170 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 0 : 3] << 32;
6171 val |= l[WORDS_BIG_ENDIAN ? 1 : 2] & 0xffffffffUL;
6172 insns = num_insns_constant_multi (val, DImode);
6173 val = (unsigned HOST_WIDE_INT) l[WORDS_BIG_ENDIAN ? 2 : 1] << 32;
6174 val |= l[WORDS_BIG_ENDIAN ? 3 : 0] & 0xffffffffUL;
6175 insns += num_insns_constant_multi (val, DImode);
6176 return insns;
6177 }
6178 else
6179 gcc_unreachable ();
6180 }
6181 break;
6182
6183 default:
6184 gcc_unreachable ();
6185 }
6186
6187 return num_insns_constant_multi (val, mode);
6188 }
6189
6190 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
6191 If the mode of OP is MODE_VECTOR_INT, this simply returns the
6192 corresponding element of the vector, but for V4SFmode, the
6193 corresponding "float" is interpreted as an SImode integer. */
6194
6195 HOST_WIDE_INT
6196 const_vector_elt_as_int (rtx op, unsigned int elt)
6197 {
6198 rtx tmp;
6199
6200 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
6201 gcc_assert (GET_MODE (op) != V2DImode
6202 && GET_MODE (op) != V2DFmode);
6203
6204 tmp = CONST_VECTOR_ELT (op, elt);
6205 if (GET_MODE (op) == V4SFmode)
6206 tmp = gen_lowpart (SImode, tmp);
6207 return INTVAL (tmp);
6208 }
6209
6210 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
6211 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
6212 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
6213 all items are set to the same value and contain COPIES replicas of the
6214 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
6215 operand and the others are set to the value of the operand's msb. */
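/* Two illustrative cases, using big-endian element order: the V8HImode
   constant {0, 5, 0, 5, 0, 5, 0, 5} matches STEP = 2, COPIES = 1, since
   each 32-bit word is 0x00000005 and one vspltisw builds it; a V4SImode
   constant whose every word is 0x00050005 matches STEP = 1, COPIES = 2
   and is built by vspltish.  */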
6216
6217 static bool
6218 vspltis_constant (rtx op, unsigned step, unsigned copies)
6219 {
6220 machine_mode mode = GET_MODE (op);
6221 machine_mode inner = GET_MODE_INNER (mode);
6222
6223 unsigned i;
6224 unsigned nunits;
6225 unsigned bitsize;
6226 unsigned mask;
6227
6228 HOST_WIDE_INT val;
6229 HOST_WIDE_INT splat_val;
6230 HOST_WIDE_INT msb_val;
6231
6232 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
6233 return false;
6234
6235 nunits = GET_MODE_NUNITS (mode);
6236 bitsize = GET_MODE_BITSIZE (inner);
6237 mask = GET_MODE_MASK (inner);
6238
6239 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6240 splat_val = val;
6241 msb_val = val >= 0 ? 0 : -1;
6242
6243 if (val == 0 && step > 1)
6244 {
6245 /* Special case for loading the most significant bit with step > 1.
6246 In that case, match 0s in all elements except each step-1'th one,
6247 which must match EASY_VECTOR_MSB. */
6248 for (i = 1; i < nunits; ++i)
6249 {
6250 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6251 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6252 if ((i & (step - 1)) == step - 1)
6253 {
6254 if (!EASY_VECTOR_MSB (elt_val, inner))
6255 break;
6256 }
6257 else if (elt_val)
6258 break;
6259 }
6260 if (i == nunits)
6261 return true;
6262 }
6263
6264 /* Construct the value to be splatted, if possible. If not, return false. */
6265 for (i = 2; i <= copies; i *= 2)
6266 {
6267 HOST_WIDE_INT small_val;
6268 bitsize /= 2;
6269 small_val = splat_val >> bitsize;
6270 mask >>= bitsize;
6271 if (splat_val != ((HOST_WIDE_INT)
6272 ((unsigned HOST_WIDE_INT) small_val << bitsize)
6273 | (small_val & mask)))
6274 return false;
6275 splat_val = small_val;
6276 inner = smallest_int_mode_for_size (bitsize);
6277 }
6278
6279 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
6280 if (EASY_VECTOR_15 (splat_val))
6281 ;
6282
6283 /* Also check if we can splat, and then add the result to itself. Do so if
6284 the value is positive, or if the splat instruction is using OP's mode;
6285 for splat_val < 0, the splat and the add should use the same mode. */
6286 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
6287 && (splat_val >= 0 || (step == 1 && copies == 1)))
6288 ;
6289
6290 /* Also check if we are loading up the most significant bit, which can be done
6291 by loading up -1 and shifting the value left by -1. Only do this for
6292 step 1 here; for larger steps it is done earlier. */
6293 else if (EASY_VECTOR_MSB (splat_val, inner) && step == 1)
6294 ;
6295
6296 else
6297 return false;
6298
6299 /* Check if VAL is present in every STEP-th element, and the
6300 other elements are filled with its most significant bit. */
6301 for (i = 1; i < nunits; ++i)
6302 {
6303 HOST_WIDE_INT desired_val;
6304 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
6305 if ((i & (step - 1)) == 0)
6306 desired_val = val;
6307 else
6308 desired_val = msb_val;
6309
6310 if (desired_val != const_vector_elt_as_int (op, elt))
6311 return false;
6312 }
6313
6314 return true;
6315 }
6316
6317 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
6318 instruction, filling in the bottom elements with 0 or -1.
6319
6320 Return 0 if the constant cannot be generated with VSLDOI. Return positive
6321 for the number of zeroes to shift in, or negative for the number of 0xff
6322 bytes to shift in.
6323
6324 OP is a CONST_VECTOR. */
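/* For example, the big-endian V4SImode constant {5, 5, 5, 0} is the
   vspltisw splat of 5 with one zero word shifted in, so 4 is returned
   (four bytes of zeroes), whereas {5, -1, -1, -1} returns -12 (twelve
   0xff bytes shifted in).  */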
6325
6326 int
6327 vspltis_shifted (rtx op)
6328 {
6329 machine_mode mode = GET_MODE (op);
6330 machine_mode inner = GET_MODE_INNER (mode);
6331
6332 unsigned i, j;
6333 unsigned nunits;
6334 unsigned mask;
6335
6336 HOST_WIDE_INT val;
6337
6338 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
6339 return false;
6340
6341 /* We need to create pseudo registers to do the shift, so don't recognize
6342 shifted vector constants after reload. Don't match them even before RA
6343 once split1 is done, because there won't be a further splitting pass
6344 before RA to do the splitting. */
6345 if (!can_create_pseudo_p ()
6346 || (cfun->curr_properties & PROP_rtl_split_insns))
6347 return false;
6348
6349 nunits = GET_MODE_NUNITS (mode);
6350 mask = GET_MODE_MASK (inner);
6351
6352 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
6353
6354 /* Check if the value can really be the operand of a vspltis[bhw]. */
6355 if (EASY_VECTOR_15 (val))
6356 ;
6357
6358 /* Also check if we are loading up the most significant bit which can be done
6359 by loading up -1 and shifting the value left by -1. */
6360 else if (EASY_VECTOR_MSB (val, inner))
6361 ;
6362
6363 else
6364 return 0;
6365
6366 /* Check if VAL is present in every element until we find elements
6367 that are 0 or all 1 bits. */
6368 for (i = 1; i < nunits; ++i)
6369 {
6370 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
6371 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
6372
6373 /* If the value isn't the splat value, check for the remaining elements
6374 being 0/-1. */
6375 if (val != elt_val)
6376 {
6377 if (elt_val == 0)
6378 {
6379 for (j = i+1; j < nunits; ++j)
6380 {
6381 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6382 if (const_vector_elt_as_int (op, elt2) != 0)
6383 return 0;
6384 }
6385
6386 return (nunits - i) * GET_MODE_SIZE (inner);
6387 }
6388
6389 else if ((elt_val & mask) == mask)
6390 {
6391 for (j = i+1; j < nunits; ++j)
6392 {
6393 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
6394 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
6395 return 0;
6396 }
6397
6398 return -((nunits - i) * GET_MODE_SIZE (inner));
6399 }
6400
6401 else
6402 return 0;
6403 }
6404 }
6405
6406 /* If all elements are equal, we don't need to do VSLDOI. */
6407 return 0;
6408 }
6409
6410
6411 /* Return non-zero (element mode byte size) if OP is of the given MODE
6412 and can be synthesized with a vspltisb, vspltish or vspltisw. */
6413
6414 int
6415 easy_altivec_constant (rtx op, machine_mode mode)
6416 {
6417 unsigned step, copies;
6418
6419 if (mode == VOIDmode)
6420 mode = GET_MODE (op);
6421 else if (mode != GET_MODE (op))
6422 return 0;
6423
6424 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
6425 constants. */
6426 if (mode == V2DFmode)
6427 return zero_constant (op, mode) ? 8 : 0;
6428
6429 else if (mode == V2DImode)
6430 {
6431 if (!CONST_INT_P (CONST_VECTOR_ELT (op, 0))
6432 || !CONST_INT_P (CONST_VECTOR_ELT (op, 1)))
6433 return 0;
6434
6435 if (zero_constant (op, mode))
6436 return 8;
6437
6438 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
6439 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
6440 return 8;
6441
6442 return 0;
6443 }
6444
6445 /* V1TImode is a special container for TImode. Ignore for now. */
6446 else if (mode == V1TImode)
6447 return 0;
6448
6449 /* Start with a vspltisw. */
6450 step = GET_MODE_NUNITS (mode) / 4;
6451 copies = 1;
6452
6453 if (vspltis_constant (op, step, copies))
6454 return 4;
6455
6456 /* Then try with a vspltish. */
6457 if (step == 1)
6458 copies <<= 1;
6459 else
6460 step >>= 1;
6461
6462 if (vspltis_constant (op, step, copies))
6463 return 2;
6464
6465 /* And finally a vspltisb. */
6466 if (step == 1)
6467 copies <<= 1;
6468 else
6469 step >>= 1;
6470
6471 if (vspltis_constant (op, step, copies))
6472 return 1;
6473
6474 if (vspltis_shifted (op) != 0)
6475 return GET_MODE_SIZE (GET_MODE_INNER (mode));
6476
6477 return 0;
6478 }
6479
6480 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
6481 result is OP. Abort if it is not possible. */
6482
6483 rtx
6484 gen_easy_altivec_constant (rtx op)
6485 {
6486 machine_mode mode = GET_MODE (op);
6487 int nunits = GET_MODE_NUNITS (mode);
6488 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
6489 unsigned step = nunits / 4;
6490 unsigned copies = 1;
6491
6492 /* Start with a vspltisw. */
6493 if (vspltis_constant (op, step, copies))
6494 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
6495
6496 /* Then try with a vspltish. */
6497 if (step == 1)
6498 copies <<= 1;
6499 else
6500 step >>= 1;
6501
6502 if (vspltis_constant (op, step, copies))
6503 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
6504
6505 /* And finally a vspltisb. */
6506 if (step == 1)
6507 copies <<= 1;
6508 else
6509 step >>= 1;
6510
6511 if (vspltis_constant (op, step, copies))
6512 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
6513
6514 gcc_unreachable ();
6515 }
6516
6517 /* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
6518 instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).
6519
6520 Return the number of instructions needed (1 or 2) in the location pointed
6521 to by NUM_INSNS_PTR.
6522
6523 Return the constant that is being split via CONSTANT_PTR. */
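/* For instance, a V4SImode splat of 32 (when it is not already handled
   by a single XXSPLTIW/XXSPLTIDP prefixed instruction) gets
   *NUM_INSNS_PTR = 2 and *CONSTANT_PTR = 32: an xxspltib to put 32 in
   every byte plus a vextsb2w to sign-extend each low byte to a word;
   a V16QImode splat of any value in [-128, 127] needs only the
   xxspltib itself, so *NUM_INSNS_PTR = 1.  */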
6524
6525 bool
6526 xxspltib_constant_p (rtx op,
6527 machine_mode mode,
6528 int *num_insns_ptr,
6529 int *constant_ptr)
6530 {
6531 size_t nunits = GET_MODE_NUNITS (mode);
6532 size_t i;
6533 HOST_WIDE_INT value;
6534 rtx element;
6535
6536 /* Set the returned values to out-of-bounds values. */
6537 *num_insns_ptr = -1;
6538 *constant_ptr = 256;
6539
6540 if (!TARGET_P9_VECTOR)
6541 return false;
6542
6543 if (mode == VOIDmode)
6544 mode = GET_MODE (op);
6545
6546 else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
6547 return false;
6548
6549 /* Handle (vec_duplicate <constant>). */
6550 if (GET_CODE (op) == VEC_DUPLICATE)
6551 {
6552 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6553 && mode != V2DImode)
6554 return false;
6555
6556 element = XEXP (op, 0);
6557 if (!CONST_INT_P (element))
6558 return false;
6559
6560 value = INTVAL (element);
6561 if (!IN_RANGE (value, -128, 127))
6562 return false;
6563 }
6564
6565 /* Handle (const_vector [...]). */
6566 else if (GET_CODE (op) == CONST_VECTOR)
6567 {
6568 if (mode != V16QImode && mode != V8HImode && mode != V4SImode
6569 && mode != V2DImode)
6570 return false;
6571
6572 element = CONST_VECTOR_ELT (op, 0);
6573 if (!CONST_INT_P (element))
6574 return false;
6575
6576 value = INTVAL (element);
6577 if (!IN_RANGE (value, -128, 127))
6578 return false;
6579
6580 for (i = 1; i < nunits; i++)
6581 {
6582 element = CONST_VECTOR_ELT (op, i);
6583 if (!CONST_INT_P (element))
6584 return false;
6585
6586 if (value != INTVAL (element))
6587 return false;
6588 }
6589 }
6590
6591 /* Handle integer constants being loaded into the upper part of the VSX
6592 register as a scalar. If the value isn't 0/-1, only allow it if the mode
6593 can go in Altivec registers. Prefer VSPLTISW/VUPKHSW over XXSPLTIB. */
6594 else if (CONST_INT_P (op))
6595 {
6596 if (!SCALAR_INT_MODE_P (mode))
6597 return false;
6598
6599 value = INTVAL (op);
6600 if (!IN_RANGE (value, -128, 127))
6601 return false;
6602
6603 if (!IN_RANGE (value, -1, 0))
6604 {
6605 if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
6606 return false;
6607
6608 if (EASY_VECTOR_15 (value))
6609 return false;
6610 }
6611 }
6612
6613 else
6614 return false;
6615
6616 /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
6617 sign extend. Special case 0/-1 to allow getting any VSX register instead
6618 of an Altivec register. */
6619 if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
6620 && EASY_VECTOR_15 (value))
6621 return false;
6622
6623 /* Return # of instructions and the constant byte for XXSPLTIB. */
6624 if (mode == V16QImode)
6625 *num_insns_ptr = 1;
6626
6627 else if (IN_RANGE (value, -1, 0))
6628 *num_insns_ptr = 1;
6629
6630 /* Do not generate XXSPLTIB and a sign extend operation if we can generate a
6631 single XXSPLTIW or XXSPLTIDP instruction. */
6632 else if (vsx_prefixed_constant (op, mode))
6633 return false;
6634
6635 /* Return XXSPLTIB followed by a sign extend operation to convert the
6636 constant to V8HImode or V4SImode. */
6637 else
6638 *num_insns_ptr = 2;
6639
6640 *constant_ptr = (int) value;
6641 return true;
6642 }
6643
6644 /* Return true if OP mode is V2DI and can be synthesized with ISA 2.07
6645 instructions vupkhsw and vspltisw.
6646
6647 Return the constant that is being split via CONSTANT_PTR. */
6648
6649 bool
6650 vspltisw_vupkhsw_constant_p (rtx op, machine_mode mode, int *constant_ptr)
6651 {
6652 HOST_WIDE_INT value;
6653 rtx elt;
6654
6655 if (!TARGET_P8_VECTOR)
6656 return false;
6657
6658 if (mode != V2DImode)
6659 return false;
6660
6661 if (!const_vec_duplicate_p (op, &elt))
6662 return false;
6663
6664 value = INTVAL (elt);
6665 if (value == 0 || value == 1
6666 || !EASY_VECTOR_15 (value))
6667 return false;
6668
6669 if (constant_ptr)
6670 *constant_ptr = (int) value;
6671 return true;
6672 }
6673
6674 const char *
6675 output_vec_const_move (rtx *operands)
6676 {
6677 int shift;
6678 machine_mode mode;
6679 rtx dest, vec;
6680
6681 dest = operands[0];
6682 vec = operands[1];
6683 mode = GET_MODE (dest);
6684
6685 if (TARGET_VSX)
6686 {
6687 bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
6688 int xxspltib_value = 256;
6689 int num_insns = -1;
6690
6691 if (zero_constant (vec, mode))
6692 {
6693 if (TARGET_P9_VECTOR)
6694 return "xxspltib %x0,0";
6695
6696 else if (dest_vmx_p)
6697 return "vspltisw %0,0";
6698
6699 else
6700 return "xxlxor %x0,%x0,%x0";
6701 }
6702
6703 if (all_ones_constant (vec, mode))
6704 {
6705 if (TARGET_P9_VECTOR)
6706 return "xxspltib %x0,255";
6707
6708 else if (dest_vmx_p)
6709 return "vspltisw %0,-1";
6710
6711 else if (TARGET_P8_VECTOR)
6712 return "xxlorc %x0,%x0,%x0";
6713
6714 else
6715 gcc_unreachable ();
6716 }
6717
6718 vec_const_128bit_type vsx_const;
6719 if (TARGET_POWER10 && vec_const_128bit_to_bytes (vec, mode, &vsx_const))
6720 {
6721 unsigned imm = constant_generates_lxvkq (&vsx_const);
6722 if (imm)
6723 {
6724 operands[2] = GEN_INT (imm);
6725 return "lxvkq %x0,%2";
6726 }
6727
6728 imm = constant_generates_xxspltiw (&vsx_const);
6729 if (imm)
6730 {
6731 operands[2] = GEN_INT (imm);
6732 return "xxspltiw %x0,%2";
6733 }
6734
6735 imm = constant_generates_xxspltidp (&vsx_const);
6736 if (imm)
6737 {
6738 operands[2] = GEN_INT (imm);
6739 return "xxspltidp %x0,%2";
6740 }
6741 }
6742
6743 if (TARGET_P9_VECTOR
6744 && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
6745 {
6746 if (num_insns == 1)
6747 {
6748 operands[2] = GEN_INT (xxspltib_value & 0xff);
6749 return "xxspltib %x0,%2";
6750 }
6751
6752 return "#";
6753 }
6754 }
6755
6756 if (TARGET_ALTIVEC)
6757 {
6758 rtx splat_vec;
6759
6760 gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
6761 if (zero_constant (vec, mode))
6762 return "vspltisw %0,0";
6763
6764 if (all_ones_constant (vec, mode))
6765 return "vspltisw %0,-1";
6766
6767 /* Do we need to construct a value using VSLDOI? */
6768 shift = vspltis_shifted (vec);
6769 if (shift != 0)
6770 return "#";
6771
6772 splat_vec = gen_easy_altivec_constant (vec);
6773 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
6774 operands[1] = XEXP (splat_vec, 0);
6775 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
6776 return "#";
6777
6778 switch (GET_MODE (splat_vec))
6779 {
6780 case E_V4SImode:
6781 return "vspltisw %0,%1";
6782
6783 case E_V8HImode:
6784 return "vspltish %0,%1";
6785
6786 case E_V16QImode:
6787 return "vspltisb %0,%1";
6788
6789 default:
6790 gcc_unreachable ();
6791 }
6792 }
6793
6794 gcc_unreachable ();
6795 }
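
/* Summary of the easy cases above (illustrative only): an all-zero vector
   emits "xxspltib %x0,0" on ISA 3.0, "vspltisw %0,0" for an Altivec
   destination, and "xxlxor %x0,%x0,%x0" otherwise; an all-ones vector emits
   "xxspltib %x0,255", "vspltisw %0,-1" or "xxlorc %x0,%x0,%x0" respectively.
   Returning "#" asks the splitter to break the constant into multiple
   instructions. */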
6796
6797 /* Initialize vector TARGET to VALS. */
6798
6799 void
6800 rs6000_expand_vector_init (rtx target, rtx vals)
6801 {
6802 machine_mode mode = GET_MODE (target);
6803 machine_mode inner_mode = GET_MODE_INNER (mode);
6804 unsigned int n_elts = GET_MODE_NUNITS (mode);
6805 int n_var = 0, one_var = -1;
6806 bool all_same = true, all_const_zero = true;
6807 rtx x, mem;
6808 unsigned int i;
6809
6810 for (i = 0; i < n_elts; ++i)
6811 {
6812 x = XVECEXP (vals, 0, i);
6813 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
6814 ++n_var, one_var = i;
6815 else if (x != CONST0_RTX (inner_mode))
6816 all_const_zero = false;
6817
6818 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6819 all_same = false;
6820 }
6821
6822 if (n_var == 0)
6823 {
6824 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6825 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
6826 if ((int_vector_p || TARGET_VSX) && all_const_zero)
6827 {
6828 /* Zero register. */
6829 emit_move_insn (target, CONST0_RTX (mode));
6830 return;
6831 }
6832 else if (int_vector_p && easy_vector_constant (const_vec, mode))
6833 {
6834 /* Splat immediate. */
6835 emit_insn (gen_rtx_SET (target, const_vec));
6836 return;
6837 }
6838 else
6839 {
6840 /* Load from constant pool. */
6841 emit_move_insn (target, const_vec);
6842 return;
6843 }
6844 }
6845
6846 /* Double word values on VSX can use xxpermdi or lxvdsx. */
6847 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6848 {
6849 rtx op[2];
6850 size_t i;
6851 size_t num_elements = all_same ? 1 : 2;
6852 for (i = 0; i < num_elements; i++)
6853 {
6854 op[i] = XVECEXP (vals, 0, i);
6855 /* Just in case there is a SUBREG with a smaller mode, do a
6856 conversion. */
6857 if (GET_MODE (op[i]) != inner_mode)
6858 {
6859 rtx tmp = gen_reg_rtx (inner_mode);
6860 convert_move (tmp, op[i], 0);
6861 op[i] = tmp;
6862 }
6863 /* Allow load with splat double word. */
6864 else if (MEM_P (op[i]))
6865 {
6866 if (!all_same)
6867 op[i] = force_reg (inner_mode, op[i]);
6868 }
6869 else if (!REG_P (op[i]))
6870 op[i] = force_reg (inner_mode, op[i]);
6871 }
6872
6873 if (all_same)
6874 {
6875 if (mode == V2DFmode)
6876 emit_insn (gen_vsx_splat_v2df (target, op[0]));
6877 else
6878 emit_insn (gen_vsx_splat_v2di (target, op[0]));
6879 }
6880 else
6881 {
6882 if (mode == V2DFmode)
6883 emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
6884 else
6885 emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
6886 }
6887 return;
6888 }
6889
6890 /* Special case initializing vector int if we are on 64-bit systems with
6891 direct move or we have the ISA 3.0 instructions. */
6892 if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
6893 && TARGET_DIRECT_MOVE_64BIT)
6894 {
6895 if (all_same)
6896 {
6897 rtx element0 = XVECEXP (vals, 0, 0);
6898 if (MEM_P (element0))
6899 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6900 else
6901 element0 = force_reg (SImode, element0);
6902
6903 if (TARGET_P9_VECTOR)
6904 emit_insn (gen_vsx_splat_v4si (target, element0));
6905 else
6906 {
6907 rtx tmp = gen_reg_rtx (DImode);
6908 emit_insn (gen_zero_extendsidi2 (tmp, element0));
6909 emit_insn (gen_vsx_splat_v4si_di (target, tmp));
6910 }
6911 return;
6912 }
6913 else
6914 {
6915 rtx elements[4];
6916 size_t i;
6917
6918 for (i = 0; i < 4; i++)
6919 elements[i] = force_reg (SImode, XVECEXP (vals, 0, i));
6920
6921 emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
6922 elements[2], elements[3]));
6923 return;
6924 }
6925 }
6926
6927 /* With single precision floating point on VSX, internally single precision
6928 is actually represented as a double, so either make 2 V2DF vectors and
6929 convert these vectors to single precision, or do one conversion and
6930 splat the result to the other elements. */
6931 if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
6932 {
6933 if (all_same)
6934 {
6935 rtx element0 = XVECEXP (vals, 0, 0);
6936
6937 if (TARGET_P9_VECTOR)
6938 {
6939 if (MEM_P (element0))
6940 element0 = rs6000_force_indexed_or_indirect_mem (element0);
6941
6942 emit_insn (gen_vsx_splat_v4sf (target, element0));
6943 }
6944
6945 else
6946 {
6947 rtx freg = gen_reg_rtx (V4SFmode);
6948 rtx sreg = force_reg (SFmode, element0);
6949 rtx cvt = (TARGET_XSCVDPSPN
6950 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
6951 : gen_vsx_xscvdpsp_scalar (freg, sreg));
6952
6953 emit_insn (cvt);
6954 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
6955 const0_rtx));
6956 }
6957 }
6958 else
6959 {
6960 if (TARGET_P8_VECTOR && TARGET_POWERPC64)
6961 {
6962 rtx tmp_sf[4];
6963 rtx tmp_si[4];
6964 rtx tmp_di[4];
6965 rtx mrg_di[4];
6966 for (i = 0; i < 4; i++)
6967 {
6968 tmp_si[i] = gen_reg_rtx (SImode);
6969 tmp_di[i] = gen_reg_rtx (DImode);
6970 mrg_di[i] = gen_reg_rtx (DImode);
6971 tmp_sf[i] = force_reg (SFmode, XVECEXP (vals, 0, i));
6972 emit_insn (gen_movsi_from_sf (tmp_si[i], tmp_sf[i]));
6973 emit_insn (gen_zero_extendsidi2 (tmp_di[i], tmp_si[i]));
6974 }
6975
6976 if (!BYTES_BIG_ENDIAN)
6977 {
6978 std::swap (tmp_di[0], tmp_di[1]);
6979 std::swap (tmp_di[2], tmp_di[3]);
6980 }
6981
6982 emit_insn (gen_ashldi3 (mrg_di[0], tmp_di[0], GEN_INT (32)));
6983 emit_insn (gen_iordi3 (mrg_di[1], mrg_di[0], tmp_di[1]));
6984 emit_insn (gen_ashldi3 (mrg_di[2], tmp_di[2], GEN_INT (32)));
6985 emit_insn (gen_iordi3 (mrg_di[3], mrg_di[2], tmp_di[3]));
6986
6987 rtx tmp_v2di = gen_reg_rtx (V2DImode);
6988 emit_insn (gen_vsx_concat_v2di (tmp_v2di, mrg_di[1], mrg_di[3]));
6989 emit_move_insn (target, gen_lowpart (V4SFmode, tmp_v2di));
6990 }
6991 else
6992 {
6993 rtx dbl_even = gen_reg_rtx (V2DFmode);
6994 rtx dbl_odd = gen_reg_rtx (V2DFmode);
6995 rtx flt_even = gen_reg_rtx (V4SFmode);
6996 rtx flt_odd = gen_reg_rtx (V4SFmode);
6997 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
6998 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
6999 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
7000 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
7001
7002 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
7003 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
7004 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
7005 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
7006 rs6000_expand_extract_even (target, flt_even, flt_odd);
7007 }
7008 }
7009 return;
7010 }
7011
7012 /* Special case initializing vector short/char values that are splats, if
7013 we are on 64-bit systems with direct move. */
7014 if (all_same && TARGET_DIRECT_MOVE_64BIT
7015 && (mode == V16QImode || mode == V8HImode))
7016 {
7017 rtx op0 = XVECEXP (vals, 0, 0);
7018 rtx di_tmp = gen_reg_rtx (DImode);
7019
7020 if (!REG_P (op0))
7021 op0 = force_reg (GET_MODE_INNER (mode), op0);
7022
7023 if (mode == V16QImode)
7024 {
7025 emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
7026 emit_insn (gen_vsx_vspltb_di (target, di_tmp));
7027 return;
7028 }
7029
7030 if (mode == V8HImode)
7031 {
7032 emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
7033 emit_insn (gen_vsx_vsplth_di (target, di_tmp));
7034 return;
7035 }
7036 }
7037
7038 /* Store value to stack temp. Load vector element. Splat. However, splat
7039 of 64-bit items is not supported on Altivec. */
7040 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
7041 {
7042 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7043 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
7044 XVECEXP (vals, 0, 0));
7045 x = gen_rtx_UNSPEC (VOIDmode,
7046 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7047 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7048 gen_rtvec (2,
7049 gen_rtx_SET (target, mem),
7050 x)));
7051 x = gen_rtx_VEC_SELECT (inner_mode, target,
7052 gen_rtx_PARALLEL (VOIDmode,
7053 gen_rtvec (1, const0_rtx)));
7054 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
7055 return;
7056 }
7057
7058 /* One field is non-constant. Load constant then overwrite
7059 varying field. */
7060 if (n_var == 1)
7061 {
7062 rtx copy = copy_rtx (vals);
7063
7064 /* Load constant part of vector, substitute neighboring value for
7065 varying element. */
7066 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
7067 rs6000_expand_vector_init (target, copy);
7068
7069 /* Insert variable. */
7070 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var),
7071 GEN_INT (one_var));
7072 return;
7073 }
7074
7075 if (TARGET_DIRECT_MOVE && (mode == V16QImode || mode == V8HImode))
7076 {
7077 rtx op[16];
7078 /* Force the values into word_mode registers. */
7079 for (i = 0; i < n_elts; i++)
7080 {
7081 rtx tmp = force_reg (inner_mode, XVECEXP (vals, 0, i));
7082 machine_mode tmode = TARGET_POWERPC64 ? DImode : SImode;
7083 op[i] = simplify_gen_subreg (tmode, tmp, inner_mode, 0);
7084 }
7085
7086 /* Take unsigned char on 64-bit big endian as an example for the
7087 construction below; the input values are: A, B, C, D, ..., O, P. */
7088
7089 if (TARGET_DIRECT_MOVE_128)
7090 {
7091 /* Move to VSX register with vec_concat, each has 2 values.
7092 eg: vr1[0] = { xxxxxxxA, xxxxxxxB };
7093 vr1[1] = { xxxxxxxC, xxxxxxxD };
7094 ...
7095 vr1[7] = { xxxxxxxO, xxxxxxxP }; */
7096 rtx vr1[8];
7097 for (i = 0; i < n_elts / 2; i++)
7098 {
7099 vr1[i] = gen_reg_rtx (V2DImode);
7100 emit_insn (gen_vsx_concat_v2di (vr1[i], op[i * 2],
7101 op[i * 2 + 1]));
7102 }
7103
7104 /* Pack vectors with 2 values into vectors with 4 values.
7105 eg: vr2[0] = { xxxAxxxB, xxxCxxxD };
7106 vr2[1] = { xxxExxxF, xxxGxxxH };
7107 vr2[2] = { xxxIxxxJ, xxxKxxxL };
7108 vr2[3] = { xxxMxxxN, xxxOxxxP }; */
7109 rtx vr2[4];
7110 for (i = 0; i < n_elts / 4; i++)
7111 {
7112 vr2[i] = gen_reg_rtx (V4SImode);
7113 emit_insn (gen_altivec_vpkudum (vr2[i], vr1[i * 2],
7114 vr1[i * 2 + 1]));
7115 }
7116
7117 /* Pack vectors with 4 values into vectors with 8 values.
7118 eg: vr3[0] = { xAxBxCxD, xExFxGxH };
7119 vr3[1] = { xIxJxKxL, xMxNxOxP }; */
7120 rtx vr3[2];
7121 for (i = 0; i < n_elts / 8; i++)
7122 {
7123 vr3[i] = gen_reg_rtx (V8HImode);
7124 emit_insn (gen_altivec_vpkuwum (vr3[i], vr2[i * 2],
7125 vr2[i * 2 + 1]));
7126 }
7127
7128 /* If the mode is V8HImode, we are done; emit the move and return. */
7129 if (mode == V8HImode)
7130 {
7131 emit_insn (gen_rtx_SET (target, vr3[0]));
7132 return;
7133 }
7134
7135 /* Pack vectors with 8 values into 16 values. */
7136 rtx res = gen_reg_rtx (V16QImode);
7137 emit_insn (gen_altivec_vpkuhum (res, vr3[0], vr3[1]));
7138 emit_insn (gen_rtx_SET (target, res));
7139 }
7140 else
7141 {
7142 rtx (*merge_v16qi) (rtx, rtx, rtx) = NULL;
7143 rtx (*merge_v8hi) (rtx, rtx, rtx) = NULL;
7144 rtx (*merge_v4si) (rtx, rtx, rtx) = NULL;
7145 rtx perm_idx;
7146
7147 /* Set up some common gen routines and values. */
7148 if (BYTES_BIG_ENDIAN)
7149 {
7150 if (mode == V16QImode)
7151 {
7152 merge_v16qi = gen_altivec_vmrghb;
7153 merge_v8hi = gen_altivec_vmrglh;
7154 }
7155 else
7156 merge_v8hi = gen_altivec_vmrghh;
7157
7158 merge_v4si = gen_altivec_vmrglw;
7159 perm_idx = GEN_INT (3);
7160 }
7161 else
7162 {
7163 if (mode == V16QImode)
7164 {
7165 merge_v16qi = gen_altivec_vmrglb;
7166 merge_v8hi = gen_altivec_vmrghh;
7167 }
7168 else
7169 merge_v8hi = gen_altivec_vmrglh;
7170
7171 merge_v4si = gen_altivec_vmrghw;
7172 perm_idx = GEN_INT (0);
7173 }
7174
7175 /* Move to VSX register with direct move.
7176 eg: vr_qi[0] = { xxxxxxxA, xxxxxxxx };
7177 vr_qi[1] = { xxxxxxxB, xxxxxxxx };
7178 ...
7179 vr_qi[15] = { xxxxxxxP, xxxxxxxx }; */
7180 rtx vr_qi[16];
7181 for (i = 0; i < n_elts; i++)
7182 {
7183 vr_qi[i] = gen_reg_rtx (V16QImode);
7184 if (TARGET_POWERPC64)
7185 emit_insn (gen_p8_mtvsrd_v16qidi2 (vr_qi[i], op[i]));
7186 else
7187 emit_insn (gen_p8_mtvsrwz_v16qisi2 (vr_qi[i], op[i]));
7188 }
7189
7190 /* Merge/move to vector short.
7191 eg: vr_hi[0] = { xxxxxxxx, xxxxxxAB };
7192 vr_hi[1] = { xxxxxxxx, xxxxxxCD };
7193 ...
7194 vr_hi[7] = { xxxxxxxx, xxxxxxOP }; */
7195 rtx vr_hi[8];
7196 for (i = 0; i < 8; i++)
7197 {
7198 rtx tmp = vr_qi[i];
7199 if (mode == V16QImode)
7200 {
7201 tmp = gen_reg_rtx (V16QImode);
7202 emit_insn (merge_v16qi (tmp, vr_qi[2 * i], vr_qi[2 * i + 1]));
7203 }
7204 vr_hi[i] = gen_reg_rtx (V8HImode);
7205 emit_move_insn (vr_hi[i], gen_lowpart (V8HImode, tmp));
7206 }
7207
7208 /* Merge vector short to vector int.
7209 eg: vr_si[0] = { xxxxxxxx, xxxxABCD };
7210 vr_si[1] = { xxxxxxxx, xxxxEFGH };
7211 ...
7212 vr_si[3] = { xxxxxxxx, xxxxMNOP }; */
7213 rtx vr_si[4];
7214 for (i = 0; i < 4; i++)
7215 {
7216 rtx tmp = gen_reg_rtx (V8HImode);
7217 emit_insn (merge_v8hi (tmp, vr_hi[2 * i], vr_hi[2 * i + 1]));
7218 vr_si[i] = gen_reg_rtx (V4SImode);
7219 emit_move_insn (vr_si[i], gen_lowpart (V4SImode, tmp));
7220 }
7221
7222 /* Merge vector int to vector long.
7223 eg: vr_di[0] = { xxxxxxxx, ABCDEFGH };
7224 vr_di[1] = { xxxxxxxx, IJKLMNOP }; */
7225 rtx vr_di[2];
7226 for (i = 0; i < 2; i++)
7227 {
7228 rtx tmp = gen_reg_rtx (V4SImode);
7229 emit_insn (merge_v4si (tmp, vr_si[2 * i], vr_si[2 * i + 1]));
7230 vr_di[i] = gen_reg_rtx (V2DImode);
7231 emit_move_insn (vr_di[i], gen_lowpart (V2DImode, tmp));
7232 }
7233
7234 rtx res = gen_reg_rtx (V2DImode);
7235 emit_insn (gen_vsx_xxpermdi_v2di (res, vr_di[0], vr_di[1], perm_idx));
7236 emit_insn (gen_rtx_SET (target, gen_lowpart (mode, res)));
7237 }
7238
7239 return;
7240 }
7241
7242 /* Construct the vector in memory one field at a time
7243 and load the whole vector. */
7244 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7245 for (i = 0; i < n_elts; i++)
7246 emit_move_insn (adjust_address_nv (mem, inner_mode,
7247 i * GET_MODE_SIZE (inner_mode)),
7248 XVECEXP (vals, 0, i));
7249 emit_move_insn (target, mem);
7250 }
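
/* A minimal usage sketch (hypothetical caller, illustrative names): the
   vec_init expander passes a PARALLEL of the element values:

     rtx a = ..., b = ..., c = ..., d = ...;   /- SImode operands -/
     rtx vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec (4, a, b, c, d));
     rtx target = gen_reg_rtx (V4SImode);
     rs6000_expand_vector_init (target, vals);

   On a 64-bit VSX target with direct move this takes the vsx_init_v4si path
   above; otherwise it falls back to building the vector in a stack
   temporary one element at a time and loading the whole vector. */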
7251
7252 /* Insert VAL at index IDX of TARGET. VAL has the same size as the vector
7253 element; IDX is variable and counts in vector elements, for p9 and above. */
7254
7255 static void
7256 rs6000_expand_vector_set_var_p9 (rtx target, rtx val, rtx idx)
7257 {
7258 machine_mode mode = GET_MODE (target);
7259
7260 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7261
7262 machine_mode inner_mode = GET_MODE (val);
7263
7264 int width = GET_MODE_SIZE (inner_mode);
7265
7266 gcc_assert (width >= 1 && width <= 8);
7267
7268 int shift = exact_log2 (width);
7269
7270 machine_mode idx_mode = GET_MODE (idx);
7271
7272 machine_mode shift_mode;
7273 /* Gen function pointers for shifting left and generation of permutation
7274 control vectors. */
7275 rtx (*gen_ashl) (rtx, rtx, rtx);
7276 rtx (*gen_pcvr1) (rtx, rtx);
7277 rtx (*gen_pcvr2) (rtx, rtx);
7278
7279 if (TARGET_POWERPC64)
7280 {
7281 shift_mode = DImode;
7282 gen_ashl = gen_ashldi3;
7283 gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_di
7284 : gen_altivec_lvsr_reg_di;
7285 gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_di
7286 : gen_altivec_lvsl_reg_di;
7287 }
7288 else
7289 {
7290 shift_mode = SImode;
7291 gen_ashl = gen_ashlsi3;
7292 gen_pcvr1 = BYTES_BIG_ENDIAN ? gen_altivec_lvsl_reg_si
7293 : gen_altivec_lvsr_reg_si;
7294 gen_pcvr2 = BYTES_BIG_ENDIAN ? gen_altivec_lvsr_reg_si
7295 : gen_altivec_lvsl_reg_si;
7296 }
7297 /* Generate the IDX for permute shift, width is the vector element size.
7298 idx = idx * width. */
7299 rtx tmp = gen_reg_rtx (shift_mode);
7300 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7301
7302 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7303
7304 /* Generate one permutation control vector used for rotating the element
7305 at the to-insert position to element zero in the target vector. lvsl is
7306 used for big endian while lvsr is used for little endian:
7307 lvs[lr] v1,0,idx. */
7308 rtx pcvr1 = gen_reg_rtx (V16QImode);
7309 emit_insn (gen_pcvr1 (pcvr1, tmp));
7310
7311 rtx sub_target = simplify_gen_subreg (V16QImode, target, mode, 0);
7312 rtx perm1 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
7313 pcvr1);
7314 emit_insn (perm1);
7315
7316 /* Insert val into element 0 of target vector. */
7317 rs6000_expand_vector_set (target, val, const0_rtx);
7318
7319 /* Rotate back with a reversed permutation control vector generated from:
7320 lvs[rl] v2,0,idx. */
7321 rtx pcvr2 = gen_reg_rtx (V16QImode);
7322 emit_insn (gen_pcvr2 (pcvr2, tmp));
7323
7324 rtx perm2 = gen_altivec_vperm_v8hiv16qi (sub_target, sub_target, sub_target,
7325 pcvr2);
7326 emit_insn (perm2);
7327 }
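
/* Worked example (illustrative only): inserting VAL at variable IDX into a
   V4SImode TARGET. width = 4, so the byte offset is IDX << 2. Step 1
   rotates TARGET with an lvsl/lvsr-generated permute so the element at IDX
   lands in element 0; step 2 overwrites element 0 via
   rs6000_expand_vector_set with a constant index; step 3 applies the
   inverse permute to rotate the vector back into its original order. */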
7328
7329 /* Insert VAL at index IDX of TARGET. VAL has the same size as the vector
7330 element; IDX is variable and counts in vector elements, for p7 & p8. */
7331
7332 static void
7333 rs6000_expand_vector_set_var_p7 (rtx target, rtx val, rtx idx)
7334 {
7335 machine_mode mode = GET_MODE (target);
7336
7337 gcc_assert (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (idx));
7338
7339 machine_mode inner_mode = GET_MODE (val);
7340 HOST_WIDE_INT mode_mask = GET_MODE_MASK (inner_mode);
7341
7342 int width = GET_MODE_SIZE (inner_mode);
7343 gcc_assert (width >= 1 && width <= 4);
7344
7345 int shift = exact_log2 (width);
7346
7347 machine_mode idx_mode = GET_MODE (idx);
7348
7349 machine_mode shift_mode;
7350 rtx (*gen_ashl)(rtx, rtx, rtx);
7351 rtx (*gen_add)(rtx, rtx, rtx);
7352 rtx (*gen_sub)(rtx, rtx, rtx);
7353 rtx (*gen_lvsl)(rtx, rtx);
7354
7355 if (TARGET_POWERPC64)
7356 {
7357 shift_mode = DImode;
7358 gen_ashl = gen_ashldi3;
7359 gen_add = gen_adddi3;
7360 gen_sub = gen_subdi3;
7361 gen_lvsl = gen_altivec_lvsl_reg_di;
7362 }
7363 else
7364 {
7365 shift_mode = SImode;
7366 gen_ashl = gen_ashlsi3;
7367 gen_add = gen_addsi3;
7368 gen_sub = gen_subsi3;
7369 gen_lvsl = gen_altivec_lvsl_reg_si;
7370 }
7371
7372 /* idx = idx * width. */
7373 rtx tmp = gen_reg_rtx (shift_mode);
7374 idx = convert_modes (shift_mode, idx_mode, idx, 1);
7375
7376 emit_insn (gen_ashl (tmp, idx, GEN_INT (shift)));
7377
7378 /* For LE: idx = idx + 8. For BE: idx = (24 - width) - idx. */
7379 if (!BYTES_BIG_ENDIAN)
7380 emit_insn (gen_add (tmp, tmp, GEN_INT (8)));
7381 else
7382 emit_insn (gen_sub (tmp, GEN_INT (24 - width), tmp));
7383
7384 /* lxv vs33, mask.
7385 DImode: 0xffffffffffffffff0000000000000000
7386 SImode: 0x00000000ffffffff0000000000000000
7387 HImode: 0x000000000000ffff0000000000000000.
7388 QImode: 0x00000000000000ff0000000000000000. */
7389 rtx mask = gen_reg_rtx (V16QImode);
7390 rtx mask_v2di = gen_reg_rtx (V2DImode);
7391 rtvec v = rtvec_alloc (2);
7392 if (!BYTES_BIG_ENDIAN)
7393 {
7394 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, 0);
7395 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, mode_mask);
7396 }
7397 else
7398 {
7399 RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (DImode, mode_mask);
7400 RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (DImode, 0);
7401 }
7402 emit_insn (gen_vec_initv2didi (mask_v2di, gen_rtx_PARALLEL (V2DImode, v)));
7403 rtx sub_mask = simplify_gen_subreg (V16QImode, mask_v2di, V2DImode, 0);
7404 emit_insn (gen_rtx_SET (mask, sub_mask));
7405
7406 /* mtvsrd[wz] f0,tmp_val. */
7407 rtx tmp_val = gen_reg_rtx (SImode);
7408 if (inner_mode == E_SFmode)
7409 if (TARGET_DIRECT_MOVE_64BIT)
7410 emit_insn (gen_movsi_from_sf (tmp_val, val));
7411 else
7412 {
7413 rtx stack = rs6000_allocate_stack_temp (SFmode, false, true);
7414 emit_insn (gen_movsf_hardfloat (stack, val));
7415 rtx stack2 = copy_rtx (stack);
7416 PUT_MODE (stack2, SImode);
7417 emit_move_insn (tmp_val, stack2);
7418 }
7419 else
7420 tmp_val = force_reg (SImode, val);
7421
7422 rtx val_v16qi = gen_reg_rtx (V16QImode);
7423 rtx val_v2di = gen_reg_rtx (V2DImode);
7424 rtvec vec_val = rtvec_alloc (2);
7425 if (!BYTES_BIG_ENDIAN)
7426 {
7427 RTVEC_ELT (vec_val, 0) = gen_rtx_CONST_INT (DImode, 0);
7428 RTVEC_ELT (vec_val, 1) = tmp_val;
7429 }
7430 else
7431 {
7432 RTVEC_ELT (vec_val, 0) = tmp_val;
7433 RTVEC_ELT (vec_val, 1) = gen_rtx_CONST_INT (DImode, 0);
7434 }
7435 emit_insn (
7436 gen_vec_initv2didi (val_v2di, gen_rtx_PARALLEL (V2DImode, vec_val)));
7437 rtx sub_val = simplify_gen_subreg (V16QImode, val_v2di, V2DImode, 0);
7438 emit_insn (gen_rtx_SET (val_v16qi, sub_val));
7439
7440 /* lvsl 13,0,idx. */
7441 rtx pcv = gen_reg_rtx (V16QImode);
7442 emit_insn (gen_lvsl (pcv, tmp));
7443
7444 /* vperm 1,1,1,13. */
7445 /* vperm 0,0,0,13. */
7446 rtx val_perm = gen_reg_rtx (V16QImode);
7447 rtx mask_perm = gen_reg_rtx (V16QImode);
7448 emit_insn (gen_altivec_vperm_v8hiv16qi (val_perm, val_v16qi, val_v16qi, pcv));
7449 emit_insn (gen_altivec_vperm_v8hiv16qi (mask_perm, mask, mask, pcv));
7450
7451 rtx target_v16qi = simplify_gen_subreg (V16QImode, target, mode, 0);
7452
7453 /* xxsel 34,34,32,33. */
7454 emit_insn (
7455 gen_vector_select_v16qi (target_v16qi, target_v16qi, val_perm, mask_perm));
7456 }
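
/* Worked example (illustrative only): for an SImode VAL at variable IDX,
   the code above materializes a mask with 0xffffffff in one known lane and
   VAL in the matching lane of another V16QI temporary, rotates both to the
   destination lane with the same lvsl-generated permute, and then merges
   with xxsel: target = (target & ~mask) | (val & mask). */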
7457
7458 /* Set field ELT_RTX of TARGET to VAL. */
7459
7460 void
7461 rs6000_expand_vector_set (rtx target, rtx val, rtx elt_rtx)
7462 {
7463 machine_mode mode = GET_MODE (target);
7464 machine_mode inner_mode = GET_MODE_INNER (mode);
7465 rtx reg = gen_reg_rtx (mode);
7466 rtx mask, mem, x;
7467 int width = GET_MODE_SIZE (inner_mode);
7468 int i;
7469
7470 val = force_reg (GET_MODE (val), val);
7471
7472 if (VECTOR_MEM_VSX_P (mode))
7473 {
7474 if (!CONST_INT_P (elt_rtx))
7475 {
7476 /* For V2DI/V2DF, we can leverage the P9 version to generate xxpermdi
7477 when elt_rtx is variable. */
7478 if ((TARGET_P9_VECTOR && TARGET_POWERPC64) || width == 8)
7479 {
7480 rs6000_expand_vector_set_var_p9 (target, val, elt_rtx);
7481 return;
7482 }
7483 else if (TARGET_VSX)
7484 {
7485 rs6000_expand_vector_set_var_p7 (target, val, elt_rtx);
7486 return;
7487 }
7488 else
7489 gcc_assert (CONST_INT_P (elt_rtx));
7490 }
7491
7492 rtx insn = NULL_RTX;
7493
7494 if (mode == V2DFmode)
7495 insn = gen_vsx_set_v2df (target, target, val, elt_rtx);
7496
7497 else if (mode == V2DImode)
7498 insn = gen_vsx_set_v2di (target, target, val, elt_rtx);
7499
7500 else if (TARGET_P9_VECTOR && TARGET_POWERPC64)
7501 {
7502 if (mode == V4SImode)
7503 insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
7504 else if (mode == V8HImode)
7505 insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
7506 else if (mode == V16QImode)
7507 insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
7508 else if (mode == V4SFmode)
7509 insn = gen_vsx_set_v4sf_p9 (target, target, val, elt_rtx);
7510 }
7511
7512 if (insn)
7513 {
7514 emit_insn (insn);
7515 return;
7516 }
7517 }
7518
7519 /* Simplify setting single element vectors like V1TImode. */
7520 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode)
7521 && INTVAL (elt_rtx) == 0)
7522 {
7523 emit_move_insn (target, gen_lowpart (mode, val));
7524 return;
7525 }
7526
7527 /* Load single variable value. */
7528 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
7529 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
7530 x = gen_rtx_UNSPEC (VOIDmode,
7531 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
7532 emit_insn (gen_rtx_PARALLEL (VOIDmode,
7533 gen_rtvec (2,
7534 gen_rtx_SET (reg, mem),
7535 x)));
7536
7537 /* Linear sequence. */
7538 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
7539 for (i = 0; i < 16; ++i)
7540 XVECEXP (mask, 0, i) = GEN_INT (i);
7541
7542 /* Set permute mask to insert element into target. */
7543 for (i = 0; i < width; ++i)
7544 XVECEXP (mask, 0, INTVAL (elt_rtx) * width + i) = GEN_INT (i + 0x10);
7545 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
7546
7547 if (BYTES_BIG_ENDIAN)
7548 x = gen_rtx_UNSPEC (mode,
7549 gen_rtvec (3, target, reg,
7550 force_reg (V16QImode, x)),
7551 UNSPEC_VPERM);
7552 else
7553 {
7554 if (TARGET_P9_VECTOR)
7555 x = gen_rtx_UNSPEC (mode,
7556 gen_rtvec (3, reg, target,
7557 force_reg (V16QImode, x)),
7558 UNSPEC_VPERMR);
7559 else
7560 {
7561 /* Invert selector. We prefer to generate VNAND on P8 so
7562 that future fusion opportunities can kick in, but must
7563 generate VNOR elsewhere. */
7564 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
7565 rtx iorx = (TARGET_P8_VECTOR
7566 ? gen_rtx_IOR (V16QImode, notx, notx)
7567 : gen_rtx_AND (V16QImode, notx, notx));
7568 rtx tmp = gen_reg_rtx (V16QImode);
7569 emit_insn (gen_rtx_SET (tmp, iorx));
7570
7571 /* Permute with operands reversed and adjusted selector. */
7572 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
7573 UNSPEC_VPERM);
7574 }
7575 }
7576
7577 emit_insn (gen_rtx_SET (target, x));
7578 }
7579
7580 /* Extract field ELT from VEC into TARGET. */
7581
7582 void
7583 rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt)
7584 {
7585 machine_mode mode = GET_MODE (vec);
7586 machine_mode inner_mode = GET_MODE_INNER (mode);
7587 rtx mem;
7588
7589 if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt))
7590 {
7591 switch (mode)
7592 {
7593 default:
7594 break;
7595 case E_V1TImode:
7596 emit_move_insn (target, gen_lowpart (TImode, vec));
7597 break;
7598 case E_V2DFmode:
7599 emit_insn (gen_vsx_extract_v2df (target, vec, elt));
7600 return;
7601 case E_V2DImode:
7602 emit_insn (gen_vsx_extract_v2di (target, vec, elt));
7603 return;
7604 case E_V4SFmode:
7605 emit_insn (gen_vsx_extract_v4sf (target, vec, elt));
7606 return;
7607 case E_V16QImode:
7608 if (TARGET_DIRECT_MOVE_64BIT)
7609 {
7610 emit_insn (gen_vsx_extract_v16qi (target, vec, elt));
7611 return;
7612 }
7613 else
7614 break;
7615 case E_V8HImode:
7616 if (TARGET_DIRECT_MOVE_64BIT)
7617 {
7618 emit_insn (gen_vsx_extract_v8hi (target, vec, elt));
7619 return;
7620 }
7621 else
7622 break;
7623 case E_V4SImode:
7624 if (TARGET_DIRECT_MOVE_64BIT)
7625 {
7626 emit_insn (gen_vsx_extract_v4si (target, vec, elt));
7627 return;
7628 }
7629 break;
7630 }
7631 }
7632 else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt)
7633 && TARGET_DIRECT_MOVE_64BIT)
7634 {
7635 if (GET_MODE (elt) != DImode)
7636 {
7637 rtx tmp = gen_reg_rtx (DImode);
7638 convert_move (tmp, elt, 0);
7639 elt = tmp;
7640 }
7641 else if (!REG_P (elt))
7642 elt = force_reg (DImode, elt);
7643
7644 switch (mode)
7645 {
7646 case E_V1TImode:
7647 emit_move_insn (target, gen_lowpart (TImode, vec));
7648 return;
7649
7650 case E_V2DFmode:
7651 emit_insn (gen_vsx_extract_v2df_var (target, vec, elt));
7652 return;
7653
7654 case E_V2DImode:
7655 emit_insn (gen_vsx_extract_v2di_var (target, vec, elt));
7656 return;
7657
7658 case E_V4SFmode:
7659 emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt));
7660 return;
7661
7662 case E_V4SImode:
7663 emit_insn (gen_vsx_extract_v4si_var (target, vec, elt));
7664 return;
7665
7666 case E_V8HImode:
7667 emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt));
7668 return;
7669
7670 case E_V16QImode:
7671 emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt));
7672 return;
7673
7674 default:
7675 gcc_unreachable ();
7676 }
7677 }
7678
7679 /* Allocate mode-sized buffer. */
7680 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
7681
7682 emit_move_insn (mem, vec);
7683 if (CONST_INT_P (elt))
7684 {
7685 int modulo_elt = INTVAL (elt) % GET_MODE_NUNITS (mode);
7686
7687 /* Add offset to field within buffer matching vector element. */
7688 mem = adjust_address_nv (mem, inner_mode,
7689 modulo_elt * GET_MODE_SIZE (inner_mode));
7690 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
7691 }
7692 else
7693 {
7694 unsigned int ele_size = GET_MODE_SIZE (inner_mode);
7695 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
7696
7697 elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
7698 if (ele_size > 1)
7699 elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
7700 rtx new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
7701 new_addr = change_address (mem, inner_mode, new_addr);
7702 emit_move_insn (target, new_addr);
7703 }
7704 }
7705
7706 /* Return the offset within a memory object (MEM) of a vector type to a given
7707 element within the vector (ELEMENT) with an element size (SCALAR_SIZE). If
7708 the element is constant, we return a constant integer.
7709
7710 Otherwise, we use a base register temporary to calculate the offset after
7711 masking it to fit within the bounds of the vector and scaling it. The
7712 masking is required by the 64-bit ELF version 2 ABI for the vec_extract
7713 built-in function. */
7714
7715 static rtx
7716 get_vector_offset (rtx mem, rtx element, rtx base_tmp, unsigned scalar_size)
7717 {
7718 if (CONST_INT_P (element))
7719 return GEN_INT (INTVAL (element) * scalar_size);
7720
7721 /* All insns should use the 'Q' constraint (address is a single register) if
7722 the element number is not a constant. */
7723 gcc_assert (satisfies_constraint_Q (mem));
7724
7725 /* Mask the element to make sure the element number is between 0 and the
7726 maximum number of elements - 1 so that we don't generate an address
7727 outside the vector. */
7728 rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (GET_MODE (mem)) - 1);
7729 rtx and_op = gen_rtx_AND (Pmode, element, num_ele_m1);
7730 emit_insn (gen_rtx_SET (base_tmp, and_op));
7731
7732 /* Shift the element to get the byte offset from the element number. */
7733 int shift = exact_log2 (scalar_size);
7734 gcc_assert (shift >= 0);
7735
7736 if (shift > 0)
7737 {
7738 rtx shift_op = gen_rtx_ASHIFT (Pmode, base_tmp, GEN_INT (shift));
7739 emit_insn (gen_rtx_SET (base_tmp, shift_op));
7740 }
7741
7742 return base_tmp;
7743 }
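
/* Worked example (illustrative only): for a V4SImode MEM with a variable
   ELEMENT, scalar_size is 4 and GET_MODE_NUNITS is 4, so the code emits

     base_tmp = element & 3;       (clamp to 0 .. nunits-1)
     base_tmp = base_tmp << 2;     (scale by the element size)

   e.g. an element number of 6 yields a byte offset of 8, since 6 & 3 = 2
   and 2 << 2 = 8, implementing the ABI-mandated modulo behavior. */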
7744
7745 /* Helper function to update PC-relative addresses when we are adjusting a memory
7746 address (ADDR) to a vector to point to a scalar field within the vector with
7747 a constant offset (ELEMENT_OFFSET). If the address is not valid, we can
7748 use the base register temporary (BASE_TMP) to form the address. */
7749
7750 static rtx
7751 adjust_vec_address_pcrel (rtx addr, rtx element_offset, rtx base_tmp)
7752 {
7753 rtx new_addr = NULL;
7754
7755 gcc_assert (CONST_INT_P (element_offset));
7756
7757 if (GET_CODE (addr) == CONST)
7758 addr = XEXP (addr, 0);
7759
7760 if (GET_CODE (addr) == PLUS)
7761 {
7762 rtx op0 = XEXP (addr, 0);
7763 rtx op1 = XEXP (addr, 1);
7764
7765 if (CONST_INT_P (op1))
7766 {
7767 HOST_WIDE_INT offset
7768 = INTVAL (XEXP (addr, 1)) + INTVAL (element_offset);
7769
7770 if (offset == 0)
7771 new_addr = op0;
7772
7773 else
7774 {
7775 rtx plus = gen_rtx_PLUS (Pmode, op0, GEN_INT (offset));
7776 new_addr = gen_rtx_CONST (Pmode, plus);
7777 }
7778 }
7779
7780 else
7781 {
7782 emit_move_insn (base_tmp, addr);
7783 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7784 }
7785 }
7786
7787 else if (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
7788 {
7789 rtx plus = gen_rtx_PLUS (Pmode, addr, element_offset);
7790 new_addr = gen_rtx_CONST (Pmode, plus);
7791 }
7792
7793 else
7794 gcc_unreachable ();
7795
7796 return new_addr;
7797 }
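
/* Examples (illustrative only): with ADDR = (const (plus (symbol_ref "x")
   (const_int 16))) and ELEMENT_OFFSET = (const_int 8), the offsets fold and
   the result is (const (plus (symbol_ref "x") (const_int 24))). A bare
   (symbol_ref "x") with the same ELEMENT_OFFSET instead becomes
   (const (plus (symbol_ref "x") (const_int 8))). */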
7798
7799 /* Adjust a memory address (MEM) of a vector type to point to a scalar field
7800 within the vector (ELEMENT) with a mode (SCALAR_MODE). Use a base register
7801 temporary (BASE_TMP) to fixup the address. Return the new memory address
7802 that is valid for reads or writes to a given register (SCALAR_REG).
7803
7804 This function is expected to be called after reload is completed when we are
7805 splitting insns. The temporary BASE_TMP might be set multiple times with
7806 this code. */
7807
7808 rtx
7809 rs6000_adjust_vec_address (rtx scalar_reg,
7810 rtx mem,
7811 rtx element,
7812 rtx base_tmp,
7813 machine_mode scalar_mode)
7814 {
7815 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7816 rtx addr = XEXP (mem, 0);
7817 rtx new_addr;
7818
7819 gcc_assert (!reg_mentioned_p (base_tmp, addr));
7820 gcc_assert (!reg_mentioned_p (base_tmp, element));
7821
7822 /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY. */
7823 gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);
7824
7825 /* Calculate what we need to add to the address to get the element
7826 address. */
7827 rtx element_offset = get_vector_offset (mem, element, base_tmp, scalar_size);
7828
7829 /* Create the new address pointing to the element within the vector. If we
7830 are adding 0, we don't have to change the address. */
7831 if (element_offset == const0_rtx)
7832 new_addr = addr;
7833
7834 /* A simple indirect address can be converted into a reg + offset
7835 address. */
7836 else if (REG_P (addr) || SUBREG_P (addr))
7837 new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);
7838
7839 /* For references to local static variables, fold a constant offset into the
7840 address. */
7841 else if (pcrel_local_address (addr, Pmode) && CONST_INT_P (element_offset))
7842 new_addr = adjust_vec_address_pcrel (addr, element_offset, base_tmp);
7843
7844 /* Optimize D-FORM addresses with constant offset with a constant element, to
7845 include the element offset in the address directly. */
7846 else if (GET_CODE (addr) == PLUS)
7847 {
7848 rtx op0 = XEXP (addr, 0);
7849 rtx op1 = XEXP (addr, 1);
7850
7851 gcc_assert (REG_P (op0) || SUBREG_P (op0));
7852 if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
7853 {
7854 /* op0 should never be r0, because r0+offset is not valid. But it
7855 doesn't hurt to make sure it is not r0. */
7856 gcc_assert (reg_or_subregno (op0) != 0);
7857
7858 /* D-FORM address with constant element number. */
7859 HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
7860 rtx offset_rtx = GEN_INT (offset);
7861 new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
7862 }
7863 else
7864 {
7865 /* If we don't have a D-FORM address with a constant element number,
7866 add the two elements in the current address. Then add the offset.
7867
7868 Previously, we tried to add the offset to OP1 and change the
7869 address to an X-FORM format adding OP0 and BASE_TMP, but it became
7870 complicated because we had to verify that op1 was not GPR0 and we
7871 had a constant element offset (due to the way ADDI is defined).
7872 By doing the add of OP0 and OP1 first, and then adding in the
7873 offset, it has the benefit that if D-FORM instructions are
7874 allowed, the offset is part of the memory access to the vector
7875 element. */
7876 emit_insn (gen_rtx_SET (base_tmp, gen_rtx_PLUS (Pmode, op0, op1)));
7877 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7878 }
7879 }
7880
7881 else
7882 {
7883 emit_move_insn (base_tmp, addr);
7884 new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
7885 }
7886
7887 /* If the address isn't valid, move the address into the temporary base
7888 register. Some reasons it could not be valid include:
7889
7890 The address offset overflowed the 16 or 34 bit offset size;
7891 We need to use a DS-FORM load, and the bottom 2 bits are non-zero;
7892 We need to use a DQ-FORM load, and the bottom 4 bits are non-zero;
7893 Only X_FORM loads can be done, and the address is D_FORM. */
7894
7895 enum insn_form iform
7896 = address_to_insn_form (new_addr, scalar_mode,
7897 reg_to_non_prefixed (scalar_reg, scalar_mode));
7898
7899 if (iform == INSN_FORM_BAD)
7900 {
7901 emit_move_insn (base_tmp, new_addr);
7902 new_addr = base_tmp;
7903 }
7904
7905 return change_address (mem, scalar_mode, new_addr);
7906 }
7907
7908 /* Split a variable vec_extract operation into the component instructions. */
7909
7910 void
7911 rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
7912 rtx tmp_altivec)
7913 {
7914 machine_mode mode = GET_MODE (src);
7915 machine_mode scalar_mode = GET_MODE_INNER (GET_MODE (src));
7916 unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
7917 int byte_shift = exact_log2 (scalar_size);
7918
7919 gcc_assert (byte_shift >= 0);
7920
7921 /* If we are given a memory address, optimize to load just the element. We
7922 don't have to adjust the vector element number on little endian
7923 systems. */
7924 if (MEM_P (src))
7925 {
7926 emit_move_insn (dest,
7927 rs6000_adjust_vec_address (dest, src, element, tmp_gpr,
7928 scalar_mode));
7929 return;
7930 }
7931
7932 else if (REG_P (src) || SUBREG_P (src))
7933 {
7934 int num_elements = GET_MODE_NUNITS (mode);
7935 int bits_in_element = mode_to_bits (GET_MODE_INNER (mode));
7936 int bit_shift = 7 - exact_log2 (num_elements);
7937 rtx element2;
7938 unsigned int dest_regno = reg_or_subregno (dest);
7939 unsigned int src_regno = reg_or_subregno (src);
7940 unsigned int element_regno = reg_or_subregno (element);
7941
7942 gcc_assert (REG_P (tmp_gpr));
7943
7944 /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
7945 a general purpose register. */
7946 if (TARGET_P9_VECTOR
7947 && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
7948 && INT_REGNO_P (dest_regno)
7949 && ALTIVEC_REGNO_P (src_regno)
7950 && INT_REGNO_P (element_regno))
7951 {
7952 rtx dest_si = gen_rtx_REG (SImode, dest_regno);
7953 rtx element_si = gen_rtx_REG (SImode, element_regno);
7954
7955 if (mode == V16QImode)
7956 emit_insn (BYTES_BIG_ENDIAN
7957 ? gen_vextublx (dest_si, element_si, src)
7958 : gen_vextubrx (dest_si, element_si, src));
7959
7960 else if (mode == V8HImode)
7961 {
7962 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7963 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
7964 emit_insn (BYTES_BIG_ENDIAN
7965 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
7966 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
7967 }
7968
7969
7970 else
7971 {
7972 rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
7973 emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
7974 emit_insn (BYTES_BIG_ENDIAN
7975 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
7976 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
7977 }
7978
7979 return;
7980 }
7981
7982
7983 gcc_assert (REG_P (tmp_altivec));
7984
7985 /* For little endian, adjust element ordering. For V2DI/V2DF, we can use
7986 an XOR, otherwise we need to subtract. The shift amount is chosen so
7987 that VSLO will shift the element into the upper position (adding 3 to
7988 convert a byte shift into a bit shift). */
7989 if (scalar_size == 8)
7990 {
7991 if (!BYTES_BIG_ENDIAN)
7992 {
7993 emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
7994 element2 = tmp_gpr;
7995 }
7996 else
7997 element2 = element;
7998
7999 /* Generate RLDIC directly to shift left 6 bits and retrieve only the low
8000 bit (the element number is 0 or 1 for V2DI/V2DF). */
8001 emit_insn (gen_rtx_SET (tmp_gpr,
8002 gen_rtx_AND (DImode,
8003 gen_rtx_ASHIFT (DImode,
8004 element2,
8005 GEN_INT (6)),
8006 GEN_INT (64))));
8007 }
8008 else
8009 {
8010 if (!BYTES_BIG_ENDIAN)
8011 {
8012 rtx num_ele_m1 = GEN_INT (num_elements - 1);
8013
8014 emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
8015 emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
8016 element2 = tmp_gpr;
8017 }
8018 else
8019 element2 = element;
8020
8021 emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
8022 }
8023
8024 /* Get the value into the lower byte of the Altivec register where VSLO
8025 expects it. */
8026 if (TARGET_P9_VECTOR)
8027 emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
8028 else if (can_create_pseudo_p ())
8029 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
8030 else
8031 {
8032 rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8033 emit_move_insn (tmp_di, tmp_gpr);
8034 emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
8035 }
8036
8037 /* Do the VSLO to get the value into the final location. */
8038 switch (mode)
8039 {
8040 case E_V2DFmode:
8041 emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
8042 return;
8043
8044 case E_V2DImode:
8045 emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
8046 return;
8047
8048 case E_V4SFmode:
8049 {
8050 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8051 rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
8052 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8053 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8054 tmp_altivec));
8055
8056 emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
8057 return;
8058 }
8059
8060 case E_V4SImode:
8061 case E_V8HImode:
8062 case E_V16QImode:
8063 {
8064 rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
8065 rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
8066 rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
8067 emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
8068 tmp_altivec));
8069 emit_move_insn (tmp_gpr_di, tmp_altivec_di);
8070 emit_insn (gen_lshrdi3 (tmp_gpr_di, tmp_gpr_di,
8071 GEN_INT (64 - bits_in_element)));
8072 return;
8073 }
8074
8075 default:
8076 gcc_unreachable ();
8077 }
8078
8079 return;
8080 }
8081 else
8082 gcc_unreachable ();
8083 }
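
/* Worked example (illustrative only): extracting a variable element from a
   V4SImode register on big endian. bit_shift is 7 - log2 (4) = 5, so
   element 2 becomes 2 << 5 = 64; VSLO ignores the low three bits, giving a
   byte shift of 8 that moves element 2 into the high doubleword. The value
   is then moved to a GPR and shifted right by 64 - 32 = 32 bits to
   right-justify the 32-bit element. */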
8084
8085 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
8086 selects whether the alignment is ABI mandated, optional, or
8087 both ABI mandated and optional. */
8088
8089 unsigned int
8090 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
8091 {
8092 if (how != align_opt)
8093 {
8094 if (VECTOR_TYPE_P (type) && align < 128)
8095 align = 128;
8096 }
8097
8098 if (how != align_abi)
8099 {
8100 if (TREE_CODE (type) == ARRAY_TYPE
8101 && TYPE_MODE (TREE_TYPE (type)) == QImode)
8102 {
8103 if (align < BITS_PER_WORD)
8104 align = BITS_PER_WORD;
8105 }
8106 }
8107
8108 return align;
8109 }
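
/* Examples (illustrative only): unless HOW is align_opt, a vector type with
   a declared alignment below 128 bits is raised to 128 (ABI mandated);
   unless HOW is align_abi, an array of QImode elements is raised to
   BITS_PER_WORD (an optional, performance-only adjustment). */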
8110
8111 /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory
8112 instructions simply ignore the low bits; VSX memory instructions
8113 are aligned to 4 or 8 bytes. */
8114
8115 static bool
8116 rs6000_slow_unaligned_access (machine_mode mode, unsigned int align)
8117 {
8118 return (STRICT_ALIGNMENT
8119 || (!TARGET_EFFICIENT_UNALIGNED_VSX
8120 && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32)
8121 || ((VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode))
8122 && (int) align < VECTOR_ALIGN (mode)))));
8123 }
8124
8125 /* AIX word-aligns FP doubles but doubleword-aligns 64-bit ints. */
8126
8127 unsigned int
8128 rs6000_special_adjust_field_align (tree type, unsigned int computed)
8129 {
8130 if (computed <= 32 || TYPE_PACKED (type))
8131 return computed;
8132
8133 /* Strip initial arrays. */
8134 while (TREE_CODE (type) == ARRAY_TYPE)
8135 type = TREE_TYPE (type);
8136
8137 /* If RECORD or UNION, recursively find the first field. */
8138 while (AGGREGATE_TYPE_P (type))
8139 {
8140 tree field = TYPE_FIELDS (type);
8141
8142 /* Skip all non-field decls. */
8143 while (field != NULL
8144 && (TREE_CODE (field) != FIELD_DECL
8145 || DECL_FIELD_ABI_IGNORED (field)))
8146 field = DECL_CHAIN (field);
8147
8148 if (! field)
8149 break;
8150
8151 /* A packed field does not contribute any extra alignment. */
8152 if (DECL_PACKED (field))
8153 return computed;
8154
8155 type = TREE_TYPE (field);
8156
8157 /* Strip arrays. */
8158 while (TREE_CODE (type) == ARRAY_TYPE)
8159 type = TREE_TYPE (type);
8160 }
8161
8162 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8163 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8164 computed = MIN (computed, 32);
8165
8166 return computed;
8167 }
8168
8169 /* AIX increases natural record alignment to doubleword if the innermost first
8170 field is an FP double while the FP fields remain word aligned.
8171 Only called if TYPE initially is a RECORD or UNION. */
8172
8173 unsigned int
8174 rs6000_special_round_type_align (tree type, unsigned int computed,
8175 unsigned int specified)
8176 {
8177 unsigned int align = MAX (computed, specified);
8178
8179 if (TYPE_PACKED (type) || align >= 64)
8180 return align;
8181
8182 /* If RECORD or UNION, recursively find the first field. */
8183 do
8184 {
8185 tree field = TYPE_FIELDS (type);
8186
8187 /* Skip all non-field decls. */
8188 while (field != NULL
8189 && (TREE_CODE (field) != FIELD_DECL
8190 || DECL_FIELD_ABI_IGNORED (field)))
8191 field = DECL_CHAIN (field);
8192
8193 if (! field)
8194 break;
8195
8196 /* A packed field does not contribute any extra alignment. */
8197 if (DECL_PACKED (field))
8198 return align;
8199
8200 type = TREE_TYPE (field);
8201
8202 /* Strip arrays. */
8203 while (TREE_CODE (type) == ARRAY_TYPE)
8204 type = TREE_TYPE (type);
8205 } while (AGGREGATE_TYPE_P (type));
8206
8207 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node
8208 && (TYPE_MODE (type) == DFmode || TYPE_MODE (type) == DCmode))
8209 align = MAX (align, 64);
8210
8211 return align;
8212 }
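
/* Examples (illustrative only): struct { double d; int i; } gets its record
   alignment raised to 64 because the innermost first field is a DFmode
   double, while struct { int i; double d; } is left unchanged since its
   first field is an int; a packed record is never adjusted. */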
8213
8214 /* Darwin increases record alignment to the natural alignment of
8215 the first field. */
8216
8217 unsigned int
8218 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
8219 unsigned int specified)
8220 {
8221 unsigned int align = MAX (computed, specified);
8222
8223 if (TYPE_PACKED (type))
8224 return align;
8225
8226 /* Find the first field, looking down into aggregates. */
8227 do {
8228 tree field = TYPE_FIELDS (type);
8229 /* Skip all non-field decls. */
8230 while (field != NULL
8231 && (TREE_CODE (field) != FIELD_DECL
8232 || DECL_FIELD_ABI_IGNORED (field)))
8233 field = DECL_CHAIN (field);
8234 if (! field)
8235 break;
8236 /* A packed field does not contribute any extra alignment. */
8237 if (DECL_PACKED (field))
8238 return align;
8239 type = TREE_TYPE (field);
8240 while (TREE_CODE (type) == ARRAY_TYPE)
8241 type = TREE_TYPE (type);
8242 } while (AGGREGATE_TYPE_P (type));
8243
8244 if (type != error_mark_node && ! AGGREGATE_TYPE_P (type)
8245 && ! TYPE_PACKED (type) && maximum_field_alignment == 0)
8246 align = MAX (align, TYPE_ALIGN (type));
8247
8248 return align;
8249 }
8250
8251 /* Return 1 for an operand in small memory on V.4/eabi. */
8252
8253 int
8254 small_data_operand (rtx op ATTRIBUTE_UNUSED,
8255 machine_mode mode ATTRIBUTE_UNUSED)
8256 {
8257 #if TARGET_ELF
8258 rtx sym_ref;
8259
8260 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
8261 return 0;
8262
8263 if (DEFAULT_ABI != ABI_V4)
8264 return 0;
8265
8266 if (SYMBOL_REF_P (op))
8267 sym_ref = op;
8268
8269 else if (GET_CODE (op) != CONST
8270 || GET_CODE (XEXP (op, 0)) != PLUS
8271 || !SYMBOL_REF_P (XEXP (XEXP (op, 0), 0))
8272 || !CONST_INT_P (XEXP (XEXP (op, 0), 1)))
8273 return 0;
8274
8275 else
8276 {
8277 rtx sum = XEXP (op, 0);
8278 HOST_WIDE_INT summand;
8279
8280 /* We have to be careful here, because it is the referenced address
8281 that must be within 32k of _SDA_BASE_, not just the symbol. */
8282 summand = INTVAL (XEXP (sum, 1));
8283 if (summand < 0 || summand > g_switch_value)
8284 return 0;
8285
8286 sym_ref = XEXP (sum, 0);
8287 }
8288
8289 return SYMBOL_REF_SMALL_P (sym_ref);
8290 #else
8291 return 0;
8292 #endif
8293 }
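
/* Example (illustrative only): an operand such as
   (const (plus (symbol_ref "x") (const_int 4))) is small-data when "x"
   itself satisfies SYMBOL_REF_SMALL_P and the summand 4 lies within
   [0, g_switch_value], since the referenced address, not just the symbol,
   must be within 32k of _SDA_BASE_. */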
8294
8295 /* Return true if either operand is a general purpose register. */
8296
8297 bool
8298 gpr_or_gpr_p (rtx op0, rtx op1)
8299 {
8300 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
8301 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
8302 }
8303
8304 /* Return true if this is a direct move operation between GPR registers and
8305 floating point/VSX registers. */
8306
8307 bool
8308 direct_move_p (rtx op0, rtx op1)
8309 {
8310 if (!REG_P (op0) || !REG_P (op1))
8311 return false;
8312
8313 if (!TARGET_DIRECT_MOVE)
8314 return false;
8315
8316 int regno0 = REGNO (op0);
8317 int regno1 = REGNO (op1);
8318 if (!HARD_REGISTER_NUM_P (regno0) || !HARD_REGISTER_NUM_P (regno1))
8319 return false;
8320
8321 if (INT_REGNO_P (regno0) && VSX_REGNO_P (regno1))
8322 return true;
8323
8324 if (VSX_REGNO_P (regno0) && INT_REGNO_P (regno1))
8325 return true;
8326
8327 return false;
8328 }
8329
8330 /* Return true if ADDR is an acceptable address for a quad memory
8331 operation of mode MODE (either LQ/STQ for general purpose registers, or
8332 LXV/STXV for vector registers under ISA 3.0). STRICT is true if the
8333 address must use hard registers that are valid as base registers (i.e.
8334 after reload); if it is false, pseudo registers are also allowed. */
8335
8336 bool
8337 quad_address_p (rtx addr, machine_mode mode, bool strict)
8338 {
8339 rtx op0, op1;
8340
8341 if (GET_MODE_SIZE (mode) < 16)
8342 return false;
8343
8344 if (legitimate_indirect_address_p (addr, strict))
8345 return true;
8346
8347 if (VECTOR_MODE_P (mode) && !mode_supports_dq_form (mode))
8348 return false;
8349
8350 /* Is this a valid prefixed address? If the bottom four bits of the offset
8351 are non-zero, we could use a prefixed instruction (which does not have the
8352 DQ-form constraint that the traditional instruction had) instead of
8353 forcing the unaligned offset to a GPR. */
8354 if (address_is_prefixed (addr, mode, NON_PREFIXED_DQ))
8355 return true;
8356
8357 if (GET_CODE (addr) != PLUS)
8358 return false;
8359
8360 op0 = XEXP (addr, 0);
8361 if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict))
8362 return false;
8363
8364 op1 = XEXP (addr, 1);
8365 if (!CONST_INT_P (op1))
8366 return false;
8367
8368 return quad_address_offset_p (INTVAL (op1));
8369 }
8370
8371 /* Return true if this is a load or store quad operation. This function does
8372 not handle the atomic quad memory instructions. */
8373
8374 bool
8375 quad_load_store_p (rtx op0, rtx op1)
8376 {
8377 bool ret;
8378
8379 if (!TARGET_QUAD_MEMORY)
8380 ret = false;
8381
8382 else if (REG_P (op0) && MEM_P (op1))
8383 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
8384 && quad_memory_operand (op1, GET_MODE (op1))
8385 && !reg_overlap_mentioned_p (op0, op1));
8386
8387 else if (MEM_P (op0) && REG_P (op1))
8388 ret = (quad_memory_operand (op0, GET_MODE (op0))
8389 && quad_int_reg_operand (op1, GET_MODE (op1)));
8390
8391 else
8392 ret = false;
8393
8394 if (TARGET_DEBUG_ADDR)
8395 {
8396 fprintf (stderr, "\n========== quad_load_store, return %s\n",
8397 ret ? "true" : "false");
8398 debug_rtx (gen_rtx_SET (op0, op1));
8399 }
8400
8401 return ret;
8402 }
8403
8404 /* Given an address, return a constant offset term if one exists. */
8405
8406 static rtx
8407 address_offset (rtx op)
8408 {
8409 if (GET_CODE (op) == PRE_INC
8410 || GET_CODE (op) == PRE_DEC)
8411 op = XEXP (op, 0);
8412 else if (GET_CODE (op) == PRE_MODIFY
8413 || GET_CODE (op) == LO_SUM)
8414 op = XEXP (op, 1);
8415
8416 if (GET_CODE (op) == CONST)
8417 op = XEXP (op, 0);
8418
8419 if (GET_CODE (op) == PLUS)
8420 op = XEXP (op, 1);
8421
8422 if (CONST_INT_P (op))
8423 return op;
8424
8425 return NULL_RTX;
8426 }
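
/* Examples (illustrative only): (plus (reg) (const_int 16)) yields
   (const_int 16); (lo_sum (reg) (symbol_ref "x")) yields NULL_RTX since the
   low part is not a constant integer; a bare (const_int 8) is returned
   unchanged. */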
8427
8428 /* This tests that a lo_sum {constant, symbol, symbol+offset} is valid for
8429 the mode. If we can't find (or don't know) the alignment of the symbol
8430 we assume (optimistically) that it's sufficiently aligned [??? maybe we
8431 should be pessimistic]. Offsets are validated in the same way as for
8432 reg + offset. */
8433 static bool
8434 darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode)
8435 {
8436 /* We should not get here with this. */
8437 gcc_checking_assert (! mode_supports_dq_form (mode));
8438
8439 if (GET_CODE (x) == CONST)
8440 x = XEXP (x, 0);
8441
8442 /* If we are building PIC code, then any symbol must be wrapped in an
8443 UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */
8444 bool machopic_offs_p = false;
8445 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
8446 {
8447 x = XVECEXP (x, 0, 0);
8448 machopic_offs_p = true;
8449 }
8450
8451 rtx sym = NULL_RTX;
8452 unsigned HOST_WIDE_INT offset = 0;
8453
8454 if (GET_CODE (x) == PLUS)
8455 {
8456 sym = XEXP (x, 0);
8457 if (! SYMBOL_REF_P (sym))
8458 return false;
8459 if (!CONST_INT_P (XEXP (x, 1)))
8460 return false;
8461 offset = INTVAL (XEXP (x, 1));
8462 }
8463 else if (SYMBOL_REF_P (x))
8464 sym = x;
8465 else if (CONST_INT_P (x))
8466 offset = INTVAL (x);
8467 else if (GET_CODE (x) == LABEL_REF)
8468 offset = 0; // We assume code labels are Pmode aligned
8469 else
8470 return false; // not sure what we have here.
8471
8472 /* If we don't know the alignment of the thing to which the symbol refers,
8473 we assume optimistically it is "enough".
8474 ??? maybe we should be pessimistic instead. */
8475 unsigned align = 0;
8476
8477 if (sym)
8478 {
8479 tree decl = SYMBOL_REF_DECL (sym);
8480 /* As noted above, PIC code cannot use a bare SYMBOL_REF. */
8481 if (TARGET_MACHO && flag_pic && !machopic_offs_p)
8482 return false;
8483 #if TARGET_MACHO
8484 if (MACHO_SYMBOL_INDIRECTION_P (sym))
8485 /* The decl in an indirection symbol is the original one, which might
8486 be less aligned than the indirection. Our indirections are always
8487 pointer-aligned. */
8488 ;
8489 else
8490 #endif
8491 if (decl && DECL_ALIGN (decl))
8492 align = DECL_ALIGN_UNIT (decl);
8493 }
8494
8495 unsigned int extra = 0;
8496 switch (mode)
8497 {
8498 case E_DFmode:
8499 case E_DDmode:
8500 case E_DImode:
8501 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8502 addressing. */
8503 if (VECTOR_MEM_VSX_P (mode))
8504 return false;
8505
8506 if (!TARGET_POWERPC64)
8507 extra = 4;
8508 else if ((offset & 3) || (align & 3))
8509 return false;
8510 break;
8511
8512 case E_TFmode:
8513 case E_IFmode:
8514 case E_KFmode:
8515 case E_TDmode:
8516 case E_TImode:
8517 case E_PTImode:
8518 extra = 8;
8519 if (!TARGET_POWERPC64)
8520 extra = 12;
8521 else if ((offset & 3) || (align & 3))
8522 return false;
8523 break;
8524
8525 default:
8526 break;
8527 }
8528
8529 /* We only care if the access(es) would cause a change to the high part. */
8530 offset = sext_hwi (offset, 16);
8531 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8532 }
8533
8534 /* Return true if the MEM operand is a memory operand suitable for use
8535 with a (full width, possibly multiple) gpr load/store. On
8536 powerpc64 this means the offset must be divisible by 4.
8537 Implements 'Y' constraint.
8538
8539 Accept direct, indexed, offset, lo_sum and tocref. Since this is
8540 a constraint function we know the operand has satisfied a suitable
8541 memory predicate.
8542
8543 Offsetting a lo_sum should not be allowed, except where we know by
8544 alignment that a 32k boundary is not crossed. Note that by
8545 "offsetting" here we mean a further offset to access parts of the
8546 MEM. It's fine to have a lo_sum where the inner address is offset
8547 from a sym, since the same sym+offset will appear in the high part
8548 of the address calculation. */
8549
8550 bool
8551 mem_operand_gpr (rtx op, machine_mode mode)
8552 {
8553 unsigned HOST_WIDE_INT offset;
8554 int extra;
8555 rtx addr = XEXP (op, 0);
8556
8557 /* PR85755: Allow PRE_INC and PRE_DEC addresses. */
8558 if (TARGET_UPDATE
8559 && (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
8560 && mode_supports_pre_incdec_p (mode)
8561 && legitimate_indirect_address_p (XEXP (addr, 0), false))
8562 return true;
8563
8564 /* Allow prefixed instructions if supported. If the bottom two bits of the
8565 offset are non-zero, we could use a prefixed instruction (which does not
8566 have the DS-form constraint that the traditional instruction had) instead
8567 of forcing the unaligned offset to a GPR. */
8568 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8569 return true;
8570
8571 /* We need to look through Mach-O PIC unspecs to determine if a lo_sum is
8572 really OK. Doing this early avoids teaching all the other machinery
8573 about them. */
8574 if (TARGET_MACHO && GET_CODE (addr) == LO_SUM)
8575 return darwin_rs6000_legitimate_lo_sum_const_p (XEXP (addr, 1), mode);
8576
8577 /* Only allow offsettable addresses. See PRs 83969 and 84279. */
8578 if (!rs6000_offsettable_memref_p (op, mode, false))
8579 return false;
8580
8581 op = address_offset (addr);
8582 if (op == NULL_RTX)
8583 return true;
8584
8585 offset = INTVAL (op);
8586 if (TARGET_POWERPC64 && (offset & 3) != 0)
8587 return false;
8588
8589 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8590 if (extra < 0)
8591 extra = 0;
8592
8593 if (GET_CODE (addr) == LO_SUM)
8594 /* For lo_sum addresses, we must allow any offset except one that
8595 causes a wrap, so test only the low 16 bits. */
8596 offset = sext_hwi (offset, 16);
8597
8598 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8599 }
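
/* A worked example (illustrative): on powerpc64 a DImode access uses
   the DS-form "ld r3,offset(r9)", so the offset must be a multiple of
   4; offset 32764 passes the checks above, 32766 fails the low-bit
   test, and 32768 no longer fits in 16 bits. */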
8600
8601 /* As above, but for DS-FORM VSX insns. Unlike mem_operand_gpr,
8602 enforce an offset divisible by 4 even for 32-bit. */
8603
8604 bool
8605 mem_operand_ds_form (rtx op, machine_mode mode)
8606 {
8607 unsigned HOST_WIDE_INT offset;
8608 int extra;
8609 rtx addr = XEXP (op, 0);
8610
8611 /* Allow prefixed instructions if supported. If the bottom two bits of the
8612 offset are non-zero, we could use a prefixed instruction (which does not
8613 have the DS-form constraint that the traditional instruction had) instead
8614 of forcing the unaligned offset to a GPR. */
8615 if (address_is_prefixed (addr, mode, NON_PREFIXED_DS))
8616 return true;
8617
8618 if (!offsettable_address_p (false, mode, addr))
8619 return false;
8620
8621 op = address_offset (addr);
8622 if (op == NULL_RTX)
8623 return true;
8624
8625 offset = INTVAL (op);
8626 if ((offset & 3) != 0)
8627 return false;
8628
8629 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
8630 if (extra < 0)
8631 extra = 0;
8632
8633 if (GET_CODE (addr) == LO_SUM)
8634 /* For lo_sum addresses, we must allow any offset except one that
8635 causes a wrap, so test only the low 16 bits. */
8636 offset = sext_hwi (offset, 16);
8637
8638 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8639 }
8640 \f
8641 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
8642
8643 static bool
8644 reg_offset_addressing_ok_p (machine_mode mode)
8645 {
8646 switch (mode)
8647 {
8648 case E_V16QImode:
8649 case E_V8HImode:
8650 case E_V4SFmode:
8651 case E_V4SImode:
8652 case E_V2DFmode:
8653 case E_V2DImode:
8654 case E_V1TImode:
8655 case E_TImode:
8656 case E_TFmode:
8657 case E_KFmode:
8658 /* AltiVec/VSX vector modes. Only reg+reg addressing was valid until the
8659 ISA 3.0 vector d-form addressing mode was added. While TImode is not
8660 a vector mode, if we want to use the VSX registers to move it around,
8661 we need to restrict ourselves to reg+reg addressing. Similarly for
8662 IEEE 128-bit floating point that is passed in a single vector
8663 register. */
8664 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
8665 return mode_supports_dq_form (mode);
8666 break;
8667
8668 /* The vector pair/quad types support offset addressing if the
8669 underlying vectors support offset addressing. */
8670 case E_OOmode:
8671 case E_XOmode:
8672 return TARGET_MMA;
8673
8674 case E_SDmode:
8675 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
8676 addressing for the LFIWZX and STFIWX instructions. */
8677 if (TARGET_NO_SDMODE_STACK)
8678 return false;
8679 break;
8680
8681 default:
8682 break;
8683 }
8684
8685 return true;
8686 }
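
/* For example (illustrative), without ISA 3.0 d-form support a
   V4SImode access must use an X-form instruction such as
   "lxvw4x vs0,r9,r10", which is why reg+offset addressing is
   rejected above for the AltiVec/VSX modes. */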
8687
8688 static bool
8689 virtual_stack_registers_memory_p (rtx op)
8690 {
8691 int regnum;
8692
8693 if (REG_P (op))
8694 regnum = REGNO (op);
8695
8696 else if (GET_CODE (op) == PLUS
8697 && REG_P (XEXP (op, 0))
8698 && CONST_INT_P (XEXP (op, 1)))
8699 regnum = REGNO (XEXP (op, 0));
8700
8701 else
8702 return false;
8703
8704 return (regnum >= FIRST_VIRTUAL_REGISTER
8705 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
8706 }
8707
8708 /* Return true if a MODE sized memory access to OP plus OFFSET
8709 is known to not straddle a 32k boundary. This function is used
8710 to determine whether -mcmodel=medium code can use TOC pointer
8711 relative addressing for OP. This means the alignment of the TOC
8712 pointer must also be taken into account, and unfortunately that is
8713 only 8 bytes. */
8714
8715 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
8716 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
8717 #endif
8718
8719 static bool
8720 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
8721 machine_mode mode)
8722 {
8723 tree decl;
8724 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
8725
8726 if (!SYMBOL_REF_P (op))
8727 return false;
8728
8729 /* ISA 3.0 vector d-form addressing is restricted, don't allow
8730 SYMBOL_REF. */
8731 if (mode_supports_dq_form (mode))
8732 return false;
8733
8734 dsize = GET_MODE_SIZE (mode);
8735 decl = SYMBOL_REF_DECL (op);
8736 if (!decl)
8737 {
8738 if (dsize == 0)
8739 return false;
8740
8741 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
8742 replacing memory addresses with an anchor plus offset. We
8743 could find the decl by rummaging around in the block->objects
8744 VEC for the given offset but that seems like too much work. */
8745 dalign = BITS_PER_UNIT;
8746 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
8747 && SYMBOL_REF_ANCHOR_P (op)
8748 && SYMBOL_REF_BLOCK (op) != NULL)
8749 {
8750 struct object_block *block = SYMBOL_REF_BLOCK (op);
8751
8752 dalign = block->alignment;
8753 offset += SYMBOL_REF_BLOCK_OFFSET (op);
8754 }
8755 else if (CONSTANT_POOL_ADDRESS_P (op))
8756 {
8757 /* It would be nice to have get_pool_align ()... */
8758 machine_mode cmode = get_pool_mode (op);
8759
8760 dalign = GET_MODE_ALIGNMENT (cmode);
8761 }
8762 }
8763 else if (DECL_P (decl))
8764 {
8765 dalign = DECL_ALIGN (decl);
8766
8767 if (dsize == 0)
8768 {
8769 /* Allow BLKmode when the entire object is known to not
8770 cross a 32k boundary. */
8771 if (!DECL_SIZE_UNIT (decl))
8772 return false;
8773
8774 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
8775 return false;
8776
8777 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
8778 if (dsize > 32768)
8779 return false;
8780
8781 dalign /= BITS_PER_UNIT;
8782 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8783 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
8784 return dalign >= dsize;
8785 }
8786 }
8787 else
8788 gcc_unreachable ();
8789
8790 /* Find how many bits of the alignment we know for this access. */
8791 dalign /= BITS_PER_UNIT;
8792 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
8793 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
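/* E.g. (hypothetical values) dalign == 8 and offset == 20: then
lsb == 4 and mask == 7 & 3 == 3, so the access below is known to
be aligned to only 4 bytes. */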
8794 mask = dalign - 1;
8795 lsb = offset & -offset;
8796 mask &= lsb - 1;
8797 dalign = mask + 1;
8798
8799 return dalign >= dsize;
8800 }
8801
8802 static bool
8803 constant_pool_expr_p (rtx op)
8804 {
8805 rtx base, offset;
8806
8807 split_const (op, &base, &offset);
8808 return (SYMBOL_REF_P (base)
8809 && CONSTANT_POOL_ADDRESS_P (base)
8810 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
8811 }
8812
8813 /* Create a TOC reference for symbol_ref SYMBOL. If LARGETOC_REG is non-null,
8814 use that as the register to put the HIGH value into if register allocation
8815 is already done. */
8816
8817 rtx
8818 create_TOC_reference (rtx symbol, rtx largetoc_reg)
8819 {
8820 rtx tocrel, tocreg, hi;
8821
8822 gcc_assert (TARGET_TOC);
8823
8824 if (TARGET_DEBUG_ADDR)
8825 {
8826 if (SYMBOL_REF_P (symbol))
8827 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
8828 XSTR (symbol, 0));
8829 else
8830 {
8831 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
8832 GET_RTX_NAME (GET_CODE (symbol)));
8833 debug_rtx (symbol);
8834 }
8835 }
8836
8837 if (!can_create_pseudo_p ())
8838 df_set_regs_ever_live (TOC_REGISTER, true);
8839
8840 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
8841 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
8842 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
8843 return tocrel;
8844
8845 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
8846 if (largetoc_reg != NULL)
8847 {
8848 emit_move_insn (largetoc_reg, hi);
8849 hi = largetoc_reg;
8850 }
8851 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
8852 }
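
/* For -mcmodel=medium after register allocation, the HIGH/LO_SUM pair
   built above typically becomes "addis rT,r2,sym@toc@ha" plus a
   sym@toc@l low part in the dependent instruction (an illustrative
   sketch of the generated code). */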
8853
8854 /* These are only used to pass through from print_operand/print_operand_address
8855 to rs6000_output_addr_const_extra over the intervening function
8856 output_addr_const, which is not target code. */
8857 static const_rtx tocrel_base_oac, tocrel_offset_oac;
8858
8859 /* Return true if OP is a toc pointer relative address (the output
8860 of create_TOC_reference). If STRICT, do not match non-split
8861 -mcmodel=large/medium toc pointer relative addresses. If the pointers
8862 are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
8863 TOCREL_OFFSET_RET respectively. */
8864
8865 bool
8866 toc_relative_expr_p (const_rtx op, bool strict, const_rtx *tocrel_base_ret,
8867 const_rtx *tocrel_offset_ret)
8868 {
8869 if (!TARGET_TOC)
8870 return false;
8871
8872 if (TARGET_CMODEL != CMODEL_SMALL)
8873 {
8874 /* When strict ensure we have everything tidy. */
8875 if (strict
8876 && !(GET_CODE (op) == LO_SUM
8877 && REG_P (XEXP (op, 0))
8878 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
8879 return false;
8880
8881 /* When not strict, allow non-split TOC addresses and also allow
8882 (lo_sum (high ..)) TOC addresses created during reload. */
8883 if (GET_CODE (op) == LO_SUM)
8884 op = XEXP (op, 1);
8885 }
8886
8887 const_rtx tocrel_base = op;
8888 const_rtx tocrel_offset = const0_rtx;
8889
8890 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
8891 {
8892 tocrel_base = XEXP (op, 0);
8893 tocrel_offset = XEXP (op, 1);
8894 }
8895
8896 if (tocrel_base_ret)
8897 *tocrel_base_ret = tocrel_base;
8898 if (tocrel_offset_ret)
8899 *tocrel_offset_ret = tocrel_offset;
8900
8901 return (GET_CODE (tocrel_base) == UNSPEC
8902 && XINT (tocrel_base, 1) == UNSPEC_TOCREL
8903 && REG_P (XVECEXP (tocrel_base, 0, 1))
8904 && REGNO (XVECEXP (tocrel_base, 0, 1)) == TOC_REGISTER);
8905 }
8906
8907 /* Return true if X is a constant pool address, and also for cmodel=medium
8908 if X is a toc-relative address known to be offsettable within MODE. */
8909
8910 bool
8911 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
8912 bool strict)
8913 {
8914 const_rtx tocrel_base, tocrel_offset;
8915 return (toc_relative_expr_p (x, strict, &tocrel_base, &tocrel_offset)
8916 && (TARGET_CMODEL != CMODEL_MEDIUM
8917 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
8918 || mode == QImode
8919 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
8920 INTVAL (tocrel_offset), mode)));
8921 }
8922
8923 static bool
8924 legitimate_small_data_p (machine_mode mode, rtx x)
8925 {
8926 return (DEFAULT_ABI == ABI_V4
8927 && !flag_pic && !TARGET_TOC
8928 && (SYMBOL_REF_P (x) || GET_CODE (x) == CONST)
8929 && small_data_operand (x, mode));
8930 }
8931
8932 bool
8933 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
8934 bool strict, bool worst_case)
8935 {
8936 unsigned HOST_WIDE_INT offset;
8937 unsigned int extra;
8938
8939 if (GET_CODE (x) != PLUS)
8940 return false;
8941 if (!REG_P (XEXP (x, 0)))
8942 return false;
8943 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
8944 return false;
8945 if (mode_supports_dq_form (mode))
8946 return quad_address_p (x, mode, strict);
8947 if (!reg_offset_addressing_ok_p (mode))
8948 return virtual_stack_registers_memory_p (x);
8949 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
8950 return true;
8951 if (!CONST_INT_P (XEXP (x, 1)))
8952 return false;
8953
8954 offset = INTVAL (XEXP (x, 1));
8955 extra = 0;
8956 switch (mode)
8957 {
8958 case E_DFmode:
8959 case E_DDmode:
8960 case E_DImode:
8961 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
8962 addressing. */
8963 if (VECTOR_MEM_VSX_P (mode))
8964 return false;
8965
8966 if (!worst_case)
8967 break;
8968 if (!TARGET_POWERPC64)
8969 extra = 4;
8970 else if (offset & 3)
8971 return false;
8972 break;
8973
8974 case E_TFmode:
8975 case E_IFmode:
8976 case E_KFmode:
8977 case E_TDmode:
8978 case E_TImode:
8979 case E_PTImode:
8980 extra = 8;
8981 if (!worst_case)
8982 break;
8983 if (!TARGET_POWERPC64)
8984 extra = 12;
8985 else if (offset & 3)
8986 return false;
8987 break;
8988
8989 default:
8990 break;
8991 }
8992
8993 if (TARGET_PREFIXED)
8994 return SIGNED_34BIT_OFFSET_EXTRA_P (offset, extra);
8995 else
8996 return SIGNED_16BIT_OFFSET_EXTRA_P (offset, extra);
8997 }
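
/* With TARGET_PREFIXED the 34-bit test above admits displacements
   such as "pld r3,0x1234567(r9)" (a hypothetical example); otherwise
   the traditional 16-bit D/DS-form limit applies. */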
8998
8999 bool
9000 legitimate_indexed_address_p (rtx x, int strict)
9001 {
9002 rtx op0, op1;
9003
9004 if (GET_CODE (x) != PLUS)
9005 return false;
9006
9007 op0 = XEXP (x, 0);
9008 op1 = XEXP (x, 1);
9009
9010 return (REG_P (op0) && REG_P (op1)
9011 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
9012 && INT_REG_OK_FOR_INDEX_P (op1, strict))
9013 || (INT_REG_OK_FOR_BASE_P (op1, strict)
9014 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
9015 }
9016
9017 bool
9018 avoiding_indexed_address_p (machine_mode mode)
9019 {
9020 unsigned int msize = GET_MODE_SIZE (mode);
9021
9022 /* Avoid indexed addressing for modes that have non-indexed load/store
9023 instruction forms. On power10, vector pairs have an indexed
9024 form, but vector quads don't. */
9025 if (msize > 16)
9026 return msize != 32;
9027
9028 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
9029 }
9030
9031 bool
9032 legitimate_indirect_address_p (rtx x, int strict)
9033 {
9034 return REG_P (x) && INT_REG_OK_FOR_BASE_P (x, strict);
9035 }
9036
9037 bool
9038 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
9039 {
9040 if (!TARGET_MACHO || !flag_pic
9041 || mode != SImode || !MEM_P (x))
9042 return false;
9043 x = XEXP (x, 0);
9044
9045 if (GET_CODE (x) != LO_SUM)
9046 return false;
9047 if (!REG_P (XEXP (x, 0)))
9048 return false;
9049 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
9050 return false;
9051 x = XEXP (x, 1);
9052
9053 return CONSTANT_P (x);
9054 }
9055
9056 static bool
9057 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
9058 {
9059 if (GET_CODE (x) != LO_SUM)
9060 return false;
9061 if (!REG_P (XEXP (x, 0)))
9062 return false;
9063 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
9064 return false;
9065 /* Quad word addresses are restricted, and we can't use LO_SUM. */
9066 if (mode_supports_dq_form (mode))
9067 return false;
9068 x = XEXP (x, 1);
9069
9070 if (TARGET_ELF)
9071 {
9072 bool large_toc_ok;
9073
9074 if (DEFAULT_ABI == ABI_V4 && flag_pic)
9075 return false;
9076 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
9077 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
9078 recognizes some LO_SUM addresses as valid although this
9079 function says the opposite. In most cases, LRA through different
9080 transformations can generate correct code for address reloads;
9081 it fails on only some LO_SUM cases. So we need to add code
9082 here saying that some addresses are still valid. */
9083 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
9084 && small_toc_ref (x, VOIDmode));
9085 if (TARGET_TOC && ! large_toc_ok)
9086 return false;
9087 if (GET_MODE_NUNITS (mode) != 1)
9088 return false;
9089 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9090 && !(/* ??? Assume floating point reg based on mode? */
9091 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9092 return false;
9093
9094 return CONSTANT_P (x) || large_toc_ok;
9095 }
9096 else if (TARGET_MACHO)
9097 {
9098 if (GET_MODE_NUNITS (mode) != 1)
9099 return false;
9100 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
9101 && !(/* see above */
9102 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9103 return false;
9104 #if TARGET_MACHO
9105 if (MACHO_DYNAMIC_NO_PIC_P || !flag_pic)
9106 return CONSTANT_P (x);
9107 #endif
9108 /* Mach-O PIC code from here. */
9109 if (GET_CODE (x) == CONST)
9110 x = XEXP (x, 0);
9111
9112 /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */
9113 if (SYMBOL_REF_P (x))
9114 return false;
9115
9116 /* So this is OK if the wrapped object is const. */
9117 if (GET_CODE (x) == UNSPEC
9118 && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET)
9119 return CONSTANT_P (XVECEXP (x, 0, 0));
9120 return CONSTANT_P (x);
9121 }
9122 return false;
9123 }
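
/* A lo_sum address accepted above typically prints as the pair
   "lis rT,sym@ha" / "lwz rD,sym@l(rT)" on 32-bit ELF (an
   illustrative example). */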
9124
9125
9126 /* Try machine-dependent ways of modifying an illegitimate address
9127 to be legitimate. If we find one, return the new, valid address.
9128 This is used from only one place: `memory_address' in explow.cc.
9129
9130 OLDX is the address as it was before break_out_memory_refs was
9131 called. In some cases it is useful to look at this to decide what
9132 needs to be done.
9133
9134 It is always safe for this function to do nothing. It exists to
9135 recognize opportunities to optimize the output.
9136
9137 On RS/6000, first check for the sum of a register with a constant
9138 integer that is out of range. If so, generate code to add the
9139 constant with the low-order 16 bits masked to the register and force
9140 this result into another register (this can be done with `cau').
9141 Then generate an address of REG+(CONST&0xffff), allowing for the
9142 possibility of bit 16 being a one.
9143
9144 Then check for the sum of a register and something not constant, try to
9145 load the other things into a register and return the sum. */
9146
9147 static rtx
9148 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
9149 machine_mode mode)
9150 {
9151 unsigned int extra;
9152
9153 if (!reg_offset_addressing_ok_p (mode)
9154 || mode_supports_dq_form (mode))
9155 {
9156 if (virtual_stack_registers_memory_p (x))
9157 return x;
9158
9159 /* In theory we should not be seeing addresses of the form reg+0,
9160 but just in case it is generated, optimize it away. */
9161 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
9162 return force_reg (Pmode, XEXP (x, 0));
9163
9164 /* For TImode with load/store quad, restrict addresses to just a single
9165 pointer, so it works with both GPRs and VSX registers. */
9166 /* Make sure both operands are registers. */
9167 else if (GET_CODE (x) == PLUS
9168 && (mode != TImode || !TARGET_VSX))
9169 return gen_rtx_PLUS (Pmode,
9170 force_reg (Pmode, XEXP (x, 0)),
9171 force_reg (Pmode, XEXP (x, 1)));
9172 else
9173 return force_reg (Pmode, x);
9174 }
9175 if (SYMBOL_REF_P (x) && !TARGET_MACHO)
9176 {
9177 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
9178 if (model != 0)
9179 return rs6000_legitimize_tls_address (x, model);
9180 }
9181
9182 extra = 0;
9183 switch (mode)
9184 {
9185 case E_TFmode:
9186 case E_TDmode:
9187 case E_TImode:
9188 case E_PTImode:
9189 case E_IFmode:
9190 case E_KFmode:
9191 /* As in legitimate_offset_address_p we do not assume
9192 worst-case. The mode here is just a hint as to the registers
9193 used. A TImode is usually in gprs, but may actually be in
9194 fprs. Leave worst-case scenario for reload to handle via
9195 insn constraints. PTImode is only GPRs. */
9196 extra = 8;
9197 break;
9198 default:
9199 break;
9200 }
9201
9202 if (GET_CODE (x) == PLUS
9203 && REG_P (XEXP (x, 0))
9204 && CONST_INT_P (XEXP (x, 1))
9205 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
9206 >= 0x10000 - extra))
9207 {
9208 HOST_WIDE_INT high_int, low_int;
9209 rtx sum;
9210 low_int = sext_hwi (INTVAL (XEXP (x, 1)), 16);
9211 if (low_int >= 0x8000 - extra)
9212 low_int = 0;
9213 high_int = INTVAL (XEXP (x, 1)) - low_int;
9214 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
9215 gen_int_mode (high_int, Pmode)), 0);
9216 return plus_constant (Pmode, sum, low_int);
9217 }
9218 else if (GET_CODE (x) == PLUS
9219 && REG_P (XEXP (x, 0))
9220 && !CONST_INT_P (XEXP (x, 1))
9221 && GET_MODE_NUNITS (mode) == 1
9222 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9223 || (/* ??? Assume floating point reg based on mode? */
9224 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode)))
9225 && !avoiding_indexed_address_p (mode))
9226 {
9227 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
9228 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
9229 }
9230 else if ((TARGET_ELF
9231 #if TARGET_MACHO
9232 || !MACHO_DYNAMIC_NO_PIC_P
9233 #endif
9234 )
9235 && TARGET_32BIT
9236 && TARGET_NO_TOC_OR_PCREL
9237 && !flag_pic
9238 && !CONST_INT_P (x)
9239 && !CONST_WIDE_INT_P (x)
9240 && !CONST_DOUBLE_P (x)
9241 && CONSTANT_P (x)
9242 && GET_MODE_NUNITS (mode) == 1
9243 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
9244 || (/* ??? Assume floating point reg based on mode? */
9245 TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))))
9246 {
9247 rtx reg = gen_reg_rtx (Pmode);
9248 if (TARGET_ELF)
9249 emit_insn (gen_elf_high (reg, x));
9250 else
9251 emit_insn (gen_macho_high (Pmode, reg, x));
9252 return gen_rtx_LO_SUM (Pmode, reg, x);
9253 }
9254 else if (TARGET_TOC
9255 && SYMBOL_REF_P (x)
9256 && constant_pool_expr_p (x)
9257 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
9258 return create_TOC_reference (x, NULL_RTX);
9259 else
9260 return x;
9261 }
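
/* A worked example of the high/low split above (hypothetical values):
   for (plus r9 70000), low_int == sext_hwi (70000, 16) == 4464 and
   high_int == 65536, so "addis rT,r9,1" is emitted and the remaining
   4464 stays in the displacement. */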
9262
9263 /* Debug version of rs6000_legitimize_address. */
9264 static rtx
9265 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9266 {
9267 rtx ret;
9268 rtx_insn *insns;
9269
9270 start_sequence ();
9271 ret = rs6000_legitimize_address (x, oldx, mode);
9272 insns = get_insns ();
9273 end_sequence ();
9274
9275 if (ret != x)
9276 {
9277 fprintf (stderr,
9278 "\nrs6000_legitimize_address: mode %s, old code %s, "
9279 "new code %s, modified\n",
9280 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
9281 GET_RTX_NAME (GET_CODE (ret)));
9282
9283 fprintf (stderr, "Original address:\n");
9284 debug_rtx (x);
9285
9286 fprintf (stderr, "oldx:\n");
9287 debug_rtx (oldx);
9288
9289 fprintf (stderr, "New address:\n");
9290 debug_rtx (ret);
9291
9292 if (insns)
9293 {
9294 fprintf (stderr, "Insns added:\n");
9295 debug_rtx_list (insns, 20);
9296 }
9297 }
9298 else
9299 {
9300 fprintf (stderr,
9301 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
9302 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
9303
9304 debug_rtx (x);
9305 }
9306
9307 if (insns)
9308 emit_insn (insns);
9309
9310 return ret;
9311 }
9312
9313 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
9314 We need to emit DTP-relative relocations. */
9315
9316 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
9317 static void
9318 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
9319 {
9320 switch (size)
9321 {
9322 case 4:
9323 fputs ("\t.long\t", file);
9324 break;
9325 case 8:
9326 fputs (DOUBLE_INT_ASM_OP, file);
9327 break;
9328 default:
9329 gcc_unreachable ();
9330 }
9331 output_addr_const (file, x);
9332 if (TARGET_ELF)
9333 fputs ("@dtprel+0x8000", file);
9334 }
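
/* E.g. for a 4-byte entry the code above emits
   "\t.long\tsym@dtprel+0x8000" (illustrative); the +0x8000 bias
   accounts for the offset convention of DTP-relative values. */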
9335
9336 /* Return true if X is a symbol that refers to real (rather than emulated)
9337 TLS. */
9338
9339 static bool
9340 rs6000_real_tls_symbol_ref_p (rtx x)
9341 {
9342 return (SYMBOL_REF_P (x)
9343 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
9344 }
9345
9346 /* In the name of slightly smaller debug output, and to cater to
9347 general assembler lossage, recognize various UNSPEC sequences
9348 and turn them back into a direct symbol reference. */
9349
9350 static rtx
9351 rs6000_delegitimize_address (rtx orig_x)
9352 {
9353 rtx x, y, offset;
9354
9355 /* UNSPEC_FUSION_GPR is created by the peephole2 for power8 fusion. It
9356 encodes loading up the high part of the address of a TOC reference along
9357 with a load of a GPR using the same base register used for the load. We
9358 return the original SYMBOL_REF.
9359
9360 (set (reg:INT1 <reg>
9361 (unspec:INT1 [<combined-address>] UNSPEC_FUSION_GPR)))
9362
9363 UNSPEC_PCREL_OPT_LD_ADDR is used by the power10 PCREL_OPT pass. These
9364 UNSPECs include the external SYMBOL_REF along with the value being loaded.
9365 We return the original SYMBOL_REF.
9366
9367 (parallel [(set (reg:DI <base-reg>)
9368 (unspec:DI [(symbol_ref <symbol>)
9369 (const_int <marker>)]
9370 UNSPEC_PCREL_OPT_LD_ADDR))
9371 (set (reg:DI <load-reg>)
9372 (unspec:DI [(const_int 0)]
9373 UNSPEC_PCREL_OPT_LD_DATA))])
9374
9375 UNSPEC_PCREL_OPT_LD_SAME_REG is an alternative that is used if the
9376 GPR being loaded is the same as the GPR used to hold the external address.
9377
9378 (set (reg:DI <base-reg>)
9379 (unspec:DI [(symbol_ref <symbol>)
9380 (const_int <marker>)]
9381 UNSPEC_PCREL_OPT_LD_SAME_REG))
9382
9383 UNSPEC_PCREL_OPT_ST_ADDR is used by the power10 PCREL_OPT pass. This
9384 UNSPEC includes the external SYMBOL_REF along with the value being stored.
9385 We return the original SYMBOL_REF.
9386
9387 (parallel [(set (reg:DI <base-reg>)
9388 (unspec:DI [(symbol_ref <symbol>)
9389 (const_int <marker>)]
9390 UNSPEC_PCREL_OPT_ST_ADDR))
9391 (use (reg <store-reg>))]) */
9392
9393 if (GET_CODE (orig_x) == UNSPEC)
9394 switch (XINT (orig_x, 1))
9395 {
9396 case UNSPEC_FUSION_GPR:
9397 case UNSPEC_PCREL_OPT_LD_ADDR:
9398 case UNSPEC_PCREL_OPT_LD_SAME_REG:
9399 case UNSPEC_PCREL_OPT_ST_ADDR:
9400 orig_x = XVECEXP (orig_x, 0, 0);
9401 break;
9402
9403 default:
9404 break;
9405 }
9406
9407 orig_x = delegitimize_mem_from_attrs (orig_x);
9408
9409 x = orig_x;
9410 if (MEM_P (x))
9411 x = XEXP (x, 0);
9412
9413 y = x;
9414 if (TARGET_CMODEL != CMODEL_SMALL && GET_CODE (y) == LO_SUM)
9415 y = XEXP (y, 1);
9416
9417 offset = NULL_RTX;
9418 if (GET_CODE (y) == PLUS
9419 && GET_MODE (y) == Pmode
9420 && CONST_INT_P (XEXP (y, 1)))
9421 {
9422 offset = XEXP (y, 1);
9423 y = XEXP (y, 0);
9424 }
9425
9426 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_TOCREL)
9427 {
9428 y = XVECEXP (y, 0, 0);
9429
9430 #ifdef HAVE_AS_TLS
9431 /* Do not associate thread-local symbols with the original
9432 constant pool symbol. */
9433 if (TARGET_XCOFF
9434 && SYMBOL_REF_P (y)
9435 && CONSTANT_POOL_ADDRESS_P (y)
9436 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
9437 return orig_x;
9438 #endif
9439
9440 if (offset != NULL_RTX)
9441 y = gen_rtx_PLUS (Pmode, y, offset);
9442 if (!MEM_P (orig_x))
9443 return y;
9444 else
9445 return replace_equiv_address_nv (orig_x, y);
9446 }
9447
9448 if (TARGET_MACHO
9449 && GET_CODE (orig_x) == LO_SUM
9450 && GET_CODE (XEXP (orig_x, 1)) == CONST)
9451 {
9452 y = XEXP (XEXP (orig_x, 1), 0);
9453 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
9454 return XVECEXP (y, 0, 0);
9455 }
9456
9457 return orig_x;
9458 }
9459
9460 /* Return true if X shouldn't be emitted into the debug info.
9461 The linker doesn't like .toc section references from
9462 .debug_* sections, so reject .toc section symbols. */
9463
9464 static bool
9465 rs6000_const_not_ok_for_debug_p (rtx x)
9466 {
9467 if (GET_CODE (x) == UNSPEC)
9468 return true;
9469 if (SYMBOL_REF_P (x)
9470 && CONSTANT_POOL_ADDRESS_P (x))
9471 {
9472 rtx c = get_pool_constant (x);
9473 machine_mode cmode = get_pool_mode (x);
9474 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
9475 return true;
9476 }
9477
9478 return false;
9479 }
9480
9481 /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */
9482
9483 static bool
9484 rs6000_legitimate_combined_insn (rtx_insn *insn)
9485 {
9486 int icode = INSN_CODE (insn);
9487
9488 /* Reject creating doloop insns. Combine should not be allowed
9489 to create these for a number of reasons:
9490 1) In a nested loop, if combine creates one of these in an
9491 outer loop and the register allocator happens to allocate ctr
9492 to the outer loop insn, then the inner loop can't use ctr.
9493 Inner loops ought to be more highly optimized.
9494 2) Combine often wants to create one of these from what was
9495 originally a three insn sequence, first combining the three
9496 insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not
9497 allocated ctr, the splitter takes us back to the three insn
9498 sequence. It's better to stop combine at the two insn
9499 sequence.
9500 3) Faced with not being able to allocate ctr for ctrsi/ctrdi
9501 insns, the register allocator sometimes uses floating point
9502 or vector registers for the pseudo. Since ctrsi/ctrdi is a
9503 jump insn and output reloads are not implemented for jumps,
9504 the ctrsi/ctrdi splitters need to handle all possible cases.
9505 That's a pain, and it gets to be seriously difficult when a
9506 splitter that runs after reload needs memory to transfer from
9507 a gpr to fpr. See PR70098 and PR71763 which are not fixed
9508 for the difficult case. It's better to not create problems
9509 in the first place. */
9510 if (icode != CODE_FOR_nothing
9511 && (icode == CODE_FOR_bdz_si
9512 || icode == CODE_FOR_bdz_di
9513 || icode == CODE_FOR_bdnz_si
9514 || icode == CODE_FOR_bdnz_di
9515 || icode == CODE_FOR_bdztf_si
9516 || icode == CODE_FOR_bdztf_di
9517 || icode == CODE_FOR_bdnztf_si
9518 || icode == CODE_FOR_bdnztf_di))
9519 return false;
9520
9521 return true;
9522 }
9523
9524 /* Construct the SYMBOL_REF for the tls_get_addr function. */
9525
9526 static GTY(()) rtx rs6000_tls_symbol;
9527 static rtx
9528 rs6000_tls_get_addr (void)
9529 {
9530 if (!rs6000_tls_symbol)
9531 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
9532
9533 return rs6000_tls_symbol;
9534 }
9535
9536 /* Construct the SYMBOL_REF for TLS GOT references. */
9537
9538 static GTY(()) rtx rs6000_got_symbol;
9539 rtx
9540 rs6000_got_sym (void)
9541 {
9542 if (!rs6000_got_symbol)
9543 {
9544 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
9545 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
9546 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
9547 }
9548
9549 return rs6000_got_symbol;
9550 }
9551
9552 /* AIX Thread-Local Address support. */
9553
9554 static rtx
9555 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
9556 {
9557 rtx sym, mem, tocref, tlsreg, tmpreg, dest;
9558 const char *name;
9559 char *tlsname;
9560
9561 /* Place addr into TOC constant pool. */
9562 sym = force_const_mem (GET_MODE (addr), addr);
9563
9564 /* Output the TOC entry and create the MEM referencing the value. */
9565 if (constant_pool_expr_p (XEXP (sym, 0))
9566 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
9567 {
9568 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
9569 mem = gen_const_mem (Pmode, tocref);
9570 set_mem_alias_set (mem, get_TOC_alias_set ());
9571 }
9572 else
9573 return sym;
9574
9575 /* Use global-dynamic for local-dynamic. */
9576 if (model == TLS_MODEL_GLOBAL_DYNAMIC
9577 || model == TLS_MODEL_LOCAL_DYNAMIC)
9578 {
9579 /* Create new TOC reference for @m symbol. */
9580 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
9581 tlsname = XALLOCAVEC (char, strlen (name) + 1);
9582 strcpy (tlsname, "*LCM");
9583 strcat (tlsname, name + 3);
9584 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
9585 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
9586 tocref = create_TOC_reference (modaddr, NULL_RTX);
9587 rtx modmem = gen_const_mem (Pmode, tocref);
9588 set_mem_alias_set (modmem, get_TOC_alias_set ());
9589
9590 rtx modreg = gen_reg_rtx (Pmode);
9591 emit_insn (gen_rtx_SET (modreg, modmem));
9592
9593 tmpreg = gen_reg_rtx (Pmode);
9594 emit_insn (gen_rtx_SET (tmpreg, mem));
9595
9596 dest = gen_reg_rtx (Pmode);
9597 if (TARGET_32BIT)
9598 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
9599 else
9600 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
9601 return dest;
9602 }
9603 /* Obtain the TLS pointer: 32-bit call or 64-bit GPR 13. */
9604 else if (TARGET_32BIT)
9605 {
9606 tlsreg = gen_reg_rtx (SImode);
9607 emit_insn (gen_tls_get_tpointer (tlsreg));
9608 }
9609 else
9610 {
9611 tlsreg = gen_rtx_REG (DImode, 13);
9612 xcoff_tls_exec_model_detected = true;
9613 }
9614
9615 /* Load the TOC value into temporary register. */
9616 tmpreg = gen_reg_rtx (Pmode);
9617 emit_insn (gen_rtx_SET (tmpreg, mem));
9618 set_unique_reg_note (get_last_insn (), REG_EQUAL,
9619 gen_rtx_MINUS (Pmode, addr, tlsreg));
9620
9621 /* Add TOC symbol value to TLS pointer. */
9622 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
9623
9624 return dest;
9625 }
9626
9627 /* Passes the TLS argument value from the global-dynamic and local-dynamic
9628 emit_library_call_value calls in rs6000_legitimize_tls_address on to
9629 rs6000_call_aix and rs6000_call_sysv. This is used to emit the
9630 marker relocs put on __tls_get_addr calls. */
9631 static rtx global_tlsarg;
9632
9633 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
9634 this (thread-local) address. */
9635
9636 static rtx
9637 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
9638 {
9639 rtx dest, insn;
9640
9641 if (TARGET_XCOFF)
9642 return rs6000_legitimize_tls_address_aix (addr, model);
9643
9644 dest = gen_reg_rtx (Pmode);
9645 if (model == TLS_MODEL_LOCAL_EXEC
9646 && (rs6000_tls_size == 16 || rs6000_pcrel_p ()))
9647 {
9648 rtx tlsreg;
9649
9650 if (TARGET_64BIT)
9651 {
9652 tlsreg = gen_rtx_REG (Pmode, 13);
9653 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
9654 }
9655 else
9656 {
9657 tlsreg = gen_rtx_REG (Pmode, 2);
9658 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
9659 }
9660 emit_insn (insn);
9661 }
9662 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
9663 {
9664 rtx tlsreg, tmp;
9665
9666 tmp = gen_reg_rtx (Pmode);
9667 if (TARGET_64BIT)
9668 {
9669 tlsreg = gen_rtx_REG (Pmode, 13);
9670 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
9671 }
9672 else
9673 {
9674 tlsreg = gen_rtx_REG (Pmode, 2);
9675 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
9676 }
9677 emit_insn (insn);
9678 if (TARGET_64BIT)
9679 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
9680 else
9681 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
9682 emit_insn (insn);
9683 }
9684 else
9685 {
9686 rtx got, tga, tmp1, tmp2;
9687
9688 /* We currently use relocations like @got@tlsgd for tls, which
9689 means the linker will handle allocation of tls entries, placing
9690 them in the .got section. So use a pointer to the .got section,
9691 not one to secondary TOC sections used by 64-bit -mminimal-toc,
9692 or to secondary GOT sections used by 32-bit -fPIC. */
9693 if (rs6000_pcrel_p ())
9694 got = const0_rtx;
9695 else if (TARGET_64BIT)
9696 got = gen_rtx_REG (Pmode, 2);
9697 else
9698 {
9699 if (flag_pic == 1)
9700 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
9701 else
9702 {
9703 rtx gsym = rs6000_got_sym ();
9704 got = gen_reg_rtx (Pmode);
9705 if (flag_pic == 0)
9706 rs6000_emit_move (got, gsym, Pmode);
9707 else
9708 {
9709 rtx mem, lab;
9710
9711 tmp1 = gen_reg_rtx (Pmode);
9712 tmp2 = gen_reg_rtx (Pmode);
9713 mem = gen_const_mem (Pmode, tmp1);
9714 lab = gen_label_rtx ();
9715 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
9716 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
9717 if (TARGET_LINK_STACK)
9718 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
9719 emit_move_insn (tmp2, mem);
9720 rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
9721 set_unique_reg_note (last, REG_EQUAL, gsym);
9722 }
9723 }
9724 }
9725
9726 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
9727 {
9728 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addr, got),
9729 UNSPEC_TLSGD);
9730 tga = rs6000_tls_get_addr ();
9731 rtx argreg = gen_rtx_REG (Pmode, 3);
9732 emit_insn (gen_rtx_SET (argreg, arg));
9733 global_tlsarg = arg;
9734 emit_library_call_value (tga, dest, LCT_CONST, Pmode, argreg, Pmode);
9735 global_tlsarg = NULL_RTX;
9736
9737 /* Make a note so that the result of this call can be CSEd. */
9738 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9739 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9740 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9741 }
9742 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
9743 {
9744 rtx arg = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got), UNSPEC_TLSLD);
9745 tga = rs6000_tls_get_addr ();
9746 tmp1 = gen_reg_rtx (Pmode);
9747 rtx argreg = gen_rtx_REG (Pmode, 3);
9748 emit_insn (gen_rtx_SET (argreg, arg));
9749 global_tlsarg = arg;
9750 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, argreg, Pmode);
9751 global_tlsarg = NULL_RTX;
9752
9753 /* Make a note so that the result of this call can be CSEd. */
9754 rtvec vec = gen_rtvec (1, copy_rtx (arg));
9755 rtx uns = gen_rtx_UNSPEC (Pmode, vec, UNSPEC_TLS_GET_ADDR);
9756 set_unique_reg_note (get_last_insn (), REG_EQUAL, uns);
9757
9758 if (rs6000_tls_size == 16 || rs6000_pcrel_p ())
9759 {
9760 if (TARGET_64BIT)
9761 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
9762 else
9763 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
9764 }
9765 else if (rs6000_tls_size == 32)
9766 {
9767 tmp2 = gen_reg_rtx (Pmode);
9768 if (TARGET_64BIT)
9769 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
9770 else
9771 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
9772 emit_insn (insn);
9773 if (TARGET_64BIT)
9774 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
9775 else
9776 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
9777 }
9778 else
9779 {
9780 tmp2 = gen_reg_rtx (Pmode);
9781 if (TARGET_64BIT)
9782 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
9783 else
9784 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
9785 emit_insn (insn);
9786 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
9787 }
9788 emit_insn (insn);
9789 }
9790 else
9791 {
9792 /* IE, or 64-bit offset LE. */
9793 tmp2 = gen_reg_rtx (Pmode);
9794 if (TARGET_64BIT)
9795 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
9796 else
9797 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
9798 emit_insn (insn);
9799 if (rs6000_pcrel_p ())
9800 {
9801 if (TARGET_64BIT)
9802 insn = gen_tls_tls_pcrel_64 (dest, tmp2, addr);
9803 else
9804 insn = gen_tls_tls_pcrel_32 (dest, tmp2, addr);
9805 }
9806 else if (TARGET_64BIT)
9807 insn = gen_tls_tls_64 (dest, tmp2, addr);
9808 else
9809 insn = gen_tls_tls_32 (dest, tmp2, addr);
9810 emit_insn (insn);
9811 }
9812 }
9813
9814 return dest;
9815 }
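
/* For the 16-bit local-exec case above, the 64-bit result boils down
   to a single "addi rD,r13,sym@tprel" using the thread pointer in
   GPR 13 (an illustrative sketch). */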
9816
9817 /* Only create the global variable for the stack protect guard if we are using
9818 the global flavor of that guard. */
9819 static tree
9820 rs6000_init_stack_protect_guard (void)
9821 {
9822 if (rs6000_stack_protector_guard == SSP_GLOBAL)
9823 return default_stack_protect_guard ();
9824
9825 return NULL_TREE;
9826 }
9827
9828 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
9829
9830 static bool
9831 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
9832 {
9833 /* If GET_CODE (x) is HIGH, then X represents the high part of a
9834 symbol_ref. It cannot be put into a constant pool. E.g.
9835 (high:DI (unspec:DI [(symbol_ref/u:DI ("*.LC0")..)
9836 (high:DI (symbol_ref:DI ("var")..)). */
9837 if (GET_CODE (x) == HIGH)
9838 return true;
9839
9840 /* A TLS symbol in the TOC cannot contain a sum. */
9841 if (GET_CODE (x) == CONST
9842 && GET_CODE (XEXP (x, 0)) == PLUS
9843 && SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
9844 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
9845 return true;
9846
9847 /* Allow AIX TOC TLS symbols in the constant pool,
9848 but not ELF TLS symbols. */
9849 return TARGET_ELF && tls_referenced_p (x);
9850 }
9851
9852 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
9853 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
9854 can be addressed relative to the toc pointer. */
9855
9856 static bool
9857 use_toc_relative_ref (rtx sym, machine_mode mode)
9858 {
9859 return ((constant_pool_expr_p (sym)
9860 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
9861 get_pool_mode (sym)))
9862 || (TARGET_CMODEL == CMODEL_MEDIUM
9863 && SYMBOL_REF_LOCAL_P (sym)
9864 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
9865 }
9866
9867 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
9868 that is a valid memory address for an instruction.
9869 The MODE argument is the machine mode for the MEM expression
9870 that wants to use this address.
9871
9872 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
9873 refers to a constant pool entry of an address (or the sum of it
9874 plus a constant), a short (16-bit signed) constant plus a register,
9875 the sum of two registers, or a register indirect, possibly with an
9876 auto-increment. For DFmode, DDmode and DImode with a constant plus
9877 register, we must ensure that both words are addressable or PowerPC64
9878 with offset word aligned.
9879
9880 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
9881 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
9882 because adjacent memory cells are accessed by adding word-sized offsets
9883 during assembly output. */
9884 static bool
9885 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict,
9886 code_helper ch = ERROR_MARK)
9887 {
9888 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
9889 bool quad_offset_p = mode_supports_dq_form (mode);
9890
9891 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
9892 return 0;
9893
9894 /* lxvl and stxvl don't support any addressing modes with PLUS. */
9895 if (ch.is_internal_fn ()
9896 && (ch == IFN_LEN_LOAD || ch == IFN_LEN_STORE)
9897 && GET_CODE (x) == PLUS)
9898 return 0;
9899
9900 /* Handle unaligned altivec lvx/stvx type addresses. */
9901 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
9902 && GET_CODE (x) == AND
9903 && CONST_INT_P (XEXP (x, 1))
9904 && INTVAL (XEXP (x, 1)) == -16)
9905 {
9906 x = XEXP (x, 0);
9907 return (legitimate_indirect_address_p (x, reg_ok_strict)
9908 || legitimate_indexed_address_p (x, reg_ok_strict)
9909 || virtual_stack_registers_memory_p (x));
9910 }
9911
9912 if (legitimate_indirect_address_p (x, reg_ok_strict))
9913 return 1;
9914 if (TARGET_UPDATE
9915 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
9916 && mode_supports_pre_incdec_p (mode)
9917 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
9918 return 1;
9919
9920 /* Handle prefixed addresses (PC-relative or 34-bit offset). */
9921 if (address_is_prefixed (x, mode, NON_PREFIXED_DEFAULT))
9922 return 1;
9923
9924 /* Handle restricted vector d-form offsets in ISA 3.0. */
9925 if (quad_offset_p)
9926 {
9927 if (quad_address_p (x, mode, reg_ok_strict))
9928 return 1;
9929 }
9930 else if (virtual_stack_registers_memory_p (x))
9931 return 1;
9932
9933 else if (reg_offset_p)
9934 {
9935 if (legitimate_small_data_p (mode, x))
9936 return 1;
9937 if (legitimate_constant_pool_address_p (x, mode,
9938 reg_ok_strict || lra_in_progress))
9939 return 1;
9940 }
9941
9942 /* For TImode, if we have TImode in VSX registers, only allow register
9943 indirect addresses. This will allow the values to go in either GPRs
9944 or VSX registers without reloading. The vector types would tend to
9945 go into VSX registers, so we allow REG+REG, while TImode seems
9946 somewhat split, in that some uses are GPR based, and some VSX based. */
9947 /* FIXME: We could loosen this by changing the following to
9948 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX)
9949 but currently we cannot allow REG+REG addressing for TImode. See
9950 PR72827 for complete details on how this ends up hoodwinking DSE. */
9951 if (mode == TImode && TARGET_VSX)
9952 return 0;
9953 /* If not REG_OK_STRICT (before reload), allow any stack offset. */
9954 if (! reg_ok_strict
9955 && reg_offset_p
9956 && GET_CODE (x) == PLUS
9957 && REG_P (XEXP (x, 0))
9958 && (XEXP (x, 0) == virtual_stack_vars_rtx
9959 || XEXP (x, 0) == arg_pointer_rtx)
9960 && CONST_INT_P (XEXP (x, 1)))
9961 return 1;
9962 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
9963 return 1;
9964 if (!FLOAT128_2REG_P (mode)
9965 && (TARGET_HARD_FLOAT
9966 || TARGET_POWERPC64
9967 || (mode != DFmode && mode != DDmode))
9968 && (TARGET_POWERPC64 || mode != DImode)
9969 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
9970 && mode != PTImode
9971 && !avoiding_indexed_address_p (mode)
9972 && legitimate_indexed_address_p (x, reg_ok_strict))
9973 return 1;
9974 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
9975 && mode_supports_pre_modify_p (mode)
9976 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
9977 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
9978 reg_ok_strict, false)
9979 || (!avoiding_indexed_address_p (mode)
9980 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
9981 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
9982 {
9983 /* There is no prefixed version of the load/store with update. */
9984 rtx addr = XEXP (x, 1);
9985 return !address_is_prefixed (addr, mode, NON_PREFIXED_DEFAULT);
9986 }
9987 if (reg_offset_p && !quad_offset_p
9988 && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
9989 return 1;
9990 return 0;
9991 }
9992
9993 /* Debug version of rs6000_legitimate_address_p. */
9994 static bool
9995 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict,
9996 code_helper ch)
9997 {
9998 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict, ch);
9999 fprintf (stderr,
10000 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
10001 "strict = %d, reload = %s, code = %s\n",
10002 ret ? "true" : "false",
10003 GET_MODE_NAME (mode),
10004 reg_ok_strict,
10005 (reload_completed ? "after" : "before"),
10006 GET_RTX_NAME (GET_CODE (x)));
10007 debug_rtx (x);
10008
10009 return ret;
10010 }
10011
10012 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
10013
10014 static bool
10015 rs6000_mode_dependent_address_p (const_rtx addr,
10016 addr_space_t as ATTRIBUTE_UNUSED)
10017 {
10018 return rs6000_mode_dependent_address_ptr (addr);
10019 }
10020
10021 /* Go to LABEL if ADDR (a legitimate address expression)
10022 has an effect that depends on the machine mode it is used for.
10023
10024 On the RS/6000 this is true of all integral offsets (since AltiVec
10025 and VSX modes don't allow them) and of pre-increment or decrement addresses.
10026
10027 ??? Except that due to conceptual problems in offsettable_address_p
10028 we can't really report the problems of integral offsets. So leave
10029 this assuming that the adjustable offset must be valid for the
10030 sub-words of a TFmode operand, which is what we had before. */
10031
10032 static bool
10033 rs6000_mode_dependent_address (const_rtx addr)
10034 {
10035 switch (GET_CODE (addr))
10036 {
10037 case PLUS:
10038 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
10039 is considered a legitimate address before reload, so there
10040 are no offset restrictions in that case. Note that this
10041 condition is safe in strict mode because any address involving
10042 virtual_stack_vars_rtx or arg_pointer_rtx would already have
10043 been rejected as illegitimate. */
10044 if (XEXP (addr, 0) != virtual_stack_vars_rtx
10045 && XEXP (addr, 0) != arg_pointer_rtx
10046 && CONST_INT_P (XEXP (addr, 1)))
10047 {
10048 HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
10049 HOST_WIDE_INT extra = TARGET_POWERPC64 ? 8 : 12;
10050 if (TARGET_PREFIXED)
10051 return !SIGNED_34BIT_OFFSET_EXTRA_P (val, extra);
10052 else
10053 return !SIGNED_16BIT_OFFSET_EXTRA_P (val, extra);
10054 }
10055 break;
10056
10057 case LO_SUM:
10058 /* Anything in the constant pool is sufficiently aligned that
10059 all bytes have the same high part address. */
10060 return !legitimate_constant_pool_address_p (addr, QImode, false);
10061
10062 /* Auto-increment cases are now treated generically in recog.cc. */
10063 case PRE_MODIFY:
10064 return TARGET_UPDATE;
10065
10066 /* AND is only allowed in Altivec loads. */
10067 case AND:
10068 return true;
10069
10070 default:
10071 break;
10072 }
10073
10074 return false;
10075 }
10076
10077 /* Debug version of rs6000_mode_dependent_address. */
10078 static bool
10079 rs6000_debug_mode_dependent_address (const_rtx addr)
10080 {
10081 bool ret = rs6000_mode_dependent_address (addr);
10082
10083 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
10084 ret ? "true" : "false");
10085 debug_rtx (addr);
10086
10087 return ret;
10088 }
10089
10090 /* Implement FIND_BASE_TERM. */
10091
10092 rtx
10093 rs6000_find_base_term (rtx op)
10094 {
10095 rtx base;
10096
10097 base = op;
10098 if (GET_CODE (base) == CONST)
10099 base = XEXP (base, 0);
10100 if (GET_CODE (base) == PLUS)
10101 base = XEXP (base, 0);
10102 if (GET_CODE (base) == UNSPEC)
10103 switch (XINT (base, 1))
10104 {
10105 case UNSPEC_TOCREL:
10106 case UNSPEC_MACHOPIC_OFFSET:
10107 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
10108 for aliasing purposes. */
10109 return XVECEXP (base, 0, 0);
10110 }
10111
10112 return op;
10113 }
10114
10115 /* More elaborate version of recog's offsettable_memref_p predicate
10116 that works around the ??? note of rs6000_mode_dependent_address.
10117 In particular it accepts
10118
10119 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
10120
10121 in 32-bit mode, which the recog predicate rejects. */
10122
10123 static bool
10124 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
10125 {
10126 bool worst_case;
10127
10128 if (!MEM_P (op))
10129 return false;
10130
10131 /* First mimic offsettable_memref_p. */
10132 if (offsettable_address_p (strict, GET_MODE (op), XEXP (op, 0)))
10133 return true;
10134
10135 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
10136 the latter predicate knows nothing about the mode of the memory
10137 reference and, therefore, assumes that it is the largest supported
10138 mode (TFmode). As a consequence, legitimate offsettable memory
10139 references are rejected. rs6000_legitimate_offset_address_p contains
10140 the correct logic for the PLUS case of rs6000_mode_dependent_address,
10141 at least with a little bit of help here given that we know the
10142 actual registers used. */
10143 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
10144 || GET_MODE_SIZE (reg_mode) == 4);
10145 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
10146 strict, worst_case);
10147 }
10148
10149 /* Determine the reassociation width to be used in reassociate_bb.
10150 This takes into account how many parallel operations we
10151 can actually do of a given type, and also the latency.
10152 P8:
10153 int add/sub 6/cycle
10154 mul 2/cycle
10155 vect add/sub/mul 2/cycle
10156 fp add/sub/mul 2/cycle
10157 dfp 1/cycle
10158 */
10159
10160 static int
10161 rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
10162 machine_mode mode)
10163 {
10164 switch (rs6000_tune)
10165 {
10166 case PROCESSOR_POWER8:
10167 case PROCESSOR_POWER9:
10168 case PROCESSOR_POWER10:
10169 if (DECIMAL_FLOAT_MODE_P (mode))
10170 return 1;
10171 if (VECTOR_MODE_P (mode))
10172 return 4;
10173 if (INTEGRAL_MODE_P (mode))
10174 return 1;
10175 if (FLOAT_MODE_P (mode))
10176 return 4;
10177 break;
10178 default:
10179 break;
10180 }
10181 return 1;
10182 }
10183
10184 /* Change register usage conditional on target flags. */
10185 static void
10186 rs6000_conditional_register_usage (void)
10187 {
10188 int i;
10189
10190 if (TARGET_DEBUG_TARGET)
10191 fprintf (stderr, "rs6000_conditional_register_usage called\n");
10192
10193 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
10194 if (TARGET_64BIT)
10195 fixed_regs[13] = call_used_regs[13] = 1;
10196
10197 /* Conditionally disable FPRs. */
10198 if (TARGET_SOFT_FLOAT)
10199 for (i = 32; i < 64; i++)
10200 fixed_regs[i] = call_used_regs[i] = 1;
10201
10202 /* The TOC register is not killed across calls in a way that is
10203 visible to the compiler. */
10204 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10205 call_used_regs[2] = 0;
10206
10207 if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
10208 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10209
10210 if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
10211 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10212 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10213
10214 if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
10215 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
10216 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10217
10218 if (TARGET_TOC && TARGET_MINIMAL_TOC)
10219 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
10220
10221 if (!TARGET_ALTIVEC && !TARGET_VSX)
10222 {
10223 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
10224 fixed_regs[i] = call_used_regs[i] = 1;
10225 call_used_regs[VRSAVE_REGNO] = 1;
10226 }
10227
10228 if (TARGET_ALTIVEC || TARGET_VSX)
10229 global_regs[VSCR_REGNO] = 1;
10230
10231 if (TARGET_ALTIVEC_ABI)
10232 {
10233 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
10234 call_used_regs[i] = 1;
10235
10236 /* AIX reserves VR20:31 in non-extended ABI mode. */
10237 if (TARGET_XCOFF && !rs6000_aix_extabi)
10238 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
10239 fixed_regs[i] = call_used_regs[i] = 1;
10240 }
10241 }
10242
10243 \f
10244 /* Output insns to set DEST equal to the constant SOURCE as a series of
10245 lis, ori and shl instructions and return TRUE. */
10246
10247 bool
10248 rs6000_emit_set_const (rtx dest, rtx source)
10249 {
10250 machine_mode mode = GET_MODE (dest);
10251 rtx temp, set;
10252 rtx_insn *insn;
10253 HOST_WIDE_INT c;
10254
10255 gcc_checking_assert (CONST_INT_P (source));
10256 c = INTVAL (source);
10257 switch (mode)
10258 {
10259 case E_QImode:
10260 case E_HImode:
10261 emit_insn (gen_rtx_SET (dest, source));
10262 return true;
10263
10264 case E_SImode:
10265 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
10266
10267 emit_insn (gen_rtx_SET (temp, GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
10268 emit_insn (gen_rtx_SET (dest,
10269 gen_rtx_IOR (SImode, temp,
10270 GEN_INT (c & 0xffff))));
10271 break;
10272
10273 case E_DImode:
10274 if (!TARGET_POWERPC64)
10275 {
10276 rtx hi, lo;
10277
10278 hi = operand_subword_force (dest, WORDS_BIG_ENDIAN == 0, DImode);
10279 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0, DImode);
10280 emit_move_insn (hi, GEN_INT (c >> 32));
10281 c = sext_hwi (c, 32);
10282 emit_move_insn (lo, GEN_INT (c));
10283 }
10284 else
10285 rs6000_emit_set_long_const (dest, c);
10286 break;
10287
10288 default:
10289 gcc_unreachable ();
10290 }
10291
10292 insn = get_last_insn ();
10293 set = single_set (insn);
10294 if (! CONSTANT_P (SET_SRC (set)))
10295 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
10296
10297 return true;
10298 }
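
/* E.g. for SImode c == 0x12345678 the sequence above is
   "lis rT,0x1234" followed by "ori rD,rT,0x5678" (a hypothetical
   example). */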
10299
10300 /* Check if C can be rotated to a negative value that the 'lis' instruction
10301 is able to load: 1..1xx0..0. If so, set *ROT to the number by which C is
10302 rotated, and return true. Return false otherwise. */
10303
10304 static bool
10305 can_be_rotated_to_negative_lis (HOST_WIDE_INT c, int *rot)
10306 {
10307 /* case a. 1..1xxx0..01..1: up to 15 x's, at least 16 0's. */
10308 int leading_ones = clz_hwi (~c);
10309 int trailing_ones = ctz_hwi (~c);
10310 int middle_zeros = ctz_hwi (c >> trailing_ones);
10311 if (middle_zeros >= 16 && leading_ones + trailing_ones >= 33)
10312 {
10313 *rot = HOST_BITS_PER_WIDE_INT - trailing_ones;
10314 return true;
10315 }
10316
10317 /* case b. xx0..01..1xx: some of 15 x's (and some of 16 0's) are
10318 rotated over the highest bit. */
10319 int pos_one = clz_hwi ((c << 16) >> 16);
10320 middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_one));
10321 int middle_ones = clz_hwi (~(c << pos_one));
10322 if (middle_zeros >= 16 && middle_ones >= 33)
10323 {
10324 *rot = pos_one;
10325 return true;
10326 }
10327
10328 return false;
10329 }
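
/* A worked example (hypothetical constant): c == 0xff80010000ffffff
   has 24 trailing ones, 16 middle zeros and 9 leading ones, so
   leading + trailing >= 33 holds; rotating c left by 64 - 24 == 40
   gives 0xffffffff80010000, which "lis rD,-32767" loads directly. */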
10330
10331 /* Check if value C can be built by 2 instructions: one is 'li or lis',
10332 another is rotldi.
10333
10334 If so, *SHIFT is set to the shift operand of rotldi (rldicl), *MASK is
10335 set to the mask operand of rotldi (rldicl), and true is returned.
10336 Return false otherwise. */
10337
10338 static bool
10339 can_be_built_by_li_lis_and_rotldi (HOST_WIDE_INT c, int *shift,
10340 HOST_WIDE_INT *mask)
10341 {
10342 /* If C or ~C contains at least 49 successive zeros, then C can be rotated
10343 to/from a positive or negative value that 'li' is able to load. */
10344 int n;
10345 if (can_be_rotated_to_lowbits (c, 15, &n)
10346 || can_be_rotated_to_lowbits (~c, 15, &n)
10347 || can_be_rotated_to_negative_lis (c, &n))
10348 {
10349 *mask = HOST_WIDE_INT_M1;
10350 *shift = HOST_BITS_PER_WIDE_INT - n;
10351 return true;
10352 }
10353
10354 return false;
10355 }
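
/* E.g. (hypothetical) c == 0x5000000000000000 is 0x5 rotated left by
   60 bits, so "li rD,5" followed by "rotldi rD,rD,60" builds it in
   two instructions, with *MASK the all-ones value. */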
10356
10357 /* Check if value C can be built by 2 instructions: one is 'li or lis',
10358 another is rldicl.
10359
10360 If so, *SHIFT is set to the shift operand of rldicl, and *MASK is set to
10361 the mask operand of rldicl, and true is returned.
10362 Return false otherwise. */
10363
10364 static bool
10365 can_be_built_by_li_lis_and_rldicl (HOST_WIDE_INT c, int *shift,
10366 HOST_WIDE_INT *mask)
10367 {
10368 /* Leading zeros may be cleared by rldicl with a mask. Change leading
10369 zeros to ones and then recheck it. */
10370 int lz = clz_hwi (c);
10371
10372 /* If lz == 0, the left shift is undefined. */
10373 if (!lz)
10374 return false;
10375
10376 HOST_WIDE_INT unmask_c
10377 = c | (HOST_WIDE_INT_M1U << (HOST_BITS_PER_WIDE_INT - lz));
10378 int n;
10379 if (can_be_rotated_to_lowbits (~unmask_c, 15, &n)
10380 || can_be_rotated_to_negative_lis (unmask_c, &n))
10381 {
10382 *mask = HOST_WIDE_INT_M1U >> lz;
10383 *shift = n == 0 ? 0 : HOST_BITS_PER_WIDE_INT - n;
10384 return true;
10385 }
10386
10387 return false;
10388 }
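
/* E.g. (hypothetical) c == 0x00000000ffff8000: "li rD,-32768" loads
   0xffffffffffff8000, and "rldicl rD,rD,0,32" clears the 32 leading
   bits, leaving exactly c. */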
10389
10390 /* Check if value C can be built by 2 instructions: one is 'li or lis',
10391 another is rldicr.
10392
10393 If so, *SHIFT is set to the shift operand of rldicr, and *MASK is set to
10394 the mask operand of rldicr, and true is returned.
10395 Return false otherwise. */
10396
10397 static bool
10398 can_be_built_by_li_lis_and_rldicr (HOST_WIDE_INT c, int *shift,
10399 HOST_WIDE_INT *mask)
10400 {
10401 /* Trailing zeros may be cleared by rldicr with a mask. Change trailing
10402 zeros to ones and then recheck it. */
10403 int tz = ctz_hwi (c);
10404
10405 /* If tz == HOST_BITS_PER_WIDE_INT, the left shift is undefined. */
10406 if (tz >= HOST_BITS_PER_WIDE_INT)
10407 return false;
10408
10409 HOST_WIDE_INT unmask_c = c | ((HOST_WIDE_INT_1U << tz) - 1);
10410 int n;
10411 if (can_be_rotated_to_lowbits (~unmask_c, 15, &n)
10412 || can_be_rotated_to_negative_lis (unmask_c, &n))
10413 {
10414 *mask = HOST_WIDE_INT_M1U << tz;
10415 *shift = HOST_BITS_PER_WIDE_INT - n;
10416 return true;
10417 }
10418
10419 return false;
10420 }
10421
10422 /* Check if value C can be built by 2 instructions: one is 'li', another is
10423 rldic.
10424
10425 If so, *SHIFT is set to the 'shift' operand of rldic, and *MASK is set
10426 to the mask corresponding to its 'mb' operand, and return true.
10427 Return false otherwise. */
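/* Illustrative example: C = 0xffa00000000003ff is built by
   "li rD,-6 ; rldic rD,rD,52,54": -6 rotated left by 52 is
   0xffafffffffffffff, and the wrap-around mask clears bits 51..10.  */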
10428
10429 static bool
10430 can_be_built_by_li_and_rldic (HOST_WIDE_INT c, int *shift, HOST_WIDE_INT *mask)
10431 {
10432 /* A negative 'li' value has at least 49 successive leading ones, since
its 16-bit immediate is sign-extended to 64 bits. */
10433 int ones = 49;
10434
10435 /* 1..1xx1..1: negative value of li --> 0..01..1xx0..0: the low 1's become
10436 shifted-in 0's, and the high 1's (and x's) are cleared. */
10437 int tz = ctz_hwi (c);
10438 int lz = clz_hwi (c);
10439
10440 /* If lz == HOST_BITS_PER_WIDE_INT, the left shift is undefined. */
10441 if (lz >= HOST_BITS_PER_WIDE_INT)
10442 return false;
10443
10444 int middle_ones = clz_hwi (~(c << lz));
10445 if (tz + lz + middle_ones >= ones
10446 && (tz - lz) < HOST_BITS_PER_WIDE_INT
10447 && tz < HOST_BITS_PER_WIDE_INT)
10448 {
10449 *mask = ((1LL << (HOST_BITS_PER_WIDE_INT - tz - lz)) - 1LL) << tz;
10450 *shift = tz;
10451 return true;
10452 }
10453
10454 /* 1..1xx1..1 --> 1..1xx0..01..1: some 1's (following the x's) are cleared. */
10455 int leading_ones = clz_hwi (~c);
10456 int tailing_ones = ctz_hwi (~c);
10457 int middle_zeros = ctz_hwi (c >> tailing_ones);
10458 if (leading_ones + tailing_ones + middle_zeros >= ones
10459 && middle_zeros < HOST_BITS_PER_WIDE_INT)
10460 {
10461 *mask = ~(((1ULL << middle_zeros) - 1ULL) << tailing_ones);
10462 *shift = tailing_ones + middle_zeros;
10463 return true;
10464 }
10465
10466 /* xx1..1xx --> xx0..01..1xx: some 1's (following the x's) are cleared. */
10467 /* Get the position of the first bit of the run of successive 1's.
10468 Bit 24 lies within either the run of 0's or the run of 1's. */
10469 HOST_WIDE_INT low_mask = (HOST_WIDE_INT_1U << 24) - HOST_WIDE_INT_1U;
10470 int pos_first_1 = ((c & (low_mask + 1)) == 0)
10471 ? clz_hwi (c & low_mask)
10472 : HOST_BITS_PER_WIDE_INT - ctz_hwi (~(c | low_mask));
10473
10474 /* Make sure the left and right shifts are defined. */
10475 if (!IN_RANGE (pos_first_1, 1, HOST_BITS_PER_WIDE_INT - 1))
10476 return false;
10477
10478 middle_ones = clz_hwi (~c << pos_first_1);
10479 middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_first_1));
10480 if (pos_first_1 < HOST_BITS_PER_WIDE_INT
10481 && middle_ones + middle_zeros < HOST_BITS_PER_WIDE_INT
10482 && middle_ones + middle_zeros >= ones)
10483 {
10484 *mask = ~(((1ULL << middle_zeros) - 1LL)
10485 << (HOST_BITS_PER_WIDE_INT - pos_first_1));
10486 *shift = HOST_BITS_PER_WIDE_INT - pos_first_1 + middle_zeros;
10487 return true;
10488 }
10489
10490 return false;
10491 }
10492
10493 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
10494 Output insns to set DEST equal to the constant C as a series of
10495 lis, ori and shift instructions. If NUM_INSNS is not NULL, then
10496 only increase *NUM_INSNS by the number of insns needed, and do not
10497 emit any insns. */
10498
10499 static void
10500 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns)
10501 {
10502 HOST_WIDE_INT ud1, ud2, ud3, ud4;
10503
10504 ud1 = c & 0xffff;
10505 ud2 = (c >> 16) & 0xffff;
10506 ud3 = (c >> 32) & 0xffff;
10507 ud4 = (c >> 48) & 0xffff;
10508
10509 /* This lambda either emits one insn or, when NUM_INSNS is non-null,
10510 only increments the insn count without emitting anything. */
10511 auto count_or_emit_insn = [&num_insns] (rtx dest_or_insn, rtx src = nullptr) {
10512 if (num_insns)
10513 {
10514 (*num_insns)++;
10515 return;
10516 }
10517
10518 if (src)
10519 emit_move_insn (dest_or_insn, src);
10520 else
10521 emit_insn (dest_or_insn);
10522 };
10523
10524 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
10525 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && !(ud1 & 0x8000)))
10526 {
10527 /* li */
10528 count_or_emit_insn (dest, GEN_INT (sext_hwi (ud1, 16)));
10529 return;
10530 }
10531
10532 rtx temp
10533 = (num_insns || !can_create_pseudo_p ()) ? dest : gen_reg_rtx (DImode);
10534
10535 if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
10536 || (ud4 == 0 && ud3 == 0 && !(ud2 & 0x8000)))
10537 {
10538 /* lis[; ori] */
10539 count_or_emit_insn (ud1 != 0 ? temp : dest,
10540 GEN_INT (sext_hwi (ud2 << 16, 32)));
10541 if (ud1 != 0)
10542 count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10543 return;
10544 }
10545
10546 if (ud4 == 0xffff && ud3 == 0xffff && !(ud2 & 0x8000) && ud1 == 0)
10547 {
10548 /* lis; xoris */
10549 count_or_emit_insn (temp, GEN_INT (sext_hwi ((ud2 | 0x8000) << 16, 32)));
10550 count_or_emit_insn (dest,
10551 gen_rtx_XOR (DImode, temp, GEN_INT (0x80000000)));
10552 return;
10553 }
10554
10555 if (ud4 == 0xffff && ud3 == 0xffff && (ud1 & 0x8000))
10556 {
10557 /* li; xoris */
10558 count_or_emit_insn (temp, GEN_INT (sext_hwi (ud1, 16)));
10559 count_or_emit_insn (dest, gen_rtx_XOR (DImode, temp,
10560 GEN_INT ((ud2 ^ 0xffff) << 16)));
10561 return;
10562 }
10563
10564 int shift;
10565 HOST_WIDE_INT mask;
10566 if (can_be_built_by_li_lis_and_rotldi (c, &shift, &mask)
10567 || can_be_built_by_li_lis_and_rldicl (c, &shift, &mask)
10568 || can_be_built_by_li_lis_and_rldicr (c, &shift, &mask)
10569 || can_be_built_by_li_and_rldic (c, &shift, &mask))
10570 {
10571 /* li/lis; rldicX */
10572 unsigned HOST_WIDE_INT imm = (c | ~mask);
/* Rotate IMM right by SHIFT to get the value to load. Skip SHIFT == 0:
the expression below would be undefined for it, and the rotation is the
identity anyway. */
if (shift != 0)
10573 imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift));
10574
10575 count_or_emit_insn (temp, GEN_INT (imm));
10576 if (shift != 0)
10577 temp = gen_rtx_ROTATE (DImode, temp, GEN_INT (shift));
10578 if (mask != HOST_WIDE_INT_M1)
10579 temp = gen_rtx_AND (DImode, temp, GEN_INT (mask));
10580 count_or_emit_insn (dest, temp);
10581
10582 return;
10583 }
10584
10585 if (ud3 == 0 && ud4 == 0)
10586 {
10587 gcc_assert ((ud2 & 0x8000) && ud1 != 0);
10588 if (!(ud1 & 0x8000))
10589 {
10590 /* li; oris */
10591 count_or_emit_insn (temp, GEN_INT (ud1));
10592 count_or_emit_insn (dest,
10593 gen_rtx_IOR (DImode, temp, GEN_INT (ud2 << 16)));
10594 return;
10595 }
10596
10597 /* lis; ori; rldicl */
10598 count_or_emit_insn (temp, GEN_INT (sext_hwi (ud2 << 16, 32)));
10599 count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10600 count_or_emit_insn (dest,
10601 gen_rtx_AND (DImode, temp, GEN_INT (0xffffffff)));
10602 return;
10603 }
10604
10605 if (ud1 == ud3 && ud2 == ud4)
10606 {
10607 /* Load the low 32 bits first, e.g. "lis; ori", then "rldimi". */
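/* E.g. C = 0x1234567812345678: "lis rD,0x1234 ; ori rD,rD,0x5678" builds
   the low word, then "rldimi rD,rD,32,0" copies it into the high word.  */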
10608 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10609 rs6000_emit_set_long_const (temp, sext_hwi (num, 32), num_insns);
10610
10611 rtx rldimi = gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp,
10612 GEN_INT (0xffffffff));
10613 count_or_emit_insn (rldimi);
10614 return;
10615 }
10616
10617 if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && !(ud3 & 0x8000)))
10618 {
10619 /* lis; [ori;] sldi [;ori]. */
10620 count_or_emit_insn (temp, GEN_INT (sext_hwi (ud3 << 16, 32)));
10621 if (ud2 != 0)
10622 count_or_emit_insn (temp, gen_rtx_IOR (DImode, temp, GEN_INT (ud2)));
10623 count_or_emit_insn (ud1 != 0 ? temp : dest,
10624 gen_rtx_ASHIFT (DImode, temp, GEN_INT (16)));
10625 if (ud1 != 0)
10626 count_or_emit_insn (dest, gen_rtx_IOR (DImode, temp, GEN_INT (ud1)));
10627 return;
10628 }
10629
10630 if (TARGET_PREFIXED)
10631 {
10632 if (can_create_pseudo_p ())
10633 {
10634 /* pli A,L; pli B,H; rldimi A,B,32,0. */
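/* E.g. C = 0x123456789abcdef0: pli A,0x9abcdef0 ; pli B,0x12345678 ;
   rldimi A,B,32,0.  */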
10635 rtx temp1 = num_insns ? nullptr : gen_reg_rtx (DImode);
10636 count_or_emit_insn (temp, GEN_INT ((ud4 << 16) | ud3));
10637 count_or_emit_insn (temp1, GEN_INT ((ud2 << 16) | ud1));
10638 rtx rldimi = gen_rotldi3_insert_3 (dest, temp, GEN_INT (32), temp1,
10639 GEN_INT (0xffffffff));
10640 count_or_emit_insn (rldimi);
10641 return;
10642 }
10643
10644 /* pli A,H; sldi A,32; paddi A,A,L. */
10645 count_or_emit_insn (dest, GEN_INT ((ud4 << 16) | ud3));
10646 count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
10647
10648 bool can_use_paddi = dest ? REGNO (dest) != FIRST_GPR_REGNO : false;
10649 /* Use paddi for the low 32 bits; this is not possible when DEST is r0,
since RA == 0 in paddi means a zero input rather than r0. */
10650 if (ud2 != 0 && ud1 != 0 && can_use_paddi)
10651 count_or_emit_insn (dest, gen_rtx_PLUS (DImode, dest,
10652 GEN_INT ((ud2 << 16) | ud1)));
10653 /* Use oris, ori for low 32 bits. */
10654 if (ud2 != 0 && (ud1 == 0 || !can_use_paddi))
10655 count_or_emit_insn (dest,
10656 gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
10657 if (ud1 != 0 && (ud2 == 0 || !can_use_paddi))
10658 count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
10659 return;
10660 }
10661
10662 if (can_create_pseudo_p ())
10663 {
10664 /* lis HIGH,UD4 ; ori HIGH,UD3 ;
10665 lis LOW,UD2 ; ori LOW,UD1 ; rldimi LOW,HIGH,32,0. */
10666 rtx high = num_insns ? nullptr : gen_reg_rtx (DImode);
10667 rtx low = num_insns ? nullptr : gen_reg_rtx (DImode);
10668 HOST_WIDE_INT num = (ud2 << 16) | ud1;
10669 rs6000_emit_set_long_const (low, sext_hwi (num, 32), num_insns);
10670 num = (ud4 << 16) | ud3;
10671 rs6000_emit_set_long_const (high, sext_hwi (num, 32), num_insns);
10672
10673 rtx rldimi = gen_rotldi3_insert_3 (dest, high, GEN_INT (32), low,
10674 GEN_INT (0xffffffff));
10675 count_or_emit_insn (rldimi);
10676 return;
10677 }
10678
10679 /* lis DEST,UD4 ; ori DEST,UD3 ; sldi DEST,32 ;
10680 oris DEST,UD2 ; ori DEST,UD1. */
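/* E.g. C = 0x123456789abcdef0: lis DEST,0x1234 ; ori DEST,0x5678 ;
   sldi DEST,32 ; oris DEST,0x9abc ; ori DEST,0xdef0.  */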
10681 count_or_emit_insn (dest, GEN_INT (sext_hwi (ud4 << 16, 32)));
10682 if (ud3 != 0)
10683 count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
10684
10685 count_or_emit_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
10686 if (ud2 != 0)
10687 count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2 << 16)));
10688 if (ud1 != 0)
10689 count_or_emit_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
10690
10691 return;
10692 }
10693
10694 /* Helper for the following function. Get rid of [r+r] memory refs
10695 in cases where they won't work (TImode, TFmode, TDmode, PTImode). */
10696
10697 static void
10698 rs6000_eliminate_indexed_memrefs (rtx operands[2])
10699 {
10700 if (MEM_P (operands[0])
10701 && !REG_P (XEXP (operands[0], 0))
10702 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
10703 GET_MODE (operands[0]), false))
10704 operands[0]
10705 = replace_equiv_address (operands[0],
10706 copy_addr_to_reg (XEXP (operands[0], 0)));
10707
10708 if (MEM_P (operands[1])
10709 && !REG_P (XEXP (operands[1], 0))
10710 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
10711 GET_MODE (operands[1]), false))
10712 operands[1]
10713 = replace_equiv_address (operands[1],
10714 copy_addr_to_reg (XEXP (operands[1], 0)));
10715 }
10716
10717 /* Generate a vector of constants to permute MODE for a little-endian
10718 storage operation by swapping the two halves of a vector. */
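/* E.g. for V4SImode this returns the permutation {2, 3, 0, 1}, which
   exchanges the two doubleword halves of the vector.  */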
10719 static rtvec
10720 rs6000_const_vec (machine_mode mode)
10721 {
10722 int i, subparts;
10723 rtvec v;
10724
10725 switch (mode)
10726 {
10727 case E_V1TImode:
10728 subparts = 1;
10729 break;
10730 case E_V2DFmode:
10731 case E_V2DImode:
10732 subparts = 2;
10733 break;
10734 case E_V4SFmode:
10735 case E_V4SImode:
10736 subparts = 4;
10737 break;
10738 case E_V8HImode:
10739 subparts = 8;
10740 break;
10741 case E_V16QImode:
10742 subparts = 16;
10743 break;
10744 default:
10745 gcc_unreachable ();
10746 }
10747
10748 v = rtvec_alloc (subparts);
10749
10750 for (i = 0; i < subparts / 2; ++i)
10751 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
10752 for (i = subparts / 2; i < subparts; ++i)
10753 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
10754
10755 return v;
10756 }
10757
10758 /* Emit an lxvd2x, stxvd2x, or xxpermdi instruction for a VSX load or
10759 store operation. */
10760 void
10761 rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
10762 {
10763 gcc_assert (!altivec_indexed_or_indirect_operand (dest, mode));
10764 gcc_assert (!altivec_indexed_or_indirect_operand (source, mode));
10765
10766 /* Scalar permutations are easier to express in integer modes rather than
10767 floating-point modes, so cast them here. We use V1TImode instead
10768 of TImode to ensure that the values don't go through GPRs. */
10769 if (FLOAT128_VECTOR_P (mode))
10770 {
10771 dest = gen_lowpart (V1TImode, dest);
10772 source = gen_lowpart (V1TImode, source);
10773 mode = V1TImode;
10774 }
10775
10776 /* Use ROTATE instead of VEC_SELECT if the mode contains only a single
10777 scalar. */
10778 if (mode == TImode || mode == V1TImode)
10779 emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
10780 GEN_INT (64))));
10781 else
10782 {
10783 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
10784 emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
10785 }
10786 }
10787
10788 /* Emit a little-endian load from vector memory location SOURCE to VSX
10789 register DEST in mode MODE. The load is done with two permuting
10790 insns that represent an lxvd2x and an xxpermdi. */
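/* The two permutes compose to a doubleword swap and its inverse: the
   lxvd2x loads the two doublewords in swapped order, and the xxpermdi
   swaps them back, leaving DEST in the expected element order.  */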
10791 void
10792 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
10793 {
10794 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10795 V1TImode). */
10796 if (mode == TImode || mode == V1TImode)
10797 {
10798 mode = V2DImode;
10799 dest = gen_lowpart (V2DImode, dest);
10800 source = adjust_address (source, V2DImode, 0);
10801 }
10802
10803 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
10804 rs6000_emit_le_vsx_permute (tmp, source, mode);
10805 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10806 }
10807
10808 /* Emit a little-endian store to vector memory location DEST from VSX
10809 register SOURCE in mode MODE. The store is done with two permuting
10810 insns that represent an xxpermdi and an stxvd2x. */
10811 void
10812 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
10813 {
10814 /* This should never be called after LRA. */
10815 gcc_assert (can_create_pseudo_p ());
10816
10817 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
10818 V1TImode). */
10819 if (mode == TImode || mode == V1TImode)
10820 {
10821 mode = V2DImode;
10822 dest = adjust_address (dest, V2DImode, 0);
10823 source = gen_lowpart (V2DImode, source);
10824 }
10825
10826 rtx tmp = gen_reg_rtx_and_attrs (source);
10827 rs6000_emit_le_vsx_permute (tmp, source, mode);
10828 rs6000_emit_le_vsx_permute (dest, tmp, mode);
10829 }
10830
10831 /* Emit a sequence representing a little-endian VSX load or store,
10832 moving data from SOURCE to DEST in mode MODE. This is done
10833 separately from rs6000_emit_move to ensure it is called only
10834 during expand. LE VSX loads and stores introduced later are
10835 handled with a split. The expand-time RTL generation allows
10836 us to optimize away redundant pairs of register-permutes. */
10837 void
10838 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
10839 {
10840 gcc_assert (!BYTES_BIG_ENDIAN
10841 && VECTOR_MEM_VSX_P (mode)
10842 && !TARGET_P9_VECTOR
10843 && !gpr_or_gpr_p (dest, source)
10844 && (MEM_P (source) ^ MEM_P (dest)));
10845
10846 if (MEM_P (source))
10847 {
10848 gcc_assert (REG_P (dest) || SUBREG_P (dest));
10849 rs6000_emit_le_vsx_load (dest, source, mode);
10850 }
10851 else
10852 {
10853 if (!REG_P (source))
10854 source = force_reg (mode, source);
10855 rs6000_emit_le_vsx_store (dest, source, mode);
10856 }
10857 }
10858
10859 /* Return whether a SFmode or SImode move can be done without converting one
10860 mode to another. This arises when we have:
10861
10862 (SUBREG:SF (REG:SI ...))
10863 (SUBREG:SI (REG:SF ...))
10864
10865 and one of the values is in a floating point/vector register, where SFmode
10866 scalars are stored in DFmode format. */
10867
10868 bool
10869 valid_sf_si_move (rtx dest, rtx src, machine_mode mode)
10870 {
10871 if (TARGET_ALLOW_SF_SUBREG)
10872 return true;
10873
10874 if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT)
10875 return true;
10876
10877 if (!SUBREG_P (src) || !sf_subreg_operand (src, mode))
10878 return true;
10879
10880 /* Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */
10881 if (SUBREG_P (dest))
10882 {
10883 rtx dest_subreg = SUBREG_REG (dest);
10884 rtx src_subreg = SUBREG_REG (src);
10885 return GET_MODE (dest_subreg) == GET_MODE (src_subreg);
10886 }
10887
10888 return false;
10889 }
10890
10891
10892 /* Helper function to change moves with:
10893
10894 (SUBREG:SF (REG:SI)) and
10895 (SUBREG:SI (REG:SF))
10896
10897 into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode
10898 values are stored as DFmode values in the VSX registers. We need to convert
10899 the bits before we can use a direct move or operate on the bits in the
10900 vector register as an integer type.
10901
10902 Skip things like (set (SUBREG:SI (...)) (SUBREG:SI (...))). */
10903
10904 static bool
10905 rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode)
10906 {
10907 if (TARGET_DIRECT_MOVE_64BIT && !reload_completed
10908 && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode))
10909 && SUBREG_P (source) && sf_subreg_operand (source, mode))
10910 {
10911 rtx inner_source = SUBREG_REG (source);
10912 machine_mode inner_mode = GET_MODE (inner_source);
10913
10914 if (mode == SImode && inner_mode == SFmode)
10915 {
10916 emit_insn (gen_movsi_from_sf (dest, inner_source));
10917 return true;
10918 }
10919
10920 if (mode == SFmode && inner_mode == SImode)
10921 {
10922 emit_insn (gen_movsf_from_si (dest, inner_source));
10923 return true;
10924 }
10925 }
10926
10927 return false;
10928 }
10929
10930 /* Emit a move from SOURCE to DEST in mode MODE. */
10931 void
10932 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
10933 {
10934 rtx operands[2];
10935 operands[0] = dest;
10936 operands[1] = source;
10937
10938 if (TARGET_DEBUG_ADDR)
10939 {
10940 fprintf (stderr,
10941 "\nrs6000_emit_move: mode = %s, lra_in_progress = %d, "
10942 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
10943 GET_MODE_NAME (mode),
10944 lra_in_progress,
10945 reload_completed,
10946 can_create_pseudo_p ());
10947 debug_rtx (dest);
10948 fprintf (stderr, "source:\n");
10949 debug_rtx (source);
10950 }
10951
10952 /* Check that we get CONST_WIDE_INT only when we should. */
10953 if (CONST_WIDE_INT_P (operands[1])
10954 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
10955 gcc_unreachable ();
10956
10957 #ifdef HAVE_AS_GNU_ATTRIBUTE
10958 /* If we use a long double type, set the flags in .gnu_attribute that say
10959 what the long double type is. This is to allow the linker's warning
10960 message for the wrong long double to be useful, even if the function does
10961 not do a call (for example, doing a 128-bit add on power9 if the long
10962 double type is IEEE 128-bit). Do not set this if __ibm128 or __float128
10963 are used and they aren't the default long double type. */
10964 if (rs6000_gnu_attr && (HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT))
10965 {
10966 if (TARGET_LONG_DOUBLE_128 && (mode == TFmode || mode == TCmode))
10967 rs6000_passes_float = rs6000_passes_long_double = true;
10968
10969 else if (!TARGET_LONG_DOUBLE_128 && (mode == DFmode || mode == DCmode))
10970 rs6000_passes_float = rs6000_passes_long_double = true;
10971 }
10972 #endif
10973
10974 /* See if we need to special case SImode/SFmode SUBREG moves. */
10975 if ((mode == SImode || mode == SFmode) && SUBREG_P (source)
10976 && rs6000_emit_move_si_sf_subreg (dest, source, mode))
10977 return;
10978
10979 /* Check if GCC is setting up a block move that will end up using FP
10980 registers as temporaries. We must make sure this is acceptable. */
10981 if (MEM_P (operands[0])
10982 && MEM_P (operands[1])
10983 && mode == DImode
10984 && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0]))
10985 || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1])))
10986 && ! (rs6000_slow_unaligned_access (SImode,
10987 (MEM_ALIGN (operands[0]) > 32
10988 ? 32 : MEM_ALIGN (operands[0])))
10989 || rs6000_slow_unaligned_access (SImode,
10990 (MEM_ALIGN (operands[1]) > 32
10991 ? 32 : MEM_ALIGN (operands[1]))))
10992 && ! MEM_VOLATILE_P (operands [0])
10993 && ! MEM_VOLATILE_P (operands [1]))
10994 {
10995 emit_move_insn (adjust_address (operands[0], SImode, 0),
10996 adjust_address (operands[1], SImode, 0));
10997 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
10998 adjust_address (copy_rtx (operands[1]), SImode, 4));
10999 return;
11000 }
11001
11002 if (can_create_pseudo_p () && MEM_P (operands[0])
11003 && !gpc_reg_operand (operands[1], mode))
11004 operands[1] = force_reg (mode, operands[1]);
11005
11006 /* Recognize the case where operand[1] is a reference to thread-local
11007 data and load its address to a register. */
11008 if (tls_referenced_p (operands[1]))
11009 {
11010 enum tls_model model;
11011 rtx tmp = operands[1];
11012 rtx addend = NULL;
11013
11014 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
11015 {
11016 addend = XEXP (XEXP (tmp, 0), 1);
11017 tmp = XEXP (XEXP (tmp, 0), 0);
11018 }
11019
11020 gcc_assert (SYMBOL_REF_P (tmp));
11021 model = SYMBOL_REF_TLS_MODEL (tmp);
11022 gcc_assert (model != 0);
11023
11024 tmp = rs6000_legitimize_tls_address (tmp, model);
11025 if (addend)
11026 {
11027 tmp = gen_rtx_PLUS (mode, tmp, addend);
11028 tmp = force_operand (tmp, operands[0]);
11029 }
11030 operands[1] = tmp;
11031 }
11032
11033 /* 128-bit constant floating-point values on Darwin should really be loaded
11034 as two parts. However, this premature splitting is a problem when DFmode
11035 values can go into Altivec registers. */
11036 if (TARGET_MACHO && CONST_DOUBLE_P (operands[1]) && FLOAT128_IBM_P (mode)
11037 && !reg_addr[DFmode].scalar_in_vmx_p)
11038 {
11039 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
11040 simplify_gen_subreg (DFmode, operands[1], mode, 0),
11041 DFmode);
11042 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
11043 GET_MODE_SIZE (DFmode)),
11044 simplify_gen_subreg (DFmode, operands[1], mode,
11045 GET_MODE_SIZE (DFmode)),
11046 DFmode);
11047 return;
11048 }
11049
11050 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
11051 p1:SD) if p1 is not of floating point class and p0 is spilled,
11052 as we have no analogous movsd_store for this. */
11053 if (lra_in_progress && mode == DDmode
11054 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
11055 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11056 && SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1]))
11057 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
11058 {
11059 enum reg_class cl;
11060 int regno = REGNO (SUBREG_REG (operands[1]));
11061
11062 if (!HARD_REGISTER_NUM_P (regno))
11063 {
11064 cl = reg_preferred_class (regno);
11065 regno = reg_renumber[regno];
11066 if (regno < 0)
11067 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
11068 }
11069 if (regno >= 0 && ! FP_REGNO_P (regno))
11070 {
11071 mode = SDmode;
11072 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
11073 operands[1] = SUBREG_REG (operands[1]);
11074 }
11075 }
11076 if (lra_in_progress
11077 && mode == SDmode
11078 && REG_P (operands[0]) && !HARD_REGISTER_P (operands[0])
11079 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
11080 && (REG_P (operands[1])
11081 || (SUBREG_P (operands[1]) && REG_P (SUBREG_REG (operands[1])))))
11082 {
11083 int regno = reg_or_subregno (operands[1]);
11084 enum reg_class cl;
11085
11086 if (!HARD_REGISTER_NUM_P (regno))
11087 {
11088 cl = reg_preferred_class (regno);
11089 gcc_assert (cl != NO_REGS);
11090 regno = reg_renumber[regno];
11091 if (regno < 0)
11092 regno = ira_class_hard_regs[cl][0];
11093 }
11094 if (FP_REGNO_P (regno))
11095 {
11096 if (GET_MODE (operands[0]) != DDmode)
11097 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
11098 emit_insn (gen_movsd_store (operands[0], operands[1]));
11099 }
11100 else if (INT_REGNO_P (regno))
11101 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11102 else
11103 gcc_unreachable ();
11104 return;
11105 }
11106 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
11107 p:DD)) if p0 is not of floating point class and p1 is spilled,
11108 as we have no analogous movsd_load for this. */
11109 if (lra_in_progress && mode == DDmode
11110 && SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))
11111 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
11112 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
11113 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11114 {
11115 enum reg_class cl;
11116 int regno = REGNO (SUBREG_REG (operands[0]));
11117
11118 if (!HARD_REGISTER_NUM_P (regno))
11119 {
11120 cl = reg_preferred_class (regno);
11121 regno = reg_renumber[regno];
11122 if (regno < 0)
11123 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
11124 }
11125 if (regno >= 0 && ! FP_REGNO_P (regno))
11126 {
11127 mode = SDmode;
11128 operands[0] = SUBREG_REG (operands[0]);
11129 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
11130 }
11131 }
11132 if (lra_in_progress
11133 && mode == SDmode
11134 && (REG_P (operands[0])
11135 || (SUBREG_P (operands[0]) && REG_P (SUBREG_REG (operands[0]))))
11136 && REG_P (operands[1]) && !HARD_REGISTER_P (operands[1])
11137 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
11138 {
11139 int regno = reg_or_subregno (operands[0]);
11140 enum reg_class cl;
11141
11142 if (!HARD_REGISTER_NUM_P (regno))
11143 {
11144 cl = reg_preferred_class (regno);
11145 gcc_assert (cl != NO_REGS);
11146 regno = reg_renumber[regno];
11147 if (regno < 0)
11148 regno = ira_class_hard_regs[cl][0];
11149 }
11150 if (FP_REGNO_P (regno))
11151 {
11152 if (GET_MODE (operands[1]) != DDmode)
11153 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
11154 emit_insn (gen_movsd_load (operands[0], operands[1]));
11155 }
11156 else if (INT_REGNO_P (regno))
11157 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
11158 else
11159 gcc_unreachable ();
11160 return;
11161 }
11162
11163 /* FIXME: In the long term, this switch statement should go away
11164 and be replaced by a sequence of tests based on things like
11165 mode == Pmode. */
11166 switch (mode)
11167 {
11168 case E_HImode:
11169 case E_QImode:
11170 if (CONSTANT_P (operands[1])
11171 && !CONST_INT_P (operands[1]))
11172 operands[1] = force_const_mem (mode, operands[1]);
11173 break;
11174
11175 case E_TFmode:
11176 case E_TDmode:
11177 case E_IFmode:
11178 case E_KFmode:
11179 if (FLOAT128_2REG_P (mode))
11180 rs6000_eliminate_indexed_memrefs (operands);
11181 /* fall through */
11182
11183 case E_DFmode:
11184 case E_DDmode:
11185 case E_SFmode:
11186 case E_SDmode:
11187 if (CONSTANT_P (operands[1])
11188 && ! easy_fp_constant (operands[1], mode))
11189 operands[1] = force_const_mem (mode, operands[1]);
11190 break;
11191
11192 case E_V16QImode:
11193 case E_V8HImode:
11194 case E_V4SFmode:
11195 case E_V4SImode:
11196 case E_V2DFmode:
11197 case E_V2DImode:
11198 case E_V1TImode:
11199 if (CONSTANT_P (operands[1])
11200 && !easy_vector_constant (operands[1], mode))
11201 operands[1] = force_const_mem (mode, operands[1]);
11202 break;
11203
11204 case E_OOmode:
11205 case E_XOmode:
11206 if (CONST_INT_P (operands[1]) && INTVAL (operands[1]) != 0)
11207 error ("%qs is an opaque type, and you cannot set it to other values",
11208 (mode == OOmode) ? "__vector_pair" : "__vector_quad");
11209 break;
11210
11211 case E_SImode:
11212 case E_DImode:
11213 /* Use the default pattern for the address of ELF small data. */
11214 if (TARGET_ELF
11215 && mode == Pmode
11216 && DEFAULT_ABI == ABI_V4
11217 && (SYMBOL_REF_P (operands[1])
11218 || GET_CODE (operands[1]) == CONST)
11219 && small_data_operand (operands[1], mode))
11220 {
11221 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11222 return;
11223 }
11224
11225 /* Use the default pattern for loading up PC-relative addresses. */
11226 if (TARGET_PCREL && mode == Pmode
11227 && pcrel_local_or_external_address (operands[1], Pmode))
11228 {
11229 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11230 return;
11231 }
11232
11233 if (DEFAULT_ABI == ABI_V4
11234 && mode == Pmode && mode == SImode
11235 && flag_pic == 1 && got_operand (operands[1], mode))
11236 {
11237 emit_insn (gen_movsi_got (operands[0], operands[1]));
11238 return;
11239 }
11240
11241 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
11242 && TARGET_NO_TOC_OR_PCREL
11243 && ! flag_pic
11244 && mode == Pmode
11245 && CONSTANT_P (operands[1])
11246 && GET_CODE (operands[1]) != HIGH
11247 && !CONST_INT_P (operands[1]))
11248 {
11249 rtx target = (!can_create_pseudo_p ()
11250 ? operands[0]
11251 : gen_reg_rtx (mode));
11252
11253 /* If this is a function address on -mcall-aixdesc,
11254 convert it to the address of the descriptor. */
11255 if (DEFAULT_ABI == ABI_AIX
11256 && SYMBOL_REF_P (operands[1])
11257 && XSTR (operands[1], 0)[0] == '.')
11258 {
11259 const char *name = XSTR (operands[1], 0);
11260 rtx new_ref;
11261 while (*name == '.')
11262 name++;
11263 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
11264 CONSTANT_POOL_ADDRESS_P (new_ref)
11265 = CONSTANT_POOL_ADDRESS_P (operands[1]);
11266 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
11267 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
11268 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
11269 operands[1] = new_ref;
11270 }
11271
11272 if (DEFAULT_ABI == ABI_DARWIN)
11273 {
11274 #if TARGET_MACHO
11275 /* This is not PIC code, but could require the subset of
11276 indirections used by mdynamic-no-pic. */
11277 if (MACHO_DYNAMIC_NO_PIC_P)
11278 {
11279 /* Take care of any required data indirection. */
11280 operands[1] = rs6000_machopic_legitimize_pic_address (
11281 operands[1], mode, operands[0]);
11282 if (operands[0] != operands[1])
11283 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11284 return;
11285 }
11286 #endif
11287 emit_insn (gen_macho_high (Pmode, target, operands[1]));
11288 emit_insn (gen_macho_low (Pmode, operands[0],
11289 target, operands[1]));
11290 return;
11291 }
11292
11293 emit_insn (gen_elf_high (target, operands[1]));
11294 emit_insn (gen_elf_low (operands[0], target, operands[1]));
11295 return;
11296 }
11297
11298 /* If this is a SYMBOL_REF that refers to a constant pool entry,
11299 and we have put it in the TOC, we just need to make a TOC-relative
11300 reference to it. */
11301 if (TARGET_TOC
11302 && SYMBOL_REF_P (operands[1])
11303 && use_toc_relative_ref (operands[1], mode))
11304 operands[1] = create_TOC_reference (operands[1], operands[0]);
11305 else if (mode == Pmode
11306 && CONSTANT_P (operands[1])
11307 && GET_CODE (operands[1]) != HIGH
11308 && ((REG_P (operands[0])
11309 && FP_REGNO_P (REGNO (operands[0])))
11310 || !CONST_INT_P (operands[1])
11311 || (num_insns_constant (operands[1], mode)
11312 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
11313 && !toc_relative_expr_p (operands[1], false, NULL, NULL)
11314 && (TARGET_CMODEL == CMODEL_SMALL
11315 || can_create_pseudo_p ()
11316 || (REG_P (operands[0])
11317 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
11318 {
11319
11320 #if TARGET_MACHO
11321 /* Darwin uses a special PIC legitimizer. */
11322 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
11323 {
11324 operands[1] =
11325 rs6000_machopic_legitimize_pic_address (operands[1], mode,
11326 operands[0]);
11327 if (operands[0] != operands[1])
11328 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11329 return;
11330 }
11331 #endif
11332
11333 /* If we are to limit the number of things we put in the TOC and
11334 this is a symbol plus a constant we can add in one insn,
11335 just put the symbol in the TOC and add the constant. */
11336 if (GET_CODE (operands[1]) == CONST
11337 && TARGET_NO_SUM_IN_TOC
11338 && GET_CODE (XEXP (operands[1], 0)) == PLUS
11339 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
11340 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
11341 || SYMBOL_REF_P (XEXP (XEXP (operands[1], 0), 0)))
11342 && ! side_effects_p (operands[0]))
11343 {
11344 rtx sym =
11345 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
11346 rtx other = XEXP (XEXP (operands[1], 0), 1);
11347
11348 sym = force_reg (mode, sym);
11349 emit_insn (gen_add3_insn (operands[0], sym, other));
11350 return;
11351 }
11352
11353 operands[1] = force_const_mem (mode, operands[1]);
11354
11355 if (TARGET_TOC
11356 && SYMBOL_REF_P (XEXP (operands[1], 0))
11357 && use_toc_relative_ref (XEXP (operands[1], 0), mode))
11358 {
11359 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
11360 operands[0]);
11361 operands[1] = gen_const_mem (mode, tocref);
11362 set_mem_alias_set (operands[1], get_TOC_alias_set ());
11363 }
11364 }
11365 break;
11366
11367 case E_TImode:
11368 if (!VECTOR_MEM_VSX_P (TImode))
11369 rs6000_eliminate_indexed_memrefs (operands);
11370 break;
11371
11372 case E_PTImode:
11373 rs6000_eliminate_indexed_memrefs (operands);
11374 break;
11375
11376 default:
11377 fatal_insn ("bad move", gen_rtx_SET (dest, source));
11378 }
11379
11380 /* Above, we may have called force_const_mem which may have returned
11381 an invalid address. If we can, fix this up; otherwise, reload will
11382 have to deal with it. */
11383 if (MEM_P (operands[1]))
11384 operands[1] = validize_mem (operands[1]);
11385
11386 emit_insn (gen_rtx_SET (operands[0], operands[1]));
11387 }
11388 \f
11389
11390 /* Set up AIX/Darwin/64-bit Linux quad floating point routines. */
11391 static void
11392 init_float128_ibm (machine_mode mode)
11393 {
11394 if (!TARGET_XL_COMPAT)
11395 {
11396 set_optab_libfunc (add_optab, mode, "__gcc_qadd");
11397 set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
11398 set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
11399 set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");
11400
11401 if (!TARGET_HARD_FLOAT)
11402 {
11403 set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
11404 set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
11405 set_optab_libfunc (ne_optab, mode, "__gcc_qne");
11406 set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
11407 set_optab_libfunc (ge_optab, mode, "__gcc_qge");
11408 set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
11409 set_optab_libfunc (le_optab, mode, "__gcc_qle");
11410 set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
11411
11412 set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
11413 set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
11414 set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
11415 set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
11416 set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
11417 set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
11418 set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
11419 set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
11420 }
11421 }
11422 else
11423 {
11424 set_optab_libfunc (add_optab, mode, "_xlqadd");
11425 set_optab_libfunc (sub_optab, mode, "_xlqsub");
11426 set_optab_libfunc (smul_optab, mode, "_xlqmul");
11427 set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
11428 }
11429
11430 /* Add various conversions for IFmode to use the traditional TFmode
11431 names. */
11432 if (mode == IFmode)
11433 {
11434 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf");
11435 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf");
11436 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdtf");
11437 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd");
11438 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd");
11439 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd");
11440
11441 set_conv_libfunc (sfix_optab, DImode, mode, "__fixtfdi");
11442 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunstfdi");
11443
11444 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatditf");
11445 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatunditf");
11446
11447 if (TARGET_POWERPC64)
11448 {
11449 set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
11450 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
11451 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
11452 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
11453 }
11454 }
11455 }
11456
11457 /* Set up IEEE 128-bit floating point routines. Use different names if the
11458 arguments can be passed in a vector register. The historical PowerPC
11459 implementation of IEEE 128-bit floating point used _q_<op> for the names, so
11460 continue to use that if we aren't using vector registers to pass IEEE
11461 128-bit floating point. */
11462
11463 static void
11464 init_float128_ieee (machine_mode mode)
11465 {
11466 if (FLOAT128_VECTOR_P (mode))
11467 {
11468 set_optab_libfunc (add_optab, mode, "__addkf3");
11469 set_optab_libfunc (sub_optab, mode, "__subkf3");
11470 set_optab_libfunc (neg_optab, mode, "__negkf2");
11471 set_optab_libfunc (smul_optab, mode, "__mulkf3");
11472 set_optab_libfunc (sdiv_optab, mode, "__divkf3");
11473 set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
11474 set_optab_libfunc (abs_optab, mode, "__abskf2");
11475 set_optab_libfunc (powi_optab, mode, "__powikf2");
11476
11477 set_optab_libfunc (eq_optab, mode, "__eqkf2");
11478 set_optab_libfunc (ne_optab, mode, "__nekf2");
11479 set_optab_libfunc (gt_optab, mode, "__gtkf2");
11480 set_optab_libfunc (ge_optab, mode, "__gekf2");
11481 set_optab_libfunc (lt_optab, mode, "__ltkf2");
11482 set_optab_libfunc (le_optab, mode, "__lekf2");
11483 set_optab_libfunc (unord_optab, mode, "__unordkf2");
11484
11485 set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
11486 set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
11487 set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
11488 set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");
11489
11490 set_conv_libfunc (sext_optab, mode, IFmode, "__trunctfkf2");
11491 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11492 set_conv_libfunc (sext_optab, mode, TFmode, "__trunctfkf2");
11493
11494 set_conv_libfunc (trunc_optab, IFmode, mode, "__extendkftf2");
11495 if (mode != TFmode && FLOAT128_IBM_P (TFmode))
11496 set_conv_libfunc (trunc_optab, TFmode, mode, "__extendkftf2");
11497
11498 set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf");
11499 set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf");
11500 set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctdkf");
11501 set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd");
11502 set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd");
11503 set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendkftd");
11504
11505 set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
11506 set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
11507 set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
11508 set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");
11509
11510 set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
11511 set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
11512 set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
11513 set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");
11514
11515 if (TARGET_POWERPC64)
11516 {
11517 set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti_sw");
11518 set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti_sw");
11519 set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf_sw");
11520 set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf_sw");
11521 }
11522 }
11523
11524 else
11525 {
11526 set_optab_libfunc (add_optab, mode, "_q_add");
11527 set_optab_libfunc (sub_optab, mode, "_q_sub");
11528 set_optab_libfunc (neg_optab, mode, "_q_neg");
11529 set_optab_libfunc (smul_optab, mode, "_q_mul");
11530 set_optab_libfunc (sdiv_optab, mode, "_q_div");
11531 if (TARGET_PPC_GPOPT)
11532 set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");
11533
11534 set_optab_libfunc (eq_optab, mode, "_q_feq");
11535 set_optab_libfunc (ne_optab, mode, "_q_fne");
11536 set_optab_libfunc (gt_optab, mode, "_q_fgt");
11537 set_optab_libfunc (ge_optab, mode, "_q_fge");
11538 set_optab_libfunc (lt_optab, mode, "_q_flt");
11539 set_optab_libfunc (le_optab, mode, "_q_fle");
11540
11541 set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
11542 set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
11543 set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
11544 set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
11545 set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
11546 set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
11547 set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
11548 set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
11549 }
11550 }
11551
11552 static void
11553 rs6000_init_libfuncs (void)
11554 {
11555 /* __float128 support. */
11556 if (TARGET_FLOAT128_TYPE)
11557 {
11558 init_float128_ibm (IFmode);
11559 init_float128_ieee (KFmode);
11560 }
11561
11562 /* AIX/Darwin/64-bit Linux quad floating point routines. */
11563 if (TARGET_LONG_DOUBLE_128)
11564 {
11565 if (!TARGET_IEEEQUAD)
11566 init_float128_ibm (TFmode);
11567
11568 /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */
11569 else
11570 init_float128_ieee (TFmode);
11571 }
11572 }
11573
11574 /* Emit a potentially record-form instruction, setting DST from SRC.
11575 If DOT is 0, that is all; otherwise, set CCREG to the result of the
11576 signed comparison of DST with zero. If DOT is 1, the generated RTL
11577 doesn't care about the DST result; if DOT is 2, it does. If CCREG
11578 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
11579 a separate COMPARE. */
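/* E.g. for DOT == 1 this emits
     (parallel [(set CCREG (compare:CC SRC 0)) (clobber DST)])
   and for DOT == 2
     (parallel [(set CCREG (compare:CC SRC 0)) (set DST SRC)]),
   matching the record-form ("dot") insn patterns.  */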
11580
11581 void
11582 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
11583 {
11584 if (dot == 0)
11585 {
11586 emit_move_insn (dst, src);
11587 return;
11588 }
11589
11590 if (cc_reg_not_cr0_operand (ccreg, CCmode))
11591 {
11592 emit_move_insn (dst, src);
11593 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
11594 return;
11595 }
11596
11597 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
11598 if (dot == 1)
11599 {
11600 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
11601 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
11602 }
11603 else
11604 {
11605 rtx set = gen_rtx_SET (dst, src);
11606 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
11607 }
11608 }
11609
11610 \f
11611 /* A validation routine: say whether CODE, a condition code, and MODE
11612 match. The other alternatives either don't make sense or should
11613 never be generated. */
11614
11615 void
11616 validate_condition_mode (enum rtx_code code, machine_mode mode)
11617 {
11618 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
11619 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
11620 && GET_MODE_CLASS (mode) == MODE_CC);
11621
11622 /* These don't make sense. */
11623 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
11624 || mode != CCUNSmode);
11625
11626 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
11627 || mode == CCUNSmode);
11628
11629 gcc_assert (mode == CCFPmode
11630 || (code != ORDERED && code != UNORDERED
11631 && code != UNEQ && code != LTGT
11632 && code != UNGT && code != UNLT
11633 && code != UNGE && code != UNLE));
11634
11635 /* These are invalid; the information is not there. */
11636 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
11637 }
11638
11639 \f
11640 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
11641 rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is
11642 not zero, store there the bit offset (counted from the right) where
11643 the single stretch of 1 bits begins; and similarly for B, the bit
11644 offset where it ends. */
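/* E.g. MASK = 0x00ffff00 yields *B = 23 and *E = 8 (a single run of 1's
   from bit 23 down to bit 8).  Wrap-around masks are also accepted: in
   DImode, 0xff000000000000ff yields *B = 7 and *E = 56.  */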
11645
11646 bool
11647 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
11648 {
11649 unsigned HOST_WIDE_INT val = INTVAL (mask);
11650 unsigned HOST_WIDE_INT bit;
11651 int nb, ne;
11652 int n = GET_MODE_PRECISION (mode);
11653
11654 if (mode != DImode && mode != SImode)
11655 return false;
11656
11657 if (INTVAL (mask) >= 0)
11658 {
11659 bit = val & -val;
11660 ne = exact_log2 (bit);
11661 nb = exact_log2 (val + bit);
11662 }
11663 else if (val + 1 == 0)
11664 {
11665 nb = n;
11666 ne = 0;
11667 }
11668 else if (val & 1)
11669 {
11670 val = ~val;
11671 bit = val & -val;
11672 nb = exact_log2 (bit);
11673 ne = exact_log2 (val + bit);
11674 }
11675 else
11676 {
11677 bit = val & -val;
11678 ne = exact_log2 (bit);
11679 if (val + bit == 0)
11680 nb = n;
11681 else
11682 nb = 0;
11683 }
11684
11685 nb--;
11686
11687 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
11688 return false;
11689
11690 if (b)
11691 *b = nb;
11692 if (e)
11693 *e = ne;
11694
11695 return true;
11696 }
11697
11698 bool
11699 rs6000_is_valid_rotate_dot_mask (rtx mask, machine_mode mode)
11700 {
11701 int nb, ne;
11702 if (rs6000_is_valid_mask (mask, &nb, &ne, mode) && nb >= ne && ne > 0)
11703 {
11704 if (TARGET_64BIT)
11705 return true;
11706 /* *rotldi3_mask_dot requires for -m32 -mpowerpc64 that the mask is
11707 <= 0x7fffffff. */
11708 return (UINTVAL (mask) << (63 - nb)) <= 0x7fffffff;
11709 }
11710
11711 return false;
11712 }
11713
11714 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
11715 or rldicr instruction, to implement an AND with it in mode MODE. */
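/* E.g. in DImode, 0x00000000ffffff00 is valid (a single rlwinm), while
   0x0ffffffffffffff0 is not: the run neither reaches bit 0 (rldicl) nor
   bit 63 (rldicr), nor fits in the low 32 bits (rlwinm).  */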
11716
11717 bool
11718 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
11719 {
11720 int nb, ne;
11721
11722 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11723 return false;
11724
11725 /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that
11726 does not wrap. */
11727 if (mode == DImode)
11728 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
11729
11730 /* For SImode, rlwinm can do everything. */
11731 if (mode == SImode)
11732 return (nb < 32 && ne < 32);
11733
11734 return false;
11735 }
11736
11737 /* Return the instruction template for an AND with mask in mode MODE, with
11738 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11739
11740 const char *
11741 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
11742 {
11743 int nb, ne;
11744
11745 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
11746 gcc_unreachable ();
11747
11748 if (mode == DImode && ne == 0)
11749 {
11750 operands[3] = GEN_INT (63 - nb);
11751 if (dot)
11752 return "rldicl. %0,%1,0,%3";
11753 return "rldicl %0,%1,0,%3";
11754 }
11755
11756 if (mode == DImode && nb == 63)
11757 {
11758 operands[3] = GEN_INT (63 - ne);
11759 if (dot)
11760 return "rldicr. %0,%1,0,%3";
11761 return "rldicr %0,%1,0,%3";
11762 }
11763
11764 if (nb < 32 && ne < 32)
11765 {
11766 operands[3] = GEN_INT (31 - nb);
11767 operands[4] = GEN_INT (31 - ne);
11768 if (dot)
11769 return "rlwinm. %0,%1,0,%3,%4";
11770 return "rlwinm %0,%1,0,%3,%4";
11771 }
11772
11773 gcc_unreachable ();
11774 }
11775
11776 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
11777 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
11778 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
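/* E.g. in SImode, an ASHIFT by 8 combined with the mask 0xffffff00 is
   valid and becomes a single "rlwinm rD,rS,8,0,23" (i.e. slwi rD,rS,8).  */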
11779
11780 bool
11781 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
11782 {
11783 int nb, ne;
11784
11785 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11786 return false;
11787
11788 int n = GET_MODE_PRECISION (mode);
11789 int sh = -1;
11790
11791 if (CONST_INT_P (XEXP (shift, 1)))
11792 {
11793 sh = INTVAL (XEXP (shift, 1));
11794 if (sh < 0 || sh >= n)
11795 return false;
11796 }
11797
11798 rtx_code code = GET_CODE (shift);
11799
11800 /* Convert any shift by 0 to a rotate, to simplify below code. */
11801 if (sh == 0)
11802 code = ROTATE;
11803
11804 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11805 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11806 code = ASHIFT;
11807 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11808 {
11809 code = LSHIFTRT;
11810 sh = n - sh;
11811 }
11812
11813 /* DImode rotates need rld*. */
11814 if (mode == DImode && code == ROTATE)
11815 return (nb == 63 || ne == 0 || ne == sh);
11816
11817 /* SImode rotates need rlw*. */
11818 if (mode == SImode && code == ROTATE)
11819 return (nb < 32 && ne < 32 && sh < 32);
11820
11821 /* Wrap-around masks are only okay for rotates. */
11822 if (ne > nb)
11823 return false;
11824
11825 /* Variable shifts are only okay for rotates. */
11826 if (sh < 0)
11827 return false;
11828
11829 /* Don't allow ASHIFT if the mask is wrong for that. */
11830 if (code == ASHIFT && ne < sh)
11831 return false;
11832
11833 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
11834 if the mask is wrong for that. */
11835 if (nb < 32 && ne < 32 && sh < 32
11836 && !(code == LSHIFTRT && nb >= 32 - sh))
11837 return true;
11838
11839 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
11840 if the mask is wrong for that. */
11841 if (code == LSHIFTRT)
11842 sh = 64 - sh;
11843 if (nb == 63 || ne == 0 || ne == sh)
11844 return !(code == LSHIFTRT && nb >= sh);
11845
11846 return false;
11847 }
11848
11849 /* Return the instruction template for a shift with mask in mode MODE, with
11850 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11851
11852 const char *
11853 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
11854 {
11855 int nb, ne;
11856
11857 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11858 gcc_unreachable ();
11859
11860 if (mode == DImode && ne == 0)
11861 {
11862 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11863 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
11864 operands[3] = GEN_INT (63 - nb);
11865 if (dot)
11866 return "rld%I2cl. %0,%1,%2,%3";
11867 return "rld%I2cl %0,%1,%2,%3";
11868 }
11869
11870 if (mode == DImode && nb == 63)
11871 {
11872 operands[3] = GEN_INT (63 - ne);
11873 if (dot)
11874 return "rld%I2cr. %0,%1,%2,%3";
11875 return "rld%I2cr %0,%1,%2,%3";
11876 }
11877
11878 if (mode == DImode
11879 && GET_CODE (operands[4]) != LSHIFTRT
11880 && CONST_INT_P (operands[2])
11881 && ne == INTVAL (operands[2]))
11882 {
11883 operands[3] = GEN_INT (63 - nb);
11884 if (dot)
11885 return "rld%I2c. %0,%1,%2,%3";
11886 return "rld%I2c %0,%1,%2,%3";
11887 }
11888
11889 if (nb < 32 && ne < 32)
11890 {
11891 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11892 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11893 operands[3] = GEN_INT (31 - nb);
11894 operands[4] = GEN_INT (31 - ne);
11895 /* This insn can also be a 64-bit rotate with a mask that really makes
11896 it just a shift right (with mask); the %h below adjusts for that
11897 situation (the shift count is >= 32 in that case). */
11898 if (dot)
11899 return "rlw%I2nm. %0,%1,%h2,%3,%4";
11900 return "rlw%I2nm %0,%1,%h2,%3,%4";
11901 }
11902
11903 gcc_unreachable ();
11904 }
11905
11906 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
11907 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
11908 ASHIFT, or LSHIFTRT) in mode MODE. */
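/* E.g. a DImode ROTATE by 32 with mask 0xffffffff00000000 is valid: it is
   a single "rldimi rD,rS,32,0", inserting the low word of the source into
   the high word of the destination.  */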
11909
11910 bool
11911 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
11912 {
11913 int nb, ne;
11914
11915 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
11916 return false;
11917
11918 int n = GET_MODE_PRECISION (mode);
11919
11920 int sh = INTVAL (XEXP (shift, 1));
11921 if (sh < 0 || sh >= n)
11922 return false;
11923
11924 rtx_code code = GET_CODE (shift);
11925
11926 /* Convert any shift by 0 to a rotate, to simplify below code. */
11927 if (sh == 0)
11928 code = ROTATE;
11929
11930 /* Convert rotate to simple shift if we can, to make analysis simpler. */
11931 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
11932 code = ASHIFT;
11933 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
11934 {
11935 code = LSHIFTRT;
11936 sh = n - sh;
11937 }
11938
11939 /* DImode rotates need rldimi. */
11940 if (mode == DImode && code == ROTATE)
11941 return (ne == sh);
11942
11943 /* SImode rotates need rlwimi. */
11944 if (mode == SImode && code == ROTATE)
11945 return (nb < 32 && ne < 32 && sh < 32);
11946
11947 /* Wrap-around masks are only okay for rotates. */
11948 if (ne > nb)
11949 return false;
11950
11951 /* Don't allow ASHIFT if the mask is wrong for that. */
11952 if (code == ASHIFT && ne < sh)
11953 return false;
11954
11955 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
11956 if the mask is wrong for that. */
11957 if (nb < 32 && ne < 32 && sh < 32
11958 && !(code == LSHIFTRT && nb >= 32 - sh))
11959 return true;
11960
11961 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
11962 if the mask is wrong for that. */
11963 if (code == LSHIFTRT)
11964 sh = 64 - sh;
11965 if (ne == sh)
11966 return !(code == LSHIFTRT && nb >= sh);
11967
11968 return false;
11969 }
11970
11971 /* Return the instruction template for an insert with mask in mode MODE, with
11972 operands OPERANDS. If DOT is true, make it a record-form instruction. */
11973
11974 const char *
11975 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
11976 {
11977 int nb, ne;
11978
11979 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
11980 gcc_unreachable ();
11981
11982 /* Prefer rldimi because rlwimi is cracked. */
11983 if (TARGET_POWERPC64
11984 && (!dot || mode == DImode)
11985 && GET_CODE (operands[4]) != LSHIFTRT
11986 && ne == INTVAL (operands[2]))
11987 {
11988 operands[3] = GEN_INT (63 - nb);
11989 if (dot)
11990 return "rldimi. %0,%1,%2,%3";
11991 return "rldimi %0,%1,%2,%3";
11992 }
11993
11994 if (nb < 32 && ne < 32)
11995 {
11996 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
11997 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
11998 operands[3] = GEN_INT (31 - nb);
11999 operands[4] = GEN_INT (31 - ne);
12000 if (dot)
12001 return "rlwimi. %0,%1,%2,%3,%4";
12002 return "rlwimi %0,%1,%2,%3,%4";
12003 }
12004
12005 gcc_unreachable ();
12006 }
12007
12008 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
12009 using two machine instructions. */
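/* E.g. C = 0xff00ff00 is not one stretch of ones, but filling its lowest
   hole (bits 16..23) gives 0xffffff00, a valid rlwinm mask, so the AND
   can be done in two insns.  */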
12010
12011 bool
12012 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
12013 {
12014 /* There are two kinds of AND we can handle with two insns:
12015 1) those we can do with two rl* insns;
12016 2) ori[s];xori[s].
12017
12018 We do not handle that last case yet. */
12019
12020 /* If there is just one stretch of ones, we can do it. */
12021 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
12022 return true;
12023
12024 /* Otherwise, fill in the lowest "hole"; if we can do the result with
12025 one insn, we can do the whole thing with two. */
12026 unsigned HOST_WIDE_INT val = INTVAL (c);
12027 unsigned HOST_WIDE_INT bit1 = val & -val;
12028 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
12029 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
12030 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
12031 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
12032 }
12033
12034 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
12035 If EXPAND is true, split rotate-and-mask instructions we generate to
12036 their constituent parts as well (this is used during expand); if DOT
12037 is 1, make the last insn a record-form instruction clobbering the
12038 destination GPR and setting the CC reg (from operands[3]); if 2, set
12039 that GPR as well as the CC reg. */
12040
12041 void
12042 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
12043 {
12044 gcc_assert (!(expand && dot));
12045
12046 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
12047
12048 /* If it is one stretch of ones, it must be DImode (a single stretch of
12049 ones in SImode would already be one rlwinm); shift left, mask, then
12050 shift right. This generates better code than doing the masks without
shifts, or shifting first right and then left. */
12051 int nb, ne;
12052 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
12053 {
12054 gcc_assert (mode == DImode);
12055
12056 int shift = 63 - nb;
12057 if (expand)
12058 {
12059 rtx tmp1 = gen_reg_rtx (DImode);
12060 rtx tmp2 = gen_reg_rtx (DImode);
12061 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
12062 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
12063 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
12064 }
12065 else
12066 {
12067 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
12068 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
12069 emit_move_insn (operands[0], tmp);
12070 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
12071 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
12072 }
12073 return;
12074 }
12075
12076 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
12077 that does the rest. */
12078 unsigned HOST_WIDE_INT bit1 = val & -val;
12079 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
12080 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
12081 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
12082
12083 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
12084 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
12085
12086 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
12087
12088 /* Two "no-rotate"-and-mask instructions, for SImode. */
12089 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
12090 {
12091 gcc_assert (mode == SImode);
12092
12093 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
12094 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
12095 emit_move_insn (reg, tmp);
12096 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
12097 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
12098 return;
12099 }
12100
12101 gcc_assert (mode == DImode);
12102
12103 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
12104 insns; we have to do the first in SImode, because it wraps. */
12105 if (mask2 <= 0xffffffff
12106 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
12107 {
12108 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
12109 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
12110 GEN_INT (mask1));
12111 rtx reg_low = gen_lowpart (SImode, reg);
12112 emit_move_insn (reg_low, tmp);
12113 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
12114 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
12115 return;
12116 }
12117
12118 /* Two rld* insns: rotate, clear the hole in the middle (which now is
12119 at the top end), rotate back and clear the other hole. */
12120 int right = exact_log2 (bit3);
12121 int left = 64 - right;
12122
12123 /* Rotate the mask too. */
12124 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
12125
12126 if (expand)
12127 {
12128 rtx tmp1 = gen_reg_rtx (DImode);
12129 rtx tmp2 = gen_reg_rtx (DImode);
12130 rtx tmp3 = gen_reg_rtx (DImode);
12131 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
12132 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
12133 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
12134 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
12135 }
12136 else
12137 {
12138 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
12139 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
12140 emit_move_insn (operands[0], tmp);
12141 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
12142 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
12143 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
12144 }
12145 }
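/* A worked example of the final two-rld* path, for a hypothetical
   DImode constant val = 0x0ff00000000000ff (ones in bits 0-7 and
   52-59): mask1 = 0xfff00000000000ff wraps, and mask2 =
   0x0fffffffffffffff exceeds 32 bits, so neither earlier path applies.
   With right = 52 and left = 12, the rotated mask1 becomes 0xfffff,
   and the emitted sequence is roughly

       rotldi tmp,src,12
       rldicl tmp,tmp,0,44      # AND with 0xfffff
       rotldi tmp,tmp,52
       rldicl dst,tmp,0,4       # AND with 0x0fffffffffffffff

   (the actual instruction selection is up to the rotate/AND patterns;
   this only shows the shape of the computation).  */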
12146 \f
12147 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
12148 for lfq and stfq insns iff the registers are hard registers. */
12149
12150 int
12151 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
12152 {
12153 /* We might have been passed a SUBREG. */
12154 if (!REG_P (reg1) || !REG_P (reg2))
12155 return 0;
12156
12157 /* We might have been passed non-floating-point registers. */
12158 if (!FP_REGNO_P (REGNO (reg1))
12159 || !FP_REGNO_P (REGNO (reg2)))
12160 return 0;
12161
12162 return (REGNO (reg1) == REGNO (reg2) - 1);
12163 }
12164
12165 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insns.
12166 addr1 and addr2 must be in consecutive memory locations
12167 (addr2 == addr1 + 8). */
12168
12169 int
12170 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
12171 {
12172 rtx addr1, addr2;
12173 unsigned int reg1, reg2;
12174 int offset1, offset2;
12175
12176 /* The mems cannot be volatile. */
12177 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
12178 return 0;
12179
12180 addr1 = XEXP (mem1, 0);
12181 addr2 = XEXP (mem2, 0);
12182
12183 /* Extract an offset (if used) from the first addr. */
12184 if (GET_CODE (addr1) == PLUS)
12185 {
12186 /* If not a REG, return zero. */
12187 if (!REG_P (XEXP (addr1, 0)))
12188 return 0;
12189 else
12190 {
12191 reg1 = REGNO (XEXP (addr1, 0));
12192 /* The offset must be constant! */
12193 if (!CONST_INT_P (XEXP (addr1, 1)))
12194 return 0;
12195 offset1 = INTVAL (XEXP (addr1, 1));
12196 }
12197 }
12198 else if (!REG_P (addr1))
12199 return 0;
12200 else
12201 {
12202 reg1 = REGNO (addr1);
12203 /* This was a simple (mem (reg)) expression. Offset is 0. */
12204 offset1 = 0;
12205 }
12206
12207 /* And now for the second addr. */
12208 if (GET_CODE (addr2) == PLUS)
12209 {
12210 /* If not a REG, return zero. */
12211 if (!REG_P (XEXP (addr2, 0)))
12212 return 0;
12213 else
12214 {
12215 reg2 = REGNO (XEXP (addr2, 0));
12216 /* The offset must be constant. */
12217 if (!CONST_INT_P (XEXP (addr2, 1)))
12218 return 0;
12219 offset2 = INTVAL (XEXP (addr2, 1));
12220 }
12221 }
12222 else if (!REG_P (addr2))
12223 return 0;
12224 else
12225 {
12226 reg2 = REGNO (addr2);
12227 /* This was a simple (mem (reg)) expression. Offset is 0. */
12228 offset2 = 0;
12229 }
12230
12231 /* Both of these must have the same base register. */
12232 if (reg1 != reg2)
12233 return 0;
12234
12235 /* The offset for the second addr must be 8 more than the first addr. */
12236 if (offset2 != offset1 + 8)
12237 return 0;
12238
12239 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
12240 instructions. */
12241 return 1;
12242 }
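/* As an illustration (a sketch, assuming a target that has lfq/stfq):
   the peephole guarded by these two predicates turns

       lfd f4,0(r3)
       lfd f5,8(r3)

   into a single "lfq f4,0(r3)", since f4/f5 are consecutive hard
   registers and the second address is the first plus 8.  */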
12243 \f
12244 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we
12245 need to use DDmode, in all other cases we can use the same mode. */
12246 static machine_mode
12247 rs6000_secondary_memory_needed_mode (machine_mode mode)
12248 {
12249 if (lra_in_progress && mode == SDmode)
12250 return DDmode;
12251 return mode;
12252 }
12253
12254 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
12255 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
12256 only work on the traditional altivec registers, note if an altivec register
12257 was chosen. */
12258
12259 static enum rs6000_reg_type
12260 register_to_reg_type (rtx reg, bool *is_altivec)
12261 {
12262 HOST_WIDE_INT regno;
12263 enum reg_class rclass;
12264
12265 if (SUBREG_P (reg))
12266 reg = SUBREG_REG (reg);
12267
12268 if (!REG_P (reg))
12269 return NO_REG_TYPE;
12270
12271 regno = REGNO (reg);
12272 if (!HARD_REGISTER_NUM_P (regno))
12273 {
12274 if (!lra_in_progress && !reload_completed)
12275 return PSEUDO_REG_TYPE;
12276
12277 regno = true_regnum (reg);
12278 if (regno < 0 || !HARD_REGISTER_NUM_P (regno))
12279 return PSEUDO_REG_TYPE;
12280 }
12281
12282 gcc_assert (regno >= 0);
12283
12284 if (is_altivec && ALTIVEC_REGNO_P (regno))
12285 *is_altivec = true;
12286
12287 rclass = rs6000_regno_regclass[regno];
12288 return reg_class_to_reg_type[(int)rclass];
12289 }
12290
12291 /* Helper function to return the cost of adding a TOC entry address. */
12292
12293 static inline int
12294 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
12295 {
12296 int ret;
12297
12298 if (TARGET_CMODEL != CMODEL_SMALL)
12299 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
12300
12301 else
12302 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
12303
12304 return ret;
12305 }
12306
12307 /* Helper function for rs6000_secondary_reload to determine whether the memory
12308 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
12309 needs reloading. Return negative if the memory is not handled by the memory
12310 helper functions and to try a different reload method, 0 if no additional
12311 instructions are need, and positive to give the extra cost for the
12312 memory. */
12313
12314 static int
12315 rs6000_secondary_reload_memory (rtx addr,
12316 enum reg_class rclass,
12317 machine_mode mode)
12318 {
12319 int extra_cost = 0;
12320 rtx reg, and_arg, plus_arg0, plus_arg1;
12321 addr_mask_type addr_mask;
12322 const char *type = NULL;
12323 const char *fail_msg = NULL;
12324
12325 if (GPR_REG_CLASS_P (rclass))
12326 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
12327
12328 else if (rclass == FLOAT_REGS)
12329 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
12330
12331 else if (rclass == ALTIVEC_REGS)
12332 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
12333
12334 /* For the combined VSX_REGS, turn off Altivec AND -16. */
12335 else if (rclass == VSX_REGS)
12336 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
12337 & ~RELOAD_REG_AND_M16);
12338
12339 /* If the register allocator hasn't made up its mind yet on the register
12340 class to use, settle on sensible defaults. */
12341 else if (rclass == NO_REGS)
12342 {
12343 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
12344 & ~RELOAD_REG_AND_M16);
12345
12346 if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
12347 addr_mask &= ~(RELOAD_REG_INDEXED
12348 | RELOAD_REG_PRE_INCDEC
12349 | RELOAD_REG_PRE_MODIFY);
12350 }
12351
12352 else
12353 addr_mask = 0;
12354
12355 /* If the register isn't valid in this register class, just return now. */
12356 if ((addr_mask & RELOAD_REG_VALID) == 0)
12357 {
12358 if (TARGET_DEBUG_ADDR)
12359 {
12360 fprintf (stderr,
12361 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12362 "not valid in class\n",
12363 GET_MODE_NAME (mode), reg_class_names[rclass]);
12364 debug_rtx (addr);
12365 }
12366
12367 return -1;
12368 }
12369
12370 switch (GET_CODE (addr))
12371 {
12372 /* Does the register class support auto update forms for this mode? We
12373 don't need a scratch register, since the powerpc only supports
12374 PRE_INC, PRE_DEC, and PRE_MODIFY. */
12375 case PRE_INC:
12376 case PRE_DEC:
12377 reg = XEXP (addr, 0);
12378 if (!base_reg_operand (reg, GET_MODE (reg)))
12379 {
12380 fail_msg = "no base register #1";
12381 extra_cost = -1;
12382 }
12383
12384 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
12385 {
12386 extra_cost = 1;
12387 type = "update";
12388 }
12389 break;
12390
12391 case PRE_MODIFY:
12392 reg = XEXP (addr, 0);
12393 plus_arg1 = XEXP (addr, 1);
12394 if (!base_reg_operand (reg, GET_MODE (reg))
12395 || GET_CODE (plus_arg1) != PLUS
12396 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
12397 {
12398 fail_msg = "bad PRE_MODIFY";
12399 extra_cost = -1;
12400 }
12401
12402 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
12403 {
12404 extra_cost = 1;
12405 type = "update";
12406 }
12407 break;
12408
12409 /* Do we need to simulate AND -16 to clear the bottom address bits used
12410 in VMX load/stores? Only allow the AND for vector sizes. */
12411 case AND:
12412 and_arg = XEXP (addr, 0);
12413 if (GET_MODE_SIZE (mode) != 16
12414 || !CONST_INT_P (XEXP (addr, 1))
12415 || INTVAL (XEXP (addr, 1)) != -16)
12416 {
12417 fail_msg = "bad Altivec AND #1";
12418 extra_cost = -1;
12419 }
12420
12421 if (rclass != ALTIVEC_REGS)
12422 {
12423 if (legitimate_indirect_address_p (and_arg, false))
12424 extra_cost = 1;
12425
12426 else if (legitimate_indexed_address_p (and_arg, false))
12427 extra_cost = 2;
12428
12429 else
12430 {
12431 fail_msg = "bad Altivec AND #2";
12432 extra_cost = -1;
12433 }
12434
12435 type = "and";
12436 }
12437 break;
12438
12439 /* If this is an indirect address, make sure it is a base register. */
12440 case REG:
12441 case SUBREG:
12442 if (!legitimate_indirect_address_p (addr, false))
12443 {
12444 extra_cost = 1;
12445 type = "move";
12446 }
12447 break;
12448
12449 /* If this is an indexed address, make sure the register class can handle
12450 indexed addresses for this mode. */
12451 case PLUS:
12452 plus_arg0 = XEXP (addr, 0);
12453 plus_arg1 = XEXP (addr, 1);
12454
12455 /* (plus (plus (reg) (constant)) (constant)) is generated during
12456 push_reload processing, so handle it now. */
12457 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
12458 {
12459 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12460 {
12461 extra_cost = 1;
12462 type = "offset";
12463 }
12464 }
12465
12466 /* (plus (plus (reg) (constant)) (reg)) is also generated during
12467 push_reload processing, so handle it now. */
12468 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
12469 {
12470 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
12471 {
12472 extra_cost = 1;
12473 type = "indexed #2";
12474 }
12475 }
12476
12477 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
12478 {
12479 fail_msg = "no base register #2";
12480 extra_cost = -1;
12481 }
12482
12483 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
12484 {
12485 if ((addr_mask & RELOAD_REG_INDEXED) == 0
12486 || !legitimate_indexed_address_p (addr, false))
12487 {
12488 extra_cost = 1;
12489 type = "indexed";
12490 }
12491 }
12492
12493 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
12494 && CONST_INT_P (plus_arg1))
12495 {
12496 if (!quad_address_offset_p (INTVAL (plus_arg1)))
12497 {
12498 extra_cost = 1;
12499 type = "vector d-form offset";
12500 }
12501 }
12502
12503 /* Make sure the register class can handle offset addresses. */
12504 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
12505 {
12506 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12507 {
12508 extra_cost = 1;
12509 type = "offset #2";
12510 }
12511 }
12512
12513 else
12514 {
12515 fail_msg = "bad PLUS";
12516 extra_cost = -1;
12517 }
12518
12519 break;
12520
12521 case LO_SUM:
12522 /* Quad offsets are restricted and can't handle normal addresses. */
12523 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12524 {
12525 extra_cost = -1;
12526 type = "vector d-form lo_sum";
12527 }
12528
12529 else if (!legitimate_lo_sum_address_p (mode, addr, false))
12530 {
12531 fail_msg = "bad LO_SUM";
12532 extra_cost = -1;
12533 }
12534
12535 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12536 {
12537 extra_cost = 1;
12538 type = "lo_sum";
12539 }
12540 break;
12541
12542 /* Static addresses need to create a TOC entry. */
12543 case CONST:
12544 case SYMBOL_REF:
12545 case LABEL_REF:
12546 if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12547 {
12548 extra_cost = -1;
12549 type = "vector d-form lo_sum #2";
12550 }
12551
12552 else
12553 {
12554 type = "address";
12555 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
12556 }
12557 break;
12558
12559 /* TOC references look like offsettable memory. */
12560 case UNSPEC:
12561 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
12562 {
12563 fail_msg = "bad UNSPEC";
12564 extra_cost = -1;
12565 }
12566
12567 else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
12568 {
12569 extra_cost = -1;
12570 type = "vector d-form lo_sum #3";
12571 }
12572
12573 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
12574 {
12575 extra_cost = 1;
12576 type = "toc reference";
12577 }
12578 break;
12579
12580 default:
12581 {
12582 fail_msg = "bad address";
12583 extra_cost = -1;
12584 }
12585 }
12586
12587 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
12588 {
12589 if (extra_cost < 0)
12590 fprintf (stderr,
12591 "rs6000_secondary_reload_memory error: mode = %s, "
12592 "class = %s, addr_mask = '%s', %s\n",
12593 GET_MODE_NAME (mode),
12594 reg_class_names[rclass],
12595 rs6000_debug_addr_mask (addr_mask, false),
12596 (fail_msg != NULL) ? fail_msg : "<bad address>");
12597
12598 else
12599 fprintf (stderr,
12600 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
12601 "addr_mask = '%s', extra cost = %d, %s\n",
12602 GET_MODE_NAME (mode),
12603 reg_class_names[rclass],
12604 rs6000_debug_addr_mask (addr_mask, false),
12605 extra_cost,
12606 (type) ? type : "<none>");
12607
12608 debug_rtx (addr);
12609 }
12610
12611 return extra_cost;
12612 }
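/* Example of the costs above (illustrative): reloading a DFmode value
   into ALTIVEC_REGS from the address (plus (reg r9) (const_int 8))
   falls into the "offset #2" case when that class lacks
   RELOAD_REG_OFFSET for the mode, returning an extra cost of 1 -- the
   reload helpers must first move the offset address into a base
   register before an indexed or indirect load can be issued.  */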
12613
12614 /* Helper function for rs6000_secondary_reload to return true if a move to a
12615 different register class is really a simple move. */
12616
12617 static bool
12618 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
12619 enum rs6000_reg_type from_type,
12620 machine_mode mode)
12621 {
12622 int size = GET_MODE_SIZE (mode);
12623
12624 /* Add support for various direct moves available. In this function, we only
12625 look at cases where we don't need any extra registers, and one or more
12626 simple move insns are issued. Small integers were originally not allowed
12627 in FPR/VSX registers. Single precision binary floating point is not a
12628 simple move because we need to convert to the single precision memory layout.
12629 The 4-byte SDmode can be moved. TDmode values are disallowed since they
12630 need special direct move handling, which we do not support yet. */
12631 if (TARGET_DIRECT_MOVE
12632 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12633 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
12634 {
12635 if (TARGET_POWERPC64)
12636 {
12637 /* ISA 2.07: MTVSRD or MFVSRD. */
12638 if (size == 8)
12639 return true;
12640
12641 /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */
12642 if (size == 16 && TARGET_P9_VECTOR && mode != TDmode)
12643 return true;
12644 }
12645
12646 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12647 if (TARGET_P8_VECTOR)
12648 {
12649 if (mode == SImode)
12650 return true;
12651
12652 if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode))
12653 return true;
12654 }
12655
12656 /* ISA 2.07: MTVSRWZ or MFVSRWZ. */
12657 if (mode == SDmode)
12658 return true;
12659 }
12660
12661 /* Move to/from SPR. */
12662 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
12663 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
12664 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
12665 return true;
12666
12667 return false;
12668 }
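/* For instance (a sketch, assuming ISA 2.07 direct moves are enabled):
   a DImode copy between a GPR and a VSX register is "simple" -- a
   single mtvsrd/mfvsrd -- so no scratch register is needed, whereas an
   SFmode copy is not simple, because the value must be converted to or
   from the scalar single-precision register layout (handled by
   rs6000_secondary_reload_direct_move below).  */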
12669
12670 /* Direct move helper function for rs6000_secondary_reload. Handle all of the
12671 special direct moves that involve allocating an extra register. Return true
12672 if there is a helper function for the move, filling in SRI with its insn
12673 code and extra cost, and false if not. */
12674
12675 static bool
12676 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
12677 enum rs6000_reg_type from_type,
12678 machine_mode mode,
12679 secondary_reload_info *sri,
12680 bool altivec_p)
12681 {
12682 bool ret = false;
12683 enum insn_code icode = CODE_FOR_nothing;
12684 int cost = 0;
12685 int size = GET_MODE_SIZE (mode);
12686
12687 if (TARGET_POWERPC64 && size == 16)
12688 {
12689 /* Handle moving 128-bit values from GPRs to VSX registers on
12690 ISA 2.07 (power8, power9) when running in 64-bit mode using
12691 XXPERMDI to glue the two 64-bit values back together. */
12692 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12693 {
12694 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
12695 icode = reg_addr[mode].reload_vsx_gpr;
12696 }
12697
12698 /* Handle moving 128-bit values from VSX registers to GPRs on
12699 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
12700 bottom 64-bit value. */
12701 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12702 {
12703 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
12704 icode = reg_addr[mode].reload_gpr_vsx;
12705 }
12706 }
12707
12708 else if (TARGET_POWERPC64 && mode == SFmode)
12709 {
12710 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
12711 {
12712 cost = 3; /* xscvdpspn, mfvsrd, and. */
12713 icode = reg_addr[mode].reload_gpr_vsx;
12714 }
12715
12716 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
12717 {
12718 cost = 2; /* mtvsrz, xscvspdpn. */
12719 icode = reg_addr[mode].reload_vsx_gpr;
12720 }
12721 }
12722
12723 else if (!TARGET_POWERPC64 && size == 8)
12724 {
12725 /* Handle moving 64-bit values from GPRs to floating point registers on
12726 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
12727 32-bit values back together. Altivec register classes must be handled
12728 specially since a different instruction is used, and the secondary
12729 reload support requires a single instruction class in the scratch
12730 register constraint. However, right now TFmode is not allowed in
12731 Altivec registers, so the pattern will never match. */
12732 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
12733 {
12734 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
12735 icode = reg_addr[mode].reload_fpr_gpr;
12736 }
12737 }
12738
12739 if (icode != CODE_FOR_nothing)
12740 {
12741 ret = true;
12742 if (sri)
12743 {
12744 sri->icode = icode;
12745 sri->extra_cost = cost;
12746 }
12747 }
12748
12749 return ret;
12750 }
12751
12752 /* Return whether a move between two register classes can be done either
12753 directly (simple move) or via a pattern that uses a single extra temporary
12754 (using ISA 2.07's direct move in this case). */
12755
12756 static bool
12757 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
12758 enum rs6000_reg_type from_type,
12759 machine_mode mode,
12760 secondary_reload_info *sri,
12761 bool altivec_p)
12762 {
12763 /* Fall back to load/store reloads if either type is not a register. */
12764 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
12765 return false;
12766
12767 /* If we haven't allocated registers yet, assume the move can be done for the
12768 standard register types. */
12769 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
12770 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
12771 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
12772 return true;
12773
12774 /* A move within the same set of registers is a simple move for non-specialized
12775 registers. */
12776 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
12777 return true;
12778
12779 /* Check whether a simple move can be done directly. */
12780 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
12781 {
12782 if (sri)
12783 {
12784 sri->icode = CODE_FOR_nothing;
12785 sri->extra_cost = 0;
12786 }
12787 return true;
12788 }
12789
12790 /* Now check if we can do it in a few steps. */
12791 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
12792 altivec_p);
12793 }
12794
12795 /* Inform reload about cases where moving X with a mode MODE to a register in
12796 RCLASS requires an extra scratch or immediate register. Return the class
12797 needed for the immediate register.
12798
12799 For VSX and Altivec, we may need a register to convert sp+offset into
12800 reg+sp.
12801
12802 For misaligned 64-bit gpr loads and stores we need a register to
12803 convert an offset address to indirect. */
12804
12805 static reg_class_t
12806 rs6000_secondary_reload (bool in_p,
12807 rtx x,
12808 reg_class_t rclass_i,
12809 machine_mode mode,
12810 secondary_reload_info *sri)
12811 {
12812 enum reg_class rclass = (enum reg_class) rclass_i;
12813 reg_class_t ret = ALL_REGS;
12814 enum insn_code icode;
12815 bool default_p = false;
12816 bool done_p = false;
12817
12818 /* Allow subreg of memory before/during reload. */
12819 bool memory_p = (MEM_P (x)
12820 || (!reload_completed && SUBREG_P (x)
12821 && MEM_P (SUBREG_REG (x))));
12822
12823 sri->icode = CODE_FOR_nothing;
12824 sri->t_icode = CODE_FOR_nothing;
12825 sri->extra_cost = 0;
12826 icode = ((in_p)
12827 ? reg_addr[mode].reload_load
12828 : reg_addr[mode].reload_store);
12829
12830 if (REG_P (x) || register_operand (x, mode))
12831 {
12832 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
12833 bool altivec_p = (rclass == ALTIVEC_REGS);
12834 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
12835
12836 if (!in_p)
12837 std::swap (to_type, from_type);
12838
12839 /* Can we do a direct move of some sort? */
12840 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
12841 altivec_p))
12842 {
12843 icode = (enum insn_code)sri->icode;
12844 default_p = false;
12845 done_p = true;
12846 ret = NO_REGS;
12847 }
12848 }
12849
12850 /* Make sure 0.0 is not reloaded or forced into memory. */
12851 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
12852 {
12853 ret = NO_REGS;
12854 default_p = false;
12855 done_p = true;
12856 }
12857
12858 /* If this is a scalar floating point value and we want to load it into the
12859 traditional Altivec registers, move it via a traditional floating
12860 point register, unless we have D-form addressing. Also make sure that
12861 non-zero constants use a FPR. */
12862 if (!done_p && reg_addr[mode].scalar_in_vmx_p
12863 && !mode_supports_vmx_dform (mode)
12864 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
12865 && (memory_p || CONST_DOUBLE_P (x)))
12866 {
12867 ret = FLOAT_REGS;
12868 default_p = false;
12869 done_p = true;
12870 }
12871
12872 /* Handle reload of load/stores if we have reload helper functions. */
12873 if (!done_p && icode != CODE_FOR_nothing && memory_p)
12874 {
12875 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
12876 mode);
12877
12878 if (extra_cost >= 0)
12879 {
12880 done_p = true;
12881 ret = NO_REGS;
12882 if (extra_cost > 0)
12883 {
12884 sri->extra_cost = extra_cost;
12885 sri->icode = icode;
12886 }
12887 }
12888 }
12889
12890 /* Handle unaligned loads and stores of integer registers. */
12891 if (!done_p && TARGET_POWERPC64
12892 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12893 && memory_p
12894 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
12895 {
12896 rtx addr = XEXP (x, 0);
12897 rtx off = address_offset (addr);
12898
12899 if (off != NULL_RTX)
12900 {
12901 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12902 unsigned HOST_WIDE_INT offset = INTVAL (off);
12903
12904 /* We need a secondary reload when our legitimate_address_p
12905 says the address is good (as otherwise the entire address
12906 will be reloaded), and the offset is not a multiple of
12907 four or we have an address wrap. Address wrap will only
12908 occur for LO_SUMs since legitimate_offset_address_p
12909 rejects addresses for 16-byte mems that will wrap. */
12910 if (GET_CODE (addr) == LO_SUM
12911 ? (1 /* legitimate_address_p allows any offset for lo_sum */
12912 && ((offset & 3) != 0
12913 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
12914 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
12915 && (offset & 3) != 0))
12916 {
12917 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
12918 if (in_p)
12919 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
12920 : CODE_FOR_reload_di_load);
12921 else
12922 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
12923 : CODE_FOR_reload_di_store);
12924 sri->extra_cost = 2;
12925 ret = NO_REGS;
12926 done_p = true;
12927 }
12928 else
12929 default_p = true;
12930 }
12931 else
12932 default_p = true;
12933 }
12934
12935 if (!done_p && !TARGET_POWERPC64
12936 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
12937 && memory_p
12938 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
12939 {
12940 rtx addr = XEXP (x, 0);
12941 rtx off = address_offset (addr);
12942
12943 if (off != NULL_RTX)
12944 {
12945 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
12946 unsigned HOST_WIDE_INT offset = INTVAL (off);
12947
12948 /* We need a secondary reload when our legitimate_address_p
12949 says the address is good (as otherwise the entire address
12950 will be reloaded), and we have a wrap.
12951
12952 legitimate_lo_sum_address_p allows LO_SUM addresses to
12953 have any offset so test for wrap in the low 16 bits.
12954
12955 legitimate_offset_address_p checks for the range
12956 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
12957 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
12958 [0x7ff4,0x7fff] respectively, so test for the
12959 intersection of these ranges, [0x7ffc,0x7fff] and
12960 [0x7ff4,0x7ff7] respectively.
12961
12962 Note that the address we see here may have been
12963 manipulated by legitimize_reload_address. */
12964 if (GET_CODE (addr) == LO_SUM
12965 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
12966 : offset - (0x8000 - extra) < UNITS_PER_WORD)
12967 {
12968 if (in_p)
12969 sri->icode = CODE_FOR_reload_si_load;
12970 else
12971 sri->icode = CODE_FOR_reload_si_store;
12972 sri->extra_cost = 2;
12973 ret = NO_REGS;
12974 done_p = true;
12975 }
12976 else
12977 default_p = true;
12978 }
12979 else
12980 default_p = true;
12981 }
12982
12983 if (!done_p)
12984 default_p = true;
12985
12986 if (default_p)
12987 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
12988
12989 gcc_assert (ret != ALL_REGS);
12990
12991 if (TARGET_DEBUG_ADDR)
12992 {
12993 fprintf (stderr,
12994 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
12995 "mode = %s",
12996 reg_class_names[ret],
12997 in_p ? "true" : "false",
12998 reg_class_names[rclass],
12999 GET_MODE_NAME (mode));
13000
13001 if (reload_completed)
13002 fputs (", after reload", stderr);
13003
13004 if (!done_p)
13005 fputs (", done_p not set", stderr);
13006
13007 if (default_p)
13008 fputs (", default secondary reload", stderr);
13009
13010 if (sri->icode != CODE_FOR_nothing)
13011 fprintf (stderr, ", reload func = %s, extra cost = %d",
13012 insn_data[sri->icode].name, sri->extra_cost);
13013
13014 else if (sri->extra_cost > 0)
13015 fprintf (stderr, ", extra cost = %d", sri->extra_cost);
13016
13017 fputs ("\n", stderr);
13018 debug_rtx (x);
13019 }
13020
13021 return ret;
13022 }
13023
13024 /* Better tracing for rs6000_secondary_reload_inner. */
13025
13026 static void
13027 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
13028 bool store_p)
13029 {
13030 rtx set, clobber;
13031
13032 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
13033
13034 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
13035 store_p ? "store" : "load");
13036
13037 if (store_p)
13038 set = gen_rtx_SET (mem, reg);
13039 else
13040 set = gen_rtx_SET (reg, mem);
13041
13042 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
13043 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
13044 }
13045
13046 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
13047 ATTRIBUTE_NORETURN;
13048
13049 static void
13050 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
13051 bool store_p)
13052 {
13053 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
13054 gcc_unreachable ();
13055 }
13056
13057 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
13058 reload helper functions. These were identified in
13059 rs6000_secondary_reload_memory, and if reload decided to use the secondary
13060 reload, it calls the insns:
13061 reload_<RELOAD:mode>_<P:mptrsize>_store
13062 reload_<RELOAD:mode>_<P:mptrsize>_load
13063
13064 which in turn calls this function, to do whatever is necessary to create
13065 valid addresses. */
13066
13067 void
13068 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
13069 {
13070 int regno = true_regnum (reg);
13071 machine_mode mode = GET_MODE (reg);
13072 addr_mask_type addr_mask;
13073 rtx addr;
13074 rtx new_addr;
13075 rtx op_reg, op0, op1;
13076 rtx and_op;
13077 rtx cc_clobber;
13078 rtvec rv;
13079
13080 if (regno < 0 || !HARD_REGISTER_NUM_P (regno) || !MEM_P (mem)
13081 || !base_reg_operand (scratch, GET_MODE (scratch)))
13082 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13083
13084 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
13085 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
13086
13087 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
13088 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
13089
13090 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
13091 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
13092
13093 else
13094 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13095
13096 /* Make sure the mode is valid in this register class. */
13097 if ((addr_mask & RELOAD_REG_VALID) == 0)
13098 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13099
13100 if (TARGET_DEBUG_ADDR)
13101 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
13102
13103 new_addr = addr = XEXP (mem, 0);
13104 switch (GET_CODE (addr))
13105 {
13106 /* Does the register class support auto update forms for this mode? If
13107 not, do the update now. We don't need a scratch register, since the
13108 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
13109 case PRE_INC:
13110 case PRE_DEC:
13111 op_reg = XEXP (addr, 0);
13112 if (!base_reg_operand (op_reg, Pmode))
13113 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13114
13115 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
13116 {
13117 int delta = GET_MODE_SIZE (mode);
13118 if (GET_CODE (addr) == PRE_DEC)
13119 delta = -delta;
13120 emit_insn (gen_add2_insn (op_reg, GEN_INT (delta)));
13121 new_addr = op_reg;
13122 }
13123 break;
13124
13125 case PRE_MODIFY:
13126 op0 = XEXP (addr, 0);
13127 op1 = XEXP (addr, 1);
13128 if (!base_reg_operand (op0, Pmode)
13129 || GET_CODE (op1) != PLUS
13130 || !rtx_equal_p (op0, XEXP (op1, 0)))
13131 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13132
13133 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
13134 {
13135 emit_insn (gen_rtx_SET (op0, op1));
13136 new_addr = reg;
13137 }
13138 break;
13139
13140 /* Do we need to simulate AND -16 to clear the bottom address bits used
13141 in VMX load/stores? */
13142 case AND:
13143 op0 = XEXP (addr, 0);
13144 op1 = XEXP (addr, 1);
13145 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
13146 {
13147 if (REG_P (op0) || SUBREG_P (op0))
13148 op_reg = op0;
13149
13150 else if (GET_CODE (op1) == PLUS)
13151 {
13152 emit_insn (gen_rtx_SET (scratch, op1));
13153 op_reg = scratch;
13154 }
13155
13156 else
13157 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13158
13159 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
13160 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
13161 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
13162 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
13163 new_addr = scratch;
13164 }
13165 break;
13166
13167 /* If this is an indirect address, make sure it is a base register. */
13168 case REG:
13169 case SUBREG:
13170 if (!base_reg_operand (addr, GET_MODE (addr)))
13171 {
13172 emit_insn (gen_rtx_SET (scratch, addr));
13173 new_addr = scratch;
13174 }
13175 break;
13176
13177 /* If this is an indexed address, make sure the register class can handle
13178 indexed addresses for this mode. */
13179 case PLUS:
13180 op0 = XEXP (addr, 0);
13181 op1 = XEXP (addr, 1);
13182 if (!base_reg_operand (op0, Pmode))
13183 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13184
13185 else if (int_reg_operand (op1, Pmode))
13186 {
13187 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
13188 {
13189 emit_insn (gen_rtx_SET (scratch, addr));
13190 new_addr = scratch;
13191 }
13192 }
13193
13194 else if (mode_supports_dq_form (mode) && CONST_INT_P (op1))
13195 {
13196 if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
13197 || !quad_address_p (addr, mode, false))
13198 {
13199 emit_insn (gen_rtx_SET (scratch, addr));
13200 new_addr = scratch;
13201 }
13202 }
13203
13204 /* Make sure the register class can handle offset addresses. */
13205 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
13206 {
13207 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
13208 {
13209 emit_insn (gen_rtx_SET (scratch, addr));
13210 new_addr = scratch;
13211 }
13212 }
13213
13214 else
13215 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13216
13217 break;
13218
13219 case LO_SUM:
13220 op0 = XEXP (addr, 0);
13221 op1 = XEXP (addr, 1);
13222 if (!base_reg_operand (op0, Pmode))
13223 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13224
13225 else if (int_reg_operand (op1, Pmode))
13226 {
13227 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
13228 {
13229 emit_insn (gen_rtx_SET (scratch, addr));
13230 new_addr = scratch;
13231 }
13232 }
13233
13234 /* Quad offsets are restricted and can't handle normal addresses. */
13235 else if (mode_supports_dq_form (mode))
13236 {
13237 emit_insn (gen_rtx_SET (scratch, addr));
13238 new_addr = scratch;
13239 }
13240
13241 /* Make sure the register class can handle offset addresses. */
13242 else if (legitimate_lo_sum_address_p (mode, addr, false))
13243 {
13244 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
13245 {
13246 emit_insn (gen_rtx_SET (scratch, addr));
13247 new_addr = scratch;
13248 }
13249 }
13250
13251 else
13252 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13253
13254 break;
13255
13256 case SYMBOL_REF:
13257 case CONST:
13258 case LABEL_REF:
13259 rs6000_emit_move (scratch, addr, Pmode);
13260 new_addr = scratch;
13261 break;
13262
13263 default:
13264 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
13265 }
13266
13267 /* Adjust the address if it changed. */
13268 if (addr != new_addr)
13269 {
13270 mem = replace_equiv_address_nv (mem, new_addr);
13271 if (TARGET_DEBUG_ADDR)
13272 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
13273 }
13274
13275 /* Now create the move. */
13276 if (store_p)
13277 emit_insn (gen_rtx_SET (mem, reg));
13278 else
13279 emit_insn (gen_rtx_SET (reg, mem));
13280
13281 return;
13282 }
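/* For example (illustrative): if MEM uses (pre_inc (reg r9)) but the
   register class has no update form for MODE, the code above emits an
   explicit "addi r9,r9,<size>" via gen_add2_insn and then uses plain
   (reg r9) as the new address, so the final load or store needs no
   auto-update support.  */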
13283
13284 /* Convert reloads involving 64-bit gprs and misaligned offset
13285 addressing, or multiple 32-bit gprs and offsets that are too large,
13286 to use indirect addressing. */
13287
13288 void
13289 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
13290 {
13291 int regno = true_regnum (reg);
13292 enum reg_class rclass;
13293 rtx addr;
13294 rtx scratch_or_premodify = scratch;
13295
13296 if (TARGET_DEBUG_ADDR)
13297 {
13298 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
13299 store_p ? "store" : "load");
13300 fprintf (stderr, "reg:\n");
13301 debug_rtx (reg);
13302 fprintf (stderr, "mem:\n");
13303 debug_rtx (mem);
13304 fprintf (stderr, "scratch:\n");
13305 debug_rtx (scratch);
13306 }
13307
13308 gcc_assert (regno >= 0 && HARD_REGISTER_NUM_P (regno));
13309 gcc_assert (MEM_P (mem));
13310 rclass = REGNO_REG_CLASS (regno);
13311 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
13312 addr = XEXP (mem, 0);
13313
13314 if (GET_CODE (addr) == PRE_MODIFY)
13315 {
13316 gcc_assert (REG_P (XEXP (addr, 0))
13317 && GET_CODE (XEXP (addr, 1)) == PLUS
13318 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
13319 scratch_or_premodify = XEXP (addr, 0);
13320 addr = XEXP (addr, 1);
13321 }
13322 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
13323
13324 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
13325
13326 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
13327
13328 /* Now create the move. */
13329 if (store_p)
13330 emit_insn (gen_rtx_SET (mem, reg));
13331 else
13332 emit_insn (gen_rtx_SET (reg, mem));
13333
13334 return;
13335 }
13336
13337 /* Given an rtx X being reloaded into a reg required to be
13338 in class CLASS, return the class of reg to actually use.
13339 In general this is just CLASS; but on some machines
13340 in some cases it is preferable to use a more restrictive class.
13341
13342 On the RS/6000, we have to return NO_REGS when we want to reload a
13343 floating-point CONST_DOUBLE to force it to be copied to memory.
13344
13345 We also don't want to reload integer values into floating-point
13346 registers if we can at all help it. In fact, this can
13347 cause reload to die, if it tries to generate a reload of CTR
13348 into a FP register and discovers it doesn't have the memory location
13349 required.
13350
13351 ??? Would it be a good idea to have reload do the converse, that is
13352 try to reload floating modes into FP registers if possible?
13353 */
13354
13355 static enum reg_class
13356 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
13357 {
13358 machine_mode mode = GET_MODE (x);
13359 bool is_constant = CONSTANT_P (x);
13360
13361 /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
13362 reload class for it. */
13363 if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13364 && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
13365 return NO_REGS;
13366
13367 if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
13368 && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
13369 return NO_REGS;
13370
13371 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
13372 the reloading of address expressions using PLUS into floating point
13373 registers. */
13374 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
13375 {
13376 if (is_constant)
13377 {
13378 /* Zero is always allowed in all VSX registers. */
13379 if (x == CONST0_RTX (mode))
13380 return rclass;
13381
13382 /* If this is a vector constant that can be formed with a few Altivec
13383 instructions, we want altivec registers. */
13384 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
13385 return ALTIVEC_REGS;
13386
13387 /* If this is an integer constant that can easily be loaded into
13388 vector registers, allow it. */
13389 if (CONST_INT_P (x))
13390 {
13391 HOST_WIDE_INT value = INTVAL (x);
13392
13393 /* ISA 2.07 can generate -1 in all registers with XXLORC. ISA
13394 2.06 can generate it in the Altivec registers with
13395 VSPLTI<x>. */
13396 if (value == -1)
13397 {
13398 if (TARGET_P8_VECTOR)
13399 return rclass;
13400 else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
13401 return ALTIVEC_REGS;
13402 else
13403 return NO_REGS;
13404 }
13405
13406 /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
13407 a sign extend in the Altivec registers. */
13408 if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
13409 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
13410 return ALTIVEC_REGS;
13411 }
13412
13413 /* Force constant to memory. */
13414 return NO_REGS;
13415 }
13416
13417 /* D-form addressing can easily reload the value. */
13418 if (mode_supports_vmx_dform (mode)
13419 || mode_supports_dq_form (mode))
13420 return rclass;
13421
13422 /* If this is a scalar floating point value and we don't have D-form
13423 addressing, prefer the traditional floating point registers so that we
13424 can use D-form (register+offset) addressing. */
13425 if (rclass == VSX_REGS
13426 && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
13427 return FLOAT_REGS;
13428
13429 /* Prefer the Altivec registers if Altivec is handling the vector
13430 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
13431 loads. */
13432 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
13433 || mode == V1TImode)
13434 return ALTIVEC_REGS;
13435
13436 return rclass;
13437 }
13438
13439 if (is_constant || GET_CODE (x) == PLUS)
13440 {
13441 if (reg_class_subset_p (GENERAL_REGS, rclass))
13442 return GENERAL_REGS;
13443 if (reg_class_subset_p (BASE_REGS, rclass))
13444 return BASE_REGS;
13445 return NO_REGS;
13446 }
13447
13448 /* For the vector pair and vector quad modes, prefer their natural register
13449 (VSX or FPR) rather than GPR registers. For other integer types, prefer
13450 the GPR registers. */
13451 if (rclass == GEN_OR_FLOAT_REGS)
13452 {
13453 if (mode == OOmode)
13454 return VSX_REGS;
13455
13456 if (mode == XOmode)
13457 return FLOAT_REGS;
13458
13459 if (GET_MODE_CLASS (mode) == MODE_INT)
13460 return GENERAL_REGS;
13461 }
13462
13463 return rclass;
13464 }
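/* Two illustrative cases of the constant handling above: loading
   (const_int -1) into a VSX pseudo stays in VSX_REGS on ISA 2.07
   (XXLORC can generate all-ones in any register), but prefers
   ALTIVEC_REGS on older VMX-only targets (VSPLTISW); on ISA 3.0, any
   constant in -128..127, say 42, prefers ALTIVEC_REGS so that XXSPLTIB
   can materialize it.  */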
13465
13466 /* Debug version of rs6000_preferred_reload_class. */
13467 static enum reg_class
13468 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
13469 {
13470 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
13471
13472 fprintf (stderr,
13473 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
13474 "mode = %s, x:\n",
13475 reg_class_names[ret], reg_class_names[rclass],
13476 GET_MODE_NAME (GET_MODE (x)));
13477 debug_rtx (x);
13478
13479 return ret;
13480 }
13481
13482 /* If we are copying between FP or AltiVec registers and anything else, we need
13483 a memory location. The exception is when we are targeting ppc64 and the
13484 move to/from fpr to gpr instructions are available. Also, under VSX, you
13485 can copy vector registers from the FP register set to the Altivec register
13486 set and vice versa. */
13487
13488 static bool
13489 rs6000_secondary_memory_needed (machine_mode mode,
13490 reg_class_t from_class,
13491 reg_class_t to_class)
13492 {
13493 enum rs6000_reg_type from_type, to_type;
13494 bool altivec_p = ((from_class == ALTIVEC_REGS)
13495 || (to_class == ALTIVEC_REGS));
13496
13497 /* If a simple/direct move is available, we don't need secondary memory. */
13498 from_type = reg_class_to_reg_type[(int)from_class];
13499 to_type = reg_class_to_reg_type[(int)to_class];
13500
13501 if (rs6000_secondary_reload_move (to_type, from_type, mode,
13502 (secondary_reload_info *)0, altivec_p))
13503 return false;
13504
13505 /* If we have a floating point or vector register class, we need to use
13506 memory to transfer the data. */
13507 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
13508 return true;
13509
13510 return false;
13511 }
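/* For instance: without direct-move support, copying a DImode value
   between a GPR and an FPR has no register-to-register path at all, so
   this hook returns true and the value travels through a stack slot
   (store from one register file, load into the other).  */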
13512
13513 /* Debug version of rs6000_secondary_memory_needed. */
13514 static bool
13515 rs6000_debug_secondary_memory_needed (machine_mode mode,
13516 reg_class_t from_class,
13517 reg_class_t to_class)
13518 {
13519 bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class);
13520
13521 fprintf (stderr,
13522 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
13523 "to_class = %s, mode = %s\n",
13524 ret ? "true" : "false",
13525 reg_class_names[from_class],
13526 reg_class_names[to_class],
13527 GET_MODE_NAME (mode));
13528
13529 return ret;
13530 }
13531
13532 /* Return the register class of a scratch register needed to copy IN into
13533 or out of a register in RCLASS in MODE. If it can be done directly,
13534 NO_REGS is returned. */
13535
13536 static enum reg_class
13537 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
13538 rtx in)
13539 {
13540 int regno;
13541
13542 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
13543 #if TARGET_MACHO
13544 && MACHOPIC_INDIRECT
13545 #endif
13546 ))
13547 {
13548 /* We cannot copy a symbolic operand directly into anything
13549 other than BASE_REGS for TARGET_ELF. So indicate that a
13550 register from BASE_REGS is needed as an intermediate
13551 register.
13552
13553 On Darwin, pic addresses require a load from memory, which
13554 needs a base register. */
13555 if (rclass != BASE_REGS
13556 && (SYMBOL_REF_P (in)
13557 || GET_CODE (in) == HIGH
13558 || GET_CODE (in) == LABEL_REF
13559 || GET_CODE (in) == CONST))
13560 return BASE_REGS;
13561 }
13562
13563 if (REG_P (in))
13564 {
13565 regno = REGNO (in);
13566 if (!HARD_REGISTER_NUM_P (regno))
13567 {
13568 regno = true_regnum (in);
13569 if (!HARD_REGISTER_NUM_P (regno))
13570 regno = -1;
13571 }
13572 }
13573 else if (SUBREG_P (in))
13574 {
13575 regno = true_regnum (in);
13576 if (!HARD_REGISTER_NUM_P (regno))
13577 regno = -1;
13578 }
13579 else
13580 regno = -1;
13581
13582 /* If we have VSX register moves, prefer moving scalar values between
13583 Altivec registers and GPR by going via an FPR (and then via memory)
13584 instead of reloading the secondary memory address for Altivec moves. */
13585 if (TARGET_VSX
13586 && GET_MODE_SIZE (mode) < 16
13587 && !mode_supports_vmx_dform (mode)
13588 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
13589 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
13590 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
13591 && (regno >= 0 && INT_REGNO_P (regno)))))
13592 return FLOAT_REGS;
13593
13594 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
13595 into anything. */
13596 if (rclass == GENERAL_REGS || rclass == BASE_REGS
13597 || (regno >= 0 && INT_REGNO_P (regno)))
13598 return NO_REGS;
13599
13600 /* Constants, memory, and VSX registers can go into VSX registers (both the
13601 traditional floating point and the altivec registers). */
13602 if (rclass == VSX_REGS
13603 && (regno == -1 || VSX_REGNO_P (regno)))
13604 return NO_REGS;
13605
13606 /* Constants, memory, and FP registers can go into FP registers. */
13607 if ((regno == -1 || FP_REGNO_P (regno))
13608 && (rclass == FLOAT_REGS || rclass == GEN_OR_FLOAT_REGS))
13609 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
13610
13611 /* Memory and AltiVec registers can go into AltiVec registers. */
13612 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
13613 && rclass == ALTIVEC_REGS)
13614 return NO_REGS;
13615
13616 /* We can copy among the CR registers. */
13617 if ((rclass == CR_REGS || rclass == CR0_REGS)
13618 && regno >= 0 && CR_REGNO_P (regno))
13619 return NO_REGS;
13620
13621 /* Otherwise, we need GENERAL_REGS. */
13622 return GENERAL_REGS;
13623 }
13624
13625 /* Debug version of rs6000_secondary_reload_class. */
13626 static enum reg_class
13627 rs6000_debug_secondary_reload_class (enum reg_class rclass,
13628 machine_mode mode, rtx in)
13629 {
13630 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
13631 fprintf (stderr,
13632 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
13633 "mode = %s, input rtx:\n",
13634 reg_class_names[ret], reg_class_names[rclass],
13635 GET_MODE_NAME (mode));
13636 debug_rtx (in);
13637
13638 return ret;
13639 }
13640
13641 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
13642
13643 static bool
13644 rs6000_can_change_mode_class (machine_mode from,
13645 machine_mode to,
13646 reg_class_t rclass)
13647 {
13648 unsigned from_size = GET_MODE_SIZE (from);
13649 unsigned to_size = GET_MODE_SIZE (to);
13650
13651 if (from_size != to_size)
13652 {
13653 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
13654
13655 if (reg_classes_intersect_p (xclass, rclass))
13656 {
13657 unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
13658 unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
13659 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
13660 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
13661
13662 /* Don't allow 64-bit types to overlap with 128-bit types that take a
13663 single register under VSX because the scalar part of the register
13664 is in the upper 64-bits, and not the lower 64-bits. Types like
13665 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
13666 IEEE floating point can't overlap, and neither can small
13667 values. */
13668
13669 if (to_float128_vector_p && from_float128_vector_p)
13670 return true;
13671
13672 else if (to_float128_vector_p || from_float128_vector_p)
13673 return false;
13674
13675 /* TDmode in floating-mode registers must always go into a register
13676 pair with the most significant word in the even-numbered register
13677 to match ISA requirements. In little-endian mode, this does not
13678 match subreg numbering, so we cannot allow subregs. */
13679 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
13680 return false;
13681
13682 /* Allow SD<->DD changes, since SDmode values are stored in
13683 the low half of the DDmode, just like target-independent
13684 code expects. We need to allow at least SD->DD since
13685 rs6000_secondary_memory_needed_mode asks for that change
13686 to be made for SD reloads. */
13687 if ((to == DDmode && from == SDmode)
13688 || (to == SDmode && from == DDmode))
13689 return true;
13690
13691 if (from_size < 8 || to_size < 8)
13692 return false;
13693
13694 if (from_size == 8 && (8 * to_nregs) != to_size)
13695 return false;
13696
13697 if (to_size == 8 && (8 * from_nregs) != from_size)
13698 return false;
13699
13700 return true;
13701 }
13702 else
13703 return true;
13704 }
13705
13706 /* Since the VSX register set includes traditional floating point registers
13707 and altivec registers, just check for the size being different instead of
13708 trying to check whether the modes are vector modes. Otherwise it won't
13709 allow say DF and DI to change classes. For types like TFmode and TDmode
13710 that take 2 64-bit registers, rather than a single 128-bit register, don't
13711 allow subregs of those types to other 128 bit types. */
13712 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
13713 {
13714 unsigned num_regs = (from_size + 15) / 16;
13715 if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
13716 || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
13717 return false;
13718
13719 return (from_size == 8 || from_size == 16);
13720 }
13721
13722 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
13723 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
13724 return false;
13725
13726 return true;
13727 }
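/* Example of the size-change restriction (illustrative): a
   (subreg:DI (reg:V2DI ...)) in a VSX register is rejected, because
   the 64-bit scalar lives in the upper half of the 128-bit register
   and the subreg would name the wrong bits; by contrast, SDmode and
   DDmode may change freely, since SD is stored in the low half of DD.  */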
13728
13729 /* Debug version of rs6000_can_change_mode_class. */
13730 static bool
13731 rs6000_debug_can_change_mode_class (machine_mode from,
13732 machine_mode to,
13733 reg_class_t rclass)
13734 {
13735 bool ret = rs6000_can_change_mode_class (from, to, rclass);
13736
13737 fprintf (stderr,
13738 "rs6000_can_change_mode_class, return %s, from = %s, "
13739 "to = %s, rclass = %s\n",
13740 ret ? "true" : "false",
13741 GET_MODE_NAME (from), GET_MODE_NAME (to),
13742 reg_class_names[rclass]);
13743
13744 return ret;
13745 }
13746 \f
13747 /* Return a string to do a move operation of 128 bits of data. */
13748
13749 const char *
13750 rs6000_output_move_128bit (rtx operands[])
13751 {
13752 rtx dest = operands[0];
13753 rtx src = operands[1];
13754 machine_mode mode = GET_MODE (dest);
13755 int dest_regno;
13756 int src_regno;
13757 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
13758 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
13759
13760 if (REG_P (dest))
13761 {
13762 dest_regno = REGNO (dest);
13763 dest_gpr_p = INT_REGNO_P (dest_regno);
13764 dest_fp_p = FP_REGNO_P (dest_regno);
13765 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
13766 dest_vsx_p = dest_fp_p | dest_vmx_p;
13767 }
13768 else
13769 {
13770 dest_regno = -1;
13771 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
13772 }
13773
13774 if (REG_P (src))
13775 {
13776 src_regno = REGNO (src);
13777 src_gpr_p = INT_REGNO_P (src_regno);
13778 src_fp_p = FP_REGNO_P (src_regno);
13779 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
13780 src_vsx_p = src_fp_p | src_vmx_p;
13781 }
13782 else
13783 {
13784 src_regno = -1;
13785 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
13786 }
13787
13788 /* Register moves. */
13789 if (dest_regno >= 0 && src_regno >= 0)
13790 {
13791 if (dest_gpr_p)
13792 {
13793 if (src_gpr_p)
13794 return "#";
13795
13796 if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
13797 return (WORDS_BIG_ENDIAN
13798 ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
13799 : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
13800
13801 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
13802 return "#";
13803 }
13804
13805 else if (TARGET_VSX && dest_vsx_p)
13806 {
13807 if (src_vsx_p)
13808 return "xxlor %x0,%x1,%x1";
13809
13810 else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
13811 return (WORDS_BIG_ENDIAN
13812 ? "mtvsrdd %x0,%1,%L1"
13813 : "mtvsrdd %x0,%L1,%1");
13814
13815 else if (TARGET_DIRECT_MOVE && src_gpr_p)
13816 return "#";
13817 }
13818
13819 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
13820 return "vor %0,%1,%1";
13821
13822 else if (dest_fp_p && src_fp_p)
13823 return "#";
13824 }
13825
13826 /* Loads. */
13827 else if (dest_regno >= 0 && MEM_P (src))
13828 {
13829 if (dest_gpr_p)
13830 {
13831 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13832 return "lq %0,%1";
13833 else
13834 return "#";
13835 }
13836
13837 else if (TARGET_ALTIVEC && dest_vmx_p
13838 && altivec_indexed_or_indirect_operand (src, mode))
13839 return "lvx %0,%y1";
13840
13841 else if (TARGET_VSX && dest_vsx_p)
13842 {
13843 if (mode_supports_dq_form (mode)
13844 && quad_address_p (XEXP (src, 0), mode, true))
13845 return "lxv %x0,%1";
13846
13847 else if (TARGET_P9_VECTOR)
13848 return "lxvx %x0,%y1";
13849
13850 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13851 return "lxvw4x %x0,%y1";
13852
13853 else
13854 return "lxvd2x %x0,%y1";
13855 }
13856
13857 else if (TARGET_ALTIVEC && dest_vmx_p)
13858 return "lvx %0,%y1";
13859
13860 else if (dest_fp_p)
13861 return "#";
13862 }
13863
13864 /* Stores. */
13865 else if (src_regno >= 0 && MEM_P (dest))
13866 {
13867 if (src_gpr_p)
13868 {
13869 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
13870 return "stq %1,%0";
13871 else
13872 return "#";
13873 }
13874
13875 else if (TARGET_ALTIVEC && src_vmx_p
13876 && altivec_indexed_or_indirect_operand (dest, mode))
13877 return "stvx %1,%y0";
13878
13879 else if (TARGET_VSX && src_vsx_p)
13880 {
13881 if (mode_supports_dq_form (mode)
13882 && quad_address_p (XEXP (dest, 0), mode, true))
13883 return "stxv %x1,%0";
13884
13885 else if (TARGET_P9_VECTOR)
13886 return "stxvx %x1,%y0";
13887
13888 else if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
13889 return "stxvw4x %x1,%y0";
13890
13891 else
13892 return "stxvd2x %x1,%y0";
13893 }
13894
13895 else if (TARGET_ALTIVEC && src_vmx_p)
13896 return "stvx %1,%y0";
13897
13898 else if (src_fp_p)
13899 return "#";
13900 }
13901
13902 /* Constants. */
13903 else if (dest_regno >= 0
13904 && (CONST_INT_P (src)
13905 || CONST_WIDE_INT_P (src)
13906 || CONST_DOUBLE_P (src)
13907 || GET_CODE (src) == CONST_VECTOR))
13908 {
13909 if (dest_gpr_p)
13910 return "#";
13911
13912 else if ((dest_vmx_p && TARGET_ALTIVEC)
13913 || (dest_vsx_p && TARGET_VSX))
13914 return output_vec_const_move (operands);
13915 }
13916
13917 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
13918 }
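/* Illustration of the templates chosen above (not exhaustive): a
   VSX-to-VSX register copy returns "xxlor %x0,%x1,%x1"; a GPR-to-GPR
   copy returns "#" so the post-reload splitter expands it into
   word-sized moves; and a GPR load returns "lq %0,%1" when
   quad_load_store_p accepts the address.  */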
13919
13920 /* Validate a 128-bit move. */
13921 bool
13922 rs6000_move_128bit_ok_p (rtx operands[])
13923 {
13924 machine_mode mode = GET_MODE (operands[0]);
13925 return (gpc_reg_operand (operands[0], mode)
13926 || gpc_reg_operand (operands[1], mode));
13927 }
13928
13929 /* Return true if a 128-bit move needs to be split. */
13930 bool
13931 rs6000_split_128bit_ok_p (rtx operands[])
13932 {
13933 if (!reload_completed)
13934 return false;
13935
13936 if (!gpr_or_gpr_p (operands[0], operands[1]))
13937 return false;
13938
13939 if (quad_load_store_p (operands[0], operands[1]))
13940 return false;
13941
13942 return true;
13943 }
13944
13945 \f
13946 /* Given a comparison operation, return the bit number in CCR to test. We
13947 know this is a valid comparison.
13948
13949 SCC_P is 1 if this is for an scc. That means that %D will have been
13950 used instead of %C, so the bits will be in different places.
13951
13952 Return -1 if OP isn't a valid comparison for some reason. */
13953
13954 int
13955 ccr_bit (rtx op, int scc_p)
13956 {
13957 enum rtx_code code = GET_CODE (op);
13958 machine_mode cc_mode;
13959 int cc_regnum;
13960 int base_bit;
13961 rtx reg;
13962
13963 if (!COMPARISON_P (op))
13964 return -1;
13965
13966 reg = XEXP (op, 0);
13967
13968 if (!REG_P (reg) || !CR_REGNO_P (REGNO (reg)))
13969 return -1;
13970
13971 cc_mode = GET_MODE (reg);
13972 cc_regnum = REGNO (reg);
13973 base_bit = 4 * (cc_regnum - CR0_REGNO);
13974
13975 validate_condition_mode (code, cc_mode);
13976
13977 /* When generating a sCOND operation, only positive conditions are
13978 allowed. */
13979 if (scc_p)
13980 switch (code)
13981 {
13982 case EQ:
13983 case GT:
13984 case LT:
13985 case UNORDERED:
13986 case GTU:
13987 case LTU:
13988 break;
13989 default:
13990 return -1;
13991 }
13992
13993 switch (code)
13994 {
13995 case NE:
13996 return scc_p ? base_bit + 3 : base_bit + 2;
13997 case EQ:
13998 return base_bit + 2;
13999 case GT: case GTU: case UNLE:
14000 return base_bit + 1;
14001 case LT: case LTU: case UNGE:
14002 return base_bit;
14003 case ORDERED: case UNORDERED:
14004 return base_bit + 3;
14005
14006 case GE: case GEU:
14007 /* If scc, we will have done a cror to put the bit in the
14008 unordered position. So test that bit. For integer, this is ! LT
14009 unless this is an scc insn. */
14010 return scc_p ? base_bit + 3 : base_bit;
14011
14012 case LE: case LEU:
14013 return scc_p ? base_bit + 3 : base_bit + 1;
14014
14015 default:
14016 return -1;
14017 }
14018 }
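/* Worked example (illustration): for (gt (reg:CC cr6) (const_int 0))
   with scc_p == 0, base_bit is 4 * 6 == 24 and the result is 25, the GT
   bit of field CR6.  A GE test on the same register gives 24 (! LT) when
   scc_p is 0, but 27 when scc_p is 1, since the scc sequence will have
   cror'd the result into the unordered position.  */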
14019 \f
14020 /* Return the GOT register. */
14021
14022 rtx
14023 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
14024 {
14025 /* The second flow pass currently (June 1999) can't update
14026 regs_ever_live without disturbing other parts of the compiler, so
14027 update it here to make the prolog/epilogue code happy. */
14028 if (!can_create_pseudo_p ()
14029 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
14030 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
14031
14032 crtl->uses_pic_offset_table = 1;
14033
14034 return pic_offset_table_rtx;
14035 }
14036 \f
14037 #define INT_P(X) (CONST_INT_P (X) && GET_MODE (X) == VOIDmode)
14038
14039 /* Write out a function code label. */
14040
14041 void
14042 rs6000_output_function_entry (FILE *file, const char *fname)
14043 {
14044 if (fname[0] != '.')
14045 {
14046 switch (DEFAULT_ABI)
14047 {
14048 default:
14049 gcc_unreachable ();
14050
14051 case ABI_AIX:
14052 if (DOT_SYMBOLS)
14053 putc ('.', file);
14054 else
14055 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
14056 break;
14057
14058 case ABI_ELFv2:
14059 case ABI_V4:
14060 case ABI_DARWIN:
14061 break;
14062 }
14063 }
14064
14065 RS6000_OUTPUT_BASENAME (file, fname);
14066 }
14067
14068 /* Print an operand. Recognize special options, documented below. */
14069
14070 #if TARGET_ELF
14071 /* Access to .sdata2 through r2 (see -msdata=eabi in invoke.texi) is
14072 only introduced by the linker, when applying the sda21
14073 relocation. */
14074 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
14075 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
14076 #else
14077 #define SMALL_DATA_RELOC "sda21"
14078 #define SMALL_DATA_REG 0
14079 #endif
14080
14081 void
14082 print_operand (FILE *file, rtx x, int code)
14083 {
14084 int i;
14085 unsigned HOST_WIDE_INT uval;
14086
14087 switch (code)
14088 {
14089 /* %a is output_address. */
14090
14091 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
14092 output_operand. */
14093
14094 case 'A':
14095 /* Write the MMA accumulator number associated with VSX register X. */
14096 if (!REG_P (x) || !FP_REGNO_P (REGNO (x)) || (REGNO (x) % 4) != 0)
14097 output_operand_lossage ("invalid %%A value");
14098 else
14099 fprintf (file, "%d", (REGNO (x) - FIRST_FPR_REGNO) / 4);
14100 return;
14101
14102 case 'D':
14103 /* Like 'J' but get to the GT bit only. */
14104 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14105 {
14106 output_operand_lossage ("invalid %%D value");
14107 return;
14108 }
14109
14110 /* Bit 1 is GT bit. */
14111 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
14112
14113 /* Add one for shift count in rlinm for scc. */
14114 fprintf (file, "%d", i + 1);
14115 return;
14116
14117 case 'e':
14118 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
14119 if (! INT_P (x))
14120 {
14121 output_operand_lossage ("invalid %%e value");
14122 return;
14123 }
14124
14125 uval = INTVAL (x);
14126 if ((uval & 0xffff) == 0 && uval != 0)
14127 putc ('s', file);
14128 return;
14129
14130 case 'E':
14131 /* X is a CR register. Print the number of the EQ bit of the CR. */
14132 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14133 output_operand_lossage ("invalid %%E value");
14134 else
14135 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
14136 return;
14137
14138 case 'f':
14139 /* X is a CR register. Print the shift count needed to move it
14140 to the high-order four bits. */
14141 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14142 output_operand_lossage ("invalid %%f value");
14143 else
14144 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
14145 return;
14146
14147 case 'F':
14148 /* Similar, but print the count for the rotate in the opposite
14149 direction. */
14150 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14151 output_operand_lossage ("invalid %%F value");
14152 else
14153 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
14154 return;
14155
14156 case 'G':
14157 /* X is a constant integer. If it is negative, print "m",
14158 otherwise print "z". This is to make an aze or ame insn. */
14159 if (!CONST_INT_P (x))
14160 output_operand_lossage ("invalid %%G value");
14161 else if (INTVAL (x) >= 0)
14162 putc ('z', file);
14163 else
14164 putc ('m', file);
14165 return;
14166
14167 case 'h':
14168 /* If constant, output low-order five bits. Otherwise, write
14169 normally. */
14170 if (INT_P (x))
14171 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
14172 else
14173 print_operand (file, x, 0);
14174 return;
14175
14176 case 'H':
14177 /* If constant, output low-order six bits. Otherwise, write
14178 normally. */
14179 if (INT_P (x))
14180 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
14181 else
14182 print_operand (file, x, 0);
14183 return;
14184
14185 case 'I':
14186 /* Print `i' if this is a constant, else nothing. */
14187 if (INT_P (x))
14188 putc ('i', file);
14189 return;
14190
14191 case 'j':
14192 /* Write the bit number in CCR for jump. */
14193 i = ccr_bit (x, 0);
14194 if (i == -1)
14195 output_operand_lossage ("invalid %%j code");
14196 else
14197 fprintf (file, "%d", i);
14198 return;
14199
14200 case 'J':
14201 /* Similar, but add one for shift count in rlinm for scc and pass
14202 scc flag to `ccr_bit'. */
14203 i = ccr_bit (x, 1);
14204 if (i == -1)
14205 output_operand_lossage ("invalid %%J code");
14206 else
14207 /* If we want bit 31, write a shift count of zero, not 32. */
14208 fprintf (file, "%d", i == 31 ? 0 : i + 1);
14209 return;
14210
14211 case 'k':
14212 /* X must be a constant. Write the 1's complement of the
14213 constant. */
14214 if (! INT_P (x))
14215 output_operand_lossage ("invalid %%k value");
14216 else
14217 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
14218 return;
14219
14220 case 'K':
14221 /* X must be a symbolic constant on ELF. Write an
14222 expression suitable for an 'addi' that adds in the low 16
14223 bits of the MEM. */
14224 if (GET_CODE (x) == CONST)
14225 {
14226 if (GET_CODE (XEXP (x, 0)) != PLUS
14227 || (!SYMBOL_REF_P (XEXP (XEXP (x, 0), 0))
14228 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
14229 || !CONST_INT_P (XEXP (XEXP (x, 0), 1)))
14230 output_operand_lossage ("invalid %%K value");
14231 }
14232 print_operand_address (file, x);
14233 fputs ("@l", file);
14234 return;
14235
14236 /* %l is output_asm_label. */
14237
14238 case 'L':
14239 /* Write second word of DImode or DFmode reference. Works on register
14240 or non-indexed memory only. */
14241 if (REG_P (x))
14242 fputs (reg_names[REGNO (x) + 1], file);
14243 else if (MEM_P (x))
14244 {
14245 machine_mode mode = GET_MODE (x);
14246 /* Handle possible auto-increment. Since it is pre-increment and
14247 we have already done it, we can just use an offset of one word. */
14248 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14249 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14250 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
14251 UNITS_PER_WORD));
14252 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14253 output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
14254 UNITS_PER_WORD));
14255 else
14256 output_address (mode, XEXP (adjust_address_nv (x, SImode,
14257 UNITS_PER_WORD),
14258 0));
14259
14260 if (small_data_operand (x, GET_MODE (x)))
14261 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14262 reg_names[SMALL_DATA_REG]);
14263 }
14264 return;
14265
14266 case 'N': /* Unused */
14267 /* Write the number of elements in the vector times 4. */
14268 if (GET_CODE (x) != PARALLEL)
14269 output_operand_lossage ("invalid %%N value");
14270 else
14271 fprintf (file, "%d", XVECLEN (x, 0) * 4);
14272 return;
14273
14274 case 'O': /* Unused */
14275 /* Similar, but subtract 1 first. */
14276 if (GET_CODE (x) != PARALLEL)
14277 output_operand_lossage ("invalid %%O value");
14278 else
14279 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
14280 return;
14281
14282 case 'p':
14283 /* X is a CONST_INT that is a power of two. Output the logarithm. */
14284 if (! INT_P (x)
14285 || INTVAL (x) < 0
14286 || (i = exact_log2 (INTVAL (x))) < 0)
14287 output_operand_lossage ("invalid %%p value");
14288 else
14289 fprintf (file, "%d", i);
14290 return;
14291
14292 case 'P':
14293 /* The operand must be an indirect memory reference. The result
14294 is the register name. */
14295 if (!MEM_P (x) || !REG_P (XEXP (x, 0))
14296 || REGNO (XEXP (x, 0)) >= 32)
14297 output_operand_lossage ("invalid %%P value");
14298 else
14299 fputs (reg_names[REGNO (XEXP (x, 0))], file);
14300 return;
14301
14302 case 'q':
14303 /* This outputs the logical code corresponding to a boolean
14304 expression. The expression may have one or both operands
14305 negated (if one, only the first one). For condition register
14306 logical operations, it will also treat the negated
14307 CR codes as NOTs, but not handle NOTs of them. */
14308 {
14309 const char *const *t = 0;
14310 const char *s;
14311 enum rtx_code code = GET_CODE (x);
14312 static const char * const tbl[3][3] = {
14313 { "and", "andc", "nor" },
14314 { "or", "orc", "nand" },
14315 { "xor", "eqv", "xor" } };
14316
14317 if (code == AND)
14318 t = tbl[0];
14319 else if (code == IOR)
14320 t = tbl[1];
14321 else if (code == XOR)
14322 t = tbl[2];
14323 else
14324 output_operand_lossage ("invalid %%q value");
14325
14326 if (GET_CODE (XEXP (x, 0)) != NOT)
14327 s = t[0];
14328 else
14329 {
14330 if (GET_CODE (XEXP (x, 1)) == NOT)
14331 s = t[2];
14332 else
14333 s = t[1];
14334 }
14335
14336 fputs (s, file);
14337 }
14338 return;
14339
14340 case 'Q':
14341 if (! TARGET_MFCRF)
14342 return;
14343 fputc (',', file);
14344 /* FALLTHRU */
14345
14346 case 'R':
14347 /* X is a CR register. Print the mask for `mtcrf'. */
14348 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14349 output_operand_lossage ("invalid %%R value");
14350 else
14351 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
14352 return;
14353
14354 case 's':
14355 /* Low 5 bits of 32 - value. */
14356 if (! INT_P (x))
14357 output_operand_lossage ("invalid %%s value");
14358 else
14359 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
14360 return;
14361
14362 case 't':
14363 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
14364 if (!REG_P (x) || !CR_REGNO_P (REGNO (x)))
14365 {
14366 output_operand_lossage ("invalid %%t value");
14367 return;
14368 }
14369
14370 /* Bit 3 is OV bit. */
14371 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
14372
14373 /* If we want bit 31, write a shift count of zero, not 32. */
14374 fprintf (file, "%d", i == 31 ? 0 : i + 1);
14375 return;
14376
14377 case 'T':
14378 /* Print the symbolic name of a branch target register. */
14379 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14380 x = XVECEXP (x, 0, 0);
14381 if (!REG_P (x) || (REGNO (x) != LR_REGNO
14382 && REGNO (x) != CTR_REGNO))
14383 output_operand_lossage ("invalid %%T value");
14384 else if (REGNO (x) == LR_REGNO)
14385 fputs ("lr", file);
14386 else
14387 fputs ("ctr", file);
14388 return;
14389
14390 case 'u':
14391 /* High-order or low-order 16 bits of constant, whichever is non-zero,
14392 for use in unsigned operand. */
14393 if (! INT_P (x))
14394 {
14395 output_operand_lossage ("invalid %%u value");
14396 return;
14397 }
14398
14399 uval = INTVAL (x);
14400 if ((uval & 0xffff) == 0)
14401 uval >>= 16;
14402
14403 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
14404 return;
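      /* Illustration: for 0x12340000 the low half is zero, so %u prints
	 0x1234 (the high half); for 0x00001234 it prints 0x1234 directly.
	 Either way the value fits a 16-bit unsigned operand field.  */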
14405
14406 case 'v':
14407 /* High-order 16 bits of constant for use in signed operand. */
14408 if (! INT_P (x))
14409 output_operand_lossage ("invalid %%v value");
14410 else
14411 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
14412 (INTVAL (x) >> 16) & 0xffff);
14413 return;
14414
14415 case 'U':
14416 /* Print `u' if this has an auto-increment or auto-decrement. */
14417 if (MEM_P (x)
14418 && (GET_CODE (XEXP (x, 0)) == PRE_INC
14419 || GET_CODE (XEXP (x, 0)) == PRE_DEC
14420 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
14421 putc ('u', file);
14422 return;
14423
14424 case 'V':
14425 /* Print the trap code for this operand. */
14426 switch (GET_CODE (x))
14427 {
14428 case EQ:
14429 fputs ("eq", file); /* 4 */
14430 break;
14431 case NE:
14432 fputs ("ne", file); /* 24 */
14433 break;
14434 case LT:
14435 fputs ("lt", file); /* 16 */
14436 break;
14437 case LE:
14438 fputs ("le", file); /* 20 */
14439 break;
14440 case GT:
14441 fputs ("gt", file); /* 8 */
14442 break;
14443 case GE:
14444 fputs ("ge", file); /* 12 */
14445 break;
14446 case LTU:
14447 fputs ("llt", file); /* 2 */
14448 break;
14449 case LEU:
14450 fputs ("lle", file); /* 6 */
14451 break;
14452 case GTU:
14453 fputs ("lgt", file); /* 1 */
14454 break;
14455 case GEU:
14456 fputs ("lge", file); /* 5 */
14457 break;
14458 default:
14459 output_operand_lossage ("invalid %%V value");
14460 }
14461 break;
14462
14463 case 'w':
14464 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
14465 normally. */
14466 if (INT_P (x))
14467 fprintf (file, HOST_WIDE_INT_PRINT_DEC, sext_hwi (INTVAL (x), 16));
14468 else
14469 print_operand (file, x, 0);
14470 return;
14471
14472 case 'x':
14473 /* X is a FPR or Altivec register used in a VSX context. */
14474 if (!REG_P (x) || !VSX_REGNO_P (REGNO (x)))
14475 output_operand_lossage ("invalid %%x value");
14476 else
14477 {
14478 int reg = REGNO (x);
14479 int vsx_reg = (FP_REGNO_P (reg)
14480 ? reg - 32
14481 : reg - FIRST_ALTIVEC_REGNO + 32);
14482
14483 #ifdef TARGET_REGNAMES
14484 if (TARGET_REGNAMES)
14485 fprintf (file, "%%vs%d", vsx_reg);
14486 else
14487 #endif
14488 fprintf (file, "%d", vsx_reg);
14489 }
14490 return;
14491
14492 case 'X':
14493 if (MEM_P (x)
14494 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
14495 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
14496 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
14497 putc ('x', file);
14498 return;
14499
14500 case 'Y':
14501 /* Like 'L', for third word of TImode/PTImode. */
14502 if (REG_P (x))
14503 fputs (reg_names[REGNO (x) + 2], file);
14504 else if (MEM_P (x))
14505 {
14506 machine_mode mode = GET_MODE (x);
14507 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14508 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14509 output_address (mode, plus_constant (Pmode,
14510 XEXP (XEXP (x, 0), 0), 8));
14511 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14512 output_address (mode, plus_constant (Pmode,
14513 XEXP (XEXP (x, 0), 0), 8));
14514 else
14515 output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
14516 if (small_data_operand (x, GET_MODE (x)))
14517 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14518 reg_names[SMALL_DATA_REG]);
14519 }
14520 return;
14521
14522 case 'z':
14523 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14524 x = XVECEXP (x, 0, 1);
14525 /* X is a SYMBOL_REF. Write out the name preceded by a
14526 period and without any trailing data in brackets. Used for function
14527 names. If we are configured for System V (or the embedded ABI) on
14528 the PowerPC, do not emit the period, since those systems do not use
14529 TOCs and the like. */
14530 if (!SYMBOL_REF_P (x))
14531 {
14532 output_operand_lossage ("invalid %%z value");
14533 return;
14534 }
14535
14536 /* For macho, check to see if we need a stub. */
14537 if (TARGET_MACHO)
14538 {
14539 const char *name = XSTR (x, 0);
14540 #if TARGET_MACHO
14541 if (darwin_symbol_stubs
14542 && MACHOPIC_INDIRECT
14543 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
14544 name = machopic_indirection_name (x, /*stub_p=*/true);
14545 #endif
14546 assemble_name (file, name);
14547 }
14548 else if (!DOT_SYMBOLS)
14549 assemble_name (file, XSTR (x, 0));
14550 else
14551 rs6000_output_function_entry (file, XSTR (x, 0));
14552 return;
14553
14554 case 'Z':
14555 /* Like 'L', for last word of TImode/PTImode. */
14556 if (REG_P (x))
14557 fputs (reg_names[REGNO (x) + 3], file);
14558 else if (MEM_P (x))
14559 {
14560 machine_mode mode = GET_MODE (x);
14561 if (GET_CODE (XEXP (x, 0)) == PRE_INC
14562 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
14563 output_address (mode, plus_constant (Pmode,
14564 XEXP (XEXP (x, 0), 0), 12));
14565 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14566 output_address (mode, plus_constant (Pmode,
14567 XEXP (XEXP (x, 0), 0), 12));
14568 else
14569 output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
14570 if (small_data_operand (x, GET_MODE (x)))
14571 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14572 reg_names[SMALL_DATA_REG]);
14573 }
14574 return;
14575
14576 /* Print AltiVec memory operand. */
14577 case 'y':
14578 {
14579 rtx tmp;
14580
14581 gcc_assert (MEM_P (x));
14582
14583 tmp = XEXP (x, 0);
14584
14585 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (GET_MODE (x))
14586 && GET_CODE (tmp) == AND
14587 && CONST_INT_P (XEXP (tmp, 1))
14588 && INTVAL (XEXP (tmp, 1)) == -16)
14589 tmp = XEXP (tmp, 0);
14590 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
14591 && GET_CODE (tmp) == PRE_MODIFY)
14592 tmp = XEXP (tmp, 1);
14593 if (REG_P (tmp))
14594 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
14595 else
14596 {
14597 if (GET_CODE (tmp) != PLUS
14598 || !REG_P (XEXP (tmp, 0))
14599 || !REG_P (XEXP (tmp, 1)))
14600 {
14601 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
14602 break;
14603 }
14604
14605 if (REGNO (XEXP (tmp, 0)) == 0)
14606 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
14607 reg_names[ REGNO (XEXP (tmp, 0)) ]);
14608 else
14609 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
14610 reg_names[ REGNO (XEXP (tmp, 1)) ]);
14611 }
14612 break;
14613 }
14614
14615 case 0:
14616 if (REG_P (x))
14617 fprintf (file, "%s", reg_names[REGNO (x)]);
14618 else if (MEM_P (x))
14619 {
14620 /* We need to handle PRE_INC and PRE_DEC here, since we need to
14621 know the width from the mode. */
14622 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
14623 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
14624 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14625 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
14626 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
14627 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
14628 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
14629 output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
14630 else
14631 output_address (GET_MODE (x), XEXP (x, 0));
14632 }
14633 else if (toc_relative_expr_p (x, false,
14634 &tocrel_base_oac, &tocrel_offset_oac))
14635 /* This hack along with a corresponding hack in
14636 rs6000_output_addr_const_extra arranges to output addends
14637 where the assembler expects to find them, e.g.
14638 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
14639 without this hack would be output as "x@toc+4". We
14640 want "x+4@toc". */
14641 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14642 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
14643 output_addr_const (file, XVECEXP (x, 0, 0));
14644 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PLTSEQ)
14645 output_addr_const (file, XVECEXP (x, 0, 1));
14646 else
14647 output_addr_const (file, x);
14648 return;
14649
14650 case '&':
14651 if (const char *name = get_some_local_dynamic_name ())
14652 assemble_name (file, name);
14653 else
14654 output_operand_lossage ("'%%&' used without any "
14655 "local dynamic TLS references");
14656 return;
14657
14658 default:
14659 output_operand_lossage ("invalid %%xn code");
14660 }
14661 }
14662 \f
14663 /* Print the address of an operand. */
14664
14665 void
14666 print_operand_address (FILE *file, rtx x)
14667 {
14668 if (REG_P (x))
14669 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
14670
14671 /* Is it a PC-relative address? */
14672 else if (TARGET_PCREL && pcrel_local_or_external_address (x, VOIDmode))
14673 {
14674 HOST_WIDE_INT offset;
14675
14676 if (GET_CODE (x) == CONST)
14677 x = XEXP (x, 0);
14678
14679 if (GET_CODE (x) == PLUS)
14680 {
14681 offset = INTVAL (XEXP (x, 1));
14682 x = XEXP (x, 0);
14683 }
14684 else
14685 offset = 0;
14686
14687 output_addr_const (file, x);
14688
14689 if (offset)
14690 fprintf (file, "%+" PRId64, offset);
14691
14692 if (SYMBOL_REF_P (x) && !SYMBOL_REF_LOCAL_P (x))
14693 fprintf (file, "@got");
14694
14695 fprintf (file, "@pcrel");
14696 }
14697 else if (SYMBOL_REF_P (x) || GET_CODE (x) == CONST
14698 || GET_CODE (x) == LABEL_REF)
14699 {
14700 output_addr_const (file, x);
14701 if (small_data_operand (x, GET_MODE (x)))
14702 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
14703 reg_names[SMALL_DATA_REG]);
14704 else
14705 gcc_assert (!TARGET_TOC);
14706 }
14707 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14708 && REG_P (XEXP (x, 1)))
14709 {
14710 if (REGNO (XEXP (x, 0)) == 0)
14711 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
14712 reg_names[ REGNO (XEXP (x, 0)) ]);
14713 else
14714 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
14715 reg_names[ REGNO (XEXP (x, 1)) ]);
14716 }
14717 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
14718 && CONST_INT_P (XEXP (x, 1)))
14719 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
14720 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
14721 #if TARGET_MACHO
14722 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14723 && CONSTANT_P (XEXP (x, 1)))
14724 {
14725 fprintf (file, "lo16(");
14726 output_addr_const (file, XEXP (x, 1));
14727 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14728 }
14729 #endif
14730 #if TARGET_ELF
14731 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
14732 && CONSTANT_P (XEXP (x, 1)))
14733 {
14734 output_addr_const (file, XEXP (x, 1));
14735 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
14736 }
14737 #endif
14738 else if (toc_relative_expr_p (x, false, &tocrel_base_oac, &tocrel_offset_oac))
14739 {
14740 /* This hack along with a corresponding hack in
14741 rs6000_output_addr_const_extra arranges to output addends
14742 where the assembler expects to find them, e.g.
14743 (lo_sum (reg 9)
14744 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
14745 without this hack would be output as "x@toc+8@l(9)". We
14746 want "x+8@toc@l(9)". */
14747 output_addr_const (file, CONST_CAST_RTX (tocrel_base_oac));
14748 if (GET_CODE (x) == LO_SUM)
14749 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
14750 else
14751 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base_oac, 0, 1))]);
14752 }
14753 else
14754 output_addr_const (file, x);
14755 }
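/* Illustration: (plus (reg 9) (const_int 16)) prints as "16(9)" with the
   default numeric register names, and (plus (reg 9) (reg 10)) prints as
   "9,10".  If the first register is r0, the two registers are swapped on
   output, because r0 in the RA slot of an indexed address reads as
   zero.  */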
14756 \f
14757 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
14758
14759 bool
14760 rs6000_output_addr_const_extra (FILE *file, rtx x)
14761 {
14762 if (GET_CODE (x) == UNSPEC)
14763 switch (XINT (x, 1))
14764 {
14765 case UNSPEC_TOCREL:
14766 gcc_checking_assert (SYMBOL_REF_P (XVECEXP (x, 0, 0))
14767 && REG_P (XVECEXP (x, 0, 1))
14768 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
14769 output_addr_const (file, XVECEXP (x, 0, 0));
14770 if (x == tocrel_base_oac && tocrel_offset_oac != const0_rtx)
14771 {
14772 if (INTVAL (tocrel_offset_oac) >= 0)
14773 fprintf (file, "+");
14774 output_addr_const (file, CONST_CAST_RTX (tocrel_offset_oac));
14775 }
14776 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
14777 {
14778 putc ('-', file);
14779 assemble_name (file, toc_label_name);
14780 need_toc_init = 1;
14781 }
14782 else if (TARGET_ELF)
14783 fputs ("@toc", file);
14784 return true;
14785
14786 #if TARGET_MACHO
14787 case UNSPEC_MACHOPIC_OFFSET:
14788 output_addr_const (file, XVECEXP (x, 0, 0));
14789 putc ('-', file);
14790 machopic_output_function_base_name (file);
14791 return true;
14792 #endif
14793 }
14794 return false;
14795 }
14796 \f
14797 /* Target hook for assembling integer objects. The PowerPC version has
14798 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
14799 is defined. It also needs to handle DI-mode objects on 64-bit
14800 targets. */
14801
14802 static bool
14803 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
14804 {
14805 #ifdef RELOCATABLE_NEEDS_FIXUP
14806 /* Special handling for SI values. */
14807 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
14808 {
14809 static int recurse = 0;
14810
14811 /* For -mrelocatable, we mark all addresses that need to be fixed up in
14812 the .fixup section. Since the TOC section is already relocated, we
14813 don't need to mark it here. We used to skip the text section, but it
14814 should never be valid for relocated addresses to be placed in the text
14815 section. */
14816 if (DEFAULT_ABI == ABI_V4
14817 && (TARGET_RELOCATABLE || flag_pic > 1)
14818 && in_section != toc_section
14819 && !recurse
14820 && !CONST_SCALAR_INT_P (x)
14821 && CONSTANT_P (x))
14822 {
14823 char buf[256];
14824
14825 recurse = 1;
14826 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
14827 fixuplabelno++;
14828 ASM_OUTPUT_LABEL (asm_out_file, buf);
14829 fprintf (asm_out_file, "\t.long\t(");
14830 output_addr_const (asm_out_file, x);
14831 fprintf (asm_out_file, ")@fixup\n");
14832 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
14833 ASM_OUTPUT_ALIGN (asm_out_file, 2);
14834 fprintf (asm_out_file, "\t.long\t");
14835 assemble_name (asm_out_file, buf);
14836 fprintf (asm_out_file, "\n\t.previous\n");
14837 recurse = 0;
14838 return true;
14839 }
14840 /* Remove initial .'s to turn a -mcall-aixdesc function
14841 address into the address of the descriptor, not the function
14842 itself. */
14843 else if (SYMBOL_REF_P (x)
14844 && XSTR (x, 0)[0] == '.'
14845 && DEFAULT_ABI == ABI_AIX)
14846 {
14847 const char *name = XSTR (x, 0);
14848 while (*name == '.')
14849 name++;
14850
14851 fprintf (asm_out_file, "\t.long\t%s\n", name);
14852 return true;
14853 }
14854 }
14855 #endif /* RELOCATABLE_NEEDS_FIXUP */
14856 return default_assemble_integer (x, size, aligned_p);
14857 }
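/* For -mrelocatable, the fixup path above emits a sequence of roughly
   this shape for each constant address word (exact label spelling is
   target-dependent):

	LCP..N:
		.long	(sym)@fixup
		.section ".fixup","aw"
		.align	2
		.long	LCP..N
		.previous  */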
14858
14859 /* Return a template string for assembly to emit when making an
14860 external call. FUNOP is the call mem argument operand number. */
14861
14862 static const char *
14863 rs6000_call_template_1 (rtx *operands, unsigned int funop, bool sibcall)
14864 {
14865 /* -Wformat-overflow workaround, without which gcc thinks that %u
14866 might produce 10 digits. */
14867 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14868
14869 char arg[12];
14870 arg[0] = 0;
14871 if (GET_CODE (operands[funop + 1]) == UNSPEC)
14872 {
14873 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
14874 sprintf (arg, "(%%%u@tlsgd)", funop + 1);
14875 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
14876 sprintf (arg, "(%%&@tlsld)");
14877 }
14878
14879 /* The magic 32768 offset here corresponds to the offset of
14880 r30 in .got2, as given by LCTOC1. See sysv4.h:toc_section. */
14881 char z[11];
14882 sprintf (z, "%%z%u%s", funop,
14883 (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic == 2
14884 ? "+32768" : ""));
14885
14886 static char str[32]; /* 1 spare */
14887 if (rs6000_pcrel_p ())
14888 sprintf (str, "b%s %s@notoc%s", sibcall ? "" : "l", z, arg);
14889 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
14890 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14891 sibcall ? "" : "\n\tnop");
14892 else if (DEFAULT_ABI == ABI_V4)
14893 sprintf (str, "b%s %s%s%s", sibcall ? "" : "l", z, arg,
14894 flag_pic ? "@plt" : "");
14895 #if TARGET_MACHO
14896 /* If/when we remove the mlongcall opt, we can share the AIX/ELFv2 case. */
14897 else if (DEFAULT_ABI == ABI_DARWIN)
14898 {
14899 /* The cookie is in operand func+2. */
14900 gcc_checking_assert (GET_CODE (operands[funop + 2]) == CONST_INT);
14901 int cookie = INTVAL (operands[funop + 2]);
14902 if (cookie & CALL_LONG)
14903 {
14904 tree funname = get_identifier (XSTR (operands[funop], 0));
14905 tree labelname = get_prev_label (funname);
14906 gcc_checking_assert (labelname && !sibcall);
14907
14908 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
14909 instruction will reach 'foo', otherwise link as 'bl L42'".
14910 "L42" should be a 'branch island', that will do a far jump to
14911 'foo'. Branch islands are generated in
14912 macho_branch_islands(). */
14913 sprintf (str, "jbsr %%z%u,%.10s", funop,
14914 IDENTIFIER_POINTER (labelname));
14915 }
14916 else
14917 /* Same as AIX or ELFv2, except to keep backwards compat, no nop
14918 after the call. */
14919 sprintf (str, "b%s %s%s", sibcall ? "" : "l", z, arg);
14920 }
14921 #endif
14922 else
14923 gcc_unreachable ();
14924 return str;
14925 }
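/* Illustration (assuming funop == 1 and no TLS argument): with
   pc-relative addressing the non-sibling template is "bl %z1@notoc"; on
   AIX/ELFv2 without pcrel it is "bl %z1\n\tnop", the nop reserving the
   slot the linker may patch with a TOC restore; on secure-PLT V4 with
   -fPIC it is "bl %z1+32768@plt".  */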
14926
14927 const char *
14928 rs6000_call_template (rtx *operands, unsigned int funop)
14929 {
14930 return rs6000_call_template_1 (operands, funop, false);
14931 }
14932
14933 const char *
14934 rs6000_sibcall_template (rtx *operands, unsigned int funop)
14935 {
14936 return rs6000_call_template_1 (operands, funop, true);
14937 }
14938
14939 /* As above, for indirect calls. */
14940
14941 static const char *
14942 rs6000_indirect_call_template_1 (rtx *operands, unsigned int funop,
14943 bool sibcall)
14944 {
14945 /* -Wformat-overflow workaround, without which gcc thinks that %u
14946 might produce 10 digits. Note that -Wformat-overflow will not
14947 currently warn here for str[], so do not rely on a warning to
14948 ensure str[] is correctly sized. */
14949 gcc_assert (funop <= MAX_RECOG_OPERANDS);
14950
14951 /* Currently, funop is either 0 or 1. The maximum string is always
14952 a !speculate 64-bit __tls_get_addr call.
14953
14954 ABI_ELFv2, pcrel:
14955 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14956 . 35 .reloc .,R_PPC64_PLTSEQ_NOTOC,%z1\n\t
14957 . 9 crset 2\n\t
14958 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14959 . 36 .reloc .,R_PPC64_PLTCALL_NOTOC,%z1\n\t
14960 . 8 beq%T1l-
14961 .---
14962 .142
14963
14964 ABI_AIX:
14965 . 9 ld 2,%3\n\t
14966 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14967 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14968 . 9 crset 2\n\t
14969 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14970 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14971 . 10 beq%T1l-\n\t
14972 . 10 ld 2,%4(1)
14973 .---
14974 .151
14975
14976 ABI_ELFv2:
14977 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14978 . 29 .reloc .,R_PPC64_PLTSEQ,%z1\n\t
14979 . 9 crset 2\n\t
14980 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14981 . 30 .reloc .,R_PPC64_PLTCALL,%z1\n\t
14982 . 10 beq%T1l-\n\t
14983 . 10 ld 2,%3(1)
14984 .---
14985 .142
14986
14987 ABI_V4:
14988 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14989 . 35 .reloc .,R_PPC64_PLTSEQ,%z1+32768\n\t
14990 . 9 crset 2\n\t
14991 . 27 .reloc .,R_PPC64_TLSGD,%2\n\t
14992 . 36 .reloc .,R_PPC64_PLTCALL,%z1+32768\n\t
14993 . 8 beq%T1l-
14994 .---
14995 .141 */
14996 static char str[160]; /* 8 spare */
14997 char *s = str;
14998 const char *ptrload = TARGET_64BIT ? "d" : "wz";
14999
15000 if (DEFAULT_ABI == ABI_AIX)
15001 s += sprintf (s,
15002 "l%s 2,%%%u\n\t",
15003 ptrload, funop + 3);
15004
15005 /* We don't need the extra code to stop indirect call speculation if
15006 calling via LR. */
15007 bool speculate = (TARGET_MACHO
15008 || rs6000_speculate_indirect_jumps
15009 || (REG_P (operands[funop])
15010 && REGNO (operands[funop]) == LR_REGNO));
15011
15012 if (TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)
15013 {
15014 const char *rel64 = TARGET_64BIT ? "64" : "";
15015 char tls[29];
15016 tls[0] = 0;
15017 if (GET_CODE (operands[funop + 1]) == UNSPEC)
15018 {
15019 if (XINT (operands[funop + 1], 1) == UNSPEC_TLSGD)
15020 sprintf (tls, ".reloc .,R_PPC%s_TLSGD,%%%u\n\t",
15021 rel64, funop + 1);
15022 else if (XINT (operands[funop + 1], 1) == UNSPEC_TLSLD)
15023 sprintf (tls, ".reloc .,R_PPC%s_TLSLD,%%&\n\t",
15024 rel64);
15025 }
15026
15027 const char *notoc = rs6000_pcrel_p () ? "_NOTOC" : "";
15028 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
15029 && flag_pic == 2 ? "+32768" : "");
15030 if (!speculate)
15031 {
15032 s += sprintf (s,
15033 "%s.reloc .,R_PPC%s_PLTSEQ%s,%%z%u%s\n\t",
15034 tls, rel64, notoc, funop, addend);
15035 s += sprintf (s, "crset 2\n\t");
15036 }
15037 s += sprintf (s,
15038 "%s.reloc .,R_PPC%s_PLTCALL%s,%%z%u%s\n\t",
15039 tls, rel64, notoc, funop, addend);
15040 }
15041 else if (!speculate)
15042 s += sprintf (s, "crset 2\n\t");
15043
15044 if (rs6000_pcrel_p ())
15045 {
15046 if (speculate)
15047 sprintf (s, "b%%T%ul", funop);
15048 else
15049 sprintf (s, "beq%%T%ul-", funop);
15050 }
15051 else if (DEFAULT_ABI == ABI_AIX)
15052 {
15053 if (speculate)
15054 sprintf (s,
15055 "b%%T%ul\n\t"
15056 "l%s 2,%%%u(1)",
15057 funop, ptrload, funop + 4);
15058 else
15059 sprintf (s,
15060 "beq%%T%ul-\n\t"
15061 "l%s 2,%%%u(1)",
15062 funop, ptrload, funop + 4);
15063 }
15064 else if (DEFAULT_ABI == ABI_ELFv2)
15065 {
15066 if (speculate)
15067 sprintf (s,
15068 "b%%T%ul\n\t"
15069 "l%s 2,%%%u(1)",
15070 funop, ptrload, funop + 3);
15071 else
15072 sprintf (s,
15073 "beq%%T%ul-\n\t"
15074 "l%s 2,%%%u(1)",
15075 funop, ptrload, funop + 3);
15076 }
15077 else
15078 {
15079 if (speculate)
15080 sprintf (s,
15081 "b%%T%u%s",
15082 funop, sibcall ? "" : "l");
15083 else
15084 sprintf (s,
15085 "beq%%T%u%s-%s",
15086 funop, sibcall ? "" : "l", sibcall ? "\n\tb $" : "");
15087 }
15088 return str;
15089 }
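/* Illustration (assuming funop == 1 and no PLTSEQ unspec): when
   speculation is allowed, the ELFv2 template is "b%T1l\n\tld 2,%4(1)",
   a bctrl/blrl followed by the TOC restore from the ABI's save slot
   (r1+24 on 64-bit ELFv2); with -mno-speculate-indirect-jumps a
   "crset 2" plus "beq%T1l-" pair replaces the plain branch, conditioning
   the call on a CR bit the crset has just made true so the branch cannot
   be taken speculatively.  */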
15090
15091 const char *
15092 rs6000_indirect_call_template (rtx *operands, unsigned int funop)
15093 {
15094 return rs6000_indirect_call_template_1 (operands, funop, false);
15095 }
15096
15097 const char *
15098 rs6000_indirect_sibcall_template (rtx *operands, unsigned int funop)
15099 {
15100 return rs6000_indirect_call_template_1 (operands, funop, true);
15101 }
15102
15103 #if HAVE_AS_PLTSEQ
15104 /* Output indirect call insns. WHICH identifies the type of sequence. */
15105 const char *
15106 rs6000_pltseq_template (rtx *operands, int which)
15107 {
15108 const char *rel64 = TARGET_64BIT ? "64" : "";
15109 char tls[30];
15110 tls[0] = 0;
15111 if (GET_CODE (operands[3]) == UNSPEC)
15112 {
15113 char off = which == RS6000_PLTSEQ_PLT_PCREL34 ? '8' : '4';
15114 if (XINT (operands[3], 1) == UNSPEC_TLSGD)
15115 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSGD,%%3\n\t",
15116 off, rel64);
15117 else if (XINT (operands[3], 1) == UNSPEC_TLSLD)
15118 sprintf (tls, ".reloc .-%c,R_PPC%s_TLSLD,%%&\n\t",
15119 off, rel64);
15120 }
15121
15122 gcc_assert (DEFAULT_ABI == ABI_ELFv2 || DEFAULT_ABI == ABI_V4);
15123 static char str[96]; /* 10 spare */
15124 char off = WORDS_BIG_ENDIAN ? '2' : '4';
15125 const char *addend = (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT
15126 && flag_pic == 2 ? "+32768" : "");
15127 switch (which)
15128 {
15129 case RS6000_PLTSEQ_TOCSAVE:
15130 sprintf (str,
15131 "st%s\n\t"
15132 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2",
15133 TARGET_64BIT ? "d 2,24(1)" : "w 2,12(1)",
15134 tls, rel64);
15135 break;
15136 case RS6000_PLTSEQ_PLT16_HA:
15137 if (DEFAULT_ABI == ABI_V4 && !flag_pic)
15138 sprintf (str,
15139 "lis %%0,0\n\t"
15140 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2",
15141 tls, off, rel64);
15142 else
15143 sprintf (str,
15144 "addis %%0,%%1,0\n\t"
15145 "%s.reloc .-%c,R_PPC%s_PLT16_HA,%%z2%s",
15146 tls, off, rel64, addend);
15147 break;
15148 case RS6000_PLTSEQ_PLT16_LO:
15149 sprintf (str,
15150 "l%s %%0,0(%%1)\n\t"
15151 "%s.reloc .-%c,R_PPC%s_PLT16_LO%s,%%z2%s",
15152 TARGET_64BIT ? "d" : "wz",
15153 tls, off, rel64, TARGET_64BIT ? "_DS" : "", addend);
15154 break;
15155 case RS6000_PLTSEQ_MTCTR:
15156 sprintf (str,
15157 "mtctr %%1\n\t"
15158 "%s.reloc .-4,R_PPC%s_PLTSEQ,%%z2%s",
15159 tls, rel64, addend);
15160 break;
15161 case RS6000_PLTSEQ_PLT_PCREL34:
15162 sprintf (str,
15163 "pl%s %%0,0(0),1\n\t"
15164 "%s.reloc .-8,R_PPC%s_PLT_PCREL34_NOTOC,%%z2",
15165 TARGET_64BIT ? "d" : "wz",
15166 tls, rel64);
15167 break;
15168 default:
15169 gcc_unreachable ();
15170 }
15171 return str;
15172 }
15173 #endif
15174 \f
15175 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
15176 /* Emit an assembler directive to set symbol visibility for DECL to
15177 VISIBILITY_TYPE. */
15178
15179 static void
15180 rs6000_assemble_visibility (tree decl, int vis)
15181 {
15182 if (TARGET_XCOFF)
15183 return;
15184
15185 /* Functions need to have their entry point symbol visibility set as
15186 well as their descriptor symbol visibility. */
15187 if (DEFAULT_ABI == ABI_AIX
15188 && DOT_SYMBOLS
15189 && TREE_CODE (decl) == FUNCTION_DECL)
15190 {
15191 static const char * const visibility_types[] = {
15192 NULL, "protected", "hidden", "internal"
15193 };
15194
15195 const char *name, *type;
15196
15197 name = ((* targetm.strip_name_encoding)
15198 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
15199 type = visibility_types[vis];
15200
15201 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
15202 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
15203 }
15204 else
15205 default_assemble_visibility (decl, vis);
15206 }
15207 #endif
15208 \f
15209 /* Write PATCH_AREA_SIZE NOPs into the asm outfile FILE around a function
15210 entry. If RECORD_P is true and the target supports named sections,
15211 the location of the NOPs will be recorded in a special object section
15212 called "__patchable_function_entries". This routine may be called
15213 twice per function to put NOPs before and after the function
15214 entry. */
15215
15216 void
15217 rs6000_print_patchable_function_entry (FILE *file,
15218 unsigned HOST_WIDE_INT patch_area_size,
15219 bool record_p)
15220 {
15221 bool global_entry_needed_p = rs6000_global_entry_point_prologue_needed_p ();
15222 /* For a function which needs a global entry point, we emit the
15223 patchable area before and after the local entry point under the control
15224 of cfun->machine->global_entry_emitted; see the handling in
15225 rs6000_output_function_prologue. */
15226 if (!global_entry_needed_p || cfun->machine->global_entry_emitted)
15227 default_print_patchable_function_entry (file, patch_area_size, record_p);
15228 }
15229 \f
15230 enum rtx_code
15231 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
15232 {
15233 /* Reversing an FP compare needs care -- an ordered compare
15234 becomes an unordered compare and vice versa. */
15235 if (mode == CCFPmode
15236 && (!flag_finite_math_only
15237 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
15238 || code == UNEQ || code == LTGT))
15239 return reverse_condition_maybe_unordered (code);
15240 else
15241 return reverse_condition (code);
15242 }
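/* For example, reversing GE in CCFPmode yields UNLT rather than LT, so
   the reversed test is still true when the operands compare unordered
   (one of them is a NaN).  */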
15243
15244 /* Check if C (as a 64-bit integer) can be rotated to a constant which
15245 contains nonzero bits in the LOWBITS low bits only.
15246
15247 Return true if C can be rotated to such a constant. If so, the rotation
15248 count is written to *ROT.
15249 Return false otherwise. */
15250
15251 bool
15252 can_be_rotated_to_lowbits (unsigned HOST_WIDE_INT c, int lowbits, int *rot)
15253 {
15254 int clz = HOST_BITS_PER_WIDE_INT - lowbits;
15255
15256 /* case a. 0..0xxx: already at least clz zeros. */
15257 int lz = clz_hwi (c);
15258 if (lz >= clz)
15259 {
15260 *rot = 0;
15261 return true;
15262 }
15263
15264 /* case b. 0..0xxx0..0: at least clz zeros. */
15265 int tz = ctz_hwi (c);
15266 if (lz + tz >= clz)
15267 {
15268 *rot = HOST_BITS_PER_WIDE_INT - tz;
15269 return true;
15270 }
15271
15272 /* case c. xx10.....0xx: the nonzero bits wrap around the word boundary.
15273 Rotate C right by 'lowbits + 1' bits and apply the case b test to the
15274 result; enough leading plus trailing zeros there means C qualifies. */
15275 const int rot_bits = lowbits + 1;
15276 unsigned HOST_WIDE_INT rc = (c >> rot_bits) | (c << (clz - 1));
15277 tz = ctz_hwi (rc);
15278 if (clz_hwi (rc) + tz >= clz)
15279 {
15280 *rot = HOST_BITS_PER_WIDE_INT - (tz + rot_bits);
15281 return true;
15282 }
15283
15284 return false;
15285 }
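/* Worked example (illustration): c == 0x8000000000000001 with
   lowbits == 16, so clz == 48.  Cases a and b fail (lz == tz == 0).
   Case c rotates right by 17 bits, giving rc == 0x0000c00000000000 with
   16 leading and 46 trailing zeros; 16 + 46 >= 48, so the function
   returns true with *rot == 64 - (46 + 17) == 1, and indeed rotating C
   left by one bit yields 0x3, nonzero only in the low 16 bits.  */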
15286
15287 /* Check if C (as a 64-bit integer) can be rotated to a positive 16-bit
15288 constant: 48 leading zero bits followed by 16 bits of any value. */
15289
15290 bool
15291 can_be_rotated_to_positive_16bits (HOST_WIDE_INT c)
15292 {
15293 int rot = 0;
15294 bool res = can_be_rotated_to_lowbits (c, 16, &rot);
15295 return res && rot > 0;
15296 }
15297
15298 /* Check if C (as a 64-bit integer) can be rotated to a negative 15-bit
15299 constant: 49 leading one bits followed by 15 bits of any value. */
15300
15301 bool
15302 can_be_rotated_to_negative_15bits (HOST_WIDE_INT c)
15303 {
15304 int rot = 0;
15305 bool res = can_be_rotated_to_lowbits (~c, 15, &rot);
15306 return res && rot > 0;
15307 }
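/* Illustration: 0x30000 rotates left by 48 bits to 0x3, so
   can_be_rotated_to_positive_16bits returns true (rot == 48 > 0), while
   0x3 itself is rejected (rot == 0), a value already in the low 16 bits
   needing no rotation to start with.  */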
15308
15309 /* Generate a compare for CODE. Return a brand-new rtx that
15310 represents the result of the compare. */
15311
15312 static rtx
15313 rs6000_generate_compare (rtx cmp, machine_mode mode)
15314 {
15315 machine_mode comp_mode;
15316 rtx compare_result;
15317 enum rtx_code code = GET_CODE (cmp);
15318 rtx op0 = XEXP (cmp, 0);
15319 rtx op1 = XEXP (cmp, 1);
15320
15321 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
15322 comp_mode = CCmode;
15323 else if (FLOAT_MODE_P (mode))
15324 comp_mode = CCFPmode;
15325 else if (code == GTU || code == LTU
15326 || code == GEU || code == LEU)
15327 comp_mode = CCUNSmode;
15328 else if ((code == EQ || code == NE)
15329 && unsigned_reg_p (op0)
15330 && (unsigned_reg_p (op1)
15331 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
15332 /* These are unsigned values, perhaps there will be a later
15333 ordering compare that can be shared with this one. */
15334 comp_mode = CCUNSmode;
15335 else
15336 comp_mode = CCmode;
15337
15338 /* If we have an unsigned compare, make sure we don't have a signed value as
15339 an immediate. */
15340 if (comp_mode == CCUNSmode && CONST_INT_P (op1)
15341 && INTVAL (op1) < 0)
15342 {
15343 op0 = copy_rtx_if_shared (op0);
15344 op1 = force_reg (GET_MODE (op0), op1);
15345 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
15346 }
15347
15348 /* First, the compare. */
15349 compare_result = gen_reg_rtx (comp_mode);
15350
15351 /* IEEE 128-bit support in VSX registers when we do not have hardware
15352 support. */
15353 if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode))
15354 {
15355 rtx libfunc = NULL_RTX;
15356 bool check_nan = false;
15357 rtx dest;
15358
15359 switch (code)
15360 {
15361 case EQ:
15362 case NE:
15363 libfunc = optab_libfunc (eq_optab, mode);
15364 break;
15365
15366 case GT:
15367 case GE:
15368 libfunc = optab_libfunc (ge_optab, mode);
15369 break;
15370
15371 case LT:
15372 case LE:
15373 libfunc = optab_libfunc (le_optab, mode);
15374 break;
15375
15376 case UNORDERED:
15377 case ORDERED:
15378 libfunc = optab_libfunc (unord_optab, mode);
15379 code = (code == UNORDERED) ? NE : EQ;
15380 break;
15381
15382 case UNGE:
15383 case UNGT:
15384 check_nan = true;
15385 libfunc = optab_libfunc (ge_optab, mode);
15386 code = (code == UNGE) ? GE : GT;
15387 break;
15388
15389 case UNLE:
15390 case UNLT:
15391 check_nan = true;
15392 libfunc = optab_libfunc (le_optab, mode);
15393 code = (code == UNLE) ? LE : LT;
15394 break;
15395
15396 case UNEQ:
15397 case LTGT:
15398 check_nan = true;
15399 libfunc = optab_libfunc (eq_optab, mode);
15400 code = (code == UNEQ) ? EQ : NE;
15401 break;
15402
15403 default:
15404 gcc_unreachable ();
15405 }
15406
15407 gcc_assert (libfunc);
15408
15409 if (!check_nan)
15410 dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
15411 SImode, op0, mode, op1, mode);
15412
15413 /* The library signals an exception for signalling NaNs, so we need to
15414 handle isgreater, etc. by first checking isordered. */
15415 else
15416 {
15417 rtx ne_rtx, normal_dest, unord_dest;
15418 rtx unord_func = optab_libfunc (unord_optab, mode);
15419 rtx join_label = gen_label_rtx ();
15420 rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label);
15421 rtx unord_cmp = gen_reg_rtx (comp_mode);
15422
15423
15424 /* Test for either value being a NaN. */
15425 gcc_assert (unord_func);
15426 unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST,
15427 SImode, op0, mode, op1, mode);
15428
15429 /* If either value is a NaN, set the result to 1 (the final NE test
15430 against zero then yields true) and jump to the join label. */
15431 dest = gen_reg_rtx (SImode);
15432 emit_move_insn (dest, const1_rtx);
15433 emit_insn (gen_rtx_SET (unord_cmp,
15434 gen_rtx_COMPARE (comp_mode, unord_dest,
15435 const0_rtx)));
15436
15437 ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx);
15438 emit_jump_insn (gen_rtx_SET (pc_rtx,
15439 gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx,
15440 join_ref,
15441 pc_rtx)));
15442
15443 /* Do the normal comparison, knowing that the values are not
15444 NaNs. */
15445 normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
15446 SImode, op0, mode, op1, mode);
15447
15448 emit_insn (gen_cstoresi4 (dest,
15449 gen_rtx_fmt_ee (code, SImode, normal_dest,
15450 const0_rtx),
15451 normal_dest, const0_rtx));
15452
15453 /* Join the NaN and non-NaN paths. Compare dest against 0. */
15454 emit_label (join_label);
15455 code = NE;
15456 }
15457
15458 emit_insn (gen_rtx_SET (compare_result,
15459 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
15460 }
15461
15462 else
15463 {
15464 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
15465 CLOBBERs to match cmptf_internal2 pattern. */
15466 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
15467 && FLOAT128_IBM_P (GET_MODE (op0))
15468 && TARGET_HARD_FLOAT)
15469 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15470 gen_rtvec (10,
15471 gen_rtx_SET (compare_result,
15472 gen_rtx_COMPARE (comp_mode, op0, op1)),
15473 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15474 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15475 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15476 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15477 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15478 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15479 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15480 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
15481 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
15482 else if (GET_CODE (op1) == UNSPEC
15483 && XINT (op1, 1) == UNSPEC_SP_TEST)
15484 {
15485 rtx op1b = XVECEXP (op1, 0, 0);
15486 comp_mode = CCEQmode;
15487 compare_result = gen_reg_rtx (CCEQmode);
15488 if (TARGET_64BIT)
15489 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
15490 else
15491 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
15492 }
15493 else if (mode == V16QImode)
15494 {
15495 gcc_assert (code == EQ || code == NE);
15496
15497 rtx result_vector = gen_reg_rtx (V16QImode);
15498 rtx cc_bit = gen_reg_rtx (SImode);
15499 emit_insn (gen_altivec_vcmpequb_p (result_vector, op0, op1));
15500 emit_insn (gen_cr6_test_for_lt (cc_bit));
15501 emit_insn (gen_rtx_SET (compare_result,
15502 gen_rtx_COMPARE (comp_mode, cc_bit,
15503 const1_rtx)));
15504 }
15505 else
15506 emit_insn (gen_rtx_SET (compare_result,
15507 gen_rtx_COMPARE (comp_mode, op0, op1)));
15508 }
15509
15510 validate_condition_mode (code, GET_MODE (compare_result));
15511
15512 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
15513 }
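/* Illustration: (gtu x y) selects CCUNSmode above, and an unsigned
   compare against a negative immediate such as -1 first forces the
   constant into a register, because the cmplwi/cmpldi immediate field
   only encodes an unsigned 16-bit value.  */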
15514
15515 \f
15516 /* Return the diagnostic message string if the binary operation OP is
15517 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15518
15519 static const char*
15520 rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED,
15521 const_tree type1,
15522 const_tree type2)
15523 {
15524 machine_mode mode1 = TYPE_MODE (type1);
15525 machine_mode mode2 = TYPE_MODE (type2);
15526
15527 /* For complex modes, use the inner type. */
15528 if (COMPLEX_MODE_P (mode1))
15529 mode1 = GET_MODE_INNER (mode1);
15530
15531 if (COMPLEX_MODE_P (mode2))
15532 mode2 = GET_MODE_INNER (mode2);
15533
15534 /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended
15535 double to intermix unless -mfloat128-convert. */
15536 if (mode1 == mode2)
15537 return NULL;
15538
15539 if (!TARGET_FLOAT128_CVT)
15540 {
15541 if ((FLOAT128_IEEE_P (mode1) && FLOAT128_IBM_P (mode2))
15542 || (FLOAT128_IBM_P (mode1) && FLOAT128_IEEE_P (mode2)))
15543 return N_("Invalid mixing of IEEE 128-bit and IBM 128-bit floating "
15544 "point types");
15545 }
15546
15547 return NULL;
15548 }
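/* For example, mixing a __float128 (KFmode) operand with a __ibm128
   (IFmode) operand is diagnosed with the message above unless
   -mfloat128-convert is in effect; same-mode operands always pass.  */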
15549
15550 \f
15551 /* Expand floating point conversion to/from __float128 and __ibm128. */
15552
15553 void
15554 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
15555 {
15556 machine_mode dest_mode = GET_MODE (dest);
15557 machine_mode src_mode = GET_MODE (src);
15558 convert_optab cvt = unknown_optab;
15559 bool do_move = false;
15560 rtx libfunc = NULL_RTX;
15561 rtx dest2;
15562 typedef rtx (*rtx_2func_t) (rtx, rtx);
15563 rtx_2func_t hw_convert = (rtx_2func_t)0;
15564 size_t kf_or_tf;
15565
15566 struct hw_conv_t {
15567 rtx_2func_t from_df;
15568 rtx_2func_t from_sf;
15569 rtx_2func_t from_si_sign;
15570 rtx_2func_t from_si_uns;
15571 rtx_2func_t from_di_sign;
15572 rtx_2func_t from_di_uns;
15573 rtx_2func_t to_df;
15574 rtx_2func_t to_sf;
15575 rtx_2func_t to_si_sign;
15576 rtx_2func_t to_si_uns;
15577 rtx_2func_t to_di_sign;
15578 rtx_2func_t to_di_uns;
15579 } hw_conversions[2] = {
15580 /* conversions to/from KFmode */
15581 {
15582 gen_extenddfkf2_hw, /* KFmode <- DFmode. */
15583 gen_extendsfkf2_hw, /* KFmode <- SFmode. */
15584 gen_float_kfsi2_hw, /* KFmode <- SImode (signed). */
15585 gen_floatuns_kfsi2_hw, /* KFmode <- SImode (unsigned). */
15586 gen_float_kfdi2_hw, /* KFmode <- DImode (signed). */
15587 gen_floatuns_kfdi2_hw, /* KFmode <- DImode (unsigned). */
15588 gen_trunckfdf2_hw, /* DFmode <- KFmode. */
15589 gen_trunckfsf2_hw, /* SFmode <- KFmode. */
15590 gen_fix_kfsi2_hw, /* SImode <- KFmode (signed). */
15591 gen_fixuns_kfsi2_hw, /* SImode <- KFmode (unsigned). */
15592 gen_fix_kfdi2_hw, /* DImode <- KFmode (signed). */
15593 gen_fixuns_kfdi2_hw, /* DImode <- KFmode (unsigned). */
15594 },
15595
15596 /* conversions to/from TFmode */
15597 {
15598 gen_extenddftf2_hw, /* TFmode <- DFmode. */
15599 gen_extendsftf2_hw, /* TFmode <- SFmode. */
15600 gen_float_tfsi2_hw, /* TFmode <- SImode (signed). */
15601 gen_floatuns_tfsi2_hw, /* TFmode <- SImode (unsigned). */
15602 gen_float_tfdi2_hw, /* TFmode <- DImode (signed). */
15603 gen_floatuns_tfdi2_hw, /* TFmode <- DImode (unsigned). */
15604 gen_trunctfdf2_hw, /* DFmode <- TFmode. */
15605 gen_trunctfsf2_hw, /* SFmode <- TFmode. */
15606 gen_fix_tfsi2_hw, /* SImode <- TFmode (signed). */
15607 gen_fixuns_tfsi2_hw, /* SImode <- TFmode (unsigned). */
15608 gen_fix_tfdi2_hw, /* DImode <- TFmode (signed). */
15609 gen_fixuns_tfdi2_hw, /* DImode <- TFmode (unsigned). */
15610 },
15611 };
15612
15613 if (dest_mode == src_mode)
15614 gcc_unreachable ();
15615
15616 /* Eliminate memory operations. */
15617 if (MEM_P (src))
15618 src = force_reg (src_mode, src);
15619
15620 if (MEM_P (dest))
15621 {
15622 rtx tmp = gen_reg_rtx (dest_mode);
15623 rs6000_expand_float128_convert (tmp, src, unsigned_p);
15624 rs6000_emit_move (dest, tmp, dest_mode);
15625 return;
15626 }
15627
15628 /* Convert to IEEE 128-bit floating point. */
15629 if (FLOAT128_IEEE_P (dest_mode))
15630 {
15631 if (dest_mode == KFmode)
15632 kf_or_tf = 0;
15633 else if (dest_mode == TFmode)
15634 kf_or_tf = 1;
15635 else
15636 gcc_unreachable ();
15637
15638 switch (src_mode)
15639 {
15640 case E_DFmode:
15641 cvt = sext_optab;
15642 hw_convert = hw_conversions[kf_or_tf].from_df;
15643 break;
15644
15645 case E_SFmode:
15646 cvt = sext_optab;
15647 hw_convert = hw_conversions[kf_or_tf].from_sf;
15648 break;
15649
15650 case E_KFmode:
15651 case E_IFmode:
15652 case E_TFmode:
15653 if (FLOAT128_IBM_P (src_mode))
15654 cvt = sext_optab;
15655 else
15656 do_move = true;
15657 break;
15658
15659 case E_SImode:
15660 if (unsigned_p)
15661 {
15662 cvt = ufloat_optab;
15663 hw_convert = hw_conversions[kf_or_tf].from_si_uns;
15664 }
15665 else
15666 {
15667 cvt = sfloat_optab;
15668 hw_convert = hw_conversions[kf_or_tf].from_si_sign;
15669 }
15670 break;
15671
15672 case E_DImode:
15673 if (unsigned_p)
15674 {
15675 cvt = ufloat_optab;
15676 hw_convert = hw_conversions[kf_or_tf].from_di_uns;
15677 }
15678 else
15679 {
15680 cvt = sfloat_optab;
15681 hw_convert = hw_conversions[kf_or_tf].from_di_sign;
15682 }
15683 break;
15684
15685 default:
15686 gcc_unreachable ();
15687 }
15688 }
15689
15690 /* Convert from IEEE 128-bit floating point. */
15691 else if (FLOAT128_IEEE_P (src_mode))
15692 {
15693 if (src_mode == KFmode)
15694 kf_or_tf = 0;
15695 else if (src_mode == TFmode)
15696 kf_or_tf = 1;
15697 else
15698 gcc_unreachable ();
15699
15700 switch (dest_mode)
15701 {
15702 case E_DFmode:
15703 cvt = trunc_optab;
15704 hw_convert = hw_conversions[kf_or_tf].to_df;
15705 break;
15706
15707 case E_SFmode:
15708 cvt = trunc_optab;
15709 hw_convert = hw_conversions[kf_or_tf].to_sf;
15710 break;
15711
15712 case E_KFmode:
15713 case E_IFmode:
15714 case E_TFmode:
15715 if (FLOAT128_IBM_P (dest_mode))
15716 cvt = trunc_optab;
15717 else
15718 do_move = true;
15719 break;
15720
15721 case E_SImode:
15722 if (unsigned_p)
15723 {
15724 cvt = ufix_optab;
15725 hw_convert = hw_conversions[kf_or_tf].to_si_uns;
15726 }
15727 else
15728 {
15729 cvt = sfix_optab;
15730 hw_convert = hw_conversions[kf_or_tf].to_si_sign;
15731 }
15732 break;
15733
15734 case E_DImode:
15735 if (unsigned_p)
15736 {
15737 cvt = ufix_optab;
15738 hw_convert = hw_conversions[kf_or_tf].to_di_uns;
15739 }
15740 else
15741 {
15742 cvt = sfix_optab;
15743 hw_convert = hw_conversions[kf_or_tf].to_di_sign;
15744 }
15745 break;
15746
15747 default:
15748 gcc_unreachable ();
15749 }
15750 }
15751
15752 /* Both IBM format. */
15753 else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
15754 do_move = true;
15755
15756 else
15757 gcc_unreachable ();
15758
15759 /* Handle conversion between TFmode/KFmode/IFmode. */
15760 if (do_move)
15761 emit_insn (gen_rtx_SET (dest, gen_rtx_FLOAT_EXTEND (dest_mode, src)));
15762
15763 /* Handle conversion if we have hardware support. */
15764 else if (TARGET_FLOAT128_HW && hw_convert)
15765 emit_insn ((hw_convert) (dest, src));
15766
15767 /* Call an external function to do the conversion. */
15768 else if (cvt != unknown_optab)
15769 {
15770 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
15771 gcc_assert (libfunc != NULL_RTX);
15772
15773 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
15774 src, src_mode);
15775
15776 gcc_assert (dest2 != NULL_RTX);
15777 if (!rtx_equal_p (dest, dest2))
15778 emit_move_insn (dest, dest2);
15779 }
15780
15781 else
15782 gcc_unreachable ();
15783
15784 return;
15785 }
15786
15787 \f
15788 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
15789 can be used as that dest register. Return the dest register. */
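/* For example, with OP1 in r3 and OP2 == 5, the XOR arm emits roughly
   "xori 9,3,5", so the result register is zero iff r3 == 5; a constant
   that is not a logical_operand instead goes through the PLUS/negate
   arm.  (Illustrative sketch only; the exact insns depend on the
   operands.)  */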
15790
15791 rtx
15792 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
15793 {
15794 if (op2 == const0_rtx)
15795 return op1;
15796
15797 if (GET_CODE (scratch) == SCRATCH)
15798 scratch = gen_reg_rtx (mode);
15799
15800 if (logical_operand (op2, mode))
15801 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
15802 else
15803 emit_insn (gen_rtx_SET (scratch,
15804 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
15805
15806 return scratch;
15807 }
15808
15809 /* Emit code doing a cror of two CR bits, for FP comparisons with a CODE that
15810 requires this. The result is mode MODE. */
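/* For example, CODE == LE selects the LT and EQ bits of the CR field and
   ORs them together, since (x <= y) <=> (x < y) || (x == y).  (Sketch of
   the cond[] selection below.)  */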
15811 rtx
15812 rs6000_emit_fp_cror (rtx_code code, machine_mode mode, rtx x)
15813 {
15814 rtx cond[2];
15815 int n = 0;
15816 if (code == LTGT || code == LE || code == UNLT)
15817 cond[n++] = gen_rtx_fmt_ee (LT, mode, x, const0_rtx);
15818 if (code == LTGT || code == GE || code == UNGT)
15819 cond[n++] = gen_rtx_fmt_ee (GT, mode, x, const0_rtx);
15820 if (code == LE || code == GE || code == UNEQ)
15821 cond[n++] = gen_rtx_fmt_ee (EQ, mode, x, const0_rtx);
15822 if (code == UNLT || code == UNGT || code == UNEQ)
15823 cond[n++] = gen_rtx_fmt_ee (UNORDERED, mode, x, const0_rtx);
15824
15825 gcc_assert (n == 2);
15826
15827 rtx cc = gen_reg_rtx (CCEQmode);
15828 rtx logical = gen_rtx_IOR (mode, cond[0], cond[1]);
15829 emit_insn (gen_cceq_ior_compare (mode, cc, logical, cond[0], x, cond[1], x));
15830
15831 return cc;
15832 }
15833
15834 void
15835 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
15836 {
15837 rtx condition_rtx = rs6000_generate_compare (operands[1], mode);
15838 rtx_code cond_code = GET_CODE (condition_rtx);
15839
15840 if (FLOAT_MODE_P (mode) && HONOR_NANS (mode)
15841 && !(FLOAT128_VECTOR_P (mode) && !TARGET_FLOAT128_HW))
15842 ;
15843 else if (cond_code == NE
15844 || cond_code == GE || cond_code == LE
15845 || cond_code == GEU || cond_code == LEU
15846 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
15847 {
15848 rtx not_result = gen_reg_rtx (CCEQmode);
15849 rtx not_op, rev_cond_rtx;
15850 machine_mode cc_mode;
15851
15852 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
15853
15854 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
15855 SImode, XEXP (condition_rtx, 0), const0_rtx);
15856 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
15857 emit_insn (gen_rtx_SET (not_result, not_op));
15858 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
15859 }
15860
15861 machine_mode op_mode = GET_MODE (XEXP (operands[1], 0));
15862 if (op_mode == VOIDmode)
15863 op_mode = GET_MODE (XEXP (operands[1], 1));
15864
15865 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
15866 {
15867 PUT_MODE (condition_rtx, DImode);
15868 convert_move (operands[0], condition_rtx, 0);
15869 }
15870 else
15871 {
15872 PUT_MODE (condition_rtx, SImode);
15873 emit_insn (gen_rtx_SET (operands[0], condition_rtx));
15874 }
15875 }
15876
15877 /* Emit a conditional branch to the label in OPERANDS[3], testing the comparison in OPERANDS[0], in mode MODE. */
15878
15879 void
15880 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
15881 {
15882 rtx condition_rtx = rs6000_generate_compare (operands[0], mode);
15883 rtx loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
15884 rtx ite = gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, loc_ref, pc_rtx);
15885 emit_jump_insn (gen_rtx_SET (pc_rtx, ite));
15886 }
15887
15888 /* Return the string to output a conditional branch to LABEL, which is
15889 the operand template of the label, or NULL if the branch is really a
15890 conditional return.
15891
15892 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
15893 condition code register and its mode specifies what kind of
15894 comparison we made.
15895
15896 REVERSED is nonzero if we should reverse the sense of the comparison.
15897
15898 INSN is the insn. */
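/* For example, a highly likely EQ branch on cr7 yields text like
   "beq+ 7,.L5", while an out-of-range branch uses the inverted long form
   "bne 7,$+8\n\tb .L5".  (Illustrative output only; the exact register
   name depends on reg_names for the target.)  */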
15899
15900 char *
15901 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
15902 {
15903 static char string[64];
15904 enum rtx_code code = GET_CODE (op);
15905 rtx cc_reg = XEXP (op, 0);
15906 machine_mode mode = GET_MODE (cc_reg);
15907 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
15908 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
15909 int really_reversed = reversed ^ need_longbranch;
15910 char *s = string;
15911 const char *ccode;
15912 const char *pred;
15913 rtx note;
15914
15915 validate_condition_mode (code, mode);
15916
15917 /* Work out which way this really branches. We could use
15918 reverse_condition_maybe_unordered here always but this
15919 makes the resulting assembler clearer. */
15920 if (really_reversed)
15921 {
15922 /* Reversal of FP compares requires care -- an ordered compare
15923 becomes an unordered compare and vice versa. */
15924 if (mode == CCFPmode)
15925 code = reverse_condition_maybe_unordered (code);
15926 else
15927 code = reverse_condition (code);
15928 }
15929
15930 switch (code)
15931 {
15932 /* Not all of these are actually distinct opcodes, but
15933 we distinguish them for clarity of the resulting assembler. */
15934 case NE: case LTGT:
15935 ccode = "ne"; break;
15936 case EQ: case UNEQ:
15937 ccode = "eq"; break;
15938 case GE: case GEU:
15939 ccode = "ge"; break;
15940 case GT: case GTU: case UNGT:
15941 ccode = "gt"; break;
15942 case LE: case LEU:
15943 ccode = "le"; break;
15944 case LT: case LTU: case UNLT:
15945 ccode = "lt"; break;
15946 case UNORDERED: ccode = "un"; break;
15947 case ORDERED: ccode = "nu"; break;
15948 case UNGE: ccode = "nl"; break;
15949 case UNLE: ccode = "ng"; break;
15950 default:
15951 gcc_unreachable ();
15952 }
15953
15954 /* Maybe we have a guess as to how likely the branch is. */
15955 pred = "";
15956 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
15957 if (note != NULL_RTX)
15958 {
15959 /* PROB is the difference from 50%. */
15960 int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
15961 .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;
15962
15963 /* Only hint for highly probable/improbable branches on newer cpus when
15964 we have real profile data, as static prediction overrides processor
15965 dynamic prediction. For older cpus we may as well always hint, but
15966 assume not taken for branches that are very close to 50% as a
15967 mispredicted taken branch is more expensive than a
15968 mispredicted not-taken branch. */
15969 if (rs6000_always_hint
15970 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
15971 && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
15972 && br_prob_note_reliable_p (note)))
15973 {
15974 if (abs (prob) > REG_BR_PROB_BASE / 20
15975 && ((prob > 0) ^ need_longbranch))
15976 pred = "+";
15977 else
15978 pred = "-";
15979 }
15980 }
15981
15982 if (label == NULL)
15983 s += sprintf (s, "b%slr%s ", ccode, pred);
15984 else
15985 s += sprintf (s, "b%s%s ", ccode, pred);
15986
15987 /* We need to escape any '%' characters in the reg_names string.
15988 Assume they'd only be the first character.... */
15989 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
15990 *s++ = '%';
15991 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
15992
15993 if (label != NULL)
15994 {
15995 /* If the branch distance was too far, we may have to use an
15996 unconditional branch to go the distance. */
15997 if (need_longbranch)
15998 s += sprintf (s, ",$+8\n\tb %s", label);
15999 else
16000 s += sprintf (s, ",%s", label);
16001 }
16002
16003 return string;
16004 }
16005
16006 /* Return the comparison mask register for VSX or Altivec comparisons, or NULL_RTX if the comparison CODE is not handled directly. */
16007
16008 static rtx
16009 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
16010 {
16011 rtx mask;
16012 machine_mode mode = GET_MODE (op0);
16013
16014 switch (code)
16015 {
16016 default:
16017 break;
16018
16019 case GE:
16020 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
16021 return NULL_RTX;
16022 /* FALLTHRU */
16023
16024 case EQ:
16025 case GT:
16026 case GTU:
16027 case ORDERED:
16028 case UNORDERED:
16029 case UNEQ:
16030 case LTGT:
16031 mask = gen_reg_rtx (mode);
16032 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
16033 return mask;
16034 }
16035
16036 return NULL_RTX;
16037 }
16038
16039 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
16040 DMODE is the expected destination mode. This is a recursive function. */
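/* For example, LE with no direct vector instruction is synthesized below
   as (LT OR EQ), and NE as the one's complement of EQ.  (Illustrative;
   see the GE/GEU/LE/LEU and NE/UN* cases.)  */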
16041
16042 static rtx
16043 rs6000_emit_vector_compare (enum rtx_code rcode,
16044 rtx op0, rtx op1,
16045 machine_mode dmode)
16046 {
16047 rtx mask;
16048 bool swap_operands = false;
16049 bool try_again = false;
16050
16051 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
16052 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
16053
16054 /* See if the comparison works as is. */
16055 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
16056 if (mask)
16057 return mask;
16058
16059 switch (rcode)
16060 {
16061 case LT:
16062 rcode = GT;
16063 swap_operands = true;
16064 try_again = true;
16065 break;
16066 case LTU:
16067 rcode = GTU;
16068 swap_operands = true;
16069 try_again = true;
16070 break;
16071 case NE:
16072 case UNLE:
16073 case UNLT:
16074 case UNGE:
16075 case UNGT:
16076 /* Invert condition and try again.
16077 e.g., A != B becomes ~(A==B). */
16078 {
16079 enum rtx_code rev_code;
16080 enum insn_code nor_code;
16081 rtx mask2;
16082
16083 rev_code = reverse_condition_maybe_unordered (rcode);
16084 if (rev_code == UNKNOWN)
16085 return NULL_RTX;
16086
16087 nor_code = optab_handler (one_cmpl_optab, dmode);
16088 if (nor_code == CODE_FOR_nothing)
16089 return NULL_RTX;
16090
16091 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
16092 if (!mask2)
16093 return NULL_RTX;
16094
16095 mask = gen_reg_rtx (dmode);
16096 emit_insn (GEN_FCN (nor_code) (mask, mask2));
16097 return mask;
16098 }
16099 break;
16100 case GE:
16101 case GEU:
16102 case LE:
16103 case LEU:
16104 /* Try GT/GTU/LT/LTU OR EQ */
16105 {
16106 rtx c_rtx, eq_rtx;
16107 enum insn_code ior_code;
16108 enum rtx_code new_code;
16109
16110 switch (rcode)
16111 {
16112 case GE:
16113 new_code = GT;
16114 break;
16115
16116 case GEU:
16117 new_code = GTU;
16118 break;
16119
16120 case LE:
16121 new_code = LT;
16122 break;
16123
16124 case LEU:
16125 new_code = LTU;
16126 break;
16127
16128 default:
16129 gcc_unreachable ();
16130 }
16131
16132 ior_code = optab_handler (ior_optab, dmode);
16133 if (ior_code == CODE_FOR_nothing)
16134 return NULL_RTX;
16135
16136 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
16137 if (!c_rtx)
16138 return NULL_RTX;
16139
16140 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
16141 if (!eq_rtx)
16142 return NULL_RTX;
16143
16144 mask = gen_reg_rtx (dmode);
16145 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
16146 return mask;
16147 }
16148 break;
16149 default:
16150 return NULL_RTX;
16151 }
16152
16153 if (try_again)
16154 {
16155 if (swap_operands)
16156 std::swap (op0, op1);
16157
16158 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
16159 if (mask)
16160 return mask;
16161 }
16162
16163 /* You only get two chances. */
16164 return NULL_RTX;
16165 }
16166
16167 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
16168 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
16169 operands for the relation operation COND. */
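/* The expansion computes a -1/0 per-element mask from the comparison and
   then selects, roughly:  dest = (mask & op_true) | (~mask & op_false).
   (Sketch of the RTL emitted at the end of this function.)  */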
16170
16171 int
16172 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
16173 rtx cond, rtx cc_op0, rtx cc_op1)
16174 {
16175 machine_mode dest_mode = GET_MODE (dest);
16176 machine_mode mask_mode = GET_MODE (cc_op0);
16177 enum rtx_code rcode = GET_CODE (cond);
16178 rtx mask;
16179 bool invert_move = false;
16180
16181 if (VECTOR_UNIT_NONE_P (dest_mode))
16182 return 0;
16183
16184 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
16185 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
16186
16187 switch (rcode)
16188 {
16189 /* For these codes, invert the condition and swap the true/false arms of
16190 the conditional move instead of emitting a compound test. */
16191 case NE:
16192 case UNLE:
16193 case UNLT:
16194 case UNGE:
16195 case UNGT:
16196 /* Invert condition and try again.
16197 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
16198 invert_move = true;
16199 rcode = reverse_condition_maybe_unordered (rcode);
16200 if (rcode == UNKNOWN)
16201 return 0;
16202 break;
16203
16204 case GE:
16205 case LE:
16206 if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
16207 {
16208 /* Invert condition to avoid compound test. */
16209 invert_move = true;
16210 rcode = reverse_condition (rcode);
16211 }
16212 break;
16213
16214 case GTU:
16215 case GEU:
16216 case LTU:
16217 case LEU:
16218
16219 /* Invert condition to avoid compound test if necessary. */
16220 if (rcode == GEU || rcode == LEU)
16221 {
16222 invert_move = true;
16223 rcode = reverse_condition (rcode);
16224 }
16225 break;
16226
16227 default:
16228 break;
16229 }
16230
16231 /* Get the vector mask for the given relational operations. */
16232 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
16233
16234 if (!mask)
16235 return 0;
16236
16237 if (mask_mode != dest_mode)
16238 mask = simplify_gen_subreg (dest_mode, mask, mask_mode, 0);
16239
16240 if (invert_move)
16241 std::swap (op_true, op_false);
16242
16243 /* The comparison mask is known to be -1/0 in each element, so optimize the cases where the arms are the constant vectors -1 and 0. */
16244 if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
16245 && (GET_CODE (op_true) == CONST_VECTOR
16246 || GET_CODE (op_false) == CONST_VECTOR))
16247 {
16248 rtx constant_0 = CONST0_RTX (dest_mode);
16249 rtx constant_m1 = CONSTM1_RTX (dest_mode);
16250
16251 if (op_true == constant_m1 && op_false == constant_0)
16252 {
16253 emit_move_insn (dest, mask);
16254 return 1;
16255 }
16256
16257 else if (op_true == constant_0 && op_false == constant_m1)
16258 {
16259 emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
16260 return 1;
16261 }
16262
16263 /* If we can't use the vector comparison directly, perhaps we can use
16264 the mask for the true or false fields, instead of loading up a
16265 constant. */
16266 if (op_true == constant_m1)
16267 op_true = mask;
16268
16269 if (op_false == constant_0)
16270 op_false = mask;
16271 }
16272
16273 if (!REG_P (op_true) && !SUBREG_P (op_true))
16274 op_true = force_reg (dest_mode, op_true);
16275
16276 if (!REG_P (op_false) && !SUBREG_P (op_false))
16277 op_false = force_reg (dest_mode, op_false);
16278
16279 rtx tmp = gen_rtx_IOR (dest_mode,
16280 gen_rtx_AND (dest_mode, gen_rtx_NOT (dest_mode, mask),
16281 op_false),
16282 gen_rtx_AND (dest_mode, mask, op_true));
16283 emit_insn (gen_rtx_SET (dest, tmp));
16284 return 1;
16285 }
16286
16287 /* Possibly emit the xsmaxc{dp,qp} and xsminc{dp,qp} instructions to
16288 implement a maximum or minimum with "C" semantics.
16289
16290 Unless you use -ffast-math, you can't use these instructions for forms
16291 that implicitly reverse the sense of the comparison, because the
16292 comparison might generate a NaN or signed zero.
16293
16294 I.e. the following can be replaced all of the time:
16295 ret = (op1 > op2) ? op1 : op2 ; generate xsmaxcdp
16296 ret = (op1 >= op2) ? op1 : op2 ; generate xsmaxcdp
16297 ret = (op1 < op2) ? op1 : op2; ; generate xsmincdp
16298 ret = (op1 <= op2) ? op1 : op2; ; generate xsmincdp
16299
16300 The following can be replaced only if -ffast-math is used:
16301 ret = (op1 < op2) ? op2 : op1 ; generate xsmaxcdp
16302 ret = (op1 <= op2) ? op2 : op1 ; generate xsmaxcdp
16303 ret = (op1 > op2) ? op2 : op1; ; generate xsmincdp
16304 ret = (op1 >= op2) ? op2 : op1; ; generate xsmincdp
16305
16306 Move TRUE_COND to DEST if OP applied to the operands of the last
16307 comparison is nonzero/true, FALSE_COND if it is zero/false.
16308
16309 Return false if we can't generate the appropriate minimum or maximum, and
16310 true if we did emit the minimum or maximum. */
16311
16312 static bool
16313 rs6000_maybe_emit_maxc_minc (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16314 {
16315 enum rtx_code code = GET_CODE (op);
16316 rtx op0 = XEXP (op, 0);
16317 rtx op1 = XEXP (op, 1);
16318 machine_mode compare_mode = GET_MODE (op0);
16319 machine_mode result_mode = GET_MODE (dest);
16320
16321 if (result_mode != compare_mode)
16322 return false;
16323
16324 /* See the comments of this function; it simply expects GE/GT/LE/LT in
16325 the checks, but for the reversible equivalents UNLT/UNLE/UNGT/UNGE,
16326 we need to do the reversions first to make the following checks
16327 support fewer cases, like:
16328
16329 (a UNLT b) ? op1 : op2 => (a >= b) ? op2 : op1;
16330 (a UNLE b) ? op1 : op2 => (a > b) ? op2 : op1;
16331 (a UNGT b) ? op1 : op2 => (a <= b) ? op2 : op1;
16332 (a UNGE b) ? op1 : op2 => (a < b) ? op2 : op1;
16333
16334 By the way, if we see these UNLT/UNLE/UNGT/UNGE it's guaranteed
16335 that we have 4-way condition codes (LT/GT/EQ/UN), so we do not
16336 have to check for fast-math or the like. */
16337 if (code == UNGE || code == UNGT || code == UNLE || code == UNLT)
16338 {
16339 code = reverse_condition_maybe_unordered (code);
16340 std::swap (true_cond, false_cond);
16341 }
16342
16343 bool max_p;
16344 if (code == GE || code == GT)
16345 max_p = true;
16346 else if (code == LE || code == LT)
16347 max_p = false;
16348 else
16349 return false;
16350
16351 if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond))
16352 ;
16353
16354 /* Only when NaNs and signed zeros are not in effect can smax be
16355 used for `op0 < op1 ? op1 : op0`, and smin for
16356 `op0 > op1 ? op1 : op0`. */
16357 else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)
16358 && !HONOR_NANS (compare_mode) && !HONOR_SIGNED_ZEROS (compare_mode))
16359 max_p = !max_p;
16360
16361 else
16362 return false;
16363
16364 rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1);
16365 return true;
16366 }
16367
16368 /* Possibly emit a floating point conditional move by generating a compare that
16369 sets a mask instruction and a XXSEL select instruction.
16370
16371 Move TRUE_COND to DEST if OP applied to the operands of the last
16372 comparison is nonzero/true, FALSE_COND if it is zero/false.
16373
16374 Return false if the operation cannot be generated, and true if we could
16375 generate the instruction. */
16376
16377 static bool
16378 rs6000_maybe_emit_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16379 {
16380 enum rtx_code code = GET_CODE (op);
16381 rtx op0 = XEXP (op, 0);
16382 rtx op1 = XEXP (op, 1);
16383 machine_mode compare_mode = GET_MODE (op0);
16384 machine_mode result_mode = GET_MODE (dest);
16385 rtx compare_rtx;
16386 rtx cmove_rtx;
16387 rtx clobber_rtx;
16388
16389 if (!can_create_pseudo_p ())
16390 return false;
16391
16392 /* We allow the comparison to be either SFmode/DFmode and the true/false
16393 condition to be either SFmode/DFmode. I.e. we allow:
16394
16395 float a, b;
16396 double c, d, r;
16397
16398 r = (a == b) ? c : d;
16399
16400 and:
16401
16402 double a, b;
16403 float c, d, r;
16404
16405 r = (a == b) ? c : d;
16406
16407 but we don't allow intermixing the IEEE 128-bit floating point types with
16408 the 32/64-bit scalar types. */
16409
16410 if (!(compare_mode == result_mode
16411 || (compare_mode == SFmode && result_mode == DFmode)
16412 || (compare_mode == DFmode && result_mode == SFmode)))
16413 return false;
16414
16415 switch (code)
16416 {
16417 case EQ:
16418 case GE:
16419 case GT:
16420 break;
16421
16422 case NE:
16423 case LT:
16424 case LE:
16425 code = swap_condition (code);
16426 std::swap (op0, op1);
16427 break;
16428
16429 default:
16430 return false;
16431 }
16432
16433 /* Generate: [(parallel [(set (dest)
16434 (if_then_else (op (cmp1) (cmp2))
16435 (true)
16436 (false)))
16437 (clobber (scratch))])]. */
16438
16439 compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1);
16440 cmove_rtx = gen_rtx_SET (dest,
16441 gen_rtx_IF_THEN_ELSE (result_mode,
16442 compare_rtx,
16443 true_cond,
16444 false_cond));
16445
16446 clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode));
16447 emit_insn (gen_rtx_PARALLEL (VOIDmode,
16448 gen_rtvec (2, cmove_rtx, clobber_rtx)));
16449
16450 return true;
16451 }
16452
16453 /* Helper function to return true if the target has a compare-and-set-mask
16454 instruction that can be used with XXSEL to implement a conditional move.
16455 Such a target is assumed to also support the "C" minimum and maximum
16456 instructions. */
16457
16458 static bool
16459 have_compare_and_set_mask (machine_mode mode)
16460 {
16461 switch (mode)
16462 {
16463 case E_SFmode:
16464 case E_DFmode:
16465 return TARGET_P9_MINMAX;
16466
16467 case E_KFmode:
16468 case E_TFmode:
16469 return TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode);
16470
16471 default:
16472 break;
16473 }
16474
16475 return false;
16476 }
16477
16478 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
16479 operands of the last comparison is nonzero/true, FALSE_COND if it
16480 is zero/false. Return false if the hardware has no such operation. */
16481
16482 bool
16483 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16484 {
16485 enum rtx_code code = GET_CODE (op);
16486 rtx op0 = XEXP (op, 0);
16487 rtx op1 = XEXP (op, 1);
16488 machine_mode compare_mode = GET_MODE (op0);
16489 machine_mode result_mode = GET_MODE (dest);
16490 rtx temp;
16491 bool is_against_zero;
16492
16493 /* These modes should always match. */
16494 if (GET_MODE (op1) != compare_mode
16495 /* In the isel case however, we can use a compare immediate, so
16496 op1 may be a small constant. */
16497 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
16498 return false;
16499 if (GET_MODE (true_cond) != result_mode)
16500 return false;
16501 if (GET_MODE (false_cond) != result_mode)
16502 return false;
16503
16504 /* See if we can use the "C" minimum, "C" maximum, and compare and set mask
16505 instructions. */
16506 if (have_compare_and_set_mask (compare_mode)
16507 && have_compare_and_set_mask (result_mode))
16508 {
16509 if (rs6000_maybe_emit_maxc_minc (dest, op, true_cond, false_cond))
16510 return true;
16511
16512 if (rs6000_maybe_emit_fp_cmove (dest, op, true_cond, false_cond))
16513 return true;
16514 }
16515
16516 /* Don't allow using floating point comparisons for integer results for
16517 now. */
16518 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
16519 return false;
16520
16521 /* First, work out if the hardware can do this at all, or
16522 if it's too slow.... */
16523 if (!FLOAT_MODE_P (compare_mode))
16524 {
16525 if (TARGET_ISEL)
16526 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
16527 return false;
16528 }
16529
16530 is_against_zero = op1 == CONST0_RTX (compare_mode);
16531
16532 /* A floating-point subtract might overflow, underflow, or produce
16533 an inexact result, thus changing the floating-point flags, so it
16534 can't be generated if we care about that. It's safe if one side
16535 of the construct is zero, since then no subtract will be
16536 generated. */
16537 if (SCALAR_FLOAT_MODE_P (compare_mode)
16538 && flag_trapping_math && ! is_against_zero)
16539 return false;
16540
16541 /* Eliminate half of the comparisons by switching operands, this
16542 makes the remaining code simpler. */
16543 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
16544 || code == LTGT || code == LT || code == UNLE)
16545 {
16546 code = reverse_condition_maybe_unordered (code);
16547 temp = true_cond;
16548 true_cond = false_cond;
16549 false_cond = temp;
16550 }
16551
16552 /* UNEQ and LTGT take four instructions for a comparison with zero;
16553 it'll probably be faster to use a branch here too. */
16554 if (code == UNEQ && HONOR_NANS (compare_mode))
16555 return false;
16556
16557 /* We're going to try to implement comparisons by performing
16558 a subtract, then comparing against zero. Unfortunately,
16559 Inf - Inf is NaN which is not zero, and so if we don't
16560 know that the operand is finite and the comparison
16561 would treat EQ differently from UNORDERED, we can't do it. */
16562 if (HONOR_INFINITIES (compare_mode)
16563 && code != GT && code != UNGE
16564 && (!CONST_DOUBLE_P (op1)
16565 || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
16566 /* Constructs of the form (a OP b ? a : b) are safe. */
16567 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
16568 || (! rtx_equal_p (op0, true_cond)
16569 && ! rtx_equal_p (op1, true_cond))))
16570 return false;
16571
16572 /* At this point we know we can use fsel. */
16573
16574 /* Don't allow compare_mode other than SFmode or DFmode, for others there
16575 is no fsel instruction. */
16576 if (compare_mode != SFmode && compare_mode != DFmode)
16577 return false;
16578
16579 /* Reduce the comparison to a comparison against zero. */
16580 if (! is_against_zero)
16581 {
16582 temp = gen_reg_rtx (compare_mode);
16583 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
16584 op0 = temp;
16585 op1 = CONST0_RTX (compare_mode);
16586 }
16587
16588 /* If we don't care about NaNs we can reduce some of the comparisons
16589 down to faster ones. */
16590 if (! HONOR_NANS (compare_mode))
16591 switch (code)
16592 {
16593 case GT:
16594 code = LE;
16595 temp = true_cond;
16596 true_cond = false_cond;
16597 false_cond = temp;
16598 break;
16599 case UNGE:
16600 code = GE;
16601 break;
16602 case UNEQ:
16603 code = EQ;
16604 break;
16605 default:
16606 break;
16607 }
16608
16609 /* Now, reduce everything down to a GE. */
16610 switch (code)
16611 {
16612 case GE:
16613 break;
16614
16615 case LE:
16616 temp = gen_reg_rtx (compare_mode);
16617 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16618 op0 = temp;
16619 break;
16620
16621 case ORDERED:
16622 temp = gen_reg_rtx (compare_mode);
16623 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
16624 op0 = temp;
16625 break;
16626
16627 case EQ:
16628 temp = gen_reg_rtx (compare_mode);
16629 emit_insn (gen_rtx_SET (temp,
16630 gen_rtx_NEG (compare_mode,
16631 gen_rtx_ABS (compare_mode, op0))));
16632 op0 = temp;
16633 break;
16634
16635 case UNGE:
16636 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
16637 temp = gen_reg_rtx (result_mode);
16638 emit_insn (gen_rtx_SET (temp,
16639 gen_rtx_IF_THEN_ELSE (result_mode,
16640 gen_rtx_GE (VOIDmode,
16641 op0, op1),
16642 true_cond, false_cond)));
16643 false_cond = true_cond;
16644 true_cond = temp;
16645
16646 temp = gen_reg_rtx (compare_mode);
16647 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16648 op0 = temp;
16649 break;
16650
16651 case GT:
16652 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
16653 temp = gen_reg_rtx (result_mode);
16654 emit_insn (gen_rtx_SET (temp,
16655 gen_rtx_IF_THEN_ELSE (result_mode,
16656 gen_rtx_GE (VOIDmode,
16657 op0, op1),
16658 true_cond, false_cond)));
16659 true_cond = false_cond;
16660 false_cond = temp;
16661
16662 temp = gen_reg_rtx (compare_mode);
16663 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
16664 op0 = temp;
16665 break;
16666
16667 default:
16668 gcc_unreachable ();
16669 }
16670
16671 emit_insn (gen_rtx_SET (dest,
16672 gen_rtx_IF_THEN_ELSE (result_mode,
16673 gen_rtx_GE (VOIDmode,
16674 op0, op1),
16675 true_cond, false_cond)));
16676 return true;
16677 }
16678
16679 /* Same as above, but for ints (isel). */
16680
16681 bool
16682 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
16683 {
16684 rtx condition_rtx, cr;
16685 machine_mode mode = GET_MODE (dest);
16686 enum rtx_code cond_code;
16687 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
16688 bool signedp;
16689
16690 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
16691 return false;
16692
16693 /* PR104335: We now need to expect CC-mode "comparisons"
16694 coming from ifcvt. The following code expects proper
16695 comparisons, so bail out here. */
16696 if (GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC)
16697 return false;
16698
16699 /* We still have to do the compare, because isel doesn't do a
16700 compare, it just looks at the CRx bits set by a previous compare
16701 instruction. */
16702 condition_rtx = rs6000_generate_compare (op, mode);
16703 cond_code = GET_CODE (condition_rtx);
16704 cr = XEXP (condition_rtx, 0);
16705 signedp = GET_MODE (cr) == CCmode;
16706
16707 isel_func = (mode == SImode
16708 ? (signedp ? gen_isel_cc_si : gen_isel_ccuns_si)
16709 : (signedp ? gen_isel_cc_di : gen_isel_ccuns_di));
16710
16711 switch (cond_code)
16712 {
16713 case LT: case GT: case LTU: case GTU: case EQ:
16714 /* isel handles these directly. */
16715 break;
16716
16717 default:
16718 /* We need to swap the sense of the comparison. */
16719 {
16720 std::swap (false_cond, true_cond);
16721 PUT_CODE (condition_rtx, reverse_condition (cond_code));
16722 }
16723 break;
16724 }
16725
16726 false_cond = force_reg (mode, false_cond);
16727 if (true_cond != const0_rtx)
16728 true_cond = force_reg (mode, true_cond);
16729
16730 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
16731
16732 return true;
16733 }
16734
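/* Emit a minimum or maximum of OP0 and OP1 into DEST.  CODE is one of
   SMAX/SMIN/UMAX/UMIN.  A direct vector/scalar min/max insn is used when
   available; otherwise this falls back to a conditional move.  */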
16735 void
16736 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
16737 {
16738 machine_mode mode = GET_MODE (op0);
16739 enum rtx_code c;
16740 rtx target;
16741
16742 /* VSX/altivec have direct min/max insns. */
16743 if ((code == SMAX || code == SMIN)
16744 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
16745 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))
16746 || (TARGET_POWER10 && TARGET_FLOAT128_HW && FLOAT128_IEEE_P (mode))))
16747 {
16748 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
16749 return;
16750 }
16751
16752 if (code == SMAX || code == SMIN)
16753 c = GE;
16754 else
16755 c = GEU;
16756
16757 if (code == SMAX || code == UMAX)
16758 target = emit_conditional_move (dest, { c, op0, op1, mode },
16759 op0, op1, mode, 0);
16760 else
16761 target = emit_conditional_move (dest, { c, op0, op1, mode },
16762 op1, op0, mode, 0);
16763 gcc_assert (target);
16764 if (target != dest)
16765 emit_move_insn (dest, target);
16766 }
16767
16768 /* A subroutine of the atomic operation splitters. Jump to LABEL if
16769 COND is true. Mark the jump as unlikely to be taken. */
16770
16771 static void
16772 emit_unlikely_jump (rtx cond, rtx label)
16773 {
16774 rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
16775 rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
16776 add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
16777 }
16778
16779 /* A subroutine of the atomic operation splitters. Emit a load-locked
16780 instruction in MODE. For QI/HImode, possibly use a pattern that includes
16781 the zero_extend operation. */
16782
16783 static void
16784 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
16785 {
16786 rtx (*fn) (rtx, rtx) = NULL;
16787
16788 switch (mode)
16789 {
16790 case E_QImode:
16791 fn = gen_load_lockedqi;
16792 break;
16793 case E_HImode:
16794 fn = gen_load_lockedhi;
16795 break;
16796 case E_SImode:
16797 if (GET_MODE (mem) == QImode)
16798 fn = gen_load_lockedqi_si;
16799 else if (GET_MODE (mem) == HImode)
16800 fn = gen_load_lockedhi_si;
16801 else
16802 fn = gen_load_lockedsi;
16803 break;
16804 case E_DImode:
16805 fn = gen_load_lockeddi;
16806 break;
16807 case E_TImode:
16808 fn = gen_load_lockedti;
16809 break;
16810 default:
16811 gcc_unreachable ();
16812 }
16813 emit_insn (fn (reg, mem));
16814 }
16815
16816 /* A subroutine of the atomic operation splitters. Emit a store-conditional
16817 instruction in MODE. */
16818
16819 static void
16820 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
16821 {
16822 rtx (*fn) (rtx, rtx, rtx) = NULL;
16823
16824 switch (mode)
16825 {
16826 case E_QImode:
16827 fn = gen_store_conditionalqi;
16828 break;
16829 case E_HImode:
16830 fn = gen_store_conditionalhi;
16831 break;
16832 case E_SImode:
16833 fn = gen_store_conditionalsi;
16834 break;
16835 case E_DImode:
16836 fn = gen_store_conditionaldi;
16837 break;
16838 case E_TImode:
16839 fn = gen_store_conditionalti;
16840 break;
16841 default:
16842 gcc_unreachable ();
16843 }
16844
16845 /* Emit sync before stwcx. to address PPC405 Erratum. */
16846 if (PPC405_ERRATUM77)
16847 emit_insn (gen_hwsync ());
16848
16849 emit_insn (fn (res, mem, val));
16850 }
16851
16852 /* Expand barriers before and after a load_locked/store_cond sequence. */
16853
16854 static rtx
16855 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
16856 {
16857 rtx addr = XEXP (mem, 0);
16858
16859 if (!legitimate_indirect_address_p (addr, reload_completed)
16860 && !legitimate_indexed_address_p (addr, reload_completed))
16861 {
16862 addr = force_reg (Pmode, addr);
16863 mem = replace_equiv_address_nv (mem, addr);
16864 }
16865
16866 switch (model)
16867 {
16868 case MEMMODEL_RELAXED:
16869 case MEMMODEL_CONSUME:
16870 case MEMMODEL_ACQUIRE:
16871 break;
16872 case MEMMODEL_RELEASE:
16873 case MEMMODEL_ACQ_REL:
16874 emit_insn (gen_lwsync ());
16875 break;
16876 case MEMMODEL_SEQ_CST:
16877 emit_insn (gen_hwsync ());
16878 break;
16879 default:
16880 gcc_unreachable ();
16881 }
16882 return mem;
16883 }
16884
16885 static void
16886 rs6000_post_atomic_barrier (enum memmodel model)
16887 {
16888 switch (model)
16889 {
16890 case MEMMODEL_RELAXED:
16891 case MEMMODEL_CONSUME:
16892 case MEMMODEL_RELEASE:
16893 break;
16894 case MEMMODEL_ACQUIRE:
16895 case MEMMODEL_ACQ_REL:
16896 case MEMMODEL_SEQ_CST:
16897 emit_insn (gen_isync ());
16898 break;
16899 default:
16900 gcc_unreachable ();
16901 }
16902 }
16903
16904 /* A subroutine of the various atomic expanders. For sub-word operations,
16905 we must adjust things to operate on SImode. Given the original MEM,
16906 return a new aligned memory. Also build and return the quantities by
16907 which to shift and mask. */
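/* E.g. for a QImode access at byte address A on a big-endian target, the
   aligned word is at (A & -4), the shift is ((A * 8) & 0x18) ^ 0x18, and
   the insertion mask is 0xff << shift.  (Worked example of the code
   below.)  */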
16908
16909 static rtx
16910 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
16911 {
16912 rtx addr, align, shift, mask, mem;
16913 HOST_WIDE_INT shift_mask;
16914 machine_mode mode = GET_MODE (orig_mem);
16915
16916 /* For smaller modes, we have to implement this via SImode. */
16917 shift_mask = (mode == QImode ? 0x18 : 0x10);
16918
16919 addr = XEXP (orig_mem, 0);
16920 addr = force_reg (GET_MODE (addr), addr);
16921
16922 /* Aligned memory containing subword. Generate a new memory. We
16923 do not want any of the existing MEM_ATTR data, as we're now
16924 accessing memory outside the original object. */
16925 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
16926 NULL_RTX, 1, OPTAB_LIB_WIDEN);
16927 mem = gen_rtx_MEM (SImode, align);
16928 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
16929 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
16930 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
16931
16932 /* Shift amount for subword relative to aligned word. */
16933 shift = gen_reg_rtx (SImode);
16934 addr = gen_lowpart (SImode, addr);
16935 rtx tmp = gen_reg_rtx (SImode);
16936 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
16937 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
16938 if (BYTES_BIG_ENDIAN)
16939 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
16940 shift, 1, OPTAB_LIB_WIDEN);
16941 *pshift = shift;
16942
16943 /* Mask for insertion. */
16944 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
16945 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
16946 *pmask = mask;
16947
16948 return mem;
16949 }
16950
16951 /* A subroutine of the various atomic expanders. For sub-word operands,
16952 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
16953
16954 static rtx
16955 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
16956 {
16957 rtx x;
16958
16959 x = gen_reg_rtx (SImode);
16960 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
16961 gen_rtx_NOT (SImode, mask),
16962 oldval)));
16963
16964 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
16965
16966 return x;
16967 }
16968
16969 /* A subroutine of the various atomic expanders. For sub-word operands,
16970 extract WIDE to NARROW via SHIFT. */
16971
16972 static void
16973 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
16974 {
16975 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
16976 wide, 1, OPTAB_LIB_WIDEN);
16977 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
16978 }
16979
16980 /* Expand an atomic compare and swap operation. */
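/* The generated loop is roughly (SImode sketch, barriers implied by the
   memory model omitted):

     .L1: lwarx   ret,0,mem
          cmpw    cr0,ret,oldval
          bne-    cr0,.L2
          stwcx.  newval,0,mem
          bne-    cr0,.L1
     .L2:

   (Illustrative only; sub-word and TImode cases add masking or a
   synthesized comparison.)  */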
16981
16982 void
16983 rs6000_expand_atomic_compare_and_swap (rtx operands[])
16984 {
16985 rtx boolval, retval, mem, oldval, newval, cond;
16986 rtx label1, label2, x, mask, shift;
16987 machine_mode mode, orig_mode;
16988 enum memmodel mod_s, mod_f;
16989 bool is_weak;
16990
16991 boolval = operands[0];
16992 retval = operands[1];
16993 mem = operands[2];
16994 oldval = operands[3];
16995 newval = operands[4];
16996 is_weak = (INTVAL (operands[5]) != 0);
16997 mod_s = memmodel_base (INTVAL (operands[6]));
16998 mod_f = memmodel_base (INTVAL (operands[7]));
16999 orig_mode = mode = GET_MODE (mem);
17000
17001 mask = shift = NULL_RTX;
17002 if (mode == QImode || mode == HImode)
17003 {
17004 /* Before power8, we didn't have access to lbarx/lharx, so generate a
17005 lwarx plus shift/mask operations. With power8, we need to do the
17006 comparison in SImode, but the store is still done in QI/HImode. */
17007 oldval = convert_modes (SImode, mode, oldval, 1);
17008
17009 if (!TARGET_SYNC_HI_QI)
17010 {
17011 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17012
17013 /* Shift and mask OLDVAL into position within the word. */
17014 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
17015 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17016
17017 /* Shift and mask NEWVAL into position within the word. */
17018 newval = convert_modes (SImode, mode, newval, 1);
17019 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
17020 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17021 }
17022
17023 /* Prepare to adjust the return value. */
17024 retval = gen_reg_rtx (SImode);
17025 mode = SImode;
17026 }
17027 else if (reg_overlap_mentioned_p (retval, oldval))
17028 oldval = copy_to_reg (oldval);
17029
17030 if (mode != TImode && !reg_or_short_operand (oldval, mode))
17031 oldval = copy_to_mode_reg (mode, oldval);
17032
17033 if (reg_overlap_mentioned_p (retval, newval))
17034 newval = copy_to_reg (newval);
17035
17036 mem = rs6000_pre_atomic_barrier (mem, mod_s);
17037
17038 label1 = NULL_RTX;
17039 if (!is_weak)
17040 {
17041 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
17042 emit_label (XEXP (label1, 0));
17043 }
17044 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
17045
17046 emit_load_locked (mode, retval, mem);
17047
17048 x = retval;
17049 if (mask)
17050 x = expand_simple_binop (SImode, AND, retval, mask,
17051 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17052
17053 cond = gen_reg_rtx (CCmode);
17054 /* If we have TImode, synthesize a comparison. */
17055 if (mode != TImode)
17056 x = gen_rtx_COMPARE (CCmode, x, oldval);
17057 else
17058 {
17059 rtx xor1_result = gen_reg_rtx (DImode);
17060 rtx xor2_result = gen_reg_rtx (DImode);
17061 rtx or_result = gen_reg_rtx (DImode);
17062 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
17063 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
17064 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
17065 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
17066
17067 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
17068 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
17069 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
17070 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
17071 }
17072
17073 emit_insn (gen_rtx_SET (cond, x));
17074
17075 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17076 emit_unlikely_jump (x, label2);
17077
17078 x = newval;
17079 if (mask)
17080 x = rs6000_mask_atomic_subword (retval, newval, mask);
17081
17082 emit_store_conditional (orig_mode, cond, mem, x);
17083
17084 if (!is_weak)
17085 {
17086 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17087 emit_unlikely_jump (x, label1);
17088 }
17089
17090 if (!is_mm_relaxed (mod_f))
17091 emit_label (XEXP (label2, 0));
17092
17093 rs6000_post_atomic_barrier (mod_s);
17094
17095 if (is_mm_relaxed (mod_f))
17096 emit_label (XEXP (label2, 0));
17097
17098 if (shift)
17099 rs6000_finish_atomic_subword (operands[1], retval, shift);
17100 else if (mode != GET_MODE (operands[1]))
17101 convert_move (operands[1], retval, 1);
17102
17103 /* In all cases, CR0 contains EQ on success, and NE on failure. */
17104 x = gen_rtx_EQ (SImode, cond, const0_rtx);
17105 emit_insn (gen_rtx_SET (boolval, x));
17106 }
17107
17108 /* Expand an atomic exchange operation. */
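/* Roughly (SImode sketch):  .L1: lwarx ret,0,mem ; stwcx. val,0,mem ;
   bne- cr0,.L1  -- with barriers chosen by MODEL, and sub-word masking
   where needed.  (Illustrative only.)  */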
17109
17110 void
17111 rs6000_expand_atomic_exchange (rtx operands[])
17112 {
17113 rtx retval, mem, val, cond;
17114 machine_mode mode;
17115 enum memmodel model;
17116 rtx label, x, mask, shift;
17117
17118 retval = operands[0];
17119 mem = operands[1];
17120 val = operands[2];
17121 model = memmodel_base (INTVAL (operands[3]));
17122 mode = GET_MODE (mem);
17123
17124 mask = shift = NULL_RTX;
17125 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
17126 {
17127 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17128
17129 /* Shift and mask VAL into position within the word. */
17130 val = convert_modes (SImode, mode, val, 1);
17131 val = expand_simple_binop (SImode, ASHIFT, val, shift,
17132 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17133
17134 /* Prepare to adjust the return value. */
17135 retval = gen_reg_rtx (SImode);
17136 mode = SImode;
17137 }
17138
17139 mem = rs6000_pre_atomic_barrier (mem, model);
17140
17141 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
17142 emit_label (XEXP (label, 0));
17143
17144 emit_load_locked (mode, retval, mem);
17145
17146 x = val;
17147 if (mask)
17148 x = rs6000_mask_atomic_subword (retval, val, mask);
17149
17150 cond = gen_reg_rtx (CCmode);
17151 emit_store_conditional (mode, cond, mem, x);
17152
17153 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17154 emit_unlikely_jump (x, label);
17155
17156 rs6000_post_atomic_barrier (model);
17157
17158 if (shift)
17159 rs6000_finish_atomic_subword (operands[0], retval, shift);
17160 }
17161
17162 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
17163 to perform. MEM is the memory on which to operate. VAL is the second
17164 operand of the binary operator. BEFORE and AFTER are optional locations to
17165 return the value of MEM either before or after the operation. MODEL_RTX
17166 is a CONST_INT containing the memory model to use. */
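/* E.g. an atomic fetch-and-add expands to roughly (SImode sketch):

     .L1: lwarx   before,0,mem
          add     after,before,val
          stwcx.  after,0,mem
          bne-    cr0,.L1

   Illustrative only; NOT is expanded as AND followed by one's complement
   (i.e. NAND semantics), and sub-word cases add masking.  */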
17167
17168 void
17169 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
17170 rtx orig_before, rtx orig_after, rtx model_rtx)
17171 {
17172 enum memmodel model = memmodel_base (INTVAL (model_rtx));
17173 machine_mode mode = GET_MODE (mem);
17174 machine_mode store_mode = mode;
17175 rtx label, x, cond, mask, shift;
17176 rtx before = orig_before, after = orig_after;
17177
17178 mask = shift = NULL_RTX;
17179 /* On power8, we want to use SImode for the operation. On previous systems,
17180 do the operation on a full word and shift/mask to get the proper byte or
17181 halfword. */
17182 if (mode == QImode || mode == HImode)
17183 {
17184 if (TARGET_SYNC_HI_QI)
17185 {
17186 val = convert_modes (SImode, mode, val, 1);
17187
17188 /* Prepare to adjust the return value. */
17189 before = gen_reg_rtx (SImode);
17190 if (after)
17191 after = gen_reg_rtx (SImode);
17192 mode = SImode;
17193 }
17194 else
17195 {
17196 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
17197
17198 /* Shift and mask VAL into position within the word. */
17199 val = convert_modes (SImode, mode, val, 1);
17200 val = expand_simple_binop (SImode, ASHIFT, val, shift,
17201 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17202
17203 switch (code)
17204 {
17205 case IOR:
17206 case XOR:
17207 /* We've already zero-extended VAL. That is sufficient to
17208 make certain that it does not affect other bits. */
17209 mask = NULL;
17210 break;
17211
17212 case AND:
17213 /* If we make certain that all of the other bits in VAL are
17214 set, that will be sufficient to not affect other bits. */
17215 x = gen_rtx_NOT (SImode, mask);
17216 x = gen_rtx_IOR (SImode, x, val);
17217 emit_insn (gen_rtx_SET (val, x));
17218 mask = NULL;
17219 break;
17220
17221 case NOT:
17222 case PLUS:
17223 case MINUS:
17224 /* These will all affect bits outside the field and need
17225 adjustment via MASK within the loop. */
17226 break;
17227
17228 default:
17229 gcc_unreachable ();
17230 }
17231
17232 /* Prepare to adjust the return value. */
17233 before = gen_reg_rtx (SImode);
17234 if (after)
17235 after = gen_reg_rtx (SImode);
17236 store_mode = mode = SImode;
17237 }
17238 }
17239
17240 mem = rs6000_pre_atomic_barrier (mem, model);
17241
17242 label = gen_label_rtx ();
17243 emit_label (label);
17244 label = gen_rtx_LABEL_REF (VOIDmode, label);
17245
17246 if (before == NULL_RTX)
17247 before = gen_reg_rtx (mode);
17248
17249 emit_load_locked (mode, before, mem);
17250
17251 if (code == NOT)
17252 {
17253 x = expand_simple_binop (mode, AND, before, val,
17254 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17255 after = expand_simple_unop (mode, NOT, x, after, 1);
17256 }
17257 else
17258 {
17259 after = expand_simple_binop (mode, code, before, val,
17260 after, 1, OPTAB_LIB_WIDEN);
17261 }
17262
17263 x = after;
17264 if (mask)
17265 {
17266 x = expand_simple_binop (SImode, AND, after, mask,
17267 NULL_RTX, 1, OPTAB_LIB_WIDEN);
17268 x = rs6000_mask_atomic_subword (before, x, mask);
17269 }
17270 else if (store_mode != mode)
17271 x = convert_modes (store_mode, mode, x, 1);
17272
17273 cond = gen_reg_rtx (CCmode);
17274 emit_store_conditional (store_mode, cond, mem, x);
17275
17276 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
17277 emit_unlikely_jump (x, label);
17278
17279 rs6000_post_atomic_barrier (model);
17280
17281 if (shift)
17282 {
17283 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
17284 then do the calculations in a SImode register. */
17285 if (orig_before)
17286 rs6000_finish_atomic_subword (orig_before, before, shift);
17287 if (orig_after)
17288 rs6000_finish_atomic_subword (orig_after, after, shift);
17289 }
17290 else if (store_mode != mode)
17291 {
17292 /* QImode/HImode on machines with lbarx/lharx where we do the native
17293 operation and then do the calculations in a SImode register. */
17294 if (orig_before)
17295 convert_move (orig_before, before, 1);
17296 if (orig_after)
17297 convert_move (orig_after, after, 1);
17298 }
17299 else if (orig_after && after != orig_after)
17300 emit_move_insn (orig_after, after);
17301 }
17302
17303 static GTY(()) alias_set_type TOC_alias_set = -1;
17304
17305 alias_set_type
17306 get_TOC_alias_set (void)
17307 {
17308 if (TOC_alias_set == -1)
17309 TOC_alias_set = new_alias_set ();
17310 return TOC_alias_set;
17311 }
17312
17313 /* The mode the ABI uses for a word. This is not the same as word_mode
17314 for -m32 -mpowerpc64. This is used to implement various target hooks. */
17315
17316 static scalar_int_mode
17317 rs6000_abi_word_mode (void)
17318 {
17319 return TARGET_32BIT ? SImode : DImode;
17320 }
17321
17322 /* Implement the TARGET_OFFLOAD_OPTIONS hook. */
17323 static char *
17324 rs6000_offload_options (void)
17325 {
17326 if (TARGET_64BIT)
17327 return xstrdup ("-foffload-abi=lp64");
17328 else
17329 return xstrdup ("-foffload-abi=ilp32");
17330 }
17331
17332 \f
17333 /* A quick summary of the various types of 'constant-pool tables'
17334 under PowerPC:
17335
17336 Target Flags Name One table per
17337 AIX (none) AIX TOC object file
17338 AIX -mfull-toc AIX TOC object file
17339 AIX -mminimal-toc AIX minimal TOC translation unit
17340 SVR4/EABI (none) SVR4 SDATA object file
17341 SVR4/EABI -fpic SVR4 pic object file
17342 SVR4/EABI -fPIC SVR4 PIC translation unit
17343 SVR4/EABI -mrelocatable EABI TOC function
17344 SVR4/EABI -maix AIX TOC object file
17345 SVR4/EABI -maix -mminimal-toc
17346 AIX minimal TOC translation unit
17347
17348 Name Reg. Set by entries contains:
17349 made by addrs? fp? sum?
17350
17351 AIX TOC 2 crt0 as Y option option
17352 AIX minimal TOC 30 prolog gcc Y Y option
17353 SVR4 SDATA 13 crt0 gcc N Y N
17354 SVR4 pic 30 prolog ld Y not yet N
17355 SVR4 PIC 30 prolog gcc Y option option
17356 EABI TOC 30 prolog gcc Y option option
17357
17358 */
17359
17360 /* Hash functions for the hash table. */
17361
17362 static unsigned
17363 rs6000_hash_constant (rtx k)
17364 {
17365 enum rtx_code code = GET_CODE (k);
17366 machine_mode mode = GET_MODE (k);
17367 unsigned result = (code << 3) ^ mode;
17368 const char *format;
17369 int flen, fidx;
17370
17371 format = GET_RTX_FORMAT (code);
17372 flen = strlen (format);
17373 fidx = 0;
17374
17375 switch (code)
17376 {
17377 case LABEL_REF:
17378 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
17379
17380 case CONST_WIDE_INT:
17381 {
17382 int i;
17383 flen = CONST_WIDE_INT_NUNITS (k);
17384 for (i = 0; i < flen; i++)
17385 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
17386 return result;
17387 }
17388
17389 case CONST_DOUBLE:
17390 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
17391
17392 case CODE_LABEL:
17393 fidx = 3;
17394 break;
17395
17396 default:
17397 break;
17398 }
17399
17400 for (; fidx < flen; fidx++)
17401 switch (format[fidx])
17402 {
17403 case 's':
17404 {
17405 unsigned i, len;
17406 const char *str = XSTR (k, fidx);
17407 len = strlen (str);
17408 result = result * 613 + len;
17409 for (i = 0; i < len; i++)
17410 result = result * 613 + (unsigned) str[i];
17411 break;
17412 }
17413 case 'u':
17414 case 'e':
17415 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
17416 break;
17417 case 'i':
17418 case 'n':
17419 result = result * 613 + (unsigned) XINT (k, fidx);
17420 break;
17421 case 'w':
17422 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
17423 result = result * 613 + (unsigned) XWINT (k, fidx);
17424 else
17425 {
17426 size_t i;
17427 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
17428 result = result * 613 + (unsigned) (XWINT (k, fidx)
17429 >> CHAR_BIT * i);
17430 }
17431 break;
17432 case '0':
17433 break;
17434 default:
17435 gcc_unreachable ();
17436 }
17437
17438 return result;
17439 }
17440
17441 hashval_t
17442 toc_hasher::hash (toc_hash_struct *thc)
17443 {
17444 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
17445 }
17446
17447 /* Compare H1 and H2 for equivalence. */
17448
17449 bool
17450 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
17451 {
17452 rtx r1 = h1->key;
17453 rtx r2 = h2->key;
17454
17455 if (h1->key_mode != h2->key_mode)
17456 return 0;
17457
17458 return rtx_equal_p (r1, r2);
17459 }
17460
17461 /* These are the names given by the C++ front-end to vtables, and
17462 vtable-like objects. Ideally, this logic should not be here;
17463 instead, there should be some programmatic way of inquiring as
17464 to whether or not an object is a vtable. */
17465
17466 #define VTABLE_NAME_P(NAME) \
17467 (startswith (name, "_vt.") \
17468 || startswith (name, "_ZTV") \
17469 || startswith (name, "_ZTT") \
17470 || startswith (name, "_ZTI") \
17471 || startswith (name, "_ZTC"))
17472
17473 #ifdef NO_DOLLAR_IN_LABEL
17474 /* Return a GGC-allocated character string translating dollar signs in
17475 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
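/* For example, "init$fini" becomes "init_fini"; a name beginning with
   '$' is returned unchanged.  */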
17476
17477 const char *
17478 rs6000_xcoff_strip_dollar (const char *name)
17479 {
17480 char *strip, *p;
17481 const char *q;
17482 size_t len;
17483
17484 q = (const char *) strchr (name, '$');
17485
17486 if (q == 0 || q == name)
17487 return name;
17488
17489 len = strlen (name);
17490 strip = XALLOCAVEC (char, len + 1);
17491 strcpy (strip, name);
17492 p = strip + (q - name);
17493 while (p)
17494 {
17495 *p = '_';
17496 p = strchr (p + 1, '$');
17497 }
17498
17499 return ggc_alloc_string (strip, len);
17500 }
17501 #endif
17502
17503 void
17504 rs6000_output_symbol_ref (FILE *file, rtx x)
17505 {
17506 const char *name = XSTR (x, 0);
17507
17508 /* Currently C++ toc references to vtables can be emitted before it
17509 is decided whether the vtable is public or private. If this is
17510 the case, then the linker will eventually complain that there is
17511 a reference to an unknown section. Thus, for vtables only,
17512 we emit the TOC reference to reference the identifier and not the
17513 symbol. */
17514 if (VTABLE_NAME_P (name))
17515 {
17516 RS6000_OUTPUT_BASENAME (file, name);
17517 }
17518 else
17519 assemble_name (file, name);
17520 }
17521
17522 /* Output a TOC entry. We derive the entry name from what is being
17523 written. */
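/* E.g. a DFmode constant on 64-bit AIX without -mminimal-toc produces a
   line like "\t.tc FD_<hex>_<hex>[TC],0x<hex><hex>".  (Illustrative; see
   the CONST_DOUBLE_P cases below.)  */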
17524
17525 void
17526 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
17527 {
17528 char buf[256];
17529 const char *name = buf;
17530 rtx base = x;
17531 HOST_WIDE_INT offset = 0;
17532
17533 gcc_assert (!TARGET_NO_TOC_OR_PCREL);
17534
17535 /* When the linker won't eliminate them, don't output duplicate
17536 TOC entries (this happens on AIX if there is any kind of TOC,
17537 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
17538 CODE_LABELs. */
17539 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
17540 {
17541 struct toc_hash_struct *h;
17542
17543 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
17544 time because GGC is not initialized at that point. */
17545 if (toc_hash_table == NULL)
17546 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
17547
17548 h = ggc_alloc<toc_hash_struct> ();
17549 h->key = x;
17550 h->key_mode = mode;
17551 h->labelno = labelno;
17552
17553 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
17554 if (*found == NULL)
17555 *found = h;
17556 else /* This is indeed a duplicate.
17557 Set this label equal to that label. */
17558 {
17559 fputs ("\t.set ", file);
17560 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17561 fprintf (file, "%d,", labelno);
17562 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
17563 fprintf (file, "%d\n", ((*found)->labelno));
17564
17565 #ifdef HAVE_AS_TLS
17566 if (TARGET_XCOFF && SYMBOL_REF_P (x)
17567 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
17568 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
17569 {
17570 fputs ("\t.set ", file);
17571 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17572 fprintf (file, "%d,", labelno);
17573 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
17574 fprintf (file, "%d\n", ((*found)->labelno));
17575 }
17576 #endif
17577 return;
17578 }
17579 }
17580
17581 /* If we're going to put a double constant in the TOC, make sure it's
17582 aligned properly when strict alignment is on. */
17583 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
17584 && STRICT_ALIGNMENT
17585 && GET_MODE_BITSIZE (mode) >= 64
17586 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
17587 ASM_OUTPUT_ALIGN (file, 3);
17589
17590 (*targetm.asm_out.internal_label) (file, "LC", labelno);
17591
17592 /* Handle FP constants specially. Note that if we have a minimal
17593 TOC, things we put here aren't actually in the TOC, so we can allow
17594 FP constants. */
17595 if (CONST_DOUBLE_P (x)
17596 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
17597 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
17598 {
17599 long k[4];
17600
17601 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17602 REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k);
17603 else
17604 real_to_target (k, CONST_DOUBLE_REAL_VALUE (x), GET_MODE (x));
17605
17606 if (TARGET_64BIT)
17607 {
17608 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17609 fputs (DOUBLE_INT_ASM_OP, file);
17610 else
17611 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17612 k[0] & 0xffffffff, k[1] & 0xffffffff,
17613 k[2] & 0xffffffff, k[3] & 0xffffffff);
17614 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
17615 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17616 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
17617 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
17618 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
17619 return;
17620 }
17621 else
17622 {
17623 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17624 fputs ("\t.long ", file);
17625 else
17626 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
17627 k[0] & 0xffffffff, k[1] & 0xffffffff,
17628 k[2] & 0xffffffff, k[3] & 0xffffffff);
17629 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
17630 k[0] & 0xffffffff, k[1] & 0xffffffff,
17631 k[2] & 0xffffffff, k[3] & 0xffffffff);
17632 return;
17633 }
17634 }
17635 else if (CONST_DOUBLE_P (x)
17636 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
17637 {
17638 long k[2];
17639
17640 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17641 REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k);
17642 else
17643 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k);
17644
17645 if (TARGET_64BIT)
17646 {
17647 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17648 fputs (DOUBLE_INT_ASM_OP, file);
17649 else
17650 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17651 k[0] & 0xffffffff, k[1] & 0xffffffff);
17652 fprintf (file, "0x%lx%08lx\n",
17653 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
17654 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
17655 return;
17656 }
17657 else
17658 {
17659 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17660 fputs ("\t.long ", file);
17661 else
17662 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
17663 k[0] & 0xffffffff, k[1] & 0xffffffff);
17664 fprintf (file, "0x%lx,0x%lx\n",
17665 k[0] & 0xffffffff, k[1] & 0xffffffff);
17666 return;
17667 }
17668 }
17669 else if (CONST_DOUBLE_P (x)
17670 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
17671 {
17672 long l;
17673
17674 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
17675 REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l);
17676 else
17677 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l);
17678
17679 if (TARGET_64BIT)
17680 {
17681 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17682 fputs (DOUBLE_INT_ASM_OP, file);
17683 else
17684 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17685 if (WORDS_BIG_ENDIAN)
17686 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
17687 else
17688 fprintf (file, "0x%lx\n", l & 0xffffffff);
17689 return;
17690 }
17691 else
17692 {
17693 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17694 fputs ("\t.long ", file);
17695 else
17696 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
17697 fprintf (file, "0x%lx\n", l & 0xffffffff);
17698 return;
17699 }
17700 }
17701 else if (GET_MODE (x) == VOIDmode && CONST_INT_P (x))
17702 {
17703 unsigned HOST_WIDE_INT low;
17704 HOST_WIDE_INT high;
17705
17706 low = INTVAL (x) & 0xffffffff;
17707 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
17708
17709 /* TOC entries are always Pmode-sized, so on big-endian targets
17710 smaller integer constants in the TOC need to be padded.
17711 (This is still a win over putting the constants in
17712 a separate constant pool, because then we'd have
17713 to have both a TOC entry _and_ the actual constant.)
17714
17715 For a 32-bit target, CONST_INT values are loaded and shifted
17716 entirely within `low' and can be stored in one TOC entry. */
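/* Worked example (illustrative): a 32-bit constant 5 destined for a
   64-bit big-endian TOC slot is shifted into the high half below, so
   the emitted entry holds 0x0000000500000000.  */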
17717
17718 /* It would be easy to make this work, but it doesn't now. */
17719 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
17720
17721 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
17722 {
17723 low |= high << 32;
17724 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
17725 high = (HOST_WIDE_INT) low >> 32;
17726 low &= 0xffffffff;
17727 }
17728
17729 if (TARGET_64BIT)
17730 {
17731 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17732 fputs (DOUBLE_INT_ASM_OP, file);
17733 else
17734 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17735 (long) high & 0xffffffff, (long) low & 0xffffffff);
17736 fprintf (file, "0x%lx%08lx\n",
17737 (long) high & 0xffffffff, (long) low & 0xffffffff);
17738 return;
17739 }
17740 else
17741 {
17742 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
17743 {
17744 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17745 fputs ("\t.long ", file);
17746 else
17747 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
17748 (long) high & 0xffffffff, (long) low & 0xffffffff);
17749 fprintf (file, "0x%lx,0x%lx\n",
17750 (long) high & 0xffffffff, (long) low & 0xffffffff);
17751 }
17752 else
17753 {
17754 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17755 fputs ("\t.long ", file);
17756 else
17757 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
17758 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
17759 }
17760 return;
17761 }
17762 }
17763
17764 if (GET_CODE (x) == CONST)
17765 {
17766 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
17767 && CONST_INT_P (XEXP (XEXP (x, 0), 1)));
17768
17769 base = XEXP (XEXP (x, 0), 0);
17770 offset = INTVAL (XEXP (XEXP (x, 0), 1));
17771 }
17772
17773 switch (GET_CODE (base))
17774 {
17775 case SYMBOL_REF:
17776 name = XSTR (base, 0);
17777 break;
17778
17779 case LABEL_REF:
17780 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
17781 CODE_LABEL_NUMBER (XEXP (base, 0)));
17782 break;
17783
17784 case CODE_LABEL:
17785 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
17786 break;
17787
17788 default:
17789 gcc_unreachable ();
17790 }
17791
17792 if (TARGET_ELF || TARGET_MINIMAL_TOC)
17793 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
17794 else
17795 {
17796 fputs ("\t.tc ", file);
17797 RS6000_OUTPUT_BASENAME (file, name);
17798
17799 if (offset < 0)
17800 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
17801 else if (offset)
17802 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
17803
17804 /* Mark large TOC symbols on AIX with [TE] so they are mapped
17805 after other TOC symbols, reducing overflow of small TOC access
17806 to [TC] symbols. */
17807 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
17808 ? "[TE]," : "[TC],", file);
17809 }
17810
17811 /* Currently C++ TOC references to vtables can be emitted before it
17812 is decided whether the vtable is public or private. If this is
17813 the case, then the linker will eventually complain that there is
17814 a TOC reference to an unknown section. Thus, for vtables only,
17815 we emit the TOC reference to reference the symbol and not the
17816 section. */
17817 if (VTABLE_NAME_P (name))
17818 {
17819 RS6000_OUTPUT_BASENAME (file, name);
17820 if (offset < 0)
17821 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
17822 else if (offset > 0)
17823 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
17824 }
17825 else
17826 output_addr_const (file, x);
17827
17828 #if HAVE_AS_TLS
17829 if (TARGET_XCOFF && SYMBOL_REF_P (base))
17830 {
17831 switch (SYMBOL_REF_TLS_MODEL (base))
17832 {
17833 case 0:
17834 break;
17835 case TLS_MODEL_LOCAL_EXEC:
17836 fputs ("@le", file);
17837 break;
17838 case TLS_MODEL_INITIAL_EXEC:
17839 fputs ("@ie", file);
17840 break;
17841 /* Use global-dynamic for local-dynamic. */
17842 case TLS_MODEL_GLOBAL_DYNAMIC:
17843 case TLS_MODEL_LOCAL_DYNAMIC:
17844 putc ('\n', file);
17845 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
17846 fputs ("\t.tc .", file);
17847 RS6000_OUTPUT_BASENAME (file, name);
17848 fputs ("[TC],", file);
17849 output_addr_const (file, x);
17850 fputs ("@m", file);
17851 break;
17852 default:
17853 gcc_unreachable ();
17854 }
17855 }
17856 #endif
17857
17858 putc ('\n', file);
17859 }
17860 \f
17861 /* Output an assembler pseudo-op to write an ASCII string of N characters
17862 starting at P to FILE.
17863
17864 On the RS/6000, we have to do this using the .byte operation and
17865 write out special characters outside the quoted string.
17866 Also, the assembler is broken; very long strings are truncated,
17867 so we must artificially break them up early. */
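/* For example (illustrative), the three bytes "Hi\n" come out as

       .byte "Hi"
       .byte 10

   using the quoted form for printable characters and the decimal form
   for everything else.  */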
17868
17869 void
17870 output_ascii (FILE *file, const char *p, int n)
17871 {
17872 char c;
17873 int i, count_string;
17874 const char *for_string = "\t.byte \"";
17875 const char *for_decimal = "\t.byte ";
17876 const char *to_close = NULL;
17877
17878 count_string = 0;
17879 for (i = 0; i < n; i++)
17880 {
17881 c = *p++;
17882 if (c >= ' ' && c < 0177)
17883 {
17884 if (for_string)
17885 fputs (for_string, file);
17886 putc (c, file);
17887
17888 /* Write two quotes to get one. */
17889 if (c == '"')
17890 {
17891 putc (c, file);
17892 ++count_string;
17893 }
17894
17895 for_string = NULL;
17896 for_decimal = "\"\n\t.byte ";
17897 to_close = "\"\n";
17898 ++count_string;
17899
17900 if (count_string >= 512)
17901 {
17902 fputs (to_close, file);
17903
17904 for_string = "\t.byte \"";
17905 for_decimal = "\t.byte ";
17906 to_close = NULL;
17907 count_string = 0;
17908 }
17909 }
17910 else
17911 {
17912 if (for_decimal)
17913 fputs (for_decimal, file);
17914 fprintf (file, "%d", c);
17915
17916 for_string = "\n\t.byte \"";
17917 for_decimal = ", ";
17918 to_close = "\n";
17919 count_string = 0;
17920 }
17921 }
17922
17923 /* Now close the string if we have written one. Then end the line. */
17924 if (to_close)
17925 fputs (to_close, file);
17926 }
17927 \f
17928 /* Generate a unique section name for FILENAME for a section type
17929 represented by SECTION_DESC. Output goes into BUF.
17930
17931 SECTION_DESC can be any string, as long as it is different for each
17932 possible section type.
17933
17934 We name the section in the same manner as xlc. The name begins with an
17935 underscore followed by the filename (after stripping any leading directory
17936 names) with the last period replaced by the string SECTION_DESC. If
17937 FILENAME does not contain a period, SECTION_DESC is appended to the end of
17938 the name. */
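/* For example (illustrative), FILENAME "lib/foo.c" with SECTION_DESC
   "_bss_" produces "_foo_bss_": the directory prefix and any
   non-alphanumeric characters are dropped, and the last period is
   replaced by the descriptor.  */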
17939
17940 void
17941 rs6000_gen_section_name (char **buf, const char *filename,
17942 const char *section_desc)
17943 {
17944 const char *q, *after_last_slash, *last_period = 0;
17945 char *p;
17946 int len;
17947
17948 after_last_slash = filename;
17949 for (q = filename; *q; q++)
17950 {
17951 if (*q == '/')
17952 after_last_slash = q + 1;
17953 else if (*q == '.')
17954 last_period = q;
17955 }
17956
17957 len = strlen (after_last_slash) + strlen (section_desc) + 2;
17958 *buf = (char *) xmalloc (len);
17959
17960 p = *buf;
17961 *p++ = '_';
17962
17963 for (q = after_last_slash; *q; q++)
17964 {
17965 if (q == last_period)
17966 {
17967 strcpy (p, section_desc);
17968 p += strlen (section_desc);
17969 break;
17970 }
17971
17972 else if (ISALNUM (*q))
17973 *p++ = *q;
17974 }
17975
17976 if (last_period == 0)
17977 strcpy (p, section_desc);
17978 else
17979 *p = '\0';
17980 }
17981 \f
17982 /* Emit profile function. */
17983
17984 void
17985 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
17986 {
17987 /* Non-standard profiling for kernels, which just saves LR then calls
17988 _mcount without worrying about arg saves. The idea is to change
17989 the function prologue as little as possible as it isn't easy to
17990 account for arg save/restore code added just for _mcount. */
17991 if (TARGET_PROFILE_KERNEL)
17992 return;
17993
17994 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
17995 {
17996 #ifndef NO_PROFILE_COUNTERS
17997 # define NO_PROFILE_COUNTERS 0
17998 #endif
17999 if (NO_PROFILE_COUNTERS)
18000 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
18001 LCT_NORMAL, VOIDmode);
18002 else
18003 {
18004 char buf[30];
18005 const char *label_name;
18006 rtx fun;
18007
18008 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
18009 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
18010 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
18011
18012 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
18013 LCT_NORMAL, VOIDmode, fun, Pmode);
18014 }
18015 }
18016 else if (DEFAULT_ABI == ABI_DARWIN)
18017 {
18018 const char *mcount_name = RS6000_MCOUNT;
18019 int caller_addr_regno = LR_REGNO;
18020
18021 /* Be conservative and always set this, at least for now. */
18022 crtl->uses_pic_offset_table = 1;
18023
18024 #if TARGET_MACHO
18025 /* For PIC code, set up a stub and collect the caller's address
18026 from r0, which is where the prologue puts it. */
18027 if (MACHOPIC_INDIRECT
18028 && crtl->uses_pic_offset_table)
18029 caller_addr_regno = 0;
18030 #endif
18031 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
18032 LCT_NORMAL, VOIDmode,
18033 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
18034 }
18035 }
18036
18037 /* Write function profiler code. */
18038
18039 void
18040 output_function_profiler (FILE *file, int labelno)
18041 {
18042 char buf[100];
18043
18044 switch (DEFAULT_ABI)
18045 {
18046 default:
18047 gcc_unreachable ();
18048
18049 case ABI_V4:
18050 if (!TARGET_32BIT)
18051 {
18052 warning (0, "no profiling of 64-bit code for this ABI");
18053 return;
18054 }
18055 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
18056 fprintf (file, "\tmflr %s\n", reg_names[0]);
18057 if (NO_PROFILE_COUNTERS)
18058 {
18059 asm_fprintf (file, "\tstw %s,4(%s)\n",
18060 reg_names[0], reg_names[1]);
18061 }
18062 else if (TARGET_SECURE_PLT && flag_pic)
18063 {
18064 if (TARGET_LINK_STACK)
18065 {
18066 char name[32];
18067 get_ppc476_thunk_name (name);
18068 asm_fprintf (file, "\tbl %s\n", name);
18069 }
18070 else
18071 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
18072 asm_fprintf (file, "\tstw %s,4(%s)\n",
18073 reg_names[0], reg_names[1]);
18074 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
18075 asm_fprintf (file, "\taddis %s,%s,",
18076 reg_names[12], reg_names[12]);
18077 assemble_name (file, buf);
18078 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
18079 assemble_name (file, buf);
18080 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
18081 }
18082 else if (flag_pic == 1)
18083 {
18084 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
18085 asm_fprintf (file, "\tstw %s,4(%s)\n",
18086 reg_names[0], reg_names[1]);
18087 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
18088 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
18089 assemble_name (file, buf);
18090 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
18091 }
18092 else if (flag_pic > 1)
18093 {
18094 asm_fprintf (file, "\tstw %s,4(%s)\n",
18095 reg_names[0], reg_names[1]);
18096 /* Now, we need to get the address of the label. */
18097 if (TARGET_LINK_STACK)
18098 {
18099 char name[32];
18100 get_ppc476_thunk_name (name);
18101 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
18102 assemble_name (file, buf);
18103 fputs ("-.\n1:", file);
18104 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
18105 asm_fprintf (file, "\taddi %s,%s,4\n",
18106 reg_names[11], reg_names[11]);
18107 }
18108 else
18109 {
18110 fputs ("\tbcl 20,31,1f\n\t.long ", file);
18111 assemble_name (file, buf);
18112 fputs ("-.\n1:", file);
18113 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
18114 }
18115 asm_fprintf (file, "\tlwz %s,0(%s)\n",
18116 reg_names[0], reg_names[11]);
18117 asm_fprintf (file, "\tadd %s,%s,%s\n",
18118 reg_names[0], reg_names[0], reg_names[11]);
18119 }
18120 else
18121 {
18122 asm_fprintf (file, "\tlis %s,", reg_names[12]);
18123 assemble_name (file, buf);
18124 fputs ("@ha\n", file);
18125 asm_fprintf (file, "\tstw %s,4(%s)\n",
18126 reg_names[0], reg_names[1]);
18127 asm_fprintf (file, "\tla %s,", reg_names[0]);
18128 assemble_name (file, buf);
18129 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
18130 }
18131
18132 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
18133 fprintf (file, "\tbl %s%s\n",
18134 RS6000_MCOUNT, flag_pic ? "@plt" : "");
18135 break;
18136
18137 case ABI_AIX:
18138 case ABI_ELFv2:
18139 case ABI_DARWIN:
18140 /* Don't do anything, done in output_profile_hook (). */
18141 break;
18142 }
18143 }
18144
18145 \f
18146
18147 /* The following variable value is the last issued insn. */
18148
18149 static rtx_insn *last_scheduled_insn;
18150
18151 /* The following variable helps to balance issuing of load and
18152 store instructions. */
18153
18154 static int load_store_pendulum;
18155
18156 /* The following variable helps pair divide insns during scheduling. */
18157 static int divide_cnt;
18158 /* The following variable helps pair and alternate vector and vector load
18159 insns during scheduling. */
18160 static int vec_pairing;
18161
18162
18163 /* Power4 load update and store update instructions are cracked into a
18164 load or store and an integer insn which are executed in the same cycle.
18165 Branches have their own dispatch slot which does not count against the
18166 GCC issue rate, but it changes the program flow so there are no other
18167 instructions to issue in this cycle. */
18168
18169 static int
18170 rs6000_variable_issue_1 (rtx_insn *insn, int more)
18171 {
18172 last_scheduled_insn = insn;
18173 if (GET_CODE (PATTERN (insn)) == USE
18174 || GET_CODE (PATTERN (insn)) == CLOBBER)
18175 {
18176 cached_can_issue_more = more;
18177 return cached_can_issue_more;
18178 }
18179
18180 if (insn_terminates_group_p (insn, current_group))
18181 {
18182 cached_can_issue_more = 0;
18183 return cached_can_issue_more;
18184 }
18185
18186 /* If the insn is not recognized (has no reservation), leave the issue count unchanged. */
18187 if (recog_memoized (insn) < 0)
18188 return more;
18189
18190 if (rs6000_sched_groups)
18191 {
18192 if (is_microcoded_insn (insn))
18193 cached_can_issue_more = 0;
18194 else if (is_cracked_insn (insn))
18195 cached_can_issue_more = more > 2 ? more - 2 : 0;
18196 else
18197 cached_can_issue_more = more - 1;
18198
18199 return cached_can_issue_more;
18200 }
18201
18202 if (rs6000_tune == PROCESSOR_CELL && is_nonpipeline_insn (insn))
18203 return 0;
18204
18205 cached_can_issue_more = more - 1;
18206 return cached_can_issue_more;
18207 }
18208
18209 static int
18210 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
18211 {
18212 int r = rs6000_variable_issue_1 (insn, more);
18213 if (verbose)
18214 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
18215 return r;
18216 }
18217
18218 /* Adjust the cost of a scheduling dependency. Return the new cost of
18219 a dependency of INSN on DEP_INSN. COST is the current cost. */
18220
18221 static int
18222 rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
18223 unsigned int)
18224 {
18225 enum attr_type attr_type;
18226
18227 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
18228 return cost;
18229
18230 switch (dep_type)
18231 {
18232 case REG_DEP_TRUE:
18233 {
18234 /* Data dependency; DEP_INSN writes a register that INSN reads
18235 some cycles later. */
18236
18237 /* Separate a load from a narrower, dependent store. */
18238 if ((rs6000_sched_groups || rs6000_tune == PROCESSOR_POWER9
18239 || rs6000_tune == PROCESSOR_POWER10)
18240 && GET_CODE (PATTERN (insn)) == SET
18241 && GET_CODE (PATTERN (dep_insn)) == SET
18242 && MEM_P (XEXP (PATTERN (insn), 1))
18243 && MEM_P (XEXP (PATTERN (dep_insn), 0))
18244 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
18245 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
18246 return cost + 14;
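/* Illustrative case (hypothetical registers): a doubleword load
   "ld 4,0(9)" depending on a narrower "stw 3,0(9)" cannot use store
   forwarding, so the extra cost above keeps the two well apart.  */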
18247
18248 attr_type = get_attr_type (insn);
18249
18250 switch (attr_type)
18251 {
18252 case TYPE_JMPREG:
18253 /* Tell the first scheduling pass about the latency between
18254 a mtctr and bctr (and mtlr and br/blr). The first
18255 scheduling pass will not know about this latency since
18256 the mtctr instruction, which has the latency associated
18257 to it, will be generated by reload. */
18258 return 4;
18259 case TYPE_BRANCH:
18260 /* Leave some extra cycles between a compare and its
18261 dependent branch, to inhibit expensive mispredicts. */
18262 if ((rs6000_tune == PROCESSOR_PPC603
18263 || rs6000_tune == PROCESSOR_PPC604
18264 || rs6000_tune == PROCESSOR_PPC604e
18265 || rs6000_tune == PROCESSOR_PPC620
18266 || rs6000_tune == PROCESSOR_PPC630
18267 || rs6000_tune == PROCESSOR_PPC750
18268 || rs6000_tune == PROCESSOR_PPC7400
18269 || rs6000_tune == PROCESSOR_PPC7450
18270 || rs6000_tune == PROCESSOR_PPCE5500
18271 || rs6000_tune == PROCESSOR_PPCE6500
18272 || rs6000_tune == PROCESSOR_POWER4
18273 || rs6000_tune == PROCESSOR_POWER5
18274 || rs6000_tune == PROCESSOR_POWER7
18275 || rs6000_tune == PROCESSOR_POWER8
18276 || rs6000_tune == PROCESSOR_POWER9
18277 || rs6000_tune == PROCESSOR_POWER10
18278 || rs6000_tune == PROCESSOR_CELL)
18279 && recog_memoized (dep_insn)
18280 && (INSN_CODE (dep_insn) >= 0))
18281
18282 switch (get_attr_type (dep_insn))
18283 {
18284 case TYPE_CMP:
18285 case TYPE_FPCOMPARE:
18286 case TYPE_CR_LOGICAL:
18287 return cost + 2;
18288 case TYPE_EXTS:
18289 case TYPE_MUL:
18290 if (get_attr_dot (dep_insn) == DOT_YES)
18291 return cost + 2;
18292 else
18293 break;
18294 case TYPE_SHIFT:
18295 if (get_attr_dot (dep_insn) == DOT_YES
18296 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
18297 return cost + 2;
18298 else
18299 break;
18300 default:
18301 break;
18302 }
18303 break;
18304
18305 case TYPE_STORE:
18306 case TYPE_FPSTORE:
18307 if ((rs6000_tune == PROCESSOR_POWER6)
18308 && recog_memoized (dep_insn)
18309 && (INSN_CODE (dep_insn) >= 0))
18310 {
18311
18312 if (GET_CODE (PATTERN (insn)) != SET)
18313 /* If this happens, we have to extend this to schedule
18314 optimally. Return default for now. */
18315 return cost;
18316
18317 /* Adjust the cost for the case where the value written
18318 by a fixed point operation is used as the address
18319 gen value on a store. */
18320 switch (get_attr_type (dep_insn))
18321 {
18322 case TYPE_LOAD:
18323 case TYPE_CNTLZ:
18324 {
18325 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18326 return get_attr_sign_extend (dep_insn)
18327 == SIGN_EXTEND_YES ? 6 : 4;
18328 break;
18329 }
18330 case TYPE_SHIFT:
18331 {
18332 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18333 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
18334 6 : 3;
18335 break;
18336 }
18337 case TYPE_INTEGER:
18338 case TYPE_ADD:
18339 case TYPE_LOGICAL:
18340 case TYPE_EXTS:
18341 case TYPE_INSERT:
18342 {
18343 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18344 return 3;
18345 break;
18346 }
18347 case TYPE_STORE:
18348 case TYPE_FPLOAD:
18349 case TYPE_FPSTORE:
18350 {
18351 if (get_attr_update (dep_insn) == UPDATE_YES
18352 && ! rs6000_store_data_bypass_p (dep_insn, insn))
18353 return 3;
18354 break;
18355 }
18356 case TYPE_MUL:
18357 {
18358 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18359 return 17;
18360 break;
18361 }
18362 case TYPE_DIV:
18363 {
18364 if (! rs6000_store_data_bypass_p (dep_insn, insn))
18365 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
18366 break;
18367 }
18368 default:
18369 break;
18370 }
18371 }
18372 break;
18373
18374 case TYPE_LOAD:
18375 if ((rs6000_tune == PROCESSOR_POWER6)
18376 && recog_memoized (dep_insn)
18377 && (INSN_CODE (dep_insn) >= 0))
18378 {
18379
18380 /* Adjust the cost for the case where the value written
18381 by a fixed point instruction is used within the address
18382 gen portion of a subsequent load(u)(x) */
18383 switch (get_attr_type (dep_insn))
18384 {
18385 case TYPE_LOAD:
18386 case TYPE_CNTLZ:
18387 {
18388 if (set_to_load_agen (dep_insn, insn))
18389 return get_attr_sign_extend (dep_insn)
18390 == SIGN_EXTEND_YES ? 6 : 4;
18391 break;
18392 }
18393 case TYPE_SHIFT:
18394 {
18395 if (set_to_load_agen (dep_insn, insn))
18396 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
18397 6 : 3;
18398 break;
18399 }
18400 case TYPE_INTEGER:
18401 case TYPE_ADD:
18402 case TYPE_LOGICAL:
18403 case TYPE_EXTS:
18404 case TYPE_INSERT:
18405 {
18406 if (set_to_load_agen (dep_insn, insn))
18407 return 3;
18408 break;
18409 }
18410 case TYPE_STORE:
18411 case TYPE_FPLOAD:
18412 case TYPE_FPSTORE:
18413 {
18414 if (get_attr_update (dep_insn) == UPDATE_YES
18415 && set_to_load_agen (dep_insn, insn))
18416 return 3;
18417 break;
18418 }
18419 case TYPE_MUL:
18420 {
18421 if (set_to_load_agen (dep_insn, insn))
18422 return 17;
18423 break;
18424 }
18425 case TYPE_DIV:
18426 {
18427 if (set_to_load_agen (dep_insn, insn))
18428 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
18429 break;
18430 }
18431 default:
18432 break;
18433 }
18434 }
18435 break;
18436
18437 default:
18438 break;
18439 }
18440
18441 /* Fall out to return default cost. */
18442 }
18443 break;
18444
18445 case REG_DEP_OUTPUT:
18446 /* Output dependency; DEP_INSN writes a register that INSN writes some
18447 cycles later. */
18448 if ((rs6000_tune == PROCESSOR_POWER6)
18449 && recog_memoized (dep_insn)
18450 && (INSN_CODE (dep_insn) >= 0))
18451 {
18452 attr_type = get_attr_type (insn);
18453
18454 switch (attr_type)
18455 {
18456 case TYPE_FP:
18457 case TYPE_FPSIMPLE:
18458 if (get_attr_type (dep_insn) == TYPE_FP
18459 || get_attr_type (dep_insn) == TYPE_FPSIMPLE)
18460 return 1;
18461 break;
18462 default:
18463 break;
18464 }
18465 }
18466 /* Fall through, no cost for output dependency. */
18467 /* FALLTHRU */
18468
18469 case REG_DEP_ANTI:
18470 /* Anti dependency; DEP_INSN reads a register that INSN writes some
18471 cycles later. */
18472 return 0;
18473
18474 default:
18475 gcc_unreachable ();
18476 }
18477
18478 return cost;
18479 }
18480
18481 /* Debug version of rs6000_adjust_cost. */
18482
18483 static int
18484 rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
18485 int cost, unsigned int dw)
18486 {
18487 int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw);
18488
18489 if (ret != cost)
18490 {
18491 const char *dep;
18492
18493 switch (dep_type)
18494 {
18495 default: dep = "unknown dependency"; break;
18496 case REG_DEP_TRUE: dep = "data dependency"; break;
18497 case REG_DEP_OUTPUT: dep = "output dependency"; break;
18498 case REG_DEP_ANTI: dep = "anti dependency"; break;
18499 }
18500
18501 fprintf (stderr,
18502 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
18503 "%s, insn:\n", ret, cost, dep);
18504
18505 debug_rtx (insn);
18506 }
18507
18508 return ret;
18509 }
18510
18511 /* The function returns true if INSN is microcoded.
18512 Return false otherwise. */
18513
18514 static bool
18515 is_microcoded_insn (rtx_insn *insn)
18516 {
18517 if (!insn || !NONDEBUG_INSN_P (insn)
18518 || GET_CODE (PATTERN (insn)) == USE
18519 || GET_CODE (PATTERN (insn)) == CLOBBER)
18520 return false;
18521
18522 if (rs6000_tune == PROCESSOR_CELL)
18523 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
18524
18525 if (rs6000_sched_groups
18526 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18527 {
18528 enum attr_type type = get_attr_type (insn);
18529 if ((type == TYPE_LOAD
18530 && get_attr_update (insn) == UPDATE_YES
18531 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
18532 || ((type == TYPE_LOAD || type == TYPE_STORE)
18533 && get_attr_update (insn) == UPDATE_YES
18534 && get_attr_indexed (insn) == INDEXED_YES)
18535 || type == TYPE_MFCR)
18536 return true;
18537 }
18538
18539 return false;
18540 }
18541
18542 /* The function returns true if INSN is cracked into 2 instructions
18543 by the processor (and therefore occupies 2 issue slots). */
18544
18545 static bool
18546 is_cracked_insn (rtx_insn *insn)
18547 {
18548 if (!insn || !NONDEBUG_INSN_P (insn)
18549 || GET_CODE (PATTERN (insn)) == USE
18550 || GET_CODE (PATTERN (insn)) == CLOBBER)
18551 return false;
18552
18553 if (rs6000_sched_groups
18554 && (rs6000_tune == PROCESSOR_POWER4 || rs6000_tune == PROCESSOR_POWER5))
18555 {
18556 enum attr_type type = get_attr_type (insn);
18557 if ((type == TYPE_LOAD
18558 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
18559 && get_attr_update (insn) == UPDATE_NO)
18560 || (type == TYPE_LOAD
18561 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
18562 && get_attr_update (insn) == UPDATE_YES
18563 && get_attr_indexed (insn) == INDEXED_NO)
18564 || (type == TYPE_STORE
18565 && get_attr_update (insn) == UPDATE_YES
18566 && get_attr_indexed (insn) == INDEXED_NO)
18567 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
18568 && get_attr_update (insn) == UPDATE_YES)
18569 || (type == TYPE_CR_LOGICAL
18570 && get_attr_cr_logical_3op (insn) == CR_LOGICAL_3OP_YES)
18571 || (type == TYPE_EXTS
18572 && get_attr_dot (insn) == DOT_YES)
18573 || (type == TYPE_SHIFT
18574 && get_attr_dot (insn) == DOT_YES
18575 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
18576 || (type == TYPE_MUL
18577 && get_attr_dot (insn) == DOT_YES)
18578 || type == TYPE_DIV
18579 || (type == TYPE_INSERT
18580 && get_attr_size (insn) == SIZE_32))
18581 return true;
18582 }
18583
18584 return false;
18585 }
18586
18587 /* The function returns true if INSN can be issued only from
18588 the branch slot. */
18589
18590 static bool
18591 is_branch_slot_insn (rtx_insn *insn)
18592 {
18593 if (!insn || !NONDEBUG_INSN_P (insn)
18594 || GET_CODE (PATTERN (insn)) == USE
18595 || GET_CODE (PATTERN (insn)) == CLOBBER)
18596 return false;
18597
18598 if (rs6000_sched_groups)
18599 {
18600 enum attr_type type = get_attr_type (insn);
18601 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
18602 return true;
18603 return false;
18604 }
18605
18606 return false;
18607 }
18608
18609 /* The function returns true if OUT_INSN sets a value that is
18610 used in the address generation computation of IN_INSN. */
18611 static bool
18612 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
18613 {
18614 rtx out_set, in_set;
18615
18616 /* For performance reasons, only handle the simple case where
18617 both insns are a single_set. */
18618 out_set = single_set (out_insn);
18619 if (out_set)
18620 {
18621 in_set = single_set (in_insn);
18622 if (in_set)
18623 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
18624 }
18625
18626 return false;
18627 }
18628
18629 /* Try to determine base/offset/size parts of the given MEM.
18630 Return true if successful, false if any of the values couldn't
18631 be determined.
18632
18633 This function only looks for REG or REG+CONST address forms.
18634 REG+REG address form will return false. */
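/* For example (illustrative), (mem:DI (plus:DI (reg:DI 9) (const_int 16)))
   with a known size of 8 yields *BASE = r9, *OFFSET = 16 and *SIZE = 8;
   nested PLUS forms accumulate their constants into *OFFSET.  */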
18635
18636 static bool
18637 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
18638 HOST_WIDE_INT *size)
18639 {
18640 rtx addr_rtx;
18641 if (MEM_SIZE_KNOWN_P (mem))
18642 *size = MEM_SIZE (mem);
18643 else
18644 return false;
18645
18646 addr_rtx = XEXP (mem, 0);
18647 if (GET_CODE (addr_rtx) == PRE_MODIFY)
18648 addr_rtx = XEXP (addr_rtx, 1);
18649
18650 *offset = 0;
18651 while (GET_CODE (addr_rtx) == PLUS
18652 && CONST_INT_P (XEXP (addr_rtx, 1)))
18653 {
18654 *offset += INTVAL (XEXP (addr_rtx, 1));
18655 addr_rtx = XEXP (addr_rtx, 0);
18656 }
18657 if (!REG_P (addr_rtx))
18658 return false;
18659
18660 *base = addr_rtx;
18661 return true;
18662 }
18663
18664 /* If the target storage locations of arguments MEM1 and MEM2 are
18665 adjacent, then return the argument that has the lower address.
18666 Otherwise, return NULL_RTX. */
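/* For example (illustrative), an 8-byte access at 0(r9) and another at
   8(r9) are adjacent, and the one at 0(r9) is returned as the lower
   address.  */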
18667
18668 static rtx
18669 adjacent_mem_locations (rtx mem1, rtx mem2)
18670 {
18671 rtx reg1, reg2;
18672 HOST_WIDE_INT off1, size1, off2, size2;
18673
18674 if (MEM_P (mem1)
18675 && MEM_P (mem2)
18676 && get_memref_parts (mem1, &reg1, &off1, &size1)
18677 && get_memref_parts (mem2, &reg2, &off2, &size2)
18678 && REGNO (reg1) == REGNO (reg2))
18679 {
18680 if (off1 + size1 == off2)
18681 return mem1;
18682 else if (off2 + size2 == off1)
18683 return mem2;
18684 }
18685
18686 return NULL_RTX;
18687 }
18688
18689 /* This function returns true if it can be determined that the two MEM
18690 locations overlap by at least 1 byte based on base reg/offset/size. */
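/* For example (illustrative), an 8-byte access at 0(r9) overlaps a
   4-byte access at 4(r9), while a 4-byte access at 0(r9) and one at
   4(r9) merely touch and do not overlap.  */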
18691
18692 static bool
18693 mem_locations_overlap (rtx mem1, rtx mem2)
18694 {
18695 rtx reg1, reg2;
18696 HOST_WIDE_INT off1, size1, off2, size2;
18697
18698 if (get_memref_parts (mem1, &reg1, &off1, &size1)
18699 && get_memref_parts (mem2, &reg2, &off2, &size2))
18700 return ((REGNO (reg1) == REGNO (reg2))
18701 && (((off1 <= off2) && (off1 + size1 > off2))
18702 || ((off2 <= off1) && (off2 + size2 > off1))));
18703
18704 return false;
18705 }
18706
18707 /* A C statement (sans semicolon) to update the integer scheduling
18708 priority INSN_PRIORITY (INSN). Increase the priority to execute the
18709 INSN earlier, reduce the priority to execute INSN later. Do not
18710 define this macro if you do not need to adjust the scheduling
18711 priorities of insns. */
18712
18713 static int
18714 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
18715 {
18716 rtx load_mem, str_mem;
18717 /* On machines (like the 750) which have asymmetric integer units,
18718 where one integer unit can do multiply and divides and the other
18719 can't, reduce the priority of multiply/divide so that other
18720 integer operations are scheduled ahead of it. */
18721
18722 #if 0
18723 if (! INSN_P (insn))
18724 return priority;
18725
18726 if (GET_CODE (PATTERN (insn)) == USE)
18727 return priority;
18728
18729 switch (rs6000_tune) {
18730 case PROCESSOR_PPC750:
18731 switch (get_attr_type (insn))
18732 {
18733 default:
18734 break;
18735
18736 case TYPE_MUL:
18737 case TYPE_DIV:
18738 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
18739 priority, priority);
18740 if (priority >= 0 && priority < 0x01000000)
18741 priority >>= 3;
18742 break;
18743 }
18744 }
18745 #endif
18746
18747 if (insn_must_be_first_in_group (insn)
18748 && reload_completed
18749 && current_sched_info->sched_max_insns_priority
18750 && rs6000_sched_restricted_insns_priority)
18751 {
18752
18753 /* Prioritize insns that can be dispatched only in the first
18754 dispatch slot. */
18755 if (rs6000_sched_restricted_insns_priority == 1)
18756 /* Attach highest priority to insn. This means that in
18757 haifa-sched.cc:ready_sort(), dispatch-slot restriction considerations
18758 precede 'priority' (critical path) considerations. */
18759 return current_sched_info->sched_max_insns_priority;
18760 else if (rs6000_sched_restricted_insns_priority == 2)
18761 /* Increase priority of insn by a minimal amount. This means that in
18762 haifa-sched.cc:ready_sort(), only 'priority' (critical path)
18763 considerations precede dispatch-slot restriction considerations. */
18764 return (priority + 1);
18765 }
18766
18767 if (rs6000_tune == PROCESSOR_POWER6
18768 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
18769 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
18770 /* Attach highest priority to insn if the scheduler has just issued two
18771 stores and this instruction is a load, or two loads and this instruction
18772 is a store. Power6 wants loads and stores scheduled alternately
18773 when possible. */
18774 return current_sched_info->sched_max_insns_priority;
18775
18776 return priority;
18777 }
18778
18779 /* Return true if the instruction is nonpipelined on the Cell. */
18780 static bool
18781 is_nonpipeline_insn (rtx_insn *insn)
18782 {
18783 enum attr_type type;
18784 if (!insn || !NONDEBUG_INSN_P (insn)
18785 || GET_CODE (PATTERN (insn)) == USE
18786 || GET_CODE (PATTERN (insn)) == CLOBBER)
18787 return false;
18788
18789 type = get_attr_type (insn);
18790 if (type == TYPE_MUL
18791 || type == TYPE_DIV
18792 || type == TYPE_SDIV
18793 || type == TYPE_DDIV
18794 || type == TYPE_SSQRT
18795 || type == TYPE_DSQRT
18796 || type == TYPE_MFCR
18797 || type == TYPE_MFCRF
18798 || type == TYPE_MFJMPR)
18799 {
18800 return true;
18801 }
18802 return false;
18803 }
18804
18805
18806 /* Return how many instructions the machine can issue per cycle. */
18807
18808 static int
18809 rs6000_issue_rate (void)
18810 {
18811 /* Unless scheduling for register pressure, use issue rate of 1 for
18812 first scheduling pass to decrease degradation. */
18813 if (!reload_completed && !flag_sched_pressure)
18814 return 1;
18815
18816 switch (rs6000_tune) {
18817 case PROCESSOR_RS64A:
18818 case PROCESSOR_PPC601: /* ? */
18819 case PROCESSOR_PPC7450:
18820 return 3;
18821 case PROCESSOR_PPC440:
18822 case PROCESSOR_PPC603:
18823 case PROCESSOR_PPC750:
18824 case PROCESSOR_PPC7400:
18825 case PROCESSOR_PPC8540:
18826 case PROCESSOR_PPC8548:
18827 case PROCESSOR_CELL:
18828 case PROCESSOR_PPCE300C2:
18829 case PROCESSOR_PPCE300C3:
18830 case PROCESSOR_PPCE500MC:
18831 case PROCESSOR_PPCE500MC64:
18832 case PROCESSOR_PPCE5500:
18833 case PROCESSOR_PPCE6500:
18834 case PROCESSOR_TITAN:
18835 return 2;
18836 case PROCESSOR_PPC476:
18837 case PROCESSOR_PPC604:
18838 case PROCESSOR_PPC604e:
18839 case PROCESSOR_PPC620:
18840 case PROCESSOR_PPC630:
18841 return 4;
18842 case PROCESSOR_POWER4:
18843 case PROCESSOR_POWER5:
18844 case PROCESSOR_POWER6:
18845 case PROCESSOR_POWER7:
18846 return 5;
18847 case PROCESSOR_POWER8:
18848 return 7;
18849 case PROCESSOR_POWER9:
18850 return 6;
18851 case PROCESSOR_POWER10:
18852 return 8;
18853 default:
18854 return 1;
18855 }
18856 }
18857
18858 /* Return how many instructions to look ahead for better insn
18859 scheduling. */
18860
18861 static int
18862 rs6000_use_sched_lookahead (void)
18863 {
18864 switch (rs6000_tune)
18865 {
18866 case PROCESSOR_PPC8540:
18867 case PROCESSOR_PPC8548:
18868 return 4;
18869
18870 case PROCESSOR_CELL:
18871 return (reload_completed ? 8 : 0);
18872
18873 default:
18874 return 0;
18875 }
18876 }
18877
18878 /* We are choosing an insn from the ready queue. Return zero if INSN can be
18879 chosen. */
18880 static int
18881 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
18882 {
18883 if (ready_index == 0)
18884 return 0;
18885
18886 if (rs6000_tune != PROCESSOR_CELL)
18887 return 0;
18888
18889 gcc_assert (insn != NULL_RTX && INSN_P (insn));
18890
18891 if (!reload_completed
18892 || is_nonpipeline_insn (insn)
18893 || is_microcoded_insn (insn))
18894 return 1;
18895
18896 return 0;
18897 }
18898
18899 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
18900 and return true. */
18901
18902 static bool
18903 find_mem_ref (rtx pat, rtx *mem_ref)
18904 {
18905 const char * fmt;
18906 int i, j;
18907
18908 /* stack_tie does not produce any real memory traffic. */
18909 if (tie_operand (pat, VOIDmode))
18910 return false;
18911
18912 if (MEM_P (pat))
18913 {
18914 *mem_ref = pat;
18915 return true;
18916 }
18917
18918 /* Recursively process the pattern. */
18919 fmt = GET_RTX_FORMAT (GET_CODE (pat));
18920
18921 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
18922 {
18923 if (fmt[i] == 'e')
18924 {
18925 if (find_mem_ref (XEXP (pat, i), mem_ref))
18926 return true;
18927 }
18928 else if (fmt[i] == 'E')
18929 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
18930 {
18931 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
18932 return true;
18933 }
18934 }
18935
18936 return false;
18937 }
18938
18939 /* Determine if PAT is a PATTERN of a load insn. */
18940
18941 static bool
18942 is_load_insn1 (rtx pat, rtx *load_mem)
18943 {
18944 if (pat == NULL_RTX)
18945 return false;
18946
18947 if (GET_CODE (pat) == SET)
18948 {
18949 if (REG_P (SET_DEST (pat)))
18950 return find_mem_ref (SET_SRC (pat), load_mem);
18951 else
18952 return false;
18953 }
18954
18955 if (GET_CODE (pat) == PARALLEL)
18956 {
18957 int i;
18958
18959 for (i = 0; i < XVECLEN (pat, 0); i++)
18960 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
18961 return true;
18962 }
18963
18964 return false;
18965 }
18966
18967 /* Determine if INSN loads from memory. */
18968
18969 static bool
18970 is_load_insn (rtx insn, rtx *load_mem)
18971 {
18972 if (!insn || !INSN_P (insn))
18973 return false;
18974
18975 if (CALL_P (insn))
18976 return false;
18977
18978 return is_load_insn1 (PATTERN (insn), load_mem);
18979 }
18980
18981 /* Determine if PAT is a PATTERN of a store insn. */
18982
18983 static bool
18984 is_store_insn1 (rtx pat, rtx *str_mem)
18985 {
18986 if (pat == NULL_RTX)
18987 return false;
18988
18989 if (GET_CODE (pat) == SET)
18990 {
18991 if (REG_P (SET_SRC (pat)) || SUBREG_P (SET_SRC (pat)))
18992 return find_mem_ref (SET_DEST (pat), str_mem);
18993 else
18994 return false;
18995 }
18996
18997 if (GET_CODE (pat) == PARALLEL)
18998 {
18999 int i;
19000
19001 for (i = 0; i < XVECLEN (pat, 0); i++)
19002 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
19003 return true;
19004 }
19005
19006 return false;
19007 }
19008
19009 /* Determine if INSN stores to memory. */
19010
19011 static bool
19012 is_store_insn (rtx insn, rtx *str_mem)
19013 {
19014 if (!insn || !INSN_P (insn))
19015 return false;
19016
19017 return is_store_insn1 (PATTERN (insn), str_mem);
19018 }
19019
19020 /* Return whether TYPE is a Power9 pairable vector instruction type. */
19021
19022 static bool
19023 is_power9_pairable_vec_type (enum attr_type type)
19024 {
19025 switch (type)
19026 {
19027 case TYPE_VECSIMPLE:
19028 case TYPE_VECCOMPLEX:
19029 case TYPE_VECDIV:
19030 case TYPE_VECCMP:
19031 case TYPE_VECPERM:
19032 case TYPE_VECFLOAT:
19033 case TYPE_VECFDIV:
19034 case TYPE_VECDOUBLE:
19035 return true;
19036 default:
19037 break;
19038 }
19039 return false;
19040 }
19041
19042 /* Returns whether the dependence between INSN and NEXT is considered
19043 costly by the given target. */
19044
19045 static bool
19046 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
19047 {
19048 rtx insn;
19049 rtx next;
19050 rtx load_mem, str_mem;
19051
19052 /* If the flag is not enabled - no dependence is considered costly;
19053 allow all dependent insns in the same group.
19054 This is the most aggressive option. */
19055 if (rs6000_sched_costly_dep == no_dep_costly)
19056 return false;
19057
19058 /* If the flag is set to 1 - a dependence is always considered costly;
19059 do not allow dependent instructions in the same group.
19060 This is the most conservative option. */
19061 if (rs6000_sched_costly_dep == all_deps_costly)
19062 return true;
19063
19064 insn = DEP_PRO (dep);
19065 next = DEP_CON (dep);
19066
19067 if (rs6000_sched_costly_dep == store_to_load_dep_costly
19068 && is_load_insn (next, &load_mem)
19069 && is_store_insn (insn, &str_mem))
19070 /* Prevent load after store in the same group. */
19071 return true;
19072
19073 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
19074 && is_load_insn (next, &load_mem)
19075 && is_store_insn (insn, &str_mem)
19076 && DEP_TYPE (dep) == REG_DEP_TRUE
19077 && mem_locations_overlap (str_mem, load_mem))
19078 /* Prevent load after store in the same group if it is a true
19079 dependence. */
19080 return true;
19081
19082 /* The flag is set to X; dependences with latency >= X are considered costly,
19083 and will not be scheduled in the same group. */
19084 if (rs6000_sched_costly_dep <= max_dep_latency
19085 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
19086 return true;
19087
19088 return false;
19089 }
19090
19091 /* Return the next insn after INSN that is found before TAIL is reached,
19092 skipping any "non-active" insns - insns that will not actually occupy
19093 an issue slot. Return NULL_RTX if such an insn is not found. */
19094
19095 static rtx_insn *
19096 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
19097 {
19098 if (insn == NULL_RTX || insn == tail)
19099 return NULL;
19100
19101 while (1)
19102 {
19103 insn = NEXT_INSN (insn);
19104 if (insn == NULL_RTX || insn == tail)
19105 return NULL;
19106
19107 if (CALL_P (insn)
19108 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
19109 || (NONJUMP_INSN_P (insn)
19110 && GET_CODE (PATTERN (insn)) != USE
19111 && GET_CODE (PATTERN (insn)) != CLOBBER
19112 && INSN_CODE (insn) != CODE_FOR_stack_tie))
19113 break;
19114 }
19115 return insn;
19116 }
19117
19118 /* Move instruction at POS to the end of the READY list. */
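/* For example (illustrative), with READY = {A, B, C, D}, POS = 1 and
   LASTPOS = 3 the result is {A, C, D, B}.  The scheduler issues from
   the end of the list, so the moved insn is considered next.  */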
19119
19120 static void
19121 move_to_end_of_ready (rtx_insn **ready, int pos, int lastpos)
19122 {
19123 rtx_insn *tmp;
19124 int i;
19125
19126 tmp = ready[pos];
19127 for (i = pos; i < lastpos; i++)
19128 ready[i] = ready[i + 1];
19129 ready[lastpos] = tmp;
19130 }
19131
19132 /* Do Power6 specific sched_reorder2 reordering of ready list. */
19133
19134 static int
19135 power6_sched_reorder2 (rtx_insn **ready, int lastpos)
19136 {
19137 /* For Power6, we need to handle some special cases to try and keep the
19138 store queue from overflowing and triggering expensive flushes.
19139
19140 This code monitors how load and store instructions are being issued
19141 and skews the ready list one way or the other to increase the likelihood
19142 that a desired instruction is issued at the proper time.
19143
19144 A couple of things are done. First, we maintain a "load_store_pendulum"
19145 to track the current state of load/store issue.
19146
19147 - If the pendulum is at zero, then no loads or stores have been
19148 issued in the current cycle so we do nothing.
19149
19150 - If the pendulum is 1, then a single load has been issued in this
19151 cycle and we attempt to locate another load in the ready list to
19152 issue with it.
19153
19154 - If the pendulum is -2, then two stores have already been
19155 issued in this cycle, so we increase the priority of the first load
19156 in the ready list to increase its likelihood of being chosen first
19157 in the next cycle.
19158
19159 - If the pendulum is -1, then a single store has been issued in this
19160 cycle and we attempt to locate another store in the ready list to
19161 issue with it, preferring a store to an adjacent memory location to
19162 facilitate store pairing in the store queue.
19163
19164 - If the pendulum is 2, then two loads have already been
19165 issued in this cycle, so we increase the priority of the first store
19166 in the ready list to increase its likelihood of being chosen first
19167 in the next cycle.
19168
19169 - If the pendulum < -2 or > 2, then do nothing.
19170
19171 Note: This code covers the most common scenarios. There exist
19172 non-load/store instructions which make use of the LSU and which
19173 would need to be accounted for to strictly model the behavior
19174 of the machine. Those instructions are currently unaccounted
19175 for to help minimize compile time overhead of this code.
19176 */
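/* Illustrative sequence (a sketch of the common case): a store moves
   the pendulum to -1 and we search the ready list for a second store
   to pair with it; a second store moves it to -2, after which the
   first load found on the ready list gets a priority boost so the
   next cycle starts with a load.  */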
19177 int pos;
19178 rtx load_mem, str_mem;
19179
19180 if (is_store_insn (last_scheduled_insn, &str_mem))
19181 /* Issuing a store, swing the load_store_pendulum to the left */
19182 load_store_pendulum--;
19183 else if (is_load_insn (last_scheduled_insn, &load_mem))
19184 /* Issuing a load, swing the load_store_pendulum to the right */
19185 load_store_pendulum++;
19186 else
19187 return cached_can_issue_more;
19188
19189 /* If the pendulum is balanced, or there is only one instruction on
19190 the ready list, then all is well, so return. */
19191 if ((load_store_pendulum == 0) || (lastpos <= 0))
19192 return cached_can_issue_more;
19193
19194 if (load_store_pendulum == 1)
19195 {
19196 /* A load has been issued in this cycle. Scan the ready list
19197 for another load to issue with it */
19198 pos = lastpos;
19199
19200 while (pos >= 0)
19201 {
19202 if (is_load_insn (ready[pos], &load_mem))
19203 {
19204 /* Found a load. Move it to the head of the ready list,
19205 and adjust its priority so that it is more likely to
19206 stay there */
19207 move_to_end_of_ready (ready, pos, lastpos);
19208
19209 if (!sel_sched_p ()
19210 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19211 INSN_PRIORITY (ready[lastpos])++;
19212 break;
19213 }
19214 pos--;
19215 }
19216 }
19217 else if (load_store_pendulum == -2)
19218 {
19219 /* Two stores have been issued in this cycle. Increase the
19220 priority of the first load in the ready list to favor it for
19221 issuing in the next cycle. */
19222 pos = lastpos;
19223
19224 while (pos >= 0)
19225 {
19226 if (is_load_insn (ready[pos], &load_mem)
19227 && !sel_sched_p ()
19228 && INSN_PRIORITY_KNOWN (ready[pos]))
19229 {
19230 INSN_PRIORITY (ready[pos])++;
19231
19232 /* Adjust the pendulum to account for the fact that a load
19233 was found and increased in priority. This is to prevent
19234 increasing the priority of multiple loads */
19235 load_store_pendulum--;
19236
19237 break;
19238 }
19239 pos--;
19240 }
19241 }
19242 else if (load_store_pendulum == -1)
19243 {
19244 /* A store has been issued in this cycle. Scan the ready list for
19245 another store to issue with it, preferring a store to an adjacent
19246 memory location */
19247 int first_store_pos = -1;
19248
19249 pos = lastpos;
19250
19251 while (pos >= 0)
19252 {
19253 if (is_store_insn (ready[pos], &str_mem))
19254 {
19255 rtx str_mem2;
19256 /* Maintain the index of the first store found on the
19257 list */
19258 if (first_store_pos == -1)
19259 first_store_pos = pos;
19260
19261 if (is_store_insn (last_scheduled_insn, &str_mem2)
19262 && adjacent_mem_locations (str_mem, str_mem2))
19263 {
19264 /* Found an adjacent store. Move it to the head of the
19265 ready list, and adjust its priority so that it is
19266 more likely to stay there */
19267 move_to_end_of_ready (ready, pos, lastpos);
19268
19269 if (!sel_sched_p ()
19270 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19271 INSN_PRIORITY (ready[lastpos])++;
19272
19273 first_store_pos = -1;
19274
19275 break;
19276 }
19277 }
19278 pos--;
19279 }
19280
19281 if (first_store_pos >= 0)
19282 {
19283 /* An adjacent store wasn't found, but a non-adjacent store was,
19284 so move the non-adjacent store to the front of the ready
19285 list, and adjust its priority so that it is more likely to
19286 stay there. */
19287 move_to_end_of_ready (ready, first_store_pos, lastpos);
19288 if (!sel_sched_p ()
19289 && INSN_PRIORITY_KNOWN (ready[lastpos]))
19290 INSN_PRIORITY (ready[lastpos])++;
19291 }
19292 }
19293 else if (load_store_pendulum == 2)
19294 {
19295 /* Two loads have been issued in this cycle. Increase the priority
19296 of the first store in the ready list to favor it for issuing in
19297 the next cycle. */
19298 pos = lastpos;
19299
19300 while (pos >= 0)
19301 {
19302 if (is_store_insn (ready[pos], &str_mem)
19303 && !sel_sched_p ()
19304 && INSN_PRIORITY_KNOWN (ready[pos]))
19305 {
19306 INSN_PRIORITY (ready[pos])++;
19307
19308 /* Adjust the pendulum to account for the fact that a store
19309 was found and increased in priority. This is to prevent
19310 increasing the priority of multiple stores */
19311 load_store_pendulum++;
19312
19313 break;
19314 }
19315 pos--;
19316 }
19317 }
19318
19319 return cached_can_issue_more;
19320 }
19321
19322 /* Do Power9 specific sched_reorder2 reordering of ready list. */
19323
19324 static int
19325 power9_sched_reorder2 (rtx_insn **ready, int lastpos)
19326 {
19327 int pos;
19328 enum attr_type type, type2;
19329
19330 type = get_attr_type (last_scheduled_insn);
19331
19332 /* Try to issue fixed point divides back-to-back in pairs so they will be
19333 routed to separate execution units and execute in parallel. */
19334 if (type == TYPE_DIV && divide_cnt == 0)
19335 {
19336 /* First divide has been scheduled. */
19337 divide_cnt = 1;
19338
19339 /* Scan the ready list looking for another divide, if found move it
19340 to the end of the list so it is chosen next. */
19341 pos = lastpos;
19342 while (pos >= 0)
19343 {
19344 if (recog_memoized (ready[pos]) >= 0
19345 && get_attr_type (ready[pos]) == TYPE_DIV)
19346 {
19347 move_to_end_of_ready (ready, pos, lastpos);
19348 break;
19349 }
19350 pos--;
19351 }
19352 }
19353 else
19354 {
19355 /* Last insn was the 2nd divide or not a divide, reset the counter. */
19356 divide_cnt = 0;
19357
19358 /* The best dispatch throughput for vector and vector load insns can be
19359 achieved by interleaving a vector and vector load such that they'll
19360 dispatch to the same superslice. If this pairing cannot be achieved
19361 then it is best to pair vector insns together and vector load insns
19362 together.
19363
19364 To aid in this pairing, vec_pairing maintains the current state with
19365 the following values:
19366
19367 0 : Initial state, no vecload/vector pairing has been started.
19368
19369 1 : A vecload or vector insn has been issued and a candidate for
19370 pairing has been found and moved to the end of the ready
19371 list. */
19372 if (type == TYPE_VECLOAD)
19373 {
19374 /* Issued a vecload. */
19375 if (vec_pairing == 0)
19376 {
19377 int vecload_pos = -1;
19378 /* We issued a single vecload, look for a vector insn to pair it
19379 with. If one isn't found, try to pair another vecload. */
19380 pos = lastpos;
19381 while (pos >= 0)
19382 {
19383 if (recog_memoized (ready[pos]) >= 0)
19384 {
19385 type2 = get_attr_type (ready[pos]);
19386 if (is_power9_pairable_vec_type (type2))
19387 {
19388 /* Found a vector insn to pair with, move it to the
19389 end of the ready list so it is scheduled next. */
19390 move_to_end_of_ready (ready, pos, lastpos);
19391 vec_pairing = 1;
19392 return cached_can_issue_more;
19393 }
19394 else if (type2 == TYPE_VECLOAD && vecload_pos == -1)
19395 /* Remember position of first vecload seen. */
19396 vecload_pos = pos;
19397 }
19398 pos--;
19399 }
19400 if (vecload_pos >= 0)
19401 {
19402 /* Didn't find a vector to pair with but did find a vecload,
19403 move it to the end of the ready list. */
19404 move_to_end_of_ready (ready, vecload_pos, lastpos);
19405 vec_pairing = 1;
19406 return cached_can_issue_more;
19407 }
19408 }
19409 }
19410 else if (is_power9_pairable_vec_type (type))
19411 {
19412 /* Issued a vector operation. */
19413 if (vec_pairing == 0)
19414 {
19415 int vec_pos = -1;
19416 /* We issued a single vector insn, look for a vecload to pair it
19417 with. If one isn't found, try to pair another vector. */
19418 pos = lastpos;
19419 while (pos >= 0)
19420 {
19421 if (recog_memoized (ready[pos]) >= 0)
19422 {
19423 type2 = get_attr_type (ready[pos]);
19424 if (type2 == TYPE_VECLOAD)
19425 {
19426 /* Found a vecload insn to pair with, move it to the
19427 end of the ready list so it is scheduled next. */
19428 move_to_end_of_ready (ready, pos, lastpos);
19429 vec_pairing = 1;
19430 return cached_can_issue_more;
19431 }
19432 else if (is_power9_pairable_vec_type (type2)
19433 && vec_pos == -1)
19434 /* Remember position of first vector insn seen. */
19435 vec_pos = pos;
19436 }
19437 pos--;
19438 }
19439 if (vec_pos >= 0)
19440 {
19441 /* Didn't find a vecload to pair with but did find a vector
19442 insn, move it to the end of the ready list. */
19443 move_to_end_of_ready (ready, vec_pos, lastpos);
19444 vec_pairing = 1;
19445 return cached_can_issue_more;
19446 }
19447 }
19448 }
19449
19450 /* We've either finished a vec/vecload pair, couldn't find an insn to
19451 continue the current pair, or the last insn had nothing to do
19452 with pairing. In any case, reset the state. */
19453 vec_pairing = 0;
19454 }
19455
19456 return cached_can_issue_more;
19457 }
19458
19459 /* Determine if INSN is a store to memory that can be fused with a similar
19460 adjacent store. */
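/* For example (illustrative, hypothetical registers), the pair

       std 4,0(9)
       std 5,8(9)

   uses non-update, non-indexed base+displacement forms at adjacent
   doubleword offsets and is a candidate for Power10 store fusion.  */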
19461
19462 static bool
19463 is_fusable_store (rtx_insn *insn, rtx *str_mem)
19464 {
19465 /* Insn must be a non-prefixed base+disp form store. */
19466 if (is_store_insn (insn, str_mem)
19467 && get_attr_prefixed (insn) == PREFIXED_NO
19468 && get_attr_update (insn) == UPDATE_NO
19469 && get_attr_indexed (insn) == INDEXED_NO)
19470 {
19471 /* Further restrictions by mode and size. */
19472 if (!MEM_SIZE_KNOWN_P (*str_mem))
19473 return false;
19474
19475 machine_mode mode = GET_MODE (*str_mem);
19476 HOST_WIDE_INT size = MEM_SIZE (*str_mem);
19477
19478 if (INTEGRAL_MODE_P (mode))
19479 /* Must be word or dword size. */
19480 return (size == 4 || size == 8);
19481 else if (FLOAT_MODE_P (mode))
19482 /* Must be dword size. */
19483 return (size == 8);
19484 }
19485
19486 return false;
19487 }
19488
19489 /* Do Power10 specific reordering of the ready list. */
19490
19491 static int
19492 power10_sched_reorder (rtx_insn **ready, int lastpos)
19493 {
19494 rtx mem1;
19495
19496 /* Do store fusion during sched2 only. */
19497 if (!reload_completed)
19498 return cached_can_issue_more;
19499
19500 /* If the prior insn finished off a store fusion pair then simply
19501 reset the counter and return, nothing more to do. */
19502 if (load_store_pendulum != 0)
19503 {
19504 load_store_pendulum = 0;
19505 return cached_can_issue_more;
19506 }
19507
19508 /* Try to pair certain store insns to adjacent memory locations
19509 so that the hardware will fuse them to a single operation. */
19510 if (TARGET_P10_FUSION && is_fusable_store (last_scheduled_insn, &mem1))
19511 {
19512
19513 /* A fusable store was just scheduled. Scan the ready list for another
19514 store that it can fuse with. */
19515 int pos = lastpos;
19516 while (pos >= 0)
19517 {
19518 rtx mem2;
19519 /* GPR stores can be ascending or descending offsets, FPR/VSR stores
19520 must be ascending only. */
19521 if (is_fusable_store (ready[pos], &mem2)
19522 && ((INTEGRAL_MODE_P (GET_MODE (mem1))
19523 && adjacent_mem_locations (mem1, mem2))
19524 || (FLOAT_MODE_P (GET_MODE (mem1))
19525 && (adjacent_mem_locations (mem1, mem2) == mem1))))
19526 {
19527 /* Found a fusable store. Move it to the end of the ready list
19528 so it is scheduled next. */
19529 move_to_end_of_ready (ready, pos, lastpos);
19530
19531 load_store_pendulum = -1;
19532 break;
19533 }
19534 pos--;
19535 }
19536 }
19537
19538 return cached_can_issue_more;
19539 }
19540
19541 /* We are about to begin issuing insns for this clock cycle. */
19542
19543 static int
19544 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
19545 rtx_insn **ready ATTRIBUTE_UNUSED,
19546 int *pn_ready ATTRIBUTE_UNUSED,
19547 int clock_var ATTRIBUTE_UNUSED)
19548 {
19549 int n_ready = *pn_ready;
19550
19551 if (sched_verbose)
19552 fprintf (dump, "// rs6000_sched_reorder :\n");
19553
19554 /* Reorder the ready list if the next insn to issue
19555 is a nonpipelined insn.  */
19556 if (rs6000_tune == PROCESSOR_CELL && n_ready > 1)
19557 {
19558 if (is_nonpipeline_insn (ready[n_ready - 1])
19559 && (recog_memoized (ready[n_ready - 2]) > 0))
19560 /* Simply swap first two insns. */
19561 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
19562 }
19563
19564 if (rs6000_tune == PROCESSOR_POWER6)
19565 load_store_pendulum = 0;
19566
19567 /* Do Power10 dependent reordering. */
19568 if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
19569 power10_sched_reorder (ready, n_ready - 1);
19570
19571 return rs6000_issue_rate ();
19572 }
19573
19574 /* Like rs6000_sched_reorder, but called after issuing each insn. */
19575
19576 static int
19577 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
19578 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
19579 {
19580 if (sched_verbose)
19581 fprintf (dump, "// rs6000_sched_reorder2 :\n");
19582
19583 /* Do Power6 dependent reordering if necessary. */
19584 if (rs6000_tune == PROCESSOR_POWER6 && last_scheduled_insn)
19585 return power6_sched_reorder2 (ready, *pn_ready - 1);
19586
19587 /* Do Power9 dependent reordering if necessary. */
19588 if (rs6000_tune == PROCESSOR_POWER9 && last_scheduled_insn
19589 && recog_memoized (last_scheduled_insn) >= 0)
19590 return power9_sched_reorder2 (ready, *pn_ready - 1);
19591
19592 /* Do Power10 dependent reordering. */
19593 if (rs6000_tune == PROCESSOR_POWER10 && last_scheduled_insn)
19594 return power10_sched_reorder (ready, *pn_ready - 1);
19595
19596 return cached_can_issue_more;
19597 }
19598
19599 /* Return whether the presence of INSN causes a dispatch group termination
19600 of group WHICH_GROUP.
19601
19602 If WHICH_GROUP == current_group, this function will return true if INSN
19603 causes the termination of the current group (i.e., the dispatch group to
19604 which INSN belongs). This means that INSN will be the last insn in the
19605 group it belongs to.
19606
19607 If WHICH_GROUP == previous_group, this function will return true if INSN
19608 causes the termination of the previous group (i.e., the dispatch group that
19609 precedes the group to which INSN belongs). This means that INSN will be
19610 the first insn in the group it belongs to.  */
19611
19612 static bool
19613 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
19614 {
19615 bool first, last;
19616
19617 if (! insn)
19618 return false;
19619
19620 first = insn_must_be_first_in_group (insn);
19621 last = insn_must_be_last_in_group (insn);
19622
19623 if (first && last)
19624 return true;
19625
19626 if (which_group == current_group)
19627 return last;
19628 else if (which_group == previous_group)
19629 return first;
19630
19631 return false;
19632 }
19633
19634
19635 static bool
19636 insn_must_be_first_in_group (rtx_insn *insn)
19637 {
19638 enum attr_type type;
19639
19640 if (!insn
19641 || NOTE_P (insn)
19642 || DEBUG_INSN_P (insn)
19643 || GET_CODE (PATTERN (insn)) == USE
19644 || GET_CODE (PATTERN (insn)) == CLOBBER)
19645 return false;
19646
19647 switch (rs6000_tune)
19648 {
19649 case PROCESSOR_POWER5:
19650 if (is_cracked_insn (insn))
19651 return true;
19652 /* FALLTHRU */
19653 case PROCESSOR_POWER4:
19654 if (is_microcoded_insn (insn))
19655 return true;
19656
19657 if (!rs6000_sched_groups)
19658 return false;
19659
19660 type = get_attr_type (insn);
19661
19662 switch (type)
19663 {
19664 case TYPE_MFCR:
19665 case TYPE_MFCRF:
19666 case TYPE_MTCR:
19667 case TYPE_CR_LOGICAL:
19668 case TYPE_MTJMPR:
19669 case TYPE_MFJMPR:
19670 case TYPE_DIV:
19671 case TYPE_LOAD_L:
19672 case TYPE_STORE_C:
19673 case TYPE_ISYNC:
19674 case TYPE_SYNC:
19675 return true;
19676 default:
19677 break;
19678 }
19679 break;
19680 case PROCESSOR_POWER6:
19681 type = get_attr_type (insn);
19682
19683 switch (type)
19684 {
19685 case TYPE_EXTS:
19686 case TYPE_CNTLZ:
19687 case TYPE_TRAP:
19688 case TYPE_MUL:
19689 case TYPE_INSERT:
19690 case TYPE_FPCOMPARE:
19691 case TYPE_MFCR:
19692 case TYPE_MTCR:
19693 case TYPE_MFJMPR:
19694 case TYPE_MTJMPR:
19695 case TYPE_ISYNC:
19696 case TYPE_SYNC:
19697 case TYPE_LOAD_L:
19698 case TYPE_STORE_C:
19699 return true;
19700 case TYPE_SHIFT:
19701 if (get_attr_dot (insn) == DOT_NO
19702 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19703 return true;
19704 else
19705 break;
19706 case TYPE_DIV:
19707 if (get_attr_size (insn) == SIZE_32)
19708 return true;
19709 else
19710 break;
19711 case TYPE_LOAD:
19712 case TYPE_STORE:
19713 case TYPE_FPLOAD:
19714 case TYPE_FPSTORE:
19715 if (get_attr_update (insn) == UPDATE_YES)
19716 return true;
19717 else
19718 break;
19719 default:
19720 break;
19721 }
19722 break;
19723 case PROCESSOR_POWER7:
19724 type = get_attr_type (insn);
19725
19726 switch (type)
19727 {
19728 case TYPE_CR_LOGICAL:
19729 case TYPE_MFCR:
19730 case TYPE_MFCRF:
19731 case TYPE_MTCR:
19732 case TYPE_DIV:
19733 case TYPE_ISYNC:
19734 case TYPE_LOAD_L:
19735 case TYPE_STORE_C:
19736 case TYPE_MFJMPR:
19737 case TYPE_MTJMPR:
19738 return true;
19739 case TYPE_MUL:
19740 case TYPE_SHIFT:
19741 case TYPE_EXTS:
19742 if (get_attr_dot (insn) == DOT_YES)
19743 return true;
19744 else
19745 break;
19746 case TYPE_LOAD:
19747 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19748 || get_attr_update (insn) == UPDATE_YES)
19749 return true;
19750 else
19751 break;
19752 case TYPE_STORE:
19753 case TYPE_FPLOAD:
19754 case TYPE_FPSTORE:
19755 if (get_attr_update (insn) == UPDATE_YES)
19756 return true;
19757 else
19758 break;
19759 default:
19760 break;
19761 }
19762 break;
19763 case PROCESSOR_POWER8:
19764 type = get_attr_type (insn);
19765
19766 switch (type)
19767 {
19768 case TYPE_CR_LOGICAL:
19769 case TYPE_MFCR:
19770 case TYPE_MFCRF:
19771 case TYPE_MTCR:
19772 case TYPE_SYNC:
19773 case TYPE_ISYNC:
19774 case TYPE_LOAD_L:
19775 case TYPE_STORE_C:
19776 case TYPE_VECSTORE:
19777 case TYPE_MFJMPR:
19778 case TYPE_MTJMPR:
19779 return true;
19780 case TYPE_SHIFT:
19781 case TYPE_EXTS:
19782 case TYPE_MUL:
19783 if (get_attr_dot (insn) == DOT_YES)
19784 return true;
19785 else
19786 break;
19787 case TYPE_LOAD:
19788 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19789 || get_attr_update (insn) == UPDATE_YES)
19790 return true;
19791 else
19792 break;
19793 case TYPE_STORE:
19794 if (get_attr_update (insn) == UPDATE_YES
19795 && get_attr_indexed (insn) == INDEXED_YES)
19796 return true;
19797 else
19798 break;
19799 default:
19800 break;
19801 }
19802 break;
19803 default:
19804 break;
19805 }
19806
19807 return false;
19808 }
19809
19810 static bool
19811 insn_must_be_last_in_group (rtx_insn *insn)
19812 {
19813 enum attr_type type;
19814
19815 if (!insn
19816 || NOTE_P (insn)
19817 || DEBUG_INSN_P (insn)
19818 || GET_CODE (PATTERN (insn)) == USE
19819 || GET_CODE (PATTERN (insn)) == CLOBBER)
19820 return false;
19821
19822 switch (rs6000_tune)
{
19823 case PROCESSOR_POWER4:
19824 case PROCESSOR_POWER5:
19825 if (is_microcoded_insn (insn))
19826 return true;
19827
19828 if (is_branch_slot_insn (insn))
19829 return true;
19830
19831 break;
19832 case PROCESSOR_POWER6:
19833 type = get_attr_type (insn);
19834
19835 switch (type)
19836 {
19837 case TYPE_EXTS:
19838 case TYPE_CNTLZ:
19839 case TYPE_TRAP:
19840 case TYPE_MUL:
19841 case TYPE_FPCOMPARE:
19842 case TYPE_MFCR:
19843 case TYPE_MTCR:
19844 case TYPE_MFJMPR:
19845 case TYPE_MTJMPR:
19846 case TYPE_ISYNC:
19847 case TYPE_SYNC:
19848 case TYPE_LOAD_L:
19849 case TYPE_STORE_C:
19850 return true;
19851 case TYPE_SHIFT:
19852 if (get_attr_dot (insn) == DOT_NO
19853 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
19854 return true;
19855 else
19856 break;
19857 case TYPE_DIV:
19858 if (get_attr_size (insn) == SIZE_32)
19859 return true;
19860 else
19861 break;
19862 default:
19863 break;
19864 }
19865 break;
19866 case PROCESSOR_POWER7:
19867 type = get_attr_type (insn);
19868
19869 switch (type)
19870 {
19871 case TYPE_ISYNC:
19872 case TYPE_SYNC:
19873 case TYPE_LOAD_L:
19874 case TYPE_STORE_C:
19875 return true;
19876 case TYPE_LOAD:
19877 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19878 && get_attr_update (insn) == UPDATE_YES)
19879 return true;
19880 else
19881 break;
19882 case TYPE_STORE:
19883 if (get_attr_update (insn) == UPDATE_YES
19884 && get_attr_indexed (insn) == INDEXED_YES)
19885 return true;
19886 else
19887 break;
19888 default:
19889 break;
19890 }
19891 break;
19892 case PROCESSOR_POWER8:
19893 type = get_attr_type (insn);
19894
19895 switch (type)
19896 {
19897 case TYPE_MFCR:
19898 case TYPE_MTCR:
19899 case TYPE_ISYNC:
19900 case TYPE_SYNC:
19901 case TYPE_LOAD_L:
19902 case TYPE_STORE_C:
19903 return true;
19904 case TYPE_LOAD:
19905 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
19906 && get_attr_update (insn) == UPDATE_YES)
19907 return true;
19908 else
19909 break;
19910 case TYPE_STORE:
19911 if (get_attr_update (insn) == UPDATE_YES
19912 && get_attr_indexed (insn) == INDEXED_YES)
19913 return true;
19914 else
19915 break;
19916 default:
19917 break;
19918 }
19919 break;
19920 default:
19921 break;
19922 }
19923
19924 return false;
19925 }
19926
19927 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
19928 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
19929
19930 static bool
19931 is_costly_group (rtx *group_insns, rtx next_insn)
19932 {
19933 int i;
19934 int issue_rate = rs6000_issue_rate ();
19935
19936 for (i = 0; i < issue_rate; i++)
19937 {
19938 sd_iterator_def sd_it;
19939 dep_t dep;
19940 rtx insn = group_insns[i];
19941
19942 if (!insn)
19943 continue;
19944
19945 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
19946 {
19947 rtx next = DEP_CON (dep);
19948
19949 if (next == next_insn
19950 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
19951 return true;
19952 }
19953 }
19954
19955 return false;
19956 }
19957
19958 /* Utility of the function redefine_groups.
19959 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
19960 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
19961 to keep it "far" (in a separate group) from GROUP_INSNS, following
19962 one of the following schemes, depending on the value of the flag
19963 -minsert-sched-nops = X:
19964 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
19965 in order to force NEXT_INSN into a separate group.
19966 (2) X < sched_finish_regroup_exact: insert exactly X nops.
19967 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
19968 insertion (has a group just ended, how many vacant issue slots remain in the
19969 last group, and how many dispatch groups were encountered so far). */
19970
19971 static int
19972 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
19973 rtx_insn *next_insn, bool *group_end, int can_issue_more,
19974 int *group_count)
19975 {
19976 rtx nop;
19977 bool force;
19978 int issue_rate = rs6000_issue_rate ();
19979 bool end = *group_end;
19980 int i;
19981
19982 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
19983 return can_issue_more;
19984
19985 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
19986 return can_issue_more;
19987
19988 force = is_costly_group (group_insns, next_insn);
19989 if (!force)
19990 return can_issue_more;
19991
19992 if (sched_verbose > 6)
19993 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
19994 *group_count ,can_issue_more);
19995
19996 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
19997 {
19998 if (*group_end)
19999 can_issue_more = 0;
20000
20001 /* Since only a branch can be issued in the last issue_slot, it is
20002 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
20003 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
20004 in this case the last nop will start a new group and the branch
20005 will be forced to the new group. */
20006 if (can_issue_more && !is_branch_slot_insn (next_insn))
20007 can_issue_more--;
20008
20009 /* Do we have a special group ending nop? */
20010 if (rs6000_tune == PROCESSOR_POWER6 || rs6000_tune == PROCESSOR_POWER7
20011 || rs6000_tune == PROCESSOR_POWER8)
20012 {
20013 nop = gen_group_ending_nop ();
20014 emit_insn_before (nop, next_insn);
20015 can_issue_more = 0;
20016 }
20017 else
20018 while (can_issue_more > 0)
20019 {
20020 nop = gen_nop ();
20021 emit_insn_before (nop, next_insn);
20022 can_issue_more--;
20023 }
20024
20025 *group_end = true;
20026 return 0;
20027 }
20028
20029 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
20030 {
20031 int n_nops = rs6000_sched_insert_nops;
20032
20033 /* Nops can't be issued from the branch slot, so the effective
20034 issue_rate for nops is 'issue_rate - 1'. */
20035 if (can_issue_more == 0)
20036 can_issue_more = issue_rate;
20037 can_issue_more--;
20038 if (can_issue_more == 0)
20039 {
20040 can_issue_more = issue_rate - 1;
20041 (*group_count)++;
20042 end = true;
20043 for (i = 0; i < issue_rate; i++)
20044 {
20045 group_insns[i] = 0;
20046 }
20047 }
20048
20049 while (n_nops > 0)
20050 {
20051 nop = gen_nop ();
20052 emit_insn_before (nop, next_insn);
20053 if (can_issue_more == issue_rate - 1) /* New group begins.  */
20054 end = false;
20055 can_issue_more--;
20056 if (can_issue_more == 0)
20057 {
20058 can_issue_more = issue_rate - 1;
20059 (*group_count)++;
20060 end = true;
20061 for (i = 0; i < issue_rate; i++)
20062 {
20063 group_insns[i] = 0;
20064 }
20065 }
20066 n_nops--;
20067 }
20068
20069 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
20070 can_issue_more++;
20071
20072 /* Is next_insn going to start a new group? */
20073 *group_end
20074 = (end
20075 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
20076 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
20077 || (can_issue_more < issue_rate
20078 && insn_terminates_group_p (next_insn, previous_group)));
20079 if (*group_end && end)
20080 (*group_count)--;
20081
20082 if (sched_verbose > 6)
20083 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
20084 *group_count, can_issue_more);
20085 return can_issue_more;
20086 }
20087
20088 return can_issue_more;
20089 }
20090
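/* Worked example for the second scheme above (illustrative only): with
   issue_rate == 4, -minsert-sched-nops=2 and a full current group
   (can_issue_more == 0), can_issue_more is reset to 4 and then decremented
   to 3, since nops cannot use the branch slot.  The loop emits two nops,
   leaving can_issue_more == 1, which is scaled back to 2 before testing
   whether NEXT_INSN starts a new group.  */
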
20091 /* This function tries to synch the dispatch groups that the compiler "sees"
20092 with the dispatch groups that the processor dispatcher is expected to
20093 form in practice. It tries to achieve this synchronization by forcing the
20094 estimated processor grouping on the compiler (as opposed to the function
20095 'pad_groups', which tries to force the scheduler's grouping on the processor).
20096
20097 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
20098 examines the (estimated) dispatch groups that will be formed by the processor
20099 dispatcher. It marks these group boundaries to reflect the estimated
20100 processor grouping, overriding the grouping that the scheduler had marked.
20101 Depending on the value of the flag '-minsert-sched-nops' this function can
20102 force certain insns into separate groups or force a certain distance between
20103 them by inserting nops, for example, if there exists a "costly dependence"
20104 between the insns.
20105
20106 The function estimates the group boundaries that the processor will form as
20107 follows: It keeps track of how many vacant issue slots are available after
20108 each insn. A subsequent insn will start a new group if one of the following
20109 4 cases applies:
20110 - no more vacant issue slots remain in the current dispatch group.
20111 - only the last issue slot, which is the branch slot, is vacant, but the next
20112 insn is not a branch.
20113 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
20114 which means that a cracked insn (which occupies two issue slots) can't be
20115 issued in this group.
20116 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
20117 start a new group. */
20118
20119 static int
20120 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
20121 rtx_insn *tail)
20122 {
20123 rtx_insn *insn, *next_insn;
20124 int issue_rate;
20125 int can_issue_more;
20126 int slot, i;
20127 bool group_end;
20128 int group_count = 0;
20129 rtx *group_insns;
20130
20131 /* Initialize. */
20132 issue_rate = rs6000_issue_rate ();
20133 group_insns = XALLOCAVEC (rtx, issue_rate);
20134 for (i = 0; i < issue_rate; i++)
20135 {
20136 group_insns[i] = 0;
20137 }
20138 can_issue_more = issue_rate;
20139 slot = 0;
20140 insn = get_next_active_insn (prev_head_insn, tail);
20141 group_end = false;
20142
20143 while (insn != NULL_RTX)
20144 {
20145 slot = (issue_rate - can_issue_more);
20146 group_insns[slot] = insn;
20147 can_issue_more =
20148 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
20149 if (insn_terminates_group_p (insn, current_group))
20150 can_issue_more = 0;
20151
20152 next_insn = get_next_active_insn (insn, tail);
20153 if (next_insn == NULL_RTX)
20154 return group_count + 1;
20155
20156 /* Is next_insn going to start a new group? */
20157 group_end
20158 = (can_issue_more == 0
20159 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
20160 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
20161 || (can_issue_more < issue_rate
20162 && insn_terminates_group_p (next_insn, previous_group)));
20163
20164 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
20165 next_insn, &group_end, can_issue_more,
20166 &group_count);
20167
20168 if (group_end)
20169 {
20170 group_count++;
20171 can_issue_more = 0;
20172 for (i = 0; i < issue_rate; i++)
20173 {
20174 group_insns[i] = 0;
20175 }
20176 }
20177
20178 if (GET_MODE (next_insn) == TImode && can_issue_more)
20179 PUT_MODE (next_insn, VOIDmode);
20180 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
20181 PUT_MODE (next_insn, TImode);
20182
20183 insn = next_insn;
20184 if (can_issue_more == 0)
20185 can_issue_more = issue_rate;
20186 } /* while */
20187
20188 return group_count;
20189 }
20190
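#if 0
/* Minimal standalone sketch (hypothetical; the name and free-standing form
   are not part of GCC) of the four-case group-boundary test used by
   redefine_groups and force_new_group.  */
static bool
example_group_boundary_p (int can_issue_more, int issue_rate,
			  bool branch_p, bool cracked_p, bool must_be_first_p)
{
  return (can_issue_more == 0			  /* No vacant slots.  */
	  || (can_issue_more == 1 && !branch_p)	  /* Only the branch slot.  */
	  || (can_issue_more <= 2 && cracked_p)	  /* Cracked insn needs 2.  */
	  || (can_issue_more < issue_rate && must_be_first_p));
}
#endif
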
20191 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
20192 dispatch group boundaries that the scheduler had marked. Pad with nops
20193 any dispatch groups which have vacant issue slots, in order to force the
20194 scheduler's grouping on the processor dispatcher. The function
20195 returns the number of dispatch groups found. */
20196
20197 static int
20198 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
20199 rtx_insn *tail)
20200 {
20201 rtx_insn *insn, *next_insn;
20202 rtx nop;
20203 int issue_rate;
20204 int can_issue_more;
20205 int group_end;
20206 int group_count = 0;
20207
20208 /* Initialize issue_rate. */
20209 issue_rate = rs6000_issue_rate ();
20210 can_issue_more = issue_rate;
20211
20212 insn = get_next_active_insn (prev_head_insn, tail);
20213 next_insn = get_next_active_insn (insn, tail);
20214
20215 while (insn != NULL_RTX)
20216 {
20217 can_issue_more =
20218 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
20219
20220 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
20221
20222 if (next_insn == NULL_RTX)
20223 break;
20224
20225 if (group_end)
20226 {
20227 /* If the scheduler had marked group termination at this location
20228 (between insn and next_insn), and neither insn nor next_insn will
20229 force group termination, pad the group with nops to force group
20230 termination. */
20231 if (can_issue_more
20232 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
20233 && !insn_terminates_group_p (insn, current_group)
20234 && !insn_terminates_group_p (next_insn, previous_group))
20235 {
20236 if (!is_branch_slot_insn (next_insn))
20237 can_issue_more--;
20238
20239 while (can_issue_more)
20240 {
20241 nop = gen_nop ();
20242 emit_insn_before (nop, next_insn);
20243 can_issue_more--;
20244 }
20245 }
20246
20247 can_issue_more = issue_rate;
20248 group_count++;
20249 }
20250
20251 insn = next_insn;
20252 next_insn = get_next_active_insn (insn, tail);
20253 }
20254
20255 return group_count;
20256 }
20257
20258 /* We're beginning a new block. Initialize data structures as necessary. */
20259
20260 static void
20261 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
20262 int sched_verbose ATTRIBUTE_UNUSED,
20263 int max_ready ATTRIBUTE_UNUSED)
20264 {
20265 last_scheduled_insn = NULL;
20266 load_store_pendulum = 0;
20267 divide_cnt = 0;
20268 vec_pairing = 0;
20269 }
20270
20271 /* The following function is called at the end of scheduling BB.
20272 After reload, it inserts nops to enforce insn group bundling.  */
20273
20274 static void
20275 rs6000_sched_finish (FILE *dump, int sched_verbose)
20276 {
20277 int n_groups;
20278
20279 if (sched_verbose)
20280 fprintf (dump, "=== Finishing schedule.\n");
20281
20282 if (reload_completed && rs6000_sched_groups)
20283 {
20284 /* Do not run the sched_finish hook when selective scheduling is enabled.  */
20285 if (sel_sched_p ())
20286 return;
20287
20288 if (rs6000_sched_insert_nops == sched_finish_none)
20289 return;
20290
20291 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
20292 n_groups = pad_groups (dump, sched_verbose,
20293 current_sched_info->prev_head,
20294 current_sched_info->next_tail);
20295 else
20296 n_groups = redefine_groups (dump, sched_verbose,
20297 current_sched_info->prev_head,
20298 current_sched_info->next_tail);
20299
20300 if (sched_verbose >= 6)
20301 {
20302 fprintf (dump, "ngroups = %d\n", n_groups);
20303 print_rtl (dump, current_sched_info->prev_head);
20304 fprintf (dump, "Done finish_sched\n");
20305 }
20306 }
20307 }
20308
20309 struct rs6000_sched_context
20310 {
20311 short cached_can_issue_more;
20312 rtx_insn *last_scheduled_insn;
20313 int load_store_pendulum;
20314 int divide_cnt;
20315 int vec_pairing;
20316 };
20317
20318 typedef struct rs6000_sched_context rs6000_sched_context_def;
20319 typedef rs6000_sched_context_def *rs6000_sched_context_t;
20320
20321 /* Allocate storage for a new scheduling context.  */
20322 static void *
20323 rs6000_alloc_sched_context (void)
20324 {
20325 return xmalloc (sizeof (rs6000_sched_context_def));
20326 }
20327
20328 /* If CLEAN_P is true, initialize _SC with clean data;
20329 otherwise, initialize it from the global context.  */
20330 static void
20331 rs6000_init_sched_context (void *_sc, bool clean_p)
20332 {
20333 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
20334
20335 if (clean_p)
20336 {
20337 sc->cached_can_issue_more = 0;
20338 sc->last_scheduled_insn = NULL;
20339 sc->load_store_pendulum = 0;
20340 sc->divide_cnt = 0;
20341 sc->vec_pairing = 0;
20342 }
20343 else
20344 {
20345 sc->cached_can_issue_more = cached_can_issue_more;
20346 sc->last_scheduled_insn = last_scheduled_insn;
20347 sc->load_store_pendulum = load_store_pendulum;
20348 sc->divide_cnt = divide_cnt;
20349 sc->vec_pairing = vec_pairing;
20350 }
20351 }
20352
20353 /* Sets the global scheduling context to the one pointed to by _SC. */
20354 static void
20355 rs6000_set_sched_context (void *_sc)
20356 {
20357 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
20358
20359 gcc_assert (sc != NULL);
20360
20361 cached_can_issue_more = sc->cached_can_issue_more;
20362 last_scheduled_insn = sc->last_scheduled_insn;
20363 load_store_pendulum = sc->load_store_pendulum;
20364 divide_cnt = sc->divide_cnt;
20365 vec_pairing = sc->vec_pairing;
20366 }
20367
20368 /* Free _SC. */
20369 static void
20370 rs6000_free_sched_context (void *_sc)
20371 {
20372 gcc_assert (_sc != NULL);
20373
20374 free (_sc);
20375 }
20376
20377 static bool
20378 rs6000_sched_can_speculate_insn (rtx_insn *insn)
20379 {
20380 switch (get_attr_type (insn))
20381 {
20382 case TYPE_DIV:
20383 case TYPE_SDIV:
20384 case TYPE_DDIV:
20385 case TYPE_VECDIV:
20386 case TYPE_SSQRT:
20387 case TYPE_DSQRT:
20388 return false;
20389
20390 default:
20391 return true;
20392 }
20393 }
20394 \f
20395 /* Length in units of the trampoline for entering a nested function. */
20396
20397 int
20398 rs6000_trampoline_size (void)
20399 {
20400 int ret = 0;
20401
20402 switch (DEFAULT_ABI)
20403 {
20404 default:
20405 gcc_unreachable ();
20406
20407 case ABI_AIX:
20408 ret = (TARGET_32BIT) ? 12 : 24;
20409 break;
20410
20411 case ABI_ELFv2:
20412 gcc_assert (!TARGET_32BIT);
20413 ret = 32;
20414 break;
20415
20416 case ABI_DARWIN:
20417 case ABI_V4:
20418 ret = (TARGET_32BIT) ? 40 : 48;
20419 break;
20420 }
20421
20422 return ret;
20423 }
20424
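#if 0
/* Illustrative only (GNU C; the "example_*" names are hypothetical):
   taking the address of a nested function is what requires the trampoline
   whose size is computed above.  */
void
example_outer (int x)
{
  int example_inner (int y) { return x + y; }	/* Nested function.  */
  int (*example_fp) (int) = example_inner;	/* Needs a trampoline.  */
  example_fp (1);
}
#endif
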
20425 /* Emit RTL insns to initialize the variable parts of a trampoline.
20426 FNADDR is an RTX for the address of the function's pure code.
20427 CXT is an RTX for the static chain value for the function. */
20428
20429 static void
20430 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
20431 {
20432 int regsize = (TARGET_32BIT) ? 4 : 8;
20433 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
20434 rtx ctx_reg = force_reg (Pmode, cxt);
20435 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
20436
20437 switch (DEFAULT_ABI)
20438 {
20439 default:
20440 gcc_unreachable ();
20441
20442 /* Under AIX, just build the 3-word function descriptor.  */
20443 case ABI_AIX:
20444 {
20445 rtx fnmem, fn_reg, toc_reg;
20446
20447 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
20448 error ("you cannot take the address of a nested function if you use "
20449 "the %qs option", "-mno-pointers-to-nested-functions");
20450
20451 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
20452 fn_reg = gen_reg_rtx (Pmode);
20453 toc_reg = gen_reg_rtx (Pmode);
20454
20455 /* Macro to shorten the code expansions below. */
20456 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
20457
20458 m_tramp = replace_equiv_address (m_tramp, addr);
20459
20460 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
20461 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
20462 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
20463 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
20464 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
20465
20466 # undef MEM_PLUS
20467 }
20468 break;
20469
20470 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
20471 case ABI_ELFv2:
20472 case ABI_DARWIN:
20473 case ABI_V4:
20474 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
20475 LCT_NORMAL, VOIDmode,
20476 addr, Pmode,
20477 GEN_INT (rs6000_trampoline_size ()), SImode,
20478 fnaddr, Pmode,
20479 ctx_reg, Pmode);
20480 break;
20481 }
20482 }
20483
20484 \f
20485 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
20486 identifier as an argument, so the front end shouldn't look it up. */
20487
20488 static bool
20489 rs6000_attribute_takes_identifier_p (const_tree attr_id)
20490 {
20491 return is_attribute_p ("altivec", attr_id);
20492 }
20493
20494 /* Handle the "altivec" attribute. The attribute may have
20495 arguments as follows:
20496
20497 __attribute__((altivec(vector__)))
20498 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
20499 __attribute__((altivec(bool__))) (always followed by 'unsigned')
20500
20501 and may appear more than once (e.g., 'vector bool char') in a
20502 given declaration. */
20503
20504 static tree
20505 rs6000_handle_altivec_attribute (tree *node,
20506 tree name ATTRIBUTE_UNUSED,
20507 tree args,
20508 int flags ATTRIBUTE_UNUSED,
20509 bool *no_add_attrs)
20510 {
20511 tree type = *node, result = NULL_TREE;
20512 machine_mode mode;
20513 int unsigned_p;
20514 char altivec_type
20515 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
20516 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
20517 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
20518 : '?');
20519
20520 while (POINTER_TYPE_P (type)
20521 || TREE_CODE (type) == FUNCTION_TYPE
20522 || TREE_CODE (type) == METHOD_TYPE
20523 || TREE_CODE (type) == ARRAY_TYPE)
20524 type = TREE_TYPE (type);
20525
20526 mode = TYPE_MODE (type);
20527
20528 /* Check for invalid AltiVec type qualifiers. */
20529 if (type == long_double_type_node)
20530 error ("use of %<long double%> in AltiVec types is invalid");
20531 else if (type == boolean_type_node)
20532 error ("use of boolean types in AltiVec types is invalid");
20533 else if (TREE_CODE (type) == COMPLEX_TYPE)
20534 error ("use of %<complex%> in AltiVec types is invalid");
20535 else if (DECIMAL_FLOAT_MODE_P (mode))
20536 error ("use of decimal floating-point types in AltiVec types is invalid");
20537 else if (!TARGET_VSX)
20538 {
20539 if (type == long_unsigned_type_node || type == long_integer_type_node)
20540 {
20541 if (TARGET_64BIT)
20542 error ("use of %<long%> in AltiVec types is invalid for "
20543 "64-bit code without %qs", "-mvsx");
20544 else if (rs6000_warn_altivec_long)
20545 warning (0, "use of %<long%> in AltiVec types is deprecated; "
20546 "use %<int%>");
20547 }
20548 else if (type == long_long_unsigned_type_node
20549 || type == long_long_integer_type_node)
20550 error ("use of %<long long%> in AltiVec types is invalid without %qs",
20551 "-mvsx");
20552 else if (type == double_type_node)
20553 error ("use of %<double%> in AltiVec types is invalid without %qs",
20554 "-mvsx");
20555 }
20556
20557 switch (altivec_type)
20558 {
20559 case 'v':
20560 unsigned_p = TYPE_UNSIGNED (type);
20561 switch (mode)
20562 {
20563 case E_TImode:
20564 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
20565 break;
20566 case E_DImode:
20567 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
20568 break;
20569 case E_SImode:
20570 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
20571 break;
20572 case E_HImode:
20573 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
20574 break;
20575 case E_QImode:
20576 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
20577 break;
20578 case E_SFmode: result = V4SF_type_node; break;
20579 case E_DFmode: result = V2DF_type_node; break;
20580 /* If the user says 'vector int bool', we may be handed the 'bool'
20581 attribute _before_ the 'vector' attribute, and so select the
20582 proper type in the 'b' case below. */
20583 case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
20584 case E_V2DImode: case E_V2DFmode:
20585 result = type;
20586 default: break;
20587 }
20588 break;
20589 case 'b':
20590 switch (mode)
20591 {
20592 case E_TImode: case E_V1TImode: result = bool_V1TI_type_node; break;
20593 case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
20594 case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
20595 case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
20596 case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
20597 default: break;
20598 }
20599 break;
20600 case 'p':
20601 switch (mode)
20602 {
20603 case E_V8HImode: result = pixel_V8HI_type_node;
20604 default: break;
20605 }
20606 default: break;
20607 }
20608
20609 /* Propagate qualifiers attached to the element type
20610 onto the vector type. */
20611 if (result && result != type && TYPE_QUALS (type))
20612 result = build_qualified_type (result, TYPE_QUALS (type));
20613
20614 *no_add_attrs = true; /* No need to hang on to the attribute. */
20615
20616 if (result)
20617 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
20618
20619 return NULL_TREE;
20620 }
20621
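#if 0
/* Illustrative only (hypothetical "example_*" names): source-level forms
   this handler receives, using the attribute spellings documented above.  */
typedef __attribute__ ((altivec (vector__))) unsigned int example_vui;
					/* -> unsigned_V4SI_type_node.  */
typedef __attribute__ ((altivec (bool__))) unsigned int example_vbi;
					/* -> bool_V4SI_type_node.  */
#endif
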
20622 /* AltiVec defines five built-in scalar types that serve as vector
20623 elements; we must teach the compiler how to mangle them. The 128-bit
20624 floating point mangling is target-specific as well. MMA defines
20625 two built-in types to be used as opaque vector types. */
20626
20627 static const char *
20628 rs6000_mangle_type (const_tree type)
20629 {
20630 type = TYPE_MAIN_VARIANT (type);
20631
20632 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
20633 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE
20634 && TREE_CODE (type) != OPAQUE_TYPE)
20635 return NULL;
20636
20637 if (type == bool_char_type_node) return "U6__boolc";
20638 if (type == bool_short_type_node) return "U6__bools";
20639 if (type == pixel_type_node) return "u7__pixel";
20640 if (type == bool_int_type_node) return "U6__booli";
20641 if (type == bool_long_long_type_node) return "U6__boolx";
20642
20643 if (type == float128_type_node || type == float64x_type_node)
20644 return NULL;
20645
20646 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IBM_P (TYPE_MODE (type)))
20647 return "g";
20648 if (SCALAR_FLOAT_TYPE_P (type) && FLOAT128_IEEE_P (TYPE_MODE (type)))
20649 return "u9__ieee128";
20650
20651 if (type == vector_pair_type_node)
20652 return "u13__vector_pair";
20653 if (type == vector_quad_type_node)
20654 return "u13__vector_quad";
20655
20656 /* For all other types, use the default mangling. */
20657 return NULL;
20658 }
20659
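/* Illustrative example (assuming the usual Itanium C++ ABI encoding of
   vendor-extended types): with the entries above, a C++ declaration
   "void f (__vector_quad *)" mangles as _Z1fPu13__vector_quad.  */
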
20660 /* Handle a "longcall" or "shortcall" attribute; arguments as in
20661 struct attribute_spec.handler. */
20662
20663 static tree
20664 rs6000_handle_longcall_attribute (tree *node, tree name,
20665 tree args ATTRIBUTE_UNUSED,
20666 int flags ATTRIBUTE_UNUSED,
20667 bool *no_add_attrs)
20668 {
20669 if (TREE_CODE (*node) != FUNCTION_TYPE
20670 && TREE_CODE (*node) != FIELD_DECL
20671 && TREE_CODE (*node) != TYPE_DECL)
20672 {
20673 warning (OPT_Wattributes, "%qE attribute only applies to functions",
20674 name);
20675 *no_add_attrs = true;
20676 }
20677
20678 return NULL_TREE;
20679 }
20680
20681 /* Set longcall attributes on all functions declared when
20682 rs6000_default_long_calls is true. */
20683 static void
20684 rs6000_set_default_type_attributes (tree type)
20685 {
20686 if (rs6000_default_long_calls
20687 && FUNC_OR_METHOD_TYPE_P (type))
20688 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
20689 NULL_TREE,
20690 TYPE_ATTRIBUTES (type));
20691
20692 #if TARGET_MACHO
20693 darwin_set_default_type_attributes (type);
20694 #endif
20695 }
20696
20697 /* Return a reference suitable for calling a function with the
20698 longcall attribute. */
20699
20700 static rtx
20701 rs6000_longcall_ref (rtx call_ref, rtx arg)
20702 {
20703 /* System V adds '.' to the internal name, so skip the leading dots.  */
20704 const char *call_name = XSTR (call_ref, 0);
20705 if (*call_name == '.')
20706 {
20707 while (*call_name == '.')
20708 call_name++;
20709
20710 tree node = get_identifier (call_name);
20711 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
20712 }
20713
20714 if (TARGET_PLTSEQ)
20715 {
20716 rtx base = const0_rtx;
20717 int regno = 12;
20718 if (rs6000_pcrel_p ())
20719 {
20720 rtx reg = gen_rtx_REG (Pmode, regno);
20721 rtx u = gen_rtx_UNSPEC_VOLATILE (Pmode,
20722 gen_rtvec (3, base, call_ref, arg),
20723 UNSPECV_PLT_PCREL);
20724 emit_insn (gen_rtx_SET (reg, u));
20725 return reg;
20726 }
20727
20728 if (DEFAULT_ABI == ABI_ELFv2)
20729 base = gen_rtx_REG (Pmode, TOC_REGISTER);
20730 else
20731 {
20732 if (flag_pic)
20733 base = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
20734 regno = 11;
20735 }
20736 /* Reg must match that used by linker PLT stubs. For ELFv2, r12
20737 may be used by a function global entry point. For SysV4, r11
20738 is used by __glink_PLTresolve lazy resolver entry. */
20739 rtx reg = gen_rtx_REG (Pmode, regno);
20740 rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, base, call_ref, arg),
20741 UNSPEC_PLT16_HA);
20742 rtx lo = gen_rtx_UNSPEC_VOLATILE (Pmode,
20743 gen_rtvec (3, reg, call_ref, arg),
20744 UNSPECV_PLT16_LO);
20745 emit_insn (gen_rtx_SET (reg, hi));
20746 emit_insn (gen_rtx_SET (reg, lo));
20747 return reg;
20748 }
20749
20750 return force_reg (Pmode, call_ref);
20751 }
20752 \f
20753 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
20754 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
20755 #endif
20756
20757 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20758 struct attribute_spec.handler. */
20759 static tree
20760 rs6000_handle_struct_attribute (tree *node, tree name,
20761 tree args ATTRIBUTE_UNUSED,
20762 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20763 {
20764 tree *type = NULL;
20765 if (DECL_P (*node))
20766 {
20767 if (TREE_CODE (*node) == TYPE_DECL)
20768 type = &TREE_TYPE (*node);
20769 }
20770 else
20771 type = node;
20772
20773 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20774 || TREE_CODE (*type) == UNION_TYPE)))
20775 {
20776 warning (OPT_Wattributes, "%qE attribute ignored", name);
20777 *no_add_attrs = true;
20778 }
20779
20780 else if ((is_attribute_p ("ms_struct", name)
20781 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20782 || ((is_attribute_p ("gcc_struct", name)
20783 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20784 {
20785 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
20786 name);
20787 *no_add_attrs = true;
20788 }
20789
20790 return NULL_TREE;
20791 }
20792
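#if 0
/* Illustrative only (hypothetical names): the handler above accepts the
   attribute on struct and union types, warns and drops it elsewhere, and
   warns when the two attributes are combined.  */
struct __attribute__ ((ms_struct)) example_ok { char c; int i; };
int __attribute__ ((ms_struct)) example_bad;	/* Warning: ignored.  */
struct __attribute__ ((gcc_struct, ms_struct)) example_mix { int i; };
						/* Warning: incompatible.  */
#endif
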
20793 static bool
20794 rs6000_ms_bitfield_layout_p (const_tree record_type)
20795 {
20796 return (TARGET_USE_MS_BITFIELD_LAYOUT
20797 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20798 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
20799 }
20800 \f
20801 #ifdef USING_ELFOS_H
20802
20803 /* A get_unnamed_section callback, used for switching to toc_section. */
20804
20805 static void
20806 rs6000_elf_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
20807 {
20808 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20809 && TARGET_MINIMAL_TOC)
20810 {
20811 if (!toc_initialized)
20812 {
20813 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20814 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20815 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
20816 fprintf (asm_out_file, "\t.tc ");
20817 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
20818 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20819 fprintf (asm_out_file, "\n");
20820
20821 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20822 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20823 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20824 fprintf (asm_out_file, " = .+32768\n");
20825 toc_initialized = 1;
20826 }
20827 else
20828 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20829 }
20830 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
20831 {
20832 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
20833 if (!toc_initialized)
20834 {
20835 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20836 toc_initialized = 1;
20837 }
20838 }
20839 else
20840 {
20841 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
20842 if (!toc_initialized)
20843 {
20844 ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
20845 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
20846 fprintf (asm_out_file, " = .+32768\n");
20847 toc_initialized = 1;
20848 }
20849 }
20850 }
20851
20852 /* Implement TARGET_ASM_INIT_SECTIONS. */
20853
20854 static void
20855 rs6000_elf_asm_init_sections (void)
20856 {
20857 toc_section
20858 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
20859
20860 sdata2_section
20861 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
20862 SDATA2_SECTION_ASM_OP);
20863 }
20864
20865 /* Implement TARGET_SELECT_RTX_SECTION. */
20866
20867 static section *
20868 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
20869 unsigned HOST_WIDE_INT align)
20870 {
20871 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
20872 return toc_section;
20873 else
20874 return default_elf_select_rtx_section (mode, x, align);
20875 }
20876 \f
20877 /* For a SYMBOL_REF, set generic flags and then perform some
20878 target-specific processing.
20879
20880 When the AIX ABI is requested on a non-AIX system, replace the
20881 function name with the real name (with a leading .) rather than the
20882 function descriptor name. This saves a lot of overriding code to
20883 read the prefixes. */
20884
20885 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
20886 static void
20887 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
20888 {
20889 default_encode_section_info (decl, rtl, first);
20890
20891 if (first
20892 && TREE_CODE (decl) == FUNCTION_DECL
20893 && !TARGET_AIX
20894 && DEFAULT_ABI == ABI_AIX)
20895 {
20896 rtx sym_ref = XEXP (rtl, 0);
20897 size_t len = strlen (XSTR (sym_ref, 0));
20898 char *str = XALLOCAVEC (char, len + 2);
20899 str[0] = '.';
20900 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
20901 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
20902 }
20903 }
20904
20905 static inline bool
20906 compare_section_name (const char *section, const char *templ)
20907 {
20908 int len;
20909
20910 len = strlen (templ);
20911 return (strncmp (section, templ, len) == 0
20912 && (section[len] == 0 || section[len] == '.'));
20913 }
20914
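/* For example (illustrative), compare_section_name (".sdata.foo", ".sdata")
   and compare_section_name (".sdata", ".sdata") are true, while
   compare_section_name (".sdata2", ".sdata") is false because '2' is
   neither NUL nor '.'; hence ".sdata2" gets its own test below.  */
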
20915 bool
20916 rs6000_elf_in_small_data_p (const_tree decl)
20917 {
20918 if (rs6000_sdata == SDATA_NONE)
20919 return false;
20920
20921 /* We want to merge strings, so we never consider them small data. */
20922 if (TREE_CODE (decl) == STRING_CST)
20923 return false;
20924
20925 /* Functions are never in the small data area. */
20926 if (TREE_CODE (decl) == FUNCTION_DECL)
20927 return false;
20928
20929 if (VAR_P (decl) && DECL_SECTION_NAME (decl))
20930 {
20931 const char *section = DECL_SECTION_NAME (decl);
20932 if (compare_section_name (section, ".sdata")
20933 || compare_section_name (section, ".sdata2")
20934 || compare_section_name (section, ".gnu.linkonce.s")
20935 || compare_section_name (section, ".sbss")
20936 || compare_section_name (section, ".sbss2")
20937 || compare_section_name (section, ".gnu.linkonce.sb")
20938 || strcmp (section, ".PPC.EMB.sdata0") == 0
20939 || strcmp (section, ".PPC.EMB.sbss0") == 0)
20940 return true;
20941 }
20942 else
20943 {
20944 /* If we are told not to put readonly data in sdata, then don't. */
20945 if (TREE_READONLY (decl) && rs6000_sdata != SDATA_EABI
20946 && !rs6000_readonly_in_sdata)
20947 return false;
20948
20949 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
20950
20951 if (size > 0
20952 && size <= g_switch_value
20953 /* If it's not public, and we're not going to reference it there,
20954 there's no need to put it in the small data section. */
20955 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
20956 return true;
20957 }
20958
20959 return false;
20960 }
20961
20962 #endif /* USING_ELFOS_H */
20963 \f
20964 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
20965
20966 static bool
20967 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
20968 {
20969 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
20970 }
20971
20972 /* Do not place thread-local symbol refs in the object blocks.  */
20973
20974 static bool
20975 rs6000_use_blocks_for_decl_p (const_tree decl)
20976 {
20977 return !DECL_THREAD_LOCAL_P (decl);
20978 }
20979 \f
20980 /* Return a REG that occurs in ADDR with coefficient 1.
20981 ADDR can be effectively incremented by incrementing REG.
20982
20983 r0 is special and we must not select it as an address
20984 register by this routine since our caller will try to
20985 increment the returned register via an "la" instruction. */
20986
20987 rtx
20988 find_addr_reg (rtx addr)
20989 {
20990 while (GET_CODE (addr) == PLUS)
20991 {
20992 if (REG_P (XEXP (addr, 0))
20993 && REGNO (XEXP (addr, 0)) != 0)
20994 addr = XEXP (addr, 0);
20995 else if (REG_P (XEXP (addr, 1))
20996 && REGNO (XEXP (addr, 1)) != 0)
20997 addr = XEXP (addr, 1);
20998 else if (CONSTANT_P (XEXP (addr, 0)))
20999 addr = XEXP (addr, 1);
21000 else if (CONSTANT_P (XEXP (addr, 1)))
21001 addr = XEXP (addr, 0);
21002 else
21003 gcc_unreachable ();
21004 }
21005 gcc_assert (REG_P (addr) && REGNO (addr) != 0);
21006 return addr;
21007 }
21008
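/* For example (illustrative RTL): for ADDR == (plus (reg 9) (const_int 16))
   this returns (reg 9); an address mentioning only r0 and constants would
   hit the assertion instead.  */
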
21009 void
21010 rs6000_fatal_bad_address (rtx op)
21011 {
21012 fatal_insn ("bad address", op);
21013 }
21014
21015 #if TARGET_MACHO
21016
21017 vec<branch_island, va_gc> *branch_islands;
21018
21019 /* Remember to generate a branch island for far calls to the given
21020 function. */
21021
21022 static void
21023 add_compiler_branch_island (tree label_name, tree function_name,
21024 int line_number)
21025 {
21026 branch_island bi = {function_name, label_name, line_number};
21027 vec_safe_push (branch_islands, bi);
21028 }
21029
21030 /* NO_PREVIOUS_DEF checks the list of branch islands for the function
21031 name and returns nonzero if it is not already there.  */
21032
21033 static int
21034 no_previous_def (tree function_name)
21035 {
21036 branch_island *bi;
21037 unsigned ix;
21038
21039 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
21040 if (function_name == bi->function_name)
21041 return 0;
21042 return 1;
21043 }
21044
21045 /* GET_PREV_LABEL gets the label name from the previous definition of
21046 the function. */
21047
21048 static tree
21049 get_prev_label (tree function_name)
21050 {
21051 branch_island *bi;
21052 unsigned ix;
21053
21054 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
21055 if (function_name == bi->function_name)
21056 return bi->label_name;
21057 return NULL_TREE;
21058 }
21059
21060 /* Generate external symbol indirection stubs (PIC and non-PIC). */
21061
21062 void
21063 machopic_output_stub (FILE *file, const char *symb, const char *stub)
21064 {
21065 unsigned int length;
21066 char *symbol_name, *lazy_ptr_name;
21067 char *local_label_0;
21068 static unsigned label = 0;
21069
21070 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
21071 symb = (*targetm.strip_name_encoding) (symb);
21072
21073 length = strlen (symb);
21074 symbol_name = XALLOCAVEC (char, length + 32);
21075 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
21076
21077 lazy_ptr_name = XALLOCAVEC (char, length + 32);
21078 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
21079
21080 if (MACHOPIC_PURE)
21081 {
21082 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
21083 fprintf (file, "\t.align 5\n");
21084
21085 fprintf (file, "%s:\n", stub);
21086 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21087
21088 label++;
21089 local_label_0 = XALLOCAVEC (char, 16);
21090 sprintf (local_label_0, "L%u$spb", label);
21091
21092 fprintf (file, "\tmflr r0\n");
21093 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
21094 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
21095 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
21096 lazy_ptr_name, local_label_0);
21097 fprintf (file, "\tmtlr r0\n");
21098 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
21099 (TARGET_64BIT ? "ldu" : "lwzu"),
21100 lazy_ptr_name, local_label_0);
21101 fprintf (file, "\tmtctr r12\n");
21102 fprintf (file, "\tbctr\n");
21103 }
21104 else /* mdynamic-no-pic or mkernel. */
21105 {
21106 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
21107 fprintf (file, "\t.align 4\n");
21108
21109 fprintf (file, "%s:\n", stub);
21110 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21111
21112 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
21113 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
21114 (TARGET_64BIT ? "ldu" : "lwzu"),
21115 lazy_ptr_name);
21116 fprintf (file, "\tmtctr r12\n");
21117 fprintf (file, "\tbctr\n");
21118 }
21119
21120 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
21121 fprintf (file, "%s:\n", lazy_ptr_name);
21122 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21123 fprintf (file, "%sdyld_stub_binding_helper\n",
21124 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
21125 }
21126
21127 /* Legitimize PIC addresses. If the address is already
21128 position-independent, we return ORIG. Newly generated
21129 position-independent addresses go into a reg.  This is REG if
21130 nonzero, otherwise we allocate register(s) as necessary.  */
21131
21132 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
21133
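/* The biased comparison in SMALL_INT accepts exactly the 16-bit signed
   range -0x8000 <= X <= 0x7fff, i.e. displacements that fit the D field
   of a single la/addi.  */
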
21134 rtx
21135 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
21136 rtx reg)
21137 {
21138 rtx base, offset;
21139
21140 if (reg == NULL && !reload_completed)
21141 reg = gen_reg_rtx (Pmode);
21142
21143 if (GET_CODE (orig) == CONST)
21144 {
21145 rtx reg_temp;
21146
21147 if (GET_CODE (XEXP (orig, 0)) == PLUS
21148 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
21149 return orig;
21150
21151 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
21152
21153 /* Use a different reg for the intermediate value, as
21154 it will be marked UNCHANGING. */
21155 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
21156 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
21157 Pmode, reg_temp);
21158 offset =
21159 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
21160 Pmode, reg);
21161
21162 if (CONST_INT_P (offset))
21163 {
21164 if (SMALL_INT (offset))
21165 return plus_constant (Pmode, base, INTVAL (offset));
21166 else if (!reload_completed)
21167 offset = force_reg (Pmode, offset);
21168 else
21169 {
21170 rtx mem = force_const_mem (Pmode, orig);
21171 return machopic_legitimize_pic_address (mem, Pmode, reg);
21172 }
21173 }
21174 return gen_rtx_PLUS (Pmode, base, offset);
21175 }
21176
21177 /* Fall back on generic machopic code. */
21178 return machopic_legitimize_pic_address (orig, mode, reg);
21179 }
21180
21181 /* Output a .machine directive for the Darwin assembler, and call
21182 the generic start_file routine. */
21183
21184 static void
21185 rs6000_darwin_file_start (void)
21186 {
21187 static const struct
21188 {
21189 const char *arg;
21190 const char *name;
21191 HOST_WIDE_INT if_set;
21192 } mapping[] = {
21193 { "ppc64", "ppc64", MASK_64BIT },
21194 { "970", "ppc970", OPTION_MASK_PPC_GPOPT | OPTION_MASK_MFCRF \
21195 | MASK_POWERPC64 },
21196 { "power4", "ppc970", 0 },
21197 { "G5", "ppc970", 0 },
21198 { "7450", "ppc7450", 0 },
21199 { "7400", "ppc7400", OPTION_MASK_ALTIVEC },
21200 { "G4", "ppc7400", 0 },
21201 { "750", "ppc750", 0 },
21202 { "740", "ppc750", 0 },
21203 { "G3", "ppc750", 0 },
21204 { "604e", "ppc604e", 0 },
21205 { "604", "ppc604", 0 },
21206 { "603e", "ppc603", 0 },
21207 { "603", "ppc603", 0 },
21208 { "601", "ppc601", 0 },
21209 { NULL, "ppc", 0 } };
21210 const char *cpu_id = "";
21211 size_t i;
21212
21213 rs6000_file_start ();
21214 darwin_file_start ();
21215
21216 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
21217
21218 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
21219 cpu_id = rs6000_default_cpu;
21220
21221 if (OPTION_SET_P (rs6000_cpu_index))
21222 cpu_id = processor_target_table[rs6000_cpu_index].name;
21223
21224 /* Look through the mapping array. Pick the first name that either
21225 matches the argument, has a bit set in IF_SET that is also set
21226 in the target flags, or has a NULL name. */
21227
21228 i = 0;
21229 while (mapping[i].arg != NULL
21230 && strcmp (mapping[i].arg, cpu_id) != 0
21231 && (mapping[i].if_set & rs6000_isa_flags) == 0)
21232 i++;
21233
21234 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
21235 }
21236
21237 #endif /* TARGET_MACHO */
21238
21239 #if TARGET_ELF
21240 static int
21241 rs6000_elf_reloc_rw_mask (void)
21242 {
21243 if (flag_pic)
21244 return 3;
21245 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21246 return 2;
21247 else
21248 return 0;
21249 }
21250
21251 /* Record an element in the table of global constructors. SYMBOL is
21252 a SYMBOL_REF of the function to be called; PRIORITY is a number
21253 between 0 and MAX_INIT_PRIORITY.
21254
21255 This differs from default_named_section_asm_out_constructor in
21256 that we have special handling for -mrelocatable. */
21257
21258 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
21259 static void
21260 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
21261 {
21262 const char *section = ".ctors";
21263 char buf[18];
21264
21265 if (priority != DEFAULT_INIT_PRIORITY)
21266 {
21267 sprintf (buf, ".ctors.%.5u",
21268 /* Invert the numbering so the linker puts us in the proper
21269 order; constructors are run from right to left, and the
21270 linker sorts in increasing order. */
21271 MAX_INIT_PRIORITY - priority);
21272 section = buf;
21273 }
21274
21275 switch_to_section (get_section (section, SECTION_WRITE, NULL));
21276 assemble_align (POINTER_SIZE);
21277
21278 if (DEFAULT_ABI == ABI_V4
21279 && (TARGET_RELOCATABLE || flag_pic > 1))
21280 {
21281 fputs ("\t.long (", asm_out_file);
21282 output_addr_const (asm_out_file, symbol);
21283 fputs (")@fixup\n", asm_out_file);
21284 }
21285 else
21286 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
21287 }
21288
21289 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
21290 static void
21291 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
21292 {
21293 const char *section = ".dtors";
21294 char buf[18];
21295
21296 if (priority != DEFAULT_INIT_PRIORITY)
21297 {
21298 sprintf (buf, ".dtors.%.5u",
21299 /* Invert the numbering so the linker puts us in the proper
21300 order; constructors are run from right to left, and the
21301 linker sorts in increasing order. */
21302 MAX_INIT_PRIORITY - priority);
21303 section = buf;
21304 }
21305
21306 switch_to_section (get_section (section, SECTION_WRITE, NULL));
21307 assemble_align (POINTER_SIZE);
21308
21309 if (DEFAULT_ABI == ABI_V4
21310 && (TARGET_RELOCATABLE || flag_pic > 1))
21311 {
21312 fputs ("\t.long (", asm_out_file);
21313 output_addr_const (asm_out_file, symbol);
21314 fputs (")@fixup\n", asm_out_file);
21315 }
21316 else
21317 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
21318 }
21319
21320 void
21321 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
21322 {
21323 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
21324 {
21325 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
21326 ASM_OUTPUT_LABEL (file, name);
21327 fputs (DOUBLE_INT_ASM_OP, file);
21328 rs6000_output_function_entry (file, name);
21329 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
21330 if (DOT_SYMBOLS)
21331 {
21332 fputs ("\t.size\t", file);
21333 assemble_name (file, name);
21334 fputs (",24\n\t.type\t.", file);
21335 assemble_name (file, name);
21336 fputs (",@function\n", file);
21337 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
21338 {
21339 fputs ("\t.globl\t.", file);
21340 assemble_name (file, name);
21341 putc ('\n', file);
21342 }
21343 }
21344 else
21345 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21346 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21347 rs6000_output_function_entry (file, name);
21348 fputs (":\n", file);
21349 return;
21350 }
21351
21352 int uses_toc;
21353 if (DEFAULT_ABI == ABI_V4
21354 && (TARGET_RELOCATABLE || flag_pic > 1)
21355 && !TARGET_SECURE_PLT
21356 && (!constant_pool_empty_p () || crtl->profile)
21357 && (uses_toc = uses_TOC ()))
21358 {
21359 char buf[256];
21360
21361 if (uses_toc == 2)
21362 switch_to_other_text_partition ();
21363 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
21364
21365 fprintf (file, "\t.long ");
21366 assemble_name (file, toc_label_name);
21367 need_toc_init = 1;
21368 putc ('-', file);
21369 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21370 assemble_name (file, buf);
21371 putc ('\n', file);
21372 if (uses_toc == 2)
21373 switch_to_other_text_partition ();
21374 }
21375
21376 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
21377 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
21378
21379 if (TARGET_CMODEL == CMODEL_LARGE
21380 && rs6000_global_entry_point_prologue_needed_p ())
21381 {
21382 char buf[256];
21383
21384 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
21385
21386 fprintf (file, "\t.quad .TOC.-");
21387 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21388 assemble_name (file, buf);
21389 putc ('\n', file);
21390 }
21391
21392 if (DEFAULT_ABI == ABI_AIX)
21393 {
21394 const char *desc_name, *orig_name;
21395
21396 orig_name = (*targetm.strip_name_encoding) (name);
21397 desc_name = orig_name;
21398 while (*desc_name == '.')
21399 desc_name++;
21400
21401 if (TREE_PUBLIC (decl))
21402 fprintf (file, "\t.globl %s\n", desc_name);
21403
21404 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
21405 fprintf (file, "%s:\n", desc_name);
21406 fprintf (file, "\t.long %s\n", orig_name);
21407 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
21408 fputs ("\t.long 0\n", file);
21409 fprintf (file, "\t.previous\n");
21410 }
21411 ASM_OUTPUT_LABEL (file, name);
21412 }
21413
21414 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
21415 static void
21416 rs6000_elf_file_end (void)
21417 {
21418 #ifdef HAVE_AS_GNU_ATTRIBUTE
21419 /* ??? The value emitted depends on options active at file end.
21420 Assume anyone using #pragma or attributes that might change
21421 options knows what they are doing. */
21422 if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
21423 && rs6000_passes_float)
21424 {
21425 int fp;
21426
21427 if (TARGET_HARD_FLOAT)
21428 fp = 1;
21429 else
21430 fp = 2;
21431 if (rs6000_passes_long_double)
21432 {
21433 if (!TARGET_LONG_DOUBLE_128)
21434 fp |= 2 * 4;
21435 else if (TARGET_IEEEQUAD)
21436 fp |= 3 * 4;
21437 else
21438 fp |= 1 * 4;
21439 }
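/* The .gnu_attribute 4 (Tag_GNU_Power_ABI_FP) value packs two fields:
the low two bits give the float ABI (1 = hard, 2 = soft) and the next
two bits the long double format (1 = 128-bit IBM, 2 = 64-bit,
3 = IEEE 128-bit).  */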
21440 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
21441 }
21442 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
21443 {
21444 if (rs6000_passes_vector)
21445 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
21446 (TARGET_ALTIVEC_ABI ? 2 : 1));
21447 if (rs6000_returns_struct)
21448 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
21449 aix_struct_return ? 2 : 1);
21450 }
21451 #endif
21452 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
21453 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
21454 file_end_indicate_exec_stack ();
21455 #endif
21456
21457 if (flag_split_stack)
21458 file_end_indicate_split_stack ();
21459
21460 if (cpu_builtin_p)
21461 {
21462 /* We have expanded a CPU builtin, so we need to emit a reference to
21463 the special symbol that LIBC uses to declare that it supports the
21464 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  */
21465 switch_to_section (data_section);
21466 fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
21467 fprintf (asm_out_file, "\t%s %s\n",
21468 TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
21469 }
21470 }
21471 #endif
21472
21473 #if TARGET_XCOFF
21474
21475 #ifndef HAVE_XCOFF_DWARF_EXTRAS
21476 #define HAVE_XCOFF_DWARF_EXTRAS 0
21477 #endif
21478
21479
21480 /* Names of bss and data sections. These should be unique names for each
21481 compilation unit. */
21482
21483 char *xcoff_bss_section_name;
21484 char *xcoff_private_data_section_name;
21485 char *xcoff_private_rodata_section_name;
21486 char *xcoff_tls_data_section_name;
21487 char *xcoff_read_only_section_name;
21488
21489 static enum unwind_info_type
21490 rs6000_xcoff_debug_unwind_info (void)
21491 {
21492 return UI_NONE;
21493 }
21494
21495 static void
21496 rs6000_xcoff_asm_output_anchor (rtx symbol)
21497 {
21498 char buffer[100];
21499
21500 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
21501 SYMBOL_REF_BLOCK_OFFSET (symbol));
21502 fprintf (asm_out_file, "%s", SET_ASM_OP);
21503 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
21504 fprintf (asm_out_file, ",");
21505 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
21506 fprintf (asm_out_file, "\n");
21507 }
21508
21509 static void
21510 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
21511 {
21512 fputs (GLOBAL_ASM_OP, stream);
21513 RS6000_OUTPUT_BASENAME (stream, name);
21514 putc ('\n', stream);
21515 }
21516
21517 /* A get_unnamed_section callback, used for read-only sections.  A
21518 non-null DIRECTIVE selects the private read-only data section name.  */
21519
21520 static void
21521 rs6000_xcoff_output_readonly_section_asm_op (const char *directive)
21522 {
21523 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
21524 directive
21525 ? xcoff_private_rodata_section_name
21526 : xcoff_read_only_section_name,
21527 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21528 }
21529
21530 /* Likewise for read-write sections. */
21531
21532 static void
21533 rs6000_xcoff_output_readwrite_section_asm_op (const char *)
21534 {
21535 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
21536 xcoff_private_data_section_name,
21537 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21538 }
21539
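/* Likewise for TLS sections: a non-null DIRECTIVE selects the private
data section name, a null one the TLS data section name.  */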
21540 static void
21541 rs6000_xcoff_output_tls_section_asm_op (const char *directive)
21542 {
21543 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
21544 directive
21545 ? xcoff_private_data_section_name
21546 : xcoff_tls_data_section_name,
21547 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
21548 }
21549
21550 /* A get_unnamed_section callback, used for switching to toc_section. */
21551
21552 static void
21553 rs6000_xcoff_output_toc_section_asm_op (const char *data ATTRIBUTE_UNUSED)
21554 {
21555 if (TARGET_MINIMAL_TOC)
21556 {
21557 /* toc_section is always selected at least once from
21558 rs6000_xcoff_file_start, so this is guaranteed to
21559 always be defined once and only once in each file. */
21560 if (!toc_initialized)
21561 {
21562 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
21563 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
21564 toc_initialized = 1;
21565 }
21566 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
21567 (TARGET_32BIT ? "" : ",3"));
21568 }
21569 else
21570 fputs ("\t.toc\n", asm_out_file);
21571 }
21572
21573 /* Implement TARGET_ASM_INIT_SECTIONS. */
21574
21575 static void
21576 rs6000_xcoff_asm_init_sections (void)
21577 {
21578 read_only_data_section
21579 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21580 NULL);
21581
21582 private_data_section
21583 = get_unnamed_section (SECTION_WRITE,
21584 rs6000_xcoff_output_readwrite_section_asm_op,
21585 NULL);
21586
21587 read_only_private_data_section
21588 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
21589 "");
21590
21591 tls_data_section
21592 = get_unnamed_section (SECTION_TLS,
21593 rs6000_xcoff_output_tls_section_asm_op,
21594 NULL);
21595
21596 tls_private_data_section
21597 = get_unnamed_section (SECTION_TLS,
21598 rs6000_xcoff_output_tls_section_asm_op,
21599 "");
21600
21601 toc_section
21602 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
21603
21604 readonly_data_section = read_only_data_section;
21605 }
21606
21607 static int
21608 rs6000_xcoff_reloc_rw_mask (void)
21609 {
21610 return 3;
21611 }
21612
21613 static void
21614 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
21615 tree decl ATTRIBUTE_UNUSED)
21616 {
21617 int smclass;
21618 static const char * const suffix[7]
21619 = { "PR", "RO", "RW", "BS", "TL", "UL", "XO" };
21620
21621 if (flags & SECTION_EXCLUDE)
21622 smclass = 6;
21623 else if (flags & SECTION_DEBUG)
21624 {
21625 fprintf (asm_out_file, "\t.dwsect %s\n", name);
21626 return;
21627 }
21628 else if (flags & SECTION_CODE)
21629 smclass = 0;
21630 else if (flags & SECTION_TLS)
21631 {
21632 if (flags & SECTION_BSS)
21633 smclass = 5;
21634 else
21635 smclass = 4;
21636 }
21637 else if (flags & SECTION_WRITE)
21638 {
21639 if (flags & SECTION_BSS)
21640 smclass = 3;
21641 else
21642 smclass = 2;
21643 }
21644 else
21645 smclass = 1;
21646
21647 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
21648 (flags & SECTION_CODE) ? "." : "",
21649 name, suffix[smclass], flags & SECTION_ENTSIZE);
21650 }
21651
21652 #define IN_NAMED_SECTION(DECL) \
21653 ((TREE_CODE (DECL) == FUNCTION_DECL || VAR_P (DECL)) \
21654 && DECL_SECTION_NAME (DECL) != NULL)
21655
21656 static section *
21657 rs6000_xcoff_select_section (tree decl, int reloc,
21658 unsigned HOST_WIDE_INT align)
21659 {
21660 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
21661 named section. */
21662 if (align > BIGGEST_ALIGNMENT && VAR_OR_FUNCTION_DECL_P (decl))
21663 {
21664 resolve_unique_section (decl, reloc, true);
21665 if (IN_NAMED_SECTION (decl))
21666 return get_named_section (decl, NULL, reloc);
21667 }
21668
21669 if (decl_readonly_section (decl, reloc))
21670 {
21671 if (TREE_PUBLIC (decl))
21672 return read_only_data_section;
21673 else
21674 return read_only_private_data_section;
21675 }
21676 else
21677 {
21678 #if HAVE_AS_TLS
21679 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
21680 {
21681 if (bss_initializer_p (decl))
21682 return tls_comm_section;
21683 else if (TREE_PUBLIC (decl))
21684 return tls_data_section;
21685 else
21686 return tls_private_data_section;
21687 }
21688 else
21689 #endif
21690 if (TREE_PUBLIC (decl))
21691 return data_section;
21692 else
21693 return private_data_section;
21694 }
21695 }
21696
21697 static void
21698 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
21699 {
21700 const char *name;
21701
21702 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
21703 name = (*targetm.strip_name_encoding) (name);
21704 set_decl_section_name (decl, name);
21705 }
21706
21707 /* Select section for constant in constant pool.
21708
21709 On RS/6000, all constants are in the private read-only data area.
21710 However, if this is being placed in the TOC it must be output as a
21711 toc entry. */
21712
21713 static section *
21714 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
21715 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
21716 {
21717 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
21718 return toc_section;
21719 else
21720 return read_only_private_data_section;
21721 }
21722
21723 /* Remove any trailing [DS] or the like from the symbol name. */
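/* Mapping-class suffixes, brackets included, are always four
characters, which is why len - 4 below recovers e.g. "foo" from
"foo[DS]".  */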
21724
21725 static const char *
21726 rs6000_xcoff_strip_name_encoding (const char *name)
21727 {
21728 size_t len;
21729 if (*name == '*')
21730 name++;
21731 len = strlen (name);
21732 if (name[len - 1] == ']')
21733 return ggc_alloc_string (name, len - 4);
21734 else
21735 return name;
21736 }
21737
21738 /* Section attributes. AIX is always PIC. */
21739
21740 static unsigned int
21741 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
21742 {
21743 unsigned int align;
21744 unsigned int flags = default_section_type_flags (decl, name, reloc);
21745
21746 if (decl && DECL_P (decl) && VAR_P (decl) && bss_initializer_p (decl))
21747 flags |= SECTION_BSS;
21748
21749 /* Align to at least UNIT size. */
21750 if (!decl || !DECL_P (decl))
21751 align = MIN_UNITS_PER_WORD;
21752 /* Align code CSECT to at least 32 bytes. */
21753 else if ((flags & SECTION_CODE) != 0)
21754 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT), 32);
21755 else
21756 /* Increase alignment of large objects if not already stricter. */
21757 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
21758 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
21759 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
21760
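/* The csect alignment travels in the SECTION_ENTSIZE bits as a log2
value, e.g. 5 for a 32-byte aligned code csect;
rs6000_xcoff_asm_named_section prints it back out after the comma.  */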
21761 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
21762 }
21763
21764 /* Output at beginning of assembler file.
21765
21766 Initialize the section names for the RS/6000 at this point.
21767
21768 Specify filename, including full path, to assembler.
21769
21770 We want to go into the TOC section so at least one .toc will be emitted.
21771 Also, in order to output proper .bs/.es pairs, we need at least one static
21772 [RW] section emitted.
21773
21774 Finally, declare mcount when profiling to make the assembler happy. */
21775
21776 static void
21777 rs6000_xcoff_file_start (void)
21778 {
21779 rs6000_gen_section_name (&xcoff_bss_section_name,
21780 main_input_filename, ".bss_");
21781 rs6000_gen_section_name (&xcoff_private_data_section_name,
21782 main_input_filename, ".rw_");
21783 rs6000_gen_section_name (&xcoff_private_rodata_section_name,
21784 main_input_filename, ".rop_");
21785 rs6000_gen_section_name (&xcoff_read_only_section_name,
21786 main_input_filename, ".ro_");
21787 rs6000_gen_section_name (&xcoff_tls_data_section_name,
21788 main_input_filename, ".tls_");
21789
21790 fputs ("\t.file\t", asm_out_file);
21791 output_quoted_string (asm_out_file, main_input_filename);
21792 fputc ('\n', asm_out_file);
21793 if (write_symbols != NO_DEBUG)
21794 switch_to_section (private_data_section);
21795 switch_to_section (toc_section);
21796 switch_to_section (text_section);
21797 if (profile_flag)
21798 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
21799 rs6000_file_start ();
21800 }
21801
21802 /* Output at end of assembler file.
21803 On the RS/6000, referencing data should automatically pull in text. */
21804
21805 static void
21806 rs6000_xcoff_file_end (void)
21807 {
21808 switch_to_section (text_section);
21809 if (xcoff_tls_exec_model_detected)
21810 {
21811 /* Add a .ref to __tls_get_addr to force libpthread dependency. */
21812 fputs ("\t.extern __tls_get_addr\n\t.ref __tls_get_addr\n", asm_out_file);
21813 }
21814 fputs ("_section_.text:\n", asm_out_file);
21815 switch_to_section (data_section);
21816 fputs (TARGET_32BIT
21817 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
21818 asm_out_file);
21819
21820 }
21821
21822 struct declare_alias_data
21823 {
21824 FILE *file;
21825 bool function_descriptor;
21826 };
21827
21828 /* Declare alias N.  A helper function for call_for_symbol_and_aliases.  */
21829
21830 static bool
21831 rs6000_declare_alias (struct symtab_node *n, void *d)
21832 {
21833 struct declare_alias_data *data = (struct declare_alias_data *)d;
21834 /* Main symbol is output specially, because varasm machinery does part of
21835 the job for us - we do not need to declare .globl/lglobs and such. */
21836 if (!n->alias || n->weakref)
21837 return false;
21838
21839 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
21840 return false;
21841
21842 /* Prevent assemble_alias from trying to use .set pseudo operation
21843 that does not behave as expected by the middle-end. */
21844 TREE_ASM_WRITTEN (n->decl) = true;
21845
21846 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
21847 char *buffer = (char *) alloca (strlen (name) + 2);
21848 char *p;
21849 int dollar_inside = 0;
21850
21851 strcpy (buffer, name);
21852 p = strchr (buffer, '$');
21853 while (p) {
21854 *p = '_';
21855 dollar_inside++;
21856 p = strchr (p + 1, '$');
21857 }
21858 if (TREE_PUBLIC (n->decl))
21859 {
21860 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
21861 {
21862 if (dollar_inside) {
21863 if (data->function_descriptor)
21864 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21865 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21866 }
21867 if (data->function_descriptor)
21868 {
21869 fputs ("\t.globl .", data->file);
21870 RS6000_OUTPUT_BASENAME (data->file, buffer);
21871 putc ('\n', data->file);
21872 }
21873 fputs ("\t.globl ", data->file);
21874 assemble_name (data->file, buffer);
21875 putc ('\n', data->file);
21876 }
21877 #ifdef ASM_WEAKEN_DECL
21878 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
21879 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
21880 #endif
21881 }
21882 else
21883 {
21884 if (dollar_inside)
21885 {
21886 if (data->function_descriptor)
21887 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
21888 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
21889 }
21890 if (data->function_descriptor)
21891 {
21892 fputs ("\t.lglobl .", data->file);
21893 RS6000_OUTPUT_BASENAME (data->file, buffer);
21894 putc ('\n', data->file);
21895 }
21896 fputs ("\t.lglobl ", data->file);
21897 assemble_name (data->file, buffer);
21898 putc ('\n', data->file);
21899 }
21900 if (data->function_descriptor)
21901 putc ('.', data->file);
21902 ASM_OUTPUT_LABEL (data->file, buffer);
21903 return false;
21904 }
21905
21906
21907 #ifdef HAVE_GAS_HIDDEN
21908 /* Helper function to calculate visibility of a DECL
21909 and return the value as a const string. */
21910
21911 static const char *
21912 rs6000_xcoff_visibility (tree decl)
21913 {
21914 static const char * const visibility_types[] = {
21915 "", ",protected", ",hidden", ",internal"
21916 };
21917
21918 enum symbol_visibility vis = DECL_VISIBILITY (decl);
21919 return visibility_types[vis];
21920 }
21921 #endif
21922
21923
21924 /* This macro produces the initial definition of a function name.
21925 On the RS/6000, we need to place an extra '.' in the function name and
21926 output the function descriptor.
21927 Dollar signs are converted to underscores.
21928
21929 The csect for the function will have already been created when
21930 text_section was selected. We do have to go back to that csect, however.
21931
21932 The third and fourth parameters to the .function pseudo-op are
21933 placeholders which no longer have any use.
21934
21935 Because AIX assembler's .set command has unexpected semantics, we output
21936 all aliases as alternative labels in front of the definition. */
21937
21938 void
21939 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
21940 {
21941 char *buffer = (char *) alloca (strlen (name) + 1);
21942 char *p;
21943 int dollar_inside = 0;
21944 struct declare_alias_data data = {file, false};
21945
21946 strcpy (buffer, name);
21947 p = strchr (buffer, '$');
21948 while (p) {
21949 *p = '_';
21950 dollar_inside++;
21951 p = strchr (p + 1, '$');
21952 }
21953 if (TREE_PUBLIC (decl))
21954 {
21955 if (!RS6000_WEAK || !DECL_WEAK (decl))
21956 {
21957 if (dollar_inside) {
21958 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21959 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21960 }
21961 fputs ("\t.globl .", file);
21962 RS6000_OUTPUT_BASENAME (file, buffer);
21963 #ifdef HAVE_GAS_HIDDEN
21964 fputs (rs6000_xcoff_visibility (decl), file);
21965 #endif
21966 putc ('\n', file);
21967 }
21968 }
21969 else
21970 {
21971 if (dollar_inside) {
21972 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
21973 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
21974 }
21975 fputs ("\t.lglobl .", file);
21976 RS6000_OUTPUT_BASENAME (file, buffer);
21977 putc ('\n', file);
21978 }
21979
21980 fputs ("\t.csect ", file);
21981 assemble_name (file, buffer);
21982 fputs (TARGET_32BIT ? "\n" : ",3\n", file);
21983
21984 ASM_OUTPUT_LABEL (file, buffer);
21985
21986 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21987 &data, true);
21988 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
21989 RS6000_OUTPUT_BASENAME (file, buffer);
21990 fputs (", TOC[tc0], 0\n", file);
21991
21992 in_section = NULL;
21993 switch_to_section (function_section (decl));
21994 putc ('.', file);
21995 ASM_OUTPUT_LABEL (file, buffer);
21996
21997 data.function_descriptor = true;
21998 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
21999 &data, true);
22000 if (!DECL_IGNORED_P (decl))
22001 {
22002 if (dwarf_debuginfo_p ())
22003 {
22004 name = (*targetm.strip_name_encoding) (name);
22005 fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
22006 }
22007 }
22008 return;
22009 }
22010
22011
22012 /* Output assembly language to globalize a symbol from a DECL,
22013 possibly with visibility. */
22014
22015 void
22016 rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
22017 {
22018 const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
22019 fputs (GLOBAL_ASM_OP, stream);
22020 assemble_name (stream, name);
22021 #ifdef HAVE_GAS_HIDDEN
22022 fputs (rs6000_xcoff_visibility (decl), stream);
22023 #endif
22024 putc ('\n', stream);
22025 }
22026
22027 /* Output assembly language to define a symbol as COMMON from a DECL,
22028 possibly with visibility. */
22029
22030 void
22031 rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
22032 tree decl ATTRIBUTE_UNUSED,
22033 const char *name,
22034 unsigned HOST_WIDE_INT size,
22035 unsigned int align)
22036 {
22037 unsigned int align2 = 2;
22038
22039 if (align == 0)
22040 align = DATA_ABI_ALIGNMENT (TREE_TYPE (decl), DECL_ALIGN (decl));
22041
22042 if (align > 32)
22043 align2 = floor_log2 (align / BITS_PER_UNIT);
22044 else if (size > 4)
22045 align2 = 3;
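/* .comm, .lcomm and .csect take a log2 alignment: the default align2
of 2 means 4 bytes, objects larger than 4 bytes get 8-byte alignment,
and stricter requests use their exact log2.  */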
22046
22047 if (! DECL_COMMON (decl))
22048 {
22049 /* Forget section. */
22050 in_section = NULL;
22051
22052 /* Globalize TLS BSS. */
22053 if (TREE_PUBLIC (decl) && DECL_THREAD_LOCAL_P (decl))
22054 {
22055 fputs (GLOBAL_ASM_OP, stream);
22056 assemble_name (stream, name);
22057 fputc ('\n', stream);
22058 }
22059
22060 /* Switch to section and skip space. */
22061 fputs ("\t.csect ", stream);
22062 assemble_name (stream, name);
22063 fprintf (stream, ",%u\n", align2);
22064 ASM_DECLARE_OBJECT_NAME (stream, name, decl);
22065 ASM_OUTPUT_SKIP (stream, size ? size : 1);
22066 return;
22067 }
22068
22069 if (TREE_PUBLIC (decl))
22070 {
22071 fprintf (stream,
22072 "\t.comm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%u" ,
22073 name, size, align2);
22074
22075 #ifdef HAVE_GAS_HIDDEN
22076 if (decl != NULL)
22077 fputs (rs6000_xcoff_visibility (decl), stream);
22078 #endif
22079 putc ('\n', stream);
22080 }
22081 else
22082 fprintf (stream,
22083 "\t.lcomm %s," HOST_WIDE_INT_PRINT_UNSIGNED ",%s,%u\n",
22084 (*targetm.strip_name_encoding) (name), size, name, align2);
22085 }
22086
22087 /* This macro produces the initial definition of an object (variable) name.
22088 Because AIX assembler's .set command has unexpected semantics, we output
22089 all aliases as alternative labels in front of the definition. */
22090
22091 void
22092 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
22093 {
22094 struct declare_alias_data data = {file, false};
22095 ASM_OUTPUT_LABEL (file, name);
22096 symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
22097 &data, true);
22098 }
22099
22100 /* Override the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */
22101
22102 void
22103 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
22104 {
22105 fputs (integer_asm_op (size, FALSE), file);
22106 assemble_name (file, label);
22107 fputs ("-$", file);
22108 }
22109
22110 /* Output a symbol offset relative to the dbase for the current object.
22111 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
22112 signed offsets.
22113
22114 __gcc_unwind_dbase is embedded in all executables/libraries through
22115 libgcc/config/rs6000/crtdbase.S. */
22116
22117 void
22118 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
22119 {
22120 fputs (integer_asm_op (size, FALSE), file);
22121 assemble_name (file, label);
22122 fputs("-__gcc_unwind_dbase", file);
22123 }
22124
22125 #ifdef HAVE_AS_TLS
22126 static void
22127 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
22128 {
22129 rtx symbol;
22130 int flags;
22131 const char *symname;
22132
22133 default_encode_section_info (decl, rtl, first);
22134
22135 /* Careful not to prod global register variables. */
22136 if (!MEM_P (rtl))
22137 return;
22138 symbol = XEXP (rtl, 0);
22139 if (!SYMBOL_REF_P (symbol))
22140 return;
22141
22142 flags = SYMBOL_REF_FLAGS (symbol);
22143
22144 if (VAR_P (decl) && DECL_THREAD_LOCAL_P (decl))
22145 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
22146
22147 SYMBOL_REF_FLAGS (symbol) = flags;
22148
22149 symname = XSTR (symbol, 0);
22150
22151 /* Append CSECT mapping class, unless the symbol already is qualified.
22152 Aliases are implemented as labels, so the symbol name should not add
22153 a mapping class. */
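/* For example, with -fdata-sections an initialized variable "v" is
renamed "v[RW]" (or "v[RO]" if read-only), a BSS variable gets "[BS]",
and an external symbol always gets "[UA]".  */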
22154 if (decl
22155 && DECL_P (decl)
22156 && VAR_OR_FUNCTION_DECL_P (decl)
22157 && (symtab_node::get (decl) == NULL
22158 || symtab_node::get (decl)->alias == 0)
22159 && symname[strlen (symname) - 1] != ']')
22160 {
22161 const char *smclass = NULL;
22162
22163 if (TREE_CODE (decl) == FUNCTION_DECL)
22164 smclass = "[DS]";
22165 else if (DECL_THREAD_LOCAL_P (decl))
22166 {
22167 if (bss_initializer_p (decl))
22168 smclass = "[UL]";
22169 else if (flag_data_sections)
22170 smclass = "[TL]";
22171 }
22172 else if (DECL_EXTERNAL (decl))
22173 smclass = "[UA]";
22174 else if (bss_initializer_p (decl))
22175 smclass = "[BS]";
22176 else if (flag_data_sections)
22177 {
22178 /* This must exactly match the logic of select section. */
22179 if (decl_readonly_section (decl, compute_reloc_for_var (decl)))
22180 smclass = "[RO]";
22181 else
22182 smclass = "[RW]";
22183 }
22184
22185 if (smclass != NULL)
22186 {
22187 char *newname = XALLOCAVEC (char, strlen (symname) + 5);
22188
22189 strcpy (newname, symname);
22190 strcat (newname, smclass);
22191 XSTR (symbol, 0) = ggc_strdup (newname);
22192 }
22193 }
22194 }
22195 #endif /* HAVE_AS_TLS */
22196 #endif /* TARGET_XCOFF */
22197
22198 void
22199 rs6000_asm_weaken_decl (FILE *stream, tree decl,
22200 const char *name, const char *val)
22201 {
22202 fputs ("\t.weak\t", stream);
22203 assemble_name (stream, name);
22204 if (decl && TREE_CODE (decl) == FUNCTION_DECL
22205 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
22206 {
22207 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22208 if (TARGET_XCOFF)
22209 fputs (rs6000_xcoff_visibility (decl), stream);
22210 #endif
22211 fputs ("\n\t.weak\t.", stream);
22212 RS6000_OUTPUT_BASENAME (stream, name);
22213 }
22214 #if TARGET_XCOFF && HAVE_GAS_HIDDEN
22215 if (TARGET_XCOFF)
22216 fputs (rs6000_xcoff_visibility (decl), stream);
22217 #endif
22218 fputc ('\n', stream);
22219
22220 if (val)
22221 {
22222 #ifdef ASM_OUTPUT_DEF
22223 ASM_OUTPUT_DEF (stream, name, val);
22224 #endif
22225 if (decl && TREE_CODE (decl) == FUNCTION_DECL
22226 && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
22227 {
22228 fputs ("\t.set\t.", stream);
22229 RS6000_OUTPUT_BASENAME (stream, name);
22230 fputs (",.", stream);
22231 RS6000_OUTPUT_BASENAME (stream, val);
22232 fputc ('\n', stream);
22233 }
22234 }
22235 }
22236
22237
22238 /* Return true if INSN should not be copied. */
22239
22240 static bool
22241 rs6000_cannot_copy_insn_p (rtx_insn *insn)
22242 {
22243 return recog_memoized (insn) >= 0
22244 && get_attr_cannot_copy (insn);
22245 }
22246
22247 /* Compute a (partial) cost for rtx X. Return true if the complete
22248 cost has been computed, and false if subexpressions should be
22249 scanned. In either case, *TOTAL contains the cost result. */
22250
22251 static bool
22252 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
22253 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
22254 {
22255 int code = GET_CODE (x);
22256
22257 switch (code)
22258 {
22259 /* On the RS/6000, if it is valid in the insn, it is free. */
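/* (Constraint I accepts a signed 16-bit constant, K an unsigned 16-bit
constant, L a signed 16-bit constant shifted left 16 bits, J an
unsigned 16-bit constant shifted left 16 bits, and P a constant whose
negation is a signed 16-bit constant.)  */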
22260 case CONST_INT:
22261 if (((outer_code == SET
22262 || outer_code == PLUS
22263 || outer_code == MINUS)
22264 && (satisfies_constraint_I (x)
22265 || satisfies_constraint_L (x)))
22266 || (outer_code == AND
22267 && (satisfies_constraint_K (x)
22268 || (mode == SImode
22269 ? satisfies_constraint_L (x)
22270 : satisfies_constraint_J (x))))
22271 || ((outer_code == IOR || outer_code == XOR)
22272 && (satisfies_constraint_K (x)
22273 || (mode == SImode
22274 ? satisfies_constraint_L (x)
22275 : satisfies_constraint_J (x))))
22276 || outer_code == ASHIFT
22277 || outer_code == ASHIFTRT
22278 || outer_code == LSHIFTRT
22279 || outer_code == ROTATE
22280 || outer_code == ROTATERT
22281 || outer_code == ZERO_EXTRACT
22282 || (outer_code == MULT
22283 && satisfies_constraint_I (x))
22284 || ((outer_code == DIV || outer_code == UDIV
22285 || outer_code == MOD || outer_code == UMOD)
22286 && exact_log2 (INTVAL (x)) >= 0)
22287 || (outer_code == COMPARE
22288 && (satisfies_constraint_I (x)
22289 || satisfies_constraint_K (x)))
22290 || ((outer_code == EQ || outer_code == NE)
22291 && (satisfies_constraint_I (x)
22292 || satisfies_constraint_K (x)
22293 || (mode == SImode
22294 ? satisfies_constraint_L (x)
22295 : satisfies_constraint_J (x))))
22296 || (outer_code == GTU
22297 && satisfies_constraint_I (x))
22298 || (outer_code == LTU
22299 && satisfies_constraint_P (x)))
22300 {
22301 *total = 0;
22302 return true;
22303 }
22304 else if ((outer_code == PLUS
22305 && reg_or_add_cint_operand (x, mode))
22306 || (outer_code == MINUS
22307 && reg_or_sub_cint_operand (x, mode))
22308 || ((outer_code == SET
22309 || outer_code == IOR
22310 || outer_code == XOR)
22311 && (INTVAL (x)
22312 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
22313 {
22314 *total = COSTS_N_INSNS (1);
22315 return true;
22316 }
22317 /* FALLTHRU */
22318
22319 case CONST_DOUBLE:
22320 case CONST_WIDE_INT:
22321 case CONST:
22322 case HIGH:
22323 case SYMBOL_REF:
22324 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22325 return true;
22326
22327 case MEM:
22328 /* When optimizing for size, MEM should be slightly more expensive
22329 than generating address, e.g., (plus (reg) (const)).
22330 L1 cache latency is about two instructions. */
22331 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
22332 if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
22333 *total += COSTS_N_INSNS (100);
22334 return true;
22335
22336 case LABEL_REF:
22337 *total = 0;
22338 return true;
22339
22340 case PLUS:
22341 case MINUS:
22342 if (FLOAT_MODE_P (mode))
22343 *total = rs6000_cost->fp;
22344 else
22345 *total = COSTS_N_INSNS (1);
22346 return false;
22347
22348 case MULT:
22349 if (CONST_INT_P (XEXP (x, 1))
22350 && satisfies_constraint_I (XEXP (x, 1)))
22351 {
22352 if (INTVAL (XEXP (x, 1)) >= -256
22353 && INTVAL (XEXP (x, 1)) <= 255)
22354 *total = rs6000_cost->mulsi_const9;
22355 else
22356 *total = rs6000_cost->mulsi_const;
22357 }
22358 else if (mode == SFmode)
22359 *total = rs6000_cost->fp;
22360 else if (FLOAT_MODE_P (mode))
22361 *total = rs6000_cost->dmul;
22362 else if (mode == DImode)
22363 *total = rs6000_cost->muldi;
22364 else
22365 *total = rs6000_cost->mulsi;
22366 return false;
22367
22368 case FMA:
22369 if (mode == SFmode)
22370 *total = rs6000_cost->fp;
22371 else
22372 *total = rs6000_cost->dmul;
22373 break;
22374
22375 case DIV:
22376 case MOD:
22377 if (FLOAT_MODE_P (mode))
22378 {
22379 *total = mode == DFmode ? rs6000_cost->ddiv
22380 : rs6000_cost->sdiv;
22381 return false;
22382 }
22383 /* FALLTHRU */
22384
22385 case UDIV:
22386 case UMOD:
22387 if (CONST_INT_P (XEXP (x, 1))
22388 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
22389 {
22390 if (code == DIV || code == MOD)
22391 /* Shift, addze */
22392 *total = COSTS_N_INSNS (2);
22393 else
22394 /* Shift */
22395 *total = COSTS_N_INSNS (1);
22396 }
22397 else
22398 {
22399 if (GET_MODE (XEXP (x, 1)) == DImode)
22400 *total = rs6000_cost->divdi;
22401 else
22402 *total = rs6000_cost->divsi;
22403 }
22404 /* Add in shift and subtract for MOD unless we have a mod instruction. */
22405 if ((!TARGET_MODULO
22406 || (RS6000_DISABLE_SCALAR_MODULO && SCALAR_INT_MODE_P (mode)))
22407 && (code == MOD || code == UMOD))
22408 *total += COSTS_N_INSNS (2);
22409 return false;
22410
22411 case CTZ:
22412 *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
22413 return false;
22414
22415 case FFS:
22416 *total = COSTS_N_INSNS (4);
22417 return false;
22418
22419 case POPCOUNT:
22420 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
22421 return false;
22422
22423 case PARITY:
22424 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
22425 return false;
22426
22427 case NOT:
22428 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
22429 *total = 0;
22430 else
22431 *total = COSTS_N_INSNS (1);
22432 return false;
22433
22434 case AND:
22435 if (CONST_INT_P (XEXP (x, 1)))
22436 {
22437 rtx left = XEXP (x, 0);
22438 rtx_code left_code = GET_CODE (left);
22439
22440 /* rotate-and-mask: 1 insn. */
22441 if ((left_code == ROTATE
22442 || left_code == ASHIFT
22443 || left_code == LSHIFTRT)
22444 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
22445 {
22446 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
22447 if (!CONST_INT_P (XEXP (left, 1)))
22448 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
22449 *total += COSTS_N_INSNS (1);
22450 return true;
22451 }
22452
22453 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
22454 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
22455 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
22456 || (val & 0xffff) == val
22457 || (val & 0xffff0000) == val
22458 || ((val & 0xffff) == 0 && mode == SImode))
22459 {
22460 *total = rtx_cost (left, mode, AND, 0, speed);
22461 *total += COSTS_N_INSNS (1);
22462 return true;
22463 }
22464
22465 /* 2 insns. */
22466 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
22467 {
22468 *total = rtx_cost (left, mode, AND, 0, speed);
22469 *total += COSTS_N_INSNS (2);
22470 return true;
22471 }
22472 }
22473
22474 *total = COSTS_N_INSNS (1);
22475 return false;
22476
22477 case IOR:
22478 /* FIXME */
22479 *total = COSTS_N_INSNS (1);
22480 return true;
22481
22482 case CLZ:
22483 case XOR:
22484 case ZERO_EXTRACT:
22485 *total = COSTS_N_INSNS (1);
22486 return false;
22487
22488 case ASHIFT:
22489 /* The EXTSWSLI instruction is a combined instruction. Don't count both
22490 the sign extend and shift separately within the insn. */
22491 if (TARGET_EXTSWSLI && mode == DImode
22492 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
22493 && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
22494 {
22495 *total = 0;
22496 return false;
22497 }
22498 /* fall through */
22499
22500 case ASHIFTRT:
22501 case LSHIFTRT:
22502 case ROTATE:
22503 case ROTATERT:
22504 /* Handle mul_highpart. */
22505 if (outer_code == TRUNCATE
22506 && GET_CODE (XEXP (x, 0)) == MULT)
22507 {
22508 if (mode == DImode)
22509 *total = rs6000_cost->muldi;
22510 else
22511 *total = rs6000_cost->mulsi;
22512 return true;
22513 }
22514 else if (outer_code == AND)
22515 *total = 0;
22516 else
22517 *total = COSTS_N_INSNS (1);
22518 return false;
22519
22520 case SIGN_EXTEND:
22521 case ZERO_EXTEND:
22522 if (MEM_P (XEXP (x, 0)))
22523 *total = 0;
22524 else
22525 *total = COSTS_N_INSNS (1);
22526 return false;
22527
22528 case COMPARE:
22529 case NEG:
22530 case ABS:
22531 if (!FLOAT_MODE_P (mode))
22532 {
22533 *total = COSTS_N_INSNS (1);
22534 return false;
22535 }
22536 /* FALLTHRU */
22537
22538 case FLOAT:
22539 case UNSIGNED_FLOAT:
22540 case FIX:
22541 case UNSIGNED_FIX:
22542 case FLOAT_TRUNCATE:
22543 *total = rs6000_cost->fp;
22544 return false;
22545
22546 case FLOAT_EXTEND:
22547 if (mode == DFmode)
22548 *total = rs6000_cost->sfdf_convert;
22549 else
22550 *total = rs6000_cost->fp;
22551 return false;
22552
22553 case CALL:
22554 case IF_THEN_ELSE:
22555 if (!speed)
22556 {
22557 *total = COSTS_N_INSNS (1);
22558 return true;
22559 }
22560 else if (FLOAT_MODE_P (mode) && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT)
22561 {
22562 *total = rs6000_cost->fp;
22563 return false;
22564 }
22565 break;
22566
22567 case NE:
22568 case EQ:
22569 case GTU:
22570 case LTU:
22571 /* Carry bit requires mode == Pmode.
22572 NEG or PLUS already counted so only add one. */
22573 if (mode == Pmode
22574 && (outer_code == NEG || outer_code == PLUS))
22575 {
22576 *total = COSTS_N_INSNS (1);
22577 return true;
22578 }
22579 /* FALLTHRU */
22580
22581 case GT:
22582 case LT:
22583 case UNORDERED:
22584 if (outer_code == SET)
22585 {
22586 if (XEXP (x, 1) == const0_rtx)
22587 {
22588 *total = COSTS_N_INSNS (2);
22589 return true;
22590 }
22591 else
22592 {
22593 *total = COSTS_N_INSNS (3);
22594 return false;
22595 }
22596 }
22597 /* CC COMPARE. */
22598 if (outer_code == COMPARE)
22599 {
22600 *total = 0;
22601 return true;
22602 }
22603 break;
22604
22605 case UNSPEC:
22606 if (XINT (x, 1) == UNSPECV_MMA_XXSETACCZ)
22607 {
22608 *total = 0;
22609 return true;
22610 }
22611 break;
22612
22613 default:
22614 break;
22615 }
22616
22617 return false;
22618 }
22619
22620 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
22621
22622 static bool
22623 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
22624 int opno, int *total, bool speed)
22625 {
22626 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
22627
22628 fprintf (stderr,
22629 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
22630 "opno = %d, total = %d, speed = %s, x:\n",
22631 ret ? "complete" : "scan inner",
22632 GET_MODE_NAME (mode),
22633 GET_RTX_NAME (outer_code),
22634 opno,
22635 *total,
22636 speed ? "true" : "false");
22637
22638 debug_rtx (x);
22639
22640 return ret;
22641 }
22642
22643 static int
22644 rs6000_insn_cost (rtx_insn *insn, bool speed)
22645 {
22646 if (recog_memoized (insn) < 0)
22647 return 0;
22648
22649 /* If we are optimizing for size, just use the length. */
22650 if (!speed)
22651 return get_attr_length (insn);
22652
22653 /* Use the cost if provided. */
22654 int cost = get_attr_cost (insn);
22655 if (cost > 0)
22656 return cost;
22657
22658 /* If the insn tells us how many insns there are, use that. Otherwise use
22659 the length/4. Adjust the insn length to remove the extra size that
22660 prefixed instructions take. */
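/* Every non-prefixed PowerPC instruction is four bytes, and a prefixed
instruction should still count as a single insn, hence the length
adjustment before the division by 4.  */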
22661 int n = get_attr_num_insns (insn);
22662 if (n == 0)
22663 {
22664 int length = get_attr_length (insn);
22665 if (get_attr_prefixed (insn) == PREFIXED_YES)
22666 {
22667 int adjust = 0;
22668 ADJUST_INSN_LENGTH (insn, adjust);
22669 length -= adjust;
22670 }
22671
22672 n = length / 4;
22673 }
22674
22675 enum attr_type type = get_attr_type (insn);
22676
22677 switch (type)
22678 {
22679 case TYPE_LOAD:
22680 case TYPE_FPLOAD:
22681 case TYPE_VECLOAD:
22682 cost = COSTS_N_INSNS (n + 1);
22683 break;
22684
22685 case TYPE_MUL:
22686 switch (get_attr_size (insn))
22687 {
22688 case SIZE_8:
22689 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
22690 break;
22691 case SIZE_16:
22692 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
22693 break;
22694 case SIZE_32:
22695 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
22696 break;
22697 case SIZE_64:
22698 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
22699 break;
22700 default:
22701 gcc_unreachable ();
22702 }
22703 break;
22704 case TYPE_DIV:
22705 switch (get_attr_size (insn))
22706 {
22707 case SIZE_32:
22708 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
22709 break;
22710 case SIZE_64:
22711 cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
22712 break;
22713 default:
22714 gcc_unreachable ();
22715 }
22716 break;
22717
22718 case TYPE_FP:
22719 cost = n * rs6000_cost->fp;
22720 break;
22721 case TYPE_DMUL:
22722 cost = n * rs6000_cost->dmul;
22723 break;
22724 case TYPE_SDIV:
22725 cost = n * rs6000_cost->sdiv;
22726 break;
22727 case TYPE_DDIV:
22728 cost = n * rs6000_cost->ddiv;
22729 break;
22730
22731 case TYPE_SYNC:
22732 case TYPE_LOAD_L:
22733 case TYPE_MFCR:
22734 case TYPE_MFCRF:
22735 cost = COSTS_N_INSNS (n + 2);
22736 break;
22737
22738 default:
22739 cost = COSTS_N_INSNS (n);
22740 }
22741
22742 return cost;
22743 }
22744
22745 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
22746
22747 static int
22748 rs6000_debug_address_cost (rtx x, machine_mode mode,
22749 addr_space_t as, bool speed)
22750 {
22751 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
22752
22753 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
22754 ret, speed ? "true" : "false");
22755 debug_rtx (x);
22756
22757 return ret;
22758 }
22759
22760
22761 /* A C expression returning the cost of moving data from a register of class
22762 CLASS1 to one of CLASS2. */
22763
22764 static int
22765 rs6000_register_move_cost (machine_mode mode,
22766 reg_class_t from, reg_class_t to)
22767 {
22768 int ret;
22769 reg_class_t rclass;
22770
22771 if (TARGET_DEBUG_COST)
22772 dbg_cost_ctrl++;
22773
22774 /* If we have VSX, we can easily move between FPR or Altivec registers,
22775 otherwise we can only easily move within classes.
22776 Do this first so we give best-case answers for union classes
22777 containing both gprs and vsx regs. */
22778 HARD_REG_SET to_vsx, from_vsx;
22779 to_vsx = reg_class_contents[to] & reg_class_contents[VSX_REGS];
22780 from_vsx = reg_class_contents[from] & reg_class_contents[VSX_REGS];
22781 if (!hard_reg_set_empty_p (to_vsx)
22782 && !hard_reg_set_empty_p (from_vsx)
22783 && (TARGET_VSX
22784 || hard_reg_set_intersect_p (to_vsx, from_vsx)))
22785 {
22786 int reg = FIRST_FPR_REGNO;
22787 if (TARGET_VSX
22788 || (TEST_HARD_REG_BIT (to_vsx, FIRST_ALTIVEC_REGNO)
22789 && TEST_HARD_REG_BIT (from_vsx, FIRST_ALTIVEC_REGNO)))
22790 reg = FIRST_ALTIVEC_REGNO;
22791 ret = 2 * hard_regno_nregs (reg, mode);
22792 }
22793
22794 /* Moves from/to GENERAL_REGS. */
22795 else if ((rclass = from, reg_classes_intersect_p (to, GENERAL_REGS))
22796 || (rclass = to, reg_classes_intersect_p (from, GENERAL_REGS)))
22797 {
22798 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
22799 {
22800 if (TARGET_DIRECT_MOVE)
22801 {
22802 /* Keep the cost for direct moves above that for within
22803 a register class even if the actual processor cost is
22804 comparable. We do this because a direct move insn
22805 can't be a nop, whereas with ideal register
22806 allocation a move within the same class might turn
22807 out to be a nop. */
22808 if (rs6000_tune == PROCESSOR_POWER9
22809 || rs6000_tune == PROCESSOR_POWER10)
22810 ret = 3 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22811 else
22812 ret = 4 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22813 /* SFmode requires a conversion when moving between gprs
22814 and vsx. */
22815 if (mode == SFmode)
22816 ret += 2;
22817 }
22818 else
22819 ret = (rs6000_memory_move_cost (mode, rclass, false)
22820 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
22821 }
22822
22823 /* It's more expensive to move CR_REGS than CR0_REGS because of the
22824 shift. */
22825 else if (rclass == CR_REGS)
22826 ret = 4;
22827
22828 /* For those processors that have slow LR/CTR moves, make them more
22829 expensive than memory in order to bias spills to memory.  */
22830 else if ((rs6000_tune == PROCESSOR_POWER6
22831 || rs6000_tune == PROCESSOR_POWER7
22832 || rs6000_tune == PROCESSOR_POWER8
22833 || rs6000_tune == PROCESSOR_POWER9)
22834 && reg_class_subset_p (rclass, SPECIAL_REGS))
22835 ret = 6 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22836
22837 else
22838 /* A move will cost one instruction per GPR moved. */
22839 ret = 2 * hard_regno_nregs (FIRST_GPR_REGNO, mode);
22840 }
22841
22842 /* Everything else has to go through GENERAL_REGS. */
22843 else
22844 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
22845 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
22846
22847 if (TARGET_DEBUG_COST)
22848 {
22849 if (dbg_cost_ctrl == 1)
22850 fprintf (stderr,
22851 "rs6000_register_move_cost: ret=%d, mode=%s, from=%s, to=%s\n",
22852 ret, GET_MODE_NAME (mode), reg_class_names[from],
22853 reg_class_names[to]);
22854 dbg_cost_ctrl--;
22855 }
22856
22857 return ret;
22858 }
22859
22860 /* A C expression returning the cost of moving data of MODE from a register to
22861 or from memory. */
22862
22863 static int
22864 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
22865 bool in ATTRIBUTE_UNUSED)
22866 {
22867 int ret;
22868
22869 if (TARGET_DEBUG_COST)
22870 dbg_cost_ctrl++;
22871
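/* Regno 0 is the first GPR and regno 32 the first FPR, so these
compute the cost as four per hard register of MODE moved.  */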
22872 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
22873 ret = 4 * hard_regno_nregs (0, mode);
22874 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
22875 || reg_classes_intersect_p (rclass, VSX_REGS)))
22876 ret = 4 * hard_regno_nregs (32, mode);
22877 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
22878 ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
22879 else
22880 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
22881
22882 if (TARGET_DEBUG_COST)
22883 {
22884 if (dbg_cost_ctrl == 1)
22885 fprintf (stderr,
22886 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
22887 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
22888 dbg_cost_ctrl--;
22889 }
22890
22891 return ret;
22892 }
22893
22894 /* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
22895
22896 The register allocator chooses GEN_OR_VSX_REGS for the allocno
22897 class if GENERAL_REGS and VSX_REGS cost is lower than the memory
22898 cost. This happens a lot when TARGET_DIRECT_MOVE makes the register
22899 move cost between GENERAL_REGS and VSX_REGS low.
22900
22901 It might seem reasonable to use a union class. After all, if usage
22902 of vsr is low and gpr high, it might make sense to spill gpr to vsr
22903 rather than memory. However, in cases where register pressure of
22904 both is high, like the cactus_adm spec test, allowing
22905 GEN_OR_VSX_REGS as the allocno class results in bad decisions in
22906 the first scheduling pass. This is partly due to an allocno of
22907 GEN_OR_VSX_REGS wrongly contributing to the GENERAL_REGS pressure
22908 class, which gives too high a pressure for GENERAL_REGS and too low
22909 for VSX_REGS. So, force a choice of the subclass here.
22910
22911 The best class is also the union if GENERAL_REGS and VSX_REGS have
22912 the same cost. In that case we do use GEN_OR_VSX_REGS as the
22913 allocno class, since trying to narrow down the class by regno mode
22914 is prone to error. For example, SImode is allowed in VSX regs and
22915 in some cases (e.g. gcc.target/powerpc/p9-xxbr-3.c do_bswap32_vect)
22916 it would be wrong to choose an allocno of GENERAL_REGS based on
22917 SImode. */
22918
22919 static reg_class_t
22920 rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED,
22921 reg_class_t allocno_class,
22922 reg_class_t best_class)
22923 {
22924 switch (allocno_class)
22925 {
22926 case GEN_OR_VSX_REGS:
22927 /* best_class must be a subset of allocno_class. */
22928 gcc_checking_assert (best_class == GEN_OR_VSX_REGS
22929 || best_class == GEN_OR_FLOAT_REGS
22930 || best_class == VSX_REGS
22931 || best_class == ALTIVEC_REGS
22932 || best_class == FLOAT_REGS
22933 || best_class == GENERAL_REGS
22934 || best_class == BASE_REGS);
22935 /* Use best_class but choose wider classes when copying from the
22936 wider class to best_class is cheap. This mimics IRA choice
22937 of allocno class. */
22938 if (best_class == BASE_REGS)
22939 return GENERAL_REGS;
22940 if (TARGET_VSX && best_class == FLOAT_REGS)
22941 return VSX_REGS;
22942 return best_class;
22943
22944 case VSX_REGS:
22945 if (best_class == ALTIVEC_REGS)
22946 return ALTIVEC_REGS;
22947
22948 default:
22949 break;
22950 }
22951
22952 return allocno_class;
22953 }
22954
22955 /* Load up a constant. If the mode is a vector mode, splat the value across
22956 all of the vector elements. */
22957
22958 static rtx
22959 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
22960 {
22961 rtx reg;
22962
22963 if (mode == SFmode || mode == DFmode)
22964 {
22965 rtx d = const_double_from_real_value (dconst, mode);
22966 reg = force_reg (mode, d);
22967 }
22968 else if (mode == V4SFmode)
22969 {
22970 rtx d = const_double_from_real_value (dconst, SFmode);
22971 rtvec v = gen_rtvec (4, d, d, d, d);
22972 reg = gen_reg_rtx (mode);
22973 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22974 }
22975 else if (mode == V2DFmode)
22976 {
22977 rtx d = const_double_from_real_value (dconst, DFmode);
22978 rtvec v = gen_rtvec (2, d, d);
22979 reg = gen_reg_rtx (mode);
22980 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
22981 }
22982 else
22983 gcc_unreachable ();
22984
22985 return reg;
22986 }
22987
22988 /* Generate an FMA instruction. */
22989
22990 static void
22991 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
22992 {
22993 machine_mode mode = GET_MODE (target);
22994 rtx dst;
22995
22996 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
22997 gcc_assert (dst != NULL);
22998
22999 if (dst != target)
23000 emit_move_insn (target, dst);
23001 }
23002
23003 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
23004
23005 static void
23006 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
23007 {
23008 machine_mode mode = GET_MODE (dst);
23009 rtx r;
23010
23011 /* This is a tad more complicated, since the fnma_optab is for
23012 a different expression: fma(-m1, m2, a), which is the same
23013 thing except in the case of signed zeros.
23014
23015 Fortunately we know that if FMA is supported that FNMSUB is
23016 also supported in the ISA. Just expand it directly. */
23017
23018 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
23019
23020 r = gen_rtx_NEG (mode, a);
23021 r = gen_rtx_FMA (mode, m1, m2, r);
23022 r = gen_rtx_NEG (mode, r);
23023 emit_insn (gen_rtx_SET (dst, r));
23024 }
23025
23026 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
23027 add a reg_note saying that this was a division. Support both scalar and
23028 vector divide. Assumes no trapping math and finite arguments. */
23029
23030 void
23031 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
23032 {
23033 machine_mode mode = GET_MODE (dst);
23034 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
23035 int i;
23036
23037 /* Low precision estimates guarantee 5 bits of accuracy. High
23038 precision estimates guarantee 14 bits of accuracy. SFmode
23039 requires 23 bits of accuracy. DFmode requires 52 bits of
23040 accuracy. Each pass at least doubles the accuracy, leading
23041 to the following. */
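/* E.g. starting from a 5-bit estimate, three doublings give
5 -> 10 -> 20 -> 40 bits, enough for SFmode, and one more pass gives
80 bits for DFmode; from a 14-bit estimate, one pass (28 bits) and two
passes (56 bits) suffice respectively.  */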
23042 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
23043 if (mode == DFmode || mode == V2DFmode)
23044 passes++;
23045
23046 enum insn_code code = optab_handler (smul_optab, mode);
23047 insn_gen_fn gen_mul = GEN_FCN (code);
23048
23049 gcc_assert (code != CODE_FOR_nothing);
23050
23051 one = rs6000_load_constant_and_splat (mode, dconst1);
23052
23053 /* x0 = 1./d estimate */
23054 x0 = gen_reg_rtx (mode);
23055 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
23056 UNSPEC_FRES)));
23057
23058 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
23059 if (passes > 1) {
23060
23061 /* e0 = 1. - d * x0 */
23062 e0 = gen_reg_rtx (mode);
23063 rs6000_emit_nmsub (e0, d, x0, one);
23064
23065 /* x1 = x0 + e0 * x0 */
23066 x1 = gen_reg_rtx (mode);
23067 rs6000_emit_madd (x1, e0, x0, x0);
23068
23069 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
23070 ++i, xprev = xnext, eprev = enext) {
23071
23072 /* enext = eprev * eprev */
23073 enext = gen_reg_rtx (mode);
23074 emit_insn (gen_mul (enext, eprev, eprev));
23075
23076 /* xnext = xprev + enext * xprev */
23077 xnext = gen_reg_rtx (mode);
23078 rs6000_emit_madd (xnext, enext, xprev, xprev);
23079 }
23080
23081 } else
23082 xprev = x0;
23083
23084 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
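/* This folds the numerator in: u = n*x, v = n - d*u, and
dst = v*x + u = n*x*(2 - d*x), avoiding a separate final multiply
by n.  */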
23085
23086 /* u = n * xprev */
23087 u = gen_reg_rtx (mode);
23088 emit_insn (gen_mul (u, n, xprev));
23089
23090 /* v = n - (d * u) */
23091 v = gen_reg_rtx (mode);
23092 rs6000_emit_nmsub (v, d, u, n);
23093
23094 /* dst = (v * xprev) + u */
23095 rs6000_emit_madd (dst, v, xprev, u);
23096
23097 if (note_p)
23098 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
23099 }
23100
23101 /* Goldschmidt's Algorithm for single/double-precision floating point
23102 sqrt and rsqrt. Assumes no trapping math and finite arguments. */
23103
23104 void
23105 rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
23106 {
23107 machine_mode mode = GET_MODE (src);
23108 rtx e = gen_reg_rtx (mode);
23109 rtx g = gen_reg_rtx (mode);
23110 rtx h = gen_reg_rtx (mode);
23111
23112 /* Low precision estimates guarantee 5 bits of accuracy. High
23113 precision estimates guarantee 14 bits of accuracy. SFmode
23114 requires 23 bits of accuracy. DFmode requires 52 bits of
23115 accuracy. Each pass at least doubles the accuracy, leading
23116 to the following. */
23117 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
23118 if (mode == DFmode || mode == V2DFmode)
23119 passes++;
23120
23121 int i;
23122 rtx mhalf;
23123 enum insn_code code = optab_handler (smul_optab, mode);
23124 insn_gen_fn gen_mul = GEN_FCN (code);
23125
23126 gcc_assert (code != CODE_FOR_nothing);
23127
23128 mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);
23129
23130 /* e = rsqrt estimate */
23131 emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
23132 UNSPEC_RSQRT)));
23133
23134 /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0). */
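/* The rsqrt estimate of 0.0 is +inf, and the g = e * src multiply
below would then produce inf * 0 = NaN, so force e to 0.0 first.  */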
23135 if (!recip)
23136 {
23137 rtx zero = force_reg (mode, CONST0_RTX (mode));
23138
23139 if (mode == SFmode)
23140 {
23141 rtx target = emit_conditional_move (e, { GT, src, zero, mode },
23142 e, zero, mode, 0);
23143 if (target != e)
23144 emit_move_insn (e, target);
23145 }
23146 else
23147 {
23148 rtx cond = gen_rtx_GT (VOIDmode, e, zero);
23149 rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
23150 }
23151 }
23152
23153 /* g = sqrt estimate. */
23154 emit_insn (gen_mul (g, e, src));
23155 /* h = 1/(2*sqrt) estimate. */
23156 emit_insn (gen_mul (h, e, mhalf));
23157
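/* Each Goldschmidt pass below computes t = 1/2 - g*h and refines
g' = g + g*t and h' = h + h*t, driving g towards sqrt(src) and h
towards 1/(2*sqrt(src)); doubling h then yields rsqrt.  */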
23158 if (recip)
23159 {
23160 if (passes == 1)
23161 {
23162 rtx t = gen_reg_rtx (mode);
23163 rs6000_emit_nmsub (t, g, h, mhalf);
23164 /* Apply correction directly to 1/rsqrt estimate. */
23165 rs6000_emit_madd (dst, e, t, e);
23166 }
23167 else
23168 {
23169 for (i = 0; i < passes; i++)
23170 {
23171 rtx t1 = gen_reg_rtx (mode);
23172 rtx g1 = gen_reg_rtx (mode);
23173 rtx h1 = gen_reg_rtx (mode);
23174
23175 rs6000_emit_nmsub (t1, g, h, mhalf);
23176 rs6000_emit_madd (g1, g, t1, g);
23177 rs6000_emit_madd (h1, h, t1, h);
23178
23179 g = g1;
23180 h = h1;
23181 }
23182 /* Multiply by 2 for 1/rsqrt. */
23183 emit_insn (gen_add3_insn (dst, h, h));
23184 }
23185 }
23186 else
23187 {
23188 rtx t = gen_reg_rtx (mode);
23189 rs6000_emit_nmsub (t, g, h, mhalf);
23190 rs6000_emit_madd (dst, g, t, g);
23191 }
23192
23193 return;
23194 }
23195
23196 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
23197 (Power7) targets. DST is the target, and SRC is the argument operand. */
23198
23199 void
23200 rs6000_emit_popcount (rtx dst, rtx src)
23201 {
23202 machine_mode mode = GET_MODE (dst);
23203 rtx tmp1, tmp2;
23204
23205 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
23206 if (TARGET_POPCNTD)
23207 {
23208 if (mode == SImode)
23209 emit_insn (gen_popcntdsi2 (dst, src));
23210 else
23211 emit_insn (gen_popcntddi2 (dst, src));
23212 return;
23213 }
23214
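/* popcntb leaves the population count of each byte in that byte.
Multiplying by the replicated-0x01 constant sums all the byte counts
into the most significant byte, which the final shift (24 for SImode,
56 for DImode) moves down to the low byte.  */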
23215 tmp1 = gen_reg_rtx (mode);
23216
23217 if (mode == SImode)
23218 {
23219 emit_insn (gen_popcntbsi2 (tmp1, src));
23220 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
23221 NULL_RTX, 0);
23222 tmp2 = force_reg (SImode, tmp2);
23223 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
23224 }
23225 else
23226 {
23227 emit_insn (gen_popcntbdi2 (tmp1, src));
23228 tmp2 = expand_mult (DImode, tmp1,
23229 GEN_INT ((HOST_WIDE_INT)
23230 0x01010101 << 32 | 0x01010101),
23231 NULL_RTX, 0);
23232 tmp2 = force_reg (DImode, tmp2);
23233 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
23234 }
23235 }
23236
23237
23238 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
23239 target, and SRC is the argument operand. */
23240
23241 void
23242 rs6000_emit_parity (rtx dst, rtx src)
23243 {
23244 machine_mode mode = GET_MODE (dst);
23245 rtx tmp;
23246
23247 tmp = gen_reg_rtx (mode);
23248
23249 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
23250 if (TARGET_CMPB)
23251 {
23252 if (mode == SImode)
23253 {
23254 emit_insn (gen_popcntbsi2 (tmp, src));
23255 emit_insn (gen_paritysi2_cmpb (dst, tmp));
23256 }
23257 else
23258 {
23259 emit_insn (gen_popcntbdi2 (tmp, src));
23260 emit_insn (gen_paritydi2_cmpb (dst, tmp));
23261 }
23262 return;
23263 }
23264
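/* Without prtyw/prtyd, XOR-fold the popcntb result: XORing the high
half into the low half preserves the overall parity, and once folded
down to a single byte its low bit is the parity.  */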
23265 if (mode == SImode)
23266 {
23267 /* Is mult+shift >= shift+xor+shift+xor? */
23268 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
23269 {
23270 rtx tmp1, tmp2, tmp3, tmp4;
23271
23272 tmp1 = gen_reg_rtx (SImode);
23273 emit_insn (gen_popcntbsi2 (tmp1, src));
23274
23275 tmp2 = gen_reg_rtx (SImode);
23276 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
23277 tmp3 = gen_reg_rtx (SImode);
23278 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
23279
23280 tmp4 = gen_reg_rtx (SImode);
23281 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
23282 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
23283 }
23284 else
23285 rs6000_emit_popcount (tmp, src);
23286 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
23287 }
23288 else
23289 {
23290 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
23291 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
23292 {
23293 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
23294
23295 tmp1 = gen_reg_rtx (DImode);
23296 emit_insn (gen_popcntbdi2 (tmp1, src));
23297
23298 tmp2 = gen_reg_rtx (DImode);
23299 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
23300 tmp3 = gen_reg_rtx (DImode);
23301 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
23302
23303 tmp4 = gen_reg_rtx (DImode);
23304 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
23305 tmp5 = gen_reg_rtx (DImode);
23306 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
23307
23308 tmp6 = gen_reg_rtx (DImode);
23309 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
23310 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
23311 }
23312 else
23313 rs6000_emit_popcount (tmp, src);
23314 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
23315 }
23316 }
23317
23318 /* Expand an Altivec constant permutation for little endian mode.
23319 OP0 and OP1 are the input vectors and TARGET is the output vector.
23320 SEL specifies the constant permutation vector.
23321
23322 There are two issues: First, the two input operands must be
23323 swapped so that together they form a double-wide array in LE
23324 order. Second, the vperm instruction has surprising behavior
23325 in LE mode: it interprets the elements of the source vectors
23326 in BE mode ("left to right") and interprets the elements of
23327 the destination vector in LE mode ("right to left"). To
23328 correct for this, we must subtract each element of the permute
23329 control vector from 31.
23330
23331 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
23332 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
23333 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
23334 serve as the permute control vector. Then, in BE mode,
23335
23336 vperm 9,10,11,12
23337
23338 places the desired result in vr9. However, in LE mode the
23339 vector contents will be
23340
23341 vr10 = 00000003 00000002 00000001 00000000
23342 vr11 = 00000007 00000006 00000005 00000004
23343
23344 The result of the vperm using the same permute control vector is
23345
23346 vr9 = 05000000 07000000 01000000 03000000
23347
23348 That is, the leftmost 4 bytes of vr10 are interpreted as the
23349 source for the rightmost 4 bytes of vr9, and so on.
23350
23351 If we change the permute control vector to
23352
23353 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
23354
23355 and issue
23356
23357 vperm 9,11,10,12
23358
23359 we get the desired
23360
23361 vr9 = 00000006 00000004 00000002 00000000. */
23362
23363 static void
23364 altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
23365 const vec_perm_indices &sel)
23366 {
23367 unsigned int i;
23368 rtx perm[16];
23369 rtx constv, unspec;
23370
23371 /* Unpack and adjust the constant selector. */
23372 for (i = 0; i < 16; ++i)
23373 {
23374 unsigned int elt = 31 - (sel[i] & 31);
23375 perm[i] = GEN_INT (elt);
23376 }
23377
23378 /* Expand to a permute, swapping the inputs and using the
23379 adjusted selector. */
23380 if (!REG_P (op0))
23381 op0 = force_reg (V16QImode, op0);
23382 if (!REG_P (op1))
23383 op1 = force_reg (V16QImode, op1);
23384
23385 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
23386 constv = force_reg (V16QImode, constv);
23387 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
23388 UNSPEC_VPERM);
23389 if (!REG_P (target))
23390 {
23391 rtx tmp = gen_reg_rtx (V16QImode);
23392 emit_move_insn (tmp, unspec);
23393 unspec = tmp;
23394 }
23395
23396 emit_move_insn (target, unspec);
23397 }
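
/* A small sketch of the selector adjustment performed above: each element
   of the BE-oriented permute control vector is replaced by 31 minus its
   value (modulo 32), mirroring the unpack loop at the top of
   altivec_expand_vec_perm_const_le.  */

static void ATTRIBUTE_UNUSED
adjust_vperm_sel_for_le_sketch (const unsigned char be_sel[16],
				unsigned char le_sel[16])
{
  for (int i = 0; i < 16; i++)
    le_sel[i] = 31 - (be_sel[i] & 31);
}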
23398
23399 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
23400 permute control vector. But here it's not a constant, so we must
23401 generate a vector NAND or NOR to do the adjustment. */
23402
23403 void
23404 altivec_expand_vec_perm_le (rtx operands[4])
23405 {
23406 rtx notx, iorx, unspec;
23407 rtx target = operands[0];
23408 rtx op0 = operands[1];
23409 rtx op1 = operands[2];
23410 rtx sel = operands[3];
23411 rtx tmp = target;
23412 rtx norreg = gen_reg_rtx (V16QImode);
23413 machine_mode mode = GET_MODE (target);
23414
23415 /* Get everything in regs so the pattern matches. */
23416 if (!REG_P (op0))
23417 op0 = force_reg (mode, op0);
23418 if (!REG_P (op1))
23419 op1 = force_reg (mode, op1);
23420 if (!REG_P (sel))
23421 sel = force_reg (V16QImode, sel);
23422 if (!REG_P (target))
23423 tmp = gen_reg_rtx (mode);
23424
23425 if (TARGET_P9_VECTOR)
23426 {
23427 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, sel),
23428 UNSPEC_VPERMR);
23429 }
23430 else
23431 {
23432 /* Invert the selector with a VNAND if available, else a VNOR.
23433 The VNAND is preferred for future fusion opportunities. */
23434 notx = gen_rtx_NOT (V16QImode, sel);
23435 iorx = (TARGET_P8_VECTOR
23436 ? gen_rtx_IOR (V16QImode, notx, notx)
23437 : gen_rtx_AND (V16QImode, notx, notx));
23438 emit_insn (gen_rtx_SET (norreg, iorx));
23439
23440 /* Permute with operands reversed and adjusted selector. */
23441 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
23442 UNSPEC_VPERM);
23443 }
23444
23445 /* Copy into target, possibly by way of a register. */
23446 if (!REG_P (target))
23447 {
23448 emit_move_insn (tmp, unspec);
23449 unspec = tmp;
23450 }
23451
23452 emit_move_insn (target, unspec);
23453 }
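
/* Sketch of the identity relied on above: vperm reads only the low five
   bits of each selector byte, and on those bits a bitwise complement equals
   31 minus the element, so a single VNAND or VNOR of the selector with
   itself performs the LE adjustment.  Shown on one scalar selector byte.  */

static unsigned char ATTRIBUTE_UNUSED
invert_sel_byte_sketch (unsigned char e)
{
  /* ~(e | e) == ~(e & e) == ~e, and (~e & 31) == 31 - (e & 31).  */
  return (unsigned char) ~(e | e);
}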
23454
23455 /* Expand an Altivec constant permutation. Return true if we match
23456 an efficient implementation; false to fall back to VPERM.
23457
23458 OP0 and OP1 are the input vectors and TARGET is the output vector.
23459 SEL specifies the constant permutation vector. */
23460
23461 static bool
23462 altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
23463 const vec_perm_indices &sel)
23464 {
23465 struct altivec_perm_insn {
23466 HOST_WIDE_INT mask;
23467 enum insn_code impl;
23468 unsigned char perm[16];
23469 };
23470 static const struct altivec_perm_insn patterns[] = {
23471 {OPTION_MASK_ALTIVEC,
23472 CODE_FOR_altivec_vpkuhum_direct,
23473 {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}},
23474 {OPTION_MASK_ALTIVEC,
23475 CODE_FOR_altivec_vpkuwum_direct,
23476 {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}},
23477 {OPTION_MASK_ALTIVEC,
23478 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
23479 : CODE_FOR_altivec_vmrglb_direct,
23480 {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23}},
23481 {OPTION_MASK_ALTIVEC,
23482 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
23483 : CODE_FOR_altivec_vmrglh_direct,
23484 {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}},
23485 {OPTION_MASK_ALTIVEC,
23486 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si
23487 : CODE_FOR_altivec_vmrglw_direct_v4si,
23488 {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}},
23489 {OPTION_MASK_ALTIVEC,
23490 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
23491 : CODE_FOR_altivec_vmrghb_direct,
23492 {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}},
23493 {OPTION_MASK_ALTIVEC,
23494 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
23495 : CODE_FOR_altivec_vmrghh_direct,
23496 {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}},
23497 {OPTION_MASK_ALTIVEC,
23498 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si
23499 : CODE_FOR_altivec_vmrghw_direct_v4si,
23500 {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}},
23501 {OPTION_MASK_P8_VECTOR,
23502 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct
23503 : CODE_FOR_p8_vmrgow_v4sf_direct,
23504 {0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27}},
23505 {OPTION_MASK_P8_VECTOR,
23506 BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgow_v4sf_direct
23507 : CODE_FOR_p8_vmrgew_v4sf_direct,
23508 {4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31}},
23509 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23510 {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}},
23511 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23512 {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}},
23513 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23514 {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}},
23515 {OPTION_MASK_VSX, CODE_FOR_vsx_xxpermdi_v16qi,
23516 {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}}};
23517
23518 unsigned int i, j, elt, which;
23519 unsigned char perm[16];
23520 rtx x;
23521 bool one_vec;
23522
23523 /* Unpack the constant selector. */
23524 for (i = which = 0; i < 16; ++i)
23525 {
23526 elt = sel[i] & 31;
23527 which |= (elt < 16 ? 1 : 2);
23528 perm[i] = elt;
23529 }
23530
23531 /* Simplify the constant selector based on operands. */
23532 switch (which)
23533 {
23534 default:
23535 gcc_unreachable ();
23536
23537 case 3:
23538 one_vec = false;
23539 if (!rtx_equal_p (op0, op1))
23540 break;
23541 /* FALLTHRU */
23542
23543 case 2:
23544 for (i = 0; i < 16; ++i)
23545 perm[i] &= 15;
23546 op0 = op1;
23547 one_vec = true;
23548 break;
23549
23550 case 1:
23551 op1 = op0;
23552 one_vec = true;
23553 break;
23554 }
23555
23556 /* Look for splat patterns. */
23557 if (one_vec)
23558 {
23559 elt = perm[0];
23560
23561 for (i = 0; i < 16; ++i)
23562 if (perm[i] != elt)
23563 break;
23564 if (i == 16)
23565 {
23566 if (!BYTES_BIG_ENDIAN)
23567 elt = 15 - elt;
23568 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
23569 return true;
23570 }
23571
23572 if (elt % 2 == 0)
23573 {
23574 for (i = 0; i < 16; i += 2)
23575 if (perm[i] != elt || perm[i + 1] != elt + 1)
23576 break;
23577 if (i == 16)
23578 {
23579 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
23580 x = gen_reg_rtx (V8HImode);
23581 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
23582 GEN_INT (field)));
23583 emit_move_insn (target, gen_lowpart (V16QImode, x));
23584 return true;
23585 }
23586 }
23587
23588 if (elt % 4 == 0)
23589 {
23590 for (i = 0; i < 16; i += 4)
23591 if (perm[i] != elt
23592 || perm[i + 1] != elt + 1
23593 || perm[i + 2] != elt + 2
23594 || perm[i + 3] != elt + 3)
23595 break;
23596 if (i == 16)
23597 {
23598 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
23599 x = gen_reg_rtx (V4SImode);
23600 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
23601 GEN_INT (field)));
23602 emit_move_insn (target, gen_lowpart (V16QImode, x));
23603 return true;
23604 }
23605 }
23606 }
23607
23608 /* Look for merge and pack patterns. */
23609 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
23610 {
23611 bool swapped;
23612
23613 if ((patterns[j].mask & rs6000_isa_flags) == 0)
23614 continue;
23615
23616 elt = patterns[j].perm[0];
23617 if (perm[0] == elt)
23618 swapped = false;
23619 else if (perm[0] == elt + 16)
23620 swapped = true;
23621 else
23622 continue;
23623 for (i = 1; i < 16; ++i)
23624 {
23625 elt = patterns[j].perm[i];
23626 if (swapped)
23627 elt = (elt >= 16 ? elt - 16 : elt + 16);
23628 else if (one_vec && elt >= 16)
23629 elt -= 16;
23630 if (perm[i] != elt)
23631 break;
23632 }
23633 if (i == 16)
23634 {
23635 enum insn_code icode = patterns[j].impl;
23636 machine_mode omode = insn_data[icode].operand[0].mode;
23637 machine_mode imode = insn_data[icode].operand[1].mode;
23638
23639 rtx perm_idx = GEN_INT (0);
23640 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23641 {
23642 int perm_val = 0;
23643 if (one_vec)
23644 {
23645 if (perm[0] == 8)
23646 perm_val |= 2;
23647 if (perm[8] == 8)
23648 perm_val |= 1;
23649 }
23650 else
23651 {
23652 if (perm[0] != 0)
23653 perm_val |= 2;
23654 if (perm[8] != 16)
23655 perm_val |= 1;
23656 }
23657 perm_idx = GEN_INT (perm_val);
23658 }
23659
23660 /* For little-endian, don't use vpkuwum and vpkuhum if the
23661 underlying vector type is not V4SI and V8HI, respectively.
23662 For example, using vpkuwum with a V8HI picks up the even
23663 halfwords (BE numbering) when the even halfwords (LE
23664 numbering) are what we need. */
23665 if (!BYTES_BIG_ENDIAN
23666 && icode == CODE_FOR_altivec_vpkuwum_direct
23667 && ((REG_P (op0)
23668 && GET_MODE (op0) != V4SImode)
23669 || (SUBREG_P (op0)
23670 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
23671 continue;
23672 if (!BYTES_BIG_ENDIAN
23673 && icode == CODE_FOR_altivec_vpkuhum_direct
23674 && ((REG_P (op0)
23675 && GET_MODE (op0) != V8HImode)
23676 || (SUBREG_P (op0)
23677 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
23678 continue;
23679
23680 /* For little-endian, the two input operands must be swapped
23681 (or swapped back) to ensure proper right-to-left numbering
23682 from 0 to 2N-1. */
23683 if (swapped == BYTES_BIG_ENDIAN
23684 && icode != CODE_FOR_vsx_xxpermdi_v16qi)
23685 std::swap (op0, op1);
23686 if (imode != V16QImode)
23687 {
23688 op0 = gen_lowpart (imode, op0);
23689 op1 = gen_lowpart (imode, op1);
23690 }
23691 if (omode == V16QImode)
23692 x = target;
23693 else
23694 x = gen_reg_rtx (omode);
23695 if (icode == CODE_FOR_vsx_xxpermdi_v16qi)
23696 emit_insn (GEN_FCN (icode) (x, op0, op1, perm_idx));
23697 else
23698 emit_insn (GEN_FCN (icode) (x, op0, op1));
23699 if (omode != V16QImode)
23700 emit_move_insn (target, gen_lowpart (V16QImode, x));
23701 return true;
23702 }
23703 }
23704
23705 if (!BYTES_BIG_ENDIAN)
23706 {
23707 altivec_expand_vec_perm_const_le (target, op0, op1, sel);
23708 return true;
23709 }
23710
23711 return false;
23712 }
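
/* An illustrative sketch of the splat test above: a single-vector selector
   denotes a byte splat exactly when all sixteen elements are equal; the
   halfword and word splat checks additionally require the elements to step
   by one within each two- or four-byte group.  */

static int ATTRIBUTE_UNUSED
is_byte_splat_sketch (const unsigned char perm[16])
{
  for (int i = 1; i < 16; i++)
    if (perm[i] != perm[0])
      return 0;
  return 1;
}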
23713
23714 /* Expand a VSX Permute Doubleword constant permutation.
23715 Return true if we match an efficient implementation. */
23716
23717 static bool
23718 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
23719 unsigned char perm0, unsigned char perm1)
23720 {
23721 rtx x;
23722
23723 /* If both selectors come from the same operand, fold to single op. */
23724 if ((perm0 & 2) == (perm1 & 2))
23725 {
23726 if (perm0 & 2)
23727 op0 = op1;
23728 else
23729 op1 = op0;
23730 }
23731 /* If both operands are equal, fold to simpler permutation. */
23732 if (rtx_equal_p (op0, op1))
23733 {
23734 perm0 = perm0 & 1;
23735 perm1 = (perm1 & 1) + 2;
23736 }
23737 /* If the first selector comes from the second operand, swap. */
23738 else if (perm0 & 2)
23739 {
23740 if (perm1 & 2)
23741 return false;
23742 perm0 -= 2;
23743 perm1 += 2;
23744 std::swap (op0, op1);
23745 }
23746 /* If the second selector does not come from the second operand, fail. */
23747 else if ((perm1 & 2) == 0)
23748 return false;
23749
23750 /* Success! */
23751 if (target != NULL)
23752 {
23753 machine_mode vmode, dmode;
23754 rtvec v;
23755
23756 vmode = GET_MODE (target);
23757 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
23758 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
23759 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
23760 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
23761 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
23762 emit_insn (gen_rtx_SET (target, x));
23763 }
23764 return true;
23765 }
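
/* Sketch of the two-bit doubleword selectors handled above: bit 1 of each
   selector picks the operand and bit 0 picks the doubleword within it, so
   the concat-and-select RTL built above behaves like this, shown on plain
   arrays.  */

static void ATTRIBUTE_UNUSED
permdi_sketch (const unsigned long long op0[2],
	       const unsigned long long op1[2],
	       unsigned int perm0, unsigned int perm1,
	       unsigned long long out[2])
{
  const unsigned long long *ops[2] = { op0, op1 };
  out[0] = ops[(perm0 >> 1) & 1][perm0 & 1];
  out[1] = ops[(perm1 >> 1) & 1][perm1 & 1];
}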
23766
23767 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
23768
23769 static bool
23770 rs6000_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
23771 rtx target, rtx op0, rtx op1,
23772 const vec_perm_indices &sel)
23773 {
23774 if (vmode != op_mode)
23775 return false;
23776
23777 bool testing_p = !target;
23778
23779 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
23780 if (TARGET_ALTIVEC && testing_p)
23781 return true;
23782
23783 if (op0)
23784 {
23785 rtx nop0 = force_reg (vmode, op0);
23786 if (op0 == op1)
23787 op1 = nop0;
23788 op0 = nop0;
23789 }
23790 if (op1)
23791 op1 = force_reg (vmode, op1);
23792
23793 /* Check for ps_merge* or xxpermdi insns. */
23794 if ((vmode == V2DFmode || vmode == V2DImode) && VECTOR_MEM_VSX_P (vmode))
23795 {
23796 if (testing_p)
23797 {
23798 op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
23799 op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
23800 }
23801 if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
23802 return true;
23803 }
23804
23805 if (TARGET_ALTIVEC)
23806 {
23807 /* Force the target-independent code to lower to V16QImode. */
23808 if (vmode != V16QImode)
23809 return false;
23810 if (altivec_expand_vec_perm_const (target, op0, op1, sel))
23811 return true;
23812 }
23813
23814 return false;
23815 }
23816
23817 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave.
23818 OP0 and OP1 are the input vectors and TARGET is the output vector.
23819 PERM specifies the constant permutation vector. */
23820
23821 static void
23822 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
23823 machine_mode vmode, const vec_perm_builder &perm)
23824 {
23825 rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target);
23826 if (x != target)
23827 emit_move_insn (target, x);
23828 }
23829
23830 /* Expand an extract even operation. */
23831
23832 void
23833 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
23834 {
23835 machine_mode vmode = GET_MODE (target);
23836 unsigned i, nelt = GET_MODE_NUNITS (vmode);
23837 vec_perm_builder perm (nelt, nelt, 1);
23838
23839 for (i = 0; i < nelt; i++)
23840 perm.quick_push (i * 2);
23841
23842 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23843 }
23844
23845 /* Expand a vector interleave operation. */
23846
23847 void
23848 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
23849 {
23850 machine_mode vmode = GET_MODE (target);
23851 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
23852 vec_perm_builder perm (nelt, nelt, 1);
23853
23854 high = (highp ? 0 : nelt / 2);
23855 for (i = 0; i < nelt / 2; i++)
23856 {
23857 perm.quick_push (i + high);
23858 perm.quick_push (i + nelt + high);
23859 }
23860
23861 rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm);
23862 }
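
/* Sketch of the interleave selector built above: element 2*i of the result
   comes from op0 and element 2*i+1 from op1, offset by zero for the high
   half or by nelt/2 for the low half.  Indices >= nelt select from op1, as
   in vec_perm_builder.  */

static void ATTRIBUTE_UNUSED
build_interleave_sel_sketch (unsigned int nelt, int highp,
			     unsigned int *sel)
{
  unsigned int base = highp ? 0 : nelt / 2;
  for (unsigned int i = 0; i < nelt / 2; i++)
    {
      sel[2 * i] = base + i;			/* element from op0 */
      sel[2 * i + 1] = base + i + nelt;		/* element from op1 */
    }
}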
23863
23864 /* Scale the V2DF vector SRC by 2**SCALE and place the result in TGT. */
23865 void
23866 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
23867 {
23868 HOST_WIDE_INT hwi_scale (scale);
23869 REAL_VALUE_TYPE r_pow;
23870 rtvec v = rtvec_alloc (2);
23871 rtx elt;
23872 rtx scale_vec = gen_reg_rtx (V2DFmode);
23873 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
23874 elt = const_double_from_real_value (r_pow, DFmode);
23875 RTVEC_ELT (v, 0) = elt;
23876 RTVEC_ELT (v, 1) = elt;
23877 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
23878 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
23879 }
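
/* A minimal sketch of the scaling above, assuming <math.h> is available for
   ldexp: both lanes of a two-element double vector are multiplied by
   2**SCALE, which is what the splatted constant and mulv2df3 achieve in
   RTL.  */

static void ATTRIBUTE_UNUSED
scale_v2df_sketch (double v[2], int scale)
{
  double factor = ldexp (1.0, scale);	/* 2**scale, exact for doubles */
  v[0] *= factor;
  v[1] *= factor;
}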
23880
23881 /* Return an RTX representing where to find the function value of a
23882 function returning MODE. */
23883 static rtx
23884 rs6000_complex_function_value (machine_mode mode)
23885 {
23886 unsigned int regno;
23887 rtx r1, r2;
23888 machine_mode inner = GET_MODE_INNER (mode);
23889 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
23890
23891 if (TARGET_FLOAT128_TYPE
23892 && (mode == KCmode
23893 || (mode == TCmode && TARGET_IEEEQUAD)))
23894 regno = ALTIVEC_ARG_RETURN;
23895
23896 else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
23897 regno = FP_ARG_RETURN;
23898
23899 else
23900 {
23901 regno = GP_ARG_RETURN;
23902
23903 /* 32-bit is OK since it'll go in r3/r4. */
23904 if (TARGET_32BIT && inner_bytes >= 4)
23905 return gen_rtx_REG (mode, regno);
23906 }
23907
23908 if (inner_bytes >= 8)
23909 return gen_rtx_REG (mode, regno);
23910
23911 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
23912 const0_rtx);
23913 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
23914 GEN_INT (inner_bytes));
23915 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
23916 }
23917
23918 /* Return an rtx describing a return value of MODE as a PARALLEL
23919 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
23920 stride REG_STRIDE. */
23921
23922 static rtx
23923 rs6000_parallel_return (machine_mode mode,
23924 int n_elts, machine_mode elt_mode,
23925 unsigned int regno, unsigned int reg_stride)
23926 {
23927 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
23928
23929 int i;
23930 for (i = 0; i < n_elts; i++)
23931 {
23932 rtx r = gen_rtx_REG (elt_mode, regno);
23933 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
23934 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
23935 regno += reg_stride;
23936 }
23937
23938 return par;
23939 }
23940
23941 /* Target hook for TARGET_FUNCTION_VALUE.
23942
23943 An integer value is in r3 and a floating-point value is in fp1,
23944 unless -msoft-float. */
23945
23946 static rtx
23947 rs6000_function_value (const_tree valtype,
23948 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
23949 bool outgoing ATTRIBUTE_UNUSED)
23950 {
23951 machine_mode mode;
23952 unsigned int regno;
23953 machine_mode elt_mode;
23954 int n_elts;
23955
23956 /* Special handling for structs in darwin64. */
23957 if (TARGET_MACHO
23958 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
23959 {
23960 CUMULATIVE_ARGS valcum;
23961 rtx valret;
23962
23963 valcum.words = 0;
23964 valcum.fregno = FP_ARG_MIN_REG;
23965 valcum.vregno = ALTIVEC_ARG_MIN_REG;
23966 /* Do a trial code generation as if this were going to be passed as
23967 an argument; if any part goes in memory, we return NULL. */
23968 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
23969 if (valret)
23970 return valret;
23971 /* Otherwise fall through to standard ABI rules. */
23972 }
23973
23974 mode = TYPE_MODE (valtype);
23975
23976 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
23977 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
23978 {
23979 int first_reg, n_regs;
23980
23981 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
23982 {
23983 /* _Decimal128 must use even/odd register pairs. */
23984 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
23985 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
23986 }
23987 else
23988 {
23989 first_reg = ALTIVEC_ARG_RETURN;
23990 n_regs = 1;
23991 }
23992
23993 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
23994 }
23995
23996 /* Some return value types need to be split under the 32-bit ABI with -mpowerpc64. */
23997 if (TARGET_32BIT && TARGET_POWERPC64)
23998 switch (mode)
23999 {
24000 default:
24001 break;
24002 case E_DImode:
24003 case E_SCmode:
24004 case E_DCmode:
24005 case E_TCmode:
24006 int count = GET_MODE_SIZE (mode) / 4;
24007 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
24008 }
24009
24010 if ((INTEGRAL_TYPE_P (valtype)
24011 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
24012 || POINTER_TYPE_P (valtype))
24013 mode = TARGET_32BIT ? SImode : DImode;
24014
24015 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
24016 /* _Decimal128 must use an even/odd register pair. */
24017 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
24018 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT
24019 && !FLOAT128_VECTOR_P (mode))
24020 regno = FP_ARG_RETURN;
24021 else if (TREE_CODE (valtype) == COMPLEX_TYPE
24022 && targetm.calls.split_complex_arg)
24023 return rs6000_complex_function_value (mode);
24024 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
24025 return register is used in both cases, and we won't see V2DImode/V2DFmode
24026 for pure altivec, combine the two cases. */
24027 else if ((VECTOR_TYPE_P (valtype) || VECTOR_ALIGNMENT_P (mode))
24028 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
24029 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
24030 regno = ALTIVEC_ARG_RETURN;
24031 else
24032 regno = GP_ARG_RETURN;
24033
24034 return gen_rtx_REG (mode, regno);
24035 }
24036
24037 /* Define how to find the value returned by a library function
24038 assuming the value has mode MODE. */
24039 rtx
24040 rs6000_libcall_value (machine_mode mode)
24041 {
24042 unsigned int regno;
24043
24044 /* A long long return value needs to be split under the 32-bit ABI with -mpowerpc64. */
24045 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
24046 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
24047
24048 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT)
24049 /* _Decimal128 must use an even/odd register pair. */
24050 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
24051 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT)
24052 regno = FP_ARG_RETURN;
24053 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
24054 return register is used in both cases, and we won't see V2DImode/V2DFmode
24055 for pure altivec, combine the two cases. */
24056 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
24057 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
24058 regno = ALTIVEC_ARG_RETURN;
24059 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
24060 return rs6000_complex_function_value (mode);
24061 else
24062 regno = GP_ARG_RETURN;
24063
24064 return gen_rtx_REG (mode, regno);
24065 }
24066
24067 /* Compute register pressure classes. We implement the target hook to avoid
24068 IRA picking something like GEN_OR_FLOAT_REGS as a pressure class, which can
24069 lead to incorrect estimates of the number of available registers and therefore
24070 increased register pressure/spill. */
24071 static int
24072 rs6000_compute_pressure_classes (enum reg_class *pressure_classes)
24073 {
24074 int n;
24075
24076 n = 0;
24077 pressure_classes[n++] = GENERAL_REGS;
24078 if (TARGET_ALTIVEC)
24079 pressure_classes[n++] = ALTIVEC_REGS;
24080 if (TARGET_VSX)
24081 pressure_classes[n++] = VSX_REGS;
24082 else
24083 {
24084 if (TARGET_HARD_FLOAT)
24085 pressure_classes[n++] = FLOAT_REGS;
24086 }
24087 pressure_classes[n++] = CR_REGS;
24088 pressure_classes[n++] = SPECIAL_REGS;
24089
24090 return n;
24091 }
24092
24093 /* Given FROM and TO register numbers, say whether this elimination is allowed.
24094 Frame pointer elimination is automatically handled.
24095
24096 For the RS/6000, if frame pointer elimination is being done, we would like
24097 to convert ap into fp, not sp.
24098
24099 We need r30 if -mminimal-toc was specified, and there are constant pool
24100 references. */
24101
24102 static bool
24103 rs6000_can_eliminate (const int from, const int to)
24104 {
24105 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
24106 ? ! frame_pointer_needed
24107 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
24108 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC_OR_PCREL
24109 || constant_pool_empty_p ()
24110 : true);
24111 }
24112
24113 /* Define the offset between two registers, FROM to be eliminated and its
24114 replacement TO, at the start of a routine. */
24115 HOST_WIDE_INT
24116 rs6000_initial_elimination_offset (int from, int to)
24117 {
24118 rs6000_stack_t *info = rs6000_stack_info ();
24119 HOST_WIDE_INT offset;
24120
24121 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
24122 offset = info->push_p ? 0 : -info->total_size;
24123 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
24124 {
24125 offset = info->push_p ? 0 : -info->total_size;
24126 if (FRAME_GROWS_DOWNWARD)
24127 offset += info->fixed_size + info->vars_size + info->parm_size;
24128 }
24129 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
24130 offset = FRAME_GROWS_DOWNWARD
24131 ? info->fixed_size + info->vars_size + info->parm_size
24132 : 0;
24133 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
24134 offset = info->total_size;
24135 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
24136 offset = info->push_p ? info->total_size : 0;
24137 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
24138 offset = 0;
24139 else
24140 gcc_unreachable ();
24141
24142 return offset;
24143 }
24144
24145 /* Fill in sizes of registers used by the unwinder. */
24146
24147 static void
24148 rs6000_init_dwarf_reg_sizes_extra (tree address)
24149 {
24150 if (TARGET_MACHO && ! TARGET_ALTIVEC)
24151 {
24152 int i;
24153 machine_mode mode = TYPE_MODE (char_type_node);
24154 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
24155 rtx mem = gen_rtx_MEM (BLKmode, addr);
24156 rtx value = gen_int_mode (16, mode);
24157
24158 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
24159 The unwinder still needs to know the size of Altivec registers. */
24160
24161 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
24162 {
24163 int column = DWARF_REG_TO_UNWIND_COLUMN
24164 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
24165 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
24166
24167 emit_move_insn (adjust_address (mem, mode, offset), value);
24168 }
24169 }
24170 }
24171
24172 /* Map internal gcc register numbers to debug format register numbers.
24173 FORMAT specifies the type of debug register number to use:
24174 0 -- debug information, except for frame-related sections
24175 1 -- DWARF .debug_frame section
24176 2 -- DWARF .eh_frame section */
24177
24178 unsigned int
24179 rs6000_debugger_regno (unsigned int regno, unsigned int format)
24180 {
24181 /* On some platforms, we use the standard DWARF register
24182 numbering for .debug_info and .debug_frame. */
24183 if ((format == 0 && dwarf_debuginfo_p ()) || format == 1)
24184 {
24185 #ifdef RS6000_USE_DWARF_NUMBERING
24186 if (regno <= 31)
24187 return regno;
24188 if (FP_REGNO_P (regno))
24189 return regno - FIRST_FPR_REGNO + 32;
24190 if (ALTIVEC_REGNO_P (regno))
24191 return regno - FIRST_ALTIVEC_REGNO + 1124;
24192 if (regno == LR_REGNO)
24193 return 108;
24194 if (regno == CTR_REGNO)
24195 return 109;
24196 if (regno == CA_REGNO)
24197 return 101; /* XER */
24198 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
24199 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
24200 The actual code emitted saves the whole of CR, so we map CR2_REGNO
24201 to the DWARF reg for CR. */
24202 if (format == 1 && regno == CR2_REGNO)
24203 return 64;
24204 if (CR_REGNO_P (regno))
24205 return regno - CR0_REGNO + 86;
24206 if (regno == VRSAVE_REGNO)
24207 return 356;
24208 if (regno == VSCR_REGNO)
24209 return 67;
24210
24211 /* These do not make much sense. */
24212 if (regno == FRAME_POINTER_REGNUM)
24213 return 111;
24214 if (regno == ARG_POINTER_REGNUM)
24215 return 67;
24216 if (regno == 64)
24217 return 100;
24218
24219 gcc_unreachable ();
24220 #endif
24221 }
24222
24223 /* We use the GCC 7 (and before) internal number for non-DWARF debug
24224 information, and also for .eh_frame. */
24225 /* Translate the regnos to their numbers in GCC 7 (and before). */
24226 if (regno <= 31)
24227 return regno;
24228 if (FP_REGNO_P (regno))
24229 return regno - FIRST_FPR_REGNO + 32;
24230 if (ALTIVEC_REGNO_P (regno))
24231 return regno - FIRST_ALTIVEC_REGNO + 77;
24232 if (regno == LR_REGNO)
24233 return 65;
24234 if (regno == CTR_REGNO)
24235 return 66;
24236 if (regno == CA_REGNO)
24237 return 76; /* XER */
24238 if (CR_REGNO_P (regno))
24239 return regno - CR0_REGNO + 68;
24240 if (regno == VRSAVE_REGNO)
24241 return 109;
24242 if (regno == VSCR_REGNO)
24243 return 110;
24244
24245 if (regno == FRAME_POINTER_REGNUM)
24246 return 111;
24247 if (regno == ARG_POINTER_REGNUM)
24248 return 67;
24249 if (regno == 64)
24250 return 64;
24251
24252 gcc_unreachable ();
24253 }
24254
24255 /* target hook eh_return_filter_mode */
24256 static scalar_int_mode
24257 rs6000_eh_return_filter_mode (void)
24258 {
24259 return TARGET_32BIT ? SImode : word_mode;
24260 }
24261
24262 /* Target hook for translate_mode_attribute. */
24263 static machine_mode
24264 rs6000_translate_mode_attribute (machine_mode mode)
24265 {
24266 if ((FLOAT128_IEEE_P (mode)
24267 && ieee128_float_type_node == long_double_type_node)
24268 || (FLOAT128_IBM_P (mode)
24269 && ibm128_float_type_node == long_double_type_node))
24270 return COMPLEX_MODE_P (mode) ? E_TCmode : E_TFmode;
24271 return mode;
24272 }
24273
24274 /* Target hook for scalar_mode_supported_p. */
24275 static bool
24276 rs6000_scalar_mode_supported_p (scalar_mode mode)
24277 {
24278 /* -m32 does not support TImode. This is the default, from
24279 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
24280 same ABI as for -m32. But default_scalar_mode_supported_p allows
24281 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
24282 for -mpowerpc64. */
24283 if (TARGET_32BIT && mode == TImode)
24284 return false;
24285
24286 if (DECIMAL_FLOAT_MODE_P (mode))
24287 return default_decimal_float_supported_p ();
24288 else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode))
24289 return true;
24290 else
24291 return default_scalar_mode_supported_p (mode);
24292 }
24293
24294 /* Target hook for libgcc_floating_mode_supported_p. */
24295
24296 static bool
24297 rs6000_libgcc_floating_mode_supported_p (scalar_float_mode mode)
24298 {
24299 switch (mode)
24300 {
24301 case E_SFmode:
24302 case E_DFmode:
24303 case E_TFmode:
24304 return true;
24305
24306 /* We only return true for KFmode if IEEE 128-bit types are supported, and
24307 if long double does not use the IEEE 128-bit format. If long double
24308 uses the IEEE 128-bit format, it will use TFmode and not KFmode.
24309 Because the code will not use KFmode in that case, there would be
24310 aborts from failing to find KFmode among the _FloatN types. */
24311 case E_KFmode:
24312 return TARGET_FLOAT128_TYPE && !TARGET_IEEEQUAD;
24313
24314 default:
24315 return false;
24316 }
24317 }
24318
24319 /* Target hook for vector_mode_supported_p. */
24320 static bool
24321 rs6000_vector_mode_supported_p (machine_mode mode)
24322 {
24323 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
24324 128-bit, the compiler might try to widen IEEE 128-bit to IBM
24325 double-double. */
24326 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
24327 return true;
24328
24329 else
24330 return false;
24331 }
24332
24333 /* Target hook for floatn_mode. */
24334 static opt_scalar_float_mode
24335 rs6000_floatn_mode (int n, bool extended)
24336 {
24337 if (extended)
24338 {
24339 switch (n)
24340 {
24341 case 32:
24342 return DFmode;
24343
24344 case 64:
24345 if (TARGET_FLOAT128_TYPE)
24346 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24347 else
24348 return opt_scalar_float_mode ();
24349
24350 case 128:
24351 return opt_scalar_float_mode ();
24352
24353 default:
24354 /* Those are the only valid _FloatNx types. */
24355 gcc_unreachable ();
24356 }
24357 }
24358 else
24359 {
24360 switch (n)
24361 {
24362 case 32:
24363 return SFmode;
24364
24365 case 64:
24366 return DFmode;
24367
24368 case 128:
24369 if (TARGET_FLOAT128_TYPE)
24370 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24371 else
24372 return opt_scalar_float_mode ();
24373
24374 default:
24375 return opt_scalar_float_mode ();
24376 }
24377 }
24378
24379 }
24380
24381 /* Target hook for c_mode_for_suffix. */
24382 static machine_mode
24383 rs6000_c_mode_for_suffix (char suffix)
24384 {
24385 if (TARGET_FLOAT128_TYPE)
24386 {
24387 if (suffix == 'q' || suffix == 'Q')
24388 return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode;
24389
24390 /* At the moment, we are not defining a suffix for IBM extended double.
24391 If/when the default for -mabi=ieeelongdouble is changed, and we want
24392 to support __ibm128 constants in legacy library code, we may need to
24393 re-evaluate this decision. Currently, c-lex.cc only supports 'w' and
24394 'q' as machine dependent suffixes. The x86_64 port uses 'w' for
24395 __float80 constants. */
24396 }
24397
24398 return VOIDmode;
24399 }
24400
24401 /* Target hook for invalid_arg_for_unprototyped_fn. */
24402 static const char *
24403 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
24404 {
24405 return (!rs6000_darwin64_abi
24406 && typelist == 0
24407 && VECTOR_TYPE_P (TREE_TYPE (val))
24408 && (funcdecl == NULL_TREE
24409 || (TREE_CODE (funcdecl) == FUNCTION_DECL
24410 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD
24411 && !fndecl_built_in_p (funcdecl, BUILT_IN_CLASSIFY_TYPE))))
24412 ? N_("AltiVec argument passed to unprototyped function")
24413 : NULL;
24414 }
24415
24416 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
24417 setup by using __stack_chk_fail_local hidden function instead of
24418 calling __stack_chk_fail directly. Otherwise it is better to call
24419 __stack_chk_fail directly. */
24420
24421 static tree ATTRIBUTE_UNUSED
24422 rs6000_stack_protect_fail (void)
24423 {
24424 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
24425 ? default_hidden_stack_protect_fail ()
24426 : default_external_stack_protect_fail ();
24427 }
24428
24429 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
24430
24431 #if TARGET_ELF
24432 static unsigned HOST_WIDE_INT
24433 rs6000_asan_shadow_offset (void)
24434 {
24435 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
24436 }
24437 #endif
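
/* Sketch of how AddressSanitizer consumes the offset above: each 8-byte
   granule of application memory maps to one shadow byte at
   (addr >> 3) + offset, so the values returned here place the shadow region
   for 64- and 32-bit address spaces.  */

static unsigned HOST_WIDE_INT ATTRIBUTE_UNUSED
asan_shadow_addr_sketch (unsigned HOST_WIDE_INT addr,
			 unsigned HOST_WIDE_INT offset)
{
  return (addr >> 3) + offset;
}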
24438 \f
24439 /* Mask options that we want to support inside of attribute((target)) and
24440 #pragma GCC target operations. Note, we do not include things like
24441 64/32-bit, endianness, hard/soft floating point, etc. that would have
24442 different calling sequences. */
24443
24444 struct rs6000_opt_mask {
24445 const char *name; /* option name */
24446 HOST_WIDE_INT mask; /* mask to set */
24447 bool invert; /* invert sense of mask */
24448 bool valid_target; /* option is a target option */
24449 };
24450
24451 static struct rs6000_opt_mask const rs6000_opt_masks[] =
24452 {
24453 { "altivec", OPTION_MASK_ALTIVEC, false, true },
24454 { "block-ops-unaligned-vsx", OPTION_MASK_BLOCK_OPS_UNALIGNED_VSX,
24455 false, true },
24456 { "block-ops-vector-pair", OPTION_MASK_BLOCK_OPS_VECTOR_PAIR,
24457 false, true },
24458 { "cmpb", OPTION_MASK_CMPB, false, true },
24459 { "crypto", OPTION_MASK_CRYPTO, false, true },
24460 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
24461 { "dlmzb", OPTION_MASK_DLMZB, false, true },
24462 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
24463 false, true },
24464 { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, true },
24465 { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, true },
24466 { "fprnd", OPTION_MASK_FPRND, false, true },
24467 { "power10", OPTION_MASK_POWER10, false, true },
24468 { "hard-dfp", OPTION_MASK_DFP, false, true },
24469 { "htm", OPTION_MASK_HTM, false, true },
24470 { "isel", OPTION_MASK_ISEL, false, true },
24471 { "mfcrf", OPTION_MASK_MFCRF, false, true },
24472 { "mfpgpr", 0, false, true },
24473 { "mma", OPTION_MASK_MMA, false, true },
24474 { "modulo", OPTION_MASK_MODULO, false, true },
24475 { "mulhw", OPTION_MASK_MULHW, false, true },
24476 { "multiple", OPTION_MASK_MULTIPLE, false, true },
24477 { "pcrel", OPTION_MASK_PCREL, false, true },
24478 { "pcrel-opt", OPTION_MASK_PCREL_OPT, false, true },
24479 { "popcntb", OPTION_MASK_POPCNTB, false, true },
24480 { "popcntd", OPTION_MASK_POPCNTD, false, true },
24481 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
24482 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
24483 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
24484 { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true },
24485 { "power9-misc", OPTION_MASK_P9_MISC, false, true },
24486 { "power9-vector", OPTION_MASK_P9_VECTOR, false, true },
24487 { "power10-fusion", OPTION_MASK_P10_FUSION, false, true },
24488 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
24489 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
24490 { "prefixed", OPTION_MASK_PREFIXED, false, true },
24491 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
24492 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
24493 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
24494 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
24495 { "string", 0, false, true },
24496 { "update", OPTION_MASK_NO_UPDATE, true, true },
24497 { "vsx", OPTION_MASK_VSX, false, true },
24498 #ifdef OPTION_MASK_64BIT
24499 #if TARGET_AIX_OS
24500 { "aix64", OPTION_MASK_64BIT, false, false },
24501 { "aix32", OPTION_MASK_64BIT, true, false },
24502 #else
24503 { "64", OPTION_MASK_64BIT, false, false },
24504 { "32", OPTION_MASK_64BIT, true, false },
24505 #endif
24506 #endif
24507 #ifdef OPTION_MASK_EABI
24508 { "eabi", OPTION_MASK_EABI, false, false },
24509 #endif
24510 #ifdef OPTION_MASK_LITTLE_ENDIAN
24511 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
24512 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
24513 #endif
24514 #ifdef OPTION_MASK_RELOCATABLE
24515 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
24516 #endif
24517 #ifdef OPTION_MASK_STRICT_ALIGN
24518 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
24519 #endif
24520 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
24521 { "string", 0, false, false },
24522 };
24523
24524 /* Option variables that we want to support inside attribute((target)) and
24525 #pragma GCC target operations. */
24526
24527 struct rs6000_opt_var {
24528 const char *name; /* option name */
24529 size_t global_offset; /* offset of the option in global_options. */
24530 size_t target_offset; /* offset of the option in target options. */
24531 };
24532
24533 static struct rs6000_opt_var const rs6000_opt_vars[] =
24534 {
24535 { "friz",
24536 offsetof (struct gcc_options, x_TARGET_FRIZ),
24537 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
24538 { "avoid-indexed-addresses",
24539 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
24540 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
24541 { "longcall",
24542 offsetof (struct gcc_options, x_rs6000_default_long_calls),
24543 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
24544 { "optimize-swaps",
24545 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
24546 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
24547 { "allow-movmisalign",
24548 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
24549 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
24550 { "sched-groups",
24551 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
24552 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
24553 { "always-hint",
24554 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
24555 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
24556 { "align-branch-targets",
24557 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
24558 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
24559 { "sched-prolog",
24560 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24561 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24562 { "sched-epilog",
24563 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
24564 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
24565 { "speculate-indirect-jumps",
24566 offsetof (struct gcc_options, x_rs6000_speculate_indirect_jumps),
24567 offsetof (struct cl_target_option, x_rs6000_speculate_indirect_jumps), },
24568 };
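
/* Sketch of how the offsets in the table above are consumed (see the
   rs6000_opt_vars loop in rs6000_inner_target_options below): the stored
   offsetof value locates an int-valued option field inside the options
   struct, which is then written through a char* displacement.  */

static void ATTRIBUTE_UNUSED
set_option_by_offset_sketch (void *opts, size_t field_offset, int value)
{
  *(int *) ((char *) opts + field_offset) = value;
}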
24569
24570 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
24571 parsing. Return true if there were no errors. */
24572
24573 static bool
24574 rs6000_inner_target_options (tree args, bool attr_p)
24575 {
24576 bool ret = true;
24577
24578 if (args == NULL_TREE)
24579 ;
24580
24581 else if (TREE_CODE (args) == STRING_CST)
24582 {
24583 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24584 char *q;
24585
24586 while ((q = strtok (p, ",")) != NULL)
24587 {
24588 bool error_p = false;
24589 bool not_valid_p = false;
24590 const char *cpu_opt = NULL;
24591
24592 p = NULL;
24593 if (startswith (q, "cpu="))
24594 {
24595 int cpu_index = rs6000_cpu_name_lookup (q+4);
24596 if (cpu_index >= 0)
24597 rs6000_cpu_index = cpu_index;
24598 else
24599 {
24600 error_p = true;
24601 cpu_opt = q+4;
24602 }
24603 }
24604 else if (startswith (q, "tune="))
24605 {
24606 int tune_index = rs6000_cpu_name_lookup (q+5);
24607 if (tune_index >= 0)
24608 rs6000_tune_index = tune_index;
24609 else
24610 {
24611 error_p = true;
24612 cpu_opt = q+5;
24613 }
24614 }
24615 else
24616 {
24617 size_t i;
24618 bool invert = false;
24619 char *r = q;
24620
24621 error_p = true;
24622 if (startswith (r, "no-"))
24623 {
24624 invert = true;
24625 r += 3;
24626 }
24627
24628 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
24629 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
24630 {
24631 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
24632
24633 if (!rs6000_opt_masks[i].valid_target)
24634 not_valid_p = true;
24635 else
24636 {
24637 error_p = false;
24638 rs6000_isa_flags_explicit |= mask;
24639
24640 /* VSX needs altivec, so -mvsx automagically sets
24641 altivec and disables -mavoid-indexed-addresses. */
24642 if (!invert)
24643 {
24644 if (mask == OPTION_MASK_VSX)
24645 {
24646 mask |= OPTION_MASK_ALTIVEC;
24647 TARGET_AVOID_XFORM = 0;
24648 }
24649 }
24650
24651 if (rs6000_opt_masks[i].invert)
24652 invert = !invert;
24653
24654 if (invert)
24655 rs6000_isa_flags &= ~mask;
24656 else
24657 rs6000_isa_flags |= mask;
24658 }
24659 break;
24660 }
24661
24662 if (error_p && !not_valid_p)
24663 {
24664 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
24665 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
24666 {
24667 size_t j = rs6000_opt_vars[i].global_offset;
24668 *((int *) ((char *)&global_options + j)) = !invert;
24669 error_p = false;
24670 not_valid_p = false;
24671 break;
24672 }
24673 }
24674 }
24675
24676 if (error_p)
24677 {
24678 const char *eprefix, *esuffix;
24679
24680 ret = false;
24681 if (attr_p)
24682 {
24683 eprefix = "__attribute__((__target__(";
24684 esuffix = ")))";
24685 }
24686 else
24687 {
24688 eprefix = "#pragma GCC target ";
24689 esuffix = "";
24690 }
24691
24692 if (cpu_opt)
24693 error ("invalid cpu %qs for %s%qs%s", cpu_opt, eprefix,
24694 q, esuffix);
24695 else if (not_valid_p)
24696 error ("%s%qs%s is not allowed", eprefix, q, esuffix);
24697 else
24698 error ("%s%qs%s is invalid", eprefix, q, esuffix);
24699 }
24700 }
24701 }
24702
24703 else if (TREE_CODE (args) == TREE_LIST)
24704 {
24705 do
24706 {
24707 tree value = TREE_VALUE (args);
24708 if (value)
24709 {
24710 bool ret2 = rs6000_inner_target_options (value, attr_p);
24711 if (!ret2)
24712 ret = false;
24713 }
24714 args = TREE_CHAIN (args);
24715 }
24716 while (args != NULL_TREE);
24717 }
24718
24719 else
24720 {
24721 error ("attribute %<target%> argument not a string");
24722 return false;
24723 }
24724
24725 return ret;
24726 }
24727
24728 /* Print out the target options as a list for -mdebug=target. */
24729
24730 static void
24731 rs6000_debug_target_options (tree args, const char *prefix)
24732 {
24733 if (args == NULL_TREE)
24734 fprintf (stderr, "%s<NULL>", prefix);
24735
24736 else if (TREE_CODE (args) == STRING_CST)
24737 {
24738 char *p = ASTRDUP (TREE_STRING_POINTER (args));
24739 char *q;
24740
24741 while ((q = strtok (p, ",")) != NULL)
24742 {
24743 p = NULL;
24744 fprintf (stderr, "%s\"%s\"", prefix, q);
24745 prefix = ", ";
24746 }
24747 }
24748
24749 else if (TREE_CODE (args) == TREE_LIST)
24750 {
24751 do
24752 {
24753 tree value = TREE_VALUE (args);
24754 if (value)
24755 {
24756 rs6000_debug_target_options (value, prefix);
24757 prefix = ", ";
24758 }
24759 args = TREE_CHAIN (args);
24760 }
24761 while (args != NULL_TREE);
24762 }
24763
24764 else
24765 gcc_unreachable ();
24766
24767 return;
24768 }
24769
24770 \f
24771 /* Hook to validate attribute((target("..."))). */
24772
24773 static bool
24774 rs6000_valid_attribute_p (tree fndecl,
24775 tree ARG_UNUSED (name),
24776 tree args,
24777 int flags)
24778 {
24779 struct cl_target_option cur_target;
24780 bool ret;
24781 tree old_optimize;
24782 tree new_target, new_optimize;
24783 tree func_optimize;
24784
24785 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
24786
24787 if (TARGET_DEBUG_TARGET)
24788 {
24789 tree tname = DECL_NAME (fndecl);
24790 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
24791 if (tname)
24792 fprintf (stderr, "function: %.*s\n",
24793 (int) IDENTIFIER_LENGTH (tname),
24794 IDENTIFIER_POINTER (tname));
24795 else
24796 fprintf (stderr, "function: unknown\n");
24797
24798 fprintf (stderr, "args:");
24799 rs6000_debug_target_options (args, " ");
24800 fprintf (stderr, "\n");
24801
24802 if (flags)
24803 fprintf (stderr, "flags: 0x%x\n", flags);
24804
24805 fprintf (stderr, "--------------------\n");
24806 }
24807
24808 /* attribute((target("default"))) does nothing, beyond
24809 affecting multi-versioning. */
24810 if (TREE_VALUE (args)
24811 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
24812 && TREE_CHAIN (args) == NULL_TREE
24813 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
24814 return true;
24815
24816 old_optimize = build_optimization_node (&global_options,
24817 &global_options_set);
24818 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
24819
24820 /* If the function changed the optimization levels as well as setting target
24821 options, start with the optimizations specified. */
24822 if (func_optimize && func_optimize != old_optimize)
24823 cl_optimization_restore (&global_options, &global_options_set,
24824 TREE_OPTIMIZATION (func_optimize));
24825
24826 /* The target attributes may also change some optimization flags, so update
24827 the optimization options if necessary. */
24828 cl_target_option_save (&cur_target, &global_options, &global_options_set);
24829 rs6000_cpu_index = rs6000_tune_index = -1;
24830 ret = rs6000_inner_target_options (args, true);
24831
24832 /* Set up any additional state. */
24833 if (ret)
24834 {
24835 ret = rs6000_option_override_internal (false);
24836 new_target = build_target_option_node (&global_options,
24837 &global_options_set);
24838 }
24839 else
24840 new_target = NULL;
24841
24842 new_optimize = build_optimization_node (&global_options,
24843 &global_options_set);
24844
24845 if (!new_target)
24846 ret = false;
24847
24848 else if (fndecl)
24849 {
24850 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
24851
24852 if (old_optimize != new_optimize)
24853 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
24854 }
24855
24856 cl_target_option_restore (&global_options, &global_options_set, &cur_target);
24857
24858 if (old_optimize != new_optimize)
24859 cl_optimization_restore (&global_options, &global_options_set,
24860 TREE_OPTIMIZATION (old_optimize));
24861
24862 return ret;
24863 }
24864
24865 \f
24866 /* Hook to validate the current #pragma GCC target and set the state, and
24867 update the macros based on what was changed. If ARGS is NULL, then
24868 POP_TARGET is used to reset the options. */
24869
24870 bool
24871 rs6000_pragma_target_parse (tree args, tree pop_target)
24872 {
24873 tree prev_tree = build_target_option_node (&global_options,
24874 &global_options_set);
24875 tree cur_tree;
24876 struct cl_target_option *prev_opt, *cur_opt;
24877 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
24878
24879 if (TARGET_DEBUG_TARGET)
24880 {
24881 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
24882 fprintf (stderr, "args:");
24883 rs6000_debug_target_options (args, " ");
24884 fprintf (stderr, "\n");
24885
24886 if (pop_target)
24887 {
24888 fprintf (stderr, "pop_target:\n");
24889 debug_tree (pop_target);
24890 }
24891 else
24892 fprintf (stderr, "pop_target: <NULL>\n");
24893
24894 fprintf (stderr, "--------------------\n");
24895 }
24896
24897 if (! args)
24898 {
24899 cur_tree = ((pop_target)
24900 ? pop_target
24901 : target_option_default_node);
24902 cl_target_option_restore (&global_options, &global_options_set,
24903 TREE_TARGET_OPTION (cur_tree));
24904 }
24905 else
24906 {
24907 rs6000_cpu_index = rs6000_tune_index = -1;
24908 if (!rs6000_inner_target_options (args, false)
24909 || !rs6000_option_override_internal (false)
24910 || (cur_tree = build_target_option_node (&global_options,
24911 &global_options_set))
24912 == NULL_TREE)
24913 {
24914 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
24915 fprintf (stderr, "invalid pragma\n");
24916
24917 return false;
24918 }
24919 }
24920
24921 target_option_current_node = cur_tree;
24922 rs6000_activate_target_options (target_option_current_node);
24923
24924 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
24925 change the macros that are defined. */
24926 if (rs6000_target_modify_macros_ptr)
24927 {
24928 prev_opt = TREE_TARGET_OPTION (prev_tree);
24929 prev_flags = prev_opt->x_rs6000_isa_flags;
24930
24931 cur_opt = TREE_TARGET_OPTION (cur_tree);
24932 cur_flags = cur_opt->x_rs6000_isa_flags;
24933
24934 diff_flags = (prev_flags ^ cur_flags);
24935
24936 if (diff_flags != 0)
24937 {
24938 /* Delete old macros. */
24939 rs6000_target_modify_macros_ptr (false,
24940 prev_flags & diff_flags);
24941
24942 /* Define new macros. */
24943 rs6000_target_modify_macros_ptr (true,
24944 cur_flags & diff_flags);
24945 }
24946 }
24947
24948 return true;
24949 }
24950
24951 \f
24952 /* Remember the last target of rs6000_set_current_function. */
24953 static GTY(()) tree rs6000_previous_fndecl;
24954
24955 /* Restore target's globals from NEW_TREE and invalidate the
24956 rs6000_previous_fndecl cache. */
24957
24958 void
24959 rs6000_activate_target_options (tree new_tree)
24960 {
24961 cl_target_option_restore (&global_options, &global_options_set,
24962 TREE_TARGET_OPTION (new_tree));
24963 if (TREE_TARGET_GLOBALS (new_tree))
24964 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
24965 else if (new_tree == target_option_default_node)
24966 restore_target_globals (&default_target_globals);
24967 else
24968 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
24969 rs6000_previous_fndecl = NULL_TREE;
24970 }
24971
24972 /* Establish appropriate back-end context for processing the function
24973 FNDECL. The argument might be NULL to indicate processing at top
24974 level, outside of any function scope. */
24975 static void
24976 rs6000_set_current_function (tree fndecl)
24977 {
24978 if (TARGET_DEBUG_TARGET)
24979 {
24980 fprintf (stderr, "\n==================== rs6000_set_current_function");
24981
24982 if (fndecl)
24983 fprintf (stderr, ", fndecl %s (%p)",
24984 (DECL_NAME (fndecl)
24985 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
24986 : "<unknown>"), (void *)fndecl);
24987
24988 if (rs6000_previous_fndecl)
24989 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
24990
24991 fprintf (stderr, "\n");
24992 }
24993
24994 /* Only change the context if the function changes. This hook is called
24995 several times in the course of compiling a function, and we don't want to
24996 slow things down too much or call target_reinit when it isn't safe. */
24997 if (fndecl == rs6000_previous_fndecl)
24998 return;
24999
25000 tree old_tree;
25001 if (rs6000_previous_fndecl == NULL_TREE)
25002 old_tree = target_option_current_node;
25003 else if (DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl))
25004 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl);
25005 else
25006 old_tree = target_option_default_node;
25007
25008 tree new_tree;
25009 if (fndecl == NULL_TREE)
25010 {
25011 if (old_tree != target_option_current_node)
25012 new_tree = target_option_current_node;
25013 else
25014 new_tree = NULL_TREE;
25015 }
25016 else
25017 {
25018 new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
25019 if (new_tree == NULL_TREE)
25020 new_tree = target_option_default_node;
25021 }
25022
25023 if (TARGET_DEBUG_TARGET)
25024 {
25025 if (new_tree)
25026 {
25027 fprintf (stderr, "\nnew fndecl target specific options:\n");
25028 debug_tree (new_tree);
25029 }
25030
25031 if (old_tree)
25032 {
25033 fprintf (stderr, "\nold fndecl target specific options:\n");
25034 debug_tree (old_tree);
25035 }
25036
25037 if (old_tree != NULL_TREE || new_tree != NULL_TREE)
25038 fprintf (stderr, "--------------------\n");
25039 }
25040
25041 if (new_tree && old_tree != new_tree)
25042 rs6000_activate_target_options (new_tree);
25043
25044 if (fndecl)
25045 rs6000_previous_fndecl = fndecl;
25046 }
25047
25048 \f
25049 /* Save the current options */
25050
25051 static void
25052 rs6000_function_specific_save (struct cl_target_option *ptr,
25053 struct gcc_options *opts,
25054 struct gcc_options */* opts_set */)
25055 {
25056 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
25057 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
25058 }
25059
25060 /* Restore the current options */
25061
25062 static void
25063 rs6000_function_specific_restore (struct gcc_options *opts,
25064 struct gcc_options */* opts_set */,
25065 struct cl_target_option *ptr)
25066
25067 {
25068 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
25069 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
25070 (void) rs6000_option_override_internal (false);
25071 }
25072
25073 /* Print the current options */
25074
25075 static void
25076 rs6000_function_specific_print (FILE *file, int indent,
25077 struct cl_target_option *ptr)
25078 {
25079 rs6000_print_isa_options (file, indent, "Isa options set",
25080 ptr->x_rs6000_isa_flags);
25081
25082 rs6000_print_isa_options (file, indent, "Isa options explicit",
25083 ptr->x_rs6000_isa_flags_explicit);
25084 }
25085
25086 /* Helper function to print the current isa or misc options on a line. */
25087
25088 static void
25089 rs6000_print_options_internal (FILE *file,
25090 int indent,
25091 const char *string,
25092 HOST_WIDE_INT flags,
25093 const char *prefix,
25094 const struct rs6000_opt_mask *opts,
25095 size_t num_elements)
25096 {
25097 size_t i;
25098 size_t start_column = 0;
25099 size_t cur_column;
25100 size_t max_column = 120;
25101 size_t prefix_len = strlen (prefix);
25102 size_t comma_len = 0;
25103 const char *comma = "";
25104
25105 if (indent)
25106 start_column += fprintf (file, "%*s", indent, "");
25107
25108 if (!flags)
25109 {
25110 fprintf (file, DEBUG_FMT_S, string, "<none>");
25111 return;
25112 }
25113
25114 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
25115
25116 /* Print the various mask options. */
25117 cur_column = start_column;
25118 for (i = 0; i < num_elements; i++)
25119 {
25120 bool invert = opts[i].invert;
25121 const char *name = opts[i].name;
25122 const char *no_str = "";
25123 HOST_WIDE_INT mask = opts[i].mask;
25124 size_t len = comma_len + prefix_len + strlen (name);
25125
25126 if (!invert)
25127 {
25128 if ((flags & mask) == 0)
25129 {
25130 no_str = "no-";
25131 len += strlen ("no-");
25132 }
25133
25134 flags &= ~mask;
25135 }
25136
25137 else
25138 {
25139 if ((flags & mask) != 0)
25140 {
25141 no_str = "no-";
25142 len += strlen ("no-");
25143 }
25144
25145 flags |= mask;
25146 }
25147
25148 cur_column += len;
25149 if (cur_column > max_column)
25150 {
25151 	  fprintf (file, ", \\\n%*s", (int)start_column, "");
25152 cur_column = start_column + len;
25153 comma = "";
25154 }
25155
25156 fprintf (file, "%s%s%s%s", comma, prefix, no_str, name);
25157 comma = ", ";
25158 comma_len = strlen (", ");
25159 }
25160
25161 fputs ("\n", file);
25162 }
25163
25164 /* Helper function to print the current isa options on a line. */
25165
25166 static void
25167 rs6000_print_isa_options (FILE *file, int indent, const char *string,
25168 HOST_WIDE_INT flags)
25169 {
25170 rs6000_print_options_internal (file, indent, string, flags, "-m",
25171 &rs6000_opt_masks[0],
25172 ARRAY_SIZE (rs6000_opt_masks));
25173 }
25174
25175 /* If the user used -mno-vsx, we need to turn off all of the implicit ISA 2.06,
25176 2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
25177 -mupper-regs-df, etc.).
25178
25179 If the user used -mno-power8-vector, we need to turn off all of the implicit
25180 ISA 2.07 and 3.0 options that relate to the vector unit.
25181
25182 If the user used -mno-power9-vector, we need to turn off all of the implicit
25183 ISA 3.0 options that relate to the vector unit.
25184
25185 This function does not handle explicit options such as the user specifying
25186 -mdirect-move. These are handled in rs6000_option_override_internal, and
25187 the appropriate error is given if needed.
25188
25189 We return a mask of all of the implicit options that should not be enabled
25190 by default. */
25191
25192 static HOST_WIDE_INT
25193 rs6000_disable_incompatible_switches (void)
25194 {
25195 HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
25196 size_t i, j;
25197
25198 static const struct {
25199 const HOST_WIDE_INT no_flag; /* flag explicitly turned off. */
25200 const HOST_WIDE_INT dep_flags; /* flags that depend on this option. */
25201 const char *const name; /* name of the switch. */
25202 } flags[] = {
25203 { OPTION_MASK_P9_VECTOR, OTHER_P9_VECTOR_MASKS, "power9-vector" },
25204 { OPTION_MASK_P8_VECTOR, OTHER_P8_VECTOR_MASKS, "power8-vector" },
25205 { OPTION_MASK_VSX, OTHER_VSX_VECTOR_MASKS, "vsx" },
25206 { OPTION_MASK_ALTIVEC, OTHER_ALTIVEC_MASKS, "altivec" },
25207 };
25208
25209 for (i = 0; i < ARRAY_SIZE (flags); i++)
25210 {
25211 HOST_WIDE_INT no_flag = flags[i].no_flag;
25212
25213 if ((rs6000_isa_flags & no_flag) == 0
25214 && (rs6000_isa_flags_explicit & no_flag) != 0)
25215 {
25216 HOST_WIDE_INT dep_flags = flags[i].dep_flags;
25217 HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
25218 & rs6000_isa_flags
25219 & dep_flags);
25220
25221 if (set_flags)
25222 {
25223 for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
25224 if ((set_flags & rs6000_opt_masks[j].mask) != 0)
25225 {
25226 set_flags &= ~rs6000_opt_masks[j].mask;
25227 error ("%<-mno-%s%> turns off %<-m%s%>",
25228 flags[i].name,
25229 rs6000_opt_masks[j].name);
25230 }
25231
25232 gcc_assert (!set_flags);
25233 }
25234
25235 rs6000_isa_flags &= ~dep_flags;
25236 ignore_masks |= no_flag | dep_flags;
25237 }
25238 }
25239
25240 return ignore_masks;
25241 }
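
/* An illustrative (hypothetical) invocation that reaches the error above,
   assuming both options are given explicitly:

     gcc -mvsx -mno-altivec t.c
     error: '-mno-altivec' turns off '-mvsx'

   since VSX depends on Altivec.  If VSX were only implicitly enabled (say,
   via -mcpu=power8), -mno-altivec would silently clear it instead.  */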
25242
25243 \f
25244 /* Helper function for printing the function name when debugging. */
25245
25246 static const char *
25247 get_decl_name (tree fn)
25248 {
25249 tree name;
25250
25251 if (!fn)
25252 return "<null>";
25253
25254 name = DECL_NAME (fn);
25255 if (!name)
25256 return "<no-name>";
25257
25258 return IDENTIFIER_POINTER (name);
25259 }
25260
25261 /* Return the clone id of the target we are compiling code for in a target
25262 clone. The clone id is ordered from 0 (default) to CLONE_MAX-1 and gives
25263 the priority list for the target clones (ordered from lowest to
25264 highest). */
25265
25266 static int
25267 rs6000_clone_priority (tree fndecl)
25268 {
25269 tree fn_opts = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
25270 HOST_WIDE_INT isa_masks;
25271 int ret = CLONE_DEFAULT;
25272 tree attrs = lookup_attribute ("target", DECL_ATTRIBUTES (fndecl));
25273 const char *attrs_str = NULL;
25274
25275 attrs = TREE_VALUE (TREE_VALUE (attrs));
25276 attrs_str = TREE_STRING_POINTER (attrs);
25277
25278   /* Return priority zero for the default function.  Return the ISA needed for the
25279 function if it is not the default. */
25280 if (strcmp (attrs_str, "default") != 0)
25281 {
25282 if (fn_opts == NULL_TREE)
25283 fn_opts = target_option_default_node;
25284
25285 if (!fn_opts || !TREE_TARGET_OPTION (fn_opts))
25286 isa_masks = rs6000_isa_flags;
25287 else
25288 isa_masks = TREE_TARGET_OPTION (fn_opts)->x_rs6000_isa_flags;
25289
25290 for (ret = CLONE_MAX - 1; ret != 0; ret--)
25291 if ((rs6000_clone_map[ret].isa_mask & isa_masks) != 0)
25292 break;
25293 }
25294
25295 if (TARGET_DEBUG_TARGET)
25296     fprintf (stderr, "rs6000_clone_priority (%s) => %d\n",
25297 get_decl_name (fndecl), ret);
25298
25299 return ret;
25300 }
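
/* A minimal user-level sketch (not compiler code) of a function whose clones
   are prioritized here, assuming a powerpc64le Linux target:

     __attribute__ ((target_clones ("cpu=power9", "default")))
     long
     sum (const long *p, int n)
     {
       long s = 0;
       for (int i = 0; i < n; i++)
	 s += p[i];
       return s;
     }

   The "default" clone gets priority CLONE_DEFAULT (0); the power9 clone maps
   to a higher CLONE_* value through rs6000_clone_map.  */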
25301
25302 /* This compares the priority of target features in function DECL1 and DECL2.
25303    It returns a positive value if DECL1 is higher priority, a negative value
25304    if DECL2 is higher priority, and 0 if they are the same.  Note, priorities are
25305 ordered from lowest (CLONE_DEFAULT) to highest (currently CLONE_ISA_3_0). */
25306
25307 static int
25308 rs6000_compare_version_priority (tree decl1, tree decl2)
25309 {
25310 int priority1 = rs6000_clone_priority (decl1);
25311 int priority2 = rs6000_clone_priority (decl2);
25312 int ret = priority1 - priority2;
25313
25314 if (TARGET_DEBUG_TARGET)
25315 fprintf (stderr, "rs6000_compare_version_priority (%s, %s) => %d\n",
25316 get_decl_name (decl1), get_decl_name (decl2), ret);
25317
25318 return ret;
25319 }
25320
25321 /* Make a dispatcher declaration for the multi-versioned function DECL.
25322 Calls to DECL function will be replaced with calls to the dispatcher
25323 by the front-end. Returns the decl of the dispatcher function. */
25324
25325 static tree
25326 rs6000_get_function_versions_dispatcher (void *decl)
25327 {
25328 tree fn = (tree) decl;
25329 struct cgraph_node *node = NULL;
25330 struct cgraph_node *default_node = NULL;
25331 struct cgraph_function_version_info *node_v = NULL;
25332 struct cgraph_function_version_info *first_v = NULL;
25333
25334 tree dispatch_decl = NULL;
25335
25336 struct cgraph_function_version_info *default_version_info = NULL;
25337 gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
25338
25339 if (TARGET_DEBUG_TARGET)
25340 fprintf (stderr, "rs6000_get_function_versions_dispatcher (%s)\n",
25341 get_decl_name (fn));
25342
25343 node = cgraph_node::get (fn);
25344 gcc_assert (node != NULL);
25345
25346 node_v = node->function_version ();
25347 gcc_assert (node_v != NULL);
25348
25349 if (node_v->dispatcher_resolver != NULL)
25350 return node_v->dispatcher_resolver;
25351
25352 /* Find the default version and make it the first node. */
25353 first_v = node_v;
25354 /* Go to the beginning of the chain. */
25355 while (first_v->prev != NULL)
25356 first_v = first_v->prev;
25357
25358 default_version_info = first_v;
25359 while (default_version_info != NULL)
25360 {
25361 const tree decl2 = default_version_info->this_node->decl;
25362 if (is_function_default_version (decl2))
25363 break;
25364 default_version_info = default_version_info->next;
25365 }
25366
25367 /* If there is no default node, just return NULL. */
25368 if (default_version_info == NULL)
25369 return NULL;
25370
25371 /* Make default info the first node. */
25372 if (first_v != default_version_info)
25373 {
25374 default_version_info->prev->next = default_version_info->next;
25375 if (default_version_info->next)
25376 default_version_info->next->prev = default_version_info->prev;
25377 first_v->prev = default_version_info;
25378 default_version_info->next = first_v;
25379 default_version_info->prev = NULL;
25380 }
25381
25382 default_node = default_version_info->this_node;
25383
25384 #ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
25385 error_at (DECL_SOURCE_LOCATION (default_node->decl),
25386 "%<target_clones%> attribute needs GLIBC (2.23 and newer) that "
25387 "exports hardware capability bits");
25388 #else
25389
25390 if (targetm.has_ifunc_p ())
25391 {
25392 struct cgraph_function_version_info *it_v = NULL;
25393 struct cgraph_node *dispatcher_node = NULL;
25394 struct cgraph_function_version_info *dispatcher_version_info = NULL;
25395
25396 /* Right now, the dispatching is done via ifunc. */
25397 dispatch_decl = make_dispatcher_decl (default_node->decl);
25398 TREE_NOTHROW (dispatch_decl) = TREE_NOTHROW (fn);
25399
25400 dispatcher_node = cgraph_node::get_create (dispatch_decl);
25401 gcc_assert (dispatcher_node != NULL);
25402 dispatcher_node->dispatcher_function = 1;
25403 dispatcher_version_info
25404 = dispatcher_node->insert_new_function_version ();
25405 dispatcher_version_info->next = default_version_info;
25406 dispatcher_node->definition = 1;
25407
25408 /* Set the dispatcher for all the versions. */
25409 it_v = default_version_info;
25410 while (it_v != NULL)
25411 {
25412 it_v->dispatcher_resolver = dispatch_decl;
25413 it_v = it_v->next;
25414 }
25415 }
25416 else
25417 {
25418 error_at (DECL_SOURCE_LOCATION (default_node->decl),
25419 "multiversioning needs %<ifunc%> which is not supported "
25420 "on this target");
25421 }
25422 #endif
25423
25424 return dispatch_decl;
25425 }
25426
25427 /* Make the resolver function decl to dispatch the versions of a multi-
25428 versioned function, DEFAULT_DECL. Create an empty basic block in the
25429 resolver and store the pointer in EMPTY_BB. Return the decl of the resolver
25430 function. */
25431
25432 static tree
25433 make_resolver_func (const tree default_decl,
25434 const tree dispatch_decl,
25435 basic_block *empty_bb)
25436 {
25437 /* Make the resolver function static. The resolver function returns
25438 void *. */
25439 tree decl_name = clone_function_name (default_decl, "resolver");
25440 const char *resolver_name = IDENTIFIER_POINTER (decl_name);
25441 tree type = build_function_type_list (ptr_type_node, NULL_TREE);
25442 tree decl = build_fn_decl (resolver_name, type);
25443 SET_DECL_ASSEMBLER_NAME (decl, decl_name);
25444
25445 DECL_NAME (decl) = decl_name;
25446 TREE_USED (decl) = 1;
25447 DECL_ARTIFICIAL (decl) = 1;
25448 DECL_IGNORED_P (decl) = 0;
25449 TREE_PUBLIC (decl) = 0;
25450 DECL_UNINLINABLE (decl) = 1;
25451
25452 /* Resolver is not external, body is generated. */
25453 DECL_EXTERNAL (decl) = 0;
25454 DECL_EXTERNAL (dispatch_decl) = 0;
25455
25456 DECL_CONTEXT (decl) = NULL_TREE;
25457 DECL_INITIAL (decl) = make_node (BLOCK);
25458 DECL_STATIC_CONSTRUCTOR (decl) = 0;
25459
25460 if (DECL_COMDAT_GROUP (default_decl)
25461 || TREE_PUBLIC (default_decl))
25462 {
25463 /* In this case, each translation unit with a call to this
25464 versioned function will put out a resolver. Ensure it
25465 is comdat to keep just one copy. */
25466 DECL_COMDAT (decl) = 1;
25467 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
25468 }
25469 else
25470 TREE_PUBLIC (dispatch_decl) = 0;
25471
25472 /* Build result decl and add to function_decl. */
25473 tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, ptr_type_node);
25474 DECL_CONTEXT (t) = decl;
25475 DECL_ARTIFICIAL (t) = 1;
25476 DECL_IGNORED_P (t) = 1;
25477 DECL_RESULT (decl) = t;
25478
25479 gimplify_function_tree (decl);
25480 push_cfun (DECL_STRUCT_FUNCTION (decl));
25481 *empty_bb = init_lowered_empty_function (decl, false,
25482 profile_count::uninitialized ());
25483
25484 cgraph_node::add_new_function (decl, true);
25485 symtab->call_cgraph_insertion_hooks (cgraph_node::get_create (decl));
25486
25487 pop_cfun ();
25488
25489 /* Mark dispatch_decl as "ifunc" with resolver as resolver_name. */
25490 DECL_ATTRIBUTES (dispatch_decl)
25491 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
25492
25493 cgraph_node::create_same_body_alias (dispatch_decl, decl);
25494
25495 return decl;
25496 }
25497
25498 /* This adds a condition to the basic_block NEW_BB in function FUNCTION_DECL to
25499 return a pointer to VERSION_DECL if we are running on a machine that
25500    supports the hardware architecture bits indexed by CLONE_ISA.  This function will
25501 be called during version dispatch to decide which function version to
25502 execute. It returns the basic block at the end, to which more conditions
25503 can be added. */
25504
25505 static basic_block
25506 add_condition_to_bb (tree function_decl, tree version_decl,
25507 int clone_isa, basic_block new_bb)
25508 {
25509 push_cfun (DECL_STRUCT_FUNCTION (function_decl));
25510
25511 gcc_assert (new_bb != NULL);
25512 gimple_seq gseq = bb_seq (new_bb);
25513
25514
25515 tree convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
25516 build_fold_addr_expr (version_decl));
25517 tree result_var = create_tmp_var (ptr_type_node);
25518 gimple *convert_stmt = gimple_build_assign (result_var, convert_expr);
25519 gimple *return_stmt = gimple_build_return (result_var);
25520
25521 if (clone_isa == CLONE_DEFAULT)
25522 {
25523 gimple_seq_add_stmt (&gseq, convert_stmt);
25524 gimple_seq_add_stmt (&gseq, return_stmt);
25525 set_bb_seq (new_bb, gseq);
25526 gimple_set_bb (convert_stmt, new_bb);
25527 gimple_set_bb (return_stmt, new_bb);
25528 pop_cfun ();
25529 return new_bb;
25530 }
25531
25532 tree bool_zero = build_int_cst (bool_int_type_node, 0);
25533 tree cond_var = create_tmp_var (bool_int_type_node);
25534 tree predicate_decl = rs6000_builtin_decls[(int) RS6000_BIF_CPU_SUPPORTS];
25535 const char *arg_str = rs6000_clone_map[clone_isa].name;
25536 tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
25537 gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
25538 gimple_call_set_lhs (call_cond_stmt, cond_var);
25539
25540 gimple_set_block (call_cond_stmt, DECL_INITIAL (function_decl));
25541 gimple_set_bb (call_cond_stmt, new_bb);
25542 gimple_seq_add_stmt (&gseq, call_cond_stmt);
25543
25544 gimple *if_else_stmt = gimple_build_cond (NE_EXPR, cond_var, bool_zero,
25545 NULL_TREE, NULL_TREE);
25546 gimple_set_block (if_else_stmt, DECL_INITIAL (function_decl));
25547 gimple_set_bb (if_else_stmt, new_bb);
25548 gimple_seq_add_stmt (&gseq, if_else_stmt);
25549
25550 gimple_seq_add_stmt (&gseq, convert_stmt);
25551 gimple_seq_add_stmt (&gseq, return_stmt);
25552 set_bb_seq (new_bb, gseq);
25553
25554 basic_block bb1 = new_bb;
25555 edge e12 = split_block (bb1, if_else_stmt);
25556 basic_block bb2 = e12->dest;
25557 e12->flags &= ~EDGE_FALLTHRU;
25558 e12->flags |= EDGE_TRUE_VALUE;
25559
25560 edge e23 = split_block (bb2, return_stmt);
25561 gimple_set_bb (convert_stmt, bb2);
25562 gimple_set_bb (return_stmt, bb2);
25563
25564 basic_block bb3 = e23->dest;
25565 make_edge (bb1, bb3, EDGE_FALSE_VALUE);
25566
25567 remove_edge (e23);
25568 make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
25569
25570 pop_cfun ();
25571 return bb3;
25572 }
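
/* Conceptually (a sketch of the generated GIMPLE, not literal output), the
   conditions added here make the resolver behave like:

     static void *
     sum_resolver (void)
     {
       if (__builtin_cpu_supports ("arch_3_00"))
	 return (void *) sum_power9;
       return (void *) sum_default;
     }

   where "arch_3_00" is the rs6000_clone_map name assumed for an ISA 3.0
   clone, and the CLONE_DEFAULT block at the end returns unconditionally.
   The names sum_power9 and sum_default are hypothetical clone decls.  */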
25573
25574 /* This function generates the dispatch function for multi-versioned functions.
25575 DISPATCH_DECL is the function which will contain the dispatch logic.
25576    FNDECLS holds the function choices for dispatch; it is actually a vector.
25577 EMPTY_BB is the basic block pointer in DISPATCH_DECL in which the dispatch
25578 code is generated. */
25579
25580 static int
25581 dispatch_function_versions (tree dispatch_decl,
25582 void *fndecls_p,
25583 basic_block *empty_bb)
25584 {
25585 int ix;
25586 tree ele;
25587 vec<tree> *fndecls;
25588 tree clones[CLONE_MAX];
25589
25590 if (TARGET_DEBUG_TARGET)
25591 fputs ("dispatch_function_versions, top\n", stderr);
25592
25593 gcc_assert (dispatch_decl != NULL
25594 && fndecls_p != NULL
25595 && empty_bb != NULL);
25596
25597 /* fndecls_p is actually a vector. */
25598 fndecls = static_cast<vec<tree> *> (fndecls_p);
25599
25600 /* At least one more version other than the default. */
25601 gcc_assert (fndecls->length () >= 2);
25602
25603 /* The first version in the vector is the default decl. */
25604 memset ((void *) clones, '\0', sizeof (clones));
25605 clones[CLONE_DEFAULT] = (*fndecls)[0];
25606
25607   /* On PowerPC, we do not need to call __builtin_cpu_init, which is a NOP
25608      here (on x86_64, it is not a NOP).  The builtin function
25609      __builtin_cpu_supports ensures that the TCB fields are set up by requiring a
25610 recent glibc. If we ever need to call __builtin_cpu_init, we would need
25611 to insert the code here to do the call. */
25612
25613 for (ix = 1; fndecls->iterate (ix, &ele); ++ix)
25614 {
25615 int priority = rs6000_clone_priority (ele);
25616 if (!clones[priority])
25617 clones[priority] = ele;
25618 }
25619
25620 for (ix = CLONE_MAX - 1; ix >= 0; ix--)
25621 if (clones[ix])
25622 {
25623 if (TARGET_DEBUG_TARGET)
25624 fprintf (stderr, "dispatch_function_versions, clone %d, %s\n",
25625 ix, get_decl_name (clones[ix]));
25626
25627 *empty_bb = add_condition_to_bb (dispatch_decl, clones[ix], ix,
25628 *empty_bb);
25629 }
25630
25631 return 0;
25632 }
25633
25634 /* Generate the dispatching code body to dispatch multi-versioned function
25635 DECL. The target hook is called to process the "target" attributes and
25636 provide the code to dispatch the right function at run-time. NODE points
25637 to the dispatcher decl whose body will be created. */
25638
25639 static tree
25640 rs6000_generate_version_dispatcher_body (void *node_p)
25641 {
25642 tree resolver;
25643 basic_block empty_bb;
25644 struct cgraph_node *node = (cgraph_node *) node_p;
25645 struct cgraph_function_version_info *ninfo = node->function_version ();
25646
25647 if (ninfo->dispatcher_resolver)
25648 return ninfo->dispatcher_resolver;
25649
25650 /* node is going to be an alias, so remove the finalized bit. */
25651 node->definition = false;
25652
25653 /* The first version in the chain corresponds to the default version. */
25654 ninfo->dispatcher_resolver = resolver
25655 = make_resolver_func (ninfo->next->this_node->decl, node->decl, &empty_bb);
25656
25657 if (TARGET_DEBUG_TARGET)
25658     fprintf (stderr, "rs6000_generate_version_dispatcher_body, %s\n",
25659 get_decl_name (resolver));
25660
25661 push_cfun (DECL_STRUCT_FUNCTION (resolver));
25662 auto_vec<tree, 2> fn_ver_vec;
25663
25664 for (struct cgraph_function_version_info *vinfo = ninfo->next;
25665 vinfo;
25666 vinfo = vinfo->next)
25667 {
25668 struct cgraph_node *version = vinfo->this_node;
25669 /* Check for virtual functions here again, as by this time it should
25670 have been determined if this function needs a vtable index or
25671 not. This happens for methods in derived classes that override
25672 virtual methods in base classes but are not explicitly marked as
25673 virtual. */
25674 if (DECL_VINDEX (version->decl))
25675 sorry ("Virtual function multiversioning not supported");
25676
25677 fn_ver_vec.safe_push (version->decl);
25678 }
25679
25680 dispatch_function_versions (resolver, &fn_ver_vec, &empty_bb);
25681 cgraph_edge::rebuild_edges ();
25682 pop_cfun ();
25683 return resolver;
25684 }
25685
25686 /* Hook to decide if we need to scan function gimple statements to
25687 collect target specific information for inlining, and update the
25688 corresponding RS6000_FN_TARGET_INFO_* bit in INFO if we are able
25689 to predict which ISA feature is used at this time. Return true
25690 if we need to scan, otherwise return false. */
25691
25692 static bool
25693 rs6000_need_ipa_fn_target_info (const_tree decl,
25694 unsigned int &info ATTRIBUTE_UNUSED)
25695 {
25696 tree target = DECL_FUNCTION_SPECIFIC_TARGET (decl);
25697 if (!target)
25698 target = target_option_default_node;
25699 struct cl_target_option *opts = TREE_TARGET_OPTION (target);
25700
25701   /* See PR102059; we only handle HTM for now, so we will only do
25702      the subsequent scanning when the HTM feature is enabled.  */
25703 if (opts->x_rs6000_isa_flags & OPTION_MASK_HTM)
25704 return true;
25705
25706 return false;
25707 }
25708
25709 /* Hook to update target specific information INFO for inlining by
25710 checking the given STMT. Return false if we don't need to scan
25711 any more, otherwise return true. */
25712
25713 static bool
25714 rs6000_update_ipa_fn_target_info (unsigned int &info, const gimple *stmt)
25715 {
25716 #ifndef HAVE_AS_POWER10_HTM
25717 /* Assume inline asm can use any instruction features. */
25718 if (gimple_code (stmt) == GIMPLE_ASM)
25719 {
25720 const char *asm_str = gimple_asm_string (as_a<const gasm *> (stmt));
25721 /* Ignore empty inline asm string. */
25722 if (strlen (asm_str) > 0)
25723 	    /* We should set any bits we are concerned with here; for now
25724 	       OPTION_MASK_HTM is the only bit we care about.  */
25725 info |= RS6000_FN_TARGET_INFO_HTM;
25726 return false;
25727 }
25728 #endif
25729
25730 if (gimple_code (stmt) == GIMPLE_CALL)
25731 {
25732 tree fndecl = gimple_call_fndecl (stmt);
25733 if (fndecl && fndecl_built_in_p (fndecl, BUILT_IN_MD))
25734 {
25735 enum rs6000_gen_builtins fcode
25736 = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl);
25737 /* HTM bifs definitely exploit HTM insns. */
25738 if (bif_is_htm (rs6000_builtin_info[fcode]))
25739 {
25740 info |= RS6000_FN_TARGET_INFO_HTM;
25741 return false;
25742 }
25743 }
25744 }
25745
25746 return true;
25747 }
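
/* For example (illustrative only), a callee such as

     void
     tx_update (long *p)
     {
       if (__builtin_tbegin (0))
	 {
	   ++*p;
	   __builtin_tend (0);
	 }
     }

   uses HTM bifs, so RS6000_FN_TARGET_INFO_HTM is set and the scan stops,
   while a function compiled with -mhtm that uses no HTM bifs or inline asm
   keeps the bit clear and may still be inlined into a no-htm caller.  */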
25748
25749 /* Hook to determine if one function can safely inline another. */
25750
25751 static bool
25752 rs6000_can_inline_p (tree caller, tree callee)
25753 {
25754 bool ret = false;
25755 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
25756 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
25757
25758 /* If the caller/callee has option attributes, then use them.
25759 Otherwise, use the command line options. */
25760 if (!callee_tree)
25761 callee_tree = target_option_default_node;
25762 if (!caller_tree)
25763 caller_tree = target_option_default_node;
25764
25765 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25766 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
25767
25768 HOST_WIDE_INT callee_isa = callee_opts->x_rs6000_isa_flags;
25769 HOST_WIDE_INT caller_isa = caller_opts->x_rs6000_isa_flags;
25770 HOST_WIDE_INT explicit_isa = callee_opts->x_rs6000_isa_flags_explicit;
25771
25772 cgraph_node *callee_node = cgraph_node::get (callee);
25773 if (ipa_fn_summaries && ipa_fn_summaries->get (callee_node) != NULL)
25774 {
25775 unsigned int info = ipa_fn_summaries->get (callee_node)->target_info;
25776 if ((info & RS6000_FN_TARGET_INFO_HTM) == 0)
25777 {
25778 callee_isa &= ~OPTION_MASK_HTM;
25779 explicit_isa &= ~OPTION_MASK_HTM;
25780 }
25781 }
25782
25783 /* Ignore -mpower8-fusion and -mpower10-fusion options for inlining
25784 purposes. */
25785 callee_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25786 explicit_isa &= ~(OPTION_MASK_P8_FUSION | OPTION_MASK_P10_FUSION);
25787
25788 /* The callee's options must be a subset of the caller's options, i.e.
25789 a vsx function may inline an altivec function, but a no-vsx function
25790 must not inline a vsx function. However, for those options that the
25791 callee has explicitly enabled or disabled, then we must enforce that
25792 the callee's and caller's options match exactly; see PR70010. */
25793 if (((caller_isa & callee_isa) == callee_isa)
25794 && (caller_isa & explicit_isa) == (callee_isa & explicit_isa))
25795 ret = true;
25796
25797 if (TARGET_DEBUG_TARGET)
25798     fprintf (stderr, "rs6000_can_inline_p, caller %s, callee %s, %s inline\n",
25799 get_decl_name (caller), get_decl_name (callee),
25800 (ret ? "can" : "cannot"));
25801
25802 return ret;
25803 }
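
/* Example of the subset rule (a user-level sketch, assuming -mcpu=power8 on
   the command line so that Altivec and VSX are both enabled):

     __attribute__ ((target ("altivec")))
     static int f (int x) { return x + 1; }

     int g (int x) { return f (x); }

   g may inline f: f's ISA flags are a subset of g's, and f's one explicitly
   set flag (altivec) has the same value in both.  With -mno-altivec on the
   command line instead, the inlining would be rejected.  */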
25804 \f
25805 /* Allocate a stack temp and fix up the address so it meets the particular
25806    memory requirements (either offsettable or REG+REG addressing).  */
25807
25808 rtx
25809 rs6000_allocate_stack_temp (machine_mode mode,
25810 bool offsettable_p,
25811 bool reg_reg_p)
25812 {
25813 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
25814 rtx addr = XEXP (stack, 0);
25815 int strict_p = reload_completed;
25816
25817 if (!legitimate_indirect_address_p (addr, strict_p))
25818 {
25819 if (offsettable_p
25820 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
25821 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25822
25823 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
25824 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
25825 }
25826
25827 return stack;
25828 }
25829
25830 /* Given a memory reference, if it is not a reg or reg+reg addressing,
25831 convert to such a form to deal with memory reference instructions
25832 like STFIWX and LDBRX that only take reg+reg addressing. */
25833
25834 rtx
25835 rs6000_force_indexed_or_indirect_mem (rtx x)
25836 {
25837 machine_mode mode = GET_MODE (x);
25838
25839 gcc_assert (MEM_P (x));
25840 if (can_create_pseudo_p () && !indexed_or_indirect_operand (x, mode))
25841 {
25842 rtx addr = XEXP (x, 0);
25843 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
25844 {
25845 rtx reg = XEXP (addr, 0);
25846 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
25847 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
25848 gcc_assert (REG_P (reg));
25849 emit_insn (gen_add3_insn (reg, reg, size_rtx));
25850 addr = reg;
25851 }
25852 else if (GET_CODE (addr) == PRE_MODIFY)
25853 {
25854 rtx reg = XEXP (addr, 0);
25855 rtx expr = XEXP (addr, 1);
25856 gcc_assert (REG_P (reg));
25857 gcc_assert (GET_CODE (expr) == PLUS);
25858 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
25859 addr = reg;
25860 }
25861
25862 if (GET_CODE (addr) == PLUS)
25863 {
25864 rtx op0 = XEXP (addr, 0);
25865 rtx op1 = XEXP (addr, 1);
25866 op0 = force_reg (Pmode, op0);
25867 op1 = force_reg (Pmode, op1);
25868 x = replace_equiv_address (x, gen_rtx_PLUS (Pmode, op0, op1));
25869 }
25870 else
25871 x = replace_equiv_address (x, force_reg (Pmode, addr));
25872 }
25873
25874 return x;
25875 }
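
/* E.g. (an illustrative RTL sketch), a memory operand such as

     (mem:SI (plus:DI (reg:DI 9) (const_int 16)))

   is rewritten by forcing the offset into a register:

     (set (reg:DI 10) (const_int 16))
     (mem:SI (plus:DI (reg:DI 9) (reg:DI 10)))

   giving the reg+reg (X-form) address that STFIWX and LDBRX require.  The
   register numbers are for illustration only.  */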
25876
25877 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
25878
25879 On the RS/6000, all integer constants are acceptable, most won't be valid
25880 for particular insns, though. Only easy FP constants are acceptable. */
25881
25882 static bool
25883 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
25884 {
25885 if (TARGET_ELF && tls_referenced_p (x))
25886 return false;
25887
25888 if (CONST_DOUBLE_P (x))
25889 return easy_fp_constant (x, mode);
25890
25891 if (GET_CODE (x) == CONST_VECTOR)
25892 return easy_vector_constant (x, mode);
25893
25894 return true;
25895 }
25896
25897 #if TARGET_AIX_OS
25898 /* Implement TARGET_PRECOMPUTE_TLS_P.
25899
25900    On AIX, TLS symbols are in the TOC, which is maintained in the
25901 constant pool. AIX TOC TLS symbols need to be pre-computed, but
25902 must be considered legitimate constants. */
25903
25904 static bool
25905 rs6000_aix_precompute_tls_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
25906 {
25907 return tls_referenced_p (x);
25908 }
25909 #endif
25910
25911 \f
25912 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
25913
25914 static bool
25915 chain_already_loaded (rtx_insn *last)
25916 {
25917 for (; last != NULL; last = PREV_INSN (last))
25918 {
25919 if (NONJUMP_INSN_P (last))
25920 {
25921 rtx patt = PATTERN (last);
25922
25923 if (GET_CODE (patt) == SET)
25924 {
25925 rtx lhs = XEXP (patt, 0);
25926
25927 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
25928 return true;
25929 }
25930 }
25931 }
25932 return false;
25933 }
25934
25935 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
25936
25937 void
25938 rs6000_call_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
25939 {
25940 rtx func = func_desc;
25941 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
25942 rtx toc_load = NULL_RTX;
25943 rtx toc_restore = NULL_RTX;
25944 rtx func_addr;
25945 rtx abi_reg = NULL_RTX;
25946 rtx call[5];
25947 int n_call;
25948 rtx insn;
25949 bool is_pltseq_longcall;
25950
25951 if (global_tlsarg)
25952 tlsarg = global_tlsarg;
25953
25954 /* Handle longcall attributes. */
25955 is_pltseq_longcall = false;
25956 if ((INTVAL (cookie) & CALL_LONG) != 0
25957 && GET_CODE (func_desc) == SYMBOL_REF)
25958 {
25959 func = rs6000_longcall_ref (func_desc, tlsarg);
25960 if (TARGET_PLTSEQ)
25961 is_pltseq_longcall = true;
25962 }
25963
25964 /* Handle indirect calls. */
25965 if (!SYMBOL_REF_P (func)
25966 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func)))
25967 {
25968 if (!rs6000_pcrel_p ())
25969 {
25970 /* Save the TOC into its reserved slot before the call,
25971 and prepare to restore it after the call. */
25972 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
25973 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
25974 gen_rtvec (1, stack_toc_offset),
25975 UNSPEC_TOCSLOT);
25976 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
25977
25978 /* Can we optimize saving the TOC in the prologue or
25979 do we need to do it at every call? */
25980 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
25981 cfun->machine->save_toc_in_prologue = true;
25982 else
25983 {
25984 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25985 rtx stack_toc_mem = gen_frame_mem (Pmode,
25986 gen_rtx_PLUS (Pmode, stack_ptr,
25987 stack_toc_offset));
25988 MEM_VOLATILE_P (stack_toc_mem) = 1;
25989 if (is_pltseq_longcall)
25990 {
25991 rtvec v = gen_rtvec (3, toc_reg, func_desc, tlsarg);
25992 rtx mark_toc_reg = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
25993 emit_insn (gen_rtx_SET (stack_toc_mem, mark_toc_reg));
25994 }
25995 else
25996 emit_move_insn (stack_toc_mem, toc_reg);
25997 }
25998 }
25999
26000 if (DEFAULT_ABI == ABI_ELFv2)
26001 {
26002 /* A function pointer in the ELFv2 ABI is just a plain address, but
26003 the ABI requires it to be loaded into r12 before the call. */
26004 func_addr = gen_rtx_REG (Pmode, 12);
26005 emit_move_insn (func_addr, func);
26006 abi_reg = func_addr;
26007 /* Indirect calls via CTR are strongly preferred over indirect
26008 calls via LR, so move the address there. Needed to mark
26009 this insn for linker plt sequence editing too. */
26010 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26011 if (is_pltseq_longcall)
26012 {
26013 rtvec v = gen_rtvec (3, abi_reg, func_desc, tlsarg);
26014 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26015 emit_insn (gen_rtx_SET (func_addr, mark_func));
26016 v = gen_rtvec (2, func_addr, func_desc);
26017 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26018 }
26019 else
26020 emit_move_insn (func_addr, abi_reg);
26021 }
26022 else
26023 {
26024 /* A function pointer under AIX is a pointer to a data area whose
26025 first word contains the actual address of the function, whose
26026 second word contains a pointer to its TOC, and whose third word
26027 contains a value to place in the static chain register (r11).
26028 Note that if we load the static chain, our "trampoline" need
26029 not have any executable code. */
26030
26031 /* Load up address of the actual function. */
26032 func = force_reg (Pmode, func);
26033 func_addr = gen_reg_rtx (Pmode);
26034 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func));
26035
26036 /* Indirect calls via CTR are strongly preferred over indirect
26037 calls via LR, so move the address there. */
26038 rtx ctr_reg = gen_rtx_REG (Pmode, CTR_REGNO);
26039 emit_move_insn (ctr_reg, func_addr);
26040 func_addr = ctr_reg;
26041
26042 /* Prepare to load the TOC of the called function. Note that the
26043 TOC load must happen immediately before the actual call so
26044 that unwinding the TOC registers works correctly. See the
26045 comment in frob_update_context. */
26046 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
26047 rtx func_toc_mem = gen_rtx_MEM (Pmode,
26048 gen_rtx_PLUS (Pmode, func,
26049 func_toc_offset));
26050 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
26051
26052 /* If we have a static chain, load it up. But, if the call was
26053 originally direct, the 3rd word has not been written since no
26054 trampoline has been built, so we ought not to load it, lest we
26055 override a static chain value. */
26056 if (!(GET_CODE (func_desc) == SYMBOL_REF
26057 && SYMBOL_REF_FUNCTION_P (func_desc))
26058 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
26059 && !chain_already_loaded (get_current_sequence ()->next->last))
26060 {
26061 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
26062 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
26063 rtx func_sc_mem = gen_rtx_MEM (Pmode,
26064 gen_rtx_PLUS (Pmode, func,
26065 func_sc_offset));
26066 emit_move_insn (sc_reg, func_sc_mem);
26067 abi_reg = sc_reg;
26068 }
26069 }
26070 }
26071 else
26072 {
26073 /* No TOC register needed for calls from PC-relative callers. */
26074 if (!rs6000_pcrel_p ())
26075 /* Direct calls use the TOC: for local calls, the callee will
26076 assume the TOC register is set; for non-local calls, the
26077 PLT stub needs the TOC register. */
26078 abi_reg = toc_reg;
26079 func_addr = func;
26080 }
26081
26082 /* Create the call. */
26083 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26084 if (value != NULL_RTX)
26085 call[0] = gen_rtx_SET (value, call[0]);
26086 call[1] = gen_rtx_USE (VOIDmode, cookie);
26087 n_call = 2;
26088
26089 if (toc_load)
26090 call[n_call++] = toc_load;
26091 if (toc_restore)
26092 call[n_call++] = toc_restore;
26093
26094 call[n_call++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26095
26096 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
26097 insn = emit_call_insn (insn);
26098
26099 /* Mention all registers defined by the ABI to hold information
26100 as uses in CALL_INSN_FUNCTION_USAGE. */
26101 if (abi_reg)
26102 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26103 }
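
/* For reference, an AIX function descriptor can be pictured as this
   illustrative (hypothetical) C structure:

     struct aix_func_desc
     {
       void *code;           // word 0: address of the actual code
       void *toc;            // word 1: TOC pointer of the callee
       void *static_chain;   // word 2: value to load into r11
     };

   The indirect-call path above loads word 0 into CTR, emits the TOC load
   from word 1, and loads word 2 into r11 when needed.  */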
26104
26105 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
26106
26107 void
26108 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
26109 {
26110 rtx call[2];
26111 rtx insn;
26112 rtx r12 = NULL_RTX;
26113 rtx func_addr = func_desc;
26114
26115 if (global_tlsarg)
26116 tlsarg = global_tlsarg;
26117
26118 /* Handle longcall attributes. */
26119 if (INTVAL (cookie) & CALL_LONG && SYMBOL_REF_P (func_desc))
26120 {
26121 /* PCREL can do a sibling call to a longcall function
26122 because we don't need to restore the TOC register. */
26123 gcc_assert (rs6000_pcrel_p ());
26124 func_desc = rs6000_longcall_ref (func_desc, tlsarg);
26125 }
26126 else
26127 gcc_assert (INTVAL (cookie) == 0);
26128
26129 /* For ELFv2, r12 and CTR need to hold the function address
26130 for an indirect call. */
26131 if (GET_CODE (func_desc) != SYMBOL_REF && DEFAULT_ABI == ABI_ELFv2)
26132 {
26133 r12 = gen_rtx_REG (Pmode, 12);
26134 emit_move_insn (r12, func_desc);
26135 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26136 emit_move_insn (func_addr, r12);
26137 }
26138
26139 /* Create the call. */
26140 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26141 if (value != NULL_RTX)
26142 call[0] = gen_rtx_SET (value, call[0]);
26143
26144 call[1] = simple_return_rtx;
26145
26146 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
26147 insn = emit_call_insn (insn);
26148
26149 /* Note use of the TOC register. */
26150 if (!rs6000_pcrel_p ())
26151 use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
26152 gen_rtx_REG (Pmode, TOC_REGNUM));
26153
26154 /* Note use of r12. */
26155 if (r12)
26156 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r12);
26157 }
26158
26159 /* Expand code to perform a call under the SYSV4 ABI. */
26160
26161 void
26162 rs6000_call_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
26163 {
26164 rtx func = func_desc;
26165 rtx func_addr;
26166 rtx call[4];
26167 rtx insn;
26168 rtx abi_reg = NULL_RTX;
26169 int n;
26170
26171 if (global_tlsarg)
26172 tlsarg = global_tlsarg;
26173
26174 /* Handle longcall attributes. */
26175 if ((INTVAL (cookie) & CALL_LONG) != 0
26176 && GET_CODE (func_desc) == SYMBOL_REF)
26177 {
26178 func = rs6000_longcall_ref (func_desc, tlsarg);
26179 /* If the longcall was implemented as an inline PLT call using
26180 PLT unspecs then func will be REG:r11. If not, func will be
26181 a pseudo reg. The inline PLT call sequence supports lazy
26182 linking (and longcalls to functions in dlopen'd libraries).
26183       The other style of longcall doesn't.  The lazy linking entry
26184 to the dynamic symbol resolver requires r11 be the function
26185 address (as it is for linker generated PLT stubs). Ensure
26186 r11 stays valid to the bctrl by marking r11 used by the call. */
26187 if (TARGET_PLTSEQ)
26188 abi_reg = func;
26189 }
26190
26191 /* Handle indirect calls. */
26192 if (GET_CODE (func) != SYMBOL_REF)
26193 {
26194 func = force_reg (Pmode, func);
26195
26196 /* Indirect calls via CTR are strongly preferred over indirect
26197 calls via LR, so move the address there. That can't be left
26198 to reload because we want to mark every instruction in an
26199 inline PLT call sequence with a reloc, enabling the linker to
26200 edit the sequence back to a direct call when that makes sense. */
26201 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26202 if (abi_reg)
26203 {
26204 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
26205 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26206 emit_insn (gen_rtx_SET (func_addr, mark_func));
26207 v = gen_rtvec (2, func_addr, func_desc);
26208 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26209 }
26210 else
26211 emit_move_insn (func_addr, func);
26212 }
26213 else
26214 func_addr = func;
26215
26216 /* Create the call. */
26217 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26218 if (value != NULL_RTX)
26219 call[0] = gen_rtx_SET (value, call[0]);
26220
26221 call[1] = gen_rtx_USE (VOIDmode, cookie);
26222 n = 2;
26223 if (TARGET_SECURE_PLT
26224 && flag_pic
26225 && GET_CODE (func_addr) == SYMBOL_REF
26226 && !SYMBOL_REF_LOCAL_P (func_addr))
26227 call[n++] = gen_rtx_USE (VOIDmode, pic_offset_table_rtx);
26228
26229 call[n++] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26230
26231 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n, call));
26232 insn = emit_call_insn (insn);
26233 if (abi_reg)
26234 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26235 }
26236
26237 /* Expand code to perform a sibling call under the SysV4 ABI. */
26238
26239 void
26240 rs6000_sibcall_sysv (rtx value, rtx func_desc, rtx tlsarg, rtx cookie)
26241 {
26242 rtx func = func_desc;
26243 rtx func_addr;
26244 rtx call[3];
26245 rtx insn;
26246 rtx abi_reg = NULL_RTX;
26247
26248 if (global_tlsarg)
26249 tlsarg = global_tlsarg;
26250
26251 /* Handle longcall attributes. */
26252 if ((INTVAL (cookie) & CALL_LONG) != 0
26253 && GET_CODE (func_desc) == SYMBOL_REF)
26254 {
26255 func = rs6000_longcall_ref (func_desc, tlsarg);
26256 /* If the longcall was implemented as an inline PLT call using
26257 PLT unspecs then func will be REG:r11. If not, func will be
26258 a pseudo reg. The inline PLT call sequence supports lazy
26259 linking (and longcalls to functions in dlopen'd libraries).
26260       The other style of longcall doesn't.  The lazy linking entry
26261 to the dynamic symbol resolver requires r11 be the function
26262 address (as it is for linker generated PLT stubs). Ensure
26263 r11 stays valid to the bctr by marking r11 used by the call. */
26264 if (TARGET_PLTSEQ)
26265 abi_reg = func;
26266 }
26267
26268 /* Handle indirect calls. */
26269 if (GET_CODE (func) != SYMBOL_REF)
26270 {
26271 func = force_reg (Pmode, func);
26272
26273 /* Indirect sibcalls must go via CTR. That can't be left to
26274 reload because we want to mark every instruction in an inline
26275 PLT call sequence with a reloc, enabling the linker to edit
26276 the sequence back to a direct call when that makes sense. */
26277 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26278 if (abi_reg)
26279 {
26280 rtvec v = gen_rtvec (3, func, func_desc, tlsarg);
26281 rtx mark_func = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26282 emit_insn (gen_rtx_SET (func_addr, mark_func));
26283 v = gen_rtvec (2, func_addr, func_desc);
26284 func_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_PLTSEQ);
26285 }
26286 else
26287 emit_move_insn (func_addr, func);
26288 }
26289 else
26290 func_addr = func;
26291
26292 /* Create the call. */
26293 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26294 if (value != NULL_RTX)
26295 call[0] = gen_rtx_SET (value, call[0]);
26296
26297 call[1] = gen_rtx_USE (VOIDmode, cookie);
26298 call[2] = simple_return_rtx;
26299
26300 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
26301 insn = emit_call_insn (insn);
26302 if (abi_reg)
26303 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
26304 }
26305
26306 #if TARGET_MACHO
26307
26308 /* Expand code to perform a call under the Darwin ABI.
26309 Modulo handling of mlongcall, this is much the same as sysv.
26310    If/when the longcall optimisation is removed, we could drop this
26311 code and use the sysv case (taking care to avoid the tls stuff).
26312
26313 We can use this for sibcalls too, if needed. */
26314
26315 void
26316 rs6000_call_darwin_1 (rtx value, rtx func_desc, rtx tlsarg,
26317 rtx cookie, bool sibcall)
26318 {
26319 rtx func = func_desc;
26320 rtx func_addr;
26321 rtx call[3];
26322 rtx insn;
26323 int cookie_val = INTVAL (cookie);
26324 bool make_island = false;
26325
26326 /* Handle longcall attributes, there are two cases for Darwin:
26327 1) Newer linkers are capable of synthesising any branch islands needed.
26328 2) We need a helper branch island synthesised by the compiler.
26329 The second case has mostly been retired and we don't use it for m64.
26330      In fact, it is an optimisation; we could just indirect as sysv does..
26331 ... however, backwards compatibility for now.
26332 If we're going to use this, then we need to keep the CALL_LONG bit set,
26333 so that we can pick up the special insn form later. */
26334 if ((cookie_val & CALL_LONG) != 0
26335 && GET_CODE (func_desc) == SYMBOL_REF)
26336 {
26337 /* FIXME: the longcall opt should not hang off this flag, it is most
26338 likely incorrect for kernel-mode code-generation. */
26339 if (darwin_symbol_stubs && TARGET_32BIT)
26340 make_island = true; /* Do nothing yet, retain the CALL_LONG flag. */
26341 else
26342 {
26343 /* The linker is capable of doing this, but the user explicitly
26344 asked for -mlongcall, so we'll do the 'normal' version. */
26345 func = rs6000_longcall_ref (func_desc, NULL_RTX);
26346 cookie_val &= ~CALL_LONG; /* Handled, zap it. */
26347 }
26348 }
26349
26350 /* Handle indirect calls. */
26351 if (GET_CODE (func) != SYMBOL_REF)
26352 {
26353 func = force_reg (Pmode, func);
26354
26355 /* Indirect calls via CTR are strongly preferred over indirect
26356 calls via LR, and are required for indirect sibcalls, so move
26357 the address there. */
26358 func_addr = gen_rtx_REG (Pmode, CTR_REGNO);
26359 emit_move_insn (func_addr, func);
26360 }
26361 else
26362 func_addr = func;
26363
26364 /* Create the call. */
26365 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), tlsarg);
26366 if (value != NULL_RTX)
26367 call[0] = gen_rtx_SET (value, call[0]);
26368
26369 call[1] = gen_rtx_USE (VOIDmode, GEN_INT (cookie_val));
26370
26371 if (sibcall)
26372 call[2] = simple_return_rtx;
26373 else
26374 call[2] = gen_hard_reg_clobber (Pmode, LR_REGNO);
26375
26376 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (3, call));
26377 insn = emit_call_insn (insn);
26378 /* Now we have the debug info in the insn, we can set up the branch island
26379 if we're using one. */
26380 if (make_island)
26381 {
26382 tree funname = get_identifier (XSTR (func_desc, 0));
26383
26384 if (no_previous_def (funname))
26385 {
26386 rtx label_rtx = gen_label_rtx ();
26387 char *label_buf, temp_buf[256];
26388 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
26389 CODE_LABEL_NUMBER (label_rtx));
26390 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
26391 tree labelname = get_identifier (label_buf);
26392 add_compiler_branch_island (labelname, funname,
26393 insn_line ((const rtx_insn*)insn));
26394 }
26395 }
26396 }
26397 #endif
26398
26399 void
26400 rs6000_call_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
26401 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
26402 {
26403 #if TARGET_MACHO
26404 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, false);
26405 #else
26406   gcc_unreachable ();
26407 #endif
26408 }
26409
26410
26411 void
26412 rs6000_sibcall_darwin (rtx value ATTRIBUTE_UNUSED, rtx func_desc ATTRIBUTE_UNUSED,
26413 rtx tlsarg ATTRIBUTE_UNUSED, rtx cookie ATTRIBUTE_UNUSED)
26414 {
26415 #if TARGET_MACHO
26416 rs6000_call_darwin_1 (value, func_desc, tlsarg, cookie, true);
26417 #else
26418   gcc_unreachable ();
26419 #endif
26420 }
26421
26422 /* Return whether we should generate PC-relative code for FNDECL. */
26423 bool
26424 rs6000_fndecl_pcrel_p (const_tree fndecl)
26425 {
26426 if (DEFAULT_ABI != ABI_ELFv2)
26427 return false;
26428
26429 struct cl_target_option *opts = target_opts_for_fn (fndecl);
26430
26431 return ((opts->x_rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26432 && TARGET_CMODEL == CMODEL_MEDIUM);
26433 }
26434
26435 /* Return whether we should generate PC-relative code for *FN. */
26436 bool
26437 rs6000_function_pcrel_p (struct function *fn)
26438 {
26439 if (DEFAULT_ABI != ABI_ELFv2)
26440 return false;
26441
26442   /* Optimize the usual case.  */
26443 if (fn == cfun)
26444 return ((rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26445 && TARGET_CMODEL == CMODEL_MEDIUM);
26446
26447 return rs6000_fndecl_pcrel_p (fn->decl);
26448 }
26449
26450 /* Return whether we should generate PC-relative code for the current
26451 function. */
26452 bool
26453 rs6000_pcrel_p ()
26454 {
26455 return (DEFAULT_ABI == ABI_ELFv2
26456 && (rs6000_isa_flags & OPTION_MASK_PCREL) != 0
26457 && TARGET_CMODEL == CMODEL_MEDIUM);
26458 }
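
/* E.g. (a sketch, assuming -mcpu=power10 -mcmodel=medium on ELFv2): a load
   of a local global variable can use a single prefixed PC-relative
   instruction, roughly

     plwz 3,x@pcrel

   instead of the medium-code-model addis/lwz TOC-relative sequence.  */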
26459
26460 \f
26461 /* Given an address (ADDR), a mode (MODE), and what the format of the
26462 non-prefixed address (NON_PREFIXED_FORMAT) is, return the instruction format
26463 for the address. */
26464
26465 enum insn_form
26466 address_to_insn_form (rtx addr,
26467 machine_mode mode,
26468 enum non_prefixed_form non_prefixed_format)
26469 {
26470 /* Single register is easy. */
26471 if (REG_P (addr) || SUBREG_P (addr))
26472 return INSN_FORM_BASE_REG;
26473
26474   /* If the non-prefixed instruction format doesn't support offset addressing,
26475 make sure only indexed addressing is allowed.
26476
26477 We special case SDmode so that the register allocator does not try to move
26478 SDmode through GPR registers, but instead uses the 32-bit integer load and
26479 store instructions for the floating point registers. */
26480 if (non_prefixed_format == NON_PREFIXED_X || (mode == SDmode && TARGET_DFP))
26481 {
26482 if (GET_CODE (addr) != PLUS)
26483 return INSN_FORM_BAD;
26484
26485 rtx op0 = XEXP (addr, 0);
26486 rtx op1 = XEXP (addr, 1);
26487 if (!REG_P (op0) && !SUBREG_P (op0))
26488 return INSN_FORM_BAD;
26489
26490 if (!REG_P (op1) && !SUBREG_P (op1))
26491 return INSN_FORM_BAD;
26492
26493 return INSN_FORM_X;
26494 }
26495
26496 /* Deal with update forms. */
26497 if (GET_RTX_CLASS (GET_CODE (addr)) == RTX_AUTOINC)
26498 return INSN_FORM_UPDATE;
26499
26500 /* Handle PC-relative symbols and labels. Check for both local and
26501 external symbols. Assume labels are always local. TLS symbols
26502 are not PC-relative for rs6000. */
26503 if (TARGET_PCREL)
26504 {
26505 if (LABEL_REF_P (addr))
26506 return INSN_FORM_PCREL_LOCAL;
26507
26508 if (SYMBOL_REF_P (addr) && !SYMBOL_REF_TLS_MODEL (addr))
26509 {
26510 if (!SYMBOL_REF_LOCAL_P (addr))
26511 return INSN_FORM_PCREL_EXTERNAL;
26512 else
26513 return INSN_FORM_PCREL_LOCAL;
26514 }
26515 }
26516
26517 if (GET_CODE (addr) == CONST)
26518 addr = XEXP (addr, 0);
26519
26520 /* Recognize LO_SUM addresses used with TOC and 32-bit addressing. */
26521 if (GET_CODE (addr) == LO_SUM)
26522 return INSN_FORM_LO_SUM;
26523
26524 /* Everything below must be an offset address of some form. */
26525 if (GET_CODE (addr) != PLUS)
26526 return INSN_FORM_BAD;
26527
26528 rtx op0 = XEXP (addr, 0);
26529 rtx op1 = XEXP (addr, 1);
26530
26531 /* Check for indexed addresses. */
26532 if (REG_P (op1) || SUBREG_P (op1))
26533 {
26534 if (REG_P (op0) || SUBREG_P (op0))
26535 return INSN_FORM_X;
26536
26537 return INSN_FORM_BAD;
26538 }
26539
26540 if (!CONST_INT_P (op1))
26541 return INSN_FORM_BAD;
26542
26543 HOST_WIDE_INT offset = INTVAL (op1);
26544 if (!SIGNED_INTEGER_34BIT_P (offset))
26545 return INSN_FORM_BAD;
26546
26547 /* Check for local and external PC-relative addresses. Labels are always
26548 local. TLS symbols are not PC-relative for rs6000. */
26549 if (TARGET_PCREL)
26550 {
26551 if (LABEL_REF_P (op0))
26552 return INSN_FORM_PCREL_LOCAL;
26553
26554 if (SYMBOL_REF_P (op0) && !SYMBOL_REF_TLS_MODEL (op0))
26555 {
26556 if (!SYMBOL_REF_LOCAL_P (op0))
26557 return INSN_FORM_PCREL_EXTERNAL;
26558 else
26559 return INSN_FORM_PCREL_LOCAL;
26560 }
26561 }
26562
26563 /* If it isn't PC-relative, the address must use a base register. */
26564 if (!REG_P (op0) && !SUBREG_P (op0))
26565 return INSN_FORM_BAD;
26566
26567 /* Large offsets must be prefixed. */
26568 if (!SIGNED_INTEGER_16BIT_P (offset))
26569 {
26570 if (TARGET_PREFIXED)
26571 return INSN_FORM_PREFIXED_NUMERIC;
26572
26573 return INSN_FORM_BAD;
26574 }
26575
26576 /* We have a 16-bit offset, see what default instruction format to use. */
26577 if (non_prefixed_format == NON_PREFIXED_DEFAULT)
26578 {
26579 unsigned size = GET_MODE_SIZE (mode);
26580
26581 /* On 64-bit systems, assume 64-bit integers need to use DS form
26582 addresses (for LD/STD). VSX vectors need to use DQ form addresses
26583 (for LXV and STXV). TImode is problematical in that its normal usage
26584 is expected to be GPRs where it wants a DS instruction format, but if
26585 it goes into the vector registers, it wants a DQ instruction
26586 format. */
26587 if (TARGET_POWERPC64 && size >= 8 && GET_MODE_CLASS (mode) == MODE_INT)
26588 non_prefixed_format = NON_PREFIXED_DS;
26589
26590 else if (TARGET_VSX && size >= 16
26591 && (VECTOR_MODE_P (mode) || VECTOR_ALIGNMENT_P (mode)))
26592 non_prefixed_format = NON_PREFIXED_DQ;
26593
26594 else
26595 non_prefixed_format = NON_PREFIXED_D;
26596 }
26597
26598 /* Classify the D/DS/DQ-form addresses. */
26599 switch (non_prefixed_format)
26600 {
26601 /* Instruction format D, all 16 bits are valid. */
26602 case NON_PREFIXED_D:
26603 return INSN_FORM_D;
26604
26605 /* Instruction format DS, bottom 2 bits must be 0. */
26606 case NON_PREFIXED_DS:
26607 if ((offset & 3) == 0)
26608 return INSN_FORM_DS;
26609
26610 else if (TARGET_PREFIXED)
26611 return INSN_FORM_PREFIXED_NUMERIC;
26612
26613 else
26614 return INSN_FORM_BAD;
26615
26616 /* Instruction format DQ, bottom 4 bits must be 0. */
26617 case NON_PREFIXED_DQ:
26618 if ((offset & 15) == 0)
26619 return INSN_FORM_DQ;
26620
26621 else if (TARGET_PREFIXED)
26622 return INSN_FORM_PREFIXED_NUMERIC;
26623
26624 else
26625 return INSN_FORM_BAD;
26626
26627 default:
26628 break;
26629 }
26630
26631 return INSN_FORM_BAD;
26632 }
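
/* Some illustrative classifications (a sketch, assuming a 64-bit target
   with prefixed instructions available and NON_PREFIXED_DEFAULT):

     (reg)                                 -> INSN_FORM_BASE_REG
     (plus (reg) (reg))                    -> INSN_FORM_X
     (plus (reg) (const_int 8))    DImode  -> INSN_FORM_DS
     (plus (reg) (const_int 9))    DImode  -> INSN_FORM_PREFIXED_NUMERIC
     (plus (reg) (const_int 100000))       -> INSN_FORM_PREFIXED_NUMERIC
     (symbol_ref), local, -mpcrel          -> INSN_FORM_PCREL_LOCAL  */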
26633
26634 /* Given address rtx ADDR for a load of MODE, is this legitimate for a
26635 non-prefixed D-form or X-form instruction? NON_PREFIXED_FORMAT is
26636 given NON_PREFIXED_D or NON_PREFIXED_DS to indicate whether we want
26637 a D-form or DS-form instruction. X-form and base_reg are always
26638 allowed. */
26639 bool
26640 address_is_non_pfx_d_or_x (rtx addr, machine_mode mode,
26641 enum non_prefixed_form non_prefixed_format)
26642 {
26643 enum insn_form result_form;
26644
26645 result_form = address_to_insn_form (addr, mode, non_prefixed_format);
26646
26647 switch (non_prefixed_format)
26648 {
26649 case NON_PREFIXED_D:
26650 switch (result_form)
26651 {
26652 case INSN_FORM_X:
26653 case INSN_FORM_D:
26654 case INSN_FORM_DS:
26655 case INSN_FORM_BASE_REG:
26656 return true;
26657 default:
26658 return false;
26659 }
26660 break;
26661 case NON_PREFIXED_DS:
26662 switch (result_form)
26663 {
26664 case INSN_FORM_X:
26665 case INSN_FORM_DS:
26666 case INSN_FORM_BASE_REG:
26667 return true;
26668 default:
26669 return false;
26670 }
26671 break;
26672 default:
26673 break;
26674 }
26675 return false;
26676 }
26677
26678 /* Return true if a REG with a given MODE is loaded from or stored into a MEM
26679    location that uses a non-prefixed D/DS/DQ-form address.  This is used to validate
26680 the load or store with the PCREL_OPT optimization to make sure it is an
26681 instruction that can be optimized.
26682
26683 We need to specify the MODE separately from the REG to allow for loads that
26684 include zero/sign/float extension. */
26685
26686 bool
26687 pcrel_opt_valid_mem_p (rtx reg, machine_mode mode, rtx mem)
26688 {
26689 /* If the instruction is indexed only like LFIWAX/LXSIWAX we cannot do the
26690 PCREL_OPT optimization. */
26691 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mode);
26692 if (non_prefixed == NON_PREFIXED_X)
26693 return false;
26694
26695 /* Check if this is a non-prefixed D/DS/DQ-form instruction. */
26696 rtx addr = XEXP (mem, 0);
26697 enum insn_form iform = address_to_insn_form (addr, mode, non_prefixed);
26698 return (iform == INSN_FORM_BASE_REG
26699 || iform == INSN_FORM_D
26700 || iform == INSN_FORM_DS
26701 || iform == INSN_FORM_DQ);
26702 }
26703
26704 /* Helper function to see if we're potentially looking at lfs/stfs.
26705 - PARALLEL containing a SET and a CLOBBER
26706 - stfs:
26707 - SET is from UNSPEC_SI_FROM_SF to MEM:SI
26708 - CLOBBER is a V4SF
26709 - lfs:
26710 - SET is from UNSPEC_SF_FROM_SI to REG:SF
26711 - CLOBBER is a DI
26712 */
26713
26714 static bool
26715 is_lfs_stfs_insn (rtx_insn *insn)
26716 {
26717 rtx pattern = PATTERN (insn);
26718 if (GET_CODE (pattern) != PARALLEL)
26719 return false;
26720
26721 /* This should be a parallel with exactly one set and one clobber. */
26722 if (XVECLEN (pattern, 0) != 2)
26723 return false;
26724
26725 rtx set = XVECEXP (pattern, 0, 0);
26726 if (GET_CODE (set) != SET)
26727 return false;
26728
26729 rtx clobber = XVECEXP (pattern, 0, 1);
26730 if (GET_CODE (clobber) != CLOBBER)
26731 return false;
26732
26733   /* All we care about is that the destination of the SET is a mem:SI,
26734 the source should be an UNSPEC_SI_FROM_SF, and the clobber
26735 should be a scratch:V4SF. */
26736
26737 rtx dest = SET_DEST (set);
26738 rtx src = SET_SRC (set);
26739 rtx scratch = SET_DEST (clobber);
26740
26741 if (GET_CODE (src) != UNSPEC)
26742 return false;
26743
26744 /* stfs case. */
26745 if (XINT (src, 1) == UNSPEC_SI_FROM_SF
26746 && GET_CODE (dest) == MEM && GET_MODE (dest) == SImode
26747 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == V4SFmode)
26748 return true;
26749
26750 /* lfs case. */
26751 if (XINT (src, 1) == UNSPEC_SF_FROM_SI
26752 && GET_CODE (dest) == REG && GET_MODE (dest) == SFmode
26753 && GET_CODE (scratch) == SCRATCH && GET_MODE (scratch) == DImode)
26754 return true;
26755
26756 return false;
26757 }
26758
26759 /* Helper function to take a REG and a MODE and turn it into the non-prefixed
26760 instruction format (D/DS/DQ) used for offset memory. */
26761
26762 enum non_prefixed_form
26763 reg_to_non_prefixed (rtx reg, machine_mode mode)
26764 {
26765 /* If it isn't a register, use the defaults. */
26766 if (!REG_P (reg) && !SUBREG_P (reg))
26767 return NON_PREFIXED_DEFAULT;
26768
26769 unsigned int r = reg_or_subregno (reg);
26770
26771 /* If we have a pseudo, use the default instruction format. */
26772 if (!HARD_REGISTER_NUM_P (r))
26773 return NON_PREFIXED_DEFAULT;
26774
26775 unsigned size = GET_MODE_SIZE (mode);
26776
26777   /* FPR registers use D-form addresses for scalars, and DQ-form addresses for
26778      vectors, IEEE 128-bit floating point, and 128-bit integers.  Before power9,
26779      only indexed addressing was available for vectors.  */
26780 if (FP_REGNO_P (r))
26781 {
26782 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26783 return NON_PREFIXED_D;
26784
26785 else if (size < 8)
26786 return NON_PREFIXED_X;
26787
26788 else if (TARGET_VSX && size >= 16
26789 && (VECTOR_MODE_P (mode)
26790 || VECTOR_ALIGNMENT_P (mode)
26791 || mode == TImode || mode == CTImode))
26792 return (TARGET_P9_VECTOR) ? NON_PREFIXED_DQ : NON_PREFIXED_X;
26793
26794 else
26795 return NON_PREFIXED_DEFAULT;
26796 }
26797
26798   /* Altivec registers use DS-form addresses for scalars, and DQ-form addresses
26799      for vectors, IEEE 128-bit floating point, and 128-bit integers.  Before
26800      power9, only indexed addressing was available.  */
26801 else if (ALTIVEC_REGNO_P (r))
26802 {
26803 if (!TARGET_P9_VECTOR)
26804 return NON_PREFIXED_X;
26805
26806 if (mode == SFmode || size == 8 || FLOAT128_2REG_P (mode))
26807 return NON_PREFIXED_DS;
26808
26809 else if (size < 8)
26810 return NON_PREFIXED_X;
26811
26812 else if (TARGET_VSX && size >= 16
26813 && (VECTOR_MODE_P (mode)
26814 || VECTOR_ALIGNMENT_P (mode)
26815 || mode == TImode || mode == CTImode))
26816 return NON_PREFIXED_DQ;
26817
26818 else
26819 return NON_PREFIXED_DEFAULT;
26820 }
26821
26822 /* GPR registers use DS-mode for 64-bit items on 64-bit systems, and D-mode
26823 otherwise. Assume that any other register, such as LR, CRs, etc., will go
26824 through the GPR registers for memory operations. */
26825 else if (TARGET_POWERPC64 && size >= 8)
26826 return NON_PREFIXED_DS;
26827
26828 return NON_PREFIXED_D;
26829 }
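
/* A few illustrative mappings, assuming a power10-class target with VSX and
   the power9 vector instructions enabled (the actual result depends on the
   target flags tested above):

     DFmode in an FPR       -> NON_PREFIXED_D   (lfd takes a D-form offset)
     DImode in a GPR        -> NON_PREFIXED_DS  (ld takes a DS-form offset)
     V2DFmode in a VSX reg  -> NON_PREFIXED_DQ  (lxv takes a DQ-form offset)  */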
26830
26831 \f
26832 /* Whether a load instruction is a prefixed instruction. This is called from
26833 the prefixed attribute processing. */
26834
26835 bool
26836 prefixed_load_p (rtx_insn *insn)
26837 {
26838 /* Validate the insn to make sure it is a normal load insn. */
26839 extract_insn_cached (insn);
26840 if (recog_data.n_operands < 2)
26841 return false;
26842
26843 rtx reg = recog_data.operand[0];
26844 rtx mem = recog_data.operand[1];
26845
26846 if (!REG_P (reg) && !SUBREG_P (reg))
26847 return false;
26848
26849 if (!MEM_P (mem))
26850 return false;
26851
26852 /* Prefixed load instructions do not support update or indexed forms. */
26853 if (get_attr_indexed (insn) == INDEXED_YES
26854 || get_attr_update (insn) == UPDATE_YES)
26855 return false;
26856
26857 /* LWA uses the DS format instead of the D format that LWZ uses. */
26858 enum non_prefixed_form non_prefixed;
26859 machine_mode reg_mode = GET_MODE (reg);
26860 machine_mode mem_mode = GET_MODE (mem);
26861
26862 if (mem_mode == SImode && reg_mode == DImode
26863 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26864 non_prefixed = NON_PREFIXED_DS;
26865
26866 else
26867 non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26868
26869 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26870 return address_is_prefixed (XEXP (mem, 0), mem_mode, NON_PREFIXED_DEFAULT);
26871 else
26872 return address_is_prefixed (XEXP (mem, 0), mem_mode, non_prefixed);
26873 }
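
/* As a sketch of what this distinguishes (offsets invented for the
   example):

     ld 3,8(4)          # DS-form, 16-bit offset, non-prefixed

   stays PREFIXED_NO, while an offset too large for the DS form, e.g.

     pld 3,100000(4)    # prefixed form, 34-bit offset

   makes the load prefixed.  */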
26874
26875 /* Whether a store instruction is a prefixed instruction. This is called from
26876 the prefixed attribute processing. */
26877
26878 bool
26879 prefixed_store_p (rtx_insn *insn)
26880 {
26881 /* Validate the insn to make sure it is a normal store insn. */
26882 extract_insn_cached (insn);
26883 if (recog_data.n_operands < 2)
26884 return false;
26885
26886 rtx mem = recog_data.operand[0];
26887 rtx reg = recog_data.operand[1];
26888
26889 if (!REG_P (reg) && !SUBREG_P (reg))
26890 return false;
26891
26892 if (!MEM_P (mem))
26893 return false;
26894
26895 /* Prefixed store instructions do not support update or indexed forms. */
26896 if (get_attr_indexed (insn) == INDEXED_YES
26897 || get_attr_update (insn) == UPDATE_YES)
26898 return false;
26899
26900 machine_mode mem_mode = GET_MODE (mem);
26901 rtx addr = XEXP (mem, 0);
26902 enum non_prefixed_form non_prefixed = reg_to_non_prefixed (reg, mem_mode);
26903
26904 /* Need to make sure we aren't looking at a stfs which doesn't look
26905 like the other things reg_to_non_prefixed/address_is_prefixed
26906 looks for. */
26907 if (non_prefixed == NON_PREFIXED_X && is_lfs_stfs_insn (insn))
26908 return address_is_prefixed (addr, mem_mode, NON_PREFIXED_DEFAULT);
26909 else
26910 return address_is_prefixed (addr, mem_mode, non_prefixed);
26911 }
26912
26913 /* Whether a load immediate or add instruction is a prefixed instruction. This
26914 is called from the prefixed attribute processing. */
26915
26916 bool
26917 prefixed_paddi_p (rtx_insn *insn)
26918 {
26919 rtx set = single_set (insn);
26920 if (!set)
26921 return false;
26922
26923 rtx dest = SET_DEST (set);
26924 rtx src = SET_SRC (set);
26925
26926 if (!REG_P (dest) && !SUBREG_P (dest))
26927 return false;
26928
26929 /* Is this a load immediate that can't be done with a simple ADDI or
26930 ADDIS? */
26931 if (CONST_INT_P (src))
26932 return (satisfies_constraint_eI (src)
26933 && !satisfies_constraint_I (src)
26934 && !satisfies_constraint_L (src));
26935
26936 /* Is this a PADDI instruction that can't be done with a simple ADDI or
26937 ADDIS? */
26938 if (GET_CODE (src) == PLUS)
26939 {
26940 rtx op1 = XEXP (src, 1);
26941
26942 return (CONST_INT_P (op1)
26943 && satisfies_constraint_eI (op1)
26944 && !satisfies_constraint_I (op1)
26945 && !satisfies_constraint_L (op1));
26946 }
26947
26948 /* If not, is it a load of a PC-relative address? */
26949 if (!TARGET_PCREL || GET_MODE (dest) != Pmode)
26950 return false;
26951
26952 if (!SYMBOL_REF_P (src) && !LABEL_REF_P (src) && GET_CODE (src) != CONST)
26953 return false;
26954
26955 enum insn_form iform = address_to_insn_form (src, Pmode,
26956 NON_PREFIXED_DEFAULT);
26957
26958 return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
26959 }
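
/* Example (constants invented for illustration): 0x7fff fits ADDI's 16-bit
   immediate and 0x12340000 fits ADDIS, so neither is counted here, but a
   constant such as 0x123456 needs the 34-bit immediate of

     paddi 3,0,0x123456

   and so is counted as a prefixed insn.  */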
26960
26961 /* Whether the next instruction needs a 'p' prefix issued before the
26962 instruction is printed out. */
26963 static bool prepend_p_to_next_insn;
26964
26965 /* Define FINAL_PRESCAN_INSN if some processing needs to be done before
26966 outputting the assembler code. On the PowerPC, we remember if the current
26967 insn is a prefixed insn where we need to emit a 'p' before the insn.
26968
26969 In addition, if the insn is part of a PC-relative reference to an external
26970 label optimization, this is recorded also. */
26971 void
26972 rs6000_final_prescan_insn (rtx_insn *insn, rtx [], int)
26973 {
26974 prepend_p_to_next_insn = (get_attr_maybe_prefixed (insn)
26975 == MAYBE_PREFIXED_YES
26976 && get_attr_prefixed (insn) == PREFIXED_YES);
26977 return;
26978 }
26979
26980 /* Define ASM_OUTPUT_OPCODE to do anything special before emitting an opcode.
26981 We use it to emit a 'p' for prefixed insns, based on the flag set in
26982 FINAL_PRESCAN_INSN. */
26983 void
26984 rs6000_asm_output_opcode (FILE *stream)
26985 {
26986 if (prepend_p_to_next_insn)
26987 {
26988 fprintf (stream, "p");
26989
26990 /* Reset the flag in the case where there are separate insn lines in the
26991 sequence, so the 'p' is only emitted for the first line. This shows up
26992 when we are doing the PCREL_OPT optimization, where the label created
26993 with %r<n> would otherwise get a leading 'p' printed. */
26994 prepend_p_to_next_insn = false;
26995 }
26996
26997 return;
26998 }
26999
27000 /* Emit the relocation to tie the next instruction to a previous instruction
27001 that loads up an external address. This is used to do the PCREL_OPT
27002 optimization. Note, the label is generated after the PLD of the GOT
27003 PC-relative address to allow the assembler to insert NOPs before the PLD
27004 instruction. The operand is a constant integer that is the label
27005 number. */
27006
27007 void
27008 output_pcrel_opt_reloc (rtx label_num)
27009 {
27010 rtx operands[1] = { label_num };
27011 output_asm_insn (".reloc .Lpcrel%0-8,R_PPC64_PCREL_OPT,.-(.Lpcrel%0-8)",
27012 operands);
27013 }
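
/* For example, a label number of 42 (invented for illustration) prints:

     .reloc .Lpcrel42-8,R_PPC64_PCREL_OPT,.-(.Lpcrel42-8)

   i.e. the relocation names the PLD that sits 8 bytes before .Lpcrel42 and
   ties it to the current instruction.  */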
27014
27015 /* Adjust the length of an INSN. LENGTH is the currently-computed length and
27016 should be adjusted to reflect any required changes. This macro is used when
27017 there is some systematic length adjustment required that would be difficult
27018 to express in the length attribute.
27019
27020 On PowerPC, we use this to adjust the length of an instruction if one or
27021 more prefixed instructions are generated, using the attribute
27022 max_prefixed_insns. A prefixed instruction is 8 bytes instead of 4, but the
27023 hardware requires that a prefixed instruction not cross a 64-byte
27024 boundary. This means the compiler has to assume the length of the first
27025 prefixed instruction is 12 bytes instead of 8 bytes. Since the length is
27026 already set for the non-prefixed instruction, we just need to update for the
27027 difference. */
27028
27029 int
27030 rs6000_adjust_insn_length (rtx_insn *insn, int length)
27031 {
27032 if (TARGET_PREFIXED && NONJUMP_INSN_P (insn))
27033 {
27034 rtx pattern = PATTERN (insn);
27035 if (GET_CODE (pattern) != USE && GET_CODE (pattern) != CLOBBER
27036 && get_attr_prefixed (insn) == PREFIXED_YES)
27037 {
27038 int num_prefixed = get_attr_max_prefixed_insns (insn);
27039 length += 4 * (num_prefixed + 1);
27040 }
27041 }
27042
27043 return length;
27044 }
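
/* Worked example: for an insn whose max_prefixed_insns attribute is 1, the
   base length of 4 is increased by 4 * (1 + 1) = 8, giving 12 bytes: 8 for
   the prefixed instruction itself plus 4 bytes of slack in case the
   assembler must insert a NOP to keep it from crossing a 64-byte
   boundary.  */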
27045
27046 \f
27047 #ifdef HAVE_GAS_HIDDEN
27048 # define USE_HIDDEN_LINKONCE 1
27049 #else
27050 # define USE_HIDDEN_LINKONCE 0
27051 #endif
27052
27053 /* Fills in the label name that should be used for a 476 link stack thunk. */
27054
27055 void
27056 get_ppc476_thunk_name (char name[32])
27057 {
27058 gcc_assert (TARGET_LINK_STACK);
27059
27060 if (USE_HIDDEN_LINKONCE)
27061 sprintf (name, "__ppc476.get_thunk");
27062 else
27063 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
27064 }
27065
27066 /* This function emits the simple thunk routine that is used to preserve
27067 the link stack on the 476 CPU. */
27068
27069 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
27070 static void
27071 rs6000_code_end (void)
27072 {
27073 char name[32];
27074 tree decl;
27075
27076 if (!TARGET_LINK_STACK)
27077 return;
27078
27079 get_ppc476_thunk_name (name);
27080
27081 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
27082 build_function_type_list (void_type_node, NULL_TREE));
27083 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
27084 NULL_TREE, void_type_node);
27085 TREE_PUBLIC (decl) = 1;
27086 TREE_STATIC (decl) = 1;
27087
27088 #if RS6000_WEAK
27089 if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
27090 {
27091 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
27092 targetm.asm_out.unique_section (decl, 0);
27093 switch_to_section (get_named_section (decl, NULL, 0));
27094 DECL_WEAK (decl) = 1;
27095 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
27096 targetm.asm_out.globalize_label (asm_out_file, name);
27097 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
27098 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
27099 }
27100 else
27101 #endif
27102 {
27103 switch_to_section (text_section);
27104 ASM_OUTPUT_LABEL (asm_out_file, name);
27105 }
27106
27107 DECL_INITIAL (decl) = make_node (BLOCK);
27108 current_function_decl = decl;
27109 allocate_struct_function (decl, false);
27110 init_function_start (decl);
27111 first_function_block_is_cold = false;
27112 /* Make sure unwind info is emitted for the thunk if needed. */
27113 final_start_function (emit_barrier (), asm_out_file, 1);
27114
27115 fputs ("\tblr\n", asm_out_file);
27116
27117 final_end_function ();
27118 init_insn_lengths ();
27119 free_after_compilation (cfun);
27120 set_cfun (NULL);
27121 current_function_decl = NULL;
27122 }
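
/* The emitted thunk is just a return instruction; roughly (label and
   section directives vary by target):

     __ppc476.get_thunk:
             blr

   A bl to the thunk pushes the return address on the 476 link stack and
   the blr pops it again, so code that materializes its own address this
   way keeps the link stack balanced.  */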
27123
27124 /* Add r30 to hard reg set if the prologue sets it up and it is not
27125 pic_offset_table_rtx. */
27126
27127 static void
27128 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
27129 {
27130 if (!TARGET_SINGLE_PIC_BASE
27131 && TARGET_TOC
27132 && TARGET_MINIMAL_TOC
27133 && !constant_pool_empty_p ())
27134 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
27135 if (cfun->machine->split_stack_argp_used)
27136 add_to_hard_reg_set (&set->set, Pmode, 12);
27137
27138 /* Make sure the hard reg set doesn't include r2, which was possibly added
27139 via PIC_OFFSET_TABLE_REGNUM. */
27140 if (TARGET_TOC)
27141 remove_from_hard_reg_set (&set->set, Pmode, TOC_REGNUM);
27142 }
27143
27144 \f
27145 /* Helper function for rs6000_split_logical to emit a logical instruction after
27146 splitting the operation into single GPR registers.
27147
27148 DEST is the destination register.
27149 OP1 and OP2 are the input source registers.
27150 CODE is the base operation (AND, IOR, XOR, NOT).
27151 MODE is the machine mode.
27152 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27153 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27154 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27155
27156 static void
27157 rs6000_split_logical_inner (rtx dest,
27158 rtx op1,
27159 rtx op2,
27160 enum rtx_code code,
27161 machine_mode mode,
27162 bool complement_final_p,
27163 bool complement_op1_p,
27164 bool complement_op2_p)
27165 {
27166 rtx bool_rtx;
27167
27168 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
27169 if (op2 && CONST_INT_P (op2)
27170 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
27171 && !complement_final_p && !complement_op1_p && !complement_op2_p)
27172 {
27173 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
27174 HOST_WIDE_INT value = INTVAL (op2) & mask;
27175
27176 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
27177 if (code == AND)
27178 {
27179 if (value == 0)
27180 {
27181 emit_insn (gen_rtx_SET (dest, const0_rtx));
27182 return;
27183 }
27184
27185 else if (value == mask)
27186 {
27187 if (!rtx_equal_p (dest, op1))
27188 emit_insn (gen_rtx_SET (dest, op1));
27189 return;
27190 }
27191 }
27192
27193 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
27194 into separate ORI/ORIS or XORI/XORIS instructions. */
27195 else if (code == IOR || code == XOR)
27196 {
27197 if (value == 0)
27198 {
27199 if (!rtx_equal_p (dest, op1))
27200 emit_insn (gen_rtx_SET (dest, op1));
27201 return;
27202 }
27203 }
27204 }
27205
27206 if (code == AND && mode == SImode
27207 && !complement_final_p && !complement_op1_p && !complement_op2_p)
27208 {
27209 emit_insn (gen_andsi3 (dest, op1, op2));
27210 return;
27211 }
27212
27213 if (complement_op1_p)
27214 op1 = gen_rtx_NOT (mode, op1);
27215
27216 if (complement_op2_p)
27217 op2 = gen_rtx_NOT (mode, op2);
27218
27219 /* For canonical RTL, if only one arm is inverted it is the first. */
27220 if (!complement_op1_p && complement_op2_p)
27221 std::swap (op1, op2);
27222
27223 bool_rtx = ((code == NOT)
27224 ? gen_rtx_NOT (mode, op1)
27225 : gen_rtx_fmt_ee (code, mode, op1, op2));
27226
27227 if (complement_final_p)
27228 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
27229
27230 emit_insn (gen_rtx_SET (dest, bool_rtx));
27231 }
27232
27233 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
27234 operations are split immediately during RTL generation to allow for more
27235 optimizations of the AND/IOR/XOR.
27236
27237 OPERANDS is an array containing the destination and two input operands.
27238 CODE is the base operation (AND, IOR, XOR, NOT).
27240 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27241 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27242 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27245
27246 static void
27247 rs6000_split_logical_di (rtx operands[3],
27248 enum rtx_code code,
27249 bool complement_final_p,
27250 bool complement_op1_p,
27251 bool complement_op2_p)
27252 {
27253 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
27254 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
27255 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
27256 enum hi_lo { hi = 0, lo = 1 };
27257 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
27258 size_t i;
27259
27260 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
27261 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
27262 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
27263 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
27264
27265 if (code == NOT)
27266 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
27267 else
27268 {
27269 if (!CONST_INT_P (operands[2]))
27270 {
27271 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
27272 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
27273 }
27274 else
27275 {
27276 HOST_WIDE_INT value = INTVAL (operands[2]);
27277 HOST_WIDE_INT value_hi_lo[2];
27278
27279 gcc_assert (!complement_final_p);
27280 gcc_assert (!complement_op1_p);
27281 gcc_assert (!complement_op2_p);
27282
27283 value_hi_lo[hi] = value >> 32;
27284 value_hi_lo[lo] = value & lower_32bits;
27285
27286 for (i = 0; i < 2; i++)
27287 {
27288 HOST_WIDE_INT sub_value = value_hi_lo[i];
27289
27290 if (sub_value & sign_bit)
27291 sub_value |= upper_32bits;
27292
27293 op2_hi_lo[i] = GEN_INT (sub_value);
27294
27295 /* If this is an AND instruction, check to see if we need to load
27296 the value in a register. */
27297 if (code == AND && sub_value != -1 && sub_value != 0
27298 && !and_operand (op2_hi_lo[i], SImode))
27299 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
27300 }
27301 }
27302 }
27303
27304 for (i = 0; i < 2; i++)
27305 {
27306 /* Split large IOR/XOR operations. */
27307 if ((code == IOR || code == XOR)
27308 && CONST_INT_P (op2_hi_lo[i])
27309 && !complement_final_p
27310 && !complement_op1_p
27311 && !complement_op2_p
27312 && !logical_const_operand (op2_hi_lo[i], SImode))
27313 {
27314 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
27315 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
27316 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
27317 rtx tmp = gen_reg_rtx (SImode);
27318
27319 /* Make sure the constant is sign extended. */
27320 if ((hi_16bits & sign_bit) != 0)
27321 hi_16bits |= upper_32bits;
27322
27323 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
27324 code, SImode, false, false, false);
27325
27326 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
27327 code, SImode, false, false, false);
27328 }
27329 else
27330 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
27331 code, SImode, complement_final_p,
27332 complement_op1_p, complement_op2_p);
27333 }
27334
27335 return;
27336 }
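
/* Worked example of the IOR/XOR splitting above, with an invented constant:
   XORing a 32-bit half with 0x12345678, which no single xori/xoris
   immediate can express, becomes

     xoris tmp,src,0x1234   # src ^ 0x12340000
     xori  dst,tmp,0x5678   # ... ^ 0x5678 == src ^ 0x12345678  */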
27337
27338 /* Split the insns that make up boolean operations operating on multiple GPR
27339 registers. The boolean MD patterns ensure that the inputs either are
27340 exactly the same as the output registers, or there is no overlap.
27341
27342 OPERANDS is an array containing the destination and two input operands.
27343 CODE is the base operation (AND, IOR, XOR, NOT).
27344 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
27345 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
27346 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
27347
27348 void
27349 rs6000_split_logical (rtx operands[3],
27350 enum rtx_code code,
27351 bool complement_final_p,
27352 bool complement_op1_p,
27353 bool complement_op2_p)
27354 {
27355 machine_mode mode = GET_MODE (operands[0]);
27356 machine_mode sub_mode;
27357 rtx op0, op1, op2;
27358 int sub_size, regno0, regno1, nregs, i;
27359
27360 /* If this is DImode, use the specialized version that can run before
27361 register allocation. */
27362 if (mode == DImode && !TARGET_POWERPC64)
27363 {
27364 rs6000_split_logical_di (operands, code, complement_final_p,
27365 complement_op1_p, complement_op2_p);
27366 return;
27367 }
27368
27369 op0 = operands[0];
27370 op1 = operands[1];
27371 op2 = (code == NOT) ? NULL_RTX : operands[2];
27372 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
27373 sub_size = GET_MODE_SIZE (sub_mode);
27374 regno0 = REGNO (op0);
27375 regno1 = REGNO (op1);
27376
27377 gcc_assert (reload_completed);
27378 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
27379 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
27380
27381 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
27382 gcc_assert (nregs > 1);
27383
27384 if (op2 && REG_P (op2))
27385 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
27386
27387 for (i = 0; i < nregs; i++)
27388 {
27389 int offset = i * sub_size;
27390 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
27391 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
27392 rtx sub_op2 = ((code == NOT)
27393 ? NULL_RTX
27394 : simplify_subreg (sub_mode, op2, mode, offset));
27395
27396 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
27397 complement_final_p, complement_op1_p,
27398 complement_op2_p);
27399 }
27400
27401 return;
27402 }
27403
27404 /* Emit instructions to move SRC to DST. Called by splitters for
27405 multi-register moves. It will emit at most one instruction for
27406 each register that is accessed; that is, it won't emit li/lis pairs
27407 (or equivalent for 64-bit code). One of SRC or DST must be a hard
27408 register. */
27409
27410 void
27411 rs6000_split_multireg_move (rtx dst, rtx src)
27412 {
27413 /* The register number of the first register being moved. */
27414 int reg;
27415 /* The mode that is to be moved. */
27416 machine_mode mode;
27417 /* The mode that the move is being done in, and its size. */
27418 machine_mode reg_mode;
27419 int reg_mode_size;
27420 /* The number of registers that will be moved. */
27421 int nregs;
27422
27423 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
27424 mode = GET_MODE (dst);
27425 nregs = hard_regno_nregs (reg, mode);
27426
27427 /* If we have a vector quad register for MMA, and this is a load or store,
27428 see if we can use vector paired load/stores. */
27429 if (mode == XOmode && TARGET_MMA
27430 && (MEM_P (dst) || MEM_P (src)))
27431 {
27432 reg_mode = OOmode;
27433 nregs /= 2;
27434 }
27435 /* If we have a vector pair/quad mode, split it into two/four separate
27436 vectors. */
27437 else if (mode == OOmode || mode == XOmode)
27438 reg_mode = V1TImode;
27439 else if (FP_REGNO_P (reg))
27440 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
27441 (TARGET_HARD_FLOAT ? DFmode : SFmode);
27442 else if (ALTIVEC_REGNO_P (reg))
27443 reg_mode = V16QImode;
27444 else
27445 reg_mode = word_mode;
27446 reg_mode_size = GET_MODE_SIZE (reg_mode);
27447
27448 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
27449
27450 /* TDmode residing in FP registers is special, since the ISA requires that
27451 the lower-numbered word of a register pair is always the most significant
27452 word, even in little-endian mode. This does not match the usual subreg
27453 semantics, so we cannot use simplify_gen_subreg in those cases. Access
27454 the appropriate constituent registers "by hand" in little-endian mode.
27455
27456 Note we do not need to check for destructive overlap here since TDmode
27457 can only reside in even/odd register pairs. */
27458 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
27459 {
27460 rtx p_src, p_dst;
27461 int i;
27462
27463 for (i = 0; i < nregs; i++)
27464 {
27465 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
27466 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
27467 else
27468 p_src = simplify_gen_subreg (reg_mode, src, mode,
27469 i * reg_mode_size);
27470
27471 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
27472 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
27473 else
27474 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
27475 i * reg_mode_size);
27476
27477 emit_insn (gen_rtx_SET (p_dst, p_src));
27478 }
27479
27480 return;
27481 }
27482
27483 /* The __vector_pair and __vector_quad modes are multi-register
27484 modes, so if we have to load or store the registers, we have to be
27485 careful to properly swap them if we're in little endian mode
27486 below. This means the last register gets the first memory
27487 location. We also need to be careful of using the right register
27488 numbers if we are splitting XO to OO. */
27489 if (mode == OOmode || mode == XOmode)
27490 {
27491 nregs = hard_regno_nregs (reg, mode);
27492 int reg_mode_nregs = hard_regno_nregs (reg, reg_mode);
27493 if (MEM_P (dst))
27494 {
27495 unsigned offset = 0;
27496 unsigned size = GET_MODE_SIZE (reg_mode);
27497
27498 /* If we are reading an accumulator register, we have to
27499 deprime it before we can access it. */
27500 if (TARGET_MMA
27501 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27502 emit_insn (gen_mma_xxmfacc (src, src));
27503
27504 for (int i = 0; i < nregs; i += reg_mode_nregs)
27505 {
27506 unsigned subreg
27507 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27508 rtx dst2 = adjust_address (dst, reg_mode, offset);
27509 rtx src2 = gen_rtx_REG (reg_mode, reg + subreg);
27510 offset += size;
27511 emit_insn (gen_rtx_SET (dst2, src2));
27512 }
27513
27514 return;
27515 }
27516
27517 if (MEM_P (src))
27518 {
27519 unsigned offset = 0;
27520 unsigned size = GET_MODE_SIZE (reg_mode);
27521
27522 for (int i = 0; i < nregs; i += reg_mode_nregs)
27523 {
27524 unsigned subreg
27525 = WORDS_BIG_ENDIAN ? i : (nregs - reg_mode_nregs - i);
27526 rtx dst2 = gen_rtx_REG (reg_mode, reg + subreg);
27527 rtx src2 = adjust_address (src, reg_mode, offset);
27528 offset += size;
27529 emit_insn (gen_rtx_SET (dst2, src2));
27530 }
27531
27532 /* If we are writing an accumulator register, we have to
27533 prime it after we've written it. */
27534 if (TARGET_MMA
27535 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27536 emit_insn (gen_mma_xxmtacc (dst, dst));
27537
27538 return;
27539 }
27540
27541 if (GET_CODE (src) == UNSPEC
27542 || GET_CODE (src) == UNSPEC_VOLATILE)
27543 {
27544 gcc_assert (XINT (src, 1) == UNSPEC_VSX_ASSEMBLE
27545 || XINT (src, 1) == UNSPECV_MMA_ASSEMBLE);
27546 gcc_assert (REG_P (dst));
27547 if (GET_MODE (src) == XOmode)
27548 gcc_assert (FP_REGNO_P (REGNO (dst)));
27549 if (GET_MODE (src) == OOmode)
27550 gcc_assert (VSX_REGNO_P (REGNO (dst)));
27551
27552 int nvecs = XVECLEN (src, 0);
27553 for (int i = 0; i < nvecs; i++)
27554 {
27555 rtx op;
27556 int regno = reg + i;
27557
27558 if (WORDS_BIG_ENDIAN)
27559 {
27560 op = XVECEXP (src, 0, i);
27561
27562 /* If we are loading an even VSX register and the memory location
27563 is adjacent to the next register's memory location (if any),
27564 then we can load them both with one LXVP instruction. */
27565 if ((regno & 1) == 0)
27566 {
27567 rtx op2 = XVECEXP (src, 0, i + 1);
27568 if (adjacent_mem_locations (op, op2) == op)
27569 {
27570 op = adjust_address (op, OOmode, 0);
27571 /* Skip the next register, since we're going to
27572 load it together with this register. */
27573 i++;
27574 }
27575 }
27576 }
27577 else
27578 {
27579 op = XVECEXP (src, 0, nvecs - i - 1);
27580
27581 /* If we are loading an even VSX register and the memory location
27582 is adjacent to the next register's memory location (if any),
27583 then we can load them both with one LXVP instruction. */
27584 if ((regno & 1) == 0)
27585 {
27586 rtx op2 = XVECEXP (src, 0, nvecs - i - 2);
27587 if (adjacent_mem_locations (op2, op) == op2)
27588 {
27589 op = adjust_address (op2, OOmode, 0);
27590 /* Skip the next register, since we're going to
27591 load it together with this register. */
27592 i++;
27593 }
27594 }
27595 }
27596
27597 rtx dst_i = gen_rtx_REG (GET_MODE (op), regno);
27598 emit_insn (gen_rtx_SET (dst_i, op));
27599 }
27600
27601 /* We are writing an accumulator register, so we have to
27602 prime it after we've written it. */
27603 if (GET_MODE (src) == XOmode)
27604 emit_insn (gen_mma_xxmtacc (dst, dst));
27605
27606 return;
27607 }
27608
27609 /* Register -> register moves can use common code. */
27610 }
27611
27612 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
27613 {
27614 /* If we are reading an accumulator register, we have to
27615 deprime it before we can access it. */
27616 if (TARGET_MMA
27617 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27618 emit_insn (gen_mma_xxmfacc (src, src));
27619
27620 /* Move register range backwards, if we might have destructive
27621 overlap. */
27622 int i;
27623 /* XO/OO are opaque so cannot use subregs. */
27624 if (mode == OOmode || mode == XOmode )
27625 {
27626 for (i = nregs - 1; i >= 0; i--)
27627 {
27628 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + i);
27629 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + i);
27630 emit_insn (gen_rtx_SET (dst_i, src_i));
27631 }
27632 }
27633 else
27634 {
27635 for (i = nregs - 1; i >= 0; i--)
27636 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27637 i * reg_mode_size),
27638 simplify_gen_subreg (reg_mode, src, mode,
27639 i * reg_mode_size)));
27640 }
27641
27642 /* If we are writing an accumulator register, we have to
27643 prime it after we've written it. */
27644 if (TARGET_MMA
27645 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27646 emit_insn (gen_mma_xxmtacc (dst, dst));
27647 }
27648 else
27649 {
27650 int i;
27651 int j = -1;
27652 bool used_update = false;
27653 rtx restore_basereg = NULL_RTX;
27654
27655 if (MEM_P (src) && INT_REGNO_P (reg))
27656 {
27657 rtx breg;
27658
27659 if (GET_CODE (XEXP (src, 0)) == PRE_INC
27660 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
27661 {
27662 rtx delta_rtx;
27663 breg = XEXP (XEXP (src, 0), 0);
27664 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
27665 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
27666 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
27667 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27668 src = replace_equiv_address (src, breg);
27669 }
27670 else if (! rs6000_offsettable_memref_p (src, reg_mode, true))
27671 {
27672 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
27673 {
27674 rtx basereg = XEXP (XEXP (src, 0), 0);
27675 if (TARGET_UPDATE)
27676 {
27677 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
27678 emit_insn (gen_rtx_SET (ndst,
27679 gen_rtx_MEM (reg_mode,
27680 XEXP (src, 0))));
27681 used_update = true;
27682 }
27683 else
27684 emit_insn (gen_rtx_SET (basereg,
27685 XEXP (XEXP (src, 0), 1)));
27686 src = replace_equiv_address (src, basereg);
27687 }
27688 else
27689 {
27690 rtx basereg = gen_rtx_REG (Pmode, reg);
27691 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
27692 src = replace_equiv_address (src, basereg);
27693 }
27694 }
27695
27696 breg = XEXP (src, 0);
27697 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
27698 breg = XEXP (breg, 0);
27699
27700 /* If the base register we are using to address memory is
27701 also a destination reg, then change that register last. */
27702 if (REG_P (breg)
27703 && REGNO (breg) >= REGNO (dst)
27704 && REGNO (breg) < REGNO (dst) + nregs)
27705 j = REGNO (breg) - REGNO (dst);
27706 }
27707 else if (MEM_P (dst) && INT_REGNO_P (reg))
27708 {
27709 rtx breg;
27710
27711 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
27712 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
27713 {
27714 rtx delta_rtx;
27715 breg = XEXP (XEXP (dst, 0), 0);
27716 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
27717 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
27718 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
27719
27720 /* We have to update the breg before doing the store.
27721 Use store with update, if available. */
27722
27723 if (TARGET_UPDATE)
27724 {
27725 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27726 emit_insn (TARGET_32BIT
27727 ? (TARGET_POWERPC64
27728 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
27729 : gen_movsi_si_update (breg, breg, delta_rtx, nsrc))
27730 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
27731 used_update = true;
27732 }
27733 else
27734 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
27735 dst = replace_equiv_address (dst, breg);
27736 }
27737 else if (!rs6000_offsettable_memref_p (dst, reg_mode, true)
27738 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
27739 {
27740 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
27741 {
27742 rtx basereg = XEXP (XEXP (dst, 0), 0);
27743 if (TARGET_UPDATE)
27744 {
27745 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
27746 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
27747 XEXP (dst, 0)),
27748 nsrc));
27749 used_update = true;
27750 }
27751 else
27752 emit_insn (gen_rtx_SET (basereg,
27753 XEXP (XEXP (dst, 0), 1)));
27754 dst = replace_equiv_address (dst, basereg);
27755 }
27756 else
27757 {
27758 rtx basereg = XEXP (XEXP (dst, 0), 0);
27759 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
27760 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
27761 && REG_P (basereg)
27762 && REG_P (offsetreg)
27763 && REGNO (basereg) != REGNO (offsetreg));
27764 if (REGNO (basereg) == 0)
27765 {
27766 rtx tmp = offsetreg;
27767 offsetreg = basereg;
27768 basereg = tmp;
27769 }
27770 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
27771 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
27772 dst = replace_equiv_address (dst, basereg);
27773 }
27774 }
27775 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
27776 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode, true));
27777 }
27778
27779 /* If we are reading an accumulator register, we have to
27780 deprime it before we can access it. */
27781 if (TARGET_MMA && REG_P (src)
27782 && GET_MODE (src) == XOmode && FP_REGNO_P (REGNO (src)))
27783 emit_insn (gen_mma_xxmfacc (src, src));
27784
27785 for (i = 0; i < nregs; i++)
27786 {
27787 /* Calculate index to next subword. */
27788 ++j;
27789 if (j == nregs)
27790 j = 0;
27791
27792 /* If the compiler already emitted the move of the first word by
27793 a store with update, there is no need to do anything. */
27794 if (j == 0 && used_update)
27795 continue;
27796
27797 /* XO/OO are opaque so cannot use subregs. */
27798 if (mode == OOmode || mode == XOmode )
27799 {
27800 rtx dst_i = gen_rtx_REG (reg_mode, REGNO (dst) + j);
27801 rtx src_i = gen_rtx_REG (reg_mode, REGNO (src) + j);
27802 emit_insn (gen_rtx_SET (dst_i, src_i));
27803 }
27804 else
27805 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
27806 j * reg_mode_size),
27807 simplify_gen_subreg (reg_mode, src, mode,
27808 j * reg_mode_size)));
27809 }
27810
27811 /* If we are writing an accumulator register, we have to
27812 prime it after we've written it. */
27813 if (TARGET_MMA && REG_P (dst)
27814 && GET_MODE (dst) == XOmode && FP_REGNO_P (REGNO (dst)))
27815 emit_insn (gen_mma_xxmtacc (dst, dst));
27816
27817 if (restore_basereg != NULL_RTX)
27818 emit_insn (restore_basereg);
27819 }
27820 }
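
/* As an example of the endian handling above (registers invented for the
   sketch): storing a __vector_pair held in vs0/vs1 on a little-endian
   target stores vs1 to bytes 0..15 and vs0 to bytes 16..31, i.e. the last
   register gets the first memory location.  */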
27821 \f
27822 /* Return true if the peephole2 can combine a load involving a combination of
27823 an addis instruction and a load with an offset that can be fused together on
27824 a power8. */
27825
27826 bool
27827 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
27828 rtx addis_value, /* addis value. */
27829 rtx target, /* target register that is loaded. */
27830 rtx mem) /* bottom part of the memory addr. */
27831 {
27832 rtx addr;
27833 rtx base_reg;
27834
27835 /* Validate arguments. */
27836 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
27837 return false;
27838
27839 if (!base_reg_operand (target, GET_MODE (target)))
27840 return false;
27841
27842 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
27843 return false;
27844
27845 /* Allow sign/zero extension. */
27846 if (GET_CODE (mem) == ZERO_EXTEND
27847 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
27848 mem = XEXP (mem, 0);
27849
27850 if (!MEM_P (mem))
27851 return false;
27852
27853 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
27854 return false;
27855
27856 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
27857 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
27858 return false;
27859
27860 /* Validate that the register used to load the high value is either the
27861 register being loaded, or we can safely replace its use.
27862
27863 This function is only called from the peephole2 pass and we assume that
27864 there are 2 instructions in the peephole (addis and load), so we want to
27865 check if the target register was not used in the memory address and the
27866 register to hold the addis result is dead after the peephole. */
27867 if (REGNO (addis_reg) != REGNO (target))
27868 {
27869 if (reg_mentioned_p (target, mem))
27870 return false;
27871
27872 if (!peep2_reg_dead_p (2, addis_reg))
27873 return false;
27874
27875 /* If the target register being loaded is the stack pointer, we must
27876 avoid loading any other value into it, even temporarily. */
27877 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
27878 return false;
27879 }
27880
27881 base_reg = XEXP (addr, 0);
27882 return REGNO (addis_reg) == REGNO (base_reg);
27883 }
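
/* The peephole guarded by this predicate matches a two-instruction
   sequence roughly like the following (symbol and register numbers
   invented for the example):

     addis 9,2,sym@toc@ha
     lwz   3,sym@toc@l(9)

   where r9 is dead after the load, so the pair can be rewritten to use
   only the target register and be fused by the power8 hardware.  */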
27884
27885 /* During the peephole2 pass, adjust and expand the insns for a load fusion
27886 sequence. We adjust the addis register to use the target register. If the
27887 load sign extends, we adjust the code to do the zero extending load, and an
27888 explicit sign extension later since the fusion only covers zero extending
27889 loads.
27890
27891 The operands are:
27892 operands[0] register set with addis (to be replaced with target)
27893 operands[1] value set via addis
27894 operands[2] target register being loaded
27895 operands[3] D-form memory reference using operands[0]. */
27896
27897 void
27898 expand_fusion_gpr_load (rtx *operands)
27899 {
27900 rtx addis_value = operands[1];
27901 rtx target = operands[2];
27902 rtx orig_mem = operands[3];
27903 rtx new_addr, new_mem, orig_addr, offset;
27904 enum rtx_code plus_or_lo_sum;
27905 machine_mode target_mode = GET_MODE (target);
27906 machine_mode extend_mode = target_mode;
27907 machine_mode ptr_mode = Pmode;
27908 enum rtx_code extend = UNKNOWN;
27909
27910 if (GET_CODE (orig_mem) == ZERO_EXTEND
27911 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
27912 {
27913 extend = GET_CODE (orig_mem);
27914 orig_mem = XEXP (orig_mem, 0);
27915 target_mode = GET_MODE (orig_mem);
27916 }
27917
27918 gcc_assert (MEM_P (orig_mem));
27919
27920 orig_addr = XEXP (orig_mem, 0);
27921 plus_or_lo_sum = GET_CODE (orig_addr);
27922 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
27923
27924 offset = XEXP (orig_addr, 1);
27925 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
27926 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
27927
27928 if (extend != UNKNOWN)
27929 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
27930
27931 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
27932 UNSPEC_FUSION_GPR);
27933 emit_insn (gen_rtx_SET (target, new_mem));
27934
27935 if (extend == SIGN_EXTEND)
27936 {
27937 int sub_off = ((BYTES_BIG_ENDIAN)
27938 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
27939 : 0);
27940 rtx sign_reg
27941 = simplify_subreg (target_mode, target, extend_mode, sub_off);
27942
27943 emit_insn (gen_rtx_SET (target,
27944 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
27945 }
27946
27947 return;
27948 }
27949
27950 /* Emit the addis instruction that will be part of a fused instruction
27951 sequence. */
27952
27953 void
27954 emit_fusion_addis (rtx target, rtx addis_value)
27955 {
27956 rtx fuse_ops[10];
27957 const char *addis_str = NULL;
27958
27959 /* Emit the addis instruction. */
27960 fuse_ops[0] = target;
27961 if (satisfies_constraint_L (addis_value))
27962 {
27963 fuse_ops[1] = addis_value;
27964 addis_str = "lis %0,%v1";
27965 }
27966
27967 else if (GET_CODE (addis_value) == PLUS)
27968 {
27969 rtx op0 = XEXP (addis_value, 0);
27970 rtx op1 = XEXP (addis_value, 1);
27971
27972 if (REG_P (op0) && CONST_INT_P (op1)
27973 && satisfies_constraint_L (op1))
27974 {
27975 fuse_ops[1] = op0;
27976 fuse_ops[2] = op1;
27977 addis_str = "addis %0,%1,%v2";
27978 }
27979 }
27980
27981 else if (GET_CODE (addis_value) == HIGH)
27982 {
27983 rtx value = XEXP (addis_value, 0);
27984 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
27985 {
27986 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
27987 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
27988 if (TARGET_ELF)
27989 addis_str = "addis %0,%2,%1@toc@ha";
27990
27991 else if (TARGET_XCOFF)
27992 addis_str = "addis %0,%1@u(%2)";
27993
27994 else
27995 gcc_unreachable ();
27996 }
27997
27998 else if (GET_CODE (value) == PLUS)
27999 {
28000 rtx op0 = XEXP (value, 0);
28001 rtx op1 = XEXP (value, 1);
28002
28003 if (GET_CODE (op0) == UNSPEC
28004 && XINT (op0, 1) == UNSPEC_TOCREL
28005 && CONST_INT_P (op1))
28006 {
28007 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
28008 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
28009 fuse_ops[3] = op1;
28010 if (TARGET_ELF)
28011 addis_str = "addis %0,%2,%1+%3@toc@ha";
28012
28013 else if (TARGET_XCOFF)
28014 addis_str = "addis %0,%1+%3@u(%2)";
28015
28016 else
28017 gcc_unreachable ();
28018 }
28019 }
28020
28021 else if (satisfies_constraint_L (value))
28022 {
28023 fuse_ops[1] = value;
28024 addis_str = "lis %0,%v1";
28025 }
28026
28027 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
28028 {
28029 fuse_ops[1] = value;
28030 addis_str = "lis %0,%1@ha";
28031 }
28032 }
28033
28034 if (!addis_str)
28035 fatal_insn ("Could not generate addis value for fusion", addis_value);
28036
28037 output_asm_insn (addis_str, fuse_ops);
28038 }
28039
28040 /* Emit a D-form load or store instruction that is the second instruction
28041 of a fusion sequence. */
28042
28043 static void
28044 emit_fusion_load (rtx load_reg, rtx addis_reg, rtx offset, const char *insn_str)
28045 {
28046 rtx fuse_ops[10];
28047 char insn_template[80];
28048
28049 fuse_ops[0] = load_reg;
28050 fuse_ops[1] = addis_reg;
28051
28052 if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
28053 {
28054 sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
28055 fuse_ops[2] = offset;
28056 output_asm_insn (insn_template, fuse_ops);
28057 }
28058
28059 else if (GET_CODE (offset) == UNSPEC
28060 && XINT (offset, 1) == UNSPEC_TOCREL)
28061 {
28062 if (TARGET_ELF)
28063 sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);
28064
28065 else if (TARGET_XCOFF)
28066 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
28067
28068 else
28069 gcc_unreachable ();
28070
28071 fuse_ops[2] = XVECEXP (offset, 0, 0);
28072 output_asm_insn (insn_template, fuse_ops);
28073 }
28074
28075 else if (GET_CODE (offset) == PLUS
28076 && GET_CODE (XEXP (offset, 0)) == UNSPEC
28077 && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
28078 && CONST_INT_P (XEXP (offset, 1)))
28079 {
28080 rtx tocrel_unspec = XEXP (offset, 0);
28081 if (TARGET_ELF)
28082 sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);
28083
28084 else if (TARGET_XCOFF)
28085 sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);
28086
28087 else
28088 gcc_unreachable ();
28089
28090 fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
28091 fuse_ops[3] = XEXP (offset, 1);
28092 output_asm_insn (insn_template, fuse_ops);
28093 }
28094
28095 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
28096 {
28097 sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);
28098
28099 fuse_ops[2] = offset;
28100 output_asm_insn (insn_template, fuse_ops);
28101 }
28102
28103 else
28104 fatal_insn ("Unable to generate load/store offset for fusion", offset);
28105
28106 return;
28107 }
28108
28109 /* Given an address, convert it into the addis and load offset parts. Addresses
28110 created during the peephole2 process look like:
28111 (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL))
28112 (unspec [(...)] UNSPEC_TOCREL)) */
28113
28114 static void
28115 fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo)
28116 {
28117 rtx hi, lo;
28118
28119 if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
28120 {
28121 hi = XEXP (addr, 0);
28122 lo = XEXP (addr, 1);
28123 }
28124 else
28125 gcc_unreachable ();
28126
28127 *p_hi = hi;
28128 *p_lo = lo;
28129 }
28130
28131 /* Return a string to fuse an addis instruction with a GPR load into the same
28132 register that the addis instruction set up. The address that is used
28133 is the logical address that was formed during peephole2:
28134 (lo_sum (high) (low-part))
28135
28136 The code is complicated, so we call output_asm_insn directly, and just
28137 return "". */
28138
28139 const char *
28140 emit_fusion_gpr_load (rtx target, rtx mem)
28141 {
28142 rtx addis_value;
28143 rtx addr;
28144 rtx load_offset;
28145 const char *load_str = NULL;
28146 machine_mode mode;
28147
28148 if (GET_CODE (mem) == ZERO_EXTEND)
28149 mem = XEXP (mem, 0);
28150
28151 gcc_assert (REG_P (target) && MEM_P (mem));
28152
28153 addr = XEXP (mem, 0);
28154 fusion_split_address (addr, &addis_value, &load_offset);
28155
28156 /* Now emit the load instruction to the same register. */
28157 mode = GET_MODE (mem);
28158 switch (mode)
28159 {
28160 case E_QImode:
28161 load_str = "lbz";
28162 break;
28163
28164 case E_HImode:
28165 load_str = "lhz";
28166 break;
28167
28168 case E_SImode:
28169 case E_SFmode:
28170 load_str = "lwz";
28171 break;
28172
28173 case E_DImode:
28174 case E_DFmode:
28175 gcc_assert (TARGET_POWERPC64);
28176 load_str = "ld";
28177 break;
28178
28179 default:
28180 fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
28181 }
28182
28183 /* Emit the addis instruction. */
28184 emit_fusion_addis (target, addis_value);
28185
28186 /* Emit the D-form load instruction. */
28187 emit_fusion_load (target, target, load_offset, load_str);
28188
28189 return "";
28190 }
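
/* Continuing the example from fusion_gpr_load_p, the rewritten sequence
   this function prints uses the target register for both instructions
   (symbol and register numbers invented for the example):

     addis 3,2,sym@toc@ha
     lwz   3,sym@toc@l(3)  */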
28191 \f
28192 /* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype
28193 would then ignore it. */
28194 static GTY(()) tree atomic_hold_decl;
28195 static GTY(()) tree atomic_clear_decl;
28196 static GTY(()) tree atomic_update_decl;
28197
28198 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
28199 static void
28200 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
28201 {
28202 if (!TARGET_HARD_FLOAT)
28203 {
28204 #ifdef RS6000_GLIBC_ATOMIC_FENV
28205 if (atomic_hold_decl == NULL_TREE)
28206 {
28207 atomic_hold_decl
28208 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28209 get_identifier ("__atomic_feholdexcept"),
28210 build_function_type_list (void_type_node,
28211 double_ptr_type_node,
28212 NULL_TREE));
28213 TREE_PUBLIC (atomic_hold_decl) = 1;
28214 DECL_EXTERNAL (atomic_hold_decl) = 1;
28215 }
28216
28217 if (atomic_clear_decl == NULL_TREE)
28218 {
28219 atomic_clear_decl
28220 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28221 get_identifier ("__atomic_feclearexcept"),
28222 build_function_type_list (void_type_node,
28223 NULL_TREE));
28224 TREE_PUBLIC (atomic_clear_decl) = 1;
28225 DECL_EXTERNAL (atomic_clear_decl) = 1;
28226 }
28227
28228 tree const_double = build_qualified_type (double_type_node,
28229 TYPE_QUAL_CONST);
28230 tree const_double_ptr = build_pointer_type (const_double);
28231 if (atomic_update_decl == NULL_TREE)
28232 {
28233 atomic_update_decl
28234 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
28235 get_identifier ("__atomic_feupdateenv"),
28236 build_function_type_list (void_type_node,
28237 const_double_ptr,
28238 NULL_TREE));
28239 TREE_PUBLIC (atomic_update_decl) = 1;
28240 DECL_EXTERNAL (atomic_update_decl) = 1;
28241 }
28242
28243 tree fenv_var = create_tmp_var_raw (double_type_node);
28244 TREE_ADDRESSABLE (fenv_var) = 1;
28245 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node,
28246 build4 (TARGET_EXPR, double_type_node, fenv_var,
28247 void_node, NULL_TREE, NULL_TREE));
28248
28249 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
28250 *clear = build_call_expr (atomic_clear_decl, 0);
28251 *update = build_call_expr (atomic_update_decl, 1,
28252 fold_convert (const_double_ptr, fenv_addr));
28253 #endif
28254 return;
28255 }
28256
28257 tree mffs = rs6000_builtin_decls[RS6000_BIF_MFFS];
28258 tree mtfsf = rs6000_builtin_decls[RS6000_BIF_MTFSF];
28259 tree call_mffs = build_call_expr (mffs, 0);
28260
28261 /* Generates the equivalent of feholdexcept (&fenv_var)
28262
28263 *fenv_var = __builtin_mffs ();
28264 double fenv_hold;
28265 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
28266 __builtin_mtfsf (0xff, fenv_hold); */
28267
28268 /* Mask to clear everything except for the rounding modes and non-IEEE
28269 arithmetic flag. */
28270 const unsigned HOST_WIDE_INT hold_exception_mask
28271 = HOST_WIDE_INT_C (0xffffffff00000007);
28272
28273 tree fenv_var = create_tmp_var_raw (double_type_node);
28274
28275 tree hold_mffs = build4 (TARGET_EXPR, double_type_node, fenv_var, call_mffs,
28276 NULL_TREE, NULL_TREE);
28277
28278 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
28279 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
28280 build_int_cst (uint64_type_node,
28281 hold_exception_mask));
28282
28283 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28284 fenv_llu_and);
28285
28286 tree hold_mtfsf = build_call_expr (mtfsf, 2,
28287 build_int_cst (unsigned_type_node, 0xff),
28288 fenv_hold_mtfsf);
28289
28290 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
28291
28292 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
28293
28294 double fenv_clear = __builtin_mffs ();
28295 *(uint64_t)&fenv_clear &= 0xffffffff00000000LL;
28296 __builtin_mtfsf (0xff, fenv_clear); */
28297
28298 /* Mask to clear the entire lower word of the FPSCR, including all of the
28299 exception status and enable bits. */
28300 const unsigned HOST_WIDE_INT clear_exception_mask
28301 = HOST_WIDE_INT_C (0xffffffff00000000);
28302
28303 tree fenv_clear = create_tmp_var_raw (double_type_node);
28304
28305 tree clear_mffs = build4 (TARGET_EXPR, double_type_node, fenv_clear,
28306 call_mffs, NULL_TREE, NULL_TREE);
28307
28308 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
28309 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
28310 fenv_clean_llu,
28311 build_int_cst (uint64_type_node,
28312 clear_exception_mask));
28313
28314 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28315 fenv_clear_llu_and);
28316
28317 tree clear_mtfsf = build_call_expr (mtfsf, 2,
28318 build_int_cst (unsigned_type_node, 0xff),
28319 fenv_clear_mtfsf);
28320
28321 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
28322
28323 /* Generates the equivalent of feupdateenv (&fenv_var)
28324
28325 double old_fenv = __builtin_mffs ();
28326 double fenv_update;
28327 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
28328 (*(uint64_t*)fenv_var & 0x1ff80fff);
28329 __builtin_mtfsf (0xff, fenv_update); */
28330
28331 const unsigned HOST_WIDE_INT update_exception_mask
28332 = HOST_WIDE_INT_C (0xffffffff1fffff00);
28333 const unsigned HOST_WIDE_INT new_exception_mask
28334 = HOST_WIDE_INT_C (0x1ff80fff);
28335
28336 tree old_fenv = create_tmp_var_raw (double_type_node);
28337 tree update_mffs = build4 (TARGET_EXPR, double_type_node, old_fenv,
28338 call_mffs, NULL_TREE, NULL_TREE);
28339
28340 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
28341 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
28342 build_int_cst (uint64_type_node,
28343 update_exception_mask));
28344
28345 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
28346 build_int_cst (uint64_type_node,
28347 new_exception_mask));
28348
28349 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
28350 old_llu_and, new_llu_and);
28351
28352 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
28353 new_llu_mask);
28354
28355 tree update_mtfsf = build_call_expr (mtfsf, 2,
28356 build_int_cst (unsigned_type_node, 0xff),
28357 fenv_update_mtfsf);
28358
28359 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
28360 }
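
/* For reference: in the masks above, the low FPSCR bits 0x3 are RN (the
   rounding mode) and 0x4 is NI (non-IEEE mode), so

     fpscr & 0xffffffff00000007

   keeps the rounding state while clearing every exception status and
   enable bit, which is what the feholdexcept equivalent needs.  */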
28361
28362 void
28363 rs6000_generate_float2_double_code (rtx dst, rtx src1, rtx src2)
28364 {
28365 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28366
28367 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28368 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28369
28370 /* The destination of the vmrgew instruction layout is:
28371 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
28372 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28373 vmrgew instruction will be correct. */
28374 if (BYTES_BIG_ENDIAN)
28375 {
28376 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp0, src1, src2,
28377 GEN_INT (0)));
28378 emit_insn (gen_vsx_xxpermdi_v2df_be (rtx_tmp1, src1, src2,
28379 GEN_INT (3)));
28380 }
28381 else
28382 {
28383 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (3)));
28384 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (0)));
28385 }
28386
28387 rtx_tmp2 = gen_reg_rtx (V4SFmode);
28388 rtx_tmp3 = gen_reg_rtx (V4SFmode);
28389
28390 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp2, rtx_tmp0));
28391 emit_insn (gen_vsx_xvcvdpsp (rtx_tmp3, rtx_tmp1));
28392
28393 if (BYTES_BIG_ENDIAN)
28394 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28395 else
28396 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
28397 }
28398
28399 void
28400 rs6000_generate_float2_code (bool signed_convert, rtx dst, rtx src1, rtx src2)
28401 {
28402 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28403
28404 rtx_tmp0 = gen_reg_rtx (V2DImode);
28405 rtx_tmp1 = gen_reg_rtx (V2DImode);
28406
28407 /* The destination of the vmrgew instruction layout is:
28408 rtx_tmp2[0] rtx_tmp3[0] rtx_tmp2[1] rtx_tmp3[1].
28409 Set up rtx_tmp0 and rtx_tmp1 to ensure the order of the elements after the
28410 vmrgew instruction will be correct. */
28411 if (BYTES_BIG_ENDIAN)
28412 {
28413 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp0, src1, src2, GEN_INT (0)));
28414 emit_insn (gen_vsx_xxpermdi_v2di_be (rtx_tmp1, src1, src2, GEN_INT (3)));
28415 }
28416 else
28417 {
28418 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp0, src1, src2, GEN_INT (3)));
28419 emit_insn (gen_vsx_xxpermdi_v2di (rtx_tmp1, src1, src2, GEN_INT (0)));
28420 }
28421
28422 rtx_tmp2 = gen_reg_rtx (V4SFmode);
28423 rtx_tmp3 = gen_reg_rtx (V4SFmode);
28424
28425 if (signed_convert)
28426 {
28427 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp2, rtx_tmp0));
28428 emit_insn (gen_vsx_xvcvsxdsp (rtx_tmp3, rtx_tmp1));
28429 }
28430 else
28431 {
28432 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp2, rtx_tmp0));
28433 emit_insn (gen_vsx_xvcvuxdsp (rtx_tmp3, rtx_tmp1));
28434 }
28435
28436 if (BYTES_BIG_ENDIAN)
28437 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp2, rtx_tmp3));
28438 else
28439 emit_insn (gen_p8_vmrgew_v4sf (dst, rtx_tmp3, rtx_tmp2));
28440 }
28441
28442 void
28443 rs6000_generate_vsigned2_code (bool signed_convert, rtx dst, rtx src1,
28444 rtx src2)
28445 {
28446 rtx rtx_tmp0, rtx_tmp1, rtx_tmp2, rtx_tmp3;
28447
28448 rtx_tmp0 = gen_reg_rtx (V2DFmode);
28449 rtx_tmp1 = gen_reg_rtx (V2DFmode);
28450
28451 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp0, src1, src2, GEN_INT (0)));
28452 emit_insn (gen_vsx_xxpermdi_v2df (rtx_tmp1, src1, src2, GEN_INT (3)));
28453
28454 rtx_tmp2 = gen_reg_rtx (V4SImode);
28455 rtx_tmp3 = gen_reg_rtx (V4SImode);
28456
28457 if (signed_convert)
28458 {
28459 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp2, rtx_tmp0));
28460 emit_insn (gen_vsx_xvcvdpsxws (rtx_tmp3, rtx_tmp1));
28461 }
28462 else
28463 {
28464 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp2, rtx_tmp0));
28465 emit_insn (gen_vsx_xvcvdpuxws (rtx_tmp3, rtx_tmp1));
28466 }
28467
28468 emit_insn (gen_p8_vmrgew_v4si (dst, rtx_tmp2, rtx_tmp3));
28469 }
28470
28471 /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
28472
28473 static bool
28474 rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode,
28475 optimization_type opt_type)
28476 {
28477 switch (op)
28478 {
28479 case rsqrt_optab:
28480 return (opt_type == OPTIMIZE_FOR_SPEED
28481 && RS6000_RECIP_AUTO_RSQRTE_P (mode1));
28482
28483 default:
28484 return true;
28485 }
28486 }
28487
28488 /* Implement TARGET_CONSTANT_ALIGNMENT. */
28489
28490 static HOST_WIDE_INT
28491 rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
28492 {
28493 if (TREE_CODE (exp) == STRING_CST
28494 && (STRICT_ALIGNMENT || !optimize_size))
28495 return MAX (align, BITS_PER_WORD);
28496 return align;
28497 }
28498
28499 /* Implement TARGET_STARTING_FRAME_OFFSET. */
28500
28501 static HOST_WIDE_INT
28502 rs6000_starting_frame_offset (void)
28503 {
28504 if (FRAME_GROWS_DOWNWARD)
28505 return 0;
28506 return RS6000_STARTING_FRAME_OFFSET;
28507 }
28508 \f
28509 /* Internal function to return the built-in function id for the complex
28510 multiply operation for a given mode. */
28511
28512 static inline built_in_function
28513 complex_multiply_builtin_code (machine_mode mode)
28514 {
28515 gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
28516 int func = BUILT_IN_COMPLEX_MUL_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
28517 return (built_in_function) func;
28518 }
28519
28520 /* Internal function to return the built-in function id for the complex divide
28521 operation for a given mode. */
28522
28523 static inline built_in_function
28524 complex_divide_builtin_code (machine_mode mode)
28525 {
28526 gcc_assert (IN_RANGE (mode, MIN_MODE_COMPLEX_FLOAT, MAX_MODE_COMPLEX_FLOAT));
28527 int func = BUILT_IN_COMPLEX_DIV_MIN + mode - MIN_MODE_COMPLEX_FLOAT;
28528 return (built_in_function) func;
28529 }
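
/* Usage sketch: complex_multiply_builtin_code (KCmode) returns the id at
   offset KCmode - MIN_MODE_COMPLEX_FLOAT from BUILT_IN_COMPLEX_MUL_MIN,
   which is how the name-mangling code below recognizes the calls that
   should become __mulkc3 and friends.  */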
28530
28531 /* On 64-bit Linux and Freebsd systems, possibly switch the long double library
28532 function names from <foo>l to <foo>f128 if the default long double type is
28533 IEEE 128-bit. Typically, with the C and C++ languages, the standard math.h
28534 include file switches the names on systems that support long double as IEEE
28535 128-bit, but that doesn't work if the user uses __builtin_<foo>l directly.
28536 In the future, glibc will export names like __ieee128_sinf128 and we can
28537 switch to using those instead of using sinf128, which pollutes the user's
28538 namespace.
28539
28540 This will switch the names for the Fortran math functions as well
28541 (Fortran does not use math.h). However, Fortran needs other changes to the
28542 compiler and library before the real*16 type can be switched at compile time.
28543
28544 We use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change this name. We
28545 only do this transformation if the __float128 type is enabled. This
28546 prevents us from doing the transformation on older 32-bit ports that might
28547 have enabled using IEEE 128-bit floating point as the default long double
28548 type.
28549
28550 We also use the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the
28551 function names used for complex multiply and divide to the appropriate
28552 names. */
28553
28554 static tree
28555 rs6000_mangle_decl_assembler_name (tree decl, tree id)
28556 {
28557 /* Handle complex multiply/divide. For IEEE 128-bit, use __mulkc3 or
28558 __divkc3 and for IBM 128-bit use __multc3 and __divtc3. */
28559 if (TARGET_FLOAT128_TYPE
28560 && TREE_CODE (decl) == FUNCTION_DECL
28561 && DECL_IS_UNDECLARED_BUILTIN (decl)
28562 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28563 {
28564 built_in_function fn_id = DECL_FUNCTION_CODE (decl);
28565 const char *newname = NULL;
28566
28567 if (fn_id == complex_multiply_builtin_code (KCmode))
28568 newname = "__mulkc3";
28569
28570 else if (fn_id == complex_multiply_builtin_code (ICmode))
28571 newname = "__multc3";
28572
28573 else if (fn_id == complex_multiply_builtin_code (TCmode))
28574 newname = (TARGET_IEEEQUAD) ? "__mulkc3" : "__multc3";
28575
28576 else if (fn_id == complex_divide_builtin_code (KCmode))
28577 newname = "__divkc3";
28578
28579 else if (fn_id == complex_divide_builtin_code (ICmode))
28580 newname = "__divtc3";
28581
28582 else if (fn_id == complex_divide_builtin_code (TCmode))
28583 newname = (TARGET_IEEEQUAD) ? "__divkc3" : "__divtc3";
28584
28585 if (newname)
28586 {
28587 if (TARGET_DEBUG_BUILTIN)
28588 fprintf (stderr, "Map complex mul/div => %s\n", newname);
28589
28590 return get_identifier (newname);
28591 }
28592 }
28593
28594 /* Map long double built-in functions if long double is IEEE 128-bit. */
28595 if (TARGET_FLOAT128_TYPE && TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
28596 && TREE_CODE (decl) == FUNCTION_DECL
28597 && DECL_IS_UNDECLARED_BUILTIN (decl)
28598 && DECL_BUILT_IN_CLASS (decl) == BUILT_IN_NORMAL)
28599 {
28600 size_t len = IDENTIFIER_LENGTH (id);
28601 const char *name = IDENTIFIER_POINTER (id);
28602 char *newname = NULL;
28603
28604 /* See if it is one of the built-in functions with an unusual name. */
28605 switch (DECL_FUNCTION_CODE (decl))
28606 {
28607 case BUILT_IN_DREML:
28608 newname = xstrdup ("__remainderieee128");
28609 break;
28610
28611 case BUILT_IN_GAMMAL:
28612 newname = xstrdup ("__lgammaieee128");
28613 break;
28614
28615 case BUILT_IN_GAMMAL_R:
28616 case BUILT_IN_LGAMMAL_R:
28617 newname = xstrdup ("__lgammaieee128_r");
28618 break;
28619
28620 case BUILT_IN_NEXTTOWARD:
28621 newname = xstrdup ("__nexttoward_to_ieee128");
28622 break;
28623
28624 case BUILT_IN_NEXTTOWARDF:
28625 newname = xstrdup ("__nexttowardf_to_ieee128");
28626 break;
28627
28628 case BUILT_IN_NEXTTOWARDL:
28629 newname = xstrdup ("__nexttowardieee128");
28630 break;
28631
28632 case BUILT_IN_POW10L:
28633 newname = xstrdup ("__exp10ieee128");
28634 break;
28635
28636 case BUILT_IN_SCALBL:
28637 newname = xstrdup ("__scalbieee128");
28638 break;
28639
28640 case BUILT_IN_SIGNIFICANDL:
28641 newname = xstrdup ("__significandieee128");
28642 break;
28643
28644 case BUILT_IN_SINCOSL:
28645 newname = xstrdup ("__sincosieee128");
28646 break;
28647
28648 default:
28649 break;
28650 }
28651
28652 /* Update the __builtin_*printf and __builtin_*scanf functions. */
28653 if (!newname)
28654 {
28655 size_t printf_len = strlen ("printf");
28656 size_t scanf_len = strlen ("scanf");
28657 size_t printf_chk_len = strlen ("printf_chk");
28658
28659 if (len >= printf_len
28660 && strcmp (name + len - printf_len, "printf") == 0)
28661 newname = xasprintf ("__%sieee128", name);
28662
28663 else if (len >= scanf_len
28664 && strcmp (name + len - scanf_len, "scanf") == 0)
28665 newname = xasprintf ("__isoc99_%sieee128", name);
28666
28667 else if (len >= printf_chk_len
28668 && strcmp (name + len - printf_chk_len, "printf_chk") == 0)
28669 newname = xasprintf ("%sieee128", name);
28670
28671 else if (name[len - 1] == 'l')
28672 {
28673 bool uses_ieee128_p = false;
28674 tree type = TREE_TYPE (decl);
28675 machine_mode ret_mode = TYPE_MODE (type);
28676
28677 /* See if the function returns an IEEE 128-bit floating point type or
28678 complex type. */
28679 if (ret_mode == TFmode || ret_mode == TCmode)
28680 uses_ieee128_p = true;
28681 else
28682 {
28683 function_args_iterator args_iter;
28684 tree arg;
28685
28686 /* See if the function passes an IEEE 128-bit floating point type
28687 or complex type. */
28688 FOREACH_FUNCTION_ARGS (type, arg, args_iter)
28689 {
28690 machine_mode arg_mode = TYPE_MODE (arg);
28691 if (arg_mode == TFmode || arg_mode == TCmode)
28692 {
28693 uses_ieee128_p = true;
28694 break;
28695 }
28696 }
28697 }
28698
28699 /* If we passed or returned an IEEE 128-bit floating point type,
28700 change the name: use __<name>ieee128 instead of <name>l. */
28701 if (uses_ieee128_p)
28702 newname = xasprintf ("__%.*sieee128", (int)(len - 1), name);
28703 }
28704 }
28705
28706 if (newname)
28707 {
28708 if (TARGET_DEBUG_BUILTIN)
28709 fprintf (stderr, "Map %s => %s\n", name, newname);
28710
28711 id = get_identifier (newname);
28712 free (newname);
28713 }
28714 }
28715
28716 return id;
28717 }
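
/* As a standalone illustration of the suffix rewriting above (a sketch for
   experimenting outside of GCC; the helper name is made up and is not a GCC
   API, and it omits the printf_chk case and the TFmode argument checks):

     #include <stdio.h>
     #include <string.h>

     // Map a built-in math function name the way the hook above does:
     // *printf -> __*printfieee128, *scanf -> __isoc99_*scanfieee128,
     // <name>l -> __<name>ieee128.
     static void
     map_name (const char *name, char *out, size_t out_size)
     {
       size_t len = strlen (name);
       if (len >= strlen ("printf")
	   && strcmp (name + len - strlen ("printf"), "printf") == 0)
	 snprintf (out, out_size, "__%sieee128", name);
       else if (len >= strlen ("scanf")
		&& strcmp (name + len - strlen ("scanf"), "scanf") == 0)
	 snprintf (out, out_size, "__isoc99_%sieee128", name);
       else if (len > 1 && name[len - 1] == 'l')
	 snprintf (out, out_size, "__%.*sieee128", (int) (len - 1), name);
       else
	 snprintf (out, out_size, "%s", name);
     }

     int
     main (void)
     {
       char buf[64];
       map_name ("sinl", buf, sizeof (buf));
       puts (buf);	// prints __sinieee128
       map_name ("printf", buf, sizeof (buf));
       puts (buf);	// prints __printfieee128
       return 0;
     }
*/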
28718
28719 /* Predict whether the given loop in gimple will be transformed in the RTL
28720 doloop_optimize pass. */
28721
28722 static bool
28723 rs6000_predict_doloop_p (struct loop *loop)
28724 {
28725 gcc_assert (loop);
28726
28727 /* On rs6000, targetm.can_use_doloop_p is actually
28728 can_use_doloop_if_innermost. Just ensure the loop is innermost. */
28729 if (loop->inner != NULL)
28730 {
28731 if (dump_file && (dump_flags & TDF_DETAILS))
28732 fprintf (dump_file, "Predict doloop failure due to"
28733 " loop nesting.\n");
28734 return false;
28735 }
28736
28737 return true;
28738 }
28739
28740 /* Implement TARGET_PREFERRED_DOLOOP_MODE. */
28741
28742 static machine_mode
28743 rs6000_preferred_doloop_mode (machine_mode)
28744 {
28745 return word_mode;
28746 }
28747
28748 /* Implement TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P. */
28749
28750 static bool
28751 rs6000_cannot_substitute_mem_equiv_p (rtx mem)
28752 {
28753 gcc_assert (MEM_P (mem));
28754
28755 /* curr_insn_transform()'s handling of subregs cannot handle Altivec
28756 AND-style addresses, so don't allow MEMs with those address types to be
28757 substituted as an equivalent expression. See PR93974 for details. */
28758 if (GET_CODE (XEXP (mem, 0)) == AND)
28759 return true;
28760
28761 return false;
28762 }
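
/* For example, an Altivec-style address that masks off the low bits, such
   as (mem (and (reg) (const_int -16))), is rejected here.  */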
28763
28764 /* Implement TARGET_INVALID_CONVERSION. */
28765
28766 static const char *
28767 rs6000_invalid_conversion (const_tree fromtype, const_tree totype)
28768 {
28769 /* Make sure we're working with the canonical types. */
28770 if (TYPE_CANONICAL (fromtype) != NULL_TREE)
28771 fromtype = TYPE_CANONICAL (fromtype);
28772 if (TYPE_CANONICAL (totype) != NULL_TREE)
28773 totype = TYPE_CANONICAL (totype);
28774
28775 machine_mode frommode = TYPE_MODE (fromtype);
28776 machine_mode tomode = TYPE_MODE (totype);
28777
28778 if (frommode != tomode)
28779 {
28780 /* Do not allow conversions to/from XOmode and OOmode types. */
28781 if (frommode == XOmode)
28782 return N_("invalid conversion from type %<__vector_quad%>");
28783 if (tomode == XOmode)
28784 return N_("invalid conversion to type %<__vector_quad%>");
28785 if (frommode == OOmode)
28786 return N_("invalid conversion from type %<__vector_pair%>");
28787 if (tomode == OOmode)
28788 return N_("invalid conversion to type %<__vector_pair%>");
28789 }
28790
28791 /* Conversion allowed. */
28792 return NULL;
28793 }
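
/* For example (illustrative user code), an assignment that would convert a
   vector double value to a __vector_quad object, such as

     void f (__vector_quad *dst, vector double src) { *dst = src; }

   is rejected with "invalid conversion to type __vector_quad".  */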
28794
28795 /* Convert an SFmode constant to its integer bit pattern. */
28796
28797 long
28798 rs6000_const_f32_to_i32 (rtx operand)
28799 {
28800 long value;
28801 const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (operand);
28802
28803 gcc_assert (GET_MODE (operand) == SFmode);
28804 REAL_VALUE_TO_TARGET_SINGLE (*rv, value);
28805 return value;
28806 }
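
/* Outside of GCC the same bit pattern can be observed with a memcpy-based
   type pun (an illustrative sketch; GCC itself must use the target's
   real.cc representation rather than the host's float):

     #include <stdio.h>
     #include <string.h>

     int
     main (void)
     {
       float f = 1.0f;
       unsigned int bits;

       memcpy (&bits, &f, sizeof (bits));  // well-defined type pun
       printf ("0x%08x\n", bits);	   // prints 0x3f800000
       return 0;
     }
*/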
28807
28808 void
28809 rs6000_emit_xxspltidp_v2df (rtx dst, long value)
28810 {
28811 if (((value & 0x7F800000) == 0) && ((value & 0x7FFFFF) != 0))
28812 inform (input_location,
28813 "the result for the xxspltidp instruction "
28814 "is undefined for subnormal input values");
28815 emit_insn (gen_xxspltidp_v2df_inst (dst, GEN_INT (value)));
28816 }
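
/* For example, the SFmode bit pattern 0x00400000 has a zero exponent field
   and a non-zero mantissa, so it is subnormal and triggers the note above;
   0x3f800000 (1.0f) does not.  */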
28817
28818 /* Implement TARGET_ASM_GENERATE_PIC_ADDR_DIFF_VEC. */
28819
28820 static bool
28821 rs6000_gen_pic_addr_diff_vec (void)
28822 {
28823 return rs6000_relative_jumptables;
28824 }
28825
28826 void
28827 rs6000_output_addr_vec_elt (FILE *file, int value)
28828 {
28829 const char *directive = TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t";
28830 char buf[100];
28831
28832 fprintf (file, "%s", directive);
28833 ASM_GENERATE_INTERNAL_LABEL (buf, "L", value);
28834 assemble_name (file, buf);
28835 fprintf (file, "\n");
28836 }
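
/* For a 32-bit target this emits a line such as "\t.long\t.L42" (or the
   DOUBLE_INT_ASM_OP directive, typically "\t.quad\t", for 64-bit),
   referring to internal label 42 of the jump table.  */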
28837
28838 \f
28839 /* Copy an integer constant to the vector constant structure. */
28840
28841 static void
28842 constant_int_to_128bit_vector (rtx op,
28843 machine_mode mode,
28844 size_t byte_num,
28845 vec_const_128bit_type *info)
28846 {
28847 unsigned HOST_WIDE_INT uvalue = UINTVAL (op);
28848 unsigned bitsize = GET_MODE_BITSIZE (mode);
28849
28850 for (int shift = bitsize - 8; shift >= 0; shift -= 8)
28851 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28852 }
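
/* A standalone sketch of the byte splitting above (illustrative code, not
   part of GCC), extracting the big-endian bytes of a 64-bit value:

     #include <stdio.h>

     int
     main (void)
     {
       unsigned long long uvalue = 0x0123456789abcdefULL;
       unsigned char bytes[8];
       unsigned byte_num = 0;

       // Most significant byte first, as in the loop above.
       for (int shift = 64 - 8; shift >= 0; shift -= 8)
	 bytes[byte_num++] = (uvalue >> shift) & 0xff;

       for (int i = 0; i < 8; i++)
	 printf ("%02x ", bytes[i]);   // prints 01 23 45 67 89 ab cd ef
       printf ("\n");
       return 0;
     }
*/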
28853
28854 /* Copy a floating point constant to the vector constant structure. */
28855
28856 static void
28857 constant_fp_to_128bit_vector (rtx op,
28858 machine_mode mode,
28859 size_t byte_num,
28860 vec_const_128bit_type *info)
28861 {
28862 unsigned bitsize = GET_MODE_BITSIZE (mode);
28863 unsigned num_words = bitsize / 32;
28864 const REAL_VALUE_TYPE *rtype = CONST_DOUBLE_REAL_VALUE (op);
28865 long real_words[VECTOR_128BIT_WORDS];
28866
28867 /* Make sure we don't overflow the real_words array and that it is
28868 filled completely. */
28869 gcc_assert (num_words <= VECTOR_128BIT_WORDS && (bitsize % 32) == 0);
28870
28871 real_to_target (real_words, rtype, mode);
28872
28873 /* Iterate over each 32-bit word in the floating point constant. The
28874 real_to_target function stores the words in target-endian order. We need
28875 to rearrange them so the bytes are written in big-endian order. */
28876 for (unsigned num = 0; num < num_words; num++)
28877 {
28878 unsigned endian_num = (BYTES_BIG_ENDIAN
28879 ? num
28880 : num_words - 1 - num);
28881
28882 unsigned uvalue = real_words[endian_num];
28883 for (int shift = 32 - 8; shift >= 0; shift -= 8)
28884 info->bytes[byte_num++] = (uvalue >> shift) & 0xff;
28885 }
28886
28887 /* Mark that this constant involves floating point. */
28888 info->fp_constant_p = true;
28889 }
28890
28891 /* Convert a vector constant OP with mode MODE to a vector 128-bit constant
28892 structure INFO.
28893
28894 Break the constant out into bytes, half words, words, and double words.
28895 Return true if we have successfully converted the constant.
28896
28897 We handle CONST_INT, CONST_DOUBLE, CONST_VECTOR, and VEC_DUPLICATE of
28898 constants. Integer and floating point scalar constants are splatted to fill
28899 out the vector. */
28900
28901 bool
28902 vec_const_128bit_to_bytes (rtx op,
28903 machine_mode mode,
28904 vec_const_128bit_type *info)
28905 {
28906 /* Initialize the constant structure. */
28907 memset ((void *)info, 0, sizeof (vec_const_128bit_type));
28908
28909 /* Assume CONST_INTs are DImode. */
28910 if (mode == VOIDmode)
28911 mode = CONST_INT_P (op) ? DImode : GET_MODE (op);
28912
28913 if (mode == VOIDmode)
28914 return false;
28915
28916 unsigned size = GET_MODE_SIZE (mode);
28917 bool splat_p = false;
28918
28919 if (size > VECTOR_128BIT_BYTES)
28920 return false;
28921
28922 /* Set up the bits. */
28923 switch (GET_CODE (op))
28924 {
28925 /* Integer constants, default to double word. */
28926 case CONST_INT:
28927 {
28928 constant_int_to_128bit_vector (op, mode, 0, info);
28929 splat_p = true;
28930 break;
28931 }
28932
28933 /* Floating point constants. */
28934 case CONST_DOUBLE:
28935 {
28936 /* Fail if the floating point constant is the wrong mode. */
28937 if (GET_MODE (op) != mode)
28938 return false;
28939
28940 /* SFmode constants stored as scalars are kept in DFmode format. */
28941 if (mode == SFmode)
28942 {
28943 mode = DFmode;
28944 size = GET_MODE_SIZE (DFmode);
28945 }
28946
28947 constant_fp_to_128bit_vector (op, mode, 0, info);
28948 splat_p = true;
28949 break;
28950 }
28951
28952 /* Vector constants, iterate over each element. On little endian
28953 systems, we have to reverse the element numbers. */
28954 case CONST_VECTOR:
28955 {
28956 /* Fail if the vector constant is the wrong mode or size. */
28957 if (GET_MODE (op) != mode
28958 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28959 return false;
28960
28961 machine_mode ele_mode = GET_MODE_INNER (mode);
28962 size_t ele_size = GET_MODE_SIZE (ele_mode);
28963 size_t nunits = GET_MODE_NUNITS (mode);
28964
28965 for (size_t num = 0; num < nunits; num++)
28966 {
28967 rtx ele = CONST_VECTOR_ELT (op, num);
28968 size_t byte_num = (BYTES_BIG_ENDIAN
28969 ? num
28970 : nunits - 1 - num) * ele_size;
28971
28972 if (CONST_INT_P (ele))
28973 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
28974 else if (CONST_DOUBLE_P (ele))
28975 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
28976 else
28977 return false;
28978 }
28979
28980 break;
28981 }
28982
28983 /* Treat VEC_DUPLICATE of a constant just like a vector constant.
28984 Since we are duplicating the element, we don't have to worry about
28985 endian issues. */
28986 case VEC_DUPLICATE:
28987 {
28988 /* Fail if the vector duplicate is the wrong mode or size. */
28989 if (GET_MODE (op) != mode
28990 || GET_MODE_SIZE (mode) != VECTOR_128BIT_BYTES)
28991 return false;
28992
28993 machine_mode ele_mode = GET_MODE_INNER (mode);
28994 size_t ele_size = GET_MODE_SIZE (ele_mode);
28995 rtx ele = XEXP (op, 0);
28996 size_t nunits = GET_MODE_NUNITS (mode);
28997
28998 if (!CONST_INT_P (ele) && !CONST_DOUBLE_P (ele))
28999 return false;
29000
29001 for (size_t num = 0; num < nunits; num++)
29002 {
29003 size_t byte_num = num * ele_size;
29004
29005 if (CONST_INT_P (ele))
29006 constant_int_to_128bit_vector (ele, ele_mode, byte_num, info);
29007 else
29008 constant_fp_to_128bit_vector (ele, ele_mode, byte_num, info);
29009 }
29010
29011 break;
29012 }
29013
29014 /* Anything else, just return failure. */
29015 default:
29016 return false;
29017 }
29018
29019 /* Splat the constant to fill 128 bits if desired. */
29020 if (splat_p && size < VECTOR_128BIT_BYTES)
29021 {
29022 if ((VECTOR_128BIT_BYTES % size) != 0)
29023 return false;
29024
29025 for (size_t offset = size;
29026 offset < VECTOR_128BIT_BYTES;
29027 offset += size)
29028 memcpy ((void *) &info->bytes[offset],
29029 (void *) &info->bytes[0],
29030 size);
29031 }
29032
29033 /* Remember original size. */
29034 info->original_size = size;
29035
29036 /* Determine if the bytes are all the same. */
29037 unsigned char first_byte = info->bytes[0];
29038 info->all_bytes_same = true;
29039 for (size_t i = 1; i < VECTOR_128BIT_BYTES; i++)
29040 if (first_byte != info->bytes[i])
29041 {
29042 info->all_bytes_same = false;
29043 break;
29044 }
29045
29046 /* Pack half words together & determine if all of the half words are the
29047 same. */
29048 for (size_t i = 0; i < VECTOR_128BIT_HALF_WORDS; i++)
29049 info->half_words[i] = ((info->bytes[i * 2] << 8)
29050 | info->bytes[(i * 2) + 1]);
29051
29052 unsigned short first_hword = info->half_words[0];
29053 info->all_half_words_same = true;
29054 for (size_t i = 1; i < VECTOR_128BIT_HALF_WORDS; i++)
29055 if (first_hword != info->half_words[i])
29056 {
29057 info->all_half_words_same = false;
29058 break;
29059 }
29060
29061 /* Pack words together & determine if all of the words are the same. */
29062 for (size_t i = 0; i < VECTOR_128BIT_WORDS; i++)
29063 info->words[i] = ((info->bytes[i * 4] << 24)
29064 | (info->bytes[(i * 4) + 1] << 16)
29065 | (info->bytes[(i * 4) + 2] << 8)
29066 | info->bytes[(i * 4) + 3]);
29067
29068 info->all_words_same
29069 = (info->words[0] == info->words[1]
29070 && info->words[0] == info->words[2]
29071 && info->words[0] == info->words[3]);
29072
29073 /* Pack double words together & determine if all of the double words are the
29074 same. */
29075 for (size_t i = 0; i < VECTOR_128BIT_DOUBLE_WORDS; i++)
29076 {
29077 unsigned HOST_WIDE_INT d_word = 0;
29078 for (size_t j = 0; j < 8; j++)
29079 d_word = (d_word << 8) | info->bytes[(i * 8) + j];
29080
29081 info->double_words[i] = d_word;
29082 }
29083
29084 info->all_double_words_same
29085 = (info->double_words[0] == info->double_words[1]);
29086
29087 return true;
29088 }
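
/* A standalone sketch of the splat step above (illustrative, not GCC code):
   a pattern smaller than 128 bits is replicated until all 16 bytes are
   filled.

     #include <stdio.h>
     #include <string.h>

     int
     main (void)
     {
       unsigned char bytes[16] = { 0xde, 0xad, 0xbe, 0xef };
       size_t size = 4;	  // original constant size in bytes

       for (size_t offset = size; offset < 16; offset += size)
	 memcpy (&bytes[offset], &bytes[0], size);

       for (int i = 0; i < 16; i++)
	 printf ("%02x", bytes[i]);   // prints deadbeef four times
       printf ("\n");
       return 0;
     }
*/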
29089
29090 /* Determine if an IEEE 128-bit constant can be loaded with LXVKQ. Return zero
29091 if the LXVKQ instruction cannot be used. Otherwise return the immediate
29092 value to be used with the LXVKQ instruction. */
29093
29094 unsigned
29095 constant_generates_lxvkq (vec_const_128bit_type *vsx_const)
29096 {
29097 /* The instruction is only supported if power10 code generation, IEEE
29098 128-bit floating point hardware, and VSX registers are all available. */
29099 if (!TARGET_IEEE128_CONSTANT || !TARGET_FLOAT128_HW || !TARGET_POWER10
29100 || !TARGET_VSX)
29101 return 0;
29102
29103 /* All of the constants that can be generated by LXVKQ have the bottom 3
29104 words equal to 0. */
29105 if (vsx_const->words[1] != 0
29106 || vsx_const->words[2] != 0
29107 || vsx_const->words[3] != 0)
29108 return 0;
29109
29110 /* See if we have a match for the first word. */
29111 switch (vsx_const->words[0])
29112 {
29113 case 0x3FFF0000U: return 1; /* IEEE 128-bit +1.0. */
29114 case 0x40000000U: return 2; /* IEEE 128-bit +2.0. */
29115 case 0x40008000U: return 3; /* IEEE 128-bit +3.0. */
29116 case 0x40010000U: return 4; /* IEEE 128-bit +4.0. */
29117 case 0x40014000U: return 5; /* IEEE 128-bit +5.0. */
29118 case 0x40018000U: return 6; /* IEEE 128-bit +6.0. */
29119 case 0x4001C000U: return 7; /* IEEE 128-bit +7.0. */
29120 case 0x7FFF0000U: return 8; /* IEEE 128-bit +Infinity. */
29121 case 0x7FFF8000U: return 9; /* IEEE 128-bit quiet NaN. */
29122 case 0x80000000U: return 16; /* IEEE 128-bit -0.0. */
29123 case 0xBFFF0000U: return 17; /* IEEE 128-bit -1.0. */
29124 case 0xC0000000U: return 18; /* IEEE 128-bit -2.0. */
29125 case 0xC0008000U: return 19; /* IEEE 128-bit -3.0. */
29126 case 0xC0010000U: return 20; /* IEEE 128-bit -4.0. */
29127 case 0xC0014000U: return 21; /* IEEE 128-bit -5.0. */
29128 case 0xC0018000U: return 22; /* IEEE 128-bit -6.0. */
29129 case 0xC001C000U: return 23; /* IEEE 128-bit -7.0. */
29130 case 0xFFFF0000U: return 24; /* IEEE 128-bit -Infinity. */
29131
29132 /* Anything else cannot be loaded. */
29133 default:
29134 break;
29135 }
29136
29137 return 0;
29138 }
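
/* For example, IEEE 128-bit +1.0 has a sign bit of 0, a 15-bit biased
   exponent equal to the bias itself (16383 == 0x3fff), and an all-zero
   mantissa, so its most significant word is 0x3fff << 16 == 0x3FFF0000,
   the first entry in the table above.  */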
29139
29140 /* Determine if a vector constant can be loaded with XXSPLTIW. Return zero if
29141 the XXSPLTIW instruction cannot be used. Otherwise return the immediate
29142 value to be used with the XXSPLTIW instruction. */
29143
29144 unsigned
29145 constant_generates_xxspltiw (vec_const_128bit_type *vsx_const)
29146 {
29147 if (!TARGET_SPLAT_WORD_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
29148 return 0;
29149
29150 if (!vsx_const->all_words_same)
29151 return 0;
29152
29153 /* If we can use XXSPLTIB, don't generate XXSPLTIW. */
29154 if (vsx_const->all_bytes_same)
29155 return 0;
29156
29157 /* See if we can use VSPLTISH or VSPLTISW. */
29158 if (vsx_const->all_half_words_same)
29159 {
29160 short sign_h_word = vsx_const->half_words[0];
29161 if (EASY_VECTOR_15 (sign_h_word))
29162 return 0;
29163 }
29164
29165 int sign_word = vsx_const->words[0];
29166 if (EASY_VECTOR_15 (sign_word))
29167 return 0;
29168
29169 return vsx_const->words[0];
29170 }
29171
29172 /* Determine if a vector constant can be loaded with XXSPLTIDP. Return zero if
29173 the XXSPLTIDP instruction cannot be used. Otherwise return the immediate
29174 value to be used with the XXSPLTIDP instruction. */
29175
29176 unsigned
29177 constant_generates_xxspltidp (vec_const_128bit_type *vsx_const)
29178 {
29179 if (!TARGET_SPLAT_FLOAT_CONSTANT || !TARGET_PREFIXED || !TARGET_VSX)
29180 return 0;
29181
29182 /* Reject if the two 64-bit segments are not the same. */
29183 if (!vsx_const->all_double_words_same)
29184 return 0;
29185
29186 /* If the bytes, half words, or words are all the same, don't use XXSPLTIDP.
29187 Use a simpler instruction (XXSPLTIB, VSPLTISB, VSPLTISH, or VSPLTISW). */
29188 if (vsx_const->all_bytes_same
29189 || vsx_const->all_half_words_same
29190 || vsx_const->all_words_same)
29191 return 0;
29192
29193 unsigned HOST_WIDE_INT value = vsx_const->double_words[0];
29194
29195 /* Avoid values that look like DFmode NaNs, except for the normal NaN bit
29196 pattern and the signaling NaN bit pattern. Recognize infinity and
29197 negative infinity. */
29198
29199 /* Bit representation of DFmode normal quiet NaN. */
29200 #define RS6000_CONST_DF_NAN HOST_WIDE_INT_UC (0x7ff8000000000000)
29201
29202 /* Bit representation of DFmode normal signaling NaN. */
29203 #define RS6000_CONST_DF_NANS HOST_WIDE_INT_UC (0x7ff4000000000000)
29204
29205 /* Bit representation of DFmode positive infinity. */
29206 #define RS6000_CONST_DF_INF HOST_WIDE_INT_UC (0x7ff0000000000000)
29207
29208 /* Bit representation of DFmode negative infinity. */
29209 #define RS6000_CONST_DF_NEG_INF HOST_WIDE_INT_UC (0xfff0000000000000)
29210
29211 if (value != RS6000_CONST_DF_NAN
29212 && value != RS6000_CONST_DF_NANS
29213 && value != RS6000_CONST_DF_INF
29214 && value != RS6000_CONST_DF_NEG_INF)
29215 {
29216 /* The IEEE 754 64-bit floating point format has 1 bit for the sign, 11
29217 bits for the exponent, and 52 bits for the mantissa (not counting the
29218 hidden bit used for normal numbers). NaN values have the exponent set
29219 to all 1 bits, and the mantissa non-zero (mantissa == 0 is infinity). */
29220
29221 int df_exponent = (value >> 52) & 0x7ff;
29222 unsigned HOST_WIDE_INT
29223 df_mantissa = value & ((HOST_WIDE_INT_1U << 52) - HOST_WIDE_INT_1U);
29224
29225 if (df_exponent == 0x7ff && df_mantissa != 0) /* other NaNs. */
29226 return 0;
29227
29228 /* Avoid values that are DFmode subnormal values. Subnormal numbers have
29229 the exponent all 0 bits, and the mantissa non-zero. If the value is
29230 subnormal, then the hidden bit in the mantissa is not set. */
29231 if (df_exponent == 0 && df_mantissa != 0) /* subnormal. */
29232 return 0;
29233 }
29234
29235 /* Change the representation to a DFmode constant. */
29236 long df_words[2] = { vsx_const->words[0], vsx_const->words[1] };
29237
29238 /* real_from_target takes the target words in target order. */
29239 if (!BYTES_BIG_ENDIAN)
29240 std::swap (df_words[0], df_words[1]);
29241
29242 REAL_VALUE_TYPE rv_type;
29243 real_from_target (&rv_type, df_words, DFmode);
29244
29245 const REAL_VALUE_TYPE *rv = &rv_type;
29246
29247 /* Validate that the number can be stored as an SFmode value. */
29248 if (!exact_real_truncate (SFmode, rv))
29249 return 0;
29250
29251 /* Validate that the number is not an SFmode subnormal value (exponent is 0,
29252 mantissa field is non-zero) which is undefined for the XXSPLTIDP
29253 instruction. */
29254 long sf_value;
29255 real_to_target (&sf_value, rv, SFmode);
29256
29257 /* IEEE 754 32-bit values have 1 bit for the sign, 8 bits for the exponent,
29258 and 23 bits for the mantissa. Subnormal numbers have the exponent all
29259 0 bits, and the mantissa non-zero. */
29260 long sf_exponent = (sf_value >> 23) & 0xFF;
29261 long sf_mantissa = sf_value & 0x7FFFFF;
29262
29263 if (sf_exponent == 0 && sf_mantissa != 0)
29264 return 0;
29265
29266 /* Return the immediate to be used. */
29267 return sf_value;
29268 }
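
/* A standalone sketch of the DFmode field checks above (illustrative, not
   GCC code):

     #include <stdio.h>

     int
     main (void)
     {
       unsigned long long value = 0x7ff8000000000000ULL;  // quiet NaN

       int exponent = (value >> 52) & 0x7ff;
       unsigned long long mantissa = value & ((1ULL << 52) - 1);

       if (exponent == 0x7ff && mantissa != 0)
	 printf ("NaN\n");		// printed for this value
       else if (exponent == 0 && mantissa != 0)
	 printf ("subnormal\n");
       else
	 printf ("zero, normal, or infinity\n");
       return 0;
     }
*/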
29269
29270 /* We now have only two opaque types: the __vector_quad and __vector_pair
29271 built-in types. They are target specific and only available when MMA is
29272 supported. When MMA is supported, this function simply returns false.
29273 Otherwise, it checks whether the given gimple STMT is an assignment, asm
29274 or call stmt that unexpectedly uses either of these two opaque types; if
29275 so, it raises an error message and returns true, otherwise it returns
29276 false. */
29277
29278 bool
29279 rs6000_opaque_type_invalid_use_p (gimple *stmt)
29280 {
29281 if (TARGET_MMA)
29282 return false;
29283
29284 /* If the given TYPE is one of the MMA opaque types, emit the
29285 corresponding error message and return true, otherwise return false. */
29286 auto check_and_error_invalid_use = [](tree type)
29287 {
29288 tree mv = TYPE_MAIN_VARIANT (type);
29289 if (mv == vector_quad_type_node)
29290 {
29291 error ("type %<__vector_quad%> requires the %qs option", "-mmma");
29292 return true;
29293 }
29294 else if (mv == vector_pair_type_node)
29295 {
29296 error ("type %<__vector_pair%> requires the %qs option", "-mmma");
29297 return true;
29298 }
29299 return false;
29300 };
29301
29302 if (stmt)
29303 {
29304 /* The usage of MMA opaque types is very limited for now; checking
29305 gassign, gasm and gcall statements is enough so far. */
29306 if (gassign *ga = dyn_cast<gassign *> (stmt))
29307 {
29308 tree lhs = gimple_assign_lhs (ga);
29309 tree type = TREE_TYPE (lhs);
29310 if (check_and_error_invalid_use (type))
29311 return true;
29312 }
29313 else if (gasm *gs = dyn_cast<gasm *> (stmt))
29314 {
29315 unsigned ninputs = gimple_asm_ninputs (gs);
29316 for (unsigned i = 0; i < ninputs; i++)
29317 {
29318 tree op = gimple_asm_input_op (gs, i);
29319 tree val = TREE_VALUE (op);
29320 tree type = TREE_TYPE (val);
29321 if (check_and_error_invalid_use (type))
29322 return true;
29323 }
29324 unsigned noutputs = gimple_asm_noutputs (gs);
29325 for (unsigned i = 0; i < noutputs; i++)
29326 {
29327 tree op = gimple_asm_output_op (gs, i);
29328 tree val = TREE_VALUE (op);
29329 tree type = TREE_TYPE (val);
29330 if (check_and_error_invalid_use (type))
29331 return true;
29332 }
29333 }
29334 else if (gcall *gc = dyn_cast<gcall *> (stmt))
29335 {
29336 unsigned nargs = gimple_call_num_args (gc);
29337 for (unsigned i = 0; i < nargs; i++)
29338 {
29339 tree arg = gimple_call_arg (gc, i);
29340 tree type = TREE_TYPE (arg);
29341 if (check_and_error_invalid_use (type))
29342 return true;
29343 }
29344 }
29345 }
29346
29347 return false;
29348 }
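
/* For example (illustrative user code), compiling

     void f (__vector_quad *dst, __vector_quad *src) { *dst = *src; }

   without -mmma produces "type __vector_quad requires the -mmma option",
   raised from the gassign check above.  */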
29349
29350 struct gcc_target targetm = TARGET_INITIALIZER;
29351
29352 #include "gt-rs6000.h"