/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2013 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published
by the Free Software Foundation; either version 3, or (at your
option) any later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "obstack.h"
#include "tree.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "function.h"
#include "output.h"
#include "dbxout.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "ggc.h"
#include "hashtab.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cfgloop.h"
#include "sched-int.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "ira.h"
#include "opts.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "cgraph.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))

/* Structure used to define the rs6000 stack. */
typedef struct rs6000_stack {
  int reload_completed;         /* stack info won't change from here on */
  int first_gp_reg_save;        /* first callee saved GP register used */
  int first_fp_reg_save;        /* first callee saved FP register used */
  int first_altivec_reg_save;   /* first callee saved AltiVec register used */
  int lr_save_p;                /* true if the link reg needs to be saved */
  int cr_save_p;                /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;     /* mask of vec registers to save */
  int push_p;                   /* true if we need to allocate stack space */
  int calls_p;                  /* true if the function makes any calls */
  int world_save_p;             /* true if we're saving *everything*:
                                   r13-r31, cr, f14-f31, vrsave, v20-v31 */
  enum rs6000_abi abi;          /* which ABI to use */
  int gp_save_offset;           /* offset to save GP regs from initial SP */
  int fp_save_offset;           /* offset to save FP regs from initial SP */
  int altivec_save_offset;      /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;           /* offset to save LR from initial SP */
  int cr_save_offset;           /* offset to save CR from initial SP */
  int vrsave_save_offset;       /* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;       /* offset to save spe 64-bit gprs */
  int varargs_save_offset;      /* offset to save the varargs registers */
  int ehrd_offset;              /* offset to EH return data */
  int reg_size;                 /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;      /* variable save area size */
  int parm_size;                /* outgoing parameter size */
  int save_size;                /* save area size */
  int fixed_size;               /* fixed size of stack frame */
  int gp_size;                  /* size of saved GP registers */
  int fp_size;                  /* size of saved FP registers */
  int altivec_size;             /* size of saved AltiVec registers */
  int cr_size;                  /* size to hold CR if not in save_size */
  int vrsave_size;              /* size to hold VRSAVE if not in save_size */
  int altivec_padding_size;     /* size of altivec alignment padding if
                                   not in save_size */
  int spe_gp_size;              /* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;     /* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure. */
typedef struct GTY(()) machine_function
{
  /* Some local-dynamic symbol. */
  const char *some_ld_name;
  /* Whether the instruction chain has been scanned already. */
  int insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used. */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used. */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return. */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue. */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area. */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies. This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field. */
  rtx sdmode_stack_slot;
} machine_function;

/* Support targetm.vectorize.builtin_mask_for_load. */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined. */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, to call so we can
   get the address of the GOT section. */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup. */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux. */
int dot_symbols;

/* Specify the machine mode that pointers have. After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode. The type is unsigned since not all things that
   include rs6000.h also include machmode.h. */
unsigned rs6000_pmode;

/* Width in bits of a pointer. */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned. */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned. */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned. */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable. */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode. */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode. */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class. */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types. */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized. */
int toc_initialized;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue. This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes. */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches. */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector. */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins. */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for. */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically. */
enum rs6000_recip_mask {
  RECIP_SF_DIV          = 0x001,        /* Use divide estimate */
  RECIP_DF_DIV          = 0x002,
  RECIP_V4SF_DIV        = 0x004,
  RECIP_V2DF_DIV        = 0x008,

  RECIP_SF_RSQRT        = 0x010,        /* Use reciprocal sqrt estimate. */
  RECIP_DF_RSQRT        = 0x020,
  RECIP_V4SF_RSQRT      = 0x040,
  RECIP_V2DF_RSQRT      = 0x080,

  /* Various combinations of flags for -mrecip=xxx. */
  RECIP_NONE            = 0,
  RECIP_ALL             = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                           | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
                           | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION  = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough. */
  RECIP_LOW_PRECISION   = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options. */
static struct
{
  const char *string;           /* option name */
  unsigned int mask;            /* mask bits to set */
} recip_options[] = {
  { "all",      RECIP_ALL },
  { "none",     RECIP_NONE },
  { "div",      (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                 | RECIP_V2DF_DIV) },
  { "divf",     (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",     (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",    (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
                 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",   (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",   (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
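
/* Illustrative helper (an editorial addition for exposition, not part of the
   original file): each comma-separated token of -mrecip=opt[,opt...] is
   matched against the table above and its mask bits are OR-ed together, so
   "divf" yields RECIP_SF_DIV | RECIP_V4SF_DIV. */

static unsigned int ATTRIBUTE_UNUSED
rs6000_recip_mask_for_token (const char *token)
{
  size_t i;

  /* Scan recip_options for a token match and return its mask bits. */
  for (i = 0; i < ARRAY_SIZE (recip_options); i++)
    if (strcmp (token, recip_options[i].string) == 0)
      return recip_options[i].mask;

  return 0;
}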

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed. Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly. */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications. We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx). */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type. */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers). */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
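
/* For illustration (editorial note, not in the original source): because the
   enum above orders GPR_REG_TYPE, VSX_REG_TYPE, ALTIVEC_REG_TYPE and
   FPR_REG_TYPE consecutively, each macro reduces to a single range compare;
   e.g. IS_STD_REG_TYPE (ALTIVEC_REG_TYPE) and
   IS_FP_VECT_REG_TYPE (VSX_REG_TYPE) are true, while
   IS_STD_REG_TYPE (SPR_REG_TYPE) is false. */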


/* Register classes we care about in secondary reload or legitimate address
   checking. We only need to worry about GPR, FPR, and Altivec registers here,
   along with an ANY field that is the OR of the 3 register classes. */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,               /* General purpose registers. */
  RELOAD_REG_FPR,               /* Traditional floating point regs. */
  RELOAD_REG_VMX,               /* Altivec (VMX) registers. */
  RELOAD_REG_ANY,               /* OR of GPR, FPR, Altivec masks. */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits. */
#define FIRST_RELOAD_REG_CLASS  RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS   RELOAD_REG_VMX

/* Map reload register type to a register in the register class. */
struct reload_reg_map_type {
  const char *name;             /* Register class name. */
  int reg;                      /* Register in the register class. */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",      FIRST_GPR_REGNO },      /* RELOAD_REG_GPR. */
  { "Fpr",      FIRST_FPR_REGNO },      /* RELOAD_REG_FPR. */
  { "VMX",      FIRST_ALTIVEC_REGNO },  /* RELOAD_REG_VMX. */
  { "Any",      -1 },                   /* RELOAD_REG_ANY. */
};

/* Mask bits for each register class, indexed per mode. Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two. */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID        0x01    /* Mode valid in register. */
#define RELOAD_REG_MULTIPLE     0x02    /* Mode takes multiple registers. */
#define RELOAD_REG_INDEXED      0x04    /* Reg+reg addressing. */
#define RELOAD_REG_OFFSET       0x08    /* Reg+offset addressing. */
#define RELOAD_REG_PRE_INCDEC   0x10    /* PRE_INC/PRE_DEC valid. */
#define RELOAD_REG_PRE_MODIFY   0x20    /* PRE_MODIFY valid. */
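
/* Worked example (an assumption for exposition, not taken from the original
   source): a mode that is valid in a register class with reg+offset, reg+reg,
   and PRE_INC/PRE_DEC addressing would carry the mask

     (RELOAD_REG_VALID | RELOAD_REG_OFFSET | RELOAD_REG_INDEXED
      | RELOAD_REG_PRE_INCDEC)

   i.e. 0x1d. */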

/* For each mode, the insns to use for reloads and the masks, per register
   type, of the valid addressing modes. */
struct rs6000_reg_addr {
  enum insn_code reload_load;           /* INSN to reload for loading. */
  enum insn_code reload_store;          /* INSN to reload for storing. */
  enum insn_code reload_fpr_gpr;        /* INSN to move from FPR to GPR. */
  enum insn_code reload_gpr_vsx;        /* INSN to move from GPR to VSX. */
  enum insn_code reload_vsx_gpr;        /* INSN to move from VSX to GPR. */
  addr_mask_type addr_mask[(int)N_RELOAD_REG];  /* Valid address masks. */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
static inline bool
mode_supports_pre_incdec_p (enum machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
          != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY. */
static inline bool
mode_supports_pre_modify_p (enum machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
          != 0);
}
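
/* Illustrative helper (an editorial addition, not part of the original
   file): shows how a hypothetical caller might combine the two predicates
   above to validate an auto-modify memory address for MEM's mode. */

static bool ATTRIBUTE_UNUSED
mem_auto_modify_ok_p (rtx mem)
{
  rtx addr = XEXP (mem, 0);
  enum machine_mode mode = GET_MODE (mem);

  /* PRE_INC/PRE_DEC and PRE_MODIFY are gated by separate mask bits.  */
  if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
    return mode_supports_pre_incdec_p (mode);
  if (GET_CODE (addr) == PRE_MODIFY)
    return mode_supports_pre_modify_p (mode);

  return true;
}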

\f
/* Target cpu costs. */

struct processor_costs {
  const int mulsi;        /* cost of SImode multiplication. */
  const int mulsi_const;  /* cost of SImode multiplication by constant. */
  const int mulsi_const9; /* cost of SImode mult by short constant. */
  const int muldi;        /* cost of DImode multiplication. */
  const int divsi;        /* cost of SImode division. */
  const int divdi;        /* cost of DImode division. */
  const int fp;           /* cost of simple SFmode and DFmode insns. */
  const int dmul;         /* cost of DFmode multiplication (and fmadd). */
  const int sdiv;         /* cost of SFmode division (fdivs). */
  const int ddiv;         /* cost of DFmode division (fdiv). */
  const int cache_line_size;    /* cache line size in bytes. */
  const int l1_cache_size;      /* size of l1 cache, in kilobytes. */
  const int l2_cache_size;      /* size of l2 cache, in kilobytes. */
  const int simultaneous_prefetches; /* number of parallel prefetch
                                        operations. */
};

const struct processor_costs *rs6000_cost;

/* Processor costs (relative to an add) */

/* Instruction size costs on 32-bit processors. */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
};

/* Instruction size costs on 64-bit processors. */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
};

/* Instruction costs on RS64A processors. */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on MPCCORE processors. */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC403 processors. */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC405 processors. */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC440 processors. */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC476 processors. */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC601 processors. */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC603 processors. */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC604 processors. */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC604e processors. */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC620 processors. */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC630 processors. */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on Cell processor. */
/* COSTS_N_INSNS (1) ~ one add. */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  6,                    /* streams */
};

/* Instruction costs on PPC750 and PPC7400 processors. */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC7450 processors. */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC8540 processors. */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
};

/* Instruction costs on E300C2 and E300C3 cores. */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
};

/* Instruction costs on PPCE500MC processors. */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
};

/* Instruction costs on PPCE500MC64 processors. */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
};

/* Instruction costs on PPCE5500 processors. */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
};

/* Instruction costs on PPCE6500 processors. */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
};

/* Instruction costs on AppliedMicro Titan processors. */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
};

/* Instruction costs on POWER4 and POWER5 processors. */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
};

/* Instruction costs on POWER6 processors. */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
};

/* Instruction costs on POWER7 processors. */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
};

/* Instruction costs on POWER8 processors. */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
};

/* Instruction costs on POWER A2 processors. */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
};

\f
/* Table that classifies rs6000 builtin functions (pure, const, etc.). */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};
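
/* For illustration (hypothetical entry, not taken from rs6000-builtin.def):
   a definition in the .def file such as

     RS6000_BUILTIN_2 (FOO, "foo", RS6000_BTM_ALTIVEC, RS6000_BTC_CONST,
                       CODE_FOR_nothing)

   expands via the macros above to the initializer

     { "foo", CODE_FOR_nothing, RS6000_BTM_ALTIVEC, RS6000_BTC_CONST },

   i.e. the ENUM argument is dropped and the remaining fields populate
   name/icode/mask/attr in table order. */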

#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

/* Support for -mveclibabi=<xxx> to control which vector library to use. */
static tree (*rs6000_veclib_handler) (tree, tree, tree);

\f
static bool rs6000_debug_legitimate_address_p (enum machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
static rtx rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
static int rs6000_memory_move_cost (enum machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, int, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, enum machine_mode, addr_space_t,
                                      bool);
static int rs6000_debug_adjust_cost (rtx, rtx, rtx, int);
static bool is_microcoded_insn (rtx);
static bool is_nonpipeline_insn (rtx);
static bool is_cracked_insn (rtx);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx, rtx);
static bool insn_terminates_group_p (rtx, enum group_termination);
static bool insn_must_be_first_in_group (rtx);
static bool insn_must_be_last_in_group (rtx);
static void altivec_init_builtins (void);
static tree builtin_function_type (enum machine_mode, enum machine_mode,
                                   enum machine_mode, enum machine_mode,
                                   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, enum machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, enum machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static int rs6000_tls_symbol_ref_1 (rtx *, void *);
static int rs6000_get_some_local_dynamic_name_1 (rtx *, void *);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
                                       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, enum machine_mode, int, int,
                                             int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, enum machine_mode, int,
                                                   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
                                                     enum machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
                                                           enum machine_mode,
                                                           rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
                                                           enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
                                            enum machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
                                                  enum reg_class,
                                                  enum machine_mode);
static bool rs6000_cannot_change_mode_class (enum machine_mode,
                                             enum machine_mode,
                                             enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (enum machine_mode,
                                                   enum machine_mode,
                                                   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode, int, int,
                                             int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
                                                     enum machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
                                            enum machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (enum machine_mode,
                                             enum machine_mode,
                                             enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
                                      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
                                          HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
                                          enum rs6000_reg_type,
                                          enum machine_mode,
                                          secondary_reload_info *,
                                          bool);

/* Hash table stuff for keeping track of TOC entries. */

struct GTY(()) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P. */
  rtx key;
  enum machine_mode key_mode;
  int labelno;
};

static GTY ((param_is (struct toc_hash_struct))) htab_t toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions. */

struct GTY(()) builtin_hash_struct
{
  tree type;
  enum machine_mode mode[4];    /* return value + 3 arguments. */
  unsigned char uns_p[4];       /* and whether the types are unsigned. */
};

static GTY ((param_is (struct builtin_hash_struct))) htab_t builtin_hash_table;

\f
/* Default register names. */
char rs6000_reg_names[][8] =
{
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
  "0", "1", "2", "3", "4", "5", "6", "7",
  "ca",
  /* AltiVec registers. */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers. */
  "spe_acc", "spefscr",
  /* Soft frame pointer. */
  "sfp",
  /* HTM SPR registers. */
  "tfhar", "tfiar", "texasr"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  "mq", "lr", "ctr", "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  "ca",
  /* AltiVec registers. */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers. */
  "spe_acc", "spefscr",
  /* Soft frame pointer. */
  "sfp",
  /* HTM SPR registers. */
  "tfhar", "tfiar", "texasr"
};
#endif

/* Table of valid machine attributes. */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};
\f
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit. */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
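
/* Worked example (editorial illustration, not in the original source):
   ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 (bit for %v0),
   ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 20) is 0x00000800, so the VRSAVE
   mask covering the callee-saved registers v20-v31 is 0x00000fff. */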
1264 \f
1265 /* Initialize the GCC target structure. */
1266 #undef TARGET_ATTRIBUTE_TABLE
1267 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1268 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1269 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1270 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1271 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1272
1273 #undef TARGET_ASM_ALIGNED_DI_OP
1274 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1275
1276 /* Default unaligned ops are only provided for ELF. Find the ops needed
1277 for non-ELF systems. */
1278 #ifndef OBJECT_FORMAT_ELF
1279 #if TARGET_XCOFF
1280 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1281 64-bit targets. */
1282 #undef TARGET_ASM_UNALIGNED_HI_OP
1283 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1284 #undef TARGET_ASM_UNALIGNED_SI_OP
1285 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1286 #undef TARGET_ASM_UNALIGNED_DI_OP
1287 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1288 #else
1289 /* For Darwin. */
1290 #undef TARGET_ASM_UNALIGNED_HI_OP
1291 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1292 #undef TARGET_ASM_UNALIGNED_SI_OP
1293 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1294 #undef TARGET_ASM_UNALIGNED_DI_OP
1295 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1296 #undef TARGET_ASM_ALIGNED_DI_OP
1297 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1298 #endif
1299 #endif
1300
1301 /* This hook deals with fixups for relocatable code and DI-mode objects
1302 in 64-bit code. */
1303 #undef TARGET_ASM_INTEGER
1304 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1305
1306 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1307 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1308 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1309 #endif
1310
1311 #undef TARGET_SET_UP_BY_PROLOGUE
1312 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1313
1314 #undef TARGET_HAVE_TLS
1315 #define TARGET_HAVE_TLS HAVE_AS_TLS
1316
1317 #undef TARGET_CANNOT_FORCE_CONST_MEM
1318 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1319
1320 #undef TARGET_DELEGITIMIZE_ADDRESS
1321 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1322
1323 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1324 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1325
1326 #undef TARGET_ASM_FUNCTION_PROLOGUE
1327 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1328 #undef TARGET_ASM_FUNCTION_EPILOGUE
1329 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1330
1331 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1332 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1333
1334 #undef TARGET_LEGITIMIZE_ADDRESS
1335 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1336
1337 #undef TARGET_SCHED_VARIABLE_ISSUE
1338 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1339
1340 #undef TARGET_SCHED_ISSUE_RATE
1341 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1342 #undef TARGET_SCHED_ADJUST_COST
1343 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1344 #undef TARGET_SCHED_ADJUST_PRIORITY
1345 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1346 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1347 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1348 #undef TARGET_SCHED_INIT
1349 #define TARGET_SCHED_INIT rs6000_sched_init
1350 #undef TARGET_SCHED_FINISH
1351 #define TARGET_SCHED_FINISH rs6000_sched_finish
1352 #undef TARGET_SCHED_REORDER
1353 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1354 #undef TARGET_SCHED_REORDER2
1355 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1356
1357 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1358 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1359
1360 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1361 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1362
1363 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1364 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1365 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1366 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1367 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1368 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1369 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1370 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1371
1372 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1373 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1374 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1375 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1376 rs6000_builtin_support_vector_misalignment
1377 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1378 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1379 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1380 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1381 rs6000_builtin_vectorization_cost
1382 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1383 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1384 rs6000_preferred_simd_mode
1385 #undef TARGET_VECTORIZE_INIT_COST
1386 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1387 #undef TARGET_VECTORIZE_ADD_STMT_COST
1388 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1389 #undef TARGET_VECTORIZE_FINISH_COST
1390 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1391 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1392 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1393
1394 #undef TARGET_INIT_BUILTINS
1395 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1396 #undef TARGET_BUILTIN_DECL
1397 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1398
1399 #undef TARGET_EXPAND_BUILTIN
1400 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1401
1402 #undef TARGET_MANGLE_TYPE
1403 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1404
1405 #undef TARGET_INIT_LIBFUNCS
1406 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1407
1408 #if TARGET_MACHO
1409 #undef TARGET_BINDS_LOCAL_P
1410 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1411 #endif
1412
1413 #undef TARGET_MS_BITFIELD_LAYOUT_P
1414 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1415
1416 #undef TARGET_ASM_OUTPUT_MI_THUNK
1417 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1418
1419 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1420 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1421
1422 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1423 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1424
1425 #undef TARGET_REGISTER_MOVE_COST
1426 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1427 #undef TARGET_MEMORY_MOVE_COST
1428 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1429 #undef TARGET_RTX_COSTS
1430 #define TARGET_RTX_COSTS rs6000_rtx_costs
1431 #undef TARGET_ADDRESS_COST
1432 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1433
1434 #undef TARGET_DWARF_REGISTER_SPAN
1435 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1436
1437 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1438 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1439
1440 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1441 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1442
1443 /* On rs6000, function arguments are promoted, as are function return
1444 values. */
1445 #undef TARGET_PROMOTE_FUNCTION_MODE
1446 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
1447
1448 #undef TARGET_RETURN_IN_MEMORY
1449 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1450
1451 #undef TARGET_SETUP_INCOMING_VARARGS
1452 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1453
1454 /* Always strict argument naming on rs6000. */
1455 #undef TARGET_STRICT_ARGUMENT_NAMING
1456 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1457 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1458 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1459 #undef TARGET_SPLIT_COMPLEX_ARG
1460 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1461 #undef TARGET_MUST_PASS_IN_STACK
1462 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1463 #undef TARGET_PASS_BY_REFERENCE
1464 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1465 #undef TARGET_ARG_PARTIAL_BYTES
1466 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1467 #undef TARGET_FUNCTION_ARG_ADVANCE
1468 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1469 #undef TARGET_FUNCTION_ARG
1470 #define TARGET_FUNCTION_ARG rs6000_function_arg
1471 #undef TARGET_FUNCTION_ARG_BOUNDARY
1472 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1473
1474 #undef TARGET_BUILD_BUILTIN_VA_LIST
1475 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1476
1477 #undef TARGET_EXPAND_BUILTIN_VA_START
1478 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1479
1480 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1481 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1482
1483 #undef TARGET_EH_RETURN_FILTER_MODE
1484 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1485
1486 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1487 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1488
1489 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1490 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1491
1492 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1493 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1494
1495 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1496 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1497
1498 #undef TARGET_OPTION_OVERRIDE
1499 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1500
1501 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1502 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1503 rs6000_builtin_vectorized_function
1504
1505 #if !TARGET_MACHO
1506 #undef TARGET_STACK_PROTECT_FAIL
1507 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1508 #endif
1509
1510 /* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors
1511 The PowerPC architecture requires only weak consistency among
1512 processors--that is, memory accesses between processors need not be
1513 sequentially consistent and memory accesses among processors can occur
1514 in any order. The ability to order memory accesses weakly provides
1515 opportunities for more efficient use of the system bus. Unless a
1516 dependency exists, the 604e allows read operations to precede store
1517 operations. */
1518 #undef TARGET_RELAXED_ORDERING
1519 #define TARGET_RELAXED_ORDERING true
1520
1521 #ifdef HAVE_AS_TLS
1522 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1523 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1524 #endif
1525
1526 /* Use a 32-bit anchor range. This leads to sequences like:
1527
1528 addis tmp,anchor,high
1529 add dest,tmp,low
1530
1531 where tmp itself acts as an anchor, and can be shared between
1532 accesses to the same 64k page. */
1533 #undef TARGET_MIN_ANCHOR_OFFSET
1534 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1535 #undef TARGET_MAX_ANCHOR_OFFSET
1536 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1537 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1538 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1539 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1540 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1541
1542 #undef TARGET_BUILTIN_RECIPROCAL
1543 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1544
1545 #undef TARGET_EXPAND_TO_RTL_HOOK
1546 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1547
1548 #undef TARGET_INSTANTIATE_DECLS
1549 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1550
1551 #undef TARGET_SECONDARY_RELOAD
1552 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1553
1554 #undef TARGET_LEGITIMATE_ADDRESS_P
1555 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1556
1557 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1558 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1559
1560 #undef TARGET_LRA_P
1561 #define TARGET_LRA_P rs6000_lra_p
1562
1563 #undef TARGET_CAN_ELIMINATE
1564 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1565
1566 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1567 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1568
1569 #undef TARGET_TRAMPOLINE_INIT
1570 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1571
1572 #undef TARGET_FUNCTION_VALUE
1573 #define TARGET_FUNCTION_VALUE rs6000_function_value
1574
1575 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1576 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1577
1578 #undef TARGET_OPTION_SAVE
1579 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1580
1581 #undef TARGET_OPTION_RESTORE
1582 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1583
1584 #undef TARGET_OPTION_PRINT
1585 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1586
1587 #undef TARGET_CAN_INLINE_P
1588 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1589
1590 #undef TARGET_SET_CURRENT_FUNCTION
1591 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1592
1593 #undef TARGET_LEGITIMATE_CONSTANT_P
1594 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1595
1596 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1597 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1598
1599 #undef TARGET_CAN_USE_DOLOOP_P
1600 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1601 \f
1602
1603 /* Processor table. */
1604 struct rs6000_ptt
1605 {
1606 const char *const name; /* Canonical processor name. */
1607 const enum processor_type processor; /* Processor type enum value. */
1608 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1609 };
1610
1611 static struct rs6000_ptt const processor_target_table[] =
1612 {
1613 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1614 #include "rs6000-cpus.def"
1615 #undef RS6000_CPU
1616 };
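/* For illustration only (the real entries live in rs6000-cpus.def and the
   flags here are hypothetical): a line in the .def file such as

       RS6000_CPU ("power7", PROCESSOR_POWER7, MASK_POWERPC64 | MASK_VSX)

   expands through the X-macro above into the table initializer

       { "power7", PROCESSOR_POWER7, MASK_POWERPC64 | MASK_VSX },

   so the table stays in sync with the .def file automatically.  */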
1617
1618 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1619 name is invalid. */
1620
1621 static int
1622 rs6000_cpu_name_lookup (const char *name)
1623 {
1624 size_t i;
1625
1626 if (name != NULL)
1627 {
1628 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1629 if (! strcmp (name, processor_target_table[i].name))
1630 return (int)i;
1631 }
1632
1633 return -1;
1634 }
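/* A minimal usage sketch (hypothetical caller; use_cpu is a made-up
   helper).  The only failure indication is the -1 return value:

       int idx = rs6000_cpu_name_lookup (str);
       if (idx < 0)
         error ("unknown cpu %qs", str);
       else
         use_cpu (processor_target_table[idx].processor);  */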
1635
1636 \f
1637 /* Return number of consecutive hard regs needed starting at reg REGNO
1638 to hold something of mode MODE.
1639 This is ordinarily the length in words of a value of mode MODE
1640 but can be less for certain modes in special long registers.
1641
1642 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1643 scalar instructions. The upper 32 bits are only available to the
1644 SIMD instructions.
1645
1646 POWER and PowerPC GPRs hold 32 bits worth;
1647 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1648
1649 static int
1650 rs6000_hard_regno_nregs_internal (int regno, enum machine_mode mode)
1651 {
1652 unsigned HOST_WIDE_INT reg_size;
1653
1654 /* TF/TD modes are special in that they always take 2 registers. */
1655 if (FP_REGNO_P (regno))
1656 reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode)
1657 ? UNITS_PER_VSX_WORD
1658 : UNITS_PER_FP_WORD);
1659
1660 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1661 reg_size = UNITS_PER_SPE_WORD;
1662
1663 else if (ALTIVEC_REGNO_P (regno))
1664 reg_size = UNITS_PER_ALTIVEC_WORD;
1665
1666 /* The value returned for SCmode in the E500 double case is 2 for
1667 ABI compatibility; storing an SCmode value in a single register
1668 would require function_arg and rs6000_spe_function_arg to handle
1669 SCmode so as to pass the value correctly in a pair of
1670 registers. */
1671 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1672 && !DECIMAL_FLOAT_MODE_P (mode))
1673 reg_size = UNITS_PER_FP_WORD;
1674
1675 else
1676 reg_size = UNITS_PER_WORD;
1677
1678 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1679 }
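/* Worked example of the ceiling division above (the sizes are the
   usual ones, stated here only for illustration): a 16-byte V4SImode
   value in an Altivec register, where reg_size is
   UNITS_PER_ALTIVEC_WORD (16), needs (16 + 16 - 1) / 16 = 1 register;
   the same 16 bytes in 4-byte GPRs need (16 + 4 - 1) / 4 = 4.  */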
1680
1681 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1682 MODE. */
1683 static int
1684 rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
1685 {
1686 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1687
1688 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1689 register combinations; we use PTImode where we need to deal with quad
1690 word memory operations. Don't allow quad words in the argument or frame
1691 pointer registers, just registers 0..31. */
1692 if (mode == PTImode)
1693 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1694 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1695 && ((regno & 1) == 0));
1696
1697 /* VSX registers that overlap the FPR registers are wider than on non-VSX
1698 implementations. Don't allow an item to be split between an FP register
1699 and an Altivec register. Allow TImode in all VSX registers if the user
1700 asked for it. */
1701 if (TARGET_VSX && VSX_REGNO_P (regno)
1702 && (VECTOR_MEM_VSX_P (mode)
1703 || (TARGET_VSX_SCALAR_FLOAT && mode == SFmode)
1704 || (TARGET_VSX_SCALAR_DOUBLE && (mode == DFmode || mode == DImode))
1705 || (TARGET_VSX_TIMODE && mode == TImode)))
1706 {
1707 if (FP_REGNO_P (regno))
1708 return FP_REGNO_P (last_regno);
1709
1710 if (ALTIVEC_REGNO_P (regno))
1711 {
1712 if (mode == SFmode && !TARGET_UPPER_REGS_SF)
1713 return 0;
1714
1715 if ((mode == DFmode || mode == DImode) && !TARGET_UPPER_REGS_DF)
1716 return 0;
1717
1718 return ALTIVEC_REGNO_P (last_regno);
1719 }
1720 }
1721
1722 /* The GPRs can hold any mode, but values bigger than one register
1723 cannot go past R31. */
1724 if (INT_REGNO_P (regno))
1725 return INT_REGNO_P (last_regno);
1726
1727 /* The float registers (except for VSX vector modes) can only hold floating
1728 modes and DImode. */
1729 if (FP_REGNO_P (regno))
1730 {
1731 if (SCALAR_FLOAT_MODE_P (mode)
1732 && (mode != TDmode || (regno % 2) == 0)
1733 && FP_REGNO_P (last_regno))
1734 return 1;
1735
1736 if (GET_MODE_CLASS (mode) == MODE_INT
1737 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1738 return 1;
1739
1740 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1741 && PAIRED_VECTOR_MODE (mode))
1742 return 1;
1743
1744 return 0;
1745 }
1746
1747 /* The CR register can only hold CC modes. */
1748 if (CR_REGNO_P (regno))
1749 return GET_MODE_CLASS (mode) == MODE_CC;
1750
1751 if (CA_REGNO_P (regno))
1752 return mode == BImode;
1753
1754 /* AltiVec modes can go only in AltiVec registers. */
1755 if (ALTIVEC_REGNO_P (regno))
1756 return VECTOR_MEM_ALTIVEC_OR_VSX_P (mode);
1757
1758 /* ...but GPRs can hold SIMD data on the SPE in one register. */
1759 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1760 return 1;
1761
1762 /* We cannot put non-VSX TImode or PTImode anywhere except general registers,
1763 and the value must be able to fit within the register set. */
1764
1765 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1766 }
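/* Two examples of the checks above: a 16-byte TDmode value starting at
   an odd-numbered FPR fails the (regno % 2) == 0 test, so the pair
   must start on an even FPR; and a CCmode value is accepted only in a
   CR field, never in a GPR or FPR.  */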
1767
1768 /* Print interesting facts about registers. */
1769 static void
1770 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1771 {
1772 int r, m;
1773
1774 for (r = first_regno; r <= last_regno; ++r)
1775 {
1776 const char *comma = "";
1777 int len;
1778
1779 if (first_regno == last_regno)
1780 fprintf (stderr, "%s:\t", reg_name);
1781 else
1782 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1783
1784 len = 8;
1785 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1786 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1787 {
1788 if (len > 70)
1789 {
1790 fprintf (stderr, ",\n\t");
1791 len = 8;
1792 comma = "";
1793 }
1794
1795 if (rs6000_hard_regno_nregs[m][r] > 1)
1796 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1797 rs6000_hard_regno_nregs[m][r]);
1798 else
1799 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
1800
1801 comma = ", ";
1802 }
1803
1804 if (call_used_regs[r])
1805 {
1806 if (len > 70)
1807 {
1808 fprintf (stderr, ",\n\t");
1809 len = 8;
1810 comma = "";
1811 }
1812
1813 len += fprintf (stderr, "%s%s", comma, "call-used");
1814 comma = ", ";
1815 }
1816
1817 if (fixed_regs[r])
1818 {
1819 if (len > 70)
1820 {
1821 fprintf (stderr, ",\n\t");
1822 len = 8;
1823 comma = "";
1824 }
1825
1826 len += fprintf (stderr, "%s%s", comma, "fixed");
1827 comma = ", ";
1828 }
1829
1830 if (len > 70)
1831 {
1832 fprintf (stderr, ",\n\t");
1833 comma = "";
1834 }
1835
1836 len += fprintf (stderr, "%sreg-class = %s", comma,
1837 reg_class_names[(int)rs6000_regno_regclass[r]]);
1838 comma = ", ";
1839
1840 if (len > 70)
1841 {
1842 fprintf (stderr, ",\n\t");
1843 comma = "";
1844 }
1845
1846 fprintf (stderr, "%sregno = %d\n", comma, r);
1847 }
1848 }
1849
1850 static const char *
1851 rs6000_debug_vector_unit (enum rs6000_vector v)
1852 {
1853 const char *ret;
1854
1855 switch (v)
1856 {
1857 case VECTOR_NONE: ret = "none"; break;
1858 case VECTOR_ALTIVEC: ret = "altivec"; break;
1859 case VECTOR_VSX: ret = "vsx"; break;
1860 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
1861 case VECTOR_PAIRED: ret = "paired"; break;
1862 case VECTOR_SPE: ret = "spe"; break;
1863 case VECTOR_OTHER: ret = "other"; break;
1864 default: ret = "unknown"; break;
1865 }
1866
1867 return ret;
1868 }
1869
1870 /* Print the address masks in a human readable fashion. */
1871 DEBUG_FUNCTION void
1872 rs6000_debug_print_mode (ssize_t m)
1873 {
1874 ssize_t rc;
1875
1876 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
1877 for (rc = 0; rc < N_RELOAD_REG; rc++)
1878 {
1879 addr_mask_type mask = reg_addr[m].addr_mask[rc];
1880 fprintf (stderr,
1881 " %s: %c%c%c%c%c%c",
1882 reload_reg_map[rc].name,
1883 (mask & RELOAD_REG_VALID) != 0 ? 'v' : ' ',
1884 (mask & RELOAD_REG_MULTIPLE) != 0 ? 'm' : ' ',
1885 (mask & RELOAD_REG_INDEXED) != 0 ? 'i' : ' ',
1886 (mask & RELOAD_REG_OFFSET) != 0 ? 'o' : ' ',
1887 (mask & RELOAD_REG_PRE_INCDEC) != 0 ? '+' : ' ',
1888 (mask & RELOAD_REG_PRE_MODIFY) != 0 ? '+' : ' ');
1889 }
1890
1891 if (rs6000_vector_unit[m] != VECTOR_NONE
1892 || rs6000_vector_mem[m] != VECTOR_NONE
1893 || (reg_addr[m].reload_store != CODE_FOR_nothing)
1894 || (reg_addr[m].reload_load != CODE_FOR_nothing))
1895 {
1896 fprintf (stderr,
1897 " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c",
1898 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
1899 rs6000_debug_vector_unit (rs6000_vector_mem[m]),
1900 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
1901 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
1902 }
1903
1904 fputs ("\n", stderr);
1905 }
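/* Legend for the single-character flags printed above: 'v' = mode
   valid in this register class, 'm' = needs multiple registers, 'i' =
   REG+REG (indexed) addressing allowed, 'o' = REG+OFFSET addressing
   allowed, and the two '+' columns are PRE_INC/PRE_DEC and PRE_MODIFY
   respectively; 's'/'l' flag the presence of reload store/load
   handlers.  */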
1906
1907 #define DEBUG_FMT_ID "%-32s= "
1908 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
1909 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
1910 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
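/* Example of how these format macros compose (output shown only for
   illustration): fprintf (stderr, DEBUG_FMT_D, "tls_size", 16)
   left-justifies the name in a 32-column field, so it prints the line
   "tls_size ... = 16" with the '=' in column 33.  */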
1911
1912 /* Print various interesting information with -mdebug=reg. */
1913 static void
1914 rs6000_debug_reg_global (void)
1915 {
1916 static const char *const tf[2] = { "false", "true" };
1917 const char *nl = (const char *)0;
1918 int m;
1919 size_t m1, m2, v;
1920 char costly_num[20];
1921 char nop_num[20];
1922 char flags_buffer[40];
1923 const char *costly_str;
1924 const char *nop_str;
1925 const char *trace_str;
1926 const char *abi_str;
1927 const char *cmodel_str;
1928 struct cl_target_option cl_opts;
1929
1930 /* Modes we want tieable information on. */
1931 static const enum machine_mode print_tieable_modes[] = {
1932 QImode,
1933 HImode,
1934 SImode,
1935 DImode,
1936 TImode,
1937 PTImode,
1938 SFmode,
1939 DFmode,
1940 TFmode,
1941 SDmode,
1942 DDmode,
1943 TDmode,
1944 V8QImode,
1945 V4HImode,
1946 V2SImode,
1947 V16QImode,
1948 V8HImode,
1949 V4SImode,
1950 V2DImode,
1951 V32QImode,
1952 V16HImode,
1953 V8SImode,
1954 V4DImode,
1955 V2SFmode,
1956 V4SFmode,
1957 V2DFmode,
1958 V8SFmode,
1959 V4DFmode,
1960 CCmode,
1961 CCUNSmode,
1962 CCEQmode,
1963 };
1964
1965 /* Virtual regs we are interested in. */
1966 static const struct {
1967 int regno; /* register number. */
1968 const char *name; /* register name. */
1969 } virtual_regs[] = {
1970 { STACK_POINTER_REGNUM, "stack pointer:" },
1971 { TOC_REGNUM, "toc: " },
1972 { STATIC_CHAIN_REGNUM, "static chain: " },
1973 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
1974 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
1975 { ARG_POINTER_REGNUM, "arg pointer: " },
1976 { FRAME_POINTER_REGNUM, "frame pointer:" },
1977 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
1978 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
1979 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
1980 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
1981 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
1982 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
1983 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
1984 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
1985 { LAST_VIRTUAL_REGISTER, "last virtual: " },
1986 };
1987
1988 fputs ("\nHard register information:\n", stderr);
1989 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
1990 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
1991 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
1992 LAST_ALTIVEC_REGNO,
1993 "vs");
1994 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
1995 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
1996 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
1997 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
1998 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
1999 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2000 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2001 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2002
2003 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2004 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2005 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2006
2007 fprintf (stderr,
2008 "\n"
2009 "d reg_class = %s\n"
2010 "f reg_class = %s\n"
2011 "v reg_class = %s\n"
2012 "wa reg_class = %s\n"
2013 "wd reg_class = %s\n"
2014 "wf reg_class = %s\n"
2015 "wg reg_class = %s\n"
2016 "wl reg_class = %s\n"
2017 "wm reg_class = %s\n"
2018 "wr reg_class = %s\n"
2019 "ws reg_class = %s\n"
2020 "wt reg_class = %s\n"
2021 "wu reg_class = %s\n"
2022 "wv reg_class = %s\n"
2023 "ww reg_class = %s\n"
2024 "wx reg_class = %s\n"
2025 "wy reg_class = %s\n"
2026 "wz reg_class = %s\n"
2027 "\n",
2028 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2029 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2030 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2031 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2032 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2033 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2034 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2035 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2036 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2037 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2038 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2039 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2040 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2041 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2042 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2043 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2044 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2045 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2046
2047 nl = "\n";
2048 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2049 rs6000_debug_print_mode (m);
2050
2051 fputs ("\n", stderr);
2052
2053 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2054 {
2055 enum machine_mode mode1 = print_tieable_modes[m1];
2056 bool first_time = true;
2057
2058 nl = (const char *)0;
2059 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2060 {
2061 enum machine_mode mode2 = print_tieable_modes[m2];
2062 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2063 {
2064 if (first_time)
2065 {
2066 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2067 nl = "\n";
2068 first_time = false;
2069 }
2070
2071 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2072 }
2073 }
2074
2075 if (!first_time)
2076 fputs ("\n", stderr);
2077 }
2078
2079 if (nl)
2080 fputs (nl, stderr);
2081
2082 if (rs6000_recip_control)
2083 {
2084 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2085
2086 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2087 if (rs6000_recip_bits[m])
2088 {
2089 fprintf (stderr,
2090 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2091 GET_MODE_NAME (m),
2092 (RS6000_RECIP_AUTO_RE_P (m)
2093 ? "auto"
2094 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2095 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2096 ? "auto"
2097 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2098 }
2099
2100 fputs ("\n", stderr);
2101 }
2102
2103 if (rs6000_cpu_index >= 0)
2104 {
2105 const char *name = processor_target_table[rs6000_cpu_index].name;
2106 HOST_WIDE_INT flags
2107 = processor_target_table[rs6000_cpu_index].target_enable;
2108
2109 sprintf (flags_buffer, "-mcpu=%s flags", name);
2110 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2111 }
2112 else
2113 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2114
2115 if (rs6000_tune_index >= 0)
2116 {
2117 const char *name = processor_target_table[rs6000_tune_index].name;
2118 HOST_WIDE_INT flags
2119 = processor_target_table[rs6000_tune_index].target_enable;
2120
2121 sprintf (flags_buffer, "-mtune=%s flags", name);
2122 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2123 }
2124 else
2125 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2126
2127 cl_target_option_save (&cl_opts, &global_options);
2128 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2129 rs6000_isa_flags);
2130
2131 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2132 rs6000_isa_flags_explicit);
2133
2134 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2135 rs6000_builtin_mask);
2136
2137 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2138
2139 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2140 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2141
2142 switch (rs6000_sched_costly_dep)
2143 {
2144 case max_dep_latency:
2145 costly_str = "max_dep_latency";
2146 break;
2147
2148 case no_dep_costly:
2149 costly_str = "no_dep_costly";
2150 break;
2151
2152 case all_deps_costly:
2153 costly_str = "all_deps_costly";
2154 break;
2155
2156 case true_store_to_load_dep_costly:
2157 costly_str = "true_store_to_load_dep_costly";
2158 break;
2159
2160 case store_to_load_dep_costly:
2161 costly_str = "store_to_load_dep_costly";
2162 break;
2163
2164 default:
2165 costly_str = costly_num;
2166 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2167 break;
2168 }
2169
2170 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2171
2172 switch (rs6000_sched_insert_nops)
2173 {
2174 case sched_finish_regroup_exact:
2175 nop_str = "sched_finish_regroup_exact";
2176 break;
2177
2178 case sched_finish_pad_groups:
2179 nop_str = "sched_finish_pad_groups";
2180 break;
2181
2182 case sched_finish_none:
2183 nop_str = "sched_finish_none";
2184 break;
2185
2186 default:
2187 nop_str = nop_num;
2188 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2189 break;
2190 }
2191
2192 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2193
2194 switch (rs6000_sdata)
2195 {
2196 default:
2197 case SDATA_NONE:
2198 break;
2199
2200 case SDATA_DATA:
2201 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2202 break;
2203
2204 case SDATA_SYSV:
2205 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2206 break;
2207
2208 case SDATA_EABI:
2209 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2210 break;
2211
2212 }
2213
2214 switch (rs6000_traceback)
2215 {
2216 case traceback_default: trace_str = "default"; break;
2217 case traceback_none: trace_str = "none"; break;
2218 case traceback_part: trace_str = "part"; break;
2219 case traceback_full: trace_str = "full"; break;
2220 default: trace_str = "unknown"; break;
2221 }
2222
2223 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2224
2225 switch (rs6000_current_cmodel)
2226 {
2227 case CMODEL_SMALL: cmodel_str = "small"; break;
2228 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2229 case CMODEL_LARGE: cmodel_str = "large"; break;
2230 default: cmodel_str = "unknown"; break;
2231 }
2232
2233 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2234
2235 switch (rs6000_current_abi)
2236 {
2237 case ABI_NONE: abi_str = "none"; break;
2238 case ABI_AIX: abi_str = "aix"; break;
2239 case ABI_V4: abi_str = "V4"; break;
2240 case ABI_DARWIN: abi_str = "darwin"; break;
2241 default: abi_str = "unknown"; break;
2242 }
2243
2244 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2245
2246 if (rs6000_altivec_abi)
2247 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2248
2249 if (rs6000_spe_abi)
2250 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2251
2252 if (rs6000_darwin64_abi)
2253 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2254
2255 if (rs6000_float_gprs)
2256 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2257
2258 if (TARGET_LINK_STACK)
2259 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2260
2261 if (targetm.lra_p ())
2262 fprintf (stderr, DEBUG_FMT_S, "lra", "true");
2263
2264 if (TARGET_P8_FUSION)
2265 fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
2266 (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
2267
2268 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2269 TARGET_SECURE_PLT ? "secure" : "bss");
2270 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2271 aix_struct_return ? "aix" : "sysv");
2272 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2273 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2274 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2275 tf[!!rs6000_align_branch_targets]);
2276 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2277 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2278 rs6000_long_double_type_size);
2279 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2280 (int)rs6000_sched_restricted_insns_priority);
2281 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2282 (int)END_BUILTINS);
2283 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2284 (int)RS6000_BUILTIN_COUNT);
2285 }
2286
2287 \f
2288 /* Update the addr mask bits in reg_addr to help secondary reload and the
2289 legitimate address support figure out the appropriate addressing to
2290 use. */
2291
2292 static void
2293 rs6000_setup_reg_addr_masks (void)
2294 {
2295 ssize_t rc, reg, m, nregs;
2296 addr_mask_type any_addr_mask, addr_mask;
2297
2298 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2299 {
2300 /* SDmode is special in that we want to access it only via REG+REG
2301 addressing on power7 and above, since we want to use the LFIWZX and
2302 STFIWX instructions to load and store it. */
2303 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2304
2305 any_addr_mask = 0;
2306 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2307 {
2308 addr_mask = 0;
2309 reg = reload_reg_map[rc].reg;
2310
2311 /* Can mode values go in the GPR/FPR/Altivec registers? */
2312 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2313 {
2314 nregs = rs6000_hard_regno_nregs[m][reg];
2315 addr_mask |= RELOAD_REG_VALID;
2316
2317 /* Indicate if the mode takes more than 1 physical register. If
2318 it takes a single register, indicate it can do REG+REG
2319 addressing. */
2320 if (nregs > 1 || m == BLKmode)
2321 addr_mask |= RELOAD_REG_MULTIPLE;
2322 else
2323 addr_mask |= RELOAD_REG_INDEXED;
2324
2325 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2326 addressing. Restrict addressing on SPE for 64-bit types
2327 because of the SUBREG hackery used to address 64-bit floats in
2328 '32-bit' GPRs. To simplify secondary reload, don't allow
2329 update forms on scalar floating point types that can go in the
2330 upper registers. */
2331
2332 if (TARGET_UPDATE
2333 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2334 && GET_MODE_SIZE (m) <= 8
2335 && !VECTOR_MODE_P (m)
2336 && !COMPLEX_MODE_P (m)
2337 && !indexed_only_p
2338 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m) == 8)
2339 && !(m == DFmode && TARGET_UPPER_REGS_DF)
2340 && !(m == SFmode && TARGET_UPPER_REGS_SF))
2341 {
2342 addr_mask |= RELOAD_REG_PRE_INCDEC;
2343
2344 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2345 we don't allow PRE_MODIFY for some multi-register
2346 operations. */
2347 switch (m)
2348 {
2349 default:
2350 addr_mask |= RELOAD_REG_PRE_MODIFY;
2351 break;
2352
2353 case DImode:
2354 if (TARGET_POWERPC64)
2355 addr_mask |= RELOAD_REG_PRE_MODIFY;
2356 break;
2357
2358 case DFmode:
2359 case DDmode:
2360 if (TARGET_DF_INSN)
2361 addr_mask |= RELOAD_REG_PRE_MODIFY;
2362 break;
2363 }
2364 }
2365 }
2366
2367 /* GPR and FPR registers can do REG+OFFSET addressing, except
2368 possibly for SDmode. */
2369 if ((addr_mask != 0) && !indexed_only_p
2370 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
2371 addr_mask |= RELOAD_REG_OFFSET;
2372
2373 reg_addr[m].addr_mask[rc] = addr_mask;
2374 any_addr_mask |= addr_mask;
2375 }
2376
2377 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2378 }
2379 }
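/* Worked example (illustrative): DFmode in a GPR on a 32-bit target
   takes nregs == 2, so the loop above sets RELOAD_REG_VALID and
   RELOAD_REG_MULTIPLE (but not RELOAD_REG_INDEXED) and then adds
   RELOAD_REG_OFFSET; a single-register SImode GPR value instead ends
   up with RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET,
   plus the update-form bits when TARGET_UPDATE permits them.  */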
2380
2381 \f
2382 /* Initialize the various global tables that are based on register size. */
2383 static void
2384 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2385 {
2386 ssize_t r, m, c;
2387 int align64;
2388 int align32;
2389
2390 /* Precalculate REGNO_REG_CLASS. */
2391 rs6000_regno_regclass[0] = GENERAL_REGS;
2392 for (r = 1; r < 32; ++r)
2393 rs6000_regno_regclass[r] = BASE_REGS;
2394
2395 for (r = 32; r < 64; ++r)
2396 rs6000_regno_regclass[r] = FLOAT_REGS;
2397
2398 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2399 rs6000_regno_regclass[r] = NO_REGS;
2400
2401 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2402 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2403
2404 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2405 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2406 rs6000_regno_regclass[r] = CR_REGS;
2407
2408 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2409 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2410 rs6000_regno_regclass[CA_REGNO] = CA_REGS;
2411 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2412 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2413 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2414 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2415 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2416 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2417 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2418 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2419 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2420
2421 /* Precalculate the mapping from register class to the simpler reload
2422 register type. We don't need all of the register classes that are
2423 combinations of different classes, just the simple ones that have constraint letters. */
2424 for (c = 0; c < N_REG_CLASSES; c++)
2425 reg_class_to_reg_type[c] = NO_REG_TYPE;
2426
2427 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2428 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2429 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2430 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2431 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2432 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2433 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2434 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2435 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2436 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2437 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2438 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2439
2440 if (TARGET_VSX)
2441 {
2442 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2443 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2444 }
2445 else
2446 {
2447 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2448 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2449 }
2450
2451 /* Precalculate the valid memory formats as well as the vector information;
2452 this must be set up before the rs6000_hard_regno_nregs_internal calls
2453 below. */
2454 gcc_assert ((int)VECTOR_NONE == 0);
2455 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2456 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
2457
2458 gcc_assert ((int)CODE_FOR_nothing == 0);
2459 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2460
2461 gcc_assert ((int)NO_REGS == 0);
2462 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2463
2464 /* The VSX hardware allows native alignment for vectors; TARGET_VSX_ALIGN_128
2465 controls whether the compiler can use native alignment or must still use 128-bit alignment. */
2466 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2467 {
2468 align64 = 64;
2469 align32 = 32;
2470 }
2471 else
2472 {
2473 align64 = 128;
2474 align32 = 128;
2475 }
2476
2477 /* V2DF mode, VSX only. */
2478 if (TARGET_VSX)
2479 {
2480 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2481 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2482 rs6000_vector_align[V2DFmode] = align64;
2483 }
2484
2485 /* V4SF mode, either VSX or Altivec. */
2486 if (TARGET_VSX)
2487 {
2488 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2489 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2490 rs6000_vector_align[V4SFmode] = align32;
2491 }
2492 else if (TARGET_ALTIVEC)
2493 {
2494 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2495 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2496 rs6000_vector_align[V4SFmode] = align32;
2497 }
2498
2499 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2500 and stores. */
2501 if (TARGET_ALTIVEC)
2502 {
2503 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2504 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2505 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2506 rs6000_vector_align[V4SImode] = align32;
2507 rs6000_vector_align[V8HImode] = align32;
2508 rs6000_vector_align[V16QImode] = align32;
2509
2510 if (TARGET_VSX)
2511 {
2512 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2513 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2514 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2515 }
2516 else
2517 {
2518 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2519 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2520 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2521 }
2522 }
2523
2524 /* V2DImode: full arithmetic support depends on the ISA 2.07 vector mode; allow
2525 it under VSX for insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2526 if (TARGET_VSX)
2527 {
2528 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2529 rs6000_vector_unit[V2DImode]
2530 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2531 rs6000_vector_align[V2DImode] = align64;
2532 }
2533
2534 /* DFmode, see if we want to use the VSX unit. */
2535 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
2536 {
2537 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2538 rs6000_vector_mem[DFmode]
2539 = (TARGET_UPPER_REGS_DF ? VECTOR_VSX : VECTOR_NONE);
2540 rs6000_vector_align[DFmode] = align64;
2541 }
2542
2543 /* Allow TImode in VSX register and set the VSX memory macros. */
2544 if (TARGET_VSX && TARGET_VSX_TIMODE)
2545 {
2546 rs6000_vector_mem[TImode] = VECTOR_VSX;
2547 rs6000_vector_align[TImode] = align64;
2548 }
2549
2550 /* TODO add SPE and paired floating point vector support. */
2551
2552 /* Register class constraints for the constraints that depend on compile
2553 switches. When the VSX code was added, different constraints were added
2554 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2555 of the VSX registers are used. The register classes for scalar floating
2556 point types are set based on whether we allow that type into the upper
2557 (Altivec) registers. GCC has register classes to target the Altivec
2558 registers for load/store operations, to select using a VSX memory
2559 operation instead of the traditional floating point operation. The
2560 constraints are:
2561
2562 d - Register class to use with traditional DFmode instructions.
2563 f - Register class to use with traditional SFmode instructions.
2564 v - Altivec register.
2565 wa - Any VSX register.
2566 wd - Preferred register class for V2DFmode.
2567 wf - Preferred register class for V4SFmode.
2568 wg - Float register for power6x move insns.
2569 wl - Float register if we can do 32-bit signed int loads.
2570 wm - VSX register for ISA 2.07 direct move operations.
2571 wr - GPR if 64-bit mode is permitted.
2572 ws - Register class to do ISA 2.06 DF operations.
2573 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
2574 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
2575 wt - VSX register for TImode in VSX registers.
2576 ww - Register class to do SF conversions in with VSX operations.
2577 wx - Float register if we can do 32-bit int stores.
2578 wy - Register class to do ISA 2.07 SF operations.
2579 wz - Float register if we can do 32-bit unsigned int loads. */
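/* A sketch of how these constraints are consumed (the pattern below is
   illustrative, not quoted from rs6000.md): a DFmode move alternative
   can be written as

       (match_operand:DF 0 "vsx_register_operand" "=ws")

   and the "ws" alternative then resolves to VSX_REGS or FLOAT_REGS
   depending on whether TARGET_UPPER_REGS_DF is set (see below).  */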
2580
2581 if (TARGET_HARD_FLOAT && TARGET_FPRS)
2582 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS;
2583
2584 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
2585 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS;
2586
2587 if (TARGET_VSX)
2588 {
2589 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2590 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS;
2591 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS;
2592
2593 if (TARGET_VSX_TIMODE)
2594 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS;
2595
2596 if (TARGET_UPPER_REGS_DF)
2597 {
2598 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
2599 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
2600 }
2601 else
2602 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
2603 }
2604
2605 /* Add conditional constraints based on various options, to allow us to
2606 collapse multiple insn patterns. */
2607 if (TARGET_ALTIVEC)
2608 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2609
2610 if (TARGET_MFPGPR)
2611 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
2612
2613 if (TARGET_LFIWAX)
2614 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS;
2615
2616 if (TARGET_DIRECT_MOVE)
2617 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
2618
2619 if (TARGET_POWERPC64)
2620 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2621
2622 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF)
2623 {
2624 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
2625 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
2626 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
2627 }
2628 else if (TARGET_P8_VECTOR)
2629 {
2630 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
2631 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2632 }
2633 else if (TARGET_VSX)
2634 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2635
2636 if (TARGET_STFIWX)
2637 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;
2638
2639 if (TARGET_LFIWZX)
2640 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS;
2641
2642 /* Set up the reload helper and direct move functions. */
2643 if (TARGET_VSX || TARGET_ALTIVEC)
2644 {
2645 if (TARGET_64BIT)
2646 {
2647 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2648 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2649 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2650 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2651 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2652 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2653 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2654 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2655 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2656 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2657 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2658 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2659 if (TARGET_VSX && TARGET_UPPER_REGS_DF)
2660 {
2661 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2662 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2663 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2664 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2665 }
2666 if (TARGET_P8_VECTOR)
2667 {
2668 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2669 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2670 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2671 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2672 }
2673 if (TARGET_VSX_TIMODE)
2674 {
2675 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2676 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2677 }
2678 if (TARGET_DIRECT_MOVE)
2679 {
2680 if (TARGET_POWERPC64)
2681 {
2682 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2683 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2684 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2685 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2686 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2687 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2688 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2689 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2690
2691 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2692 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2693 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2694 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2695 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2696 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2697 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2698 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2699 }
2700 else
2701 {
2702 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
2703 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
2704 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
2705 }
2706 }
2707 }
2708 else
2709 {
2710 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
2711 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
2712 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
2713 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
2714 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
2715 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
2716 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
2717 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
2718 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
2719 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
2720 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
2721 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
2722 if (TARGET_VSX && TARGET_UPPER_REGS_DF)
2723 {
2724 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
2725 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
2726 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
2727 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
2728 }
2729 if (TARGET_P8_VECTOR)
2730 {
2731 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
2732 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
2733 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
2734 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
2735 }
2736 if (TARGET_VSX_TIMODE)
2737 {
2738 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
2739 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
2740 }
2741 }
2742 }
2743
2744 /* Precalculate HARD_REGNO_NREGS. */
2745 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2746 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2747 rs6000_hard_regno_nregs[m][r]
2748 = rs6000_hard_regno_nregs_internal (r, (enum machine_mode)m);
2749
2750 /* Precalculate HARD_REGNO_MODE_OK. */
2751 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2752 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2753 if (rs6000_hard_regno_mode_ok (r, (enum machine_mode)m))
2754 rs6000_hard_regno_mode_ok_p[m][r] = true;
2755
2756 /* Precalculate CLASS_MAX_NREGS sizes. */
2757 for (c = 0; c < LIM_REG_CLASSES; ++c)
2758 {
2759 int reg_size;
2760
2761 if (TARGET_VSX && VSX_REG_CLASS_P (c))
2762 reg_size = UNITS_PER_VSX_WORD;
2763
2764 else if (c == ALTIVEC_REGS)
2765 reg_size = UNITS_PER_ALTIVEC_WORD;
2766
2767 else if (c == FLOAT_REGS)
2768 reg_size = UNITS_PER_FP_WORD;
2769
2770 else
2771 reg_size = UNITS_PER_WORD;
2772
2773 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2774 {
2775 int reg_size2 = reg_size;
2776
2777 /* TFmode/TDmode always takes 2 registers, even in VSX. */
2778 if (TARGET_VSX && VSX_REG_CLASS_P (c)
2779 && (m == TDmode || m == TFmode))
2780 reg_size2 = UNITS_PER_FP_WORD;
2781
2782 rs6000_class_max_nregs[m][c]
2783 = (GET_MODE_SIZE (m) + reg_size2 - 1) / reg_size2;
2784 }
2785 }
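/* Example of the computation above: TFmode is 16 bytes, so for a VSX
   register class reg_size2 is forced back to UNITS_PER_FP_WORD (8) and
   the entry becomes (16 + 8 - 1) / 8 = 2, matching the rule that
   TFmode/TDmode always occupy a register pair.  */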
2786
2787 if (TARGET_E500_DOUBLE)
2788 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
2789
2790 /* Calculate the modes for which to automatically generate code using the
2791 reciprocal divide and square root instructions. In the future, possibly
2792 automatically generate the instructions even if the user did not specify
2793 -mrecip. The older machines' double precision reciprocal sqrt estimate is
2794 not accurate enough. */
2795 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
2796 if (TARGET_FRES)
2797 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2798 if (TARGET_FRE)
2799 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2800 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
2801 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2802 if (VECTOR_UNIT_VSX_P (V2DFmode))
2803 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2804
2805 if (TARGET_FRSQRTES)
2806 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2807 if (TARGET_FRSQRTE)
2808 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2809 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
2810 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2811 if (VECTOR_UNIT_VSX_P (V2DFmode))
2812 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2813
2814 if (rs6000_recip_control)
2815 {
2816 if (!flag_finite_math_only)
2817 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
2818 if (flag_trapping_math)
2819 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
2820 if (!flag_reciprocal_math)
2821 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
2822 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
2823 {
2824 if (RS6000_RECIP_HAVE_RE_P (SFmode)
2825 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
2826 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2827
2828 if (RS6000_RECIP_HAVE_RE_P (DFmode)
2829 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
2830 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2831
2832 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
2833 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
2834 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2835
2836 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
2837 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
2838 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2839
2840 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
2841 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
2842 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
2843
2844 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
2845 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
2846 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
2847
2848 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
2849 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
2850 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
2851
2852 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
2853 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
2854 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
2855 }
2856 }
2857
2858 /* Update the addr mask bits in reg_addr to help secondary reload and the
2859 legitimate address support figure out the appropriate addressing to
2860 use. */
2861 rs6000_setup_reg_addr_masks ();
2862
2863 if (global_init_p || TARGET_DEBUG_TARGET)
2864 {
2865 if (TARGET_DEBUG_REG)
2866 rs6000_debug_reg_global ();
2867
2868 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
2869 fprintf (stderr,
2870 "SImode variable mult cost = %d\n"
2871 "SImode constant mult cost = %d\n"
2872 "SImode short constant mult cost = %d\n"
2873 "DImode multipliciation cost = %d\n"
2874 "SImode division cost = %d\n"
2875 "DImode division cost = %d\n"
2876 "Simple fp operation cost = %d\n"
2877 "DFmode multiplication cost = %d\n"
2878 "SFmode division cost = %d\n"
2879 "DFmode division cost = %d\n"
2880 "cache line size = %d\n"
2881 "l1 cache size = %d\n"
2882 "l2 cache size = %d\n"
2883 "simultaneous prefetches = %d\n"
2884 "\n",
2885 rs6000_cost->mulsi,
2886 rs6000_cost->mulsi_const,
2887 rs6000_cost->mulsi_const9,
2888 rs6000_cost->muldi,
2889 rs6000_cost->divsi,
2890 rs6000_cost->divdi,
2891 rs6000_cost->fp,
2892 rs6000_cost->dmul,
2893 rs6000_cost->sdiv,
2894 rs6000_cost->ddiv,
2895 rs6000_cost->cache_line_size,
2896 rs6000_cost->l1_cache_size,
2897 rs6000_cost->l2_cache_size,
2898 rs6000_cost->simultaneous_prefetches);
2899 }
2900 }
2901
2902 #if TARGET_MACHO
2903 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
2904
2905 static void
2906 darwin_rs6000_override_options (void)
2907 {
2908 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
2909 off. */
2910 rs6000_altivec_abi = 1;
2911 TARGET_ALTIVEC_VRSAVE = 1;
2912 rs6000_current_abi = ABI_DARWIN;
2913
2914 if (DEFAULT_ABI == ABI_DARWIN
2915 && TARGET_64BIT)
2916 darwin_one_byte_bool = 1;
2917
2918 if (TARGET_64BIT && ! TARGET_POWERPC64)
2919 {
2920 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
2921 warning (0, "-m64 requires PowerPC64 architecture, enabling");
2922 }
2923 if (flag_mkernel)
2924 {
2925 rs6000_default_long_calls = 1;
2926 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
2927 }
2928
2929 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
2930 Altivec. */
2931 if (!flag_mkernel && !flag_apple_kext
2932 && TARGET_64BIT
2933 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
2934 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
2935
2936 /* Unless the user (not the configurer) has explicitly overridden
2937 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
2938 G4 unless targeting the kernel. */
2939 if (!flag_mkernel
2940 && !flag_apple_kext
2941 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
2942 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
2943 && ! global_options_set.x_rs6000_cpu_index)
2944 {
2945 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
2946 }
2947 }
2948 #endif
2949
2950 /* If not otherwise specified by a target, make 'long double' equivalent to
2951 'double'. */
2952
2953 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
2954 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
2955 #endif
2956
2957 /* Return the builtin mask of the various options that affect which
2958 builtins are available. In the past we used target_flags, but we've run out of
2959 bits, and some options like SPE and PAIRED are no longer in
2960 target_flags. */
2961
2962 HOST_WIDE_INT
2963 rs6000_builtin_mask_calculate (void)
2964 {
2965 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
2966 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
2967 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
2968 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
2969 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
2970 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
2971 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
2972 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
2973 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
2974 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
2975 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
2976 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
2977 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0));
2978 }
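/* A minimal usage sketch (hypothetical caller; enable_altivec_builtins
   is a made-up helper): code that keys builtin availability off the
   current options can test this mask instead of target_flags bits:

       HOST_WIDE_INT mask = rs6000_builtin_mask_calculate ();
       if ((mask & RS6000_BTM_ALTIVEC) != 0)
         enable_altivec_builtins ();  */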
2979
2980 /* Override command line options. Mostly we process the processor type and
2981 sometimes adjust other TARGET_ options. */
2982
2983 static bool
2984 rs6000_option_override_internal (bool global_init_p)
2985 {
2986 bool ret = true;
2987 bool have_cpu = false;
2988
2989 /* The default cpu requested at configure time, if any. */
2990 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
2991
2992 HOST_WIDE_INT set_masks;
2993 int cpu_index;
2994 int tune_index;
2995 struct cl_target_option *main_target_opt
2996 = ((global_init_p || target_option_default_node == NULL)
2997 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
2998
2999 /* Remember the explicit arguments. */
3000 if (global_init_p)
3001 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3002
3003 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3004 library functions, so warn about it. The flag may be useful for
3005 performance studies from time to time though, so don't disable it
3006 entirely. */
3007 if (global_options_set.x_rs6000_alignment_flags
3008 && rs6000_alignment_flags == MASK_ALIGN_POWER
3009 && DEFAULT_ABI == ABI_DARWIN
3010 && TARGET_64BIT)
3011 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3012 " it is incompatible with the installed C and C++ libraries");
3013
3014 /* Numerous experiments show that IRA-based loop pressure
3015 calculation works better for RTL loop invariant motion on targets
3016 with enough (>= 32) registers. It is an expensive optimization,
3017 so it is enabled only when optimizing for peak performance. */
3018 if (optimize >= 3 && global_init_p)
3019 flag_ira_loop_pressure = 1;
3020
3021 /* Set the pointer size. */
3022 if (TARGET_64BIT)
3023 {
3024 rs6000_pmode = (int)DImode;
3025 rs6000_pointer_size = 64;
3026 }
3027 else
3028 {
3029 rs6000_pmode = (int)SImode;
3030 rs6000_pointer_size = 32;
3031 }
3032
3033 /* Some OSs don't support saving the high part of 64-bit registers on context
3034 switch. Other OSs don't support saving Altivec registers. On those OSs,
3035 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3036 if the user wants either, the user must explicitly specify them and we
3037 won't interfere with the user's specification. */
3038
3039 set_masks = POWERPC_MASKS;
3040 #ifdef OS_MISSING_POWERPC64
3041 if (OS_MISSING_POWERPC64)
3042 set_masks &= ~OPTION_MASK_POWERPC64;
3043 #endif
3044 #ifdef OS_MISSING_ALTIVEC
3045 if (OS_MISSING_ALTIVEC)
3046 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3047 #endif
3048
3049 /* Don't override by the processor default if given explicitly. */
3050 set_masks &= ~rs6000_isa_flags_explicit;
3051
3052 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3053 the cpu in a target attribute or pragma, but did not specify a tuning
3054 option, use the cpu for the tuning option rather than the option specified
3055 with -mtune on the command line. Process a '--with-cpu' configuration
3056 request as an implicit --cpu. */
3057 if (rs6000_cpu_index >= 0)
3058 {
3059 cpu_index = rs6000_cpu_index;
3060 have_cpu = true;
3061 }
3062 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3063 {
3064 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3065 have_cpu = true;
3066 }
3067 else if (implicit_cpu)
3068 {
3069 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3070 have_cpu = true;
3071 }
3072 else
3073 {
3074 const char *default_cpu = (TARGET_POWERPC64 ? "powerpc64" : "powerpc");
3075 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3076 have_cpu = false;
3077 }
3078
3079 gcc_assert (cpu_index >= 0);
3080
3081 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3082 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3083 with those from the cpu, except for options that were explicitly set. If
3084 we don't have a cpu, do not override the target bits set in
3085 TARGET_DEFAULT. */
3086 if (have_cpu)
3087 {
3088 rs6000_isa_flags &= ~set_masks;
3089 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3090 & set_masks);
3091 }
3092 else
3093 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3094 & ~rs6000_isa_flags_explicit);
3095
3096 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3097 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3098 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3099 to using rs6000_isa_flags, we need to do the initialization here. */
3100 if (!have_cpu)
3101 rs6000_isa_flags |= (TARGET_DEFAULT & ~rs6000_isa_flags_explicit);
3102
3103 if (rs6000_tune_index >= 0)
3104 tune_index = rs6000_tune_index;
3105 else if (have_cpu)
3106 rs6000_tune_index = tune_index = cpu_index;
3107 else
3108 {
3109 size_t i;
3110 enum processor_type tune_proc
3111 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3112
3113 tune_index = -1;
3114 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3115 if (processor_target_table[i].processor == tune_proc)
3116 {
3117 rs6000_tune_index = tune_index = i;
3118 break;
3119 }
3120 }
3121
3122 gcc_assert (tune_index >= 0);
3123 rs6000_cpu = processor_target_table[tune_index].processor;
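/* To summarize the selection just performed: an explicit -mcpu= wins,
   then a cpu recorded by a target attribute or pragma, then the
   configure-time --with-cpu default, and finally a generic
   powerpc/powerpc64 entry; -mtune= falls back to the chosen cpu when
   it is not given explicitly.  */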
3124
3125 /* Pick defaults for SPE related control flags. Do this early to make sure
3126 that the TARGET_ macros are representative ASAP. */
3127 {
3128 int spe_capable_cpu =
3129 (rs6000_cpu == PROCESSOR_PPC8540
3130 || rs6000_cpu == PROCESSOR_PPC8548);
3131
3132 if (!global_options_set.x_rs6000_spe_abi)
3133 rs6000_spe_abi = spe_capable_cpu;
3134
3135 if (!global_options_set.x_rs6000_spe)
3136 rs6000_spe = spe_capable_cpu;
3137
3138 if (!global_options_set.x_rs6000_float_gprs)
3139 rs6000_float_gprs =
3140 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3141 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3142 : 0);
3143 }
3144
3145 if (global_options_set.x_rs6000_spe_abi
3146 && rs6000_spe_abi
3147 && !TARGET_SPE_ABI)
3148 error ("not configured for SPE ABI");
3149
3150 if (global_options_set.x_rs6000_spe
3151 && rs6000_spe
3152 && !TARGET_SPE)
3153 error ("not configured for SPE instruction set");
3154
3155 if (main_target_opt != NULL
3156 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3157 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3158 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3159 error ("target attribute or pragma changes SPE ABI");
3160
3161 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3162 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3163 || rs6000_cpu == PROCESSOR_PPCE5500)
3164 {
3165 if (TARGET_ALTIVEC)
3166 error ("AltiVec not supported in this target");
3167 if (TARGET_SPE)
3168 error ("SPE not supported in this target");
3169 }
3170 if (rs6000_cpu == PROCESSOR_PPCE6500)
3171 {
3172 if (TARGET_SPE)
3173 error ("SPE not supported in this target");
3174 }
3175
3176 /* Disable Cell microcode if we are optimizing for the Cell
3177 and not optimizing for size. */
3178 if (rs6000_gen_cell_microcode == -1)
3179 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3180 && !optimize_size);
3181
3182 /* If we are optimizing big endian systems for space and it's OK to
3183 use instructions that would be microcoded on the Cell, use the
3184 load/store multiple and string instructions. */
3185 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3186 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3187 | OPTION_MASK_STRING);
3188
3189 /* Don't allow -mmultiple or -mstring on little endian systems
3190 unless the cpu is a 750, because the hardware doesn't support the
3191 instructions used in little endian mode and takes an alignment
3192 trap. The 750 does not take an alignment trap (except when the
3193 target address is unaligned). */
3194
3195 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3196 {
3197 if (TARGET_MULTIPLE)
3198 {
3199 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3200 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3201 warning (0, "-mmultiple is not supported on little endian systems");
3202 }
3203
3204 if (TARGET_STRING)
3205 {
3206 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3207 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3208 warning (0, "-mstring is not supported on little endian systems");
3209 }
3210 }
3211
3212 /* Add some warnings for VSX. */
3213 if (TARGET_VSX)
3214 {
3215 const char *msg = NULL;
3216 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3217 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3218 {
3219 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3220 msg = N_("-mvsx requires hardware floating point");
3221 else
3222 {
3223 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3224 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3225 }
3226 }
3227 else if (TARGET_PAIRED_FLOAT)
3228 msg = N_("-mvsx and -mpaired are incompatible");
3229 else if (TARGET_AVOID_XFORM > 0)
3230 msg = N_("-mvsx needs indexed addressing");
3231 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3232 & OPTION_MASK_ALTIVEC))
3233 {
3234 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3235 msg = N_("-mvsx and -mno-altivec are incompatible");
3236 else
3237 msg = N_("-mno-altivec disables vsx");
3238 }
3239
3240 if (msg)
3241 {
3242 warning (0, msg);
3243 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3244 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3245 }
3246 }
3247
3248 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3249 the -mcpu setting to enable options that conflict. */
3250 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3251 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3252 | OPTION_MASK_ALTIVEC
3253 | OPTION_MASK_VSX)) != 0)
3254 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3255 | OPTION_MASK_DIRECT_MOVE)
3256 & ~rs6000_isa_flags_explicit);
3257
3258 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3259 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3260
3261 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3262 unless the user explicitly used -mno-<option> to disable the code. */
3263 if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3264 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3265 else if (TARGET_VSX)
3266 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3267 else if (TARGET_POPCNTD)
3268 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3269 else if (TARGET_DFP)
3270 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3271 else if (TARGET_CMPB)
3272 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3273 else if (TARGET_FPRND)
3274 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
3275 else if (TARGET_POPCNTB)
3276 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
3277 else if (TARGET_ALTIVEC)
3278 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
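/* For example, a bare -mvsx lands in the TARGET_VSX arm above and pulls in
   the ISA 2.06 server defaults, so earlier-ISA options (popcntd, for
   instance) become enabled as well unless the user gave an explicit
   -mno-<option>; the ~rs6000_isa_flags_explicit term is what filters those
   explicit requests out.  */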
3279
3280 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3281 {
3282 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3283 error ("-mcrypto requires -maltivec");
3284 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3285 }
3286
3287 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3288 {
3289 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3290 error ("-mdirect-move requires -mvsx");
3291 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3292 }
3293
3294 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3295 {
3296 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3297 error ("-mpower8-vector requires -maltivec");
3298 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3299 }
3300
3301 if (TARGET_P8_VECTOR && !TARGET_VSX)
3302 {
3303 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3304 error ("-mpower8-vector requires -mvsx");
3305 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3306 }
3307
3308 if (TARGET_VSX_TIMODE && !TARGET_VSX)
3309 {
3310 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
3311 error ("-mvsx-timode requires -mvsx");
3312 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
3313 }
3314
3315 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3316 silently turn off quad memory mode. */
3317 if (TARGET_QUAD_MEMORY && !TARGET_POWERPC64)
3318 {
3319 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3320 warning (0, N_("-mquad-memory requires 64-bit mode"));
3321
3322 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3323 }
3324
3325 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3326 generating power8 instructions. */
3327 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3328 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
3329 & OPTION_MASK_P8_FUSION);
3330
3331 /* Power8 does not fuse sign extended loads with the addis. If we are
3332 optimizing at high levels for speed, convert a sign extended load into a
3333 zero extending load, and an explicit sign extension. */
3334 if (TARGET_P8_FUSION
3335 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3336 && optimize_function_for_speed_p (cfun)
3337 && optimize >= 3)
3338 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3339
3340 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3341 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
3342
3343 /* E500mc does "better" if we inline more aggressively. Respect the
3344 user's opinion, though. */
3345 if (rs6000_block_move_inline_limit == 0
3346 && (rs6000_cpu == PROCESSOR_PPCE500MC
3347 || rs6000_cpu == PROCESSOR_PPCE500MC64
3348 || rs6000_cpu == PROCESSOR_PPCE5500
3349 || rs6000_cpu == PROCESSOR_PPCE6500))
3350 rs6000_block_move_inline_limit = 128;
3351
3352 /* store_one_arg depends on expand_block_move to handle at least the
3353 size of reg_parm_stack_space. */
3354 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
3355 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
3356
3357 if (global_init_p)
3358 {
3359 /* If the appropriate debug option is enabled, replace the target hooks
3360 with debug versions that call the real version and then print
3361 debugging information. */
3362 if (TARGET_DEBUG_COST)
3363 {
3364 targetm.rtx_costs = rs6000_debug_rtx_costs;
3365 targetm.address_cost = rs6000_debug_address_cost;
3366 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
3367 }
3368
3369 if (TARGET_DEBUG_ADDR)
3370 {
3371 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
3372 targetm.legitimize_address = rs6000_debug_legitimize_address;
3373 rs6000_secondary_reload_class_ptr
3374 = rs6000_debug_secondary_reload_class;
3375 rs6000_secondary_memory_needed_ptr
3376 = rs6000_debug_secondary_memory_needed;
3377 rs6000_cannot_change_mode_class_ptr
3378 = rs6000_debug_cannot_change_mode_class;
3379 rs6000_preferred_reload_class_ptr
3380 = rs6000_debug_preferred_reload_class;
3381 rs6000_legitimize_reload_address_ptr
3382 = rs6000_debug_legitimize_reload_address;
3383 rs6000_mode_dependent_address_ptr
3384 = rs6000_debug_mode_dependent_address;
3385 }
3386
3387 if (rs6000_veclibabi_name)
3388 {
3389 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
3390 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
3391 else
3392 {
3393 error ("unknown vectorization library ABI type (%s) for "
3394 "-mveclibabi= switch", rs6000_veclibabi_name);
3395 ret = false;
3396 }
3397 }
3398 }
3399
3400 if (!global_options_set.x_rs6000_long_double_type_size)
3401 {
3402 if (main_target_opt != NULL
3403 && (main_target_opt->x_rs6000_long_double_type_size
3404 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
3405 error ("target attribute or pragma changes long double size");
3406 else
3407 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
3408 }
3409
3410 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
3411 if (!global_options_set.x_rs6000_ieeequad)
3412 rs6000_ieeequad = 1;
3413 #endif
3414
3415 /* Disable VSX and AltiVec silently if the user switched cpus to power7 in a
3416 target attribute or pragma which automatically enables both options,
3417 unless the AltiVec ABI was set. The AltiVec ABI is the default for
3418 64-bit, but not for 32-bit. */
3419 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3420 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3421 & ~rs6000_isa_flags_explicit);
3422
3423 /* Enable Altivec ABI for AIX -maltivec. */
3424 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
3425 {
3426 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3427 error ("target attribute or pragma changes AltiVec ABI");
3428 else
3429 rs6000_altivec_abi = 1;
3430 }
3431
3432 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
3433 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
3434 be explicitly overridden in either case. */
3435 if (TARGET_ELF)
3436 {
3437 if (!global_options_set.x_rs6000_altivec_abi
3438 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
3439 {
3440 if (main_target_opt != NULL
3441 && !main_target_opt->x_rs6000_altivec_abi)
3442 error ("target attribute or pragma changes AltiVec ABI");
3443 else
3444 rs6000_altivec_abi = 1;
3445 }
3446 }
3447
3448 /* Set the Darwin64 ABI as default for 64-bit Darwin.
3449 So far, the only darwin64 targets are also MACH-O. */
3450 if (TARGET_MACHO
3451 && DEFAULT_ABI == ABI_DARWIN
3452 && TARGET_64BIT)
3453 {
3454 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
3455 error ("target attribute or pragma changes darwin64 ABI");
3456 else
3457 {
3458 rs6000_darwin64_abi = 1;
3459 /* Default to natural alignment, for better performance. */
3460 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3461 }
3462 }
3463
3464 /* Place FP constants in the constant pool instead of the TOC
3465 if section anchors are enabled. */
3466 if (flag_section_anchors
3467 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
3468 TARGET_NO_FP_IN_TOC = 1;
3469
3470 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3471 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
3472
3473 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3474 SUBTARGET_OVERRIDE_OPTIONS;
3475 #endif
3476 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3477 SUBSUBTARGET_OVERRIDE_OPTIONS;
3478 #endif
3479 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
3480 SUB3TARGET_OVERRIDE_OPTIONS;
3481 #endif
3482
3483 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3484 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
3485
3486 /* For the E500 family of cores, reset the single/double FP flags to let us
3487 check that they remain constant across attributes or pragmas. Also,
3488 clear a possible request for string instructions, which are not supported
3489 and which we might have silently enabled above for -Os.
3490
3491 For other families, clear ISEL in case it was set implicitly. */
3493
3494 switch (rs6000_cpu)
3495 {
3496 case PROCESSOR_PPC8540:
3497 case PROCESSOR_PPC8548:
3498 case PROCESSOR_PPCE500MC:
3499 case PROCESSOR_PPCE500MC64:
3500 case PROCESSOR_PPCE5500:
3501 case PROCESSOR_PPCE6500:
3502
3503 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
3504 rs6000_double_float = TARGET_E500_DOUBLE;
3505
3506 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3507
3508 break;
3509
3510 default:
3511
3512 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
3513 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
3514
3515 break;
3516 }
3517
3518 if (main_target_opt)
3519 {
3520 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
3521 error ("target attribute or pragma changes single precision floating "
3522 "point");
3523 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
3524 error ("target attribute or pragma changes double precision floating "
3525 "point");
3526 }
3527
3528 /* Detect invalid option combinations with E500. */
3529 CHECK_E500_OPTIONS;
3530
3531 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
3532 && rs6000_cpu != PROCESSOR_POWER5
3533 && rs6000_cpu != PROCESSOR_POWER6
3534 && rs6000_cpu != PROCESSOR_POWER7
3535 && rs6000_cpu != PROCESSOR_POWER8
3536 && rs6000_cpu != PROCESSOR_PPCA2
3537 && rs6000_cpu != PROCESSOR_CELL
3538 && rs6000_cpu != PROCESSOR_PPC476);
3539 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
3540 || rs6000_cpu == PROCESSOR_POWER5
3541 || rs6000_cpu == PROCESSOR_POWER7
3542 || rs6000_cpu == PROCESSOR_POWER8);
3543 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
3544 || rs6000_cpu == PROCESSOR_POWER5
3545 || rs6000_cpu == PROCESSOR_POWER6
3546 || rs6000_cpu == PROCESSOR_POWER7
3547 || rs6000_cpu == PROCESSOR_POWER8
3548 || rs6000_cpu == PROCESSOR_PPCE500MC
3549 || rs6000_cpu == PROCESSOR_PPCE500MC64
3550 || rs6000_cpu == PROCESSOR_PPCE5500
3551 || rs6000_cpu == PROCESSOR_PPCE6500);
3552
3553 /* Allow debug switches to override the above settings. These are set to -1
3554 in rs6000.opt to indicate the user hasn't directly set the switch. */
3555 if (TARGET_ALWAYS_HINT >= 0)
3556 rs6000_always_hint = TARGET_ALWAYS_HINT;
3557
3558 if (TARGET_SCHED_GROUPS >= 0)
3559 rs6000_sched_groups = TARGET_SCHED_GROUPS;
3560
3561 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
3562 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
3563
3564 rs6000_sched_restricted_insns_priority
3565 = (rs6000_sched_groups ? 1 : 0);
3566
3567 /* Handle -msched-costly-dep option. */
3568 rs6000_sched_costly_dep
3569 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
3570
3571 if (rs6000_sched_costly_dep_str)
3572 {
3573 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
3574 rs6000_sched_costly_dep = no_dep_costly;
3575 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
3576 rs6000_sched_costly_dep = all_deps_costly;
3577 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
3578 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
3579 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
3580 rs6000_sched_costly_dep = store_to_load_dep_costly;
3581 else
3582 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
3583 atoi (rs6000_sched_costly_dep_str));
3584 }
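/* Usage sketch: -msched-costly-dep=store_to_load selects the enumerator of
   the same name above, while a bare number such as -msched-costly-dep=20
   falls through to the atoi case and is kept as a raw value (presumably
   used as a latency threshold by the costly-dependence check).  */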
3585
3586 /* Handle -minsert-sched-nops option. */
3587 rs6000_sched_insert_nops
3588 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
3589
3590 if (rs6000_sched_insert_nops_str)
3591 {
3592 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
3593 rs6000_sched_insert_nops = sched_finish_none;
3594 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
3595 rs6000_sched_insert_nops = sched_finish_pad_groups;
3596 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
3597 rs6000_sched_insert_nops = sched_finish_regroup_exact;
3598 else
3599 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
3600 atoi (rs6000_sched_insert_nops_str));
3601 }
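/* Likewise, -minsert-sched-nops accepts "no", "pad", "regroup_exact", or a
   number; anything else falls through to atoi in the same way.  */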
3602
3603 if (global_init_p)
3604 {
3605 #ifdef TARGET_REGNAMES
3606 /* If the user desires alternate register names, copy in the
3607 alternate names now. */
3608 if (TARGET_REGNAMES)
3609 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
3610 #endif
3611
3612 /* Set aix_struct_return last, after the ABI is determined.
3613 If -maix-struct-return or -msvr4-struct-return was explicitly
3614 used, don't override with the ABI default. */
3615 if (!global_options_set.x_aix_struct_return)
3616 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
3617
3618 #if 0
3619 /* IBM XL compiler defaults to unsigned bitfields. */
3620 if (TARGET_XL_COMPAT)
3621 flag_signed_bitfields = 0;
3622 #endif
3623
3624 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
3625 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
3626
3627 if (TARGET_TOC)
3628 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
3629
3630 /* We can only guarantee the availability of DI pseudo-ops when
3631 assembling for 64-bit targets. */
3632 if (!TARGET_64BIT)
3633 {
3634 targetm.asm_out.aligned_op.di = NULL;
3635 targetm.asm_out.unaligned_op.di = NULL;
3636 }
3637
3638
3639 /* Set branch target alignment, if not optimizing for size. */
3640 if (!optimize_size)
3641 {
3642 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
3643 8-byte aligned to avoid misprediction by the branch predictor. */
3644 if (rs6000_cpu == PROCESSOR_TITAN
3645 || rs6000_cpu == PROCESSOR_CELL)
3646 {
3647 if (align_functions <= 0)
3648 align_functions = 8;
3649 if (align_jumps <= 0)
3650 align_jumps = 8;
3651 if (align_loops <= 0)
3652 align_loops = 8;
3653 }
3654 if (rs6000_align_branch_targets)
3655 {
3656 if (align_functions <= 0)
3657 align_functions = 16;
3658 if (align_jumps <= 0)
3659 align_jumps = 16;
3660 if (align_loops <= 0)
3661 {
3662 can_override_loop_align = 1;
3663 align_loops = 16;
3664 }
3665 }
3666 if (align_jumps_max_skip <= 0)
3667 align_jumps_max_skip = 15;
3668 if (align_loops_max_skip <= 0)
3669 align_loops_max_skip = 15;
3670 }
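/* A worked example: when rs6000_align_branch_targets is set, functions,
   jumps and loops are aligned to 16 bytes, and up to 15 bytes of padding
   (the max_skip values above) may be emitted to reach that boundary.  */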
3671
3672 /* Arrange to save and restore machine status around nested functions. */
3673 init_machine_status = rs6000_init_machine_status;
3674
3675 /* We should always be splitting complex arguments, but we can't break
3676 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
3677 if (DEFAULT_ABI != ABI_AIX)
3678 targetm.calls.split_complex_arg = NULL;
3679 }
3680
3681 /* Initialize rs6000_cost with the appropriate target costs. */
3682 if (optimize_size)
3683 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
3684 else
3685 switch (rs6000_cpu)
3686 {
3687 case PROCESSOR_RS64A:
3688 rs6000_cost = &rs64a_cost;
3689 break;
3690
3691 case PROCESSOR_MPCCORE:
3692 rs6000_cost = &mpccore_cost;
3693 break;
3694
3695 case PROCESSOR_PPC403:
3696 rs6000_cost = &ppc403_cost;
3697 break;
3698
3699 case PROCESSOR_PPC405:
3700 rs6000_cost = &ppc405_cost;
3701 break;
3702
3703 case PROCESSOR_PPC440:
3704 rs6000_cost = &ppc440_cost;
3705 break;
3706
3707 case PROCESSOR_PPC476:
3708 rs6000_cost = &ppc476_cost;
3709 break;
3710
3711 case PROCESSOR_PPC601:
3712 rs6000_cost = &ppc601_cost;
3713 break;
3714
3715 case PROCESSOR_PPC603:
3716 rs6000_cost = &ppc603_cost;
3717 break;
3718
3719 case PROCESSOR_PPC604:
3720 rs6000_cost = &ppc604_cost;
3721 break;
3722
3723 case PROCESSOR_PPC604e:
3724 rs6000_cost = &ppc604e_cost;
3725 break;
3726
3727 case PROCESSOR_PPC620:
3728 rs6000_cost = &ppc620_cost;
3729 break;
3730
3731 case PROCESSOR_PPC630:
3732 rs6000_cost = &ppc630_cost;
3733 break;
3734
3735 case PROCESSOR_CELL:
3736 rs6000_cost = &ppccell_cost;
3737 break;
3738
3739 case PROCESSOR_PPC750:
3740 case PROCESSOR_PPC7400:
3741 rs6000_cost = &ppc750_cost;
3742 break;
3743
3744 case PROCESSOR_PPC7450:
3745 rs6000_cost = &ppc7450_cost;
3746 break;
3747
3748 case PROCESSOR_PPC8540:
3749 case PROCESSOR_PPC8548:
3750 rs6000_cost = &ppc8540_cost;
3751 break;
3752
3753 case PROCESSOR_PPCE300C2:
3754 case PROCESSOR_PPCE300C3:
3755 rs6000_cost = &ppce300c2c3_cost;
3756 break;
3757
3758 case PROCESSOR_PPCE500MC:
3759 rs6000_cost = &ppce500mc_cost;
3760 break;
3761
3762 case PROCESSOR_PPCE500MC64:
3763 rs6000_cost = &ppce500mc64_cost;
3764 break;
3765
3766 case PROCESSOR_PPCE5500:
3767 rs6000_cost = &ppce5500_cost;
3768 break;
3769
3770 case PROCESSOR_PPCE6500:
3771 rs6000_cost = &ppce6500_cost;
3772 break;
3773
3774 case PROCESSOR_TITAN:
3775 rs6000_cost = &titan_cost;
3776 break;
3777
3778 case PROCESSOR_POWER4:
3779 case PROCESSOR_POWER5:
3780 rs6000_cost = &power4_cost;
3781 break;
3782
3783 case PROCESSOR_POWER6:
3784 rs6000_cost = &power6_cost;
3785 break;
3786
3787 case PROCESSOR_POWER7:
3788 rs6000_cost = &power7_cost;
3789 break;
3790
3791 case PROCESSOR_POWER8:
3792 rs6000_cost = &power8_cost;
3793 break;
3794
3795 case PROCESSOR_PPCA2:
3796 rs6000_cost = &ppca2_cost;
3797 break;
3798
3799 default:
3800 gcc_unreachable ();
3801 }
3802
3803 if (global_init_p)
3804 {
3805 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3806 rs6000_cost->simultaneous_prefetches,
3807 global_options.x_param_values,
3808 global_options_set.x_param_values);
3809 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
3810 global_options.x_param_values,
3811 global_options_set.x_param_values);
3812 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3813 rs6000_cost->cache_line_size,
3814 global_options.x_param_values,
3815 global_options_set.x_param_values);
3816 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
3817 global_options.x_param_values,
3818 global_options_set.x_param_values);
3819
3820 /* Increase loop peeling limits based on performance analysis. */
3821 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
3822 global_options.x_param_values,
3823 global_options_set.x_param_values);
3824 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
3825 global_options.x_param_values,
3826 global_options_set.x_param_values);
3827
3828 /* If using typedef char *va_list, signal that
3829 __builtin_va_start (&ap, 0) can be optimized to
3830 ap = __builtin_next_arg (0). */
3831 if (DEFAULT_ABI != ABI_V4)
3832 targetm.expand_builtin_va_start = NULL;
3833 }
3834
3835 /* Set up single/double float flags.
3836 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
3837 then set both flags. */
3838 if (TARGET_HARD_FLOAT && TARGET_FPRS
3839 && rs6000_single_float == 0 && rs6000_double_float == 0)
3840 rs6000_single_float = rs6000_double_float = 1;
3841
3842 /* If not explicitly specified via option, decide whether to generate indexed
3843 load/store instructions. */
3844 if (TARGET_AVOID_XFORM == -1)
3845 /* Avoid indexed addressing when targeting Power6 in order to avoid the
3846 DERAT mispredict penalty. However, the LVE and STVE AltiVec instructions
3847 need indexed accesses, and the type used is the scalar type of the
3848 element being loaded or stored. */
3849 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
3850 && !TARGET_ALTIVEC);
3851
3852 /* Set the -mrecip options. */
3853 if (rs6000_recip_name)
3854 {
3855 char *p = ASTRDUP (rs6000_recip_name);
3856 char *q;
3857 unsigned int mask, i;
3858 bool invert;
3859
3860 while ((q = strtok (p, ",")) != NULL)
3861 {
3862 p = NULL;
3863 if (*q == '!')
3864 {
3865 invert = true;
3866 q++;
3867 }
3868 else
3869 invert = false;
3870
3871 if (!strcmp (q, "default"))
3872 mask = ((TARGET_RECIP_PRECISION)
3873 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
3874 else
3875 {
3876 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
3877 if (!strcmp (q, recip_options[i].string))
3878 {
3879 mask = recip_options[i].mask;
3880 break;
3881 }
3882
3883 if (i == ARRAY_SIZE (recip_options))
3884 {
3885 error ("unknown option for -mrecip=%s", q);
3886 invert = false;
3887 mask = 0;
3888 ret = false;
3889 }
3890 }
3891
3892 if (invert)
3893 rs6000_recip_control &= ~mask;
3894 else
3895 rs6000_recip_control |= mask;
3896 }
3897 }
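/* Usage sketch: an option string such as -mrecip=default,!<option> first
   installs the default mask (low or high precision depending on
   TARGET_RECIP_PRECISION) and then, because of the '!' prefix, clears the
   bits of the named entry from recip_options[].  */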
3898
3899 /* Set the builtin mask from the various options that can affect which
3900 builtins are enabled. In the past we used target_flags, but we've run out
3901 of bits, and some options like SPE and PAIRED are no longer in
3902 target_flags. */
3903 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
3904 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
3905 {
3906 fprintf (stderr,
3907 "new builtin mask = " HOST_WIDE_INT_PRINT_HEX ", ",
3908 rs6000_builtin_mask);
3909 rs6000_print_builtin_options (stderr, 0, NULL, rs6000_builtin_mask);
3910 }
3911
3912 /* Initialize all of the registers. */
3913 rs6000_init_hard_regno_mode_ok (global_init_p);
3914
3915 /* Save the initial options in case the user uses function-specific options. */
3916 if (global_init_p)
3917 target_option_default_node = target_option_current_node
3918 = build_target_option_node (&global_options);
3919
3920 /* If not explicitly specified via option, decide whether to generate the
3921 extra blr's required to preserve the link stack on some cpus (eg, 476). */
3922 if (TARGET_LINK_STACK == -1)
3923 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
3924
3925 return ret;
3926 }
3927
3928 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
3929 define the target cpu type. */
3930
3931 static void
3932 rs6000_option_override (void)
3933 {
3934 (void) rs6000_option_override_internal (true);
3935 }
3936
3937 \f
3938 /* Implement targetm.vectorize.builtin_mask_for_load. */
3939 static tree
3940 rs6000_builtin_mask_for_load (void)
3941 {
3942 if (TARGET_ALTIVEC || TARGET_VSX)
3943 return altivec_builtin_mask_for_load;
3944 else
3945 return 0;
3946 }
3947
3948 /* Implement LOOP_ALIGN. */
3949 int
3950 rs6000_loop_align (rtx label)
3951 {
3952 basic_block bb;
3953 int ninsns;
3954
3955 /* Don't override loop alignment if -falign-loops was specified. */
3956 if (!can_override_loop_align)
3957 return align_loops_log;
3958
3959 bb = BLOCK_FOR_INSN (label);
3960 ninsns = num_loop_insns (bb->loop_father);
3961
3962 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
3963 if (ninsns > 4 && ninsns <= 8
3964 && (rs6000_cpu == PROCESSOR_POWER4
3965 || rs6000_cpu == PROCESSOR_POWER5
3966 || rs6000_cpu == PROCESSOR_POWER6
3967 || rs6000_cpu == PROCESSOR_POWER7
3968 || rs6000_cpu == PROCESSOR_POWER8))
3969 return 5;
3970 else
3971 return align_loops_log;
3972 }
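/* For example, returning 5 above requests 1 << 5 == 32-byte alignment,
   matching the icache-sector comment; all other cases keep the
   -falign-loops default.  */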
3973
3974 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
3975 static int
3976 rs6000_loop_align_max_skip (rtx label)
3977 {
3978 return (1 << rs6000_loop_align (label)) - 1;
3979 }
3980
3981 /* Return true iff a data reference of TYPE can reach vector alignment (16)
3982 after applying N iterations. This routine does not determine how many
3983 iterations are required to reach the desired alignment. */
3984
3985 static bool
3986 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
3987 {
3988 if (is_packed)
3989 return false;
3990
3991 if (TARGET_32BIT)
3992 {
3993 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
3994 return true;
3995
3996 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
3997 return true;
3998
3999 return false;
4000 }
4001 else
4002 {
4003 if (TARGET_MACHO)
4004 return false;
4005
4006 /* Assuming that all other types are naturally aligned. CHECKME! */
4007 return true;
4008 }
4009 }
4010
4011 /* Return true if the vector misalignment factor is supported by the
4012 target. */
4013 static bool
4014 rs6000_builtin_support_vector_misalignment (enum machine_mode mode,
4015 const_tree type,
4016 int misalignment,
4017 bool is_packed)
4018 {
4019 if (TARGET_VSX)
4020 {
4021 /* Return false if the movmisalign pattern is not supported for this mode. */
4022 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4023 return false;
4024
4025 if (misalignment == -1)
4026 {
4027 /* Misalignment factor is unknown at compile time but we know
4028 it's word aligned. */
4029 if (rs6000_vector_alignment_reachable (type, is_packed))
4030 {
4031 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4032
4033 if (element_size == 64 || element_size == 32)
4034 return true;
4035 }
4036
4037 return false;
4038 }
4039
4040 /* VSX supports word-aligned vectors. */
4041 if (misalignment % 4 == 0)
4042 return true;
4043 }
4044 return false;
4045 }
4046
4047 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4048 static int
4049 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4050 tree vectype, int misalign)
4051 {
4052 unsigned elements;
4053 tree elem_type;
4054
4055 switch (type_of_cost)
4056 {
4057 case scalar_stmt:
4058 case scalar_load:
4059 case scalar_store:
4060 case vector_stmt:
4061 case vector_load:
4062 case vector_store:
4063 case vec_to_scalar:
4064 case scalar_to_vec:
4065 case cond_branch_not_taken:
4066 return 1;
4067
4068 case vec_perm:
4069 if (TARGET_VSX)
4070 return 3;
4071 else
4072 return 1;
4073
4074 case vec_promote_demote:
4075 if (TARGET_VSX)
4076 return 4;
4077 else
4078 return 1;
4079
4080 case cond_branch_taken:
4081 return 3;
4082
4083 case unaligned_load:
4084 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4085 {
4086 elements = TYPE_VECTOR_SUBPARTS (vectype);
4087 if (elements == 2)
4088 /* Double word aligned. */
4089 return 2;
4090
4091 if (elements == 4)
4092 {
4093 switch (misalign)
4094 {
4095 case 8:
4096 /* Double word aligned. */
4097 return 2;
4098
4099 case -1:
4100 /* Unknown misalignment. */
4101 case 4:
4102 case 12:
4103 /* Word aligned. */
4104 return 22;
4105
4106 default:
4107 gcc_unreachable ();
4108 }
4109 }
4110 }
4111
4112 if (TARGET_ALTIVEC)
4113 /* Misaligned loads are not supported. */
4114 gcc_unreachable ();
4115
4116 return 2;
4117
4118 case unaligned_store:
4119 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4120 {
4121 elements = TYPE_VECTOR_SUBPARTS (vectype);
4122 if (elements == 2)
4123 /* Double word aligned. */
4124 return 2;
4125
4126 if (elements == 4)
4127 {
4128 switch (misalign)
4129 {
4130 case 8:
4131 /* Double word aligned. */
4132 return 2;
4133
4134 case -1:
4135 /* Unknown misalignment. */
4136 case 4:
4137 case 12:
4138 /* Word aligned. */
4139 return 23;
4140
4141 default:
4142 gcc_unreachable ();
4143 }
4144 }
4145 }
4146
4147 if (TARGET_ALTIVEC)
4148 /* Misaligned stores are not supported. */
4149 gcc_unreachable ();
4150
4151 return 2;
4152
4153 case vec_construct:
4154 elements = TYPE_VECTOR_SUBPARTS (vectype);
4155 elem_type = TREE_TYPE (vectype);
4156 /* 32-bit vectors loaded into registers are stored as double
4157 precision, so we need n/2 converts in addition to the usual
4158 n/2 merges to construct a vector of short floats from them. */
4159 if (SCALAR_FLOAT_TYPE_P (elem_type)
4160 && TYPE_PRECISION (elem_type) == 32)
4161 return elements + 1;
4162 else
4163 return elements / 2 + 1;
4164
4165 default:
4166 gcc_unreachable ();
4167 }
4168 }
4169
4170 /* Implement targetm.vectorize.preferred_simd_mode. */
4171
4172 static enum machine_mode
4173 rs6000_preferred_simd_mode (enum machine_mode mode)
4174 {
4175 if (TARGET_VSX)
4176 switch (mode)
4177 {
4178 case DFmode:
4179 return V2DFmode;
4180 default:;
4181 }
4182 if (TARGET_ALTIVEC || TARGET_VSX)
4183 switch (mode)
4184 {
4185 case SFmode:
4186 return V4SFmode;
4187 case DImode:
4188 return V2DImode;
4189 case SImode:
4190 return V4SImode;
4191 case HImode:
4192 return V8HImode;
4193 case QImode:
4194 return V16QImode;
4195 default:;
4196 }
4197 if (TARGET_SPE)
4198 switch (mode)
4199 {
4200 case SFmode:
4201 return V2SFmode;
4202 case SImode:
4203 return V2SImode;
4204 default:;
4205 }
4206 if (TARGET_PAIRED_FLOAT
4207 && mode == SFmode)
4208 return V2SFmode;
4209 return word_mode;
4210 }
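/* For example, when vectorizing a loop over doubles with VSX enabled, the
   vectorizer queries this hook with DFmode and receives V2DFmode; with no
   vector unit available it gets word_mode back, which effectively declines
   vectorization for that element mode.  */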
4211
4212 typedef struct _rs6000_cost_data
4213 {
4214 struct loop *loop_info;
4215 unsigned cost[3];
4216 } rs6000_cost_data;
4217
4218 /* Test for likely overcommitment of vector hardware resources. If a
4219 loop iteration is relatively large, and too large a percentage of
4220 instructions in the loop are vectorized, the cost model may not
4221 adequately reflect delays from unavailable vector resources.
4222 Penalize the loop body cost for this case. */
4223
4224 static void
4225 rs6000_density_test (rs6000_cost_data *data)
4226 {
4227 const int DENSITY_PCT_THRESHOLD = 85;
4228 const int DENSITY_SIZE_THRESHOLD = 70;
4229 const int DENSITY_PENALTY = 10;
4230 struct loop *loop = data->loop_info;
4231 basic_block *bbs = get_loop_body (loop);
4232 int nbbs = loop->num_nodes;
4233 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4234 int i, density_pct;
4235
4236 for (i = 0; i < nbbs; i++)
4237 {
4238 basic_block bb = bbs[i];
4239 gimple_stmt_iterator gsi;
4240
4241 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4242 {
4243 gimple stmt = gsi_stmt (gsi);
4244 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4245
4246 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4247 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4248 not_vec_cost++;
4249 }
4250 }
4251
4252 free (bbs);
4253 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4254
4255 if (density_pct > DENSITY_PCT_THRESHOLD
4256 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4257 {
4258 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4259 if (dump_enabled_p ())
4260 dump_printf_loc (MSG_NOTE, vect_location,
4261 "density %d%%, cost %d exceeds threshold, penalizing "
4262 "loop body cost by %d%%", density_pct,
4263 vec_cost + not_vec_cost, DENSITY_PENALTY);
4264 }
4265 }
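/* A numeric sketch of the test above: with vec_cost == 90 and
   not_vec_cost == 10, density_pct is 90 and the combined size is 100, so
   both thresholds are exceeded and the body cost is raised to
   90 * 110 / 100 == 99.  */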
4266
4267 /* Implement targetm.vectorize.init_cost. */
4268
4269 static void *
4270 rs6000_init_cost (struct loop *loop_info)
4271 {
4272 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4273 data->loop_info = loop_info;
4274 data->cost[vect_prologue] = 0;
4275 data->cost[vect_body] = 0;
4276 data->cost[vect_epilogue] = 0;
4277 return data;
4278 }
4279
4280 /* Implement targetm.vectorize.add_stmt_cost. */
4281
4282 static unsigned
4283 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4284 struct _stmt_vec_info *stmt_info, int misalign,
4285 enum vect_cost_model_location where)
4286 {
4287 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4288 unsigned retval = 0;
4289
4290 if (flag_vect_cost_model)
4291 {
4292 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4293 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
4294 misalign);
4295 /* Statements in an inner loop relative to the loop being
4296 vectorized are weighted more heavily. The value here is
4297 arbitrary and could potentially be improved with analysis. */
4298 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4299 count *= 50; /* FIXME. */
4300
4301 retval = (unsigned) (count * stmt_cost);
4302 cost_data->cost[where] += retval;
4303 }
4304
4305 return retval;
4306 }
4307
4308 /* Implement targetm.vectorize.finish_cost. */
4309
4310 static void
4311 rs6000_finish_cost (void *data, unsigned *prologue_cost,
4312 unsigned *body_cost, unsigned *epilogue_cost)
4313 {
4314 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4315
4316 if (cost_data->loop_info)
4317 rs6000_density_test (cost_data);
4318
4319 *prologue_cost = cost_data->cost[vect_prologue];
4320 *body_cost = cost_data->cost[vect_body];
4321 *epilogue_cost = cost_data->cost[vect_epilogue];
4322 }
4323
4324 /* Implement targetm.vectorize.destroy_cost_data. */
4325
4326 static void
4327 rs6000_destroy_cost_data (void *data)
4328 {
4329 free (data);
4330 }
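/* Together, the four hooks above form the vectorizer's cost-model
   lifecycle: init_cost allocates the per-loop accumulator, add_stmt_cost
   runs once per statement considered, finish_cost reports the
   prologue/body/epilogue totals (after the density adjustment), and
   destroy_cost_data frees the accumulator.  */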
4331
4332 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
4333 library with vectorized intrinsics. */
4334
4335 static tree
4336 rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
4337 {
4338 char name[32];
4339 const char *suffix = NULL;
4340 tree fntype, new_fndecl, bdecl = NULL_TREE;
4341 int n_args = 1;
4342 const char *bname;
4343 enum machine_mode el_mode, in_mode;
4344 int n, in_n;
4345
4346 /* Libmass is suitable for unsafe math only, as it does not correctly
4347 support parts of IEEE arithmetic with the required precision, such as
4348 denormals. Only support it if we have VSX to use the simd d2 or f4
4349 functions. XXX: Add variable length support. */
4350 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
4351 return NULL_TREE;
4352
4353 el_mode = TYPE_MODE (TREE_TYPE (type_out));
4354 n = TYPE_VECTOR_SUBPARTS (type_out);
4355 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4356 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4357 if (el_mode != in_mode
4358 || n != in_n)
4359 return NULL_TREE;
4360
4361 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4362 {
4363 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4364 switch (fn)
4365 {
4366 case BUILT_IN_ATAN2:
4367 case BUILT_IN_HYPOT:
4368 case BUILT_IN_POW:
4369 n_args = 2;
4370 /* fall through */
4371
4372 case BUILT_IN_ACOS:
4373 case BUILT_IN_ACOSH:
4374 case BUILT_IN_ASIN:
4375 case BUILT_IN_ASINH:
4376 case BUILT_IN_ATAN:
4377 case BUILT_IN_ATANH:
4378 case BUILT_IN_CBRT:
4379 case BUILT_IN_COS:
4380 case BUILT_IN_COSH:
4381 case BUILT_IN_ERF:
4382 case BUILT_IN_ERFC:
4383 case BUILT_IN_EXP2:
4384 case BUILT_IN_EXP:
4385 case BUILT_IN_EXPM1:
4386 case BUILT_IN_LGAMMA:
4387 case BUILT_IN_LOG10:
4388 case BUILT_IN_LOG1P:
4389 case BUILT_IN_LOG2:
4390 case BUILT_IN_LOG:
4391 case BUILT_IN_SIN:
4392 case BUILT_IN_SINH:
4393 case BUILT_IN_SQRT:
4394 case BUILT_IN_TAN:
4395 case BUILT_IN_TANH:
4396 bdecl = builtin_decl_implicit (fn);
4397 suffix = "d2"; /* pow -> powd2 */
4398 if (el_mode != DFmode
4399 || n != 2
4400 || !bdecl)
4401 return NULL_TREE;
4402 break;
4403
4404 case BUILT_IN_ATAN2F:
4405 case BUILT_IN_HYPOTF:
4406 case BUILT_IN_POWF:
4407 n_args = 2;
4408 /* fall through */
4409
4410 case BUILT_IN_ACOSF:
4411 case BUILT_IN_ACOSHF:
4412 case BUILT_IN_ASINF:
4413 case BUILT_IN_ASINHF:
4414 case BUILT_IN_ATANF:
4415 case BUILT_IN_ATANHF:
4416 case BUILT_IN_CBRTF:
4417 case BUILT_IN_COSF:
4418 case BUILT_IN_COSHF:
4419 case BUILT_IN_ERFF:
4420 case BUILT_IN_ERFCF:
4421 case BUILT_IN_EXP2F:
4422 case BUILT_IN_EXPF:
4423 case BUILT_IN_EXPM1F:
4424 case BUILT_IN_LGAMMAF:
4425 case BUILT_IN_LOG10F:
4426 case BUILT_IN_LOG1PF:
4427 case BUILT_IN_LOG2F:
4428 case BUILT_IN_LOGF:
4429 case BUILT_IN_SINF:
4430 case BUILT_IN_SINHF:
4431 case BUILT_IN_SQRTF:
4432 case BUILT_IN_TANF:
4433 case BUILT_IN_TANHF:
4434 bdecl = builtin_decl_implicit (fn);
4435 suffix = "4"; /* powf -> powf4 */
4436 if (el_mode != SFmode
4437 || n != 4
4438 || !bdecl)
4439 return NULL_TREE;
4440 break;
4441
4442 default:
4443 return NULL_TREE;
4444 }
4445 }
4446 else
4447 return NULL_TREE;
4448
4449 gcc_assert (suffix != NULL);
4450 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
4451 if (!bname)
4452 return NULL_TREE;
4453
4454 strcpy (name, bname + sizeof ("__builtin_") - 1);
4455 strcat (name, suffix);
4456
4457 if (n_args == 1)
4458 fntype = build_function_type_list (type_out, type_in, NULL);
4459 else if (n_args == 2)
4460 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
4461 else
4462 gcc_unreachable ();
4463
4464 /* Build a function declaration for the vectorized function. */
4465 new_fndecl = build_decl (BUILTINS_LOCATION,
4466 FUNCTION_DECL, get_identifier (name), fntype);
4467 TREE_PUBLIC (new_fndecl) = 1;
4468 DECL_EXTERNAL (new_fndecl) = 1;
4469 DECL_IS_NOVOPS (new_fndecl) = 1;
4470 TREE_READONLY (new_fndecl) = 1;
4471
4472 return new_fndecl;
4473 }
4474
4475 /* Returns a function decl for a vectorized version of the builtin function
4476 with builtin function code FN and the result vector type TYPE, or NULL_TREE
4477 if it is not available. */
4478
4479 static tree
4480 rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
4481 tree type_in)
4482 {
4483 enum machine_mode in_mode, out_mode;
4484 int in_n, out_n;
4485
4486 if (TARGET_DEBUG_BUILTIN)
4487 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
4488 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
4489 GET_MODE_NAME (TYPE_MODE (type_out)),
4490 GET_MODE_NAME (TYPE_MODE (type_in)));
4491
4492 if (TREE_CODE (type_out) != VECTOR_TYPE
4493 || TREE_CODE (type_in) != VECTOR_TYPE
4494 || !TARGET_VECTORIZE_BUILTINS)
4495 return NULL_TREE;
4496
4497 out_mode = TYPE_MODE (TREE_TYPE (type_out));
4498 out_n = TYPE_VECTOR_SUBPARTS (type_out);
4499 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4500 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4501
4502 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4503 {
4504 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4505 switch (fn)
4506 {
4507 case BUILT_IN_CLZIMAX:
4508 case BUILT_IN_CLZLL:
4509 case BUILT_IN_CLZL:
4510 case BUILT_IN_CLZ:
4511 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4512 {
4513 if (out_mode == QImode && out_n == 16)
4514 return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
4515 else if (out_mode == HImode && out_n == 8)
4516 return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
4517 else if (out_mode == SImode && out_n == 4)
4518 return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
4519 else if (out_mode == DImode && out_n == 2)
4520 return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
4521 }
4522 break;
4523 case BUILT_IN_COPYSIGN:
4524 if (VECTOR_UNIT_VSX_P (V2DFmode)
4525 && out_mode == DFmode && out_n == 2
4526 && in_mode == DFmode && in_n == 2)
4527 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
4528 break;
4529 case BUILT_IN_COPYSIGNF:
4530 if (out_mode != SFmode || out_n != 4
4531 || in_mode != SFmode || in_n != 4)
4532 break;
4533 if (VECTOR_UNIT_VSX_P (V4SFmode))
4534 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
4535 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4536 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
4537 break;
4538 case BUILT_IN_POPCOUNTIMAX:
4539 case BUILT_IN_POPCOUNTLL:
4540 case BUILT_IN_POPCOUNTL:
4541 case BUILT_IN_POPCOUNT:
4542 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4543 {
4544 if (out_mode == QImode && out_n == 16)
4545 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
4546 else if (out_mode == HImode && out_n == 8)
4547 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
4548 else if (out_mode == SImode && out_n == 4)
4549 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
4550 else if (out_mode == DImode && out_n == 2)
4551 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
4552 }
4553 break;
4554 case BUILT_IN_SQRT:
4555 if (VECTOR_UNIT_VSX_P (V2DFmode)
4556 && out_mode == DFmode && out_n == 2
4557 && in_mode == DFmode && in_n == 2)
4558 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
4559 break;
4560 case BUILT_IN_SQRTF:
4561 if (VECTOR_UNIT_VSX_P (V4SFmode)
4562 && out_mode == SFmode && out_n == 4
4563 && in_mode == SFmode && in_n == 4)
4564 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
4565 break;
4566 case BUILT_IN_CEIL:
4567 if (VECTOR_UNIT_VSX_P (V2DFmode)
4568 && out_mode == DFmode && out_n == 2
4569 && in_mode == DFmode && in_n == 2)
4570 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
4571 break;
4572 case BUILT_IN_CEILF:
4573 if (out_mode != SFmode || out_n != 4
4574 || in_mode != SFmode || in_n != 4)
4575 break;
4576 if (VECTOR_UNIT_VSX_P (V4SFmode))
4577 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
4578 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4579 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
4580 break;
4581 case BUILT_IN_FLOOR:
4582 if (VECTOR_UNIT_VSX_P (V2DFmode)
4583 && out_mode == DFmode && out_n == 2
4584 && in_mode == DFmode && in_n == 2)
4585 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
4586 break;
4587 case BUILT_IN_FLOORF:
4588 if (out_mode != SFmode || out_n != 4
4589 || in_mode != SFmode || in_n != 4)
4590 break;
4591 if (VECTOR_UNIT_VSX_P (V4SFmode))
4592 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
4593 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4594 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
4595 break;
4596 case BUILT_IN_FMA:
4597 if (VECTOR_UNIT_VSX_P (V2DFmode)
4598 && out_mode == DFmode && out_n == 2
4599 && in_mode == DFmode && in_n == 2)
4600 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
4601 break;
4602 case BUILT_IN_FMAF:
4603 if (VECTOR_UNIT_VSX_P (V4SFmode)
4604 && out_mode == SFmode && out_n == 4
4605 && in_mode == SFmode && in_n == 4)
4606 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
4607 else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
4608 && out_mode == SFmode && out_n == 4
4609 && in_mode == SFmode && in_n == 4)
4610 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
4611 break;
4612 case BUILT_IN_TRUNC:
4613 if (VECTOR_UNIT_VSX_P (V2DFmode)
4614 && out_mode == DFmode && out_n == 2
4615 && in_mode == DFmode && in_n == 2)
4616 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
4617 break;
4618 case BUILT_IN_TRUNCF:
4619 if (out_mode != SFmode || out_n != 4
4620 || in_mode != SFmode || in_n != 4)
4621 break;
4622 if (VECTOR_UNIT_VSX_P (V4SFmode))
4623 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
4624 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4625 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
4626 break;
4627 case BUILT_IN_NEARBYINT:
4628 if (VECTOR_UNIT_VSX_P (V2DFmode)
4629 && flag_unsafe_math_optimizations
4630 && out_mode == DFmode && out_n == 2
4631 && in_mode == DFmode && in_n == 2)
4632 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
4633 break;
4634 case BUILT_IN_NEARBYINTF:
4635 if (VECTOR_UNIT_VSX_P (V4SFmode)
4636 && flag_unsafe_math_optimizations
4637 && out_mode == SFmode && out_n == 4
4638 && in_mode == SFmode && in_n == 4)
4639 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
4640 break;
4641 case BUILT_IN_RINT:
4642 if (VECTOR_UNIT_VSX_P (V2DFmode)
4643 && !flag_trapping_math
4644 && out_mode == DFmode && out_n == 2
4645 && in_mode == DFmode && in_n == 2)
4646 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
4647 break;
4648 case BUILT_IN_RINTF:
4649 if (VECTOR_UNIT_VSX_P (V4SFmode)
4650 && !flag_trapping_math
4651 && out_mode == SFmode && out_n == 4
4652 && in_mode == SFmode && in_n == 4)
4653 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
4654 break;
4655 default:
4656 break;
4657 }
4658 }
4659
4660 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
4661 {
4662 enum rs6000_builtins fn
4663 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
4664 switch (fn)
4665 {
4666 case RS6000_BUILTIN_RSQRTF:
4667 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4668 && out_mode == SFmode && out_n == 4
4669 && in_mode == SFmode && in_n == 4)
4670 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
4671 break;
4672 case RS6000_BUILTIN_RSQRT:
4673 if (VECTOR_UNIT_VSX_P (V2DFmode)
4674 && out_mode == DFmode && out_n == 2
4675 && in_mode == DFmode && in_n == 2)
4676 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
4677 break;
4678 case RS6000_BUILTIN_RECIPF:
4679 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4680 && out_mode == SFmode && out_n == 4
4681 && in_mode == SFmode && in_n == 4)
4682 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
4683 break;
4684 case RS6000_BUILTIN_RECIP:
4685 if (VECTOR_UNIT_VSX_P (V2DFmode)
4686 && out_mode == DFmode && out_n == 2
4687 && in_mode == DFmode && in_n == 2)
4688 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
4689 break;
4690 default:
4691 break;
4692 }
4693 }
4694
4695 /* Generate calls to libmass if appropriate. */
4696 if (rs6000_veclib_handler)
4697 return rs6000_veclib_handler (fndecl, type_out, type_in);
4698
4699 return NULL_TREE;
4700 }
4701 \f
4702 /* Default CPU string for rs6000*_file_start functions. */
4703 static const char *rs6000_default_cpu;
4704
4705 /* Do anything needed at the start of the asm file. */
4706
4707 static void
4708 rs6000_file_start (void)
4709 {
4710 char buffer[80];
4711 const char *start = buffer;
4712 FILE *file = asm_out_file;
4713
4714 rs6000_default_cpu = TARGET_CPU_DEFAULT;
4715
4716 default_file_start ();
4717
4718 if (flag_verbose_asm)
4719 {
4720 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
4721
4722 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
4723 {
4724 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
4725 start = "";
4726 }
4727
4728 if (global_options_set.x_rs6000_cpu_index)
4729 {
4730 fprintf (file, "%s -mcpu=%s", start,
4731 processor_target_table[rs6000_cpu_index].name);
4732 start = "";
4733 }
4734
4735 if (global_options_set.x_rs6000_tune_index)
4736 {
4737 fprintf (file, "%s -mtune=%s", start,
4738 processor_target_table[rs6000_tune_index].name);
4739 start = "";
4740 }
4741
4742 if (PPC405_ERRATUM77)
4743 {
4744 fprintf (file, "%s PPC405CR_ERRATUM77", start);
4745 start = "";
4746 }
4747
4748 #ifdef USING_ELFOS_H
4749 switch (rs6000_sdata)
4750 {
4751 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
4752 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
4753 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
4754 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
4755 }
4756
4757 if (rs6000_sdata && g_switch_value)
4758 {
4759 fprintf (file, "%s -G %d", start,
4760 g_switch_value);
4761 start = "";
4762 }
4763 #endif
4764
4765 if (*start == '\0')
4766 putc ('\n', file);
4767 }
4768
4769 if (DEFAULT_ABI == ABI_AIX || (TARGET_ELF && flag_pic == 2))
4770 {
4771 switch_to_section (toc_section);
4772 switch_to_section (text_section);
4773 }
4774 }
4775
4776 \f
4777 /* Return nonzero if this function is known to have a null epilogue. */
4778
4779 int
4780 direct_return (void)
4781 {
4782 if (reload_completed)
4783 {
4784 rs6000_stack_t *info = rs6000_stack_info ();
4785
4786 if (info->first_gp_reg_save == 32
4787 && info->first_fp_reg_save == 64
4788 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
4789 && ! info->lr_save_p
4790 && ! info->cr_save_p
4791 && info->vrsave_mask == 0
4792 && ! info->push_p)
4793 return 1;
4794 }
4795
4796 return 0;
4797 }
4798
4799 /* Return the number of instructions it takes to form a constant in an
4800 integer register. */
4801
4802 int
4803 num_insns_constant_wide (HOST_WIDE_INT value)
4804 {
4805 /* signed constant loadable with addi */
4806 if ((unsigned HOST_WIDE_INT) (value + 0x8000) < 0x10000)
4807 return 1;
4808
4809 /* constant loadable with addis */
4810 else if ((value & 0xffff) == 0
4811 && (value >> 31 == -1 || value >> 31 == 0))
4812 return 1;
4813
4814 else if (TARGET_POWERPC64)
4815 {
4816 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
4817 HOST_WIDE_INT high = value >> 31;
4818
4819 if (high == 0 || high == -1)
4820 return 2;
4821
4822 high >>= 1;
4823
4824 if (low == 0)
4825 return num_insns_constant_wide (high) + 1;
4826 else if (high == 0)
4827 return num_insns_constant_wide (low) + 1;
4828 else
4829 return (num_insns_constant_wide (high)
4830 + num_insns_constant_wide (low) + 1);
4831 }
4832
4833 else
4834 return 2;
4835 }
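/* A few worked cases for the routine above: 0x7fff passes the addi test and
   costs one insn; 0x12340000 has a zero low halfword and sign-extends from
   bit 31, so a single addis suffices; a full 64-bit constant whose high and
   low halves are both nonzero recurses on each half and pays the extra insn
   counted by the "+ 1" terms for combining them.  */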
4836
4837 int
4838 num_insns_constant (rtx op, enum machine_mode mode)
4839 {
4840 HOST_WIDE_INT low, high;
4841
4842 switch (GET_CODE (op))
4843 {
4844 case CONST_INT:
4845 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
4846 && mask64_operand (op, mode))
4847 return 2;
4848 else
4849 return num_insns_constant_wide (INTVAL (op));
4850
4851 case CONST_DOUBLE:
4852 if (mode == SFmode || mode == SDmode)
4853 {
4854 long l;
4855 REAL_VALUE_TYPE rv;
4856
4857 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
4858 if (DECIMAL_FLOAT_MODE_P (mode))
4859 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
4860 else
4861 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
4862 return num_insns_constant_wide ((HOST_WIDE_INT) l);
4863 }
4864
4865 long l[2];
4866 REAL_VALUE_TYPE rv;
4867
4868 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
4869 if (DECIMAL_FLOAT_MODE_P (mode))
4870 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
4871 else
4872 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
4873 high = l[WORDS_BIG_ENDIAN == 0];
4874 low = l[WORDS_BIG_ENDIAN != 0];
4875
4876 if (TARGET_32BIT)
4877 return (num_insns_constant_wide (low)
4878 + num_insns_constant_wide (high));
4879 else
4880 {
4881 if ((high == 0 && low >= 0)
4882 || (high == -1 && low < 0))
4883 return num_insns_constant_wide (low);
4884
4885 else if (mask64_operand (op, mode))
4886 return 2;
4887
4888 else if (low == 0)
4889 return num_insns_constant_wide (high) + 1;
4890
4891 else
4892 return (num_insns_constant_wide (high)
4893 + num_insns_constant_wide (low) + 1);
4894 }
4895
4896 default:
4897 gcc_unreachable ();
4898 }
4899 }
4900
4901 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
4902 If the mode of OP is MODE_VECTOR_INT, this simply returns the
4903 corresponding element of the vector, but for V4SFmode and V2SFmode,
4904 the corresponding "float" is interpreted as an SImode integer. */
4905
4906 HOST_WIDE_INT
4907 const_vector_elt_as_int (rtx op, unsigned int elt)
4908 {
4909 rtx tmp;
4910
4911 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
4912 gcc_assert (GET_MODE (op) != V2DImode
4913 && GET_MODE (op) != V2DFmode);
4914
4915 tmp = CONST_VECTOR_ELT (op, elt);
4916 if (GET_MODE (op) == V4SFmode
4917 || GET_MODE (op) == V2SFmode)
4918 tmp = gen_lowpart (SImode, tmp);
4919 return INTVAL (tmp);
4920 }
4921
4922 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
4923 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
4924 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
4925 all items are set to the same value and contain COPIES replicas of the
4926 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
4927 operand and the others are set to the value of the operand's msb. */
4928
4929 static bool
4930 vspltis_constant (rtx op, unsigned step, unsigned copies)
4931 {
4932 enum machine_mode mode = GET_MODE (op);
4933 enum machine_mode inner = GET_MODE_INNER (mode);
4934
4935 unsigned i;
4936 unsigned nunits;
4937 unsigned bitsize;
4938 unsigned mask;
4939
4940 HOST_WIDE_INT val;
4941 HOST_WIDE_INT splat_val;
4942 HOST_WIDE_INT msb_val;
4943
4944 if (mode == V2DImode || mode == V2DFmode)
4945 return false;
4946
4947 nunits = GET_MODE_NUNITS (mode);
4948 bitsize = GET_MODE_BITSIZE (inner);
4949 mask = GET_MODE_MASK (inner);
4950
4951 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
4952 splat_val = val;
4953 msb_val = val > 0 ? 0 : -1;
4954
4955 /* Construct the value to be splatted, if possible. If not, return false. */
4956 for (i = 2; i <= copies; i *= 2)
4957 {
4958 HOST_WIDE_INT small_val;
4959 bitsize /= 2;
4960 small_val = splat_val >> bitsize;
4961 mask >>= bitsize;
4962 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
4963 return false;
4964 splat_val = small_val;
4965 }
4966
4967 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
4968 if (EASY_VECTOR_15 (splat_val))
4969 ;
4970
4971 /* Also check if we can splat, and then add the result to itself. Do so if
4972 the value is positive, or if the splat instruction is using OP's mode;
4973 for splat_val < 0, the splat and the add should use the same mode. */
4974 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
4975 && (splat_val >= 0 || (step == 1 && copies == 1)))
4976 ;
4977
4978 /* Also check if we are loading up the most significant bit, which can be
4979 done by loading up -1 and shifting the value left by -1. */
4980 else if (EASY_VECTOR_MSB (splat_val, inner))
4981 ;
4982
4983 else
4984 return false;
4985
4986 /* Check if VAL is present in every STEP-th element, and the
4987 other elements are filled with its most significant bit. */
4988 for (i = 1; i < nunits; ++i)
4989 {
4990 HOST_WIDE_INT desired_val;
4991 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
4992 if ((i & (step - 1)) == 0)
4993 desired_val = val;
4994 else
4995 desired_val = msb_val;
4996
4997 if (desired_val != const_vector_elt_as_int (op, elt))
4998 return false;
4999 }
5000
5001 return true;
5002 }
5003
5004
5005 /* Return true if OP is of the given MODE and can be synthesized
5006 with a vspltisb, vspltish or vspltisw. */
5007
5008 bool
5009 easy_altivec_constant (rtx op, enum machine_mode mode)
5010 {
5011 unsigned step, copies;
5012
5013 if (mode == VOIDmode)
5014 mode = GET_MODE (op);
5015 else if (mode != GET_MODE (op))
5016 return false;
5017
5018 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5019 constants. */
5020 if (mode == V2DFmode)
5021 return zero_constant (op, mode);
5022
5023 if (mode == V2DImode)
5024 {
5025 /* In case the compiler is built 32-bit, CONST_DOUBLE constants are not
5026 easy. */
5027 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
5028 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
5029 return false;
5030
5031 if (zero_constant (op, mode))
5032 return true;
5033
5034 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5035 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5036 return true;
5037
5038 return false;
5039 }
5040
5041 /* Start with a vspltisw. */
5042 step = GET_MODE_NUNITS (mode) / 4;
5043 copies = 1;
5044
5045 if (vspltis_constant (op, step, copies))
5046 return true;
5047
5048 /* Then try with a vspltish. */
5049 if (step == 1)
5050 copies <<= 1;
5051 else
5052 step >>= 1;
5053
5054 if (vspltis_constant (op, step, copies))
5055 return true;
5056
5057 /* And finally a vspltisb. */
5058 if (step == 1)
5059 copies <<= 1;
5060 else
5061 step >>= 1;
5062
5063 if (vspltis_constant (op, step, copies))
5064 return true;
5065
5066 return false;
5067 }
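/* An illustrative trace of the step/copies walk above: for V16QImode the
   three vspltis_constant calls use step == 4, 2, 1 (the vspltisw, vspltish
   and vspltisb views respectively); for V4SImode step stays 1 while copies
   grows 1, 2, 4 instead.  */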
5068
5069 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5070 result is OP. Abort if it is not possible. */
5071
5072 rtx
5073 gen_easy_altivec_constant (rtx op)
5074 {
5075 enum machine_mode mode = GET_MODE (op);
5076 int nunits = GET_MODE_NUNITS (mode);
5077 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5078 unsigned step = nunits / 4;
5079 unsigned copies = 1;
5080
5081 /* Start with a vspltisw. */
5082 if (vspltis_constant (op, step, copies))
5083 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5084
5085 /* Then try with a vspltish. */
5086 if (step == 1)
5087 copies <<= 1;
5088 else
5089 step >>= 1;
5090
5091 if (vspltis_constant (op, step, copies))
5092 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
5093
5094 /* And finally a vspltisb. */
5095 if (step == 1)
5096 copies <<= 1;
5097 else
5098 step >>= 1;
5099
5100 if (vspltis_constant (op, step, copies))
5101 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
5102
5103 gcc_unreachable ();
5104 }
5105
5106 const char *
5107 output_vec_const_move (rtx *operands)
5108 {
5109 int cst, cst2;
5110 enum machine_mode mode;
5111 rtx dest, vec;
5112
5113 dest = operands[0];
5114 vec = operands[1];
5115 mode = GET_MODE (dest);
5116
5117 if (TARGET_VSX)
5118 {
5119 if (zero_constant (vec, mode))
5120 return "xxlxor %x0,%x0,%x0";
5121
5122 if (mode == V2DImode
5123 && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
5124 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
5125 return "vspltisw %0,-1";
5126 }
5127
5128 if (TARGET_ALTIVEC)
5129 {
5130 rtx splat_vec;
5131 if (zero_constant (vec, mode))
5132 return "vxor %0,%0,%0";
5133
5134 splat_vec = gen_easy_altivec_constant (vec);
5135 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
5136 operands[1] = XEXP (splat_vec, 0);
5137 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
5138 return "#";
5139
5140 switch (GET_MODE (splat_vec))
5141 {
5142 case V4SImode:
5143 return "vspltisw %0,%1";
5144
5145 case V8HImode:
5146 return "vspltish %0,%1";
5147
5148 case V16QImode:
5149 return "vspltisb %0,%1";
5150
5151 default:
5152 gcc_unreachable ();
5153 }
5154 }
5155
5156 gcc_assert (TARGET_SPE);
5157
5158 /* Vector constant 0 is handled by a splitter for V2SI, and in the
5159 patterns for V1DI, V4HI, and V2SF.
5160
5161 FIXME: We should probably return # and add post reload
5162 splitters for these, but this way is so easy ;-). */
5163 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
5164 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
5165 operands[1] = CONST_VECTOR_ELT (vec, 0);
5166 operands[2] = CONST_VECTOR_ELT (vec, 1);
5167 if (cst == cst2)
5168 return "li %0,%1\n\tevmergelo %0,%0,%0";
5169 else
5170 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
5171 }
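
/* Illustrative example, not part of GCC (register numbers and the exact
   template chosen depend on the configuration): given

     typedef int v4si __attribute__ ((vector_size (16)));
     v4si all_fives (void) { return (v4si) { 5, 5, 5, 5 }; }

   the V4SImode case above is expected to print "vspltisw %0,5" for the
   constant move, while an all-zero vector takes the "vxor %0,%0,%0" path
   instead (or "xxlxor %x0,%x0,%x0" under VSX).  */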
5172
5173 /* Initialize TARGET, a vector in a PAIRED vector mode, to VALS. */
5174
5175 void
5176 paired_expand_vector_init (rtx target, rtx vals)
5177 {
5178 enum machine_mode mode = GET_MODE (target);
5179 int n_elts = GET_MODE_NUNITS (mode);
5180 int n_var = 0;
5181 rtx x, new_rtx, tmp, constant_op, op1, op2;
5182 int i;
5183
5184 for (i = 0; i < n_elts; ++i)
5185 {
5186 x = XVECEXP (vals, 0, i);
5187 if (!(CONST_INT_P (x)
5188 || GET_CODE (x) == CONST_DOUBLE
5189 || GET_CODE (x) == CONST_FIXED))
5190 ++n_var;
5191 }
5192 if (n_var == 0)
5193 {
5194 /* Load from constant pool. */
5195 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5196 return;
5197 }
5198
5199 if (n_var == 2)
5200 {
5201 /* Both fields of the vector are non-constant. */
5202 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
5203 XVECEXP (vals, 0, 1));
5204
5205 emit_move_insn (target, new_rtx);
5206 return;
5207 }
5208
5209 /* One field is non-constant and the other one is a constant. Load the
5210 constant from the constant pool and use the ps_merge instruction to
5211 construct the whole vector. */
5212 op1 = XVECEXP (vals, 0, 0);
5213 op2 = XVECEXP (vals, 0, 1);
5214
5215 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
5216
5217 tmp = gen_reg_rtx (GET_MODE (constant_op));
5218 emit_move_insn (tmp, constant_op);
5219
5220 if (CONSTANT_P (op1))
5221 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
5222 else
5223 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
5224
5225 emit_move_insn (target, new_rtx);
5226 }
5227
5228 void
5229 paired_expand_vector_move (rtx operands[])
5230 {
5231 rtx op0 = operands[0], op1 = operands[1];
5232
5233 emit_move_insn (op0, op1);
5234 }
5235
5236 /* Emit a vector compare for code RCODE. DEST is the destination, OP0
5237 and OP1 are the two VEC_COND_EXPR operands, and CC_OP0 and CC_OP1 are
5238 the two operands of the relational operation RCODE. This is a
5239 recursive function. */
5240
5241 static void
5242 paired_emit_vector_compare (enum rtx_code rcode,
5243 rtx dest, rtx op0, rtx op1,
5244 rtx cc_op0, rtx cc_op1)
5245 {
5246 rtx tmp = gen_reg_rtx (V2SFmode);
5247 rtx tmp1, max, min;
5248
5249 gcc_assert (TARGET_PAIRED_FLOAT);
5250 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5251
5252 switch (rcode)
5253 {
5254 case LT:
5255 case LTU:
5256 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5257 return;
5258 case GE:
5259 case GEU:
5260 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5261 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
5262 return;
5263 case LE:
5264 case LEU:
5265 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
5266 return;
5267 case GT:
5268 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5269 return;
5270 case EQ:
5271 tmp1 = gen_reg_rtx (V2SFmode);
5272 max = gen_reg_rtx (V2SFmode);
5273 min = gen_reg_rtx (V2SFmode);
5275
5276 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5277 emit_insn (gen_selv2sf4
5278 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5279 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
5280 emit_insn (gen_selv2sf4
5281 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5282 emit_insn (gen_subv2sf3 (tmp1, min, max));
5283 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
5284 return;
5285 case NE:
5286 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
5287 return;
5288 case UNLE:
5289 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5290 return;
5291 case UNLT:
5292 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
5293 return;
5294 case UNGE:
5295 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5296 return;
5297 case UNGT:
5298 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
5299 return;
5300 default:
5301 gcc_unreachable ();
5302 }
5303
5304 return;
5305 }
5306
5307 /* Emit vector conditional expression.
5308 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5309 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5310
5311 int
5312 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5313 rtx cond, rtx cc_op0, rtx cc_op1)
5314 {
5315 enum rtx_code rcode = GET_CODE (cond);
5316
5317 if (!TARGET_PAIRED_FLOAT)
5318 return 0;
5319
5320 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
5321
5322 return 1;
5323 }
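
/* A scalar sketch, not part of GCC, of the reduction used above: every
   comparison is rewritten in terms of GE, and GE itself becomes a select
   on the sign of a subtraction, mirroring gen_subv2sf3 followed by
   gen_selv2sf4.  NaN behaviour is glossed over, and the names are made
   up for the example.  */
#if 0
/* dest = (cc_op0 >= cc_op1) ? op0 : op1, per vector lane.  */
static float
sel_ge (float cc_op0, float cc_op1, float op0, float op1)
{
  float tmp = cc_op0 - cc_op1;      /* gen_subv2sf3 */
  return tmp >= 0.0f ? op0 : op1;   /* gen_selv2sf4 */
}

/* The LT case above recurses as GE with the two select arms swapped:
   "a < b ? x : y" is treated as "a >= b ? y : x".  */
static float
sel_lt (float cc_op0, float cc_op1, float op0, float op1)
{
  return sel_ge (cc_op0, cc_op1, op1, op0);
}
#endif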
5324
5325 /* Initialize vector TARGET to VALS. */
5326
5327 void
5328 rs6000_expand_vector_init (rtx target, rtx vals)
5329 {
5330 enum machine_mode mode = GET_MODE (target);
5331 enum machine_mode inner_mode = GET_MODE_INNER (mode);
5332 int n_elts = GET_MODE_NUNITS (mode);
5333 int n_var = 0, one_var = -1;
5334 bool all_same = true, all_const_zero = true;
5335 rtx x, mem;
5336 int i;
5337
5338 for (i = 0; i < n_elts; ++i)
5339 {
5340 x = XVECEXP (vals, 0, i);
5341 if (!(CONST_INT_P (x)
5342 || GET_CODE (x) == CONST_DOUBLE
5343 || GET_CODE (x) == CONST_FIXED))
5344 ++n_var, one_var = i;
5345 else if (x != CONST0_RTX (inner_mode))
5346 all_const_zero = false;
5347
5348 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
5349 all_same = false;
5350 }
5351
5352 if (n_var == 0)
5353 {
5354 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
5355 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
5356 if ((int_vector_p || TARGET_VSX) && all_const_zero)
5357 {
5358 /* Zero register. */
5359 emit_insn (gen_rtx_SET (VOIDmode, target,
5360 gen_rtx_XOR (mode, target, target)));
5361 return;
5362 }
5363 else if (int_vector_p && easy_vector_constant (const_vec, mode))
5364 {
5365 /* Splat immediate. */
5366 emit_insn (gen_rtx_SET (VOIDmode, target, const_vec));
5367 return;
5368 }
5369 else
5370 {
5371 /* Load from constant pool. */
5372 emit_move_insn (target, const_vec);
5373 return;
5374 }
5375 }
5376
5377 /* Double word values on VSX can use xxpermdi or lxvdsx. */
5378 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5379 {
5380 rtx op0 = XVECEXP (vals, 0, 0);
5381 rtx op1 = XVECEXP (vals, 0, 1);
5382 if (all_same)
5383 {
5384 if (!MEM_P (op0) && !REG_P (op0))
5385 op0 = force_reg (inner_mode, op0);
5386 if (mode == V2DFmode)
5387 emit_insn (gen_vsx_splat_v2df (target, op0));
5388 else
5389 emit_insn (gen_vsx_splat_v2di (target, op0));
5390 }
5391 else
5392 {
5393 op0 = force_reg (inner_mode, op0);
5394 op1 = force_reg (inner_mode, op1);
5395 if (mode == V2DFmode)
5396 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
5397 else
5398 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
5399 }
5400 return;
5401 }
5402
5403 /* With single precision floating point on VSX, we know that internally
5404 single precision is actually represented as a double. Either make two
5405 V2DF vectors and convert those vectors to single precision, or do one
5406 conversion and splat the result to the other elements. */
5407 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
5408 {
5409 if (all_same)
5410 {
5411 rtx freg = gen_reg_rtx (V4SFmode);
5412 rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
5413 rtx cvt = ((TARGET_XSCVDPSPN)
5414 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
5415 : gen_vsx_xscvdpsp_scalar (freg, sreg));
5416
5417 emit_insn (cvt);
5418 emit_insn (gen_vsx_xxspltw_v4sf (target, freg, const0_rtx));
5419 }
5420 else
5421 {
5422 rtx dbl_even = gen_reg_rtx (V2DFmode);
5423 rtx dbl_odd = gen_reg_rtx (V2DFmode);
5424 rtx flt_even = gen_reg_rtx (V4SFmode);
5425 rtx flt_odd = gen_reg_rtx (V4SFmode);
5426 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
5427 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
5428 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
5429 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
5430
5431 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
5432 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
5433 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
5434 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
5435 rs6000_expand_extract_even (target, flt_even, flt_odd);
5436 }
5437 return;
5438 }
5439
5440 /* Store value to stack temp. Load vector element. Splat. However, splat
5441 of 64-bit items is not supported on AltiVec. */
5442 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
5443 {
5444 rtx field;
5445 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5446 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
5447 XVECEXP (vals, 0, 0));
5448 x = gen_rtx_UNSPEC (VOIDmode,
5449 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5450 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5451 gen_rtvec (2,
5452 gen_rtx_SET (VOIDmode,
5453 target, mem),
5454 x)));
5455 field = (BYTES_BIG_ENDIAN ? const0_rtx
5456 : GEN_INT (GET_MODE_NUNITS (mode) - 1));
5457 x = gen_rtx_VEC_SELECT (inner_mode, target,
5458 gen_rtx_PARALLEL (VOIDmode,
5459 gen_rtvec (1, field)));
5460 emit_insn (gen_rtx_SET (VOIDmode, target,
5461 gen_rtx_VEC_DUPLICATE (mode, x)));
5462 return;
5463 }
5464
5465 /* One field is non-constant. Load constant then overwrite
5466 varying field. */
5467 if (n_var == 1)
5468 {
5469 rtx copy = copy_rtx (vals);
5470
5471 /* Load constant part of vector, substitute neighboring value for
5472 varying element. */
5473 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
5474 rs6000_expand_vector_init (target, copy);
5475
5476 /* Insert variable. */
5477 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
5478 return;
5479 }
5480
5481 /* Construct the vector in memory one field at a time
5482 and load the whole vector. */
5483 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5484 for (i = 0; i < n_elts; i++)
5485 emit_move_insn (adjust_address_nv (mem, inner_mode,
5486 i * GET_MODE_SIZE (inner_mode)),
5487 XVECEXP (vals, 0, i));
5488 emit_move_insn (target, mem);
5489 }
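
/* Illustrative examples, not part of GCC, of the strategy selection
   above, using GCC's generic vector extension with
   "typedef int v4si __attribute__ ((vector_size (16)))":

     (v4si) { 0, 0, 0, 0 }  - zeroed with a single xor of the register
     (v4si) { 5, 5, 5, 5 }  - easy constant, splat-immediate path
     (v4si) { a, a, a, a }  - store A, load the element, VEC_DUPLICATE
     (v4si) { a, 1, 2, 3 }  - load { 1, 1, 2, 3 } from the pool, then
                              rs6000_expand_vector_set for element 0
     (v4si) { a, b, c, d }  - built field by field in a stack temp.  */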
5490
5491 /* Set field ELT of TARGET to VAL. */
5492
5493 void
5494 rs6000_expand_vector_set (rtx target, rtx val, int elt)
5495 {
5496 enum machine_mode mode = GET_MODE (target);
5497 enum machine_mode inner_mode = GET_MODE_INNER (mode);
5498 rtx reg = gen_reg_rtx (mode);
5499 rtx mask, mem, x;
5500 int width = GET_MODE_SIZE (inner_mode);
5501 int i;
5502
5503 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5504 {
5505 rtx (*set_func) (rtx, rtx, rtx, rtx)
5506 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
5507 emit_insn (set_func (target, target, val, GEN_INT (elt)));
5508 return;
5509 }
5510
5511 /* Load single variable value. */
5512 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5513 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
5514 x = gen_rtx_UNSPEC (VOIDmode,
5515 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5516 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5517 gen_rtvec (2,
5518 gen_rtx_SET (VOIDmode,
5519 reg, mem),
5520 x)));
5521
5522 /* Linear sequence. */
5523 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
5524 for (i = 0; i < 16; ++i)
5525 XVECEXP (mask, 0, i) = GEN_INT (i);
5526
5527 /* Set permute mask to insert element into target. */
5528 for (i = 0; i < width; ++i)
5529 XVECEXP (mask, 0, elt*width + i)
5530 = GEN_INT (i + 0x10);
5531 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
5532
5533 if (BYTES_BIG_ENDIAN)
5534 x = gen_rtx_UNSPEC (mode,
5535 gen_rtvec (3, target, reg,
5536 force_reg (V16QImode, x)),
5537 UNSPEC_VPERM);
5538 else
5539 {
5540 /* Invert selector. */
5541 rtx splat = gen_rtx_VEC_DUPLICATE (V16QImode,
5542 gen_rtx_CONST_INT (QImode, -1));
5543 rtx tmp = gen_reg_rtx (V16QImode);
5544 emit_move_insn (tmp, splat);
5545 x = gen_rtx_MINUS (V16QImode, tmp, force_reg (V16QImode, x));
5546 emit_move_insn (tmp, x);
5547
5548 /* Permute with operands reversed and adjusted selector. */
5549 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
5550 UNSPEC_VPERM);
5551 }
5552
5553 emit_insn (gen_rtx_SET (VOIDmode, target, x));
5554 }
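
/* A standalone sketch, not part of GCC, of the big-endian vperm selector
   built above: selector bytes 0-15 pick bytes from the first input
   (TARGET) and bytes 0x10-0x1f pick bytes from the second (REG), so
   starting from the identity selector and overwriting the WIDTH bytes at
   ELT * WIDTH splices the new element into place.  The helper name is
   made up for the example.  */
#if 0
static void
build_insert_selector (unsigned char sel[16], int elt, int width)
{
  int i;

  for (i = 0; i < 16; i++)
    sel[i] = i;                         /* identity: keep TARGET bytes */
  for (i = 0; i < width; i++)
    sel[elt * width + i] = 0x10 + i;    /* take these bytes from REG */
}
#endif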
5555
5556 /* Extract field ELT from VEC into TARGET. */
5557
5558 void
5559 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
5560 {
5561 enum machine_mode mode = GET_MODE (vec);
5562 enum machine_mode inner_mode = GET_MODE_INNER (mode);
5563 rtx mem;
5564
5565 if (VECTOR_MEM_VSX_P (mode))
5566 {
5567 switch (mode)
5568 {
5569 default:
5570 break;
5571 case V2DFmode:
5572 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
5573 return;
5574 case V2DImode:
5575 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
5576 return;
5577 case V4SFmode:
5578 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
5579 return;
5580 }
5581 }
5582
5583 /* Allocate mode-sized buffer. */
5584 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5585
5586 emit_move_insn (mem, vec);
5587
5588 /* Add offset to field within buffer matching vector element. */
5589 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
5590
5591 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
5592 }
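
/* A standalone sketch, not part of GCC, of the memory fallback above:
   spill the whole vector to a mode-sized buffer and read back the one
   element, which is what the stack-temp sequence implements.  The helper
   name is made up for the example.  */
#if 0
#include <string.h>

static int
extract_v4si_element (const int vec[4], int elt)
{
  int buf[4];                           /* mode-sized stack temp */

  memcpy (buf, vec, sizeof buf);        /* emit_move_insn (mem, vec) */
  return buf[elt];                      /* load at ELT * element size */
}
#endif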
5593
5594 /* Generate the shifts and masks for a pair of rldicl or rldicr insns to
5595 implement ANDing by the mask IN. */
5596 void
5597 build_mask64_2_operands (rtx in, rtx *out)
5598 {
5599 unsigned HOST_WIDE_INT c, lsb, m1, m2;
5600 int shift;
5601
5602 gcc_assert (GET_CODE (in) == CONST_INT);
5603
5604 c = INTVAL (in);
5605 if (c & 1)
5606 {
5607 /* Assume c initially something like 0x00fff000000fffff. The idea
5608 is to rotate the word so that the middle ^^^^^^ group of zeros
5609 is at the MS end and can be cleared with an rldicl mask. We then
5610 rotate back and clear off the MS ^^ group of zeros with a
5611 second rldicl. */
5612 c = ~c; /* c == 0xff000ffffff00000 */
5613 lsb = c & -c; /* lsb == 0x0000000000100000 */
5614 m1 = -lsb; /* m1 == 0xfffffffffff00000 */
5615 c = ~c; /* c == 0x00fff000000fffff */
5616 c &= -lsb; /* c == 0x00fff00000000000 */
5617 lsb = c & -c; /* lsb == 0x0000100000000000 */
5618 c = ~c; /* c == 0xff000fffffffffff */
5619 c &= -lsb; /* c == 0xff00000000000000 */
5620 shift = 0;
5621 while ((lsb >>= 1) != 0)
5622 shift++; /* shift == 44 on exit from loop */
5623 m1 <<= 64 - shift; /* m1 == 0xffffff0000000000 */
5624 m1 = ~m1; /* m1 == 0x000000ffffffffff */
5625 m2 = ~c; /* m2 == 0x00ffffffffffffff */
5626 }
5627 else
5628 {
5629 /* Assume c initially something like 0xff000f0000000000. The idea
5630 is to rotate the word so that the ^^^ middle group of zeros
5631 is at the LS end and can be cleared with an rldicr mask. We then
5632 rotate back and clear off the LS group of ^^^^^^^^^^ zeros with
5633 a second rldicr. */
5634 lsb = c & -c; /* lsb == 0x0000010000000000 */
5635 m2 = -lsb; /* m2 == 0xffffff0000000000 */
5636 c = ~c; /* c == 0x00fff0ffffffffff */
5637 c &= -lsb; /* c == 0x00fff00000000000 */
5638 lsb = c & -c; /* lsb == 0x0000100000000000 */
5639 c = ~c; /* c == 0xff000fffffffffff */
5640 c &= -lsb; /* c == 0xff00000000000000 */
5641 shift = 0;
5642 while ((lsb >>= 1) != 0)
5643 shift++; /* shift == 44 on exit from loop */
5644 m1 = ~c; /* m1 == 0x00ffffffffffffff */
5645 m1 >>= shift; /* m1 == 0x0000000000000fff */
5646 m1 = ~m1; /* m1 == 0xfffffffffffff000 */
5647 }
5648
5649 /* Note that when we only have two 0->1 and 1->0 transitions, one of the
5650 masks will be all 1's. We are guaranteed more than one transition. */
5651 out[0] = GEN_INT (64 - shift);
5652 out[1] = GEN_INT (m1);
5653 out[2] = GEN_INT (shift);
5654 out[3] = GEN_INT (m2);
5655 }
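
/* A standalone check, not part of GCC, of the identity the rldicl/rldicr
   pair relies on: with SHIFT, M1 and M2 as computed above, rotating M1
   left by SHIFT and masking with M2 must reproduce C, so that
   (x & c) == rotl64 (rotr64 (x, shift) & m1, shift) & m2.  The helper
   names are made up for the example.  */
#if 0
#include <stdbool.h>
#include <stdint.h>

static uint64_t
rotl64 (uint64_t x, int n)
{
  return n == 0 ? x : (x << n) | (x >> (64 - n));
}

static bool
mask_pair_ok (uint64_t c, int shift, uint64_t m1, uint64_t m2)
{
  return (rotl64 (m1, shift) & m2) == c;
}

/* With the worked example from the comments above:
   mask_pair_ok (0x00fff000000fffffULL, 44,
                 0x000000ffffffffffULL, 0x00ffffffffffffffULL)
   returns true.  */
#endif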
5656
5657 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
5658
5659 bool
5660 invalid_e500_subreg (rtx op, enum machine_mode mode)
5661 {
5662 if (TARGET_E500_DOUBLE)
5663 {
5664 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
5665 subreg:TI and reg:TF. Decimal float modes are like integer
5666 modes (only low part of each register used) for this
5667 purpose. */
5668 if (GET_CODE (op) == SUBREG
5669 && (mode == SImode || mode == DImode || mode == TImode
5670 || mode == DDmode || mode == TDmode || mode == PTImode)
5671 && REG_P (SUBREG_REG (op))
5672 && (GET_MODE (SUBREG_REG (op)) == DFmode
5673 || GET_MODE (SUBREG_REG (op)) == TFmode))
5674 return true;
5675
5676 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
5677 reg:TI. */
5678 if (GET_CODE (op) == SUBREG
5679 && (mode == DFmode || mode == TFmode)
5680 && REG_P (SUBREG_REG (op))
5681 && (GET_MODE (SUBREG_REG (op)) == DImode
5682 || GET_MODE (SUBREG_REG (op)) == TImode
5683 || GET_MODE (SUBREG_REG (op)) == PTImode
5684 || GET_MODE (SUBREG_REG (op)) == DDmode
5685 || GET_MODE (SUBREG_REG (op)) == TDmode))
5686 return true;
5687 }
5688
5689 if (TARGET_SPE
5690 && GET_CODE (op) == SUBREG
5691 && mode == SImode
5692 && REG_P (SUBREG_REG (op))
5693 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
5694 return true;
5695
5696 return false;
5697 }
5698
5699 /* Return the alignment of TYPE. The existing alignment is ALIGN. HOW
5700 selects whether the alignment is ABI-mandated, optional, or both
5701 ABI-mandated and optional alignment. */
5702
5703 unsigned int
5704 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
5705 {
5706 if (how != align_opt)
5707 {
5708 if (TREE_CODE (type) == VECTOR_TYPE)
5709 {
5710 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
5711 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
5712 {
5713 if (align < 64)
5714 align = 64;
5715 }
5716 else if (align < 128)
5717 align = 128;
5718 }
5719 else if (TARGET_E500_DOUBLE
5720 && TREE_CODE (type) == REAL_TYPE
5721 && TYPE_MODE (type) == DFmode)
5722 {
5723 if (align < 64)
5724 align = 64;
5725 }
5726 }
5727
5728 if (how != align_abi)
5729 {
5730 if (TREE_CODE (type) == ARRAY_TYPE
5731 && TYPE_MODE (TREE_TYPE (type)) == QImode)
5732 {
5733 if (align < BITS_PER_WORD)
5734 align = BITS_PER_WORD;
5735 }
5736 }
5737
5738 return align;
5739 }
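
/* Illustrative examples, not part of GCC: with AltiVec, a "vector int"
   is raised to 128-bit alignment on the ABI side (any HOW except
   align_opt), whereas widening a "char buf[32]" to word alignment
   happens only on the optional side (any HOW except align_abi), since
   that widening is an optimization rather than an ABI requirement.  */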
5740
5741 /* AIX increases natural record alignment to doubleword if the first
5742 field is an FP double while the FP fields remain word aligned. */
5743
5744 unsigned int
5745 rs6000_special_round_type_align (tree type, unsigned int computed,
5746 unsigned int specified)
5747 {
5748 unsigned int align = MAX (computed, specified);
5749 tree field = TYPE_FIELDS (type);
5750
5751 /* Skip all non-field decls. */
5752 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
5753 field = DECL_CHAIN (field);
5754
5755 if (field != NULL && field != type)
5756 {
5757 type = TREE_TYPE (field);
5758 while (TREE_CODE (type) == ARRAY_TYPE)
5759 type = TREE_TYPE (type);
5760
5761 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
5762 align = MAX (align, 64);
5763 }
5764
5765 return align;
5766 }
5767
5768 /* Darwin increases record alignment to the natural alignment of
5769 the first field. */
5770
5771 unsigned int
5772 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
5773 unsigned int specified)
5774 {
5775 unsigned int align = MAX (computed, specified);
5776
5777 if (TYPE_PACKED (type))
5778 return align;
5779
5780 /* Find the first field, looking down into aggregates. */
5781 do {
5782 tree field = TYPE_FIELDS (type);
5783 /* Skip all non-field decls. */
5784 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
5785 field = DECL_CHAIN (field);
5786 if (! field)
5787 break;
5788 /* A packed field does not contribute any extra alignment. */
5789 if (DECL_PACKED (field))
5790 return align;
5791 type = TREE_TYPE (field);
5792 while (TREE_CODE (type) == ARRAY_TYPE)
5793 type = TREE_TYPE (type);
5794 } while (AGGREGATE_TYPE_P (type));
5795
5796 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
5797 align = MAX (align, TYPE_ALIGN (type));
5798
5799 return align;
5800 }
5801
5802 /* Return 1 for an operand in small memory on V.4/eabi. */
5803
5804 int
5805 small_data_operand (rtx op ATTRIBUTE_UNUSED,
5806 enum machine_mode mode ATTRIBUTE_UNUSED)
5807 {
5808 #if TARGET_ELF
5809 rtx sym_ref;
5810
5811 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
5812 return 0;
5813
5814 if (DEFAULT_ABI != ABI_V4)
5815 return 0;
5816
5817 /* Vector and float memory instructions have a limited offset on the
5818 SPE, so using a vector or float variable directly as an operand is
5819 not useful. */
5820 if (TARGET_SPE
5821 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
5822 return 0;
5823
5824 if (GET_CODE (op) == SYMBOL_REF)
5825 sym_ref = op;
5826
5827 else if (GET_CODE (op) != CONST
5828 || GET_CODE (XEXP (op, 0)) != PLUS
5829 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
5830 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
5831 return 0;
5832
5833 else
5834 {
5835 rtx sum = XEXP (op, 0);
5836 HOST_WIDE_INT summand;
5837
5838 /* We have to be careful here, because it is the referenced address
5839 that must be 32k from _SDA_BASE_, not just the symbol. */
5840 summand = INTVAL (XEXP (sum, 1));
5841 if (summand < 0 || summand > g_switch_value)
5842 return 0;
5843
5844 sym_ref = XEXP (sum, 0);
5845 }
5846
5847 return SYMBOL_REF_SMALL_P (sym_ref);
5848 #else
5849 return 0;
5850 #endif
5851 }
5852
5853 /* Return true if either operand is a general purpose register. */
5854
5855 bool
5856 gpr_or_gpr_p (rtx op0, rtx op1)
5857 {
5858 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
5859 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
5860 }
5861
5862 /* Return true if this is a direct move operation between GPR registers and
5863 floating point/VSX registers. */
5864
5865 bool
5866 direct_move_p (rtx op0, rtx op1)
5867 {
5868 int regno0, regno1;
5869
5870 if (!REG_P (op0) || !REG_P (op1))
5871 return false;
5872
5873 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
5874 return false;
5875
5876 regno0 = REGNO (op0);
5877 regno1 = REGNO (op1);
5878 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
5879 return false;
5880
5881 if (INT_REGNO_P (regno0))
5882 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
5883
5884 else if (INT_REGNO_P (regno1))
5885 {
5886 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
5887 return true;
5888
5889 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
5890 return true;
5891 }
5892
5893 return false;
5894 }
5895
5896 /* Return true if this is a load or store quad operation. */
5897
5898 bool
5899 quad_load_store_p (rtx op0, rtx op1)
5900 {
5901 bool ret;
5902
5903 if (!TARGET_QUAD_MEMORY)
5904 ret = false;
5905
5906 else if (REG_P (op0) && MEM_P (op1))
5907 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
5908 && quad_memory_operand (op1, GET_MODE (op1))
5909 && !reg_overlap_mentioned_p (op0, op1));
5910
5911 else if (MEM_P (op0) && REG_P (op1))
5912 ret = (quad_memory_operand (op0, GET_MODE (op0))
5913 && quad_int_reg_operand (op1, GET_MODE (op1)));
5914
5915 else
5916 ret = false;
5917
5918 if (TARGET_DEBUG_ADDR)
5919 {
5920 fprintf (stderr, "\n========== quad_load_store, return %s\n",
5921 ret ? "true" : "false");
5922 debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
5923 }
5924
5925 return ret;
5926 }
5927
5928 /* Given an address, return a constant offset term if one exists. */
5929
5930 static rtx
5931 address_offset (rtx op)
5932 {
5933 if (GET_CODE (op) == PRE_INC
5934 || GET_CODE (op) == PRE_DEC)
5935 op = XEXP (op, 0);
5936 else if (GET_CODE (op) == PRE_MODIFY
5937 || GET_CODE (op) == LO_SUM)
5938 op = XEXP (op, 1);
5939
5940 if (GET_CODE (op) == CONST)
5941 op = XEXP (op, 0);
5942
5943 if (GET_CODE (op) == PLUS)
5944 op = XEXP (op, 1);
5945
5946 if (CONST_INT_P (op))
5947 return op;
5948
5949 return NULL_RTX;
5950 }
5951
5952 /* Return true if the MEM operand is a memory operand suitable for use
5953 with a (full width, possibly multiple) gpr load/store. On
5954 powerpc64 this means the offset must be divisible by 4.
5955 Implements 'Y' constraint.
5956
5957 Accept direct, indexed, offset, lo_sum and tocref. Since this is
5958 a constraint function we know the operand has satisfied a suitable
5959 memory predicate. Also accept some odd rtl generated by reload
5960 (see rs6000_legitimize_reload_address for various forms). It is
5961 important that reload rtl be accepted by appropriate constraints
5962 but not by the operand predicate.
5963
5964 Offsetting a lo_sum should not be allowed, except where we know by
5965 alignment that a 32k boundary is not crossed, but see the ???
5966 comment in rs6000_legitimize_reload_address. Note that by
5967 "offsetting" here we mean a further offset to access parts of the
5968 MEM. It's fine to have a lo_sum where the inner address is offset
5969 from a sym, since the same sym+offset will appear in the high part
5970 of the address calculation. */
5971
5972 bool
5973 mem_operand_gpr (rtx op, enum machine_mode mode)
5974 {
5975 unsigned HOST_WIDE_INT offset;
5976 int extra;
5977 rtx addr = XEXP (op, 0);
5978
5979 op = address_offset (addr);
5980 if (op == NULL_RTX)
5981 return true;
5982
5983 offset = INTVAL (op);
5984 if (TARGET_POWERPC64 && (offset & 3) != 0)
5985 return false;
5986
5987 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
5988 gcc_assert (extra >= 0);
5989
5990 if (GET_CODE (addr) == LO_SUM)
5991 /* For lo_sum addresses, we must allow any offset except one that
5992 causes a wrap, so test only the low 16 bits. */
5993 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
5994
5995 return offset + 0x8000 < 0x10000u - extra;
5996 }
5997 \f
5998 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
5999
6000 static bool
6001 reg_offset_addressing_ok_p (enum machine_mode mode)
6002 {
6003 switch (mode)
6004 {
6005 case V16QImode:
6006 case V8HImode:
6007 case V4SFmode:
6008 case V4SImode:
6009 case V2DFmode:
6010 case V2DImode:
6011 case TImode:
6012 /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
6013 TImode is not a vector mode, if we want to use the VSX registers to
6014 move it around, we need to restrict ourselves to reg+reg
6015 addressing. */
6016 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
6017 return false;
6018 break;
6019
6020 case V4HImode:
6021 case V2SImode:
6022 case V1DImode:
6023 case V2SFmode:
6024 /* Paired vector modes. Only reg+reg addressing is valid. */
6025 if (TARGET_PAIRED_FLOAT)
6026 return false;
6027 break;
6028
6029 case SDmode:
6030 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
6031 addressing for the LFIWZX and STFIWX instructions. */
6032 if (TARGET_NO_SDMODE_STACK)
6033 return false;
6034 break;
6035
6036 default:
6037 break;
6038 }
6039
6040 return true;
6041 }
6042
6043 static bool
6044 virtual_stack_registers_memory_p (rtx op)
6045 {
6046 int regnum;
6047
6048 if (GET_CODE (op) == REG)
6049 regnum = REGNO (op);
6050
6051 else if (GET_CODE (op) == PLUS
6052 && GET_CODE (XEXP (op, 0)) == REG
6053 && GET_CODE (XEXP (op, 1)) == CONST_INT)
6054 regnum = REGNO (XEXP (op, 0));
6055
6056 else
6057 return false;
6058
6059 return (regnum >= FIRST_VIRTUAL_REGISTER
6060 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
6061 }
6062
6063 /* Return true if a MODE-sized memory access to OP plus OFFSET
6064 is known not to straddle a 32k boundary. */
6065
6066 static bool
6067 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
6068 enum machine_mode mode)
6069 {
6070 tree decl, type;
6071 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
6072
6073 if (GET_CODE (op) != SYMBOL_REF)
6074 return false;
6075
6076 dsize = GET_MODE_SIZE (mode);
6077 decl = SYMBOL_REF_DECL (op);
6078 if (!decl)
6079 {
6080 if (dsize == 0)
6081 return false;
6082
6083 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
6084 replacing memory addresses with an anchor plus offset. We
6085 could find the decl by rummaging around in the block->objects
6086 VEC for the given offset but that seems like too much work. */
6087 dalign = BITS_PER_UNIT;
6088 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
6089 && SYMBOL_REF_ANCHOR_P (op)
6090 && SYMBOL_REF_BLOCK (op) != NULL)
6091 {
6092 struct object_block *block = SYMBOL_REF_BLOCK (op);
6093
6094 dalign = block->alignment;
6095 offset += SYMBOL_REF_BLOCK_OFFSET (op);
6096 }
6097 else if (CONSTANT_POOL_ADDRESS_P (op))
6098 {
6099 /* It would be nice to have get_pool_align().. */
6100 enum machine_mode cmode = get_pool_mode (op);
6101
6102 dalign = GET_MODE_ALIGNMENT (cmode);
6103 }
6104 }
6105 else if (DECL_P (decl))
6106 {
6107 dalign = DECL_ALIGN (decl);
6108
6109 if (dsize == 0)
6110 {
6111 /* Allow BLKmode when the entire object is known to not
6112 cross a 32k boundary. */
6113 if (!DECL_SIZE_UNIT (decl))
6114 return false;
6115
6116 if (!host_integerp (DECL_SIZE_UNIT (decl), 1))
6117 return false;
6118
6119 dsize = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
6120 if (dsize > 32768)
6121 return false;
6122
6123 return dalign / BITS_PER_UNIT >= dsize;
6124 }
6125 }
6126 else
6127 {
6128 type = TREE_TYPE (decl);
6129
6130 dalign = TYPE_ALIGN (type);
6131 if (CONSTANT_CLASS_P (decl))
6132 dalign = CONSTANT_ALIGNMENT (decl, dalign);
6133 else
6134 dalign = DATA_ALIGNMENT (decl, dalign);
6135
6136 if (dsize == 0)
6137 {
6138 /* BLKmode, check the entire object. */
6139 if (TREE_CODE (decl) == STRING_CST)
6140 dsize = TREE_STRING_LENGTH (decl);
6141 else if (TYPE_SIZE_UNIT (type)
6142 && host_integerp (TYPE_SIZE_UNIT (type), 1))
6143 dsize = tree_low_cst (TYPE_SIZE_UNIT (type), 1);
6144 else
6145 return false;
6146 if (dsize > 32768)
6147 return false;
6148
6149 return dalign / BITS_PER_UNIT >= dsize;
6150 }
6151 }
6152
6153 /* Find how many bits of the alignment we know for this access. */
6154 mask = dalign / BITS_PER_UNIT - 1;
6155 lsb = offset & -offset;
6156 mask &= lsb - 1;
6157 dalign = mask + 1;
6158
6159 return dalign >= dsize;
6160 }
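
/* A standalone sketch, not part of GCC, of the final computation above:
   the guaranteed alignment of SYM + OFFSET is the smaller of the
   symbol's alignment and the lowest set bit of the offset, and the
   access must fit inside it.  Names are made up for the example.  */
#if 0
#include <stdbool.h>
#include <stdint.h>

static bool
access_within_alignment (uint64_t sym_align_bytes,  /* power of two */
                         uint64_t offset, uint64_t access_size)
{
  uint64_t mask = sym_align_bytes - 1;
  uint64_t lsb = offset & -offset;

  mask &= lsb - 1;    /* OFFSET == 0 wraps and leaves MASK unchanged */
  return mask + 1 >= access_size;
}

/* E.g. a 16-byte-aligned symbol accessed at offset 8 only guarantees
   8-byte alignment: access_within_alignment (16, 8, 16) is false while
   access_within_alignment (16, 8, 8) is true.  */
#endif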
6161
6162 static bool
6163 constant_pool_expr_p (rtx op)
6164 {
6165 rtx base, offset;
6166
6167 split_const (op, &base, &offset);
6168 return (GET_CODE (base) == SYMBOL_REF
6169 && CONSTANT_POOL_ADDRESS_P (base)
6170 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
6171 }
6172
6173 static const_rtx tocrel_base, tocrel_offset;
6174
6175 /* Return true if OP is a toc pointer relative address (the output
6176 of create_TOC_reference). If STRICT, do not match high part or
6177 non-split -mcmodel=large/medium toc pointer relative addresses. */
6178
6179 bool
6180 toc_relative_expr_p (const_rtx op, bool strict)
6181 {
6182 if (!TARGET_TOC)
6183 return false;
6184
6185 if (TARGET_CMODEL != CMODEL_SMALL)
6186 {
6187 /* Only match the low part. */
6188 if (GET_CODE (op) == LO_SUM
6189 && REG_P (XEXP (op, 0))
6190 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
6191 op = XEXP (op, 1);
6192 else if (strict)
6193 return false;
6194 }
6195
6196 tocrel_base = op;
6197 tocrel_offset = const0_rtx;
6198 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
6199 {
6200 tocrel_base = XEXP (op, 0);
6201 tocrel_offset = XEXP (op, 1);
6202 }
6203
6204 return (GET_CODE (tocrel_base) == UNSPEC
6205 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
6206 }
6207
6208 /* Return true if X is a constant pool address, and also for cmodel=medium
6209 if X is a toc-relative address known to be offsettable within MODE. */
6210
6211 bool
6212 legitimate_constant_pool_address_p (const_rtx x, enum machine_mode mode,
6213 bool strict)
6214 {
6215 return (toc_relative_expr_p (x, strict)
6216 && (TARGET_CMODEL != CMODEL_MEDIUM
6217 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
6218 || mode == QImode
6219 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
6220 INTVAL (tocrel_offset), mode)));
6221 }
6222
6223 static bool
6224 legitimate_small_data_p (enum machine_mode mode, rtx x)
6225 {
6226 return (DEFAULT_ABI == ABI_V4
6227 && !flag_pic && !TARGET_TOC
6228 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
6229 && small_data_operand (x, mode));
6230 }
6231
6232 /* SPE offset addressing is limited to 5-bits worth of double words. */
6233 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
6234
6235 bool
6236 rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x,
6237 bool strict, bool worst_case)
6238 {
6239 unsigned HOST_WIDE_INT offset;
6240 unsigned int extra;
6241
6242 if (GET_CODE (x) != PLUS)
6243 return false;
6244 if (!REG_P (XEXP (x, 0)))
6245 return false;
6246 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6247 return false;
6248 if (!reg_offset_addressing_ok_p (mode))
6249 return virtual_stack_registers_memory_p (x);
6250 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
6251 return true;
6252 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6253 return false;
6254
6255 offset = INTVAL (XEXP (x, 1));
6256 extra = 0;
6257 switch (mode)
6258 {
6259 case V4HImode:
6260 case V2SImode:
6261 case V1DImode:
6262 case V2SFmode:
6263 /* SPE vector modes. */
6264 return SPE_CONST_OFFSET_OK (offset);
6265
6266 case DFmode:
6267 case DDmode:
6268 case DImode:
6269 /* On e500v2, we may have:
6270
6271 (subreg:DF (mem:DI (plus (reg) (const_int))) 0),
6272
6273 which gets addressed with evldd instructions. */
6274 if (TARGET_E500_DOUBLE)
6275 return SPE_CONST_OFFSET_OK (offset);
6276
6277 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
6278 addressing. */
6279 if (VECTOR_MEM_VSX_P (mode))
6280 return false;
6281
6282 if (!worst_case)
6283 break;
6284 if (!TARGET_POWERPC64)
6285 extra = 4;
6286 else if (offset & 3)
6287 return false;
6288 break;
6289
6290 case TFmode:
6291 case TDmode:
6292 case TImode:
6293 case PTImode:
6294 if (TARGET_E500_DOUBLE)
6295 return (SPE_CONST_OFFSET_OK (offset)
6296 && SPE_CONST_OFFSET_OK (offset + 8));
6297
6298 extra = 8;
6299 if (!worst_case)
6300 break;
6301 if (!TARGET_POWERPC64)
6302 extra = 12;
6303 else if (offset & 3)
6304 return false;
6305 break;
6306
6307 default:
6308 break;
6309 }
6310
6311 offset += 0x8000;
6312 return offset < 0x10000 - extra;
6313 }
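
/* A standalone sketch, not part of GCC, of the final range test above:
   a D-form displacement must fit in a signed 16-bit field, with the
   upper bound shrunk by EXTRA bytes so that the displacement of the
   access's last word is still encodable.  The helper name is made up
   for the example.  */
#if 0
#include <stdbool.h>
#include <stdint.h>

static bool
disp_fits_d_form (int64_t offset, unsigned extra)
{
  /* Same as -0x8000 <= offset < 0x8000 - extra, using the
     unsigned-wrap idiom from the code above.  */
  return (uint64_t) (offset + 0x8000) < 0x10000u - extra;
}

/* disp_fits_d_form (0x7ff8, 8) is false, because the second doubleword
   of a 16-byte access would need displacement 0x8000, while
   disp_fits_d_form (0x7ff0, 8) is true.  */
#endif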
6314
6315 bool
6316 legitimate_indexed_address_p (rtx x, int strict)
6317 {
6318 rtx op0, op1;
6319
6320 if (GET_CODE (x) != PLUS)
6321 return false;
6322
6323 op0 = XEXP (x, 0);
6324 op1 = XEXP (x, 1);
6325
6326 /* Recognize the rtl generated by reload which we know will later be
6327 replaced with proper base and index regs. */
6328 if (!strict
6329 && reload_in_progress
6330 && (REG_P (op0) || GET_CODE (op0) == PLUS)
6331 && REG_P (op1))
6332 return true;
6333
6334 return (REG_P (op0) && REG_P (op1)
6335 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
6336 && INT_REG_OK_FOR_INDEX_P (op1, strict))
6337 || (INT_REG_OK_FOR_BASE_P (op1, strict)
6338 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
6339 }
6340
6341 bool
6342 avoiding_indexed_address_p (enum machine_mode mode)
6343 {
6344 /* Avoid indexed addressing for modes that have non-indexed
6345 load/store instruction forms. */
6346 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
6347 }
6348
6349 bool
6350 legitimate_indirect_address_p (rtx x, int strict)
6351 {
6352 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
6353 }
6354
6355 bool
6356 macho_lo_sum_memory_operand (rtx x, enum machine_mode mode)
6357 {
6358 if (!TARGET_MACHO || !flag_pic
6359 || mode != SImode || GET_CODE (x) != MEM)
6360 return false;
6361 x = XEXP (x, 0);
6362
6363 if (GET_CODE (x) != LO_SUM)
6364 return false;
6365 if (GET_CODE (XEXP (x, 0)) != REG)
6366 return false;
6367 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
6368 return false;
6369 x = XEXP (x, 1);
6370
6371 return CONSTANT_P (x);
6372 }
6373
6374 static bool
6375 legitimate_lo_sum_address_p (enum machine_mode mode, rtx x, int strict)
6376 {
6377 if (GET_CODE (x) != LO_SUM)
6378 return false;
6379 if (GET_CODE (XEXP (x, 0)) != REG)
6380 return false;
6381 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6382 return false;
6383 /* Restrict addressing for DI because of our SUBREG hackery. */
6384 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
6385 return false;
6386 x = XEXP (x, 1);
6387
6388 if (TARGET_ELF || TARGET_MACHO)
6389 {
6390 bool large_toc_ok;
6391
6392 if (DEFAULT_ABI != ABI_AIX && DEFAULT_ABI != ABI_DARWIN && flag_pic)
6393 return false;
6394 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, as that usually calls
6395 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
6396 recognizes some LO_SUM addresses as valid although this
6397 function says the opposite. In most cases LRA can, through
6398 different transformations, generate correct code for address
6399 reloads; it cannot manage only some LO_SUM cases. So we need to
6400 add code here analogous to that in rs6000_legitimize_reload_address
6401 for LO_SUM, saying that some addresses are still valid. */
6402 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
6403 && small_toc_ref (x, VOIDmode));
6404 if (TARGET_TOC && ! large_toc_ok)
6405 return false;
6406 if (GET_MODE_NUNITS (mode) != 1)
6407 return false;
6408 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6409 && !(/* ??? Assume floating point reg based on mode? */
6410 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
6411 && (mode == DFmode || mode == DDmode)))
6412 return false;
6413
6414 return CONSTANT_P (x) || large_toc_ok;
6415 }
6416
6417 return false;
6418 }
6419
6420
6421 /* Try machine-dependent ways of modifying an illegitimate address
6422 to be legitimate. If we find one, return the new, valid address.
6423 This is used from only one place: `memory_address' in explow.c.
6424
6425 OLDX is the address as it was before break_out_memory_refs was
6426 called. In some cases it is useful to look at this to decide what
6427 needs to be done.
6428
6429 It is always safe for this function to do nothing. It exists to
6430 recognize opportunities to optimize the output.
6431
6432 On RS/6000, first check for the sum of a register with a constant
6433 integer that is out of range. If so, generate code to add the
6434 constant with the low-order 16 bits masked to the register and force
6435 this result into another register (this can be done with `cau').
6436 Then generate an address of REG+(CONST&0xffff), allowing for the
6437 possibility of bit 16 being a one.
6438
6439 Then check for the sum of a register and something not constant, try to
6440 load the other things into a register and return the sum. */
6441
6442 static rtx
6443 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
6444 enum machine_mode mode)
6445 {
6446 unsigned int extra;
6447
6448 if (!reg_offset_addressing_ok_p (mode))
6449 {
6450 if (virtual_stack_registers_memory_p (x))
6451 return x;
6452
6453 /* In theory we should not be seeing addresses of the form reg+0,
6454 but just in case it is generated, optimize it away. */
6455 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
6456 return force_reg (Pmode, XEXP (x, 0));
6457
6458 /* For TImode with load/store quad, restrict addresses to just a single
6459 pointer, so it works with both GPRs and VSX registers. */
6460 /* Make sure both operands are registers. */
6461 else if (GET_CODE (x) == PLUS
6462 && (mode != TImode || !TARGET_QUAD_MEMORY))
6463 return gen_rtx_PLUS (Pmode,
6464 force_reg (Pmode, XEXP (x, 0)),
6465 force_reg (Pmode, XEXP (x, 1)));
6466 else
6467 return force_reg (Pmode, x);
6468 }
6469 if (GET_CODE (x) == SYMBOL_REF)
6470 {
6471 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
6472 if (model != 0)
6473 return rs6000_legitimize_tls_address (x, model);
6474 }
6475
6476 extra = 0;
6477 switch (mode)
6478 {
6479 case TFmode:
6480 case TDmode:
6481 case TImode:
6482 case PTImode:
6483 /* As in legitimate_offset_address_p we do not assume
6484 worst-case. The mode here is just a hint as to the registers
6485 used. A TImode is usually in gprs, but may actually be in
6486 fprs. Leave worst-case scenario for reload to handle via
6487 insn constraints. PTImode is only GPRs. */
6488 extra = 8;
6489 break;
6490 default:
6491 break;
6492 }
6493
6494 if (GET_CODE (x) == PLUS
6495 && GET_CODE (XEXP (x, 0)) == REG
6496 && GET_CODE (XEXP (x, 1)) == CONST_INT
6497 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
6498 >= 0x10000 - extra)
6499 && !(SPE_VECTOR_MODE (mode)
6500 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
6501 {
6502 HOST_WIDE_INT high_int, low_int;
6503 rtx sum;
6504 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
6505 if (low_int >= 0x8000 - extra)
6506 low_int = 0;
6507 high_int = INTVAL (XEXP (x, 1)) - low_int;
6508 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
6509 GEN_INT (high_int)), 0);
6510 return plus_constant (Pmode, sum, low_int);
6511 }
6512 else if (GET_CODE (x) == PLUS
6513 && GET_CODE (XEXP (x, 0)) == REG
6514 && GET_CODE (XEXP (x, 1)) != CONST_INT
6515 && GET_MODE_NUNITS (mode) == 1
6516 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6517 || (/* ??? Assume floating point reg based on mode? */
6518 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6519 && (mode == DFmode || mode == DDmode)))
6520 && !avoiding_indexed_address_p (mode))
6521 {
6522 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
6523 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
6524 }
6525 else if (SPE_VECTOR_MODE (mode)
6526 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
6527 {
6528 if (mode == DImode)
6529 return x;
6530 /* We accept [reg + reg] and [reg + OFFSET]. */
6531
6532 if (GET_CODE (x) == PLUS)
6533 {
6534 rtx op1 = XEXP (x, 0);
6535 rtx op2 = XEXP (x, 1);
6536 rtx y;
6537
6538 op1 = force_reg (Pmode, op1);
6539
6540 if (GET_CODE (op2) != REG
6541 && (GET_CODE (op2) != CONST_INT
6542 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
6543 || (GET_MODE_SIZE (mode) > 8
6544 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
6545 op2 = force_reg (Pmode, op2);
6546
6547 /* We can't always do [reg + reg] for these, because [reg +
6548 reg + offset] is not a legitimate addressing mode. */
6549 y = gen_rtx_PLUS (Pmode, op1, op2);
6550
6551 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
6552 return force_reg (Pmode, y);
6553 else
6554 return y;
6555 }
6556
6557 return force_reg (Pmode, x);
6558 }
6559 else if ((TARGET_ELF
6560 #if TARGET_MACHO
6561 || !MACHO_DYNAMIC_NO_PIC_P
6562 #endif
6563 )
6564 && TARGET_32BIT
6565 && TARGET_NO_TOC
6566 && ! flag_pic
6567 && GET_CODE (x) != CONST_INT
6568 && GET_CODE (x) != CONST_DOUBLE
6569 && CONSTANT_P (x)
6570 && GET_MODE_NUNITS (mode) == 1
6571 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6572 || (/* ??? Assume floating point reg based on mode? */
6573 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6574 && (mode == DFmode || mode == DDmode))))
6575 {
6576 rtx reg = gen_reg_rtx (Pmode);
6577 if (TARGET_ELF)
6578 emit_insn (gen_elf_high (reg, x));
6579 else
6580 emit_insn (gen_macho_high (reg, x));
6581 return gen_rtx_LO_SUM (Pmode, reg, x);
6582 }
6583 else if (TARGET_TOC
6584 && GET_CODE (x) == SYMBOL_REF
6585 && constant_pool_expr_p (x)
6586 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
6587 return create_TOC_reference (x, NULL_RTX);
6588 else
6589 return x;
6590 }
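
/* A standalone sketch, not part of GCC, of the high/low split above for
   out-of-range reg+const addresses: the constant is divided so that the
   low part fits a signed 16-bit displacement and the high part can be
   added with addis (the "cau" of the comment).  This simplification
   ignores the EXTRA adjustment applied above; the helper name is made
   up for the example.  */
#if 0
#include <stdint.h>

static void
split_offset (int64_t c, int64_t *high, int64_t *low)
{
  /* Sign-extend the low 16 bits, as in the code above.  */
  *low = ((c & 0xffff) ^ 0x8000) - 0x8000;
  *high = c - *low;             /* always a multiple of 0x10000 */
}

/* split_offset (0x12345678, ...) yields high 0x12340000, low 0x5678;
   split_offset (0x1234abcd, ...) yields high 0x12350000, low -0x5433,
   since 0xabcd sign-extends to a negative displacement.  */
#endif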
6591
6592 /* Debug version of rs6000_legitimize_address. */
6593 static rtx
6594 rs6000_debug_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
6595 {
6596 rtx ret;
6597 rtx insns;
6598
6599 start_sequence ();
6600 ret = rs6000_legitimize_address (x, oldx, mode);
6601 insns = get_insns ();
6602 end_sequence ();
6603
6604 if (ret != x)
6605 {
6606 fprintf (stderr,
6607 "\nrs6000_legitimize_address: mode %s, old code %s, "
6608 "new code %s, modified\n",
6609 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
6610 GET_RTX_NAME (GET_CODE (ret)));
6611
6612 fprintf (stderr, "Original address:\n");
6613 debug_rtx (x);
6614
6615 fprintf (stderr, "oldx:\n");
6616 debug_rtx (oldx);
6617
6618 fprintf (stderr, "New address:\n");
6619 debug_rtx (ret);
6620
6621 if (insns)
6622 {
6623 fprintf (stderr, "Insns added:\n");
6624 debug_rtx_list (insns, 20);
6625 }
6626 }
6627 else
6628 {
6629 fprintf (stderr,
6630 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
6631 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
6632
6633 debug_rtx (x);
6634 }
6635
6636 if (insns)
6637 emit_insn (insns);
6638
6639 return ret;
6640 }
6641
6642 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6643 We need to emit DTP-relative relocations. */
6644
6645 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6646 static void
6647 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
6648 {
6649 switch (size)
6650 {
6651 case 4:
6652 fputs ("\t.long\t", file);
6653 break;
6654 case 8:
6655 fputs (DOUBLE_INT_ASM_OP, file);
6656 break;
6657 default:
6658 gcc_unreachable ();
6659 }
6660 output_addr_const (file, x);
6661 fputs ("@dtprel+0x8000", file);
6662 }
6663
6664 /* In the name of slightly smaller debug output, and to cater to
6665 general assembler lossage, recognize various UNSPEC sequences
6666 and turn them back into a direct symbol reference. */
6667
6668 static rtx
6669 rs6000_delegitimize_address (rtx orig_x)
6670 {
6671 rtx x, y, offset;
6672
6673 orig_x = delegitimize_mem_from_attrs (orig_x);
6674 x = orig_x;
6675 if (MEM_P (x))
6676 x = XEXP (x, 0);
6677
6678 y = x;
6679 if (TARGET_CMODEL != CMODEL_SMALL
6680 && GET_CODE (y) == LO_SUM)
6681 y = XEXP (y, 1);
6682
6683 offset = NULL_RTX;
6684 if (GET_CODE (y) == PLUS
6685 && GET_MODE (y) == Pmode
6686 && CONST_INT_P (XEXP (y, 1)))
6687 {
6688 offset = XEXP (y, 1);
6689 y = XEXP (y, 0);
6690 }
6691
6692 if (GET_CODE (y) == UNSPEC
6693 && XINT (y, 1) == UNSPEC_TOCREL)
6694 {
6695 #ifdef ENABLE_CHECKING
6696 if (REG_P (XVECEXP (y, 0, 1))
6697 && REGNO (XVECEXP (y, 0, 1)) == TOC_REGISTER)
6698 {
6699 /* All good. */
6700 }
6701 else if (GET_CODE (XVECEXP (y, 0, 1)) == DEBUG_EXPR)
6702 {
6703 /* Weirdness alert. df_note_compute can replace r2 with a
6704 debug_expr when this unspec is in a debug_insn.
6705 Seen in gcc.dg/pr51957-1.c */
6706 }
6707 else
6708 {
6709 debug_rtx (orig_x);
6710 abort ();
6711 }
6712 #endif
6713 y = XVECEXP (y, 0, 0);
6714
6715 #ifdef HAVE_AS_TLS
6716 /* Do not associate thread-local symbols with the original
6717 constant pool symbol. */
6718 if (TARGET_XCOFF
6719 && GET_CODE (y) == SYMBOL_REF
6720 && CONSTANT_POOL_ADDRESS_P (y)
6721 && SYMBOL_REF_TLS_MODEL (get_pool_constant (y)) >= TLS_MODEL_REAL)
6722 return orig_x;
6723 #endif
6724
6725 if (offset != NULL_RTX)
6726 y = gen_rtx_PLUS (Pmode, y, offset);
6727 if (!MEM_P (orig_x))
6728 return y;
6729 else
6730 return replace_equiv_address_nv (orig_x, y);
6731 }
6732
6733 if (TARGET_MACHO
6734 && GET_CODE (orig_x) == LO_SUM
6735 && GET_CODE (XEXP (orig_x, 1)) == CONST)
6736 {
6737 y = XEXP (XEXP (orig_x, 1), 0);
6738 if (GET_CODE (y) == UNSPEC
6739 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
6740 return XVECEXP (y, 0, 0);
6741 }
6742
6743 return orig_x;
6744 }
6745
6746 /* Return true if X shouldn't be emitted into the debug info.
6747 The linker doesn't like .toc section references from
6748 .debug_* sections, so reject .toc section symbols. */
6749
6750 static bool
6751 rs6000_const_not_ok_for_debug_p (rtx x)
6752 {
6753 if (GET_CODE (x) == SYMBOL_REF
6754 && CONSTANT_POOL_ADDRESS_P (x))
6755 {
6756 rtx c = get_pool_constant (x);
6757 enum machine_mode cmode = get_pool_mode (x);
6758 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
6759 return true;
6760 }
6761
6762 return false;
6763 }
6764
6765 /* Construct the SYMBOL_REF for the tls_get_addr function. */
6766
6767 static GTY(()) rtx rs6000_tls_symbol;
6768 static rtx
6769 rs6000_tls_get_addr (void)
6770 {
6771 if (!rs6000_tls_symbol)
6772 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
6773
6774 return rs6000_tls_symbol;
6775 }
6776
6777 /* Construct the SYMBOL_REF for TLS GOT references. */
6778
6779 static GTY(()) rtx rs6000_got_symbol;
6780 static rtx
6781 rs6000_got_sym (void)
6782 {
6783 if (!rs6000_got_symbol)
6784 {
6785 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
6786 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
6787 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
6788 }
6789
6790 return rs6000_got_symbol;
6791 }
6792
6793 /* AIX Thread-Local Address support. */
6794
6795 static rtx
6796 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
6797 {
6798 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
6799 const char *name;
6800 char *tlsname;
6801
6802 name = XSTR (addr, 0);
6803 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
6804 or the symbol will be placed in the TLS private data section. */
6805 if (name[strlen (name) - 1] != ']'
6806 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
6807 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
6808 {
6809 tlsname = XALLOCAVEC (char, strlen (name) + 4);
6810 strcpy (tlsname, name);
6811 strcat (tlsname,
6812 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
6813 tlsaddr = copy_rtx (addr);
6814 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
6815 }
6816 else
6817 tlsaddr = addr;
6818
6819 /* Place addr into TOC constant pool. */
6820 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
6821
6822 /* Output the TOC entry and create the MEM referencing the value. */
6823 if (constant_pool_expr_p (XEXP (sym, 0))
6824 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
6825 {
6826 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
6827 mem = gen_const_mem (Pmode, tocref);
6828 set_mem_alias_set (mem, get_TOC_alias_set ());
6829 }
6830 else
6831 return sym;
6832
6833 /* Use global-dynamic for local-dynamic. */
6834 if (model == TLS_MODEL_GLOBAL_DYNAMIC
6835 || model == TLS_MODEL_LOCAL_DYNAMIC)
6836 {
6837 /* Create new TOC reference for @m symbol. */
6838 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
6839 tlsname = XALLOCAVEC (char, strlen (name) + 1);
6840 strcpy (tlsname, "*LCM");
6841 strcat (tlsname, name + 3);
6842 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
6843 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
6844 tocref = create_TOC_reference (modaddr, NULL_RTX);
6845 rtx modmem = gen_const_mem (Pmode, tocref);
6846 set_mem_alias_set (modmem, get_TOC_alias_set ());
6847
6848 rtx modreg = gen_reg_rtx (Pmode);
6849 emit_insn (gen_rtx_SET (VOIDmode, modreg, modmem));
6850
6851 tmpreg = gen_reg_rtx (Pmode);
6852 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
6853
6854 dest = gen_reg_rtx (Pmode);
6855 if (TARGET_32BIT)
6856 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
6857 else
6858 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
6859 return dest;
6860 }
6861 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
6862 else if (TARGET_32BIT)
6863 {
6864 tlsreg = gen_reg_rtx (SImode);
6865 emit_insn (gen_tls_get_tpointer (tlsreg));
6866 }
6867 else
6868 tlsreg = gen_rtx_REG (DImode, 13);
6869
6870 /* Load the TOC value into temporary register. */
6871 tmpreg = gen_reg_rtx (Pmode);
6872 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
6873 set_unique_reg_note (get_last_insn (), REG_EQUAL,
6874 gen_rtx_MINUS (Pmode, addr, tlsreg));
6875
6876 /* Add TOC symbol value to TLS pointer. */
6877 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
6878
6879 return dest;
6880 }
6881
6882 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
6883 this (thread-local) address. */
6884
6885 static rtx
6886 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
6887 {
6888 rtx dest, insn;
6889
6890 if (TARGET_XCOFF)
6891 return rs6000_legitimize_tls_address_aix (addr, model);
6892
6893 dest = gen_reg_rtx (Pmode);
6894 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
6895 {
6896 rtx tlsreg;
6897
6898 if (TARGET_64BIT)
6899 {
6900 tlsreg = gen_rtx_REG (Pmode, 13);
6901 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
6902 }
6903 else
6904 {
6905 tlsreg = gen_rtx_REG (Pmode, 2);
6906 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
6907 }
6908 emit_insn (insn);
6909 }
6910 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
6911 {
6912 rtx tlsreg, tmp;
6913
6914 tmp = gen_reg_rtx (Pmode);
6915 if (TARGET_64BIT)
6916 {
6917 tlsreg = gen_rtx_REG (Pmode, 13);
6918 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
6919 }
6920 else
6921 {
6922 tlsreg = gen_rtx_REG (Pmode, 2);
6923 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
6924 }
6925 emit_insn (insn);
6926 if (TARGET_64BIT)
6927 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
6928 else
6929 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
6930 emit_insn (insn);
6931 }
6932 else
6933 {
6934 rtx r3, got, tga, tmp1, tmp2, call_insn;
6935
6936 /* We currently use relocations like @got@tlsgd for tls, which
6937 means the linker will handle allocation of tls entries, placing
6938 them in the .got section. So use a pointer to the .got section,
6939 not one to secondary TOC sections used by 64-bit -mminimal-toc,
6940 or to secondary GOT sections used by 32-bit -fPIC. */
6941 if (TARGET_64BIT)
6942 got = gen_rtx_REG (Pmode, 2);
6943 else
6944 {
6945 if (flag_pic == 1)
6946 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
6947 else
6948 {
6949 rtx gsym = rs6000_got_sym ();
6950 got = gen_reg_rtx (Pmode);
6951 if (flag_pic == 0)
6952 rs6000_emit_move (got, gsym, Pmode);
6953 else
6954 {
6955 rtx mem, lab, last;
6956
6957 tmp1 = gen_reg_rtx (Pmode);
6958 tmp2 = gen_reg_rtx (Pmode);
6959 mem = gen_const_mem (Pmode, tmp1);
6960 lab = gen_label_rtx ();
6961 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
6962 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
6963 if (TARGET_LINK_STACK)
6964 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
6965 emit_move_insn (tmp2, mem);
6966 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
6967 set_unique_reg_note (last, REG_EQUAL, gsym);
6968 }
6969 }
6970 }
6971
6972 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
6973 {
6974 tga = rs6000_tls_get_addr ();
6975 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
6976 1, const0_rtx, Pmode);
6977
6978 r3 = gen_rtx_REG (Pmode, 3);
6979 if (DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
6980 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
6981 else if (DEFAULT_ABI == ABI_AIX && !TARGET_64BIT)
6982 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
6983 else if (DEFAULT_ABI == ABI_V4)
6984 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
6985 else
6986 gcc_unreachable ();
6987 call_insn = last_call_insn ();
6988 PATTERN (call_insn) = insn;
6989 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
6990 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
6991 pic_offset_table_rtx);
6992 }
6993 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
6994 {
6995 tga = rs6000_tls_get_addr ();
6996 tmp1 = gen_reg_rtx (Pmode);
6997 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
6998 1, const0_rtx, Pmode);
6999
7000 r3 = gen_rtx_REG (Pmode, 3);
7001 if (DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
7002 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
7003 else if (DEFAULT_ABI == ABI_AIX && !TARGET_64BIT)
7004 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
7005 else if (DEFAULT_ABI == ABI_V4)
7006 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
7007 else
7008 gcc_unreachable ();
7009 call_insn = last_call_insn ();
7010 PATTERN (call_insn) = insn;
7011 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7012 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7013 pic_offset_table_rtx);
7014
7015 if (rs6000_tls_size == 16)
7016 {
7017 if (TARGET_64BIT)
7018 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
7019 else
7020 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
7021 }
7022 else if (rs6000_tls_size == 32)
7023 {
7024 tmp2 = gen_reg_rtx (Pmode);
7025 if (TARGET_64BIT)
7026 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
7027 else
7028 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
7029 emit_insn (insn);
7030 if (TARGET_64BIT)
7031 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
7032 else
7033 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
7034 }
7035 else
7036 {
7037 tmp2 = gen_reg_rtx (Pmode);
7038 if (TARGET_64BIT)
7039 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
7040 else
7041 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
7042 emit_insn (insn);
7043 insn = gen_rtx_SET (Pmode, dest,
7044 gen_rtx_PLUS (Pmode, tmp2, tmp1));
7045 }
7046 emit_insn (insn);
7047 }
7048 else
7049 {
7050 /* Initial-exec, or local-exec with a 64-bit offset. */
7051 tmp2 = gen_reg_rtx (Pmode);
7052 if (TARGET_64BIT)
7053 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
7054 else
7055 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
7056 emit_insn (insn);
7057 if (TARGET_64BIT)
7058 insn = gen_tls_tls_64 (dest, tmp2, addr);
7059 else
7060 insn = gen_tls_tls_32 (dest, tmp2, addr);
7061 emit_insn (insn);
7062 }
7063 }
7064
7065 return dest;
7066 }
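
/* Editor's note (illustrative, not part of the original source): for the
   local-exec model with -mtls-size=16 on a 64-bit target, the code above
   emits a single tls_tprel_64 insn, roughly "addi dest,13,addr@tprel",
   adding the 16-bit thread-pointer-relative offset of ADDR to the thread
   pointer in r13 (r2 on 32-bit targets).  */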
7067
7068 /* Return true if X contains a thread-local symbol. */
7069
7070 static bool
7071 rs6000_tls_referenced_p (rtx x)
7072 {
7073 if (! TARGET_HAVE_TLS)
7074 return false;
7075
7076 return for_each_rtx (&x, &rs6000_tls_symbol_ref_1, 0);
7077 }
7078
7079 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7080
7081 static bool
7082 rs6000_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7083 {
7084 if (GET_CODE (x) == HIGH
7085 && GET_CODE (XEXP (x, 0)) == UNSPEC)
7086 return true;
7087
7088 /* A TLS symbol in the TOC cannot contain a sum. */
7089 if (GET_CODE (x) == CONST
7090 && GET_CODE (XEXP (x, 0)) == PLUS
7091 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7092 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
7093 return true;
7094
7095 /* Do not place an ELF TLS symbol in the constant pool. */
7096 return TARGET_ELF && rs6000_tls_referenced_p (x);
7097 }
7098
7099 /* Return 1 if *X is a thread-local symbol. This is the same as
7100 rs6000_tls_symbol_ref except for the type of the unused argument. */
7101
7102 static int
7103 rs6000_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7104 {
7105 return RS6000_SYMBOL_REF_TLS_P (*x);
7106 }
7107
7108 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
7109 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
7110 can be addressed relative to the toc pointer. */
7111
7112 static bool
7113 use_toc_relative_ref (rtx sym)
7114 {
7115 return ((constant_pool_expr_p (sym)
7116 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
7117 get_pool_mode (sym)))
7118 || (TARGET_CMODEL == CMODEL_MEDIUM
7119 && SYMBOL_REF_LOCAL_P (sym)));
7120 }
7121
7122 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
7123 replace the input X, or the original X if no replacement is called for.
7124 The output parameter *WIN is 1 if the calling macro should goto WIN,
7125 0 if it should not.
7126
7127 For RS/6000, we wish to handle large displacements off a base
7128 register by splitting the addend across an addis and the mem insn.
7129 This cuts the number of extra insns needed from 3 to 1.
7130
7131 On Darwin, we use this to generate code for floating point constants.
7132 A movsf_low is generated so we wind up with 2 instructions rather than 3.
7133 The Darwin code is inside #if TARGET_MACHO because only then are the
7134 machopic_* functions defined. */
7135 static rtx
7136 rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
7137 int opnum, int type,
7138 int ind_levels ATTRIBUTE_UNUSED, int *win)
7139 {
7140 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7141
7142 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
7143 DFmode/DImode MEM. */
7144 if (reg_offset_p
7145 && opnum == 1
7146 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
7147 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
7148 reg_offset_p = false;
7149
7150 /* We must recognize output that we have already generated ourselves. */
7151 if (GET_CODE (x) == PLUS
7152 && GET_CODE (XEXP (x, 0)) == PLUS
7153 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7154 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7155 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7156 {
7157 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7158 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7159 opnum, (enum reload_type) type);
7160 *win = 1;
7161 return x;
7162 }
7163
7164 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
7165 if (GET_CODE (x) == LO_SUM
7166 && GET_CODE (XEXP (x, 0)) == HIGH)
7167 {
7168 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7169 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7170 opnum, (enum reload_type) type);
7171 *win = 1;
7172 return x;
7173 }
7174
7175 #if TARGET_MACHO
7176 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
7177 && GET_CODE (x) == LO_SUM
7178 && GET_CODE (XEXP (x, 0)) == PLUS
7179 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
7180 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
7181 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
7182 && machopic_operand_p (XEXP (x, 1)))
7183 {
7184 /* Result of previous invocation of this function on Darwin
7185 floating point constant. */
7186 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7187 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7188 opnum, (enum reload_type) type);
7189 *win = 1;
7190 return x;
7191 }
7192 #endif
7193
7194 if (TARGET_CMODEL != CMODEL_SMALL
7195 && reg_offset_p
7196 && small_toc_ref (x, VOIDmode))
7197 {
7198 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
7199 x = gen_rtx_LO_SUM (Pmode, hi, x);
7200 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7201 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7202 opnum, (enum reload_type) type);
7203 *win = 1;
7204 return x;
7205 }
7206
7207 if (GET_CODE (x) == PLUS
7208 && GET_CODE (XEXP (x, 0)) == REG
7209 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
7210 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
7211 && GET_CODE (XEXP (x, 1)) == CONST_INT
7212 && reg_offset_p
7213 && !SPE_VECTOR_MODE (mode)
7214 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7215 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
7216 {
7217 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
7218 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
7219 HOST_WIDE_INT high
7220 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
7221
7222 /* Check for 32-bit overflow. */
7223 if (high + low != val)
7224 {
7225 *win = 0;
7226 return x;
7227 }
7228
7229 /* Reload the high part into a base reg; leave the low part
7230 in the mem directly. */
7231
7232 x = gen_rtx_PLUS (GET_MODE (x),
7233 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
7234 GEN_INT (high)),
7235 GEN_INT (low));
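
/* Editor's worked example (values assumed): for val = 0x12345678,
   low = 0x5678 and high = 0x12340000, so (reg + 0x12345678) becomes
   ((reg + 0x12340000) + 0x5678); the high part is reloaded into a base
   register (one addis) and the low part stays as the 16-bit displacement
   of the mem insn.  When the low half is >= 0x8000, e.g.
   val = 0x12348000, low becomes -0x8000 and high rounds up to
   0x12350000.  */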
7236
7237 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7238 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7239 opnum, (enum reload_type) type);
7240 *win = 1;
7241 return x;
7242 }
7243
7244 if (GET_CODE (x) == SYMBOL_REF
7245 && reg_offset_p
7246 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
7247 && !SPE_VECTOR_MODE (mode)
7248 #if TARGET_MACHO
7249 && DEFAULT_ABI == ABI_DARWIN
7250 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
7251 && machopic_symbol_defined_p (x)
7252 #else
7253 && DEFAULT_ABI == ABI_V4
7254 && !flag_pic
7255 #endif
7256 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
7257 The same goes for DImode without 64-bit gprs and DFmode and DDmode
7258 without fprs.
7259 ??? Assume floating point reg based on mode? This assumption is
7260 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
7261 where reload ends up doing a DFmode load of a constant from
7262 mem using two gprs. Unfortunately, at this point reload
7263 hasn't yet selected regs so poking around in reload data
7264 won't help and even if we could figure out the regs reliably,
7265 we'd still want to allow this transformation when the mem is
7266 naturally aligned. Since we say the address is good here, we
7267 can't disable offsets from LO_SUMs in mem_operand_gpr.
7268 FIXME: Allow offset from lo_sum for other modes too, when
7269 mem is sufficiently aligned. */
7270 && mode != TFmode
7271 && mode != TDmode
7272 && (mode != TImode || !TARGET_VSX_TIMODE)
7273 && mode != PTImode
7274 && (mode != DImode || TARGET_POWERPC64)
7275 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
7276 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
7277 {
7278 #if TARGET_MACHO
7279 if (flag_pic)
7280 {
7281 rtx offset = machopic_gen_offset (x);
7282 x = gen_rtx_LO_SUM (GET_MODE (x),
7283 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
7284 gen_rtx_HIGH (Pmode, offset)), offset);
7285 }
7286 else
7287 #endif
7288 x = gen_rtx_LO_SUM (GET_MODE (x),
7289 gen_rtx_HIGH (Pmode, x), x);
7290
7291 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7292 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7293 opnum, (enum reload_type) type);
7294 *win = 1;
7295 return x;
7296 }
7297
7298 /* Reload an offset address wrapped by an AND that represents the
7299 masking of the lower bits. Strip the outer AND and let reload
7300 convert the offset address into an indirect address. For VSX,
7301 force reload to create the address with an AND in a separate
7302 register, because we can't guarantee an altivec register will
7303 be used. */
7304 if (VECTOR_MEM_ALTIVEC_P (mode)
7305 && GET_CODE (x) == AND
7306 && GET_CODE (XEXP (x, 0)) == PLUS
7307 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7308 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7309 && GET_CODE (XEXP (x, 1)) == CONST_INT
7310 && INTVAL (XEXP (x, 1)) == -16)
7311 {
7312 x = XEXP (x, 0);
7313 *win = 1;
7314 return x;
7315 }
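
/* Editor's illustrative example (register number assumed): an address
   such as (and (plus (reg 9) (const_int 16)) (const_int -16)) is
   stripped to (plus (reg 9) (const_int 16)) here, and reload then
   converts that offset address into an indirect address suitable for
   lvx/stvx.  */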
7316
7317 if (TARGET_TOC
7318 && reg_offset_p
7319 && GET_CODE (x) == SYMBOL_REF
7320 && use_toc_relative_ref (x))
7321 {
7322 x = create_TOC_reference (x, NULL_RTX);
7323 if (TARGET_CMODEL != CMODEL_SMALL)
7324 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7325 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7326 opnum, (enum reload_type) type);
7327 *win = 1;
7328 return x;
7329 }
7330 *win = 0;
7331 return x;
7332 }
7333
7334 /* Debug version of rs6000_legitimize_reload_address. */
7335 static rtx
7336 rs6000_debug_legitimize_reload_address (rtx x, enum machine_mode mode,
7337 int opnum, int type,
7338 int ind_levels, int *win)
7339 {
7340 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
7341 ind_levels, win);
7342 fprintf (stderr,
7343 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
7344 "type = %d, ind_levels = %d, win = %d, original addr:\n",
7345 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
7346 debug_rtx (x);
7347
7348 if (x == ret)
7349 fprintf (stderr, "Same address returned\n");
7350 else if (!ret)
7351 fprintf (stderr, "NULL returned\n");
7352 else
7353 {
7354 fprintf (stderr, "New address:\n");
7355 debug_rtx (ret);
7356 }
7357
7358 return ret;
7359 }
7360
7361 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
7362 that is a valid memory address for an instruction.
7363 The MODE argument is the machine mode for the MEM expression
7364 that wants to use this address.
7365
7366 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
7367 refers to a constant pool entry of an address (or the sum of it
7368 plus a constant), a short (16-bit signed) constant plus a register,
7369 the sum of two registers, or a register indirect, possibly with an
7370 auto-increment. For DFmode, DDmode and DImode with a constant plus
7371 register, we must ensure that both words are addressable or PowerPC64
7372 with offset word aligned.
7373
7374 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
7375 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
7376 because adjacent memory cells are accessed by adding word-sized offsets
7377 during assembly output. */
7378 static bool
7379 rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
7380 {
7381 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7382
7383 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
7384 if (VECTOR_MEM_ALTIVEC_P (mode)
7385 && GET_CODE (x) == AND
7386 && GET_CODE (XEXP (x, 1)) == CONST_INT
7387 && INTVAL (XEXP (x, 1)) == -16)
7388 x = XEXP (x, 0);
7389
7390 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
7391 return 0;
7392 if (legitimate_indirect_address_p (x, reg_ok_strict))
7393 return 1;
7394 if (TARGET_UPDATE
7395 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
7396 && mode_supports_pre_incdec_p (mode)
7397 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
7398 return 1;
7399 if (virtual_stack_registers_memory_p (x))
7400 return 1;
7401 if (reg_offset_p && legitimate_small_data_p (mode, x))
7402 return 1;
7403 if (reg_offset_p
7404 && legitimate_constant_pool_address_p (x, mode,
7405 reg_ok_strict || lra_in_progress))
7406 return 1;
7407 /* For TImode, if we have load/store quad and TImode in VSX registers, only
7408 allow register indirect addresses. This will allow the values to go in
7409 either GPRs or VSX registers without reloading. The vector types would
7410 tend to go into VSX registers, so we allow REG+REG, while TImode seems
7411 somewhat split, in that some uses are GPR based, and some VSX based. */
7412 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
7413 return 0;
7414 /* If not REG_OK_STRICT (i.e. before reload), allow any stack offset. */
7415 if (! reg_ok_strict
7416 && reg_offset_p
7417 && GET_CODE (x) == PLUS
7418 && GET_CODE (XEXP (x, 0)) == REG
7419 && (XEXP (x, 0) == virtual_stack_vars_rtx
7420 || XEXP (x, 0) == arg_pointer_rtx)
7421 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7422 return 1;
7423 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
7424 return 1;
7425 if (mode != TFmode
7426 && mode != TDmode
7427 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7428 || TARGET_POWERPC64
7429 || (mode != DFmode && mode != DDmode)
7430 || (TARGET_E500_DOUBLE && mode != DDmode))
7431 && (TARGET_POWERPC64 || mode != DImode)
7432 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
7433 && mode != PTImode
7434 && !avoiding_indexed_address_p (mode)
7435 && legitimate_indexed_address_p (x, reg_ok_strict))
7436 return 1;
7437 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
7438 && mode_supports_pre_modify_p (mode)
7439 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
7440 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
7441 reg_ok_strict, false)
7442 || (!avoiding_indexed_address_p (mode)
7443 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
7444 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7445 return 1;
7446 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
7447 return 1;
7448 return 0;
7449 }
7450
7451 /* Debug version of rs6000_legitimate_address_p. */
7452 static bool
7453 rs6000_debug_legitimate_address_p (enum machine_mode mode, rtx x,
7454 bool reg_ok_strict)
7455 {
7456 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
7457 fprintf (stderr,
7458 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
7459 "strict = %d, reload = %s, code = %s\n",
7460 ret ? "true" : "false",
7461 GET_MODE_NAME (mode),
7462 reg_ok_strict,
7463 (reload_completed
7464 ? "after"
7465 : (reload_in_progress ? "progress" : "before")),
7466 GET_RTX_NAME (GET_CODE (x)));
7467 debug_rtx (x);
7468
7469 return ret;
7470 }
7471
7472 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
7473
7474 static bool
7475 rs6000_mode_dependent_address_p (const_rtx addr,
7476 addr_space_t as ATTRIBUTE_UNUSED)
7477 {
7478 return rs6000_mode_dependent_address_ptr (addr);
7479 }
7480
7481 /* Return true if ADDR (a legitimate address expression)
7482 has an effect that depends on the machine mode it is used for.
7483
7484 On the RS/6000 this is true of all integral offsets (since AltiVec
7485 and VSX modes don't allow them) and of pre-increment or decrement.
7486
7487 ??? Except that due to conceptual problems in offsettable_address_p
7488 we can't really report the problems of integral offsets. So leave
7489 this assuming that the adjustable offset must be valid for the
7490 sub-words of a TFmode operand, which is what we had before. */
7491
7492 static bool
7493 rs6000_mode_dependent_address (const_rtx addr)
7494 {
7495 switch (GET_CODE (addr))
7496 {
7497 case PLUS:
7498 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
7499 is considered a legitimate address before reload, so there
7500 are no offset restrictions in that case. Note that this
7501 condition is safe in strict mode because any address involving
7502 virtual_stack_vars_rtx or arg_pointer_rtx would already have
7503 been rejected as illegitimate. */
7504 if (XEXP (addr, 0) != virtual_stack_vars_rtx
7505 && XEXP (addr, 0) != arg_pointer_rtx
7506 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
7507 {
7508 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
7509 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
7510 }
7511 break;
7512
7513 case LO_SUM:
7514 /* Anything in the constant pool is sufficiently aligned that
7515 all bytes have the same high part address. */
7516 return !legitimate_constant_pool_address_p (addr, QImode, false);
7517
7518 /* Auto-increment cases are now treated generically in recog.c. */
7519 case PRE_MODIFY:
7520 return TARGET_UPDATE;
7521
7522 /* AND is only allowed in AltiVec loads. */
7523 case AND:
7524 return true;
7525
7526 default:
7527 break;
7528 }
7529
7530 return false;
7531 }
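
/* Editor's worked example (illustrative): in 32-bit mode the PLUS case
   above flags any offset within 12 bytes of the 16-bit limit, since the
   widest mode (TFmode, 16 bytes) needs displacements up to offset + 12
   for its sub-words.  E.g. val = 32760 gives 32760 + 0x8000 = 65528,
   which is >= 0x10000 - 12 = 65524, so the address is mode-dependent;
   val = 32752 is not.  */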
7532
7533 /* Debug version of rs6000_mode_dependent_address. */
7534 static bool
7535 rs6000_debug_mode_dependent_address (const_rtx addr)
7536 {
7537 bool ret = rs6000_mode_dependent_address (addr);
7538
7539 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
7540 ret ? "true" : "false");
7541 debug_rtx (addr);
7542
7543 return ret;
7544 }
7545
7546 /* Implement FIND_BASE_TERM. */
7547
7548 rtx
7549 rs6000_find_base_term (rtx op)
7550 {
7551 rtx base;
7552
7553 base = op;
7554 if (GET_CODE (base) == CONST)
7555 base = XEXP (base, 0);
7556 if (GET_CODE (base) == PLUS)
7557 base = XEXP (base, 0);
7558 if (GET_CODE (base) == UNSPEC)
7559 switch (XINT (base, 1))
7560 {
7561 case UNSPEC_TOCREL:
7562 case UNSPEC_MACHOPIC_OFFSET:
7563 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
7564 for aliasing purposes. */
7565 return XVECEXP (base, 0, 0);
7566 }
7567
7568 return op;
7569 }
7570
7571 /* More elaborate version of recog's offsettable_memref_p predicate
7572 that works around the ??? note of rs6000_mode_dependent_address.
7573 In particular it accepts
7574
7575 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
7576
7577 in 32-bit mode, which the recog predicate rejects. */
7578
7579 static bool
7580 rs6000_offsettable_memref_p (rtx op, enum machine_mode reg_mode)
7581 {
7582 bool worst_case;
7583
7584 if (!MEM_P (op))
7585 return false;
7586
7587 /* First mimic offsettable_memref_p. */
7588 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
7589 return true;
7590
7591 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
7592 the latter predicate knows nothing about the mode of the memory
7593 reference and, therefore, assumes that it is the largest supported
7594 mode (TFmode). As a consequence, legitimate offsettable memory
7595 references are rejected. rs6000_legitimate_offset_address_p contains
7596 the correct logic for the PLUS case of rs6000_mode_dependent_address,
7597 at least with a little bit of help here given that we know the
7598 actual registers used. */
7599 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
7600 || GET_MODE_SIZE (reg_mode) == 4);
7601 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
7602 true, worst_case);
7603 }
7604
7605 /* Change register usage conditional on target flags. */
7606 static void
7607 rs6000_conditional_register_usage (void)
7608 {
7609 int i;
7610
7611 if (TARGET_DEBUG_TARGET)
7612 fprintf (stderr, "rs6000_conditional_register_usage called\n");
7613
7614 /* Set MQ register fixed (already call_used) so that it will not be
7615 allocated. */
7616 fixed_regs[64] = 1;
7617
7618 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
7619 if (TARGET_64BIT)
7620 fixed_regs[13] = call_used_regs[13]
7621 = call_really_used_regs[13] = 1;
7622
7623 /* Conditionally disable FPRs. */
7624 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
7625 for (i = 32; i < 64; i++)
7626 fixed_regs[i] = call_used_regs[i]
7627 = call_really_used_regs[i] = 1;
7628
7629 /* The TOC register is not killed across calls in a way that is
7630 visible to the compiler. */
7631 if (DEFAULT_ABI == ABI_AIX)
7632 call_really_used_regs[2] = 0;
7633
7634 if (DEFAULT_ABI == ABI_V4
7635 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
7636 && flag_pic == 2)
7637 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7638
7639 if (DEFAULT_ABI == ABI_V4
7640 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
7641 && flag_pic == 1)
7642 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7643 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7644 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7645
7646 if (DEFAULT_ABI == ABI_DARWIN
7647 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
7648 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7649 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7650 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7651
7652 if (TARGET_TOC && TARGET_MINIMAL_TOC)
7653 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7654 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7655
7656 if (TARGET_SPE)
7657 {
7658 global_regs[SPEFSCR_REGNO] = 1;
7659 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
7660 registers in prologues and epilogues. We no longer use r14
7661 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
7662 pool for link-compatibility with older versions of GCC. Once
7663 "old" code has died out, we can return r14 to the allocation
7664 pool. */
7665 fixed_regs[14]
7666 = call_used_regs[14]
7667 = call_really_used_regs[14] = 1;
7668 }
7669
7670 if (!TARGET_ALTIVEC && !TARGET_VSX)
7671 {
7672 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
7673 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
7674 call_really_used_regs[VRSAVE_REGNO] = 1;
7675 }
7676
7677 if (TARGET_ALTIVEC || TARGET_VSX)
7678 global_regs[VSCR_REGNO] = 1;
7679
7680 if (TARGET_ALTIVEC_ABI)
7681 {
7682 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
7683 call_used_regs[i] = call_really_used_regs[i] = 1;
7684
7685 /* AIX reserves VR20:31 in non-extended ABI mode. */
7686 if (TARGET_XCOFF)
7687 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
7688 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
7689 }
7690 }
7691
7692 \f
7693 /* Try to output insns to set DEST equal to the constant SOURCE if it
7694 can be done in less than N insns. Do all computations in MODE.
7695 Returns the place where the output has been placed if it can be
7696 done and the insns have been emitted. If it would take more than N
7697 insns, zero is returned and no insns are emitted. */
7698
7699 rtx
7700 rs6000_emit_set_const (rtx dest, enum machine_mode mode,
7701 rtx source, int n ATTRIBUTE_UNUSED)
7702 {
7703 rtx result, insn, set;
7704 HOST_WIDE_INT c0, c1;
7705
7706 switch (mode)
7707 {
7708 case QImode:
7709 case HImode:
7710 if (dest == NULL)
7711 dest = gen_reg_rtx (mode);
7712 emit_insn (gen_rtx_SET (VOIDmode, dest, source));
7713 return dest;
7714
7715 case SImode:
7716 result = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
7717
7718 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (result),
7719 GEN_INT (INTVAL (source)
7720 & (~ (HOST_WIDE_INT) 0xffff))));
7721 emit_insn (gen_rtx_SET (VOIDmode, dest,
7722 gen_rtx_IOR (SImode, copy_rtx (result),
7723 GEN_INT (INTVAL (source) & 0xffff))));
7724 result = dest;
7725 break;
7726
7727 case DImode:
7728 switch (GET_CODE (source))
7729 {
7730 case CONST_INT:
7731 c0 = INTVAL (source);
7732 c1 = -(c0 < 0);
7733 break;
7734
7735 default:
7736 gcc_unreachable ();
7737 }
7738
7739 result = rs6000_emit_set_long_const (dest, c0, c1);
7740 break;
7741
7742 default:
7743 gcc_unreachable ();
7744 }
7745
7746 insn = get_last_insn ();
7747 set = single_set (insn);
7748 if (! CONSTANT_P (SET_SRC (set)))
7749 set_unique_reg_note (insn, REG_EQUAL, source);
7750
7751 return result;
7752 }
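
/* Editor's illustrative example (values assumed): for SImode with
   source = 0x12345678, the code above first loads 0x12340000 (a lis)
   and then IORs in 0x5678 (an ori), materializing the constant in two
   insns.  */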
7753
7754 /* Having failed to find a 3 insn sequence in rs6000_emit_set_const,
7755 fall back to a straightforward decomposition. We do this to avoid
7756 exponential run times encountered when looking for longer sequences
7757 with rs6000_emit_set_const. */
7758 static rtx
7759 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
7760 {
7761 if (!TARGET_POWERPC64)
7762 {
7763 rtx operand1, operand2;
7764
7765 operand1 = operand_subword_force (dest, WORDS_BIG_ENDIAN == 0,
7766 DImode);
7767 operand2 = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN != 0,
7768 DImode);
7769 emit_move_insn (operand1, GEN_INT (c1));
7770 emit_move_insn (operand2, GEN_INT (c2));
7771 }
7772 else
7773 {
7774 HOST_WIDE_INT ud1, ud2, ud3, ud4;
7775
7776 ud1 = c1 & 0xffff;
7777 ud2 = (c1 & 0xffff0000) >> 16;
7778 c2 = c1 >> 32;
7779 ud3 = c2 & 0xffff;
7780 ud4 = (c2 & 0xffff0000) >> 16;
7781
7782 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
7783 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
7784 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
7785
7786 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
7787 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
7788 {
7789 emit_move_insn (dest, GEN_INT (((ud2 << 16) ^ 0x80000000)
7790 - 0x80000000));
7791 if (ud1 != 0)
7792 emit_move_insn (copy_rtx (dest),
7793 gen_rtx_IOR (DImode, copy_rtx (dest),
7794 GEN_INT (ud1)));
7795 }
7796 else if (ud3 == 0 && ud4 == 0)
7797 {
7798 gcc_assert (ud2 & 0x8000);
7799 emit_move_insn (dest, GEN_INT (((ud2 << 16) ^ 0x80000000)
7800 - 0x80000000));
7801 if (ud1 != 0)
7802 emit_move_insn (copy_rtx (dest),
7803 gen_rtx_IOR (DImode, copy_rtx (dest),
7804 GEN_INT (ud1)));
7805 emit_move_insn (copy_rtx (dest),
7806 gen_rtx_ZERO_EXTEND (DImode,
7807 gen_lowpart (SImode,
7808 copy_rtx (dest))));
7809 }
7810 else if ((ud4 == 0xffff && (ud3 & 0x8000))
7811 || (ud4 == 0 && ! (ud3 & 0x8000)))
7812 {
7813 emit_move_insn (dest, GEN_INT (((ud3 << 16) ^ 0x80000000)
7814 - 0x80000000));
7815 if (ud2 != 0)
7816 emit_move_insn (copy_rtx (dest),
7817 gen_rtx_IOR (DImode, copy_rtx (dest),
7818 GEN_INT (ud2)));
7819 emit_move_insn (copy_rtx (dest),
7820 gen_rtx_ASHIFT (DImode, copy_rtx (dest),
7821 GEN_INT (16)));
7822 if (ud1 != 0)
7823 emit_move_insn (copy_rtx (dest),
7824 gen_rtx_IOR (DImode, copy_rtx (dest),
7825 GEN_INT (ud1)));
7826 }
7827 else
7828 {
7829 emit_move_insn (dest, GEN_INT (((ud4 << 16) ^ 0x80000000)
7830 - 0x80000000));
7831 if (ud3 != 0)
7832 emit_move_insn (copy_rtx (dest),
7833 gen_rtx_IOR (DImode, copy_rtx (dest),
7834 GEN_INT (ud3)));
7835
7836 emit_move_insn (copy_rtx (dest),
7837 gen_rtx_ASHIFT (DImode, copy_rtx (dest),
7838 GEN_INT (32)));
7839 if (ud2 != 0)
7840 emit_move_insn (copy_rtx (dest),
7841 gen_rtx_IOR (DImode, copy_rtx (dest),
7842 GEN_INT (ud2 << 16)));
7843 if (ud1 != 0)
7844 emit_move_insn (copy_rtx (dest),
7845 gen_rtx_IOR (DImode, copy_rtx (dest),
7846 GEN_INT (ud1)));
7847 }
7848 }
7849 return dest;
7850 }
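
/* Editor's illustrative example (values assumed; mnemonics are the
   usual PowerPC ones): for c = 0x123456789abcdef0 the general case
   above emits dest = 0x12340000 (lis), IOR 0x5678 (ori), shift left 32
   (sldi), IOR 0x9abc0000 (oris), and IOR 0xdef0 (ori), i.e. a five-insn
   sequence.  */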
7851
7852 /* Helper for the following. Get rid of [r+r] memory refs
7853 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
7854
7855 static void
7856 rs6000_eliminate_indexed_memrefs (rtx operands[2])
7857 {
7858 if (reload_in_progress)
7859 return;
7860
7861 if (GET_CODE (operands[0]) == MEM
7862 && GET_CODE (XEXP (operands[0], 0)) != REG
7863 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
7864 GET_MODE (operands[0]), false))
7865 operands[0]
7866 = replace_equiv_address (operands[0],
7867 copy_addr_to_reg (XEXP (operands[0], 0)));
7868
7869 if (GET_CODE (operands[1]) == MEM
7870 && GET_CODE (XEXP (operands[1], 0)) != REG
7871 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
7872 GET_MODE (operands[1]), false))
7873 operands[1]
7874 = replace_equiv_address (operands[1],
7875 copy_addr_to_reg (XEXP (operands[1], 0)));
7876 }
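
/* Editor's illustrative example: a TImode ref such as
   (mem:TI (plus (reg 3) (reg 4))) gets its [r+r] address copied into a
   fresh base register, yielding a register-indirect MEM, because these
   modes are moved in word-sized pieces that need offsettable
   addresses.  */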
7877
7878 /* Generate a vector of constants to permute MODE for a little-endian
7879 storage operation by swapping the two halves of a vector. */
7880 static rtvec
7881 rs6000_const_vec (enum machine_mode mode)
7882 {
7883 int i, subparts;
7884 rtvec v;
7885
7886 switch (mode)
7887 {
7888 case V2DFmode:
7889 case V2DImode:
7890 subparts = 2;
7891 break;
7892 case V4SFmode:
7893 case V4SImode:
7894 subparts = 4;
7895 break;
7896 case V8HImode:
7897 subparts = 8;
7898 break;
7899 case V16QImode:
7900 subparts = 16;
7901 break;
7902 default:
7903 gcc_unreachable ();
7904 }
7905
7906 v = rtvec_alloc (subparts);
7907
7908 for (i = 0; i < subparts / 2; ++i)
7909 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
7910 for (i = subparts / 2; i < subparts; ++i)
7911 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
7912
7913 return v;
7914 }
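
/* Editor's illustrative example: for V4SImode the vector built above is
   {2, 3, 0, 1}, the selector that swaps the two doubleword halves; for
   V2DImode it is {1, 0}.  */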
7915
7916 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
7917 for a VSX load or store operation. */
7918 rtx
7919 rs6000_gen_le_vsx_permute (rtx source, enum machine_mode mode)
7920 {
7921 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
7922 return gen_rtx_VEC_SELECT (mode, source, par);
7923 }
7924
7925 /* Emit a little-endian load from vector memory location SOURCE to VSX
7926 register DEST in mode MODE. The load is done with two permuting
7927 insns that represent an lxvd2x and xxpermdi. */
7928 void
7929 rs6000_emit_le_vsx_load (rtx dest, rtx source, enum machine_mode mode)
7930 {
7931 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
7932 rtx permute_mem = rs6000_gen_le_vsx_permute (source, mode);
7933 rtx permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
7934 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem));
7935 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg));
7936 }
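
/* Editor's illustrative sketch: for a V2DF load the two SETs above are
   (set tmp (vec_select mem [1 0])) and (set dest (vec_select tmp [1 0]));
   the first matches the half-swapping lxvd2x, the second the xxpermdi
   that swaps the halves back, so together they perform a true
   little-endian load.  */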
7937
7938 /* Emit a little-endian store to vector memory location DEST from VSX
7939 register SOURCE in mode MODE. The store is done with two permuting
7940 insns that represent an xxpermdi and an stxvd2x. */
7941 void
7942 rs6000_emit_le_vsx_store (rtx dest, rtx source, enum machine_mode mode)
7943 {
7944 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
7945 rtx permute_src = rs6000_gen_le_vsx_permute (source, mode);
7946 rtx permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
7947 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src));
7948 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp));
7949 }
7950
7951 /* Emit a sequence representing a little-endian VSX load or store,
7952 moving data from SOURCE to DEST in mode MODE. This is done
7953 separately from rs6000_emit_move to ensure it is called only
7954 during expand. LE VSX loads and stores introduced later are
7955 handled with a split. The expand-time RTL generation allows
7956 us to optimize away redundant pairs of register-permutes. */
7957 void
7958 rs6000_emit_le_vsx_move (rtx dest, rtx source, enum machine_mode mode)
7959 {
7960 gcc_assert (!BYTES_BIG_ENDIAN
7961 && VECTOR_MEM_VSX_P (mode)
7962 && mode != TImode
7963 && (MEM_P (source) ^ MEM_P (dest)));
7964
7965 if (MEM_P (source))
7966 {
7967 gcc_assert (REG_P (dest));
7968 rs6000_emit_le_vsx_load (dest, source, mode);
7969 }
7970 else
7971 {
7972 if (!REG_P (source))
7973 source = force_reg (mode, source);
7974 rs6000_emit_le_vsx_store (dest, source, mode);
7975 }
7976 }
7977
7978 /* Emit a move from SOURCE to DEST in mode MODE. */
7979 void
7980 rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
7981 {
7982 rtx operands[2];
7983 operands[0] = dest;
7984 operands[1] = source;
7985
7986 if (TARGET_DEBUG_ADDR)
7987 {
7988 fprintf (stderr,
7989 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
7990 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
7991 GET_MODE_NAME (mode),
7992 reload_in_progress,
7993 reload_completed,
7994 can_create_pseudo_p ());
7995 debug_rtx (dest);
7996 fprintf (stderr, "source:\n");
7997 debug_rtx (source);
7998 }
7999
8000 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
8001 if (GET_CODE (operands[1]) == CONST_DOUBLE
8002 && ! FLOAT_MODE_P (mode)
8003 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
8004 {
8005 /* FIXME. This should never happen. */
8006 /* Since it seems that it does, do the safe thing and convert
8007 to a CONST_INT. */
8008 operands[1] = gen_int_mode (CONST_DOUBLE_LOW (operands[1]), mode);
8009 }
8010 gcc_assert (GET_CODE (operands[1]) != CONST_DOUBLE
8011 || FLOAT_MODE_P (mode)
8012 || ((CONST_DOUBLE_HIGH (operands[1]) != 0
8013 || CONST_DOUBLE_LOW (operands[1]) < 0)
8014 && (CONST_DOUBLE_HIGH (operands[1]) != -1
8015 || CONST_DOUBLE_LOW (operands[1]) >= 0)));
8016
8017 /* Check if GCC is setting up a block move that will end up using FP
8018 registers as temporaries. We must make sure this is acceptable. */
8019 if (GET_CODE (operands[0]) == MEM
8020 && GET_CODE (operands[1]) == MEM
8021 && mode == DImode
8022 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
8023 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
8024 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
8025 ? 32 : MEM_ALIGN (operands[0])))
8026 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
8027 ? 32
8028 : MEM_ALIGN (operands[1]))))
8029 && ! MEM_VOLATILE_P (operands [0])
8030 && ! MEM_VOLATILE_P (operands [1]))
8031 {
8032 emit_move_insn (adjust_address (operands[0], SImode, 0),
8033 adjust_address (operands[1], SImode, 0));
8034 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
8035 adjust_address (copy_rtx (operands[1]), SImode, 4));
8036 return;
8037 }
8038
8039 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
8040 && !gpc_reg_operand (operands[1], mode))
8041 operands[1] = force_reg (mode, operands[1]);
8042
8043 /* Recognize the case where operands[1] is a reference to thread-local
8044 data and load its address to a register. */
8045 if (rs6000_tls_referenced_p (operands[1]))
8046 {
8047 enum tls_model model;
8048 rtx tmp = operands[1];
8049 rtx addend = NULL;
8050
8051 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
8052 {
8053 addend = XEXP (XEXP (tmp, 0), 1);
8054 tmp = XEXP (XEXP (tmp, 0), 0);
8055 }
8056
8057 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
8058 model = SYMBOL_REF_TLS_MODEL (tmp);
8059 gcc_assert (model != 0);
8060
8061 tmp = rs6000_legitimize_tls_address (tmp, model);
8062 if (addend)
8063 {
8064 tmp = gen_rtx_PLUS (mode, tmp, addend);
8065 tmp = force_operand (tmp, operands[0]);
8066 }
8067 operands[1] = tmp;
8068 }
8069
8070 /* Handle the case where reload calls us with an invalid address. */
8071 if (reload_in_progress && mode == Pmode
8072 && (! general_operand (operands[1], mode)
8073 || ! nonimmediate_operand (operands[0], mode)))
8074 goto emit_set;
8075
8076 /* 128-bit constant floating-point values on Darwin should really be
8077 loaded as two parts. */
8078 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
8079 && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
8080 {
8081 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
8082 simplify_gen_subreg (DFmode, operands[1], mode, 0),
8083 DFmode);
8084 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
8085 GET_MODE_SIZE (DFmode)),
8086 simplify_gen_subreg (DFmode, operands[1], mode,
8087 GET_MODE_SIZE (DFmode)),
8088 DFmode);
8089 return;
8090 }
8091
8092 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
8093 cfun->machine->sdmode_stack_slot =
8094 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
8095
8096
8097 if (lra_in_progress
8098 && mode == SDmode
8099 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8100 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8101 && (REG_P (operands[1])
8102 || (GET_CODE (operands[1]) == SUBREG
8103 && REG_P (SUBREG_REG (operands[1])))))
8104 {
8105 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
8106 ? SUBREG_REG (operands[1]) : operands[1]);
8107 enum reg_class cl;
8108
8109 if (regno >= FIRST_PSEUDO_REGISTER)
8110 {
8111 cl = reg_preferred_class (regno);
8112 gcc_assert (cl != NO_REGS);
8113 regno = ira_class_hard_regs[cl][0];
8114 }
8115 if (FP_REGNO_P (regno))
8116 {
8117 if (GET_MODE (operands[0]) != DDmode)
8118 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
8119 emit_insn (gen_movsd_store (operands[0], operands[1]));
8120 }
8121 else if (INT_REGNO_P (regno))
8122 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8123 else
8124 gcc_unreachable ();
8125 return;
8126 }
8127 if (lra_in_progress
8128 && mode == SDmode
8129 && (REG_P (operands[0])
8130 || (GET_CODE (operands[0]) == SUBREG
8131 && REG_P (SUBREG_REG (operands[0]))))
8132 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8133 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8134 {
8135 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
8136 ? SUBREG_REG (operands[0]) : operands[0]);
8137 enum reg_class cl;
8138
8139 if (regno >= FIRST_PSEUDO_REGISTER)
8140 {
8141 cl = reg_preferred_class (regno);
8142 gcc_assert (cl != NO_REGS);
8143 regno = ira_class_hard_regs[cl][0];
8144 }
8145 if (FP_REGNO_P (regno))
8146 {
8147 if (GET_MODE (operands[1]) != DDmode)
8148 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
8149 emit_insn (gen_movsd_load (operands[0], operands[1]));
8150 }
8151 else if (INT_REGNO_P (regno))
8152 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8153 else
8154 gcc_unreachable ();
8155 return;
8156 }
8157
8158 if (reload_in_progress
8159 && mode == SDmode
8160 && cfun->machine->sdmode_stack_slot != NULL_RTX
8161 && MEM_P (operands[0])
8162 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
8163 && REG_P (operands[1]))
8164 {
8165 if (FP_REGNO_P (REGNO (operands[1])))
8166 {
8167 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
8168 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8169 emit_insn (gen_movsd_store (mem, operands[1]));
8170 }
8171 else if (INT_REGNO_P (REGNO (operands[1])))
8172 {
8173 rtx mem = adjust_address_nv (operands[0], mode, 4);
8174 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8175 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
8176 }
8177 else
8178 gcc_unreachable ();
8179 return;
8180 }
8181 if (reload_in_progress
8182 && mode == SDmode
8183 && REG_P (operands[0])
8184 && MEM_P (operands[1])
8185 && cfun->machine->sdmode_stack_slot != NULL_RTX
8186 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
8187 {
8188 if (FP_REGNO_P (REGNO (operands[0])))
8189 {
8190 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
8191 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8192 emit_insn (gen_movsd_load (operands[0], mem));
8193 }
8194 else if (INT_REGNO_P (REGNO (operands[0])))
8195 {
8196 rtx mem = adjust_address_nv (operands[1], mode, 4);
8197 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8198 emit_insn (gen_movsd_hardfloat (operands[0], mem));
8199 }
8200 else
8201 gcc_unreachable ();
8202 return;
8203 }
8204
8205 /* FIXME: In the long term, this switch statement should go away
8206 and be replaced by a sequence of tests based on things like
8207 mode == Pmode. */
8208 switch (mode)
8209 {
8210 case HImode:
8211 case QImode:
8212 if (CONSTANT_P (operands[1])
8213 && GET_CODE (operands[1]) != CONST_INT)
8214 operands[1] = force_const_mem (mode, operands[1]);
8215 break;
8216
8217 case TFmode:
8218 case TDmode:
8219 rs6000_eliminate_indexed_memrefs (operands);
8220 /* fall through */
8221
8222 case DFmode:
8223 case DDmode:
8224 case SFmode:
8225 case SDmode:
8226 if (CONSTANT_P (operands[1])
8227 && ! easy_fp_constant (operands[1], mode))
8228 operands[1] = force_const_mem (mode, operands[1]);
8229 break;
8230
8231 case V16QImode:
8232 case V8HImode:
8233 case V4SFmode:
8234 case V4SImode:
8235 case V4HImode:
8236 case V2SFmode:
8237 case V2SImode:
8238 case V1DImode:
8239 case V2DFmode:
8240 case V2DImode:
8241 if (CONSTANT_P (operands[1])
8242 && !easy_vector_constant (operands[1], mode))
8243 operands[1] = force_const_mem (mode, operands[1]);
8244 break;
8245
8246 case SImode:
8247 case DImode:
8248 /* Use default pattern for address of ELF small data. */
8249 if (TARGET_ELF
8250 && mode == Pmode
8251 && DEFAULT_ABI == ABI_V4
8252 && (GET_CODE (operands[1]) == SYMBOL_REF
8253 || GET_CODE (operands[1]) == CONST)
8254 && small_data_operand (operands[1], mode))
8255 {
8256 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8257 return;
8258 }
8259
8260 if (DEFAULT_ABI == ABI_V4
8261 && mode == Pmode && mode == SImode
8262 && flag_pic == 1 && got_operand (operands[1], mode))
8263 {
8264 emit_insn (gen_movsi_got (operands[0], operands[1]));
8265 return;
8266 }
8267
8268 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
8269 && TARGET_NO_TOC
8270 && ! flag_pic
8271 && mode == Pmode
8272 && CONSTANT_P (operands[1])
8273 && GET_CODE (operands[1]) != HIGH
8274 && GET_CODE (operands[1]) != CONST_INT)
8275 {
8276 rtx target = (!can_create_pseudo_p ()
8277 ? operands[0]
8278 : gen_reg_rtx (mode));
8279
8280 /* If this is a function address on -mcall-aixdesc,
8281 convert it to the address of the descriptor. */
8282 if (DEFAULT_ABI == ABI_AIX
8283 && GET_CODE (operands[1]) == SYMBOL_REF
8284 && XSTR (operands[1], 0)[0] == '.')
8285 {
8286 const char *name = XSTR (operands[1], 0);
8287 rtx new_ref;
8288 while (*name == '.')
8289 name++;
8290 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
8291 CONSTANT_POOL_ADDRESS_P (new_ref)
8292 = CONSTANT_POOL_ADDRESS_P (operands[1]);
8293 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
8294 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
8295 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
8296 operands[1] = new_ref;
8297 }
8298
8299 if (DEFAULT_ABI == ABI_DARWIN)
8300 {
8301 #if TARGET_MACHO
8302 if (MACHO_DYNAMIC_NO_PIC_P)
8303 {
8304 /* Take care of any required data indirection. */
8305 operands[1] = rs6000_machopic_legitimize_pic_address (
8306 operands[1], mode, operands[0]);
8307 if (operands[0] != operands[1])
8308 emit_insn (gen_rtx_SET (VOIDmode,
8309 operands[0], operands[1]));
8310 return;
8311 }
8312 #endif
8313 emit_insn (gen_macho_high (target, operands[1]));
8314 emit_insn (gen_macho_low (operands[0], target, operands[1]));
8315 return;
8316 }
8317
8318 emit_insn (gen_elf_high (target, operands[1]));
8319 emit_insn (gen_elf_low (operands[0], target, operands[1]));
8320 return;
8321 }
8322
8323 /* If this is a SYMBOL_REF that refers to a constant pool entry,
8324 and we have put it in the TOC, we just need to make a TOC-relative
8325 reference to it. */
8326 if (TARGET_TOC
8327 && GET_CODE (operands[1]) == SYMBOL_REF
8328 && use_toc_relative_ref (operands[1]))
8329 operands[1] = create_TOC_reference (operands[1], operands[0]);
8330 else if (mode == Pmode
8331 && CONSTANT_P (operands[1])
8332 && GET_CODE (operands[1]) != HIGH
8333 && ((GET_CODE (operands[1]) != CONST_INT
8334 && ! easy_fp_constant (operands[1], mode))
8335 || (GET_CODE (operands[1]) == CONST_INT
8336 && (num_insns_constant (operands[1], mode)
8337 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
8338 || (GET_CODE (operands[0]) == REG
8339 && FP_REGNO_P (REGNO (operands[0]))))
8340 && !toc_relative_expr_p (operands[1], false)
8341 && (TARGET_CMODEL == CMODEL_SMALL
8342 || can_create_pseudo_p ()
8343 || (REG_P (operands[0])
8344 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
8345 {
8346
8347 #if TARGET_MACHO
8348 /* Darwin uses a special PIC legitimizer. */
8349 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
8350 {
8351 operands[1] =
8352 rs6000_machopic_legitimize_pic_address (operands[1], mode,
8353 operands[0]);
8354 if (operands[0] != operands[1])
8355 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8356 return;
8357 }
8358 #endif
8359
8360 /* If we are to limit the number of things we put in the TOC and
8361 this is a symbol plus a constant we can add in one insn,
8362 just put the symbol in the TOC and add the constant. Don't do
8363 this if reload is in progress. */
8364 if (GET_CODE (operands[1]) == CONST
8365 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
8366 && GET_CODE (XEXP (operands[1], 0)) == PLUS
8367 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
8368 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
8369 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
8370 && ! side_effects_p (operands[0]))
8371 {
8372 rtx sym =
8373 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
8374 rtx other = XEXP (XEXP (operands[1], 0), 1);
8375
8376 sym = force_reg (mode, sym);
8377 emit_insn (gen_add3_insn (operands[0], sym, other));
8378 return;
8379 }
8380
8381 operands[1] = force_const_mem (mode, operands[1]);
8382
8383 if (TARGET_TOC
8384 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8385 && constant_pool_expr_p (XEXP (operands[1], 0))
8386 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
8387 get_pool_constant (XEXP (operands[1], 0)),
8388 get_pool_mode (XEXP (operands[1], 0))))
8389 {
8390 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
8391 operands[0]);
8392 operands[1] = gen_const_mem (mode, tocref);
8393 set_mem_alias_set (operands[1], get_TOC_alias_set ());
8394 }
8395 }
8396 break;
8397
8398 case TImode:
8399 if (!VECTOR_MEM_VSX_P (TImode))
8400 rs6000_eliminate_indexed_memrefs (operands);
8401 break;
8402
8403 case PTImode:
8404 rs6000_eliminate_indexed_memrefs (operands);
8405 break;
8406
8407 default:
8408 fatal_insn ("bad move", gen_rtx_SET (VOIDmode, dest, source));
8409 }
8410
8411 /* Above, we may have called force_const_mem which may have returned
8412 an invalid address. If we can, fix this up; otherwise, reload will
8413 have to deal with it. */
8414 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
8415 operands[1] = validize_mem (operands[1]);
8416
8417 emit_set:
8418 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8419 }
8420
8421 /* Return true if a structure, union or array containing FIELD should be
8422 accessed using `BLKmode'.
8423
8424 For the SPE, simd types are V2SI, and gcc can be tempted to put the
8425 entire thing in a DI and use subregs to access the internals.
8426 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
8427 back-end. Because a single GPR can hold a V2SI, but not a DI, the
8428 best thing to do is set structs to BLKmode and avoid Severe Tire
8429 Damage.
8430
8431 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
8432 fit into one GPR, whereas DI still needs two. */
8433
8434 static bool
8435 rs6000_member_type_forces_blk (const_tree field, enum machine_mode mode)
8436 {
8437 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
8438 || (TARGET_E500_DOUBLE && mode == DFmode));
8439 }
8440 \f
8441 /* Nonzero if we can use a floating-point register to pass this arg. */
8442 #define USE_FP_FOR_ARG_P(CUM,MODE,TYPE) \
8443 (SCALAR_FLOAT_MODE_P (MODE) \
8444 && (CUM)->fregno <= FP_ARG_MAX_REG \
8445 && TARGET_HARD_FLOAT && TARGET_FPRS)
8446
8447 /* Nonzero if we can use an AltiVec register to pass this arg. */
8448 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,TYPE,NAMED) \
8449 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
8450 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
8451 && TARGET_ALTIVEC_ABI \
8452 && (NAMED))
8453
8454 /* Return a nonzero value to say to return the function value in
8455 memory, just as large structures are always returned. TYPE will be
8456 the data type of the value, and FNTYPE will be the type of the
8457 function doing the returning, or @code{NULL} for libcalls.
8458
8459 The AIX ABI for the RS/6000 specifies that all structures are
8460 returned in memory. The Darwin ABI does the same.
8461
8462 For the Darwin 64 Bit ABI, a function result can be returned in
8463 registers or in memory, depending on the size of the return data
8464 type. If it is returned in registers, the value occupies the same
8465 registers as it would if it were the first and only function
8466 argument. Otherwise, the function places its result in memory at
8467 the location pointed to by GPR3.
8468
8469 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
8470 but a draft put them in memory, and GCC used to implement the draft
8471 instead of the final standard. Therefore, aix_struct_return
8472 controls this instead of DEFAULT_ABI; V.4 targets needing backward
8473 compatibility can change DRAFT_V4_STRUCT_RET to override the
8474 default, and -m switches get the final word. See
8475 rs6000_option_override_internal for more details.
8476
8477 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
8478 long double support is enabled. These values are returned in memory.
8479
8480 int_size_in_bytes returns -1 for variable size objects, which go in
8481 memory always. The cast to unsigned makes -1 > 8. */
8482
8483 static bool
8484 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8485 {
8486 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
8487 if (TARGET_MACHO
8488 && rs6000_darwin64_abi
8489 && TREE_CODE (type) == RECORD_TYPE
8490 && int_size_in_bytes (type) > 0)
8491 {
8492 CUMULATIVE_ARGS valcum;
8493 rtx valret;
8494
8495 valcum.words = 0;
8496 valcum.fregno = FP_ARG_MIN_REG;
8497 valcum.vregno = ALTIVEC_ARG_MIN_REG;
8498 /* Do a trial code generation as if this were going to be passed
8499 as an argument; if any part goes in memory, we return NULL. */
8500 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
8501 if (valret)
8502 return false;
8503 /* Otherwise fall through to more conventional ABI rules. */
8504 }
8505
8506 if (AGGREGATE_TYPE_P (type)
8507 && (aix_struct_return
8508 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
8509 return true;
8510
8511 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
8512 modes only exist for GCC vector types if -maltivec. */
8513 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
8514 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
8515 return false;
8516
8517 /* Return synthetic vectors in memory. */
8518 if (TREE_CODE (type) == VECTOR_TYPE
8519 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
8520 {
8521 static bool warned_for_return_big_vectors = false;
8522 if (!warned_for_return_big_vectors)
8523 {
8524 warning (0, "GCC vector returned by reference: "
8525 "non-standard ABI extension with no compatibility guarantee");
8526 warned_for_return_big_vectors = true;
8527 }
8528 return true;
8529 }
8530
8531 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && TYPE_MODE (type) == TFmode)
8532 return true;
8533
8534 return false;
8535 }
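
/* Editor's illustrative example (per the rules above): an 8-byte
   struct { int a, b; } is returned in r3/r4 under the SVR4 ABI when
   aix_struct_return is false, but in memory under the AIX and Darwin
   ABIs.  */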
8536
8537 #ifdef HAVE_AS_GNU_ATTRIBUTE
8538 /* Return TRUE if a call to function FNDECL may be one that
8539 potentially affects the function calling ABI of the object file. */
8540
8541 static bool
8542 call_ABI_of_interest (tree fndecl)
8543 {
8544 if (cgraph_state == CGRAPH_STATE_EXPANSION)
8545 {
8546 struct cgraph_node *c_node;
8547
8548 /* Libcalls are always interesting. */
8549 if (fndecl == NULL_TREE)
8550 return true;
8551
8552 /* Any call to an external function is interesting. */
8553 if (DECL_EXTERNAL (fndecl))
8554 return true;
8555
8556 /* Interesting functions that we are emitting in this object file. */
8557 c_node = cgraph_get_node (fndecl);
8558 c_node = cgraph_function_or_thunk_node (c_node, NULL);
8559 return !cgraph_only_called_directly_p (c_node);
8560 }
8561 return false;
8562 }
8563 #endif
8564
8565 /* Initialize a variable CUM of type CUMULATIVE_ARGS
8566 for a call to a function whose data type is FNTYPE.
8567 For a library call, FNTYPE is 0 and RETURN_MODE is the return value mode.
8568
8569 For incoming args we set the number of arguments in the prototype large
8570 so we never return a PARALLEL. */
8571
8572 void
8573 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
8574 rtx libname ATTRIBUTE_UNUSED, int incoming,
8575 int libcall, int n_named_args,
8576 tree fndecl ATTRIBUTE_UNUSED,
8577 enum machine_mode return_mode ATTRIBUTE_UNUSED)
8578 {
8579 static CUMULATIVE_ARGS zero_cumulative;
8580
8581 *cum = zero_cumulative;
8582 cum->words = 0;
8583 cum->fregno = FP_ARG_MIN_REG;
8584 cum->vregno = ALTIVEC_ARG_MIN_REG;
8585 cum->prototype = (fntype && prototype_p (fntype));
8586 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
8587 ? CALL_LIBCALL : CALL_NORMAL);
8588 cum->sysv_gregno = GP_ARG_MIN_REG;
8589 cum->stdarg = stdarg_p (fntype);
8590
8591 cum->nargs_prototype = 0;
8592 if (incoming || cum->prototype)
8593 cum->nargs_prototype = n_named_args;
8594
8595 /* Check for a longcall attribute. */
8596 if ((!fntype && rs6000_default_long_calls)
8597 || (fntype
8598 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
8599 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
8600 cum->call_cookie |= CALL_LONG;
8601
8602 if (TARGET_DEBUG_ARG)
8603 {
8604 fprintf (stderr, "\ninit_cumulative_args:");
8605 if (fntype)
8606 {
8607 tree ret_type = TREE_TYPE (fntype);
8608 fprintf (stderr, " ret code = %s,",
8609 get_tree_code_name (TREE_CODE (ret_type)));
8610 }
8611
8612 if (cum->call_cookie & CALL_LONG)
8613 fprintf (stderr, " longcall,");
8614
8615 fprintf (stderr, " proto = %d, nargs = %d\n",
8616 cum->prototype, cum->nargs_prototype);
8617 }
8618
8619 #ifdef HAVE_AS_GNU_ATTRIBUTE
8620 if (DEFAULT_ABI == ABI_V4)
8621 {
8622 cum->escapes = call_ABI_of_interest (fndecl);
8623 if (cum->escapes)
8624 {
8625 tree return_type;
8626
8627 if (fntype)
8628 {
8629 return_type = TREE_TYPE (fntype);
8630 return_mode = TYPE_MODE (return_type);
8631 }
8632 else
8633 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
8634
8635 if (return_type != NULL)
8636 {
8637 if (TREE_CODE (return_type) == RECORD_TYPE
8638 && TYPE_TRANSPARENT_AGGR (return_type))
8639 {
8640 return_type = TREE_TYPE (first_field (return_type));
8641 return_mode = TYPE_MODE (return_type);
8642 }
8643 if (AGGREGATE_TYPE_P (return_type)
8644 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
8645 <= 8))
8646 rs6000_returns_struct = true;
8647 }
8648 if (SCALAR_FLOAT_MODE_P (return_mode))
8649 rs6000_passes_float = true;
8650 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
8651 || SPE_VECTOR_MODE (return_mode))
8652 rs6000_passes_vector = true;
8653 }
8654 }
8655 #endif
8656
8657 if (fntype
8658 && !TARGET_ALTIVEC
8659 && TARGET_ALTIVEC_ABI
8660 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
8661 {
8662 error ("cannot return value in vector register because"
8663 " altivec instructions are disabled, use -maltivec"
8664 " to enable them");
8665 }
8666 }
8667 \f
8668 /* Return true if TYPE must be passed on the stack and not in registers. */
8669
8670 static bool
8671 rs6000_must_pass_in_stack (enum machine_mode mode, const_tree type)
8672 {
8673 if (DEFAULT_ABI == ABI_AIX || TARGET_64BIT)
8674 return must_pass_in_stack_var_size (mode, type);
8675 else
8676 return must_pass_in_stack_var_size_or_pad (mode, type);
8677 }
8678
8679 /* If defined, a C expression which determines whether, and in which
8680 direction, to pad out an argument with extra space. The value
8681 should be of type `enum direction': either `upward' to pad above
8682 the argument, `downward' to pad below, or `none' to inhibit
8683 padding.
8684
8685 For the AIX ABI structs are always stored left shifted in their
8686 argument slot. */
8687
8688 enum direction
8689 function_arg_padding (enum machine_mode mode, const_tree type)
8690 {
8691 #ifndef AGGREGATE_PADDING_FIXED
8692 #define AGGREGATE_PADDING_FIXED 0
8693 #endif
8694 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
8695 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
8696 #endif
8697
8698 if (!AGGREGATE_PADDING_FIXED)
8699 {
8700 /* GCC used to pass structures of the same size as integer types as
8701 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
8702 i.e., structures of size 1 or 2 (or 4 when TARGET_64BIT) were
8703 passed padded downward, except that -mstrict-align further
8704 muddied the water in that multi-component structures of 2 and 4
8705 bytes in size were passed padded upward.
8706
8707 The following arranges for best compatibility with previous
8708 versions of gcc, but removes the -mstrict-align dependency. */
8709 if (BYTES_BIG_ENDIAN)
8710 {
8711 HOST_WIDE_INT size = 0;
8712
8713 if (mode == BLKmode)
8714 {
8715 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
8716 size = int_size_in_bytes (type);
8717 }
8718 else
8719 size = GET_MODE_SIZE (mode);
8720
8721 if (size == 1 || size == 2 || size == 4)
8722 return downward;
8723 }
8724 return upward;
8725 }
8726
8727 if (AGGREGATES_PAD_UPWARD_ALWAYS)
8728 {
8729 if (type != 0 && AGGREGATE_TYPE_P (type))
8730 return upward;
8731 }
8732
8733 /* Fall back to the default. */
8734 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
8735 }
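/* Worked example: on a big-endian 32-bit target a 2-byte struct is
   padded downward, occupying the last two bytes of its 4-byte slot
   just as a short would, while a 3-byte struct falls through to
   upward padding and occupies the first three bytes of the slot.  */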
8736
8737 /* If defined, a C expression that gives the alignment boundary, in bits,
8738 of an argument with the specified mode and type. If it is not defined,
8739 PARM_BOUNDARY is used for all arguments.
8740
8741 V.4 wants long longs and doubles to be double word aligned. Just
8742 testing the mode size is a boneheaded way to do this as it means
8743 that other types such as complex int are also double word aligned.
8744 However, we're stuck with this because changing the ABI might break
8745 existing library interfaces.
8746
8747 Doubleword align SPE vectors.
8748 Quadword align Altivec/VSX vectors.
8749 Quadword align large synthetic vector types. */
8750
8751 static unsigned int
8752 rs6000_function_arg_boundary (enum machine_mode mode, const_tree type)
8753 {
8754 if (DEFAULT_ABI == ABI_V4
8755 && (GET_MODE_SIZE (mode) == 8
8756 || (TARGET_HARD_FLOAT
8757 && TARGET_FPRS
8758 && (mode == TFmode || mode == TDmode))))
8759 return 64;
8760 else if (SPE_VECTOR_MODE (mode)
8761 || (type && TREE_CODE (type) == VECTOR_TYPE
8762 && int_size_in_bytes (type) >= 8
8763 && int_size_in_bytes (type) < 16))
8764 return 64;
8765 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
8766 || (type && TREE_CODE (type) == VECTOR_TYPE
8767 && int_size_in_bytes (type) >= 16))
8768 return 128;
8769 else if (((TARGET_MACHO && rs6000_darwin64_abi)
8770 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
8771 && mode == BLKmode
8772 && type && TYPE_ALIGN (type) > 64)
8773 return 128;
8774 else
8775 return PARM_BOUNDARY;
8776 }
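/* E.g., under the V.4 ABI a double (GET_MODE_SIZE == 8) gets 64-bit
   alignment, any 16-byte vector gets 128, and an ordinary int falls
   through to PARM_BOUNDARY.  */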
8777
8778 /* For a function parm of MODE and TYPE, return the starting word in
8779 the parameter area. NWORDS of the parameter area are already used. */
8780
8781 static unsigned int
8782 rs6000_parm_start (enum machine_mode mode, const_tree type,
8783 unsigned int nwords)
8784 {
8785 unsigned int align;
8786 unsigned int parm_offset;
8787
8788 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
8789 parm_offset = DEFAULT_ABI == ABI_V4 ? 2 : 6;
8790 return nwords + (-(parm_offset + nwords) & align);
8791 }
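/* A worked example, assuming 32-bit AIX: PARM_BOUNDARY is 32, so a
   16-byte-aligned vector yields align = 128 / 32 - 1 = 3, and
   parm_offset is 6 because the parameter save area begins 24 bytes
   into the frame.  With nwords == 0, -(6 + 0) & 3 == 2, so the vector
   starts at word 2 of the parameter area, i.e. frame offset
   24 + 8 = 32, which is 16-byte aligned.  */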
8792
8793 /* Compute the size (in words) of a function argument. */
8794
8795 static unsigned long
8796 rs6000_arg_size (enum machine_mode mode, const_tree type)
8797 {
8798 unsigned long size;
8799
8800 if (mode != BLKmode)
8801 size = GET_MODE_SIZE (mode);
8802 else
8803 size = int_size_in_bytes (type);
8804
8805 if (TARGET_32BIT)
8806 return (size + 3) >> 2;
8807 else
8808 return (size + 7) >> 3;
8809 }
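/* E.g., a 5-byte BLKmode struct occupies (5 + 3) >> 2 = 2 words on a
   32-bit target but (5 + 7) >> 3 = 1 doubleword on a 64-bit target.  */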
8810 \f
8811 /* Use this to flush pending int fields. */
8812
8813 static void
8814 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
8815 HOST_WIDE_INT bitpos, int final)
8816 {
8817 unsigned int startbit, endbit;
8818 int intregs, intoffset;
8819 enum machine_mode mode;
8820
8821 /* Handle the situations where a float is taking up the first half
8822 of the GPR, and the other half is empty (typically due to
8823 alignment restrictions). We can detect this by an 8-byte-aligned
8824 int field, or by seeing that this is the final flush for this
8825 argument. Count the word and continue on. */
8826 if (cum->floats_in_gpr == 1
8827 && (cum->intoffset % 64 == 0
8828 || (cum->intoffset == -1 && final)))
8829 {
8830 cum->words++;
8831 cum->floats_in_gpr = 0;
8832 }
8833
8834 if (cum->intoffset == -1)
8835 return;
8836
8837 intoffset = cum->intoffset;
8838 cum->intoffset = -1;
8839 cum->floats_in_gpr = 0;
8840
8841 if (intoffset % BITS_PER_WORD != 0)
8842 {
8843 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
8844 MODE_INT, 0);
8845 if (mode == BLKmode)
8846 {
8847 /* We couldn't find an appropriate mode, which happens,
8848 e.g., in packed structs when there are 3 bytes to load.
8849 Move intoffset back to the beginning of the word in this
8850 case. */
8851 intoffset = intoffset & -BITS_PER_WORD;
8852 }
8853 }
8854
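/* E.g., with BITS_PER_WORD == 64, intoffset == 32 and bitpos == 96,
   the computation below gives startbit = 0 and endbit = 128, so two
   full words are added to cum->words.  */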
8855 startbit = intoffset & -BITS_PER_WORD;
8856 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
8857 intregs = (endbit - startbit) / BITS_PER_WORD;
8858 cum->words += intregs;
8859 /* words should be unsigned. */
8860 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
8861 {
8862 int pad = (endbit/BITS_PER_WORD) - cum->words;
8863 cum->words += pad;
8864 }
8865 }
8866
8867 /* The darwin64 ABI calls for us to recurse down through structs,
8868 looking for elements passed in registers. Unfortunately, we have
8869 to track int register count here also because of misalignments
8870 in powerpc alignment mode. */
8871
8872 static void
8873 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
8874 const_tree type,
8875 HOST_WIDE_INT startbitpos)
8876 {
8877 tree f;
8878
8879 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
8880 if (TREE_CODE (f) == FIELD_DECL)
8881 {
8882 HOST_WIDE_INT bitpos = startbitpos;
8883 tree ftype = TREE_TYPE (f);
8884 enum machine_mode mode;
8885 if (ftype == error_mark_node)
8886 continue;
8887 mode = TYPE_MODE (ftype);
8888
8889 if (DECL_SIZE (f) != 0
8890 && host_integerp (bit_position (f), 1))
8891 bitpos += int_bit_position (f);
8892
8893 /* ??? FIXME: else assume zero offset. */
8894
8895 if (TREE_CODE (ftype) == RECORD_TYPE)
8896 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
8897 else if (USE_FP_FOR_ARG_P (cum, mode, ftype))
8898 {
8899 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
8900 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
8901 cum->fregno += n_fpregs;
8902 /* Single-precision floats present a special problem for
8903 us, because they are smaller than an 8-byte GPR, and so
8904 the structure-packing rules combined with the standard
8905 varargs behavior mean that we want to pack float/float
8906 and float/int combinations into a single register's
8907 space. This is complicated by the arg advance flushing,
8908 which works on arbitrarily large groups of int-type
8909 fields. */
8910 if (mode == SFmode)
8911 {
8912 if (cum->floats_in_gpr == 1)
8913 {
8914 /* Two floats in a word; count the word and reset
8915 the float count. */
8916 cum->words++;
8917 cum->floats_in_gpr = 0;
8918 }
8919 else if (bitpos % 64 == 0)
8920 {
8921 /* A float at the beginning of an 8-byte word;
8922 count it and put off adjusting cum->words until
8923 we see if an arg advance flush is going to do it
8924 for us. */
8925 cum->floats_in_gpr++;
8926 }
8927 else
8928 {
8929 /* The float is at the end of a word, preceded
8930 by integer fields, so the arg advance flush
8931 just above has already set cum->words and
8932 everything is taken care of. */
8933 }
8934 }
8935 else
8936 cum->words += n_fpregs;
8937 }
8938 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, 1))
8939 {
8940 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
8941 cum->vregno++;
8942 cum->words += 2;
8943 }
8944 else if (cum->intoffset == -1)
8945 cum->intoffset = bitpos;
8946 }
8947 }
8948
8949 /* Check for an item that needs to be considered specially under the darwin 64
8950 bit ABI. These are record types where the mode is BLK or the structure is
8951 8 bytes in size. */
8952 static int
8953 rs6000_darwin64_struct_check_p (enum machine_mode mode, const_tree type)
8954 {
8955 return rs6000_darwin64_abi
8956 && ((mode == BLKmode
8957 && TREE_CODE (type) == RECORD_TYPE
8958 && int_size_in_bytes (type) > 0)
8959 || (type && TREE_CODE (type) == RECORD_TYPE
8960 && int_size_in_bytes (type) == 8)) ? 1 : 0;
8961 }
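/* E.g., a record holding a ten-char array keeps BLKmode and matches
   the first arm, while an 8-byte record that was assigned an integer
   mode (DImode, say) is caught by the size test in the second arm.  */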
8962
8963 /* Update the data in CUM to advance over an argument
8964 of mode MODE and data type TYPE.
8965 (TYPE is null for libcalls where that information may not be available.)
8966
8967 Note that for args passed by reference, function_arg will be called
8968 with MODE and TYPE set to that of the pointer to the arg, not the arg
8969 itself. */
8970
8971 static void
8972 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
8973 const_tree type, bool named, int depth)
8974 {
8975 /* Only tick off an argument if we're not recursing. */
8976 if (depth == 0)
8977 cum->nargs_prototype--;
8978
8979 #ifdef HAVE_AS_GNU_ATTRIBUTE
8980 if (DEFAULT_ABI == ABI_V4
8981 && cum->escapes)
8982 {
8983 if (SCALAR_FLOAT_MODE_P (mode))
8984 rs6000_passes_float = true;
8985 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
8986 rs6000_passes_vector = true;
8987 else if (SPE_VECTOR_MODE (mode)
8988 && !cum->stdarg
8989 && cum->sysv_gregno <= GP_ARG_MAX_REG)
8990 rs6000_passes_vector = true;
8991 }
8992 #endif
8993
8994 if (TARGET_ALTIVEC_ABI
8995 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
8996 || (type && TREE_CODE (type) == VECTOR_TYPE
8997 && int_size_in_bytes (type) == 16)))
8998 {
8999 bool stack = false;
9000
9001 if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named))
9002 {
9003 cum->vregno++;
9004 if (!TARGET_ALTIVEC)
9005 error ("cannot pass argument in vector register because"
9006 " altivec instructions are disabled, use -maltivec"
9007 " to enable them");
9008
9009 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
9010 even if it is going to be passed in a vector register.
9011 Darwin does the same for variable-argument functions. */
9012 if ((DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
9013 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
9014 stack = true;
9015 }
9016 else
9017 stack = true;
9018
9019 if (stack)
9020 {
9021 int align;
9022
9023 /* Vector parameters must be 16-byte aligned. This places
9024 them at 2 mod 4 in terms of words in 32-bit mode, since
9025 the parameter save area starts at offset 24 from the
9026 stack. In 64-bit mode, they just have to start on an
9027 even word, since the parameter save area is 16-byte
9028 aligned. Space for GPRs is reserved even if the argument
9029 will be passed in memory. */
9030 if (TARGET_32BIT)
9031 align = (2 - cum->words) & 3;
9032 else
9033 align = cum->words & 1;
9034 cum->words += align + rs6000_arg_size (mode, type);
9035
9036 if (TARGET_DEBUG_ARG)
9037 {
9038 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
9039 cum->words, align);
9040 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
9041 cum->nargs_prototype, cum->prototype,
9042 GET_MODE_NAME (mode));
9043 }
9044 }
9045 }
9046 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
9047 && !cum->stdarg
9048 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9049 cum->sysv_gregno++;
9050
9051 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
9052 {
9053 int size = int_size_in_bytes (type);
9054 /* Variable sized types have size == -1 and are
9055 treated as if consisting entirely of ints.
9056 Pad to 16 byte boundary if needed. */
9057 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
9058 && (cum->words % 2) != 0)
9059 cum->words++;
9060 /* For varargs, we can just go up by the size of the struct. */
9061 if (!named)
9062 cum->words += (size + 7) / 8;
9063 else
9064 {
9065 /* It is tempting to say int register count just goes up by
9066 sizeof(type)/8, but this is wrong in a case such as
9067 { int; double; int; } [powerpc alignment]. We have to
9068 grovel through the fields for these too. */
9069 cum->intoffset = 0;
9070 cum->floats_in_gpr = 0;
9071 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
9072 rs6000_darwin64_record_arg_advance_flush (cum,
9073 size * BITS_PER_UNIT, 1);
9074 }
9075 if (TARGET_DEBUG_ARG)
9076 {
9077 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
9078 cum->words, TYPE_ALIGN (type), size);
9079 fprintf (stderr,
9080 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
9081 cum->nargs_prototype, cum->prototype,
9082 GET_MODE_NAME (mode));
9083 }
9084 }
9085 else if (DEFAULT_ABI == ABI_V4)
9086 {
9087 if (TARGET_HARD_FLOAT && TARGET_FPRS
9088 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
9089 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
9090 || (mode == TFmode && !TARGET_IEEEQUAD)
9091 || mode == SDmode || mode == DDmode || mode == TDmode))
9092 {
9093 /* _Decimal128 must use an even/odd register pair. This assumes
9094 that the register number is odd when fregno is odd. */
9095 if (mode == TDmode && (cum->fregno % 2) == 1)
9096 cum->fregno++;
9097
9098 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
9099 <= FP_ARG_V4_MAX_REG)
9100 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
9101 else
9102 {
9103 cum->fregno = FP_ARG_V4_MAX_REG + 1;
9104 if (mode == DFmode || mode == TFmode
9105 || mode == DDmode || mode == TDmode)
9106 cum->words += cum->words & 1;
9107 cum->words += rs6000_arg_size (mode, type);
9108 }
9109 }
9110 else
9111 {
9112 int n_words = rs6000_arg_size (mode, type);
9113 int gregno = cum->sysv_gregno;
9114
9115 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
9116 (r7,r8) or (r9,r10). So is any other 2-word item such
9117 as complex int, due to a historical mistake. */
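/* The expression (1 - gregno) & 1 below is 1 exactly when gregno is
   even, so an even register number is rounded up to the next odd
   one, e.g. gregno == 4 becomes 5 and the pair used is (r5,r6).  */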
9118 if (n_words == 2)
9119 gregno += (1 - gregno) & 1;
9120
9121 /* Multi-reg args are not split between registers and stack. */
9122 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9123 {
9124 /* Long long and SPE vectors are aligned on the stack.
9125 So are other 2-word items such as complex int, due to
9126 a historical mistake. */
9127 if (n_words == 2)
9128 cum->words += cum->words & 1;
9129 cum->words += n_words;
9130 }
9131
9132 /* Note: we keep accumulating gregno even after arguments have started
9133 spilling to the stack; expand_builtin_saveregs relies on this to
9134 see that spilling has begun. */
9135 cum->sysv_gregno = gregno + n_words;
9136 }
9137
9138 if (TARGET_DEBUG_ARG)
9139 {
9140 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9141 cum->words, cum->fregno);
9142 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
9143 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
9144 fprintf (stderr, "mode = %4s, named = %d\n",
9145 GET_MODE_NAME (mode), named);
9146 }
9147 }
9148 else
9149 {
9150 int n_words = rs6000_arg_size (mode, type);
9151 int start_words = cum->words;
9152 int align_words = rs6000_parm_start (mode, type, start_words);
9153
9154 cum->words = align_words + n_words;
9155
9156 if (SCALAR_FLOAT_MODE_P (mode)
9157 && TARGET_HARD_FLOAT && TARGET_FPRS)
9158 {
9159 /* _Decimal128 must be passed in an even/odd float register pair.
9160 This assumes that the register number is odd when fregno is
9161 odd. */
9162 if (mode == TDmode && (cum->fregno % 2) == 1)
9163 cum->fregno++;
9164 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
9165 }
9166
9167 if (TARGET_DEBUG_ARG)
9168 {
9169 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9170 cum->words, cum->fregno);
9171 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
9172 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
9173 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
9174 named, align_words - start_words, depth);
9175 }
9176 }
9177 }
9178
9179 static void
9180 rs6000_function_arg_advance (cumulative_args_t cum, enum machine_mode mode,
9181 const_tree type, bool named)
9182 {
9183 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
9184 0);
9185 }
9186
9187 static rtx
9188 spe_build_register_parallel (enum machine_mode mode, int gregno)
9189 {
9190 rtx r1, r3, r5, r7;
9191
9192 switch (mode)
9193 {
9194 case DFmode:
9195 r1 = gen_rtx_REG (DImode, gregno);
9196 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9197 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
9198
9199 case DCmode:
9200 case TFmode:
9201 r1 = gen_rtx_REG (DImode, gregno);
9202 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9203 r3 = gen_rtx_REG (DImode, gregno + 2);
9204 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9205 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
9206
9207 case TCmode:
9208 r1 = gen_rtx_REG (DImode, gregno);
9209 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9210 r3 = gen_rtx_REG (DImode, gregno + 2);
9211 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9212 r5 = gen_rtx_REG (DImode, gregno + 4);
9213 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
9214 r7 = gen_rtx_REG (DImode, gregno + 6);
9215 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
9216 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
9217
9218 default:
9219 gcc_unreachable ();
9220 }
9221 }
9222
9223 /* Determine where to put a SIMD argument on the SPE. */
9224 static rtx
9225 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
9226 const_tree type)
9227 {
9228 int gregno = cum->sysv_gregno;
9229
9230 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
9231 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
9232 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
9233 || mode == DCmode || mode == TCmode))
9234 {
9235 int n_words = rs6000_arg_size (mode, type);
9236
9237 /* Doubles go in an odd/even register pair (r5/r6, etc). */
9238 if (mode == DFmode)
9239 gregno += (1 - gregno) & 1;
9240
9241 /* Multi-reg args are not split between registers and stack. */
9242 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9243 return NULL_RTX;
9244
9245 return spe_build_register_parallel (mode, gregno);
9246 }
9247 if (cum->stdarg)
9248 {
9249 int n_words = rs6000_arg_size (mode, type);
9250
9251 /* SPE vectors are put in odd registers. */
9252 if (n_words == 2 && (gregno & 1) == 0)
9253 gregno += 1;
9254
9255 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
9256 {
9257 rtx r1, r2;
9258 enum machine_mode m = SImode;
9259
9260 r1 = gen_rtx_REG (m, gregno);
9261 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
9262 r2 = gen_rtx_REG (m, gregno + 1);
9263 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
9264 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
9265 }
9266 else
9267 return NULL_RTX;
9268 }
9269 else
9270 {
9271 if (gregno <= GP_ARG_MAX_REG)
9272 return gen_rtx_REG (mode, gregno);
9273 else
9274 return NULL_RTX;
9275 }
9276 }
9277
9278 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
9279 structure between cum->intoffset and bitpos to integer registers. */
9280
9281 static void
9282 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
9283 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
9284 {
9285 enum machine_mode mode;
9286 unsigned int regno;
9287 unsigned int startbit, endbit;
9288 int this_regno, intregs, intoffset;
9289 rtx reg;
9290
9291 if (cum->intoffset == -1)
9292 return;
9293
9294 intoffset = cum->intoffset;
9295 cum->intoffset = -1;
9296
9297 /* If this is the trailing part of a word, try to only load that
9298 much into the register. Otherwise load the whole register. Note
9299 that in the latter case we may pick up unwanted bits. It's not a
9300 problem at the moment, but we may wish to revisit it. */
9301
9302 if (intoffset % BITS_PER_WORD != 0)
9303 {
9304 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9305 MODE_INT, 0);
9306 if (mode == BLKmode)
9307 {
9308 /* We couldn't find an appropriate mode, which happens,
9309 e.g., in packed structs when there are 3 bytes to load.
9310 Move intoffset back to the beginning of the word in this
9311 case. */
9312 intoffset = intoffset & -BITS_PER_WORD;
9313 mode = word_mode;
9314 }
9315 }
9316 else
9317 mode = word_mode;
9318
9319 startbit = intoffset & -BITS_PER_WORD;
9320 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9321 intregs = (endbit - startbit) / BITS_PER_WORD;
9322 this_regno = cum->words + intoffset / BITS_PER_WORD;
9323
9324 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
9325 cum->use_stack = 1;
9326
9327 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
9328 if (intregs <= 0)
9329 return;
9330
9331 intoffset /= BITS_PER_UNIT;
9332 do
9333 {
9334 regno = GP_ARG_MIN_REG + this_regno;
9335 reg = gen_rtx_REG (mode, regno);
9336 rvec[(*k)++] =
9337 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
9338
9339 this_regno += 1;
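/* Round intoffset up to the start of the following word, e.g. with
   UNITS_PER_WORD == 8 an intoffset of 5 becomes 8 and 8 becomes 16.  */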
9340 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
9341 mode = word_mode;
9342 intregs -= 1;
9343 }
9344 while (intregs > 0);
9345 }
9346
9347 /* Recursive workhorse for the following. */
9348
9349 static void
9350 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
9351 HOST_WIDE_INT startbitpos, rtx rvec[],
9352 int *k)
9353 {
9354 tree f;
9355
9356 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
9357 if (TREE_CODE (f) == FIELD_DECL)
9358 {
9359 HOST_WIDE_INT bitpos = startbitpos;
9360 tree ftype = TREE_TYPE (f);
9361 enum machine_mode mode;
9362 if (ftype == error_mark_node)
9363 continue;
9364 mode = TYPE_MODE (ftype);
9365
9366 if (DECL_SIZE (f) != 0
9367 && host_integerp (bit_position (f), 1))
9368 bitpos += int_bit_position (f);
9369
9370 /* ??? FIXME: else assume zero offset. */
9371
9372 if (TREE_CODE (ftype) == RECORD_TYPE)
9373 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
9374 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode, ftype))
9375 {
9376 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
9377 #if 0
9378 switch (mode)
9379 {
9380 case SCmode: mode = SFmode; break;
9381 case DCmode: mode = DFmode; break;
9382 case TCmode: mode = TFmode; break;
9383 default: break;
9384 }
9385 #endif
9386 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
9387 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
9388 {
9389 gcc_assert (cum->fregno == FP_ARG_MAX_REG
9390 && (mode == TFmode || mode == TDmode));
9391 /* Long double or _Decimal128 split over regs and memory. */
9392 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
9393 cum->use_stack = 1;
9394 }
9395 rvec[(*k)++]
9396 = gen_rtx_EXPR_LIST (VOIDmode,
9397 gen_rtx_REG (mode, cum->fregno++),
9398 GEN_INT (bitpos / BITS_PER_UNIT));
9399 if (mode == TFmode || mode == TDmode)
9400 cum->fregno++;
9401 }
9402 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, ftype, 1))
9403 {
9404 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
9405 rvec[(*k)++]
9406 = gen_rtx_EXPR_LIST (VOIDmode,
9407 gen_rtx_REG (mode, cum->vregno++),
9408 GEN_INT (bitpos / BITS_PER_UNIT));
9409 }
9410 else if (cum->intoffset == -1)
9411 cum->intoffset = bitpos;
9412 }
9413 }
9414
9415 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
9416 the register(s) to be used for each field and subfield of a struct
9417 being passed by value, along with the offset of where the
9418 register's value may be found in the block. FP fields go in FP
9419 registers, vector fields go in vector registers, and everything
9420 else goes in int registers, packed as in memory.
9421
9422 This code is also used for function return values. RETVAL indicates
9423 whether this is the case.
9424
9425 Much of this is taken from the SPARC V9 port, which has a similar
9426 calling convention. */
9427
9428 static rtx
9429 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
9430 bool named, bool retval)
9431 {
9432 rtx rvec[FIRST_PSEUDO_REGISTER];
9433 int k = 1, kbase = 1;
9434 HOST_WIDE_INT typesize = int_size_in_bytes (type);
9435 /* This is a copy; modifications are not visible to our caller. */
9436 CUMULATIVE_ARGS copy_cum = *orig_cum;
9437 CUMULATIVE_ARGS *cum = &copy_cum;
9438
9439 /* Pad to 16 byte boundary if needed. */
9440 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
9441 && (cum->words % 2) != 0)
9442 cum->words++;
9443
9444 cum->intoffset = 0;
9445 cum->use_stack = 0;
9446 cum->named = named;
9447
9448 /* Put entries into rvec[] for individual FP and vector fields, and
9449 for the chunks of memory that go in int regs. Note we start at
9450 element 1; 0 is reserved for an indication of using memory, and
9451 may or may not be filled in below. */
9452 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
9453 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
9454
9455 /* If any part of the struct went on the stack put all of it there.
9456 This hack is because the generic code for
9457 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
9458 parts of the struct are not at the beginning. */
9459 if (cum->use_stack)
9460 {
9461 if (retval)
9462 return NULL_RTX; /* doesn't go in registers at all */
9463 kbase = 0;
9464 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
9465 }
9466 if (k > 1 || cum->use_stack)
9467 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
9468 else
9469 return NULL_RTX;
9470 }
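/* E.g., for a named darwin64 argument of type struct { double d; int i; }
   this builds a PARALLEL whose first element assigns d to the next FPR
   at byte offset 0 and whose second assigns the word holding i to a GPR
   at byte offset 8, assuming nothing forced the struct onto the stack.  */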
9471
9472 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
9473
9474 static rtx
9475 rs6000_mixed_function_arg (enum machine_mode mode, const_tree type,
9476 int align_words)
9477 {
9478 int n_units;
9479 int i, k;
9480 rtx rvec[GP_ARG_NUM_REG + 1];
9481
9482 if (align_words >= GP_ARG_NUM_REG)
9483 return NULL_RTX;
9484
9485 n_units = rs6000_arg_size (mode, type);
9486
9487 /* Optimize the simple case where the arg fits in one gpr, except in
9488 the case of BLKmode due to assign_parms assuming that registers are
9489 BITS_PER_WORD wide. */
9490 if (n_units == 0
9491 || (n_units == 1 && mode != BLKmode))
9492 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
9493
9494 k = 0;
9495 if (align_words + n_units > GP_ARG_NUM_REG)
9496 /* Not all of the arg fits in gprs. Say that it goes in memory too,
9497 using a magic NULL_RTX component.
9498 This is not strictly correct. Only some of the arg belongs in
9499 memory, not all of it. However, the normal scheme using
9500 function_arg_partial_nregs can result in unusual subregs, eg.
9501 (subreg:SI (reg:DF) 4), which are not handled well. The code to
9502 store the whole arg to memory is often more efficient than code
9503 to store pieces, and we know that space is available in the right
9504 place for the whole arg. */
9505 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
9506
9507 i = 0;
9508 do
9509 {
9510 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
9511 rtx off = GEN_INT (i++ * 4);
9512 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
9513 }
9514 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
9515
9516 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
9517 }
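/* E.g., an 8-byte BLKmode arg with align_words == 7 needs two units
   but only one GPR (r10) remains, so the PARALLEL holds the magic
   NULL_RTX memory element plus (reg:SI r10) for the first word; the
   rest of the arg lives only in memory.  */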
9518
9519 /* Determine where to put an argument to a function.
9520 Value is zero to push the argument on the stack,
9521 or a hard register in which to store the argument.
9522
9523 MODE is the argument's machine mode.
9524 TYPE is the data type of the argument (as a tree).
9525 This is null for libcalls where that information may
9526 not be available.
9527 CUM is a variable of type CUMULATIVE_ARGS which gives info about
9528 the preceding args and about the function being called. It is
9529 not modified in this routine.
9530 NAMED is nonzero if this argument is a named parameter
9531 (otherwise it is an extra parameter matching an ellipsis).
9532
9533 On RS/6000 the first eight words of non-FP are normally in registers
9534 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
9535 Under V.4, the first 8 FP args are in registers.
9536
9537 If this is floating-point and no prototype is specified, we use
9538 both an FP and integer register (or possibly FP reg and stack). Library
9539 functions (when CALL_LIBCALL is set) always have the proper types for args,
9540 so we can pass the FP value just in one register. emit_library_function
9541 doesn't support PARALLEL anyway.
9542
9543 Note that for args passed by reference, function_arg will be called
9544 with MODE and TYPE set to that of the pointer to the arg, not the arg
9545 itself. */
9546
9547 static rtx
9548 rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
9549 const_tree type, bool named)
9550 {
9551 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9552 enum rs6000_abi abi = DEFAULT_ABI;
9553
9554 /* Return a marker indicating whether the bit that V.4 uses to say
9555 fp args were passed in registers needs to be set or cleared in CR1.
9556 Assume that we don't need the marker for software floating point,
9557 or compiler generated library calls. */
9558 if (mode == VOIDmode)
9559 {
9560 if (abi == ABI_V4
9561 && (cum->call_cookie & CALL_LIBCALL) == 0
9562 && (cum->stdarg
9563 || (cum->nargs_prototype < 0
9564 && (cum->prototype || TARGET_NO_PROTOTYPE))))
9565 {
9566 /* For the SPE, we need to crxor CR6 always. */
9567 if (TARGET_SPE_ABI)
9568 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
9569 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
9570 return GEN_INT (cum->call_cookie
9571 | ((cum->fregno == FP_ARG_MIN_REG)
9572 ? CALL_V4_SET_FP_ARGS
9573 : CALL_V4_CLEAR_FP_ARGS));
9574 }
9575
9576 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
9577 }
9578
9579 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
9580 {
9581 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
9582 if (rslt != NULL_RTX)
9583 return rslt;
9584 /* Else fall through to usual handling. */
9585 }
9586
9587 if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named))
9588 if (TARGET_64BIT && ! cum->prototype)
9589 {
9590 /* Vector parameters get passed in vector register
9591 and also in GPRs or memory, in the absence of a prototype. */
9592 int align_words;
9593 rtx slot;
9594 align_words = (cum->words + 1) & ~1;
9595
9596 if (align_words >= GP_ARG_NUM_REG)
9597 {
9598 slot = NULL_RTX;
9599 }
9600 else
9601 {
9602 slot = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
9603 }
9604 return gen_rtx_PARALLEL (mode,
9605 gen_rtvec (2,
9606 gen_rtx_EXPR_LIST (VOIDmode,
9607 slot, const0_rtx),
9608 gen_rtx_EXPR_LIST (VOIDmode,
9609 gen_rtx_REG (mode, cum->vregno),
9610 const0_rtx)));
9611 }
9612 else
9613 return gen_rtx_REG (mode, cum->vregno);
9614 else if (TARGET_ALTIVEC_ABI
9615 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
9616 || (type && TREE_CODE (type) == VECTOR_TYPE
9617 && int_size_in_bytes (type) == 16)))
9618 {
9619 if (named || abi == ABI_V4)
9620 return NULL_RTX;
9621 else
9622 {
9623 /* Vector parameters to varargs functions under AIX or Darwin
9624 get passed in memory and possibly also in GPRs. */
9625 int align, align_words, n_words;
9626 enum machine_mode part_mode;
9627
9628 /* Vector parameters must be 16-byte aligned. This places them at
9629 2 mod 4 in terms of words in 32-bit mode, since the parameter
9630 save area starts at offset 24 from the stack. In 64-bit mode,
9631 they just have to start on an even word, since the parameter
9632 save area is 16-byte aligned. */
9633 if (TARGET_32BIT)
9634 align = (2 - cum->words) & 3;
9635 else
9636 align = cum->words & 1;
9637 align_words = cum->words + align;
9638
9639 /* Out of registers? Memory, then. */
9640 if (align_words >= GP_ARG_NUM_REG)
9641 return NULL_RTX;
9642
9643 if (TARGET_32BIT && TARGET_POWERPC64)
9644 return rs6000_mixed_function_arg (mode, type, align_words);
9645
9646 /* The vector value goes in GPRs. Only the part of the
9647 value in GPRs is reported here. */
9648 part_mode = mode;
9649 n_words = rs6000_arg_size (mode, type);
9650 if (align_words + n_words > GP_ARG_NUM_REG)
9651 /* Fortunately, there are only two possibilities, the value
9652 is either wholly in GPRs or half in GPRs and half not. */
9653 part_mode = DImode;
9654
9655 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
9656 }
9657 }
9658 else if (TARGET_SPE_ABI && TARGET_SPE
9659 && (SPE_VECTOR_MODE (mode)
9660 || (TARGET_E500_DOUBLE && (mode == DFmode
9661 || mode == DCmode
9662 || mode == TFmode
9663 || mode == TCmode))))
9664 return rs6000_spe_function_arg (cum, mode, type);
9665
9666 else if (abi == ABI_V4)
9667 {
9668 if (TARGET_HARD_FLOAT && TARGET_FPRS
9669 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
9670 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
9671 || (mode == TFmode && !TARGET_IEEEQUAD)
9672 || mode == SDmode || mode == DDmode || mode == TDmode))
9673 {
9674 /* _Decimal128 must use an even/odd register pair. This assumes
9675 that the register number is odd when fregno is odd. */
9676 if (mode == TDmode && (cum->fregno % 2) == 1)
9677 cum->fregno++;
9678
9679 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
9680 <= FP_ARG_V4_MAX_REG)
9681 return gen_rtx_REG (mode, cum->fregno);
9682 else
9683 return NULL_RTX;
9684 }
9685 else
9686 {
9687 int n_words = rs6000_arg_size (mode, type);
9688 int gregno = cum->sysv_gregno;
9689
9690 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
9691 (r7,r8) or (r9,r10). So is any other 2-word item such
9692 as complex int, due to a historical mistake. */
9693 if (n_words == 2)
9694 gregno += (1 - gregno) & 1;
9695
9696 /* Multi-reg args are not split between registers and stack. */
9697 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9698 return NULL_RTX;
9699
9700 if (TARGET_32BIT && TARGET_POWERPC64)
9701 return rs6000_mixed_function_arg (mode, type,
9702 gregno - GP_ARG_MIN_REG);
9703 return gen_rtx_REG (mode, gregno);
9704 }
9705 }
9706 else
9707 {
9708 int align_words = rs6000_parm_start (mode, type, cum->words);
9709
9710 /* _Decimal128 must be passed in an even/odd float register pair.
9711 This assumes that the register number is odd when fregno is odd. */
9712 if (mode == TDmode && (cum->fregno % 2) == 1)
9713 cum->fregno++;
9714
9715 if (USE_FP_FOR_ARG_P (cum, mode, type))
9716 {
9717 rtx rvec[GP_ARG_NUM_REG + 1];
9718 rtx r;
9719 int k;
9720 bool needs_psave;
9721 enum machine_mode fmode = mode;
9722 unsigned long n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
9723
9724 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
9725 {
9726 /* Currently, we only ever need one reg here because complex
9727 doubles are split. */
9728 gcc_assert (cum->fregno == FP_ARG_MAX_REG
9729 && (fmode == TFmode || fmode == TDmode));
9730
9731 /* Long double or _Decimal128 split over regs and memory. */
9732 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
9733 }
9734
9735 /* Do we also need to pass this arg in the parameter save
9736 area? */
9737 needs_psave = (type
9738 && (cum->nargs_prototype <= 0
9739 || (DEFAULT_ABI == ABI_AIX
9740 && TARGET_XL_COMPAT
9741 && align_words >= GP_ARG_NUM_REG)));
9742
9743 if (!needs_psave && mode == fmode)
9744 return gen_rtx_REG (fmode, cum->fregno);
9745
9746 k = 0;
9747 if (needs_psave)
9748 {
9749 /* Describe the part that goes in gprs or the stack.
9750 This piece must come first, before the fprs. */
9751 if (align_words < GP_ARG_NUM_REG)
9752 {
9753 unsigned long n_words = rs6000_arg_size (mode, type);
9754
9755 if (align_words + n_words > GP_ARG_NUM_REG
9756 || (TARGET_32BIT && TARGET_POWERPC64))
9757 {
9758 /* If this is partially on the stack, then we only
9759 include the portion actually in registers here. */
9760 enum machine_mode rmode = TARGET_32BIT ? SImode : DImode;
9761 rtx off;
9762 int i = 0;
9763 if (align_words + n_words > GP_ARG_NUM_REG)
9764 /* Not all of the arg fits in gprs. Say that it
9765 goes in memory too, using a magic NULL_RTX
9766 component. Also see comment in
9767 rs6000_mixed_function_arg for why the normal
9768 function_arg_partial_nregs scheme doesn't work
9769 in this case. */
9770 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX,
9771 const0_rtx);
9772 do
9773 {
9774 r = gen_rtx_REG (rmode,
9775 GP_ARG_MIN_REG + align_words);
9776 off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
9777 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
9778 }
9779 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
9780 }
9781 else
9782 {
9783 /* The whole arg fits in gprs. */
9784 r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
9785 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
9786 }
9787 }
9788 else
9789 /* It's entirely in memory. */
9790 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
9791 }
9792
9793 /* Describe where this piece goes in the fprs. */
9794 r = gen_rtx_REG (fmode, cum->fregno);
9795 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
9796
9797 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
9798 }
9799 else if (align_words < GP_ARG_NUM_REG)
9800 {
9801 if (TARGET_32BIT && TARGET_POWERPC64)
9802 return rs6000_mixed_function_arg (mode, type, align_words);
9803
9804 if (mode == BLKmode)
9805 mode = Pmode;
9806
9807 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
9808 }
9809 else
9810 return NULL_RTX;
9811 }
9812 }
9813 \f
9814 /* For an arg passed partly in registers and partly in memory, this is
9815 the number of bytes passed in registers. For args passed entirely in
9816 registers or entirely in memory, zero. When an arg is described by a
9817 PARALLEL, perhaps using more than one register type, this function
9818 returns the number of bytes used by the first element of the PARALLEL. */
9819
9820 static int
9821 rs6000_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
9822 tree type, bool named)
9823 {
9824 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9825 int ret = 0;
9826 int align_words;
9827
9828 if (DEFAULT_ABI == ABI_V4)
9829 return 0;
9830
9831 if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named)
9832 && cum->nargs_prototype >= 0)
9833 return 0;
9834
9835 /* In this complicated case we just disable the partial_nregs code. */
9836 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
9837 return 0;
9838
9839 align_words = rs6000_parm_start (mode, type, cum->words);
9840
9841 if (USE_FP_FOR_ARG_P (cum, mode, type))
9842 {
9843 /* If we are passing this arg in the fixed parameter save area
9844 (gprs or memory) as well as fprs, then this function should
9845 return the number of partial bytes passed in the parameter
9846 save area rather than partial bytes passed in fprs. */
9847 if (type
9848 && (cum->nargs_prototype <= 0
9849 || (DEFAULT_ABI == ABI_AIX
9850 && TARGET_XL_COMPAT
9851 && align_words >= GP_ARG_NUM_REG)))
9852 return 0;
9853 else if (cum->fregno + ((GET_MODE_SIZE (mode) + 7) >> 3)
9854 > FP_ARG_MAX_REG + 1)
9855 ret = (FP_ARG_MAX_REG + 1 - cum->fregno) * 8;
9856 else if (cum->nargs_prototype >= 0)
9857 return 0;
9858 }
9859
9860 if (align_words < GP_ARG_NUM_REG
9861 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
9862 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
9863
9864 if (ret != 0 && TARGET_DEBUG_ARG)
9865 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
9866
9867 return ret;
9868 }
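/* E.g., on a 32-bit AIX target a 16-byte struct whose first word lands
   at align_words == 6 has only two GPRs left, so this returns
   (8 - 6) * 4 = 8: eight bytes travel in registers and the remaining
   eight go on the stack.  */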
9869 \f
9870 /* A C expression that indicates when an argument must be passed by
9871 reference. If nonzero for an argument, a copy of that argument is
9872 made in memory and a pointer to the argument is passed instead of
9873 the argument itself. The pointer is passed in whatever way is
9874 appropriate for passing a pointer to that type.
9875
9876 Under V.4, aggregates and long double are passed by reference.
9877
9878 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
9879 reference unless the AltiVec vector extension ABI is in force.
9880
9881 As an extension to all ABIs, variable sized types are passed by
9882 reference. */
9883
9884 static bool
9885 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
9886 enum machine_mode mode, const_tree type,
9887 bool named ATTRIBUTE_UNUSED)
9888 {
9889 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && mode == TFmode)
9890 {
9891 if (TARGET_DEBUG_ARG)
9892 fprintf (stderr, "function_arg_pass_by_reference: V4 long double\n");
9893 return 1;
9894 }
9895
9896 if (!type)
9897 return 0;
9898
9899 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
9900 {
9901 if (TARGET_DEBUG_ARG)
9902 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
9903 return 1;
9904 }
9905
9906 if (int_size_in_bytes (type) < 0)
9907 {
9908 if (TARGET_DEBUG_ARG)
9909 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
9910 return 1;
9911 }
9912
9913 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
9914 modes only exist for GCC vector types if -maltivec. */
9915 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
9916 {
9917 if (TARGET_DEBUG_ARG)
9918 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
9919 return 1;
9920 }
9921
9922 /* Pass synthetic vectors in memory. */
9923 if (TREE_CODE (type) == VECTOR_TYPE
9924 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
9925 {
9926 static bool warned_for_pass_big_vectors = false;
9927 if (TARGET_DEBUG_ARG)
9928 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
9929 if (!warned_for_pass_big_vectors)
9930 {
9931 warning (0, "GCC vector passed by reference: "
9932 "non-standard ABI extension with no compatibility guarantee");
9933 warned_for_pass_big_vectors = true;
9934 }
9935 return 1;
9936 }
9937
9938 return 0;
9939 }
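/* E.g., under V.4 a struct { int a, b; } argument is an aggregate and
   is therefore copied to memory with a pointer passed in its place;
   a variable sized type (int_size_in_bytes < 0) is handled the same
   way under every ABI.  */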
9940
9941 static void
9942 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
9943 {
9944 int i;
9945 enum machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
9946
9947 if (nregs == 0)
9948 return;
9949
9950 for (i = 0; i < nregs; i++)
9951 {
9952 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
9953 if (reload_completed)
9954 {
9955 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
9956 tem = NULL_RTX;
9957 else
9958 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
9959 i * GET_MODE_SIZE (reg_mode));
9960 }
9961 else
9962 tem = replace_equiv_address (tem, XEXP (tem, 0));
9963
9964 gcc_assert (tem);
9965
9966 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
9967 }
9968 }
9969 \f
9970 /* Perform any actions needed for a function that is receiving a
9971 variable number of arguments.
9972
9973 CUM is as above.
9974
9975 MODE and TYPE are the mode and type of the current parameter.
9976
9977 PRETEND_SIZE is a variable that should be set to the amount of stack
9978 that must be pushed by the prolog to pretend that our caller pushed
9979 it.
9980
9981 Normally, this macro will push all remaining incoming registers on the
9982 stack and set PRETEND_SIZE to the length of the registers pushed. */
9983
9984 static void
9985 setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
9986 tree type, int *pretend_size ATTRIBUTE_UNUSED,
9987 int no_rtl)
9988 {
9989 CUMULATIVE_ARGS next_cum;
9990 int reg_size = TARGET_32BIT ? 4 : 8;
9991 rtx save_area = NULL_RTX, mem;
9992 int first_reg_offset;
9993 alias_set_type set;
9994
9995 /* Skip the last named argument. */
9996 next_cum = *get_cumulative_args (cum);
9997 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
9998
9999 if (DEFAULT_ABI == ABI_V4)
10000 {
10001 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
10002
10003 if (! no_rtl)
10004 {
10005 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
10006 HOST_WIDE_INT offset = 0;
10007
10008 /* Try to optimize the size of the varargs save area.
10009 The ABI requires that ap.reg_save_area is doubleword
10010 aligned, but we don't need to allocate space for all
10011 the bytes, only for those to which we will actually save
10012 anything. */
10013 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
10014 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
10015 if (TARGET_HARD_FLOAT && TARGET_FPRS
10016 && next_cum.fregno <= FP_ARG_V4_MAX_REG
10017 && cfun->va_list_fpr_size)
10018 {
10019 if (gpr_reg_num)
10020 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
10021 * UNITS_PER_FP_WORD;
10022 if (cfun->va_list_fpr_size
10023 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
10024 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
10025 else
10026 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
10027 * UNITS_PER_FP_WORD;
10028 }
10029 if (gpr_reg_num)
10030 {
10031 offset = -((first_reg_offset * reg_size) & ~7);
10032 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
10033 {
10034 gpr_reg_num = cfun->va_list_gpr_size;
10035 if (reg_size == 4 && (first_reg_offset & 1))
10036 gpr_reg_num++;
10037 }
10038 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
10039 }
10040 else if (fpr_size)
10041 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
10042 * UNITS_PER_FP_WORD
10043 - (int) (GP_ARG_NUM_REG * reg_size);
10044
10045 if (gpr_size + fpr_size)
10046 {
10047 rtx reg_save_area
10048 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
10049 gcc_assert (GET_CODE (reg_save_area) == MEM);
10050 reg_save_area = XEXP (reg_save_area, 0);
10051 if (GET_CODE (reg_save_area) == PLUS)
10052 {
10053 gcc_assert (XEXP (reg_save_area, 0)
10054 == virtual_stack_vars_rtx);
10055 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
10056 offset += INTVAL (XEXP (reg_save_area, 1));
10057 }
10058 else
10059 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
10060 }
10061
10062 cfun->machine->varargs_save_offset = offset;
10063 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
10064 }
10065 }
10066 else
10067 {
10068 first_reg_offset = next_cum.words;
10069 save_area = virtual_incoming_args_rtx;
10070
10071 if (targetm.calls.must_pass_in_stack (mode, type))
10072 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
10073 }
10074
10075 set = get_varargs_alias_set ();
10076 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
10077 && cfun->va_list_gpr_size)
10078 {
10079 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
10080
10081 if (va_list_gpr_counter_field)
10082 /* V4 va_list_gpr_size counts number of registers needed. */
10083 n_gpr = cfun->va_list_gpr_size;
10084 else
10085 /* char * va_list instead counts number of bytes needed. */
10086 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
10087
10088 if (nregs > n_gpr)
10089 nregs = n_gpr;
10090
10091 mem = gen_rtx_MEM (BLKmode,
10092 plus_constant (Pmode, save_area,
10093 first_reg_offset * reg_size));
10094 MEM_NOTRAP_P (mem) = 1;
10095 set_mem_alias_set (mem, set);
10096 set_mem_align (mem, BITS_PER_WORD);
10097
10098 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
10099 nregs);
10100 }
10101
10102 /* Save FP registers if needed. */
10103 if (DEFAULT_ABI == ABI_V4
10104 && TARGET_HARD_FLOAT && TARGET_FPRS
10105 && ! no_rtl
10106 && next_cum.fregno <= FP_ARG_V4_MAX_REG
10107 && cfun->va_list_fpr_size)
10108 {
10109 int fregno = next_cum.fregno, nregs;
10110 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
10111 rtx lab = gen_label_rtx ();
10112 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
10113 * UNITS_PER_FP_WORD);
10114
10115 emit_jump_insn
10116 (gen_rtx_SET (VOIDmode,
10117 pc_rtx,
10118 gen_rtx_IF_THEN_ELSE (VOIDmode,
10119 gen_rtx_NE (VOIDmode, cr1,
10120 const0_rtx),
10121 gen_rtx_LABEL_REF (VOIDmode, lab),
10122 pc_rtx)));
10123
10124 for (nregs = 0;
10125 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
10126 fregno++, off += UNITS_PER_FP_WORD, nregs++)
10127 {
10128 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
10129 ? DFmode : SFmode,
10130 plus_constant (Pmode, save_area, off));
10131 MEM_NOTRAP_P (mem) = 1;
10132 set_mem_alias_set (mem, set);
10133 set_mem_align (mem, GET_MODE_ALIGNMENT (
10134 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
10135 ? DFmode : SFmode));
10136 emit_move_insn (mem, gen_rtx_REG (
10137 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
10138 ? DFmode : SFmode, fregno));
10139 }
10140
10141 emit_label (lab);
10142 }
10143 }
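/* The save area laid out above is GP_ARG_NUM_REG words of GPR images
   followed by the FPR images, so with 32-bit registers the first FPR
   copy sits at byte offset 8 * 4 = 32; this matches the sav_ofs
   computation in rs6000_gimplify_va_arg below.  */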
10144
10145 /* Create the va_list data type. */
10146
10147 static tree
10148 rs6000_build_builtin_va_list (void)
10149 {
10150 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
10151
10152 /* For AIX, prefer 'char *' because that's what the system
10153 header files like. */
10154 if (DEFAULT_ABI != ABI_V4)
10155 return build_pointer_type (char_type_node);
10156
10157 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
10158 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
10159 get_identifier ("__va_list_tag"), record);
10160
10161 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
10162 unsigned_char_type_node);
10163 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
10164 unsigned_char_type_node);
10165 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
10166 every user file. */
10167 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
10168 get_identifier ("reserved"), short_unsigned_type_node);
10169 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
10170 get_identifier ("overflow_arg_area"),
10171 ptr_type_node);
10172 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
10173 get_identifier ("reg_save_area"),
10174 ptr_type_node);
10175
10176 va_list_gpr_counter_field = f_gpr;
10177 va_list_fpr_counter_field = f_fpr;
10178
10179 DECL_FIELD_CONTEXT (f_gpr) = record;
10180 DECL_FIELD_CONTEXT (f_fpr) = record;
10181 DECL_FIELD_CONTEXT (f_res) = record;
10182 DECL_FIELD_CONTEXT (f_ovf) = record;
10183 DECL_FIELD_CONTEXT (f_sav) = record;
10184
10185 TYPE_STUB_DECL (record) = type_decl;
10186 TYPE_NAME (record) = type_decl;
10187 TYPE_FIELDS (record) = f_gpr;
10188 DECL_CHAIN (f_gpr) = f_fpr;
10189 DECL_CHAIN (f_fpr) = f_res;
10190 DECL_CHAIN (f_res) = f_ovf;
10191 DECL_CHAIN (f_ovf) = f_sav;
10192
10193 layout_type (record);
10194
10195 /* The correct type is an array type of one element. */
10196 return build_array_type (record, build_index_type (size_zero_node));
10197 }
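/* In C terms, the V.4 va_list constructed above is roughly:

     typedef struct __va_list_tag
     {
       unsigned char gpr;         -- next GP arg register to use
       unsigned char fpr;         -- next FP arg register to use
       unsigned short reserved;   -- named padding, see above
       void *overflow_arg_area;   -- args that went on the stack
       void *reg_save_area;       -- saved GP/FP argument registers
     } __va_list_tag;

   with the va_list type itself being a one-element array of that
   record.  */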
10198
10199 /* Implement va_start. */
10200
10201 static void
10202 rs6000_va_start (tree valist, rtx nextarg)
10203 {
10204 HOST_WIDE_INT words, n_gpr, n_fpr;
10205 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
10206 tree gpr, fpr, ovf, sav, t;
10207
10208 /* Only SVR4 needs something special. */
10209 if (DEFAULT_ABI != ABI_V4)
10210 {
10211 std_expand_builtin_va_start (valist, nextarg);
10212 return;
10213 }
10214
10215 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
10216 f_fpr = DECL_CHAIN (f_gpr);
10217 f_res = DECL_CHAIN (f_fpr);
10218 f_ovf = DECL_CHAIN (f_res);
10219 f_sav = DECL_CHAIN (f_ovf);
10220
10221 valist = build_simple_mem_ref (valist);
10222 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
10223 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
10224 f_fpr, NULL_TREE);
10225 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
10226 f_ovf, NULL_TREE);
10227 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
10228 f_sav, NULL_TREE);
10229
10230 /* Count number of gp and fp argument registers used. */
10231 words = crtl->args.info.words;
10232 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
10233 GP_ARG_NUM_REG);
10234 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
10235 FP_ARG_NUM_REG);
10236
10237 if (TARGET_DEBUG_ARG)
10238 fprintf (stderr, "va_start: words = "HOST_WIDE_INT_PRINT_DEC", n_gpr = "
10239 HOST_WIDE_INT_PRINT_DEC", n_fpr = "HOST_WIDE_INT_PRINT_DEC"\n",
10240 words, n_gpr, n_fpr);
10241
10242 if (cfun->va_list_gpr_size)
10243 {
10244 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
10245 build_int_cst (NULL_TREE, n_gpr));
10246 TREE_SIDE_EFFECTS (t) = 1;
10247 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10248 }
10249
10250 if (cfun->va_list_fpr_size)
10251 {
10252 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
10253 build_int_cst (NULL_TREE, n_fpr));
10254 TREE_SIDE_EFFECTS (t) = 1;
10255 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10256
10257 #ifdef HAVE_AS_GNU_ATTRIBUTE
10258 if (call_ABI_of_interest (cfun->decl))
10259 rs6000_passes_float = true;
10260 #endif
10261 }
10262
10263 /* Find the overflow area. */
10264 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
10265 if (words != 0)
10266 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
10267 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
10268 TREE_SIDE_EFFECTS (t) = 1;
10269 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10270
10271 /* If there were no va_arg invocations, don't set up the register
10272 save area. */
10273 if (!cfun->va_list_gpr_size
10274 && !cfun->va_list_fpr_size
10275 && n_gpr < GP_ARG_NUM_REG
10276 && n_fpr < FP_ARG_V4_MAX_REG)
10277 return;
10278
10279 /* Find the register save area. */
10280 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
10281 if (cfun->machine->varargs_save_offset)
10282 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
10283 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
10284 TREE_SIDE_EFFECTS (t) = 1;
10285 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
10286 }
10287
10288 /* Implement va_arg. */
10289
10290 static tree
10291 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
10292 gimple_seq *post_p)
10293 {
10294 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
10295 tree gpr, fpr, ovf, sav, reg, t, u;
10296 int size, rsize, n_reg, sav_ofs, sav_scale;
10297 tree lab_false, lab_over, addr;
10298 int align;
10299 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
10300 int regalign = 0;
10301 gimple stmt;
10302
10303 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
10304 {
10305 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
10306 return build_va_arg_indirect_ref (t);
10307 }
10308
10309 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
10310 earlier version of gcc, with the property that it always applied alignment
10311 adjustments to the va-args (even for zero-sized types). The cheapest way
10312 to deal with this is to replicate the effect of the part of
10313 std_gimplify_va_arg_expr that carries out the align adjust, for the case
10314 of relevance.
10315 We don't need to check for pass-by-reference because of the test above.
10316 We can return a simplified answer, since we know there's no offset to add. */
10317
10318 if (((TARGET_MACHO
10319 && rs6000_darwin64_abi)
10320 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
10321 && integer_zerop (TYPE_SIZE (type)))
10322 {
10323 unsigned HOST_WIDE_INT align, boundary;
10324 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
10325 align = PARM_BOUNDARY / BITS_PER_UNIT;
10326 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
10327 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
10328 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
10329 boundary /= BITS_PER_UNIT;
10330 if (boundary > align)
10331 {
10332 tree t;
10333 /* This updates arg ptr by the amount that would be necessary
10334 to align the zero-sized (but not zero-alignment) item. */
10335 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
10336 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
10337 gimplify_and_add (t, pre_p);
10338
10339 t = fold_convert (sizetype, valist_tmp);
10340 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
10341 fold_convert (TREE_TYPE (valist),
10342 fold_build2 (BIT_AND_EXPR, sizetype, t,
10343 size_int (-boundary))));
10344 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
10345 gimplify_and_add (t, pre_p);
10346 }
10347 /* Since it is zero-sized there's no increment for the item itself. */
10348 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
10349 return build_va_arg_indirect_ref (valist_tmp);
10350 }
10351
10352 if (DEFAULT_ABI != ABI_V4)
10353 {
10354 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
10355 {
10356 tree elem_type = TREE_TYPE (type);
10357 enum machine_mode elem_mode = TYPE_MODE (elem_type);
10358 int elem_size = GET_MODE_SIZE (elem_mode);
10359
10360 if (elem_size < UNITS_PER_WORD)
10361 {
10362 tree real_part, imag_part;
10363 gimple_seq post = NULL;
10364
10365 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
10366 &post);
10367 /* Copy the value into a temporary, lest the formal temporary
10368 be reused out from under us. */
10369 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
10370 gimple_seq_add_seq (pre_p, post);
10371
10372 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
10373 post_p);
10374
10375 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
10376 }
10377 }
10378
10379 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
10380 }
10381
10382 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
10383 f_fpr = DECL_CHAIN (f_gpr);
10384 f_res = DECL_CHAIN (f_fpr);
10385 f_ovf = DECL_CHAIN (f_res);
10386 f_sav = DECL_CHAIN (f_ovf);
10387
10388 valist = build_va_arg_indirect_ref (valist);
10389 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
10390 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
10391 f_fpr, NULL_TREE);
10392 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
10393 f_ovf, NULL_TREE);
10394 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
10395 f_sav, NULL_TREE);
10396
10397 size = int_size_in_bytes (type);
10398 rsize = (size + 3) / 4;
10399 align = 1;
10400
10401 if (TARGET_HARD_FLOAT && TARGET_FPRS
10402 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
10403 || (TARGET_DOUBLE_FLOAT
10404 && (TYPE_MODE (type) == DFmode
10405 || TYPE_MODE (type) == TFmode
10406 || TYPE_MODE (type) == SDmode
10407 || TYPE_MODE (type) == DDmode
10408 || TYPE_MODE (type) == TDmode))))
10409 {
10410 /* FP args go in FP registers, if present. */
10411 reg = fpr;
10412 n_reg = (size + 7) / 8;
10413 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
10414 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
10415 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
10416 align = 8;
10417 }
10418 else
10419 {
10420 /* Otherwise into GP registers. */
10421 reg = gpr;
10422 n_reg = rsize;
10423 sav_ofs = 0;
10424 sav_scale = 4;
10425 if (n_reg == 2)
10426 align = 8;
10427 }
10428
10429 /* Pull the value out of the saved registers.... */
10430
10431 lab_over = NULL;
10432 addr = create_tmp_var (ptr_type_node, "addr");
10433
10434 /* AltiVec vectors never go in registers when -mabi=altivec. */
10435 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
10436 align = 16;
10437 else
10438 {
10439 lab_false = create_artificial_label (input_location);
10440 lab_over = create_artificial_label (input_location);
10441
10442 /* Long long and SPE vectors are aligned in the registers.
10443 As is any other 2-GPR item, such as complex int, due to a
10444 historical mistake. */
10445 u = reg;
10446 if (n_reg == 2 && reg == gpr)
10447 {
10448 regalign = 1;
10449 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
10450 build_int_cst (TREE_TYPE (reg), n_reg - 1));
10451 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
10452 unshare_expr (reg), u);
10453 }
10454 /* _Decimal128 is passed in even/odd fpr pairs; the stored
10455 reg number is 0 for f1, so we want to make it odd. */
10456 else if (reg == fpr && TYPE_MODE (type) == TDmode)
10457 {
10458 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
10459 build_int_cst (TREE_TYPE (reg), 1));
10460 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
10461 }
10462
10463 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
10464 t = build2 (GE_EXPR, boolean_type_node, u, t);
10465 u = build1 (GOTO_EXPR, void_type_node, lab_false);
10466 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
10467 gimplify_and_add (t, pre_p);
10468
10469 t = sav;
10470 if (sav_ofs)
10471 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
10472
10473 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
10474 build_int_cst (TREE_TYPE (reg), n_reg));
10475 u = fold_convert (sizetype, u);
10476 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
10477 t = fold_build_pointer_plus (t, u);
10478
10479 /* _Decimal32 varargs are located in the second word of the 64-bit
10480 FP register for 32-bit binaries. */
10481 if (!TARGET_POWERPC64
10482 && TARGET_HARD_FLOAT && TARGET_FPRS
10483 && TYPE_MODE (type) == SDmode)
10484 t = fold_build_pointer_plus_hwi (t, size);
10485
10486 gimplify_assign (addr, t, pre_p);
10487
10488 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
10489
10490 stmt = gimple_build_label (lab_false);
10491 gimple_seq_add_stmt (pre_p, stmt);
10492
10493 if ((n_reg == 2 && !regalign) || n_reg > 2)
10494 {
10495 /* Ensure that we don't find any more args in regs.
10496 Alignment has been taken care of for the special cases. */
10497 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
10498 }
10499 }
10500
10501 /* ... otherwise out of the overflow area. */
10502
10503 /* Care for on-stack alignment if needed. */
10504 t = ovf;
10505 if (align != 1)
10506 {
10507 t = fold_build_pointer_plus_hwi (t, align - 1);
10508 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
10509 build_int_cst (TREE_TYPE (t), -align));
10510 }
10511 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
10512
10513 gimplify_assign (unshare_expr (addr), t, pre_p);
10514
10515 t = fold_build_pointer_plus_hwi (t, size);
10516 gimplify_assign (unshare_expr (ovf), t, pre_p);
10517
10518 if (lab_over)
10519 {
10520 stmt = gimple_build_label (lab_over);
10521 gimple_seq_add_stmt (pre_p, stmt);
10522 }
10523
10524 if (STRICT_ALIGNMENT
10525 && (TYPE_ALIGN (type)
10526 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
10527 {
10528 /* The value (of type complex double, for example) may not be
10529 aligned in memory in the saved registers, so copy via a
10530 temporary. (This is the same code as used for SPARC.) */
10531 tree tmp = create_tmp_var (type, "va_arg_tmp");
10532 tree dest_addr = build_fold_addr_expr (tmp);
10533
10534 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
10535 3, dest_addr, addr, size_int (rsize * 4));
10536
10537 gimplify_and_add (copy, pre_p);
10538 addr = dest_addr;
10539 }
10540
10541 addr = fold_convert (ptrtype, addr);
10542 return build_va_arg_indirect_ref (addr);
10543 }
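
/* A sketch (illustrative, not compiled) of what the 32-bit SysV case above
   manipulates: the five va_list fields walked via f_gpr..f_sav, and the
   round-up of the overflow pointer to the argument boundary.  The names
   below are hypothetical stand-ins for the real va_list record.  */
#if 0
#include <stdint.h>

struct demo_sysv_va_list {
  unsigned char gpr;        /* next GPR to use, 0..8             (f_gpr) */
  unsigned char fpr;        /* next FPR to use, 0..8             (f_fpr) */
  unsigned short reserved;  /* padding                           (f_res) */
  char *overflow_arg_area;  /* arguments passed on the stack     (f_ovf) */
  char *reg_save_area;      /* where r3-r10 / f1-f8 were dumped  (f_sav) */
};

/* The pointer-plus / BIT_AND_EXPR sequence built above computes, in effect: */
static char *
demo_align_up (char *p, uintptr_t align)
{
  return (char *) (((uintptr_t) p + align - 1) & -align);
}
#endif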
10544
10545 /* Builtins. */
10546
10547 static void
10548 def_builtin (const char *name, tree type, enum rs6000_builtins code)
10549 {
10550 tree t;
10551 unsigned classify = rs6000_builtin_info[(int)code].attr;
10552 const char *attr_string = "";
10553
10554 gcc_assert (name != NULL);
10555 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
10556
10557 if (rs6000_builtin_decls[(int)code])
10558 fatal_error ("internal error: builtin function %s already processed", name);
10559
10560 rs6000_builtin_decls[(int)code] = t =
10561 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
10562
10563 /* Set any special attributes. */
10564 if ((classify & RS6000_BTC_CONST) != 0)
10565 {
10566 /* Const function: the result depends only on the arguments. */
10567 TREE_READONLY (t) = 1;
10568 TREE_NOTHROW (t) = 1;
10569 attr_string = ", const";
10570 }
10571 else if ((classify & RS6000_BTC_PURE) != 0)
10572 {
10573 /* Pure function: may read global memory, but does not set any
10574 external state. */
10575 DECL_PURE_P (t) = 1;
10576 TREE_NOTHROW (t) = 1;
10577 attr_string = ", pure";
10578 }
10579 else if ((classify & RS6000_BTC_FP) != 0)
10580 {
10581 /* Function is a math function. If rounding mode is on, then treat the
10582 function as not reading global memory, but it can have arbitrary side
10583 effects. If it is off, then assume the function is a const function.
10584 This mimics the ATTR_MATHFN_FPROUNDING attribute in
10585 builtin-attribute.def that is used for the math functions. */
10586 TREE_NOTHROW (t) = 1;
10587 if (flag_rounding_math)
10588 {
10589 DECL_PURE_P (t) = 1;
10590 DECL_IS_NOVOPS (t) = 1;
10591 attr_string = ", fp, pure";
10592 }
10593 else
10594 {
10595 TREE_READONLY (t) = 1;
10596 attr_string = ", fp, const";
10597 }
10598 }
10599 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
10600 gcc_unreachable ();
10601
10602 if (TARGET_DEBUG_BUILTIN)
10603 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
10604 (int)code, name, attr_string);
10605 }
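
/* For intuition only: the tree flags set above correspond roughly to the
   C-level function attributes below.  The declarations are hypothetical,
   not part of this file's interface.  */
#if 0
int demo_square (int x) __attribute__ ((const));  /* TREE_READONLY: depends only on args */
int demo_lookup (int key) __attribute__ ((pure)); /* DECL_PURE_P: may read globals, no side effects */
#endif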
10606
10607 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
10608
10609 #undef RS6000_BUILTIN_1
10610 #undef RS6000_BUILTIN_2
10611 #undef RS6000_BUILTIN_3
10612 #undef RS6000_BUILTIN_A
10613 #undef RS6000_BUILTIN_D
10614 #undef RS6000_BUILTIN_E
10615 #undef RS6000_BUILTIN_H
10616 #undef RS6000_BUILTIN_P
10617 #undef RS6000_BUILTIN_Q
10618 #undef RS6000_BUILTIN_S
10619 #undef RS6000_BUILTIN_X
10620
10621 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
10622 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
10623 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
10624 { MASK, ICODE, NAME, ENUM },
10625
10626 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
10627 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
10628 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
10629 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
10630 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
10631 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
10632 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
10633 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
10634
10635 static const struct builtin_description bdesc_3arg[] =
10636 {
10637 #include "rs6000-builtin.def"
10638 };
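
/* The table above, like every bdesc_* table that follows, is built with the
   X-macro pattern: all of the RS6000_BUILTIN_* macros are redefined so that
   exactly one of them expands to an initializer, and rs6000-builtin.def is
   re-included.  A self-contained sketch with made-up names:  */
#if 0
#define DEMO_BUILTIN_LIST \
  DEMO_2ARG (DEMO_ADD, "demo_add") \
  DEMO_3ARG (DEMO_FMA, "demo_fma")

struct demo_desc { const char *name; };

/* Pass: collect only the three-operand entries.  */
#define DEMO_2ARG(ENUM, NAME)            /* expands to nothing */
#define DEMO_3ARG(ENUM, NAME) { NAME },
static const struct demo_desc demo_3arg[] = { DEMO_BUILTIN_LIST };
#undef DEMO_2ARG
#undef DEMO_3ARG
#endif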
10639
10640 /* DST operations: void foo (void *, const int, const char). */
10641
10642 #undef RS6000_BUILTIN_1
10643 #undef RS6000_BUILTIN_2
10644 #undef RS6000_BUILTIN_3
10645 #undef RS6000_BUILTIN_A
10646 #undef RS6000_BUILTIN_D
10647 #undef RS6000_BUILTIN_E
10648 #undef RS6000_BUILTIN_H
10649 #undef RS6000_BUILTIN_P
10650 #undef RS6000_BUILTIN_Q
10651 #undef RS6000_BUILTIN_S
10652 #undef RS6000_BUILTIN_X
10653
10654 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
10655 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
10656 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
10657 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
10658 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
10659 { MASK, ICODE, NAME, ENUM },
10660
10661 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
10662 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
10663 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
10664 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
10665 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
10666 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
10667
10668 static const struct builtin_description bdesc_dst[] =
10669 {
10670 #include "rs6000-builtin.def"
10671 };
10672
10673 /* Simple binary operations: VECc = foo (VECa, VECb). */
10674
10675 #undef RS6000_BUILTIN_1
10676 #undef RS6000_BUILTIN_2
10677 #undef RS6000_BUILTIN_3
10678 #undef RS6000_BUILTIN_A
10679 #undef RS6000_BUILTIN_D
10680 #undef RS6000_BUILTIN_E
10681 #undef RS6000_BUILTIN_H
10682 #undef RS6000_BUILTIN_P
10683 #undef RS6000_BUILTIN_Q
10684 #undef RS6000_BUILTIN_S
10685 #undef RS6000_BUILTIN_X
10686
10687 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
10688 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
10689 { MASK, ICODE, NAME, ENUM },
10690
10691 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
10692 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
10693 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
10694 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
10695 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
10696 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
10697 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
10698 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
10699 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
10700
10701 static const struct builtin_description bdesc_2arg[] =
10702 {
10703 #include "rs6000-builtin.def"
10704 };
10705
10706 #undef RS6000_BUILTIN_1
10707 #undef RS6000_BUILTIN_2
10708 #undef RS6000_BUILTIN_3
10709 #undef RS6000_BUILTIN_A
10710 #undef RS6000_BUILTIN_D
10711 #undef RS6000_BUILTIN_E
10712 #undef RS6000_BUILTIN_H
10713 #undef RS6000_BUILTIN_P
10714 #undef RS6000_BUILTIN_Q
10715 #undef RS6000_BUILTIN_S
10716 #undef RS6000_BUILTIN_X
10717
10718 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
10719 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
10720 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
10721 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
10722 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
10723 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
10724 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
10725 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
10726 { MASK, ICODE, NAME, ENUM },
10727
10728 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
10729 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
10730 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
10731
10732 /* AltiVec predicates. */
10733
10734 static const struct builtin_description bdesc_altivec_preds[] =
10735 {
10736 #include "rs6000-builtin.def"
10737 };
10738
10739 /* SPE predicates. */
10740 #undef RS6000_BUILTIN_1
10741 #undef RS6000_BUILTIN_2
10742 #undef RS6000_BUILTIN_3
10743 #undef RS6000_BUILTIN_A
10744 #undef RS6000_BUILTIN_D
10745 #undef RS6000_BUILTIN_E
10746 #undef RS6000_BUILTIN_H
10747 #undef RS6000_BUILTIN_P
10748 #undef RS6000_BUILTIN_Q
10749 #undef RS6000_BUILTIN_S
10750 #undef RS6000_BUILTIN_X
10751
10752 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
10753 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
10754 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
10755 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
10756 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
10757 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
10758 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
10759 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
10760 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
10761 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
10762 { MASK, ICODE, NAME, ENUM },
10763
10764 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
10765
10766 static const struct builtin_description bdesc_spe_predicates[] =
10767 {
10768 #include "rs6000-builtin.def"
10769 };
10770
10771 /* SPE evsel predicates. */
10772 #undef RS6000_BUILTIN_1
10773 #undef RS6000_BUILTIN_2
10774 #undef RS6000_BUILTIN_3
10775 #undef RS6000_BUILTIN_A
10776 #undef RS6000_BUILTIN_D
10777 #undef RS6000_BUILTIN_E
10778 #undef RS6000_BUILTIN_H
10779 #undef RS6000_BUILTIN_P
10780 #undef RS6000_BUILTIN_Q
10781 #undef RS6000_BUILTIN_S
10782 #undef RS6000_BUILTIN_X
10783
10784 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
10785 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
10786 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
10787 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
10788 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
10789 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
10790 { MASK, ICODE, NAME, ENUM },
10791
10792 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
10793 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
10794 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
10795 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
10796 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
10797
10798 static const struct builtin_description bdesc_spe_evsel[] =
10799 {
10800 #include "rs6000-builtin.def"
10801 };
10802
10803 /* PAIRED predicates. */
10804 #undef RS6000_BUILTIN_1
10805 #undef RS6000_BUILTIN_2
10806 #undef RS6000_BUILTIN_3
10807 #undef RS6000_BUILTIN_A
10808 #undef RS6000_BUILTIN_D
10809 #undef RS6000_BUILTIN_E
10810 #undef RS6000_BUILTIN_H
10811 #undef RS6000_BUILTIN_P
10812 #undef RS6000_BUILTIN_Q
10813 #undef RS6000_BUILTIN_S
10814 #undef RS6000_BUILTIN_X
10815
10816 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
10817 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
10818 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
10819 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
10820 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
10821 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
10822 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
10823 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
10824 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
10825 { MASK, ICODE, NAME, ENUM },
10826
10827 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
10828 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
10829
10830 static const struct builtin_description bdesc_paired_preds[] =
10831 {
10832 #include "rs6000-builtin.def"
10833 };
10834
10835 /* ABS* operations. */
10836
10837 #undef RS6000_BUILTIN_1
10838 #undef RS6000_BUILTIN_2
10839 #undef RS6000_BUILTIN_3
10840 #undef RS6000_BUILTIN_A
10841 #undef RS6000_BUILTIN_D
10842 #undef RS6000_BUILTIN_E
10843 #undef RS6000_BUILTIN_H
10844 #undef RS6000_BUILTIN_P
10845 #undef RS6000_BUILTIN_Q
10846 #undef RS6000_BUILTIN_S
10847 #undef RS6000_BUILTIN_X
10848
10849 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
10850 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
10851 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
10852 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
10853 { MASK, ICODE, NAME, ENUM },
10854
10855 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
10856 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
10857 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
10858 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
10859 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
10860 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
10861 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
10862
10863 static const struct builtin_description bdesc_abs[] =
10864 {
10865 #include "rs6000-builtin.def"
10866 };
10867
10868 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
10869 foo (VECa). */
10870
10871 #undef RS6000_BUILTIN_1
10872 #undef RS6000_BUILTIN_2
10873 #undef RS6000_BUILTIN_3
10874 #undef RS6000_BUILTIN_A
10875 #undef RS6000_BUILTIN_D
10876 #undef RS6000_BUILTIN_E
10877 #undef RS6000_BUILTIN_H
10878 #undef RS6000_BUILTIN_P
10879 #undef RS6000_BUILTIN_Q
10880 #undef RS6000_BUILTIN_S
10881 #undef RS6000_BUILTIN_X
10882
10883 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
10884 { MASK, ICODE, NAME, ENUM },
10885
10886 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
10887 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
10888 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
10889 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
10890 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
10891 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
10892 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
10893 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
10894 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
10895 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
10896
10897 static const struct builtin_description bdesc_1arg[] =
10898 {
10899 #include "rs6000-builtin.def"
10900 };
10901
10902 /* HTM builtins. */
10903 #undef RS6000_BUILTIN_1
10904 #undef RS6000_BUILTIN_2
10905 #undef RS6000_BUILTIN_3
10906 #undef RS6000_BUILTIN_A
10907 #undef RS6000_BUILTIN_D
10908 #undef RS6000_BUILTIN_E
10909 #undef RS6000_BUILTIN_H
10910 #undef RS6000_BUILTIN_P
10911 #undef RS6000_BUILTIN_Q
10912 #undef RS6000_BUILTIN_S
10913 #undef RS6000_BUILTIN_X
10914
10915 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
10916 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
10917 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
10918 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
10919 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
10920 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
10921 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
10922 { MASK, ICODE, NAME, ENUM },
10923
10924 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
10925 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
10926 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
10927 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
10928
10929 static const struct builtin_description bdesc_htm[] =
10930 {
10931 #include "rs6000-builtin.def"
10932 };
10933
10934 #undef RS6000_BUILTIN_1
10935 #undef RS6000_BUILTIN_2
10936 #undef RS6000_BUILTIN_3
10937 #undef RS6000_BUILTIN_A
10938 #undef RS6000_BUILTIN_D
10939 #undef RS6000_BUILTIN_E
10940 #undef RS6000_BUILTIN_H
10941 #undef RS6000_BUILTIN_P
10942 #undef RS6000_BUILTIN_Q
10943 #undef RS6000_BUILTIN_S
10944
10945 /* Return true if a builtin function is overloaded. */
10946 bool
10947 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
10948 {
10949 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
10950 }
10951
10952 /* Expand an expression EXP that calls a builtin without arguments. */
10953 static rtx
10954 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
10955 {
10956 rtx pat;
10957 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10958
10959 if (icode == CODE_FOR_nothing)
10960 /* Builtin not supported on this processor. */
10961 return 0;
10962
10963 if (target == 0
10964 || GET_MODE (target) != tmode
10965 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10966 target = gen_reg_rtx (tmode);
10967
10968 pat = GEN_FCN (icode) (target);
10969 if (! pat)
10970 return 0;
10971 emit_insn (pat);
10972
10973 return target;
10974 }
10975
10976
10977 static rtx
10978 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
10979 {
10980 rtx pat;
10981 tree arg0 = CALL_EXPR_ARG (exp, 0);
10982 rtx op0 = expand_normal (arg0);
10983 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10984 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
10985
10986 if (icode == CODE_FOR_nothing)
10987 /* Builtin not supported on this processor. */
10988 return 0;
10989
10990 /* If we got invalid arguments bail out before generating bad rtl. */
10991 if (arg0 == error_mark_node)
10992 return const0_rtx;
10993
10994 if (icode == CODE_FOR_altivec_vspltisb
10995 || icode == CODE_FOR_altivec_vspltish
10996 || icode == CODE_FOR_altivec_vspltisw
10997 || icode == CODE_FOR_spe_evsplatfi
10998 || icode == CODE_FOR_spe_evsplati)
10999 {
11000 /* Only allow 5-bit *signed* literals. */
11001 if (GET_CODE (op0) != CONST_INT
11002 || INTVAL (op0) > 15
11003 || INTVAL (op0) < -16)
11004 {
11005 error ("argument 1 must be a 5-bit signed literal");
11006 return const0_rtx;
11007 }
11008 }
11009
11010 if (target == 0
11011 || GET_MODE (target) != tmode
11012 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11013 target = gen_reg_rtx (tmode);
11014
11015 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11016 op0 = copy_to_mode_reg (mode0, op0);
11017
11018 pat = GEN_FCN (icode) (target, op0);
11019 if (! pat)
11020 return 0;
11021 emit_insn (pat);
11022
11023 return target;
11024 }
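
/* A sketch (assuming <altivec.h> and -maltivec) of how the 5-bit signed
   literal check above fires at the source level:  */
#if 0
#include <altivec.h>
vector signed int demo_ok (void)  { return vec_splat_s32 (-16); } /* in range -16..15 */
vector signed int demo_bad (void) { return vec_splat_s32 (99);  } /* "argument 1 must be a 5-bit signed literal" */
#endif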
11025
11026 static rtx
11027 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
11028 {
11029 rtx pat, scratch1, scratch2;
11030 tree arg0 = CALL_EXPR_ARG (exp, 0);
11031 rtx op0 = expand_normal (arg0);
11032 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11033 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11034
11035 /* If we have invalid arguments, bail out before generating bad rtl. */
11036 if (arg0 == error_mark_node)
11037 return const0_rtx;
11038
11039 if (target == 0
11040 || GET_MODE (target) != tmode
11041 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11042 target = gen_reg_rtx (tmode);
11043
11044 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11045 op0 = copy_to_mode_reg (mode0, op0);
11046
11047 scratch1 = gen_reg_rtx (mode0);
11048 scratch2 = gen_reg_rtx (mode0);
11049
11050 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
11051 if (! pat)
11052 return 0;
11053 emit_insn (pat);
11054
11055 return target;
11056 }
11057
11058 static rtx
11059 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
11060 {
11061 rtx pat;
11062 tree arg0 = CALL_EXPR_ARG (exp, 0);
11063 tree arg1 = CALL_EXPR_ARG (exp, 1);
11064 rtx op0 = expand_normal (arg0);
11065 rtx op1 = expand_normal (arg1);
11066 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11067 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11068 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11069
11070 if (icode == CODE_FOR_nothing)
11071 /* Builtin not supported on this processor. */
11072 return 0;
11073
11074 /* If we got invalid arguments bail out before generating bad rtl. */
11075 if (arg0 == error_mark_node || arg1 == error_mark_node)
11076 return const0_rtx;
11077
11078 if (icode == CODE_FOR_altivec_vcfux
11079 || icode == CODE_FOR_altivec_vcfsx
11080 || icode == CODE_FOR_altivec_vctsxs
11081 || icode == CODE_FOR_altivec_vctuxs
11082 || icode == CODE_FOR_altivec_vspltb
11083 || icode == CODE_FOR_altivec_vsplth
11084 || icode == CODE_FOR_altivec_vspltw
11085 || icode == CODE_FOR_spe_evaddiw
11086 || icode == CODE_FOR_spe_evldd
11087 || icode == CODE_FOR_spe_evldh
11088 || icode == CODE_FOR_spe_evldw
11089 || icode == CODE_FOR_spe_evlhhesplat
11090 || icode == CODE_FOR_spe_evlhhossplat
11091 || icode == CODE_FOR_spe_evlhhousplat
11092 || icode == CODE_FOR_spe_evlwhe
11093 || icode == CODE_FOR_spe_evlwhos
11094 || icode == CODE_FOR_spe_evlwhou
11095 || icode == CODE_FOR_spe_evlwhsplat
11096 || icode == CODE_FOR_spe_evlwwsplat
11097 || icode == CODE_FOR_spe_evrlwi
11098 || icode == CODE_FOR_spe_evslwi
11099 || icode == CODE_FOR_spe_evsrwis
11100 || icode == CODE_FOR_spe_evsubifw
11101 || icode == CODE_FOR_spe_evsrwiu)
11102 {
11103 /* Only allow 5-bit unsigned literals. */
11104 STRIP_NOPS (arg1);
11105 if (TREE_CODE (arg1) != INTEGER_CST
11106 || TREE_INT_CST_LOW (arg1) & ~0x1f)
11107 {
11108 error ("argument 2 must be a 5-bit unsigned literal");
11109 return const0_rtx;
11110 }
11111 }
11112
11113 if (target == 0
11114 || GET_MODE (target) != tmode
11115 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11116 target = gen_reg_rtx (tmode);
11117
11118 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11119 op0 = copy_to_mode_reg (mode0, op0);
11120 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11121 op1 = copy_to_mode_reg (mode1, op1);
11122
11123 pat = GEN_FCN (icode) (target, op0, op1);
11124 if (! pat)
11125 return 0;
11126 emit_insn (pat);
11127
11128 return target;
11129 }
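
/* Likewise for the binary case (a sketch, assuming <altivec.h>): vec_ctf
   maps to altivec_vcfsx above, so its scale operand must be a 5-bit
   unsigned literal in 0..31.  */
#if 0
#include <altivec.h>
vector float demo_scale (vector signed int v) { return vec_ctf (v, 4); }
#endif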
11130
11131 static rtx
11132 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
11133 {
11134 rtx pat, scratch;
11135 tree cr6_form = CALL_EXPR_ARG (exp, 0);
11136 tree arg0 = CALL_EXPR_ARG (exp, 1);
11137 tree arg1 = CALL_EXPR_ARG (exp, 2);
11138 rtx op0 = expand_normal (arg0);
11139 rtx op1 = expand_normal (arg1);
11140 enum machine_mode tmode = SImode;
11141 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11142 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11143 int cr6_form_int;
11144
11145 if (TREE_CODE (cr6_form) != INTEGER_CST)
11146 {
11147 error ("argument 1 of __builtin_altivec_predicate must be a constant");
11148 return const0_rtx;
11149 }
11150 else
11151 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
11152
11153 gcc_assert (mode0 == mode1);
11154
11155 /* If we have invalid arguments, bail out before generating bad rtl. */
11156 if (arg0 == error_mark_node || arg1 == error_mark_node)
11157 return const0_rtx;
11158
11159 if (target == 0
11160 || GET_MODE (target) != tmode
11161 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11162 target = gen_reg_rtx (tmode);
11163
11164 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11165 op0 = copy_to_mode_reg (mode0, op0);
11166 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11167 op1 = copy_to_mode_reg (mode1, op1);
11168
11169 scratch = gen_reg_rtx (mode0);
11170
11171 pat = GEN_FCN (icode) (scratch, op0, op1);
11172 if (! pat)
11173 return 0;
11174 emit_insn (pat);
11175
11176 /* The vec_any* and vec_all* predicates use the same opcodes for two
11177 different operations, but the bits in CR6 will be different
11178 depending on what information we want. So we have to play tricks
11179 with CR6 to get the right bits out.
11180
11181 If you think this is disgusting, look at the specs for the
11182 AltiVec predicates. */
11183
11184 switch (cr6_form_int)
11185 {
11186 case 0:
11187 emit_insn (gen_cr6_test_for_zero (target));
11188 break;
11189 case 1:
11190 emit_insn (gen_cr6_test_for_zero_reverse (target));
11191 break;
11192 case 2:
11193 emit_insn (gen_cr6_test_for_lt (target));
11194 break;
11195 case 3:
11196 emit_insn (gen_cr6_test_for_lt_reverse (target));
11197 break;
11198 default:
11199 error ("argument 1 of __builtin_altivec_predicate is out of range");
11200 break;
11201 }
11202
11203 return target;
11204 }
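
/* At the source level (a sketch, assuming <altivec.h>), vec_all_* and
   vec_any_* both reach this expander with the same compare opcode; only
   the cr6_form operand, and hence the CR6 bit tested, differs.  */
#if 0
#include <altivec.h>
int demo_all_eq (vector signed int a, vector signed int b) { return vec_all_eq (a, b); }
int demo_any_eq (vector signed int a, vector signed int b) { return vec_any_eq (a, b); }
#endif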
11205
11206 static rtx
11207 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
11208 {
11209 rtx pat, addr;
11210 tree arg0 = CALL_EXPR_ARG (exp, 0);
11211 tree arg1 = CALL_EXPR_ARG (exp, 1);
11212 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11213 enum machine_mode mode0 = Pmode;
11214 enum machine_mode mode1 = Pmode;
11215 rtx op0 = expand_normal (arg0);
11216 rtx op1 = expand_normal (arg1);
11217
11218 if (icode == CODE_FOR_nothing)
11219 /* Builtin not supported on this processor. */
11220 return 0;
11221
11222 /* If we got invalid arguments bail out before generating bad rtl. */
11223 if (arg0 == error_mark_node || arg1 == error_mark_node)
11224 return const0_rtx;
11225
11226 if (target == 0
11227 || GET_MODE (target) != tmode
11228 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11229 target = gen_reg_rtx (tmode);
11230
11231 op1 = copy_to_mode_reg (mode1, op1);
11232
11233 if (op0 == const0_rtx)
11234 {
11235 addr = gen_rtx_MEM (tmode, op1);
11236 }
11237 else
11238 {
11239 op0 = copy_to_mode_reg (mode0, op0);
11240 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
11241 }
11242
11243 pat = GEN_FCN (icode) (target, addr);
11244
11245 if (! pat)
11246 return 0;
11247 emit_insn (pat);
11248
11249 return target;
11250 }
11251
11252 static rtx
11253 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
11254 {
11255 rtx pat, addr;
11256 tree arg0 = CALL_EXPR_ARG (exp, 0);
11257 tree arg1 = CALL_EXPR_ARG (exp, 1);
11258 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11259 enum machine_mode mode0 = Pmode;
11260 enum machine_mode mode1 = Pmode;
11261 rtx op0 = expand_normal (arg0);
11262 rtx op1 = expand_normal (arg1);
11263
11264 if (icode == CODE_FOR_nothing)
11265 /* Builtin not supported on this processor. */
11266 return 0;
11267
11268 /* If we got invalid arguments bail out before generating bad rtl. */
11269 if (arg0 == error_mark_node || arg1 == error_mark_node)
11270 return const0_rtx;
11271
11272 if (target == 0
11273 || GET_MODE (target) != tmode
11274 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11275 target = gen_reg_rtx (tmode);
11276
11277 op1 = copy_to_mode_reg (mode1, op1);
11278
11279 if (op0 == const0_rtx)
11280 {
11281 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
11282 }
11283 else
11284 {
11285 op0 = copy_to_mode_reg (mode0, op0);
11286 addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
11287 }
11288
11289 pat = GEN_FCN (icode) (target, addr);
11290
11291 if (! pat)
11292 return 0;
11293 emit_insn (pat);
11294
11295 return target;
11296 }
11297
11298 static rtx
11299 spe_expand_stv_builtin (enum insn_code icode, tree exp)
11300 {
11301 tree arg0 = CALL_EXPR_ARG (exp, 0);
11302 tree arg1 = CALL_EXPR_ARG (exp, 1);
11303 tree arg2 = CALL_EXPR_ARG (exp, 2);
11304 rtx op0 = expand_normal (arg0);
11305 rtx op1 = expand_normal (arg1);
11306 rtx op2 = expand_normal (arg2);
11307 rtx pat;
11308 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11309 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11310 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
11311
11312 /* If we got invalid arguments, bail out before generating bad rtl. */
11313 if (arg0 == error_mark_node
11314 || arg1 == error_mark_node
11315 || arg2 == error_mark_node)
11316 return const0_rtx;
11317
11318 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
11319 op0 = copy_to_mode_reg (mode2, op0);
11320 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
11321 op1 = copy_to_mode_reg (mode0, op1);
11322 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
11323 op2 = copy_to_mode_reg (mode1, op2);
11324
11325 pat = GEN_FCN (icode) (op1, op2, op0);
11326 if (pat)
11327 emit_insn (pat);
11328 return NULL_RTX;
11329 }
11330
11331 static rtx
11332 paired_expand_stv_builtin (enum insn_code icode, tree exp)
11333 {
11334 tree arg0 = CALL_EXPR_ARG (exp, 0);
11335 tree arg1 = CALL_EXPR_ARG (exp, 1);
11336 tree arg2 = CALL_EXPR_ARG (exp, 2);
11337 rtx op0 = expand_normal (arg0);
11338 rtx op1 = expand_normal (arg1);
11339 rtx op2 = expand_normal (arg2);
11340 rtx pat, addr;
11341 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11342 enum machine_mode mode1 = Pmode;
11343 enum machine_mode mode2 = Pmode;
11344
11345 /* If we got invalid arguments, bail out before generating bad rtl. */
11346 if (arg0 == error_mark_node
11347 || arg1 == error_mark_node
11348 || arg2 == error_mark_node)
11349 return const0_rtx;
11350
11351 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
11352 op0 = copy_to_mode_reg (tmode, op0);
11353
11354 op2 = copy_to_mode_reg (mode2, op2);
11355
11356 if (op1 == const0_rtx)
11357 {
11358 addr = gen_rtx_MEM (tmode, op2);
11359 }
11360 else
11361 {
11362 op1 = copy_to_mode_reg (mode1, op1);
11363 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
11364 }
11365
11366 pat = GEN_FCN (icode) (addr, op0);
11367 if (pat)
11368 emit_insn (pat);
11369 return NULL_RTX;
11370 }
11371
11372 static rtx
11373 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
11374 {
11375 tree arg0 = CALL_EXPR_ARG (exp, 0);
11376 tree arg1 = CALL_EXPR_ARG (exp, 1);
11377 tree arg2 = CALL_EXPR_ARG (exp, 2);
11378 rtx op0 = expand_normal (arg0);
11379 rtx op1 = expand_normal (arg1);
11380 rtx op2 = expand_normal (arg2);
11381 rtx pat, addr;
11382 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11383 enum machine_mode smode = insn_data[icode].operand[1].mode;
11384 enum machine_mode mode1 = Pmode;
11385 enum machine_mode mode2 = Pmode;
11386
11387 /* If we got invalid arguments, bail out before generating bad rtl. */
11388 if (arg0 == error_mark_node
11389 || arg1 == error_mark_node
11390 || arg2 == error_mark_node)
11391 return const0_rtx;
11392
11393 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
11394 op0 = copy_to_mode_reg (smode, op0);
11395
11396 op2 = copy_to_mode_reg (mode2, op2);
11397
11398 if (op1 == const0_rtx)
11399 {
11400 addr = gen_rtx_MEM (tmode, op2);
11401 }
11402 else
11403 {
11404 op1 = copy_to_mode_reg (mode1, op1);
11405 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
11406 }
11407
11408 pat = GEN_FCN (icode) (addr, op0);
11409 if (pat)
11410 emit_insn (pat);
11411 return NULL_RTX;
11412 }
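
/* The lv/stv expanders above build a (base + offset) MEM from the two
   address operands.  A sketch of the corresponding source forms (assuming
   <altivec.h>; lvx/stvx ignore the low four bits of the address):  */
#if 0
#include <altivec.h>
vector signed int demo_load (const signed int *p) { return vec_ld (0, p); }
void demo_store (vector signed int v, signed int *p) { vec_st (v, 0, p); }
#endif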
11413
11414 /* Return the appropriate SPR number associated with the given builtin. */
11415 static inline HOST_WIDE_INT
11416 htm_spr_num (enum rs6000_builtins code)
11417 {
11418 if (code == HTM_BUILTIN_GET_TFHAR
11419 || code == HTM_BUILTIN_SET_TFHAR)
11420 return TFHAR_SPR;
11421 else if (code == HTM_BUILTIN_GET_TFIAR
11422 || code == HTM_BUILTIN_SET_TFIAR)
11423 return TFIAR_SPR;
11424 else if (code == HTM_BUILTIN_GET_TEXASR
11425 || code == HTM_BUILTIN_SET_TEXASR)
11426 return TEXASR_SPR;
11427 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
11428 || code == HTM_BUILTIN_SET_TEXASRU);
11429 return TEXASRU_SPR;
11430 }
11431
11432 /* Return the appropriate SPR regno associated with the given builtin. */
11433 static inline HOST_WIDE_INT
11434 htm_spr_regno (enum rs6000_builtins code)
11435 {
11436 if (code == HTM_BUILTIN_GET_TFHAR
11437 || code == HTM_BUILTIN_SET_TFHAR)
11438 return TFHAR_REGNO;
11439 else if (code == HTM_BUILTIN_GET_TFIAR
11440 || code == HTM_BUILTIN_SET_TFIAR)
11441 return TFIAR_REGNO;
11442 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
11443 || code == HTM_BUILTIN_SET_TEXASR
11444 || code == HTM_BUILTIN_GET_TEXASRU
11445 || code == HTM_BUILTIN_SET_TEXASRU);
11446 return TEXASR_REGNO;
11447 }
11448
11449 /* Return the correct ICODE value depending on whether we are
11450 setting or reading the HTM SPRs. */
11451 static inline enum insn_code
11452 rs6000_htm_spr_icode (bool nonvoid)
11453 {
11454 if (nonvoid)
11455 return (TARGET_64BIT) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
11456 else
11457 return (TARGET_64BIT) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
11458 }
11459
11460 /* Expand the HTM builtin in EXP and store the result in TARGET.
11461 Store true in *EXPANDEDP if we found a builtin to expand. */
11462 static rtx
11463 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
11464 {
11465 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11466 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11467 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
11468 const struct builtin_description *d;
11469 size_t i;
11470
11471 *expandedp = false;
11472
11473 /* Expand the HTM builtins. */
11474 d = bdesc_htm;
11475 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
11476 if (d->code == fcode)
11477 {
11478 rtx op[MAX_HTM_OPERANDS], pat;
11479 int nopnds = 0;
11480 tree arg;
11481 call_expr_arg_iterator iter;
11482 unsigned attr = rs6000_builtin_info[fcode].attr;
11483 enum insn_code icode = d->icode;
11484
11485 if (attr & RS6000_BTC_SPR)
11486 icode = rs6000_htm_spr_icode (nonvoid);
11487
11488 if (nonvoid)
11489 {
11490 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11491 if (!target
11492 || GET_MODE (target) != tmode
11493 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
11494 target = gen_reg_rtx (tmode);
11495 op[nopnds++] = target;
11496 }
11497
11498 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11499 {
11500 const struct insn_operand_data *insn_op;
11501
11502 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
11503 return NULL_RTX;
11504
11505 insn_op = &insn_data[icode].operand[nopnds];
11506
11507 op[nopnds] = expand_normal (arg);
11508
11509 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
11510 {
11511 if (!strcmp (insn_op->constraint, "n"))
11512 {
11513 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
11514 if (!CONST_INT_P (op[nopnds]))
11515 error ("argument %d must be an unsigned literal", arg_num);
11516 else
11517 error ("argument %d is an unsigned literal that is "
11518 "out of range", arg_num);
11519 return const0_rtx;
11520 }
11521 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
11522 }
11523
11524 nopnds++;
11525 }
11526
11527 /* Handle the builtins for extended mnemonics. These accept
11528 no arguments, but map to builtins that take arguments. */
11529 switch (fcode)
11530 {
11531 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
11532 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
11533 op[nopnds++] = GEN_INT (1);
11534 #ifdef ENABLE_CHECKING
11535 attr |= RS6000_BTC_UNARY;
11536 #endif
11537 break;
11538 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
11539 op[nopnds++] = GEN_INT (0);
11540 #ifdef ENABLE_CHECKING
11541 attr |= RS6000_BTC_UNARY;
11542 #endif
11543 break;
11544 default:
11545 break;
11546 }
11547
11548 /* If this builtin accesses SPRs, then pass in the appropriate
11549 SPR number and SPR regno as the last two operands. */
11550 if (attr & RS6000_BTC_SPR)
11551 {
11552 op[nopnds++] = gen_rtx_CONST_INT (Pmode, htm_spr_num (fcode));
11553 op[nopnds++] = gen_rtx_REG (Pmode, htm_spr_regno (fcode));
11554 }
11555
11556 #ifdef ENABLE_CHECKING
11557 int expected_nopnds = 0;
11558 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
11559 expected_nopnds = 1;
11560 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
11561 expected_nopnds = 2;
11562 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
11563 expected_nopnds = 3;
11564 if (!(attr & RS6000_BTC_VOID))
11565 expected_nopnds += 1;
11566 if (attr & RS6000_BTC_SPR)
11567 expected_nopnds += 2;
11568
11569 gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
11570 #endif
11571
11572 switch (nopnds)
11573 {
11574 case 1:
11575 pat = GEN_FCN (icode) (op[0]);
11576 break;
11577 case 2:
11578 pat = GEN_FCN (icode) (op[0], op[1]);
11579 break;
11580 case 3:
11581 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11582 break;
11583 case 4:
11584 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11585 break;
11586 default:
11587 gcc_unreachable ();
11588 }
11589 if (!pat)
11590 return NULL_RTX;
11591 emit_insn (pat);
11592
11593 *expandedp = true;
11594 if (nonvoid)
11595 return target;
11596 return const0_rtx;
11597 }
11598
11599 return NULL_RTX;
11600 }
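
/* A sketch of the documented usage of the HTM builtins expanded above;
   __builtin_tbegin returns nonzero when the transaction starts.  (The
   failure path here is illustrative, not a real retry/lock protocol.)  */
#if 0
int demo_counter;
void
demo_increment (void)
{
  if (__builtin_tbegin (0))
    {
      demo_counter++;        /* transactional */
      __builtin_tend (0);
    }
  else
    demo_counter++;          /* failure path */
}
#endif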
11601
11602 static rtx
11603 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
11604 {
11605 rtx pat;
11606 tree arg0 = CALL_EXPR_ARG (exp, 0);
11607 tree arg1 = CALL_EXPR_ARG (exp, 1);
11608 tree arg2 = CALL_EXPR_ARG (exp, 2);
11609 rtx op0 = expand_normal (arg0);
11610 rtx op1 = expand_normal (arg1);
11611 rtx op2 = expand_normal (arg2);
11612 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11613 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11614 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11615 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
11616
11617 if (icode == CODE_FOR_nothing)
11618 /* Builtin not supported on this processor. */
11619 return 0;
11620
11621 /* If we got invalid arguments bail out before generating bad rtl. */
11622 if (arg0 == error_mark_node
11623 || arg1 == error_mark_node
11624 || arg2 == error_mark_node)
11625 return const0_rtx;
11626
11627 /* Check and prepare argument depending on the instruction code.
11628
11629 Note that a switch statement instead of the sequence of tests
11630 would be incorrect as many of the CODE_FOR values could be
11631 CODE_FOR_nothing and that would yield multiple alternatives
11632 with identical values. We'd never reach here at runtime in
11633 this case. */
11634 if (icode == CODE_FOR_altivec_vsldoi_v4sf
11635 || icode == CODE_FOR_altivec_vsldoi_v4si
11636 || icode == CODE_FOR_altivec_vsldoi_v8hi
11637 || icode == CODE_FOR_altivec_vsldoi_v16qi)
11638 {
11639 /* Only allow 4-bit unsigned literals. */
11640 STRIP_NOPS (arg2);
11641 if (TREE_CODE (arg2) != INTEGER_CST
11642 || TREE_INT_CST_LOW (arg2) & ~0xf)
11643 {
11644 error ("argument 3 must be a 4-bit unsigned literal");
11645 return const0_rtx;
11646 }
11647 }
11648 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
11649 || icode == CODE_FOR_vsx_xxpermdi_v2di
11650 || icode == CODE_FOR_vsx_xxsldwi_v16qi
11651 || icode == CODE_FOR_vsx_xxsldwi_v8hi
11652 || icode == CODE_FOR_vsx_xxsldwi_v4si
11653 || icode == CODE_FOR_vsx_xxsldwi_v4sf
11654 || icode == CODE_FOR_vsx_xxsldwi_v2di
11655 || icode == CODE_FOR_vsx_xxsldwi_v2df)
11656 {
11657 /* Only allow 2-bit unsigned literals. */
11658 STRIP_NOPS (arg2);
11659 if (TREE_CODE (arg2) != INTEGER_CST
11660 || TREE_INT_CST_LOW (arg2) & ~0x3)
11661 {
11662 error ("argument 3 must be a 2-bit unsigned literal");
11663 return const0_rtx;
11664 }
11665 }
11666 else if (icode == CODE_FOR_vsx_set_v2df
11667 || icode == CODE_FOR_vsx_set_v2di)
11668 {
11669 /* Only allow 1-bit unsigned literals. */
11670 STRIP_NOPS (arg2);
11671 if (TREE_CODE (arg2) != INTEGER_CST
11672 || TREE_INT_CST_LOW (arg2) & ~0x1)
11673 {
11674 error ("argument 3 must be a 1-bit unsigned literal");
11675 return const0_rtx;
11676 }
11677 }
11678 else if (icode == CODE_FOR_crypto_vshasigmaw
11679 || icode == CODE_FOR_crypto_vshasigmad)
11680 {
11681 /* Check whether the 2nd and 3rd arguments are integer constants and in
11682 range and prepare arguments. */
11683 STRIP_NOPS (arg1);
11684 if (TREE_CODE (arg1) != INTEGER_CST
11685 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
11686 {
11687 error ("argument 2 must be 0 or 1");
11688 return const0_rtx;
11689 }
11690
11691 STRIP_NOPS (arg2);
11692 if (TREE_CODE (arg2) != INTEGER_CST
11693 || !IN_RANGE (TREE_INT_CST_LOW (arg2), 0, 15))
11694 {
11695 error ("argument 3 must be in the range 0..15");
11696 return const0_rtx;
11697 }
11698 }
11699
11700 if (target == 0
11701 || GET_MODE (target) != tmode
11702 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11703 target = gen_reg_rtx (tmode);
11704
11705 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11706 op0 = copy_to_mode_reg (mode0, op0);
11707 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11708 op1 = copy_to_mode_reg (mode1, op1);
11709 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
11710 op2 = copy_to_mode_reg (mode2, op2);
11711
11712 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
11713 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
11714 else
11715 pat = GEN_FCN (icode) (target, op0, op1, op2);
11716 if (! pat)
11717 return 0;
11718 emit_insn (pat);
11719
11720 return target;
11721 }
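
/* At the source level (a sketch, assuming <altivec.h>), vec_sld maps to
   the altivec_vsldoi_* codes above, so its shift operand must be a 4-bit
   unsigned literal:  */
#if 0
#include <altivec.h>
vector signed int
demo_sld (vector signed int a, vector signed int b)
{
  return vec_sld (a, b, 4); /* 0..15 accepted; anything else is rejected */
}
#endif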
11722
11723 /* Expand the lvx builtins. */
11724 static rtx
11725 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
11726 {
11727 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11728 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11729 tree arg0;
11730 enum machine_mode tmode, mode0;
11731 rtx pat, op0;
11732 enum insn_code icode;
11733
11734 switch (fcode)
11735 {
11736 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
11737 icode = CODE_FOR_vector_altivec_load_v16qi;
11738 break;
11739 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
11740 icode = CODE_FOR_vector_altivec_load_v8hi;
11741 break;
11742 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
11743 icode = CODE_FOR_vector_altivec_load_v4si;
11744 break;
11745 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
11746 icode = CODE_FOR_vector_altivec_load_v4sf;
11747 break;
11748 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
11749 icode = CODE_FOR_vector_altivec_load_v2df;
11750 break;
11751 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
11752 icode = CODE_FOR_vector_altivec_load_v2di;
11753 break;
11754 default:
11755 *expandedp = false;
11756 return NULL_RTX;
11757 }
11758
11759 *expandedp = true;
11760
11761 arg0 = CALL_EXPR_ARG (exp, 0);
11762 op0 = expand_normal (arg0);
11763 tmode = insn_data[icode].operand[0].mode;
11764 mode0 = insn_data[icode].operand[1].mode;
11765
11766 if (target == 0
11767 || GET_MODE (target) != tmode
11768 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11769 target = gen_reg_rtx (tmode);
11770
11771 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11772 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11773
11774 pat = GEN_FCN (icode) (target, op0);
11775 if (! pat)
11776 return 0;
11777 emit_insn (pat);
11778 return target;
11779 }
11780
11781 /* Expand the stvx builtins. */
11782 static rtx
11783 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
11784 bool *expandedp)
11785 {
11786 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11787 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11788 tree arg0, arg1;
11789 enum machine_mode mode0, mode1;
11790 rtx pat, op0, op1;
11791 enum insn_code icode;
11792
11793 switch (fcode)
11794 {
11795 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
11796 icode = CODE_FOR_vector_altivec_store_v16qi;
11797 break;
11798 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
11799 icode = CODE_FOR_vector_altivec_store_v8hi;
11800 break;
11801 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
11802 icode = CODE_FOR_vector_altivec_store_v4si;
11803 break;
11804 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
11805 icode = CODE_FOR_vector_altivec_store_v4sf;
11806 break;
11807 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
11808 icode = CODE_FOR_vector_altivec_store_v2df;
11809 break;
11810 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
11811 icode = CODE_FOR_vector_altivec_store_v2di;
11812 break;
11813 default:
11814 *expandedp = false;
11815 return NULL_RTX;
11816 }
11817
11818 arg0 = CALL_EXPR_ARG (exp, 0);
11819 arg1 = CALL_EXPR_ARG (exp, 1);
11820 op0 = expand_normal (arg0);
11821 op1 = expand_normal (arg1);
11822 mode0 = insn_data[icode].operand[0].mode;
11823 mode1 = insn_data[icode].operand[1].mode;
11824
11825 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
11826 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
11827 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11828 op1 = copy_to_mode_reg (mode1, op1);
11829
11830 pat = GEN_FCN (icode) (op0, op1);
11831 if (pat)
11832 emit_insn (pat);
11833
11834 *expandedp = true;
11835 return NULL_RTX;
11836 }
11837
11838 /* Expand the dst builtins. */
11839 static rtx
11840 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
11841 bool *expandedp)
11842 {
11843 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11844 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
11845 tree arg0, arg1, arg2;
11846 enum machine_mode mode0, mode1;
11847 rtx pat, op0, op1, op2;
11848 const struct builtin_description *d;
11849 size_t i;
11850
11851 *expandedp = false;
11852
11853 /* Handle DST variants. */
11854 d = bdesc_dst;
11855 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
11856 if (d->code == fcode)
11857 {
11858 arg0 = CALL_EXPR_ARG (exp, 0);
11859 arg1 = CALL_EXPR_ARG (exp, 1);
11860 arg2 = CALL_EXPR_ARG (exp, 2);
11861 op0 = expand_normal (arg0);
11862 op1 = expand_normal (arg1);
11863 op2 = expand_normal (arg2);
11864 mode0 = insn_data[d->icode].operand[0].mode;
11865 mode1 = insn_data[d->icode].operand[1].mode;
11866
11867 /* Invalid arguments, bail out before generating bad rtl. */
11868 if (arg0 == error_mark_node
11869 || arg1 == error_mark_node
11870 || arg2 == error_mark_node)
11871 return const0_rtx;
11872
11873 *expandedp = true;
11874 STRIP_NOPS (arg2);
11875 if (TREE_CODE (arg2) != INTEGER_CST
11876 || TREE_INT_CST_LOW (arg2) & ~0x3)
11877 {
11878 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
11879 return const0_rtx;
11880 }
11881
11882 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
11883 op0 = copy_to_mode_reg (Pmode, op0);
11884 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
11885 op1 = copy_to_mode_reg (mode1, op1);
11886
11887 pat = GEN_FCN (d->icode) (op0, op1, op2);
11888 if (pat != 0)
11889 emit_insn (pat);
11890
11891 return NULL_RTX;
11892 }
11893
11894 return NULL_RTX;
11895 }
11896
11897 /* Expand vec_init builtin. */
11898 static rtx
11899 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
11900 {
11901 enum machine_mode tmode = TYPE_MODE (type);
11902 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
11903 int i, n_elt = GET_MODE_NUNITS (tmode);
11904 rtvec v = rtvec_alloc (n_elt);
11905
11906 gcc_assert (VECTOR_MODE_P (tmode));
11907 gcc_assert (n_elt == call_expr_nargs (exp));
11908
11909 for (i = 0; i < n_elt; ++i)
11910 {
11911 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
11912 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
11913 }
11914
11915 if (!target || !register_operand (target, tmode))
11916 target = gen_reg_rtx (tmode);
11917
11918 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
11919 return target;
11920 }
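
/* A GNU C vector constructor with non-constant elements is what reaches
   the VEC_INIT builtins expanded above (a sketch, assuming -maltivec):  */
#if 0
__vector int
demo_make (int a, int b, int c, int d)
{
  return (__vector int) { a, b, c, d };
}
#endif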
11921
11922 /* Return the integer constant in ARG. Constrain it to be in the range
11923 of the subparts of VEC_TYPE; issue an error if not. */
11924
11925 static int
11926 get_element_number (tree vec_type, tree arg)
11927 {
11928 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
11929
11930 if (!host_integerp (arg, 1)
11931 || (elt = tree_low_cst (arg, 1), elt > max))
11932 {
11933 error ("selector must be an integer constant in the range 0..%wi", max);
11934 return 0;
11935 }
11936
11937 return elt;
11938 }
11939
11940 /* Expand vec_set builtin. */
11941 static rtx
11942 altivec_expand_vec_set_builtin (tree exp)
11943 {
11944 enum machine_mode tmode, mode1;
11945 tree arg0, arg1, arg2;
11946 int elt;
11947 rtx op0, op1;
11948
11949 arg0 = CALL_EXPR_ARG (exp, 0);
11950 arg1 = CALL_EXPR_ARG (exp, 1);
11951 arg2 = CALL_EXPR_ARG (exp, 2);
11952
11953 tmode = TYPE_MODE (TREE_TYPE (arg0));
11954 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
11955 gcc_assert (VECTOR_MODE_P (tmode));
11956
11957 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
11958 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
11959 elt = get_element_number (TREE_TYPE (arg0), arg2);
11960
11961 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
11962 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
11963
11964 op0 = force_reg (tmode, op0);
11965 op1 = force_reg (mode1, op1);
11966
11967 rs6000_expand_vector_set (op0, op1, elt);
11968
11969 return op0;
11970 }
11971
11972 /* Expand vec_ext builtin. */
11973 static rtx
11974 altivec_expand_vec_ext_builtin (tree exp, rtx target)
11975 {
11976 enum machine_mode tmode, mode0;
11977 tree arg0, arg1;
11978 int elt;
11979 rtx op0;
11980
11981 arg0 = CALL_EXPR_ARG (exp, 0);
11982 arg1 = CALL_EXPR_ARG (exp, 1);
11983
11984 op0 = expand_normal (arg0);
11985 elt = get_element_number (TREE_TYPE (arg0), arg1);
11986
11987 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
11988 mode0 = TYPE_MODE (TREE_TYPE (arg0));
11989 gcc_assert (VECTOR_MODE_P (mode0));
11990
11991 op0 = force_reg (mode0, op0);
11992
11993 if (optimize || !target || !register_operand (target, tmode))
11994 target = gen_reg_rtx (tmode);
11995
11996 rs6000_expand_vector_extract (target, op0, elt);
11997
11998 return target;
11999 }
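
/* vec_insert and vec_extract with constant selectors go through the
   VEC_SET/VEC_EXT paths above; get_element_number range-checks the
   selector.  A sketch (assuming <altivec.h>):  */
#if 0
#include <altivec.h>
vector signed int demo_put (vector signed int v, int x) { return vec_insert (x, v, 2); }
int demo_get (vector signed int v) { return vec_extract (v, 2); }
#endif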
12000
12001 /* Expand the builtin in EXP and store the result in TARGET. Store
12002 true in *EXPANDEDP if we found a builtin to expand. */
12003 static rtx
12004 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
12005 {
12006 const struct builtin_description *d;
12007 size_t i;
12008 enum insn_code icode;
12009 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12010 tree arg0;
12011 rtx op0, pat;
12012 enum machine_mode tmode, mode0;
12013 enum rs6000_builtins fcode
12014 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12015
12016 if (rs6000_overloaded_builtin_p (fcode))
12017 {
12018 *expandedp = true;
12019 error ("unresolved overload for Altivec builtin %qF", fndecl);
12020
12021 /* Given it is invalid, just generate a normal call. */
12022 return expand_call (exp, target, false);
12023 }
12024
12025 target = altivec_expand_ld_builtin (exp, target, expandedp);
12026 if (*expandedp)
12027 return target;
12028
12029 target = altivec_expand_st_builtin (exp, target, expandedp);
12030 if (*expandedp)
12031 return target;
12032
12033 target = altivec_expand_dst_builtin (exp, target, expandedp);
12034 if (*expandedp)
12035 return target;
12036
12037 *expandedp = true;
12038
12039 switch (fcode)
12040 {
12041 case ALTIVEC_BUILTIN_STVX:
12042 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
12043 case ALTIVEC_BUILTIN_STVEBX:
12044 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
12045 case ALTIVEC_BUILTIN_STVEHX:
12046 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
12047 case ALTIVEC_BUILTIN_STVEWX:
12048 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
12049 case ALTIVEC_BUILTIN_STVXL:
12050 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl, exp);
12051
12052 case ALTIVEC_BUILTIN_STVLX:
12053 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
12054 case ALTIVEC_BUILTIN_STVLXL:
12055 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
12056 case ALTIVEC_BUILTIN_STVRX:
12057 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
12058 case ALTIVEC_BUILTIN_STVRXL:
12059 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
12060
12061 case VSX_BUILTIN_STXVD2X_V2DF:
12062 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
12063 case VSX_BUILTIN_STXVD2X_V2DI:
12064 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
12065 case VSX_BUILTIN_STXVW4X_V4SF:
12066 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
12067 case VSX_BUILTIN_STXVW4X_V4SI:
12068 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
12069 case VSX_BUILTIN_STXVW4X_V8HI:
12070 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
12071 case VSX_BUILTIN_STXVW4X_V16QI:
12072 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
12073
12074 case ALTIVEC_BUILTIN_MFVSCR:
12075 icode = CODE_FOR_altivec_mfvscr;
12076 tmode = insn_data[icode].operand[0].mode;
12077
12078 if (target == 0
12079 || GET_MODE (target) != tmode
12080 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12081 target = gen_reg_rtx (tmode);
12082
12083 pat = GEN_FCN (icode) (target);
12084 if (! pat)
12085 return 0;
12086 emit_insn (pat);
12087 return target;
12088
12089 case ALTIVEC_BUILTIN_MTVSCR:
12090 icode = CODE_FOR_altivec_mtvscr;
12091 arg0 = CALL_EXPR_ARG (exp, 0);
12092 op0 = expand_normal (arg0);
12093 mode0 = insn_data[icode].operand[0].mode;
12094
12095 /* If we got invalid arguments bail out before generating bad rtl. */
12096 if (arg0 == error_mark_node)
12097 return const0_rtx;
12098
12099 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12100 op0 = copy_to_mode_reg (mode0, op0);
12101
12102 pat = GEN_FCN (icode) (op0);
12103 if (pat)
12104 emit_insn (pat);
12105 return NULL_RTX;
12106
12107 case ALTIVEC_BUILTIN_DSSALL:
12108 emit_insn (gen_altivec_dssall ());
12109 return NULL_RTX;
12110
12111 case ALTIVEC_BUILTIN_DSS:
12112 icode = CODE_FOR_altivec_dss;
12113 arg0 = CALL_EXPR_ARG (exp, 0);
12114 STRIP_NOPS (arg0);
12115 
12116 /* If we got invalid arguments, bail out before generating bad rtl. */
12117 if (arg0 == error_mark_node)
12118 return const0_rtx;
12119 
12120 if (TREE_CODE (arg0) != INTEGER_CST
12121 || TREE_INT_CST_LOW (arg0) & ~0x3)
12122 {
12123 error ("argument to dss must be a 2-bit unsigned literal");
12124 return const0_rtx;
12125 }
12126 
12127 op0 = expand_normal (arg0);
12128 mode0 = insn_data[icode].operand[0].mode;
12129 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12130 op0 = copy_to_mode_reg (mode0, op0);
12131
12132 emit_insn (gen_altivec_dss (op0));
12133 return NULL_RTX;
12134
12135 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
12136 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
12137 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
12138 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
12139 case VSX_BUILTIN_VEC_INIT_V2DF:
12140 case VSX_BUILTIN_VEC_INIT_V2DI:
12141 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
12142
12143 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
12144 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
12145 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
12146 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
12147 case VSX_BUILTIN_VEC_SET_V2DF:
12148 case VSX_BUILTIN_VEC_SET_V2DI:
12149 return altivec_expand_vec_set_builtin (exp);
12150
12151 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
12152 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
12153 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
12154 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
12155 case VSX_BUILTIN_VEC_EXT_V2DF:
12156 case VSX_BUILTIN_VEC_EXT_V2DI:
12157 return altivec_expand_vec_ext_builtin (exp, target);
12158
12159 default:
12160 break;
12161 /* Fall through to the table-driven expanders below. */
12162 }
12163
12164 /* Expand abs* operations. */
12165 d = bdesc_abs;
12166 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
12167 if (d->code == fcode)
12168 return altivec_expand_abs_builtin (d->icode, exp, target);
12169
12170 /* Expand the AltiVec predicates. */
12171 d = bdesc_altivec_preds;
12172 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
12173 if (d->code == fcode)
12174 return altivec_expand_predicate_builtin (d->icode, exp, target);
12175
12176 /* The LV* builtins were initialized differently from the tables above, so expand them by hand here. */
12177 switch (fcode)
12178 {
12179 case ALTIVEC_BUILTIN_LVSL:
12180 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
12181 exp, target, false);
12182 case ALTIVEC_BUILTIN_LVSR:
12183 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
12184 exp, target, false);
12185 case ALTIVEC_BUILTIN_LVEBX:
12186 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
12187 exp, target, false);
12188 case ALTIVEC_BUILTIN_LVEHX:
12189 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
12190 exp, target, false);
12191 case ALTIVEC_BUILTIN_LVEWX:
12192 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
12193 exp, target, false);
12194 case ALTIVEC_BUILTIN_LVXL:
12195 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl,
12196 exp, target, false);
12197 case ALTIVEC_BUILTIN_LVX:
12198 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
12199 exp, target, false);
12200 case ALTIVEC_BUILTIN_LVLX:
12201 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
12202 exp, target, true);
12203 case ALTIVEC_BUILTIN_LVLXL:
12204 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
12205 exp, target, true);
12206 case ALTIVEC_BUILTIN_LVRX:
12207 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
12208 exp, target, true);
12209 case ALTIVEC_BUILTIN_LVRXL:
12210 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
12211 exp, target, true);
12212 case VSX_BUILTIN_LXVD2X_V2DF:
12213 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
12214 exp, target, false);
12215 case VSX_BUILTIN_LXVD2X_V2DI:
12216 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
12217 exp, target, false);
12218 case VSX_BUILTIN_LXVW4X_V4SF:
12219 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
12220 exp, target, false);
12221 case VSX_BUILTIN_LXVW4X_V4SI:
12222 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
12223 exp, target, false);
12224 case VSX_BUILTIN_LXVW4X_V8HI:
12225 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
12226 exp, target, false);
12227 case VSX_BUILTIN_LXVW4X_V16QI:
12228 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
12229 exp, target, false);
12231 default:
12232 break;
12233 /* No expander matched; fall through and report failure. */
12234 }
12235
12236 *expandedp = false;
12237 return NULL_RTX;
12238 }
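
/* Editorial note (not in the original file): a minimal usage sketch of the
   user-level calls that reach altivec_expand_builtin, assuming a target
   compiled with -maltivec. The builtin names match those registered in
   altivec_init_builtins below; the wrapper function is hypothetical. */
#if 0
__vector signed int
example_lvx_stvx (const __vector signed int *src, __vector signed int *dst)
{
  /* Dispatched through the ALTIVEC_BUILTIN_LVX case above.  */
  __vector signed int v = __builtin_altivec_lvx (0, src);
  /* Dispatched through the ALTIVEC_BUILTIN_STVX case above.  */
  __builtin_altivec_stvx (v, 0, dst);
  return v;
}
#endif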
12239
12240 /* Expand the builtin in EXP and store the result in TARGET. Store
12241 true in *EXPANDEDP if we found a builtin to expand. */
12242 static rtx
12243 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
12244 {
12245 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12246 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12247 const struct builtin_description *d;
12248 size_t i;
12249
12250 *expandedp = true;
12251
12252 switch (fcode)
12253 {
12254 case PAIRED_BUILTIN_STX:
12255 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
12256 case PAIRED_BUILTIN_LX:
12257 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
12258 default:
12259 break;
12260 /* No match; try the predicate table below. */
12261 }
12262
12263 /* Expand the paired predicates. */
12264 d = bdesc_paired_preds;
12265 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
12266 if (d->code == fcode)
12267 return paired_expand_predicate_builtin (d->icode, exp, target);
12268
12269 *expandedp = false;
12270 return NULL_RTX;
12271 }
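
/* Editorial note (not in the original file): the paired single-precision
   builtins handled above, sketched from the user side; assumes -mpaired,
   and the typedef and wrapper are hypothetical. */
#if 0
typedef float __v2sf __attribute__ ((vector_size (8)));

__v2sf
example_paired_copy (const float *src, float *dst)
{
  /* PAIRED_BUILTIN_LX: load a pair of floats.  */
  __v2sf v = __builtin_paired_lx (0, src);
  /* PAIRED_BUILTIN_STX: store the pair back.  */
  __builtin_paired_stx (v, 0, dst);
  return v;
}
#endif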
12272
12273 /* Binops that need to be initialized manually, but can be expanded
12274 automagically by rs6000_expand_binop_builtin. */
12275 static const struct builtin_description bdesc_2arg_spe[] =
12276 {
12277 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
12278 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
12279 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
12280 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
12281 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
12282 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
12283 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
12284 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
12285 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
12286 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
12287 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
12288 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
12289 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
12290 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
12291 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
12292 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
12293 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
12294 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
12295 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
12296 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
12297 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
12298 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
12299 };
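
/* Editorial note (not in the original file): each entry above pairs a
   target mask, an insn code, the user-visible builtin name, and the
   rs6000_builtins enumerator; spe_expand_builtin below scans the table
   linearly and hands any match to rs6000_expand_binop_builtin. For
   reference, the entry layout (struct builtin_description is declared
   earlier in this file) is:

     struct builtin_description
     {
       const HOST_WIDE_INT mask;
       const enum insn_code icode;
       const char *const name;
       const enum rs6000_builtins code;
     };
*/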
12300
12301 /* Expand the builtin in EXP and store the result in TARGET. Store
12302 true in *EXPANDEDP if we found a builtin to expand.
12303
12304 This expands the SPE builtins that are not simple unary and binary
12305 operations. */
12306 static rtx
12307 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
12308 {
12309 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12310 tree arg1, arg0;
12311 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12312 enum insn_code icode;
12313 enum machine_mode tmode, mode0;
12314 rtx pat, op0;
12315 const struct builtin_description *d;
12316 size_t i;
12317
12318 *expandedp = true;
12319
12320 /* Syntax check for a 5-bit unsigned immediate. */
12321 switch (fcode)
12322 {
12323 case SPE_BUILTIN_EVSTDD:
12324 case SPE_BUILTIN_EVSTDH:
12325 case SPE_BUILTIN_EVSTDW:
12326 case SPE_BUILTIN_EVSTWHE:
12327 case SPE_BUILTIN_EVSTWHO:
12328 case SPE_BUILTIN_EVSTWWE:
12329 case SPE_BUILTIN_EVSTWWO:
12330 arg1 = CALL_EXPR_ARG (exp, 2);
12331 if (TREE_CODE (arg1) != INTEGER_CST
12332 || TREE_INT_CST_LOW (arg1) & ~0x1f)
12333 {
12334 error ("argument 2 must be a 5-bit unsigned literal");
12335 return const0_rtx;
12336 }
12337 break;
12338 default:
12339 break;
12340 }
12341
12342 /* The evsplat*i instructions are not quite generic, so expand them individually. */
12343 switch (fcode)
12344 {
12345 case SPE_BUILTIN_EVSPLATFI:
12346 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
12347 exp, target);
12348 case SPE_BUILTIN_EVSPLATI:
12349 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
12350 exp, target);
12351 default:
12352 break;
12353 }
12354
12355 d = bdesc_2arg_spe;
12356 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
12357 if (d->code == fcode)
12358 return rs6000_expand_binop_builtin (d->icode, exp, target);
12359
12360 d = bdesc_spe_predicates;
12361 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
12362 if (d->code == fcode)
12363 return spe_expand_predicate_builtin (d->icode, exp, target);
12364
12365 d = bdesc_spe_evsel;
12366 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
12367 if (d->code == fcode)
12368 return spe_expand_evsel_builtin (d->icode, exp, target);
12369
12370 switch (fcode)
12371 {
12372 case SPE_BUILTIN_EVSTDDX:
12373 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
12374 case SPE_BUILTIN_EVSTDHX:
12375 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
12376 case SPE_BUILTIN_EVSTDWX:
12377 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
12378 case SPE_BUILTIN_EVSTWHEX:
12379 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
12380 case SPE_BUILTIN_EVSTWHOX:
12381 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
12382 case SPE_BUILTIN_EVSTWWEX:
12383 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
12384 case SPE_BUILTIN_EVSTWWOX:
12385 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
12386 case SPE_BUILTIN_EVSTDD:
12387 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
12388 case SPE_BUILTIN_EVSTDH:
12389 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
12390 case SPE_BUILTIN_EVSTDW:
12391 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
12392 case SPE_BUILTIN_EVSTWHE:
12393 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
12394 case SPE_BUILTIN_EVSTWHO:
12395 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
12396 case SPE_BUILTIN_EVSTWWE:
12397 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
12398 case SPE_BUILTIN_EVSTWWO:
12399 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
12400 case SPE_BUILTIN_MFSPEFSCR:
12401 icode = CODE_FOR_spe_mfspefscr;
12402 tmode = insn_data[icode].operand[0].mode;
12403
12404 if (target == 0
12405 || GET_MODE (target) != tmode
12406 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12407 target = gen_reg_rtx (tmode);
12408
12409 pat = GEN_FCN (icode) (target);
12410 if (! pat)
12411 return 0;
12412 emit_insn (pat);
12413 return target;
12414 case SPE_BUILTIN_MTSPEFSCR:
12415 icode = CODE_FOR_spe_mtspefscr;
12416 arg0 = CALL_EXPR_ARG (exp, 0);
12417 if (arg0 == error_mark_node)
12418 return const0_rtx;
12419 
12420 op0 = expand_normal (arg0);
12421 mode0 = insn_data[icode].operand[0].mode;
12422 
12423 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12424 op0 = copy_to_mode_reg (mode0, op0);
12425
12426 pat = GEN_FCN (icode) (op0);
12427 if (pat)
12428 emit_insn (pat);
12429 return NULL_RTX;
12430 default:
12431 break;
12432 }
12433
12434 *expandedp = false;
12435 return NULL_RTX;
12436 }
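
/* Editorial note (not in the original file): the 5-bit-literal check at
   the top of spe_expand_builtin rejects out-of-range store offsets at
   compile time. A hedged sketch, assuming -mspe; the globals and wrapper
   are hypothetical. */
#if 0
__ev64_opaque__ v;
__ev64_opaque__ buf[8];

void
example_evstdd (void)
{
  /* OK: the offset is a compile-time constant that fits in 5 bits.  */
  __builtin_spe_evstdd (v, buf, 8);
  /* Rejected by the check above ("argument 2 must be a 5-bit unsigned
     literal"):
     __builtin_spe_evstdd (v, buf, 32);  */
}
#endif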
12437
12438 static rtx
12439 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12440 {
12441 rtx pat, scratch, tmp;
12442 tree form = CALL_EXPR_ARG (exp, 0);
12443 tree arg0 = CALL_EXPR_ARG (exp, 1);
12444 tree arg1 = CALL_EXPR_ARG (exp, 2);
12445 rtx op0 = expand_normal (arg0);
12446 rtx op1 = expand_normal (arg1);
12447 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12448 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12449 int form_int;
12450 enum rtx_code code;
12451
12452 if (TREE_CODE (form) != INTEGER_CST)
12453 {
12454 error ("argument 1 of __builtin_paired_predicate must be a constant");
12455 return const0_rtx;
12456 }
12457 else
12458 form_int = TREE_INT_CST_LOW (form);
12459
12460 gcc_assert (mode0 == mode1);
12461
12462 if (arg0 == error_mark_node || arg1 == error_mark_node)
12463 return const0_rtx;
12464
12465 if (target == 0
12466 || GET_MODE (target) != SImode
12467 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
12468 target = gen_reg_rtx (SImode);
12469 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
12470 op0 = copy_to_mode_reg (mode0, op0);
12471 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
12472 op1 = copy_to_mode_reg (mode1, op1);
12473
12474 scratch = gen_reg_rtx (CCFPmode);
12475
12476 pat = GEN_FCN (icode) (scratch, op0, op1);
12477 if (!pat)
12478 return const0_rtx;
12479
12480 emit_insn (pat);
12481
12482 switch (form_int)
12483 {
12484 /* LT bit. */
12485 case 0:
12486 code = LT;
12487 break;
12488 /* GT bit. */
12489 case 1:
12490 code = GT;
12491 break;
12492 /* EQ bit. */
12493 case 2:
12494 code = EQ;
12495 break;
12496 /* UN bit. */
12497 case 3:
12498 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
12499 return target;
12500 default:
12501 error ("argument 1 of __builtin_paired_predicate is out of range");
12502 return const0_rtx;
12503 }
12504
12505 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
12506 emit_move_insn (target, tmp);
12507 return target;
12508 }
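
/* Editorial note (not in the original file): the FORM argument decoded
   above selects which CR bit the 0/1 result reflects. A sketch, using
   the __builtin_paired_cmpu* names from the bdesc_paired_preds table
   (defined elsewhere in this file); treat the exact names as an
   assumption, and a, b as float pairs:

     lt = __builtin_paired_cmpu0 (0, a, b);    FORM 0: LT bit
     gt = __builtin_paired_cmpu0 (1, a, b);    FORM 1: GT bit
     eq = __builtin_paired_cmpu0 (2, a, b);    FORM 2: EQ bit
     un = __builtin_paired_cmpu0 (3, a, b);    FORM 3: UN bit
*/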
12509
12510 static rtx
12511 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12512 {
12513 rtx pat, scratch, tmp;
12514 tree form = CALL_EXPR_ARG (exp, 0);
12515 tree arg0 = CALL_EXPR_ARG (exp, 1);
12516 tree arg1 = CALL_EXPR_ARG (exp, 2);
12517 rtx op0 = expand_normal (arg0);
12518 rtx op1 = expand_normal (arg1);
12519 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12520 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12521 int form_int;
12522 enum rtx_code code;
12523
12524 if (TREE_CODE (form) != INTEGER_CST)
12525 {
12526 error ("argument 1 of __builtin_spe_predicate must be a constant");
12527 return const0_rtx;
12528 }
12529 else
12530 form_int = TREE_INT_CST_LOW (form);
12531
12532 gcc_assert (mode0 == mode1);
12533
12534 if (arg0 == error_mark_node || arg1 == error_mark_node)
12535 return const0_rtx;
12536
12537 if (target == 0
12538 || GET_MODE (target) != SImode
12539 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
12540 target = gen_reg_rtx (SImode);
12541
12542 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12543 op0 = copy_to_mode_reg (mode0, op0);
12544 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12545 op1 = copy_to_mode_reg (mode1, op1);
12546
12547 scratch = gen_reg_rtx (CCmode);
12548
12549 pat = GEN_FCN (icode) (scratch, op0, op1);
12550 if (! pat)
12551 return const0_rtx;
12552 emit_insn (pat);
12553
12554 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
12555 _lower_. We use one compare, but look in different bits of the
12556 CR for each variant.
12557 
12558 There are 2 elements in each SPE simd type (upper/lower). The CR
12559 bits are set as follows:
12560 
12561 BIT 0 | BIT 1 | BIT 2 | BIT 3
12562 U | L | (U | L) | (U & L)
12563 
12564 So, for an "all" relationship, BIT 3 would be set.
12565 For an "any" relationship, BIT 2 would be set. Etc.
12566 
12567 Following traditional nomenclature, these bits map to:
12568 
12569 BIT 0 | BIT 1 | BIT 2 | BIT 3
12570 LT | GT | EQ | OV
12571 
12572 Below we generate rtl to look in the OV, EQ, LT and GT bits for the
12573 all, any, upper and lower variants, respectively. */
12574
12575 switch (form_int)
12576 {
12577 /* All variant. OV bit. */
12578 case 0:
12579 /* We need to get to the OV bit, which is the ORDERED bit. We
12580 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
12581 that's ugly and will make validate_condition_mode die.
12582 So let's just use another pattern. */
12583 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
12584 return target;
12585 /* Any variant. EQ bit. */
12586 case 1:
12587 code = EQ;
12588 break;
12589 /* Upper variant. LT bit. */
12590 case 2:
12591 code = LT;
12592 break;
12593 /* Lower variant. GT bit. */
12594 case 3:
12595 code = GT;
12596 break;
12597 default:
12598 error ("argument 1 of __builtin_spe_predicate is out of range");
12599 return const0_rtx;
12600 }
12601
12602 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
12603 emit_move_insn (target, tmp);
12604
12605 return target;
12606 }
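
/* Editorial note (not in the original file): combining the CR-bit table
   in the comment above with the switch just seen, the FORM argument of an
   SPE predicate builtin selects: 0 = all elements (OV bit), 1 = any
   element (EQ bit), 2 = upper element only (LT bit), 3 = lower element
   only (GT bit); the final move materializes that bit as a 0/1 integer
   in TARGET. */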
12607
12608 /* The evsel builtins look like this:
12609
12610 e = __builtin_spe_evsel_OP (a, b, c, d);
12611
12612 and work like this:
12613
12614 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
12615 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
12616 */
12617
12618 static rtx
12619 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
12620 {
12621 rtx pat, scratch;
12622 tree arg0 = CALL_EXPR_ARG (exp, 0);
12623 tree arg1 = CALL_EXPR_ARG (exp, 1);
12624 tree arg2 = CALL_EXPR_ARG (exp, 2);
12625 tree arg3 = CALL_EXPR_ARG (exp, 3);
12626 rtx op0 = expand_normal (arg0);
12627 rtx op1 = expand_normal (arg1);
12628 rtx op2 = expand_normal (arg2);
12629 rtx op3 = expand_normal (arg3);
12630 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12631 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12632
12633 gcc_assert (mode0 == mode1);
12634
12635 if (arg0 == error_mark_node || arg1 == error_mark_node
12636 || arg2 == error_mark_node || arg3 == error_mark_node)
12637 return const0_rtx;
12638
12639 if (target == 0
12640 || GET_MODE (target) != mode0
12641 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
12642 target = gen_reg_rtx (mode0);
12643
12644 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12645 op0 = copy_to_mode_reg (mode0, op0);
12646 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12647 op1 = copy_to_mode_reg (mode0, op1);
12648 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
12649 op2 = copy_to_mode_reg (mode0, op2);
12650 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
12651 op3 = copy_to_mode_reg (mode0, op3);
12652
12653 /* Generate the compare. */
12654 scratch = gen_reg_rtx (CCmode);
12655 pat = GEN_FCN (icode) (scratch, op0, op1);
12656 if (! pat)
12657 return const0_rtx;
12658 emit_insn (pat);
12659
12660 if (mode0 == V2SImode)
12661 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
12662 else
12663 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
12664
12665 return target;
12666 }
12667
12668 /* Raise an error message for a builtin function that is called without the
12669 appropriate target options being set. */
12670
12671 static void
12672 rs6000_invalid_builtin (enum rs6000_builtins fncode)
12673 {
12674 size_t uns_fncode = (size_t)fncode;
12675 const char *name = rs6000_builtin_info[uns_fncode].name;
12676 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
12677
12678 gcc_assert (name != NULL);
12679 if ((fnmask & RS6000_BTM_CELL) != 0)
12680 error ("builtin function %s is only valid for the cell processor", name);
12681 else if ((fnmask & RS6000_BTM_VSX) != 0)
12682 error ("builtin function %s requires the -mvsx option", name);
12683 else if ((fnmask & RS6000_BTM_HTM) != 0)
12684 error ("builtin function %s requires the -mhtm option", name);
12685 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
12686 error ("builtin function %s requires the -maltivec option", name);
12687 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
12688 error ("builtin function %s requires the -mpaired option", name);
12689 else if ((fnmask & RS6000_BTM_SPE) != 0)
12690 error ("builtin function %s requires the -mspe option", name);
12691 else
12692 error ("builtin function %s is not supported with the current options",
12693 name);
12694 }
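
/* Editorial note (not in the original file): for example, compiling the
   sketch below without -mvsx reaches the RS6000_BTM_VSX arm above and
   produces the "requires the -mvsx option" diagnostic. The wrapper is
   hypothetical. */
#if 0
__vector double
example_needs_vsx (const double *p)
{
  return __builtin_vsx_lxvd2x_v2df (0, p);
}
#endif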
12695
12696 /* Expand an expression EXP that calls a built-in function,
12697 with result going to TARGET if that's convenient
12698 (and in mode MODE if that's convenient).
12699 SUBTARGET may be used as the target for computing one of EXP's operands.
12700 IGNORE is nonzero if the value is to be ignored. */
12701
12702 static rtx
12703 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
12704 enum machine_mode mode ATTRIBUTE_UNUSED,
12705 int ignore ATTRIBUTE_UNUSED)
12706 {
12707 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12708 enum rs6000_builtins fcode
12709 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
12710 size_t uns_fcode = (size_t)fcode;
12711 const struct builtin_description *d;
12712 size_t i;
12713 rtx ret;
12714 bool success;
12715 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
12716 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
12717
12718 if (TARGET_DEBUG_BUILTIN)
12719 {
12720 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
12721 const char *name1 = rs6000_builtin_info[uns_fcode].name;
12722 const char *name2 = ((icode != CODE_FOR_nothing)
12723 ? get_insn_name ((int)icode)
12724 : "nothing");
12725 const char *name3;
12726
12727 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
12728 {
12729 default: name3 = "unknown"; break;
12730 case RS6000_BTC_SPECIAL: name3 = "special"; break;
12731 case RS6000_BTC_UNARY: name3 = "unary"; break;
12732 case RS6000_BTC_BINARY: name3 = "binary"; break;
12733 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
12734 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
12735 case RS6000_BTC_ABS: name3 = "abs"; break;
12736 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
12737 case RS6000_BTC_DST: name3 = "dst"; break;
12738 }
12739
12741 fprintf (stderr,
12742 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
12743 (name1) ? name1 : "---", fcode,
12744 (name2) ? name2 : "---", (int)icode,
12745 name3,
12746 func_valid_p ? "" : ", not valid");
12747 }
12748
12749 if (!func_valid_p)
12750 {
12751 rs6000_invalid_builtin (fcode);
12752
12753 /* Given it is invalid, just generate a normal call. */
12754 return expand_call (exp, target, ignore);
12755 }
12756
12757 switch (fcode)
12758 {
12759 case RS6000_BUILTIN_RECIP:
12760 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
12761
12762 case RS6000_BUILTIN_RECIPF:
12763 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
12764
12765 case RS6000_BUILTIN_RSQRTF:
12766 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
12767
12768 case RS6000_BUILTIN_RSQRT:
12769 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
12770
12771 case POWER7_BUILTIN_BPERMD:
12772 return rs6000_expand_binop_builtin (((TARGET_64BIT)
12773 ? CODE_FOR_bpermd_di
12774 : CODE_FOR_bpermd_si), exp, target);
12775
12776 case RS6000_BUILTIN_GET_TB:
12777 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
12778 target);
12779
12780 case RS6000_BUILTIN_MFTB:
12781 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
12782 ? CODE_FOR_rs6000_mftb_di
12783 : CODE_FOR_rs6000_mftb_si),
12784 target);
12785
12786 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
12787 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
12788 {
12789 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr
12790 : (int) CODE_FOR_altivec_lvsl);
12791 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12792 enum machine_mode mode = insn_data[icode].operand[1].mode;
12793 tree arg;
12794 rtx op, addr, pat;
12795
12796 gcc_assert (TARGET_ALTIVEC);
12797
12798 arg = CALL_EXPR_ARG (exp, 0);
12799 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
12800 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
12801 addr = memory_address (mode, op);
12802 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
12803 op = addr;
12804 else
12805 {
12806 /* For the load case we need to negate the address. */
12807 op = gen_reg_rtx (GET_MODE (addr));
12808 emit_insn (gen_rtx_SET (VOIDmode, op,
12809 gen_rtx_NEG (GET_MODE (addr), addr)));
12810 }
12811 op = gen_rtx_MEM (mode, op);
12812
12813 if (target == 0
12814 || GET_MODE (target) != tmode
12815 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12816 target = gen_reg_rtx (tmode);
12817
12819 pat = GEN_FCN (icode) (target, op);
12820 if (!pat)
12821 return 0;
12822 emit_insn (pat);
12823
12824 return target;
12825 }
12826
12827 case ALTIVEC_BUILTIN_VCFUX:
12828 case ALTIVEC_BUILTIN_VCFSX:
12829 case ALTIVEC_BUILTIN_VCTUXS:
12830 case ALTIVEC_BUILTIN_VCTSXS:
12831 /* FIXME: There's got to be a nicer way to handle this case than
12832 constructing a new CALL_EXPR. */
12833 if (call_expr_nargs (exp) == 1)
12834 {
12835 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
12836 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
12837 }
12838 break;
12839
12840 default:
12841 break;
12842 }
12843
12844 if (TARGET_ALTIVEC)
12845 {
12846 ret = altivec_expand_builtin (exp, target, &success);
12847
12848 if (success)
12849 return ret;
12850 }
12851 if (TARGET_SPE)
12852 {
12853 ret = spe_expand_builtin (exp, target, &success);
12854
12855 if (success)
12856 return ret;
12857 }
12858 if (TARGET_PAIRED_FLOAT)
12859 {
12860 ret = paired_expand_builtin (exp, target, &success);
12861
12862 if (success)
12863 return ret;
12864 }
12865 if (TARGET_HTM)
12866 {
12867 ret = htm_expand_builtin (exp, target, &success);
12868
12869 if (success)
12870 return ret;
12871 }
12872
12873 gcc_assert (TARGET_ALTIVEC || TARGET_VSX || TARGET_SPE || TARGET_PAIRED_FLOAT);
12874
12875 /* Handle simple unary operations. */
12876 d = bdesc_1arg;
12877 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
12878 if (d->code == fcode)
12879 return rs6000_expand_unop_builtin (d->icode, exp, target);
12880
12881 /* Handle simple binary operations. */
12882 d = bdesc_2arg;
12883 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12884 if (d->code == fcode)
12885 return rs6000_expand_binop_builtin (d->icode, exp, target);
12886
12887 /* Handle simple ternary operations. */
12888 d = bdesc_3arg;
12889 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
12890 if (d->code == fcode)
12891 return rs6000_expand_ternop_builtin (d->icode, exp, target);
12892
12893 gcc_unreachable ();
12894 }
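
/* Editorial note (not in the original file): to summarize the dispatch
   order implemented above: the explicitly special-cased codes run first,
   then the per-subtarget expanders (AltiVec, SPE, paired, HTM) via the
   *EXPANDEDP protocol, and finally the generic unary/binary/ternary
   tables; gcc_unreachable is reached only if a builtin was registered
   but appears in none of them. */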
12895
12896 static void
12897 rs6000_init_builtins (void)
12898 {
12899 tree tdecl;
12900 tree ftype;
12901 enum machine_mode mode;
12902
12903 if (TARGET_DEBUG_BUILTIN)
12904 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
12905 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
12906 (TARGET_SPE) ? ", spe" : "",
12907 (TARGET_ALTIVEC) ? ", altivec" : "",
12908 (TARGET_VSX) ? ", vsx" : "");
12909
12910 V2SI_type_node = build_vector_type (intSI_type_node, 2);
12911 V2SF_type_node = build_vector_type (float_type_node, 2);
12912 V2DI_type_node = build_vector_type (intDI_type_node, 2);
12913 V2DF_type_node = build_vector_type (double_type_node, 2);
12914 V4HI_type_node = build_vector_type (intHI_type_node, 4);
12915 V4SI_type_node = build_vector_type (intSI_type_node, 4);
12916 V4SF_type_node = build_vector_type (float_type_node, 4);
12917 V8HI_type_node = build_vector_type (intHI_type_node, 8);
12918 V16QI_type_node = build_vector_type (intQI_type_node, 16);
12919
12920 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
12921 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
12922 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
12923 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
12924
12925 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
12926 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
12927 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
12928 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
12929
12930 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
12931 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
12932 'vector unsigned short'. */
12933
12934 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
12935 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
12936 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
12937 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
12938 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
12939
12940 long_integer_type_internal_node = long_integer_type_node;
12941 long_unsigned_type_internal_node = long_unsigned_type_node;
12942 long_long_integer_type_internal_node = long_long_integer_type_node;
12943 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
12944 intQI_type_internal_node = intQI_type_node;
12945 uintQI_type_internal_node = unsigned_intQI_type_node;
12946 intHI_type_internal_node = intHI_type_node;
12947 uintHI_type_internal_node = unsigned_intHI_type_node;
12948 intSI_type_internal_node = intSI_type_node;
12949 uintSI_type_internal_node = unsigned_intSI_type_node;
12950 intDI_type_internal_node = intDI_type_node;
12951 uintDI_type_internal_node = unsigned_intDI_type_node;
12952 float_type_internal_node = float_type_node;
12953 double_type_internal_node = double_type_node;
12954 void_type_internal_node = void_type_node;
12955
12956 /* Initialize the modes for builtin_function_type, mapping a machine mode to
12957 tree type node. */
12958 builtin_mode_to_type[QImode][0] = integer_type_node;
12959 builtin_mode_to_type[HImode][0] = integer_type_node;
12960 builtin_mode_to_type[SImode][0] = intSI_type_node;
12961 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
12962 builtin_mode_to_type[DImode][0] = intDI_type_node;
12963 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
12964 builtin_mode_to_type[SFmode][0] = float_type_node;
12965 builtin_mode_to_type[DFmode][0] = double_type_node;
12966 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
12967 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
12968 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
12969 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
12970 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
12971 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
12972 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
12973 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
12974 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
12975 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
12976 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
12977 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
12978 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
12979
12980 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
12981 TYPE_NAME (bool_char_type_node) = tdecl;
12982
12983 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
12984 TYPE_NAME (bool_short_type_node) = tdecl;
12985
12986 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
12987 TYPE_NAME (bool_int_type_node) = tdecl;
12988
12989 tdecl = add_builtin_type ("__pixel", pixel_type_node);
12990 TYPE_NAME (pixel_type_node) = tdecl;
12991
12992 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
12993 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
12994 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
12995 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
12996 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
12997
12998 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
12999 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
13000
13001 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
13002 TYPE_NAME (V16QI_type_node) = tdecl;
13003
13004 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
13005 TYPE_NAME (bool_V16QI_type_node) = tdecl;
13006
13007 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
13008 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
13009
13010 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
13011 TYPE_NAME (V8HI_type_node) = tdecl;
13012
13013 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
13014 TYPE_NAME (bool_V8HI_type_node) = tdecl;
13015
13016 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
13017 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
13018
13019 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
13020 TYPE_NAME (V4SI_type_node) = tdecl;
13021
13022 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
13023 TYPE_NAME (bool_V4SI_type_node) = tdecl;
13024
13025 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
13026 TYPE_NAME (V4SF_type_node) = tdecl;
13027
13028 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
13029 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
13030
13031 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
13032 TYPE_NAME (V2DF_type_node) = tdecl;
13033
13034 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
13035 TYPE_NAME (V2DI_type_node) = tdecl;
13036
13037 tdecl = add_builtin_type ("__vector unsigned long", unsigned_V2DI_type_node);
13038 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
13039
13040 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
13041 TYPE_NAME (bool_V2DI_type_node) = tdecl;
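
/* Editorial note (not in the original file): the registrations above are
   what give users the distinct AltiVec spellings; a declarative sketch,
   assuming -maltivec:  */
#if 0
__vector __bool int mask;   /* distinct type from __vector unsigned int */
__vector __pixel    pix;    /* distinct type from __vector unsigned short */
#endif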
13042
13043 /* Paired and SPE builtins are only available if the compiler was
13044 configured with the corresponding options, so create them only when
13045 those options are enabled. Create AltiVec and VSX builtins on machines
13046 with at least the general-purpose extensions (970 and newer) to allow
13047 use of the target attribute. */
13048 if (TARGET_PAIRED_FLOAT)
13049 paired_init_builtins ();
13050 if (TARGET_SPE)
13051 spe_init_builtins ();
13052 if (TARGET_EXTRA_BUILTINS)
13053 altivec_init_builtins ();
13054 if (TARGET_HTM)
13055 htm_init_builtins ();
13056
13057 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
13058 rs6000_common_init_builtins ();
13059
13060 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
13061 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
13062 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
13063
13064 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
13065 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
13066 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
13067
13068 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
13069 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
13070 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
13071
13072 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
13073 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
13074 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
13075
13076 mode = (TARGET_64BIT) ? DImode : SImode;
13077 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
13078 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
13079 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
13080
13081 ftype = build_function_type_list (unsigned_intDI_type_node,
13082 NULL_TREE);
13083 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
13084
13085 if (TARGET_64BIT)
13086 ftype = build_function_type_list (unsigned_intDI_type_node,
13087 NULL_TREE);
13088 else
13089 ftype = build_function_type_list (unsigned_intSI_type_node,
13090 NULL_TREE);
13091 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
13092
13093 #if TARGET_XCOFF
13094 /* AIX libm provides clog as __clog. */
13095 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
13096 set_user_assembler_name (tdecl, "__clog");
13097 #endif
13098
13099 #ifdef SUBTARGET_INIT_BUILTINS
13100 SUBTARGET_INIT_BUILTINS;
13101 #endif
13102 }
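
/* Editorial note (not in the original file): a hedged usage sketch of the
   scalar builtins registered at the end of rs6000_init_builtins; the
   wrapper functions are hypothetical. */
#if 0
double
example_recip (double x, double y)
{
  /* RS6000_BUILTIN_RECIP: reciprocal-estimate based x / y.  */
  return __builtin_recipdiv (x, y);
}

unsigned long long
example_timebase (void)
{
  /* RS6000_BUILTIN_GET_TB: the full 64-bit time base, even on 32-bit.  */
  return __builtin_ppc_get_timebase ();
}
#endif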
13103
13104 /* Returns the rs6000 builtin decl for CODE. */
13105
13106 static tree
13107 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
13108 {
13109 HOST_WIDE_INT fnmask;
13110
13111 if (code >= RS6000_BUILTIN_COUNT)
13112 return error_mark_node;
13113
13114 fnmask = rs6000_builtin_info[code].mask;
13115 if ((fnmask & rs6000_builtin_mask) != fnmask)
13116 {
13117 rs6000_invalid_builtin ((enum rs6000_builtins)code);
13118 return error_mark_node;
13119 }
13120
13121 return rs6000_builtin_decls[code];
13122 }
13123
13124 static void
13125 spe_init_builtins (void)
13126 {
13127 tree puint_type_node = build_pointer_type (unsigned_type_node);
13128 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
13129 const struct builtin_description *d;
13130 size_t i;
13131
13132 tree v2si_ftype_4_v2si
13133 = build_function_type_list (opaque_V2SI_type_node,
13134 opaque_V2SI_type_node,
13135 opaque_V2SI_type_node,
13136 opaque_V2SI_type_node,
13137 opaque_V2SI_type_node,
13138 NULL_TREE);
13139
13140 tree v2sf_ftype_4_v2sf
13141 = build_function_type_list (opaque_V2SF_type_node,
13142 opaque_V2SF_type_node,
13143 opaque_V2SF_type_node,
13144 opaque_V2SF_type_node,
13145 opaque_V2SF_type_node,
13146 NULL_TREE);
13147
13148 tree int_ftype_int_v2si_v2si
13149 = build_function_type_list (integer_type_node,
13150 integer_type_node,
13151 opaque_V2SI_type_node,
13152 opaque_V2SI_type_node,
13153 NULL_TREE);
13154
13155 tree int_ftype_int_v2sf_v2sf
13156 = build_function_type_list (integer_type_node,
13157 integer_type_node,
13158 opaque_V2SF_type_node,
13159 opaque_V2SF_type_node,
13160 NULL_TREE);
13161
13162 tree void_ftype_v2si_puint_int
13163 = build_function_type_list (void_type_node,
13164 opaque_V2SI_type_node,
13165 puint_type_node,
13166 integer_type_node,
13167 NULL_TREE);
13168
13169 tree void_ftype_v2si_puint_char
13170 = build_function_type_list (void_type_node,
13171 opaque_V2SI_type_node,
13172 puint_type_node,
13173 char_type_node,
13174 NULL_TREE);
13175
13176 tree void_ftype_v2si_pv2si_int
13177 = build_function_type_list (void_type_node,
13178 opaque_V2SI_type_node,
13179 opaque_p_V2SI_type_node,
13180 integer_type_node,
13181 NULL_TREE);
13182
13183 tree void_ftype_v2si_pv2si_char
13184 = build_function_type_list (void_type_node,
13185 opaque_V2SI_type_node,
13186 opaque_p_V2SI_type_node,
13187 char_type_node,
13188 NULL_TREE);
13189
13190 tree void_ftype_int
13191 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
13192
13193 tree int_ftype_void
13194 = build_function_type_list (integer_type_node, NULL_TREE);
13195
13196 tree v2si_ftype_pv2si_int
13197 = build_function_type_list (opaque_V2SI_type_node,
13198 opaque_p_V2SI_type_node,
13199 integer_type_node,
13200 NULL_TREE);
13201
13202 tree v2si_ftype_puint_int
13203 = build_function_type_list (opaque_V2SI_type_node,
13204 puint_type_node,
13205 integer_type_node,
13206 NULL_TREE);
13207
13208 tree v2si_ftype_pushort_int
13209 = build_function_type_list (opaque_V2SI_type_node,
13210 pushort_type_node,
13211 integer_type_node,
13212 NULL_TREE);
13213
13214 tree v2si_ftype_signed_char
13215 = build_function_type_list (opaque_V2SI_type_node,
13216 signed_char_type_node,
13217 NULL_TREE);
13218
13219 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
13220
13221 /* Initialize irregular SPE builtins. */
13222
13223 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
13224 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
13225 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
13226 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
13227 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
13228 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
13229 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
13230 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
13231 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
13232 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
13233 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
13234 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
13235 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
13236 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
13237 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
13238 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
13239 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
13240 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
13241
13242 /* Loads. */
13243 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
13244 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
13245 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
13246 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
13247 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
13248 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
13249 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
13250 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
13251 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
13252 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
13253 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
13254 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
13255 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
13256 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
13257 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
13258 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
13259 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
13260 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
13261 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
13262 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
13263 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
13264 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
13265
13266 /* Predicates. */
13267 d = bdesc_spe_predicates;
13268 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
13269 {
13270 tree type;
13271
13272 switch (insn_data[d->icode].operand[1].mode)
13273 {
13274 case V2SImode:
13275 type = int_ftype_int_v2si_v2si;
13276 break;
13277 case V2SFmode:
13278 type = int_ftype_int_v2sf_v2sf;
13279 break;
13280 default:
13281 gcc_unreachable ();
13282 }
13283
13284 def_builtin (d->name, type, d->code);
13285 }
13286
13287 /* Evsel predicates. */
13288 d = bdesc_spe_evsel;
13289 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
13290 {
13291 tree type;
13292
13293 switch (insn_data[d->icode].operand[1].mode)
13294 {
13295 case V2SImode:
13296 type = v2si_ftype_4_v2si;
13297 break;
13298 case V2SFmode:
13299 type = v2sf_ftype_4_v2sf;
13300 break;
13301 default:
13302 gcc_unreachable ();
13303 }
13304
13305 def_builtin (d->name, type, d->code);
13306 }
13307 }
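
/* Editorial note (not in the original file): with the registrations
   above, SPE user code can write, e.g. (hedged sketch, hypothetical
   wrapper):  */
#if 0
__ev64_opaque__
example_spe_load (__ev64_opaque__ *p)
{
  /* v2si_ftype_pv2si_int: base pointer plus a small literal offset.  */
  return __builtin_spe_evldd (p, 8);
}
#endif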
13308
13309 static void
13310 paired_init_builtins (void)
13311 {
13312 const struct builtin_description *d;
13313 size_t i;
13314
13315 tree int_ftype_int_v2sf_v2sf
13316 = build_function_type_list (integer_type_node,
13317 integer_type_node,
13318 V2SF_type_node,
13319 V2SF_type_node,
13320 NULL_TREE);
13321 tree pcfloat_type_node =
13322 build_pointer_type (build_qualified_type
13323 (float_type_node, TYPE_QUAL_CONST));
13324
13325 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
13326 long_integer_type_node,
13327 pcfloat_type_node,
13328 NULL_TREE);
13329 tree void_ftype_v2sf_long_pcfloat =
13330 build_function_type_list (void_type_node,
13331 V2SF_type_node,
13332 long_integer_type_node,
13333 pcfloat_type_node,
13334 NULL_TREE);
13335 
13336 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
13337 PAIRED_BUILTIN_LX);
13338 
13339 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
13340 PAIRED_BUILTIN_STX);
13343
13344 /* Predicates. */
13345 d = bdesc_paired_preds;
13346 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
13347 {
13348 tree type;
13349
13350 if (TARGET_DEBUG_BUILTIN)
13351 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
13352 (int)i, get_insn_name (d->icode), (int)d->icode,
13353 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
13354
13355 switch (insn_data[d->icode].operand[1].mode)
13356 {
13357 case V2SFmode:
13358 type = int_ftype_int_v2sf_v2sf;
13359 break;
13360 default:
13361 gcc_unreachable ();
13362 }
13363
13364 def_builtin (d->name, type, d->code);
13365 }
13366 }
13367
13368 static void
13369 altivec_init_builtins (void)
13370 {
13371 const struct builtin_description *d;
13372 size_t i;
13373 tree ftype;
13374 tree decl;
13375
13376 tree pvoid_type_node = build_pointer_type (void_type_node);
13377
13378 tree pcvoid_type_node
13379 = build_pointer_type (build_qualified_type (void_type_node,
13380 TYPE_QUAL_CONST));
13381
13382 tree int_ftype_opaque
13383 = build_function_type_list (integer_type_node,
13384 opaque_V4SI_type_node, NULL_TREE);
13385 tree opaque_ftype_opaque
13386 = build_function_type_list (integer_type_node, NULL_TREE);
13387 tree opaque_ftype_opaque_int
13388 = build_function_type_list (opaque_V4SI_type_node,
13389 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
13390 tree opaque_ftype_opaque_opaque_int
13391 = build_function_type_list (opaque_V4SI_type_node,
13392 opaque_V4SI_type_node, opaque_V4SI_type_node,
13393 integer_type_node, NULL_TREE);
13394 tree int_ftype_int_opaque_opaque
13395 = build_function_type_list (integer_type_node,
13396 integer_type_node, opaque_V4SI_type_node,
13397 opaque_V4SI_type_node, NULL_TREE);
13398 tree int_ftype_int_v4si_v4si
13399 = build_function_type_list (integer_type_node,
13400 integer_type_node, V4SI_type_node,
13401 V4SI_type_node, NULL_TREE);
13402 tree int_ftype_int_v2di_v2di
13403 = build_function_type_list (integer_type_node,
13404 integer_type_node, V2DI_type_node,
13405 V2DI_type_node, NULL_TREE);
13406 tree void_ftype_v4si
13407 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
13408 tree v8hi_ftype_void
13409 = build_function_type_list (V8HI_type_node, NULL_TREE);
13410 tree void_ftype_void
13411 = build_function_type_list (void_type_node, NULL_TREE);
13412 tree void_ftype_int
13413 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
13414
13415 tree opaque_ftype_long_pcvoid
13416 = build_function_type_list (opaque_V4SI_type_node,
13417 long_integer_type_node, pcvoid_type_node,
13418 NULL_TREE);
13419 tree v16qi_ftype_long_pcvoid
13420 = build_function_type_list (V16QI_type_node,
13421 long_integer_type_node, pcvoid_type_node,
13422 NULL_TREE);
13423 tree v8hi_ftype_long_pcvoid
13424 = build_function_type_list (V8HI_type_node,
13425 long_integer_type_node, pcvoid_type_node,
13426 NULL_TREE);
13427 tree v4si_ftype_long_pcvoid
13428 = build_function_type_list (V4SI_type_node,
13429 long_integer_type_node, pcvoid_type_node,
13430 NULL_TREE);
13431 tree v4sf_ftype_long_pcvoid
13432 = build_function_type_list (V4SF_type_node,
13433 long_integer_type_node, pcvoid_type_node,
13434 NULL_TREE);
13435 tree v2df_ftype_long_pcvoid
13436 = build_function_type_list (V2DF_type_node,
13437 long_integer_type_node, pcvoid_type_node,
13438 NULL_TREE);
13439 tree v2di_ftype_long_pcvoid
13440 = build_function_type_list (V2DI_type_node,
13441 long_integer_type_node, pcvoid_type_node,
13442 NULL_TREE);
13443
13444 tree void_ftype_opaque_long_pvoid
13445 = build_function_type_list (void_type_node,
13446 opaque_V4SI_type_node, long_integer_type_node,
13447 pvoid_type_node, NULL_TREE);
13448 tree void_ftype_v4si_long_pvoid
13449 = build_function_type_list (void_type_node,
13450 V4SI_type_node, long_integer_type_node,
13451 pvoid_type_node, NULL_TREE);
13452 tree void_ftype_v16qi_long_pvoid
13453 = build_function_type_list (void_type_node,
13454 V16QI_type_node, long_integer_type_node,
13455 pvoid_type_node, NULL_TREE);
13456 tree void_ftype_v8hi_long_pvoid
13457 = build_function_type_list (void_type_node,
13458 V8HI_type_node, long_integer_type_node,
13459 pvoid_type_node, NULL_TREE);
13460 tree void_ftype_v4sf_long_pvoid
13461 = build_function_type_list (void_type_node,
13462 V4SF_type_node, long_integer_type_node,
13463 pvoid_type_node, NULL_TREE);
13464 tree void_ftype_v2df_long_pvoid
13465 = build_function_type_list (void_type_node,
13466 V2DF_type_node, long_integer_type_node,
13467 pvoid_type_node, NULL_TREE);
13468 tree void_ftype_v2di_long_pvoid
13469 = build_function_type_list (void_type_node,
13470 V2DI_type_node, long_integer_type_node,
13471 pvoid_type_node, NULL_TREE);
13472 tree int_ftype_int_v8hi_v8hi
13473 = build_function_type_list (integer_type_node,
13474 integer_type_node, V8HI_type_node,
13475 V8HI_type_node, NULL_TREE);
13476 tree int_ftype_int_v16qi_v16qi
13477 = build_function_type_list (integer_type_node,
13478 integer_type_node, V16QI_type_node,
13479 V16QI_type_node, NULL_TREE);
13480 tree int_ftype_int_v4sf_v4sf
13481 = build_function_type_list (integer_type_node,
13482 integer_type_node, V4SF_type_node,
13483 V4SF_type_node, NULL_TREE);
13484 tree int_ftype_int_v2df_v2df
13485 = build_function_type_list (integer_type_node,
13486 integer_type_node, V2DF_type_node,
13487 V2DF_type_node, NULL_TREE);
13488 tree v2di_ftype_v2di
13489 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
13490 tree v4si_ftype_v4si
13491 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
13492 tree v8hi_ftype_v8hi
13493 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
13494 tree v16qi_ftype_v16qi
13495 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
13496 tree v4sf_ftype_v4sf
13497 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
13498 tree v2df_ftype_v2df
13499 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
13500 tree void_ftype_pcvoid_int_int
13501 = build_function_type_list (void_type_node,
13502 pcvoid_type_node, integer_type_node,
13503 integer_type_node, NULL_TREE);
13504
13505 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
13506 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
13507 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
13508 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
13509 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
13510 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
13511 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
13512 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
13513 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
13514 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
13515 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
13516 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
13517 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
13518 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
13519 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
13520 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
13521 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
13522 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
13523 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
13524 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
13525 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
13526 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
13527 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
13528 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
13529 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
13530 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
13531 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
13532 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
13533 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
13534 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
13535
13536 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
13537 VSX_BUILTIN_LXVD2X_V2DF);
13538 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
13539 VSX_BUILTIN_LXVD2X_V2DI);
13540 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
13541 VSX_BUILTIN_LXVW4X_V4SF);
13542 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
13543 VSX_BUILTIN_LXVW4X_V4SI);
13544 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
13545 VSX_BUILTIN_LXVW4X_V8HI);
13546 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
13547 VSX_BUILTIN_LXVW4X_V16QI);
13548 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
13549 VSX_BUILTIN_STXVD2X_V2DF);
13550 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
13551 VSX_BUILTIN_STXVD2X_V2DI);
13552 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
13553 VSX_BUILTIN_STXVW4X_V4SF);
13554 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
13555 VSX_BUILTIN_STXVW4X_V4SI);
13556 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
13557 VSX_BUILTIN_STXVW4X_V8HI);
13558 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
13559 VSX_BUILTIN_STXVW4X_V16QI);
13560 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
13561 VSX_BUILTIN_VEC_LD);
13562 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
13563 VSX_BUILTIN_VEC_ST);
13564
13565 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
13566 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
13567 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
13568
13569 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
13570 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
13571 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
13572 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
13573 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
13574 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
13575 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
13576 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
13577 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
13578 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
13579 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
13580 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
13581
13582 /* Cell builtins. */
13583 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
13584 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
13585 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
13586 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
13587
13588 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
13589 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
13590 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
13591 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
13592
13593 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
13594 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
13595 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
13596 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
13597
13598 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
13599 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
13600 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
13601 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
13602
13603 /* Add the DST variants. */
13604 d = bdesc_dst;
13605 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
13606 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
13607
13608 /* Initialize the predicates. */
13609 d = bdesc_altivec_preds;
13610 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
13611 {
13612 enum machine_mode mode1;
13613 tree type;
13614
13615 if (rs6000_overloaded_builtin_p (d->code))
13616 mode1 = VOIDmode;
13617 else
13618 mode1 = insn_data[d->icode].operand[1].mode;
13619
13620 switch (mode1)
13621 {
13622 case VOIDmode:
13623 type = int_ftype_int_opaque_opaque;
13624 break;
13625 case V2DImode:
13626 type = int_ftype_int_v2di_v2di;
13627 break;
13628 case V4SImode:
13629 type = int_ftype_int_v4si_v4si;
13630 break;
13631 case V8HImode:
13632 type = int_ftype_int_v8hi_v8hi;
13633 break;
13634 case V16QImode:
13635 type = int_ftype_int_v16qi_v16qi;
13636 break;
13637 case V4SFmode:
13638 type = int_ftype_int_v4sf_v4sf;
13639 break;
13640 case V2DFmode:
13641 type = int_ftype_int_v2df_v2df;
13642 break;
13643 default:
13644 gcc_unreachable ();
13645 }
13646
13647 def_builtin (d->name, type, d->code);
13648 }
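/* Worked example (editor's addition): a predicate entry whose insn has a
   V4SImode operand 1 takes the V4SImode case above and is registered with
   type int_ftype_int_v4si_v4si, i.e.

       int __builtin_altivec_vcmpequw_p (int, vector int, vector int);

   where the leading int selects the CR6 condition to test; this is the
   form that overloads such as vec_all_eq reduce to.  */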
13649
13650 /* Initialize the abs* operators. */
13651 d = bdesc_abs;
13652 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
13653 {
13654 enum machine_mode mode0;
13655 tree type;
13656
13657 mode0 = insn_data[d->icode].operand[0].mode;
13658
13659 switch (mode0)
13660 {
13661 case V2DImode:
13662 type = v2di_ftype_v2di;
13663 break;
13664 case V4SImode:
13665 type = v4si_ftype_v4si;
13666 break;
13667 case V8HImode:
13668 type = v8hi_ftype_v8hi;
13669 break;
13670 case V16QImode:
13671 type = v16qi_ftype_v16qi;
13672 break;
13673 case V4SFmode:
13674 type = v4sf_ftype_v4sf;
13675 break;
13676 case V2DFmode:
13677 type = v2df_ftype_v2df;
13678 break;
13679 default:
13680 gcc_unreachable ();
13681 }
13682
13683 def_builtin (d->name, type, d->code);
13684 }
13685
13686 /* Initialize target builtin that implements
13687 targetm.vectorize.builtin_mask_for_load. */
13688
13689 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
13690 v16qi_ftype_long_pcvoid,
13691 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
13692 BUILT_IN_MD, NULL, NULL_TREE);
13693 TREE_READONLY (decl) = 1;
13694 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
13695 altivec_builtin_mask_for_load = decl;
13696
13697 /* Access to the vec_init patterns. */
13698 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
13699 integer_type_node, integer_type_node,
13700 integer_type_node, NULL_TREE);
13701 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
13702
13703 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
13704 short_integer_type_node,
13705 short_integer_type_node,
13706 short_integer_type_node,
13707 short_integer_type_node,
13708 short_integer_type_node,
13709 short_integer_type_node,
13710 short_integer_type_node, NULL_TREE);
13711 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
13712
13713 ftype = build_function_type_list (V16QI_type_node, char_type_node,
13714 char_type_node, char_type_node,
13715 char_type_node, char_type_node,
13716 char_type_node, char_type_node,
13717 char_type_node, char_type_node,
13718 char_type_node, char_type_node,
13719 char_type_node, char_type_node,
13720 char_type_node, char_type_node,
13721 char_type_node, NULL_TREE);
13722 def_builtin ("__builtin_vec_init_v16qi", ftype,
13723 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
13724
13725 ftype = build_function_type_list (V4SF_type_node, float_type_node,
13726 float_type_node, float_type_node,
13727 float_type_node, NULL_TREE);
13728 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
13729
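/* Usage sketch (editor's addition): with the definitions above, code
   compiled with AltiVec enabled can construct a vector from scalars via

       vector int v = __builtin_vec_init_v4si (1, 2, 3, 4);

   which expands through ALTIVEC_BUILTIN_VEC_INIT_V4SI into the target's
   vec_init pattern.  */
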
13730 /* VSX builtins. */
13731 ftype = build_function_type_list (V2DF_type_node, double_type_node,
13732 double_type_node, NULL_TREE);
13733 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
13734
13735 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
13736 intDI_type_node, NULL_TREE);
13737 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
13738
13739 /* Access to the vec_set patterns. */
13740 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
13741 intSI_type_node,
13742 integer_type_node, NULL_TREE);
13743 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
13744
13745 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
13746 intHI_type_node,
13747 integer_type_node, NULL_TREE);
13748 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
13749
13750 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
13751 intQI_type_node,
13752 integer_type_node, NULL_TREE);
13753 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
13754
13755 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
13756 float_type_node,
13757 integer_type_node, NULL_TREE);
13758 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
13759
13760 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
13761 double_type_node,
13762 integer_type_node, NULL_TREE);
13763 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
13764
13765 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
13766 intDI_type_node,
13767 integer_type_node, NULL_TREE);
13768 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
13769
13770 /* Access to the vec_extract patterns. */
13771 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
13772 integer_type_node, NULL_TREE);
13773 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
13774
13775 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
13776 integer_type_node, NULL_TREE);
13777 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
13778
13779 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
13780 integer_type_node, NULL_TREE);
13781 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
13782
13783 ftype = build_function_type_list (float_type_node, V4SF_type_node,
13784 integer_type_node, NULL_TREE);
13785 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
13786
13787 ftype = build_function_type_list (double_type_node, V2DF_type_node,
13788 integer_type_node, NULL_TREE);
13789 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
13790
13791 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
13792 integer_type_node, NULL_TREE);
13793 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
13794 }
13795
13796 static void
13797 htm_init_builtins (void)
13798 {
13799 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
13800 const struct builtin_description *d;
13801 size_t i;
13802
13803 d = bdesc_htm;
13804 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
13805 {
13806 tree op[MAX_HTM_OPERANDS], type;
13807 HOST_WIDE_INT mask = d->mask;
13808 unsigned attr = rs6000_builtin_info[d->code].attr;
13809 bool void_func = (attr & RS6000_BTC_VOID);
13810 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
13811 int nopnds = 0;
13812 tree argtype = (attr & RS6000_BTC_SPR) ? long_unsigned_type_node
13813 : unsigned_type_node;
13814
13815 if ((mask & builtin_mask) != mask)
13816 {
13817 if (TARGET_DEBUG_BUILTIN)
13818 fprintf (stderr, "htm_builtin, skip htm %s\n", d->name);
13819 continue;
13820 }
13821
13822 if (d->name == 0)
13823 {
13824 if (TARGET_DEBUG_BUILTIN)
13825 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
13826 (long unsigned) i);
13827 continue;
13828 }
13829
13830 op[nopnds++] = (void_func) ? void_type_node : argtype;
13831
13832 if (attr_args == RS6000_BTC_UNARY)
13833 op[nopnds++] = argtype;
13834 else if (attr_args == RS6000_BTC_BINARY)
13835 {
13836 op[nopnds++] = argtype;
13837 op[nopnds++] = argtype;
13838 }
13839 else if (attr_args == RS6000_BTC_TERNARY)
13840 {
13841 op[nopnds++] = argtype;
13842 op[nopnds++] = argtype;
13843 op[nopnds++] = argtype;
13844 }
13845
13846 switch (nopnds)
13847 {
13848 case 1:
13849 type = build_function_type_list (op[0], NULL_TREE);
13850 break;
13851 case 2:
13852 type = build_function_type_list (op[0], op[1], NULL_TREE);
13853 break;
13854 case 3:
13855 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
13856 break;
13857 case 4:
13858 type = build_function_type_list (op[0], op[1], op[2], op[3],
13859 NULL_TREE);
13860 break;
13861 default:
13862 gcc_unreachable ();
13863 }
13864
13865 def_builtin (d->name, type, d->code);
13866 }
13867 }
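/* Worked example (editor's sketch, for a hypothetical table entry): an HTM
   builtin whose attributes are RS6000_BTC_SPR | RS6000_BTC_UNARY gets
   argtype = long_unsigned_type_node, so op[] holds { return, arg } and the
   nopnds == 2 case registers it as

       unsigned long f (unsigned long);

   while an RS6000_BTC_VOID | RS6000_BTC_BINARY entry without the SPR bit
   would be registered as void f (unsigned int, unsigned int).  */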
13868
13869 /* Hash function for builtin functions with up to 3 arguments and a return
13870 type. */
13871 static unsigned
13872 builtin_hash_function (const void *hash_entry)
13873 {
13874 unsigned ret = 0;
13875 int i;
13876 const struct builtin_hash_struct *bh =
13877 (const struct builtin_hash_struct *) hash_entry;
13878
13879 for (i = 0; i < 4; i++)
13880 {
13881 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
13882 ret = (ret * 2) + bh->uns_p[i];
13883 }
13884
13885 return ret;
13886 }
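/* Editor's note: the loop above is a mixed-radix positional encoding of the
   four (mode, uns_p) pairs.  With M = MAX_MACHINE_MODE, modes m0..m3 and
   signedness bits u0..u3, it computes exactly

       h = (((((((2*m0 + u0)*M + m1)*2 + u1)*M + m2)*2 + u2)*M + m3)*2 + u3)

   so distinct signatures collide only when the unsigned arithmetic wraps.  */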
13887
13888 /* Compare builtin hash entries H1 and H2 for equivalence. */
13889 static int
13890 builtin_hash_eq (const void *h1, const void *h2)
13891 {
13892 const struct builtin_hash_struct *p1 = (const struct builtin_hash_struct *) h1;
13893 const struct builtin_hash_struct *p2 = (const struct builtin_hash_struct *) h2;
13894
13895 return ((p1->mode[0] == p2->mode[0])
13896 && (p1->mode[1] == p2->mode[1])
13897 && (p1->mode[2] == p2->mode[2])
13898 && (p1->mode[3] == p2->mode[3])
13899 && (p1->uns_p[0] == p2->uns_p[0])
13900 && (p1->uns_p[1] == p2->uns_p[1])
13901 && (p1->uns_p[2] == p2->uns_p[2])
13902 && (p1->uns_p[3] == p2->uns_p[3]));
13903 }
13904
13905 /* Map types for builtin functions with an explicit return type and up to 3
13906 arguments. Functions with fewer than 3 arguments use VOIDmode as the mode
13907 of the unused arguments. */
13908 static tree
13909 builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
13910 enum machine_mode mode_arg1, enum machine_mode mode_arg2,
13911 enum rs6000_builtins builtin, const char *name)
13912 {
13913 struct builtin_hash_struct h;
13914 struct builtin_hash_struct *h2;
13915 void **found;
13916 int num_args = 3;
13917 int i;
13918 tree ret_type = NULL_TREE;
13919 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
13920
13921 /* Create builtin_hash_table. */
13922 if (builtin_hash_table == NULL)
13923 builtin_hash_table = htab_create_ggc (1500, builtin_hash_function,
13924 builtin_hash_eq, NULL);
13925
13926 h.type = NULL_TREE;
13927 h.mode[0] = mode_ret;
13928 h.mode[1] = mode_arg0;
13929 h.mode[2] = mode_arg1;
13930 h.mode[3] = mode_arg2;
13931 h.uns_p[0] = 0;
13932 h.uns_p[1] = 0;
13933 h.uns_p[2] = 0;
13934 h.uns_p[3] = 0;
13935
13936 /* If the builtin produces unsigned results or takes unsigned arguments,
13937 and it may be returned as a decl for the vectorizer (such as the widening
13938 multiplies and permutes), make sure the arguments and return value
13939 are type correct. */
13940 switch (builtin)
13941 {
13942 /* unsigned 1 argument functions. */
13943 case CRYPTO_BUILTIN_VSBOX:
13944 case P8V_BUILTIN_VGBBD:
13945 h.uns_p[0] = 1;
13946 h.uns_p[1] = 1;
13947 break;
13948
13949 /* unsigned 2 argument functions. */
13950 case ALTIVEC_BUILTIN_VMULEUB_UNS:
13951 case ALTIVEC_BUILTIN_VMULEUH_UNS:
13952 case ALTIVEC_BUILTIN_VMULOUB_UNS:
13953 case ALTIVEC_BUILTIN_VMULOUH_UNS:
13954 case CRYPTO_BUILTIN_VCIPHER:
13955 case CRYPTO_BUILTIN_VCIPHERLAST:
13956 case CRYPTO_BUILTIN_VNCIPHER:
13957 case CRYPTO_BUILTIN_VNCIPHERLAST:
13958 case CRYPTO_BUILTIN_VPMSUMB:
13959 case CRYPTO_BUILTIN_VPMSUMH:
13960 case CRYPTO_BUILTIN_VPMSUMW:
13961 case CRYPTO_BUILTIN_VPMSUMD:
13962 case CRYPTO_BUILTIN_VPMSUM:
13963 h.uns_p[0] = 1;
13964 h.uns_p[1] = 1;
13965 h.uns_p[2] = 1;
13966 break;
13967
13968 /* unsigned 3 argument functions. */
13969 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
13970 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
13971 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
13972 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
13973 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
13974 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
13975 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
13976 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
13977 case VSX_BUILTIN_VPERM_16QI_UNS:
13978 case VSX_BUILTIN_VPERM_8HI_UNS:
13979 case VSX_BUILTIN_VPERM_4SI_UNS:
13980 case VSX_BUILTIN_VPERM_2DI_UNS:
13981 case VSX_BUILTIN_XXSEL_16QI_UNS:
13982 case VSX_BUILTIN_XXSEL_8HI_UNS:
13983 case VSX_BUILTIN_XXSEL_4SI_UNS:
13984 case VSX_BUILTIN_XXSEL_2DI_UNS:
13985 case CRYPTO_BUILTIN_VPERMXOR:
13986 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
13987 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
13988 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
13989 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
13990 case CRYPTO_BUILTIN_VSHASIGMAW:
13991 case CRYPTO_BUILTIN_VSHASIGMAD:
13992 case CRYPTO_BUILTIN_VSHASIGMA:
13993 h.uns_p[0] = 1;
13994 h.uns_p[1] = 1;
13995 h.uns_p[2] = 1;
13996 h.uns_p[3] = 1;
13997 break;
13998
13999 /* signed permute functions with unsigned char mask. */
14000 case ALTIVEC_BUILTIN_VPERM_16QI:
14001 case ALTIVEC_BUILTIN_VPERM_8HI:
14002 case ALTIVEC_BUILTIN_VPERM_4SI:
14003 case ALTIVEC_BUILTIN_VPERM_4SF:
14004 case ALTIVEC_BUILTIN_VPERM_2DI:
14005 case ALTIVEC_BUILTIN_VPERM_2DF:
14006 case VSX_BUILTIN_VPERM_16QI:
14007 case VSX_BUILTIN_VPERM_8HI:
14008 case VSX_BUILTIN_VPERM_4SI:
14009 case VSX_BUILTIN_VPERM_4SF:
14010 case VSX_BUILTIN_VPERM_2DI:
14011 case VSX_BUILTIN_VPERM_2DF:
14012 h.uns_p[3] = 1;
14013 break;
14014
14015 /* unsigned args, signed return. */
14016 case VSX_BUILTIN_XVCVUXDDP_UNS:
14017 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
14018 h.uns_p[1] = 1;
14019 break;
14020
14021 /* signed args, unsigned return. */
14022 case VSX_BUILTIN_XVCVDPUXDS_UNS:
14023 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
14024 h.uns_p[0] = 1;
14025 break;
14026
14027 default:
14028 break;
14029 }
14030
14031 /* Figure out how many args are present. */
14032 while (num_args > 0 && h.mode[num_args] == VOIDmode)
14033 num_args--;
14034
14035 if (num_args == 0)
14036 fatal_error ("internal error: builtin function %s had no type", name);
14037
14038 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
14039 if (!ret_type && h.uns_p[0])
14040 ret_type = builtin_mode_to_type[h.mode[0]][0];
14041
14042 if (!ret_type)
14043 fatal_error ("internal error: builtin function %s had an unexpected "
14044 "return type %s", name, GET_MODE_NAME (h.mode[0]));
14045
14046 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
14047 arg_type[i] = NULL_TREE;
14048
14049 for (i = 0; i < num_args; i++)
14050 {
14051 int m = (int) h.mode[i+1];
14052 int uns_p = h.uns_p[i+1];
14053
14054 arg_type[i] = builtin_mode_to_type[m][uns_p];
14055 if (!arg_type[i] && uns_p)
14056 arg_type[i] = builtin_mode_to_type[m][0];
14057
14058 if (!arg_type[i])
14059 fatal_error ("internal error: builtin function %s, argument %d "
14060 "had unexpected argument type %s", name, i,
14061 GET_MODE_NAME (m));
14062 }
14063
14064 found = htab_find_slot (builtin_hash_table, &h, INSERT);
14065 if (*found == NULL)
14066 {
14067 h2 = ggc_alloc_builtin_hash_struct ();
14068 *h2 = h;
14069 *found = (void *)h2;
14070
14071 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
14072 arg_type[2], NULL_TREE);
14073 }
14074
14075 return ((struct builtin_hash_struct *)(*found))->type;
14076 }
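/* Usage sketch (editor's addition), mirroring the call made for binary
   builtins below: a non-overloaded builtin whose insn operands are all
   V4SImode would be typed via

       type = builtin_function_type (V4SImode, V4SImode, V4SImode, VOIDmode,
                                     d->code, d->name);

   The trailing VOIDmode marks the third argument as absent, giving the
   cached type v4si f (v4si, v4si); later builtins with the same modes and
   signedness reuse the hash table entry instead of building a new type.  */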
14077
14078 static void
14079 rs6000_common_init_builtins (void)
14080 {
14081 const struct builtin_description *d;
14082 size_t i;
14083
14084 tree opaque_ftype_opaque = NULL_TREE;
14085 tree opaque_ftype_opaque_opaque = NULL_TREE;
14086 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
14087 tree v2si_ftype_qi = NULL_TREE;
14088 tree v2si_ftype_v2si_qi = NULL_TREE;
14089 tree v2si_ftype_int_qi = NULL_TREE;
14090 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
14091
14092 if (!TARGET_PAIRED_FLOAT)
14093 {
14094 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
14095 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
14096 }
14097
14098 /* Paired and SPE builtins are only available if you build a compiler with
14099 the appropriate options, so only create those builtins with the
14100 appropriate compiler option. Create Altivec and VSX builtins on machines
14101 with at least the general purpose extensions (970 and newer) to allow the
14102 use of the target attribute. */
14103
14104 if (TARGET_EXTRA_BUILTINS)
14105 builtin_mask |= RS6000_BTM_COMMON;
14106
14107 /* Add the ternary operators. */
14108 d = bdesc_3arg;
14109 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
14110 {
14111 tree type;
14112 HOST_WIDE_INT mask = d->mask;
14113
14114 if ((mask & builtin_mask) != mask)
14115 {
14116 if (TARGET_DEBUG_BUILTIN)
14117 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
14118 continue;
14119 }
14120
14121 if (rs6000_overloaded_builtin_p (d->code))
14122 {
14123 if (! (type = opaque_ftype_opaque_opaque_opaque))
14124 type = opaque_ftype_opaque_opaque_opaque
14125 = build_function_type_list (opaque_V4SI_type_node,
14126 opaque_V4SI_type_node,
14127 opaque_V4SI_type_node,
14128 opaque_V4SI_type_node,
14129 NULL_TREE);
14130 }
14131 else
14132 {
14133 enum insn_code icode = d->icode;
14134 if (d->name == 0)
14135 {
14136 if (TARGET_DEBUG_BUILTIN)
14137 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
14138 (long unsigned)i);
14139
14140 continue;
14141 }
14142
14143 if (icode == CODE_FOR_nothing)
14144 {
14145 if (TARGET_DEBUG_BUILTIN)
14146 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
14147 d->name);
14148
14149 continue;
14150 }
14151
14152 type = builtin_function_type (insn_data[icode].operand[0].mode,
14153 insn_data[icode].operand[1].mode,
14154 insn_data[icode].operand[2].mode,
14155 insn_data[icode].operand[3].mode,
14156 d->code, d->name);
14157 }
14158
14159 def_builtin (d->name, type, d->code);
14160 }
14161
14162 /* Add the binary operators. */
14163 d = bdesc_2arg;
14164 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14165 {
14166 enum machine_mode mode0, mode1, mode2;
14167 tree type;
14168 HOST_WIDE_INT mask = d->mask;
14169
14170 if ((mask & builtin_mask) != mask)
14171 {
14172 if (TARGET_DEBUG_BUILTIN)
14173 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
14174 continue;
14175 }
14176
14177 if (rs6000_overloaded_builtin_p (d->code))
14178 {
14179 if (! (type = opaque_ftype_opaque_opaque))
14180 type = opaque_ftype_opaque_opaque
14181 = build_function_type_list (opaque_V4SI_type_node,
14182 opaque_V4SI_type_node,
14183 opaque_V4SI_type_node,
14184 NULL_TREE);
14185 }
14186 else
14187 {
14188 enum insn_code icode = d->icode;
14189 if (d->name == 0)
14190 {
14191 if (TARGET_DEBUG_BUILTIN)
14192 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
14193 (long unsigned)i);
14194
14195 continue;
14196 }
14197
14198 if (icode == CODE_FOR_nothing)
14199 {
14200 if (TARGET_DEBUG_BUILTIN)
14201 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
14202 d->name);
14203
14204 continue;
14205 }
14206
14207 mode0 = insn_data[icode].operand[0].mode;
14208 mode1 = insn_data[icode].operand[1].mode;
14209 mode2 = insn_data[icode].operand[2].mode;
14210
14211 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
14212 {
14213 if (! (type = v2si_ftype_v2si_qi))
14214 type = v2si_ftype_v2si_qi
14215 = build_function_type_list (opaque_V2SI_type_node,
14216 opaque_V2SI_type_node,
14217 char_type_node,
14218 NULL_TREE);
14219 }
14220
14221 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
14222 && mode2 == QImode)
14223 {
14224 if (! (type = v2si_ftype_int_qi))
14225 type = v2si_ftype_int_qi
14226 = build_function_type_list (opaque_V2SI_type_node,
14227 integer_type_node,
14228 char_type_node,
14229 NULL_TREE);
14230 }
14231
14232 else
14233 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
14234 d->code, d->name);
14235 }
14236
14237 def_builtin (d->name, type, d->code);
14238 }
14239
14240 /* Add the simple unary operators. */
14241 d = bdesc_1arg;
14242 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14243 {
14244 enum machine_mode mode0, mode1;
14245 tree type;
14246 HOST_WIDE_INT mask = d->mask;
14247
14248 if ((mask & builtin_mask) != mask)
14249 {
14250 if (TARGET_DEBUG_BUILTIN)
14251 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
14252 continue;
14253 }
14254
14255 if (rs6000_overloaded_builtin_p (d->code))
14256 {
14257 if (! (type = opaque_ftype_opaque))
14258 type = opaque_ftype_opaque
14259 = build_function_type_list (opaque_V4SI_type_node,
14260 opaque_V4SI_type_node,
14261 NULL_TREE);
14262 }
14263 else
14264 {
14265 enum insn_code icode = d->icode;
14266 if (d->name == 0)
14267 {
14268 if (TARGET_DEBUG_BUILTIN)
14269 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
14270 (long unsigned)i);
14271
14272 continue;
14273 }
14274
14275 if (icode == CODE_FOR_nothing)
14276 {
14277 if (TARGET_DEBUG_BUILTIN)
14278 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
14279 d->name);
14280
14281 continue;
14282 }
14283
14284 mode0 = insn_data[icode].operand[0].mode;
14285 mode1 = insn_data[icode].operand[1].mode;
14286
14287 if (mode0 == V2SImode && mode1 == QImode)
14288 {
14289 if (! (type = v2si_ftype_qi))
14290 type = v2si_ftype_qi
14291 = build_function_type_list (opaque_V2SI_type_node,
14292 char_type_node,
14293 NULL_TREE);
14294 }
14295
14296 else
14297 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
14298 d->code, d->name);
14299 }
14300
14301 def_builtin (d->name, type, d->code);
14302 }
14303 }
14304
14305 static void
14306 rs6000_init_libfuncs (void)
14307 {
14308 if (!TARGET_IEEEQUAD)
14309 /* AIX/Darwin/64-bit Linux quad floating point routines. */
14310 if (!TARGET_XL_COMPAT)
14311 {
14312 set_optab_libfunc (add_optab, TFmode, "__gcc_qadd");
14313 set_optab_libfunc (sub_optab, TFmode, "__gcc_qsub");
14314 set_optab_libfunc (smul_optab, TFmode, "__gcc_qmul");
14315 set_optab_libfunc (sdiv_optab, TFmode, "__gcc_qdiv");
14316
14317 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
14318 {
14319 set_optab_libfunc (neg_optab, TFmode, "__gcc_qneg");
14320 set_optab_libfunc (eq_optab, TFmode, "__gcc_qeq");
14321 set_optab_libfunc (ne_optab, TFmode, "__gcc_qne");
14322 set_optab_libfunc (gt_optab, TFmode, "__gcc_qgt");
14323 set_optab_libfunc (ge_optab, TFmode, "__gcc_qge");
14324 set_optab_libfunc (lt_optab, TFmode, "__gcc_qlt");
14325 set_optab_libfunc (le_optab, TFmode, "__gcc_qle");
14326
14327 set_conv_libfunc (sext_optab, TFmode, SFmode, "__gcc_stoq");
14328 set_conv_libfunc (sext_optab, TFmode, DFmode, "__gcc_dtoq");
14329 set_conv_libfunc (trunc_optab, SFmode, TFmode, "__gcc_qtos");
14330 set_conv_libfunc (trunc_optab, DFmode, TFmode, "__gcc_qtod");
14331 set_conv_libfunc (sfix_optab, SImode, TFmode, "__gcc_qtoi");
14332 set_conv_libfunc (ufix_optab, SImode, TFmode, "__gcc_qtou");
14333 set_conv_libfunc (sfloat_optab, TFmode, SImode, "__gcc_itoq");
14334 set_conv_libfunc (ufloat_optab, TFmode, SImode, "__gcc_utoq");
14335 }
14336
14337 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
14338 set_optab_libfunc (unord_optab, TFmode, "__gcc_qunord");
14339 }
14340 else
14341 {
14342 set_optab_libfunc (add_optab, TFmode, "_xlqadd");
14343 set_optab_libfunc (sub_optab, TFmode, "_xlqsub");
14344 set_optab_libfunc (smul_optab, TFmode, "_xlqmul");
14345 set_optab_libfunc (sdiv_optab, TFmode, "_xlqdiv");
14346 }
14347 else
14348 {
14349 /* 32-bit SVR4 quad floating point routines. */
14350
14351 set_optab_libfunc (add_optab, TFmode, "_q_add");
14352 set_optab_libfunc (sub_optab, TFmode, "_q_sub");
14353 set_optab_libfunc (neg_optab, TFmode, "_q_neg");
14354 set_optab_libfunc (smul_optab, TFmode, "_q_mul");
14355 set_optab_libfunc (sdiv_optab, TFmode, "_q_div");
14356 if (TARGET_PPC_GPOPT)
14357 set_optab_libfunc (sqrt_optab, TFmode, "_q_sqrt");
14358
14359 set_optab_libfunc (eq_optab, TFmode, "_q_feq");
14360 set_optab_libfunc (ne_optab, TFmode, "_q_fne");
14361 set_optab_libfunc (gt_optab, TFmode, "_q_fgt");
14362 set_optab_libfunc (ge_optab, TFmode, "_q_fge");
14363 set_optab_libfunc (lt_optab, TFmode, "_q_flt");
14364 set_optab_libfunc (le_optab, TFmode, "_q_fle");
14365
14366 set_conv_libfunc (sext_optab, TFmode, SFmode, "_q_stoq");
14367 set_conv_libfunc (sext_optab, TFmode, DFmode, "_q_dtoq");
14368 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_q_qtos");
14369 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_q_qtod");
14370 set_conv_libfunc (sfix_optab, SImode, TFmode, "_q_qtoi");
14371 set_conv_libfunc (ufix_optab, SImode, TFmode, "_q_qtou");
14372 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_q_itoq");
14373 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_q_utoq");
14374 }
14375 }
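/* Editor's sketch of the effect: with !TARGET_IEEEQUAD and !TARGET_XL_COMPAT
   (the AIX/Darwin/64-bit Linux case), a TFmode division such as

       long double q = a / b;

   is emitted as a call to __gcc_qdiv, while on 32-bit SVR4 the same
   operation calls _q_div.  */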
14376
14377 \f
14378 /* Expand a block clear operation, and return 1 if successful. Return 0
14379 if we should let the compiler generate normal code.
14380
14381 operands[0] is the destination
14382 operands[1] is the length
14383 operands[3] is the alignment */
14384
14385 int
14386 expand_block_clear (rtx operands[])
14387 {
14388 rtx orig_dest = operands[0];
14389 rtx bytes_rtx = operands[1];
14390 rtx align_rtx = operands[3];
14391 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
14392 HOST_WIDE_INT align;
14393 HOST_WIDE_INT bytes;
14394 int offset;
14395 int clear_bytes;
14396 int clear_step;
14397
14398 /* If this is not a fixed size clear, just call memset. */
14399 if (! constp)
14400 return 0;
14401
14402 /* This must be a fixed size alignment */
14403 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
14404 align = INTVAL (align_rtx) * BITS_PER_UNIT;
14405
14406 /* Anything to clear? */
14407 bytes = INTVAL (bytes_rtx);
14408 if (bytes <= 0)
14409 return 1;
14410
14411 /* Use the builtin memset after a point, to avoid huge code bloat.
14412 When optimize_size, avoid any significant code bloat; calling
14413 memset is about 4 instructions, so allow for one instruction to
14414 load zero and three to do clearing. */
14415 if (TARGET_ALTIVEC && align >= 128)
14416 clear_step = 16;
14417 else if (TARGET_POWERPC64 && align >= 32)
14418 clear_step = 8;
14419 else if (TARGET_SPE && align >= 64)
14420 clear_step = 8;
14421 else
14422 clear_step = 4;
14423
14424 if (optimize_size && bytes > 3 * clear_step)
14425 return 0;
14426 if (! optimize_size && bytes > 8 * clear_step)
14427 return 0;
14428
14429 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
14430 {
14431 enum machine_mode mode = BLKmode;
14432 rtx dest;
14433
14434 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
14435 {
14436 clear_bytes = 16;
14437 mode = V4SImode;
14438 }
14439 else if (bytes >= 8 && TARGET_SPE && align >= 64)
14440 {
14441 clear_bytes = 8;
14442 mode = V2SImode;
14443 }
14444 else if (bytes >= 8 && TARGET_POWERPC64
14445 /* 64-bit loads and stores require word-aligned
14446 displacements. */
14447 && (align >= 64 || (!STRICT_ALIGNMENT && align >= 32)))
14448 {
14449 clear_bytes = 8;
14450 mode = DImode;
14451 }
14452 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
14453 { /* move 4 bytes */
14454 clear_bytes = 4;
14455 mode = SImode;
14456 }
14457 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
14458 { /* move 2 bytes */
14459 clear_bytes = 2;
14460 mode = HImode;
14461 }
14462 else /* move 1 byte at a time */
14463 {
14464 clear_bytes = 1;
14465 mode = QImode;
14466 }
14467
14468 dest = adjust_address (orig_dest, mode, offset);
14469
14470 emit_move_insn (dest, CONST0_RTX (mode));
14471 }
14472
14473 return 1;
14474 }
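/* Worked example (editor's addition): clearing 37 bytes with TARGET_ALTIVEC
   and 128-bit alignment gives clear_step = 16, which passes the size limits
   (37 <= 8 * 16 when not optimizing for size), and the loop above emits

       two V4SImode stores   (16 + 16 bytes)
       one SImode store      (4 bytes)
       one QImode store      (1 byte)

   i.e. 37 = 16 + 16 + 4 + 1, each a store of CONST0_RTX in the widest mode
   the remaining length and alignment allow.  */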
14475
14476 \f
14477 /* Expand a block move operation, and return 1 if successful. Return 0
14478 if we should let the compiler generate normal code.
14479
14480 operands[0] is the destination
14481 operands[1] is the source
14482 operands[2] is the length
14483 operands[3] is the alignment */
14484
14485 #define MAX_MOVE_REG 4
14486
14487 int
14488 expand_block_move (rtx operands[])
14489 {
14490 rtx orig_dest = operands[0];
14491 rtx orig_src = operands[1];
14492 rtx bytes_rtx = operands[2];
14493 rtx align_rtx = operands[3];
14494 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
14495 int align;
14496 int bytes;
14497 int offset;
14498 int move_bytes;
14499 rtx stores[MAX_MOVE_REG];
14500 int num_reg = 0;
14501
14502 /* If this is not a fixed size move, just call memcpy */
14503 if (! constp)
14504 return 0;
14505
14506 /* This must be a fixed size alignment */
14507 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
14508 align = INTVAL (align_rtx) * BITS_PER_UNIT;
14509
14510 /* Anything to move? */
14511 bytes = INTVAL (bytes_rtx);
14512 if (bytes <= 0)
14513 return 1;
14514
14515 if (bytes > rs6000_block_move_inline_limit)
14516 return 0;
14517
14518 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
14519 {
14520 union {
14521 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
14522 rtx (*mov) (rtx, rtx);
14523 } gen_func;
14524 enum machine_mode mode = BLKmode;
14525 rtx src, dest;
14526
14527 /* Altivec first, since it will be faster than a string move
14528 when it applies, and usually not significantly larger. */
14529 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
14530 {
14531 move_bytes = 16;
14532 mode = V4SImode;
14533 gen_func.mov = gen_movv4si;
14534 }
14535 else if (TARGET_SPE && bytes >= 8 && align >= 64)
14536 {
14537 move_bytes = 8;
14538 mode = V2SImode;
14539 gen_func.mov = gen_movv2si;
14540 }
14541 else if (TARGET_STRING
14542 && bytes > 24 /* move up to 32 bytes at a time */
14543 && ! fixed_regs[5]
14544 && ! fixed_regs[6]
14545 && ! fixed_regs[7]
14546 && ! fixed_regs[8]
14547 && ! fixed_regs[9]
14548 && ! fixed_regs[10]
14549 && ! fixed_regs[11]
14550 && ! fixed_regs[12])
14551 {
14552 move_bytes = (bytes > 32) ? 32 : bytes;
14553 gen_func.movmemsi = gen_movmemsi_8reg;
14554 }
14555 else if (TARGET_STRING
14556 && bytes > 16 /* move up to 24 bytes at a time */
14557 && ! fixed_regs[5]
14558 && ! fixed_regs[6]
14559 && ! fixed_regs[7]
14560 && ! fixed_regs[8]
14561 && ! fixed_regs[9]
14562 && ! fixed_regs[10])
14563 {
14564 move_bytes = (bytes > 24) ? 24 : bytes;
14565 gen_func.movmemsi = gen_movmemsi_6reg;
14566 }
14567 else if (TARGET_STRING
14568 && bytes > 8 /* move up to 16 bytes at a time */
14569 && ! fixed_regs[5]
14570 && ! fixed_regs[6]
14571 && ! fixed_regs[7]
14572 && ! fixed_regs[8])
14573 {
14574 move_bytes = (bytes > 16) ? 16 : bytes;
14575 gen_func.movmemsi = gen_movmemsi_4reg;
14576 }
14577 else if (bytes >= 8 && TARGET_POWERPC64
14578 /* 64-bit loads and stores require word-aligned
14579 displacements. */
14580 && (align >= 64 || (!STRICT_ALIGNMENT && align >= 32)))
14581 {
14582 move_bytes = 8;
14583 mode = DImode;
14584 gen_func.mov = gen_movdi;
14585 }
14586 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
14587 { /* move up to 8 bytes at a time */
14588 move_bytes = (bytes > 8) ? 8 : bytes;
14589 gen_func.movmemsi = gen_movmemsi_2reg;
14590 }
14591 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
14592 { /* move 4 bytes */
14593 move_bytes = 4;
14594 mode = SImode;
14595 gen_func.mov = gen_movsi;
14596 }
14597 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
14598 { /* move 2 bytes */
14599 move_bytes = 2;
14600 mode = HImode;
14601 gen_func.mov = gen_movhi;
14602 }
14603 else if (TARGET_STRING && bytes > 1)
14604 { /* move up to 4 bytes at a time */
14605 move_bytes = (bytes > 4) ? 4 : bytes;
14606 gen_func.movmemsi = gen_movmemsi_1reg;
14607 }
14608 else /* move 1 byte at a time */
14609 {
14610 move_bytes = 1;
14611 mode = QImode;
14612 gen_func.mov = gen_movqi;
14613 }
14614
14615 src = adjust_address (orig_src, mode, offset);
14616 dest = adjust_address (orig_dest, mode, offset);
14617
14618 if (mode != BLKmode)
14619 {
14620 rtx tmp_reg = gen_reg_rtx (mode);
14621
14622 emit_insn ((*gen_func.mov) (tmp_reg, src));
14623 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
14624 }
14625
14626 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
14627 {
14628 int i;
14629 for (i = 0; i < num_reg; i++)
14630 emit_insn (stores[i]);
14631 num_reg = 0;
14632 }
14633
14634 if (mode == BLKmode)
14635 {
14636 /* Move the address into scratch registers. The movmemsi
14637 patterns require zero offset. */
14638 if (!REG_P (XEXP (src, 0)))
14639 {
14640 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
14641 src = replace_equiv_address (src, src_reg);
14642 }
14643 set_mem_size (src, move_bytes);
14644
14645 if (!REG_P (XEXP (dest, 0)))
14646 {
14647 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
14648 dest = replace_equiv_address (dest, dest_reg);
14649 }
14650 set_mem_size (dest, move_bytes);
14651
14652 emit_insn ((*gen_func.movmemsi) (dest, src,
14653 GEN_INT (move_bytes & 31),
14654 align_rtx));
14655 }
14656 }
14657
14658 return 1;
14659 }
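/* Worked example (editor's addition): a 25-byte move with TARGET_STRING and
   r5..r12 not fixed takes the first string branch (bytes > 24), so
   move_bytes = 25 and the whole copy becomes a single movmemsi_8reg string
   operation.  Without TARGET_STRING, the same copy on a 64-bit target with
   sufficiently aligned operands is instead split into 8 + 8 + 8 + 1 byte
   moves through scratch registers, with stores flushed every MAX_MOVE_REG
   loads.  */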
14660
14661 \f
14662 /* Return a string to perform a load_multiple operation.
14663 operands[0] is the vector.
14664 operands[1] is the source address.
14665 operands[2] is the first destination register. */
14666
14667 const char *
14668 rs6000_output_load_multiple (rtx operands[3])
14669 {
14670 /* We have to handle the case where the pseudo used to contain the address
14671 is assigned to one of the output registers. */
14672 int i, j;
14673 int words = XVECLEN (operands[0], 0);
14674 rtx xop[10];
14675
14676 if (XVECLEN (operands[0], 0) == 1)
14677 return "lwz %2,0(%1)";
14678
14679 for (i = 0; i < words; i++)
14680 if (refers_to_regno_p (REGNO (operands[2]) + i,
14681 REGNO (operands[2]) + i + 1, operands[1], 0))
14682 {
14683 if (i == words-1)
14684 {
14685 xop[0] = GEN_INT (4 * (words-1));
14686 xop[1] = operands[1];
14687 xop[2] = operands[2];
14688 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
14689 return "";
14690 }
14691 else if (i == 0)
14692 {
14693 xop[0] = GEN_INT (4 * (words-1));
14694 xop[1] = operands[1];
14695 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14696 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
14697 return "";
14698 }
14699 else
14700 {
14701 for (j = 0; j < words; j++)
14702 if (j != i)
14703 {
14704 xop[0] = GEN_INT (j * 4);
14705 xop[1] = operands[1];
14706 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
14707 output_asm_insn ("lwz %2,%0(%1)", xop);
14708 }
14709 xop[0] = GEN_INT (i * 4);
14710 xop[1] = operands[1];
14711 output_asm_insn ("lwz %1,%0(%1)", xop);
14712 return "";
14713 }
14714 }
14715
14716 return "lswi %2,%1,%N0";
14717 }
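/* Example output (editor's sketch): loading 3 words into r3..r5 from the
   address in r9, where r9 is none of the destinations, falls through to
   the default return and prints

       lswi 3,9,12

   assuming the '%N' modifier prints the number of vector elements times 4.
   If the address register overlaps the last destination, the i == words-1
   branch instead emits an lswi of the first words-1 registers followed by
   a final lwz into the address register itself.  */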
14718
14719 \f
14720 /* A validation routine: say whether CODE, a condition code, and MODE
14721 match. The other alternatives either don't make sense or should
14722 never be generated. */
14723
14724 void
14725 validate_condition_mode (enum rtx_code code, enum machine_mode mode)
14726 {
14727 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
14728 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
14729 && GET_MODE_CLASS (mode) == MODE_CC);
14730
14731 /* These don't make sense. */
14732 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
14733 || mode != CCUNSmode);
14734
14735 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
14736 || mode == CCUNSmode);
14737
14738 gcc_assert (mode == CCFPmode
14739 || (code != ORDERED && code != UNORDERED
14740 && code != UNEQ && code != LTGT
14741 && code != UNGT && code != UNLT
14742 && code != UNGE && code != UNLE));
14743
14744 /* These should never be generated except for
14745 flag_finite_math_only. */
14746 gcc_assert (mode != CCFPmode
14747 || flag_finite_math_only
14748 || (code != LE && code != GE
14749 && code != UNEQ && code != LTGT
14750 && code != UNGT && code != UNLT));
14751
14752 /* These are invalid; the information is not there. */
14753 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
14754 }
14755
14756 \f
14757 /* Return 1 if ANDOP is a mask with no bits set outside of the mask
14758 required to convert the result of a rotate insn into a shift
14759 left insn of SHIFTOP bits. Both are known to be SImode CONST_INT. */
14760
14761 int
14762 includes_lshift_p (rtx shiftop, rtx andop)
14763 {
14764 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
14765
14766 shift_mask <<= INTVAL (shiftop);
14767
14768 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
14769 }
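/* Worked example (editor's addition): with SHIFTOP = 4 the function above
   computes shift_mask = ...fffffff0, so ANDOP = 0x00fffff0 is accepted
   (no bits set in the low 4 positions) while ANDOP = 0x00fffff8 is
   rejected, since bit 3 would be lost when the rotate-and-mask is
   converted to a plain shift left.  */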
14770
14771 /* Similar, but for right shift. */
14772
14773 int
14774 includes_rshift_p (rtx shiftop, rtx andop)
14775 {
14776 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
14777
14778 shift_mask >>= INTVAL (shiftop);
14779
14780 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
14781 }
14782
14783 /* Return 1 if ANDOP is a mask suitable for use with an rldic insn
14784 to perform a left shift. It must have exactly SHIFTOP least
14785 significant 0's, then one or more 1's, then zero or more 0's. */
14786
14787 int
14788 includes_rldic_lshift_p (rtx shiftop, rtx andop)
14789 {
14790 if (GET_CODE (andop) == CONST_INT)
14791 {
14792 HOST_WIDE_INT c, lsb, shift_mask;
14793
14794 c = INTVAL (andop);
14795 if (c == 0 || c == ~0)
14796 return 0;
14797
14798 shift_mask = ~0;
14799 shift_mask <<= INTVAL (shiftop);
14800
14801 /* Find the least significant one bit. */
14802 lsb = c & -c;
14803
14804 /* It must coincide with the LSB of the shift mask. */
14805 if (-lsb != shift_mask)
14806 return 0;
14807
14808 /* Invert to look for the next transition (if any). */
14809 c = ~c;
14810
14811 /* Remove the low group of ones (originally low group of zeros). */
14812 c &= -lsb;
14813
14814 /* Again find the lsb, and check we have all 1's above. */
14815 lsb = c & -c;
14816 return c == -lsb;
14817 }
14818 else
14819 return 0;
14820 }
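/* Worked example (editor's addition): SHIFTOP = 8, ANDOP = 0x0000ff00.
   Then lsb = c & -c = 0x100 and -lsb equals shift_mask (~0 << 8), so the
   mask starts exactly at bit 8.  Inverting c and clearing the low run of
   ones leaves c = ...ffff0000, and the final test c == -lsb confirms that
   everything above the run is again all ones, i.e. the mask is one
   contiguous run of 1's beginning at bit SHIFTOP.  */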
14821
14822 /* Return 1 if ANDOP is a mask suitable for use with an rldicr insn
14823 to perform a left shift. It must have SHIFTOP or more least
14824 significant 0's, with the remainder of the word 1's. */
14825
14826 int
14827 includes_rldicr_lshift_p (rtx shiftop, rtx andop)
14828 {
14829 if (GET_CODE (andop) == CONST_INT)
14830 {
14831 HOST_WIDE_INT c, lsb, shift_mask;
14832
14833 shift_mask = ~0;
14834 shift_mask <<= INTVAL (shiftop);
14835 c = INTVAL (andop);
14836
14837 /* Find the least significant one bit. */
14838 lsb = c & -c;
14839
14840 /* It must be covered by the shift mask.
14841 This test also rejects c == 0. */
14842 if ((lsb & shift_mask) == 0)
14843 return 0;
14844
14845 /* Check we have all 1's above the transition, and reject all 1's. */
14846 return c == -lsb && lsb != 1;
14847 }
14848 else
14849 return 0;
14850 }
14851
14852 /* Return 1 if the operands will generate valid arguments to the rlwimi
14853 instruction for an insert with right shift in 64-bit mode. The mask may
14854 not start on the first bit or stop on the last bit because the wrap-around
14855 effects of the instruction do not correspond to the semantics of the RTL insn. */
14856
14857 int
14858 insvdi_rshift_rlwimi_p (rtx sizeop, rtx startop, rtx shiftop)
14859 {
14860 if (INTVAL (startop) > 32
14861 && INTVAL (startop) < 64
14862 && INTVAL (sizeop) > 1
14863 && INTVAL (sizeop) + INTVAL (startop) < 64
14864 && INTVAL (shiftop) > 0
14865 && INTVAL (sizeop) + INTVAL (shiftop) < 32
14866 && (64 - (INTVAL (shiftop) & 63)) >= INTVAL (sizeop))
14867 return 1;
14868
14869 return 0;
14870 }
14871
14872 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
14873 for lfq and stfq insns, iff the registers are hard registers. */
14874
14875 int
14876 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
14877 {
14878 /* We might have been passed a SUBREG. */
14879 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
14880 return 0;
14881
14882 /* We might have been passed non-floating-point registers. */
14883 if (!FP_REGNO_P (REGNO (reg1))
14884 || !FP_REGNO_P (REGNO (reg2)))
14885 return 0;
14886
14887 return (REGNO (reg1) == REGNO (reg2) - 1);
14888 }
14889
14890 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
14891 addr1 and addr2 must be in consecutive memory locations
14892 (addr2 == addr1 + 8). */
14893
14894 int
14895 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
14896 {
14897 rtx addr1, addr2;
14898 unsigned int reg1, reg2;
14899 int offset1, offset2;
14900
14901 /* The mems cannot be volatile. */
14902 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
14903 return 0;
14904
14905 addr1 = XEXP (mem1, 0);
14906 addr2 = XEXP (mem2, 0);
14907
14908 /* Extract an offset (if used) from the first addr. */
14909 if (GET_CODE (addr1) == PLUS)
14910 {
14911 /* If not a REG, return zero. */
14912 if (GET_CODE (XEXP (addr1, 0)) != REG)
14913 return 0;
14914 else
14915 {
14916 reg1 = REGNO (XEXP (addr1, 0));
14917 /* The offset must be constant! */
14918 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
14919 return 0;
14920 offset1 = INTVAL (XEXP (addr1, 1));
14921 }
14922 }
14923 else if (GET_CODE (addr1) != REG)
14924 return 0;
14925 else
14926 {
14927 reg1 = REGNO (addr1);
14928 /* This was a simple (mem (reg)) expression. Offset is 0. */
14929 offset1 = 0;
14930 }
14931
14932 /* And now for the second addr. */
14933 if (GET_CODE (addr2) == PLUS)
14934 {
14935 /* If not a REG, return zero. */
14936 if (GET_CODE (XEXP (addr2, 0)) != REG)
14937 return 0;
14938 else
14939 {
14940 reg2 = REGNO (XEXP (addr2, 0));
14941 /* The offset must be constant. */
14942 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
14943 return 0;
14944 offset2 = INTVAL (XEXP (addr2, 1));
14945 }
14946 }
14947 else if (GET_CODE (addr2) != REG)
14948 return 0;
14949 else
14950 {
14951 reg2 = REGNO (addr2);
14952 /* This was a simple (mem (reg)) expression. Offset is 0. */
14953 offset2 = 0;
14954 }
14955
14956 /* Both of these must have the same base register. */
14957 if (reg1 != reg2)
14958 return 0;
14959
14960 /* The offset for the second addr must be 8 more than the first addr. */
14961 if (offset2 != offset1 + 8)
14962 return 0;
14963
14964 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
14965 instructions. */
14966 return 1;
14967 }
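/* Worked example (editor's addition): mem1 = (mem (plus r3 8)) and
   mem2 = (mem (plus r3 16)) share the base register r3 and satisfy
   offset2 == offset1 + 8, so the pair qualifies for lfq/stfq; the pair
   (mem r3) and (mem (plus r4 8)) is rejected because the base registers
   differ.  */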
14968 \f
14969
14970 rtx
14971 rs6000_secondary_memory_needed_rtx (enum machine_mode mode)
14972 {
14973 static bool eliminated = false;
14974 rtx ret;
14975
14976 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
14977 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
14978 else
14979 {
14980 rtx mem = cfun->machine->sdmode_stack_slot;
14981 gcc_assert (mem != NULL_RTX);
14982
14983 if (!eliminated)
14984 {
14985 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
14986 cfun->machine->sdmode_stack_slot = mem;
14987 eliminated = true;
14988 }
14989 ret = mem;
14990 }
14991
14992 if (TARGET_DEBUG_ADDR)
14993 {
14994 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
14995 GET_MODE_NAME (mode));
14996 if (!ret)
14997 fprintf (stderr, "\tNULL_RTX\n");
14998 else
14999 debug_rtx (ret);
15000 }
15001
15002 return ret;
15003 }
15004
15005 /* Return the mode to be used for memory when a secondary memory
15006 location is needed. For SDmode values we need to use DDmode, in
15007 all other cases we can use the same mode. */
15008 enum machine_mode
15009 rs6000_secondary_memory_needed_mode (enum machine_mode mode)
15010 {
15011 if (mode == SDmode)
15012 return DDmode;
15013 return mode;
15014 }
15015
15016 static tree
15017 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
15018 {
15019 /* Don't walk into types. */
15020 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
15021 {
15022 *walk_subtrees = 0;
15023 return NULL_TREE;
15024 }
15025
15026 switch (TREE_CODE (*tp))
15027 {
15028 case VAR_DECL:
15029 case PARM_DECL:
15030 case FIELD_DECL:
15031 case RESULT_DECL:
15032 case SSA_NAME:
15033 case REAL_CST:
15034 case MEM_REF:
15035 case VIEW_CONVERT_EXPR:
15036 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
15037 return *tp;
15038 break;
15039 default:
15040 break;
15041 }
15042
15043 return NULL_TREE;
15044 }
15045
15046 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
15047 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
15048 only work on the traditional altivec registers, note if an altivec register
15049 was chosen. */
15050
15051 static enum rs6000_reg_type
15052 register_to_reg_type (rtx reg, bool *is_altivec)
15053 {
15054 HOST_WIDE_INT regno;
15055 enum reg_class rclass;
15056
15057 if (GET_CODE (reg) == SUBREG)
15058 reg = SUBREG_REG (reg);
15059
15060 if (!REG_P (reg))
15061 return NO_REG_TYPE;
15062
15063 regno = REGNO (reg);
15064 if (regno >= FIRST_PSEUDO_REGISTER)
15065 {
15066 if (!lra_in_progress && !reload_in_progress && !reload_completed)
15067 return PSEUDO_REG_TYPE;
15068
15069 regno = true_regnum (reg);
15070 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
15071 return PSEUDO_REG_TYPE;
15072 }
15073
15074 gcc_assert (regno >= 0);
15075
15076 if (is_altivec && ALTIVEC_REGNO_P (regno))
15077 *is_altivec = true;
15078
15079 rclass = rs6000_regno_regclass[regno];
15080 return reg_class_to_reg_type[(int)rclass];
15081 }
15082
15083 /* Helper function for rs6000_secondary_reload to return true if a move to a
15084 different register class is really a simple move. */
15085
15086 static bool
15087 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
15088 enum rs6000_reg_type from_type,
15089 enum machine_mode mode)
15090 {
15091 int size;
15092
15093 /* Add support for various direct moves available. In this function, we only
15094 look at cases where we don't need any extra registers, and one or more
15095 simple move insns are issued. At present, 32-bit integers are not allowed
15096 in FPR/VSX registers. Single precision binary floating point is not a simple
15097 move because we need to convert to the single precision memory layout.
15098 The 4-byte SDmode can be moved. */
15099 size = GET_MODE_SIZE (mode);
15100 if (TARGET_DIRECT_MOVE
15101 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
15102 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
15103 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
15104 return true;
15105
15106 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
15107 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
15108 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
15109 return true;
15110
15111 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
15112 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
15113 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
15114 return true;
15115
15116 return false;
15117 }
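/* Editor's sketch: on a 64-bit power8 target (TARGET_DIRECT_MOVE and
   TARGET_POWERPC64), a DImode copy between a GPR and a VSX register
   satisfies the first test above and is treated as a plain mtvsrd/mfvsrd
   style move needing no scratch register, while a 4-byte SImode GPR<->VSX
   copy fails the size check and falls through to the more expensive
   reload paths.  */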
15118
15119 /* Power8 helper function for rs6000_secondary_reload; handle all of the
15120 special direct moves that involve allocating an extra register. Return
15121 true if such a move is possible, recording the helper's insn code and
15122 cost in SRI, or false if not. */
15123
15124 static bool
15125 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
15126 enum rs6000_reg_type from_type,
15127 enum machine_mode mode,
15128 secondary_reload_info *sri,
15129 bool altivec_p)
15130 {
15131 bool ret = false;
15132 enum insn_code icode = CODE_FOR_nothing;
15133 int cost = 0;
15134 int size = GET_MODE_SIZE (mode);
15135
15136 if (TARGET_POWERPC64)
15137 {
15138 if (size == 16)
15139 {
15140 /* Handle moving 128-bit values from GPRs to VSX registers on
15141 power8 when running in 64-bit mode using XXPERMDI to glue the two
15142 64-bit values back together. */
15143 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
15144 {
15145 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
15146 icode = reg_addr[mode].reload_vsx_gpr;
15147 }
15148
15149 /* Handle moving 128-bit values from VSX registers to GPRs on
15150 power8 when running in 64-bit mode using XXPERMDI to get access to the
15151 bottom 64-bit value. */
15152 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
15153 {
15154 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
15155 icode = reg_addr[mode].reload_gpr_vsx;
15156 }
15157 }
15158
15159 else if (mode == SFmode)
15160 {
15161 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
15162 {
15163 cost = 3; /* xscvdpspn, mfvsrd, and. */
15164 icode = reg_addr[mode].reload_gpr_vsx;
15165 }
15166
15167 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
15168 {
15169 cost = 2; /* mtvsrz, xscvspdpn. */
15170 icode = reg_addr[mode].reload_vsx_gpr;
15171 }
15172 }
15173 }
15174
15196 if (!TARGET_POWERPC64 && size == 8)
15197 {
15198 /* Handle moving 64-bit values from GPRs to floating point registers on
15199 power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
15200 values back together. Altivec register classes must be handled
15201 specially since a different instruction is used, and the secondary
15202 reload support requires a single instruction class in the scratch
15203 register constraint. However, right now TFmode is not allowed in
15204 Altivec registers, so the pattern will never match. */
15205 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
15206 {
15207 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
15208 icode = reg_addr[mode].reload_fpr_gpr;
15209 }
15210 }
15211
15212 if (icode != CODE_FOR_nothing)
15213 {
15214 ret = true;
15215 if (sri)
15216 {
15217 sri->icode = icode;
15218 sri->extra_cost = cost;
15219 }
15220 }
15221
15222 return ret;
15223 }
15224
15225 /* Return whether a move between two register classes can be done either
15226 directly (simple move) or via a pattern that uses a single extra temporary
15227 (using power8's direct move in this case). */
15228
15229 static bool
15230 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
15231 enum rs6000_reg_type from_type,
15232 enum machine_mode mode,
15233 secondary_reload_info *sri,
15234 bool altivec_p)
15235 {
15236 /* Fall back to load/store reloads if either type is not a register. */
15237 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
15238 return false;
15239
15240 /* If we haven't allocated registers yet, assume the move can be done for the
15241 standard register types. */
15242 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
15243 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
15244 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
15245 return true;
15246
15247 /* A move within the same set of registers is a simple move for
15248 non-specialized registers. */
15249 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
15250 return true;
15251
15252 /* Check whether a simple move can be done directly. */
15253 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
15254 {
15255 if (sri)
15256 {
15257 sri->icode = CODE_FOR_nothing;
15258 sri->extra_cost = 0;
15259 }
15260 return true;
15261 }
15262
15263 /* Now check if we can do it in a few steps. */
15264 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
15265 altivec_p);
15266 }
15267
15268 /* Inform reload about cases where moving X with a mode MODE to a register in
15269 RCLASS requires an extra scratch or immediate register. Return the class
15270 needed for the immediate register.
15271
15272 For VSX and Altivec, we may need a register to convert sp+offset into
15273 reg+sp.
15274
15275 For misaligned 64-bit gpr loads and stores we need a register to
15276 convert an offset address to indirect. */
15277
15278 static reg_class_t
15279 rs6000_secondary_reload (bool in_p,
15280 rtx x,
15281 reg_class_t rclass_i,
15282 enum machine_mode mode,
15283 secondary_reload_info *sri)
15284 {
15285 enum reg_class rclass = (enum reg_class) rclass_i;
15286 reg_class_t ret = ALL_REGS;
15287 enum insn_code icode;
15288 bool default_p = false;
15289
15290 sri->icode = CODE_FOR_nothing;
15291 icode = ((in_p)
15292 ? reg_addr[mode].reload_load
15293 : reg_addr[mode].reload_store);
15294
15295 if (REG_P (x) || register_operand (x, mode))
15296 {
15297 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
15298 bool altivec_p = (rclass == ALTIVEC_REGS);
15299 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
15300
15301 if (!in_p)
15302 {
15303 enum rs6000_reg_type exchange = to_type;
15304 to_type = from_type;
15305 from_type = exchange;
15306 }
15307
15308 /* Can we do a direct move of some sort? */
15309 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
15310 altivec_p))
15311 {
15312 icode = (enum insn_code)sri->icode;
15313 default_p = false;
15314 ret = NO_REGS;
15315 }
15316 }
15317
15318 /* Handle vector moves with reload helper functions. */
15319 if (ret == ALL_REGS && icode != CODE_FOR_nothing)
15320 {
15321 ret = NO_REGS;
15322 sri->icode = CODE_FOR_nothing;
15323 sri->extra_cost = 0;
15324
15325 if (GET_CODE (x) == MEM)
15326 {
15327 rtx addr = XEXP (x, 0);
15328
15329 /* Loads to and stores from GPRs can do reg+offset, and wouldn't need
15330 an extra register in that case, but they would need an extra
15331 register if the addressing is reg+reg or (reg+reg)&(-16). Special
15332 case load/store quad. */
15333 if (rclass == GENERAL_REGS || rclass == BASE_REGS)
15334 {
15335 if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY
15336 && GET_MODE_SIZE (mode) == 16
15337 && quad_memory_operand (x, mode))
15338 {
15339 sri->icode = icode;
15340 sri->extra_cost = 2;
15341 }
15342
15343 else if (!legitimate_indirect_address_p (addr, false)
15344 && !rs6000_legitimate_offset_address_p (PTImode, addr,
15345 false, true))
15346 {
15347 sri->icode = icode;
/* Account for splitting the loads, and converting the
address from reg+reg to reg.  */
15350 sri->extra_cost = (((TARGET_64BIT) ? 3 : 5)
15351 + ((GET_CODE (addr) == AND) ? 1 : 0));
15352 }
15353 }
15354 /* Allow scalar loads to/from the traditional floating point
15355 registers, even if VSX memory is set. */
15356 else if ((rclass == FLOAT_REGS || rclass == NO_REGS)
15357 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
15358 && (legitimate_indirect_address_p (addr, false)
|| legitimate_indexed_address_p (addr, false)
15360 || rs6000_legitimate_offset_address_p (mode, addr,
15361 false, true)))
;
15364 /* Loads to and stores from vector registers can only do reg+reg
15365 addressing. Altivec registers can also do (reg+reg)&(-16). Allow
15366 scalar modes loading up the traditional floating point registers
15367 to use offset addresses. */
15368 else if (rclass == VSX_REGS || rclass == ALTIVEC_REGS
15369 || rclass == FLOAT_REGS || rclass == NO_REGS)
15370 {
15371 if (!VECTOR_MEM_ALTIVEC_P (mode)
15372 && GET_CODE (addr) == AND
15373 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15374 && INTVAL (XEXP (addr, 1)) == -16
15375 && (legitimate_indirect_address_p (XEXP (addr, 0), false)
15376 || legitimate_indexed_address_p (XEXP (addr, 0), false)))
15377 {
15378 sri->icode = icode;
15379 sri->extra_cost = ((GET_CODE (XEXP (addr, 0)) == PLUS)
15380 ? 2 : 1);
15381 }
15382 else if (!legitimate_indirect_address_p (addr, false)
15383 && (rclass == NO_REGS
15384 || !legitimate_indexed_address_p (addr, false)))
15385 {
15386 sri->icode = icode;
15387 sri->extra_cost = 1;
15388 }
15389 else
15390 icode = CODE_FOR_nothing;
15391 }
15392 /* Any other loads, including to pseudo registers which haven't been
15393 assigned to a register yet, default to require a scratch
15394 register. */
15395 else
15396 {
15397 sri->icode = icode;
15398 sri->extra_cost = 2;
15399 }
15400 }
15401 else if (REG_P (x))
15402 {
15403 int regno = true_regnum (x);
15404
15405 icode = CODE_FOR_nothing;
15406 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
15407 default_p = true;
15408 else
15409 {
15410 enum reg_class xclass = REGNO_REG_CLASS (regno);
15411 enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass];
15412 enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass];
15413
15414 /* If memory is needed, use default_secondary_reload to create the
15415 stack slot. */
15416 if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1))
15417 default_p = true;
15418 else
15419 ret = NO_REGS;
15420 }
15421 }
15422 else
15423 default_p = true;
15424 }
15425 else if (TARGET_POWERPC64
15426 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
15427 && MEM_P (x)
15428 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
15429 {
15430 rtx addr = XEXP (x, 0);
15431 rtx off = address_offset (addr);
15432
15433 if (off != NULL_RTX)
15434 {
15435 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
15436 unsigned HOST_WIDE_INT offset = INTVAL (off);
15437
15438 /* We need a secondary reload when our legitimate_address_p
15439 says the address is good (as otherwise the entire address
15440 will be reloaded), and the offset is not a multiple of
15441 four or we have an address wrap. Address wrap will only
15442 occur for LO_SUMs since legitimate_offset_address_p
15443 rejects addresses for 16-byte mems that will wrap. */
15444 if (GET_CODE (addr) == LO_SUM
15445 ? (1 /* legitimate_address_p allows any offset for lo_sum */
15446 && ((offset & 3) != 0
15447 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
15448 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
15449 && (offset & 3) != 0))
15450 {
15451 if (in_p)
15452 sri->icode = CODE_FOR_reload_di_load;
15453 else
15454 sri->icode = CODE_FOR_reload_di_store;
15455 sri->extra_cost = 2;
15456 ret = NO_REGS;
15457 }
15458 else
15459 default_p = true;
15460 }
15461 else
15462 default_p = true;
15463 }
15464 else if (!TARGET_POWERPC64
15465 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
15466 && MEM_P (x)
15467 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
15468 {
15469 rtx addr = XEXP (x, 0);
15470 rtx off = address_offset (addr);
15471
15472 if (off != NULL_RTX)
15473 {
15474 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
15475 unsigned HOST_WIDE_INT offset = INTVAL (off);
15476
15477 /* We need a secondary reload when our legitimate_address_p
15478 says the address is good (as otherwise the entire address
15479 will be reloaded), and we have a wrap.
15480
15481 legitimate_lo_sum_address_p allows LO_SUM addresses to
15482 have any offset so test for wrap in the low 16 bits.
15483
15484 legitimate_offset_address_p checks for the range
15485 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
15486 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
15487 [0x7ff4,0x7fff] respectively, so test for the
15488 intersection of these ranges, [0x7ffc,0x7fff] and
15489 [0x7ff4,0x7ff7] respectively.
15490
15491 Note that the address we see here may have been
15492 manipulated by legitimize_reload_address. */
15493 if (GET_CODE (addr) == LO_SUM
15494 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
15495 : offset - (0x8000 - extra) < UNITS_PER_WORD)
15496 {
15497 if (in_p)
15498 sri->icode = CODE_FOR_reload_si_load;
15499 else
15500 sri->icode = CODE_FOR_reload_si_store;
15501 sri->extra_cost = 2;
15502 ret = NO_REGS;
15503 }
15504 else
15505 default_p = true;
15506 }
15507 else
15508 default_p = true;
15509 }
15510 else
15511 default_p = true;
15512
15513 if (default_p)
15514 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
15515
15516 gcc_assert (ret != ALL_REGS);
15517
15518 if (TARGET_DEBUG_ADDR)
15519 {
15520 fprintf (stderr,
15521 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
15522 "mode = %s",
15523 reg_class_names[ret],
15524 in_p ? "true" : "false",
15525 reg_class_names[rclass],
15526 GET_MODE_NAME (mode));
15527
15528 if (default_p)
15529 fprintf (stderr, ", default secondary reload");
15530
15531 if (sri->icode != CODE_FOR_nothing)
15532 fprintf (stderr, ", reload func = %s, extra cost = %d\n",
15533 insn_data[sri->icode].name, sri->extra_cost);
15534 else
15535 fprintf (stderr, "\n");
15536
15537 debug_rtx (x);
15538 }
15539
15540 return ret;
15541 }
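
/* Illustrative sketch, not part of the compiler: the unsigned comparison in
   the 32-bit branch above, offset - (0x8000 - extra) < UNITS_PER_WORD, picks
   out exactly the wrap ranges named in the comment.  The demo below assumes
   a 32-bit target (UNITS_PER_WORD == 4) and is meant to be compiled
   stand-alone, not inside this file.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned int units_per_word = 4;
  unsigned int sizes[2] = { 8, 16 };
  unsigned int i, offset;

  for (i = 0; i < 2; i++)
    {
      unsigned int extra = sizes[i] - units_per_word;

      printf ("mode size %u: secondary reload for offsets", sizes[i]);

      /* Offsets below 0x8000 - extra wrap around to huge unsigned values
         and fail the test, so only the last UNITS_PER_WORD offsets hit.  */
      for (offset = 0x7ff0; offset <= 0x7fff; offset++)
        if (offset - (0x8000 - extra) < units_per_word)
          printf (" 0x%x", offset);
      printf ("\n");
    }

  /* Prints 0x7ffc..0x7fff for mode size 8 and 0x7ff4..0x7ff7 for mode size
     16, matching the ranges given in the comment above.  */
  return 0;
}
#endif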
15542
15543 /* Better tracing for rs6000_secondary_reload_inner. */
15544
15545 static void
15546 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
15547 bool store_p)
15548 {
15549 rtx set, clobber;
15550
15551 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
15552
15553 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
15554 store_p ? "store" : "load");
15555
15556 if (store_p)
15557 set = gen_rtx_SET (VOIDmode, mem, reg);
15558 else
15559 set = gen_rtx_SET (VOIDmode, reg, mem);
15560
15561 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15562 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15563 }
15564
15565 static void
15566 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
15567 bool store_p)
15568 {
15569 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
15570 gcc_unreachable ();
15571 }
15572
/* Fix up reload addresses for Altivec or VSX loads/stores to change SP+offset
to SP+reg addressing.  */
15575
15576 void
15577 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
15578 {
15579 int regno = true_regnum (reg);
15580 enum machine_mode mode = GET_MODE (reg);
15581 enum reg_class rclass;
15582 rtx addr;
15583 rtx and_op2 = NULL_RTX;
15584 rtx addr_op1;
15585 rtx addr_op2;
15586 rtx scratch_or_premodify = scratch;
15587 rtx and_rtx;
15588 rtx cc_clobber;
15589
15590 if (TARGET_DEBUG_ADDR)
15591 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
15592
15593 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
15594 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
15595
15596 if (GET_CODE (mem) != MEM)
15597 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
15598
15599 rclass = REGNO_REG_CLASS (regno);
15600 addr = XEXP (mem, 0);
15601
15602 switch (rclass)
15603 {
/* GPRs can handle reg + small constant; all other addresses need to use
the scratch register.  */
15606 case GENERAL_REGS:
15607 case BASE_REGS:
15608 if (GET_CODE (addr) == AND)
15609 {
15610 and_op2 = XEXP (addr, 1);
15611 addr = XEXP (addr, 0);
15612 }
15613
15614 if (GET_CODE (addr) == PRE_MODIFY)
15615 {
15616 scratch_or_premodify = XEXP (addr, 0);
15617 if (!REG_P (scratch_or_premodify))
15618 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
15619
15620 if (GET_CODE (XEXP (addr, 1)) != PLUS)
15621 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
15622
15623 addr = XEXP (addr, 1);
15624 }
15625
15626 if (GET_CODE (addr) == PLUS
15627 && (and_op2 != NULL_RTX
15628 || !rs6000_legitimate_offset_address_p (PTImode, addr,
15629 false, true)))
15630 {
15631 addr_op1 = XEXP (addr, 0);
15632 addr_op2 = XEXP (addr, 1);
15633 if (!legitimate_indirect_address_p (addr_op1, false))
15634 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
15635
15636 if (!REG_P (addr_op2)
15637 && (GET_CODE (addr_op2) != CONST_INT
15638 || !satisfies_constraint_I (addr_op2)))
15639 {
15640 if (TARGET_DEBUG_ADDR)
15641 {
15642 fprintf (stderr,
15643 "\nMove plus addr to register %s, mode = %s: ",
15644 rs6000_reg_names[REGNO (scratch)],
15645 GET_MODE_NAME (mode));
15646 debug_rtx (addr_op2);
15647 }
15648 rs6000_emit_move (scratch, addr_op2, Pmode);
15649 addr_op2 = scratch;
15650 }
15651
15652 emit_insn (gen_rtx_SET (VOIDmode,
15653 scratch_or_premodify,
15654 gen_rtx_PLUS (Pmode,
15655 addr_op1,
15656 addr_op2)));
15657
15658 addr = scratch_or_premodify;
15659 scratch_or_premodify = scratch;
15660 }
15661 else if (!legitimate_indirect_address_p (addr, false)
15662 && !rs6000_legitimate_offset_address_p (PTImode, addr,
15663 false, true))
15664 {
15665 if (TARGET_DEBUG_ADDR)
15666 {
15667 fprintf (stderr, "\nMove addr to register %s, mode = %s: ",
15668 rs6000_reg_names[REGNO (scratch_or_premodify)],
15669 GET_MODE_NAME (mode));
15670 debug_rtx (addr);
15671 }
15672 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
15673 addr = scratch_or_premodify;
15674 scratch_or_premodify = scratch;
15675 }
15676 break;
15677
15678 /* Float registers can do offset+reg addressing for scalar types. */
15679 case FLOAT_REGS:
15680 if (legitimate_indirect_address_p (addr, false) /* reg */
15681 || legitimate_indexed_address_p (addr, false) /* reg+reg */
15682 || ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
15683 && and_op2 == NULL_RTX
15684 && scratch_or_premodify == scratch
15685 && rs6000_legitimate_offset_address_p (mode, addr, false, false)))
15686 break;
15687
15688 /* If this isn't a legacy floating point load/store, fall through to the
15689 VSX defaults. */
15690
15691 /* VSX/Altivec registers can only handle reg+reg addressing. Move other
15692 addresses into a scratch register. */
15693 case VSX_REGS:
15694 case ALTIVEC_REGS:
15695
15696 /* With float regs, we need to handle the AND ourselves, since we can't
15697 use the Altivec instruction with an implicit AND -16. Allow scalar
15698 loads to float registers to use reg+offset even if VSX. */
15699 if (GET_CODE (addr) == AND
15700 && (rclass != ALTIVEC_REGS || GET_MODE_SIZE (mode) != 16
15701 || GET_CODE (XEXP (addr, 1)) != CONST_INT
15702 || INTVAL (XEXP (addr, 1)) != -16
15703 || !VECTOR_MEM_ALTIVEC_P (mode)))
15704 {
15705 and_op2 = XEXP (addr, 1);
15706 addr = XEXP (addr, 0);
15707 }
15708
15709 /* If we aren't using a VSX load, save the PRE_MODIFY register and use it
15710 as the address later. */
15711 if (GET_CODE (addr) == PRE_MODIFY
15712 && ((ALTIVEC_OR_VSX_VECTOR_MODE (mode)
15713 && (rclass != FLOAT_REGS
15714 || (GET_MODE_SIZE (mode) != 4 && GET_MODE_SIZE (mode) != 8)))
15715 || and_op2 != NULL_RTX
15716 || !legitimate_indexed_address_p (XEXP (addr, 1), false)))
15717 {
15718 scratch_or_premodify = XEXP (addr, 0);
15719 if (!legitimate_indirect_address_p (scratch_or_premodify, false))
15720 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
15721
15722 if (GET_CODE (XEXP (addr, 1)) != PLUS)
15723 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
15724
15725 addr = XEXP (addr, 1);
15726 }
15727
15728 if (legitimate_indirect_address_p (addr, false) /* reg */
15729 || legitimate_indexed_address_p (addr, false) /* reg+reg */
15730 || (GET_CODE (addr) == AND /* Altivec memory */
15731 && rclass == ALTIVEC_REGS
15732 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15733 && INTVAL (XEXP (addr, 1)) == -16
15734 && (legitimate_indirect_address_p (XEXP (addr, 0), false)
15735 || legitimate_indexed_address_p (XEXP (addr, 0), false))))
15736 ;
15737
15738 else if (GET_CODE (addr) == PLUS)
15739 {
15740 addr_op1 = XEXP (addr, 0);
15741 addr_op2 = XEXP (addr, 1);
15742 if (!REG_P (addr_op1))
15743 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
15744
15745 if (TARGET_DEBUG_ADDR)
15746 {
15747 fprintf (stderr, "\nMove plus addr to register %s, mode = %s: ",
15748 rs6000_reg_names[REGNO (scratch)], GET_MODE_NAME (mode));
15749 debug_rtx (addr_op2);
15750 }
15751 rs6000_emit_move (scratch, addr_op2, Pmode);
15752 emit_insn (gen_rtx_SET (VOIDmode,
15753 scratch_or_premodify,
15754 gen_rtx_PLUS (Pmode,
15755 addr_op1,
15756 scratch)));
15757 addr = scratch_or_premodify;
15758 scratch_or_premodify = scratch;
15759 }
15760
15761 else if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == CONST
15762 || GET_CODE (addr) == CONST_INT || GET_CODE (addr) == LO_SUM
15763 || REG_P (addr))
15764 {
15765 if (TARGET_DEBUG_ADDR)
15766 {
15767 fprintf (stderr, "\nMove addr to register %s, mode = %s: ",
15768 rs6000_reg_names[REGNO (scratch_or_premodify)],
15769 GET_MODE_NAME (mode));
15770 debug_rtx (addr);
15771 }
15772
15773 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
15774 addr = scratch_or_premodify;
15775 scratch_or_premodify = scratch;
15776 }
15777
15778 else
15779 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
15780
15781 break;
15782
15783 default:
15784 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
15785 }
15786
/* If the original address involved a pre-modify that we couldn't use with the
VSX memory instruction with update, and we haven't already taken care of it,
store the address in the pre-modify register and use that as the
address.  */
15791 if (scratch_or_premodify != scratch && scratch_or_premodify != addr)
15792 {
15793 emit_insn (gen_rtx_SET (VOIDmode, scratch_or_premodify, addr));
15794 addr = scratch_or_premodify;
15795 }
15796
15797 /* If the original address involved an AND -16 and we couldn't use an ALTIVEC
15798 memory instruction, recreate the AND now, including the clobber which is
15799 generated by the general ANDSI3/ANDDI3 patterns for the
15800 andi. instruction. */
15801 if (and_op2 != NULL_RTX)
15802 {
15803 if (! legitimate_indirect_address_p (addr, false))
15804 {
15805 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
15806 addr = scratch;
15807 }
15808
15809 if (TARGET_DEBUG_ADDR)
15810 {
15811 fprintf (stderr, "\nAnd addr to register %s, mode = %s: ",
15812 rs6000_reg_names[REGNO (scratch)], GET_MODE_NAME (mode));
15813 debug_rtx (and_op2);
15814 }
15815
15816 and_rtx = gen_rtx_SET (VOIDmode,
15817 scratch,
15818 gen_rtx_AND (Pmode,
15819 addr,
15820 and_op2));
15821
15822 cc_clobber = gen_rtx_CLOBBER (CCmode, gen_rtx_SCRATCH (CCmode));
15823 emit_insn (gen_rtx_PARALLEL (VOIDmode,
15824 gen_rtvec (2, and_rtx, cc_clobber)));
15825 addr = scratch;
15826 }
15827
15828 /* Adjust the address if it changed. */
15829 if (addr != XEXP (mem, 0))
15830 {
15831 mem = replace_equiv_address_nv (mem, addr);
15832 if (TARGET_DEBUG_ADDR)
15833 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
15834 }
15835
15836 /* Now create the move. */
15837 if (store_p)
15838 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
15839 else
15840 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
15841
15842 return;
15843 }
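
/* Illustrative sketch, not part of the compiler: lvx/stvx ignore the low
   four address bits, which is why an (and (plus reg reg) -16) address can
   either be left to the instruction or, as above, recreated with an
   explicit masking insn.  Plain ISO C, compiled stand-alone.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned long addr = 0x1003f;

  /* AND with -16 clears the low four bits, aligning down to a 16-byte
     boundary -- exactly what the hardware does implicitly for lvx/stvx.  */
  unsigned long aligned = addr & (unsigned long) -16;

  printf ("0x%lx -> 0x%lx\n", addr, aligned);  /* 0x1003f -> 0x10030 */
  return 0;
}
#endif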
15844
15845 /* Convert reloads involving 64-bit gprs and misaligned offset
15846 addressing, or multiple 32-bit gprs and offsets that are too large,
15847 to use indirect addressing. */
15848
15849 void
15850 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
15851 {
15852 int regno = true_regnum (reg);
15853 enum reg_class rclass;
15854 rtx addr;
15855 rtx scratch_or_premodify = scratch;
15856
15857 if (TARGET_DEBUG_ADDR)
15858 {
15859 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
15860 store_p ? "store" : "load");
15861 fprintf (stderr, "reg:\n");
15862 debug_rtx (reg);
15863 fprintf (stderr, "mem:\n");
15864 debug_rtx (mem);
15865 fprintf (stderr, "scratch:\n");
15866 debug_rtx (scratch);
15867 }
15868
15869 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
15870 gcc_assert (GET_CODE (mem) == MEM);
15871 rclass = REGNO_REG_CLASS (regno);
15872 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
15873 addr = XEXP (mem, 0);
15874
15875 if (GET_CODE (addr) == PRE_MODIFY)
15876 {
15877 scratch_or_premodify = XEXP (addr, 0);
15878 gcc_assert (REG_P (scratch_or_premodify));
15879 addr = XEXP (addr, 1);
15880 }
15881 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
15882
15883 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
15884
15885 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
15886
15887 /* Now create the move. */
15888 if (store_p)
15889 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
15890 else
15891 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
15892
15893 return;
15894 }
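
/* Illustrative sketch, not part of the compiler: the conversion above is
   the C-level equivalent of turning *(base + off) into tmp = base + off;
   *tmp, so the final access is purely register-indirect.  Names below are
   hypothetical; compile stand-alone.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int buf[4] = { 10, 20, 30, 40 };
  int *base = buf;
  long off = 2;

  int *scratch = base + off;   /* plays the role of the scratch register */
  printf ("%d\n", *scratch);   /* the register-indirect load; prints 30 */
  return 0;
}
#endif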
15895
15896 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
15897 this function has any SDmode references. If we are on a power7 or later, we
don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
15899 can load/store the value. */
15900
15901 static void
15902 rs6000_alloc_sdmode_stack_slot (void)
15903 {
15904 tree t;
15905 basic_block bb;
15906 gimple_stmt_iterator gsi;
15907
15908 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
15909 /* We use a different approach for dealing with the secondary
15910 memory in LRA. */
15911 if (ira_use_lra_p)
15912 return;
15913
15914 if (TARGET_NO_SDMODE_STACK)
15915 return;
15916
15917 FOR_EACH_BB (bb)
15918 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
15919 {
15920 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
15921 if (ret)
15922 {
15923 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
15924 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
15925 SDmode, 0);
15926 return;
15927 }
15928 }
15929
15930 /* Check for any SDmode parameters of the function. */
15931 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
15932 {
15933 if (TREE_TYPE (t) == error_mark_node)
15934 continue;
15935
15936 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
15937 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
15938 {
15939 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
15940 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
15941 SDmode, 0);
15942 return;
15943 }
15944 }
15945 }
15946
15947 static void
15948 rs6000_instantiate_decls (void)
15949 {
15950 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
15951 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
15952 }
15953
15954 /* Given an rtx X being reloaded into a reg required to be
15955 in class CLASS, return the class of reg to actually use.
15956 In general this is just CLASS; but on some machines
15957 in some cases it is preferable to use a more restrictive class.
15958
15959 On the RS/6000, we have to return NO_REGS when we want to reload a
15960 floating-point CONST_DOUBLE to force it to be copied to memory.
15961
15962 We also don't want to reload integer values into floating-point
15963 registers if we can at all help it. In fact, this can
cause reload to die if it tries to generate a reload of CTR
into an FP register and discovers it doesn't have the memory location
15966 required.
15967
15968 ??? Would it be a good idea to have reload do the converse, that is
15969 try to reload floating modes into FP registers if possible?
15970 */
15971
15972 static enum reg_class
15973 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
15974 {
15975 enum machine_mode mode = GET_MODE (x);
15976
15977 if (TARGET_VSX && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
15978 return rclass;
15979
15980 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
15981 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
15982 && easy_vector_constant (x, mode))
15983 return ALTIVEC_REGS;
15984
15985 if (CONSTANT_P (x) && reg_classes_intersect_p (rclass, FLOAT_REGS))
15986 return NO_REGS;
15987
15988 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
15989 return GENERAL_REGS;
15990
15991 /* For VSX, prefer the traditional registers for 64-bit values because we can
15992 use the non-VSX loads. Prefer the Altivec registers if Altivec is
15993 handling the vector operations (i.e. V16QI, V8HI, and V4SI), or if we
prefer Altivec loads.  */
15995 if (rclass == VSX_REGS)
15996 {
15997 if (GET_MODE_SIZE (mode) <= 8)
15998 return FLOAT_REGS;
15999
16000 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode))
16001 return ALTIVEC_REGS;
16002
16003 return rclass;
16004 }
16005
16006 return rclass;
16007 }
16008
16009 /* Debug version of rs6000_preferred_reload_class. */
16010 static enum reg_class
16011 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
16012 {
16013 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
16014
16015 fprintf (stderr,
16016 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
16017 "mode = %s, x:\n",
16018 reg_class_names[ret], reg_class_names[rclass],
16019 GET_MODE_NAME (GET_MODE (x)));
16020 debug_rtx (x);
16021
16022 return ret;
16023 }
16024
16025 /* If we are copying between FP or AltiVec registers and anything else, we need
a memory location.  The exception is when we are targeting ppc64 and the
direct moves between FPRs and GPRs are available.  Also, under VSX, you
16028 can copy vector registers from the FP register set to the Altivec register
16029 set and vice versa. */
16030
16031 static bool
16032 rs6000_secondary_memory_needed (enum reg_class from_class,
16033 enum reg_class to_class,
16034 enum machine_mode mode)
16035 {
16036 enum rs6000_reg_type from_type, to_type;
16037 bool altivec_p = ((from_class == ALTIVEC_REGS)
16038 || (to_class == ALTIVEC_REGS));
16039
/* If a simple/direct move is available, we don't need secondary memory.  */
16041 from_type = reg_class_to_reg_type[(int)from_class];
16042 to_type = reg_class_to_reg_type[(int)to_class];
16043
16044 if (rs6000_secondary_reload_move (to_type, from_type, mode,
16045 (secondary_reload_info *)0, altivec_p))
16046 return false;
16047
16048 /* If we have a floating point or vector register class, we need to use
16049 memory to transfer the data. */
16050 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
16051 return true;
16052
16053 return false;
16054 }
16055
16056 /* Debug version of rs6000_secondary_memory_needed. */
16057 static bool
16058 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
16059 enum reg_class to_class,
16060 enum machine_mode mode)
16061 {
16062 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
16063
16064 fprintf (stderr,
16065 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
16066 "to_class = %s, mode = %s\n",
16067 ret ? "true" : "false",
16068 reg_class_names[from_class],
16069 reg_class_names[to_class],
16070 GET_MODE_NAME (mode));
16071
16072 return ret;
16073 }
16074
16075 /* Return the register class of a scratch register needed to copy IN into
16076 or out of a register in RCLASS in MODE. If it can be done directly,
16077 NO_REGS is returned. */
16078
16079 static enum reg_class
16080 rs6000_secondary_reload_class (enum reg_class rclass, enum machine_mode mode,
16081 rtx in)
16082 {
16083 int regno;
16084
16085 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
16086 #if TARGET_MACHO
16087 && MACHOPIC_INDIRECT
16088 #endif
16089 ))
16090 {
16091 /* We cannot copy a symbolic operand directly into anything
16092 other than BASE_REGS for TARGET_ELF. So indicate that a
16093 register from BASE_REGS is needed as an intermediate
16094 register.
16095
16096 On Darwin, pic addresses require a load from memory, which
16097 needs a base register. */
16098 if (rclass != BASE_REGS
16099 && (GET_CODE (in) == SYMBOL_REF
16100 || GET_CODE (in) == HIGH
16101 || GET_CODE (in) == LABEL_REF
16102 || GET_CODE (in) == CONST))
16103 return BASE_REGS;
16104 }
16105
16106 if (GET_CODE (in) == REG)
16107 {
16108 regno = REGNO (in);
16109 if (regno >= FIRST_PSEUDO_REGISTER)
16110 {
16111 regno = true_regnum (in);
16112 if (regno >= FIRST_PSEUDO_REGISTER)
16113 regno = -1;
16114 }
16115 }
16116 else if (GET_CODE (in) == SUBREG)
16117 {
16118 regno = true_regnum (in);
16119 if (regno >= FIRST_PSEUDO_REGISTER)
16120 regno = -1;
16121 }
16122 else
16123 regno = -1;
16124
16125 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
16126 into anything. */
16127 if (rclass == GENERAL_REGS || rclass == BASE_REGS
16128 || (regno >= 0 && INT_REGNO_P (regno)))
16129 return NO_REGS;
16130
16131 /* Constants, memory, and FP registers can go into FP registers. */
16132 if ((regno == -1 || FP_REGNO_P (regno))
16133 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
16134 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
16135
16136 /* Memory, and FP/altivec registers can go into fp/altivec registers under
16137 VSX. However, for scalar variables, use the traditional floating point
16138 registers so that we can use offset+register addressing. */
16139 if (TARGET_VSX
16140 && (regno == -1 || VSX_REGNO_P (regno))
16141 && VSX_REG_CLASS_P (rclass))
16142 {
16143 if (GET_MODE_SIZE (mode) < 16)
16144 return FLOAT_REGS;
16145
16146 return NO_REGS;
16147 }
16148
16149 /* Memory, and AltiVec registers can go into AltiVec registers. */
16150 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
16151 && rclass == ALTIVEC_REGS)
16152 return NO_REGS;
16153
16154 /* We can copy among the CR registers. */
16155 if ((rclass == CR_REGS || rclass == CR0_REGS)
16156 && regno >= 0 && CR_REGNO_P (regno))
16157 return NO_REGS;
16158
16159 /* Otherwise, we need GENERAL_REGS. */
16160 return GENERAL_REGS;
16161 }
16162
16163 /* Debug version of rs6000_secondary_reload_class. */
16164 static enum reg_class
16165 rs6000_debug_secondary_reload_class (enum reg_class rclass,
16166 enum machine_mode mode, rtx in)
16167 {
16168 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
16169 fprintf (stderr,
16170 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
16171 "mode = %s, input rtx:\n",
16172 reg_class_names[ret], reg_class_names[rclass],
16173 GET_MODE_NAME (mode));
16174 debug_rtx (in);
16175
16176 return ret;
16177 }
16178
16179 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
16180
16181 static bool
16182 rs6000_cannot_change_mode_class (enum machine_mode from,
16183 enum machine_mode to,
16184 enum reg_class rclass)
16185 {
16186 unsigned from_size = GET_MODE_SIZE (from);
16187 unsigned to_size = GET_MODE_SIZE (to);
16188
16189 if (from_size != to_size)
16190 {
16191 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
16192
16193 if (reg_classes_intersect_p (xclass, rclass))
16194 {
16195 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
16196 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
16197
16198 /* Don't allow 64-bit types to overlap with 128-bit types that take a
16199 single register under VSX because the scalar part of the register
16200 is in the upper 64-bits, and not the lower 64-bits. Types like
TFmode/TDmode that take 2 scalar registers can overlap.  128-bit
16202 IEEE floating point can't overlap, and neither can small
16203 values. */
16204
16205 if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
16206 return true;
16207
16208 if (from_size < 8 || to_size < 8)
16209 return true;
16210
16211 if (from_size == 8 && (8 * to_nregs) != to_size)
16212 return true;
16213
16214 if (to_size == 8 && (8 * from_nregs) != from_size)
16215 return true;
16216
16217 return false;
16218 }
16219 else
16220 return false;
16221 }
16222
16223 if (TARGET_E500_DOUBLE
16224 && ((((to) == DFmode) + ((from) == DFmode)) == 1
16225 || (((to) == TFmode) + ((from) == TFmode)) == 1
16226 || (((to) == DDmode) + ((from) == DDmode)) == 1
16227 || (((to) == TDmode) + ((from) == TDmode)) == 1
16228 || (((to) == DImode) + ((from) == DImode)) == 1))
16229 return true;
16230
16231 /* Since the VSX register set includes traditional floating point registers
16232 and altivec registers, just check for the size being different instead of
16233 trying to check whether the modes are vector modes. Otherwise it won't
16234 allow say DF and DI to change classes. For types like TFmode and TDmode
16235 that take 2 64-bit registers, rather than a single 128-bit register, don't
16236 allow subregs of those types to other 128 bit types. */
16237 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
16238 {
16239 unsigned num_regs = (from_size + 15) / 16;
16240 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
16241 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
16242 return true;
16243
16244 return (from_size != 8 && from_size != 16);
16245 }
16246
16247 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
16248 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
16249 return true;
16250
16251 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
16252 && reg_classes_intersect_p (GENERAL_REGS, rclass))
16253 return true;
16254
16255 return false;
16256 }
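
/* Illustrative sketch, not part of the compiler: the VSX branch above sizes
   a value in whole 128-bit registers with a ceiling division.  Stand-alone
   demo of just that arithmetic.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned int size;

  /* (size + 15) / 16 rounds up to whole VSX registers: 8-byte DFmode and
     16-byte vector values take one, 32-byte values would take two.  */
  for (size = 8; size <= 32; size += 8)
    printf ("size %2u -> %u reg(s)\n", size, (size + 15) / 16);
  return 0;
}
#endif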
16257
16258 /* Debug version of rs6000_cannot_change_mode_class. */
16259 static bool
16260 rs6000_debug_cannot_change_mode_class (enum machine_mode from,
16261 enum machine_mode to,
16262 enum reg_class rclass)
16263 {
16264 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
16265
16266 fprintf (stderr,
16267 "rs6000_cannot_change_mode_class, return %s, from = %s, "
16268 "to = %s, rclass = %s\n",
16269 ret ? "true" : "false",
16270 GET_MODE_NAME (from), GET_MODE_NAME (to),
16271 reg_class_names[rclass]);
16272
16273 return ret;
16274 }
16275 \f
16276 /* Return a string to do a move operation of 128 bits of data. */
16277
16278 const char *
16279 rs6000_output_move_128bit (rtx operands[])
16280 {
16281 rtx dest = operands[0];
16282 rtx src = operands[1];
16283 enum machine_mode mode = GET_MODE (dest);
16284 int dest_regno;
16285 int src_regno;
16286 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
16287 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
16288
16289 if (REG_P (dest))
16290 {
16291 dest_regno = REGNO (dest);
16292 dest_gpr_p = INT_REGNO_P (dest_regno);
16293 dest_fp_p = FP_REGNO_P (dest_regno);
16294 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
16295 dest_vsx_p = dest_fp_p | dest_vmx_p;
16296 }
16297 else
16298 {
16299 dest_regno = -1;
16300 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
16301 }
16302
16303 if (REG_P (src))
16304 {
16305 src_regno = REGNO (src);
16306 src_gpr_p = INT_REGNO_P (src_regno);
16307 src_fp_p = FP_REGNO_P (src_regno);
16308 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
16309 src_vsx_p = src_fp_p | src_vmx_p;
16310 }
16311 else
16312 {
16313 src_regno = -1;
16314 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
16315 }
16316
16317 /* Register moves. */
16318 if (dest_regno >= 0 && src_regno >= 0)
16319 {
16320 if (dest_gpr_p)
16321 {
16322 if (src_gpr_p)
16323 return "#";
16324
16325 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
16326 return "#";
16327 }
16328
16329 else if (TARGET_VSX && dest_vsx_p)
16330 {
16331 if (src_vsx_p)
16332 return "xxlor %x0,%x1,%x1";
16333
16334 else if (TARGET_DIRECT_MOVE && src_gpr_p)
16335 return "#";
16336 }
16337
16338 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
16339 return "vor %0,%1,%1";
16340
16341 else if (dest_fp_p && src_fp_p)
16342 return "#";
16343 }
16344
16345 /* Loads. */
16346 else if (dest_regno >= 0 && MEM_P (src))
16347 {
16348 if (dest_gpr_p)
16349 {
16350 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
16351 return "lq %0,%1";
16352 else
16353 return "#";
16354 }
16355
16356 else if (TARGET_ALTIVEC && dest_vmx_p
16357 && altivec_indexed_or_indirect_operand (src, mode))
16358 return "lvx %0,%y1";
16359
16360 else if (TARGET_VSX && dest_vsx_p)
16361 {
16362 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
16363 return "lxvw4x %x0,%y1";
16364 else
16365 return "lxvd2x %x0,%y1";
16366 }
16367
16368 else if (TARGET_ALTIVEC && dest_vmx_p)
16369 return "lvx %0,%y1";
16370
16371 else if (dest_fp_p)
16372 return "#";
16373 }
16374
16375 /* Stores. */
16376 else if (src_regno >= 0 && MEM_P (dest))
16377 {
16378 if (src_gpr_p)
16379 {
16380 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
16381 return "stq %1,%0";
16382 else
16383 return "#";
16384 }
16385
16386 else if (TARGET_ALTIVEC && src_vmx_p
&& altivec_indexed_or_indirect_operand (dest, mode))
16388 return "stvx %1,%y0";
16389
16390 else if (TARGET_VSX && src_vsx_p)
16391 {
16392 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
16393 return "stxvw4x %x1,%y0";
16394 else
16395 return "stxvd2x %x1,%y0";
16396 }
16397
16398 else if (TARGET_ALTIVEC && src_vmx_p)
16399 return "stvx %1,%y0";
16400
16401 else if (src_fp_p)
16402 return "#";
16403 }
16404
16405 /* Constants. */
16406 else if (dest_regno >= 0
16407 && (GET_CODE (src) == CONST_INT
16408 || GET_CODE (src) == CONST_DOUBLE
16409 || GET_CODE (src) == CONST_VECTOR))
16410 {
16411 if (dest_gpr_p)
16412 return "#";
16413
16414 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
16415 return "xxlxor %x0,%x0,%x0";
16416
16417 else if (TARGET_ALTIVEC && dest_vmx_p)
16418 return output_vec_const_move (operands);
16419 }
16420
16421 if (TARGET_DEBUG_ADDR)
16422 {
16423 fprintf (stderr, "\n===== Bad 128 bit move:\n");
16424 debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
16425 }
16426
16427 gcc_unreachable ();
16428 }
16429
16430 \f
16431 /* Given a comparison operation, return the bit number in CCR to test. We
16432 know this is a valid comparison.
16433
16434 SCC_P is 1 if this is for an scc. That means that %D will have been
16435 used instead of %C, so the bits will be in different places.
16436
16437 Return -1 if OP isn't a valid comparison for some reason. */
16438
16439 int
16440 ccr_bit (rtx op, int scc_p)
16441 {
16442 enum rtx_code code = GET_CODE (op);
16443 enum machine_mode cc_mode;
16444 int cc_regnum;
16445 int base_bit;
16446 rtx reg;
16447
16448 if (!COMPARISON_P (op))
16449 return -1;
16450
16451 reg = XEXP (op, 0);
16452
16453 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
16454
16455 cc_mode = GET_MODE (reg);
16456 cc_regnum = REGNO (reg);
16457 base_bit = 4 * (cc_regnum - CR0_REGNO);
16458
16459 validate_condition_mode (code, cc_mode);
16460
16461 /* When generating a sCOND operation, only positive conditions are
16462 allowed. */
16463 gcc_assert (!scc_p
16464 || code == EQ || code == GT || code == LT || code == UNORDERED
16465 || code == GTU || code == LTU);
16466
16467 switch (code)
16468 {
16469 case NE:
16470 return scc_p ? base_bit + 3 : base_bit + 2;
16471 case EQ:
16472 return base_bit + 2;
16473 case GT: case GTU: case UNLE:
16474 return base_bit + 1;
16475 case LT: case LTU: case UNGE:
16476 return base_bit;
16477 case ORDERED: case UNORDERED:
16478 return base_bit + 3;
16479
16480 case GE: case GEU:
16481 /* If scc, we will have done a cror to put the bit in the
16482 unordered position. So test that bit. For integer, this is ! LT
16483 unless this is an scc insn. */
16484 return scc_p ? base_bit + 3 : base_bit;
16485
16486 case LE: case LEU:
16487 return scc_p ? base_bit + 3 : base_bit + 1;
16488
16489 default:
16490 gcc_unreachable ();
16491 }
16492 }
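
/* Illustrative sketch, not part of the compiler: each CR field is four bits
   (LT, GT, EQ, SO/UN), so the bit tested for a comparison against CRn is
   4*n plus the offset chosen by the switch above.  The stand-alone helper
   below is hypothetical and numbers the fields 0..7 rather than by hard
   register number.  */
#if 0
#include <stdio.h>

enum cr_offset { CR_LT = 0, CR_GT = 1, CR_EQ = 2, CR_SO = 3 };

static int
cr_field_bit (int cr_field, enum cr_offset off)
{
  return 4 * cr_field + off;
}

int
main (void)
{
  /* A GT test against CR1 uses bit 5; an EQ test against CR6 (the field
     Altivec predicates set) uses bit 26.  Prints: 5 26  */
  printf ("%d %d\n", cr_field_bit (1, CR_GT), cr_field_bit (6, CR_EQ));
  return 0;
}
#endif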
16493 \f
16494 /* Return the GOT register. */
16495
16496 rtx
16497 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
16498 {
16499 /* The second flow pass currently (June 1999) can't update
16500 regs_ever_live without disturbing other parts of the compiler, so
16501 update it here to make the prolog/epilogue code happy. */
16502 if (!can_create_pseudo_p ()
16503 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
16504 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
16505
16506 crtl->uses_pic_offset_table = 1;
16507
16508 return pic_offset_table_rtx;
16509 }
16510 \f
16511 static rs6000_stack_t stack_info;
16512
16513 /* Function to init struct machine_function.
16514 This will be called, via a pointer variable,
16515 from push_function_context. */
16516
16517 static struct machine_function *
16518 rs6000_init_machine_status (void)
16519 {
16520 stack_info.reload_completed = 0;
16521 return ggc_alloc_cleared_machine_function ();
16522 }
16523 \f
16524 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
16525
16526 int
16527 extract_MB (rtx op)
16528 {
16529 int i;
16530 unsigned long val = INTVAL (op);
16531
16532 /* If the high bit is zero, the value is the first 1 bit we find
16533 from the left. */
16534 if ((val & 0x80000000) == 0)
16535 {
16536 gcc_assert (val & 0xffffffff);
16537
16538 i = 1;
16539 while (((val <<= 1) & 0x80000000) == 0)
16540 ++i;
16541 return i;
16542 }
16543
16544 /* If the high bit is set and the low bit is not, or the mask is all
16545 1's, the value is zero. */
16546 if ((val & 1) == 0 || (val & 0xffffffff) == 0xffffffff)
16547 return 0;
16548
16549 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
16550 from the right. */
16551 i = 31;
16552 while (((val >>= 1) & 1) != 0)
16553 --i;
16554
16555 return i;
16556 }
16557
16558 int
16559 extract_ME (rtx op)
16560 {
16561 int i;
16562 unsigned long val = INTVAL (op);
16563
16564 /* If the low bit is zero, the value is the first 1 bit we find from
16565 the right. */
16566 if ((val & 1) == 0)
16567 {
16568 gcc_assert (val & 0xffffffff);
16569
16570 i = 30;
16571 while (((val >>= 1) & 1) == 0)
16572 --i;
16573
16574 return i;
16575 }
16576
16577 /* If the low bit is set and the high bit is not, or the mask is all
16578 1's, the value is 31. */
16579 if ((val & 0x80000000) == 0 || (val & 0xffffffff) == 0xffffffff)
16580 return 31;
16581
16582 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
16583 from the left. */
16584 i = 0;
16585 while (((val <<= 1) & 0x80000000) != 0)
16586 ++i;
16587
16588 return i;
16589 }
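
/* Illustrative sketch, not part of the compiler: a stand-alone copy of the
   MB extraction above, run on a normal mask and on a wrap-around mask.  For
   rlwinm, MB is the index of the first 1 bit counting from the left;
   extract_ME finds the last one symmetrically.  Compile stand-alone.  */
#if 0
#include <stdio.h>

static int
mask_begin (unsigned long val)
{
  int i;

  /* High bit clear: count from the left to the first 1 bit.  */
  if ((val & 0x80000000UL) == 0)
    {
      i = 1;
      while (((val <<= 1) & 0x80000000UL) == 0)
        ++i;
      return i;
    }

  /* All ones, or high bit set with low bit clear: MB is zero.  */
  if ((val & 1) == 0 || (val & 0xffffffffUL) == 0xffffffffUL)
    return 0;

  /* Wrap-around mask: find the first 0 bit from the right.  */
  i = 31;
  while (((val >>= 1) & 1) != 0)
    --i;
  return i;
}

int
main (void)
{
  printf ("%d\n", mask_begin (0x0ffffff0UL));  /* 4 */
  printf ("%d\n", mask_begin (0xff0000ffUL));  /* 24: wrap-around mask */
  return 0;
}
#endif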
16590
16591 /* Locate some local-dynamic symbol still in use by this function
16592 so that we can print its name in some tls_ld pattern. */
16593
16594 static const char *
16595 rs6000_get_some_local_dynamic_name (void)
16596 {
16597 rtx insn;
16598
16599 if (cfun->machine->some_ld_name)
16600 return cfun->machine->some_ld_name;
16601
16602 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
16603 if (INSN_P (insn)
16604 && for_each_rtx (&PATTERN (insn),
16605 rs6000_get_some_local_dynamic_name_1, 0))
16606 return cfun->machine->some_ld_name;
16607
16608 gcc_unreachable ();
16609 }
16610
16611 /* Helper function for rs6000_get_some_local_dynamic_name. */
16612
16613 static int
16614 rs6000_get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
16615 {
16616 rtx x = *px;
16617
16618 if (GET_CODE (x) == SYMBOL_REF)
16619 {
16620 const char *str = XSTR (x, 0);
16621 if (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
16622 {
16623 cfun->machine->some_ld_name = str;
16624 return 1;
16625 }
16626 }
16627
16628 return 0;
16629 }
16630
16631 /* Write out a function code label. */
16632
16633 void
16634 rs6000_output_function_entry (FILE *file, const char *fname)
16635 {
16636 if (fname[0] != '.')
16637 {
16638 switch (DEFAULT_ABI)
16639 {
16640 default:
16641 gcc_unreachable ();
16642
16643 case ABI_AIX:
16644 if (DOT_SYMBOLS)
16645 putc ('.', file);
16646 else
16647 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
16648 break;
16649
16650 case ABI_V4:
16651 case ABI_DARWIN:
16652 break;
16653 }
16654 }
16655
16656 RS6000_OUTPUT_BASENAME (file, fname);
16657 }
16658
16659 /* Print an operand. Recognize special options, documented below. */
16660
16661 #if TARGET_ELF
16662 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
16663 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
16664 #else
16665 #define SMALL_DATA_RELOC "sda21"
16666 #define SMALL_DATA_REG 0
16667 #endif
16668
16669 void
16670 print_operand (FILE *file, rtx x, int code)
16671 {
16672 int i;
16673 unsigned HOST_WIDE_INT uval;
16674
16675 switch (code)
16676 {
16677 /* %a is output_address. */
16678
16679 case 'b':
16680 /* If constant, low-order 16 bits of constant, unsigned.
16681 Otherwise, write normally. */
16682 if (INT_P (x))
16683 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
16684 else
16685 print_operand (file, x, 0);
16686 return;
16687
16688 case 'B':
16689 /* If the low-order bit is zero, write 'r'; otherwise, write 'l'
16690 for 64-bit mask direction. */
16691 putc (((INTVAL (x) & 1) == 0 ? 'r' : 'l'), file);
16692 return;
16693
16694 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
16695 output_operand. */
16696
16697 case 'D':
16698 /* Like 'J' but get to the GT bit only. */
16699 gcc_assert (REG_P (x));
16700
16701 /* Bit 1 is GT bit. */
16702 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
16703
16704 /* Add one for shift count in rlinm for scc. */
16705 fprintf (file, "%d", i + 1);
16706 return;
16707
16708 case 'E':
/* X is a CR register.  Print the number of the EQ bit of the CR.  */
16710 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
16711 output_operand_lossage ("invalid %%E value");
16712 else
16713 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
16714 return;
16715
16716 case 'f':
16717 /* X is a CR register. Print the shift count needed to move it
16718 to the high-order four bits. */
16719 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
16720 output_operand_lossage ("invalid %%f value");
16721 else
16722 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
16723 return;
16724
16725 case 'F':
16726 /* Similar, but print the count for the rotate in the opposite
16727 direction. */
16728 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
16729 output_operand_lossage ("invalid %%F value");
16730 else
16731 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
16732 return;
16733
16734 case 'G':
16735 /* X is a constant integer. If it is negative, print "m",
16736 otherwise print "z". This is to make an aze or ame insn. */
16737 if (GET_CODE (x) != CONST_INT)
16738 output_operand_lossage ("invalid %%G value");
16739 else if (INTVAL (x) >= 0)
16740 putc ('z', file);
16741 else
16742 putc ('m', file);
16743 return;
16744
16745 case 'h':
16746 /* If constant, output low-order five bits. Otherwise, write
16747 normally. */
16748 if (INT_P (x))
16749 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
16750 else
16751 print_operand (file, x, 0);
16752 return;
16753
16754 case 'H':
16755 /* If constant, output low-order six bits. Otherwise, write
16756 normally. */
16757 if (INT_P (x))
16758 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
16759 else
16760 print_operand (file, x, 0);
16761 return;
16762
16763 case 'I':
16764 /* Print `i' if this is a constant, else nothing. */
16765 if (INT_P (x))
16766 putc ('i', file);
16767 return;
16768
16769 case 'j':
16770 /* Write the bit number in CCR for jump. */
16771 i = ccr_bit (x, 0);
16772 if (i == -1)
16773 output_operand_lossage ("invalid %%j code");
16774 else
16775 fprintf (file, "%d", i);
16776 return;
16777
16778 case 'J':
16779 /* Similar, but add one for shift count in rlinm for scc and pass
16780 scc flag to `ccr_bit'. */
16781 i = ccr_bit (x, 1);
16782 if (i == -1)
16783 output_operand_lossage ("invalid %%J code");
16784 else
16785 /* If we want bit 31, write a shift count of zero, not 32. */
16786 fprintf (file, "%d", i == 31 ? 0 : i + 1);
16787 return;
16788
16789 case 'k':
16790 /* X must be a constant. Write the 1's complement of the
16791 constant. */
16792 if (! INT_P (x))
16793 output_operand_lossage ("invalid %%k value");
16794 else
16795 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
16796 return;
16797
16798 case 'K':
16799 /* X must be a symbolic constant on ELF. Write an
16800 expression suitable for an 'addi' that adds in the low 16
16801 bits of the MEM. */
16802 if (GET_CODE (x) == CONST)
16803 {
16804 if (GET_CODE (XEXP (x, 0)) != PLUS
16805 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
16806 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
16807 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
16808 output_operand_lossage ("invalid %%K value");
16809 }
16810 print_operand_address (file, x);
16811 fputs ("@l", file);
16812 return;
16813
16814 /* %l is output_asm_label. */
16815
16816 case 'L':
16817 /* Write second word of DImode or DFmode reference. Works on register
16818 or non-indexed memory only. */
16819 if (REG_P (x))
16820 fputs (reg_names[REGNO (x) + 1], file);
16821 else if (MEM_P (x))
16822 {
16823 /* Handle possible auto-increment. Since it is pre-increment and
we have already done it, we can just use an offset of one word.  */
16825 if (GET_CODE (XEXP (x, 0)) == PRE_INC
16826 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
16827 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
16828 UNITS_PER_WORD));
16829 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
16830 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
16831 UNITS_PER_WORD));
16832 else
16833 output_address (XEXP (adjust_address_nv (x, SImode,
16834 UNITS_PER_WORD),
16835 0));
16836
16837 if (small_data_operand (x, GET_MODE (x)))
16838 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
16839 reg_names[SMALL_DATA_REG]);
16840 }
16841 return;
16842
16843 case 'm':
16844 /* MB value for a mask operand. */
16845 if (! mask_operand (x, SImode))
16846 output_operand_lossage ("invalid %%m value");
16847
16848 fprintf (file, "%d", extract_MB (x));
16849 return;
16850
16851 case 'M':
16852 /* ME value for a mask operand. */
16853 if (! mask_operand (x, SImode))
16854 output_operand_lossage ("invalid %%M value");
16855
16856 fprintf (file, "%d", extract_ME (x));
16857 return;
16858
16859 /* %n outputs the negative of its operand. */
16860
16861 case 'N':
16862 /* Write the number of elements in the vector times 4. */
16863 if (GET_CODE (x) != PARALLEL)
16864 output_operand_lossage ("invalid %%N value");
16865 else
16866 fprintf (file, "%d", XVECLEN (x, 0) * 4);
16867 return;
16868
16869 case 'O':
16870 /* Similar, but subtract 1 first. */
16871 if (GET_CODE (x) != PARALLEL)
16872 output_operand_lossage ("invalid %%O value");
16873 else
16874 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
16875 return;
16876
16877 case 'p':
16878 /* X is a CONST_INT that is a power of two. Output the logarithm. */
16879 if (! INT_P (x)
16880 || INTVAL (x) < 0
16881 || (i = exact_log2 (INTVAL (x))) < 0)
16882 output_operand_lossage ("invalid %%p value");
16883 else
16884 fprintf (file, "%d", i);
16885 return;
16886
16887 case 'P':
16888 /* The operand must be an indirect memory reference. The result
16889 is the register name. */
16890 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
16891 || REGNO (XEXP (x, 0)) >= 32)
16892 output_operand_lossage ("invalid %%P value");
16893 else
16894 fputs (reg_names[REGNO (XEXP (x, 0))], file);
16895 return;
16896
16897 case 'q':
16898 /* This outputs the logical code corresponding to a boolean
16899 expression. The expression may have one or both operands
16900 negated (if one, only the first one). For condition register
16901 logical operations, it will also treat the negated
16902 CR codes as NOTs, but not handle NOTs of them. */
16903 {
16904 const char *const *t = 0;
16905 const char *s;
16906 enum rtx_code code = GET_CODE (x);
16907 static const char * const tbl[3][3] = {
16908 { "and", "andc", "nor" },
16909 { "or", "orc", "nand" },
16910 { "xor", "eqv", "xor" } };
16911
16912 if (code == AND)
16913 t = tbl[0];
16914 else if (code == IOR)
16915 t = tbl[1];
16916 else if (code == XOR)
16917 t = tbl[2];
16918 else
16919 output_operand_lossage ("invalid %%q value");
16920
16921 if (GET_CODE (XEXP (x, 0)) != NOT)
16922 s = t[0];
16923 else
16924 {
16925 if (GET_CODE (XEXP (x, 1)) == NOT)
16926 s = t[2];
16927 else
16928 s = t[1];
16929 }
16930
16931 fputs (s, file);
16932 }
16933 return;
16934
16935 case 'Q':
16936 if (! TARGET_MFCRF)
16937 return;
16938 fputc (',', file);
16939 /* FALLTHRU */
16940
16941 case 'R':
16942 /* X is a CR register. Print the mask for `mtcrf'. */
16943 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
16944 output_operand_lossage ("invalid %%R value");
16945 else
16946 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
16947 return;
16948
16949 case 's':
16950 /* Low 5 bits of 32 - value */
16951 if (! INT_P (x))
16952 output_operand_lossage ("invalid %%s value");
16953 else
16954 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
16955 return;
16956
16957 case 'S':
16958 /* PowerPC64 mask position. All 0's is excluded.
16959 CONST_INT 32-bit mask is considered sign-extended so any
16960 transition must occur within the CONST_INT, not on the boundary. */
16961 if (! mask64_operand (x, DImode))
16962 output_operand_lossage ("invalid %%S value");
16963
16964 uval = INTVAL (x);
16965
16966 if (uval & 1) /* Clear Left */
16967 {
16968 #if HOST_BITS_PER_WIDE_INT > 64
16969 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
16970 #endif
16971 i = 64;
16972 }
16973 else /* Clear Right */
16974 {
16975 uval = ~uval;
16976 #if HOST_BITS_PER_WIDE_INT > 64
16977 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
16978 #endif
16979 i = 63;
16980 }
16981 while (uval != 0)
16982 --i, uval >>= 1;
16983 gcc_assert (i >= 0);
16984 fprintf (file, "%d", i);
16985 return;
16986
16987 case 't':
16988 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
16989 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
16990
16991 /* Bit 3 is OV bit. */
16992 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
16993
16994 /* If we want bit 31, write a shift count of zero, not 32. */
16995 fprintf (file, "%d", i == 31 ? 0 : i + 1);
16996 return;
16997
16998 case 'T':
16999 /* Print the symbolic name of a branch target register. */
17000 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
17001 && REGNO (x) != CTR_REGNO))
17002 output_operand_lossage ("invalid %%T value");
17003 else if (REGNO (x) == LR_REGNO)
17004 fputs ("lr", file);
17005 else
17006 fputs ("ctr", file);
17007 return;
17008
17009 case 'u':
17010 /* High-order 16 bits of constant for use in unsigned operand. */
17011 if (! INT_P (x))
17012 output_operand_lossage ("invalid %%u value");
17013 else
17014 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
17015 (INTVAL (x) >> 16) & 0xffff);
17016 return;
17017
17018 case 'v':
17019 /* High-order 16 bits of constant for use in signed operand. */
17020 if (! INT_P (x))
17021 output_operand_lossage ("invalid %%v value");
17022 else
17023 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
17024 (INTVAL (x) >> 16) & 0xffff);
17025 return;
17026
17027 case 'U':
17028 /* Print `u' if this has an auto-increment or auto-decrement. */
17029 if (MEM_P (x)
17030 && (GET_CODE (XEXP (x, 0)) == PRE_INC
17031 || GET_CODE (XEXP (x, 0)) == PRE_DEC
17032 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
17033 putc ('u', file);
17034 return;
17035
17036 case 'V':
17037 /* Print the trap code for this operand. */
17038 switch (GET_CODE (x))
17039 {
17040 case EQ:
17041 fputs ("eq", file); /* 4 */
17042 break;
17043 case NE:
17044 fputs ("ne", file); /* 24 */
17045 break;
17046 case LT:
17047 fputs ("lt", file); /* 16 */
17048 break;
17049 case LE:
17050 fputs ("le", file); /* 20 */
17051 break;
17052 case GT:
17053 fputs ("gt", file); /* 8 */
17054 break;
17055 case GE:
17056 fputs ("ge", file); /* 12 */
17057 break;
17058 case LTU:
17059 fputs ("llt", file); /* 2 */
17060 break;
17061 case LEU:
17062 fputs ("lle", file); /* 6 */
17063 break;
17064 case GTU:
17065 fputs ("lgt", file); /* 1 */
17066 break;
17067 case GEU:
17068 fputs ("lge", file); /* 5 */
17069 break;
17070 default:
17071 gcc_unreachable ();
17072 }
17073 break;
17074
17075 case 'w':
17076 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
17077 normally. */
17078 if (INT_P (x))
17079 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
17080 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
17081 else
17082 print_operand (file, x, 0);
17083 return;
17084
17085 case 'W':
17086 /* MB value for a PowerPC64 rldic operand. */
17087 i = clz_hwi (INTVAL (x));
17088
17089 fprintf (file, "%d", i);
17090 return;
17091
17092 case 'x':
/* X is an FPR or Altivec register used in a VSX context.  */
17094 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
17095 output_operand_lossage ("invalid %%x value");
17096 else
17097 {
17098 int reg = REGNO (x);
17099 int vsx_reg = (FP_REGNO_P (reg)
17100 ? reg - 32
17101 : reg - FIRST_ALTIVEC_REGNO + 32);
17102
17103 #ifdef TARGET_REGNAMES
17104 if (TARGET_REGNAMES)
17105 fprintf (file, "%%vs%d", vsx_reg);
17106 else
17107 #endif
17108 fprintf (file, "%d", vsx_reg);
17109 }
17110 return;
17111
17112 case 'X':
17113 if (MEM_P (x)
17114 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
17115 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
17116 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
17117 putc ('x', file);
17118 return;
17119
17120 case 'Y':
/* Like 'L', for third word of TImode/PTImode.  */
17122 if (REG_P (x))
17123 fputs (reg_names[REGNO (x) + 2], file);
17124 else if (MEM_P (x))
17125 {
17126 if (GET_CODE (XEXP (x, 0)) == PRE_INC
17127 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
17128 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
17129 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
17130 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
17131 else
17132 output_address (XEXP (adjust_address_nv (x, SImode, 8), 0));
17133 if (small_data_operand (x, GET_MODE (x)))
17134 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
17135 reg_names[SMALL_DATA_REG]);
17136 }
17137 return;
17138
17139 case 'z':
17140 /* X is a SYMBOL_REF. Write out the name preceded by a
17141 period and without any trailing data in brackets. Used for function
17142 names. If we are configured for System V (or the embedded ABI) on
17143 the PowerPC, do not emit the period, since those systems do not use
17144 TOCs and the like. */
17145 gcc_assert (GET_CODE (x) == SYMBOL_REF);
17146
17147 /* For macho, check to see if we need a stub. */
17148 if (TARGET_MACHO)
17149 {
17150 const char *name = XSTR (x, 0);
17151 #if TARGET_MACHO
17152 if (darwin_emit_branch_islands
17153 && MACHOPIC_INDIRECT
17154 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
17155 name = machopic_indirection_name (x, /*stub_p=*/true);
17156 #endif
17157 assemble_name (file, name);
17158 }
17159 else if (!DOT_SYMBOLS)
17160 assemble_name (file, XSTR (x, 0));
17161 else
17162 rs6000_output_function_entry (file, XSTR (x, 0));
17163 return;
17164
17165 case 'Z':
17166 /* Like 'L', for last word of TImode/PTImode. */
17167 if (REG_P (x))
17168 fputs (reg_names[REGNO (x) + 3], file);
17169 else if (MEM_P (x))
17170 {
17171 if (GET_CODE (XEXP (x, 0)) == PRE_INC
17172 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
17173 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
17174 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
17175 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
17176 else
17177 output_address (XEXP (adjust_address_nv (x, SImode, 12), 0));
17178 if (small_data_operand (x, GET_MODE (x)))
17179 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
17180 reg_names[SMALL_DATA_REG]);
17181 }
17182 return;
17183
17184 /* Print AltiVec or SPE memory operand. */
17185 case 'y':
17186 {
17187 rtx tmp;
17188
17189 gcc_assert (MEM_P (x));
17190
17191 tmp = XEXP (x, 0);
17192
17193 /* Ugly hack because %y is overloaded. */
17194 if ((TARGET_SPE || TARGET_E500_DOUBLE)
17195 && (GET_MODE_SIZE (GET_MODE (x)) == 8
17196 || GET_MODE (x) == TFmode
17197 || GET_MODE (x) == TImode
17198 || GET_MODE (x) == PTImode))
17199 {
17200 /* Handle [reg]. */
17201 if (REG_P (tmp))
17202 {
17203 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
17204 break;
17205 }
17206 /* Handle [reg+UIMM]. */
17207 	      else if (GET_CODE (tmp) == PLUS
17208 		       && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
17209 	        {
17210 	          int offset;
17211 
17212 	          gcc_assert (REG_P (XEXP (tmp, 0)));
17213 
17214 	          offset = INTVAL (XEXP (tmp, 1));
17215 	          fprintf (file, "%d(%s)", offset, reg_names[REGNO (XEXP (tmp, 0))]);
17216 	          break;
17217 }
17218
17219 /* Fall through. Must be [reg+reg]. */
17220 }
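	    /* AltiVec loads and stores ignore the low four bits of the
	       address, so an explicit (and ADDR -16) used to force
	       alignment can simply be peeled off before printing.  */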
17221 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
17222 && GET_CODE (tmp) == AND
17223 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
17224 && INTVAL (XEXP (tmp, 1)) == -16)
17225 tmp = XEXP (tmp, 0);
17226 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
17227 && GET_CODE (tmp) == PRE_MODIFY)
17228 tmp = XEXP (tmp, 1);
17229 if (REG_P (tmp))
17230 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
17231 else
17232 {
17233 	            if (GET_CODE (tmp) != PLUS
17234 	                || !REG_P (XEXP (tmp, 0))
17235 	                || !REG_P (XEXP (tmp, 1)))
17236 {
17237 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
17238 break;
17239 }
17240
17241 if (REGNO (XEXP (tmp, 0)) == 0)
17242 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
17243 reg_names[ REGNO (XEXP (tmp, 0)) ]);
17244 else
17245 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
17246 reg_names[ REGNO (XEXP (tmp, 1)) ]);
17247 }
17248 break;
17249 }
17250
17251 case 0:
17252 if (REG_P (x))
17253 fprintf (file, "%s", reg_names[REGNO (x)]);
17254 else if (MEM_P (x))
17255 {
17256 /* We need to handle PRE_INC and PRE_DEC here, since we need to
17257 know the width from the mode. */
17258 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
17259 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
17260 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
17261 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
17262 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
17263 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
17264 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
17265 output_address (XEXP (XEXP (x, 0), 1));
17266 else
17267 output_address (XEXP (x, 0));
17268 }
17269 else
17270 {
17271 if (toc_relative_expr_p (x, false))
17272 /* This hack along with a corresponding hack in
17273 rs6000_output_addr_const_extra arranges to output addends
17274 	             where the assembler expects to find them; e.g.
17275 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
17276 without this hack would be output as "x@toc+4". We
17277 want "x+4@toc". */
17278 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
17279 else
17280 output_addr_const (file, x);
17281 }
17282 return;
17283
17284 case '&':
17285 assemble_name (file, rs6000_get_some_local_dynamic_name ());
17286 return;
17287
17288 default:
17289 output_operand_lossage ("invalid %%xn code");
17290 }
17291 }
17292 \f
17293 /* Print the address of an operand. */
17294
17295 void
17296 print_operand_address (FILE *file, rtx x)
17297 {
17298 if (REG_P (x))
17299 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
17300 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
17301 || GET_CODE (x) == LABEL_REF)
17302 {
17303 output_addr_const (file, x);
17304 if (small_data_operand (x, GET_MODE (x)))
17305 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
17306 reg_names[SMALL_DATA_REG]);
17307 else
17308 gcc_assert (!TARGET_TOC);
17309 }
17310 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
17311 && REG_P (XEXP (x, 1)))
17312 {
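      /* In an indexed (X-form) address an RA field of 0 reads as a
	 literal zero rather than r0, so when the first register is r0
	 it must be printed in the second (RB) slot.  */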
17313 if (REGNO (XEXP (x, 0)) == 0)
17314 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
17315 reg_names[ REGNO (XEXP (x, 0)) ]);
17316 else
17317 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
17318 reg_names[ REGNO (XEXP (x, 1)) ]);
17319 }
17320 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
17321 && GET_CODE (XEXP (x, 1)) == CONST_INT)
17322 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
17323 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
17324 #if TARGET_MACHO
17325 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
17326 && CONSTANT_P (XEXP (x, 1)))
17327 {
17328 fprintf (file, "lo16(");
17329 output_addr_const (file, XEXP (x, 1));
17330 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
17331 }
17332 #endif
17333 #if TARGET_ELF
17334 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
17335 && CONSTANT_P (XEXP (x, 1)))
17336 {
17337 output_addr_const (file, XEXP (x, 1));
17338 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
17339 }
17340 #endif
17341 else if (toc_relative_expr_p (x, false))
17342 {
17343 /* This hack along with a corresponding hack in
17344 rs6000_output_addr_const_extra arranges to output addends
17345 	 where the assembler expects to find them; e.g.
17346 	 (lo_sum (reg 9)
17347 	         (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
17348 without this hack would be output as "x@toc+8@l(9)". We
17349 want "x+8@toc@l(9)". */
17350 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
17351 if (GET_CODE (x) == LO_SUM)
17352 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
17353 else
17354 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
17355 }
17356 else
17357 gcc_unreachable ();
17358 }
17359 \f
17360 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
17361
17362 static bool
17363 rs6000_output_addr_const_extra (FILE *file, rtx x)
17364 {
17365 if (GET_CODE (x) == UNSPEC)
17366 switch (XINT (x, 1))
17367 {
17368 case UNSPEC_TOCREL:
17369 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
17370 && REG_P (XVECEXP (x, 0, 1))
17371 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
17372 output_addr_const (file, XVECEXP (x, 0, 0));
17373 if (x == tocrel_base && tocrel_offset != const0_rtx)
17374 {
17375 if (INTVAL (tocrel_offset) >= 0)
17376 fprintf (file, "+");
17377 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
17378 }
17379 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
17380 {
17381 putc ('-', file);
17382 assemble_name (file, toc_label_name);
17383 }
17384 else if (TARGET_ELF)
17385 fputs ("@toc", file);
17386 return true;
17387
17388 #if TARGET_MACHO
17389 case UNSPEC_MACHOPIC_OFFSET:
17390 output_addr_const (file, XVECEXP (x, 0, 0));
17391 putc ('-', file);
17392 machopic_output_function_base_name (file);
17393 return true;
17394 #endif
17395 }
17396 return false;
17397 }
17398 \f
17399 /* Target hook for assembling integer objects. The PowerPC version has
17400 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
17401 is defined. It also needs to handle DI-mode objects on 64-bit
17402 targets. */
17403
17404 static bool
17405 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
17406 {
17407 #ifdef RELOCATABLE_NEEDS_FIXUP
17408 /* Special handling for SI values. */
17409 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
17410 {
17411 static int recurse = 0;
17412
17413 /* For -mrelocatable, we mark all addresses that need to be fixed up in
17414 the .fixup section. Since the TOC section is already relocated, we
17415 don't need to mark it here. We used to skip the text section, but it
17416 should never be valid for relocated addresses to be placed in the text
17417 section. */
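      /* Schematically, for a constant SYM this emits (assuming the
	 usual ELF internal-label spelling):

	     .LCP0:	.long (SYM)@fixup
			.section ".fixup","aw"
			.align 2
			.long .LCP0
			.previous  */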
17418 if (TARGET_RELOCATABLE
17419 && in_section != toc_section
17420 && !recurse
17421 && GET_CODE (x) != CONST_INT
17422 && GET_CODE (x) != CONST_DOUBLE
17423 && CONSTANT_P (x))
17424 {
17425 char buf[256];
17426
17427 recurse = 1;
17428 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
17429 fixuplabelno++;
17430 ASM_OUTPUT_LABEL (asm_out_file, buf);
17431 fprintf (asm_out_file, "\t.long\t(");
17432 output_addr_const (asm_out_file, x);
17433 fprintf (asm_out_file, ")@fixup\n");
17434 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
17435 ASM_OUTPUT_ALIGN (asm_out_file, 2);
17436 fprintf (asm_out_file, "\t.long\t");
17437 assemble_name (asm_out_file, buf);
17438 fprintf (asm_out_file, "\n\t.previous\n");
17439 recurse = 0;
17440 return true;
17441 }
17442 /* Remove initial .'s to turn a -mcall-aixdesc function
17443 address into the address of the descriptor, not the function
17444 itself. */
17445 else if (GET_CODE (x) == SYMBOL_REF
17446 && XSTR (x, 0)[0] == '.'
17447 && DEFAULT_ABI == ABI_AIX)
17448 {
17449 const char *name = XSTR (x, 0);
17450 while (*name == '.')
17451 name++;
17452
17453 fprintf (asm_out_file, "\t.long\t%s\n", name);
17454 return true;
17455 }
17456 }
17457 #endif /* RELOCATABLE_NEEDS_FIXUP */
17458 return default_assemble_integer (x, size, aligned_p);
17459 }
17460
17461 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
17462 /* Emit an assembler directive to set symbol visibility for DECL to
17463 VISIBILITY_TYPE. */
17464
17465 static void
17466 rs6000_assemble_visibility (tree decl, int vis)
17467 {
17468 if (TARGET_XCOFF)
17469 return;
17470
17471 /* Functions need to have their entry point symbol visibility set as
17472 well as their descriptor symbol visibility. */
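  /* E.g. a hidden function "foo" gets both ".hidden foo" for the
     descriptor and ".hidden .foo" for the entry point.  */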
17473 if (DEFAULT_ABI == ABI_AIX
17474 && DOT_SYMBOLS
17475 && TREE_CODE (decl) == FUNCTION_DECL)
17476 {
17477 static const char * const visibility_types[] = {
17478 NULL, "internal", "hidden", "protected"
17479 };
17480
17481 const char *name, *type;
17482
17483 name = ((* targetm.strip_name_encoding)
17484 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
17485 type = visibility_types[vis];
17486
17487 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
17488 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
17489 }
17490 else
17491 default_assemble_visibility (decl, vis);
17492 }
17493 #endif
17494 \f
17495 enum rtx_code
17496 rs6000_reverse_condition (enum machine_mode mode, enum rtx_code code)
17497 {
17498   /* Reversal of FP compares needs care -- an ordered compare
17499 becomes an unordered compare and vice versa. */
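  /* E.g. LT reverses to UNGE, while UNLT reverses to GE.  */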
17500 if (mode == CCFPmode
17501 && (!flag_finite_math_only
17502 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
17503 || code == UNEQ || code == LTGT))
17504 return reverse_condition_maybe_unordered (code);
17505 else
17506 return reverse_condition (code);
17507 }
17508
17509 /* Generate a compare for CODE. Return a brand-new rtx that
17510 represents the result of the compare. */
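/* The result has the form (COND (reg:CCxx NN) (const_int 0)), ready to be
   used as the condition of a branch or a store-flag sequence.  */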
17511
17512 static rtx
17513 rs6000_generate_compare (rtx cmp, enum machine_mode mode)
17514 {
17515 enum machine_mode comp_mode;
17516 rtx compare_result;
17517 enum rtx_code code = GET_CODE (cmp);
17518 rtx op0 = XEXP (cmp, 0);
17519 rtx op1 = XEXP (cmp, 1);
17520
17521 if (FLOAT_MODE_P (mode))
17522 comp_mode = CCFPmode;
17523 else if (code == GTU || code == LTU
17524 || code == GEU || code == LEU)
17525 comp_mode = CCUNSmode;
17526 else if ((code == EQ || code == NE)
17527 && unsigned_reg_p (op0)
17528 && (unsigned_reg_p (op1)
17529 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
17530     /* These are unsigned values; perhaps there will be a later
17531 ordering compare that can be shared with this one. */
17532 comp_mode = CCUNSmode;
17533 else
17534 comp_mode = CCmode;
17535
17536 /* If we have an unsigned compare, make sure we don't have a signed value as
17537 an immediate. */
17538 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
17539 && INTVAL (op1) < 0)
17540 {
17541 op0 = copy_rtx_if_shared (op0);
17542 op1 = force_reg (GET_MODE (op0), op1);
17543 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
17544 }
17545
17546 /* First, the compare. */
17547 compare_result = gen_reg_rtx (comp_mode);
17548
17549 /* E500 FP compare instructions on the GPRs. Yuck! */
17550 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
17551 && FLOAT_MODE_P (mode))
17552 {
17553 rtx cmp, or_result, compare_result2;
17554 enum machine_mode op_mode = GET_MODE (op0);
17555 bool reverse_p;
17556
17557 if (op_mode == VOIDmode)
17558 op_mode = GET_MODE (op1);
17559
17560 /* First reverse the condition codes that aren't directly supported. */
17561 switch (code)
17562 {
17563 case NE:
17564 case UNLT:
17565 case UNLE:
17566 case UNGT:
17567 case UNGE:
17568 code = reverse_condition_maybe_unordered (code);
17569 reverse_p = true;
17570 break;
17571
17572 case EQ:
17573 case LT:
17574 case LE:
17575 case GT:
17576 case GE:
17577 reverse_p = false;
17578 break;
17579
17580 default:
17581 gcc_unreachable ();
17582 }
17583
17584 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
17585 This explains the following mess. */
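      /* Only EQ, GT and LT have compare patterns here; NE and the
	 unordered codes were reversed into one of those above, and
	 LE/GE are synthesized further down by ORing in a separate EQ
	 compare.  */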
17586
17587 switch (code)
17588 {
17589 case EQ:
17590 switch (op_mode)
17591 {
17592 case SFmode:
17593 cmp = (flag_finite_math_only && !flag_trapping_math)
17594 ? gen_tstsfeq_gpr (compare_result, op0, op1)
17595 : gen_cmpsfeq_gpr (compare_result, op0, op1);
17596 break;
17597
17598 case DFmode:
17599 cmp = (flag_finite_math_only && !flag_trapping_math)
17600 ? gen_tstdfeq_gpr (compare_result, op0, op1)
17601 : gen_cmpdfeq_gpr (compare_result, op0, op1);
17602 break;
17603
17604 case TFmode:
17605 cmp = (flag_finite_math_only && !flag_trapping_math)
17606 ? gen_tsttfeq_gpr (compare_result, op0, op1)
17607 : gen_cmptfeq_gpr (compare_result, op0, op1);
17608 break;
17609
17610 default:
17611 gcc_unreachable ();
17612 }
17613 break;
17614
17615 case GT:
17616 case GE:
17617 switch (op_mode)
17618 {
17619 case SFmode:
17620 cmp = (flag_finite_math_only && !flag_trapping_math)
17621 ? gen_tstsfgt_gpr (compare_result, op0, op1)
17622 : gen_cmpsfgt_gpr (compare_result, op0, op1);
17623 break;
17624
17625 case DFmode:
17626 cmp = (flag_finite_math_only && !flag_trapping_math)
17627 ? gen_tstdfgt_gpr (compare_result, op0, op1)
17628 : gen_cmpdfgt_gpr (compare_result, op0, op1);
17629 break;
17630
17631 case TFmode:
17632 cmp = (flag_finite_math_only && !flag_trapping_math)
17633 ? gen_tsttfgt_gpr (compare_result, op0, op1)
17634 : gen_cmptfgt_gpr (compare_result, op0, op1);
17635 break;
17636
17637 default:
17638 gcc_unreachable ();
17639 }
17640 break;
17641
17642 case LT:
17643 case LE:
17644 switch (op_mode)
17645 {
17646 case SFmode:
17647 cmp = (flag_finite_math_only && !flag_trapping_math)
17648 ? gen_tstsflt_gpr (compare_result, op0, op1)
17649 : gen_cmpsflt_gpr (compare_result, op0, op1);
17650 break;
17651
17652 case DFmode:
17653 cmp = (flag_finite_math_only && !flag_trapping_math)
17654 ? gen_tstdflt_gpr (compare_result, op0, op1)
17655 : gen_cmpdflt_gpr (compare_result, op0, op1);
17656 break;
17657
17658 case TFmode:
17659 cmp = (flag_finite_math_only && !flag_trapping_math)
17660 ? gen_tsttflt_gpr (compare_result, op0, op1)
17661 : gen_cmptflt_gpr (compare_result, op0, op1);
17662 break;
17663
17664 default:
17665 gcc_unreachable ();
17666 }
17667 break;
17668
17669 default:
17670 gcc_unreachable ();
17671 }
17672
17673 /* Synthesize LE and GE from LT/GT || EQ. */
17674 if (code == LE || code == GE)
17675 {
17676 emit_insn (cmp);
17677
17678 compare_result2 = gen_reg_rtx (CCFPmode);
17679
17680 /* Do the EQ. */
17681 switch (op_mode)
17682 {
17683 case SFmode:
17684 cmp = (flag_finite_math_only && !flag_trapping_math)
17685 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
17686 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
17687 break;
17688
17689 case DFmode:
17690 cmp = (flag_finite_math_only && !flag_trapping_math)
17691 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
17692 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
17693 break;
17694
17695 case TFmode:
17696 cmp = (flag_finite_math_only && !flag_trapping_math)
17697 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
17698 : gen_cmptfeq_gpr (compare_result2, op0, op1);
17699 break;
17700
17701 default:
17702 gcc_unreachable ();
17703 }
17704
17705 emit_insn (cmp);
17706
17707 /* OR them together. */
17708 or_result = gen_reg_rtx (CCFPmode);
17709 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
17710 compare_result2);
17711 compare_result = or_result;
17712 }
17713
17714 code = reverse_p ? NE : EQ;
17715
17716 emit_insn (cmp);
17717 }
17718 else
17719 {
17720 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
17721 CLOBBERs to match cmptf_internal2 pattern. */
17722 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
17723 && GET_MODE (op0) == TFmode
17724 && !TARGET_IEEEQUAD
17725 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
17726 emit_insn (gen_rtx_PARALLEL (VOIDmode,
17727 gen_rtvec (10,
17728 gen_rtx_SET (VOIDmode,
17729 compare_result,
17730 gen_rtx_COMPARE (comp_mode, op0, op1)),
17731 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
17732 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
17733 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
17734 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
17735 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
17736 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
17737 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
17738 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
17739 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
17740 else if (GET_CODE (op1) == UNSPEC
17741 && XINT (op1, 1) == UNSPEC_SP_TEST)
17742 {
17743 rtx op1b = XVECEXP (op1, 0, 0);
17744 comp_mode = CCEQmode;
17745 compare_result = gen_reg_rtx (CCEQmode);
17746 if (TARGET_64BIT)
17747 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
17748 else
17749 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
17750 }
17751 else
17752 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
17753 gen_rtx_COMPARE (comp_mode, op0, op1)));
17754 }
17755
17756 /* Some kinds of FP comparisons need an OR operation;
17757 under flag_finite_math_only we don't bother. */
17758 if (FLOAT_MODE_P (mode)
17759 && !flag_finite_math_only
17760 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
17761 && (code == LE || code == GE
17762 || code == UNEQ || code == LTGT
17763 || code == UNGT || code == UNLT))
17764 {
17765 enum rtx_code or1, or2;
17766 rtx or1_rtx, or2_rtx, compare2_rtx;
17767 rtx or_result = gen_reg_rtx (CCEQmode);
17768
17769 switch (code)
17770 {
17771 case LE: or1 = LT; or2 = EQ; break;
17772 case GE: or1 = GT; or2 = EQ; break;
17773 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
17774 case LTGT: or1 = LT; or2 = GT; break;
17775 case UNGT: or1 = UNORDERED; or2 = GT; break;
17776 case UNLT: or1 = UNORDERED; or2 = LT; break;
17777 default: gcc_unreachable ();
17778 }
17779 validate_condition_mode (or1, comp_mode);
17780 validate_condition_mode (or2, comp_mode);
17781 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
17782 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
17783 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
17784 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
17785 const_true_rtx);
17786 emit_insn (gen_rtx_SET (VOIDmode, or_result, compare2_rtx));
17787
17788 compare_result = or_result;
17789 code = EQ;
17790 }
17791
17792 validate_condition_mode (code, GET_MODE (compare_result));
17793
17794 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
17795 }
17796
17797
17798 /* Emit the RTL for an sISEL pattern. */
17799
17800 void
17801 rs6000_emit_sISEL (enum machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
17802 {
17803 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
17804 }
17805
17806 void
17807 rs6000_emit_sCOND (enum machine_mode mode, rtx operands[])
17808 {
17809 rtx condition_rtx;
17810 enum machine_mode op_mode;
17811 enum rtx_code cond_code;
17812 rtx result = operands[0];
17813
17814 if (TARGET_ISEL && (mode == SImode || mode == DImode))
17815 {
17816 rs6000_emit_sISEL (mode, operands);
17817 return;
17818 }
17819
17820 condition_rtx = rs6000_generate_compare (operands[1], mode);
17821 cond_code = GET_CODE (condition_rtx);
17822
17823 if (FLOAT_MODE_P (mode)
17824 && !TARGET_FPRS && TARGET_HARD_FLOAT)
17825 {
17826 rtx t;
17827
17828 PUT_MODE (condition_rtx, SImode);
17829 t = XEXP (condition_rtx, 0);
17830
17831 gcc_assert (cond_code == NE || cond_code == EQ);
17832
17833 if (cond_code == NE)
17834 emit_insn (gen_e500_flip_gt_bit (t, t));
17835
17836 emit_insn (gen_move_from_CR_gt_bit (result, t));
17837 return;
17838 }
17839
17840 if (cond_code == NE
17841 || cond_code == GE || cond_code == LE
17842 || cond_code == GEU || cond_code == LEU
17843 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
17844 {
17845 rtx not_result = gen_reg_rtx (CCEQmode);
17846 rtx not_op, rev_cond_rtx;
17847 enum machine_mode cc_mode;
17848
17849 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
17850
17851 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
17852 SImode, XEXP (condition_rtx, 0), const0_rtx);
17853 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
17854 emit_insn (gen_rtx_SET (VOIDmode, not_result, not_op));
17855 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
17856 }
17857
17858 op_mode = GET_MODE (XEXP (operands[1], 0));
17859 if (op_mode == VOIDmode)
17860 op_mode = GET_MODE (XEXP (operands[1], 1));
17861
17862 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
17863 {
17864 PUT_MODE (condition_rtx, DImode);
17865 convert_move (result, condition_rtx, 0);
17866 }
17867 else
17868 {
17869 PUT_MODE (condition_rtx, SImode);
17870 emit_insn (gen_rtx_SET (VOIDmode, result, condition_rtx));
17871 }
17872 }
17873
17874 /* Emit a conditional branch to the label in OPERANDS[3].  OPERANDS[0] is the comparison and MODE is the mode of its operands.  */
17875
17876 void
17877 rs6000_emit_cbranch (enum machine_mode mode, rtx operands[])
17878 {
17879 rtx condition_rtx, loc_ref;
17880
17881 condition_rtx = rs6000_generate_compare (operands[0], mode);
17882 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
17883 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
17884 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
17885 loc_ref, pc_rtx)));
17886 }
17887
17888 /* Return the string to output a conditional branch to LABEL, which is
17889 the operand template of the label, or NULL if the branch is really a
17890 conditional return.
17891
17892 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
17893 condition code register and its mode specifies what kind of
17894 comparison we made.
17895
17896 REVERSED is nonzero if we should reverse the sense of the comparison.
17897
17898 INSN is the insn. */
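/* Schematically, an EQ branch to .L25 in cr0 comes out as "beq+ 0,.L25"
   (the +/- suffix being the static prediction hint), while a branch beyond
   the conditional-branch range is inverted around an unconditional one:
   "bne 0,$+8" followed by "b .L25".  */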
17899
17900 char *
17901 output_cbranch (rtx op, const char *label, int reversed, rtx insn)
17902 {
17903 static char string[64];
17904 enum rtx_code code = GET_CODE (op);
17905 rtx cc_reg = XEXP (op, 0);
17906 enum machine_mode mode = GET_MODE (cc_reg);
17907 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
17908 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
17909 int really_reversed = reversed ^ need_longbranch;
17910 char *s = string;
17911 const char *ccode;
17912 const char *pred;
17913 rtx note;
17914
17915 validate_condition_mode (code, mode);
17916
17917 /* Work out which way this really branches. We could use
17918 reverse_condition_maybe_unordered here always but this
17919 makes the resulting assembler clearer. */
17920 if (really_reversed)
17921 {
17922       /* Reversal of FP compares needs care -- an ordered compare
17923 becomes an unordered compare and vice versa. */
17924 if (mode == CCFPmode)
17925 code = reverse_condition_maybe_unordered (code);
17926 else
17927 code = reverse_condition (code);
17928 }
17929
17930 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
17931 {
17932 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
17933 to the GT bit. */
17934 switch (code)
17935 {
17936 case EQ:
17937 /* Opposite of GT. */
17938 code = GT;
17939 break;
17940
17941 case NE:
17942 code = UNLE;
17943 break;
17944
17945 default:
17946 gcc_unreachable ();
17947 }
17948 }
17949
17950 switch (code)
17951 {
17952 /* Not all of these are actually distinct opcodes, but
17953 we distinguish them for clarity of the resulting assembler. */
17954 case NE: case LTGT:
17955 ccode = "ne"; break;
17956 case EQ: case UNEQ:
17957 ccode = "eq"; break;
17958 case GE: case GEU:
17959 ccode = "ge"; break;
17960 case GT: case GTU: case UNGT:
17961 ccode = "gt"; break;
17962 case LE: case LEU:
17963 ccode = "le"; break;
17964 case LT: case LTU: case UNLT:
17965 ccode = "lt"; break;
17966 case UNORDERED: ccode = "un"; break;
17967 case ORDERED: ccode = "nu"; break;
17968 case UNGE: ccode = "nl"; break;
17969 case UNLE: ccode = "ng"; break;
17970 default:
17971 gcc_unreachable ();
17972 }
17973
17974 /* Maybe we have a guess as to how likely the branch is. */
17975 pred = "";
17976 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
17977 if (note != NULL_RTX)
17978 {
17979 /* PROB is the difference from 50%. */
17980 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
17981
17982 /* Only hint for highly probable/improbable branches on newer
17983 cpus as static prediction overrides processor dynamic
17984 prediction. For older cpus we may as well always hint, but
17985 assume not taken for branches that are very close to 50% as a
17986 mispredicted taken branch is more expensive than a
17987 mispredicted not-taken branch. */
17988 if (rs6000_always_hint
17989 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
17990 && br_prob_note_reliable_p (note)))
17991 {
17992 if (abs (prob) > REG_BR_PROB_BASE / 20
17993 && ((prob > 0) ^ need_longbranch))
17994 pred = "+";
17995 else
17996 pred = "-";
17997 }
17998 }
17999
18000 if (label == NULL)
18001 s += sprintf (s, "b%slr%s ", ccode, pred);
18002 else
18003 s += sprintf (s, "b%s%s ", ccode, pred);
18004
18005 /* We need to escape any '%' characters in the reg_names string.
18006 Assume they'd only be the first character.... */
18007 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
18008 *s++ = '%';
18009 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
18010
18011 if (label != NULL)
18012 {
18013 /* If the branch distance was too far, we may have to use an
18014 unconditional branch to go the distance. */
18015 if (need_longbranch)
18016 s += sprintf (s, ",$+8\n\tb %s", label);
18017 else
18018 s += sprintf (s, ",%s", label);
18019 }
18020
18021 return string;
18022 }
18023
18024 /* Return the string to flip the GT bit on a CR. */
18025 char *
18026 output_e500_flip_gt_bit (rtx dst, rtx src)
18027 {
18028 static char string[64];
18029 int a, b;
18030
18031 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
18032 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
18033
18034 /* GT bit. */
18035 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
18036 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
18037
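  /* E.g. with both operands in cr1 this emits "crnot 5,5".  */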
18038 sprintf (string, "crnot %d,%d", a, b);
18039 return string;
18040 }
18041
18042 /* Return insn for VSX or Altivec comparisons. */
18043
18044 static rtx
18045 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
18046 {
18047 rtx mask;
18048 enum machine_mode mode = GET_MODE (op0);
18049
18050 switch (code)
18051 {
18052 default:
18053 break;
18054
18055 case GE:
18056 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
18057 return NULL_RTX;
18058
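      /* Fall through: floating-point vector GE does map directly to an
	 instruction.  */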
18059 case EQ:
18060 case GT:
18061 case GTU:
18062 case ORDERED:
18063 case UNORDERED:
18064 case UNEQ:
18065 case LTGT:
18066 mask = gen_reg_rtx (mode);
18067 emit_insn (gen_rtx_SET (VOIDmode,
18068 mask,
18069 gen_rtx_fmt_ee (code, mode, op0, op1)));
18070 return mask;
18071 }
18072
18073 return NULL_RTX;
18074 }
18075
18076 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
18077 DMODE is expected destination mode. This is a recursive function. */
18078
18079 static rtx
18080 rs6000_emit_vector_compare (enum rtx_code rcode,
18081 rtx op0, rtx op1,
18082 enum machine_mode dmode)
18083 {
18084 rtx mask;
18085 bool swap_operands = false;
18086 bool try_again = false;
18087
18088 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
18089 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
18090
18091 /* See if the comparison works as is. */
18092 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
18093 if (mask)
18094 return mask;
18095
18096 switch (rcode)
18097 {
18098 case LT:
18099 rcode = GT;
18100 swap_operands = true;
18101 try_again = true;
18102 break;
18103 case LTU:
18104 rcode = GTU;
18105 swap_operands = true;
18106 try_again = true;
18107 break;
18108 case NE:
18109 case UNLE:
18110 case UNLT:
18111 case UNGE:
18112 case UNGT:
18113 /* Invert condition and try again.
18114 e.g., A != B becomes ~(A==B). */
18115 {
18116 enum rtx_code rev_code;
18117 enum insn_code nor_code;
18118 rtx mask2;
18119
18120 rev_code = reverse_condition_maybe_unordered (rcode);
18121 if (rev_code == UNKNOWN)
18122 return NULL_RTX;
18123
18124 nor_code = optab_handler (one_cmpl_optab, dmode);
18125 if (nor_code == CODE_FOR_nothing)
18126 return NULL_RTX;
18127
18128 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
18129 if (!mask2)
18130 return NULL_RTX;
18131
18132 mask = gen_reg_rtx (dmode);
18133 emit_insn (GEN_FCN (nor_code) (mask, mask2));
18134 return mask;
18135 }
18136 break;
18137 case GE:
18138 case GEU:
18139 case LE:
18140 case LEU:
18141       /* Try GT/GTU/LT/LTU OR EQ.  */
18142 {
18143 rtx c_rtx, eq_rtx;
18144 enum insn_code ior_code;
18145 enum rtx_code new_code;
18146
18147 switch (rcode)
18148 {
18149 case GE:
18150 new_code = GT;
18151 break;
18152
18153 case GEU:
18154 new_code = GTU;
18155 break;
18156
18157 case LE:
18158 new_code = LT;
18159 break;
18160
18161 case LEU:
18162 new_code = LTU;
18163 break;
18164
18165 default:
18166 gcc_unreachable ();
18167 }
18168
18169 ior_code = optab_handler (ior_optab, dmode);
18170 if (ior_code == CODE_FOR_nothing)
18171 return NULL_RTX;
18172
18173 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
18174 if (!c_rtx)
18175 return NULL_RTX;
18176
18177 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
18178 if (!eq_rtx)
18179 return NULL_RTX;
18180
18181 mask = gen_reg_rtx (dmode);
18182 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
18183 return mask;
18184 }
18185 break;
18186 default:
18187 return NULL_RTX;
18188 }
18189
18190 if (try_again)
18191 {
18192 if (swap_operands)
18193 {
18194 rtx tmp;
18195 tmp = op0;
18196 op0 = op1;
18197 op1 = tmp;
18198 }
18199
18200 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
18201 if (mask)
18202 return mask;
18203 }
18204
18205 /* You only get two chances. */
18206 return NULL_RTX;
18207 }
18208
18209 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
18210 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
18211 operands for the relation operation COND. */
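/* On success this emits a mask-driven select,
   (set DEST (if_then_else (ne MASK 0) OP_TRUE OP_FALSE)),
   corresponding to the AltiVec vsel / VSX xxsel instructions.  */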
18212
18213 int
18214 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
18215 rtx cond, rtx cc_op0, rtx cc_op1)
18216 {
18217 enum machine_mode dest_mode = GET_MODE (dest);
18218 enum machine_mode mask_mode = GET_MODE (cc_op0);
18219 enum rtx_code rcode = GET_CODE (cond);
18220 enum machine_mode cc_mode = CCmode;
18221 rtx mask;
18222 rtx cond2;
18223 rtx tmp;
18224 bool invert_move = false;
18225
18226 if (VECTOR_UNIT_NONE_P (dest_mode))
18227 return 0;
18228
18229 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
18230 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
18231
18232 switch (rcode)
18233 {
18234     /* For these codes, reverse the condition and swap the arms of the
18235        conditional move instead of implementing them directly.  */
18236 case NE:
18237 case UNLE:
18238 case UNLT:
18239 case UNGE:
18240 case UNGT:
18241 /* Invert condition and try again.
18242 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
18243 invert_move = true;
18244 rcode = reverse_condition_maybe_unordered (rcode);
18245 if (rcode == UNKNOWN)
18246 return 0;
18247 break;
18248
18249 /* Mark unsigned tests with CCUNSmode. */
18250 case GTU:
18251 case GEU:
18252 case LTU:
18253 case LEU:
18254 cc_mode = CCUNSmode;
18255 break;
18256
18257 default:
18258 break;
18259 }
18260
18261 /* Get the vector mask for the given relational operations. */
18262 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
18263
18264 if (!mask)
18265 return 0;
18266
18267 if (invert_move)
18268 {
18269 tmp = op_true;
18270 op_true = op_false;
18271 op_false = tmp;
18272 }
18273
18274 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
18275 CONST0_RTX (dest_mode));
18276 emit_insn (gen_rtx_SET (VOIDmode,
18277 dest,
18278 gen_rtx_IF_THEN_ELSE (dest_mode,
18279 cond2,
18280 op_true,
18281 op_false)));
18282 return 1;
18283 }
18284
18285 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
18286 operands of the last comparison is nonzero/true, FALSE_COND if it
18287 is zero/false. Return 0 if the hardware has no such operation. */
18288
18289 int
18290 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
18291 {
18292 enum rtx_code code = GET_CODE (op);
18293 rtx op0 = XEXP (op, 0);
18294 rtx op1 = XEXP (op, 1);
18295 REAL_VALUE_TYPE c1;
18296 enum machine_mode compare_mode = GET_MODE (op0);
18297 enum machine_mode result_mode = GET_MODE (dest);
18298 rtx temp;
18299 bool is_against_zero;
18300
18301 /* These modes should always match. */
18302 if (GET_MODE (op1) != compare_mode
18303 /* In the isel case however, we can use a compare immediate, so
18304 op1 may be a small constant. */
18305 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
18306 return 0;
18307 if (GET_MODE (true_cond) != result_mode)
18308 return 0;
18309 if (GET_MODE (false_cond) != result_mode)
18310 return 0;
18311
18312 /* Don't allow using floating point comparisons for integer results for
18313 now. */
18314 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
18315 return 0;
18316
18317 /* First, work out if the hardware can do this at all, or
18318 if it's too slow.... */
18319 if (!FLOAT_MODE_P (compare_mode))
18320 {
18321 if (TARGET_ISEL)
18322 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
18323 return 0;
18324 }
18325 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
18326 && SCALAR_FLOAT_MODE_P (compare_mode))
18327 return 0;
18328
18329 is_against_zero = op1 == CONST0_RTX (compare_mode);
18330
18331 /* A floating-point subtract might overflow, underflow, or produce
18332 an inexact result, thus changing the floating-point flags, so it
18333 can't be generated if we care about that. It's safe if one side
18334 of the construct is zero, since then no subtract will be
18335 generated. */
18336 if (SCALAR_FLOAT_MODE_P (compare_mode)
18337 && flag_trapping_math && ! is_against_zero)
18338 return 0;
18339
18340   /* Eliminate half of the comparisons by switching operands; this
18341 makes the remaining code simpler. */
18342 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
18343 || code == LTGT || code == LT || code == UNLE)
18344 {
18345 code = reverse_condition_maybe_unordered (code);
18346 temp = true_cond;
18347 true_cond = false_cond;
18348 false_cond = temp;
18349 }
18350
18351   /* UNEQ and LTGT take four instructions for a comparison with zero;
18352 it'll probably be faster to use a branch here too. */
18353 if (code == UNEQ && HONOR_NANS (compare_mode))
18354 return 0;
18355
18356 if (GET_CODE (op1) == CONST_DOUBLE)
18357 REAL_VALUE_FROM_CONST_DOUBLE (c1, op1);
18358
18359 /* We're going to try to implement comparisons by performing
18360 a subtract, then comparing against zero. Unfortunately,
18361 Inf - Inf is NaN which is not zero, and so if we don't
18362 know that the operand is finite and the comparison
18363      would treat EQ differently from UNORDERED, we can't do it.  */
18364 if (HONOR_INFINITIES (compare_mode)
18365 && code != GT && code != UNGE
18366 && (GET_CODE (op1) != CONST_DOUBLE || real_isinf (&c1))
18367 /* Constructs of the form (a OP b ? a : b) are safe. */
18368 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
18369 || (! rtx_equal_p (op0, true_cond)
18370 && ! rtx_equal_p (op1, true_cond))))
18371 return 0;
18372
18373 /* At this point we know we can use fsel. */
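  /* fsel FRT,FRA,FRC,FRB sets FRT = (FRA >= 0.0 ? FRC : FRB), so the
     remaining work is to massage each comparison into a GE test against
     zero.  */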
18374
18375 /* Reduce the comparison to a comparison against zero. */
18376 if (! is_against_zero)
18377 {
18378 temp = gen_reg_rtx (compare_mode);
18379 emit_insn (gen_rtx_SET (VOIDmode, temp,
18380 gen_rtx_MINUS (compare_mode, op0, op1)));
18381 op0 = temp;
18382 op1 = CONST0_RTX (compare_mode);
18383 }
18384
18385 /* If we don't care about NaNs we can reduce some of the comparisons
18386 down to faster ones. */
18387 if (! HONOR_NANS (compare_mode))
18388 switch (code)
18389 {
18390 case GT:
18391 code = LE;
18392 temp = true_cond;
18393 true_cond = false_cond;
18394 false_cond = temp;
18395 break;
18396 case UNGE:
18397 code = GE;
18398 break;
18399 case UNEQ:
18400 code = EQ;
18401 break;
18402 default:
18403 break;
18404 }
18405
18406 /* Now, reduce everything down to a GE. */
18407 switch (code)
18408 {
18409 case GE:
18410 break;
18411
18412 case LE:
18413 temp = gen_reg_rtx (compare_mode);
18414 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
18415 op0 = temp;
18416 break;
18417
18418 case ORDERED:
18419 temp = gen_reg_rtx (compare_mode);
18420 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_ABS (compare_mode, op0)));
18421 op0 = temp;
18422 break;
18423
18424 case EQ:
18425 temp = gen_reg_rtx (compare_mode);
18426 emit_insn (gen_rtx_SET (VOIDmode, temp,
18427 gen_rtx_NEG (compare_mode,
18428 gen_rtx_ABS (compare_mode, op0))));
18429 op0 = temp;
18430 break;
18431
18432 case UNGE:
18433 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
18434 temp = gen_reg_rtx (result_mode);
18435 emit_insn (gen_rtx_SET (VOIDmode, temp,
18436 gen_rtx_IF_THEN_ELSE (result_mode,
18437 gen_rtx_GE (VOIDmode,
18438 op0, op1),
18439 true_cond, false_cond)));
18440 false_cond = true_cond;
18441 true_cond = temp;
18442
18443 temp = gen_reg_rtx (compare_mode);
18444 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
18445 op0 = temp;
18446 break;
18447
18448 case GT:
18449 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
18450 temp = gen_reg_rtx (result_mode);
18451 emit_insn (gen_rtx_SET (VOIDmode, temp,
18452 gen_rtx_IF_THEN_ELSE (result_mode,
18453 gen_rtx_GE (VOIDmode,
18454 op0, op1),
18455 true_cond, false_cond)));
18456 true_cond = false_cond;
18457 false_cond = temp;
18458
18459 temp = gen_reg_rtx (compare_mode);
18460 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
18461 op0 = temp;
18462 break;
18463
18464 default:
18465 gcc_unreachable ();
18466 }
18467
18468 emit_insn (gen_rtx_SET (VOIDmode, dest,
18469 gen_rtx_IF_THEN_ELSE (result_mode,
18470 gen_rtx_GE (VOIDmode,
18471 op0, op1),
18472 true_cond, false_cond)));
18473 return 1;
18474 }
18475
18476 /* Same as above, but for ints (isel). */
18477
18478 static int
18479 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
18480 {
18481 rtx condition_rtx, cr;
18482 enum machine_mode mode = GET_MODE (dest);
18483 enum rtx_code cond_code;
18484 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
18485 bool signedp;
18486
18487 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
18488 return 0;
18489
18490 /* We still have to do the compare, because isel doesn't do a
18491      compare; it just looks at the CRx bits set by a previous compare
18492 instruction. */
18493 condition_rtx = rs6000_generate_compare (op, mode);
18494 cond_code = GET_CODE (condition_rtx);
18495 cr = XEXP (condition_rtx, 0);
18496 signedp = GET_MODE (cr) == CCmode;
18497
18498 isel_func = (mode == SImode
18499 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
18500 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
18501
18502 switch (cond_code)
18503 {
18504 case LT: case GT: case LTU: case GTU: case EQ:
18505 /* isel handles these directly. */
18506 break;
18507
18508 default:
18509 /* We need to swap the sense of the comparison. */
18510 {
18511 rtx t = true_cond;
18512 true_cond = false_cond;
18513 false_cond = t;
18514 PUT_CODE (condition_rtx, reverse_condition (cond_code));
18515 }
18516 break;
18517 }
18518
18519 false_cond = force_reg (mode, false_cond);
18520 if (true_cond != const0_rtx)
18521 true_cond = force_reg (mode, true_cond);
18522
18523 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
18524
18525 return 1;
18526 }
18527
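/* Output the isel instruction for OPERANDS: operands[1] is the comparison,
   operands[2] and operands[3] the two source values.  isel can test only
   the LT, GT and EQ CR bits directly, so the remaining codes are handled
   by reversing the condition and swapping the sources.  */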
18528 const char *
18529 output_isel (rtx *operands)
18530 {
18531 enum rtx_code code;
18532
18533 code = GET_CODE (operands[1]);
18534
18535 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
18536 {
18537 gcc_assert (GET_CODE (operands[2]) == REG
18538 && GET_CODE (operands[3]) == REG);
18539 PUT_CODE (operands[1], reverse_condition (code));
18540 return "isel %0,%3,%2,%j1";
18541 }
18542
18543 return "isel %0,%2,%3,%j1";
18544 }
18545
18546 void
18547 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
18548 {
18549 enum machine_mode mode = GET_MODE (op0);
18550 enum rtx_code c;
18551 rtx target;
18552
18553 /* VSX/altivec have direct min/max insns. */
18554 if ((code == SMAX || code == SMIN)
18555 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
18556 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
18557 {
18558 emit_insn (gen_rtx_SET (VOIDmode,
18559 dest,
18560 gen_rtx_fmt_ee (code, mode, op0, op1)));
18561 return;
18562 }
18563
18564 if (code == SMAX || code == SMIN)
18565 c = GE;
18566 else
18567 c = GEU;
18568
18569 if (code == SMAX || code == UMAX)
18570 target = emit_conditional_move (dest, c, op0, op1, mode,
18571 op0, op1, mode, 0);
18572 else
18573 target = emit_conditional_move (dest, c, op0, op1, mode,
18574 op1, op0, mode, 0);
18575 gcc_assert (target);
18576 if (target != dest)
18577 emit_move_insn (dest, target);
18578 }
18579
18580 /* A subroutine of the atomic operation splitters. Jump to LABEL if
18581 COND is true. Mark the jump as unlikely to be taken. */
18582
18583 static void
18584 emit_unlikely_jump (rtx cond, rtx label)
18585 {
18586 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
18587 rtx x;
18588
18589 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
18590 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
18591 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
18592 }
18593
18594 /* A subroutine of the atomic operation splitters. Emit a load-locked
18595    instruction in MODE.  For QI/HImode, possibly use a pattern that includes
18596 the zero_extend operation. */
18597
18598 static void
18599 emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
18600 {
18601 rtx (*fn) (rtx, rtx) = NULL;
18602
18603 switch (mode)
18604 {
18605 case QImode:
18606 fn = gen_load_lockedqi;
18607 break;
18608 case HImode:
18609 fn = gen_load_lockedhi;
18610 break;
18611 case SImode:
18612 if (GET_MODE (mem) == QImode)
18613 fn = gen_load_lockedqi_si;
18614 else if (GET_MODE (mem) == HImode)
18615 fn = gen_load_lockedhi_si;
18616 else
18617 fn = gen_load_lockedsi;
18618 break;
18619 case DImode:
18620 fn = gen_load_lockeddi;
18621 break;
18622 case TImode:
18623 fn = gen_load_lockedti;
18624 break;
18625 default:
18626 gcc_unreachable ();
18627 }
18628 emit_insn (fn (reg, mem));
18629 }
18630
18631 /* A subroutine of the atomic operation splitters. Emit a store-conditional
18632 instruction in MODE. */
18633
18634 static void
18635 emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
18636 {
18637 rtx (*fn) (rtx, rtx, rtx) = NULL;
18638
18639 switch (mode)
18640 {
18641 case QImode:
18642 fn = gen_store_conditionalqi;
18643 break;
18644 case HImode:
18645 fn = gen_store_conditionalhi;
18646 break;
18647 case SImode:
18648 fn = gen_store_conditionalsi;
18649 break;
18650 case DImode:
18651 fn = gen_store_conditionaldi;
18652 break;
18653 case TImode:
18654 fn = gen_store_conditionalti;
18655 break;
18656 default:
18657 gcc_unreachable ();
18658 }
18659
18660   /* Emit sync before stwcx. to address PPC405 erratum 77.  */
18661 if (PPC405_ERRATUM77)
18662 emit_insn (gen_hwsync ());
18663
18664 emit_insn (fn (res, mem, val));
18665 }
18666
18667 /* Expand barriers before and after a load_locked/store_cond sequence. */
18668
18669 static rtx
18670 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
18671 {
18672 rtx addr = XEXP (mem, 0);
18673 int strict_p = (reload_in_progress || reload_completed);
18674
18675 if (!legitimate_indirect_address_p (addr, strict_p)
18676 && !legitimate_indexed_address_p (addr, strict_p))
18677 {
18678 addr = force_reg (Pmode, addr);
18679 mem = replace_equiv_address_nv (mem, addr);
18680 }
18681
18682 switch (model)
18683 {
18684 case MEMMODEL_RELAXED:
18685 case MEMMODEL_CONSUME:
18686 case MEMMODEL_ACQUIRE:
18687 break;
18688 case MEMMODEL_RELEASE:
18689 case MEMMODEL_ACQ_REL:
18690 emit_insn (gen_lwsync ());
18691 break;
18692 case MEMMODEL_SEQ_CST:
18693 emit_insn (gen_hwsync ());
18694 break;
18695 default:
18696 gcc_unreachable ();
18697 }
18698 return mem;
18699 }
18700
18701 static void
18702 rs6000_post_atomic_barrier (enum memmodel model)
18703 {
18704 switch (model)
18705 {
18706 case MEMMODEL_RELAXED:
18707 case MEMMODEL_CONSUME:
18708 case MEMMODEL_RELEASE:
18709 break;
18710 case MEMMODEL_ACQUIRE:
18711 case MEMMODEL_ACQ_REL:
18712 case MEMMODEL_SEQ_CST:
18713 emit_insn (gen_isync ());
18714 break;
18715 default:
18716 gcc_unreachable ();
18717 }
18718 }
18719
18720 /* A subroutine of the various atomic expanders. For sub-word operations,
18721 we must adjust things to operate on SImode. Given the original MEM,
18722 return a new aligned memory. Also build and return the quantities by
18723 which to shift and mask. */
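/* E.g. for a big-endian HImode access the shift works out to
   ((addr & 2) * 8) ^ 16 and the mask to 0xffff << shift, so a halfword at
   an aligned address occupies the high half of the SImode word.  */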
18724
18725 static rtx
18726 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
18727 {
18728 rtx addr, align, shift, mask, mem;
18729 HOST_WIDE_INT shift_mask;
18730 enum machine_mode mode = GET_MODE (orig_mem);
18731
18732 /* For smaller modes, we have to implement this via SImode. */
18733 shift_mask = (mode == QImode ? 0x18 : 0x10);
18734
18735 addr = XEXP (orig_mem, 0);
18736 addr = force_reg (GET_MODE (addr), addr);
18737
18738 /* Aligned memory containing subword. Generate a new memory. We
18739 do not want any of the existing MEM_ATTR data, as we're now
18740 accessing memory outside the original object. */
18741 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
18742 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18743 mem = gen_rtx_MEM (SImode, align);
18744 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
18745 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
18746 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
18747
18748 /* Shift amount for subword relative to aligned word. */
18749 shift = gen_reg_rtx (SImode);
18750 addr = gen_lowpart (SImode, addr);
18751 emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask)));
18752 if (WORDS_BIG_ENDIAN)
18753 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
18754 shift, 1, OPTAB_LIB_WIDEN);
18755 *pshift = shift;
18756
18757 /* Mask for insertion. */
18758 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
18759 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
18760 *pmask = mask;
18761
18762 return mem;
18763 }
18764
18765 /* A subroutine of the various atomic expanders. For sub-word operands,
18766    combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo.  */
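/* The combination computed is (OLDVAL & ~MASK) | NEWVAL, with NEWVAL
   already shifted into position within the word.  */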
18767
18768 static rtx
18769 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
18770 {
18771 rtx x;
18772
18773 x = gen_reg_rtx (SImode);
18774 emit_insn (gen_rtx_SET (VOIDmode, x,
18775 gen_rtx_AND (SImode,
18776 gen_rtx_NOT (SImode, mask),
18777 oldval)));
18778
18779 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
18780
18781 return x;
18782 }
18783
18784 /* A subroutine of the various atomic expanders. For sub-word operands,
18785 extract WIDE to NARROW via SHIFT. */
18786
18787 static void
18788 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
18789 {
18790 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
18791 wide, 1, OPTAB_LIB_WIDEN);
18792 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
18793 }
18794
18795 /* Expand an atomic compare and swap operation. */
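/* For a strong SImode compare-and-swap the emitted loop looks roughly
   like:

	.L1:	lwarx   ret,0,mem
		cmpw    0,ret,oldval
		bne-    0,.L2
		stwcx.  newval,0,mem
		bne-    0,.L1
	.L2:

   with the memory-model barriers placed around it.  */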
18796
18797 void
18798 rs6000_expand_atomic_compare_and_swap (rtx operands[])
18799 {
18800 rtx boolval, retval, mem, oldval, newval, cond;
18801 rtx label1, label2, x, mask, shift;
18802 enum machine_mode mode, orig_mode;
18803 enum memmodel mod_s, mod_f;
18804 bool is_weak;
18805
18806 boolval = operands[0];
18807 retval = operands[1];
18808 mem = operands[2];
18809 oldval = operands[3];
18810 newval = operands[4];
18811 is_weak = (INTVAL (operands[5]) != 0);
18812 mod_s = (enum memmodel) INTVAL (operands[6]);
18813 mod_f = (enum memmodel) INTVAL (operands[7]);
18814 orig_mode = mode = GET_MODE (mem);
18815
18816 mask = shift = NULL_RTX;
18817 if (mode == QImode || mode == HImode)
18818 {
18819 /* Before power8, we didn't have access to lbarx/lharx, so generate a
18820 	 lwarx plus shift/mask sequence.  With power8, we need to do the
18821 comparison in SImode, but the store is still done in QI/HImode. */
18822 oldval = convert_modes (SImode, mode, oldval, 1);
18823
18824 if (!TARGET_SYNC_HI_QI)
18825 {
18826 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
18827
18828 	  /* Shift and mask OLDVAL into position within the word.  */
18829 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
18830 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18831
18832 /* Shift and mask NEWVAL into position within the word. */
18833 newval = convert_modes (SImode, mode, newval, 1);
18834 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
18835 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18836 }
18837
18838 /* Prepare to adjust the return value. */
18839 retval = gen_reg_rtx (SImode);
18840 mode = SImode;
18841 }
18842 else if (reg_overlap_mentioned_p (retval, oldval))
18843 oldval = copy_to_reg (oldval);
18844
18845 mem = rs6000_pre_atomic_barrier (mem, mod_s);
18846
18847 label1 = NULL_RTX;
18848 if (!is_weak)
18849 {
18850 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
18851 emit_label (XEXP (label1, 0));
18852 }
18853 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
18854
18855 emit_load_locked (mode, retval, mem);
18856
18857 x = retval;
18858 if (mask)
18859 {
18860 x = expand_simple_binop (SImode, AND, retval, mask,
18861 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18862 }
18863
18864 cond = gen_reg_rtx (CCmode);
18865 /* If we have TImode, synthesize a comparison. */
18866 if (mode != TImode)
18867 x = gen_rtx_COMPARE (CCmode, x, oldval);
18868 else
18869 {
18870 rtx xor1_result = gen_reg_rtx (DImode);
18871 rtx xor2_result = gen_reg_rtx (DImode);
18872 rtx or_result = gen_reg_rtx (DImode);
18873 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
18874 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
18875 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
18876 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
18877
18878 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
18879 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
18880 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
18881 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
18882 }
18883
18884 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
18885
18886 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
18887 emit_unlikely_jump (x, label2);
18888
18889 x = newval;
18890 if (mask)
18891 x = rs6000_mask_atomic_subword (retval, newval, mask);
18892
18893 emit_store_conditional (orig_mode, cond, mem, x);
18894
18895 if (!is_weak)
18896 {
18897 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
18898 emit_unlikely_jump (x, label1);
18899 }
18900
18901 if (mod_f != MEMMODEL_RELAXED)
18902 emit_label (XEXP (label2, 0));
18903
18904 rs6000_post_atomic_barrier (mod_s);
18905
18906 if (mod_f == MEMMODEL_RELAXED)
18907 emit_label (XEXP (label2, 0));
18908
18909 if (shift)
18910 rs6000_finish_atomic_subword (operands[1], retval, shift);
18911 else if (mode != GET_MODE (operands[1]))
18912 convert_move (operands[1], retval, 1);
18913
18914 /* In all cases, CR0 contains EQ on success, and NE on failure. */
18915 x = gen_rtx_EQ (SImode, cond, const0_rtx);
18916 emit_insn (gen_rtx_SET (VOIDmode, boolval, x));
18917 }
18918
18919 /* Expand an atomic exchange operation. */
18920
18921 void
18922 rs6000_expand_atomic_exchange (rtx operands[])
18923 {
18924 rtx retval, mem, val, cond;
18925 enum machine_mode mode;
18926 enum memmodel model;
18927 rtx label, x, mask, shift;
18928
18929 retval = operands[0];
18930 mem = operands[1];
18931 val = operands[2];
18932 model = (enum memmodel) INTVAL (operands[3]);
18933 mode = GET_MODE (mem);
18934
18935 mask = shift = NULL_RTX;
18936 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
18937 {
18938 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
18939
18940       /* Shift and mask VAL into position within the word.  */
18941 val = convert_modes (SImode, mode, val, 1);
18942 val = expand_simple_binop (SImode, ASHIFT, val, shift,
18943 NULL_RTX, 1, OPTAB_LIB_WIDEN);
18944
18945 /* Prepare to adjust the return value. */
18946 retval = gen_reg_rtx (SImode);
18947 mode = SImode;
18948 }
18949
18950 mem = rs6000_pre_atomic_barrier (mem, model);
18951
18952 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
18953 emit_label (XEXP (label, 0));
18954
18955 emit_load_locked (mode, retval, mem);
18956
18957 x = val;
18958 if (mask)
18959 x = rs6000_mask_atomic_subword (retval, val, mask);
18960
18961 cond = gen_reg_rtx (CCmode);
18962 emit_store_conditional (mode, cond, mem, x);
18963
18964 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
18965 emit_unlikely_jump (x, label);
18966
18967 rs6000_post_atomic_barrier (model);
18968
18969 if (shift)
18970 rs6000_finish_atomic_subword (operands[0], retval, shift);
18971 }
18972
18973 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
18974 to perform. MEM is the memory on which to operate. VAL is the second
18975 operand of the binary operator. BEFORE and AFTER are optional locations to
18976    return the value of MEM either before or after the operation.  MODEL_RTX
18977 is a CONST_INT containing the memory model to use. */
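/* E.g. a full-word atomic add becomes, roughly:

	.L1:	lwarx   before,0,mem
		add     after,before,val
		stwcx.  after,0,mem
		bne-    0,.L1

   plus the barriers implied by the memory model.  */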
18978
18979 void
18980 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
18981 rtx orig_before, rtx orig_after, rtx model_rtx)
18982 {
18983 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
18984 enum machine_mode mode = GET_MODE (mem);
18985 enum machine_mode store_mode = mode;
18986 rtx label, x, cond, mask, shift;
18987 rtx before = orig_before, after = orig_after;
18988
18989 mask = shift = NULL_RTX;
18990   /* On power8 (lbarx/lharx) we do the operation itself in SImode registers.
18991      On previous systems, operate on the SImode word containing the subword
18992      and shift/mask to get the proper byte or halfword.  */
18993 if (mode == QImode || mode == HImode)
18994 {
18995 if (TARGET_SYNC_HI_QI)
18996 {
18997 val = convert_modes (SImode, mode, val, 1);
18998
18999 /* Prepare to adjust the return value. */
19000 before = gen_reg_rtx (SImode);
19001 if (after)
19002 after = gen_reg_rtx (SImode);
19003 mode = SImode;
19004 }
19005 else
19006 {
19007 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
19008
19009 	      /* Shift and mask VAL into position within the word.  */
19010 val = convert_modes (SImode, mode, val, 1);
19011 val = expand_simple_binop (SImode, ASHIFT, val, shift,
19012 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19013
19014 switch (code)
19015 {
19016 case IOR:
19017 case XOR:
19018 /* We've already zero-extended VAL. That is sufficient to
19019 make certain that it does not affect other bits. */
19020 mask = NULL;
19021 break;
19022
19023 case AND:
19024 /* If we make certain that all of the other bits in VAL are
19025 set, that will be sufficient to not affect other bits. */
19026 x = gen_rtx_NOT (SImode, mask);
19027 x = gen_rtx_IOR (SImode, x, val);
19028 emit_insn (gen_rtx_SET (VOIDmode, val, x));
19029 mask = NULL;
19030 break;
19031
19032 case NOT:
19033 case PLUS:
19034 case MINUS:
19035 /* These will all affect bits outside the field and need
19036 adjustment via MASK within the loop. */
19037 break;
19038
19039 default:
19040 gcc_unreachable ();
19041 }
19042
19043 /* Prepare to adjust the return value. */
19044 before = gen_reg_rtx (SImode);
19045 if (after)
19046 after = gen_reg_rtx (SImode);
19047 store_mode = mode = SImode;
19048 }
19049 }
19050
19051 mem = rs6000_pre_atomic_barrier (mem, model);
19052
19053 label = gen_label_rtx ();
19054 emit_label (label);
19055 label = gen_rtx_LABEL_REF (VOIDmode, label);
19056
19057 if (before == NULL_RTX)
19058 before = gen_reg_rtx (mode);
19059
19060 emit_load_locked (mode, before, mem);
19061
19062 if (code == NOT)
19063 {
19064 x = expand_simple_binop (mode, AND, before, val,
19065 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19066 after = expand_simple_unop (mode, NOT, x, after, 1);
19067 }
19068 else
19069 {
19070 after = expand_simple_binop (mode, code, before, val,
19071 after, 1, OPTAB_LIB_WIDEN);
19072 }
19073
19074 x = after;
19075 if (mask)
19076 {
19077 x = expand_simple_binop (SImode, AND, after, mask,
19078 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19079 x = rs6000_mask_atomic_subword (before, x, mask);
19080 }
19081 else if (store_mode != mode)
19082 x = convert_modes (store_mode, mode, x, 1);
19083
19084 cond = gen_reg_rtx (CCmode);
19085 emit_store_conditional (store_mode, cond, mem, x);
19086
19087 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
19088 emit_unlikely_jump (x, label);
19089
19090 rs6000_post_atomic_barrier (model);
19091
19092 if (shift)
19093 {
19094 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
19095 then do the calculations in an SImode register. */
19096 if (orig_before)
19097 rs6000_finish_atomic_subword (orig_before, before, shift);
19098 if (orig_after)
19099 rs6000_finish_atomic_subword (orig_after, after, shift);
19100 }
19101 else if (store_mode != mode)
19102 {
19103 /* QImode/HImode on machines with lbarx/lharx where we do the native
19104 operation and then do the calculations in an SImode register. */
19105 if (orig_before)
19106 convert_move (orig_before, before, 1);
19107 if (orig_after)
19108 convert_move (orig_after, after, 1);
19109 }
19110 else if (orig_after && after != orig_after)
19111 emit_move_insn (orig_after, after);
19112 }
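
/* For illustration only, not compiler code: on machines without
   lbarx/lharx, the subword path above is roughly what the following C
   does with a word-wide compare-and-swap -- shift the byte into its lane
   of the containing word, update that lane, and leave the other bytes
   alone.  This sketch assumes <stdint.h>, a little-endian byte layout,
   and GCC's __atomic builtins:

   unsigned char
   atomic_or_byte (unsigned char *p, unsigned char val)
   {
     uint32_t *wp = (uint32_t *) ((uintptr_t) p & ~(uintptr_t) 3);
     int shift = ((uintptr_t) p & 3) * 8;
     uint32_t oldval = __atomic_load_n (wp, __ATOMIC_RELAXED);
     uint32_t newval;
     do
       newval = oldval | ((uint32_t) val << shift);
     while (!__atomic_compare_exchange_n (wp, &oldval, newval, 0,
                                          __ATOMIC_SEQ_CST,
                                          __ATOMIC_RELAXED));
     return (unsigned char) (oldval >> shift);
   }

   As in the IOR case above, no extra mask is needed because VAL was
   zero-extended before being shifted into position.  */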
19113
19114 /* Emit instructions to move SRC to DST. Called by splitters for
19115 multi-register moves. It will emit at most one instruction for
19116 each register that is accessed; that is, it won't emit li/lis pairs
19117 (or equivalent for 64-bit code). One of SRC or DST must be a hard
19118 register. */
19119
19120 void
19121 rs6000_split_multireg_move (rtx dst, rtx src)
19122 {
19123 /* The register number of the first register being moved. */
19124 int reg;
19125 /* The mode that is to be moved. */
19126 enum machine_mode mode;
19127 /* The mode that the move is being done in, and its size. */
19128 enum machine_mode reg_mode;
19129 int reg_mode_size;
19130 /* The number of registers that will be moved. */
19131 int nregs;
19132
19133 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
19134 mode = GET_MODE (dst);
19135 nregs = hard_regno_nregs[reg][mode];
19136 if (FP_REGNO_P (reg))
19137 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
19138 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
19139 else if (ALTIVEC_REGNO_P (reg))
19140 reg_mode = V16QImode;
19141 else if (TARGET_E500_DOUBLE && mode == TFmode)
19142 reg_mode = DFmode;
19143 else
19144 reg_mode = word_mode;
19145 reg_mode_size = GET_MODE_SIZE (reg_mode);
19146
19147 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
19148
19149 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
19150 {
19151 /* Move register range backwards, if we might have destructive
19152 overlap. */
19153 int i;
19154 for (i = nregs - 1; i >= 0; i--)
19155 emit_insn (gen_rtx_SET (VOIDmode,
19156 simplify_gen_subreg (reg_mode, dst, mode,
19157 i * reg_mode_size),
19158 simplify_gen_subreg (reg_mode, src, mode,
19159 i * reg_mode_size)));
19160 }
19161 else
19162 {
19163 int i;
19164 int j = -1;
19165 bool used_update = false;
19166 rtx restore_basereg = NULL_RTX;
19167
19168 if (MEM_P (src) && INT_REGNO_P (reg))
19169 {
19170 rtx breg;
19171
19172 if (GET_CODE (XEXP (src, 0)) == PRE_INC
19173 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
19174 {
19175 rtx delta_rtx;
19176 breg = XEXP (XEXP (src, 0), 0);
19177 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
19178 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
19179 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
19180 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
19181 src = replace_equiv_address (src, breg);
19182 }
19183 else if (! rs6000_offsettable_memref_p (src, reg_mode))
19184 {
19185 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
19186 {
19187 rtx basereg = XEXP (XEXP (src, 0), 0);
19188 if (TARGET_UPDATE)
19189 {
19190 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
19191 emit_insn (gen_rtx_SET (VOIDmode, ndst,
19192 gen_rtx_MEM (reg_mode, XEXP (src, 0))));
19193 used_update = true;
19194 }
19195 else
19196 emit_insn (gen_rtx_SET (VOIDmode, basereg,
19197 XEXP (XEXP (src, 0), 1)));
19198 src = replace_equiv_address (src, basereg);
19199 }
19200 else
19201 {
19202 rtx basereg = gen_rtx_REG (Pmode, reg);
19203 emit_insn (gen_rtx_SET (VOIDmode, basereg, XEXP (src, 0)));
19204 src = replace_equiv_address (src, basereg);
19205 }
19206 }
19207
19208 breg = XEXP (src, 0);
19209 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
19210 breg = XEXP (breg, 0);
19211
19212 /* If the base register we are using to address memory is
19213 also a destination reg, then change that register last. */
19214 if (REG_P (breg)
19215 && REGNO (breg) >= REGNO (dst)
19216 && REGNO (breg) < REGNO (dst) + nregs)
19217 j = REGNO (breg) - REGNO (dst);
19218 }
19219 else if (MEM_P (dst) && INT_REGNO_P (reg))
19220 {
19221 rtx breg;
19222
19223 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
19224 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
19225 {
19226 rtx delta_rtx;
19227 breg = XEXP (XEXP (dst, 0), 0);
19228 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
19229 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
19230 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
19231
19232 /* We have to update the breg before doing the store.
19233 Use store with update, if available. */
19234
19235 if (TARGET_UPDATE)
19236 {
19237 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
19238 emit_insn (TARGET_32BIT
19239 ? (TARGET_POWERPC64
19240 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
19241 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
19242 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
19243 used_update = true;
19244 }
19245 else
19246 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
19247 dst = replace_equiv_address (dst, breg);
19248 }
19249 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
19250 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
19251 {
19252 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
19253 {
19254 rtx basereg = XEXP (XEXP (dst, 0), 0);
19255 if (TARGET_UPDATE)
19256 {
19257 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
19258 emit_insn (gen_rtx_SET (VOIDmode,
19259 gen_rtx_MEM (reg_mode, XEXP (dst, 0)), nsrc));
19260 used_update = true;
19261 }
19262 else
19263 emit_insn (gen_rtx_SET (VOIDmode, basereg,
19264 XEXP (XEXP (dst, 0), 1)));
19265 dst = replace_equiv_address (dst, basereg);
19266 }
19267 else
19268 {
19269 rtx basereg = XEXP (XEXP (dst, 0), 0);
19270 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
19271 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
19272 && REG_P (basereg)
19273 && REG_P (offsetreg)
19274 && REGNO (basereg) != REGNO (offsetreg));
19275 if (REGNO (basereg) == 0)
19276 {
19277 rtx tmp = offsetreg;
19278 offsetreg = basereg;
19279 basereg = tmp;
19280 }
19281 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
19282 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
19283 dst = replace_equiv_address (dst, basereg);
19284 }
19285 }
19286 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
19287 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
19288 }
19289
19290 for (i = 0; i < nregs; i++)
19291 {
19292 /* Calculate index to next subword. */
19293 ++j;
19294 if (j == nregs)
19295 j = 0;
19296
19297 /* If compiler already emitted move of first word by
19298 store with update, no need to do anything. */
19299 if (j == 0 && used_update)
19300 continue;
19301
19302 emit_insn (gen_rtx_SET (VOIDmode,
19303 simplify_gen_subreg (reg_mode, dst, mode,
19304 j * reg_mode_size),
19305 simplify_gen_subreg (reg_mode, src, mode,
19306 j * reg_mode_size)));
19307 }
19308 if (restore_basereg != NULL_RTX)
19309 emit_insn (restore_basereg);
19310 }
19311 }
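
/* Illustrative sketch, not compiler code: the direction choice above is
   the standard overlapping-copy rule, the same one memmove uses.  If the
   destination range starts at a higher register number than the source,
   copying forwards would overwrite registers that have not been read
   yet, so the copy runs backwards:

   void
   copy_overlapping (int *dst, int *src, int n)
   {
     int i;
     if (dst > src)
       for (i = n - 1; i >= 0; i--)
         dst[i] = src[i];
     else
       for (i = 0; i < n; i++)
         dst[i] = src[i];
   }
*/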
19312
19313 \f
19314 /* This page contains routines that are used to determine what the
19315 function prologue and epilogue code will do and write them out. */
19316
19317 static inline bool
19318 save_reg_p (int r)
19319 {
19320 return !call_used_regs[r] && df_regs_ever_live_p (r);
19321 }
19322
19323 /* Return the first fixed-point register that is required to be
19324 saved. 32 if none. */
19325
19326 int
19327 first_reg_to_save (void)
19328 {
19329 int first_reg;
19330
19331 /* Find lowest numbered live register. */
19332 for (first_reg = 13; first_reg <= 31; first_reg++)
19333 if (save_reg_p (first_reg))
19334 break;
19335
19336 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
19337 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
19338 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
19339 || (TARGET_TOC && TARGET_MINIMAL_TOC))
19340 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
19341 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
19342
19343 #if TARGET_MACHO
19344 if (flag_pic
19345 && crtl->uses_pic_offset_table
19346 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
19347 return RS6000_PIC_OFFSET_TABLE_REGNUM;
19348 #endif
19349
19350 return first_reg;
19351 }
19352
19353 /* Similar, for FP regs. */
19354
19355 int
19356 first_fp_reg_to_save (void)
19357 {
19358 int first_reg;
19359
19360 /* Find lowest numbered live register. */
19361 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
19362 if (save_reg_p (first_reg))
19363 break;
19364
19365 return first_reg;
19366 }
19367
19368 /* Similar, for AltiVec regs. */
19369
19370 static int
19371 first_altivec_reg_to_save (void)
19372 {
19373 int i;
19374
19375 /* Stack frame remains as is unless we are in AltiVec ABI. */
19376 if (! TARGET_ALTIVEC_ABI)
19377 return LAST_ALTIVEC_REGNO + 1;
19378
19379 /* On Darwin, the unwind routines are compiled without
19380 TARGET_ALTIVEC, and use save_world to save/restore the
19381 altivec registers when necessary. */
19382 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
19383 && ! TARGET_ALTIVEC)
19384 return FIRST_ALTIVEC_REGNO + 20;
19385
19386 /* Find lowest numbered live register. */
19387 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
19388 if (save_reg_p (i))
19389 break;
19390
19391 return i;
19392 }
19393
19394 /* Return a 32-bit mask of the AltiVec registers we need to set in
19395 VRSAVE. Bit n of the return value is 1 if Vn is live; bit numbering
19396 is IBM-style, so the MSB of the 32-bit word is bit 0. */
19397
19398 static unsigned int
19399 compute_vrsave_mask (void)
19400 {
19401 unsigned int i, mask = 0;
19402
19403 /* On Darwin, the unwind routines are compiled without
19404 TARGET_ALTIVEC, and use save_world to save/restore the
19405 call-saved altivec registers when necessary. */
19406 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
19407 && ! TARGET_ALTIVEC)
19408 mask |= 0xFFF;
19409
19410 /* First, find out if we use _any_ altivec registers. */
19411 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
19412 if (df_regs_ever_live_p (i))
19413 mask |= ALTIVEC_REG_BIT (i);
19414
19415 if (mask == 0)
19416 return mask;
19417
19418 /* Next, remove the argument registers from the set. These must
19419 be in the VRSAVE mask set by the caller, so we don't need to add
19420 them in again. More importantly, the mask we compute here is
19421 used to generate CLOBBERs in the set_vrsave insn, and we do not
19422 wish the argument registers to die. */
19423 for (i = crtl->args.info.vregno - 1; i >= ALTIVEC_ARG_MIN_REG; --i)
19424 mask &= ~ALTIVEC_REG_BIT (i);
19425
19426 /* Similarly, remove the return value from the set. */
19427 {
19428 bool yes = false;
19429 diddle_return_value (is_altivec_return_reg, &yes);
19430 if (yes)
19431 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
19432 }
19433
19434 return mask;
19435 }
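
/* For illustration: since VRSAVE numbers its bits big-endian style, the
   bit for Vn is (0x80000000 >> n) -- a sketch of what ALTIVEC_REG_BIT
   expands to, up to the FIRST_ALTIVEC_REGNO bias.  Marking the
   call-saved vector registers V20..V31, as the Darwin special case
   above does, computes:

   unsigned int mask = 0;
   int n;
   for (n = 20; n <= 31; n++)
     mask |= 0x80000000u >> n;

   which yields exactly the 0xFFF constant used there.  */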
19436
19437 /* For a very restricted set of circumstances, we can cut down the
19438 size of prologues/epilogues by calling our own save/restore-the-world
19439 routines. */
19440
19441 static void
19442 compute_save_world_info (rs6000_stack_t *info_ptr)
19443 {
19444 info_ptr->world_save_p = 1;
19445 info_ptr->world_save_p
19446 = (WORLD_SAVE_P (info_ptr)
19447 && DEFAULT_ABI == ABI_DARWIN
19448 && !cfun->has_nonlocal_label
19449 && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
19450 && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
19451 && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
19452 && info_ptr->cr_save_p);
19453
19454 /* This will not work in conjunction with sibcalls. Make sure there
19455 are none. (This check is expensive, but seldom executed.) */
19456 if (WORLD_SAVE_P (info_ptr))
19457 {
19458 rtx insn;
19459 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
19460 if (CALL_P (insn) && SIBLING_CALL_P (insn))
19461 {
19462 info_ptr->world_save_p = 0;
19463 break;
19464 }
19465 }
19466
19467 if (WORLD_SAVE_P (info_ptr))
19468 {
19469 /* Even if we're not touching VRsave, make sure there's room on the
19470 stack for it, if it looks like we're calling SAVE_WORLD, which
19471 will attempt to save it. */
19472 info_ptr->vrsave_size = 4;
19473
19474 /* If we are going to save the world, we need to save the link register too. */
19475 info_ptr->lr_save_p = 1;
19476
19477 /* "Save" the VRsave register too if we're saving the world. */
19478 if (info_ptr->vrsave_mask == 0)
19479 info_ptr->vrsave_mask = compute_vrsave_mask ();
19480
19481 /* Because the Darwin register save/restore routines only handle
19482 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
19483 check. */
19484 gcc_assert (info_ptr->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
19485 && (info_ptr->first_altivec_reg_save
19486 >= FIRST_SAVED_ALTIVEC_REGNO));
19487 }
19488 return;
19489 }
19490
19491
19492 static void
19493 is_altivec_return_reg (rtx reg, void *xyes)
19494 {
19495 bool *yes = (bool *) xyes;
19496 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
19497 *yes = true;
19498 }
19499
19500 \f
19501 /* Look for user-defined global regs in the range FIRST to LAST-1.
19502 We should not restore these, and so cannot use lmw or out-of-line
19503 restore functions if there are any. We also can't save them
19504 (well, emit frame notes for them), because frame unwinding during
19505 exception handling will restore saved registers. */
19506
19507 static bool
19508 global_regs_p (unsigned first, unsigned last)
19509 {
19510 while (first < last)
19511 if (global_regs[first++])
19512 return true;
19513 return false;
19514 }
19515
19516 /* Determine the strategy for saving/restoring registers. */
19517
19518 enum {
19519 SAVRES_MULTIPLE = 0x1,
19520 SAVE_INLINE_FPRS = 0x2,
19521 SAVE_INLINE_GPRS = 0x4,
19522 REST_INLINE_FPRS = 0x8,
19523 REST_INLINE_GPRS = 0x10,
19524 SAVE_NOINLINE_GPRS_SAVES_LR = 0x20,
19525 SAVE_NOINLINE_FPRS_SAVES_LR = 0x40,
19526 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80,
19527 SAVE_INLINE_VRS = 0x100,
19528 REST_INLINE_VRS = 0x200
19529 };
19530
19531 static int
19532 rs6000_savres_strategy (rs6000_stack_t *info,
19533 bool using_static_chain_p)
19534 {
19535 int strategy = 0;
19536 bool lr_save_p;
19537
19538 if (TARGET_MULTIPLE
19539 && !TARGET_POWERPC64
19540 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
19541 && info->first_gp_reg_save < 31
19542 && !global_regs_p (info->first_gp_reg_save, 32))
19543 strategy |= SAVRES_MULTIPLE;
19544
19545 if (crtl->calls_eh_return
19546 || cfun->machine->ra_need_lr)
19547 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
19548 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
19549 | SAVE_INLINE_VRS | REST_INLINE_VRS);
19550
19551 if (info->first_fp_reg_save == 64
19552 /* The out-of-line FP routines use double-precision stores;
19553 we can't use those routines if we don't have such stores. */
19554 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)
19555 || global_regs_p (info->first_fp_reg_save, 64))
19556 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
19557
19558 if (info->first_gp_reg_save == 32
19559 || (!(strategy & SAVRES_MULTIPLE)
19560 && global_regs_p (info->first_gp_reg_save, 32)))
19561 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
19562
19563 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
19564 || global_regs_p (info->first_altivec_reg_save, LAST_ALTIVEC_REGNO + 1))
19565 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
19566
19567 /* Define cutoff for using out-of-line functions to save registers. */
19568 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
19569 {
19570 if (!optimize_size)
19571 {
19572 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
19573 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
19574 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
19575 }
19576 else
19577 {
19578 /* Prefer out-of-line restore if it will exit. */
19579 if (info->first_fp_reg_save > 61)
19580 strategy |= SAVE_INLINE_FPRS;
19581 if (info->first_gp_reg_save > 29)
19582 {
19583 if (info->first_fp_reg_save == 64)
19584 strategy |= SAVE_INLINE_GPRS;
19585 else
19586 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
19587 }
19588 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
19589 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
19590 }
19591 }
19592 else if (DEFAULT_ABI == ABI_DARWIN)
19593 {
19594 if (info->first_fp_reg_save > 60)
19595 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
19596 if (info->first_gp_reg_save > 29)
19597 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
19598 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
19599 }
19600 else
19601 {
19602 gcc_checking_assert (DEFAULT_ABI == ABI_AIX);
19603 if (info->first_fp_reg_save > 61)
19604 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
19605 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
19606 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
19607 }
19608
19609 /* Don't bother to try to save things out-of-line if r11 is occupied
19610 by the static chain. It would require too much fiddling and the
19611 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
19612 pointer on Darwin, and AIX uses r1 or r12. */
19613 if (using_static_chain_p && DEFAULT_ABI != ABI_AIX)
19614 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
19615 | SAVE_INLINE_GPRS
19616 | SAVE_INLINE_VRS | REST_INLINE_VRS);
19617
19618 /* We can only use the out-of-line routines to restore if we've
19619 saved all the registers from first_fp_reg_save in the prologue.
19620 Otherwise, we risk loading garbage. */
19621 if ((strategy & (SAVE_INLINE_FPRS | REST_INLINE_FPRS)) == SAVE_INLINE_FPRS)
19622 {
19623 int i;
19624
19625 for (i = info->first_fp_reg_save; i < 64; i++)
19626 if (!save_reg_p (i))
19627 {
19628 strategy |= REST_INLINE_FPRS;
19629 break;
19630 }
19631 }
19632
19633 /* If we are going to use store multiple, then don't even bother
19634 with the out-of-line routines, since the store-multiple
19635 instruction will always be smaller. */
19636 if ((strategy & SAVRES_MULTIPLE))
19637 strategy |= SAVE_INLINE_GPRS;
19638
19639 /* info->lr_save_p isn't yet set if the only reason lr needs to be
19640 saved is an out-of-line save or restore. Set up the value for
19641 the next test (excluding out-of-line gpr restore). */
19642 lr_save_p = (info->lr_save_p
19643 || !(strategy & SAVE_INLINE_GPRS)
19644 || !(strategy & SAVE_INLINE_FPRS)
19645 || !(strategy & SAVE_INLINE_VRS)
19646 || !(strategy & REST_INLINE_FPRS)
19647 || !(strategy & REST_INLINE_VRS));
19648
19649 /* The situation is more complicated with load multiple. We'd
19650 prefer to use the out-of-line routines for restores, since the
19651 "exit" out-of-line routines can handle the restore of LR and the
19652 frame teardown. However, it doesn't make sense to use the
19653 out-of-line routine if that is the only reason we'd need to save
19654 LR, and we can't use the "exit" out-of-line gpr restore if we
19655 have saved some fprs; in those cases it is advantageous to use
19656 load multiple when available. */
19657 if ((strategy & SAVRES_MULTIPLE)
19658 && (!lr_save_p
19659 || info->first_fp_reg_save != 64))
19660 strategy |= REST_INLINE_GPRS;
19661
19662 /* Saving CR interferes with the exit routines used on the SPE, so
19663 just punt here. */
19664 if (TARGET_SPE_ABI
19665 && info->spe_64bit_regs_used
19666 && info->cr_save_p)
19667 strategy |= REST_INLINE_GPRS;
19668
19669 /* We can only use load multiple or the out-of-line routines to
19670 restore if we've used store multiple or out-of-line routines
19671 in the prologue, i.e. if we've saved all the registers from
19672 first_gp_reg_save. Otherwise, we risk loading garbage. */
19673 if ((strategy & (SAVE_INLINE_GPRS | REST_INLINE_GPRS | SAVRES_MULTIPLE))
19674 == SAVE_INLINE_GPRS)
19675 {
19676 int i;
19677
19678 for (i = info->first_gp_reg_save; i < 32; i++)
19679 if (!save_reg_p (i))
19680 {
19681 strategy |= REST_INLINE_GPRS;
19682 break;
19683 }
19684 }
19685
19686 if (TARGET_ELF && TARGET_64BIT)
19687 {
19688 if (!(strategy & SAVE_INLINE_FPRS))
19689 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
19690 else if (!(strategy & SAVE_INLINE_GPRS)
19691 && info->first_fp_reg_save == 64)
19692 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
19693 }
19694 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
19695 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
19696
19697 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
19698 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
19699
19700 return strategy;
19701 }
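
/* A sketch of how the result is consumed (the real emitters live in the
   prologue/epilogue code later in this file): the strategy is a plain
   bitmask, so callers test individual decisions with bitwise AND, e.g.

   int strategy = rs6000_savres_strategy (info, using_static_chain_p);
   if (!(strategy & SAVE_INLINE_GPRS))
     ... emit a call to an out-of-line _savegpr-style routine ...
   if (strategy & SAVRES_MULTIPLE)
     ... emit a single store-multiple (stmw) instead ...
*/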
19702
19703 /* Calculate the stack information for the current function. This is
19704 complicated by having two separate calling sequences, the AIX calling
19705 sequence and the V.4 calling sequence.
19706
19707 AIX (and Darwin/Mac OS X) stack frames look like:
19708 32-bit 64-bit
19709 SP----> +---------------------------------------+
19710 | back chain to caller | 0 0
19711 +---------------------------------------+
19712 | saved CR | 4 8 (8-11)
19713 +---------------------------------------+
19714 | saved LR | 8 16
19715 +---------------------------------------+
19716 | reserved for compilers | 12 24
19717 +---------------------------------------+
19718 | reserved for binders | 16 32
19719 +---------------------------------------+
19720 | saved TOC pointer | 20 40
19721 +---------------------------------------+
19722 | Parameter save area (P) | 24 48
19723 +---------------------------------------+
19724 | Alloca space (A) | 24+P etc.
19725 +---------------------------------------+
19726 | Local variable space (L) | 24+P+A
19727 +---------------------------------------+
19728 | Float/int conversion temporary (X) | 24+P+A+L
19729 +---------------------------------------+
19730 | Save area for AltiVec registers (W) | 24+P+A+L+X
19731 +---------------------------------------+
19732 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
19733 +---------------------------------------+
19734 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
19735 +---------------------------------------+
19736 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
19737 +---------------------------------------+
19738 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
19739 +---------------------------------------+
19740 old SP->| back chain to caller's caller |
19741 +---------------------------------------+
19742
19743 The required alignment for AIX configurations is two words (i.e., 8
19744 or 16 bytes).
19745
19746
19747 V.4 stack frames look like:
19748
19749 SP----> +---------------------------------------+
19750 | back chain to caller | 0
19751 +---------------------------------------+
19752 | caller's saved LR | 4
19753 +---------------------------------------+
19754 | Parameter save area (P) | 8
19755 +---------------------------------------+
19756 | Alloca space (A) | 8+P
19757 +---------------------------------------+
19758 | Varargs save area (V) | 8+P+A
19759 +---------------------------------------+
19760 | Local variable space (L) | 8+P+A+V
19761 +---------------------------------------+
19762 | Float/int conversion temporary (X) | 8+P+A+V+L
19763 +---------------------------------------+
19764 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
19765 +---------------------------------------+
19766 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
19767 +---------------------------------------+
19768 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
19769 +---------------------------------------+
19770 | SPE: area for 64-bit GP registers |
19771 +---------------------------------------+
19772 | SPE alignment padding |
19773 +---------------------------------------+
19774 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
19775 +---------------------------------------+
19776 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
19777 +---------------------------------------+
19778 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
19779 +---------------------------------------+
19780 old SP->| back chain to caller's caller |
19781 +---------------------------------------+
19782
19783 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
19784 given. (But note below and in sysv4.h that we require only 8 and
19785 may round up the size of our stack frame anyway. The historical
19786 reason is early versions of powerpc-linux which didn't properly
19787 align the stack at program startup. A happy side-effect is that
19788 -mno-eabi libraries can be used with -meabi programs.)
19789
19790 The EABI configuration defaults to the V.4 layout. However,
19791 the stack alignment requirements may differ. If -mno-eabi is not
19792 given, the required stack alignment is 8 bytes; if -mno-eabi is
19793 given, the required alignment is 16 bytes. (But see V.4 comment
19794 above.) */
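
/* Reading the AIX diagram above with concrete numbers (an illustrative
   32-bit example, not code): a function with a 32-byte parameter save
   area (P=32), no alloca (A=0), 16 bytes of locals (L=16), and no
   AltiVec or VRSAVE areas (X=W=Y=Z=0) places its GP save area at offset
   24+32+0+16 = 72 from the new SP, and the total frame size is then
   rounded up to the required two-word alignment.  */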
19795
19796 #ifndef ABI_STACK_BOUNDARY
19797 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
19798 #endif
19799
19800 static rs6000_stack_t *
19801 rs6000_stack_info (void)
19802 {
19803 rs6000_stack_t *info_ptr = &stack_info;
19804 int reg_size = TARGET_32BIT ? 4 : 8;
19805 int ehrd_size;
19806 int save_align;
19807 int first_gp;
19808 HOST_WIDE_INT non_fixed_size;
19809 bool using_static_chain_p;
19810
19811 if (reload_completed && info_ptr->reload_completed)
19812 return info_ptr;
19813
19814 memset (info_ptr, 0, sizeof (*info_ptr));
19815 info_ptr->reload_completed = reload_completed;
19816
19817 if (TARGET_SPE)
19818 {
19819 /* Cache value so we don't rescan instruction chain over and over. */
19820 if (cfun->machine->insn_chain_scanned_p == 0)
19821 cfun->machine->insn_chain_scanned_p
19822 = spe_func_has_64bit_regs_p () + 1;
19823 info_ptr->spe_64bit_regs_used = cfun->machine->insn_chain_scanned_p - 1;
19824 }
19825
19826 /* Select which calling sequence. */
19827 info_ptr->abi = DEFAULT_ABI;
19828
19829 /* Calculate which registers need to be saved & save area size. */
19830 info_ptr->first_gp_reg_save = first_reg_to_save ();
19831 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
19832 even if it currently looks like we won't. Reload may need it to
19833 get at a constant; if so, it will have already created a constant
19834 pool entry for it. */
19835 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
19836 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
19837 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
19838 && crtl->uses_const_pool
19839 && info_ptr->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
19840 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
19841 else
19842 first_gp = info_ptr->first_gp_reg_save;
19843
19844 info_ptr->gp_size = reg_size * (32 - first_gp);
19845
19846 /* For the SPE, we have an additional upper 32-bits on each GPR.
19847 Ideally we should save the entire 64-bits only when the upper
19848 half is used in SIMD instructions. Since we only record
19849 registers live (not the size they are used in), this proves
19850 difficult because we'd have to traverse the instruction chain at
19851 the right time, taking reload into account. This is a real pain,
19852 so we opt to save all the GPRs in 64-bits if even one register
19853 gets used in 64-bits. Otherwise, all the registers in the frame
19854 get saved in 32-bits.
19855
19856 So, when we save all GPRs (except the SP) in 64-bits, the
19857 traditional GP save area will be empty. */
19858 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
19859 info_ptr->gp_size = 0;
19860
19861 info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
19862 info_ptr->fp_size = 8 * (64 - info_ptr->first_fp_reg_save);
19863
19864 info_ptr->first_altivec_reg_save = first_altivec_reg_to_save ();
19865 info_ptr->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
19866 - info_ptr->first_altivec_reg_save);
19867
19868 /* Does this function call anything? */
19869 info_ptr->calls_p = (! crtl->is_leaf
19870 || cfun->machine->ra_needs_full_frame);
19871
19872 /* Determine if we need to save the condition code registers. */
19873 if (df_regs_ever_live_p (CR2_REGNO)
19874 || df_regs_ever_live_p (CR3_REGNO)
19875 || df_regs_ever_live_p (CR4_REGNO))
19876 {
19877 info_ptr->cr_save_p = 1;
19878 if (DEFAULT_ABI == ABI_V4)
19879 info_ptr->cr_size = reg_size;
19880 }
19881
19882 /* If the current function calls __builtin_eh_return, then we need
19883 to allocate stack space for registers that will hold data for
19884 the exception handler. */
19885 if (crtl->calls_eh_return)
19886 {
19887 unsigned int i;
19888 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
19889 continue;
19890
19891 /* SPE saves EH registers in 64-bits. */
19892 ehrd_size = i * (TARGET_SPE_ABI
19893 && info_ptr->spe_64bit_regs_used != 0
19894 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
19895 }
19896 else
19897 ehrd_size = 0;
19898
19899 /* Determine various sizes. */
19900 info_ptr->reg_size = reg_size;
19901 info_ptr->fixed_size = RS6000_SAVE_AREA;
19902 info_ptr->vars_size = RS6000_ALIGN (get_frame_size (), 8);
19903 info_ptr->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
19904 TARGET_ALTIVEC ? 16 : 8);
19905 if (FRAME_GROWS_DOWNWARD)
19906 info_ptr->vars_size
19907 += RS6000_ALIGN (info_ptr->fixed_size + info_ptr->vars_size
19908 + info_ptr->parm_size,
19909 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
19910 - (info_ptr->fixed_size + info_ptr->vars_size
19911 + info_ptr->parm_size);
19912
19913 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
19914 info_ptr->spe_gp_size = 8 * (32 - first_gp);
19915 else
19916 info_ptr->spe_gp_size = 0;
19917
19918 if (TARGET_ALTIVEC_ABI)
19919 info_ptr->vrsave_mask = compute_vrsave_mask ();
19920 else
19921 info_ptr->vrsave_mask = 0;
19922
19923 if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
19924 info_ptr->vrsave_size = 4;
19925 else
19926 info_ptr->vrsave_size = 0;
19927
19928 compute_save_world_info (info_ptr);
19929
19930 /* Calculate the offsets. */
19931 switch (DEFAULT_ABI)
19932 {
19933 case ABI_NONE:
19934 default:
19935 gcc_unreachable ();
19936
19937 case ABI_AIX:
19938 case ABI_DARWIN:
19939 info_ptr->fp_save_offset = - info_ptr->fp_size;
19940 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
19941
19942 if (TARGET_ALTIVEC_ABI)
19943 {
19944 info_ptr->vrsave_save_offset
19945 = info_ptr->gp_save_offset - info_ptr->vrsave_size;
19946
19947 /* Align stack so vector save area is on a quadword boundary.
19948 The padding goes above the vectors. */
19949 if (info_ptr->altivec_size != 0)
19950 info_ptr->altivec_padding_size
19951 = info_ptr->vrsave_save_offset & 0xF;
19952 else
19953 info_ptr->altivec_padding_size = 0;
19954
19955 info_ptr->altivec_save_offset
19956 = info_ptr->vrsave_save_offset
19957 - info_ptr->altivec_padding_size
19958 - info_ptr->altivec_size;
19959 gcc_assert (info_ptr->altivec_size == 0
19960 || info_ptr->altivec_save_offset % 16 == 0);
19961
19962 /* Adjust for AltiVec case. */
19963 info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
19964 }
19965 else
19966 info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
19967 info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
19968 info_ptr->lr_save_offset = 2*reg_size;
19969 break;
19970
19971 case ABI_V4:
19972 info_ptr->fp_save_offset = - info_ptr->fp_size;
19973 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
19974 info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
19975
19976 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
19977 {
19978 /* Align stack so SPE GPR save area is aligned on a
19979 double-word boundary. */
19980 if (info_ptr->spe_gp_size != 0 && info_ptr->cr_save_offset != 0)
19981 info_ptr->spe_padding_size
19982 = 8 - (-info_ptr->cr_save_offset % 8);
19983 else
19984 info_ptr->spe_padding_size = 0;
19985
19986 info_ptr->spe_gp_save_offset
19987 = info_ptr->cr_save_offset
19988 - info_ptr->spe_padding_size
19989 - info_ptr->spe_gp_size;
19990
19991 /* Adjust for SPE case. */
19992 info_ptr->ehrd_offset = info_ptr->spe_gp_save_offset;
19993 }
19994 else if (TARGET_ALTIVEC_ABI)
19995 {
19996 info_ptr->vrsave_save_offset
19997 = info_ptr->cr_save_offset - info_ptr->vrsave_size;
19998
19999 /* Align stack so vector save area is on a quadword boundary. */
20000 if (info_ptr->altivec_size != 0)
20001 info_ptr->altivec_padding_size
20002 = 16 - (-info_ptr->vrsave_save_offset % 16);
20003 else
20004 info_ptr->altivec_padding_size = 0;
20005
20006 info_ptr->altivec_save_offset
20007 = info_ptr->vrsave_save_offset
20008 - info_ptr->altivec_padding_size
20009 - info_ptr->altivec_size;
20010
20011 /* Adjust for AltiVec case. */
20012 info_ptr->ehrd_offset = info_ptr->altivec_save_offset;
20013 }
20014 else
20015 info_ptr->ehrd_offset = info_ptr->cr_save_offset;
20016 info_ptr->ehrd_offset -= ehrd_size;
20017 info_ptr->lr_save_offset = reg_size;
20018 break;
20019 }
20020
20021 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
20022 info_ptr->save_size = RS6000_ALIGN (info_ptr->fp_size
20023 + info_ptr->gp_size
20024 + info_ptr->altivec_size
20025 + info_ptr->altivec_padding_size
20026 + info_ptr->spe_gp_size
20027 + info_ptr->spe_padding_size
20028 + ehrd_size
20029 + info_ptr->cr_size
20030 + info_ptr->vrsave_size,
20031 save_align);
20032
20033 non_fixed_size = (info_ptr->vars_size
20034 + info_ptr->parm_size
20035 + info_ptr->save_size);
20036
20037 info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size,
20038 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
20039
20040 /* Determine if we need to save the link register. */
20041 if (info_ptr->calls_p
20042 || (DEFAULT_ABI == ABI_AIX
20043 && crtl->profile
20044 && !TARGET_PROFILE_KERNEL)
20045 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
20046 #ifdef TARGET_RELOCATABLE
20047 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
20048 #endif
20049 || rs6000_ra_ever_killed ())
20050 info_ptr->lr_save_p = 1;
20051
20052 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
20053 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
20054 && call_used_regs[STATIC_CHAIN_REGNUM]);
20055 info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr,
20056 using_static_chain_p);
20057
20058 if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS)
20059 || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS)
20060 || !(info_ptr->savres_strategy & SAVE_INLINE_VRS)
20061 || !(info_ptr->savres_strategy & REST_INLINE_GPRS)
20062 || !(info_ptr->savres_strategy & REST_INLINE_FPRS)
20063 || !(info_ptr->savres_strategy & REST_INLINE_VRS))
20064 info_ptr->lr_save_p = 1;
20065
20066 if (info_ptr->lr_save_p)
20067 df_set_regs_ever_live (LR_REGNO, true);
20068
20069 /* Determine if we need to allocate any stack frame:
20070
20071 For AIX we need to push the stack if a frame pointer is needed
20072 (because the stack might be dynamically adjusted), if we are
20073 debugging, if we make calls, or if the sum of fp_save, gp_save,
20074 and local variables are more than the space needed to save all
20075 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
20076 + 18*8 = 288 (GPR13 reserved).
20077
20078 For V.4 we don't have the stack cushion that AIX uses, but assume
20079 that the debugger can handle stackless frames. */
20080
20081 if (info_ptr->calls_p)
20082 info_ptr->push_p = 1;
20083
20084 else if (DEFAULT_ABI == ABI_V4)
20085 info_ptr->push_p = non_fixed_size != 0;
20086
20087 else if (frame_pointer_needed)
20088 info_ptr->push_p = 1;
20089
20090 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
20091 info_ptr->push_p = 1;
20092
20093 else
20094 info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
20095
20096 /* Zero offsets if we're not saving those registers. */
20097 if (info_ptr->fp_size == 0)
20098 info_ptr->fp_save_offset = 0;
20099
20100 if (info_ptr->gp_size == 0)
20101 info_ptr->gp_save_offset = 0;
20102
20103 if (! TARGET_ALTIVEC_ABI || info_ptr->altivec_size == 0)
20104 info_ptr->altivec_save_offset = 0;
20105
20106 /* Zero VRSAVE offset if not saved and restored. */
20107 if (! TARGET_ALTIVEC_VRSAVE || info_ptr->vrsave_mask == 0)
20108 info_ptr->vrsave_save_offset = 0;
20109
20110 if (! TARGET_SPE_ABI
20111 || info_ptr->spe_64bit_regs_used == 0
20112 || info_ptr->spe_gp_size == 0)
20113 info_ptr->spe_gp_save_offset = 0;
20114
20115 if (! info_ptr->lr_save_p)
20116 info_ptr->lr_save_offset = 0;
20117
20118 if (! info_ptr->cr_save_p)
20119 info_ptr->cr_save_offset = 0;
20120
20121 return info_ptr;
20122 }
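
/* For reference, the RS6000_ALIGN macro used throughout the sizing code
   above rounds its first argument up to a multiple of the second.  A
   minimal sketch of the usual power-of-two formulation:

   static inline long
   round_up_to (long n, int align)
   {
     return (n + align - 1) & -align;
   }

   e.g. round_up_to (220, 16) == 224, so a 220-byte V.4 frame picks up
   4 bytes of padding.  */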
20123
20124 /* Return true if the current function uses any GPRs in 64-bit SIMD
20125 mode. */
20126
20127 static bool
20128 spe_func_has_64bit_regs_p (void)
20129 {
20130 rtx insns, insn;
20131
20132 /* Functions that save and restore all the call-saved registers will
20133 need to save/restore the registers in 64-bits. */
20134 if (crtl->calls_eh_return
20135 || cfun->calls_setjmp
20136 || crtl->has_nonlocal_goto)
20137 return true;
20138
20139 insns = get_insns ();
20140
20141 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
20142 {
20143 if (INSN_P (insn))
20144 {
20145 rtx i;
20146
20147 /* FIXME: This should be implemented with attributes...
20148
20149 (set_attr "spe64" "true")....then,
20150 if (get_spe64(insn)) return true;
20151
20152 It's the only reliable way to do the stuff below. */
20153
20154 i = PATTERN (insn);
20155 if (GET_CODE (i) == SET)
20156 {
20157 enum machine_mode mode = GET_MODE (SET_SRC (i));
20158
20159 if (SPE_VECTOR_MODE (mode))
20160 return true;
20161 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode))
20162 return true;
20163 }
20164 }
20165 }
20166
20167 return false;
20168 }
20169
20170 static void
20171 debug_stack_info (rs6000_stack_t *info)
20172 {
20173 const char *abi_string;
20174
20175 if (! info)
20176 info = rs6000_stack_info ();
20177
20178 fprintf (stderr, "\nStack information for function %s:\n",
20179 ((current_function_decl && DECL_NAME (current_function_decl))
20180 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
20181 : "<unknown>"));
20182
20183 switch (info->abi)
20184 {
20185 default: abi_string = "Unknown"; break;
20186 case ABI_NONE: abi_string = "NONE"; break;
20187 case ABI_AIX: abi_string = "AIX"; break;
20188 case ABI_DARWIN: abi_string = "Darwin"; break;
20189 case ABI_V4: abi_string = "V.4"; break;
20190 }
20191
20192 fprintf (stderr, "\tABI = %5s\n", abi_string);
20193
20194 if (TARGET_ALTIVEC_ABI)
20195 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
20196
20197 if (TARGET_SPE_ABI)
20198 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
20199
20200 if (info->first_gp_reg_save != 32)
20201 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
20202
20203 if (info->first_fp_reg_save != 64)
20204 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
20205
20206 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
20207 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
20208 info->first_altivec_reg_save);
20209
20210 if (info->lr_save_p)
20211 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
20212
20213 if (info->cr_save_p)
20214 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
20215
20216 if (info->vrsave_mask)
20217 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
20218
20219 if (info->push_p)
20220 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
20221
20222 if (info->calls_p)
20223 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
20224
20225 if (info->gp_save_offset)
20226 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
20227
20228 if (info->fp_save_offset)
20229 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
20230
20231 if (info->altivec_save_offset)
20232 fprintf (stderr, "\taltivec_save_offset = %5d\n",
20233 info->altivec_save_offset);
20234
20235 if (info->spe_gp_save_offset)
20236 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
20237 info->spe_gp_save_offset);
20238
20239 if (info->vrsave_save_offset)
20240 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
20241 info->vrsave_save_offset);
20242
20243 if (info->lr_save_offset)
20244 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
20245
20246 if (info->cr_save_offset)
20247 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
20248
20249 if (info->varargs_save_offset)
20250 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
20251
20252 if (info->total_size)
20253 fprintf (stderr, "\ttotal_size = "HOST_WIDE_INT_PRINT_DEC"\n",
20254 info->total_size);
20255
20256 if (info->vars_size)
20257 fprintf (stderr, "\tvars_size = "HOST_WIDE_INT_PRINT_DEC"\n",
20258 info->vars_size);
20259
20260 if (info->parm_size)
20261 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
20262
20263 if (info->fixed_size)
20264 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
20265
20266 if (info->gp_size)
20267 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
20268
20269 if (info->spe_gp_size)
20270 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
20271
20272 if (info->fp_size)
20273 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
20274
20275 if (info->altivec_size)
20276 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
20277
20278 if (info->vrsave_size)
20279 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
20280
20281 if (info->altivec_padding_size)
20282 fprintf (stderr, "\taltivec_padding_size= %5d\n",
20283 info->altivec_padding_size);
20284
20285 if (info->spe_padding_size)
20286 fprintf (stderr, "\tspe_padding_size = %5d\n",
20287 info->spe_padding_size);
20288
20289 if (info->cr_size)
20290 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
20291
20292 if (info->save_size)
20293 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
20294
20295 if (info->reg_size != 4)
20296 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
20297
20298 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
20299
20300 fprintf (stderr, "\n");
20301 }
20302
20303 rtx
20304 rs6000_return_addr (int count, rtx frame)
20305 {
20306 /* Currently we don't optimize very well between prolog and body
20307 code, and for PIC the generated code can actually be quite bad, so
20308 don't try to be too clever here. */
20309 if (count != 0 || (DEFAULT_ABI != ABI_AIX && flag_pic))
20310 {
20311 cfun->machine->ra_needs_full_frame = 1;
20312
20313 return
20314 gen_rtx_MEM
20315 (Pmode,
20316 memory_address
20317 (Pmode,
20318 plus_constant (Pmode,
20319 copy_to_reg
20320 (gen_rtx_MEM (Pmode,
20321 memory_address (Pmode, frame))),
20322 RETURN_ADDRESS_OFFSET)));
20323 }
20324
20325 cfun->machine->ra_need_lr = 1;
20326 return get_hard_reg_initial_val (Pmode, LR_REGNO);
20327 }
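
/* The COUNT argument comes straight from __builtin_return_address, so
   at the source level (illustration only):

   void *ra0 = __builtin_return_address (0);   resolved from LR directly
   void *ra1 = __builtin_return_address (1);   takes the path above that
                                               walks the back chain and
                                               forces a full frame
*/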
20328
20329 /* Say whether a function is a candidate for sibcall handling or not. */
20330
20331 static bool
20332 rs6000_function_ok_for_sibcall (tree decl, tree exp)
20333 {
20334 tree fntype;
20335
20336 if (decl)
20337 fntype = TREE_TYPE (decl);
20338 else
20339 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
20340
20341 /* We can't do it if the called function has more vector parameters
20342 than the current function; there's nowhere to put the VRsave code. */
20343 if (TARGET_ALTIVEC_ABI
20344 && TARGET_ALTIVEC_VRSAVE
20345 && !(decl && decl == current_function_decl))
20346 {
20347 function_args_iterator args_iter;
20348 tree type;
20349 int nvreg = 0;
20350
20351 /* Functions with vector parameters are required to have a
20352 prototype, so the argument type info must be available
20353 here. */
20354 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
20355 if (TREE_CODE (type) == VECTOR_TYPE
20356 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
20357 nvreg++;
20358
20359 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
20360 if (TREE_CODE (type) == VECTOR_TYPE
20361 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
20362 nvreg--;
20363
20364 if (nvreg > 0)
20365 return false;
20366 }
20367
20368 /* Under the AIX ABI we can't allow calls to non-local functions,
20369 because the callee may have a different TOC pointer from the
20370 caller's, and there's no way to ensure we restore the TOC when we
20371 return. With the secure-plt SYSV ABI we can't make non-local
20372 calls when -fpic/PIC because the plt call stubs use r30. */
20373 if (DEFAULT_ABI == ABI_DARWIN
20374 || (DEFAULT_ABI == ABI_AIX
20375 && decl
20376 && !DECL_EXTERNAL (decl)
20377 && (*targetm.binds_local_p) (decl))
20378 || (DEFAULT_ABI == ABI_V4
20379 && (!TARGET_SECURE_PLT
20380 || !flag_pic
20381 || (decl
20382 && (*targetm.binds_local_p) (decl)))))
20383 {
20384 tree attr_list = TYPE_ATTRIBUTES (fntype);
20385
20386 if (!lookup_attribute ("longcall", attr_list)
20387 || lookup_attribute ("shortcall", attr_list))
20388 return true;
20389 }
20390
20391 return false;
20392 }
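
/* For illustration, under the AIX rules above (assumed file-level
   example, not compiler code):

   extern int ext_fn (int);
   static int loc_fn (int x) { return x + 1; }

   int f (int x) { return ext_fn (x); }   not a sibcall: ext_fn may use
                                          a different TOC
   int g (int x) { return loc_fn (x); }   sibcall OK: loc_fn binds
                                          locally, so same TOC
*/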
20393
20394 static int
20395 rs6000_ra_ever_killed (void)
20396 {
20397 rtx top;
20398 rtx reg;
20399 rtx insn;
20400
20401 if (cfun->is_thunk)
20402 return 0;
20403
20404 if (cfun->machine->lr_save_state)
20405 return cfun->machine->lr_save_state - 1;
20406
20407 /* regs_ever_live has LR marked as used if any sibcalls are present,
20408 but this should not force saving and restoring in the
20409 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
20410 clobbers LR, so that is inappropriate. */
20411
20412 /* Also, the prologue can generate a store into LR that
20413 doesn't really count, like this:
20414
20415 move LR->R0
20416 bcl to set PIC register
20417 move LR->R31
20418 move R0->LR
20419
20420 When we're called from the epilogue, we need to avoid counting
20421 this as a store. */
20422
20423 push_topmost_sequence ();
20424 top = get_insns ();
20425 pop_topmost_sequence ();
20426 reg = gen_rtx_REG (Pmode, LR_REGNO);
20427
20428 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
20429 {
20430 if (INSN_P (insn))
20431 {
20432 if (CALL_P (insn))
20433 {
20434 if (!SIBLING_CALL_P (insn))
20435 return 1;
20436 }
20437 else if (find_regno_note (insn, REG_INC, LR_REGNO))
20438 return 1;
20439 else if (set_of (reg, insn) != NULL_RTX
20440 && !prologue_epilogue_contains (insn))
20441 return 1;
20442 }
20443 }
20444 return 0;
20445 }
20446 \f
20447 /* Emit instructions needed to load the TOC register.
20448 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set
20449 and there is a constant pool, or for SVR4 -fpic. */
20450
20451 void
20452 rs6000_emit_load_toc_table (int fromprolog)
20453 {
20454 rtx dest;
20455 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
20456
20457 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic)
20458 {
20459 char buf[30];
20460 rtx lab, tmp1, tmp2, got;
20461
20462 lab = gen_label_rtx ();
20463 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
20464 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
20465 if (flag_pic == 2)
20466 got = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
20467 else
20468 got = rs6000_got_sym ();
20469 tmp1 = tmp2 = dest;
20470 if (!fromprolog)
20471 {
20472 tmp1 = gen_reg_rtx (Pmode);
20473 tmp2 = gen_reg_rtx (Pmode);
20474 }
20475 emit_insn (gen_load_toc_v4_PIC_1 (lab));
20476 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
20477 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
20478 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
20479 }
20480 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
20481 {
20482 emit_insn (gen_load_toc_v4_pic_si ());
20483 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
20484 }
20485 else if (TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2)
20486 {
20487 char buf[30];
20488 rtx temp0 = (fromprolog
20489 ? gen_rtx_REG (Pmode, 0)
20490 : gen_reg_rtx (Pmode));
20491
20492 if (fromprolog)
20493 {
20494 rtx symF, symL;
20495
20496 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
20497 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
20498
20499 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
20500 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
20501
20502 emit_insn (gen_load_toc_v4_PIC_1 (symF));
20503 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
20504 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
20505 }
20506 else
20507 {
20508 rtx tocsym, lab;
20509
20510 tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
20511 lab = gen_label_rtx ();
20512 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
20513 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
20514 if (TARGET_LINK_STACK)
20515 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
20516 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
20517 }
20518 emit_insn (gen_addsi3 (dest, temp0, dest));
20519 }
20520 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
20521 {
20522 /* This is for AIX code running in non-PIC ELF32. */
20523 char buf[30];
20524 rtx realsym;
20525 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
20526 realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
20527
20528 emit_insn (gen_elf_high (dest, realsym));
20529 emit_insn (gen_elf_low (dest, dest, realsym));
20530 }
20531 else
20532 {
20533 gcc_assert (DEFAULT_ABI == ABI_AIX);
20534
20535 if (TARGET_32BIT)
20536 emit_insn (gen_load_toc_aix_si (dest));
20537 else
20538 emit_insn (gen_load_toc_aix_di (dest));
20539 }
20540 }
20541
20542 /* Emit instructions to restore the link register after determining where
20543 its value has been stored. */
20544
20545 void
20546 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
20547 {
20548 rs6000_stack_t *info = rs6000_stack_info ();
20549 rtx operands[2];
20550
20551 operands[0] = source;
20552 operands[1] = scratch;
20553
20554 if (info->lr_save_p)
20555 {
20556 rtx frame_rtx = stack_pointer_rtx;
20557 HOST_WIDE_INT sp_offset = 0;
20558 rtx tmp;
20559
20560 if (frame_pointer_needed
20561 || cfun->calls_alloca
20562 || info->total_size > 32767)
20563 {
20564 tmp = gen_frame_mem (Pmode, frame_rtx);
20565 emit_move_insn (operands[1], tmp);
20566 frame_rtx = operands[1];
20567 }
20568 else if (info->push_p)
20569 sp_offset = info->total_size;
20570
20571 tmp = plus_constant (Pmode, frame_rtx,
20572 info->lr_save_offset + sp_offset);
20573 tmp = gen_frame_mem (Pmode, tmp);
20574 emit_move_insn (tmp, operands[0]);
20575 }
20576 else
20577 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
20578
20579 /* Freeze lr_save_p. We've just emitted rtl that depends on the
20580 state of lr_save_p so any change from here on would be a bug. In
20581 particular, stop rs6000_ra_ever_killed from considering the SET
20582 of lr we may have added just above. */
20583 cfun->machine->lr_save_state = info->lr_save_p + 1;
20584 }
20585
20586 static GTY(()) alias_set_type set = -1;
20587
20588 alias_set_type
20589 get_TOC_alias_set (void)
20590 {
20591 if (set == -1)
20592 set = new_alias_set ();
20593 return set;
20594 }
20595
20596 /* This returns nonzero if the current function uses the TOC. This is
20597 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
20598 is generated by the ABI_V4 load_toc_* patterns. */
20599 #if TARGET_ELF
20600 static int
20601 uses_TOC (void)
20602 {
20603 rtx insn;
20604
20605 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20606 if (INSN_P (insn))
20607 {
20608 rtx pat = PATTERN (insn);
20609 int i;
20610
20611 if (GET_CODE (pat) == PARALLEL)
20612 for (i = 0; i < XVECLEN (pat, 0); i++)
20613 {
20614 rtx sub = XVECEXP (pat, 0, i);
20615 if (GET_CODE (sub) == USE)
20616 {
20617 sub = XEXP (sub, 0);
20618 if (GET_CODE (sub) == UNSPEC
20619 && XINT (sub, 1) == UNSPEC_TOC)
20620 return 1;
20621 }
20622 }
20623 }
20624 return 0;
20625 }
20626 #endif
20627
20628 rtx
20629 create_TOC_reference (rtx symbol, rtx largetoc_reg)
20630 {
20631 rtx tocrel, tocreg, hi;
20632
20633 if (TARGET_DEBUG_ADDR)
20634 {
20635 if (GET_CODE (symbol) == SYMBOL_REF)
20636 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
20637 XSTR (symbol, 0));
20638 else
20639 {
20640 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
20641 GET_RTX_NAME (GET_CODE (symbol)));
20642 debug_rtx (symbol);
20643 }
20644 }
20645
20646 if (!can_create_pseudo_p ())
20647 df_set_regs_ever_live (TOC_REGISTER, true);
20648
20649 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
20650 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
20651 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
20652 return tocrel;
20653
20654 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
20655 if (largetoc_reg != NULL)
20656 {
20657 emit_move_insn (largetoc_reg, hi);
20658 hi = largetoc_reg;
20659 }
20660 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
20661 }
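
/* Shape of what this expands to, roughly (64-bit medium/large code
   model; an illustration, not actual compiler output): the HIGH/LO_SUM
   pair becomes the familiar two-instruction TOC-relative address

   addis rT,r2,sym@toc@ha
   addi  rD,rT,sym@toc@l      (or folded into a d-form load)

   whereas with the small code model the bare UNSPEC_TOCREL turns into
   a single TOC-relative reference, e.g. ld rD,sym@toc(r2).  */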
20662
20663 /* Issue assembly directives that create a reference to the given DWARF
20664 FRAME_TABLE_LABEL from the current function section. */
20665 void
20666 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
20667 {
20668 fprintf (asm_out_file, "\t.ref %s\n",
20669 (* targetm.strip_name_encoding) (frame_table_label));
20670 }
20671 \f
20672 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
20673 and the change to the stack pointer. */
20674
20675 static void
20676 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
20677 {
20678 rtvec p;
20679 int i;
20680 rtx regs[3];
20681
20682 i = 0;
20683 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
20684 if (hard_frame_needed)
20685 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
20686 if (!(REGNO (fp) == STACK_POINTER_REGNUM
20687 || (hard_frame_needed
20688 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
20689 regs[i++] = fp;
20690
20691 p = rtvec_alloc (i);
20692 while (--i >= 0)
20693 {
20694 rtx mem = gen_frame_mem (BLKmode, regs[i]);
20695 RTVEC_ELT (p, i) = gen_rtx_SET (VOIDmode, mem, const0_rtx);
20696 }
20697
20698 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
20699 }
20700
20701 /* Emit the correct code for allocating stack space, as insns. If
20702 COPY_REG, leave a copy of the old stack pointer there (adjusted by
20703 COPY_OFF). The generated code may use hard register 0 as a temporary. */
20704
20705 static void
20706 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
20707 {
20708 rtx insn;
20709 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
20710 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
20711 rtx todec = gen_int_mode (-size, Pmode);
20712 rtx par, set, mem;
20713
20714 if (INTVAL (todec) != -size)
20715 {
20716 warning (0, "stack frame too large");
20717 emit_insn (gen_trap ());
20718 return;
20719 }
20720
20721 if (crtl->limit_stack)
20722 {
20723 if (REG_P (stack_limit_rtx)
20724 && REGNO (stack_limit_rtx) > 1
20725 && REGNO (stack_limit_rtx) <= 31)
20726 {
20727 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
20728 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
20729 const0_rtx));
20730 }
20731 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
20732 && TARGET_32BIT
20733 && DEFAULT_ABI == ABI_V4)
20734 {
20735 rtx toload = gen_rtx_CONST (VOIDmode,
20736 gen_rtx_PLUS (Pmode,
20737 stack_limit_rtx,
20738 GEN_INT (size)));
20739
20740 emit_insn (gen_elf_high (tmp_reg, toload));
20741 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
20742 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
20743 const0_rtx));
20744 }
20745 else
20746 warning (0, "stack limit expression is not supported");
20747 }
20748
20749 if (copy_reg)
20750 {
20751 if (copy_off != 0)
20752 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
20753 else
20754 emit_move_insn (copy_reg, stack_reg);
20755 }
20756
20757 if (size > 32767)
20758 {
20759 /* Need a note here so that try_split doesn't get confused. */
20760 if (get_last_insn () == NULL_RTX)
20761 emit_note (NOTE_INSN_DELETED);
20762 insn = emit_move_insn (tmp_reg, todec);
20763 try_split (PATTERN (insn), insn, 0);
20764 todec = tmp_reg;
20765 }
20766
20767 insn = emit_insn (TARGET_32BIT
20768 ? gen_movsi_update_stack (stack_reg, stack_reg,
20769 todec, stack_reg)
20770 : gen_movdi_di_update_stack (stack_reg, stack_reg,
20771 todec, stack_reg));
20772 /* Since we didn't use gen_frame_mem to generate the MEM, grab
20773 it now and set the alias set/attributes. The above gen_*_update
20774 calls will generate a PARALLEL with the MEM set being the first
20775 operation. */
20776 par = PATTERN (insn);
20777 gcc_assert (GET_CODE (par) == PARALLEL);
20778 set = XVECEXP (par, 0, 0);
20779 gcc_assert (GET_CODE (set) == SET);
20780 mem = SET_DEST (set);
20781 gcc_assert (MEM_P (mem));
20782 MEM_NOTRAP_P (mem) = 1;
20783 set_mem_alias_set (mem, get_frame_alias_set ());
20784
20785 RTX_FRAME_RELATED_P (insn) = 1;
20786 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
20787 gen_rtx_SET (VOIDmode, stack_reg,
20788 gen_rtx_PLUS (Pmode, stack_reg,
20789 GEN_INT (-size))));
20790 }
20791
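/* A rough sketch of what the above typically expands to on a 32-bit
   target (illustrative only; the exact sequence depends on target flags):

     small frame (SIZE <= 32767):
       stwu  r1,-SIZE(r1)

     large frame (-SIZE built in r0 by the split move):
       lis   r0,...
       ori   r0,r0,...        # r0 = -SIZE
       stwux r1,r1,r0

   64-bit targets use stdu/stdux instead.  The store-with-update both
   allocates the frame and writes the back chain word in a single
   instruction.  */
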
20792 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
20793
20794 #if PROBE_INTERVAL > 32768
20795 #error Cannot use indexed addressing mode for stack probing
20796 #endif
20797
20798 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
20799 inclusive. These are offsets from the current stack pointer. */
20800
20801 static void
20802 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
20803 {
20804 /* See if we have a constant small number of probes to generate. If so,
20805 that's the easy case. */
20806 if (first + size <= 32768)
20807 {
20808 HOST_WIDE_INT i;
20809
20810 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
20811 it exceeds SIZE. If only one probe is needed, this will not
20812 generate any code. Then probe at FIRST + SIZE. */
20813 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
20814 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
20815 -(first + i)));
20816
20817 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
20818 -(first + size)));
20819 }
20820
20821 /* Otherwise, do the same as above, but in a loop. Note that we must be
20822 extra careful with variables wrapping around because we might be at
20823 the very top (or the very bottom) of the address space and we have
20824 to be able to handle this case properly; in particular, we use an
20825 equality test for the loop condition. */
20826 else
20827 {
20828 HOST_WIDE_INT rounded_size;
20829 rtx r12 = gen_rtx_REG (Pmode, 12);
20830 rtx r0 = gen_rtx_REG (Pmode, 0);
20831
20832 /* Sanity check for the addressing mode we're going to use. */
20833 gcc_assert (first <= 32768);
20834
20835 /* Step 1: round SIZE to the previous multiple of the interval. */
20836
20837 rounded_size = size & -PROBE_INTERVAL;
20838
20839
20840 /* Step 2: compute initial and final value of the loop counter. */
20841
20842 /* TEST_ADDR = SP + FIRST. */
20843 emit_insn (gen_rtx_SET (VOIDmode, r12,
20844 plus_constant (Pmode, stack_pointer_rtx,
20845 -first)));
20846
20847 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
20848 if (rounded_size > 32768)
20849 {
20850 emit_move_insn (r0, GEN_INT (-rounded_size));
20851 emit_insn (gen_rtx_SET (VOIDmode, r0,
20852 gen_rtx_PLUS (Pmode, r12, r0)));
20853 }
20854 else
20855 emit_insn (gen_rtx_SET (VOIDmode, r0,
20856 plus_constant (Pmode, r12, -rounded_size)));
20857
20858
20859 /* Step 3: the loop
20860
20861 while (TEST_ADDR != LAST_ADDR)
20862 {
20863 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
20864 probe at TEST_ADDR
20865 }
20866
20867 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
20868 until it is equal to ROUNDED_SIZE. */
20869
20870 if (TARGET_64BIT)
20871 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
20872 else
20873 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
20874
20875
20876 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
20877 that SIZE is equal to ROUNDED_SIZE. */
20878
20879 if (size != rounded_size)
20880 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
20881 }
20882 }
20883
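/* Worked example (hypothetical numbers, 4 KiB PROBE_INTERVAL assumed):
   with FIRST == 16384 and SIZE == 8192 we take the small-count path and
   probe at sp-20480 (FIRST plus one interval) and at sp-24576
   (FIRST + SIZE).  Once FIRST + SIZE > 32768, r12 and r0 are instead set
   up as TEST_ADDR and LAST_ADDR and the probe_stack_range pattern emits
   the loop output by the function below.  */
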
20884 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
20885 absolute addresses. */
20886
20887 const char *
20888 output_probe_stack_range (rtx reg1, rtx reg2)
20889 {
20890 static int labelno = 0;
20891 char loop_lab[32], end_lab[32];
20892 rtx xops[2];
20893
20894 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
20895 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
20896
20897 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
20898
20899 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
20900 xops[0] = reg1;
20901 xops[1] = reg2;
20902 if (TARGET_64BIT)
20903 output_asm_insn ("cmpd 0,%0,%1", xops);
20904 else
20905 output_asm_insn ("cmpw 0,%0,%1", xops);
20906
20907 fputs ("\tbeq 0,", asm_out_file);
20908 assemble_name_raw (asm_out_file, end_lab);
20909 fputc ('\n', asm_out_file);
20910
20911 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
20912 xops[1] = GEN_INT (-PROBE_INTERVAL);
20913 output_asm_insn ("addi %0,%0,%1", xops);
20914
20915 /* Probe at TEST_ADDR and branch. */
20916 xops[1] = gen_rtx_REG (Pmode, 0);
20917 output_asm_insn ("stw %1,0(%0)", xops);
20918 fprintf (asm_out_file, "\tb ");
20919 assemble_name_raw (asm_out_file, loop_lab);
20920 fputc ('\n', asm_out_file);
20921
20922 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
20923
20924 return "";
20925 }
20926
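/* For reference, the asm emitted above looks roughly like this on a
   32-bit ELF target (label numbers and the 4096-byte interval are
   assumed for the sketch):

   .LPSRL0:
           cmpw 0,%r12,%r0
           beq  0,.LPSRE0
           addi %r12,%r12,-4096
           stw  %r0,0(%r12)
           b    .LPSRL0
   .LPSRE0:

   i.e. walk TEST_ADDR down one PROBE_INTERVAL at a time, storing to each
   page, until it reaches LAST_ADDR.  */
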
20927 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
20928 with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
20929 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
20930 deduce these equivalences by itself so it wasn't necessary to hold
20931 its hand so much. Don't be tempted to always supply d2_f_d_e with
20932 the actual cfa register, i.e. r31 when we are using a hard frame
20933 pointer. That fails when saving regs off r1, and sched moves the
20934 r31 setup past the reg saves. */
20935
20936 static rtx
20937 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
20938 rtx reg2, rtx rreg)
20939 {
20940 rtx real, temp;
20941
20942 if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
20943 {
20944 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
20945 int i;
20946
20947 gcc_checking_assert (val == 0);
20948 real = PATTERN (insn);
20949 if (GET_CODE (real) == PARALLEL)
20950 for (i = 0; i < XVECLEN (real, 0); i++)
20951 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
20952 {
20953 rtx set = XVECEXP (real, 0, i);
20954
20955 RTX_FRAME_RELATED_P (set) = 1;
20956 }
20957 RTX_FRAME_RELATED_P (insn) = 1;
20958 return insn;
20959 }
20960
20961 /* copy_rtx will not make unique copies of registers, so we need to
20962 ensure we don't have unwanted sharing here. */
20963 if (reg == reg2)
20964 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
20965
20966 if (reg == rreg)
20967 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
20968
20969 real = copy_rtx (PATTERN (insn));
20970
20971 if (reg2 != NULL_RTX)
20972 real = replace_rtx (real, reg2, rreg);
20973
20974 if (REGNO (reg) == STACK_POINTER_REGNUM)
20975 gcc_checking_assert (val == 0);
20976 else
20977 real = replace_rtx (real, reg,
20978 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
20979 STACK_POINTER_REGNUM),
20980 GEN_INT (val)));
20981
20982 /* We expect that 'real' is either a SET or a PARALLEL containing
20983 SETs (and possibly other stuff). In a PARALLEL, all the SETs
20984 are important so they all have to be marked RTX_FRAME_RELATED_P. */
20985
20986 if (GET_CODE (real) == SET)
20987 {
20988 rtx set = real;
20989
20990 temp = simplify_rtx (SET_SRC (set));
20991 if (temp)
20992 SET_SRC (set) = temp;
20993 temp = simplify_rtx (SET_DEST (set));
20994 if (temp)
20995 SET_DEST (set) = temp;
20996 if (GET_CODE (SET_DEST (set)) == MEM)
20997 {
20998 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
20999 if (temp)
21000 XEXP (SET_DEST (set), 0) = temp;
21001 }
21002 }
21003 else
21004 {
21005 int i;
21006
21007 gcc_assert (GET_CODE (real) == PARALLEL);
21008 for (i = 0; i < XVECLEN (real, 0); i++)
21009 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
21010 {
21011 rtx set = XVECEXP (real, 0, i);
21012
21013 temp = simplify_rtx (SET_SRC (set));
21014 if (temp)
21015 SET_SRC (set) = temp;
21016 temp = simplify_rtx (SET_DEST (set));
21017 if (temp)
21018 SET_DEST (set) = temp;
21019 if (GET_CODE (SET_DEST (set)) == MEM)
21020 {
21021 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
21022 if (temp)
21023 XEXP (SET_DEST (set), 0) = temp;
21024 }
21025 RTX_FRAME_RELATED_P (set) = 1;
21026 }
21027 }
21028
21029 RTX_FRAME_RELATED_P (insn) = 1;
21030 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
21031
21032 return insn;
21033 }
21034
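/* Illustrative only: if INSN stores r31 at 8(r11) and r11 was set up as
   sp + VAL, the note attached above describes the save as if it had been

   (set (mem (plus (reg 1) (const_int VAL+8))) (reg 31))

   so the unwinder sees an offset from the CFA (r1) rather than from the
   temporary frame register.  */
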
21035 /* Returns an insn that has a vrsave set operation with the
21036 appropriate CLOBBERs. */
21037
21038 static rtx
21039 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
21040 {
21041 int nclobs, i;
21042 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
21043 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
21044
21045 clobs[0]
21046 = gen_rtx_SET (VOIDmode,
21047 vrsave,
21048 gen_rtx_UNSPEC_VOLATILE (SImode,
21049 gen_rtvec (2, reg, vrsave),
21050 UNSPECV_SET_VRSAVE));
21051
21052 nclobs = 1;
21053
21054 /* We need to clobber the registers in the mask so the scheduler
21055 does not move sets to VRSAVE before sets of AltiVec registers.
21056
21057 However, if the function receives nonlocal gotos, reload will set
21058 all call-saved registers live. We will end up with:
21059
21060 (set (reg 999) (mem))
21061 (parallel [ (set (reg vrsave) (unspec blah))
21062 (clobber (reg 999))])
21063
21064 The clobber will cause the store into reg 999 to be dead, and
21065 flow will attempt to delete an epilogue insn. In this case, we
21066 need an unspec use/set of the register. */
21067
21068 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
21069 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
21070 {
21071 if (!epiloguep || call_used_regs [i])
21072 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
21073 gen_rtx_REG (V4SImode, i));
21074 else
21075 {
21076 rtx reg = gen_rtx_REG (V4SImode, i);
21077
21078 clobs[nclobs++]
21079 = gen_rtx_SET (VOIDmode,
21080 reg,
21081 gen_rtx_UNSPEC (V4SImode,
21082 gen_rtvec (1, reg), 27));
21083 }
21084 }
21085
21086 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
21087
21088 for (i = 0; i < nclobs; ++i)
21089 XVECEXP (insn, 0, i) = clobs[i];
21090
21091 return insn;
21092 }
21093
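/* Schematically, for a prologue where only v20 and v21 are live, the
   insn built above is (a sketch, not exact RTL):

   (parallel [(set (reg:SI vrsave)
                   (unspec_volatile:SI [(reg) (reg:SI vrsave)]
                                       UNSPECV_SET_VRSAVE))
              (clobber (reg:V4SI v20))
              (clobber (reg:V4SI v21))])

   The clobbers keep the scheduler from moving AltiVec register sets
   across the VRSAVE update.  */
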
21094 static rtx
21095 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
21096 {
21097 rtx addr, mem;
21098
21099 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
21100 mem = gen_frame_mem (GET_MODE (reg), addr);
21101 return gen_rtx_SET (VOIDmode, store ? mem : reg, store ? reg : mem);
21102 }
21103
21104 static rtx
21105 gen_frame_load (rtx reg, rtx frame_reg, int offset)
21106 {
21107 return gen_frame_set (reg, frame_reg, offset, false);
21108 }
21109
21110 static rtx
21111 gen_frame_store (rtx reg, rtx frame_reg, int offset)
21112 {
21113 return gen_frame_set (reg, frame_reg, offset, true);
21114 }
21115
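/* Example of the SET these helpers build (illustrative operands): a
   64-bit store of r31 at offset 16 from a frame register in r11 is

   (set (mem:DI (plus:DI (reg:DI 11) (const_int 16))) (reg:DI 31))

   with the MEM carrying the frame alias set courtesy of gen_frame_mem.  */
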
21116 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
21117 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
21118
21119 static rtx
21120 emit_frame_save (rtx frame_reg, enum machine_mode mode,
21121 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
21122 {
21123 rtx reg, insn;
21124
21125 /* Assert that none of the cases needing register indexed addressing applies. */
21126 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
21127 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
21128 || (TARGET_E500_DOUBLE && mode == DFmode)
21129 || (TARGET_SPE_ABI
21130 && SPE_VECTOR_MODE (mode)
21131 && !SPE_CONST_OFFSET_OK (offset))));
21132
21133 reg = gen_rtx_REG (mode, regno);
21134 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
21135 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
21136 NULL_RTX, NULL_RTX);
21137 }
21138
21139 /* Emit an offset memory reference suitable for a frame store, while
21140 converting to a valid addressing mode. */
21141
21142 static rtx
21143 gen_frame_mem_offset (enum machine_mode mode, rtx reg, int offset)
21144 {
21145 rtx int_rtx, offset_rtx;
21146
21147 int_rtx = GEN_INT (offset);
21148
21149 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
21150 || (TARGET_E500_DOUBLE && mode == DFmode))
21151 {
21152 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
21153 emit_move_insn (offset_rtx, int_rtx);
21154 }
21155 else
21156 offset_rtx = int_rtx;
21157
21158 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
21159 }
21160
21161 #ifndef TARGET_FIX_AND_CONTINUE
21162 #define TARGET_FIX_AND_CONTINUE 0
21163 #endif
21164
21165 /* The first saved register is really GPR 13 or 14, FPR 14, or VR 20; we need the smallest of these. */
21166 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
21167 #define LAST_SAVRES_REGISTER 31
21168 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
21169
21170 enum {
21171 SAVRES_LR = 0x1,
21172 SAVRES_SAVE = 0x2,
21173 SAVRES_REG = 0x0c,
21174 SAVRES_GPR = 0,
21175 SAVRES_FPR = 4,
21176 SAVRES_VR = 8
21177 };
21178
21179 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
21180
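/* The second index is the SAVRES_* selector value itself; the largest
   valid combination is SAVRES_VR | SAVRES_SAVE | SAVRES_LR == 11, hence
   12 slots.  E.g. an out-of-line FPR save that also saves LR uses index
   SAVRES_FPR | SAVRES_SAVE | SAVRES_LR == 7.  */
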
21181 /* Temporary holding space for an out-of-line register save/restore
21182 routine name. */
21183 static char savres_routine_name[30];
21184
21185 /* Return the name for an out-of-line register save/restore routine.
21186 SEL selects the register class, save vs. restore, and LR handling. */
21187
21188 static char *
21189 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
21190 {
21191 const char *prefix = "";
21192 const char *suffix = "";
21193
21194 /* Different targets are supposed to define
21195 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
21196 routine name could be defined with:
21197
21198 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
21199
21200 This is a nice idea in theory, but in reality, things are
21201 complicated in several ways:
21202
21203 - ELF targets have save/restore routines for GPRs.
21204
21205 - SPE targets use different prefixes for 32/64-bit registers, and
21206 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
21207
21208 - PPC64 ELF targets have routines for save/restore of GPRs that
21209 differ in what they do with the link register, so having a set
21210 prefix doesn't work. (We only use one of the save routines at
21211 the moment, though.)
21212
21213 - PPC32 ELF targets have "exit" versions of the restore routines
21214 that restore the link register and can save some extra space.
21215 These require an extra suffix. (There are also "tail" versions
21216 of the restore routines and "GOT" versions of the save routines,
21217 but we don't generate those at present. Same problems apply,
21218 though.)
21219
21220 We deal with all this by synthesizing our own prefix/suffix and
21221 using that for the simple sprintf call shown above. */
21222 if (TARGET_SPE)
21223 {
21224 /* No floating point saves on the SPE. */
21225 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
21226
21227 if ((sel & SAVRES_SAVE))
21228 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
21229 else
21230 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
21231
21232 if ((sel & SAVRES_LR))
21233 suffix = "_x";
21234 }
21235 else if (DEFAULT_ABI == ABI_V4)
21236 {
21237 if (TARGET_64BIT)
21238 goto aix_names;
21239
21240 if ((sel & SAVRES_REG) == SAVRES_GPR)
21241 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
21242 else if ((sel & SAVRES_REG) == SAVRES_FPR)
21243 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
21244 else if ((sel & SAVRES_REG) == SAVRES_VR)
21245 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
21246 else
21247 abort ();
21248
21249 if ((sel & SAVRES_LR))
21250 suffix = "_x";
21251 }
21252 else if (DEFAULT_ABI == ABI_AIX)
21253 {
21254 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
21255 /* No out-of-line save/restore routines for GPRs on AIX. */
21256 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
21257 #endif
21258
21259 aix_names:
21260 if ((sel & SAVRES_REG) == SAVRES_GPR)
21261 prefix = ((sel & SAVRES_SAVE)
21262 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
21263 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
21264 else if ((sel & SAVRES_REG) == SAVRES_FPR)
21265 {
21266 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
21267 if ((sel & SAVRES_LR))
21268 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
21269 else
21270 #endif
21271 {
21272 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
21273 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
21274 }
21275 }
21276 else if ((sel & SAVRES_REG) == SAVRES_VR)
21277 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
21278 else
21279 abort ();
21280 }
21281
21282 if (DEFAULT_ABI == ABI_DARWIN)
21283 {
21284 /* The Darwin approach is (slightly) different, in order to be
21285 compatible with code generated by the system toolchain. There is a
21286 single symbol for the start of the save sequence, and the code here
21287 embeds an offset into that code on the basis of the first register
21288 to be saved. */
21289 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
21290 if ((sel & SAVRES_REG) == SAVRES_GPR)
21291 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
21292 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
21293 (regno - 13) * 4, prefix, regno);
21294 else if ((sel & SAVRES_REG) == SAVRES_FPR)
21295 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
21296 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
21297 else if ((sel & SAVRES_REG) == SAVRES_VR)
21298 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
21299 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
21300 else
21301 abort ();
21302 }
21303 else
21304 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
21305
21306 return savres_routine_name;
21307 }
21308
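/* A few example names the code above produces (assuming the first saved
   register is r29/f29/v29):

     32-bit SVR4 GPR save:               _savegpr_29
     32-bit SVR4 GPR restore + exit:     _restgpr_29_x
     64-bit / AIX GPR save (saves LR):   _savegpr0_29
     64-bit / AIX GPR save (no LR):      _savegpr1_29
     Vector save:                        _savevr_29

   Darwin instead emits an expression such as "*saveFP+60 ; save f29-f31"
   that resolves to an offset into a single save sequence.  */
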
21309 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
21310 SEL selects the register class, save vs. restore, and LR handling. */
21311
21312 static rtx
21313 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
21314 {
21315 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
21316 ? info->first_gp_reg_save
21317 : (sel & SAVRES_REG) == SAVRES_FPR
21318 ? info->first_fp_reg_save - 32
21319 : (sel & SAVRES_REG) == SAVRES_VR
21320 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
21321 : -1);
21322 rtx sym;
21323 int select = sel;
21324
21325 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
21326 versions of the gpr routines. */
21327 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
21328 && info->spe_64bit_regs_used)
21329 select ^= SAVRES_FPR ^ SAVRES_GPR;
21330
21331 /* Don't generate bogus routine names. */
21332 gcc_assert (FIRST_SAVRES_REGISTER <= regno
21333 && regno <= LAST_SAVRES_REGISTER
21334 && select >= 0 && select <= 12);
21335
21336 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
21337
21338 if (sym == NULL)
21339 {
21340 char *name;
21341
21342 name = rs6000_savres_routine_name (info, regno, sel);
21343
21344 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
21345 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
21346 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
21347 }
21348
21349 return sym;
21350 }
21351
21352 /* Emit a sequence of insns, including a stack tie if needed, for
21353 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
21354 reset the stack pointer, but move the base of the frame into
21355 reg UPDT_REGNO for use by out-of-line register restore routines. */
21356
21357 static rtx
21358 rs6000_emit_stack_reset (rs6000_stack_t *info,
21359 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
21360 unsigned updt_regno)
21361 {
21362 rtx updt_reg_rtx;
21363
21364 /* This blockage is needed so that sched doesn't decide to move
21365 the sp change before the register restores. */
21366 if (DEFAULT_ABI == ABI_V4
21367 || (TARGET_SPE_ABI
21368 && info->spe_64bit_regs_used != 0
21369 && info->first_gp_reg_save != 32))
21370 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
21371
21372 /* If we are restoring registers out-of-line, we will be using the
21373 "exit" variants of the restore routines, which will reset the
21374 stack for us. But we do need to point updt_reg into the
21375 right place for those routines. */
21376 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
21377
21378 if (frame_off != 0)
21379 return emit_insn (gen_add3_insn (updt_reg_rtx,
21380 frame_reg_rtx, GEN_INT (frame_off)));
21381 else if (REGNO (frame_reg_rtx) != updt_regno)
21382 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
21383
21384 return NULL_RTX;
21385 }
21386
21387 /* Return the register number used as a pointer by out-of-line
21388 save/restore functions. */
21389
21390 static inline unsigned
21391 ptr_regno_for_savres (int sel)
21392 {
21393 if (DEFAULT_ABI == ABI_AIX)
21394 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
21395 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
21396 }
21397
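/* Pointer register summary (derived from the code above, not from ABI
   documents): AIX uses r1 for FPR routines and for any routine that
   touches LR, r12 otherwise; Darwin uses r1 for FPR routines and r11
   otherwise; V.4 always uses r11.  */
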
21398 /* Construct a parallel rtx describing the effect of a call to an
21399 out-of-line register save/restore routine, and emit the insn
21400 or jump_insn as appropriate. */
21401
21402 static rtx
21403 rs6000_emit_savres_rtx (rs6000_stack_t *info,
21404 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
21405 enum machine_mode reg_mode, int sel)
21406 {
21407 int i;
21408 int offset, start_reg, end_reg, n_regs, use_reg;
21409 int reg_size = GET_MODE_SIZE (reg_mode);
21410 rtx sym;
21411 rtvec p;
21412 rtx par, insn;
21413
21414 offset = 0;
21415 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
21416 ? info->first_gp_reg_save
21417 : (sel & SAVRES_REG) == SAVRES_FPR
21418 ? info->first_fp_reg_save
21419 : (sel & SAVRES_REG) == SAVRES_VR
21420 ? info->first_altivec_reg_save
21421 : -1);
21422 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
21423 ? 32
21424 : (sel & SAVRES_REG) == SAVRES_FPR
21425 ? 64
21426 : (sel & SAVRES_REG) == SAVRES_VR
21427 ? LAST_ALTIVEC_REGNO + 1
21428 : -1);
21429 n_regs = end_reg - start_reg;
21430 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
21431 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
21432 + n_regs);
21433
21434 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
21435 RTVEC_ELT (p, offset++) = ret_rtx;
21436
21437 RTVEC_ELT (p, offset++)
21438 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
21439
21440 sym = rs6000_savres_routine_sym (info, sel);
21441 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
21442
21443 use_reg = ptr_regno_for_savres (sel);
21444 if ((sel & SAVRES_REG) == SAVRES_VR)
21445 {
21446 /* Vector regs are saved/restored using [reg+reg] addressing. */
21447 RTVEC_ELT (p, offset++)
21448 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
21449 RTVEC_ELT (p, offset++)
21450 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
21451 }
21452 else
21453 RTVEC_ELT (p, offset++)
21454 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
21455
21456 for (i = 0; i < end_reg - start_reg; i++)
21457 RTVEC_ELT (p, i + offset)
21458 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
21459 frame_reg_rtx, save_area_offset + reg_size * i,
21460 (sel & SAVRES_SAVE) != 0);
21461
21462 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
21463 RTVEC_ELT (p, i + offset)
21464 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
21465
21466 par = gen_rtx_PARALLEL (VOIDmode, p);
21467
21468 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
21469 {
21470 insn = emit_jump_insn (par);
21471 JUMP_LABEL (insn) = ret_rtx;
21472 }
21473 else
21474 insn = emit_insn (par);
21475 return insn;
21476 }
21477
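/* Sketch of the PARALLEL built above for an "exit" GPR restore starting
   at r29 on a 32-bit target (schematic; register and offset values are
   assumed for this example):

   (parallel [(return)
              (clobber (reg:P lr))
              (use (symbol_ref "_restgpr_29_x"))
              (use (reg:P 11))
              (set (reg 29) (mem (plus (reg 11) (const_int -12))))
              (set (reg 30) (mem (plus (reg 11) (const_int -8))))
              (set (reg 31) (mem (plus (reg 11) (const_int -4))))])

   It is emitted as a jump_insn, since the out-of-line routine also
   returns.  */
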
21478 /* Determine whether GP register REG is really used. */
21479
21480 static bool
21481 rs6000_reg_live_or_pic_offset_p (int reg)
21482 {
21483 /* If the function calls eh_return, claim used all the registers that would
21484 be checked for liveness otherwise. This is required for the PIC offset
21485 register with -mminimal-toc on AIX, as it is advertised as "fixed" for
21486 register allocation purposes in this case. */
21487
21488 return (((crtl->calls_eh_return || df_regs_ever_live_p (reg))
21489 && (!call_used_regs[reg]
21490 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
21491 && !TARGET_SINGLE_PIC_BASE
21492 && TARGET_TOC && TARGET_MINIMAL_TOC)))
21493 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
21494 && !TARGET_SINGLE_PIC_BASE
21495 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
21496 || (DEFAULT_ABI == ABI_DARWIN && flag_pic))));
21497 }
21498
21499 /* Emit function prologue as insns. */
21500
21501 void
21502 rs6000_emit_prologue (void)
21503 {
21504 rs6000_stack_t *info = rs6000_stack_info ();
21505 enum machine_mode reg_mode = Pmode;
21506 int reg_size = TARGET_32BIT ? 4 : 8;
21507 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
21508 rtx frame_reg_rtx = sp_reg_rtx;
21509 unsigned int cr_save_regno;
21510 rtx cr_save_rtx = NULL_RTX;
21511 rtx insn;
21512 int strategy;
21513 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
21514 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
21515 && call_used_regs[STATIC_CHAIN_REGNUM]);
21516 /* Offset to top of frame for frame_reg and sp respectively. */
21517 HOST_WIDE_INT frame_off = 0;
21518 HOST_WIDE_INT sp_off = 0;
21519
21520 #ifdef ENABLE_CHECKING
21521 /* Track and check usage of r0, r11, r12. */
21522 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
21523 #define START_USE(R) do \
21524 { \
21525 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
21526 reg_inuse |= 1 << (R); \
21527 } while (0)
21528 #define END_USE(R) do \
21529 { \
21530 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
21531 reg_inuse &= ~(1 << (R)); \
21532 } while (0)
21533 #define NOT_INUSE(R) do \
21534 { \
21535 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
21536 } while (0)
21537 #else
21538 #define START_USE(R) do {} while (0)
21539 #define END_USE(R) do {} while (0)
21540 #define NOT_INUSE(R) do {} while (0)
21541 #endif
21542
21543 if (flag_stack_usage_info)
21544 current_function_static_stack_size = info->total_size;
21545
21546 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21547 {
21548 HOST_WIDE_INT size = info->total_size;
21549
21550 if (crtl->is_leaf && !cfun->calls_alloca)
21551 {
21552 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21553 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
21554 size - STACK_CHECK_PROTECT);
21555 }
21556 else if (size > 0)
21557 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
21558 }
21559
21560 if (TARGET_FIX_AND_CONTINUE)
21561 {
21562 /* GDB on Darwin arranges to forward a function from the old
21563 address by modifying the first 5 instructions of the function
21564 to branch to the overriding function. This is necessary to
21565 permit function pointers that point to the old function to
21566 actually forward to the new function. */
21567 emit_insn (gen_nop ());
21568 emit_insn (gen_nop ());
21569 emit_insn (gen_nop ());
21570 emit_insn (gen_nop ());
21571 emit_insn (gen_nop ());
21572 }
21573
21574 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
21575 {
21576 reg_mode = V2SImode;
21577 reg_size = 8;
21578 }
21579
21580 /* Handle world saves specially here. */
21581 if (WORLD_SAVE_P (info))
21582 {
21583 int i, j, sz;
21584 rtx treg;
21585 rtvec p;
21586 rtx reg0;
21587
21588 /* save_world expects lr in r0. */
21589 reg0 = gen_rtx_REG (Pmode, 0);
21590 if (info->lr_save_p)
21591 {
21592 insn = emit_move_insn (reg0,
21593 gen_rtx_REG (Pmode, LR_REGNO));
21594 RTX_FRAME_RELATED_P (insn) = 1;
21595 }
21596
21597 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
21598 assumptions about the offsets of various bits of the stack
21599 frame. */
21600 gcc_assert (info->gp_save_offset == -220
21601 && info->fp_save_offset == -144
21602 && info->lr_save_offset == 8
21603 && info->cr_save_offset == 4
21604 && info->push_p
21605 && info->lr_save_p
21606 && (!crtl->calls_eh_return
21607 || info->ehrd_offset == -432)
21608 && info->vrsave_save_offset == -224
21609 && info->altivec_save_offset == -416);
21610
21611 treg = gen_rtx_REG (SImode, 11);
21612 emit_move_insn (treg, GEN_INT (-info->total_size));
21613
21614 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
21615 in R11. It also clobbers R12, so beware! */
21616
21617 /* Preserve CR2 for save_world prologues. */
21618 sz = 5;
21619 sz += 32 - info->first_gp_reg_save;
21620 sz += 64 - info->first_fp_reg_save;
21621 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
21622 p = rtvec_alloc (sz);
21623 j = 0;
21624 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
21625 gen_rtx_REG (SImode,
21626 LR_REGNO));
21627 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
21628 gen_rtx_SYMBOL_REF (Pmode,
21629 "*save_world"));
21630 /* We do floats first so that the instruction pattern matches
21631 properly. */
21632 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
21633 RTVEC_ELT (p, j++)
21634 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
21635 ? DFmode : SFmode,
21636 info->first_fp_reg_save + i),
21637 frame_reg_rtx,
21638 info->fp_save_offset + frame_off + 8 * i);
21639 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
21640 RTVEC_ELT (p, j++)
21641 = gen_frame_store (gen_rtx_REG (V4SImode,
21642 info->first_altivec_reg_save + i),
21643 frame_reg_rtx,
21644 info->altivec_save_offset + frame_off + 16 * i);
21645 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
21646 RTVEC_ELT (p, j++)
21647 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
21648 frame_reg_rtx,
21649 info->gp_save_offset + frame_off + reg_size * i);
21650
21651 /* CR register traditionally saved as CR2. */
21652 RTVEC_ELT (p, j++)
21653 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
21654 frame_reg_rtx, info->cr_save_offset + frame_off);
21655 /* Explain about use of R0. */
21656 if (info->lr_save_p)
21657 RTVEC_ELT (p, j++)
21658 = gen_frame_store (reg0,
21659 frame_reg_rtx, info->lr_save_offset + frame_off);
21660 /* Explain what happens to the stack pointer. */
21661 {
21662 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
21663 RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, sp_reg_rtx, newval);
21664 }
21665
21666 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
21667 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
21668 treg, GEN_INT (-info->total_size));
21669 sp_off = frame_off = info->total_size;
21670 }
21671
21672 strategy = info->savres_strategy;
21673
21674 /* For V.4, update stack before we do any saving and set back pointer. */
21675 if (! WORLD_SAVE_P (info)
21676 && info->push_p
21677 && (DEFAULT_ABI == ABI_V4
21678 || crtl->calls_eh_return))
21679 {
21680 bool need_r11 = (TARGET_SPE
21681 ? (!(strategy & SAVE_INLINE_GPRS)
21682 && info->spe_64bit_regs_used == 0)
21683 : (!(strategy & SAVE_INLINE_FPRS)
21684 || !(strategy & SAVE_INLINE_GPRS)
21685 || !(strategy & SAVE_INLINE_VRS)));
21686 int ptr_regno = -1;
21687 rtx ptr_reg = NULL_RTX;
21688 int ptr_off = 0;
21689
21690 if (info->total_size < 32767)
21691 frame_off = info->total_size;
21692 else if (need_r11)
21693 ptr_regno = 11;
21694 else if (info->cr_save_p
21695 || info->lr_save_p
21696 || info->first_fp_reg_save < 64
21697 || info->first_gp_reg_save < 32
21698 || info->altivec_size != 0
21699 || info->vrsave_mask != 0
21700 || crtl->calls_eh_return)
21701 ptr_regno = 12;
21702 else
21703 {
21704 /* The prologue won't be saving any regs so there is no need
21705 to set up a frame register to access any frame save area.
21706 We also won't be using frame_off anywhere below, but set
21707 the correct value anyway to protect against future
21708 changes to this function. */
21709 frame_off = info->total_size;
21710 }
21711 if (ptr_regno != -1)
21712 {
21713 /* Set up the frame offset to that needed by the first
21714 out-of-line save function. */
21715 START_USE (ptr_regno);
21716 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
21717 frame_reg_rtx = ptr_reg;
21718 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
21719 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
21720 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
21721 ptr_off = info->gp_save_offset + info->gp_size;
21722 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
21723 ptr_off = info->altivec_save_offset + info->altivec_size;
21724 frame_off = -ptr_off;
21725 }
21726 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
21727 sp_off = info->total_size;
21728 if (frame_reg_rtx != sp_reg_rtx)
21729 rs6000_emit_stack_tie (frame_reg_rtx, false);
21730 }
21731
21732 /* If we use the link register, get it into r0. */
21733 if (!WORLD_SAVE_P (info) && info->lr_save_p)
21734 {
21735 rtx addr, reg, mem;
21736
21737 reg = gen_rtx_REG (Pmode, 0);
21738 START_USE (0);
21739 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
21740 RTX_FRAME_RELATED_P (insn) = 1;
21741
21742 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
21743 | SAVE_NOINLINE_FPRS_SAVES_LR)))
21744 {
21745 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
21746 GEN_INT (info->lr_save_offset + frame_off));
21747 mem = gen_rtx_MEM (Pmode, addr);
21748 /* This should not be in rs6000_sr_alias_set, because of
21749 __builtin_return_address. */
21750
21751 insn = emit_move_insn (mem, reg);
21752 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
21753 NULL_RTX, NULL_RTX);
21754 END_USE (0);
21755 }
21756 }
21757
21758 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
21759 r12 will be needed by out-of-line gpr restore. */
21760 cr_save_regno = (DEFAULT_ABI == ABI_AIX
21761 && !(strategy & (SAVE_INLINE_GPRS
21762 | SAVE_NOINLINE_GPRS_SAVES_LR))
21763 ? 11 : 12);
21764 if (!WORLD_SAVE_P (info)
21765 && info->cr_save_p
21766 && REGNO (frame_reg_rtx) != cr_save_regno
21767 && !(using_static_chain_p && cr_save_regno == 11))
21768 {
21769 rtx set;
21770
21771 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
21772 START_USE (cr_save_regno);
21773 insn = emit_insn (gen_movesi_from_cr (cr_save_rtx));
21774 RTX_FRAME_RELATED_P (insn) = 1;
21775 /* Now, there's no way that dwarf2out_frame_debug_expr is going
21776 to understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)'.
21777 But that's OK. All we have to do is specify that _one_ condition
21778 code register is saved in this stack slot. The thrower's epilogue
21779 will then restore all the call-saved registers.
21780 We use CR2_REGNO (70) to be compatible with gcc-2.95 on Linux. */
21781 set = gen_rtx_SET (VOIDmode, cr_save_rtx,
21782 gen_rtx_REG (SImode, CR2_REGNO));
21783 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
21784 }
21785
21786 /* Do any required saving of fpr's. If only one or two to save, do
21787 it ourselves. Otherwise, call function. */
21788 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
21789 {
21790 int i;
21791 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
21792 if (save_reg_p (info->first_fp_reg_save + i))
21793 emit_frame_save (frame_reg_rtx,
21794 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
21795 ? DFmode : SFmode),
21796 info->first_fp_reg_save + i,
21797 info->fp_save_offset + frame_off + 8 * i,
21798 sp_off - frame_off);
21799 }
21800 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
21801 {
21802 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
21803 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
21804 unsigned ptr_regno = ptr_regno_for_savres (sel);
21805 rtx ptr_reg = frame_reg_rtx;
21806
21807 if (REGNO (frame_reg_rtx) == ptr_regno)
21808 gcc_checking_assert (frame_off == 0);
21809 else
21810 {
21811 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
21812 NOT_INUSE (ptr_regno);
21813 emit_insn (gen_add3_insn (ptr_reg,
21814 frame_reg_rtx, GEN_INT (frame_off)));
21815 }
21816 insn = rs6000_emit_savres_rtx (info, ptr_reg,
21817 info->fp_save_offset,
21818 info->lr_save_offset,
21819 DFmode, sel);
21820 rs6000_frame_related (insn, ptr_reg, sp_off,
21821 NULL_RTX, NULL_RTX);
21822 if (lr)
21823 END_USE (0);
21824 }
21825
21826 /* Save GPRs. This is done as a PARALLEL if we are using
21827 the store-multiple instructions. */
21828 if (!WORLD_SAVE_P (info)
21829 && TARGET_SPE_ABI
21830 && info->spe_64bit_regs_used != 0
21831 && info->first_gp_reg_save != 32)
21832 {
21833 int i;
21834 rtx spe_save_area_ptr;
21835 HOST_WIDE_INT save_off;
21836 int ool_adjust = 0;
21837
21838 /* Determine whether we can address all of the registers that need
21839 to be saved with an offset from frame_reg_rtx that fits in
21840 the small const field for SPE memory instructions. */
21841 int spe_regs_addressable
21842 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
21843 + reg_size * (32 - info->first_gp_reg_save - 1))
21844 && (strategy & SAVE_INLINE_GPRS));
21845
21846 if (spe_regs_addressable)
21847 {
21848 spe_save_area_ptr = frame_reg_rtx;
21849 save_off = frame_off;
21850 }
21851 else
21852 {
21853 /* Make r11 point to the start of the SPE save area. We need
21854 to be careful here if r11 is holding the static chain. If
21855 it is, then temporarily save it in r0. */
21856 HOST_WIDE_INT offset;
21857
21858 if (!(strategy & SAVE_INLINE_GPRS))
21859 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
21860 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
21861 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
21862 save_off = frame_off - offset;
21863
21864 if (using_static_chain_p)
21865 {
21866 rtx r0 = gen_rtx_REG (Pmode, 0);
21867
21868 START_USE (0);
21869 gcc_assert (info->first_gp_reg_save > 11);
21870
21871 emit_move_insn (r0, spe_save_area_ptr);
21872 }
21873 else if (REGNO (frame_reg_rtx) != 11)
21874 START_USE (11);
21875
21876 emit_insn (gen_addsi3 (spe_save_area_ptr,
21877 frame_reg_rtx, GEN_INT (offset)));
21878 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
21879 frame_off = -info->spe_gp_save_offset + ool_adjust;
21880 }
21881
21882 if ((strategy & SAVE_INLINE_GPRS))
21883 {
21884 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
21885 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
21886 emit_frame_save (spe_save_area_ptr, reg_mode,
21887 info->first_gp_reg_save + i,
21888 (info->spe_gp_save_offset + save_off
21889 + reg_size * i),
21890 sp_off - save_off);
21891 }
21892 else
21893 {
21894 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
21895 info->spe_gp_save_offset + save_off,
21896 0, reg_mode,
21897 SAVRES_SAVE | SAVRES_GPR);
21898
21899 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
21900 NULL_RTX, NULL_RTX);
21901 }
21902
21903 /* Move the static chain pointer back. */
21904 if (!spe_regs_addressable)
21905 {
21906 if (using_static_chain_p)
21907 {
21908 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
21909 END_USE (0);
21910 }
21911 else if (REGNO (frame_reg_rtx) != 11)
21912 END_USE (11);
21913 }
21914 }
21915 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
21916 {
21917 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
21918 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
21919 unsigned ptr_regno = ptr_regno_for_savres (sel);
21920 rtx ptr_reg = frame_reg_rtx;
21921 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
21922 int end_save = info->gp_save_offset + info->gp_size;
21923 int ptr_off;
21924
21925 if (!ptr_set_up)
21926 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
21927
21928 /* Need to adjust r11 (r12) if we saved any FPRs. */
21929 if (end_save + frame_off != 0)
21930 {
21931 rtx offset = GEN_INT (end_save + frame_off);
21932
21933 if (ptr_set_up)
21934 frame_off = -end_save;
21935 else
21936 NOT_INUSE (ptr_regno);
21937 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
21938 }
21939 else if (!ptr_set_up)
21940 {
21941 NOT_INUSE (ptr_regno);
21942 emit_move_insn (ptr_reg, frame_reg_rtx);
21943 }
21944 ptr_off = -end_save;
21945 insn = rs6000_emit_savres_rtx (info, ptr_reg,
21946 info->gp_save_offset + ptr_off,
21947 info->lr_save_offset + ptr_off,
21948 reg_mode, sel);
21949 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
21950 NULL_RTX, NULL_RTX);
21951 if (lr)
21952 END_USE (0);
21953 }
21954 else if (!WORLD_SAVE_P (info) && (strategy & SAVRES_MULTIPLE))
21955 {
21956 rtvec p;
21957 int i;
21958 p = rtvec_alloc (32 - info->first_gp_reg_save);
21959 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
21960 RTVEC_ELT (p, i)
21961 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
21962 frame_reg_rtx,
21963 info->gp_save_offset + frame_off + reg_size * i);
21964 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
21965 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
21966 NULL_RTX, NULL_RTX);
21967 }
21968 else if (!WORLD_SAVE_P (info))
21969 {
21970 int i;
21971 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
21972 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
21973 emit_frame_save (frame_reg_rtx, reg_mode,
21974 info->first_gp_reg_save + i,
21975 info->gp_save_offset + frame_off + reg_size * i,
21976 sp_off - frame_off);
21977 }
21978
21979 if (crtl->calls_eh_return)
21980 {
21981 unsigned int i;
21982 rtvec p;
21983
21984 for (i = 0; ; ++i)
21985 {
21986 unsigned int regno = EH_RETURN_DATA_REGNO (i);
21987 if (regno == INVALID_REGNUM)
21988 break;
21989 }
21990
21991 p = rtvec_alloc (i);
21992
21993 for (i = 0; ; ++i)
21994 {
21995 unsigned int regno = EH_RETURN_DATA_REGNO (i);
21996 if (regno == INVALID_REGNUM)
21997 break;
21998
21999 insn
22000 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
22001 sp_reg_rtx,
22002 info->ehrd_offset + sp_off + reg_size * (int) i);
22003 RTVEC_ELT (p, i) = insn;
22004 RTX_FRAME_RELATED_P (insn) = 1;
22005 }
22006
22007 insn = emit_insn (gen_blockage ());
22008 RTX_FRAME_RELATED_P (insn) = 1;
22009 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
22010 }
22011
22012 /* In the AIX ABI we need to make sure r2 is really saved. */
22013 if (TARGET_AIX && crtl->calls_eh_return)
22014 {
22015 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
22016 rtx save_insn, join_insn, note;
22017 long toc_restore_insn;
22018
22019 tmp_reg = gen_rtx_REG (Pmode, 11);
22020 tmp_reg_si = gen_rtx_REG (SImode, 11);
22021 if (using_static_chain_p)
22022 {
22023 START_USE (0);
22024 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
22025 }
22026 else
22027 START_USE (11);
22028 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
22029 /* Peek at instruction to which this function returns. If it's
22030 restoring r2, then we know we've already saved r2. We can't
22031 unconditionally save r2 because the value we have will already
22032 be updated if we arrived at this function via a PLT call or
22033 TOC-adjusting stub. */
22034 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
22035 toc_restore_insn = TARGET_32BIT ? 0x80410014 : 0xE8410028;
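/* 0x80410014 decodes to "lwz r2,20(r1)" and 0xE8410028 to "ld r2,40(r1)":
   the reload of r2 from its ABI save slot that follows a call made
   through a TOC-adjusting stub.  */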
22036 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
22037 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
22038 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
22039 validate_condition_mode (EQ, CCUNSmode);
22040 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
22041 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
22042 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
22043 toc_save_done = gen_label_rtx ();
22044 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
22045 gen_rtx_EQ (VOIDmode, compare_result,
22046 const0_rtx),
22047 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
22048 pc_rtx);
22049 jump = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, jump));
22050 JUMP_LABEL (jump) = toc_save_done;
22051 LABEL_NUSES (toc_save_done) += 1;
22052
22053 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
22054 TOC_REGNUM, frame_off + 5 * reg_size,
22055 sp_off - frame_off);
22056
22057 emit_label (toc_save_done);
22058
22059 /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
22060 have a CFG that has different saves along different paths.
22061 Move the note to a dummy blockage insn, which describes that
22062 R2 is unconditionally saved after the label. */
22063 /* ??? An alternate representation might be a special insn pattern
22064 containing both the branch and the store. That might give the
22065 code that minimizes the number of DW_CFA_advance opcodes more
22066 freedom in placing the annotations. */
22067 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
22068 if (note)
22069 remove_note (save_insn, note);
22070 else
22071 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
22072 copy_rtx (PATTERN (save_insn)), NULL_RTX);
22073 RTX_FRAME_RELATED_P (save_insn) = 0;
22074
22075 join_insn = emit_insn (gen_blockage ());
22076 REG_NOTES (join_insn) = note;
22077 RTX_FRAME_RELATED_P (join_insn) = 1;
22078
22079 if (using_static_chain_p)
22080 {
22081 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
22082 END_USE (0);
22083 }
22084 else
22085 END_USE (11);
22086 }
22087
22088 /* Save CR if we use any that must be preserved. */
22089 if (!WORLD_SAVE_P (info) && info->cr_save_p)
22090 {
22091 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
22092 GEN_INT (info->cr_save_offset + frame_off));
22093 rtx mem = gen_frame_mem (SImode, addr);
22094 /* See the large comment above about why CR2_REGNO is used. */
22095 rtx magic_eh_cr_reg = gen_rtx_REG (SImode, CR2_REGNO);
22096
22097 /* If we didn't copy cr before, do so now using r0. */
22098 if (cr_save_rtx == NULL_RTX)
22099 {
22100 rtx set;
22101
22102 START_USE (0);
22103 cr_save_rtx = gen_rtx_REG (SImode, 0);
22104 insn = emit_insn (gen_movesi_from_cr (cr_save_rtx));
22105 RTX_FRAME_RELATED_P (insn) = 1;
22106 set = gen_rtx_SET (VOIDmode, cr_save_rtx, magic_eh_cr_reg);
22107 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
22108 }
22109 insn = emit_move_insn (mem, cr_save_rtx);
22110 END_USE (REGNO (cr_save_rtx));
22111
22112 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
22113 NULL_RTX, NULL_RTX);
22114 }
22115
22116 /* Update stack and set back pointer unless this is V.4,
22117 for which it was done previously. */
22118 if (!WORLD_SAVE_P (info) && info->push_p
22119 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
22120 {
22121 rtx ptr_reg = NULL;
22122 int ptr_off = 0;
22123
22124 /* If saving altivec regs we need to be able to address all save
22125 locations using a 16-bit offset. */
22126 if ((strategy & SAVE_INLINE_VRS) == 0
22127 || (info->altivec_size != 0
22128 && (info->altivec_save_offset + info->altivec_size - 16
22129 + info->total_size - frame_off) > 32767)
22130 || (info->vrsave_size != 0
22131 && (info->vrsave_save_offset
22132 + info->total_size - frame_off) > 32767))
22133 {
22134 int sel = SAVRES_SAVE | SAVRES_VR;
22135 unsigned ptr_regno = ptr_regno_for_savres (sel);
22136
22137 if (using_static_chain_p
22138 && ptr_regno == STATIC_CHAIN_REGNUM)
22139 ptr_regno = 12;
22140 if (REGNO (frame_reg_rtx) != ptr_regno)
22141 START_USE (ptr_regno);
22142 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
22143 frame_reg_rtx = ptr_reg;
22144 ptr_off = info->altivec_save_offset + info->altivec_size;
22145 frame_off = -ptr_off;
22146 }
22147 else if (REGNO (frame_reg_rtx) == 1)
22148 frame_off = info->total_size;
22149 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
22150 sp_off = info->total_size;
22151 if (frame_reg_rtx != sp_reg_rtx)
22152 rs6000_emit_stack_tie (frame_reg_rtx, false);
22153 }
22154
22155 /* Set frame pointer, if needed. */
22156 if (frame_pointer_needed)
22157 {
22158 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
22159 sp_reg_rtx);
22160 RTX_FRAME_RELATED_P (insn) = 1;
22161 }
22162
22163 /* Save AltiVec registers if needed. Save here because the red zone does
22164 not always include AltiVec registers. */
22165 if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
22166 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
22167 {
22168 int end_save = info->altivec_save_offset + info->altivec_size;
22169 int ptr_off;
22170 /* Oddly, the vector save/restore functions point r0 at the end
22171 of the save area, then use r11 or r12 to load offsets for
22172 [reg+reg] addressing. */
22173 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
22174 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
22175 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
22176
22177 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
22178 NOT_INUSE (0);
22179 if (end_save + frame_off != 0)
22180 {
22181 rtx offset = GEN_INT (end_save + frame_off);
22182
22183 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
22184 }
22185 else
22186 emit_move_insn (ptr_reg, frame_reg_rtx);
22187
22188 ptr_off = -end_save;
22189 insn = rs6000_emit_savres_rtx (info, scratch_reg,
22190 info->altivec_save_offset + ptr_off,
22191 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
22192 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
22193 NULL_RTX, NULL_RTX);
22194 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
22195 {
22196 /* The oddity mentioned above clobbered our frame reg. */
22197 emit_move_insn (frame_reg_rtx, ptr_reg);
22198 frame_off = ptr_off;
22199 }
22200 }
22201 else if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
22202 && info->altivec_size != 0)
22203 {
22204 int i;
22205
22206 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
22207 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
22208 {
22209 rtx areg, savereg, mem;
22210 int offset;
22211
22212 offset = (info->altivec_save_offset + frame_off
22213 + 16 * (i - info->first_altivec_reg_save));
22214
22215 savereg = gen_rtx_REG (V4SImode, i);
22216
22217 NOT_INUSE (0);
22218 areg = gen_rtx_REG (Pmode, 0);
22219 emit_move_insn (areg, GEN_INT (offset));
22220
22221 /* AltiVec addressing mode is [reg+reg]. */
22222 mem = gen_frame_mem (V4SImode,
22223 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
22224
22225 insn = emit_move_insn (mem, savereg);
22226
22227 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
22228 areg, GEN_INT (offset));
22229 }
22230 }
22231
22232 /* VRSAVE is a bit vector representing which AltiVec registers
22233 are used. The OS uses this to determine which vector
22234 registers to save on a context switch. We need to save
22235 VRSAVE on the stack frame, add whatever AltiVec registers we
22236 used in this function, and do the corresponding magic in the
22237 epilogue. */
22238
22239 if (!WORLD_SAVE_P (info)
22240 && TARGET_ALTIVEC
22241 && TARGET_ALTIVEC_VRSAVE
22242 && info->vrsave_mask != 0)
22243 {
22244 rtx reg, vrsave;
22245 int offset;
22246 int save_regno;
22247
22248 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
22249 be using r12 as frame_reg_rtx and r11 as the static chain
22250 pointer for nested functions. */
22251 save_regno = 12;
22252 if (DEFAULT_ABI == ABI_AIX && !using_static_chain_p)
22253 save_regno = 11;
22254 else if (REGNO (frame_reg_rtx) == 12)
22255 {
22256 save_regno = 11;
22257 if (using_static_chain_p)
22258 save_regno = 0;
22259 }
22260
22261 NOT_INUSE (save_regno);
22262 reg = gen_rtx_REG (SImode, save_regno);
22263 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
22264 if (TARGET_MACHO)
22265 emit_insn (gen_get_vrsave_internal (reg));
22266 else
22267 emit_insn (gen_rtx_SET (VOIDmode, reg, vrsave));
22268
22269 /* Save VRSAVE. */
22270 offset = info->vrsave_save_offset + frame_off;
22271 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
22272
22273 /* Include the registers in the mask. */
22274 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
22275
22276 insn = emit_insn (generate_set_vrsave (reg, info, 0));
22277 }
22278
22279 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
22280 if (!TARGET_SINGLE_PIC_BASE
22281 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
22282 || (DEFAULT_ABI == ABI_V4
22283 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
22284 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
22285 {
22286 /* If emit_load_toc_table will use the link register, we need to save
22287 it. We use R12 for this purpose because emit_load_toc_table
22288 can use register 0. This allows us to use a plain 'blr' to return
22289 from the procedure more often. */
22290 int save_LR_around_toc_setup = (TARGET_ELF
22291 && DEFAULT_ABI != ABI_AIX
22292 && flag_pic
22293 && ! info->lr_save_p
22294 && EDGE_COUNT (EXIT_BLOCK_PTR->preds) > 0);
22295 if (save_LR_around_toc_setup)
22296 {
22297 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
22298 rtx tmp = gen_rtx_REG (Pmode, 12);
22299
22300 insn = emit_move_insn (tmp, lr);
22301 RTX_FRAME_RELATED_P (insn) = 1;
22302
22303 rs6000_emit_load_toc_table (TRUE);
22304
22305 insn = emit_move_insn (lr, tmp);
22306 add_reg_note (insn, REG_CFA_RESTORE, lr);
22307 RTX_FRAME_RELATED_P (insn) = 1;
22308 }
22309 else
22310 rs6000_emit_load_toc_table (TRUE);
22311 }
22312
22313 #if TARGET_MACHO
22314 if (!TARGET_SINGLE_PIC_BASE
22315 && DEFAULT_ABI == ABI_DARWIN
22316 && flag_pic && crtl->uses_pic_offset_table)
22317 {
22318 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
22319 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
22320
22321 /* Save and restore LR locally around this call (in R0). */
22322 if (!info->lr_save_p)
22323 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
22324
22325 emit_insn (gen_load_macho_picbase (src));
22326
22327 emit_move_insn (gen_rtx_REG (Pmode,
22328 RS6000_PIC_OFFSET_TABLE_REGNUM),
22329 lr);
22330
22331 if (!info->lr_save_p)
22332 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
22333 }
22334 #endif
22335
22336 /* If we need to, save the TOC register after doing the stack setup.
22337 Do not emit eh frame info for this save. The unwinder wants info,
22338 conceptually attached to instructions in this function, about
22339 register values in the caller of this function. This R2 may have
22340 already been changed from the value in the caller.
22341 We don't attempt to write accurate DWARF EH frame info for R2
22342 because code emitted by gcc for a (non-pointer) function call
22343 doesn't save and restore R2. Instead, R2 is managed out-of-line
22344 by a linker-generated PLT call stub when the function resides in
22345 a shared library. This behaviour is costly to describe in DWARF,
22346 both in terms of the size of DWARF info and the time taken in the
22347 unwinder to interpret it. R2 changes, apart from the
22348 calls_eh_return case earlier in this function, are handled by
22349 linux-unwind.h frob_update_context. */
22350 if (rs6000_save_toc_in_prologue_p ())
22351 {
22352 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
22353 emit_insn (gen_frame_store (reg, sp_reg_rtx, 5 * reg_size));
22354 }
22355 }
22356
22357 /* Write function prologue. */
22358
22359 static void
22360 rs6000_output_function_prologue (FILE *file,
22361 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
22362 {
22363 rs6000_stack_t *info = rs6000_stack_info ();
22364
22365 if (TARGET_DEBUG_STACK)
22366 debug_stack_info (info);
22367
22368 /* Write .extern for any function we will call to save and restore
22369 fp values. */
22370 if (info->first_fp_reg_save < 64
22371 && !TARGET_MACHO
22372 && !TARGET_ELF)
22373 {
22374 char *name;
22375 int regno = info->first_fp_reg_save - 32;
22376
22377 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
22378 {
22379 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
22380 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
22381 name = rs6000_savres_routine_name (info, regno, sel);
22382 fprintf (file, "\t.extern %s\n", name);
22383 }
22384 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
22385 {
22386 bool lr = (info->savres_strategy
22387 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
22388 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
22389 name = rs6000_savres_routine_name (info, regno, sel);
22390 fprintf (file, "\t.extern %s\n", name);
22391 }
22392 }
22393
22394 rs6000_pic_labelno++;
22395 }
22396
22397 /* Nonzero if VMX regs are restored before the frame pop, zero if
22398 we restore after the pop when possible. */
22399 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
22400
22401 /* Restoring cr is a two-step process: loading a reg from the frame
22402 save, then moving the reg to cr. For ABI_V4 we must let the
22403 unwinder know that the stack location is no longer valid at or
22404 before the stack deallocation, but we can't emit a cfa_restore for
22405 cr at the stack deallocation like we do for other registers.
22406 The trouble is that it is possible for the move to cr to be
22407 scheduled after the stack deallocation. So say exactly where cr
22408 is located on each of the two insns. */
22409
22410 static rtx
22411 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
22412 {
22413 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
22414 rtx reg = gen_rtx_REG (SImode, regno);
22415 rtx insn = emit_move_insn (reg, mem);
22416
22417 if (!exit_func && DEFAULT_ABI == ABI_V4)
22418 {
22419 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
22420 rtx set = gen_rtx_SET (VOIDmode, reg, cr);
22421
22422 add_reg_note (insn, REG_CFA_REGISTER, set);
22423 RTX_FRAME_RELATED_P (insn) = 1;
22424 }
22425 return reg;
22426 }
22427
22428 /* Reload CR from REG. */
22429
22430 static void
22431 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
22432 {
22433 int count = 0;
22434 int i;
22435
22436 if (using_mfcr_multiple)
22437 {
22438 for (i = 0; i < 8; i++)
22439 if (save_reg_p (CR0_REGNO + i))
22440 count++;
22441 gcc_assert (count);
22442 }
22443
22444 if (using_mfcr_multiple && count > 1)
22445 {
22446 rtvec p;
22447 int ndx;
22448
22449 p = rtvec_alloc (count);
22450
22451 ndx = 0;
22452 for (i = 0; i < 8; i++)
22453 if (save_reg_p (CR0_REGNO + i))
22454 {
22455 rtvec r = rtvec_alloc (2);
22456 RTVEC_ELT (r, 0) = reg;
22457 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
22458 RTVEC_ELT (p, ndx) =
22459 gen_rtx_SET (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i),
22460 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
22461 ndx++;
22462 }
22463 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
22464 gcc_assert (ndx == count);
22465 }
22466 else
22467 for (i = 0; i < 8; i++)
22468 if (save_reg_p (CR0_REGNO + i))
22469 emit_insn (gen_movsi_to_cr_one (gen_rtx_REG (CCmode, CR0_REGNO + i),
22470 reg));
22471
22472 if (!exit_func && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
22473 {
22474 rtx insn = get_last_insn ();
22475 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
22476
22477 add_reg_note (insn, REG_CFA_RESTORE, cr);
22478 RTX_FRAME_RELATED_P (insn) = 1;
22479 }
22480 }
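/* A worked example of the mtcrf mask used above: the FXM bit for CR
   field i is 1 << (7 - i), so restoring only CR2 (i == 2) gives mask
   0x20, i.e. roughly "mtcrf 0x20,rN" with rN holding the saved word.  */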
22481
22482 /* Like cr, the move to lr instruction can be scheduled after the
22483 stack deallocation, but unlike cr, its stack frame save is still
22484 valid. So we only need to emit the cfa_restore on the correct
22485 instruction. */
22486
22487 static void
22488 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
22489 {
22490 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
22491 rtx reg = gen_rtx_REG (Pmode, regno);
22492
22493 emit_move_insn (reg, mem);
22494 }
22495
22496 static void
22497 restore_saved_lr (int regno, bool exit_func)
22498 {
22499 rtx reg = gen_rtx_REG (Pmode, regno);
22500 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
22501 rtx insn = emit_move_insn (lr, reg);
22502
22503 if (!exit_func && flag_shrink_wrap)
22504 {
22505 add_reg_note (insn, REG_CFA_RESTORE, lr);
22506 RTX_FRAME_RELATED_P (insn) = 1;
22507 }
22508 }
22509
22510 static rtx
22511 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
22512 {
22513 if (info->cr_save_p)
22514 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
22515 gen_rtx_REG (SImode, CR2_REGNO),
22516 cfa_restores);
22517 if (info->lr_save_p)
22518 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
22519 gen_rtx_REG (Pmode, LR_REGNO),
22520 cfa_restores);
22521 return cfa_restores;
22522 }
22523
22524 /* Return true if OFFSET from stack pointer can be clobbered by signals.
22525 V.4 doesn't have any stack cushion, AIX ABIs have 220 or 288 bytes
22526 below the stack pointer not clobbered by signals. */
22527
22528 static inline bool
22529 offset_below_red_zone_p (HOST_WIDE_INT offset)
22530 {
22531 return offset < (DEFAULT_ABI == ABI_V4
22532 ? 0
22533 : TARGET_32BIT ? -220 : -288);
22534 }
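/* Worked example: on 64-bit AIX-style ABIs the 288 bytes below the
   stack pointer are the red zone, so an offset of -288 is still safe
   (returns false) while -289 can be clobbered (returns true); under
   V.4 there is no red zone, so any negative offset returns true.  */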
22535
22536 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
22537
22538 static void
22539 emit_cfa_restores (rtx cfa_restores)
22540 {
22541 rtx insn = get_last_insn ();
22542 rtx *loc = &REG_NOTES (insn);
22543
22544 while (*loc)
22545 loc = &XEXP (*loc, 1);
22546 *loc = cfa_restores;
22547 RTX_FRAME_RELATED_P (insn) = 1;
22548 }
22549
22550 /* Emit function epilogue as insns. */
22551
22552 void
22553 rs6000_emit_epilogue (int sibcall)
22554 {
22555 rs6000_stack_t *info;
22556 int restoring_GPRs_inline;
22557 int restoring_FPRs_inline;
22558 int using_load_multiple;
22559 int using_mtcr_multiple;
22560 int use_backchain_to_restore_sp;
22561 int restore_lr;
22562 int strategy;
22563 HOST_WIDE_INT frame_off = 0;
22564 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
22565 rtx frame_reg_rtx = sp_reg_rtx;
22566 rtx cfa_restores = NULL_RTX;
22567 rtx insn;
22568 rtx cr_save_reg = NULL_RTX;
22569 enum machine_mode reg_mode = Pmode;
22570 int reg_size = TARGET_32BIT ? 4 : 8;
22571 int i;
22572 bool exit_func;
22573 unsigned ptr_regno;
22574
22575 info = rs6000_stack_info ();
22576
22577 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
22578 {
22579 reg_mode = V2SImode;
22580 reg_size = 8;
22581 }
22582
22583 strategy = info->savres_strategy;
22584 using_load_multiple = strategy & SAVRES_MULTIPLE;
22585 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
22586 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
22587 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
22588 || rs6000_cpu == PROCESSOR_PPC603
22589 || rs6000_cpu == PROCESSOR_PPC750
22590 || optimize_size);
22591 /* Restore via the backchain when we have a large frame, since this
22592 is more efficient than an addis, addi pair. The second condition
22593 here will not trigger at the moment; we don't actually need a
22594 frame pointer for alloca, but the generic parts of the compiler
22595 give us one anyway. */
22596 use_backchain_to_restore_sp = (info->total_size > 32767 - info->lr_save_offset
22597 || (cfun->calls_alloca
22598 && !frame_pointer_needed));
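/* Illustrative note on the 32767 test above: D-form loads and stores
   take a signed 16-bit displacement, so larger frames would need an
   addis/addi pair to form each address; reloading the backchain word
   at 0(r1) recovers the old stack pointer in a single load instead.  */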
22599 restore_lr = (info->lr_save_p
22600 && (restoring_FPRs_inline
22601 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
22602 && (restoring_GPRs_inline
22603 || info->first_fp_reg_save < 64));
22604
22605 if (WORLD_SAVE_P (info))
22606 {
22607 int i, j;
22608 char rname[30];
22609 const char *alloc_rname;
22610 rtvec p;
22611
22612 /* eh_rest_world_r10 will return to the location saved in the LR
22613 stack slot (which is not likely to be our caller).
22614 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
22615 rest_world is similar, except any R10 parameter is ignored.
22616 The exception-handling stuff that was here in 2.95 is no
22617 longer necessary. */
22618
22619 p = rtvec_alloc (9
22620 + 1
22621 + 32 - info->first_gp_reg_save
22622 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
22623 + 63 + 1 - info->first_fp_reg_save);
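/* The element count above breaks down as: ret, the LR and symbol USEs,
   the r11/r0/r12/r7/r8 clobbers and the r10 USE (9 in total), plus 1
   for the CR2 load, plus one frame load for each saved GPR, AltiVec
   and FP register.  */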
22624
22625 strcpy (rname, ((crtl->calls_eh_return) ?
22626 "*eh_rest_world_r10" : "*rest_world"));
22627 alloc_rname = ggc_strdup (rname);
22628
22629 j = 0;
22630 RTVEC_ELT (p, j++) = ret_rtx;
22631 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
22632 gen_rtx_REG (Pmode,
22633 LR_REGNO));
22634 RTVEC_ELT (p, j++)
22635 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
22636 /* The instruction pattern requires a clobber here;
22637 it is shared with the restVEC helper. */
22638 RTVEC_ELT (p, j++)
22639 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
22640
22641 {
22642 /* CR register traditionally saved as CR2. */
22643 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
22644 RTVEC_ELT (p, j++)
22645 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
22646 if (flag_shrink_wrap)
22647 {
22648 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
22649 gen_rtx_REG (Pmode, LR_REGNO),
22650 cfa_restores);
22651 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
22652 }
22653 }
22654
22655 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
22656 {
22657 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
22658 RTVEC_ELT (p, j++)
22659 = gen_frame_load (reg,
22660 frame_reg_rtx, info->gp_save_offset + reg_size * i);
22661 if (flag_shrink_wrap)
22662 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
22663 }
22664 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
22665 {
22666 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
22667 RTVEC_ELT (p, j++)
22668 = gen_frame_load (reg,
22669 frame_reg_rtx, info->altivec_save_offset + 16 * i);
22670 if (flag_shrink_wrap)
22671 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
22672 }
22673 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
22674 {
22675 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
22676 ? DFmode : SFmode),
22677 info->first_fp_reg_save + i);
22678 RTVEC_ELT (p, j++)
22679 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
22680 if (flag_shrink_wrap)
22681 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
22682 }
22683 RTVEC_ELT (p, j++)
22684 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
22685 RTVEC_ELT (p, j++)
22686 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
22687 RTVEC_ELT (p, j++)
22688 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
22689 RTVEC_ELT (p, j++)
22690 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
22691 RTVEC_ELT (p, j++)
22692 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
22693 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
22694
22695 if (flag_shrink_wrap)
22696 {
22697 REG_NOTES (insn) = cfa_restores;
22698 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
22699 RTX_FRAME_RELATED_P (insn) = 1;
22700 }
22701 return;
22702 }
22703
22704 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
22705 if (info->push_p)
22706 frame_off = info->total_size;
22707
22708 /* Restore AltiVec registers if we must do so before adjusting the
22709 stack. */
22710 if (TARGET_ALTIVEC_ABI
22711 && info->altivec_size != 0
22712 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
22713 || (DEFAULT_ABI != ABI_V4
22714 && offset_below_red_zone_p (info->altivec_save_offset))))
22715 {
22716 int i;
22717 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
22718
22719 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
22720 if (use_backchain_to_restore_sp)
22721 {
22722 int frame_regno = 11;
22723
22724 if ((strategy & REST_INLINE_VRS) == 0)
22725 {
22726 /* Of r11 and r12, select the one not clobbered by an
22727 out-of-line restore function for the frame register. */
22728 frame_regno = 11 + 12 - scratch_regno;
22729 }
22730 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
22731 emit_move_insn (frame_reg_rtx,
22732 gen_rtx_MEM (Pmode, sp_reg_rtx));
22733 frame_off = 0;
22734 }
22735 else if (frame_pointer_needed)
22736 frame_reg_rtx = hard_frame_pointer_rtx;
22737
22738 if ((strategy & REST_INLINE_VRS) == 0)
22739 {
22740 int end_save = info->altivec_save_offset + info->altivec_size;
22741 int ptr_off;
22742 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
22743 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
22744
22745 if (end_save + frame_off != 0)
22746 {
22747 rtx offset = GEN_INT (end_save + frame_off);
22748
22749 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
22750 }
22751 else
22752 emit_move_insn (ptr_reg, frame_reg_rtx);
22753
22754 ptr_off = -end_save;
22755 insn = rs6000_emit_savres_rtx (info, scratch_reg,
22756 info->altivec_save_offset + ptr_off,
22757 0, V4SImode, SAVRES_VR);
22758 }
22759 else
22760 {
22761 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
22762 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
22763 {
22764 rtx addr, areg, mem, reg;
22765
22766 areg = gen_rtx_REG (Pmode, 0);
22767 emit_move_insn
22768 (areg, GEN_INT (info->altivec_save_offset
22769 + frame_off
22770 + 16 * (i - info->first_altivec_reg_save)));
22771
22772 /* AltiVec addressing mode is [reg+reg]. */
22773 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
22774 mem = gen_frame_mem (V4SImode, addr);
22775
22776 reg = gen_rtx_REG (V4SImode, i);
22777 emit_move_insn (reg, mem);
22778 }
22779 }
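/* A minimal sketch of the per-register sequence emitted above, with a
   hypothetical offset: "li r0,<offset>" then "lvx vN,rFRAME,r0".  The
   indexed form is required because AltiVec loads and stores have no
   reg+immediate addressing mode.  */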
22780
22781 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
22782 if (((strategy & REST_INLINE_VRS) == 0
22783 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
22784 && (flag_shrink_wrap
22785 || (offset_below_red_zone_p
22786 (info->altivec_save_offset
22787 + 16 * (i - info->first_altivec_reg_save)))))
22788 {
22789 rtx reg = gen_rtx_REG (V4SImode, i);
22790 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
22791 }
22792 }
22793
22794 /* Restore VRSAVE if we must do so before adjusting the stack. */
22795 if (TARGET_ALTIVEC
22796 && TARGET_ALTIVEC_VRSAVE
22797 && info->vrsave_mask != 0
22798 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
22799 || (DEFAULT_ABI != ABI_V4
22800 && offset_below_red_zone_p (info->vrsave_save_offset))))
22801 {
22802 rtx reg;
22803
22804 if (frame_reg_rtx == sp_reg_rtx)
22805 {
22806 if (use_backchain_to_restore_sp)
22807 {
22808 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
22809 emit_move_insn (frame_reg_rtx,
22810 gen_rtx_MEM (Pmode, sp_reg_rtx));
22811 frame_off = 0;
22812 }
22813 else if (frame_pointer_needed)
22814 frame_reg_rtx = hard_frame_pointer_rtx;
22815 }
22816
22817 reg = gen_rtx_REG (SImode, 12);
22818 emit_insn (gen_frame_load (reg, frame_reg_rtx,
22819 info->vrsave_save_offset + frame_off));
22820
22821 emit_insn (generate_set_vrsave (reg, info, 1));
22822 }
22823
22824 insn = NULL_RTX;
22825 /* If we have a large stack frame, restore the old stack pointer
22826 using the backchain. */
22827 if (use_backchain_to_restore_sp)
22828 {
22829 if (frame_reg_rtx == sp_reg_rtx)
22830 {
22831 /* Under V.4, don't reset the stack pointer until after we're done
22832 loading the saved registers. */
22833 if (DEFAULT_ABI == ABI_V4)
22834 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
22835
22836 insn = emit_move_insn (frame_reg_rtx,
22837 gen_rtx_MEM (Pmode, sp_reg_rtx));
22838 frame_off = 0;
22839 }
22840 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
22841 && DEFAULT_ABI == ABI_V4)
22842 /* frame_reg_rtx has been set up by the altivec restore. */
22843 ;
22844 else
22845 {
22846 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
22847 frame_reg_rtx = sp_reg_rtx;
22848 }
22849 }
22850 /* If we have a frame pointer, we can restore the old stack pointer
22851 from it. */
22852 else if (frame_pointer_needed)
22853 {
22854 frame_reg_rtx = sp_reg_rtx;
22855 if (DEFAULT_ABI == ABI_V4)
22856 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
22857 /* Prevent reordering memory accesses against stack pointer restore. */
22858 else if (cfun->calls_alloca
22859 || offset_below_red_zone_p (-info->total_size))
22860 rs6000_emit_stack_tie (frame_reg_rtx, true);
22861
22862 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
22863 GEN_INT (info->total_size)));
22864 frame_off = 0;
22865 }
22866 else if (info->push_p
22867 && DEFAULT_ABI != ABI_V4
22868 && !crtl->calls_eh_return)
22869 {
22870 /* Prevent reordering memory accesses against stack pointer restore. */
22871 if (cfun->calls_alloca
22872 || offset_below_red_zone_p (-info->total_size))
22873 rs6000_emit_stack_tie (frame_reg_rtx, false);
22874 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
22875 GEN_INT (info->total_size)));
22876 frame_off = 0;
22877 }
22878 if (insn && frame_reg_rtx == sp_reg_rtx)
22879 {
22880 if (cfa_restores)
22881 {
22882 REG_NOTES (insn) = cfa_restores;
22883 cfa_restores = NULL_RTX;
22884 }
22885 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
22886 RTX_FRAME_RELATED_P (insn) = 1;
22887 }
22888
22889 /* Restore AltiVec registers if we have not done so already. */
22890 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
22891 && TARGET_ALTIVEC_ABI
22892 && info->altivec_size != 0
22893 && (DEFAULT_ABI == ABI_V4
22894 || !offset_below_red_zone_p (info->altivec_save_offset)))
22895 {
22896 int i;
22897
22898 if ((strategy & REST_INLINE_VRS) == 0)
22899 {
22900 int end_save = info->altivec_save_offset + info->altivec_size;
22901 int ptr_off;
22902 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
22903 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
22904 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
22905
22906 if (end_save + frame_off != 0)
22907 {
22908 rtx offset = GEN_INT (end_save + frame_off);
22909
22910 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
22911 }
22912 else
22913 emit_move_insn (ptr_reg, frame_reg_rtx);
22914
22915 ptr_off = -end_save;
22916 insn = rs6000_emit_savres_rtx (info, scratch_reg,
22917 info->altivec_save_offset + ptr_off,
22918 0, V4SImode, SAVRES_VR);
22919 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
22920 {
22921 /* Frame reg was clobbered by out-of-line save. Restore it
22922 from ptr_reg, and if we are calling out-of-line gpr or
22923 fpr restore set up the correct pointer and offset. */
22924 unsigned newptr_regno = 1;
22925 if (!restoring_GPRs_inline)
22926 {
22927 bool lr = info->gp_save_offset + info->gp_size == 0;
22928 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
22929 newptr_regno = ptr_regno_for_savres (sel);
22930 end_save = info->gp_save_offset + info->gp_size;
22931 }
22932 else if (!restoring_FPRs_inline)
22933 {
22934 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
22935 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
22936 newptr_regno = ptr_regno_for_savres (sel);
22937 end_save = info->fp_save_offset + info->fp_size;
22938 }
22939
22940 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
22941 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
22942
22943 if (end_save + ptr_off != 0)
22944 {
22945 rtx offset = GEN_INT (end_save + ptr_off);
22946
22947 frame_off = -end_save;
22948 emit_insn (gen_add3_insn (frame_reg_rtx, ptr_reg, offset));
22949 }
22950 else
22951 {
22952 frame_off = ptr_off;
22953 emit_move_insn (frame_reg_rtx, ptr_reg);
22954 }
22955 }
22956 }
22957 else
22958 {
22959 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
22960 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
22961 {
22962 rtx addr, areg, mem, reg;
22963
22964 areg = gen_rtx_REG (Pmode, 0);
22965 emit_move_insn
22966 (areg, GEN_INT (info->altivec_save_offset
22967 + frame_off
22968 + 16 * (i - info->first_altivec_reg_save)));
22969
22970 /* AltiVec addressing mode is [reg+reg]. */
22971 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
22972 mem = gen_frame_mem (V4SImode, addr);
22973
22974 reg = gen_rtx_REG (V4SImode, i);
22975 emit_move_insn (reg, mem);
22976 }
22977 }
22978
22979 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
22980 if (((strategy & REST_INLINE_VRS) == 0
22981 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
22982 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
22983 {
22984 rtx reg = gen_rtx_REG (V4SImode, i);
22985 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
22986 }
22987 }
22988
22989 /* Restore VRSAVE if we have not done so already. */
22990 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
22991 && TARGET_ALTIVEC
22992 && TARGET_ALTIVEC_VRSAVE
22993 && info->vrsave_mask != 0
22994 && (DEFAULT_ABI == ABI_V4
22995 || !offset_below_red_zone_p (info->vrsave_save_offset)))
22996 {
22997 rtx reg;
22998
22999 reg = gen_rtx_REG (SImode, 12);
23000 emit_insn (gen_frame_load (reg, frame_reg_rtx,
23001 info->vrsave_save_offset + frame_off));
23002
23003 emit_insn (generate_set_vrsave (reg, info, 1));
23004 }
23005
23006 /* If we exit by an out-of-line restore function on ABI_V4 then that
23007 function will deallocate the stack, so we don't need to worry
23008 about the unwinder restoring cr from an invalid stack frame
23009 location. */
23010 exit_func = (!restoring_FPRs_inline
23011 || (!restoring_GPRs_inline
23012 && info->first_fp_reg_save == 64));
23013
23014 /* Get the old lr if we saved it. If we are restoring registers
23015 out-of-line, then the out-of-line routines can do this for us. */
23016 if (restore_lr && restoring_GPRs_inline)
23017 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
23018
23019 /* Get the old cr if we saved it. */
23020 if (info->cr_save_p)
23021 {
23022 unsigned cr_save_regno = 12;
23023
23024 if (!restoring_GPRs_inline)
23025 {
23026 /* Ensure we don't use the register used by the out-of-line
23027 gpr register restore below. */
23028 bool lr = info->gp_save_offset + info->gp_size == 0;
23029 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
23030 int gpr_ptr_regno = ptr_regno_for_savres (sel);
23031
23032 if (gpr_ptr_regno == 12)
23033 cr_save_regno = 11;
23034 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
23035 }
23036 else if (REGNO (frame_reg_rtx) == 12)
23037 cr_save_regno = 11;
23038
23039 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
23040 info->cr_save_offset + frame_off,
23041 exit_func);
23042 }
23043
23044 /* Set LR here to try to overlap restores below. */
23045 if (restore_lr && restoring_GPRs_inline)
23046 restore_saved_lr (0, exit_func);
23047
23048 /* Load exception handler data registers, if needed. */
23049 if (crtl->calls_eh_return)
23050 {
23051 unsigned int i, regno;
23052
23053 if (TARGET_AIX)
23054 {
23055 rtx reg = gen_rtx_REG (reg_mode, 2);
23056 emit_insn (gen_frame_load (reg, frame_reg_rtx,
23057 frame_off + 5 * reg_size));
23058 }
23059
23060 for (i = 0; ; ++i)
23061 {
23062 rtx mem;
23063
23064 regno = EH_RETURN_DATA_REGNO (i);
23065 if (regno == INVALID_REGNUM)
23066 break;
23067
23068 /* Note: possible use of r0 here to address SPE regs. */
23069 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
23070 info->ehrd_offset + frame_off
23071 + reg_size * (int) i);
23072
23073 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
23074 }
23075 }
23076
23077 /* Restore GPRs. This is done as a PARALLEL if we are using
23078 the load-multiple instructions. */
23079 if (TARGET_SPE_ABI
23080 && info->spe_64bit_regs_used
23081 && info->first_gp_reg_save != 32)
23082 {
23083 /* Determine whether we can address all of the registers that need
23084 to be saved with an offset from frame_reg_rtx that fits in
23085 the small const field for SPE memory instructions. */
23086 int spe_regs_addressable
23087 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
23088 + reg_size * (32 - info->first_gp_reg_save - 1))
23089 && restoring_GPRs_inline);
23090
23091 if (!spe_regs_addressable)
23092 {
23093 int ool_adjust = 0;
23094 rtx old_frame_reg_rtx = frame_reg_rtx;
23095 /* Make r11 point to the start of the SPE save area. We worried about
23096 not clobbering it when we were saving registers in the prologue.
23097 There's no need to worry here because the static chain is passed
23098 anew to every function. */
23099
23100 if (!restoring_GPRs_inline)
23101 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
23102 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
23103 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
23104 GEN_INT (info->spe_gp_save_offset
23105 + frame_off
23106 - ool_adjust)));
23107 /* Keep the invariant that frame_reg_rtx + frame_off points
23108 at the top of the stack frame. */
23109 frame_off = -info->spe_gp_save_offset + ool_adjust;
23110 }
23111
23112 if (restoring_GPRs_inline)
23113 {
23114 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
23115
23116 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23117 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23118 {
23119 rtx offset, addr, mem, reg;
23120
23121 /* We're doing all this to ensure that the immediate offset
23122 fits into the immediate field of 'evldd'. */
23123 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
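/* Assumed encoding detail, for illustration: SPE_CONST_OFFSET_OK
   checks that the offset fits the evldd/evstdd displacement, a small
   unsigned immediate scaled by 8, which is why the save area pointer
   may need to be rebased above.  */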
23124
23125 offset = GEN_INT (spe_offset + reg_size * i);
23126 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
23127 mem = gen_rtx_MEM (V2SImode, addr);
23128 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
23129
23130 emit_move_insn (reg, mem);
23131 }
23132 }
23133 else
23134 rs6000_emit_savres_rtx (info, frame_reg_rtx,
23135 info->spe_gp_save_offset + frame_off,
23136 info->lr_save_offset + frame_off,
23137 reg_mode,
23138 SAVRES_GPR | SAVRES_LR);
23139 }
23140 else if (!restoring_GPRs_inline)
23141 {
23142 /* We are jumping to an out-of-line function. */
23143 rtx ptr_reg;
23144 int end_save = info->gp_save_offset + info->gp_size;
23145 bool can_use_exit = end_save == 0;
23146 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
23147 int ptr_off;
23148
23149 /* Emit stack reset code if we need it. */
23150 ptr_regno = ptr_regno_for_savres (sel);
23151 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23152 if (can_use_exit)
23153 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
23154 else if (end_save + frame_off != 0)
23155 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
23156 GEN_INT (end_save + frame_off)));
23157 else if (REGNO (frame_reg_rtx) != ptr_regno)
23158 emit_move_insn (ptr_reg, frame_reg_rtx);
23159 if (REGNO (frame_reg_rtx) == ptr_regno)
23160 frame_off = -end_save;
23161
23162 if (can_use_exit && info->cr_save_p)
23163 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
23164
23165 ptr_off = -end_save;
23166 rs6000_emit_savres_rtx (info, ptr_reg,
23167 info->gp_save_offset + ptr_off,
23168 info->lr_save_offset + ptr_off,
23169 reg_mode, sel);
23170 }
23171 else if (using_load_multiple)
23172 {
23173 rtvec p;
23174 p = rtvec_alloc (32 - info->first_gp_reg_save);
23175 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23176 RTVEC_ELT (p, i)
23177 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23178 frame_reg_rtx,
23179 info->gp_save_offset + frame_off + reg_size * i);
23180 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23181 }
23182 else
23183 {
23184 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23185 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23186 emit_insn (gen_frame_load
23187 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23188 frame_reg_rtx,
23189 info->gp_save_offset + frame_off + reg_size * i));
23190 }
23191
23192 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
23193 {
23194 /* If the frame pointer was used then we can't delay emitting
23195 a REG_CFA_DEF_CFA note. This must happen on the insn that
23196 restores the frame pointer, r31. We may have already emitted
23197 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
23198 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
23199 be harmless if emitted. */
23200 if (frame_pointer_needed)
23201 {
23202 insn = get_last_insn ();
23203 add_reg_note (insn, REG_CFA_DEF_CFA,
23204 plus_constant (Pmode, frame_reg_rtx, frame_off));
23205 RTX_FRAME_RELATED_P (insn) = 1;
23206 }
23207
23208 /* Set up cfa_restores. We always need these when
23209 shrink-wrapping. If not shrink-wrapping then we only need
23210 the cfa_restore when the stack location is no longer valid.
23211 The cfa_restores must be emitted on or before the insn that
23212 invalidates the stack, and of course must not be emitted
23213 before the insn that actually does the restore. The latter
23214 is why it is a bad idea to emit the cfa_restores as a group
23215 on the last instruction here that actually does a restore:
23216 That insn may be reordered with respect to others doing
23217 restores. */
23218 if (flag_shrink_wrap
23219 && !restoring_GPRs_inline
23220 && info->first_fp_reg_save == 64)
23221 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
23222
23223 for (i = info->first_gp_reg_save; i < 32; i++)
23224 if (!restoring_GPRs_inline
23225 || using_load_multiple
23226 || rs6000_reg_live_or_pic_offset_p (i))
23227 {
23228 rtx reg = gen_rtx_REG (reg_mode, i);
23229
23230 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
23231 }
23232 }
23233
23234 if (!restoring_GPRs_inline
23235 && info->first_fp_reg_save == 64)
23236 {
23237 /* We are jumping to an out-of-line function. */
23238 if (cfa_restores)
23239 emit_cfa_restores (cfa_restores);
23240 return;
23241 }
23242
23243 if (restore_lr && !restoring_GPRs_inline)
23244 {
23245 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
23246 restore_saved_lr (0, exit_func);
23247 }
23248
23249 /* Restore fpr's if we need to do it without calling a function. */
23250 if (restoring_FPRs_inline)
23251 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23252 if (save_reg_p (info->first_fp_reg_save + i))
23253 {
23254 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23255 ? DFmode : SFmode),
23256 info->first_fp_reg_save + i);
23257 emit_insn (gen_frame_load (reg, frame_reg_rtx,
23258 info->fp_save_offset + frame_off + 8 * i));
23259 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
23260 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
23261 }
23262
23263 /* If we saved cr, restore it here. Just those that were used. */
23264 if (info->cr_save_p)
23265 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
23266
23267 /* If this is V.4, unwind the stack pointer after all of the loads
23268 have been done, or set up r11 if we are restoring fp out of line. */
23269 ptr_regno = 1;
23270 if (!restoring_FPRs_inline)
23271 {
23272 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
23273 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
23274 ptr_regno = ptr_regno_for_savres (sel);
23275 }
23276
23277 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
23278 if (REGNO (frame_reg_rtx) == ptr_regno)
23279 frame_off = 0;
23280
23281 if (insn && restoring_FPRs_inline)
23282 {
23283 if (cfa_restores)
23284 {
23285 REG_NOTES (insn) = cfa_restores;
23286 cfa_restores = NULL_RTX;
23287 }
23288 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
23289 RTX_FRAME_RELATED_P (insn) = 1;
23290 }
23291
23292 if (crtl->calls_eh_return)
23293 {
23294 rtx sa = EH_RETURN_STACKADJ_RTX;
23295 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
23296 }
23297
23298 if (!sibcall)
23299 {
23300 rtvec p;
23301 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
23302 if (! restoring_FPRs_inline)
23303 {
23304 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
23305 RTVEC_ELT (p, 0) = ret_rtx;
23306 }
23307 else
23308 {
23309 if (cfa_restores)
23310 {
23311 /* We can't hang the cfa_restores off a simple return,
23312 since the shrink-wrap code sometimes uses an existing
23313 return. This means there might be a path from
23314 pre-prologue code to this return, and dwarf2cfi code
23315 wants the eh_frame unwinder state to be the same on
23316 all paths to any point. So we need to emit the
23317 cfa_restores before the return. For -m64 we really
23318 don't need epilogue cfa_restores at all, except for
23319 this irritating dwarf2cfi-with-shrink-wrap
23320 requirement; the stack red zone means eh_frame info
23321 from the prologue telling the unwinder to restore
23322 from the stack is perfectly good right to the end of
23323 the function. */
23324 emit_insn (gen_blockage ());
23325 emit_cfa_restores (cfa_restores);
23326 cfa_restores = NULL_RTX;
23327 }
23328 p = rtvec_alloc (2);
23329 RTVEC_ELT (p, 0) = simple_return_rtx;
23330 }
23331
23332 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
23333 ? gen_rtx_USE (VOIDmode,
23334 gen_rtx_REG (Pmode, LR_REGNO))
23335 : gen_rtx_CLOBBER (VOIDmode,
23336 gen_rtx_REG (Pmode, LR_REGNO)));
23337
23338 /* If we have to restore more than two FP registers, branch to the
23339 restore function. It will return to our caller. */
23340 if (! restoring_FPRs_inline)
23341 {
23342 int i;
23343 rtx sym;
23344
23345 if (flag_shrink_wrap)
23346 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
23347
23348 sym = rs6000_savres_routine_sym (info,
23349 SAVRES_FPR | (lr ? SAVRES_LR : 0));
23350 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
23351 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode,
23352 gen_rtx_REG (Pmode,
23353 DEFAULT_ABI == ABI_AIX
23354 ? 1 : 11));
23355 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23356 {
23357 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
23358
23359 RTVEC_ELT (p, i + 4)
23360 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
23361 if (flag_shrink_wrap)
23362 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
23363 cfa_restores);
23364 }
23365 }
23366
23367 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
23368 }
23369
23370 if (cfa_restores)
23371 {
23372 if (sibcall)
23373 /* Ensure the cfa_restores are hung off an insn that won't
23374 be reordered above other restores. */
23375 emit_insn (gen_blockage ());
23376
23377 emit_cfa_restores (cfa_restores);
23378 }
23379 }
23380
23381 /* Write function epilogue. */
23382
23383 static void
23384 rs6000_output_function_epilogue (FILE *file,
23385 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
23386 {
23387 #if TARGET_MACHO
23388 macho_branch_islands ();
23389 /* Mach-O doesn't support labels at the end of objects, so if
23390 it looks like we might want one, insert a NOP. */
23391 {
23392 rtx insn = get_last_insn ();
23393 rtx deleted_debug_label = NULL_RTX;
23394 while (insn
23395 && NOTE_P (insn)
23396 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
23397 {
23398 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
23399 notes only, instead set their CODE_LABEL_NUMBER to -1,
23400 otherwise there would be code generation differences
23401 in between -g and -g0. */
23402 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
23403 deleted_debug_label = insn;
23404 insn = PREV_INSN (insn);
23405 }
23406 if (insn
23407 && (LABEL_P (insn)
23408 || (NOTE_P (insn)
23409 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
23410 fputs ("\tnop\n", file);
23411 else if (deleted_debug_label)
23412 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
23413 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
23414 CODE_LABEL_NUMBER (insn) = -1;
23415 }
23416 #endif
23417
23418 /* Output a traceback table here. See /usr/include/sys/debug.h for info
23419 on its format.
23420
23421 We don't output a traceback table if -finhibit-size-directive was
23422 used. The documentation for -finhibit-size-directive reads
23423 ``don't output a @code{.size} assembler directive, or anything
23424 else that would cause trouble if the function is split in the
23425 middle, and the two halves are placed at locations far apart in
23426 memory.'' The traceback table has this property, since it
23427 includes the offset from the start of the function to the
23428 traceback table itself.
23429
23430 System V.4 PowerPC (and the embedded ABI derived from it) uses a
23431 different traceback table. */
23432 if (DEFAULT_ABI == ABI_AIX && ! flag_inhibit_size_directive
23433 && rs6000_traceback != traceback_none && !cfun->is_thunk)
23434 {
23435 const char *fname = NULL;
23436 const char *language_string = lang_hooks.name;
23437 int fixed_parms = 0, float_parms = 0, parm_info = 0;
23438 int i;
23439 int optional_tbtab;
23440 rs6000_stack_t *info = rs6000_stack_info ();
23441
23442 if (rs6000_traceback == traceback_full)
23443 optional_tbtab = 1;
23444 else if (rs6000_traceback == traceback_part)
23445 optional_tbtab = 0;
23446 else
23447 optional_tbtab = !optimize_size && !TARGET_ELF;
23448
23449 if (optional_tbtab)
23450 {
23451 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
23452 while (*fname == '.') /* V.4 encodes . in the name */
23453 fname++;
23454
23455 /* Need label immediately before tbtab, so we can compute
23456 its offset from the function start. */
23457 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
23458 ASM_OUTPUT_LABEL (file, fname);
23459 }
23460
23461 /* The .tbtab pseudo-op can only be used for the first eight
23462 expressions, since it can't handle the possibly variable
23463 length fields that follow. However, if you omit the optional
23464 fields, the assembler outputs zeros for all optional fields
23465 anyway, giving each variable-length field its minimum length
23466 (as defined in sys/debug.h). Thus we cannot use the .tbtab
23467 pseudo-op at all. */
23468
23469 /* An all-zero word flags the start of the tbtab, for debuggers
23470 that have to find it by searching forward from the entry
23471 point or from the current pc. */
23472 fputs ("\t.long 0\n", file);
23473
23474 /* Tbtab format type. Use format type 0. */
23475 fputs ("\t.byte 0,", file);
23476
23477 /* Language type. Unfortunately, there does not seem to be any
23478 official way to discover the language being compiled, so we
23479 use language_string.
23480 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
23481 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
23482 a number, so for now use 9. LTO and Go aren't assigned numbers
23483 either, so for now use 0. */
23484 if (! strcmp (language_string, "GNU C")
23485 || ! strcmp (language_string, "GNU GIMPLE")
23486 || ! strcmp (language_string, "GNU Go"))
23487 i = 0;
23488 else if (! strcmp (language_string, "GNU F77")
23489 || ! strcmp (language_string, "GNU Fortran"))
23490 i = 1;
23491 else if (! strcmp (language_string, "GNU Pascal"))
23492 i = 2;
23493 else if (! strcmp (language_string, "GNU Ada"))
23494 i = 3;
23495 else if (! strcmp (language_string, "GNU C++")
23496 || ! strcmp (language_string, "GNU Objective-C++"))
23497 i = 9;
23498 else if (! strcmp (language_string, "GNU Java"))
23499 i = 13;
23500 else if (! strcmp (language_string, "GNU Objective-C"))
23501 i = 14;
23502 else
23503 gcc_unreachable ();
23504 fprintf (file, "%d,", i);
23505
23506 /* 8 single bit fields: global linkage (not set for C extern linkage,
23507 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
23508 from start of procedure stored in tbtab, internal function, function
23509 has controlled storage, function has no toc, function uses fp,
23510 function logs/aborts fp operations. */
23511 /* Assume that fp operations are used if any fp reg must be saved. */
23512 fprintf (file, "%d,",
23513 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
23514
23515 /* 6 bitfields: function is interrupt handler, name present in
23516 proc table, function calls alloca, on condition directives
23517 (controls stack walks, 3 bits), saves condition reg, saves
23518 link reg. */
23519 /* The `function calls alloca' bit seems to be set whenever reg 31 is
23520 set up as a frame pointer, even when there is no alloca call. */
23521 fprintf (file, "%d,",
23522 ((optional_tbtab << 6)
23523 | ((optional_tbtab & frame_pointer_needed) << 5)
23524 | (info->cr_save_p << 1)
23525 | (info->lr_save_p)));
23526
23527 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
23528 (6 bits). */
23529 fprintf (file, "%d,",
23530 (info->push_p << 7) | (64 - info->first_fp_reg_save));
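/* Worked example for the byte above: saving f14-f31 gives
   first_fp_reg_save == 46 (FP registers are GCC regnos 32-63), so
   64 - 46 == 18 FPRs, and with the backchain pushed the byte is
   (1 << 7) | 18 == 146.  */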
23531
23532 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
23533 fprintf (file, "%d,", (32 - first_reg_to_save ()));
23534
23535 if (optional_tbtab)
23536 {
23537 /* Compute the parameter info from the function decl argument
23538 list. */
23539 tree decl;
23540 int next_parm_info_bit = 31;
23541
23542 for (decl = DECL_ARGUMENTS (current_function_decl);
23543 decl; decl = DECL_CHAIN (decl))
23544 {
23545 rtx parameter = DECL_INCOMING_RTL (decl);
23546 enum machine_mode mode = GET_MODE (parameter);
23547
23548 if (GET_CODE (parameter) == REG)
23549 {
23550 if (SCALAR_FLOAT_MODE_P (mode))
23551 {
23552 int bits;
23553
23554 float_parms++;
23555
23556 switch (mode)
23557 {
23558 case SFmode:
23559 case SDmode:
23560 bits = 0x2;
23561 break;
23562
23563 case DFmode:
23564 case DDmode:
23565 case TFmode:
23566 case TDmode:
23567 bits = 0x3;
23568 break;
23569
23570 default:
23571 gcc_unreachable ();
23572 }
23573
23574 /* If only one bit will fit, don't OR in this entry. */
23575 if (next_parm_info_bit > 0)
23576 parm_info |= (bits << (next_parm_info_bit - 1));
23577 next_parm_info_bit -= 2;
23578 }
23579 else
23580 {
23581 fixed_parms += ((GET_MODE_SIZE (mode)
23582 + (UNITS_PER_WORD - 1))
23583 / UNITS_PER_WORD);
23584 next_parm_info_bit -= 1;
23585 }
23586 }
23587 }
23588 }
23589
23590 /* Number of fixed point parameters. */
23591 /* This is actually the number of words of fixed point parameters; thus
23592 an 8-byte struct counts as 2, and the maximum value is 8. */
23593 fprintf (file, "%d,", fixed_parms);
23594
23595 /* 2 bitfields: number of floating point parameters (7 bits), parameters
23596 all on stack. */
23597 /* This is actually the number of fp registers that hold parameters;
23598 and thus the maximum value is 13. */
23599 /* Set parameters on stack bit if parameters are not in their original
23600 registers, regardless of whether they are on the stack? Xlc
23601 seems to set the bit when not optimizing. */
23602 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
23603
23604 if (! optional_tbtab)
23605 return;
23606
23607 /* Optional fields follow. Some are variable length. */
23608
23609 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
23610 11 double float. */
23611 /* There is an entry for each parameter in a register, in the order that
23612 they occur in the parameter list. Any intervening arguments on the
23613 stack are ignored. If the list overflows a long (max possible length
23614 34 bits) then completely leave off all elements that don't fit. */
23615 /* Only emit this long if there was at least one parameter. */
23616 if (fixed_parms || float_parms)
23617 fprintf (file, "\t.long %d\n", parm_info);
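/* Worked example of the encoding just emitted: for a function taking
   (int, double), the int consumes bit 31 as 0 and the double consumes
   bits 30-29 as 11, so parm_info == 0x60000000.  */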
23618
23619 /* Offset from start of code to tb table. */
23620 fputs ("\t.long ", file);
23621 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
23622 RS6000_OUTPUT_BASENAME (file, fname);
23623 putc ('-', file);
23624 rs6000_output_function_entry (file, fname);
23625 putc ('\n', file);
23626
23627 /* Interrupt handler mask. */
23628 /* Omit this long, since we never set the interrupt handler bit
23629 above. */
23630
23631 /* Number of CTL (controlled storage) anchors. */
23632 /* Omit this long, since the has_ctl bit is never set above. */
23633
23634 /* Displacement into stack of each CTL anchor. */
23635 /* Omit this list of longs, because there are no CTL anchors. */
23636
23637 /* Length of function name. */
23638 if (*fname == '*')
23639 ++fname;
23640 fprintf (file, "\t.short %d\n", (int) strlen (fname));
23641
23642 /* Function name. */
23643 assemble_string (fname, strlen (fname));
23644
23645 /* Register for alloca automatic storage; this is always reg 31.
23646 Only emit this if the alloca bit was set above. */
23647 if (frame_pointer_needed)
23648 fputs ("\t.byte 31\n", file);
23649
23650 fputs ("\t.align 2\n", file);
23651 }
23652 }
23653 \f
23654 /* A C compound statement that outputs the assembler code for a thunk
23655 function, used to implement C++ virtual function calls with
23656 multiple inheritance. The thunk acts as a wrapper around a virtual
23657 function, adjusting the implicit object parameter before handing
23658 control off to the real function.
23659
23660 First, emit code to add the integer DELTA to the location that
23661 contains the incoming first argument. Assume that this argument
23662 contains a pointer, and is the one used to pass the `this' pointer
23663 in C++. This is the incoming argument *before* the function
23664 prologue, e.g. `%o0' on a sparc. The addition must preserve the
23665 values of all other incoming arguments.
23666
23667 After the addition, emit code to jump to FUNCTION, which is a
23668 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
23669 not touch the return address. Hence returning from FUNCTION will
23670 return to whoever called the current `thunk'.
23671
23672 The effect must be as if FUNCTION had been called directly with the
23673 adjusted first argument. This macro is responsible for emitting
23674 all of the code for a thunk function; output_function_prologue()
23675 and output_function_epilogue() are not invoked.
23676
23677 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
23678 been extracted from it.) It might possibly be useful on some
23679 targets, but probably not.
23680
23681 If you do not define this macro, the target-independent code in the
23682 C++ frontend will generate a less efficient heavyweight thunk that
23683 calls FUNCTION instead of jumping to it. The generic approach does
23684 not support varargs. */
23685
23686 static void
23687 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
23688 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
23689 tree function)
23690 {
23691 rtx this_rtx, insn, funexp;
23692
23693 reload_completed = 1;
23694 epilogue_completed = 1;
23695
23696 /* Mark the end of the (empty) prologue. */
23697 emit_note (NOTE_INSN_PROLOGUE_END);
23698
23699 /* Find the "this" pointer. If the function returns a structure,
23700 the structure return pointer is in r3. */
23701 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
23702 this_rtx = gen_rtx_REG (Pmode, 4);
23703 else
23704 this_rtx = gen_rtx_REG (Pmode, 3);
23705
23706 /* Apply the constant offset, if required. */
23707 if (delta)
23708 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
23709
23710 /* Apply the offset from the vtable, if required. */
23711 if (vcall_offset)
23712 {
23713 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
23714 rtx tmp = gen_rtx_REG (Pmode, 12);
23715
23716 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
23717 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
23718 {
23719 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
23720 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
23721 }
23722 else
23723 {
23724 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
23725
23726 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
23727 }
23728 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
23729 }
23730
23731 /* Generate a tail call to the target function. */
23732 if (!TREE_USED (function))
23733 {
23734 assemble_external (function);
23735 TREE_USED (function) = 1;
23736 }
23737 funexp = XEXP (DECL_RTL (function), 0);
23738 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
23739
23740 #if TARGET_MACHO
23741 if (MACHOPIC_INDIRECT)
23742 funexp = machopic_indirect_call_target (funexp);
23743 #endif
23744
23745 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
23746 generate sibcall RTL explicitly. */
23747 insn = emit_call_insn (
23748 gen_rtx_PARALLEL (VOIDmode,
23749 gen_rtvec (4,
23750 gen_rtx_CALL (VOIDmode,
23751 funexp, const0_rtx),
23752 gen_rtx_USE (VOIDmode, const0_rtx),
23753 gen_rtx_USE (VOIDmode,
23754 gen_rtx_REG (SImode,
23755 LR_REGNO)),
23756 simple_return_rtx)));
23757 SIBLING_CALL_P (insn) = 1;
23758 emit_barrier ();
23759
23760 /* Run just enough of rest_of_compilation to get the insns emitted.
23761 There's not really enough bulk here to make other passes such as
23762 instruction scheduling worth while. Note that use_thunk calls
23763 assemble_start_function and assemble_end_function. */
23764 insn = get_insns ();
23765 shorten_branches (insn);
23766 final_start_function (insn, file, 1);
23767 final (insn, file, 1);
23768 final_end_function ();
23769
23770 reload_completed = 0;
23771 epilogue_completed = 0;
23772 }
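/* A rough, hedged sketch of what the above emits for a delta-only
   thunk on ELF (the target name is hypothetical, and the delta must
   fit in 16 bits for a single addi):

	addi r3,r3,<delta>
	b <function>

   With a nonzero vcall_offset there is additionally a load of the
   vtable pointer through r12, a load of the adjustment, and an add
   into r3 before the branch.  */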
23773 \f
23774 /* A quick summary of the various types of 'constant-pool tables'
23775 under PowerPC:
23776
23777 Target Flags Name One table per
23778 AIX (none) AIX TOC object file
23779 AIX -mfull-toc AIX TOC object file
23780 AIX -mminimal-toc AIX minimal TOC translation unit
23781 SVR4/EABI (none) SVR4 SDATA object file
23782 SVR4/EABI -fpic SVR4 pic object file
23783 SVR4/EABI -fPIC SVR4 PIC translation unit
23784 SVR4/EABI -mrelocatable EABI TOC function
23785 SVR4/EABI -maix AIX TOC object file
23786 SVR4/EABI -maix -mminimal-toc
23787 AIX minimal TOC translation unit
23788
23789 Name Reg. Set by entries contains:
23790 made by addrs? fp? sum?
23791
23792 AIX TOC 2 crt0 as Y option option
23793 AIX minimal TOC 30 prolog gcc Y Y option
23794 SVR4 SDATA 13 crt0 gcc N Y N
23795 SVR4 pic 30 prolog ld Y not yet N
23796 SVR4 PIC 30 prolog gcc Y option option
23797 EABI TOC 30 prolog gcc Y option option
23798
23799 */
23800
23801 /* Hash functions for the hash table. */
23802
23803 static unsigned
23804 rs6000_hash_constant (rtx k)
23805 {
23806 enum rtx_code code = GET_CODE (k);
23807 enum machine_mode mode = GET_MODE (k);
23808 unsigned result = (code << 3) ^ mode;
23809 const char *format;
23810 int flen, fidx;
23811
23812 format = GET_RTX_FORMAT (code);
23813 flen = strlen (format);
23814 fidx = 0;
23815
23816 switch (code)
23817 {
23818 case LABEL_REF:
23819 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
23820
23821 case CONST_DOUBLE:
23822 if (mode != VOIDmode)
23823 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
23824 flen = 2;
23825 break;
23826
23827 case CODE_LABEL:
23828 fidx = 3;
23829 break;
23830
23831 default:
23832 break;
23833 }
23834
23835 for (; fidx < flen; fidx++)
23836 switch (format[fidx])
23837 {
23838 case 's':
23839 {
23840 unsigned i, len;
23841 const char *str = XSTR (k, fidx);
23842 len = strlen (str);
23843 result = result * 613 + len;
23844 for (i = 0; i < len; i++)
23845 result = result * 613 + (unsigned) str[i];
23846 break;
23847 }
23848 case 'u':
23849 case 'e':
23850 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
23851 break;
23852 case 'i':
23853 case 'n':
23854 result = result * 613 + (unsigned) XINT (k, fidx);
23855 break;
23856 case 'w':
23857 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
23858 result = result * 613 + (unsigned) XWINT (k, fidx);
23859 else
23860 {
23861 size_t i;
23862 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
23863 result = result * 613 + (unsigned) (XWINT (k, fidx)
23864 >> CHAR_BIT * i);
23865 }
23866 break;
23867 case '0':
23868 break;
23869 default:
23870 gcc_unreachable ();
23871 }
23872
23873 return result;
23874 }
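/* Illustrative trace: for a SYMBOL_REF the 's' case above folds the
   string length and then every character into the running hash, so
   two symbols hash differently whenever their names differ; 613 and
   1231 are simply odd multipliers used to spread the bits.  */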
23875
23876 static unsigned
23877 toc_hash_function (const void *hash_entry)
23878 {
23879 const struct toc_hash_struct *thc =
23880 (const struct toc_hash_struct *) hash_entry;
23881 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
23882 }
23883
23884 /* Compare H1 and H2 for equivalence. */
23885
23886 static int
23887 toc_hash_eq (const void *h1, const void *h2)
23888 {
23889 rtx r1 = ((const struct toc_hash_struct *) h1)->key;
23890 rtx r2 = ((const struct toc_hash_struct *) h2)->key;
23891
23892 if (((const struct toc_hash_struct *) h1)->key_mode
23893 != ((const struct toc_hash_struct *) h2)->key_mode)
23894 return 0;
23895
23896 return rtx_equal_p (r1, r2);
23897 }
23898
23899 /* These are the names given by the C++ front-end to vtables and
23900 vtable-like objects. Ideally, this logic should not be here;
23901 instead, there should be some programmatic way of inquiring as
23902 to whether or not an object is a vtable. */
23903
23904 #define VTABLE_NAME_P(NAME) \
23905 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
23906 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
23907 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
23908 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
23909 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
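/* For reference: in the Itanium C++ ABI mangling that GCC uses, _ZTV
   prefixes a vtable, _ZTT a VTT, _ZTI a type_info object and _ZTC a
   construction vtable; "_vt." is the older GCC 2.x vtable scheme.  */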
23910
23911 #ifdef NO_DOLLAR_IN_LABEL
23912 /* Return a GGC-allocated character string translating dollar signs in
23913 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
23914
23915 const char *
23916 rs6000_xcoff_strip_dollar (const char *name)
23917 {
23918 char *strip, *p;
23919 const char *q;
23920 size_t len;
23921
23922 q = (const char *) strchr (name, '$');
23923
23924 if (q == 0 || q == name)
23925 return name;
23926
23927 len = strlen (name);
23928 strip = XALLOCAVEC (char, len + 1);
23929 strcpy (strip, name);
23930 p = strip + (q - name);
23931 while (p)
23932 {
23933 *p = '_';
23934 p = strchr (p + 1, '$');
23935 }
23936
23937 return ggc_alloc_string (strip, len);
23938 }
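/* Example: "foo$bar$baz" is returned as "foo_bar_baz".  A name whose
   first character is '$' (q == name) is deliberately returned
   unchanged above.  */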
23939 #endif
23940
23941 void
23942 rs6000_output_symbol_ref (FILE *file, rtx x)
23943 {
23944 /* Currently C++ TOC references to vtables can be emitted before it
23945 is decided whether the vtable is public or private. If this is
23946 the case, then the linker will eventually complain that there is
23947 a reference to an unknown section. Thus, for vtables only,
23948 we emit the TOC reference to reference the symbol and not the
23949 section. */
23950 const char *name = XSTR (x, 0);
23951
23952 if (VTABLE_NAME_P (name))
23953 {
23954 RS6000_OUTPUT_BASENAME (file, name);
23955 }
23956 else
23957 assemble_name (file, name);
23958 }
23959
23960 /* Output a TOC entry. We derive the entry name from what is being
23961 written. */
23962
23963 void
23964 output_toc (FILE *file, rtx x, int labelno, enum machine_mode mode)
23965 {
23966 char buf[256];
23967 const char *name = buf;
23968 rtx base = x;
23969 HOST_WIDE_INT offset = 0;
23970
23971 gcc_assert (!TARGET_NO_TOC);
23972
23973 /* When the linker won't eliminate them, don't output duplicate
23974 TOC entries (this happens on AIX if there is any kind of TOC,
23975 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
23976 CODE_LABELs. */
23977 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
23978 {
23979 struct toc_hash_struct *h;
23980 void **found;
23981
23982 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
23983 time because GGC is not initialized at that point. */
23984 if (toc_hash_table == NULL)
23985 toc_hash_table = htab_create_ggc (1021, toc_hash_function,
23986 toc_hash_eq, NULL);
23987
23988 h = ggc_alloc_toc_hash_struct ();
23989 h->key = x;
23990 h->key_mode = mode;
23991 h->labelno = labelno;
23992
23993 found = htab_find_slot (toc_hash_table, h, INSERT);
23994 if (*found == NULL)
23995 *found = h;
23996 else /* This is indeed a duplicate.
23997 Set this label equal to that label. */
23998 {
23999 fputs ("\t.set ", file);
24000 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
24001 fprintf (file, "%d,", labelno);
24002 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
24003 fprintf (file, "%d\n", ((*(const struct toc_hash_struct **)
24004 found)->labelno));
24005
24006 #ifdef HAVE_AS_TLS
24007 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
24008 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
24009 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
24010 {
24011 fputs ("\t.set ", file);
24012 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
24013 fprintf (file, "%d,", labelno);
24014 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
24015 fprintf (file, "%d\n", ((*(const struct toc_hash_struct **)
24016 found)->labelno));
24017 }
24018 #endif
24019 return;
24020 }
24021 }
24022
24023 /* If we're going to put a double constant in the TOC, make sure it's
24024 aligned properly when strict alignment is on. */
24025 if (GET_CODE (x) == CONST_DOUBLE
24026 && STRICT_ALIGNMENT
24027 && GET_MODE_BITSIZE (mode) >= 64
24028 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
24029 ASM_OUTPUT_ALIGN (file, 3);
24031
24032 (*targetm.asm_out.internal_label) (file, "LC", labelno);
24033
24034 /* Handle FP constants specially. Note that if we have a minimal
24035 TOC, things we put here aren't actually in the TOC, so we can allow
24036 FP constants. */
24037 if (GET_CODE (x) == CONST_DOUBLE
24038 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode))
24039 {
24040 REAL_VALUE_TYPE rv;
24041 long k[4];
24042
24043 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
24044 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
24045 REAL_VALUE_TO_TARGET_DECIMAL128 (rv, k);
24046 else
24047 REAL_VALUE_TO_TARGET_LONG_DOUBLE (rv, k);
24048
24049 if (TARGET_64BIT)
24050 {
24051 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24052 fputs (DOUBLE_INT_ASM_OP, file);
24053 else
24054 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
24055 k[0] & 0xffffffff, k[1] & 0xffffffff,
24056 k[2] & 0xffffffff, k[3] & 0xffffffff);
24057 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
24058 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
24059 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
24060 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
24061 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
24062 return;
24063 }
24064 else
24065 {
24066 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24067 fputs ("\t.long ", file);
24068 else
24069 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
24070 k[0] & 0xffffffff, k[1] & 0xffffffff,
24071 k[2] & 0xffffffff, k[3] & 0xffffffff);
24072 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
24073 k[0] & 0xffffffff, k[1] & 0xffffffff,
24074 k[2] & 0xffffffff, k[3] & 0xffffffff);
24075 return;
24076 }
24077 }
24078 else if (GET_CODE (x) == CONST_DOUBLE
24079 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
24080 {
24081 REAL_VALUE_TYPE rv;
24082 long k[2];
24083
24084 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
24085
24086 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
24087 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, k);
24088 else
24089 REAL_VALUE_TO_TARGET_DOUBLE (rv, k);
24090
24091 if (TARGET_64BIT)
24092 {
24093 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24094 fputs (DOUBLE_INT_ASM_OP, file);
24095 else
24096 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
24097 k[0] & 0xffffffff, k[1] & 0xffffffff);
24098 fprintf (file, "0x%lx%08lx\n",
24099 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
24100 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
24101 return;
24102 }
24103 else
24104 {
24105 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24106 fputs ("\t.long ", file);
24107 else
24108 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
24109 k[0] & 0xffffffff, k[1] & 0xffffffff);
24110 fprintf (file, "0x%lx,0x%lx\n",
24111 k[0] & 0xffffffff, k[1] & 0xffffffff);
24112 return;
24113 }
24114 }
24115 else if (GET_CODE (x) == CONST_DOUBLE
24116 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
24117 {
24118 REAL_VALUE_TYPE rv;
24119 long l;
24120
24121 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
24122 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
24123 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
24124 else
24125 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
24126
24127 if (TARGET_64BIT)
24128 {
24129 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24130 fputs (DOUBLE_INT_ASM_OP, file);
24131 else
24132 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
24133 if (WORDS_BIG_ENDIAN)
24134 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
24135 else
24136 fprintf (file, "0x%lx\n", l & 0xffffffff);
24137 return;
24138 }
24139 else
24140 {
24141 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24142 fputs ("\t.long ", file);
24143 else
24144 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
24145 fprintf (file, "0x%lx\n", l & 0xffffffff);
24146 return;
24147 }
24148 }
24149 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
24150 {
24151 unsigned HOST_WIDE_INT low;
24152 HOST_WIDE_INT high;
24153
24154 low = INTVAL (x) & 0xffffffff;
24155 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
24156
24157 /* TOC entries are always Pmode-sized, so when big-endian
24158 smaller integer constants in the TOC need to be padded.
24159 (This is still a win over putting the constants in
24160 a separate constant pool, because then we'd have
24161 to have both a TOC entry _and_ the actual constant.)
24162
24163 For a 32-bit target, CONST_INT values are loaded and shifted
24164 entirely within `low' and can be stored in one TOC entry. */
24165
24166 /* It would be easy to make this work, but it isn't implemented now. */
24167 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
24168
24169 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
24170 {
24171 low |= high << 32;
24172 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
24173 high = (HOST_WIDE_INT) low >> 32;
24174 low &= 0xffffffff;
24175 }
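/* As a worked example (values illustrative): an SImode constant
0x12345678 in a 64-bit big-endian TOC arrives here as low = 0x12345678,
high = 0; the shift above then leaves high = 0x12345678, low = 0, so
the entry below is emitted as 0x1234567800000000. */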
24176
24177 if (TARGET_64BIT)
24178 {
24179 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24180 fputs (DOUBLE_INT_ASM_OP, file);
24181 else
24182 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
24183 (long) high & 0xffffffff, (long) low & 0xffffffff);
24184 fprintf (file, "0x%lx%08lx\n",
24185 (long) high & 0xffffffff, (long) low & 0xffffffff);
24186 return;
24187 }
24188 else
24189 {
24190 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
24191 {
24192 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24193 fputs ("\t.long ", file);
24194 else
24195 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
24196 (long) high & 0xffffffff, (long) low & 0xffffffff);
24197 fprintf (file, "0x%lx,0x%lx\n",
24198 (long) high & 0xffffffff, (long) low & 0xffffffff);
24199 }
24200 else
24201 {
24202 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24203 fputs ("\t.long ", file);
24204 else
24205 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
24206 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
24207 }
24208 return;
24209 }
24210 }
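/* A sketch of the output produced above (the exact directives depend on
the target): a DFmode 1.0 in a normal AIX TOC is emitted as
.tc FD_3ff00000_0[TC],0x3ff0000000000000
while with -mminimal-toc or on ELF the same value becomes a bare
DOUBLE_INT_ASM_OP (".quad") or ".long" pair of constants. */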
24211
24212 if (GET_CODE (x) == CONST)
24213 {
24214 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
24215 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
24216
24217 base = XEXP (XEXP (x, 0), 0);
24218 offset = INTVAL (XEXP (XEXP (x, 0), 1));
24219 }
24220
24221 switch (GET_CODE (base))
24222 {
24223 case SYMBOL_REF:
24224 name = XSTR (base, 0);
24225 break;
24226
24227 case LABEL_REF:
24228 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
24229 CODE_LABEL_NUMBER (XEXP (base, 0)));
24230 break;
24231
24232 case CODE_LABEL:
24233 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
24234 break;
24235
24236 default:
24237 gcc_unreachable ();
24238 }
24239
24240 if (TARGET_ELF || TARGET_MINIMAL_TOC)
24241 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
24242 else
24243 {
24244 fputs ("\t.tc ", file);
24245 RS6000_OUTPUT_BASENAME (file, name);
24246
24247 if (offset < 0)
24248 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
24249 else if (offset)
24250 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
24251
24252 /* Mark large TOC symbols on AIX with [TE] so they are mapped
24253 after other TOC symbols, reducing overflow of small TOC access
24254 to [TC] symbols. */
24255 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
24256 ? "[TE]," : "[TC],", file);
24257 }
24258
24259 /* Currently C++ toc references to vtables can be emitted before it
24260 is decided whether the vtable is public or private. If this is
24261 the case, then the linker will eventually complain that there is
24262 a TOC reference to an unknown section. Thus, for vtables only,
24263 we emit the TOC reference to reference the symbol and not the
24264 section. */
24265 if (VTABLE_NAME_P (name))
24266 {
24267 RS6000_OUTPUT_BASENAME (file, name);
24268 if (offset < 0)
24269 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
24270 else if (offset > 0)
24271 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
24272 }
24273 else
24274 output_addr_const (file, x);
24275
24276 #if HAVE_AS_TLS
24277 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF
24278 && SYMBOL_REF_TLS_MODEL (base) != 0)
24279 {
24280 if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
24281 fputs ("@le", file);
24282 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_INITIAL_EXEC)
24283 fputs ("@ie", file);
24284 /* Use global-dynamic for local-dynamic. */
24285 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_GLOBAL_DYNAMIC
24286 || SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_DYNAMIC)
24287 {
24288 putc ('\n', file);
24289 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
24290 fputs ("\t.tc .", file);
24291 RS6000_OUTPUT_BASENAME (file, name);
24292 fputs ("[TC],", file);
24293 output_addr_const (file, x);
24294 fputs ("@m", file);
24295 }
24296 }
24297 #endif
24298
24299 putc ('\n', file);
24300 }
24301 \f
24302 /* Output an assembler pseudo-op to write an ASCII string of N characters
24303 starting at P to FILE.
24304
24305 On the RS/6000, we have to do this using the .byte operation and
24306 write out special characters outside the quoted string.
24307 Also, the assembler is broken; very long strings are truncated,
24308 so we must artificially break them up early. */
24309
24310 void
24311 output_ascii (FILE *file, const char *p, int n)
24312 {
24313 char c;
24314 int i, count_string;
24315 const char *for_string = "\t.byte \"";
24316 const char *for_decimal = "\t.byte ";
24317 const char *to_close = NULL;
24318
24319 count_string = 0;
24320 for (i = 0; i < n; i++)
24321 {
24322 c = *p++;
24323 if (c >= ' ' && c < 0177)
24324 {
24325 if (for_string)
24326 fputs (for_string, file);
24327 putc (c, file);
24328
24329 /* Write two quotes to get one. */
24330 if (c == '"')
24331 {
24332 putc (c, file);
24333 ++count_string;
24334 }
24335
24336 for_string = NULL;
24337 for_decimal = "\"\n\t.byte ";
24338 to_close = "\"\n";
24339 ++count_string;
24340
24341 if (count_string >= 512)
24342 {
24343 fputs (to_close, file);
24344
24345 for_string = "\t.byte \"";
24346 for_decimal = "\t.byte ";
24347 to_close = NULL;
24348 count_string = 0;
24349 }
24350 }
24351 else
24352 {
24353 if (for_decimal)
24354 fputs (for_decimal, file);
24355 fprintf (file, "%d", c);
24356
24357 for_string = "\n\t.byte \"";
24358 for_decimal = ", ";
24359 to_close = "\n";
24360 count_string = 0;
24361 }
24362 }
24363
24364 /* Now close the string if we have written one. Then end the line. */
24365 if (to_close)
24366 fputs (to_close, file);
24367 }
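/* For instance, output_ascii (file, "Hi\"\n", 4) would emit roughly:
.byte "Hi"""
.byte 10
The printable prefix goes into one quoted .byte directive with the
embedded quote doubled, and the newline falls back to a decimal .byte. */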
24368 \f
24369 /* Generate a unique section name for FILENAME for a section type
24370 represented by SECTION_DESC. Output goes into BUF.
24371
24372 SECTION_DESC can be any string, as long as it is different for each
24373 possible section type.
24374
24375 We name the section in the same manner as xlc. The name begins with an
24376 underscore followed by the filename (after stripping any leading directory
24377 names) with the last period replaced by the string SECTION_DESC. If
24378 FILENAME does not contain a period, SECTION_DESC is appended to the end of
24379 the name. */
24380
24381 void
24382 rs6000_gen_section_name (char **buf, const char *filename,
24383 const char *section_desc)
24384 {
24385 const char *q, *after_last_slash, *last_period = 0;
24386 char *p;
24387 int len;
24388
24389 after_last_slash = filename;
24390 for (q = filename; *q; q++)
24391 {
24392 if (*q == '/')
24393 after_last_slash = q + 1;
24394 else if (*q == '.')
24395 last_period = q;
24396 }
24397
24398 len = strlen (after_last_slash) + strlen (section_desc) + 2;
24399 *buf = (char *) xmalloc (len);
24400
24401 p = *buf;
24402 *p++ = '_';
24403
24404 for (q = after_last_slash; *q; q++)
24405 {
24406 if (q == last_period)
24407 {
24408 strcpy (p, section_desc);
24409 p += strlen (section_desc);
24410 break;
24411 }
24412
24413 else if (ISALNUM (*q))
24414 *p++ = *q;
24415 }
24416
24417 if (last_period == 0)
24418 strcpy (p, section_desc);
24419 else
24420 *p = '\0';
24421 }
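/* For example (arguments illustrative), calling
rs6000_gen_section_name (&buf, "src/foo.c", "_bss_") yields "_foo_bss_":
the directory prefix is dropped and the final period is replaced by
SECTION_DESC. */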
24422 \f
24423 /* Emit profile function. */
24424
24425 void
24426 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
24427 {
24428 /* Non-standard profiling for kernels, which just saves LR then calls
24429 _mcount without worrying about arg saves. The idea is to change
24430 the function prologue as little as possible as it isn't easy to
24431 account for arg save/restore code added just for _mcount. */
24432 if (TARGET_PROFILE_KERNEL)
24433 return;
24434
24435 if (DEFAULT_ABI == ABI_AIX)
24436 {
24437 #ifndef NO_PROFILE_COUNTERS
24438 # define NO_PROFILE_COUNTERS 0
24439 #endif
24440 if (NO_PROFILE_COUNTERS)
24441 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
24442 LCT_NORMAL, VOIDmode, 0);
24443 else
24444 {
24445 char buf[30];
24446 const char *label_name;
24447 rtx fun;
24448
24449 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
24450 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
24451 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
24452
24453 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
24454 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
24455 }
24456 }
24457 else if (DEFAULT_ABI == ABI_DARWIN)
24458 {
24459 const char *mcount_name = RS6000_MCOUNT;
24460 int caller_addr_regno = LR_REGNO;
24461
24462 /* Be conservative and always set this, at least for now. */
24463 crtl->uses_pic_offset_table = 1;
24464
24465 #if TARGET_MACHO
24466 /* For PIC code, set up a stub and collect the caller's address
24467 from r0, which is where the prologue puts it. */
24468 if (MACHOPIC_INDIRECT
24469 && crtl->uses_pic_offset_table)
24470 caller_addr_regno = 0;
24471 #endif
24472 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
24473 LCT_NORMAL, VOIDmode, 1,
24474 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
24475 }
24476 }
24477
24478 /* Write function profiler code. */
24479
24480 void
24481 output_function_profiler (FILE *file, int labelno)
24482 {
24483 char buf[100];
24484
24485 switch (DEFAULT_ABI)
24486 {
24487 default:
24488 gcc_unreachable ();
24489
24490 case ABI_V4:
24491 if (!TARGET_32BIT)
24492 {
24493 warning (0, "no profiling of 64-bit code for this ABI");
24494 return;
24495 }
24496 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
24497 fprintf (file, "\tmflr %s\n", reg_names[0]);
24498 if (NO_PROFILE_COUNTERS)
24499 {
24500 asm_fprintf (file, "\tstw %s,4(%s)\n",
24501 reg_names[0], reg_names[1]);
24502 }
24503 else if (TARGET_SECURE_PLT && flag_pic)
24504 {
24505 if (TARGET_LINK_STACK)
24506 {
24507 char name[32];
24508 get_ppc476_thunk_name (name);
24509 asm_fprintf (file, "\tbl %s\n", name);
24510 }
24511 else
24512 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
24513 asm_fprintf (file, "\tstw %s,4(%s)\n",
24514 reg_names[0], reg_names[1]);
24515 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
24516 asm_fprintf (file, "\taddis %s,%s,",
24517 reg_names[12], reg_names[12]);
24518 assemble_name (file, buf);
24519 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
24520 assemble_name (file, buf);
24521 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
24522 }
24523 else if (flag_pic == 1)
24524 {
24525 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
24526 asm_fprintf (file, "\tstw %s,4(%s)\n",
24527 reg_names[0], reg_names[1]);
24528 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
24529 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
24530 assemble_name (file, buf);
24531 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
24532 }
24533 else if (flag_pic > 1)
24534 {
24535 asm_fprintf (file, "\tstw %s,4(%s)\n",
24536 reg_names[0], reg_names[1]);
24537 /* Now, we need to get the address of the label. */
24538 if (TARGET_LINK_STACK)
24539 {
24540 char name[32];
24541 get_ppc476_thunk_name (name);
24542 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
24543 assemble_name (file, buf);
24544 fputs ("-.\n1:", file);
24545 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
24546 asm_fprintf (file, "\taddi %s,%s,4\n",
24547 reg_names[11], reg_names[11]);
24548 }
24549 else
24550 {
24551 fputs ("\tbcl 20,31,1f\n\t.long ", file);
24552 assemble_name (file, buf);
24553 fputs ("-.\n1:", file);
24554 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
24555 }
24556 asm_fprintf (file, "\tlwz %s,0(%s)\n",
24557 reg_names[0], reg_names[11]);
24558 asm_fprintf (file, "\tadd %s,%s,%s\n",
24559 reg_names[0], reg_names[0], reg_names[11]);
24560 }
24561 else
24562 {
24563 asm_fprintf (file, "\tlis %s,", reg_names[12]);
24564 assemble_name (file, buf);
24565 fputs ("@ha\n", file);
24566 asm_fprintf (file, "\tstw %s,4(%s)\n",
24567 reg_names[0], reg_names[1]);
24568 asm_fprintf (file, "\tla %s,", reg_names[0]);
24569 assemble_name (file, buf);
24570 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
24571 }
24572
24573 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
24574 fprintf (file, "\tbl %s%s\n",
24575 RS6000_MCOUNT, flag_pic ? "@plt" : "");
24576 break;
24577
24578 case ABI_AIX:
24579 case ABI_DARWIN:
24580 if (!TARGET_PROFILE_KERNEL)
24581 {
24582 /* Don't do anything, done in output_profile_hook (). */
24583 }
24584 else
24585 {
24586 gcc_assert (!TARGET_32BIT);
24587
24588 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
24589 asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
24590
24591 if (cfun->static_chain_decl != NULL)
24592 {
24593 asm_fprintf (file, "\tstd %s,24(%s)\n",
24594 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24595 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24596 asm_fprintf (file, "\tld %s,24(%s)\n",
24597 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24598 }
24599 else
24600 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24601 }
24602 break;
24603 }
24604 }
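/* As a rough sketch, the ABI_V4 non-PIC path above emits a sequence
like the following (register names and label numbering vary by target):
mflr 0
lis 12,.LP0@ha
stw 0,4(1)
la 0,.LP0@l(12)
bl _mcount
*/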
24605
24606 \f
24607
24608 /* The following variable holds the last issued insn. */
24609
24610 static rtx last_scheduled_insn;
24611
24612 /* The following variable helps to balance issuing of load and
24613 store instructions. */
24614
24615 static int load_store_pendulum;
24616
24617 /* Power4 load update and store update instructions are cracked into a
24618 load or store and an integer insn which are executed in the same cycle.
24619 Branches have their own dispatch slot which does not count against the
24620 GCC issue rate, but it changes the program flow so there are no other
24621 instructions to issue in this cycle. */
24622
24623 static int
24624 rs6000_variable_issue_1 (rtx insn, int more)
24625 {
24626 last_scheduled_insn = insn;
24627 if (GET_CODE (PATTERN (insn)) == USE
24628 || GET_CODE (PATTERN (insn)) == CLOBBER)
24629 {
24630 cached_can_issue_more = more;
24631 return cached_can_issue_more;
24632 }
24633
24634 if (insn_terminates_group_p (insn, current_group))
24635 {
24636 cached_can_issue_more = 0;
24637 return cached_can_issue_more;
24638 }
24639
24640 /* If the insn has no reservation but we reach here anyway, use the default. */
24641 if (recog_memoized (insn) < 0)
24642 return more;
24643
24644 if (rs6000_sched_groups)
24645 {
24646 if (is_microcoded_insn (insn))
24647 cached_can_issue_more = 0;
24648 else if (is_cracked_insn (insn))
24649 cached_can_issue_more = more > 2 ? more - 2 : 0;
24650 else
24651 cached_can_issue_more = more - 1;
24652
24653 return cached_can_issue_more;
24654 }
24655
24656 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
24657 return 0;
24658
24659 cached_can_issue_more = more - 1;
24660 return cached_can_issue_more;
24661 }
24662
24663 static int
24664 rs6000_variable_issue (FILE *stream, int verbose, rtx insn, int more)
24665 {
24666 int r = rs6000_variable_issue_1 (insn, more);
24667 if (verbose)
24668 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
24669 return r;
24670 }
24671
24672 /* Adjust the cost of a scheduling dependency. Return the new cost of
24673 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
24674
24675 static int
24676 rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
24677 {
24678 enum attr_type attr_type;
24679
24680 if (! recog_memoized (insn))
24681 return 0;
24682
24683 switch (REG_NOTE_KIND (link))
24684 {
24685 case REG_DEP_TRUE:
24686 {
24687 /* Data dependency; DEP_INSN writes a register that INSN reads
24688 some cycles later. */
24689
24690 /* Separate a load from a narrower, dependent store. */
24691 if (rs6000_sched_groups
24692 && GET_CODE (PATTERN (insn)) == SET
24693 && GET_CODE (PATTERN (dep_insn)) == SET
24694 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
24695 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
24696 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
24697 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
24698 return cost + 14;
24699
24700 attr_type = get_attr_type (insn);
24701
24702 switch (attr_type)
24703 {
24704 case TYPE_JMPREG:
24705 /* Tell the first scheduling pass about the latency between
24706 a mtctr and bctr (and mtlr and br/blr). The first
24707 scheduling pass will not know about this latency since
24708 the mtctr instruction, which has the latency associated
24709 to it, will be generated by reload. */
24710 return 4;
24711 case TYPE_BRANCH:
24712 /* Leave some extra cycles between a compare and its
24713 dependent branch, to inhibit expensive mispredicts. */
24714 if ((rs6000_cpu_attr == CPU_PPC603
24715 || rs6000_cpu_attr == CPU_PPC604
24716 || rs6000_cpu_attr == CPU_PPC604E
24717 || rs6000_cpu_attr == CPU_PPC620
24718 || rs6000_cpu_attr == CPU_PPC630
24719 || rs6000_cpu_attr == CPU_PPC750
24720 || rs6000_cpu_attr == CPU_PPC7400
24721 || rs6000_cpu_attr == CPU_PPC7450
24722 || rs6000_cpu_attr == CPU_PPCE5500
24723 || rs6000_cpu_attr == CPU_PPCE6500
24724 || rs6000_cpu_attr == CPU_POWER4
24725 || rs6000_cpu_attr == CPU_POWER5
24726 || rs6000_cpu_attr == CPU_POWER7
24727 || rs6000_cpu_attr == CPU_POWER8
24728 || rs6000_cpu_attr == CPU_CELL)
24729 && recog_memoized (dep_insn)
24730 && (INSN_CODE (dep_insn) >= 0))
24731
24732 switch (get_attr_type (dep_insn))
24733 {
24734 case TYPE_CMP:
24735 case TYPE_COMPARE:
24736 case TYPE_DELAYED_COMPARE:
24737 case TYPE_IMUL_COMPARE:
24738 case TYPE_LMUL_COMPARE:
24739 case TYPE_FPCOMPARE:
24740 case TYPE_CR_LOGICAL:
24741 case TYPE_DELAYED_CR:
24742 return cost + 2;
24743 default:
24744 break;
24745 }
24746 break;
24747
24748 case TYPE_STORE:
24749 case TYPE_STORE_U:
24750 case TYPE_STORE_UX:
24751 case TYPE_FPSTORE:
24752 case TYPE_FPSTORE_U:
24753 case TYPE_FPSTORE_UX:
24754 if ((rs6000_cpu == PROCESSOR_POWER6)
24755 && recog_memoized (dep_insn)
24756 && (INSN_CODE (dep_insn) >= 0))
24757 {
24759 if (GET_CODE (PATTERN (insn)) != SET)
24760 /* If this happens, we have to extend this to schedule
24761 optimally. Return default for now. */
24762 return cost;
24763
24764 /* Adjust the cost for the case where the value written
24765 by a fixed point operation is used as the address
24766 gen value on a store. */
24767 switch (get_attr_type (dep_insn))
24768 {
24769 case TYPE_LOAD:
24770 case TYPE_LOAD_U:
24771 case TYPE_LOAD_UX:
24772 case TYPE_CNTLZ:
24773 {
24774 if (! store_data_bypass_p (dep_insn, insn))
24775 return 4;
24776 break;
24777 }
24778 case TYPE_LOAD_EXT:
24779 case TYPE_LOAD_EXT_U:
24780 case TYPE_LOAD_EXT_UX:
24781 case TYPE_VAR_SHIFT_ROTATE:
24782 case TYPE_VAR_DELAYED_COMPARE:
24783 {
24784 if (! store_data_bypass_p (dep_insn, insn))
24785 return 6;
24786 break;
24787 }
24788 case TYPE_INTEGER:
24789 case TYPE_COMPARE:
24790 case TYPE_FAST_COMPARE:
24791 case TYPE_EXTS:
24792 case TYPE_SHIFT:
24793 case TYPE_INSERT_WORD:
24794 case TYPE_INSERT_DWORD:
24795 case TYPE_FPLOAD_U:
24796 case TYPE_FPLOAD_UX:
24797 case TYPE_STORE_U:
24798 case TYPE_STORE_UX:
24799 case TYPE_FPSTORE_U:
24800 case TYPE_FPSTORE_UX:
24801 {
24802 if (! store_data_bypass_p (dep_insn, insn))
24803 return 3;
24804 break;
24805 }
24806 case TYPE_IMUL:
24807 case TYPE_IMUL2:
24808 case TYPE_IMUL3:
24809 case TYPE_LMUL:
24810 case TYPE_IMUL_COMPARE:
24811 case TYPE_LMUL_COMPARE:
24812 {
24813 if (! store_data_bypass_p (dep_insn, insn))
24814 return 17;
24815 break;
24816 }
24817 case TYPE_IDIV:
24818 {
24819 if (! store_data_bypass_p (dep_insn, insn))
24820 return 45;
24821 break;
24822 }
24823 case TYPE_LDIV:
24824 {
24825 if (! store_data_bypass_p (dep_insn, insn))
24826 return 57;
24827 break;
24828 }
24829 default:
24830 break;
24831 }
24832 }
24833 break;
24834
24835 case TYPE_LOAD:
24836 case TYPE_LOAD_U:
24837 case TYPE_LOAD_UX:
24838 case TYPE_LOAD_EXT:
24839 case TYPE_LOAD_EXT_U:
24840 case TYPE_LOAD_EXT_UX:
24841 if ((rs6000_cpu == PROCESSOR_POWER6)
24842 && recog_memoized (dep_insn)
24843 && (INSN_CODE (dep_insn) >= 0))
24844 {
24846 /* Adjust the cost for the case where the value written
24847 by a fixed point instruction is used within the address
24848 gen portion of a subsequent load(u)(x) */
24849 switch (get_attr_type (dep_insn))
24850 {
24851 case TYPE_LOAD:
24852 case TYPE_LOAD_U:
24853 case TYPE_LOAD_UX:
24854 case TYPE_CNTLZ:
24855 {
24856 if (set_to_load_agen (dep_insn, insn))
24857 return 4;
24858 break;
24859 }
24860 case TYPE_LOAD_EXT:
24861 case TYPE_LOAD_EXT_U:
24862 case TYPE_LOAD_EXT_UX:
24863 case TYPE_VAR_SHIFT_ROTATE:
24864 case TYPE_VAR_DELAYED_COMPARE:
24865 {
24866 if (set_to_load_agen (dep_insn, insn))
24867 return 6;
24868 break;
24869 }
24870 case TYPE_INTEGER:
24871 case TYPE_COMPARE:
24872 case TYPE_FAST_COMPARE:
24873 case TYPE_EXTS:
24874 case TYPE_SHIFT:
24875 case TYPE_INSERT_WORD:
24876 case TYPE_INSERT_DWORD:
24877 case TYPE_FPLOAD_U:
24878 case TYPE_FPLOAD_UX:
24879 case TYPE_STORE_U:
24880 case TYPE_STORE_UX:
24881 case TYPE_FPSTORE_U:
24882 case TYPE_FPSTORE_UX:
24883 {
24884 if (set_to_load_agen (dep_insn, insn))
24885 return 3;
24886 break;
24887 }
24888 case TYPE_IMUL:
24889 case TYPE_IMUL2:
24890 case TYPE_IMUL3:
24891 case TYPE_LMUL:
24892 case TYPE_IMUL_COMPARE:
24893 case TYPE_LMUL_COMPARE:
24894 {
24895 if (set_to_load_agen (dep_insn, insn))
24896 return 17;
24897 break;
24898 }
24899 case TYPE_IDIV:
24900 {
24901 if (set_to_load_agen (dep_insn, insn))
24902 return 45;
24903 break;
24904 }
24905 case TYPE_LDIV:
24906 {
24907 if (set_to_load_agen (dep_insn, insn))
24908 return 57;
24909 break;
24910 }
24911 default:
24912 break;
24913 }
24914 }
24915 break;
24916
24917 case TYPE_FPLOAD:
24918 if ((rs6000_cpu == PROCESSOR_POWER6)
24919 && recog_memoized (dep_insn)
24920 && (INSN_CODE (dep_insn) >= 0)
24921 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
24922 return 2;
24923 break;

24924 default:
24925 break;
24926 }
24927
24928 /* Fall out to return default cost. */
24929 }
24930 break;
24931
24932 case REG_DEP_OUTPUT:
24933 /* Output dependency; DEP_INSN writes a register that INSN writes some
24934 cycles later. */
24935 if ((rs6000_cpu == PROCESSOR_POWER6)
24936 && recog_memoized (dep_insn)
24937 && (INSN_CODE (dep_insn) >= 0))
24938 {
24939 attr_type = get_attr_type (insn);
24940
24941 switch (attr_type)
24942 {
24943 case TYPE_FP:
24944 if (get_attr_type (dep_insn) == TYPE_FP)
24945 return 1;
24946 break;
24947 case TYPE_FPLOAD:
24948 if (get_attr_type (dep_insn) == TYPE_MFFGPR)
24949 return 2;
24950 break;
24951 default:
24952 break;
24953 }
24954 }
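/* Fall through: output dependencies otherwise default to a cost of 0,
like anti dependencies. */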
24955 case REG_DEP_ANTI:
24956 /* Anti dependency; DEP_INSN reads a register that INSN writes some
24957 cycles later. */
24958 return 0;
24959
24960 default:
24961 gcc_unreachable ();
24962 }
24963
24964 return cost;
24965 }
24966
24967 /* Debug version of rs6000_adjust_cost. */
24968
24969 static int
24970 rs6000_debug_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
24971 {
24972 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
24973
24974 if (ret != cost)
24975 {
24976 const char *dep;
24977
24978 switch (REG_NOTE_KIND (link))
24979 {
24980 default: dep = "unknown depencency"; break;
24981 case REG_DEP_TRUE: dep = "data dependency"; break;
24982 case REG_DEP_OUTPUT: dep = "output dependency"; break;
24983 case REG_DEP_ANTI: dep = "anti dependency"; break;
24984 }
24985
24986 fprintf (stderr,
24987 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
24988 "%s, insn:\n", ret, cost, dep);
24989
24990 debug_rtx (insn);
24991 }
24992
24993 return ret;
24994 }
24995
24996 /* The function returns true if INSN is microcoded.
24997 Return false otherwise. */
24998
24999 static bool
25000 is_microcoded_insn (rtx insn)
25001 {
25002 if (!insn || !NONDEBUG_INSN_P (insn)
25003 || GET_CODE (PATTERN (insn)) == USE
25004 || GET_CODE (PATTERN (insn)) == CLOBBER)
25005 return false;
25006
25007 if (rs6000_cpu_attr == CPU_CELL)
25008 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
25009
25010 if (rs6000_sched_groups
25011 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
25012 {
25013 enum attr_type type = get_attr_type (insn);
25014 if (type == TYPE_LOAD_EXT_U
25015 || type == TYPE_LOAD_EXT_UX
25016 || type == TYPE_LOAD_UX
25017 || type == TYPE_STORE_UX
25018 || type == TYPE_MFCR)
25019 return true;
25020 }
25021
25022 return false;
25023 }
25024
25025 /* The function returns true if INSN is cracked into 2 instructions
25026 by the processor (and therefore occupies 2 issue slots). */
25027
25028 static bool
25029 is_cracked_insn (rtx insn)
25030 {
25031 if (!insn || !NONDEBUG_INSN_P (insn)
25032 || GET_CODE (PATTERN (insn)) == USE
25033 || GET_CODE (PATTERN (insn)) == CLOBBER)
25034 return false;
25035
25036 if (rs6000_sched_groups
25037 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
25038 {
25039 enum attr_type type = get_attr_type (insn);
25040 if (type == TYPE_LOAD_U || type == TYPE_STORE_U
25041 || type == TYPE_FPLOAD_U || type == TYPE_FPSTORE_U
25042 || type == TYPE_FPLOAD_UX || type == TYPE_FPSTORE_UX
25043 || type == TYPE_LOAD_EXT || type == TYPE_DELAYED_CR
25044 || type == TYPE_COMPARE || type == TYPE_DELAYED_COMPARE
25045 || type == TYPE_IMUL_COMPARE || type == TYPE_LMUL_COMPARE
25046 || type == TYPE_IDIV || type == TYPE_LDIV
25047 || type == TYPE_INSERT_WORD)
25048 return true;
25049 }
25050
25051 return false;
25052 }
25053
25054 /* The function returns true if INSN can be issued only from
25055 the branch slot. */
25056
25057 static bool
25058 is_branch_slot_insn (rtx insn)
25059 {
25060 if (!insn || !NONDEBUG_INSN_P (insn)
25061 || GET_CODE (PATTERN (insn)) == USE
25062 || GET_CODE (PATTERN (insn)) == CLOBBER)
25063 return false;
25064
25065 if (rs6000_sched_groups)
25066 {
25067 enum attr_type type = get_attr_type (insn);
25068 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
25069 return true;
25070 return false;
25071 }
25072
25073 return false;
25074 }
25075
25076 /* Return true if OUT_INSN sets a value that is used in the
25077 address generation computation of IN_INSN. */
25078 static bool
25079 set_to_load_agen (rtx out_insn, rtx in_insn)
25080 {
25081 rtx out_set, in_set;
25082
25083 /* For performance reasons, only handle the simple case where
25084 both insns are a single_set. */
25085 out_set = single_set (out_insn);
25086 if (out_set)
25087 {
25088 in_set = single_set (in_insn);
25089 if (in_set)
25090 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
25091 }
25092
25093 return false;
25094 }
25095
25096 /* Try to determine base/offset/size parts of the given MEM.
25097 Return true if successful, false if any of the values could not
25098 be determined.
25099
25100 This function only looks for REG or REG+CONST address forms.
25101 REG+REG address form will return false. */
25102
25103 static bool
25104 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
25105 HOST_WIDE_INT *size)
25106 {
25107 rtx addr_rtx;
25108 if (MEM_SIZE_KNOWN_P (mem))
25109 *size = MEM_SIZE (mem);
25110 else
25111 return false;
25112
25113 if (GET_CODE (XEXP (mem, 0)) == PRE_MODIFY)
25114 addr_rtx = XEXP (XEXP (mem, 0), 1);
25115 else
25116 addr_rtx = (XEXP (mem, 0));
25117
25118 if (GET_CODE (addr_rtx) == REG)
25119 {
25120 *base = addr_rtx;
25121 *offset = 0;
25122 }
25123 else if (GET_CODE (addr_rtx) == PLUS
25124 && CONST_INT_P (XEXP (addr_rtx, 1)))
25125 {
25126 *base = XEXP (addr_rtx, 0);
25127 *offset = INTVAL (XEXP (addr_rtx, 1));
25128 }
25129 else
25130 return false;
25131
25132 return true;
25133 }
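/* For example (operands illustrative), a MEM of the form
(mem:DI (plus (reg 9) (const_int 16))) with a known size yields
*base = (reg 9), *offset = 16, *size = 8, while a reg+reg address
makes the function return false. */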
25134
25135 /* Return true if the target storage location of MEM1 is adjacent
25136 to the target storage location of MEM2. */
25138
25139 static bool
25140 adjacent_mem_locations (rtx mem1, rtx mem2)
25141 {
25142 rtx reg1, reg2;
25143 HOST_WIDE_INT off1, size1, off2, size2;
25144
25145 if (get_memref_parts (mem1, &reg1, &off1, &size1)
25146 && get_memref_parts (mem2, &reg2, &off2, &size2))
25147 return ((REGNO (reg1) == REGNO (reg2))
25148 && ((off1 + size1 == off2)
25149 || (off2 + size2 == off1)));
25150
25151 return false;
25152 }
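/* E.g., two 8-byte accesses at reg+0 and reg+8 off the same base
register are adjacent (off1 + size1 == off2); reg+0 and reg+16 are
not. */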
25153
25154 /* This function returns true if it can be determined that the two MEM
25155 locations overlap by at least 1 byte based on base reg/offset/size. */
25156
25157 static bool
25158 mem_locations_overlap (rtx mem1, rtx mem2)
25159 {
25160 rtx reg1, reg2;
25161 HOST_WIDE_INT off1, size1, off2, size2;
25162
25163 if (get_memref_parts (mem1, &reg1, &off1, &size1)
25164 && get_memref_parts (mem2, &reg2, &off2, &size2))
25165 return ((REGNO (reg1) == REGNO (reg2))
25166 && (((off1 <= off2) && (off1 + size1 > off2))
25167 || ((off2 <= off1) && (off2 + size2 > off1))));
25168
25169 return false;
25170 }
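/* E.g., an 8-byte access at reg+0 and a 4-byte access at reg+4 off the
same base register overlap, since off1 <= off2 and off1 + size1 > off2. */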
25171
25172 /* Update the integer scheduling priority INSN_PRIORITY (INSN).
25173 Increase the priority to execute INSN earlier, reduce the
25174 priority to execute INSN later. */
25177
25178 static int
25179 rs6000_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
25180 {
25181 rtx load_mem, str_mem;
25182 /* On machines (like the 750) which have asymmetric integer units,
25183 where one integer unit can do multiply and divides and the other
25184 can't, reduce the priority of multiply/divide so it is scheduled
25185 before other integer operations. */
25186
25187 #if 0
25188 if (! INSN_P (insn))
25189 return priority;
25190
25191 if (GET_CODE (PATTERN (insn)) == USE)
25192 return priority;
25193
25194 switch (rs6000_cpu_attr) {
25195 case CPU_PPC750:
25196 switch (get_attr_type (insn))
25197 {
25198 default:
25199 break;
25200
25201 case TYPE_IMUL:
25202 case TYPE_IDIV:
25203 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
25204 priority, priority);
25205 if (priority >= 0 && priority < 0x01000000)
25206 priority >>= 3;
25207 break;
25208 }
25209 }
25210 #endif
25211
25212 if (insn_must_be_first_in_group (insn)
25213 && reload_completed
25214 && current_sched_info->sched_max_insns_priority
25215 && rs6000_sched_restricted_insns_priority)
25216 {
25218 /* Prioritize insns that can be dispatched only in the first
25219 dispatch slot. */
25220 if (rs6000_sched_restricted_insns_priority == 1)
25221 /* Attach highest priority to insn. This means that in
25222 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
25223 precede 'priority' (critical path) considerations. */
25224 return current_sched_info->sched_max_insns_priority;
25225 else if (rs6000_sched_restricted_insns_priority == 2)
25226 /* Increase priority of insn by a minimal amount. This means that in
25227 haifa-sched.c:ready_sort(), only 'priority' (critical path)
25228 considerations precede dispatch-slot restriction considerations. */
25229 return (priority + 1);
25230 }
25231
25232 if (rs6000_cpu == PROCESSOR_POWER6
25233 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
25234 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
25235 /* Attach highest priority to insn if the scheduler has just issued two
25236 stores and this instruction is a load, or two loads and this instruction
25237 is a store. Power6 wants loads and stores scheduled alternately
25238 when possible */
25239 return current_sched_info->sched_max_insns_priority;
25240
25241 return priority;
25242 }
25243
25244 /* Return true if the instruction is nonpipelined on the Cell. */
25245 static bool
25246 is_nonpipeline_insn (rtx insn)
25247 {
25248 enum attr_type type;
25249 if (!insn || !NONDEBUG_INSN_P (insn)
25250 || GET_CODE (PATTERN (insn)) == USE
25251 || GET_CODE (PATTERN (insn)) == CLOBBER)
25252 return false;
25253
25254 type = get_attr_type (insn);
25255 if (type == TYPE_IMUL
25256 || type == TYPE_IMUL2
25257 || type == TYPE_IMUL3
25258 || type == TYPE_LMUL
25259 || type == TYPE_IDIV
25260 || type == TYPE_LDIV
25261 || type == TYPE_SDIV
25262 || type == TYPE_DDIV
25263 || type == TYPE_SSQRT
25264 || type == TYPE_DSQRT
25265 || type == TYPE_MFCR
25266 || type == TYPE_MFCRF
25267 || type == TYPE_MFJMPR)
25268 {
25269 return true;
25270 }
25271 return false;
25272 }
25273
25274
25275 /* Return how many instructions the machine can issue per cycle. */
25276
25277 static int
25278 rs6000_issue_rate (void)
25279 {
25280 /* Unless scheduling for register pressure, use issue rate of 1 for
25281 first scheduling pass to decrease degradation. */
25282 if (!reload_completed && !flag_sched_pressure)
25283 return 1;
25284
25285 switch (rs6000_cpu_attr) {
25286 case CPU_RS64A:
25287 case CPU_PPC601: /* ? */
25288 case CPU_PPC7450:
25289 return 3;
25290 case CPU_PPC440:
25291 case CPU_PPC603:
25292 case CPU_PPC750:
25293 case CPU_PPC7400:
25294 case CPU_PPC8540:
25295 case CPU_PPC8548:
25296 case CPU_CELL:
25297 case CPU_PPCE300C2:
25298 case CPU_PPCE300C3:
25299 case CPU_PPCE500MC:
25300 case CPU_PPCE500MC64:
25301 case CPU_PPCE5500:
25302 case CPU_PPCE6500:
25303 case CPU_TITAN:
25304 return 2;
25305 case CPU_PPC476:
25306 case CPU_PPC604:
25307 case CPU_PPC604E:
25308 case CPU_PPC620:
25309 case CPU_PPC630:
25310 return 4;
25311 case CPU_POWER4:
25312 case CPU_POWER5:
25313 case CPU_POWER6:
25314 case CPU_POWER7:
25315 return 5;
25316 case CPU_POWER8:
25317 return 7;
25318 default:
25319 return 1;
25320 }
25321 }
25322
25323 /* Return how many instructions to look ahead for better insn
25324 scheduling. */
25325
25326 static int
25327 rs6000_use_sched_lookahead (void)
25328 {
25329 switch (rs6000_cpu_attr)
25330 {
25331 case CPU_PPC8540:
25332 case CPU_PPC8548:
25333 return 4;
25334
25335 case CPU_CELL:
25336 return (reload_completed ? 8 : 0);
25337
25338 default:
25339 return 0;
25340 }
25341 }
25342
25343 /* We are choosing insn from the ready queue. Return nonzero if INSN can be chosen. */
25344 static int
25345 rs6000_use_sched_lookahead_guard (rtx insn)
25346 {
25347 if (rs6000_cpu_attr != CPU_CELL)
25348 return 1;
25349
25350 if (insn == NULL_RTX || !INSN_P (insn))
25351 gcc_unreachable ();
25352
25353 if (!reload_completed
25354 || is_nonpipeline_insn (insn)
25355 || is_microcoded_insn (insn))
25356 return 0;
25357
25358 return 1;
25359 }
25360
25361 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
25362 and return true. */
25363
25364 static bool
25365 find_mem_ref (rtx pat, rtx *mem_ref)
25366 {
25367 const char * fmt;
25368 int i, j;
25369
25370 /* stack_tie does not produce any real memory traffic. */
25371 if (tie_operand (pat, VOIDmode))
25372 return false;
25373
25374 if (GET_CODE (pat) == MEM)
25375 {
25376 *mem_ref = pat;
25377 return true;
25378 }
25379
25380 /* Recursively process the pattern. */
25381 fmt = GET_RTX_FORMAT (GET_CODE (pat));
25382
25383 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
25384 {
25385 if (fmt[i] == 'e')
25386 {
25387 if (find_mem_ref (XEXP (pat, i), mem_ref))
25388 return true;
25389 }
25390 else if (fmt[i] == 'E')
25391 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
25392 {
25393 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
25394 return true;
25395 }
25396 }
25397
25398 return false;
25399 }
25400
25401 /* Determine if PAT is a PATTERN of a load insn. */
25402
25403 static bool
25404 is_load_insn1 (rtx pat, rtx *load_mem)
25405 {
25406 if (!pat)
25407 return false;
25408
25409 if (GET_CODE (pat) == SET)
25410 return find_mem_ref (SET_SRC (pat), load_mem);
25411
25412 if (GET_CODE (pat) == PARALLEL)
25413 {
25414 int i;
25415
25416 for (i = 0; i < XVECLEN (pat, 0); i++)
25417 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
25418 return true;
25419 }
25420
25421 return false;
25422 }
25423
25424 /* Determine if INSN loads from memory. */
25425
25426 static bool
25427 is_load_insn (rtx insn, rtx *load_mem)
25428 {
25429 if (!insn || !INSN_P (insn))
25430 return false;
25431
25432 if (CALL_P (insn))
25433 return false;
25434
25435 return is_load_insn1 (PATTERN (insn), load_mem);
25436 }
25437
25438 /* Determine if PAT is a PATTERN of a store insn. */
25439
25440 static bool
25441 is_store_insn1 (rtx pat, rtx *str_mem)
25442 {
25443 if (!pat)
25444 return false;
25445
25446 if (GET_CODE (pat) == SET)
25447 return find_mem_ref (SET_DEST (pat), str_mem);
25448
25449 if (GET_CODE (pat) == PARALLEL)
25450 {
25451 int i;
25452
25453 for (i = 0; i < XVECLEN (pat, 0); i++)
25454 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
25455 return true;
25456 }
25457
25458 return false;
25459 }
25460
25461 /* Determine if INSN stores to memory. */
25462
25463 static bool
25464 is_store_insn (rtx insn, rtx *str_mem)
25465 {
25466 if (!insn || !INSN_P (insn))
25467 return false;
25468
25469 return is_store_insn1 (PATTERN (insn), str_mem);
25470 }
25471
25472 /* Returns whether the dependence between INSN and NEXT is considered
25473 costly by the given target. */
25474
25475 static bool
25476 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
25477 {
25478 rtx insn;
25479 rtx next;
25480 rtx load_mem, str_mem;
25481
25482 /* If the flag is not enabled, no dependence is considered costly;
25483 allow all dependent insns in the same group.
25484 This is the most aggressive option. */
25485 if (rs6000_sched_costly_dep == no_dep_costly)
25486 return false;
25487
25488 /* If the flag is set to 1, a dependence is always considered costly;
25489 do not allow dependent instructions in the same group.
25490 This is the most conservative option. */
25491 if (rs6000_sched_costly_dep == all_deps_costly)
25492 return true;
25493
25494 insn = DEP_PRO (dep);
25495 next = DEP_CON (dep);
25496
25497 if (rs6000_sched_costly_dep == store_to_load_dep_costly
25498 && is_load_insn (next, &load_mem)
25499 && is_store_insn (insn, &str_mem))
25500 /* Prevent load after store in the same group. */
25501 return true;
25502
25503 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
25504 && is_load_insn (next, &load_mem)
25505 && is_store_insn (insn, &str_mem)
25506 && DEP_TYPE (dep) == REG_DEP_TRUE
25507 && mem_locations_overlap (str_mem, load_mem))
25508 /* Prevent load after store in the same group if it is a true
25509 dependence. */
25510 return true;
25511
25512 /* The flag is set to X; dependences with latency >= X are considered costly,
25513 and will not be scheduled in the same group. */
25514 if (rs6000_sched_costly_dep <= max_dep_latency
25515 && ((cost - distance) >= (int) rs6000_sched_costly_dep))
25516 return true;
25517
25518 return false;
25519 }
25520
25521 /* Return the next insn after INSN that is found before TAIL is reached,
25522 skipping any "non-active" insns - insns that will not actually occupy
25523 an issue slot. Return NULL_RTX if such an insn is not found. */
25524
25525 static rtx
25526 get_next_active_insn (rtx insn, rtx tail)
25527 {
25528 if (insn == NULL_RTX || insn == tail)
25529 return NULL_RTX;
25530
25531 while (1)
25532 {
25533 insn = NEXT_INSN (insn);
25534 if (insn == NULL_RTX || insn == tail)
25535 return NULL_RTX;
25536
25537 if (CALL_P (insn)
25538 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
25539 || (NONJUMP_INSN_P (insn)
25540 && GET_CODE (PATTERN (insn)) != USE
25541 && GET_CODE (PATTERN (insn)) != CLOBBER
25542 && INSN_CODE (insn) != CODE_FOR_stack_tie))
25543 break;
25544 }
25545 return insn;
25546 }
25547
25548 /* We are about to begin issuing insns for this clock cycle. */
25549
25550 static int
25551 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
25552 rtx *ready ATTRIBUTE_UNUSED,
25553 int *pn_ready ATTRIBUTE_UNUSED,
25554 int clock_var ATTRIBUTE_UNUSED)
25555 {
25556 int n_ready = *pn_ready;
25557
25558 if (sched_verbose)
25559 fprintf (dump, "// rs6000_sched_reorder :\n");
25560
25561 /* Reorder the ready list if the second-to-last ready insn
25562 is a nonpipelined insn. */
25563 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
25564 {
25565 if (is_nonpipeline_insn (ready[n_ready - 1])
25566 && (recog_memoized (ready[n_ready - 2]) > 0))
25567 /* Simply swap first two insns. */
25568 {
25569 rtx tmp = ready[n_ready - 1];
25570 ready[n_ready - 1] = ready[n_ready - 2];
25571 ready[n_ready - 2] = tmp;
25572 }
25573 }
25574
25575 if (rs6000_cpu == PROCESSOR_POWER6)
25576 load_store_pendulum = 0;
25577
25578 return rs6000_issue_rate ();
25579 }
25580
25581 /* Like rs6000_sched_reorder, but called after issuing each insn. */
25582
25583 static int
25584 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
25585 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
25586 {
25587 if (sched_verbose)
25588 fprintf (dump, "// rs6000_sched_reorder2 :\n");
25589
25590 /* For Power6, we need to handle some special cases to try and keep the
25591 store queue from overflowing and triggering expensive flushes.
25592
25593 This code monitors how load and store instructions are being issued
25594 and skews the ready list one way or the other to increase the likelihood
25595 that a desired instruction is issued at the proper time.
25596
25597 We maintain a "load_store_pendulum" to track the current
25598 state of load/store issue.
25599
25600 - If the pendulum is at zero, then no loads or stores have been
25601 issued in the current cycle so we do nothing.
25602
25603 - If the pendulum is 1, then a single load has been issued in this
25604 cycle and we attempt to locate another load in the ready list to
25605 issue with it.
25606
25607 - If the pendulum is -2, then two stores have already been
25608 issued in this cycle, so we increase the priority of the first load
25609 in the ready list to increase its likelihood of being chosen first
25610 in the next cycle.
25611
25612 - If the pendulum is -1, then a single store has been issued in this
25613 cycle and we attempt to locate another store in the ready list to
25614 issue with it, preferring a store to an adjacent memory location to
25615 facilitate store pairing in the store queue.
25616
25617 - If the pendulum is 2, then two loads have already been
25618 issued in this cycle, so we increase the priority of the first store
25619 in the ready list to increase its likelihood of being chosen first
25620 in the next cycle.
25621
25622 - If the pendulum < -2 or > 2, then do nothing.
25623
25624 Note: This code covers the most common scenarios. There exist
25625 non-load/store instructions which make use of the LSU and which
25626 would need to be accounted for to strictly model the behavior
25627 of the machine. Those instructions are currently unaccounted
25628 for to help minimize compile time overhead of this code.
25629 */
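/* A worked trace of the pendulum: it is reset to 0 at the start of each
cycle in rs6000_sched_reorder. Issuing a store moves it to -1 and we
look for a second (preferably adjacent) store; a second store moves it
to -2, at which point the first load on the ready list gets a priority
boost and the pendulum steps to -3 so that only one load is boosted. */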
25630 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
25631 {
25632 int pos;
25633 int i;
25634 rtx tmp, load_mem, str_mem;
25635
25636 if (is_store_insn (last_scheduled_insn, &str_mem))
25637 /* Issuing a store, swing the load_store_pendulum to the left */
25638 load_store_pendulum--;
25639 else if (is_load_insn (last_scheduled_insn, &load_mem))
25640 /* Issuing a load, swing the load_store_pendulum to the right */
25641 load_store_pendulum++;
25642 else
25643 return cached_can_issue_more;
25644
25645 /* If the pendulum is balanced, or there is only one instruction on
25646 the ready list, then all is well, so return. */
25647 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
25648 return cached_can_issue_more;
25649
25650 if (load_store_pendulum == 1)
25651 {
25652 /* A load has been issued in this cycle. Scan the ready list
25653 for another load to issue with it */
25654 pos = *pn_ready - 1;
25655
25656 while (pos >= 0)
25657 {
25658 if (is_load_insn (ready[pos], &load_mem))
25659 {
25660 /* Found a load. Move it to the head of the ready list,
25661 and adjust its priority so that it is more likely to
25662 stay there. */
25663 tmp = ready[pos];
25664 for (i = pos; i < *pn_ready - 1; i++)
25665 ready[i] = ready[i + 1];
25666 ready[*pn_ready - 1] = tmp;
25667
25668 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
25669 INSN_PRIORITY (tmp)++;
25670 break;
25671 }
25672 pos--;
25673 }
25674 }
25675 else if (load_store_pendulum == -2)
25676 {
25677 /* Two stores have been issued in this cycle. Increase the
25678 priority of the first load in the ready list to favor it for
25679 issuing in the next cycle. */
25680 pos = *pn_ready - 1;
25681
25682 while (pos >= 0)
25683 {
25684 if (is_load_insn (ready[pos], &load_mem)
25685 && !sel_sched_p ()
25686 && INSN_PRIORITY_KNOWN (ready[pos]))
25687 {
25688 INSN_PRIORITY (ready[pos])++;
25689
25690 /* Adjust the pendulum to account for the fact that a load
25691 was found and increased in priority. This is to prevent
25692 increasing the priority of multiple loads */
25693 load_store_pendulum--;
25694
25695 break;
25696 }
25697 pos--;
25698 }
25699 }
25700 else if (load_store_pendulum == -1)
25701 {
25702 /* A store has been issued in this cycle. Scan the ready list for
25703 another store to issue with it, preferring a store to an adjacent
25704 memory location */
25705 int first_store_pos = -1;
25706
25707 pos = *pn_ready - 1;
25708
25709 while (pos >= 0)
25710 {
25711 if (is_store_insn (ready[pos], &str_mem))
25712 {
25713 rtx str_mem2;
25714 /* Maintain the index of the first store found on the
25715 list */
25716 if (first_store_pos == -1)
25717 first_store_pos = pos;
25718
25719 if (is_store_insn (last_scheduled_insn, &str_mem2)
25720 && adjacent_mem_locations (str_mem, str_mem2))
25721 {
25722 /* Found an adjacent store. Move it to the head of the
25723 ready list, and adjust its priority so that it is
25724 more likely to stay there. */
25725 tmp = ready[pos];
25726 for (i = pos; i < *pn_ready - 1; i++)
25727 ready[i] = ready[i + 1];
25728 ready[*pn_ready - 1] = tmp;
25729
25730 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
25731 INSN_PRIORITY (tmp)++;
25732
25733 first_store_pos = -1;
25734
25735 break;
25736 }
25737 }
25738 pos--;
25739 }
25740
25741 if (first_store_pos >= 0)
25742 {
25743 /* An adjacent store wasn't found, but a non-adjacent store was,
25744 so move the non-adjacent store to the front of the ready
25745 list, and adjust its priority so that it is more likely to
25746 stay there. */
25747 tmp = ready[first_store_pos];
25748 for (i = first_store_pos; i < *pn_ready - 1; i++)
25749 ready[i] = ready[i + 1];
25750 ready[*pn_ready - 1] = tmp;
25751 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
25752 INSN_PRIORITY (tmp)++;
25753 }
25754 }
25755 else if (load_store_pendulum == 2)
25756 {
25757 /* Two loads have been issued in this cycle. Increase the priority
25758 of the first store in the ready list to favor it for issuing in
25759 the next cycle. */
25760 pos = *pn_ready - 1;
25761
25762 while (pos >= 0)
25763 {
25764 if (is_store_insn (ready[pos], &str_mem)
25765 && !sel_sched_p ()
25766 && INSN_PRIORITY_KNOWN (ready[pos]))
25767 {
25768 INSN_PRIORITY (ready[pos])++;
25769
25770 /* Adjust the pendulum to account for the fact that a store
25771 was found and increased in priority. This is to prevent
25772 increasing the priority of multiple stores */
25773 load_store_pendulum++;
25774
25775 break;
25776 }
25777 pos--;
25778 }
25779 }
25780 }
25781
25782 return cached_can_issue_more;
25783 }
25784
25785 /* Return whether the presence of INSN causes a dispatch group termination
25786 of group WHICH_GROUP.
25787
25788 If WHICH_GROUP == current_group, this function will return true if INSN
25789 causes the termination of the current group (i.e., the dispatch group to
25790 which INSN belongs). This means that INSN will be the last insn in the
25791 group it belongs to.
25792
25793 If WHICH_GROUP == previous_group, this function will return true if INSN
25794 causes the termination of the previous group (i.e., the dispatch group that
25795 precedes the group to which INSN belongs). This means that INSN will be
25796 the first insn in the group it belongs to. */
25797
25798 static bool
25799 insn_terminates_group_p (rtx insn, enum group_termination which_group)
25800 {
25801 bool first, last;
25802
25803 if (! insn)
25804 return false;
25805
25806 first = insn_must_be_first_in_group (insn);
25807 last = insn_must_be_last_in_group (insn);
25808
25809 if (first && last)
25810 return true;
25811
25812 if (which_group == current_group)
25813 return last;
25814 else if (which_group == previous_group)
25815 return first;
25816
25817 return false;
25818 }
25819
25820
25821 static bool
25822 insn_must_be_first_in_group (rtx insn)
25823 {
25824 enum attr_type type;
25825
25826 if (!insn
25827 || NOTE_P (insn)
25828 || DEBUG_INSN_P (insn)
25829 || GET_CODE (PATTERN (insn)) == USE
25830 || GET_CODE (PATTERN (insn)) == CLOBBER)
25831 return false;
25832
25833 switch (rs6000_cpu)
25834 {
25835 case PROCESSOR_POWER5:
25836 if (is_cracked_insn (insn))
25837 return true;
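/* Fall through. */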
25838 case PROCESSOR_POWER4:
25839 if (is_microcoded_insn (insn))
25840 return true;
25841
25842 if (!rs6000_sched_groups)
25843 return false;
25844
25845 type = get_attr_type (insn);
25846
25847 switch (type)
25848 {
25849 case TYPE_MFCR:
25850 case TYPE_MFCRF:
25851 case TYPE_MTCR:
25852 case TYPE_DELAYED_CR:
25853 case TYPE_CR_LOGICAL:
25854 case TYPE_MTJMPR:
25855 case TYPE_MFJMPR:
25856 case TYPE_IDIV:
25857 case TYPE_LDIV:
25858 case TYPE_LOAD_L:
25859 case TYPE_STORE_C:
25860 case TYPE_ISYNC:
25861 case TYPE_SYNC:
25862 return true;
25863 default:
25864 break;
25865 }
25866 break;
25867 case PROCESSOR_POWER6:
25868 type = get_attr_type (insn);
25869
25870 switch (type)
25871 {
25872 case TYPE_INSERT_DWORD:
25873 case TYPE_EXTS:
25874 case TYPE_CNTLZ:
25875 case TYPE_SHIFT:
25876 case TYPE_VAR_SHIFT_ROTATE:
25877 case TYPE_TRAP:
25878 case TYPE_IMUL:
25879 case TYPE_IMUL2:
25880 case TYPE_IMUL3:
25881 case TYPE_LMUL:
25882 case TYPE_IDIV:
25883 case TYPE_INSERT_WORD:
25884 case TYPE_DELAYED_COMPARE:
25885 case TYPE_IMUL_COMPARE:
25886 case TYPE_LMUL_COMPARE:
25887 case TYPE_FPCOMPARE:
25888 case TYPE_MFCR:
25889 case TYPE_MTCR:
25890 case TYPE_MFJMPR:
25891 case TYPE_MTJMPR:
25892 case TYPE_ISYNC:
25893 case TYPE_SYNC:
25894 case TYPE_LOAD_L:
25895 case TYPE_STORE_C:
25896 case TYPE_LOAD_U:
25897 case TYPE_LOAD_UX:
25898 case TYPE_LOAD_EXT_UX:
25899 case TYPE_STORE_U:
25900 case TYPE_STORE_UX:
25901 case TYPE_FPLOAD_U:
25902 case TYPE_FPLOAD_UX:
25903 case TYPE_FPSTORE_U:
25904 case TYPE_FPSTORE_UX:
25905 return true;
25906 default:
25907 break;
25908 }
25909 break;
25910 case PROCESSOR_POWER7:
25911 type = get_attr_type (insn);
25912
25913 switch (type)
25914 {
25915 case TYPE_CR_LOGICAL:
25916 case TYPE_MFCR:
25917 case TYPE_MFCRF:
25918 case TYPE_MTCR:
25919 case TYPE_IDIV:
25920 case TYPE_LDIV:
25921 case TYPE_COMPARE:
25922 case TYPE_DELAYED_COMPARE:
25923 case TYPE_VAR_DELAYED_COMPARE:
25924 case TYPE_ISYNC:
25925 case TYPE_LOAD_L:
25926 case TYPE_STORE_C:
25927 case TYPE_LOAD_U:
25928 case TYPE_LOAD_UX:
25929 case TYPE_LOAD_EXT:
25930 case TYPE_LOAD_EXT_U:
25931 case TYPE_LOAD_EXT_UX:
25932 case TYPE_STORE_U:
25933 case TYPE_STORE_UX:
25934 case TYPE_FPLOAD_U:
25935 case TYPE_FPLOAD_UX:
25936 case TYPE_FPSTORE_U:
25937 case TYPE_FPSTORE_UX:
25938 case TYPE_MFJMPR:
25939 case TYPE_MTJMPR:
25940 return true;
25941 default:
25942 break;
25943 }
25944 break;
25945 case PROCESSOR_POWER8:
25946 type = get_attr_type (insn);
25947
25948 switch (type)
25949 {
25950 case TYPE_CR_LOGICAL:
25951 case TYPE_DELAYED_CR:
25952 case TYPE_MFCR:
25953 case TYPE_MFCRF:
25954 case TYPE_MTCR:
25955 case TYPE_COMPARE:
25956 case TYPE_DELAYED_COMPARE:
25957 case TYPE_VAR_DELAYED_COMPARE:
25958 case TYPE_IMUL_COMPARE:
25959 case TYPE_LMUL_COMPARE:
25960 case TYPE_SYNC:
25961 case TYPE_ISYNC:
25962 case TYPE_LOAD_L:
25963 case TYPE_STORE_C:
25964 case TYPE_LOAD_U:
25965 case TYPE_LOAD_UX:
25966 case TYPE_LOAD_EXT:
25967 case TYPE_LOAD_EXT_U:
25968 case TYPE_LOAD_EXT_UX:
25969 case TYPE_STORE_UX:
25970 case TYPE_VECSTORE:
25971 case TYPE_MFJMPR:
25972 case TYPE_MTJMPR:
25973 return true;
25974 default:
25975 break;
25976 }
25977 break;
25978 default:
25979 break;
25980 }
25981
25982 return false;
25983 }
25984
25985 static bool
25986 insn_must_be_last_in_group (rtx insn)
25987 {
25988 enum attr_type type;
25989
25990 if (!insn
25991 || NOTE_P (insn)
25992 || DEBUG_INSN_P (insn)
25993 || GET_CODE (PATTERN (insn)) == USE
25994 || GET_CODE (PATTERN (insn)) == CLOBBER)
25995 return false;
25996
25997 switch (rs6000_cpu) {
25998 case PROCESSOR_POWER4:
25999 case PROCESSOR_POWER5:
26000 if (is_microcoded_insn (insn))
26001 return true;
26002
26003 if (is_branch_slot_insn (insn))
26004 return true;
26005
26006 break;
26007 case PROCESSOR_POWER6:
26008 type = get_attr_type (insn);
26009
26010 switch (type)
26011 {
26012 case TYPE_EXTS:
26013 case TYPE_CNTLZ:
26014 case TYPE_SHIFT:
26015 case TYPE_VAR_SHIFT_ROTATE:
26016 case TYPE_TRAP:
26017 case TYPE_IMUL:
26018 case TYPE_IMUL2:
26019 case TYPE_IMUL3:
26020 case TYPE_LMUL:
26021 case TYPE_IDIV:
26022 case TYPE_DELAYED_COMPARE:
26023 case TYPE_IMUL_COMPARE:
26024 case TYPE_LMUL_COMPARE:
26025 case TYPE_FPCOMPARE:
26026 case TYPE_MFCR:
26027 case TYPE_MTCR:
26028 case TYPE_MFJMPR:
26029 case TYPE_MTJMPR:
26030 case TYPE_ISYNC:
26031 case TYPE_SYNC:
26032 case TYPE_LOAD_L:
26033 case TYPE_STORE_C:
26034 return true;
26035 default:
26036 break;
26037 }
26038 break;
26039 case PROCESSOR_POWER7:
26040 type = get_attr_type (insn);
26041
26042 switch (type)
26043 {
26044 case TYPE_ISYNC:
26045 case TYPE_SYNC:
26046 case TYPE_LOAD_L:
26047 case TYPE_STORE_C:
26048 case TYPE_LOAD_EXT_U:
26049 case TYPE_LOAD_EXT_UX:
26050 case TYPE_STORE_UX:
26051 return true;
26052 default:
26053 break;
26054 }
26055 break;
26056 case PROCESSOR_POWER8:
26057 type = get_attr_type (insn);
26058
26059 switch (type)
26060 {
26061 case TYPE_MFCR:
26062 case TYPE_MTCR:
26063 case TYPE_ISYNC:
26064 case TYPE_SYNC:
26065 case TYPE_LOAD_L:
26066 case TYPE_STORE_C:
26067 case TYPE_LOAD_EXT_U:
26068 case TYPE_LOAD_EXT_UX:
26069 case TYPE_STORE_UX:
26070 return true;
26071 default:
26072 break;
26073 }
26074 break;
26075 default:
26076 break;
26077 }
26078
26079 return false;
26080 }
26081
26082 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
26083 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
26084
26085 static bool
26086 is_costly_group (rtx *group_insns, rtx next_insn)
26087 {
26088 int i;
26089 int issue_rate = rs6000_issue_rate ();
26090
26091 for (i = 0; i < issue_rate; i++)
26092 {
26093 sd_iterator_def sd_it;
26094 dep_t dep;
26095 rtx insn = group_insns[i];
26096
26097 if (!insn)
26098 continue;
26099
26100 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
26101 {
26102 rtx next = DEP_CON (dep);
26103
26104 if (next == next_insn
26105 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
26106 return true;
26107 }
26108 }
26109
26110 return false;
26111 }
26112
26113 /* Utility function for redefine_groups.
26114 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
26115 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
26116 to keep it "far" (in a separate group) from GROUP_INSNS, following
26117 one of the following schemes, depending on the value of the flag
26118 -minsert-sched-nops = X:
26119 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
26120 in order to force NEXT_INSN into a separate group.
26121 (2) X < sched_finish_regroup_exact: insert exactly X nops.
26122 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
26123 insertion (has a group just ended, how many vacant issue slots remain in the
26124 last group, and how many dispatch groups were encountered so far). */
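
/* A sketch of the exact scheme on POWER4/5: if two issue slots remain
free (can_issue_more == 2) and NEXT_INSN is not a branch, a single nop
suffices to force NEXT_INSN into the next group, because the one
remaining slot is the branch-only slot. On POWER6 and later a single
group-ending nop closes the group instead. */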
26125
26126 static int
26127 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
26128 rtx next_insn, bool *group_end, int can_issue_more,
26129 int *group_count)
26130 {
26131 rtx nop;
26132 bool force;
26133 int issue_rate = rs6000_issue_rate ();
26134 bool end = *group_end;
26135 int i;
26136
26137 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
26138 return can_issue_more;
26139
26140 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
26141 return can_issue_more;
26142
26143 force = is_costly_group (group_insns, next_insn);
26144 if (!force)
26145 return can_issue_more;
26146
26147 if (sched_verbose > 6)
26148 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
26149 	 *group_count, can_issue_more);
26150
26151 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
26152 {
26153 if (*group_end)
26154 can_issue_more = 0;
26155
26156 /* Since only a branch can be issued in the last issue_slot, it is
26157 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
26158 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
26159 in this case the last nop will start a new group and the branch
26160 will be forced to the new group. */
26161 if (can_issue_more && !is_branch_slot_insn (next_insn))
26162 can_issue_more--;
26163
26164 /* Do we have a special group ending nop? */
26165 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
26166 || rs6000_cpu_attr == CPU_POWER8)
26167 {
26168 nop = gen_group_ending_nop ();
26169 emit_insn_before (nop, next_insn);
26170 can_issue_more = 0;
26171 }
26172 else
26173 while (can_issue_more > 0)
26174 {
26175 nop = gen_nop ();
26176 emit_insn_before (nop, next_insn);
26177 can_issue_more--;
26178 }
26179
26180 *group_end = true;
26181 return 0;
26182 }
26183
26184 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
26185 {
26186 int n_nops = rs6000_sched_insert_nops;
26187
26188 /* Nops can't be issued from the branch slot, so the effective
26189 issue_rate for nops is 'issue_rate - 1'. */
26190 if (can_issue_more == 0)
26191 can_issue_more = issue_rate;
26192 can_issue_more--;
26193 if (can_issue_more == 0)
26194 {
26195 can_issue_more = issue_rate - 1;
26196 (*group_count)++;
26197 end = true;
26198 for (i = 0; i < issue_rate; i++)
26199 {
26200 group_insns[i] = 0;
26201 }
26202 }
26203
26204 while (n_nops > 0)
26205 {
26206 nop = gen_nop ();
26207 emit_insn_before (nop, next_insn);
26208 if (can_issue_more == issue_rate - 1) /* new group begins */
26209 end = false;
26210 can_issue_more--;
26211 if (can_issue_more == 0)
26212 {
26213 can_issue_more = issue_rate - 1;
26214 (*group_count)++;
26215 end = true;
26216 for (i = 0; i < issue_rate; i++)
26217 {
26218 group_insns[i] = 0;
26219 }
26220 }
26221 n_nops--;
26222 }
26223
26224 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
26225 can_issue_more++;
26226
26227 /* Is next_insn going to start a new group? */
26228 *group_end
26229 = (end
26230 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
26231 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
26232 || (can_issue_more < issue_rate &&
26233 insn_terminates_group_p (next_insn, previous_group)));
26234 if (*group_end && end)
26235 (*group_count)--;
26236
26237 if (sched_verbose > 6)
26238 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
26239 *group_count, can_issue_more);
26240 return can_issue_more;
26241 }
26242
26243 return can_issue_more;
26244 }
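
/* A brief sketch of the arithmetic above: nops cannot go in the branch
   slot, so while nops are being emitted can_issue_more is counted
   against 'issue_rate - 1' slots per group and scaled back afterwards.
   For example (hypothetical values), with issue_rate == 4 and
   -minsert-sched-nops=2, each costly point gets exactly two nops, and
   group_count advances whenever the nops exhaust a group's slots.  */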
26245
26246 /* This function tries to synch the dispatch groups that the compiler "sees"
26247 with the dispatch groups that the processor dispatcher is expected to
26248 form in practice. It tries to achieve this synchronization by forcing the
26249 estimated processor grouping on the compiler (as opposed to the function
26250    'pad_groups' which tries to force the scheduler's grouping on the processor).
26251
26252 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
26253 examines the (estimated) dispatch groups that will be formed by the processor
26254 dispatcher. It marks these group boundaries to reflect the estimated
26255 processor grouping, overriding the grouping that the scheduler had marked.
26256 Depending on the value of the flag '-minsert-sched-nops' this function can
26257 force certain insns into separate groups or force a certain distance between
26258 them by inserting nops, for example, if there exists a "costly dependence"
26259 between the insns.
26260
26261 The function estimates the group boundaries that the processor will form as
26262 follows: It keeps track of how many vacant issue slots are available after
26263 each insn. A subsequent insn will start a new group if one of the following
26264 4 cases applies:
26265 - no more vacant issue slots remain in the current dispatch group.
26266 - only the last issue slot, which is the branch slot, is vacant, but the next
26267 insn is not a branch.
26268 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
26269   which means that a cracked insn (which occupies two issue slots) can't be
26270   issued in this group.
26271 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
26272   start a new group. */
26273
26274 static int
26275 redefine_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail)
26276 {
26277 rtx insn, next_insn;
26278 int issue_rate;
26279 int can_issue_more;
26280 int slot, i;
26281 bool group_end;
26282 int group_count = 0;
26283 rtx *group_insns;
26284
26285 /* Initialize. */
26286 issue_rate = rs6000_issue_rate ();
26287 group_insns = XALLOCAVEC (rtx, issue_rate);
26288 for (i = 0; i < issue_rate; i++)
26289 {
26290 group_insns[i] = 0;
26291 }
26292 can_issue_more = issue_rate;
26293 slot = 0;
26294 insn = get_next_active_insn (prev_head_insn, tail);
26295 group_end = false;
26296
26297 while (insn != NULL_RTX)
26298 {
26299 slot = (issue_rate - can_issue_more);
26300 group_insns[slot] = insn;
26301 can_issue_more =
26302 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
26303 if (insn_terminates_group_p (insn, current_group))
26304 can_issue_more = 0;
26305
26306 next_insn = get_next_active_insn (insn, tail);
26307 if (next_insn == NULL_RTX)
26308 return group_count + 1;
26309
26310 /* Is next_insn going to start a new group? */
26311 group_end
26312 = (can_issue_more == 0
26313 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
26314 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
26315 || (can_issue_more < issue_rate &&
26316 insn_terminates_group_p (next_insn, previous_group)));
26317
26318 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
26319 next_insn, &group_end, can_issue_more,
26320 &group_count);
26321
26322 if (group_end)
26323 {
26324 group_count++;
26325 can_issue_more = 0;
26326 for (i = 0; i < issue_rate; i++)
26327 {
26328 group_insns[i] = 0;
26329 }
26330 }
26331
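      /* The scheduler marks the first insn of a dispatch group by giving
	 it TImode; keep that marking consistent with the boundaries just
	 recomputed: clear it if the current group still has room for
	 next_insn, set it if next_insn must start a new group.  */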
26332 if (GET_MODE (next_insn) == TImode && can_issue_more)
26333 PUT_MODE (next_insn, VOIDmode);
26334 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
26335 PUT_MODE (next_insn, TImode);
26336
26337 insn = next_insn;
26338 if (can_issue_more == 0)
26339 can_issue_more = issue_rate;
26340 } /* while */
26341
26342 return group_count;
26343 }
26344
26345 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
26346 dispatch group boundaries that the scheduler had marked. Pad with nops
26347 any dispatch groups which have vacant issue slots, in order to force the
26348 scheduler's grouping on the processor dispatcher. The function
26349 returns the number of dispatch groups found. */
26350
26351 static int
26352 pad_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail)
26353 {
26354 rtx insn, next_insn;
26355 rtx nop;
26356 int issue_rate;
26357 int can_issue_more;
26358 int group_end;
26359 int group_count = 0;
26360
26361 /* Initialize issue_rate. */
26362 issue_rate = rs6000_issue_rate ();
26363 can_issue_more = issue_rate;
26364
26365 insn = get_next_active_insn (prev_head_insn, tail);
26366 next_insn = get_next_active_insn (insn, tail);
26367
26368 while (insn != NULL_RTX)
26369 {
26370 can_issue_more =
26371 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
26372
26373 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
26374
26375 if (next_insn == NULL_RTX)
26376 break;
26377
26378 if (group_end)
26379 {
26380 /* If the scheduler had marked group termination at this location
26381 (between insn and next_insn), and neither insn nor next_insn will
26382 force group termination, pad the group with nops to force group
26383 termination. */
26384 if (can_issue_more
26385 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
26386 && !insn_terminates_group_p (insn, current_group)
26387 && !insn_terminates_group_p (next_insn, previous_group))
26388 {
26389 if (!is_branch_slot_insn (next_insn))
26390 can_issue_more--;
26391
26392 while (can_issue_more)
26393 {
26394 nop = gen_nop ();
26395 emit_insn_before (nop, next_insn);
26396 can_issue_more--;
26397 }
26398 }
26399
26400 can_issue_more = issue_rate;
26401 group_count++;
26402 }
26403
26404 insn = next_insn;
26405 next_insn = get_next_active_insn (insn, tail);
26406 }
26407
26408 return group_count;
26409 }
26410
26411 /* We're beginning a new block. Initialize data structures as necessary. */
26412
26413 static void
26414 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
26415 int sched_verbose ATTRIBUTE_UNUSED,
26416 int max_ready ATTRIBUTE_UNUSED)
26417 {
26418 last_scheduled_insn = NULL_RTX;
26419 load_store_pendulum = 0;
26420 }
26421
26422 /* The following function is called at the end of scheduling BB.
26423    After reload, it inserts nops to enforce insn group bundling.  */
26424
26425 static void
26426 rs6000_sched_finish (FILE *dump, int sched_verbose)
26427 {
26428 int n_groups;
26429
26430 if (sched_verbose)
26431 fprintf (dump, "=== Finishing schedule.\n");
26432
26433 if (reload_completed && rs6000_sched_groups)
26434 {
26435 /* Do not run sched_finish hook when selective scheduling enabled. */
26436 if (sel_sched_p ())
26437 return;
26438
26439 if (rs6000_sched_insert_nops == sched_finish_none)
26440 return;
26441
26442 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
26443 n_groups = pad_groups (dump, sched_verbose,
26444 current_sched_info->prev_head,
26445 current_sched_info->next_tail);
26446 else
26447 n_groups = redefine_groups (dump, sched_verbose,
26448 current_sched_info->prev_head,
26449 current_sched_info->next_tail);
26450
26451 if (sched_verbose >= 6)
26452 {
26453 fprintf (dump, "ngroups = %d\n", n_groups);
26454 print_rtl (dump, current_sched_info->prev_head);
26455 fprintf (dump, "Done finish_sched\n");
26456 }
26457 }
26458 }
26459
26460 struct _rs6000_sched_context
26461 {
26462 short cached_can_issue_more;
26463 rtx last_scheduled_insn;
26464 int load_store_pendulum;
26465 };
26466
26467 typedef struct _rs6000_sched_context rs6000_sched_context_def;
26468 typedef rs6000_sched_context_def *rs6000_sched_context_t;
26469
26470 /* Allocate store for new scheduling context. */
26471 static void *
26472 rs6000_alloc_sched_context (void)
26473 {
26474 return xmalloc (sizeof (rs6000_sched_context_def));
26475 }
26476
26477 /* If CLEAN_P is true, initialize _SC with clean data;
26478    otherwise initialize it from the global context.  */
26479 static void
26480 rs6000_init_sched_context (void *_sc, bool clean_p)
26481 {
26482 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
26483
26484 if (clean_p)
26485 {
26486 sc->cached_can_issue_more = 0;
26487 sc->last_scheduled_insn = NULL_RTX;
26488 sc->load_store_pendulum = 0;
26489 }
26490 else
26491 {
26492 sc->cached_can_issue_more = cached_can_issue_more;
26493 sc->last_scheduled_insn = last_scheduled_insn;
26494 sc->load_store_pendulum = load_store_pendulum;
26495 }
26496 }
26497
26498 /* Sets the global scheduling context to the one pointed to by _SC. */
26499 static void
26500 rs6000_set_sched_context (void *_sc)
26501 {
26502 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
26503
26504 gcc_assert (sc != NULL);
26505
26506 cached_can_issue_more = sc->cached_can_issue_more;
26507 last_scheduled_insn = sc->last_scheduled_insn;
26508 load_store_pendulum = sc->load_store_pendulum;
26509 }
26510
26511 /* Free _SC. */
26512 static void
26513 rs6000_free_sched_context (void *_sc)
26514 {
26515 gcc_assert (_sc != NULL);
26516
26517 free (_sc);
26518 }
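
/* The four context routines above back the
   TARGET_SCHED_{ALLOC,INIT,SET,FREE}_SCHED_CONTEXT hooks; they let the
   selective scheduler save and restore this backend's private scheduling
   state (cached issue count, last scheduled insn, and the load/store
   pendulum) when it moves between scheduling points.  */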
26519
26520 \f
26521 /* Length in bytes of the trampoline for entering a nested function. */
26522
26523 int
26524 rs6000_trampoline_size (void)
26525 {
26526 int ret = 0;
26527
26528 switch (DEFAULT_ABI)
26529 {
26530 default:
26531 gcc_unreachable ();
26532
26533 case ABI_AIX:
26534 ret = (TARGET_32BIT) ? 12 : 24;
26535 break;
26536
26537 case ABI_DARWIN:
26538 case ABI_V4:
26539 ret = (TARGET_32BIT) ? 40 : 48;
26540 break;
26541 }
26542
26543 return ret;
26544 }
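
/* The AIX sizes above cover a bare 3-word function descriptor (entry
   address, TOC pointer, static chain): 3*4 = 12 bytes for 32-bit and
   3*8 = 24 bytes for 64-bit.  The V.4 and Darwin trampolines contain
   actual code, filled in at run time by __trampoline_setup, hence the
   larger sizes.  */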
26545
26546 /* Emit RTL insns to initialize the variable parts of a trampoline.
26547 FNADDR is an RTX for the address of the function's pure code.
26548 CXT is an RTX for the static chain value for the function. */
26549
26550 static void
26551 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
26552 {
26553 int regsize = (TARGET_32BIT) ? 4 : 8;
26554 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
26555 rtx ctx_reg = force_reg (Pmode, cxt);
26556 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
26557
26558 switch (DEFAULT_ABI)
26559 {
26560 default:
26561 gcc_unreachable ();
26562
26563     /* Under AIX, just build the 3-word function descriptor.  */
26564 case ABI_AIX:
26565 {
26566 rtx fnmem, fn_reg, toc_reg;
26567
26568 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
26569 error ("You cannot take the address of a nested function if you use "
26570 "the -mno-pointers-to-nested-functions option.");
26571
26572 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
26573 fn_reg = gen_reg_rtx (Pmode);
26574 toc_reg = gen_reg_rtx (Pmode);
26575
26576 /* Macro to shorten the code expansions below. */
26577 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
26578
26579 m_tramp = replace_equiv_address (m_tramp, addr);
26580
26581 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
26582 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
26583 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
26584 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
26585 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
26586
26587 # undef MEM_PLUS
26588 }
26589 break;
26590
26591 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
26592 case ABI_DARWIN:
26593 case ABI_V4:
26594 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
26595 LCT_NORMAL, VOIDmode, 4,
26596 addr, Pmode,
26597 GEN_INT (rs6000_trampoline_size ()), SImode,
26598 fnaddr, Pmode,
26599 ctx_reg, Pmode);
26600 break;
26601 }
26602 }
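
/* For the AIX case above, the initialized trampoline is itself a
   function descriptor:
     tramp + 0*regsize: code entry address (copied from FNADDR's descriptor)
     tramp + 1*regsize: TOC pointer        (likewise)
     tramp + 2*regsize: static chain value (CXT)
   so a call through the trampoline behaves like a call to FNADDR with
   the static chain loaded from the third word.  */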
26603
26604 \f
26605 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
26606 identifier as an argument, so the front end shouldn't look it up. */
26607
26608 static bool
26609 rs6000_attribute_takes_identifier_p (const_tree attr_id)
26610 {
26611 return is_attribute_p ("altivec", attr_id);
26612 }
26613
26614 /* Handle the "altivec" attribute. The attribute may have
26615 arguments as follows:
26616
26617 __attribute__((altivec(vector__)))
26618 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
26619 __attribute__((altivec(bool__))) (always followed by 'unsigned')
26620
26621 and may appear more than once (e.g., 'vector bool char') in a
26622 given declaration. */
26623
26624 static tree
26625 rs6000_handle_altivec_attribute (tree *node,
26626 tree name ATTRIBUTE_UNUSED,
26627 tree args,
26628 int flags ATTRIBUTE_UNUSED,
26629 bool *no_add_attrs)
26630 {
26631 tree type = *node, result = NULL_TREE;
26632 enum machine_mode mode;
26633 int unsigned_p;
26634 char altivec_type
26635 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
26636 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
26637 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
26638 : '?');
26639
26640 while (POINTER_TYPE_P (type)
26641 || TREE_CODE (type) == FUNCTION_TYPE
26642 || TREE_CODE (type) == METHOD_TYPE
26643 || TREE_CODE (type) == ARRAY_TYPE)
26644 type = TREE_TYPE (type);
26645
26646 mode = TYPE_MODE (type);
26647
26648 /* Check for invalid AltiVec type qualifiers. */
26649 if (type == long_double_type_node)
26650 error ("use of %<long double%> in AltiVec types is invalid");
26651 else if (type == boolean_type_node)
26652 error ("use of boolean types in AltiVec types is invalid");
26653 else if (TREE_CODE (type) == COMPLEX_TYPE)
26654 error ("use of %<complex%> in AltiVec types is invalid");
26655 else if (DECIMAL_FLOAT_MODE_P (mode))
26656 error ("use of decimal floating point types in AltiVec types is invalid");
26657 else if (!TARGET_VSX)
26658 {
26659 if (type == long_unsigned_type_node || type == long_integer_type_node)
26660 {
26661 if (TARGET_64BIT)
26662 error ("use of %<long%> in AltiVec types is invalid for "
26663 "64-bit code without -mvsx");
26664 else if (rs6000_warn_altivec_long)
26665 warning (0, "use of %<long%> in AltiVec types is deprecated; "
26666 "use %<int%>");
26667 }
26668 else if (type == long_long_unsigned_type_node
26669 || type == long_long_integer_type_node)
26670 error ("use of %<long long%> in AltiVec types is invalid without "
26671 "-mvsx");
26672 else if (type == double_type_node)
26673 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
26674 }
26675
26676 switch (altivec_type)
26677 {
26678 case 'v':
26679 unsigned_p = TYPE_UNSIGNED (type);
26680 switch (mode)
26681 {
26682 case DImode:
26683 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
26684 break;
26685 case SImode:
26686 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
26687 break;
26688 case HImode:
26689 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
26690 break;
26691 case QImode:
26692 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
26693 break;
26694 case SFmode: result = V4SF_type_node; break;
26695 case DFmode: result = V2DF_type_node; break;
26696 /* If the user says 'vector int bool', we may be handed the 'bool'
26697 attribute _before_ the 'vector' attribute, and so select the
26698 proper type in the 'b' case below. */
26699 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
26700 case V2DImode: case V2DFmode:
26701 	  result = type; break;
26702 default: break;
26703 }
26704 break;
26705 case 'b':
26706 switch (mode)
26707 {
26708 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
26709 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
26710 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
26711 	case QImode: case V16QImode: result = bool_V16QI_type_node; break;
26712 default: break;
26713 }
26714 break;
26715 case 'p':
26716 switch (mode)
26717 {
26718 	case V8HImode: result = pixel_V8HI_type_node; break;
26719 default: break;
26720 }
26721 default: break;
26722 }
26723
26724 /* Propagate qualifiers attached to the element type
26725 onto the vector type. */
26726 if (result && result != type && TYPE_QUALS (type))
26727 result = build_qualified_type (result, TYPE_QUALS (type));
26728
26729 *no_add_attrs = true; /* No need to hang on to the attribute. */
26730
26731 if (result)
26732 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
26733
26734 return NULL_TREE;
26735 }
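
/* For example (user-level spelling of the forms listed above):

     __attribute__ ((altivec (vector__))) unsigned int vui;
     __attribute__ ((altivec (bool__))) unsigned int vbi;

   rewrites the type of 'vui' to unsigned_V4SI_type_node (SImode,
   unsigned, 'v' case) and the type of 'vbi' to bool_V4SI_type_node
   ('b' case).  These attributes are what the context-sensitive
   'vector' and 'vector bool' keywords expand to.  */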
26736
26737 /* AltiVec defines four built-in scalar types that serve as vector
26738 elements; we must teach the compiler how to mangle them. */
26739
26740 static const char *
26741 rs6000_mangle_type (const_tree type)
26742 {
26743 type = TYPE_MAIN_VARIANT (type);
26744
26745 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
26746 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
26747 return NULL;
26748
26749 if (type == bool_char_type_node) return "U6__boolc";
26750 if (type == bool_short_type_node) return "U6__bools";
26751 if (type == pixel_type_node) return "u7__pixel";
26752 if (type == bool_int_type_node) return "U6__booli";
26753 if (type == bool_long_type_node) return "U6__booll";
26754
26755 /* Mangle IBM extended float long double as `g' (__float128) on
26756 powerpc*-linux where long-double-64 previously was the default. */
26757 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
26758 && TARGET_ELF
26759 && TARGET_LONG_DOUBLE_128
26760 && !TARGET_IEEEQUAD)
26761 return "g";
26762
26763 /* For all other types, use normal C++ mangling. */
26764 return NULL;
26765 }
26766
26767 /* Handle a "longcall" or "shortcall" attribute; arguments as in
26768 struct attribute_spec.handler. */
26769
26770 static tree
26771 rs6000_handle_longcall_attribute (tree *node, tree name,
26772 tree args ATTRIBUTE_UNUSED,
26773 int flags ATTRIBUTE_UNUSED,
26774 bool *no_add_attrs)
26775 {
26776 if (TREE_CODE (*node) != FUNCTION_TYPE
26777 && TREE_CODE (*node) != FIELD_DECL
26778 && TREE_CODE (*node) != TYPE_DECL)
26779 {
26780 warning (OPT_Wattributes, "%qE attribute only applies to functions",
26781 name);
26782 *no_add_attrs = true;
26783 }
26784
26785 return NULL_TREE;
26786 }
26787
26788 /* Set longcall attributes on all functions declared when
26789 rs6000_default_long_calls is true. */
26790 static void
26791 rs6000_set_default_type_attributes (tree type)
26792 {
26793 if (rs6000_default_long_calls
26794 && (TREE_CODE (type) == FUNCTION_TYPE
26795 || TREE_CODE (type) == METHOD_TYPE))
26796 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
26797 NULL_TREE,
26798 TYPE_ATTRIBUTES (type));
26799
26800 #if TARGET_MACHO
26801 darwin_set_default_type_attributes (type);
26802 #endif
26803 }
26804
26805 /* Return a reference suitable for calling a function with the
26806 longcall attribute. */
26807
26808 rtx
26809 rs6000_longcall_ref (rtx call_ref)
26810 {
26811 const char *call_name;
26812 tree node;
26813
26814 if (GET_CODE (call_ref) != SYMBOL_REF)
26815 return call_ref;
26816
26817   /* System V adds '.' to the internal name, so skip any leading dots.  */
26818 call_name = XSTR (call_ref, 0);
26819 if (*call_name == '.')
26820 {
26821 while (*call_name == '.')
26822 call_name++;
26823
26824 node = get_identifier (call_name);
26825 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
26826 }
26827
26828 return force_reg (Pmode, call_ref);
26829 }
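
/* Longcall treatment can be requested per function, e.g.:

     void far_away (void) __attribute__ ((longcall));

   or globally with -mlongcall, in which case
   rs6000_set_default_type_attributes (above) attaches the attribute to
   every function and method type.  */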
26830 \f
26831 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
26832 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
26833 #endif
26834
26835 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26836 struct attribute_spec.handler. */
26837 static tree
26838 rs6000_handle_struct_attribute (tree *node, tree name,
26839 tree args ATTRIBUTE_UNUSED,
26840 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26841 {
26842 tree *type = NULL;
26843 if (DECL_P (*node))
26844 {
26845 if (TREE_CODE (*node) == TYPE_DECL)
26846 type = &TREE_TYPE (*node);
26847 }
26848 else
26849 type = node;
26850
26851 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26852 || TREE_CODE (*type) == UNION_TYPE)))
26853 {
26854 warning (OPT_Wattributes, "%qE attribute ignored", name);
26855 *no_add_attrs = true;
26856 }
26857
26858 else if ((is_attribute_p ("ms_struct", name)
26859 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26860 || ((is_attribute_p ("gcc_struct", name)
26861 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26862 {
26863 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
26864 name);
26865 *no_add_attrs = true;
26866 }
26867
26868 return NULL_TREE;
26869 }
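
/* For example:

     struct __attribute__ ((ms_struct)) S { int a : 3; char b : 4; };

   requests Microsoft-compatible bitfield layout for S, while gcc_struct
   requests the native layout; as checked above, the two attributes are
   mutually exclusive on any given type.  */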
26870
26871 static bool
26872 rs6000_ms_bitfield_layout_p (const_tree record_type)
26873 {
26874 return ((TARGET_USE_MS_BITFIELD_LAYOUT
26875 	  && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26876 	 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
26877 }
26878 \f
26879 #ifdef USING_ELFOS_H
26880
26881 /* A get_unnamed_section callback, used for switching to toc_section. */
26882
26883 static void
26884 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
26885 {
26886 if (DEFAULT_ABI == ABI_AIX
26887 && TARGET_MINIMAL_TOC
26888 && !TARGET_RELOCATABLE)
26889 {
26890 if (!toc_initialized)
26891 {
26892 toc_initialized = 1;
26893 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
26894 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
26895 fprintf (asm_out_file, "\t.tc ");
26896 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
26897 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
26898 fprintf (asm_out_file, "\n");
26899
26900 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
26901 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
26902 fprintf (asm_out_file, " = .+32768\n");
26903 }
26904 else
26905 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
26906 }
26907 else if (DEFAULT_ABI == ABI_AIX && !TARGET_RELOCATABLE)
26908 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
26909 else
26910 {
26911 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
26912 if (!toc_initialized)
26913 {
26914 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
26915 fprintf (asm_out_file, " = .+32768\n");
26916 toc_initialized = 1;
26917 }
26918 }
26919 }
26920
26921 /* Implement TARGET_ASM_INIT_SECTIONS. */
26922
26923 static void
26924 rs6000_elf_asm_init_sections (void)
26925 {
26926 toc_section
26927 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
26928
26929 sdata2_section
26930 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
26931 SDATA2_SECTION_ASM_OP);
26932 }
26933
26934 /* Implement TARGET_SELECT_RTX_SECTION. */
26935
26936 static section *
26937 rs6000_elf_select_rtx_section (enum machine_mode mode, rtx x,
26938 unsigned HOST_WIDE_INT align)
26939 {
26940 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
26941 return toc_section;
26942 else
26943 return default_elf_select_rtx_section (mode, x, align);
26944 }
26945 \f
26946 /* For a SYMBOL_REF, set generic flags and then perform some
26947 target-specific processing.
26948
26949 When the AIX ABI is requested on a non-AIX system, replace the
26950 function name with the real name (with a leading .) rather than the
26951 function descriptor name. This saves a lot of overriding code to
26952 read the prefixes. */
26953
26954 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
26955 static void
26956 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
26957 {
26958 default_encode_section_info (decl, rtl, first);
26959
26960 if (first
26961 && TREE_CODE (decl) == FUNCTION_DECL
26962 && !TARGET_AIX
26963 && DEFAULT_ABI == ABI_AIX)
26964 {
26965 rtx sym_ref = XEXP (rtl, 0);
26966 size_t len = strlen (XSTR (sym_ref, 0));
26967 char *str = XALLOCAVEC (char, len + 2);
26968 str[0] = '.';
26969 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
26970 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
26971 }
26972 }
26973
26974 static inline bool
26975 compare_section_name (const char *section, const char *templ)
26976 {
26977 int len;
26978
26979 len = strlen (templ);
26980 return (strncmp (section, templ, len) == 0
26981 && (section[len] == 0 || section[len] == '.'));
26982 }
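
/* Thus compare_section_name (".sdata.foo", ".sdata") and
   compare_section_name (".sdata", ".sdata") are true, while
   compare_section_name (".sdata2", ".sdata") is false: the character
   following the matched prefix must be '\0' or '.'.  */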
26983
26984 bool
26985 rs6000_elf_in_small_data_p (const_tree decl)
26986 {
26987 if (rs6000_sdata == SDATA_NONE)
26988 return false;
26989
26990 /* We want to merge strings, so we never consider them small data. */
26991 if (TREE_CODE (decl) == STRING_CST)
26992 return false;
26993
26994 /* Functions are never in the small data area. */
26995 if (TREE_CODE (decl) == FUNCTION_DECL)
26996 return false;
26997
26998 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
26999 {
27000 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (decl));
27001 if (compare_section_name (section, ".sdata")
27002 || compare_section_name (section, ".sdata2")
27003 || compare_section_name (section, ".gnu.linkonce.s")
27004 || compare_section_name (section, ".sbss")
27005 || compare_section_name (section, ".sbss2")
27006 || compare_section_name (section, ".gnu.linkonce.sb")
27007 || strcmp (section, ".PPC.EMB.sdata0") == 0
27008 || strcmp (section, ".PPC.EMB.sbss0") == 0)
27009 return true;
27010 }
27011 else
27012 {
27013 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
27014
27015 if (size > 0
27016 && size <= g_switch_value
27017 /* If it's not public, and we're not going to reference it there,
27018 there's no need to put it in the small data section. */
27019 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
27020 return true;
27021 }
27022
27023 return false;
27024 }
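
/* g_switch_value is the byte threshold set by the -G option; writable
   objects no larger than it are treated as small data and placed in
   .sdata/.sbss, where they can be reached with a 16-bit offset from the
   small-data base register.  */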
27025
27026 #endif /* USING_ELFOS_H */
27027 \f
27028 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
27029
27030 static bool
27031 rs6000_use_blocks_for_constant_p (enum machine_mode mode, const_rtx x)
27032 {
27033 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
27034 }
27035
27036 /* Do not place thread-local symbols refs in the object blocks. */
27037
27038 static bool
27039 rs6000_use_blocks_for_decl_p (const_tree decl)
27040 {
27041 return !DECL_THREAD_LOCAL_P (decl);
27042 }
27043 \f
27044 /* Return a REG that occurs in ADDR with coefficient 1.
27045 ADDR can be effectively incremented by incrementing REG.
27046
27047 r0 is special and we must not select it as an address
27048 register by this routine since our caller will try to
27049 increment the returned register via an "la" instruction. */
27050
27051 rtx
27052 find_addr_reg (rtx addr)
27053 {
27054 while (GET_CODE (addr) == PLUS)
27055 {
27056 if (GET_CODE (XEXP (addr, 0)) == REG
27057 && REGNO (XEXP (addr, 0)) != 0)
27058 addr = XEXP (addr, 0);
27059 else if (GET_CODE (XEXP (addr, 1)) == REG
27060 && REGNO (XEXP (addr, 1)) != 0)
27061 addr = XEXP (addr, 1);
27062 else if (CONSTANT_P (XEXP (addr, 0)))
27063 addr = XEXP (addr, 1);
27064 else if (CONSTANT_P (XEXP (addr, 1)))
27065 addr = XEXP (addr, 0);
27066 else
27067 gcc_unreachable ();
27068 }
27069 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
27070 return addr;
27071 }
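
/* For example, for ADDR == (plus (reg 9) (const_int 16)) this returns
   (reg 9); for a sum of two registers, the first one is returned unless
   it is r0, in which case the second is chosen instead.  */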
27072
27073 void
27074 rs6000_fatal_bad_address (rtx op)
27075 {
27076 fatal_insn ("bad address", op);
27077 }
27078
27079 #if TARGET_MACHO
27080
27081 typedef struct branch_island_d {
27082 tree function_name;
27083 tree label_name;
27084 int line_number;
27085 } branch_island;
27086
27087
27088 static vec<branch_island, va_gc> *branch_islands;
27089
27090 /* Remember to generate a branch island for far calls to the given
27091 function. */
27092
27093 static void
27094 add_compiler_branch_island (tree label_name, tree function_name,
27095 int line_number)
27096 {
27097 branch_island bi = {function_name, label_name, line_number};
27098 vec_safe_push (branch_islands, bi);
27099 }
27100
27101 /* Generate far-jump branch islands for everything recorded in
27102 branch_islands. Invoked immediately after the last instruction of
27103 the epilogue has been emitted; the branch islands must be appended
27104 to, and contiguous with, the function body. Mach-O stubs are
27105 generated in machopic_output_stub(). */
27106
27107 static void
27108 macho_branch_islands (void)
27109 {
27110 char tmp_buf[512];
27111
27112 while (!vec_safe_is_empty (branch_islands))
27113 {
27114 branch_island *bi = &branch_islands->last ();
27115 const char *label = IDENTIFIER_POINTER (bi->label_name);
27116 const char *name = IDENTIFIER_POINTER (bi->function_name);
27117 char name_buf[512];
27118 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
27119 if (name[0] == '*' || name[0] == '&')
27120 strcpy (name_buf, name+1);
27121 else
27122 {
27123 name_buf[0] = '_';
27124 strcpy (name_buf+1, name);
27125 }
27126 strcpy (tmp_buf, "\n");
27127 strcat (tmp_buf, label);
27128 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
27129 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
27130 dbxout_stabd (N_SLINE, bi->line_number);
27131 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
27132 if (flag_pic)
27133 {
27134 if (TARGET_LINK_STACK)
27135 {
27136 char name[32];
27137 get_ppc476_thunk_name (name);
27138 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
27139 strcat (tmp_buf, name);
27140 strcat (tmp_buf, "\n");
27141 strcat (tmp_buf, label);
27142 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
27143 }
27144 else
27145 {
27146 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
27147 strcat (tmp_buf, label);
27148 strcat (tmp_buf, "_pic\n");
27149 strcat (tmp_buf, label);
27150 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
27151 }
27152
27153 strcat (tmp_buf, "\taddis r11,r11,ha16(");
27154 strcat (tmp_buf, name_buf);
27155 strcat (tmp_buf, " - ");
27156 strcat (tmp_buf, label);
27157 strcat (tmp_buf, "_pic)\n");
27158
27159 strcat (tmp_buf, "\tmtlr r0\n");
27160
27161 strcat (tmp_buf, "\taddi r12,r11,lo16(");
27162 strcat (tmp_buf, name_buf);
27163 strcat (tmp_buf, " - ");
27164 strcat (tmp_buf, label);
27165 strcat (tmp_buf, "_pic)\n");
27166
27167 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
27168 }
27169 else
27170 {
27171 strcat (tmp_buf, ":\nlis r12,hi16(");
27172 strcat (tmp_buf, name_buf);
27173 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
27174 strcat (tmp_buf, name_buf);
27175 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
27176 }
27177 output_asm_insn (tmp_buf, 0);
27178 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
27179 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
27180 dbxout_stabd (N_SLINE, bi->line_number);
27181 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
27182 branch_islands->pop ();
27183 }
27184 }
27185
27186 /* NO_PREVIOUS_DEF checks the branch-island vector for the function
27187    name; it returns nonzero iff no previous definition is recorded.  */
27188
27189 static int
27190 no_previous_def (tree function_name)
27191 {
27192 branch_island *bi;
27193 unsigned ix;
27194
27195 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
27196 if (function_name == bi->function_name)
27197 return 0;
27198 return 1;
27199 }
27200
27201 /* GET_PREV_LABEL gets the label name from the previous definition of
27202 the function. */
27203
27204 static tree
27205 get_prev_label (tree function_name)
27206 {
27207 branch_island *bi;
27208 unsigned ix;
27209
27210 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
27211 if (function_name == bi->function_name)
27212 return bi->label_name;
27213 return NULL_TREE;
27214 }
27215
27216 /* INSN is either a function call or a millicode call.
27217 
27218 
27219    CALL_DEST is the routine we are calling. */
27220
27221 char *
27222 output_call (rtx insn, rtx *operands, int dest_operand_number,
27223 int cookie_operand_number)
27224 {
27225 static char buf[256];
27226 if (darwin_emit_branch_islands
27227 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
27228 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
27229 {
27230 tree labelname;
27231 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
27232
27233 if (no_previous_def (funname))
27234 {
27235 rtx label_rtx = gen_label_rtx ();
27236 char *label_buf, temp_buf[256];
27237 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
27238 CODE_LABEL_NUMBER (label_rtx));
27239 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
27240 labelname = get_identifier (label_buf);
27241 add_compiler_branch_island (labelname, funname, insn_line (insn));
27242 }
27243 else
27244 labelname = get_prev_label (funname);
27245
27246 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
27247 instruction will reach 'foo', otherwise link as 'bl L42'".
27248 "L42" should be a 'branch island', that will do a far jump to
27249 'foo'. Branch islands are generated in
27250 macho_branch_islands(). */
27251 sprintf (buf, "jbsr %%z%d,%.246s",
27252 dest_operand_number, IDENTIFIER_POINTER (labelname));
27253 }
27254 else
27255 sprintf (buf, "bl %%z%d", dest_operand_number);
27256 return buf;
27257 }
27258
27259 /* Generate PIC and indirect symbol stubs. */
27260
27261 void
27262 machopic_output_stub (FILE *file, const char *symb, const char *stub)
27263 {
27264 unsigned int length;
27265 char *symbol_name, *lazy_ptr_name;
27266 char *local_label_0;
27267 static int label = 0;
27268
27269 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
27270 symb = (*targetm.strip_name_encoding) (symb);
27271
27272
27273 length = strlen (symb);
27274 symbol_name = XALLOCAVEC (char, length + 32);
27275 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
27276
27277 lazy_ptr_name = XALLOCAVEC (char, length + 32);
27278 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
27279
27280 if (flag_pic == 2)
27281 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
27282 else
27283 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
27284
27285 if (flag_pic == 2)
27286 {
27287 fprintf (file, "\t.align 5\n");
27288
27289 fprintf (file, "%s:\n", stub);
27290 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27291
27292 label++;
27293 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
27294 sprintf (local_label_0, "\"L%011d$spb\"", label);
27295
27296 fprintf (file, "\tmflr r0\n");
27297 if (TARGET_LINK_STACK)
27298 {
27299 char name[32];
27300 get_ppc476_thunk_name (name);
27301 fprintf (file, "\tbl %s\n", name);
27302 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
27303 }
27304 else
27305 {
27306 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
27307 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
27308 }
27309 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
27310 lazy_ptr_name, local_label_0);
27311 fprintf (file, "\tmtlr r0\n");
27312 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
27313 (TARGET_64BIT ? "ldu" : "lwzu"),
27314 lazy_ptr_name, local_label_0);
27315 fprintf (file, "\tmtctr r12\n");
27316 fprintf (file, "\tbctr\n");
27317 }
27318 else
27319 {
27320 fprintf (file, "\t.align 4\n");
27321
27322 fprintf (file, "%s:\n", stub);
27323 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27324
27325 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
27326 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
27327 (TARGET_64BIT ? "ldu" : "lwzu"),
27328 lazy_ptr_name);
27329 fprintf (file, "\tmtctr r12\n");
27330 fprintf (file, "\tbctr\n");
27331 }
27332
27333 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
27334 fprintf (file, "%s:\n", lazy_ptr_name);
27335 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
27336 fprintf (file, "%sdyld_stub_binding_helper\n",
27337 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
27338 }
27339
27340 /* Legitimize PIC addresses. If the address is already
27341 position-independent, we return ORIG. Newly generated
27342 position-independent addresses go into a reg. This is REG if non
27343 zero, otherwise we allocate register(s) as necessary. */
27344
27345 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
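
/* SMALL_INT tests whether X fits a signed 16-bit immediate: the biased
   value UINTVAL (X) + 0x8000 lies in [0, 0x10000) exactly when
   INTVAL (X) lies in [-32768, 32767].  */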
27346
27347 rtx
27348 rs6000_machopic_legitimize_pic_address (rtx orig, enum machine_mode mode,
27349 rtx reg)
27350 {
27351 rtx base, offset;
27352
27353 if (reg == NULL && ! reload_in_progress && ! reload_completed)
27354 reg = gen_reg_rtx (Pmode);
27355
27356 if (GET_CODE (orig) == CONST)
27357 {
27358 rtx reg_temp;
27359
27360 if (GET_CODE (XEXP (orig, 0)) == PLUS
27361 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
27362 return orig;
27363
27364 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
27365
27366 /* Use a different reg for the intermediate value, as
27367 it will be marked UNCHANGING. */
27368 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
27369 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
27370 Pmode, reg_temp);
27371 offset =
27372 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
27373 Pmode, reg);
27374
27375 if (GET_CODE (offset) == CONST_INT)
27376 {
27377 if (SMALL_INT (offset))
27378 return plus_constant (Pmode, base, INTVAL (offset));
27379 else if (! reload_in_progress && ! reload_completed)
27380 offset = force_reg (Pmode, offset);
27381 else
27382 {
27383 rtx mem = force_const_mem (Pmode, orig);
27384 return machopic_legitimize_pic_address (mem, Pmode, reg);
27385 }
27386 }
27387 return gen_rtx_PLUS (Pmode, base, offset);
27388 }
27389
27390 /* Fall back on generic machopic code. */
27391 return machopic_legitimize_pic_address (orig, mode, reg);
27392 }
27393
27394 /* Output a .machine directive for the Darwin assembler, and call
27395 the generic start_file routine. */
27396
27397 static void
27398 rs6000_darwin_file_start (void)
27399 {
27400 static const struct
27401 {
27402 const char *arg;
27403 const char *name;
27404 HOST_WIDE_INT if_set;
27405 } mapping[] = {
27406 { "ppc64", "ppc64", MASK_64BIT },
27407 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
27408 { "power4", "ppc970", 0 },
27409 { "G5", "ppc970", 0 },
27410 { "7450", "ppc7450", 0 },
27411 { "7400", "ppc7400", MASK_ALTIVEC },
27412 { "G4", "ppc7400", 0 },
27413 { "750", "ppc750", 0 },
27414 { "740", "ppc750", 0 },
27415 { "G3", "ppc750", 0 },
27416 { "604e", "ppc604e", 0 },
27417 { "604", "ppc604", 0 },
27418 { "603e", "ppc603", 0 },
27419 { "603", "ppc603", 0 },
27420 { "601", "ppc601", 0 },
27421 { NULL, "ppc", 0 } };
27422 const char *cpu_id = "";
27423 size_t i;
27424
27425 rs6000_file_start ();
27426 darwin_file_start ();
27427
27428 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
27429
27430 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
27431 cpu_id = rs6000_default_cpu;
27432
27433 if (global_options_set.x_rs6000_cpu_index)
27434 cpu_id = processor_target_table[rs6000_cpu_index].name;
27435
27436 /* Look through the mapping array. Pick the first name that either
27437 matches the argument, has a bit set in IF_SET that is also set
27438 in the target flags, or has a NULL name. */
27439
27440 i = 0;
27441 while (mapping[i].arg != NULL
27442 && strcmp (mapping[i].arg, cpu_id) != 0
27443 && (mapping[i].if_set & rs6000_isa_flags) == 0)
27444 i++;
27445
27446 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
27447 }
27448
27449 #endif /* TARGET_MACHO */
27450
27451 #if TARGET_ELF
27452 static int
27453 rs6000_elf_reloc_rw_mask (void)
27454 {
27455 if (flag_pic)
27456 return 3;
27457 else if (DEFAULT_ABI == ABI_AIX)
27458 return 2;
27459 else
27460 return 0;
27461 }
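
/* The mask returned above is tested against the relocation class of the
   data (roughly: 1 for relocations against local symbols, 2 for global
   ones).  Under -fpic every relocated datum must live in a writable
   section so the dynamic linker can patch it (mask 3); under the AIX
   ABI only references to global symbols need that (mask 2).  */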
27462
27463 /* Record an element in the table of global constructors. SYMBOL is
27464 a SYMBOL_REF of the function to be called; PRIORITY is a number
27465 between 0 and MAX_INIT_PRIORITY.
27466
27467 This differs from default_named_section_asm_out_constructor in
27468 that we have special handling for -mrelocatable. */
27469
27470 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
27471 static void
27472 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
27473 {
27474 const char *section = ".ctors";
27475 char buf[16];
27476
27477 if (priority != DEFAULT_INIT_PRIORITY)
27478 {
27479 sprintf (buf, ".ctors.%.5u",
27480 /* Invert the numbering so the linker puts us in the proper
27481 order; constructors are run from right to left, and the
27482 linker sorts in increasing order. */
27483 MAX_INIT_PRIORITY - priority);
27484 section = buf;
27485 }
27486
27487 switch_to_section (get_section (section, SECTION_WRITE, NULL));
27488 assemble_align (POINTER_SIZE);
27489
27490 if (TARGET_RELOCATABLE)
27491 {
27492 fputs ("\t.long (", asm_out_file);
27493 output_addr_const (asm_out_file, symbol);
27494 fputs (")@fixup\n", asm_out_file);
27495 }
27496 else
27497 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
27498 }
27499
27500 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
27501 static void
27502 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
27503 {
27504 const char *section = ".dtors";
27505 char buf[16];
27506
27507 if (priority != DEFAULT_INIT_PRIORITY)
27508 {
27509 sprintf (buf, ".dtors.%.5u",
27510 /* Invert the numbering so the linker puts us in the proper
27511 order; constructors are run from right to left, and the
27512 linker sorts in increasing order. */
27513 MAX_INIT_PRIORITY - priority);
27514 section = buf;
27515 }
27516
27517 switch_to_section (get_section (section, SECTION_WRITE, NULL));
27518 assemble_align (POINTER_SIZE);
27519
27520 if (TARGET_RELOCATABLE)
27521 {
27522 fputs ("\t.long (", asm_out_file);
27523 output_addr_const (asm_out_file, symbol);
27524 fputs (")@fixup\n", asm_out_file);
27525 }
27526 else
27527 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
27528 }
27529
27530 void
27531 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
27532 {
27533 if (TARGET_64BIT)
27534 {
27535 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
27536 ASM_OUTPUT_LABEL (file, name);
27537 fputs (DOUBLE_INT_ASM_OP, file);
27538 rs6000_output_function_entry (file, name);
27539 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
27540 if (DOT_SYMBOLS)
27541 {
27542 fputs ("\t.size\t", file);
27543 assemble_name (file, name);
27544 fputs (",24\n\t.type\t.", file);
27545 assemble_name (file, name);
27546 fputs (",@function\n", file);
27547 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
27548 {
27549 fputs ("\t.globl\t.", file);
27550 assemble_name (file, name);
27551 putc ('\n', file);
27552 }
27553 }
27554 else
27555 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
27556 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
27557 rs6000_output_function_entry (file, name);
27558 fputs (":\n", file);
27559 return;
27560 }
27561
27562 if (TARGET_RELOCATABLE
27563 && !TARGET_SECURE_PLT
27564 && (get_pool_size () != 0 || crtl->profile)
27565 && uses_TOC ())
27566 {
27567 char buf[256];
27568
27569 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
27570
27571 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
27572 fprintf (file, "\t.long ");
27573 assemble_name (file, buf);
27574 putc ('-', file);
27575 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
27576 assemble_name (file, buf);
27577 putc ('\n', file);
27578 }
27579
27580 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
27581 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
27582
27583 if (DEFAULT_ABI == ABI_AIX)
27584 {
27585 const char *desc_name, *orig_name;
27586
27587 orig_name = (*targetm.strip_name_encoding) (name);
27588 desc_name = orig_name;
27589 while (*desc_name == '.')
27590 desc_name++;
27591
27592 if (TREE_PUBLIC (decl))
27593 fprintf (file, "\t.globl %s\n", desc_name);
27594
27595 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
27596 fprintf (file, "%s:\n", desc_name);
27597 fprintf (file, "\t.long %s\n", orig_name);
27598 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
27599 if (DEFAULT_ABI == ABI_AIX)
27600 fputs ("\t.long 0\n", file);
27601 fprintf (file, "\t.previous\n");
27602 }
27603 ASM_OUTPUT_LABEL (file, name);
27604 }
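
/* Note for the 64-bit path above: under the 64-bit ELF ABI, the label
   NAME denotes the 3-doubleword function descriptor placed in .opd
   (entry address, TOC base, environment word), while the code entry
   point itself is the name emitted by rs6000_output_function_entry
   (the '.'-prefixed dot symbol when DOT_SYMBOLS).  */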
27605
27606 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
27607 static void
27608 rs6000_elf_file_end (void)
27609 {
27610 #ifdef HAVE_AS_GNU_ATTRIBUTE
27611 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
27612 {
27613 if (rs6000_passes_float)
27614 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
27615 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
27616 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
27617 : 2));
27618 if (rs6000_passes_vector)
27619 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
27620 (TARGET_ALTIVEC_ABI ? 2
27621 : TARGET_SPE_ABI ? 3
27622 : 1));
27623 if (rs6000_returns_struct)
27624 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
27625 aix_struct_return ? 2 : 1);
27626 }
27627 #endif
27628 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
27629 if (TARGET_32BIT)
27630 file_end_indicate_exec_stack ();
27631 #endif
27632 }
27633 #endif
27634
27635 #if TARGET_XCOFF
27636 static void
27637 rs6000_xcoff_asm_output_anchor (rtx symbol)
27638 {
27639 char buffer[100];
27640
27641 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
27642 SYMBOL_REF_BLOCK_OFFSET (symbol));
27643 ASM_OUTPUT_DEF (asm_out_file, XSTR (symbol, 0), buffer);
27644 }
27645
27646 static void
27647 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
27648 {
27649 fputs (GLOBAL_ASM_OP, stream);
27650 RS6000_OUTPUT_BASENAME (stream, name);
27651 putc ('\n', stream);
27652 }
27653
27654 /* A get_unnamed_decl callback, used for read-only sections. PTR
27655 points to the section string variable. */
27656
27657 static void
27658 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
27659 {
27660 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
27661 *(const char *const *) directive,
27662 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
27663 }
27664
27665 /* Likewise for read-write sections. */
27666
27667 static void
27668 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
27669 {
27670 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
27671 *(const char *const *) directive,
27672 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
27673 }
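/* Likewise for thread-local data sections.  */
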
27674
27675 static void
27676 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
27677 {
27678 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
27679 *(const char *const *) directive,
27680 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
27681 }
27682
27683 /* A get_unnamed_section callback, used for switching to toc_section. */
27684
27685 static void
27686 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
27687 {
27688 if (TARGET_MINIMAL_TOC)
27689 {
27690 /* toc_section is always selected at least once from
27691 rs6000_xcoff_file_start, so this is guaranteed to
27692 always be defined once and only once in each file. */
27693 if (!toc_initialized)
27694 {
27695 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
27696 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
27697 toc_initialized = 1;
27698 }
27699 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
27700 (TARGET_32BIT ? "" : ",3"));
27701 }
27702 else
27703 fputs ("\t.toc\n", asm_out_file);
27704 }
27705
27706 /* Implement TARGET_ASM_INIT_SECTIONS. */
27707
27708 static void
27709 rs6000_xcoff_asm_init_sections (void)
27710 {
27711 read_only_data_section
27712 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
27713 &xcoff_read_only_section_name);
27714
27715 private_data_section
27716 = get_unnamed_section (SECTION_WRITE,
27717 rs6000_xcoff_output_readwrite_section_asm_op,
27718 &xcoff_private_data_section_name);
27719
27720 tls_data_section
27721 = get_unnamed_section (SECTION_TLS,
27722 rs6000_xcoff_output_tls_section_asm_op,
27723 &xcoff_tls_data_section_name);
27724
27725 tls_private_data_section
27726 = get_unnamed_section (SECTION_TLS,
27727 rs6000_xcoff_output_tls_section_asm_op,
27728 &xcoff_private_data_section_name);
27729
27730 read_only_private_data_section
27731 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
27732 &xcoff_private_data_section_name);
27733
27734 toc_section
27735 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
27736
27737 readonly_data_section = read_only_data_section;
27738 exception_section = data_section;
27739 }
27740
27741 static int
27742 rs6000_xcoff_reloc_rw_mask (void)
27743 {
27744 return 3;
27745 }
27746
27747 static void
27748 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
27749 tree decl ATTRIBUTE_UNUSED)
27750 {
27751 int smclass;
27752 static const char * const suffix[4] = { "PR", "RO", "RW", "TL" };
27753
27754 if (flags & SECTION_CODE)
27755 smclass = 0;
27756 else if (flags & SECTION_TLS)
27757 smclass = 3;
27758 else if (flags & SECTION_WRITE)
27759 smclass = 2;
27760 else
27761 smclass = 1;
27762
27763 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
27764 (flags & SECTION_CODE) ? "." : "",
27765 name, suffix[smclass], flags & SECTION_ENTSIZE);
27766 }
27767
27768 static section *
27769 rs6000_xcoff_select_section (tree decl, int reloc,
27770 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
27771 {
27772 if (decl_readonly_section (decl, reloc))
27773 {
27774 if (TREE_PUBLIC (decl))
27775 return read_only_data_section;
27776 else
27777 return read_only_private_data_section;
27778 }
27779 else
27780 {
27781 #if HAVE_AS_TLS
27782 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
27783 {
27784 if (TREE_PUBLIC (decl))
27785 return tls_data_section;
27786 else if (bss_initializer_p (decl))
27787 {
27788 /* Convert to COMMON to emit in BSS. */
27789 DECL_COMMON (decl) = 1;
27790 return tls_comm_section;
27791 }
27792 else
27793 return tls_private_data_section;
27794 }
27795 else
27796 #endif
27797 if (TREE_PUBLIC (decl))
27798 return data_section;
27799 else
27800 return private_data_section;
27801 }
27802 }
27803
27804 static void
27805 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
27806 {
27807 const char *name;
27808
27809 /* Use select_section for private and uninitialized data. */
27810 if (!TREE_PUBLIC (decl)
27811 || DECL_COMMON (decl)
27812 || DECL_INITIAL (decl) == NULL_TREE
27813 || DECL_INITIAL (decl) == error_mark_node
27814 || (flag_zero_initialized_in_bss
27815 && initializer_zerop (DECL_INITIAL (decl))))
27816 return;
27817
27818 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
27819 name = (*targetm.strip_name_encoding) (name);
27820 DECL_SECTION_NAME (decl) = build_string (strlen (name), name);
27821 }
27822
27823 /* Select section for constant in constant pool.
27824
27825 On RS/6000, all constants are in the private read-only data area.
27826 However, if this is being placed in the TOC it must be output as a
27827 toc entry. */
27828
27829 static section *
27830 rs6000_xcoff_select_rtx_section (enum machine_mode mode, rtx x,
27831 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
27832 {
27833 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
27834 return toc_section;
27835 else
27836 return read_only_private_data_section;
27837 }
27838
27839 /* Remove any trailing [DS] or the like from the symbol name. */
27840
27841 static const char *
27842 rs6000_xcoff_strip_name_encoding (const char *name)
27843 {
27844 size_t len;
27845 if (*name == '*')
27846 name++;
27847 len = strlen (name);
27848 if (name[len - 1] == ']')
27849 return ggc_alloc_string (name, len - 4);
27850 else
27851 return name;
27852 }
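
/* For example, "foo[DS]" becomes "foo" and "*bar[RW]" becomes "bar".
   The storage-mapping class suffixes used here are always four
   characters including the brackets, which is why len - 4 suffices.  */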
27853
27854 /* Section attributes. AIX is always PIC. */
27855
27856 static unsigned int
27857 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
27858 {
27859 unsigned int align;
27860 unsigned int flags = default_section_type_flags (decl, name, reloc);
27861
27862 /* Align to at least UNIT size. */
27863 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
27864 align = MIN_UNITS_PER_WORD;
27865 else
27866 /* Increase alignment of large objects if not already stricter. */
27867 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
27868 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
27869 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
27870
27871 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
27872 }
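
/* The alignment is stored as its log2 in the SECTION_ENTSIZE bits and
   is later printed by rs6000_xcoff_asm_named_section (above) as the
   numeric alignment operand of the .csect directive.  */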
27873
27874 /* Output at beginning of assembler file.
27875
27876 Initialize the section names for the RS/6000 at this point.
27877
27878 Specify filename, including full path, to assembler.
27879
27880 We want to go into the TOC section so at least one .toc will be emitted.
27881 Also, in order to output proper .bs/.es pairs, we need at least one static
27882 [RW] section emitted.
27883
27884 Finally, declare mcount when profiling to make the assembler happy. */
27885
27886 static void
27887 rs6000_xcoff_file_start (void)
27888 {
27889 rs6000_gen_section_name (&xcoff_bss_section_name,
27890 main_input_filename, ".bss_");
27891 rs6000_gen_section_name (&xcoff_private_data_section_name,
27892 main_input_filename, ".rw_");
27893 rs6000_gen_section_name (&xcoff_read_only_section_name,
27894 main_input_filename, ".ro_");
27895 rs6000_gen_section_name (&xcoff_tls_data_section_name,
27896 main_input_filename, ".tls_");
27897 rs6000_gen_section_name (&xcoff_tbss_section_name,
27898 main_input_filename, ".tbss_[UL]");
27899
27900 fputs ("\t.file\t", asm_out_file);
27901 output_quoted_string (asm_out_file, main_input_filename);
27902 fputc ('\n', asm_out_file);
27903 if (write_symbols != NO_DEBUG)
27904 switch_to_section (private_data_section);
27905 switch_to_section (text_section);
27906 if (profile_flag)
27907 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
27908 rs6000_file_start ();
27909 }
27910
27911 /* Output at end of assembler file.
27912 On the RS/6000, referencing data should automatically pull in text. */
27913
27914 static void
27915 rs6000_xcoff_file_end (void)
27916 {
27917 switch_to_section (text_section);
27918 fputs ("_section_.text:\n", asm_out_file);
27919 switch_to_section (data_section);
27920 fputs (TARGET_32BIT
27921 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
27922 asm_out_file);
27923 }
27924
27925 #ifdef HAVE_AS_TLS
27926 static void
27927 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
27928 {
27929 rtx symbol;
27930 int flags;
27931
27932 default_encode_section_info (decl, rtl, first);
27933
27934 /* Careful not to prod global register variables. */
27935 if (!MEM_P (rtl))
27936 return;
27937 symbol = XEXP (rtl, 0);
27938 if (GET_CODE (symbol) != SYMBOL_REF)
27939 return;
27940
27941 flags = SYMBOL_REF_FLAGS (symbol);
27942
27943 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
27944 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
27945
27946 SYMBOL_REF_FLAGS (symbol) = flags;
27947 }
27948 #endif /* HAVE_AS_TLS */
27949 #endif /* TARGET_XCOFF */
27950
27951 /* Compute a (partial) cost for rtx X. Return true if the complete
27952 cost has been computed, and false if subexpressions should be
27953 scanned. In either case, *TOTAL contains the cost result. */
27954
27955 static bool
27956 rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
27957 int *total, bool speed)
27958 {
27959 enum machine_mode mode = GET_MODE (x);
27960
27961 switch (code)
27962 {
27963 /* On the RS/6000, if it is valid in the insn, it is free. */
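/* E.g. in (plus (reg) (const_int 8)) the constant satisfies the "I"
   constraint (a signed 16-bit immediate) and folds into a single addi,
   so it costs nothing on top of the addition itself.  */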
27964 case CONST_INT:
27965 if (((outer_code == SET
27966 || outer_code == PLUS
27967 || outer_code == MINUS)
27968 && (satisfies_constraint_I (x)
27969 || satisfies_constraint_L (x)))
27970 || (outer_code == AND
27971 && (satisfies_constraint_K (x)
27972 || (mode == SImode
27973 ? satisfies_constraint_L (x)
27974 : satisfies_constraint_J (x))
27975 || mask_operand (x, mode)
27976 || (mode == DImode
27977 && mask64_operand (x, DImode))))
27978 || ((outer_code == IOR || outer_code == XOR)
27979 && (satisfies_constraint_K (x)
27980 || (mode == SImode
27981 ? satisfies_constraint_L (x)
27982 : satisfies_constraint_J (x))))
27983 || outer_code == ASHIFT
27984 || outer_code == ASHIFTRT
27985 || outer_code == LSHIFTRT
27986 || outer_code == ROTATE
27987 || outer_code == ROTATERT
27988 || outer_code == ZERO_EXTRACT
27989 || (outer_code == MULT
27990 && satisfies_constraint_I (x))
27991 || ((outer_code == DIV || outer_code == UDIV
27992 || outer_code == MOD || outer_code == UMOD)
27993 && exact_log2 (INTVAL (x)) >= 0)
27994 || (outer_code == COMPARE
27995 && (satisfies_constraint_I (x)
27996 || satisfies_constraint_K (x)))
27997 || ((outer_code == EQ || outer_code == NE)
27998 && (satisfies_constraint_I (x)
27999 || satisfies_constraint_K (x)
28000 || (mode == SImode
28001 ? satisfies_constraint_L (x)
28002 : satisfies_constraint_J (x))))
28003 || (outer_code == GTU
28004 && satisfies_constraint_I (x))
28005 || (outer_code == LTU
28006 && satisfies_constraint_P (x)))
28007 {
28008 *total = 0;
28009 return true;
28010 }
28011 else if ((outer_code == PLUS
28012 && reg_or_add_cint_operand (x, VOIDmode))
28013 || (outer_code == MINUS
28014 && reg_or_sub_cint_operand (x, VOIDmode))
28015 || ((outer_code == SET
28016 || outer_code == IOR
28017 || outer_code == XOR)
28018 && (INTVAL (x)
28019 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
28020 {
28021 *total = COSTS_N_INSNS (1);
28022 return true;
28023 }
28024 /* FALLTHRU */
28025
28026 case CONST_DOUBLE:
28027 case CONST:
28028 case HIGH:
28029 case SYMBOL_REF:
28030 case MEM:
28031 /* When optimizing for size, MEM should be slightly more expensive
28032 than generating the address, e.g., (plus (reg) (const)).
28033 L1 cache latency is about two instructions. */
28034 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
28035 return true;
28036
28037 case LABEL_REF:
28038 *total = 0;
28039 return true;
28040
28041 case PLUS:
28042 case MINUS:
28043 if (FLOAT_MODE_P (mode))
28044 *total = rs6000_cost->fp;
28045 else
28046 *total = COSTS_N_INSNS (1);
28047 return false;
28048
28049 case MULT:
28050 if (GET_CODE (XEXP (x, 1)) == CONST_INT
28051 && satisfies_constraint_I (XEXP (x, 1)))
28052 {
28053 if (INTVAL (XEXP (x, 1)) >= -256
28054 && INTVAL (XEXP (x, 1)) <= 255)
28055 *total = rs6000_cost->mulsi_const9;
28056 else
28057 *total = rs6000_cost->mulsi_const;
28058 }
28059 else if (mode == SFmode)
28060 *total = rs6000_cost->fp;
28061 else if (FLOAT_MODE_P (mode))
28062 *total = rs6000_cost->dmul;
28063 else if (mode == DImode)
28064 *total = rs6000_cost->muldi;
28065 else
28066 *total = rs6000_cost->mulsi;
28067 return false;
28068
28069 case FMA:
28070 if (mode == SFmode)
28071 *total = rs6000_cost->fp;
28072 else
28073 *total = rs6000_cost->dmul;
28074 break;
28075
28076 case DIV:
28077 case MOD:
28078 if (FLOAT_MODE_P (mode))
28079 {
28080 *total = mode == DFmode ? rs6000_cost->ddiv
28081 : rs6000_cost->sdiv;
28082 return false;
28083 }
28084 /* FALLTHRU */
28085
28086 case UDIV:
28087 case UMOD:
28088 if (GET_CODE (XEXP (x, 1)) == CONST_INT
28089 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
28090 {
28091 if (code == DIV || code == MOD)
28092 /* Shift, addze */
28093 *total = COSTS_N_INSNS (2);
28094 else
28095 /* Shift */
28096 *total = COSTS_N_INSNS (1);
28097 }
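/* The two-insn signed case corresponds to e.g. "x / 8" expanding as
   srawi then addze; addze folds back the carry that srawi sets for a
   negative dividend with shifted-out one bits, rounding toward zero.  */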
28098 else
28099 {
28100 if (GET_MODE (XEXP (x, 1)) == DImode)
28101 *total = rs6000_cost->divdi;
28102 else
28103 *total = rs6000_cost->divsi;
28104 }
28105 /* Add in shift and subtract for MOD. */
28106 if (code == MOD || code == UMOD)
28107 *total += COSTS_N_INSNS (2);
28108 return false;
28109
28110 case CTZ:
28111 case FFS:
28112 *total = COSTS_N_INSNS (4);
28113 return false;
28114
28115 case POPCOUNT:
28116 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
28117 return false;
28118
28119 case PARITY:
28120 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
28121 return false;
28122
28123 case NOT:
28124 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
28125 {
28126 *total = 0;
28127 return false;
28128 }
28129 /* FALLTHRU */
28130
28131 case AND:
28132 case CLZ:
28133 case IOR:
28134 case XOR:
28135 case ZERO_EXTRACT:
28136 *total = COSTS_N_INSNS (1);
28137 return false;
28138
28139 case ASHIFT:
28140 case ASHIFTRT:
28141 case LSHIFTRT:
28142 case ROTATE:
28143 case ROTATERT:
28144 /* Handle mul_highpart. */
28145 if (outer_code == TRUNCATE
28146 && GET_CODE (XEXP (x, 0)) == MULT)
28147 {
28148 if (mode == DImode)
28149 *total = rs6000_cost->muldi;
28150 else
28151 *total = rs6000_cost->mulsi;
28152 return true;
28153 }
28154 else if (outer_code == AND)
28155 *total = 0;
28156 else
28157 *total = COSTS_N_INSNS (1);
28158 return false;
28159
28160 case SIGN_EXTEND:
28161 case ZERO_EXTEND:
28162 if (GET_CODE (XEXP (x, 0)) == MEM)
28163 *total = 0;
28164 else
28165 *total = COSTS_N_INSNS (1);
28166 return false;
28167
28168 case COMPARE:
28169 case NEG:
28170 case ABS:
28171 if (!FLOAT_MODE_P (mode))
28172 {
28173 *total = COSTS_N_INSNS (1);
28174 return false;
28175 }
28176 /* FALLTHRU */
28177
28178 case FLOAT:
28179 case UNSIGNED_FLOAT:
28180 case FIX:
28181 case UNSIGNED_FIX:
28182 case FLOAT_TRUNCATE:
28183 *total = rs6000_cost->fp;
28184 return false;
28185
28186 case FLOAT_EXTEND:
28187 if (mode == DFmode)
28188 *total = 0;
28189 else
28190 *total = rs6000_cost->fp;
28191 return false;
28192
28193 case UNSPEC:
28194 switch (XINT (x, 1))
28195 {
28196 case UNSPEC_FRSP:
28197 *total = rs6000_cost->fp;
28198 return true;
28199
28200 default:
28201 break;
28202 }
28203 break;
28204
28205 case CALL:
28206 case IF_THEN_ELSE:
28207 if (!speed)
28208 {
28209 *total = COSTS_N_INSNS (1);
28210 return true;
28211 }
28212 else if (FLOAT_MODE_P (mode)
28213 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
28214 {
28215 *total = rs6000_cost->fp;
28216 return false;
28217 }
28218 break;
28219
28220 case EQ:
28221 case GTU:
28222 case LTU:
28223 /* Carry bit requires mode == Pmode.
28224 NEG or PLUS already counted so only add one. */
28225 if (mode == Pmode
28226 && (outer_code == NEG || outer_code == PLUS))
28227 {
28228 *total = COSTS_N_INSNS (1);
28229 return true;
28230 }
28231 if (outer_code == SET)
28232 {
28233 if (XEXP (x, 1) == const0_rtx)
28234 {
28235 if (TARGET_ISEL && !TARGET_MFCRF)
28236 *total = COSTS_N_INSNS (8);
28237 else
28238 *total = COSTS_N_INSNS (2);
28239 return true;
28240 }
28241 else if (mode == Pmode)
28242 {
28243 *total = COSTS_N_INSNS (3);
28244 return false;
28245 }
28246 }
28247 /* FALLTHRU */
28248
28249 case GT:
28250 case LT:
28251 case UNORDERED:
28252 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
28253 {
28254 if (TARGET_ISEL && !TARGET_MFCRF)
28255 *total = COSTS_N_INSNS (8);
28256 else
28257 *total = COSTS_N_INSNS (2);
28258 return true;
28259 }
28260 /* CC COMPARE. */
28261 if (outer_code == COMPARE)
28262 {
28263 *total = 0;
28264 return true;
28265 }
28266 break;
28267
28268 default:
28269 break;
28270 }
28271
28272 return false;
28273 }
28274
28275 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
28276
28277 static bool
28278 rs6000_debug_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
28279 bool speed)
28280 {
28281 bool ret = rs6000_rtx_costs (x, code, outer_code, opno, total, speed);
28282
28283 fprintf (stderr,
28284 "\nrs6000_rtx_costs, return = %s, code = %s, outer_code = %s, "
28285 "opno = %d, total = %d, speed = %s, x:\n",
28286 ret ? "complete" : "scan inner",
28287 GET_RTX_NAME (code),
28288 GET_RTX_NAME (outer_code),
28289 opno,
28290 *total,
28291 speed ? "true" : "false");
28292
28293 debug_rtx (x);
28294
28295 return ret;
28296 }
28297
28298 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
28299
28300 static int
28301 rs6000_debug_address_cost (rtx x, enum machine_mode mode,
28302 addr_space_t as, bool speed)
28303 {
28304 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
28305
28306 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
28307 ret, speed ? "true" : "false");
28308 debug_rtx (x);
28309
28310 return ret;
28311 }
28312
28313
28314 /* A C expression returning the cost of moving data from a register of class
28315 CLASS1 to one of CLASS2. */
28316
28317 static int
28318 rs6000_register_move_cost (enum machine_mode mode,
28319 reg_class_t from, reg_class_t to)
28320 {
28321 int ret;
28322
28323 if (TARGET_DEBUG_COST)
28324 dbg_cost_ctrl++;
28325
28326 /* Moves from/to GENERAL_REGS. */
28327 if (reg_classes_intersect_p (to, GENERAL_REGS)
28328 || reg_classes_intersect_p (from, GENERAL_REGS))
28329 {
28330 reg_class_t rclass = from;
28331
28332 if (! reg_classes_intersect_p (to, GENERAL_REGS))
28333 rclass = to;
28334
28335 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
28336 ret = (rs6000_memory_move_cost (mode, rclass, false)
28337 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
28338
28339 /* It's more expensive to move CR_REGS than CR0_REGS because of the
28340 shift. */
28341 else if (rclass == CR_REGS)
28342 ret = 4;
28343
28344 /* For those processors that have slow LR/CTR moves, make them more
28345 expensive than memory in order to bias spills to memory.  */
28346 else if ((rs6000_cpu == PROCESSOR_POWER6
28347 || rs6000_cpu == PROCESSOR_POWER7
28348 || rs6000_cpu == PROCESSOR_POWER8)
28349 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
28350 ret = 6 * hard_regno_nregs[0][mode];
28351
28352 else
28353 /* A move will cost one instruction per GPR moved. */
28354 ret = 2 * hard_regno_nregs[0][mode];
28355 }
28356
28357 /* If we have VSX, we can easily move between FPR or Altivec registers. */
28358 else if (VECTOR_MEM_VSX_P (mode)
28359 && reg_classes_intersect_p (to, VSX_REGS)
28360 && reg_classes_intersect_p (from, VSX_REGS))
28361 ret = 2 * hard_regno_nregs[32][mode];
28362
28363 /* Moving between two similar registers is just one instruction. */
28364 else if (reg_classes_intersect_p (to, from))
28365 ret = (mode == TFmode || mode == TDmode) ? 4 : 2;
28366
28367 /* Everything else has to go through GENERAL_REGS. */
28368 else
28369 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
28370 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
28371
28372 if (TARGET_DEBUG_COST)
28373 {
28374 if (dbg_cost_ctrl == 1)
28375 fprintf (stderr,
28376 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
28377 ret, GET_MODE_NAME (mode), reg_class_names[from],
28378 reg_class_names[to]);
28379 dbg_cost_ctrl--;
28380 }
28381
28382 return ret;
28383 }
28384
28385 /* A C expression returning the cost of moving data of MODE from a register to
28386 or from memory. */
28387
28388 static int
28389 rs6000_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
28390 bool in ATTRIBUTE_UNUSED)
28391 {
28392 int ret;
28393
28394 if (TARGET_DEBUG_COST)
28395 dbg_cost_ctrl++;
28396
28397 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
28398 ret = 4 * hard_regno_nregs[0][mode];
28399 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
28400 || reg_classes_intersect_p (rclass, VSX_REGS)))
28401 ret = 4 * hard_regno_nregs[32][mode];
28402 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
28403 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
28404 else
28405 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
28406
28407 if (TARGET_DEBUG_COST)
28408 {
28409 if (dbg_cost_ctrl == 1)
28410 fprintf (stderr,
28411 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
28412 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
28413 dbg_cost_ctrl--;
28414 }
28415
28416 return ret;
28417 }
28418
28419 /* Returns a code for a target-specific builtin that implements
28420 reciprocal of the function, or NULL_TREE if not available. */
28421
28422 static tree
28423 rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
28424 bool sqrt ATTRIBUTE_UNUSED)
28425 {
28426 if (optimize_insn_for_size_p ())
28427 return NULL_TREE;
28428
28429 if (md_fn)
28430 switch (fn)
28431 {
28432 case VSX_BUILTIN_XVSQRTDP:
28433 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
28434 return NULL_TREE;
28435
28436 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
28437
28438 case VSX_BUILTIN_XVSQRTSP:
28439 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
28440 return NULL_TREE;
28441
28442 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
28443
28444 default:
28445 return NULL_TREE;
28446 }
28447
28448 else
28449 switch (fn)
28450 {
28451 case BUILT_IN_SQRT:
28452 if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
28453 return NULL_TREE;
28454
28455 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
28456
28457 case BUILT_IN_SQRTF:
28458 if (!RS6000_RECIP_AUTO_RSQRTE_P (SFmode))
28459 return NULL_TREE;
28460
28461 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
28462
28463 default:
28464 return NULL_TREE;
28465 }
28466 }
28467
28468 /* Load up a constant. If the mode is a vector mode, splat the value across
28469 all of the vector elements. */
28470
28471 static rtx
28472 rs6000_load_constant_and_splat (enum machine_mode mode, REAL_VALUE_TYPE dconst)
28473 {
28474 rtx reg;
28475
28476 if (mode == SFmode || mode == DFmode)
28477 {
28478 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, mode);
28479 reg = force_reg (mode, d);
28480 }
28481 else if (mode == V4SFmode)
28482 {
28483 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, SFmode);
28484 rtvec v = gen_rtvec (4, d, d, d, d);
28485 reg = gen_reg_rtx (mode);
28486 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
28487 }
28488 else if (mode == V2DFmode)
28489 {
28490 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, DFmode);
28491 rtvec v = gen_rtvec (2, d, d);
28492 reg = gen_reg_rtx (mode);
28493 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
28494 }
28495 else
28496 gcc_unreachable ();
28497
28498 return reg;
28499 }
28500
28501 /* Generate an FMA instruction. */
28502
28503 static void
28504 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
28505 {
28506 enum machine_mode mode = GET_MODE (target);
28507 rtx dst;
28508
28509 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
28510 gcc_assert (dst != NULL);
28511
28512 if (dst != target)
28513 emit_move_insn (target, dst);
28514 }
28515
28516 /* Generate a FMSUB instruction: dst = fma(m1, m2, -a). */
28517
28518 static void
28519 rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
28520 {
28521 enum machine_mode mode = GET_MODE (target);
28522 rtx dst;
28523
28524 /* Altivec does not support fms directly;
28525 generate in terms of fma in that case. */
28526 if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
28527 dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
28528 else
28529 {
28530 a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
28531 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
28532 }
28533 gcc_assert (dst != NULL);
28534
28535 if (dst != target)
28536 emit_move_insn (target, dst);
28537 }
28538
28539 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
28540
28541 static void
28542 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
28543 {
28544 enum machine_mode mode = GET_MODE (dst);
28545 rtx r;
28546
28547 /* This is a tad more complicated, since the fnma_optab is for
28548 a different expression: fma(-m1, m2, a), which is the same
28549 thing except in the case of signed zeros.
28550
28551 Fortunately we know that if FMA is supported that FNMSUB is
28552 also supported in the ISA. Just expand it directly. */
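/* Illustrative corner case, assuming round-to-nearest: if m1 * m2 == a
   exactly, -fma (m1, m2, -a) computes -(+0.0) = -0.0 while
   fma (-m1, m2, a) computes +0.0; everywhere else the two forms agree.  */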
28553
28554 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
28555
28556 r = gen_rtx_NEG (mode, a);
28557 r = gen_rtx_FMA (mode, m1, m2, r);
28558 r = gen_rtx_NEG (mode, r);
28559 emit_insn (gen_rtx_SET (VOIDmode, dst, r));
28560 }
28561
28562 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
28563 add a reg_note saying that this was a division. Support both scalar and
28564 vector divide. Assumes no trapping math and finite arguments. */
28565
28566 void
28567 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
28568 {
28569 enum machine_mode mode = GET_MODE (dst);
28570 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
28571 int i;
28572
28573 /* Low precision estimates guarantee 5 bits of accuracy. High
28574 precision estimates guarantee 14 bits of accuracy. SFmode
28575 requires 23 bits of accuracy. DFmode requires 52 bits of
28576 accuracy. Each pass at least doubles the accuracy, leading
28577 to the following. */
28578 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
28579 if (mode == DFmode || mode == V2DFmode)
28580 passes++;
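/* A worked check of the counts above: a 14-bit estimate doubles to 28
   bits after one pass (>= 23 for SFmode) and 56 after two (>= 52 for
   DFmode); a 5-bit estimate needs three passes for 40 bits and four
   for 80.  */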
28581
28582 enum insn_code code = optab_handler (smul_optab, mode);
28583 insn_gen_fn gen_mul = GEN_FCN (code);
28584
28585 gcc_assert (code != CODE_FOR_nothing);
28586
28587 one = rs6000_load_constant_and_splat (mode, dconst1);
28588
28589 /* x0 = 1./d estimate */
28590 x0 = gen_reg_rtx (mode);
28591 emit_insn (gen_rtx_SET (VOIDmode, x0,
28592 gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
28593 UNSPEC_FRES)));
28594
28595 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
28596 if (passes > 1) {
28597
28598 /* e0 = 1. - d * x0 */
28599 e0 = gen_reg_rtx (mode);
28600 rs6000_emit_nmsub (e0, d, x0, one);
28601
28602 /* x1 = x0 + e0 * x0 */
28603 x1 = gen_reg_rtx (mode);
28604 rs6000_emit_madd (x1, e0, x0, x0);
28605
28606 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
28607 ++i, xprev = xnext, eprev = enext) {
28608
28609 /* enext = eprev * eprev */
28610 enext = gen_reg_rtx (mode);
28611 emit_insn (gen_mul (enext, eprev, eprev));
28612
28613 /* xnext = xprev + enext * xprev */
28614 xnext = gen_reg_rtx (mode);
28615 rs6000_emit_madd (xnext, enext, xprev, xprev);
28616 }
28617
28618 } else
28619 xprev = x0;
28620
28621 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
28622
28623 /* u = n * xprev */
28624 u = gen_reg_rtx (mode);
28625 emit_insn (gen_mul (u, n, xprev));
28626
28627 /* v = n - (d * u) */
28628 v = gen_reg_rtx (mode);
28629 rs6000_emit_nmsub (v, d, u, n);
28630
28631 /* dst = (v * xprev) + u */
28632 rs6000_emit_madd (dst, v, xprev, u);
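/* Check: dst = u + v * xprev
        = n * xprev + (n - d * n * xprev) * xprev
        = n * xprev * (2 - d * xprev), as required.  */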
28633
28634 if (note_p)
28635 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
28636 }
28637
28638 /* Newton-Raphson approximation of single/double-precision floating point
28639 rsqrt. Assumes no trapping math and finite arguments. */
28640
28641 void
28642 rs6000_emit_swrsqrt (rtx dst, rtx src)
28643 {
28644 enum machine_mode mode = GET_MODE (src);
28645 rtx x0 = gen_reg_rtx (mode);
28646 rtx y = gen_reg_rtx (mode);
28647
28648 /* Low precision estimates guarantee 5 bits of accuracy. High
28649 precision estimates guarantee 14 bits of accuracy. SFmode
28650 requires 23 bits of accuracy. DFmode requires 52 bits of
28651 accuracy. Each pass at least doubles the accuracy, leading
28652 to the following. */
28653 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
28654 if (mode == DFmode || mode == V2DFmode)
28655 passes++;
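/* The Newton-Raphson step for 1/sqrt(a) is
     x1 = x0 * (3 - a * x0 * x0) / 2
        = x0 * (1.5 - (0.5 * a) * (x0 * x0)),
   which is why only the constant 1.5 and y = 0.5 * src are needed
   below.  */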
28656
28657 REAL_VALUE_TYPE dconst3_2;
28658 int i;
28659 rtx halfthree;
28660 enum insn_code code = optab_handler (smul_optab, mode);
28661 insn_gen_fn gen_mul = GEN_FCN (code);
28662
28663 gcc_assert (code != CODE_FOR_nothing);
28664
28665 /* Load up the constant 1.5 either as a scalar, or as a vector. */
28666 real_from_integer (&dconst3_2, VOIDmode, 3, 0, 0);
28667 SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
28668
28669 halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
28670
28671 /* x0 = rsqrt estimate */
28672 emit_insn (gen_rtx_SET (VOIDmode, x0,
28673 gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
28674 UNSPEC_RSQRT)));
28675
28676 /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
28677 rs6000_emit_msub (y, src, halfthree, src);
28678
28679 for (i = 0; i < passes; i++)
28680 {
28681 rtx x1 = gen_reg_rtx (mode);
28682 rtx u = gen_reg_rtx (mode);
28683 rtx v = gen_reg_rtx (mode);
28684
28685 /* x1 = x0 * (1.5 - y * (x0 * x0)) */
28686 emit_insn (gen_mul (u, x0, x0));
28687 rs6000_emit_nmsub (v, y, u, halfthree);
28688 emit_insn (gen_mul (x1, x0, v));
28689 x0 = x1;
28690 }
28691
28692 emit_move_insn (dst, x0);
28693 return;
28694 }
28695
28696 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
28697 (Power7) targets. DST is the target, and SRC is the argument operand. */
28698
28699 void
28700 rs6000_emit_popcount (rtx dst, rtx src)
28701 {
28702 enum machine_mode mode = GET_MODE (dst);
28703 rtx tmp1, tmp2;
28704
28705 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
28706 if (TARGET_POPCNTD)
28707 {
28708 if (mode == SImode)
28709 emit_insn (gen_popcntdsi2 (dst, src));
28710 else
28711 emit_insn (gen_popcntddi2 (dst, src));
28712 return;
28713 }
28714
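/* Fall back to popcntb, which leaves the popcount of each byte in that
   byte.  Multiplying by 0x01010101 then sums the bytes: for SImode
   per-byte counts {b3,b2,b1,b0}, the product's top byte is
   b3+b2+b1+b0 (no carries occur, since the sum is at most 32), which
   the shift right by 24 below extracts; the DImode variant uses eight
   bytes and a shift of 56.  */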
28715 tmp1 = gen_reg_rtx (mode);
28716
28717 if (mode == SImode)
28718 {
28719 emit_insn (gen_popcntbsi2 (tmp1, src));
28720 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
28721 NULL_RTX, 0);
28722 tmp2 = force_reg (SImode, tmp2);
28723 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
28724 }
28725 else
28726 {
28727 emit_insn (gen_popcntbdi2 (tmp1, src));
28728 tmp2 = expand_mult (DImode, tmp1,
28729 GEN_INT ((HOST_WIDE_INT)
28730 0x01010101 << 32 | 0x01010101),
28731 NULL_RTX, 0);
28732 tmp2 = force_reg (DImode, tmp2);
28733 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
28734 }
28735 }
28736
28737
28738 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
28739 target, and SRC is the argument operand. */
28740
28741 void
28742 rs6000_emit_parity (rtx dst, rtx src)
28743 {
28744 enum machine_mode mode = GET_MODE (dst);
28745 rtx tmp;
28746
28747 tmp = gen_reg_rtx (mode);
28748
28749 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
28750 if (TARGET_CMPB)
28751 {
28752 if (mode == SImode)
28753 {
28754 emit_insn (gen_popcntbsi2 (tmp, src));
28755 emit_insn (gen_paritysi2_cmpb (dst, tmp));
28756 }
28757 else
28758 {
28759 emit_insn (gen_popcntbdi2 (tmp, src));
28760 emit_insn (gen_paritydi2_cmpb (dst, tmp));
28761 }
28762 return;
28763 }
28764
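/* Without prtyw/prtyd, fold the popcntb result down with shifts and
   xors: the parity of the word is the low bit of the sum of the byte
   counts, and xoring the high half onto the low half at each step
   preserves that bit until the final AND with 1 extracts it.  */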
28765 if (mode == SImode)
28766 {
28767 /* Is mult+shift >= shift+xor+shift+xor? */
28768 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
28769 {
28770 rtx tmp1, tmp2, tmp3, tmp4;
28771
28772 tmp1 = gen_reg_rtx (SImode);
28773 emit_insn (gen_popcntbsi2 (tmp1, src));
28774
28775 tmp2 = gen_reg_rtx (SImode);
28776 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
28777 tmp3 = gen_reg_rtx (SImode);
28778 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
28779
28780 tmp4 = gen_reg_rtx (SImode);
28781 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
28782 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
28783 }
28784 else
28785 rs6000_emit_popcount (tmp, src);
28786 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
28787 }
28788 else
28789 {
28790 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
28791 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
28792 {
28793 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
28794
28795 tmp1 = gen_reg_rtx (DImode);
28796 emit_insn (gen_popcntbdi2 (tmp1, src));
28797
28798 tmp2 = gen_reg_rtx (DImode);
28799 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
28800 tmp3 = gen_reg_rtx (DImode);
28801 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
28802
28803 tmp4 = gen_reg_rtx (DImode);
28804 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
28805 tmp5 = gen_reg_rtx (DImode);
28806 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
28807
28808 tmp6 = gen_reg_rtx (DImode);
28809 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
28810 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
28811 }
28812 else
28813 rs6000_emit_popcount (tmp, src);
28814 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
28815 }
28816 }
28817
28818 /* Expand an Altivec constant permutation for little endian mode.
28819 There are two issues: First, the two input operands must be
28820 swapped so that together they form a double-wide array in LE
28821 order. Second, the vperm instruction has surprising behavior
28822 in LE mode: it interprets the elements of the source vectors
28823 in BE mode ("left to right") and interprets the elements of
28824 the destination vector in LE mode ("right to left"). To
28825 correct for this, we must subtract each element of the permute
28826 control vector from 31.
28827
28828 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
28829 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
28830 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
28831 serve as the permute control vector. Then, in BE mode,
28832
28833 vperm 9,10,11,12
28834
28835 places the desired result in vr9. However, in LE mode the
28836 vector contents will be
28837
28838 vr10 = 00000003 00000002 00000001 00000000
28839 vr11 = 00000007 00000006 00000005 00000004
28840
28841 The result of the vperm using the same permute control vector is
28842
28843 vr9 = 05000000 07000000 01000000 03000000
28844
28845 That is, the leftmost 4 bytes of vr10 are interpreted as the
28846 source for the rightmost 4 bytes of vr9, and so on.
28847
28848 If we change the permute control vector to
28849
28850 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
28851
28852 and issue
28853
28854 vperm 9,11,10,12
28855
28856 we get the desired
28857
28858 vr9 = 00000006 00000004 00000002 00000000. */
28859
28860 void
28861 altivec_expand_vec_perm_const_le (rtx operands[4])
28862 {
28863 unsigned int i;
28864 rtx perm[16];
28865 rtx constv, unspec;
28866 rtx target = operands[0];
28867 rtx op0 = operands[1];
28868 rtx op1 = operands[2];
28869 rtx sel = operands[3];
28870
28871 /* Unpack and adjust the constant selector. */
28872 for (i = 0; i < 16; ++i)
28873 {
28874 rtx e = XVECEXP (sel, 0, i);
28875 unsigned int elt = 31 - (INTVAL (e) & 31);
28876 perm[i] = GEN_INT (elt);
28877 }
28878
28879 /* Expand to a permute, swapping the inputs and using the
28880 adjusted selector. */
28881 if (!REG_P (op0))
28882 op0 = force_reg (V16QImode, op0);
28883 if (!REG_P (op1))
28884 op1 = force_reg (V16QImode, op1);
28885
28886 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
28887 constv = force_reg (V16QImode, constv);
28888 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
28889 UNSPEC_VPERM);
28890 if (!REG_P (target))
28891 {
28892 rtx tmp = gen_reg_rtx (V16QImode);
28893 emit_move_insn (tmp, unspec);
28894 unspec = tmp;
28895 }
28896
28897 emit_move_insn (target, unspec);
28898 }
28899
28900 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
28901 permute control vector. But here it's not a constant, so we must
28902 generate a vector splat/subtract to do the adjustment. */
28903
28904 void
28905 altivec_expand_vec_perm_le (rtx operands[4])
28906 {
28907 rtx splat, unspec;
28908 rtx target = operands[0];
28909 rtx op0 = operands[1];
28910 rtx op1 = operands[2];
28911 rtx sel = operands[3];
28912 rtx tmp = target;
28913
28914 /* Get everything in regs so the pattern matches. */
28915 if (!REG_P (op0))
28916 op0 = force_reg (V16QImode, op0);
28917 if (!REG_P (op1))
28918 op1 = force_reg (V16QImode, op1);
28919 if (!REG_P (sel))
28920 sel = force_reg (V16QImode, sel);
28921 if (!REG_P (target))
28922 tmp = gen_reg_rtx (V16QImode);
28923
28924 /* SEL = splat(31) - SEL. */
28925 /* We want to subtract from 31, but we can't vspltisb 31 since
28926 it's out of range. -1 works as well because only the low-order
28927 five bits of the permute control vector elements are used. */
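/* E.g. selector element 0 must become 31: (-1) - 0 is 0xff, and vperm
   sees 0xff & 31 = 31; likewise 5 becomes (0xff - 5) & 31 = 26
   = 31 - 5.  */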
28928 splat = gen_rtx_VEC_DUPLICATE (V16QImode,
28929 gen_rtx_CONST_INT (QImode, -1));
28930 emit_move_insn (tmp, splat);
28931 sel = gen_rtx_MINUS (V16QImode, tmp, sel);
28932 emit_move_insn (tmp, sel);
28933
28934 /* Permute with operands reversed and adjusted selector. */
28935 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, tmp),
28936 UNSPEC_VPERM);
28937
28938 /* Copy into target, possibly by way of a register. */
28939 if (!REG_P (target))
28940 {
28941 emit_move_insn (tmp, unspec);
28942 unspec = tmp;
28943 }
28944
28945 emit_move_insn (target, unspec);
28946 }
28947
28948 /* Expand an Altivec constant permutation. Return true if we match
28949 an efficient implementation; false to fall back to VPERM. */
28950
28951 bool
28952 altivec_expand_vec_perm_const (rtx operands[4])
28953 {
28954 struct altivec_perm_insn {
28955 HOST_WIDE_INT mask;
28956 enum insn_code impl;
28957 unsigned char perm[16];
28958 };
28959 static const struct altivec_perm_insn patterns[] = {
28960 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum,
28961 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
28962 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum,
28963 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
28964 { OPTION_MASK_ALTIVEC,
28965 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb : CODE_FOR_altivec_vmrglb,
28966 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
28967 { OPTION_MASK_ALTIVEC,
28968 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh : CODE_FOR_altivec_vmrglh,
28969 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
28970 { OPTION_MASK_ALTIVEC,
28971 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw : CODE_FOR_altivec_vmrglw,
28972 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
28973 { OPTION_MASK_ALTIVEC,
28974 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb : CODE_FOR_altivec_vmrghb,
28975 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
28976 { OPTION_MASK_ALTIVEC,
28977 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh : CODE_FOR_altivec_vmrghh,
28978 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
28979 { OPTION_MASK_ALTIVEC,
28980 BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw : CODE_FOR_altivec_vmrghw,
28981 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
28982 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
28983 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
28984 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
28985 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
28986 };
28987
28988 unsigned int i, j, elt, which;
28989 unsigned char perm[16];
28990 rtx target, op0, op1, sel, x;
28991 bool one_vec;
28992
28993 target = operands[0];
28994 op0 = operands[1];
28995 op1 = operands[2];
28996 sel = operands[3];
28997
28998 /* Unpack the constant selector. */
28999 for (i = which = 0; i < 16; ++i)
29000 {
29001 rtx e = XVECEXP (sel, 0, i);
29002 elt = INTVAL (e) & 31;
29003 which |= (elt < 16 ? 1 : 2);
29004 perm[i] = elt;
29005 }
29006
29007 /* Simplify the constant selector based on operands. */
29008 switch (which)
29009 {
29010 default:
29011 gcc_unreachable ();
29012
29013 case 3:
29014 one_vec = false;
29015 if (!rtx_equal_p (op0, op1))
29016 break;
29017 /* FALLTHRU */
29018
29019 case 2:
29020 for (i = 0; i < 16; ++i)
29021 perm[i] &= 15;
29022 op0 = op1;
29023 one_vec = true;
29024 break;
29025
29026 case 1:
29027 op1 = op0;
29028 one_vec = true;
29029 break;
29030 }
29031
29032 /* Look for splat patterns. */
29033 if (one_vec)
29034 {
29035 elt = perm[0];
29036
29037 for (i = 0; i < 16; ++i)
29038 if (perm[i] != elt)
29039 break;
29040 if (i == 16)
29041 {
29042 emit_insn (gen_altivec_vspltb (target, op0, GEN_INT (elt)));
29043 return true;
29044 }
29045
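/* E.g. the byte selector {4,5,4,5,...} repeats the halfword at bytes
   4-5, i.e. halfword element 2 in BE numbering; on LE the same bytes
   are element 7 - 2 = 5, hence the field adjustment below.  */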
29046 if (elt % 2 == 0)
29047 {
29048 for (i = 0; i < 16; i += 2)
29049 if (perm[i] != elt || perm[i + 1] != elt + 1)
29050 break;
29051 if (i == 16)
29052 {
29053 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
29054 x = gen_reg_rtx (V8HImode);
29055 emit_insn (gen_altivec_vsplth (x, gen_lowpart (V8HImode, op0),
29056 GEN_INT (field)));
29057 emit_move_insn (target, gen_lowpart (V16QImode, x));
29058 return true;
29059 }
29060 }
29061
29062 if (elt % 4 == 0)
29063 {
29064 for (i = 0; i < 16; i += 4)
29065 if (perm[i] != elt
29066 || perm[i + 1] != elt + 1
29067 || perm[i + 2] != elt + 2
29068 || perm[i + 3] != elt + 3)
29069 break;
29070 if (i == 16)
29071 {
29072 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
29073 x = gen_reg_rtx (V4SImode);
29074 emit_insn (gen_altivec_vspltw (x, gen_lowpart (V4SImode, op0),
29075 GEN_INT (field)));
29076 emit_move_insn (target, gen_lowpart (V16QImode, x));
29077 return true;
29078 }
29079 }
29080 }
29081
29082 /* Look for merge and pack patterns. */
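/* E.g. a selector beginning 16, 0, 17, 1, ... matches the vmrghb row
   with its inputs swapped: perm[0] equals the table entry plus 16, and
   each later entry must then match with 16 added or subtracted, which
   the inner loop checks.  */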
29083 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
29084 {
29085 bool swapped;
29086
29087 if ((patterns[j].mask & rs6000_isa_flags) == 0)
29088 continue;
29089
29090 elt = patterns[j].perm[0];
29091 if (perm[0] == elt)
29092 swapped = false;
29093 else if (perm[0] == elt + 16)
29094 swapped = true;
29095 else
29096 continue;
29097 for (i = 1; i < 16; ++i)
29098 {
29099 elt = patterns[j].perm[i];
29100 if (swapped)
29101 elt = (elt >= 16 ? elt - 16 : elt + 16);
29102 else if (one_vec && elt >= 16)
29103 elt -= 16;
29104 if (perm[i] != elt)
29105 break;
29106 }
29107 if (i == 16)
29108 {
29109 enum insn_code icode = patterns[j].impl;
29110 enum machine_mode omode = insn_data[icode].operand[0].mode;
29111 enum machine_mode imode = insn_data[icode].operand[1].mode;
29112
29113 /* For little-endian, don't use vpkuwum and vpkuhum if the
29114 underlying vector type is not V4SI and V8HI, respectively.
29115 For example, using vpkuwum with a V8HI picks up the even
29116 halfwords (BE numbering) when the even halfwords (LE
29117 numbering) are what we need. */
29118 if (!BYTES_BIG_ENDIAN
29119 && icode == CODE_FOR_altivec_vpkuwum
29120 && ((GET_CODE (op0) == REG
29121 && GET_MODE (op0) != V4SImode)
29122 || (GET_CODE (op0) == SUBREG
29123 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
29124 continue;
29125 if (!BYTES_BIG_ENDIAN
29126 && icode == CODE_FOR_altivec_vpkuhum
29127 && ((GET_CODE (op0) == REG
29128 && GET_MODE (op0) != V8HImode)
29129 || (GET_CODE (op0) == SUBREG
29130 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
29131 continue;
29132
29133 /* For little-endian, the two input operands must be swapped
29134 (or swapped back) to ensure proper right-to-left numbering
29135 from 0 to 2N-1. */
29136 if (swapped ^ !BYTES_BIG_ENDIAN)
29137 x = op0, op0 = op1, op1 = x;
29138 if (imode != V16QImode)
29139 {
29140 op0 = gen_lowpart (imode, op0);
29141 op1 = gen_lowpart (imode, op1);
29142 }
29143 if (omode == V16QImode)
29144 x = target;
29145 else
29146 x = gen_reg_rtx (omode);
29147 emit_insn (GEN_FCN (icode) (x, op0, op1));
29148 if (omode != V16QImode)
29149 emit_move_insn (target, gen_lowpart (V16QImode, x));
29150 return true;
29151 }
29152 }
29153
29154 if (!BYTES_BIG_ENDIAN)
29155 {
29156 altivec_expand_vec_perm_const_le (operands);
29157 return true;
29158 }
29159
29160 return false;
29161 }
29162
29163 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
29164 Return true if we match an efficient implementation. */
29165
29166 static bool
29167 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
29168 unsigned char perm0, unsigned char perm1)
29169 {
29170 rtx x;
29171
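/* PERM0 and PERM1 index the four elements of the double-wide vector
   {op0[0], op0[1], op1[0], op1[1]}: bit 1 of a selector picks the
   operand and bit 0 the element.  The folds below canonicalize to
   perm0 in {0,1} and perm1 in {2,3}.  */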
29172 /* If both selectors come from the same operand, fold to single op. */
29173 if ((perm0 & 2) == (perm1 & 2))
29174 {
29175 if (perm0 & 2)
29176 op0 = op1;
29177 else
29178 op1 = op0;
29179 }
29180 /* If both operands are equal, fold to simpler permutation. */
29181 if (rtx_equal_p (op0, op1))
29182 {
29183 perm0 = perm0 & 1;
29184 perm1 = (perm1 & 1) + 2;
29185 }
29186 /* If the first selector comes from the second operand, swap. */
29187 else if (perm0 & 2)
29188 {
29189 if (perm1 & 2)
29190 return false;
29191 perm0 -= 2;
29192 perm1 += 2;
29193 x = op0, op0 = op1, op1 = x;
29194 }
29195 /* If the second selector does not come from the second operand, fail. */
29196 else if ((perm1 & 2) == 0)
29197 return false;
29198
29199 /* Success! */
29200 if (target != NULL)
29201 {
29202 enum machine_mode vmode, dmode;
29203 rtvec v;
29204
29205 vmode = GET_MODE (target);
29206 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
29207 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
29208
29209 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
29210 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
29211 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
29212 emit_insn (gen_rtx_SET (VOIDmode, target, x));
29213 }
29214 return true;
29215 }
29216
29217 bool
29218 rs6000_expand_vec_perm_const (rtx operands[4])
29219 {
29220 rtx target, op0, op1, sel;
29221 unsigned char perm0, perm1;
29222
29223 target = operands[0];
29224 op0 = operands[1];
29225 op1 = operands[2];
29226 sel = operands[3];
29227
29228 /* Unpack the constant selector. */
29229 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
29230 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
29231
29232 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
29233 }
29234
29235 /* Test whether a constant permutation is supported. */
29236
29237 static bool
29238 rs6000_vectorize_vec_perm_const_ok (enum machine_mode vmode,
29239 const unsigned char *sel)
29240 {
29241 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
29242 if (TARGET_ALTIVEC)
29243 return true;
29244
29245 /* Check for ps_merge* or evmerge* insns. */
29246 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
29247 || (TARGET_SPE && vmode == V2SImode))
29248 {
29249 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
29250 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
29251 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
29252 }
29253
29254 return false;
29255 }
29256
29257 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
29258
29259 static void
29260 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
29261 enum machine_mode vmode, unsigned nelt, rtx perm[])
29262 {
29263 enum machine_mode imode;
29264 rtx x;
29265
29266 imode = vmode;
29267 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
29268 {
29269 imode = GET_MODE_INNER (vmode);
29270 imode = mode_for_size (GET_MODE_BITSIZE (imode), MODE_INT, 0);
29271 imode = mode_for_vector (imode, nelt);
29272 }
29273
29274 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
29275 x = expand_vec_perm (vmode, op0, op1, x, target);
29276 if (x != target)
29277 emit_move_insn (target, x);
29278 }
29279
29280 /* Expand an extract even operation. */
29281
29282 void
29283 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
29284 {
29285 enum machine_mode vmode = GET_MODE (target);
29286 unsigned i, nelt = GET_MODE_NUNITS (vmode);
29287 rtx perm[16];
29288
29289 for (i = 0; i < nelt; i++)
29290 perm[i] = GEN_INT (i * 2);
29291
29292 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
29293 }
29294
29295 /* Expand a vector interleave operation. */
29296
29297 void
29298 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
29299 {
29300 enum machine_mode vmode = GET_MODE (target);
29301 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
29302 rtx perm[16];
29303
29304 high = (highp == BYTES_BIG_ENDIAN ? 0 : nelt / 2);
29305 for (i = 0; i < nelt / 2; i++)
29306 {
29307 perm[i * 2] = GEN_INT (i + high);
29308 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
29309 }
29310
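/* E.g. for V4SI with HIGHP on big-endian, high = 0 and the selector
   built above is {0, 4, 1, 5}, pairing the two high elements of each
   input; on little-endian the high half instead starts at nelt / 2.  */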
29311 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
29312 }
29313
29314 /* Return an RTX representing where to find the function value of a
29315 function returning MODE. */
29316 static rtx
29317 rs6000_complex_function_value (enum machine_mode mode)
29318 {
29319 unsigned int regno;
29320 rtx r1, r2;
29321 enum machine_mode inner = GET_MODE_INNER (mode);
29322 unsigned int inner_bytes = GET_MODE_SIZE (inner);
29323
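/* E.g. an SCmode (complex float) value comes back as the PARALLEL
   built at the bottom of this function: the real part in the FP return
   register at offset 0, the imaginary part in the next register at
   offset 4.  */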
29324 if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
29325 regno = FP_ARG_RETURN;
29326 else
29327 {
29328 regno = GP_ARG_RETURN;
29329
29330 /* 32-bit is OK since it'll go in r3/r4. */
29331 if (TARGET_32BIT && inner_bytes >= 4)
29332 return gen_rtx_REG (mode, regno);
29333 }
29334
29335 if (inner_bytes >= 8)
29336 return gen_rtx_REG (mode, regno);
29337
29338 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
29339 const0_rtx);
29340 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
29341 GEN_INT (inner_bytes));
29342 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
29343 }
29344
29345 /* Target hook for TARGET_FUNCTION_VALUE.
29346
29347 On the SPE, both FPs and vectors are returned in r3.
29348
29349 On RS/6000 an integer value is in r3 and a floating-point value is in
29350 fp1, unless -msoft-float. */
29351
29352 static rtx
29353 rs6000_function_value (const_tree valtype,
29354 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
29355 bool outgoing ATTRIBUTE_UNUSED)
29356 {
29357 enum machine_mode mode;
29358 unsigned int regno;
29359
29360 /* Special handling for structs in darwin64. */
29361 if (TARGET_MACHO
29362 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
29363 {
29364 CUMULATIVE_ARGS valcum;
29365 rtx valret;
29366
29367 valcum.words = 0;
29368 valcum.fregno = FP_ARG_MIN_REG;
29369 valcum.vregno = ALTIVEC_ARG_MIN_REG;
29370 /* Do a trial code generation as if this were going to be passed as
29371 an argument; if any part goes in memory, we return NULL. */
29372 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
29373 if (valret)
29374 return valret;
29375 /* Otherwise fall through to standard ABI rules. */
29376 }
29377
29378 if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DImode)
29379 {
29380 /* Long long return values need to be split in the -mpowerpc64, 32-bit ABI. */
29381 return gen_rtx_PARALLEL (DImode,
29382 gen_rtvec (2,
29383 gen_rtx_EXPR_LIST (VOIDmode,
29384 gen_rtx_REG (SImode, GP_ARG_RETURN),
29385 const0_rtx),
29386 gen_rtx_EXPR_LIST (VOIDmode,
29387 gen_rtx_REG (SImode,
29388 GP_ARG_RETURN + 1),
29389 GEN_INT (4))));
29390 }
29391 if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DCmode)
29392 {
29393 return gen_rtx_PARALLEL (DCmode,
29394 gen_rtvec (4,
29395 gen_rtx_EXPR_LIST (VOIDmode,
29396 gen_rtx_REG (SImode, GP_ARG_RETURN),
29397 const0_rtx),
29398 gen_rtx_EXPR_LIST (VOIDmode,
29399 gen_rtx_REG (SImode,
29400 GP_ARG_RETURN + 1),
29401 GEN_INT (4)),
29402 gen_rtx_EXPR_LIST (VOIDmode,
29403 gen_rtx_REG (SImode,
29404 GP_ARG_RETURN + 2),
29405 GEN_INT (8)),
29406 gen_rtx_EXPR_LIST (VOIDmode,
29407 gen_rtx_REG (SImode,
29408 GP_ARG_RETURN + 3),
29409 GEN_INT (12))));
29410 }
29411
29412 mode = TYPE_MODE (valtype);
29413 if ((INTEGRAL_TYPE_P (valtype) && GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
29414 || POINTER_TYPE_P (valtype))
29415 mode = TARGET_32BIT ? SImode : DImode;
29416
29417 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
29418 /* _Decimal128 must use an even/odd register pair. */
29419 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
29420 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
29421 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
29422 regno = FP_ARG_RETURN;
29423 else if (TREE_CODE (valtype) == COMPLEX_TYPE
29424 && targetm.calls.split_complex_arg)
29425 return rs6000_complex_function_value (mode);
29426 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
29427 return register is used in both cases, and we won't see V2DImode/V2DFmode
29428 for pure altivec, combine the two cases. */
29429 else if (TREE_CODE (valtype) == VECTOR_TYPE
29430 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
29431 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
29432 regno = ALTIVEC_ARG_RETURN;
29433 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
29434 && (mode == DFmode || mode == DCmode
29435 || mode == TFmode || mode == TCmode))
29436 return spe_build_register_parallel (mode, GP_ARG_RETURN);
29437 else
29438 regno = GP_ARG_RETURN;
29439
29440 return gen_rtx_REG (mode, regno);
29441 }
29442
29443 /* Define how to find the value returned by a library function
29444 assuming the value has mode MODE. */
29445 rtx
29446 rs6000_libcall_value (enum machine_mode mode)
29447 {
29448 unsigned int regno;
29449
29450 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
29451 {
29452 /* Long long return values need to be split in the -mpowerpc64, 32-bit ABI. */
29453 return gen_rtx_PARALLEL (DImode,
29454 gen_rtvec (2,
29455 gen_rtx_EXPR_LIST (VOIDmode,
29456 gen_rtx_REG (SImode, GP_ARG_RETURN),
29457 const0_rtx),
29458 gen_rtx_EXPR_LIST (VOIDmode,
29459 gen_rtx_REG (SImode,
29460 GP_ARG_RETURN + 1),
29461 GEN_INT (4))));
29462 }
29463
29464 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
29465 /* _Decimal128 must use an even/odd register pair. */
29466 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
29467 else if (SCALAR_FLOAT_MODE_P (mode)
29468 && TARGET_HARD_FLOAT && TARGET_FPRS
29469 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
29470 regno = FP_ARG_RETURN;
29471 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
29472 return register is used in both cases, and we won't see V2DImode/V2DFmode
29473 for pure altivec, combine the two cases. */
29474 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
29475 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
29476 regno = ALTIVEC_ARG_RETURN;
29477 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
29478 return rs6000_complex_function_value (mode);
29479 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
29480 && (mode == DFmode || mode == DCmode
29481 || mode == TFmode || mode == TCmode))
29482 return spe_build_register_parallel (mode, GP_ARG_RETURN);
29483 else
29484 regno = GP_ARG_RETURN;
29485
29486 return gen_rtx_REG (mode, regno);
29487 }
29488
29489
29490 /* Return true if we use LRA instead of reload pass. */
29491 static bool
29492 rs6000_lra_p (void)
29493 {
29494 return rs6000_lra_flag;
29495 }
29496
29497 /* Given FROM and TO register numbers, say whether this elimination is allowed.
29498 Frame pointer elimination is automatically handled.
29499
29500 For the RS/6000, if frame pointer elimination is being done, we would like
29501 to convert ap into fp, not sp.
29502
29503 We need r30 if -mminimal-toc was specified, and there are constant pool
29504 references. */
29505
29506 static bool
29507 rs6000_can_eliminate (const int from, const int to)
29508 {
29509 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
29510 ? ! frame_pointer_needed
29511 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
29512 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
29513 : true);
29514 }
29515
29516 /* Define the offset between two registers, FROM to be eliminated and its
29517 replacement TO, at the start of a routine. */
29518 HOST_WIDE_INT
29519 rs6000_initial_elimination_offset (int from, int to)
29520 {
29521 rs6000_stack_t *info = rs6000_stack_info ();
29522 HOST_WIDE_INT offset;
29523
29524 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
29525 offset = info->push_p ? 0 : -info->total_size;
29526 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
29527 {
29528 offset = info->push_p ? 0 : -info->total_size;
29529 if (FRAME_GROWS_DOWNWARD)
29530 offset += info->fixed_size + info->vars_size + info->parm_size;
29531 }
29532 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
29533 offset = FRAME_GROWS_DOWNWARD
29534 ? info->fixed_size + info->vars_size + info->parm_size
29535 : 0;
29536 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
29537 offset = info->total_size;
29538 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
29539 offset = info->push_p ? info->total_size : 0;
29540 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
29541 offset = 0;
29542 else
29543 gcc_unreachable ();
29544
29545 return offset;
29546 }
29547
29548 static rtx
29549 rs6000_dwarf_register_span (rtx reg)
29550 {
29551 rtx parts[8];
29552 int i, words;
29553 unsigned regno = REGNO (reg);
29554 enum machine_mode mode = GET_MODE (reg);
29555
29556 if (TARGET_SPE
29557 && regno < 32
29558 && (SPE_VECTOR_MODE (GET_MODE (reg))
29559 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
29560 && mode != SFmode && mode != SDmode && mode != SCmode)))
29561 ;
29562 else
29563 return NULL_RTX;
29564
29565 regno = REGNO (reg);
29566
29567 /* The duality of the SPE register size wreaks all kinds of havoc.
29568 This is a way of distinguishing r0 in 32-bits from r0 in
29569 64-bits. */
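/* For illustration, a 64-bit value in r5 spans { r1205, r5 } on
   big-endian: the invented register 1200 + N names the upper half of
   rN, matching rs6000_dbx_register_number and
   rs6000_init_dwarf_reg_sizes_extra below.  */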
29570 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
29571 gcc_assert (words <= 4);
29572 for (i = 0; i < words; i++, regno++)
29573 {
29574 if (BYTES_BIG_ENDIAN)
29575 {
29576 parts[2 * i] = gen_rtx_REG (SImode, regno + 1200);
29577 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
29578 }
29579 else
29580 {
29581 parts[2 * i] = gen_rtx_REG (SImode, regno);
29582 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + 1200);
29583 }
29584 }
29585
29586 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
29587 }
29588
29589 /* Fill in sizes for SPE register high parts in table used by unwinder. */
29590
29591 static void
29592 rs6000_init_dwarf_reg_sizes_extra (tree address)
29593 {
29594 if (TARGET_SPE)
29595 {
29596 int i;
29597 enum machine_mode mode = TYPE_MODE (char_type_node);
29598 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
29599 rtx mem = gen_rtx_MEM (BLKmode, addr);
29600 rtx value = gen_int_mode (4, mode);
29601
29602 for (i = 1201; i < 1232; i++)
29603 {
29604 int column = DWARF_REG_TO_UNWIND_COLUMN (i);
29605 HOST_WIDE_INT offset
29606 = DWARF_FRAME_REGNUM (column) * GET_MODE_SIZE (mode);
29607
29608 emit_move_insn (adjust_address (mem, mode, offset), value);
29609 }
29610 }
29611
29612 if (TARGET_MACHO && ! TARGET_ALTIVEC)
29613 {
29614 int i;
29615 enum machine_mode mode = TYPE_MODE (char_type_node);
29616 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
29617 rtx mem = gen_rtx_MEM (BLKmode, addr);
29618 rtx value = gen_int_mode (16, mode);
29619
29620 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
29621 The unwinder still needs to know the size of Altivec registers. */
29622
29623 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
29624 {
29625 int column = DWARF_REG_TO_UNWIND_COLUMN (i);
29626 HOST_WIDE_INT offset
29627 = DWARF_FRAME_REGNUM (column) * GET_MODE_SIZE (mode);
29628
29629 emit_move_insn (adjust_address (mem, mode, offset), value);
29630 }
29631 }
29632 }
29633
29634 /* Map internal gcc register numbers to DWARF2 register numbers. */
29635
29636 unsigned int
29637 rs6000_dbx_register_number (unsigned int regno)
29638 {
29639 if (regno <= 63 || write_symbols != DWARF2_DEBUG)
29640 return regno;
29641 if (regno == LR_REGNO)
29642 return 108;
29643 if (regno == CTR_REGNO)
29644 return 109;
29645 if (CR_REGNO_P (regno))
29646 return regno - CR0_REGNO + 86;
29647 if (regno == CA_REGNO)
29648 return 101; /* XER */
29649 if (ALTIVEC_REGNO_P (regno))
29650 return regno - FIRST_ALTIVEC_REGNO + 1124;
29651 if (regno == VRSAVE_REGNO)
29652 return 356;
29653 if (regno == VSCR_REGNO)
29654 return 67;
29655 if (regno == SPE_ACC_REGNO)
29656 return 99;
29657 if (regno == SPEFSCR_REGNO)
29658 return 612;
29659 /* SPE high reg number. We get these values of regno from
29660 rs6000_dwarf_register_span. */
29661 gcc_assert (regno >= 1200 && regno < 1232);
29662 return regno;
29663 }
29664
29665 /* target hook eh_return_filter_mode */
29666 static enum machine_mode
29667 rs6000_eh_return_filter_mode (void)
29668 {
29669 return TARGET_32BIT ? SImode : word_mode;
29670 }
29671
29672 /* Target hook for scalar_mode_supported_p. */
29673 static bool
29674 rs6000_scalar_mode_supported_p (enum machine_mode mode)
29675 {
29676 if (DECIMAL_FLOAT_MODE_P (mode))
29677 return default_decimal_float_supported_p ();
29678 else
29679 return default_scalar_mode_supported_p (mode);
29680 }
29681
29682 /* Target hook for vector_mode_supported_p. */
29683 static bool
29684 rs6000_vector_mode_supported_p (enum machine_mode mode)
29685 {
29686
29687 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
29688 return true;
29689
29690 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
29691 return true;
29692
29693 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
29694 return true;
29695
29696 else
29697 return false;
29698 }
29699
29700 /* Target hook for invalid_arg_for_unprototyped_fn. */
29701 static const char *
29702 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
29703 {
29704 return (!rs6000_darwin64_abi
29705 && typelist == 0
29706 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
29707 && (funcdecl == NULL_TREE
29708 || (TREE_CODE (funcdecl) == FUNCTION_DECL
29709 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
29710 ? N_("AltiVec argument passed to unprototyped function")
29711 : NULL;
29712 }
29713
29714 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
29715 setup by using __stack_chk_fail_local hidden function instead of
29716 calling __stack_chk_fail directly. Otherwise it is better to call
29717 __stack_chk_fail directly. */
29718
29719 static tree ATTRIBUTE_UNUSED
29720 rs6000_stack_protect_fail (void)
29721 {
29722 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
29723 ? default_hidden_stack_protect_fail ()
29724 : default_external_stack_protect_fail ();
29725 }
29726
29727 void
29728 rs6000_final_prescan_insn (rtx insn, rtx *operand ATTRIBUTE_UNUSED,
29729 int num_operands ATTRIBUTE_UNUSED)
29730 {
29731 if (rs6000_warn_cell_microcode)
29732 {
29733 const char *temp;
29734 int insn_code_number = recog_memoized (insn);
29735 location_t location = INSN_LOCATION (insn);
29736
29737 /* Punt on insns we cannot recognize. */
29738 if (insn_code_number < 0)
29739 return;
29740
29741 temp = get_insn_template (insn_code_number, insn);
29742
29743 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
29744 warning_at (location, OPT_mwarn_cell_microcode,
29745 "emitting microcode insn %s\t[%s] #%d",
29746 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
29747 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
29748 warning_at (location, OPT_mwarn_cell_microcode,
29749 "emitting conditional microcode insn %s\t[%s] #%d",
29750 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
29751 }
29752 }
29753
29754 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
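/* AddressSanitizer's usual mapping locates the shadow byte for an
   8-byte granule at (addr >> 3) + offset; the constants below are
   assumed to place the shadow region appropriately for the 64-bit and
   32-bit PowerPC address spaces.  */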
29755
29756 #if TARGET_ELF
29757 static unsigned HOST_WIDE_INT
29758 rs6000_asan_shadow_offset (void)
29759 {
29760 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
29761 }
29762 #endif
29763 \f
29764 /* Mask options that we want to support inside of attribute((target)) and
29765 #pragma GCC target operations. Note, we do not include things like
29766 64/32-bit, endianness, hard/soft floating point, etc. that would have
29767 different calling sequences. */
29768
29769 struct rs6000_opt_mask {
29770 const char *name; /* option name */
29771 HOST_WIDE_INT mask; /* mask to set */
29772 bool invert; /* invert sense of mask */
29773 bool valid_target; /* option is a target option */
29774 };
29775
29776 static struct rs6000_opt_mask const rs6000_opt_masks[] =
29777 {
29778 { "altivec", OPTION_MASK_ALTIVEC, false, true },
29779 { "cmpb", OPTION_MASK_CMPB, false, true },
29780 { "crypto", OPTION_MASK_CRYPTO, false, true },
29781 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
29782 { "dlmzb", OPTION_MASK_DLMZB, false, true },
29783 { "fprnd", OPTION_MASK_FPRND, false, true },
29784 { "hard-dfp", OPTION_MASK_DFP, false, true },
29785 { "htm", OPTION_MASK_HTM, false, true },
29786 { "isel", OPTION_MASK_ISEL, false, true },
29787 { "mfcrf", OPTION_MASK_MFCRF, false, true },
29788 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
29789 { "mulhw", OPTION_MASK_MULHW, false, true },
29790 { "multiple", OPTION_MASK_MULTIPLE, false, true },
29791 { "popcntb", OPTION_MASK_POPCNTB, false, true },
29792 { "popcntd", OPTION_MASK_POPCNTD, false, true },
29793 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
29794 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
29795 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
29796 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
29797 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
29798 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
29799 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
29800 { "string", OPTION_MASK_STRING, false, true },
29801 { "update", OPTION_MASK_NO_UPDATE, true , true },
29802 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, false },
29803 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, false },
29804 { "vsx", OPTION_MASK_VSX, false, true },
29805 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
29806 #ifdef OPTION_MASK_64BIT
29807 #if TARGET_AIX_OS
29808 { "aix64", OPTION_MASK_64BIT, false, false },
29809 { "aix32", OPTION_MASK_64BIT, true, false },
29810 #else
29811 { "64", OPTION_MASK_64BIT, false, false },
29812 { "32", OPTION_MASK_64BIT, true, false },
29813 #endif
29814 #endif
29815 #ifdef OPTION_MASK_EABI
29816 { "eabi", OPTION_MASK_EABI, false, false },
29817 #endif
29818 #ifdef OPTION_MASK_LITTLE_ENDIAN
29819 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
29820 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
29821 #endif
29822 #ifdef OPTION_MASK_RELOCATABLE
29823 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
29824 #endif
29825 #ifdef OPTION_MASK_STRICT_ALIGN
29826 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
29827 #endif
29828 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
29829 { "string", OPTION_MASK_STRING, false, false },
29830 };
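
/* For example (illustrative user-level usage, not part of this table),
   the names above are the ones accepted in:

	#pragma GCC target ("vsx,no-update")
	int f (void) __attribute__((__target__("popcntd")));

   where a "no-" prefix inverts the sense of the mask, as handled in
   rs6000_inner_target_options below.  */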
29831
29832 /* Builtin mask mapping for printing the flags. */
29833 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
29834 {
29835 { "altivec", RS6000_BTM_ALTIVEC, false, false },
29836 { "vsx", RS6000_BTM_VSX, false, false },
29837 { "spe", RS6000_BTM_SPE, false, false },
29838 { "paired", RS6000_BTM_PAIRED, false, false },
29839 { "fre", RS6000_BTM_FRE, false, false },
29840 { "fres", RS6000_BTM_FRES, false, false },
29841 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
29842 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
29843 { "popcntd", RS6000_BTM_POPCNTD, false, false },
29844 { "cell", RS6000_BTM_CELL, false, false },
29845 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
29846 { "crypto", RS6000_BTM_CRYPTO, false, false },
29847 { "htm", RS6000_BTM_HTM, false, false },
29848 };
29849
29850 /* Option variables that we want to support inside attribute((target)) and
29851 #pragma GCC target operations. */
29852
29853 struct rs6000_opt_var {
29854 const char *name; /* option name */
29855 size_t global_offset; /* offset of the option in global_options. */
29856 size_t target_offset; /* offset of the option in target options. */
29857 };
29858
29859 static struct rs6000_opt_var const rs6000_opt_vars[] =
29860 {
29861 { "friz",
29862 offsetof (struct gcc_options, x_TARGET_FRIZ),
29863 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
29864 { "avoid-indexed-addresses",
29865 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
29866 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
29867 { "paired",
29868 offsetof (struct gcc_options, x_rs6000_paired_float),
29869 offsetof (struct cl_target_option, x_rs6000_paired_float), },
29870 { "longcall",
29871 offsetof (struct gcc_options, x_rs6000_default_long_calls),
29872 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
29873 };
29874
29875 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
29876 parsing. Return true if there were no errors. */
29877
29878 static bool
29879 rs6000_inner_target_options (tree args, bool attr_p)
29880 {
29881 bool ret = true;
29882
29883 if (args == NULL_TREE)
29884 ;
29885
29886 else if (TREE_CODE (args) == STRING_CST)
29887 {
29888 char *p = ASTRDUP (TREE_STRING_POINTER (args));
29889 char *q;
29890
29891 while ((q = strtok (p, ",")) != NULL)
29892 {
29893 bool error_p = false;
29894 bool not_valid_p = false;
29895 const char *cpu_opt = NULL;
29896
29897 p = NULL;
29898 if (strncmp (q, "cpu=", 4) == 0)
29899 {
29900 int cpu_index = rs6000_cpu_name_lookup (q+4);
29901 if (cpu_index >= 0)
29902 rs6000_cpu_index = cpu_index;
29903 else
29904 {
29905 error_p = true;
29906 cpu_opt = q+4;
29907 }
29908 }
29909 else if (strncmp (q, "tune=", 5) == 0)
29910 {
29911 int tune_index = rs6000_cpu_name_lookup (q+5);
29912 if (tune_index >= 0)
29913 rs6000_tune_index = tune_index;
29914 else
29915 {
29916 error_p = true;
29917 cpu_opt = q+5;
29918 }
29919 }
29920 else
29921 {
29922 size_t i;
29923 bool invert = false;
29924 char *r = q;
29925
29926 error_p = true;
29927 if (strncmp (r, "no-", 3) == 0)
29928 {
29929 invert = true;
29930 r += 3;
29931 }
29932
29933 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
29934 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
29935 {
29936 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
29937
29938 if (!rs6000_opt_masks[i].valid_target)
29939 not_valid_p = true;
29940 else
29941 {
29942 error_p = false;
29943 rs6000_isa_flags_explicit |= mask;
29944
29945 /* VSX needs altivec, so -mvsx automagically sets
29946 altivec. */
29947 if (mask == OPTION_MASK_VSX && !invert)
29948 mask |= OPTION_MASK_ALTIVEC;
29949
29950 if (rs6000_opt_masks[i].invert)
29951 invert = !invert;
29952
29953 if (invert)
29954 rs6000_isa_flags &= ~mask;
29955 else
29956 rs6000_isa_flags |= mask;
29957 }
29958 break;
29959 }
29960
29961 if (error_p && !not_valid_p)
29962 {
29963 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
29964 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
29965 {
29966 size_t j = rs6000_opt_vars[i].global_offset;
29967 *((int *) ((char *)&global_options + j)) = !invert;
29968 error_p = false;
29969 break;
29970 }
29971 }
29972 }
29973
29974 if (error_p)
29975 {
29976 const char *eprefix, *esuffix;
29977
29978 ret = false;
29979 if (attr_p)
29980 {
29981 eprefix = "__attribute__((__target__(";
29982 esuffix = ")))";
29983 }
29984 else
29985 {
29986 eprefix = "#pragma GCC target ";
29987 esuffix = "";
29988 }
29989
29990 if (cpu_opt)
29991 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
29992 q, esuffix);
29993 else if (not_valid_p)
29994 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
29995 else
29996 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
29997 }
29998 }
29999 }
30000
30001 else if (TREE_CODE (args) == TREE_LIST)
30002 {
30003 do
30004 {
30005 tree value = TREE_VALUE (args);
30006 if (value)
30007 {
30008 bool ret2 = rs6000_inner_target_options (value, attr_p);
30009 if (!ret2)
30010 ret = false;
30011 }
30012 args = TREE_CHAIN (args);
30013 }
30014 while (args != NULL_TREE);
30015 }
30016
30017 else
30018 gcc_unreachable ();
30019
30020 return ret;
30021 }
30022
30023 /* Print out the target options as a list for -mdebug=target. */
30024
30025 static void
30026 rs6000_debug_target_options (tree args, const char *prefix)
30027 {
30028 if (args == NULL_TREE)
30029 fprintf (stderr, "%s<NULL>", prefix);
30030
30031 else if (TREE_CODE (args) == STRING_CST)
30032 {
30033 char *p = ASTRDUP (TREE_STRING_POINTER (args));
30034 char *q;
30035
30036 while ((q = strtok (p, ",")) != NULL)
30037 {
30038 p = NULL;
30039 fprintf (stderr, "%s\"%s\"", prefix, q);
30040 prefix = ", ";
30041 }
30042 }
30043
30044 else if (TREE_CODE (args) == TREE_LIST)
30045 {
30046 do
30047 {
30048 tree value = TREE_VALUE (args);
30049 if (value)
30050 {
30051 rs6000_debug_target_options (value, prefix);
30052 prefix = ", ";
30053 }
30054 args = TREE_CHAIN (args);
30055 }
30056 while (args != NULL_TREE);
30057 }
30058
30059 else
30060 gcc_unreachable ();
30061
30062 return;
30063 }
30064
30065 \f
30066 /* Hook to validate attribute((target("..."))). */
30067
30068 static bool
30069 rs6000_valid_attribute_p (tree fndecl,
30070 tree ARG_UNUSED (name),
30071 tree args,
30072 int flags)
30073 {
30074 struct cl_target_option cur_target;
30075 bool ret;
30076 tree old_optimize = build_optimization_node (&global_options);
30077 tree new_target, new_optimize;
30078 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30079
30080 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30081
30082 if (TARGET_DEBUG_TARGET)
30083 {
30084 tree tname = DECL_NAME (fndecl);
30085 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
30086 if (tname)
30087 fprintf (stderr, "function: %.*s\n",
30088 (int) IDENTIFIER_LENGTH (tname),
30089 IDENTIFIER_POINTER (tname));
30090 else
30091 fprintf (stderr, "function: unknown\n");
30092
30093 fprintf (stderr, "args:");
30094 rs6000_debug_target_options (args, " ");
30095 fprintf (stderr, "\n");
30096
30097 if (flags)
30098 fprintf (stderr, "flags: 0x%x\n", flags);
30099
30100 fprintf (stderr, "--------------------\n");
30101 }
30102
30106 /* If the function changed the optimization levels as well as setting target
30107 options, start with the optimizations specified. */
30108 if (func_optimize && func_optimize != old_optimize)
30109 cl_optimization_restore (&global_options,
30110 TREE_OPTIMIZATION (func_optimize));
30111
30112 /* The target attributes may also change some optimization flags, so update
30113 the optimization options if necessary. */
30114 cl_target_option_save (&cur_target, &global_options);
30115 rs6000_cpu_index = rs6000_tune_index = -1;
30116 ret = rs6000_inner_target_options (args, true);
30117
30118 /* Set up any additional state. */
30119 if (ret)
30120 {
30121 ret = rs6000_option_override_internal (false);
30122 new_target = build_target_option_node (&global_options);
30123 }
30124 else
30125 new_target = NULL;
30126
30127 new_optimize = build_optimization_node (&global_options);
30128
30129 if (!new_target)
30130 ret = false;
30131
30132 else if (fndecl)
30133 {
30134 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
30135
30136 if (old_optimize != new_optimize)
30137 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30138 }
30139
30140 cl_target_option_restore (&global_options, &cur_target);
30141
30142 if (old_optimize != new_optimize)
30143 cl_optimization_restore (&global_options,
30144 TREE_OPTIMIZATION (old_optimize));
30145
30146 return ret;
30147 }
30148
30149 \f
30150 /* Hook to validate the current #pragma GCC target and set the state, and
30151 update the macros based on what was changed. If ARGS is NULL, then
30152 POP_TARGET is used to reset the options. */
30153
30154 bool
30155 rs6000_pragma_target_parse (tree args, tree pop_target)
30156 {
30157 tree prev_tree = build_target_option_node (&global_options);
30158 tree cur_tree;
30159 struct cl_target_option *prev_opt, *cur_opt;
30160 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
30161 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
30162
30163 if (TARGET_DEBUG_TARGET)
30164 {
30165 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
30166 fprintf (stderr, "args:");
30167 rs6000_debug_target_options (args, " ");
30168 fprintf (stderr, "\n");
30169
30170 if (pop_target)
30171 {
30172 fprintf (stderr, "pop_target:\n");
30173 debug_tree (pop_target);
30174 }
30175 else
30176 fprintf (stderr, "pop_target: <NULL>\n");
30177
30178 fprintf (stderr, "--------------------\n");
30179 }
30180
30181 if (! args)
30182 {
30183 cur_tree = ((pop_target)
30184 ? pop_target
30185 : target_option_default_node);
30186 cl_target_option_restore (&global_options,
30187 TREE_TARGET_OPTION (cur_tree));
30188 }
30189 else
30190 {
30191 rs6000_cpu_index = rs6000_tune_index = -1;
30192 if (!rs6000_inner_target_options (args, false)
30193 || !rs6000_option_override_internal (false)
30194 || (cur_tree = build_target_option_node (&global_options))
30195 == NULL_TREE)
30196 {
30197 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
30198 fprintf (stderr, "invalid pragma\n");
30199
30200 return false;
30201 }
30202 }
30203
30204 target_option_current_node = cur_tree;
30205
30206 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
30207 change the macros that are defined. */
30208 if (rs6000_target_modify_macros_ptr)
30209 {
30210 prev_opt = TREE_TARGET_OPTION (prev_tree);
30211 prev_bumask = prev_opt->x_rs6000_builtin_mask;
30212 prev_flags = prev_opt->x_rs6000_isa_flags;
30213
30214 cur_opt = TREE_TARGET_OPTION (cur_tree);
30215 cur_flags = cur_opt->x_rs6000_isa_flags;
30216 cur_bumask = cur_opt->x_rs6000_builtin_mask;
30217
30218 diff_bumask = (prev_bumask ^ cur_bumask);
30219 diff_flags = (prev_flags ^ cur_flags);
30220
30221 if ((diff_flags != 0) || (diff_bumask != 0))
30222 {
30223 /* Delete old macros. */
30224 rs6000_target_modify_macros_ptr (false,
30225 prev_flags & diff_flags,
30226 prev_bumask & diff_bumask);
30227
30228 /* Define new macros. */
30229 rs6000_target_modify_macros_ptr (true,
30230 cur_flags & diff_flags,
30231 cur_bumask & diff_bumask);
30232 }
30233 }
30234
30235 return true;
30236 }
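
/* Illustrative usage (hypothetical user code):

	#pragma GCC push_options
	#pragma GCC target ("altivec")
	...				(__ALTIVEC__ is now defined)
	#pragma GCC pop_options		(previous options and macros return)

   The pop is the ARGS == NULL path above: POP_TARGET (or the default
   options) is restored, and rs6000_target_modify_macros_ptr undefines the
   macros that no longer apply and defines the ones that now do.  */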
30237
30238 \f
30239 /* Remember the last target of rs6000_set_current_function. */
30240 static GTY(()) tree rs6000_previous_fndecl;
30241
30242 /* Establish appropriate back-end context for processing the function
30243 FNDECL. The argument might be NULL to indicate processing at top
30244 level, outside of any function scope. */
30245 static void
30246 rs6000_set_current_function (tree fndecl)
30247 {
30248 tree old_tree = (rs6000_previous_fndecl
30249 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
30250 : NULL_TREE);
30251
30252 tree new_tree = (fndecl
30253 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
30254 : NULL_TREE);
30255
30256 if (TARGET_DEBUG_TARGET)
30257 {
30258 bool print_final = false;
30259 fprintf (stderr, "\n==================== rs6000_set_current_function");
30260
30261 if (fndecl)
30262 fprintf (stderr, ", fndecl %s (%p)",
30263 (DECL_NAME (fndecl)
30264 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
30265 : "<unknown>"), (void *)fndecl);
30266
30267 if (rs6000_previous_fndecl)
30268 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
30269
30270 fprintf (stderr, "\n");
30271 if (new_tree)
30272 {
30273 fprintf (stderr, "\nnew fndecl target specific options:\n");
30274 debug_tree (new_tree);
30275 print_final = true;
30276 }
30277
30278 if (old_tree)
30279 {
30280 fprintf (stderr, "\nold fndecl target specific options:\n");
30281 debug_tree (old_tree);
30282 print_final = true;
30283 }
30284
30285 if (print_final)
30286 fprintf (stderr, "--------------------\n");
30287 }
30288
30289 /* Only change the context if the function changes. This hook is called
30290 several times in the course of compiling a function, and we don't want to
30291 slow things down too much or call target_reinit when it isn't safe. */
30292 if (fndecl && fndecl != rs6000_previous_fndecl)
30293 {
30294 rs6000_previous_fndecl = fndecl;
30295 if (old_tree == new_tree)
30296 ;
30297
30298 else if (new_tree)
30299 {
30300 cl_target_option_restore (&global_options,
30301 TREE_TARGET_OPTION (new_tree));
30302 target_reinit ();
30303 }
30304
30305 else if (old_tree)
30306 {
30307 struct cl_target_option *def
30308 = TREE_TARGET_OPTION (target_option_current_node);
30309
30310 cl_target_option_restore (&global_options, def);
30311 target_reinit ();
30312 }
30313 }
30314 }
30315
30316 \f
30317 /* Save the current options */
30318
30319 static void
30320 rs6000_function_specific_save (struct cl_target_option *ptr,
30321 struct gcc_options *opts)
30322 {
30323 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
30324 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
30325 }
30326
30327 /* Restore the current options */
30328
30329 static void
30330 rs6000_function_specific_restore (struct gcc_options *opts,
30331 struct cl_target_option *ptr)
30333 {
30334 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
30335 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
30336 (void) rs6000_option_override_internal (false);
30337 }
30338
30339 /* Print the current options */
30340
30341 static void
30342 rs6000_function_specific_print (FILE *file, int indent,
30343 struct cl_target_option *ptr)
30344 {
30345 rs6000_print_isa_options (file, indent, "Isa options set",
30346 ptr->x_rs6000_isa_flags);
30347
30348 rs6000_print_isa_options (file, indent, "Isa options explicit",
30349 ptr->x_rs6000_isa_flags_explicit);
30350 }
30351
30352 /* Helper function to print the current isa or misc options on a line. */
30353
30354 static void
30355 rs6000_print_options_internal (FILE *file,
30356 int indent,
30357 const char *string,
30358 HOST_WIDE_INT flags,
30359 const char *prefix,
30360 const struct rs6000_opt_mask *opts,
30361 size_t num_elements)
30362 {
30363 size_t i;
30364 size_t start_column = 0;
30365 size_t cur_column;
30366 size_t max_column = 76;
30367 const char *comma = "";
30368
30369 if (indent)
30370 start_column += fprintf (file, "%*s", indent, "");
30371
30372 if (!flags)
30373 {
30374 fprintf (file, DEBUG_FMT_S, string, "<none>");
30375 return;
30376 }
30377
30378 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
30379
30380 /* Print the various mask options. */
30381 cur_column = start_column;
30382 for (i = 0; i < num_elements; i++)
30383 {
30384 if ((flags & opts[i].mask) != 0)
30385 {
30386 const char *no_str = opts[i].invert ? "no-" : "";
30387 size_t len = (strlen (comma)
30388 + strlen (prefix)
30389 + strlen (no_str)
30390 + strlen (opts[i].name));
30391
30392 cur_column += len;
30393 if (cur_column > max_column)
30394 {
30395 fprintf (file, ", \\\n%*s", (int)start_column, "");
30396 cur_column = start_column + len;
30397 comma = "";
30398 }
30399
30400 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
30401 opts[i].name);
30402 flags &= ~ opts[i].mask;
30403 comma = ", ";
30404 }
30405 }
30406
30407 fputs ("\n", file);
30408 }
30409
30410 /* Helper function to print the current isa options on a line. */
30411
30412 static void
30413 rs6000_print_isa_options (FILE *file, int indent, const char *string,
30414 HOST_WIDE_INT flags)
30415 {
30416 rs6000_print_options_internal (file, indent, string, flags, "-m",
30417 &rs6000_opt_masks[0],
30418 ARRAY_SIZE (rs6000_opt_masks));
30419 }
30420
30421 static void
30422 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
30423 HOST_WIDE_INT flags)
30424 {
30425 rs6000_print_options_internal (file, indent, string, flags, "",
30426 &rs6000_builtin_mask_names[0],
30427 ARRAY_SIZE (rs6000_builtin_mask_names));
30428 }
30429
30430 \f
30431 /* Hook to determine if one function can safely inline another. */
30432
30433 static bool
30434 rs6000_can_inline_p (tree caller, tree callee)
30435 {
30436 bool ret = false;
30437 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30438 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30439
30440 /* If callee has no option attributes, then it is ok to inline. */
30441 if (!callee_tree)
30442 ret = true;
30443
30444 /* If caller has no option attributes but callee does, then it is not ok to
30445 inline. */
30446 else if (!caller_tree)
30447 ret = false;
30448
30449 else
30450 {
30451 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
30452 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
30453
30454 /* Callee's options must be a subset of the caller's, i.e. a vsx function
30455 can inline an altivec function but a non-vsx function can't inline a
30456 vsx function. */
30457 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
30458 == callee_opts->x_rs6000_isa_flags)
30459 ret = true;
30460 }
30461
30462 if (TARGET_DEBUG_TARGET)
30463 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
30464 (DECL_NAME (caller)
30465 ? IDENTIFIER_POINTER (DECL_NAME (caller))
30466 : "<unknown>"),
30467 (DECL_NAME (callee)
30468 ? IDENTIFIER_POINTER (DECL_NAME (callee))
30469 : "<unknown>"),
30470 (ret ? "can" : "cannot"));
30471
30472 return ret;
30473 }
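
/* A sketch of the subset rule above (hypothetical user code): a caller
   whose ISA flags include VSX may inline a callee that only requires
   AltiVec, but not the other way around:

	__attribute__((target("altivec"))) static int callee (void);
	__attribute__((target("vsx"))) int caller (void)
	{ return callee (); }		(inlining is allowed here)  */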
30474 \f
30475 /* Allocate a stack temp and fix up the address so it meets the particular
30476 memory requirements (either offsettable or REG+REG addressing). */
30477
30478 rtx
30479 rs6000_allocate_stack_temp (enum machine_mode mode,
30480 bool offsettable_p,
30481 bool reg_reg_p)
30482 {
30483 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
30484 rtx addr = XEXP (stack, 0);
30485 int strict_p = (reload_in_progress || reload_completed);
30486
30487 if (!legitimate_indirect_address_p (addr, strict_p))
30488 {
30489 if (offsettable_p
30490 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
30491 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
30492
30493 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
30494 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
30495 }
30496
30497 return stack;
30498 }
30499
30500 /* Given a memory reference, if it does not use reg or reg+reg addressing, convert
30501 to such a form to deal with memory reference instructions like STFIWX that
30502 only take reg+reg addressing. */
30503
30504 rtx
30505 rs6000_address_for_fpconvert (rtx x)
30506 {
30507 int strict_p = (reload_in_progress || reload_completed);
30508 rtx addr;
30509
30510 gcc_assert (MEM_P (x));
30511 addr = XEXP (x, 0);
30512 if (! legitimate_indirect_address_p (addr, strict_p)
30513 && ! legitimate_indexed_address_p (addr, strict_p))
30514 {
30515 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
30516 {
30517 rtx reg = XEXP (addr, 0);
30518 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
30519 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
30520 gcc_assert (REG_P (reg));
30521 emit_insn (gen_add3_insn (reg, reg, size_rtx));
30522 addr = reg;
30523 }
30524 else if (GET_CODE (addr) == PRE_MODIFY)
30525 {
30526 rtx reg = XEXP (addr, 0);
30527 rtx expr = XEXP (addr, 1);
30528 gcc_assert (REG_P (reg));
30529 gcc_assert (GET_CODE (expr) == PLUS);
30530 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
30531 addr = reg;
30532 }
30533
30534 x = replace_equiv_address (x, copy_addr_to_reg (addr));
30535 }
30536
30537 return x;
30538 }
30539
30540 /* Given a memory reference, if it is not in the form for altivec memory
30541 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
30542 convert to the altivec format. */
30543
30544 rtx
30545 rs6000_address_for_altivec (rtx x)
30546 {
30547 gcc_assert (MEM_P (x));
30548 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
30549 {
30550 rtx addr = XEXP (x, 0);
30551 int strict_p = (reload_in_progress || reload_completed);
30552
30553 if (!legitimate_indexed_address_p (addr, strict_p)
30554 && !legitimate_indirect_address_p (addr, strict_p))
30555 addr = copy_to_mode_reg (Pmode, addr);
30556
30557 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
30558 x = change_address (x, GET_MODE (x), addr);
30559 }
30560
30561 return x;
30562 }
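
/* E.g. (a sketch): for a MEM whose address is r3+r4+8, the code above
   copies the address into a register REG and rewrites the MEM to use
   (and REG -16).  That matches the AltiVec lvx/stvx form, where the
   hardware ignores the low four bits of the effective address.  */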
30563
30564 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
30565
30566 On the RS/6000, all integer constants are acceptable, most won't be valid
30567 for particular insns, though. Only easy FP constants are acceptable. */
30568
30569 static bool
30570 rs6000_legitimate_constant_p (enum machine_mode mode, rtx x)
30571 {
30572 if (TARGET_ELF && rs6000_tls_referenced_p (x))
30573 return false;
30574
30575 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
30576 || GET_MODE (x) == VOIDmode
30577 || (TARGET_POWERPC64 && mode == DImode)
30578 || easy_fp_constant (x, mode)
30579 || easy_vector_constant (x, mode));
30580 }
30581
30582 \f
30583 /* A function pointer under AIX is a pointer to a data area whose first word
30584 contains the actual address of the function, whose second word contains a
30585 pointer to its TOC, and whose third word contains a value to place in the
30586 static chain register (r11). Note that if we load the static chain, our
30587 "trampoline" need not have any executable code. */
30588
30589 void
30590 rs6000_call_indirect_aix (rtx value, rtx func_desc, rtx flag)
30591 {
30592 rtx func_addr;
30593 rtx toc_reg;
30594 rtx sc_reg;
30595 rtx stack_ptr;
30596 rtx stack_toc_offset;
30597 rtx stack_toc_mem;
30598 rtx func_toc_offset;
30599 rtx func_toc_mem;
30600 rtx func_sc_offset;
30601 rtx func_sc_mem;
30602 rtx insn;
30603 rtx (*call_func) (rtx, rtx, rtx, rtx);
30604 rtx (*call_value_func) (rtx, rtx, rtx, rtx, rtx);
30605
30606 stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
30607 toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
30608
30609 /* Load up address of the actual function. */
30610 func_desc = force_reg (Pmode, func_desc);
30611 func_addr = gen_reg_rtx (Pmode);
30612 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
30613
30614 if (TARGET_32BIT)
30615 {
30617 stack_toc_offset = GEN_INT (TOC_SAVE_OFFSET_32BIT);
30618 func_toc_offset = GEN_INT (AIX_FUNC_DESC_TOC_32BIT);
30619 func_sc_offset = GEN_INT (AIX_FUNC_DESC_SC_32BIT);
30620 if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
30621 {
30622 call_func = gen_call_indirect_aix32bit;
30623 call_value_func = gen_call_value_indirect_aix32bit;
30624 }
30625 else
30626 {
30627 call_func = gen_call_indirect_aix32bit_nor11;
30628 call_value_func = gen_call_value_indirect_aix32bit_nor11;
30629 }
30630 }
30631 else
30632 {
30633 stack_toc_offset = GEN_INT (TOC_SAVE_OFFSET_64BIT);
30634 func_toc_offset = GEN_INT (AIX_FUNC_DESC_TOC_64BIT);
30635 func_sc_offset = GEN_INT (AIX_FUNC_DESC_SC_64BIT);
30636 if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
30637 {
30638 call_func = gen_call_indirect_aix64bit;
30639 call_value_func = gen_call_value_indirect_aix64bit;
30640 }
30641 else
30642 {
30643 call_func = gen_call_indirect_aix64bit_nor11;
30644 call_value_func = gen_call_value_indirect_aix64bit_nor11;
30645 }
30646 }
30647
30648 /* Reserved spot to store the TOC. */
30649 stack_toc_mem = gen_frame_mem (Pmode,
30650 gen_rtx_PLUS (Pmode,
30651 stack_ptr,
30652 stack_toc_offset));
30653
30654 gcc_assert (cfun);
30655 gcc_assert (cfun->machine);
30656
30657 /* Can we optimize saving the TOC in the prologue or do we need to do it at
30658 every call? */
30659 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
30660 cfun->machine->save_toc_in_prologue = true;
30661
30662 else
30663 {
30664 MEM_VOLATILE_P (stack_toc_mem) = 1;
30665 emit_move_insn (stack_toc_mem, toc_reg);
30666 }
30667
30668 /* Calculate the address to load the TOC of the called function. We don't
30669 actually load this until the split after reload. */
30670 func_toc_mem = gen_rtx_MEM (Pmode,
30671 gen_rtx_PLUS (Pmode,
30672 func_desc,
30673 func_toc_offset));
30674
30675 /* If we have a static chain, load it up. */
30676 if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
30677 {
30678 func_sc_mem = gen_rtx_MEM (Pmode,
30679 gen_rtx_PLUS (Pmode,
30680 func_desc,
30681 func_sc_offset));
30682
30683 sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
30684 emit_move_insn (sc_reg, func_sc_mem);
30685 }
30686
30687 /* Create the call. */
30688 if (value)
30689 insn = call_value_func (value, func_addr, flag, func_toc_mem,
30690 stack_toc_mem);
30691 else
30692 insn = call_func (func_addr, flag, func_toc_mem, stack_toc_mem);
30693
30694 emit_call_insn (insn);
30695 }
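
/* The RTL emitted above eventually splits into a sequence along these
   lines (illustrative 64-bit pseudo-assembly; the exact registers,
   offsets, and the static chain load depend on the ABI and options):

	ld r11,0(rdesc)		function address from the descriptor
	std r2,40(r1)		save our TOC (unless saved in the prologue)
	mtctr r11
	ld r2,8(rdesc)		callee's TOC
	bctrl
	ld r2,40(r1)		restore our TOC  */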
30696
30697 /* Return whether we need to always update the saved TOC pointer when we update
30698 the stack pointer. */
30699
30700 static bool
30701 rs6000_save_toc_in_prologue_p (void)
30702 {
30703 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
30704 }
30705
30706 #ifdef HAVE_GAS_HIDDEN
30707 # define USE_HIDDEN_LINKONCE 1
30708 #else
30709 # define USE_HIDDEN_LINKONCE 0
30710 #endif
30711
30712 /* Fills in the label name that should be used for a 476 link stack thunk. */
30713
30714 void
30715 get_ppc476_thunk_name (char name[32])
30716 {
30717 gcc_assert (TARGET_LINK_STACK);
30718
30719 if (USE_HIDDEN_LINKONCE)
30720 sprintf (name, "__ppc476.get_thunk");
30721 else
30722 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
30723 }
30724
30725 /* This function emits the simple thunk routine that is used to preserve
30726 the link stack on the 476 cpu. */
30727
30728 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
30729 static void
30730 rs6000_code_end (void)
30731 {
30732 char name[32];
30733 tree decl;
30734
30735 if (!TARGET_LINK_STACK)
30736 return;
30737
30738 get_ppc476_thunk_name (name);
30739
30740 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
30741 build_function_type_list (void_type_node, NULL_TREE));
30742 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
30743 NULL_TREE, void_type_node);
30744 TREE_PUBLIC (decl) = 1;
30745 TREE_STATIC (decl) = 1;
30746
30747 #if RS6000_WEAK
30748 if (USE_HIDDEN_LINKONCE)
30749 {
30750 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
30751 targetm.asm_out.unique_section (decl, 0);
30752 switch_to_section (get_named_section (decl, NULL, 0));
30753 DECL_WEAK (decl) = 1;
30754 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
30755 targetm.asm_out.globalize_label (asm_out_file, name);
30756 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
30757 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
30758 }
30759 else
30760 #endif
30761 {
30762 switch_to_section (text_section);
30763 ASM_OUTPUT_LABEL (asm_out_file, name);
30764 }
30765
30766 DECL_INITIAL (decl) = make_node (BLOCK);
30767 current_function_decl = decl;
30768 init_function_start (decl);
30769 first_function_block_is_cold = false;
30770 /* Make sure unwind info is emitted for the thunk if needed. */
30771 final_start_function (emit_barrier (), asm_out_file, 1);
30772
30773 fputs ("\tblr\n", asm_out_file);
30774
30775 final_end_function ();
30776 init_insn_lengths ();
30777 free_after_compilation (cfun);
30778 set_cfun (NULL);
30779 current_function_decl = NULL;
30780 }
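
/* The thunk body emitted above is simply (illustrative assembly):

	__ppc476.get_thunk:
		blr

   A caller uses "bl __ppc476.get_thunk" followed by mflr to obtain its
   own address; going through a real bl/blr pair keeps the 476's branch
   link stack balanced, which the usual bcl 20,31,$+4 idiom does not.  */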
30781
30782 /* Add r30 to hard reg set if the prologue sets it up and it is not
30783 pic_offset_table_rtx. */
30784
30785 static void
30786 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
30787 {
30788 if (!TARGET_SINGLE_PIC_BASE
30789 && TARGET_TOC
30790 && TARGET_MINIMAL_TOC
30791 && get_pool_size () != 0)
30792 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
30793 }
30794
30795 \f
30796 /* Helper function for rs6000_split_logical to emit a logical instruction after
30797 splitting the operation into single GPR registers.
30798
30799 DEST is the destination register.
30800 OP1 and OP2 are the input source registers.
30801 CODE is the base operation (AND, IOR, XOR, NOT).
30802 MODE is the machine mode.
30803 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
30804 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
30805 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
30806 CLOBBER_REG is either NULL or a scratch register of type CC to allow
30807 formation of the AND instructions. */
30808
30809 static void
30810 rs6000_split_logical_inner (rtx dest,
30811 rtx op1,
30812 rtx op2,
30813 enum rtx_code code,
30814 enum machine_mode mode,
30815 bool complement_final_p,
30816 bool complement_op1_p,
30817 bool complement_op2_p,
30818 rtx clobber_reg)
30819 {
30820 rtx bool_rtx;
30821 rtx set_rtx;
30822
30823 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
30824 if (op2 && GET_CODE (op2) == CONST_INT
30825 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
30826 && !complement_final_p && !complement_op1_p && !complement_op2_p)
30827 {
30828 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
30829 HOST_WIDE_INT value = INTVAL (op2) & mask;
30830
30831 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
30832 if (code == AND)
30833 {
30834 if (value == 0)
30835 {
30836 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
30837 return;
30838 }
30839
30840 else if (value == mask)
30841 {
30842 if (!rtx_equal_p (dest, op1))
30843 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
30844 return;
30845 }
30846 }
30847
30848 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
30849 into separate ORI/ORIS or XORI/XORIS instructions. */
30850 else if (code == IOR || code == XOR)
30851 {
30852 if (value == 0)
30853 {
30854 if (!rtx_equal_p (dest, op1))
30855 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
30856 return;
30857 }
30858 }
30859 }
30860
30861 if (complement_op1_p)
30862 op1 = gen_rtx_NOT (mode, op1);
30863
30864 if (complement_op2_p)
30865 op2 = gen_rtx_NOT (mode, op2);
30866
30867 bool_rtx = ((code == NOT)
30868 ? gen_rtx_NOT (mode, op1)
30869 : gen_rtx_fmt_ee (code, mode, op1, op2));
30870
30871 if (complement_final_p)
30872 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
30873
30874 set_rtx = gen_rtx_SET (VOIDmode, dest, bool_rtx);
30875
30876 /* Is this AND with an explicit clobber? */
30877 if (clobber_reg)
30878 {
30879 rtx clobber = gen_rtx_CLOBBER (VOIDmode, clobber_reg);
30880 set_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set_rtx, clobber));
30881 }
30882
30883 emit_insn (set_rtx);
30884 return;
30885 }
30886
30887 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
30888 operations are split immediately during RTL generation to allow for more
30889 optimizations of the AND/IOR/XOR.
30890
30891 OPERANDS is an array containing the destination and two input operands.
30892 CODE is the base operation (AND, IOR, XOR, NOT).
30893 MODE is the machine mode.
30894 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
30895 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
30896 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
30897 CLOBBER_REG is either NULL or a scratch register of type CC to allow
30898 formation of the AND instructions. */
30899
30900 static void
30901 rs6000_split_logical_di (rtx operands[3],
30902 enum rtx_code code,
30903 bool complement_final_p,
30904 bool complement_op1_p,
30905 bool complement_op2_p,
30906 rtx clobber_reg)
30907 {
30908 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
30909 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
30910 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
30911 enum hi_lo { hi = 0, lo = 1 };
30912 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
30913 size_t i;
30914
30915 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
30916 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
30917 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
30918 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
30919
30920 if (code == NOT)
30921 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
30922 else
30923 {
30924 if (GET_CODE (operands[2]) != CONST_INT)
30925 {
30926 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
30927 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
30928 }
30929 else
30930 {
30931 HOST_WIDE_INT value = INTVAL (operands[2]);
30932 HOST_WIDE_INT value_hi_lo[2];
30933
30934 gcc_assert (!complement_final_p);
30935 gcc_assert (!complement_op1_p);
30936 gcc_assert (!complement_op2_p);
30937
30938 value_hi_lo[hi] = value >> 32;
30939 value_hi_lo[lo] = value & lower_32bits;
30940
30941 for (i = 0; i < 2; i++)
30942 {
30943 HOST_WIDE_INT sub_value = value_hi_lo[i];
30944
30945 if (sub_value & sign_bit)
30946 sub_value |= upper_32bits;
30947
30948 op2_hi_lo[i] = GEN_INT (sub_value);
30949
30950 /* If this is an AND instruction, check to see if we need to load
30951 the value into a register. */
30952 if (code == AND && sub_value != -1 && sub_value != 0
30953 && !and_operand (op2_hi_lo[i], SImode))
30954 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
30955 }
30956 }
30957 }
30958
30959 for (i = 0; i < 2; i++)
30960 {
30961 /* Split large IOR/XOR operations. */
30962 if ((code == IOR || code == XOR)
30963 && GET_CODE (op2_hi_lo[i]) == CONST_INT
30964 && !complement_final_p
30965 && !complement_op1_p
30966 && !complement_op2_p
30967 && clobber_reg == NULL_RTX
30968 && !logical_const_operand (op2_hi_lo[i], SImode))
30969 {
30970 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
30971 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
30972 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
30973 rtx tmp = gen_reg_rtx (SImode);
30974
30975 /* Make sure the constant is sign extended. */
30976 if ((hi_16bits & sign_bit) != 0)
30977 hi_16bits |= upper_32bits;
30978
30979 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
30980 code, SImode, false, false, false,
30981 NULL_RTX);
30982
30983 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
30984 code, SImode, false, false, false,
30985 NULL_RTX);
30986 }
30987 else
30988 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
30989 code, SImode, complement_final_p,
30990 complement_op1_p, complement_op2_p,
30991 clobber_reg);
30992 }
30993
30994 return;
30995 }
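
/* A sketch of the splitting above: on a 32-bit target, an XOR of a DImode
   register with the constant 0x12345678 becomes:

	high word:  constant is 0, so just a register move
	low word:   xoris tmp,src_lo,0x1234
		    xori  dst_lo,tmp,0x5678

   since 0x12345678 is not a single logical_const_operand, the low word is
   split into the 16-bit halves handled by xoris/xori.  */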
30996
30997 /* Split the insns that make up boolean operations operating on multiple GPR
30998 registers. The boolean MD patterns ensure that the inputs either are
30999 exactly the same as the output registers, or there is no overlap.
31000
31001 OPERANDS is an array containing the destination and two input operands.
31002 CODE is the base operation (AND, IOR, XOR, NOT).
31003 MODE is the machine mode.
31004 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
31005 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
31006 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
31007 CLOBBER_REG is either NULL or a scratch register of type CC to allow
31008 formation of the AND instructions. */
31009
31010 void
31011 rs6000_split_logical (rtx operands[3],
31012 enum rtx_code code,
31013 bool complement_final_p,
31014 bool complement_op1_p,
31015 bool complement_op2_p,
31016 rtx clobber_reg)
31017 {
31018 enum machine_mode mode = GET_MODE (operands[0]);
31019 enum machine_mode sub_mode;
31020 rtx op0, op1, op2;
31021 int sub_size, regno0, regno1, nregs, i;
31022
31023 /* If this is DImode, use the specialized version that can run before
31024 register allocation. */
31025 if (mode == DImode && !TARGET_POWERPC64)
31026 {
31027 rs6000_split_logical_di (operands, code, complement_final_p,
31028 complement_op1_p, complement_op2_p,
31029 clobber_reg);
31030 return;
31031 }
31032
31033 op0 = operands[0];
31034 op1 = operands[1];
31035 op2 = (code == NOT) ? NULL_RTX : operands[2];
31036 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
31037 sub_size = GET_MODE_SIZE (sub_mode);
31038 regno0 = REGNO (op0);
31039 regno1 = REGNO (op1);
31040
31041 gcc_assert (reload_completed);
31042 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
31043 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
31044
31045 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
31046 gcc_assert (nregs > 1);
31047
31048 if (op2 && REG_P (op2))
31049 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
31050
31051 for (i = 0; i < nregs; i++)
31052 {
31053 int offset = i * sub_size;
31054 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
31055 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
31056 rtx sub_op2 = ((code == NOT)
31057 ? NULL_RTX
31058 : simplify_subreg (sub_mode, op2, mode, offset));
31059
31060 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
31061 complement_final_p, complement_op1_p,
31062 complement_op2_p, clobber_reg);
31063 }
31064
31065 return;
31066 }
31067
31068 \f
31069 /* Return true if the peephole2 pass can combine an addis instruction with a
31070 load that uses an offset, such that the pair can be fused together on
31071 a power8.
31072
31073 The operands are:
31074 operands[0] register set with addis
31075 operands[1] value set via addis
31076 operands[2] target register being loaded
31077 operands[3] D-form memory reference using operands[0].
31078
31079 In addition, we are passed a boolean that is true if this is a peephole2,
31080 and we can check whether the addis_reg is dead after the insn and can be
31081 replaced by the target register. */
31082
31083 bool
31084 fusion_gpr_load_p (rtx *operands, bool peep2_p)
31085 {
31086 rtx addis_reg = operands[0];
31087 rtx addis_value = operands[1];
31088 rtx target = operands[2];
31089 rtx mem = operands[3];
31090 rtx addr;
31091 rtx base_reg;
31092
31093 /* Validate arguments. */
31094 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
31095 return false;
31096
31097 if (!base_reg_operand (target, GET_MODE (target)))
31098 return false;
31099
31100 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
31101 return false;
31102
31103 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
31104 return false;
31105
31106 /* Allow sign/zero extension. */
31107 if (GET_CODE (mem) == ZERO_EXTEND
31108 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
31109 mem = XEXP (mem, 0);
31110
31111 if (!MEM_P (mem))
31112 return false;
31113
31114 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
31115 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
31116 return false;
31117
31118 /* Validate that the register used to load the high value is either the
31119 register being loaded, or we can safely replace its use in a peephole2.
31120
31121 If this is a peephole2, we assume that there are 2 instructions in the
31122 peephole (addis and load), so we want to check if the target register was
31123 not used in the memory address and the register to hold the addis result
31124 is dead after the peephole. */
31125 if (REGNO (addis_reg) != REGNO (target))
31126 {
31127 if (!peep2_p)
31128 return false;
31129
31130 if (reg_mentioned_p (target, mem))
31131 return false;
31132
31133 if (!peep2_reg_dead_p (2, addis_reg))
31134 return false;
31135 }
31136
31137 base_reg = XEXP (addr, 0);
31138 return REGNO (addis_reg) == REGNO (base_reg);
31139 }
31140
31141 /* During the peephole2 pass, adjust and expand the insns for a load fusion
31142 sequence. We adjust the addis register to use the target register. If the
31143 load is sign-extending, we rewrite it as a zero-extending load followed by
31144 an explicit sign extension, since the fusion only covers zero-extending
31145 loads.
31146
31147 The operands are:
31148 operands[0] register set with addis (to be replaced with target)
31149 operands[1] value set via addis
31150 operands[2] target register being loaded
31151 operands[3] D-form memory reference using operands[0]. */
31152
31153 void
31154 expand_fusion_gpr_load (rtx *operands)
31155 {
31156 rtx addis_value = operands[1];
31157 rtx target = operands[2];
31158 rtx orig_mem = operands[3];
31159 rtx new_addr, new_mem, orig_addr, offset;
31160 enum rtx_code plus_or_lo_sum;
31161 enum machine_mode target_mode = GET_MODE (target);
31162 enum machine_mode extend_mode = target_mode;
31163 enum machine_mode ptr_mode = Pmode;
31164 enum rtx_code extend = UNKNOWN;
31165 rtx addis_reg = ((ptr_mode == target_mode)
31166 ? target
31167 : simplify_subreg (ptr_mode, target, target_mode, 0));
31168
31169 if (GET_CODE (orig_mem) == ZERO_EXTEND
31170 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
31171 {
31172 extend = GET_CODE (orig_mem);
31173 orig_mem = XEXP (orig_mem, 0);
31174 target_mode = GET_MODE (orig_mem);
31175 }
31176
31177 gcc_assert (MEM_P (orig_mem));
31178
31179 orig_addr = XEXP (orig_mem, 0);
31180 plus_or_lo_sum = GET_CODE (orig_addr);
31181 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
31182
31183 offset = XEXP (orig_addr, 1);
31184 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_reg, offset);
31185 new_mem = change_address (orig_mem, target_mode, new_addr);
31186
31187 if (extend != UNKNOWN)
31188 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
31189
31190 emit_insn (gen_rtx_SET (VOIDmode, addis_reg, addis_value));
31191 emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
31192
31193 if (extend == SIGN_EXTEND)
31194 {
31195 int sub_off = ((BYTES_BIG_ENDIAN)
31196 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
31197 : 0);
31198 rtx sign_reg
31199 = simplify_subreg (target_mode, target, extend_mode, sub_off);
31200
31201 emit_insn (gen_rtx_SET (VOIDmode, target,
31202 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
31203 }
31204
31205 return;
31206 }
31207
31208 /* Return a string to fuse an addis instruction with a gpr load to the same
31209 register that the addis instruction set up. The code is complicated,
31210 so we call output_asm_insn directly, and just return "".
31211
31212 The operands are:
31213 operands[0] register set with addis (must be same reg as target).
31214 operands[1] value set via addis
31215 operands[2] target register being loaded
31216 operands[3] D-form memory reference using operands[0]. */
31217
31218 const char *
31219 emit_fusion_gpr_load (rtx *operands)
31220 {
31221 rtx addis_reg = operands[0];
31222 rtx addis_value = operands[1];
31223 rtx target = operands[2];
31224 rtx mem = operands[3];
31225 rtx fuse_ops[10];
31226 rtx addr;
31227 rtx load_offset;
31228 const char *addis_str = NULL;
31229 const char *load_str = NULL;
31230 const char *extend_insn = NULL;
31231 const char *mode_name = NULL;
31232 char insn_template[80];
31233 enum machine_mode mode;
31234 const char *comment_str = ASM_COMMENT_START;
31235 bool sign_p = false;
31236
31237 gcc_assert (REG_P (addis_reg) && REG_P (target));
31238 gcc_assert (REGNO (addis_reg) == REGNO (target));
31239
31240 if (*comment_str == ' ')
31241 comment_str++;
31242
31243 /* Allow sign/zero extension. */
31244 if (GET_CODE (mem) == ZERO_EXTEND)
31245 mem = XEXP (mem, 0);
31246
31247 else if (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)
31248 {
31249 sign_p = true;
31250 mem = XEXP (mem, 0);
31251 }
31252
31253 gcc_assert (MEM_P (mem));
31254 addr = XEXP (mem, 0);
31255 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
31256 gcc_unreachable ();
31257
31258 load_offset = XEXP (addr, 1);
31259
31260 /* Now emit the load instruction to the same register. */
31261 mode = GET_MODE (mem);
31262 switch (mode)
31263 {
31264 case QImode:
31265 mode_name = "char";
31266 load_str = "lbz";
31267 extend_insn = "extsb %0,%0";
31268 break;
31269
31270 case HImode:
31271 mode_name = "short";
31272 load_str = "lhz";
31273 extend_insn = "extsh %0,%0";
31274 break;
31275
31276 case SImode:
31277 mode_name = "int";
31278 load_str = "lwz";
31279 extend_insn = "extsw %0,%0";
31280 break;
31281
31282 case DImode:
31283 if (TARGET_POWERPC64)
31284 {
31285 mode_name = "long";
31286 load_str = "ld";
31287 }
31288 else
31289 gcc_unreachable ();
31290 break;
31291
31292 default:
31293 gcc_unreachable ();
31294 }
31295
31296 /* Emit the addis instruction. */
31297 fuse_ops[0] = target;
31298 if (satisfies_constraint_L (addis_value))
31299 {
31300 fuse_ops[1] = addis_value;
31301 addis_str = "lis %0,%v1";
31302 }
31303
31304 else if (GET_CODE (addis_value) == PLUS)
31305 {
31306 rtx op0 = XEXP (addis_value, 0);
31307 rtx op1 = XEXP (addis_value, 1);
31308
31309 if (REG_P (op0) && CONST_INT_P (op1)
31310 && satisfies_constraint_L (op1))
31311 {
31312 fuse_ops[1] = op0;
31313 fuse_ops[2] = op1;
31314 addis_str = "addis %0,%1,%v2";
31315 }
31316 }
31317
31318 else if (GET_CODE (addis_value) == HIGH)
31319 {
31320 rtx value = XEXP (addis_value, 0);
31321 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
31322 {
31323 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
31324 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
31325 if (TARGET_ELF)
31326 addis_str = "addis %0,%2,%1@toc@ha";
31327
31328 else if (TARGET_XCOFF)
31329 addis_str = "addis %0,%1@u(%2)";
31330
31331 else
31332 gcc_unreachable ();
31333 }
31334
31335 else if (GET_CODE (value) == PLUS)
31336 {
31337 rtx op0 = XEXP (value, 0);
31338 rtx op1 = XEXP (value, 1);
31339
31340 if (GET_CODE (op0) == UNSPEC
31341 && XINT (op0, 1) == UNSPEC_TOCREL
31342 && CONST_INT_P (op1))
31343 {
31344 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
31345 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
31346 fuse_ops[3] = op1;
31347 if (TARGET_ELF)
31348 addis_str = "addis %0,%2,%1+%3@toc@ha";
31349
31350 else if (TARGET_XCOFF)
31351 addis_str = "addis %0,%1+%3@u(%2)";
31352
31353 else
31354 gcc_unreachable ();
31355 }
31356 }
31357
31358 else if (satisfies_constraint_L (value))
31359 {
31360 fuse_ops[1] = value;
31361 addis_str = "lis %0,%v1";
31362 }
31363
31364 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
31365 {
31366 fuse_ops[1] = value;
31367 addis_str = "lis %0,%1@ha";
31368 }
31369 }
31370
31371 if (!addis_str)
31372 fatal_insn ("Could not generate addis value for fusion", addis_value);
31373
31374 sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
31375 comment_str, mode_name);
31376 output_asm_insn (insn_template, fuse_ops);
31377
31378 /* Emit the D-form load instruction. */
31379 if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
31380 {
31381 sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
31382 fuse_ops[1] = load_offset;
31383 output_asm_insn (insn_template, fuse_ops);
31384 }
31385
31386 else if (GET_CODE (load_offset) == UNSPEC
31387 && XINT (load_offset, 1) == UNSPEC_TOCREL)
31388 {
31389 if (TARGET_ELF)
31390 sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
31391
31392 else if (TARGET_XCOFF)
31393 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
31394
31395 else
31396 gcc_unreachable ();
31397
31398 fuse_ops[1] = XVECEXP (load_offset, 0, 0);
31399 output_asm_insn (insn_template, fuse_ops);
31400 }
31401
31402 else if (GET_CODE (load_offset) == PLUS
31403 && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
31404 && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
31405 && CONST_INT_P (XEXP (load_offset, 1)))
31406 {
31407 rtx tocrel_unspec = XEXP (load_offset, 0);
31408 if (TARGET_ELF)
31409 sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
31410
31411 else if (TARGET_XCOFF)
31412 sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
31413
31414 else
31415 gcc_unreachable ();
31416
31417 fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
31418 fuse_ops[2] = XEXP (load_offset, 1);
31419 output_asm_insn (insn_template, fuse_ops);
31420 }
31421
31422 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
31423 {
31424 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
31425
31426 fuse_ops[1] = load_offset;
31427 output_asm_insn (insn_template, fuse_ops);
31428 }
31429
31430 else
31431 fatal_insn ("Unable to generate load offset for fusion", load_offset);
31432
31433 /* Handle sign extension. The peephole2 pass generates this as a separate
31434 insn, but we handle it just in case it got reattached. */
31435 if (sign_p)
31436 {
31437 gcc_assert (extend_insn != NULL);
31438 output_asm_insn (extend_insn, fuse_ops);
31439 }
31440
31441 return "";
31442 }
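
/* Taken together, the fusion routines above turn a TOC-relative load
   such as (illustrative assembly):

	addis r10,r2,sym@toc@ha
	lwz r9,sym@toc@l(r10)

   into the same-register form that power8 can fuse:

	addis r9,r2,sym@toc@ha
	lwz r9,sym@toc@l(r9)  */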
31443
31444 \f
31445 struct gcc_target targetm = TARGET_INITIALIZER;
31446
31447 #include "gt-rs6000.h"