/* (Removed: stray "]>" and "Commit | Line | Data" table header left over from
   a git-blame/HTML export of this file; not part of the source.)  */
83349046 | 1 | /* Subroutines used for code generation on IBM RS/6000. |
85ec4feb | 2 | Copyright (C) 1991-2018 Free Software Foundation, Inc. |
83349046 SB |
3 | Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published | |
9 | by the Free Software Foundation; either version 3, or (at your | |
10 | option) any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but WITHOUT | |
13 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY | |
14 | or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public | |
15 | License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
8fcc61f8 RS |
21 | #define IN_TARGET_CODE 1 |
22 | ||
83349046 SB |
23 | #include "config.h" |
24 | #include "system.h" | |
25 | #include "coretypes.h" | |
26 | #include "backend.h" | |
27 | #include "rtl.h" | |
28 | #include "tree.h" | |
29 | #include "memmodel.h" | |
30 | #include "gimple.h" | |
31 | #include "cfghooks.h" | |
32 | #include "cfgloop.h" | |
33 | #include "df.h" | |
34 | #include "tm_p.h" | |
35 | #include "stringpool.h" | |
314e6352 | 36 | #include "attribs.h" |
83349046 SB |
37 | #include "expmed.h" |
38 | #include "optabs.h" | |
39 | #include "regs.h" | |
40 | #include "ira.h" | |
41 | #include "recog.h" | |
42 | #include "cgraph.h" | |
43 | #include "diagnostic-core.h" | |
44 | #include "insn-attr.h" | |
45 | #include "flags.h" | |
46 | #include "alias.h" | |
47 | #include "fold-const.h" | |
48 | #include "stor-layout.h" | |
49 | #include "calls.h" | |
50 | #include "print-tree.h" | |
51 | #include "varasm.h" | |
52 | #include "explow.h" | |
53 | #include "expr.h" | |
54 | #include "output.h" | |
55 | #include "dbxout.h" | |
56 | #include "common/common-target.h" | |
57 | #include "langhooks.h" | |
58 | #include "reload.h" | |
59 | #include "sched-int.h" | |
60 | #include "gimplify.h" | |
61 | #include "gimple-fold.h" | |
62 | #include "gimple-iterator.h" | |
63 | #include "gimple-ssa.h" | |
64 | #include "gimple-walk.h" | |
65 | #include "intl.h" | |
66 | #include "params.h" | |
67 | #include "tm-constrs.h" | |
68 | #include "tree-vectorizer.h" | |
69 | #include "target-globals.h" | |
70 | #include "builtins.h" | |
71 | #include "context.h" | |
72 | #include "tree-pass.h" | |
73 | #include "except.h" | |
74 | #if TARGET_XCOFF | |
75 | #include "xcoffout.h" /* get declarations of xcoff_*_section_name */ | |
76 | #endif | |
77 | #if TARGET_MACHO | |
78 | #include "gstab.h" /* for N_SLINE */ | |
79 | #endif | |
80 | #include "case-cfn-macros.h" | |
81 | #include "ppc-auxv.h" | |
3877c560 | 82 | #include "rtx-vector-builder.h" |
83349046 SB |
83 | |
84 | /* This file should be included last. */ | |
85 | #include "target-def.h" | |
86 | ||
/* Allow target headers to suppress prototype generation; default to
   generating prototypes.  */
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

/* Local min/max helpers.  NOTE: both arguments are evaluated twice, so do
   not pass expressions with side effects (e.g. min (i++, n)).  */
#define min(A,B)	((A) < (B) ? (A) : (B))
#define max(A,B)	((A) > (B) ? (A) : (B))

/* Forward declaration: padding direction for function arguments
   (TARGET_FUNCTION_ARG_PADDING hook helper, defined later in this file).  */
static pad_direction rs6000_function_arg_padding (machine_mode, const_tree);
95 | ||
83349046 SB |
/* Structure used to define the rs6000 stack frame layout.  Filled in once
   per function and cached; offsets are relative to the incoming stack
   pointer unless a field's comment says otherwise.  */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs  */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in fixed area */
  int vrsave_size;		/* size to hold VRSAVE */
  int altivec_padding_size;	/* size of altivec alignment padding */
  int spe_gp_size;		/* size of 64-bit GPR save size for SPE */
  int spe_padding_size;		/* alignment padding for the SPE save area */
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;	/* count of SPE 64-bit GPRs in use */
  int savres_strategy;		/* bitmask of SAVRES_* strategy flags */
} rs6000_stack_t;
137 | ||
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int spe_insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  /* True once SPLIT_STACK_ARG_POINTER has actually been used.  */
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
  /* The number of components we use for separate shrink-wrapping.  */
  int n_components;
  /* The components already handled by separate shrink-wrapping, which should
     not be considered by the prologue and epilogue.  */
  bool gpr_is_wrapped_separately[32];
  bool fpr_is_wrapped_separately[32];
  bool lr_is_wrapped_separately;
} machine_function;
173 | ||
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of label created for -mrelocatable, to call to so we can
   get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  */
scalar_int_mode rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
# ifndef HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE
# define HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE 0
# endif
/* Flag whether floating point values have been passed/returned.
   Note that this doesn't say whether fprs are used, since the
   Tag_GNU_Power_ABI_FP .gnu.attributes value this flag controls
   should be set for soft-float values passed in gprs and ieee128
   values passed in vsx registers.  */
static bool rs6000_passes_float;
static bool rs6000_passes_long_double;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
static bool rs6000_hard_regno_mode_ok_p
  [NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

/* NOTE(review): used by the debug variants of the cost hooks later in this
   file; exact protocol not visible in this chunk — confirm before relying
   on it.  */
static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized, need_toc_init;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

/* GC-rooted section handles, created lazily by the section-selection
   hooks.  */
static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

/* Describes one builtin: the ISA mask that must be enabled for it, the
   insn to expand it to, its user-visible name, and its builtin code.  */
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
277 | ||
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate.  */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combination of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
303 | ||
/* -mrecip options: maps each -mrecip=<name> keyword to the set of
   RECIP_* mask bits it enables.  */
static struct
{
  const char *string;		/* option name.  */
  unsigned int mask;		/* mask bits to set.  */
} recip_options[] = {
  { "all",	 RECIP_ALL },
  { "none",	 RECIP_NONE },
  { "div",	 (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		  | RECIP_V2DF_DIV) },
  { "divf",	 (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	 (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	 (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	 (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	 (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
321 | ||
/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values.
   NOTE(review): entries appear to be ordered newest-first — confirm whether
   any lookup code depends on this ordering before reordering.  */
static const struct
{
  const char *cpu;
  unsigned int cpuid;
} cpu_is_info[] = {
  { "power9",	   PPC_PLATFORM_POWER9 },
  { "power8",	   PPC_PLATFORM_POWER8 },
  { "power7",	   PPC_PLATFORM_POWER7 },
  { "power6x",	   PPC_PLATFORM_POWER6X },
  { "power6",	   PPC_PLATFORM_POWER6 },
  { "power5+",	   PPC_PLATFORM_POWER5_PLUS },
  { "power5",	   PPC_PLATFORM_POWER5 },
  { "ppc970",	   PPC_PLATFORM_PPC970 },
  { "power4",	   PPC_PLATFORM_POWER4 },
  { "ppca2",	   PPC_PLATFORM_PPCA2 },
  { "ppc476",	   PPC_PLATFORM_PPC476 },
  { "ppc464",	   PPC_PLATFORM_PPC464 },
  { "ppc440",	   PPC_PLATFORM_PPC440 },
  { "ppc405",	   PPC_PLATFORM_PPC405 },
  { "ppc-cell-be", PPC_PLATFORM_CELL_BE }
};
344 | ||
/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks.
   ID selects which auxv word the mask applies to: 0 = AT_HWCAP,
   1 = AT_HWCAP2.  */
static const struct
{
  const char *hwcap;
  int mask;
  unsigned int id;
} cpu_supports_info[] = {
  /* AT_HWCAP masks.  */
  { "4xxmac",		PPC_FEATURE_HAS_4xxMAC,		0 },
  { "altivec",		PPC_FEATURE_HAS_ALTIVEC,	0 },
  { "arch_2_05",	PPC_FEATURE_ARCH_2_05,		0 },
  { "arch_2_06",	PPC_FEATURE_ARCH_2_06,		0 },
  { "archpmu",		PPC_FEATURE_PERFMON_COMPAT,	0 },
  { "booke",		PPC_FEATURE_BOOKE,		0 },
  { "cellbe",		PPC_FEATURE_CELL_BE,		0 },
  { "dfp",		PPC_FEATURE_HAS_DFP,		0 },
  { "efpdouble",	PPC_FEATURE_HAS_EFP_DOUBLE,	0 },
  { "efpsingle",	PPC_FEATURE_HAS_EFP_SINGLE,	0 },
  { "fpu",		PPC_FEATURE_HAS_FPU,		0 },
  { "ic_snoop",		PPC_FEATURE_ICACHE_SNOOP,	0 },
  { "mmu",		PPC_FEATURE_HAS_MMU,		0 },
  { "notb",		PPC_FEATURE_NO_TB,		0 },
  { "pa6t",		PPC_FEATURE_PA6T,		0 },
  { "power4",		PPC_FEATURE_POWER4,		0 },
  { "power5",		PPC_FEATURE_POWER5,		0 },
  { "power5+",		PPC_FEATURE_POWER5_PLUS,	0 },
  { "power6x",		PPC_FEATURE_POWER6_EXT,		0 },
  { "ppc32",		PPC_FEATURE_32,			0 },
  { "ppc601",		PPC_FEATURE_601_INSTR,		0 },
  { "ppc64",		PPC_FEATURE_64,			0 },
  { "ppcle",		PPC_FEATURE_PPC_LE,		0 },
  { "smt",		PPC_FEATURE_SMT,		0 },
  { "spe",		PPC_FEATURE_HAS_SPE,		0 },
  { "true_le",		PPC_FEATURE_TRUE_LE,		0 },
  { "ucache",		PPC_FEATURE_UNIFIED_CACHE,	0 },
  { "vsx",		PPC_FEATURE_HAS_VSX,		0 },

  /* AT_HWCAP2 masks.  */
  { "arch_2_07",	PPC_FEATURE2_ARCH_2_07,		1 },
  { "dscr",		PPC_FEATURE2_HAS_DSCR,		1 },
  { "ebb",		PPC_FEATURE2_HAS_EBB,		1 },
  { "htm",		PPC_FEATURE2_HAS_HTM,		1 },
  { "htm-nosc",		PPC_FEATURE2_HTM_NOSC,		1 },
  { "isel",		PPC_FEATURE2_HAS_ISEL,		1 },
  { "tar",		PPC_FEATURE2_HAS_TAR,		1 },
  { "vcrypto",		PPC_FEATURE2_HAS_VEC_CRYPTO,	1 },
  { "arch_3_00",	PPC_FEATURE2_ARCH_3_00,		1 },
  { "ieee128",		PPC_FEATURE2_HAS_IEEE128,	1 }
};
394 | ||
/* Newer LIBCs explicitly export this symbol to declare that they provide
   the AT_PLATFORM and AT_HWCAP/AT_HWCAP2 values in the TCB.  We emit a
   reference to this symbol whenever we expand a CPU builtin, so that
   we never link against an old LIBC.  */
const char *tcb_verification_symbol = "__parse_hwcap_and_convert_at_platform";

/* True if we have expanded a CPU builtin.  */
bool cpu_builtin_p;

/* Pointer to function (in powerpcspe-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in powerpcspe-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
408 | ||
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  These range checks
   rely on the enumerator ordering documented above.  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
435 | ||
436 | ||
/* Register classes we care about in secondary reload or when testing for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
467 | ||
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive which types can do PRE_MODIFY instead of
   PRE_INC and PRE_DEC, so keep track of separate bits for these two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
#define RELOAD_REG_QUAD_OFFSET	0x80	/* quad offset is limited.  */

/* Register type masks based on the type, of valid addressing modes.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  enum insn_code fusion_gpr_ld;		/* INSN for fusing gpr ADDIS/loads.  */
					/* INSNs for fusing addi with loads
					   or stores for each reg. class.  */
  enum insn_code fusion_addi_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addi_st[(int)N_RELOAD_REG];
					/* INSNs for fusing addis with loads
					   or stores for each reg. class.  */
  enum insn_code fusion_addis_ld[(int)N_RELOAD_REG];
  enum insn_code fusion_addis_st[(int)N_RELOAD_REG];
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
  bool fused_toc;			/* Mode supports TOC fusion.  */
};

/* Per-mode table of the above; filled in at option-processing time.  */
static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
504 | ||
505 | /* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */ | |
506 | static inline bool | |
507 | mode_supports_pre_incdec_p (machine_mode mode) | |
508 | { | |
509 | return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC) | |
510 | != 0); | |
511 | } | |
512 | ||
513 | /* Helper function to say whether a mode supports PRE_MODIFY. */ | |
514 | static inline bool | |
515 | mode_supports_pre_modify_p (machine_mode mode) | |
516 | { | |
517 | return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY) | |
518 | != 0); | |
519 | } | |
520 | ||
/* Given that there exists at least one variable that is set (produced)
   by OUT_INSN and read (consumed) by IN_INSN, return true iff
   IN_INSN represents one or more memory store operations and none of
   the variables set by OUT_INSN is used by IN_INSN as the address of a
   store operation.  If either IN_INSN or OUT_INSN does not represent
   a "single" RTL SET expression (as loosely defined by the
   implementation of the single_set function) or a PARALLEL with only
   SETs, CLOBBERs, and USEs inside, this function returns false.

   This rs6000-specific version of store_data_bypass_p checks for
   certain conditions that result in assertion failures (and internal
   compiler errors) in the generic store_data_bypass_p function and
   returns false rather than calling store_data_bypass_p if one of the
   problematic conditions is detected.  */

int
rs6000_store_data_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
  rtx out_set, in_set;
  rtx out_pat, in_pat;
  rtx out_exp, in_exp;
  int i, j;

  in_set = single_set (in_insn);
  if (in_set)
    {
      /* IN_INSN is a single set; the guard only matters when it stores
	 to memory.  */
      if (MEM_P (SET_DEST (in_set)))
	{
	  out_set = single_set (out_insn);
	  if (!out_set)
	    {
	      /* OUT_INSN is a multi-part pattern.  If it is a PARALLEL,
		 reject anything other than SET/CLOBBER/USE elements.
		 NOTE(review): unlike the branch below, a non-PARALLEL
		 multi-part pattern deliberately falls through here to
		 the generic store_data_bypass_p call — confirm against
		 the generic function's assertions before "fixing" this
		 asymmetry.  */
	      out_pat = PATTERN (out_insn);
	      if (GET_CODE (out_pat) == PARALLEL)
		{
		  for (i = 0; i < XVECLEN (out_pat, 0); i++)
		    {
		      out_exp = XVECEXP (out_pat, 0, i);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  else
    {
      /* IN_INSN is not a single set: it must be a PARALLEL containing
	 only SET/CLOBBER/USE elements.  */
      in_pat = PATTERN (in_insn);
      if (GET_CODE (in_pat) != PARALLEL)
	return false;

      for (i = 0; i < XVECLEN (in_pat, 0); i++)
	{
	  in_exp = XVECEXP (in_pat, 0, i);
	  if ((GET_CODE (in_exp) == CLOBBER) || (GET_CODE (in_exp) == USE))
	    continue;
	  else if (GET_CODE (in_exp) != SET)
	    return false;

	  /* For each store inside IN_INSN, apply the same screening of
	     OUT_INSN as in the single-set case; here a non-PARALLEL
	     multi-part OUT_INSN is rejected outright.  */
	  if (MEM_P (SET_DEST (in_exp)))
	    {
	      out_set = single_set (out_insn);
	      if (!out_set)
		{
		  out_pat = PATTERN (out_insn);
		  if (GET_CODE (out_pat) != PARALLEL)
		    return false;
		  for (j = 0; j < XVECLEN (out_pat, 0); j++)
		    {
		      out_exp = XVECEXP (out_pat, 0, j);
		      if ((GET_CODE (out_exp) == CLOBBER)
			  || (GET_CODE (out_exp) == USE))
			continue;
		      else if (GET_CODE (out_exp) != SET)
			return false;
		    }
		}
	    }
	}
    }
  /* No problematic shape detected; defer to the generic check.  */
  return store_data_bypass_p (out_insn, in_insn);
}
605 | ||
606 | /* Return true if we have D-form addressing in altivec registers. */ | |
607 | static inline bool | |
608 | mode_supports_vmx_dform (machine_mode mode) | |
609 | { | |
610 | return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) != 0); | |
611 | } | |
612 | ||
613 | /* Return true if we have D-form addressing in VSX registers. This addressing | |
614 | is more limited than normal d-form addressing in that the offset must be | |
615 | aligned on a 16-byte boundary. */ | |
616 | static inline bool | |
617 | mode_supports_vsx_dform_quad (machine_mode mode) | |
618 | { | |
619 | return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_QUAD_OFFSET) | |
620 | != 0); | |
621 | } | |
622 | ||
623 | \f | |
/* Target cpu costs.  All costs are expressed relative to the cost of a
   single integer add (see COSTS_N_INSNS).  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;    /* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};

/* Cost table in effect for the current target; selected at
   option-processing time.  */
const struct processor_costs *rs6000_cost;
648 | ||
649 | /* Instruction size costs on 32bit processors. */ | |
650 | static const | |
651 | struct processor_costs size32_cost = { | |
652 | COSTS_N_INSNS (1), /* mulsi */ | |
653 | COSTS_N_INSNS (1), /* mulsi_const */ | |
654 | COSTS_N_INSNS (1), /* mulsi_const9 */ | |
655 | COSTS_N_INSNS (1), /* muldi */ | |
656 | COSTS_N_INSNS (1), /* divsi */ | |
657 | COSTS_N_INSNS (1), /* divdi */ | |
658 | COSTS_N_INSNS (1), /* fp */ | |
659 | COSTS_N_INSNS (1), /* dmul */ | |
660 | COSTS_N_INSNS (1), /* sdiv */ | |
661 | COSTS_N_INSNS (1), /* ddiv */ | |
662 | 32, /* cache line size */ | |
663 | 0, /* l1 cache */ | |
664 | 0, /* l2 cache */ | |
665 | 0, /* streams */ | |
666 | 0, /* SF->DF convert */ | |
667 | }; | |
668 | ||
669 | /* Instruction size costs on 64bit processors. */ | |
670 | static const | |
671 | struct processor_costs size64_cost = { | |
672 | COSTS_N_INSNS (1), /* mulsi */ | |
673 | COSTS_N_INSNS (1), /* mulsi_const */ | |
674 | COSTS_N_INSNS (1), /* mulsi_const9 */ | |
675 | COSTS_N_INSNS (1), /* muldi */ | |
676 | COSTS_N_INSNS (1), /* divsi */ | |
677 | COSTS_N_INSNS (1), /* divdi */ | |
678 | COSTS_N_INSNS (1), /* fp */ | |
679 | COSTS_N_INSNS (1), /* dmul */ | |
680 | COSTS_N_INSNS (1), /* sdiv */ | |
681 | COSTS_N_INSNS (1), /* ddiv */ | |
682 | 128, /* cache line size */ | |
683 | 0, /* l1 cache */ | |
684 | 0, /* l2 cache */ | |
685 | 0, /* streams */ | |
686 | 0, /* SF->DF convert */ | |
687 | }; | |
688 | ||
689 | /* Instruction costs on RS64A processors. */ | |
690 | static const | |
691 | struct processor_costs rs64a_cost = { | |
692 | COSTS_N_INSNS (20), /* mulsi */ | |
693 | COSTS_N_INSNS (12), /* mulsi_const */ | |
694 | COSTS_N_INSNS (8), /* mulsi_const9 */ | |
695 | COSTS_N_INSNS (34), /* muldi */ | |
696 | COSTS_N_INSNS (65), /* divsi */ | |
697 | COSTS_N_INSNS (67), /* divdi */ | |
698 | COSTS_N_INSNS (4), /* fp */ | |
699 | COSTS_N_INSNS (4), /* dmul */ | |
700 | COSTS_N_INSNS (31), /* sdiv */ | |
701 | COSTS_N_INSNS (31), /* ddiv */ | |
702 | 128, /* cache line size */ | |
703 | 128, /* l1 cache */ | |
704 | 2048, /* l2 cache */ | |
705 | 1, /* streams */ | |
706 | 0, /* SF->DF convert */ | |
707 | }; | |
708 | ||
709 | /* Instruction costs on MPCCORE processors. */ | |
710 | static const | |
711 | struct processor_costs mpccore_cost = { | |
712 | COSTS_N_INSNS (2), /* mulsi */ | |
713 | COSTS_N_INSNS (2), /* mulsi_const */ | |
714 | COSTS_N_INSNS (2), /* mulsi_const9 */ | |
715 | COSTS_N_INSNS (2), /* muldi */ | |
716 | COSTS_N_INSNS (6), /* divsi */ | |
717 | COSTS_N_INSNS (6), /* divdi */ | |
718 | COSTS_N_INSNS (4), /* fp */ | |
719 | COSTS_N_INSNS (5), /* dmul */ | |
720 | COSTS_N_INSNS (10), /* sdiv */ | |
721 | COSTS_N_INSNS (17), /* ddiv */ | |
722 | 32, /* cache line size */ | |
723 | 4, /* l1 cache */ | |
724 | 16, /* l2 cache */ | |
725 | 1, /* streams */ | |
726 | 0, /* SF->DF convert */ | |
727 | }; | |
728 | ||
/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),     /* mulsi */
  COSTS_N_INSNS (4),     /* mulsi_const */
  COSTS_N_INSNS (4),     /* mulsi_const9 */
  COSTS_N_INSNS (4),     /* muldi */
  COSTS_N_INSNS (33),    /* divsi */
  COSTS_N_INSNS (33),    /* divdi */
  COSTS_N_INSNS (11),    /* fp */
  COSTS_N_INSNS (11),    /* dmul */
  COSTS_N_INSNS (11),    /* sdiv */
  COSTS_N_INSNS (11),    /* ddiv */
  32,                    /* cache line size */
  4,                     /* l1 cache */
  16,                    /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
748 | ||
/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),     /* mulsi */
  COSTS_N_INSNS (4),     /* mulsi_const */
  COSTS_N_INSNS (3),     /* mulsi_const9 */
  COSTS_N_INSNS (5),     /* muldi */
  COSTS_N_INSNS (35),    /* divsi */
  COSTS_N_INSNS (35),    /* divdi */
  COSTS_N_INSNS (11),    /* fp */
  COSTS_N_INSNS (11),    /* dmul */
  COSTS_N_INSNS (11),    /* sdiv */
  COSTS_N_INSNS (11),    /* ddiv */
  32,                    /* cache line size */
  16,                    /* l1 cache */
  128,                   /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
768 | ||
/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),     /* mulsi */
  COSTS_N_INSNS (2),     /* mulsi_const */
  COSTS_N_INSNS (2),     /* mulsi_const9 */
  COSTS_N_INSNS (3),     /* muldi */
  COSTS_N_INSNS (34),    /* divsi */
  COSTS_N_INSNS (34),    /* divdi */
  COSTS_N_INSNS (5),     /* fp */
  COSTS_N_INSNS (5),     /* dmul */
  COSTS_N_INSNS (19),    /* sdiv */
  COSTS_N_INSNS (33),    /* ddiv */
  32,                    /* cache line size */
  32,                    /* l1 cache */
  256,                   /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
788 | ||
/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),     /* mulsi */
  COSTS_N_INSNS (4),     /* mulsi_const */
  COSTS_N_INSNS (4),     /* mulsi_const9 */
  COSTS_N_INSNS (4),     /* muldi */
  COSTS_N_INSNS (11),    /* divsi */
  COSTS_N_INSNS (11),    /* divdi */
  COSTS_N_INSNS (6),     /* fp */
  COSTS_N_INSNS (6),     /* dmul */
  COSTS_N_INSNS (19),    /* sdiv */
  COSTS_N_INSNS (33),    /* ddiv */
  32,                    /* l1 cache line size */
  32,                    /* l1 cache */
  512,                   /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
808 | ||
/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),     /* mulsi */
  COSTS_N_INSNS (5),     /* mulsi_const */
  COSTS_N_INSNS (5),     /* mulsi_const9 */
  COSTS_N_INSNS (5),     /* muldi */
  COSTS_N_INSNS (36),    /* divsi */
  COSTS_N_INSNS (36),    /* divdi */
  COSTS_N_INSNS (4),     /* fp */
  COSTS_N_INSNS (5),     /* dmul */
  COSTS_N_INSNS (17),    /* sdiv */
  COSTS_N_INSNS (31),    /* ddiv */
  32,                    /* cache line size */
  32,                    /* l1 cache */
  256,                   /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
828 | ||
/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),     /* mulsi */
  COSTS_N_INSNS (3),     /* mulsi_const */
  COSTS_N_INSNS (2),     /* mulsi_const9 */
  COSTS_N_INSNS (5),     /* muldi */
  COSTS_N_INSNS (37),    /* divsi */
  COSTS_N_INSNS (37),    /* divdi */
  COSTS_N_INSNS (3),     /* fp */
  COSTS_N_INSNS (4),     /* dmul */
  COSTS_N_INSNS (18),    /* sdiv */
  COSTS_N_INSNS (33),    /* ddiv */
  32,                    /* cache line size */
  8,                     /* l1 cache */
  64,                    /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
848 | ||
/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),     /* mulsi */
  COSTS_N_INSNS (4),     /* mulsi_const */
  COSTS_N_INSNS (4),     /* mulsi_const9 */
  COSTS_N_INSNS (4),     /* muldi */
  COSTS_N_INSNS (20),    /* divsi */
  COSTS_N_INSNS (20),    /* divdi */
  COSTS_N_INSNS (3),     /* fp */
  COSTS_N_INSNS (3),     /* dmul */
  COSTS_N_INSNS (18),    /* sdiv */
  COSTS_N_INSNS (32),    /* ddiv */
  32,                    /* cache line size */
  16,                    /* l1 cache */
  512,                   /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
868 | ||
/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),     /* mulsi */
  COSTS_N_INSNS (2),     /* mulsi_const */
  COSTS_N_INSNS (2),     /* mulsi_const9 */
  COSTS_N_INSNS (2),     /* muldi */
  COSTS_N_INSNS (20),    /* divsi */
  COSTS_N_INSNS (20),    /* divdi */
  COSTS_N_INSNS (3),     /* fp */
  COSTS_N_INSNS (3),     /* dmul */
  COSTS_N_INSNS (18),    /* sdiv */
  COSTS_N_INSNS (32),    /* ddiv */
  32,                    /* cache line size */
  32,                    /* l1 cache */
  1024,                  /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
888 | ||
/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),     /* mulsi */
  COSTS_N_INSNS (4),     /* mulsi_const */
  COSTS_N_INSNS (3),     /* mulsi_const9 */
  COSTS_N_INSNS (7),     /* muldi */
  COSTS_N_INSNS (21),    /* divsi */
  COSTS_N_INSNS (37),    /* divdi */
  COSTS_N_INSNS (3),     /* fp */
  COSTS_N_INSNS (3),     /* dmul */
  COSTS_N_INSNS (18),    /* sdiv */
  COSTS_N_INSNS (32),    /* ddiv */
  128,                   /* cache line size */
  32,                    /* l1 cache */
  1024,                  /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
908 | ||
/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),     /* mulsi */
  COSTS_N_INSNS (4),     /* mulsi_const */
  COSTS_N_INSNS (3),     /* mulsi_const9 */
  COSTS_N_INSNS (7),     /* muldi */
  COSTS_N_INSNS (21),    /* divsi */
  COSTS_N_INSNS (37),    /* divdi */
  COSTS_N_INSNS (3),     /* fp */
  COSTS_N_INSNS (3),     /* dmul */
  COSTS_N_INSNS (17),    /* sdiv */
  COSTS_N_INSNS (21),    /* ddiv */
  128,                   /* cache line size */
  64,                    /* l1 cache */
  1024,                  /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
928 | ||
/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  The /2 scaling reflects the PPE's
   dual-issue-per-two-cycles pipeline; the +2 terms add fixed overhead
   on the multiplies.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* prefetch streams */
  0,                        /* SF->DF convert */
};
949 | ||
/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),     /* mulsi */
  COSTS_N_INSNS (3),     /* mulsi_const */
  COSTS_N_INSNS (2),     /* mulsi_const9 */
  COSTS_N_INSNS (5),     /* muldi */
  COSTS_N_INSNS (17),    /* divsi */
  COSTS_N_INSNS (17),    /* divdi */
  COSTS_N_INSNS (3),     /* fp */
  COSTS_N_INSNS (3),     /* dmul */
  COSTS_N_INSNS (17),    /* sdiv */
  COSTS_N_INSNS (31),    /* ddiv */
  32,                    /* cache line size */
  32,                    /* l1 cache */
  512,                   /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
969 | ||
/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),     /* mulsi */
  COSTS_N_INSNS (3),     /* mulsi_const */
  COSTS_N_INSNS (3),     /* mulsi_const9 */
  COSTS_N_INSNS (4),     /* muldi */
  COSTS_N_INSNS (23),    /* divsi */
  COSTS_N_INSNS (23),    /* divdi */
  COSTS_N_INSNS (5),     /* fp */
  COSTS_N_INSNS (5),     /* dmul */
  COSTS_N_INSNS (21),    /* sdiv */
  COSTS_N_INSNS (35),    /* ddiv */
  32,                    /* cache line size */
  32,                    /* l1 cache */
  1024,                  /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
989 | ||
/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),     /* mulsi */
  COSTS_N_INSNS (4),     /* mulsi_const */
  COSTS_N_INSNS (4),     /* mulsi_const9 */
  COSTS_N_INSNS (4),     /* muldi */
  COSTS_N_INSNS (19),    /* divsi */
  COSTS_N_INSNS (19),    /* divdi */
  COSTS_N_INSNS (4),     /* fp */
  COSTS_N_INSNS (4),     /* dmul */
  COSTS_N_INSNS (29),    /* sdiv */
  COSTS_N_INSNS (29),    /* ddiv */
  32,                    /* cache line size */
  32,                    /* l1 cache */
  256,                   /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
1009 | ||
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),     /* mulsi */
  COSTS_N_INSNS (4),     /* mulsi_const */
  COSTS_N_INSNS (4),     /* mulsi_const9 */
  COSTS_N_INSNS (4),     /* muldi */
  COSTS_N_INSNS (19),    /* divsi */
  COSTS_N_INSNS (19),    /* divdi */
  COSTS_N_INSNS (3),     /* fp */
  COSTS_N_INSNS (4),     /* dmul */
  COSTS_N_INSNS (18),    /* sdiv */
  COSTS_N_INSNS (33),    /* ddiv */
  32,                    /* cache line size */
  16,                    /* l1 cache */
  16,                    /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
1029 | ||
/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),     /* mulsi */
  COSTS_N_INSNS (4),     /* mulsi_const */
  COSTS_N_INSNS (4),     /* mulsi_const9 */
  COSTS_N_INSNS (4),     /* muldi */
  COSTS_N_INSNS (14),    /* divsi */
  COSTS_N_INSNS (14),    /* divdi */
  COSTS_N_INSNS (8),     /* fp */
  COSTS_N_INSNS (10),    /* dmul */
  COSTS_N_INSNS (36),    /* sdiv */
  COSTS_N_INSNS (66),    /* ddiv */
  64,                    /* cache line size */
  32,                    /* l1 cache */
  128,                   /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
1049 | ||
/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),     /* mulsi */
  COSTS_N_INSNS (4),     /* mulsi_const */
  COSTS_N_INSNS (4),     /* mulsi_const9 */
  COSTS_N_INSNS (4),     /* muldi */
  COSTS_N_INSNS (14),    /* divsi */
  COSTS_N_INSNS (14),    /* divdi */
  COSTS_N_INSNS (4),     /* fp */
  COSTS_N_INSNS (10),    /* dmul */
  COSTS_N_INSNS (36),    /* sdiv */
  COSTS_N_INSNS (66),    /* ddiv */
  64,                    /* cache line size */
  32,                    /* l1 cache */
  128,                   /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
1069 | ||
/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),     /* mulsi */
  COSTS_N_INSNS (5),     /* mulsi_const */
  COSTS_N_INSNS (4),     /* mulsi_const9 */
  COSTS_N_INSNS (5),     /* muldi */
  COSTS_N_INSNS (14),    /* divsi */
  COSTS_N_INSNS (14),    /* divdi */
  COSTS_N_INSNS (7),     /* fp */
  COSTS_N_INSNS (10),    /* dmul */
  COSTS_N_INSNS (36),    /* sdiv */
  COSTS_N_INSNS (66),    /* ddiv */
  64,                    /* cache line size */
  32,                    /* l1 cache */
  128,                   /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
1089 | ||
/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),     /* mulsi */
  COSTS_N_INSNS (5),     /* mulsi_const */
  COSTS_N_INSNS (4),     /* mulsi_const9 */
  COSTS_N_INSNS (5),     /* muldi */
  COSTS_N_INSNS (14),    /* divsi */
  COSTS_N_INSNS (14),    /* divdi */
  COSTS_N_INSNS (7),     /* fp */
  COSTS_N_INSNS (10),    /* dmul */
  COSTS_N_INSNS (36),    /* sdiv */
  COSTS_N_INSNS (66),    /* ddiv */
  64,                    /* cache line size */
  32,                    /* l1 cache */
  128,                   /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
1109 | ||
/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),     /* mulsi */
  COSTS_N_INSNS (5),     /* mulsi_const */
  COSTS_N_INSNS (5),     /* mulsi_const9 */
  COSTS_N_INSNS (5),     /* muldi */
  COSTS_N_INSNS (18),    /* divsi */
  COSTS_N_INSNS (18),    /* divdi */
  COSTS_N_INSNS (10),    /* fp */
  COSTS_N_INSNS (10),    /* dmul */
  COSTS_N_INSNS (46),    /* sdiv */
  COSTS_N_INSNS (72),    /* ddiv */
  32,                    /* cache line size */
  32,                    /* l1 cache */
  512,                   /* l2 cache */
  1,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
1129 | ||
/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),     /* mulsi */
  COSTS_N_INSNS (2),     /* mulsi_const */
  COSTS_N_INSNS (2),     /* mulsi_const9 */
  COSTS_N_INSNS (4),     /* muldi */
  COSTS_N_INSNS (18),    /* divsi */
  COSTS_N_INSNS (34),    /* divdi */
  COSTS_N_INSNS (3),     /* fp */
  COSTS_N_INSNS (3),     /* dmul */
  COSTS_N_INSNS (17),    /* sdiv */
  COSTS_N_INSNS (17),    /* ddiv */
  128,                   /* cache line size */
  32,                    /* l1 cache */
  1024,                  /* l2 cache */
  8,                     /* prefetch streams */
  0,                     /* SF->DF convert */
};
1149 | ||
/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),     /* mulsi */
  COSTS_N_INSNS (8),     /* mulsi_const */
  COSTS_N_INSNS (8),     /* mulsi_const9 */
  COSTS_N_INSNS (8),     /* muldi */
  COSTS_N_INSNS (22),    /* divsi */
  COSTS_N_INSNS (28),    /* divdi */
  COSTS_N_INSNS (3),     /* fp */
  COSTS_N_INSNS (3),     /* dmul */
  COSTS_N_INSNS (13),    /* sdiv */
  COSTS_N_INSNS (16),    /* ddiv */
  128,                   /* l1 cache line size */
  64,                    /* l1 cache */
  2048,                  /* l2 cache */
  16,                    /* prefetch streams */
  0,                     /* SF->DF convert */
};
1169 | ||
/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),     /* mulsi */
  COSTS_N_INSNS (2),     /* mulsi_const */
  COSTS_N_INSNS (2),     /* mulsi_const9 */
  COSTS_N_INSNS (2),     /* muldi */
  COSTS_N_INSNS (18),    /* divsi */
  COSTS_N_INSNS (34),    /* divdi */
  COSTS_N_INSNS (3),     /* fp */
  COSTS_N_INSNS (3),     /* dmul */
  COSTS_N_INSNS (13),    /* sdiv */
  COSTS_N_INSNS (16),    /* ddiv */
  128,                   /* l1 cache line size */
  32,                    /* l1 cache */
  256,                   /* l2 cache */
  12,                    /* prefetch streams */
  COSTS_N_INSNS (3),     /* SF->DF convert */
};
1189 | ||
/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),     /* mulsi */
  COSTS_N_INSNS (3),     /* mulsi_const */
  COSTS_N_INSNS (3),     /* mulsi_const9 */
  COSTS_N_INSNS (3),     /* muldi */
  COSTS_N_INSNS (19),    /* divsi */
  COSTS_N_INSNS (35),    /* divdi */
  COSTS_N_INSNS (3),     /* fp */
  COSTS_N_INSNS (3),     /* dmul */
  COSTS_N_INSNS (14),    /* sdiv */
  COSTS_N_INSNS (17),    /* ddiv */
  128,                   /* l1 cache line size */
  32,                    /* l1 cache */
  256,                   /* l2 cache */
  12,                    /* prefetch streams */
  COSTS_N_INSNS (3),     /* SF->DF convert */
};
1209 | ||
/* Instruction costs on POWER9 processors.  */
static const
struct processor_costs power9_cost = {
  COSTS_N_INSNS (3),     /* mulsi */
  COSTS_N_INSNS (3),     /* mulsi_const */
  COSTS_N_INSNS (3),     /* mulsi_const9 */
  COSTS_N_INSNS (3),     /* muldi */
  COSTS_N_INSNS (8),     /* divsi */
  COSTS_N_INSNS (12),    /* divdi */
  COSTS_N_INSNS (3),     /* fp */
  COSTS_N_INSNS (3),     /* dmul */
  COSTS_N_INSNS (13),    /* sdiv */
  COSTS_N_INSNS (18),    /* ddiv */
  128,                   /* l1 cache line size */
  32,                    /* l1 cache */
  512,                   /* l2 cache */
  8,                     /* prefetch streams */
  COSTS_N_INSNS (3),     /* SF->DF convert */
};
1229 | ||
/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),    /* mulsi */
  COSTS_N_INSNS (16),    /* mulsi_const */
  COSTS_N_INSNS (16),    /* mulsi_const9 */
  COSTS_N_INSNS (16),    /* muldi */
  COSTS_N_INSNS (22),    /* divsi */
  COSTS_N_INSNS (28),    /* divdi */
  COSTS_N_INSNS (3),     /* fp */
  COSTS_N_INSNS (3),     /* dmul */
  COSTS_N_INSNS (59),    /* sdiv */
  COSTS_N_INSNS (72),    /* ddiv */
  64,                    /* cache line size */
  16,                    /* l1 cache */
  2048,                  /* l2 cache */
  16,                    /* prefetch streams */
  0,                     /* SF->DF convert */
};
1249 | ||
1250 | \f | |
/* Table that classifies rs6000 builtin functions (pure, const, etc.).
   The RS6000_BUILTIN_* macros are all (re)defined below to expand to the
   same initializer, so that including powerpcspe-builtin.def turns each
   builtin description into one rs6000_builtin_info_type entry.  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

/* One entry per builtin: name, implementing insn, availability mask
   (presumably ISA-flag bits -- confirm against the .def file), and
   attribute bits (pure, const, etc.).  */
struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "powerpcspe-builtin.def"
};

/* Undefine the macros again so that later inclusions of the .def file
   can give them different expansions.  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (combined_fn, tree, tree);
1328 | ||
1329 | \f | |
/* Forward declarations for functions defined later in this file.  */
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
/* Handlers registered in rs6000_attribute_table below.  */
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (combined_fn, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
/* "debug" variants presumably wrap the plain hooks with tracing output
   (enabled via -mdebug) -- confirm in the definitions.  */
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, int, rtx_insn *, int,
				     unsigned int);
/* Instruction classification predicates used by the scheduler hooks.  */
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *,rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
/* Builtin setup, one routine per builtin family.  */
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_debug_secondary_memory_needed (machine_mode,
						  reg_class_t,
						  reg_class_t);
static bool rs6000_debug_can_change_mode_class (machine_mode,
						machine_mode,
						reg_class_t);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

/* Indirection pointers through which reload/address hooks are called;
   these default to the plain implementations and can be redirected to
   the debug variants above (NOTE(review): presumably by option-override
   code when -mdebug is given -- confirm there).  */
rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

/* Sentinel cost value: no insn available.  */
const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);
static HOST_WIDE_INT rs6000_disable_incompatible_switches (void);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
static bool rs6000_keep_leaf_when_profiled () __attribute__ ((unused));
static tree rs6000_fold_builtin (tree, int, tree *, bool);
1430 | ||
/* Hash table stuff for keeping track of TOC entries.  */

/* One constant destined for the TOC, keyed by the constant and the mode
   it is used in.  */
struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;			/* Label number assigned to this entry.  */
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

/* GC-rooted so entries survive across garbage collections.  */
static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;			/* Cached function type for this signature.  */
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
1466 | ||
1467 | \f | |
/* Default register names.  Banks appear in the same order as in
   alt_reg_names below.  */
char rs6000_reg_names[][8] =
{
  /* GPRs 0-31.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
  /* FPRs 0-31.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
  /* MQ, link register, count register, argument pointer.  */
     "mq", "lr", "ctr", "ap",
  /* Condition registers CR0-CR7.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  /* Carry bit.  */
     "ca",
  /* AltiVec registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
     "vrsave", "vscr",
  /* SPE registers.  */
     "spe_acc", "spefscr",
  /* Soft frame pointer.  */
     "sfp",
  /* HTM SPR registers.  */
     "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31"
};
1500 | ||
1501 | #ifdef TARGET_REGNAMES | |
/* Alternate register names, used when TARGET_REGNAMES is in effect.
   Indices must line up bank-for-bank with rs6000_reg_names above.
   Note: SPE high register 11 previously read "%r11", which broke the
   "%rhN" pattern and duplicated GPR 11's name; fixed to "%rh11".  */
static const char alt_reg_names[][8] =
{
  /* GPRs 0-31.  */
   "%r0",   "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",   "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16",  "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24",  "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  /* FPRs 0-31.  */
   "%f0",   "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",   "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16",  "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24",  "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  /* MQ, link register, count register, argument pointer.  */
    "mq",    "lr",  "ctr",   "ap",
  /* Condition registers CR0-CR7.  */
  "%cr0",  "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  /* Carry bit.  */
    "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6", "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
  "%rh8",  "%rh9",  "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
1533 | #endif | |
1534 | ||
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  /* "altivec" takes exactly one argument and applies to types.  */
  { "altivec",    1, 1, false, true,  false, false,
    rs6000_handle_altivec_attribute, NULL },
  { "longcall",   0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  /* "shortcall" is processed by the same handler as "longcall".  */
  { "shortcall",  0, 0, false, true,  true,  false,
    rs6000_handle_longcall_attribute, NULL },
  { "ms_struct",  0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
  { "gcc_struct", 0, 0, false, false, false, false,
    rs6000_handle_struct_attribute, NULL },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  /* Terminator.  */
  { NULL,         0, 0, false, false, false, false, NULL, NULL }
};
1556 | \f | |
1557 | #ifndef TARGET_PROFILE_KERNEL | |
1558 | #define TARGET_PROFILE_KERNEL 0 | |
1559 | #endif | |
1560 | ||
1561 | /* The VRSAVE bitmask puts bit %v0 as the most significant bit. */ | |
1562 | #define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO)) | |
1563 | \f | |
1564 | /* Initialize the GCC target structure. */ | |
1565 | #undef TARGET_ATTRIBUTE_TABLE | |
1566 | #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table | |
1567 | #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES | |
1568 | #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes | |
1569 | #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P | |
1570 | #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p | |
1571 | ||
1572 | #undef TARGET_ASM_ALIGNED_DI_OP | |
1573 | #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP | |
1574 | ||
1575 | /* Default unaligned ops are only provided for ELF. Find the ops needed | |
1576 | for non-ELF systems. */ | |
1577 | #ifndef OBJECT_FORMAT_ELF | |
1578 | #if TARGET_XCOFF | |
1579 | /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on | |
1580 | 64-bit targets. */ | |
1581 | #undef TARGET_ASM_UNALIGNED_HI_OP | |
1582 | #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2," | |
1583 | #undef TARGET_ASM_UNALIGNED_SI_OP | |
1584 | #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4," | |
1585 | #undef TARGET_ASM_UNALIGNED_DI_OP | |
1586 | #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8," | |
1587 | #else | |
1588 | /* For Darwin. */ | |
1589 | #undef TARGET_ASM_UNALIGNED_HI_OP | |
1590 | #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t" | |
1591 | #undef TARGET_ASM_UNALIGNED_SI_OP | |
1592 | #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t" | |
1593 | #undef TARGET_ASM_UNALIGNED_DI_OP | |
1594 | #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t" | |
1595 | #undef TARGET_ASM_ALIGNED_DI_OP | |
1596 | #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" | |
1597 | #endif | |
1598 | #endif | |
1599 | ||
1600 | /* This hook deals with fixups for relocatable code and DI-mode objects | |
1601 | in 64-bit code. */ | |
1602 | #undef TARGET_ASM_INTEGER | |
1603 | #define TARGET_ASM_INTEGER rs6000_assemble_integer | |
1604 | ||
1605 | #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO | |
1606 | #undef TARGET_ASM_ASSEMBLE_VISIBILITY | |
1607 | #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility | |
1608 | #endif | |
1609 | ||
1610 | #undef TARGET_SET_UP_BY_PROLOGUE | |
1611 | #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue | |
1612 | ||
1613 | #undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS | |
1614 | #define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS rs6000_get_separate_components | |
1615 | #undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB | |
1616 | #define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB rs6000_components_for_bb | |
1617 | #undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS | |
1618 | #define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS rs6000_disqualify_components | |
1619 | #undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS | |
1620 | #define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS rs6000_emit_prologue_components | |
1621 | #undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS | |
1622 | #define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS rs6000_emit_epilogue_components | |
1623 | #undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS | |
1624 | #define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS rs6000_set_handled_components | |
1625 | ||
1626 | #undef TARGET_EXTRA_LIVE_ON_ENTRY | |
1627 | #define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry | |
1628 | ||
1629 | #undef TARGET_INTERNAL_ARG_POINTER | |
1630 | #define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer | |
1631 | ||
1632 | #undef TARGET_HAVE_TLS | |
1633 | #define TARGET_HAVE_TLS HAVE_AS_TLS | |
1634 | ||
1635 | #undef TARGET_CANNOT_FORCE_CONST_MEM | |
1636 | #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem | |
1637 | ||
1638 | #undef TARGET_DELEGITIMIZE_ADDRESS | |
1639 | #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address | |
1640 | ||
1641 | #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P | |
1642 | #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p | |
1643 | ||
1644 | #undef TARGET_LEGITIMATE_COMBINED_INSN | |
1645 | #define TARGET_LEGITIMATE_COMBINED_INSN rs6000_legitimate_combined_insn | |
1646 | ||
1647 | #undef TARGET_ASM_FUNCTION_PROLOGUE | |
1648 | #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue | |
1649 | #undef TARGET_ASM_FUNCTION_EPILOGUE | |
1650 | #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue | |
1651 | ||
1652 | #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA | |
1653 | #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra | |
1654 | ||
1655 | #undef TARGET_LEGITIMIZE_ADDRESS | |
1656 | #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address | |
1657 | ||
1658 | #undef TARGET_SCHED_VARIABLE_ISSUE | |
1659 | #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue | |
1660 | ||
1661 | #undef TARGET_SCHED_ISSUE_RATE | |
1662 | #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate | |
1663 | #undef TARGET_SCHED_ADJUST_COST | |
1664 | #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost | |
1665 | #undef TARGET_SCHED_ADJUST_PRIORITY | |
1666 | #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority | |
1667 | #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE | |
1668 | #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence | |
1669 | #undef TARGET_SCHED_INIT | |
1670 | #define TARGET_SCHED_INIT rs6000_sched_init | |
1671 | #undef TARGET_SCHED_FINISH | |
1672 | #define TARGET_SCHED_FINISH rs6000_sched_finish | |
1673 | #undef TARGET_SCHED_REORDER | |
1674 | #define TARGET_SCHED_REORDER rs6000_sched_reorder | |
1675 | #undef TARGET_SCHED_REORDER2 | |
1676 | #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2 | |
1677 | ||
1678 | #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD | |
1679 | #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead | |
1680 | ||
1681 | #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD | |
1682 | #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard | |
1683 | ||
1684 | #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT | |
1685 | #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context | |
1686 | #undef TARGET_SCHED_INIT_SCHED_CONTEXT | |
1687 | #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context | |
1688 | #undef TARGET_SCHED_SET_SCHED_CONTEXT | |
1689 | #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context | |
1690 | #undef TARGET_SCHED_FREE_SCHED_CONTEXT | |
1691 | #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context | |
1692 | ||
1693 | #undef TARGET_SCHED_CAN_SPECULATE_INSN | |
1694 | #define TARGET_SCHED_CAN_SPECULATE_INSN rs6000_sched_can_speculate_insn | |
1695 | ||
1696 | #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD | |
1697 | #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load | |
1698 | #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT | |
1699 | #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ | |
1700 | rs6000_builtin_support_vector_misalignment | |
1701 | #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE | |
1702 | #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable | |
1703 | #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST | |
1704 | #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ | |
1705 | rs6000_builtin_vectorization_cost | |
1706 | #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE | |
1707 | #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \ | |
1708 | rs6000_preferred_simd_mode | |
1709 | #undef TARGET_VECTORIZE_INIT_COST | |
1710 | #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost | |
1711 | #undef TARGET_VECTORIZE_ADD_STMT_COST | |
1712 | #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost | |
1713 | #undef TARGET_VECTORIZE_FINISH_COST | |
1714 | #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost | |
1715 | #undef TARGET_VECTORIZE_DESTROY_COST_DATA | |
1716 | #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data | |
1717 | ||
1718 | #undef TARGET_INIT_BUILTINS | |
1719 | #define TARGET_INIT_BUILTINS rs6000_init_builtins | |
1720 | #undef TARGET_BUILTIN_DECL | |
1721 | #define TARGET_BUILTIN_DECL rs6000_builtin_decl | |
1722 | ||
1723 | #undef TARGET_FOLD_BUILTIN | |
1724 | #define TARGET_FOLD_BUILTIN rs6000_fold_builtin | |
1725 | #undef TARGET_GIMPLE_FOLD_BUILTIN | |
1726 | #define TARGET_GIMPLE_FOLD_BUILTIN rs6000_gimple_fold_builtin | |
1727 | ||
1728 | #undef TARGET_EXPAND_BUILTIN | |
1729 | #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin | |
1730 | ||
1731 | #undef TARGET_MANGLE_TYPE | |
1732 | #define TARGET_MANGLE_TYPE rs6000_mangle_type | |
1733 | ||
1734 | #undef TARGET_INIT_LIBFUNCS | |
1735 | #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs | |
1736 | ||
1737 | #if TARGET_MACHO | |
1738 | #undef TARGET_BINDS_LOCAL_P | |
1739 | #define TARGET_BINDS_LOCAL_P darwin_binds_local_p | |
1740 | #endif | |
1741 | ||
1742 | #undef TARGET_MS_BITFIELD_LAYOUT_P | |
1743 | #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p | |
1744 | ||
1745 | #undef TARGET_ASM_OUTPUT_MI_THUNK | |
1746 | #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk | |
1747 | ||
1748 | #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK | |
1749 | #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true | |
1750 | ||
1751 | #undef TARGET_FUNCTION_OK_FOR_SIBCALL | |
1752 | #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall | |
1753 | ||
1754 | #undef TARGET_REGISTER_MOVE_COST | |
1755 | #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost | |
1756 | #undef TARGET_MEMORY_MOVE_COST | |
1757 | #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost | |
1758 | #undef TARGET_CANNOT_COPY_INSN_P | |
1759 | #define TARGET_CANNOT_COPY_INSN_P rs6000_cannot_copy_insn_p | |
1760 | #undef TARGET_RTX_COSTS | |
1761 | #define TARGET_RTX_COSTS rs6000_rtx_costs | |
1762 | #undef TARGET_ADDRESS_COST | |
1763 | #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 | |
1764 | ||
1765 | #undef TARGET_DWARF_REGISTER_SPAN | |
1766 | #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span | |
1767 | ||
1768 | #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA | |
1769 | #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra | |
1770 | ||
1771 | #undef TARGET_MEMBER_TYPE_FORCES_BLK | |
1772 | #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk | |
1773 | ||
1774 | #undef TARGET_PROMOTE_FUNCTION_MODE | |
1775 | #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode | |
1776 | ||
1777 | #undef TARGET_RETURN_IN_MEMORY | |
1778 | #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory | |
1779 | ||
1780 | #undef TARGET_RETURN_IN_MSB | |
1781 | #define TARGET_RETURN_IN_MSB rs6000_return_in_msb | |
1782 | ||
1783 | #undef TARGET_SETUP_INCOMING_VARARGS | |
1784 | #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs | |
1785 | ||
1786 | /* Always strict argument naming on rs6000. */ | |
1787 | #undef TARGET_STRICT_ARGUMENT_NAMING | |
1788 | #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true | |
1789 | #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED | |
1790 | #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true | |
1791 | #undef TARGET_SPLIT_COMPLEX_ARG | |
1792 | #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true | |
1793 | #undef TARGET_MUST_PASS_IN_STACK | |
1794 | #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack | |
1795 | #undef TARGET_PASS_BY_REFERENCE | |
1796 | #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference | |
1797 | #undef TARGET_ARG_PARTIAL_BYTES | |
1798 | #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes | |
1799 | #undef TARGET_FUNCTION_ARG_ADVANCE | |
1800 | #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance | |
1801 | #undef TARGET_FUNCTION_ARG | |
1802 | #define TARGET_FUNCTION_ARG rs6000_function_arg | |
76b0cbf8 RS |
1803 | #undef TARGET_FUNCTION_ARG_PADDING |
1804 | #define TARGET_FUNCTION_ARG_PADDING rs6000_function_arg_padding | |
83349046 SB |
1805 | #undef TARGET_FUNCTION_ARG_BOUNDARY |
1806 | #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary | |
1807 | ||
1808 | #undef TARGET_BUILD_BUILTIN_VA_LIST | |
1809 | #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list | |
1810 | ||
1811 | #undef TARGET_EXPAND_BUILTIN_VA_START | |
1812 | #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start | |
1813 | ||
1814 | #undef TARGET_GIMPLIFY_VA_ARG_EXPR | |
1815 | #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg | |
1816 | ||
1817 | #undef TARGET_EH_RETURN_FILTER_MODE | |
1818 | #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode | |
1819 | ||
1820 | #undef TARGET_SCALAR_MODE_SUPPORTED_P | |
1821 | #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p | |
1822 | ||
1823 | #undef TARGET_VECTOR_MODE_SUPPORTED_P | |
1824 | #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p | |
1825 | ||
1826 | #undef TARGET_FLOATN_MODE | |
1827 | #define TARGET_FLOATN_MODE rs6000_floatn_mode | |
1828 | ||
1829 | #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN | |
1830 | #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn | |
1831 | ||
83349046 SB |
1832 | #undef TARGET_MD_ASM_ADJUST |
1833 | #define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust | |
1834 | ||
1835 | #undef TARGET_OPTION_OVERRIDE | |
1836 | #define TARGET_OPTION_OVERRIDE rs6000_option_override | |
1837 | ||
1838 | #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION | |
1839 | #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ | |
1840 | rs6000_builtin_vectorized_function | |
1841 | ||
1842 | #undef TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION | |
1843 | #define TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION \ | |
1844 | rs6000_builtin_md_vectorized_function | |
1845 | ||
1846 | #undef TARGET_STACK_PROTECT_GUARD | |
1847 | #define TARGET_STACK_PROTECT_GUARD rs6000_init_stack_protect_guard | |
1848 | ||
1849 | #if !TARGET_MACHO | |
1850 | #undef TARGET_STACK_PROTECT_FAIL | |
1851 | #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail | |
1852 | #endif | |
1853 | ||
1854 | #ifdef HAVE_AS_TLS | |
1855 | #undef TARGET_ASM_OUTPUT_DWARF_DTPREL | |
1856 | #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel | |
1857 | #endif | |
1858 | ||
1859 | /* Use a 32-bit anchor range. This leads to sequences like: | |
1860 | ||
1861 | addis tmp,anchor,high | |
1862 | add dest,tmp,low | |
1863 | ||
1864 | where tmp itself acts as an anchor, and can be shared between | |
1865 | accesses to the same 64k page. */ | |
1866 | #undef TARGET_MIN_ANCHOR_OFFSET | |
1867 | #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1 | |
1868 | #undef TARGET_MAX_ANCHOR_OFFSET | |
1869 | #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff | |
1870 | #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P | |
1871 | #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p | |
1872 | #undef TARGET_USE_BLOCKS_FOR_DECL_P | |
1873 | #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p | |
1874 | ||
1875 | #undef TARGET_BUILTIN_RECIPROCAL | |
1876 | #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal | |
1877 | ||
1878 | #undef TARGET_EXPAND_TO_RTL_HOOK | |
1879 | #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot | |
1880 | ||
1881 | #undef TARGET_INSTANTIATE_DECLS | |
1882 | #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls | |
1883 | ||
1884 | #undef TARGET_SECONDARY_RELOAD | |
1885 | #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload | |
f15643d4 RS |
1886 | #undef TARGET_SECONDARY_MEMORY_NEEDED |
1887 | #define TARGET_SECONDARY_MEMORY_NEEDED rs6000_secondary_memory_needed | |
94e23f53 RS |
1888 | #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE |
1889 | #define TARGET_SECONDARY_MEMORY_NEEDED_MODE rs6000_secondary_memory_needed_mode | |
83349046 SB |
1890 | |
1891 | #undef TARGET_LEGITIMATE_ADDRESS_P | |
1892 | #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p | |
1893 | ||
1894 | #undef TARGET_MODE_DEPENDENT_ADDRESS_P | |
1895 | #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p | |
1896 | ||
1897 | #undef TARGET_LRA_P | |
1898 | #define TARGET_LRA_P rs6000_lra_p | |
1899 | ||
1900 | #undef TARGET_COMPUTE_PRESSURE_CLASSES | |
1901 | #define TARGET_COMPUTE_PRESSURE_CLASSES rs6000_compute_pressure_classes | |
1902 | ||
1903 | #undef TARGET_CAN_ELIMINATE | |
1904 | #define TARGET_CAN_ELIMINATE rs6000_can_eliminate | |
1905 | ||
1906 | #undef TARGET_CONDITIONAL_REGISTER_USAGE | |
1907 | #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage | |
1908 | ||
1909 | #undef TARGET_SCHED_REASSOCIATION_WIDTH | |
1910 | #define TARGET_SCHED_REASSOCIATION_WIDTH rs6000_reassociation_width | |
1911 | ||
1912 | #undef TARGET_TRAMPOLINE_INIT | |
1913 | #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init | |
1914 | ||
1915 | #undef TARGET_FUNCTION_VALUE | |
1916 | #define TARGET_FUNCTION_VALUE rs6000_function_value | |
1917 | ||
1918 | #undef TARGET_OPTION_VALID_ATTRIBUTE_P | |
1919 | #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p | |
1920 | ||
1921 | #undef TARGET_OPTION_SAVE | |
1922 | #define TARGET_OPTION_SAVE rs6000_function_specific_save | |
1923 | ||
1924 | #undef TARGET_OPTION_RESTORE | |
1925 | #define TARGET_OPTION_RESTORE rs6000_function_specific_restore | |
1926 | ||
1927 | #undef TARGET_OPTION_PRINT | |
1928 | #define TARGET_OPTION_PRINT rs6000_function_specific_print | |
1929 | ||
1930 | #undef TARGET_CAN_INLINE_P | |
1931 | #define TARGET_CAN_INLINE_P rs6000_can_inline_p | |
1932 | ||
1933 | #undef TARGET_SET_CURRENT_FUNCTION | |
1934 | #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function | |
1935 | ||
1936 | #undef TARGET_LEGITIMATE_CONSTANT_P | |
1937 | #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p | |
1938 | ||
f151c9e1 RS |
1939 | #undef TARGET_VECTORIZE_VEC_PERM_CONST |
1940 | #define TARGET_VECTORIZE_VEC_PERM_CONST rs6000_vectorize_vec_perm_const | |
83349046 SB |
1941 | |
1942 | #undef TARGET_CAN_USE_DOLOOP_P | |
1943 | #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost | |
1944 | ||
1945 | #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV | |
1946 | #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv | |
1947 | ||
1948 | #undef TARGET_LIBGCC_CMP_RETURN_MODE | |
1949 | #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode | |
1950 | #undef TARGET_LIBGCC_SHIFT_COUNT_MODE | |
1951 | #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode | |
1952 | #undef TARGET_UNWIND_WORD_MODE | |
1953 | #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode | |
1954 | ||
1955 | #undef TARGET_OFFLOAD_OPTIONS | |
1956 | #define TARGET_OFFLOAD_OPTIONS rs6000_offload_options | |
1957 | ||
1958 | #undef TARGET_C_MODE_FOR_SUFFIX | |
1959 | #define TARGET_C_MODE_FOR_SUFFIX rs6000_c_mode_for_suffix | |
1960 | ||
1961 | #undef TARGET_INVALID_BINARY_OP | |
1962 | #define TARGET_INVALID_BINARY_OP rs6000_invalid_binary_op | |
1963 | ||
1964 | #undef TARGET_OPTAB_SUPPORTED_P | |
1965 | #define TARGET_OPTAB_SUPPORTED_P rs6000_optab_supported_p | |
1966 | ||
1967 | #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS | |
1968 | #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1 | |
80ec73f4 | 1969 | |
c43f4279 RS |
1970 | #undef TARGET_HARD_REGNO_NREGS |
1971 | #define TARGET_HARD_REGNO_NREGS rs6000_hard_regno_nregs_hook | |
f939c3e6 RS |
1972 | #undef TARGET_HARD_REGNO_MODE_OK |
1973 | #define TARGET_HARD_REGNO_MODE_OK rs6000_hard_regno_mode_ok | |
1974 | ||
99e1629f RS |
1975 | #undef TARGET_MODES_TIEABLE_P |
1976 | #define TARGET_MODES_TIEABLE_P rs6000_modes_tieable_p | |
1977 | ||
80ec73f4 RS |
1978 | #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED |
1979 | #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \ | |
1980 | rs6000_hard_regno_call_part_clobbered | |
e0bd6c9f RS |
1981 | |
1982 | #undef TARGET_SLOW_UNALIGNED_ACCESS | |
1983 | #define TARGET_SLOW_UNALIGNED_ACCESS rs6000_slow_unaligned_access | |
0d803030 RS |
1984 | |
1985 | #undef TARGET_CAN_CHANGE_MODE_CLASS | |
1986 | #define TARGET_CAN_CHANGE_MODE_CLASS rs6000_can_change_mode_class | |
58e17cf8 RS |
1987 | |
1988 | #undef TARGET_CONSTANT_ALIGNMENT | |
1989 | #define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment | |
2a31c321 RS |
1990 | |
1991 | #undef TARGET_STARTING_FRAME_OFFSET | |
1992 | #define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset | |
83349046 SB |
1993 | \f |
1994 | ||
/* Processor table.  */
struct rs6000_ptt
{
  const char *const name;		/* Canonical processor name.  */
  const enum processor_type processor;	/* Processor type enum value.  */
  const HOST_WIDE_INT target_enable;	/* Target flags to enable.  */
};

/* One entry per RS6000_CPU line in powerpcspe-cpus.def; searched
   linearly by rs6000_cpu_name_lookup for -mcpu=/-mtune= handling.  */
static struct rs6000_ptt const processor_target_table[] =
{
#define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
#include "powerpcspe-cpus.def"
#undef RS6000_CPU
};
2009 | ||
2010 | /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the | |
2011 | name is invalid. */ | |
2012 | ||
2013 | static int | |
2014 | rs6000_cpu_name_lookup (const char *name) | |
2015 | { | |
2016 | size_t i; | |
2017 | ||
2018 | if (name != NULL) | |
2019 | { | |
2020 | for (i = 0; i < ARRAY_SIZE (processor_target_table); i++) | |
2021 | if (! strcmp (name, processor_target_table[i].name)) | |
2022 | return (int)i; | |
2023 | } | |
2024 | ||
2025 | return -1; | |
2026 | } | |
2027 | ||
2028 | \f | |
2029 | /* Return number of consecutive hard regs needed starting at reg REGNO | |
2030 | to hold something of mode MODE. | |
2031 | This is ordinarily the length in words of a value of mode MODE | |
2032 | but can be less for certain modes in special long registers. | |
2033 | ||
2034 | For the SPE, GPRs are 64 bits but only 32 bits are visible in | |
2035 | scalar instructions. The upper 32 bits are only available to the | |
2036 | SIMD instructions. | |
2037 | ||
2038 | POWER and PowerPC GPRs hold 32 bits worth; | |
2039 | PowerPC64 GPRs and FPRs point register holds 64 bits worth. */ | |
2040 | ||
2041 | static int | |
2042 | rs6000_hard_regno_nregs_internal (int regno, machine_mode mode) | |
2043 | { | |
2044 | unsigned HOST_WIDE_INT reg_size; | |
2045 | ||
2046 | /* 128-bit floating point usually takes 2 registers, unless it is IEEE | |
2047 | 128-bit floating point that can go in vector registers, which has VSX | |
2048 | memory addressing. */ | |
2049 | if (FP_REGNO_P (regno)) | |
2050 | reg_size = (VECTOR_MEM_VSX_P (mode) || FLOAT128_VECTOR_P (mode) | |
2051 | ? UNITS_PER_VSX_WORD | |
2052 | : UNITS_PER_FP_WORD); | |
2053 | ||
2054 | else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode)) | |
2055 | reg_size = UNITS_PER_SPE_WORD; | |
2056 | ||
2057 | else if (ALTIVEC_REGNO_P (regno)) | |
2058 | reg_size = UNITS_PER_ALTIVEC_WORD; | |
2059 | ||
2060 | /* The value returned for SCmode in the E500 double case is 2 for | |
2061 | ABI compatibility; storing an SCmode value in a single register | |
2062 | would require function_arg and rs6000_spe_function_arg to handle | |
2063 | SCmode so as to pass the value correctly in a pair of | |
2064 | registers. */ | |
2065 | else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode | |
2066 | && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno)) | |
2067 | reg_size = UNITS_PER_FP_WORD; | |
2068 | ||
2069 | else | |
2070 | reg_size = UNITS_PER_WORD; | |
2071 | ||
2072 | return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size; | |
2073 | } | |
2074 | ||
2075 | /* Value is 1 if hard register REGNO can hold a value of machine-mode | |
2076 | MODE. */ | |
2077 | static int | |
f939c3e6 | 2078 | rs6000_hard_regno_mode_ok_uncached (int regno, machine_mode mode) |
83349046 SB |
2079 | { |
2080 | int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1; | |
2081 | ||
2082 | if (COMPLEX_MODE_P (mode)) | |
2083 | mode = GET_MODE_INNER (mode); | |
2084 | ||
2085 | /* PTImode can only go in GPRs. Quad word memory operations require even/odd | |
2086 | register combinations, and use PTImode where we need to deal with quad | |
2087 | word memory operations. Don't allow quad words in the argument or frame | |
2088 | pointer registers, just registers 0..31. */ | |
2089 | if (mode == PTImode) | |
2090 | return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) | |
2091 | && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO) | |
2092 | && ((regno & 1) == 0)); | |
2093 | ||
2094 | /* VSX registers that overlap the FPR registers are larger than for non-VSX | |
2095 | implementations. Don't allow an item to be split between a FP register | |
2096 | and an Altivec register. Allow TImode in all VSX registers if the user | |
2097 | asked for it. */ | |
2098 | if (TARGET_VSX && VSX_REGNO_P (regno) | |
2099 | && (VECTOR_MEM_VSX_P (mode) | |
2100 | || FLOAT128_VECTOR_P (mode) | |
2101 | || reg_addr[mode].scalar_in_vmx_p | |
2102 | || (TARGET_VSX_TIMODE && mode == TImode) | |
2103 | || (TARGET_VADDUQM && mode == V1TImode))) | |
2104 | { | |
2105 | if (FP_REGNO_P (regno)) | |
2106 | return FP_REGNO_P (last_regno); | |
2107 | ||
2108 | if (ALTIVEC_REGNO_P (regno)) | |
2109 | { | |
2110 | if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p) | |
2111 | return 0; | |
2112 | ||
2113 | return ALTIVEC_REGNO_P (last_regno); | |
2114 | } | |
2115 | } | |
2116 | ||
2117 | /* The GPRs can hold any mode, but values bigger than one register | |
2118 | cannot go past R31. */ | |
2119 | if (INT_REGNO_P (regno)) | |
2120 | return INT_REGNO_P (last_regno); | |
2121 | ||
2122 | /* The float registers (except for VSX vector modes) can only hold floating | |
2123 | modes and DImode. */ | |
2124 | if (FP_REGNO_P (regno)) | |
2125 | { | |
2126 | if (FLOAT128_VECTOR_P (mode)) | |
2127 | return false; | |
2128 | ||
2129 | if (SCALAR_FLOAT_MODE_P (mode) | |
2130 | && (mode != TDmode || (regno % 2) == 0) | |
2131 | && FP_REGNO_P (last_regno)) | |
2132 | return 1; | |
2133 | ||
2134 | if (GET_MODE_CLASS (mode) == MODE_INT) | |
2135 | { | |
2136 | if(GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD) | |
2137 | return 1; | |
2138 | ||
2139 | if (TARGET_VSX_SMALL_INTEGER) | |
2140 | { | |
2141 | if (mode == SImode) | |
2142 | return 1; | |
2143 | ||
2144 | if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode)) | |
2145 | return 1; | |
2146 | } | |
2147 | } | |
2148 | ||
2149 | if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT | |
2150 | && PAIRED_VECTOR_MODE (mode)) | |
2151 | return 1; | |
2152 | ||
2153 | return 0; | |
2154 | } | |
2155 | ||
2156 | /* The CR register can only hold CC modes. */ | |
2157 | if (CR_REGNO_P (regno)) | |
2158 | return GET_MODE_CLASS (mode) == MODE_CC; | |
2159 | ||
2160 | if (CA_REGNO_P (regno)) | |
2161 | return mode == Pmode || mode == SImode; | |
2162 | ||
2163 | /* AltiVec only in AldyVec registers. */ | |
2164 | if (ALTIVEC_REGNO_P (regno)) | |
2165 | return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) | |
2166 | || mode == V1TImode); | |
2167 | ||
2168 | /* ...but GPRs can hold SIMD data on the SPE in one register. */ | |
2169 | if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode)) | |
2170 | return 1; | |
2171 | ||
2172 | /* We cannot put non-VSX TImode or PTImode anywhere except general register | |
2173 | and it must be able to fit within the register set. */ | |
2174 | ||
2175 | return GET_MODE_SIZE (mode) <= UNITS_PER_WORD; | |
2176 | } | |
2177 | ||
c43f4279 RS |
/* Implement TARGET_HARD_REGNO_NREGS.  Simply index the cached
   rs6000_hard_regno_nregs table rather than recomputing.  */

static unsigned int
rs6000_hard_regno_nregs_hook (unsigned int regno, machine_mode mode)
{
  return rs6000_hard_regno_nregs[mode][regno];
}
2185 | ||
f939c3e6 RS |
/* Implement TARGET_HARD_REGNO_MODE_OK.  Answer from the cached
   rs6000_hard_regno_mode_ok_p table; the real computation is done in
   rs6000_hard_regno_mode_ok_uncached.  */

static bool
rs6000_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return rs6000_hard_regno_mode_ok_p[mode][regno];
}
2193 | ||
99e1629f RS |
2194 | /* Implement TARGET_MODES_TIEABLE_P. |
2195 | ||
2196 | PTImode cannot tie with other modes because PTImode is restricted to even | |
2197 | GPR registers, and TImode can go in any GPR as well as VSX registers (PR | |
2198 | 57744). | |
2199 | ||
2200 | Altivec/VSX vector tests were moved ahead of scalar float mode, so that IEEE | |
2201 | 128-bit floating point on VSX systems ties with other vectors. */ | |
2202 | ||
2203 | static bool | |
2204 | rs6000_modes_tieable_p (machine_mode mode1, machine_mode mode2) | |
2205 | { | |
2206 | if (mode1 == PTImode) | |
2207 | return mode2 == PTImode; | |
2208 | if (mode2 == PTImode) | |
2209 | return false; | |
2210 | ||
2211 | if (ALTIVEC_OR_VSX_VECTOR_MODE (mode1)) | |
2212 | return ALTIVEC_OR_VSX_VECTOR_MODE (mode2); | |
2213 | if (ALTIVEC_OR_VSX_VECTOR_MODE (mode2)) | |
2214 | return false; | |
2215 | ||
2216 | if (SCALAR_FLOAT_MODE_P (mode1)) | |
2217 | return SCALAR_FLOAT_MODE_P (mode2); | |
2218 | if (SCALAR_FLOAT_MODE_P (mode2)) | |
2219 | return false; | |
2220 | ||
2221 | if (GET_MODE_CLASS (mode1) == MODE_CC) | |
2222 | return GET_MODE_CLASS (mode2) == MODE_CC; | |
2223 | if (GET_MODE_CLASS (mode2) == MODE_CC) | |
2224 | return false; | |
2225 | ||
2226 | if (SPE_VECTOR_MODE (mode1)) | |
2227 | return SPE_VECTOR_MODE (mode2); | |
2228 | if (SPE_VECTOR_MODE (mode2)) | |
2229 | return false; | |
2230 | ||
2231 | return true; | |
2232 | } | |
2233 | ||
80ec73f4 RS |
2234 | /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. */ |
2235 | ||
2236 | static bool | |
2237 | rs6000_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode) | |
2238 | { | |
2239 | if (TARGET_32BIT | |
2240 | && TARGET_POWERPC64 | |
2241 | && GET_MODE_SIZE (mode) > 4 | |
2242 | && INT_REGNO_P (regno)) | |
2243 | return true; | |
2244 | ||
2245 | if (TARGET_VSX | |
2246 | && FP_REGNO_P (regno) | |
2247 | && GET_MODE_SIZE (mode) > 8 | |
2248 | && !FLOAT128_2REG_P (mode)) | |
2249 | return true; | |
2250 | ||
2251 | return false; | |
2252 | } | |
2253 | ||
83349046 SB |
/* Print interesting facts about registers.  For each register in
   [FIRST_REGNO, LAST_REGNO], writes to stderr the modes it can hold
   (with register counts), whether it is call-used or fixed, and its
   register class.  REG_NAME labels the lines; output wraps at roughly
   column 70.  Debug aid only.  */
static void
rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
{
  int r, m;

  for (r = first_regno; r <= last_regno; ++r)
    {
      const char *comma = "";
      int len;

      if (first_regno == last_regno)
	fprintf (stderr, "%s:\t", reg_name);
      else
	fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);

      /* 'len' tracks the output column for wrapping; 8 stands in for
	 the "name:\t" prefix width.  */
      len = 8;
      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
	  {
	    if (len > 70)
	      {
		fprintf (stderr, ",\n\t");
		len = 8;
		comma = "";
	      }

	    /* Append "/<nregs>" when the mode spans several registers.  */
	    if (rs6000_hard_regno_nregs[m][r] > 1)
	      len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
			      rs6000_hard_regno_nregs[m][r]);
	    else
	      len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));

	    comma = ", ";
	  }

      if (call_used_regs[r])
	{
	  if (len > 70)
	    {
	      fprintf (stderr, ",\n\t");
	      len = 8;
	      comma = "";
	    }

	  len += fprintf (stderr, "%s%s", comma, "call-used");
	  comma = ", ";
	}

      if (fixed_regs[r])
	{
	  if (len > 70)
	    {
	      fprintf (stderr, ",\n\t");
	      len = 8;
	      comma = "";
	    }

	  len += fprintf (stderr, "%s%s", comma, "fixed");
	  comma = ", ";
	}

      /* NOTE: the remaining wrap checks intentionally do not reset
	 'len'; only the continuation indent and comma state change.  */
      if (len > 70)
	{
	  fprintf (stderr, ",\n\t");
	  comma = "";
	}

      len += fprintf (stderr, "%sreg-class = %s", comma,
		      reg_class_names[(int)rs6000_regno_regclass[r]]);
      comma = ", ";

      if (len > 70)
	{
	  fprintf (stderr, ",\n\t");
	  comma = "";
	}

      fprintf (stderr, "%sregno = %d\n", comma, r);
    }
}
2335 | ||
2336 | static const char * | |
2337 | rs6000_debug_vector_unit (enum rs6000_vector v) | |
2338 | { | |
2339 | const char *ret; | |
2340 | ||
2341 | switch (v) | |
2342 | { | |
2343 | case VECTOR_NONE: ret = "none"; break; | |
2344 | case VECTOR_ALTIVEC: ret = "altivec"; break; | |
2345 | case VECTOR_VSX: ret = "vsx"; break; | |
2346 | case VECTOR_P8_VECTOR: ret = "p8_vector"; break; | |
2347 | case VECTOR_PAIRED: ret = "paired"; break; | |
2348 | case VECTOR_SPE: ret = "spe"; break; | |
2349 | case VECTOR_OTHER: ret = "other"; break; | |
2350 | default: ret = "unknown"; break; | |
2351 | } | |
2352 | ||
2353 | return ret; | |
2354 | } | |
2355 | ||
2356 | /* Inner function printing just the address mask for a particular reload | |
2357 | register class. */ | |
2358 | DEBUG_FUNCTION char * | |
2359 | rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces) | |
2360 | { | |
2361 | static char ret[8]; | |
2362 | char *p = ret; | |
2363 | ||
2364 | if ((mask & RELOAD_REG_VALID) != 0) | |
2365 | *p++ = 'v'; | |
2366 | else if (keep_spaces) | |
2367 | *p++ = ' '; | |
2368 | ||
2369 | if ((mask & RELOAD_REG_MULTIPLE) != 0) | |
2370 | *p++ = 'm'; | |
2371 | else if (keep_spaces) | |
2372 | *p++ = ' '; | |
2373 | ||
2374 | if ((mask & RELOAD_REG_INDEXED) != 0) | |
2375 | *p++ = 'i'; | |
2376 | else if (keep_spaces) | |
2377 | *p++ = ' '; | |
2378 | ||
2379 | if ((mask & RELOAD_REG_QUAD_OFFSET) != 0) | |
2380 | *p++ = 'O'; | |
2381 | else if ((mask & RELOAD_REG_OFFSET) != 0) | |
2382 | *p++ = 'o'; | |
2383 | else if (keep_spaces) | |
2384 | *p++ = ' '; | |
2385 | ||
2386 | if ((mask & RELOAD_REG_PRE_INCDEC) != 0) | |
2387 | *p++ = '+'; | |
2388 | else if (keep_spaces) | |
2389 | *p++ = ' '; | |
2390 | ||
2391 | if ((mask & RELOAD_REG_PRE_MODIFY) != 0) | |
2392 | *p++ = '+'; | |
2393 | else if (keep_spaces) | |
2394 | *p++ = ' '; | |
2395 | ||
2396 | if ((mask & RELOAD_REG_AND_M16) != 0) | |
2397 | *p++ = '&'; | |
2398 | else if (keep_spaces) | |
2399 | *p++ = ' '; | |
2400 | ||
2401 | *p = '\0'; | |
2402 | ||
2403 | return ret; | |
2404 | } | |
2405 | ||
2406 | /* Print the address masks in a human readble fashion. */ | |
2407 | DEBUG_FUNCTION void | |
2408 | rs6000_debug_print_mode (ssize_t m) | |
2409 | { | |
2410 | ssize_t rc; | |
2411 | int spaces = 0; | |
2412 | bool fuse_extra_p; | |
2413 | ||
2414 | fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m)); | |
2415 | for (rc = 0; rc < N_RELOAD_REG; rc++) | |
2416 | fprintf (stderr, " %s: %s", reload_reg_map[rc].name, | |
2417 | rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true)); | |
2418 | ||
2419 | if ((reg_addr[m].reload_store != CODE_FOR_nothing) | |
2420 | || (reg_addr[m].reload_load != CODE_FOR_nothing)) | |
2421 | fprintf (stderr, " Reload=%c%c", | |
2422 | (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*', | |
2423 | (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*'); | |
2424 | else | |
2425 | spaces += sizeof (" Reload=sl") - 1; | |
2426 | ||
2427 | if (reg_addr[m].scalar_in_vmx_p) | |
2428 | { | |
2429 | fprintf (stderr, "%*s Upper=y", spaces, ""); | |
2430 | spaces = 0; | |
2431 | } | |
2432 | else | |
2433 | spaces += sizeof (" Upper=y") - 1; | |
2434 | ||
2435 | fuse_extra_p = ((reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing) | |
2436 | || reg_addr[m].fused_toc); | |
2437 | if (!fuse_extra_p) | |
2438 | { | |
2439 | for (rc = 0; rc < N_RELOAD_REG; rc++) | |
2440 | { | |
2441 | if (rc != RELOAD_REG_ANY) | |
2442 | { | |
2443 | if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing | |
2444 | || reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing | |
2445 | || reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing | |
2446 | || reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing | |
2447 | || reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing) | |
2448 | { | |
2449 | fuse_extra_p = true; | |
2450 | break; | |
2451 | } | |
2452 | } | |
2453 | } | |
2454 | } | |
2455 | ||
2456 | if (fuse_extra_p) | |
2457 | { | |
2458 | fprintf (stderr, "%*s Fuse:", spaces, ""); | |
2459 | spaces = 0; | |
2460 | ||
2461 | for (rc = 0; rc < N_RELOAD_REG; rc++) | |
2462 | { | |
2463 | if (rc != RELOAD_REG_ANY) | |
2464 | { | |
2465 | char load, store; | |
2466 | ||
2467 | if (reg_addr[m].fusion_addis_ld[rc] != CODE_FOR_nothing) | |
2468 | load = 'l'; | |
2469 | else if (reg_addr[m].fusion_addi_ld[rc] != CODE_FOR_nothing) | |
2470 | load = 'L'; | |
2471 | else | |
2472 | load = '-'; | |
2473 | ||
2474 | if (reg_addr[m].fusion_addis_st[rc] != CODE_FOR_nothing) | |
2475 | store = 's'; | |
2476 | else if (reg_addr[m].fusion_addi_st[rc] != CODE_FOR_nothing) | |
2477 | store = 'S'; | |
2478 | else | |
2479 | store = '-'; | |
2480 | ||
2481 | if (load == '-' && store == '-') | |
2482 | spaces += 5; | |
2483 | else | |
2484 | { | |
2485 | fprintf (stderr, "%*s%c=%c%c", (spaces + 1), "", | |
2486 | reload_reg_map[rc].name[0], load, store); | |
2487 | spaces = 0; | |
2488 | } | |
2489 | } | |
2490 | } | |
2491 | ||
2492 | if (reg_addr[m].fusion_gpr_ld != CODE_FOR_nothing) | |
2493 | { | |
2494 | fprintf (stderr, "%*sP8gpr", (spaces + 1), ""); | |
2495 | spaces = 0; | |
2496 | } | |
2497 | else | |
2498 | spaces += sizeof (" P8gpr") - 1; | |
2499 | ||
2500 | if (reg_addr[m].fused_toc) | |
2501 | { | |
2502 | fprintf (stderr, "%*sToc", (spaces + 1), ""); | |
2503 | spaces = 0; | |
2504 | } | |
2505 | else | |
2506 | spaces += sizeof (" Toc") - 1; | |
2507 | } | |
2508 | else | |
2509 | spaces += sizeof (" Fuse: G=ls F=ls v=ls P8gpr Toc") - 1; | |
2510 | ||
2511 | if (rs6000_vector_unit[m] != VECTOR_NONE | |
2512 | || rs6000_vector_mem[m] != VECTOR_NONE) | |
2513 | { | |
2514 | fprintf (stderr, "%*s vector: arith=%-10s mem=%s", | |
2515 | spaces, "", | |
2516 | rs6000_debug_vector_unit (rs6000_vector_unit[m]), | |
2517 | rs6000_debug_vector_unit (rs6000_vector_mem[m])); | |
2518 | } | |
2519 | ||
2520 | fputs ("\n", stderr); | |
2521 | } | |
2522 | ||
/* Format strings for the -mdebug=reg dumps below: a left-justified
   32-column field name followed by '=', then a decimal value (_D), a
   zero-padded hex HOST_WIDE_INT (_WX), or a string (_S).  */
#define DEBUG_FMT_ID "%-32s= "
#define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
#define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
#define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2527 | ||
/* Print various interesting information with -mdebug=reg.  Dumps, in
   order: the per-register mode tables, virtual register numbers, the
   register class behind each constraint letter, per-mode reload/fusion
   information, mode tieability, reciprocal-estimate settings, and the
   active ISA/ABI/scheduling options.  All output goes to stderr.  */
static void
rs6000_debug_reg_global (void)
{
  static const char *const tf[2] = { "false", "true" };
  const char *nl = (const char *)0;
  int m;
  size_t m1, m2, v;
  char costly_num[20];
  char nop_num[20];
  char flags_buffer[40];
  const char *costly_str;
  const char *nop_str;
  const char *trace_str;
  const char *abi_str;
  const char *cmodel_str;
  struct cl_target_option cl_opts;

  /* Modes we want tieable information on.  */
  static const machine_mode print_tieable_modes[] = {
    QImode,
    HImode,
    SImode,
    DImode,
    TImode,
    PTImode,
    SFmode,
    DFmode,
    TFmode,
    IFmode,
    KFmode,
    SDmode,
    DDmode,
    TDmode,
    V8QImode,
    V4HImode,
    V2SImode,
    V16QImode,
    V8HImode,
    V4SImode,
    V2DImode,
    V1TImode,
    V32QImode,
    V16HImode,
    V8SImode,
    V4DImode,
    V2TImode,
    V2SFmode,
    V4SFmode,
    V2DFmode,
    V8SFmode,
    V4DFmode,
    CCmode,
    CCUNSmode,
    CCEQmode,
  };

  /* Virtual regs we are interested in.  */
  const static struct {
    int regno;			/* register number.  */
    const char *name;		/* register name.  */
  } virtual_regs[] = {
    { STACK_POINTER_REGNUM, "stack pointer:" },
    { TOC_REGNUM, "toc: " },
    { STATIC_CHAIN_REGNUM, "static chain: " },
    { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
    { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
    { ARG_POINTER_REGNUM, "arg pointer: " },
    { FRAME_POINTER_REGNUM, "frame pointer:" },
    { FIRST_PSEUDO_REGISTER, "first pseudo: " },
    { FIRST_VIRTUAL_REGISTER, "first virtual:" },
    { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
    { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
    { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
    { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
    { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
    { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundry:" },
    { LAST_VIRTUAL_REGISTER, "last virtual: " },
  };

  fputs ("\nHard register information:\n", stderr);
  rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
  rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
  rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
			  LAST_ALTIVEC_REGNO,
			  "vs");
  rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
  rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
  rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
  rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
  rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
  rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
  rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
  rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");

  fputs ("\nVirtual/stack/frame registers:\n", stderr);
  for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
    fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);

  /* Register class backing each of the rs6000 constraint letters.  */
  fprintf (stderr,
	   "\n"
	   "d reg_class = %s\n"
	   "f reg_class = %s\n"
	   "v reg_class = %s\n"
	   "wa reg_class = %s\n"
	   "wb reg_class = %s\n"
	   "wd reg_class = %s\n"
	   "we reg_class = %s\n"
	   "wf reg_class = %s\n"
	   "wg reg_class = %s\n"
	   "wh reg_class = %s\n"
	   "wi reg_class = %s\n"
	   "wj reg_class = %s\n"
	   "wk reg_class = %s\n"
	   "wl reg_class = %s\n"
	   "wm reg_class = %s\n"
	   "wo reg_class = %s\n"
	   "wp reg_class = %s\n"
	   "wq reg_class = %s\n"
	   "wr reg_class = %s\n"
	   "ws reg_class = %s\n"
	   "wt reg_class = %s\n"
	   "wu reg_class = %s\n"
	   "wv reg_class = %s\n"
	   "ww reg_class = %s\n"
	   "wx reg_class = %s\n"
	   "wy reg_class = %s\n"
	   "wz reg_class = %s\n"
	   "wA reg_class = %s\n"
	   "wH reg_class = %s\n"
	   "wI reg_class = %s\n"
	   "wJ reg_class = %s\n"
	   "wK reg_class = %s\n"
	   "\n",
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wb]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_we]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wo]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wA]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wH]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wI]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wJ]],
	   reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wK]]);

  nl = "\n";
  for (m = 0; m < NUM_MACHINE_MODES; ++m)
    rs6000_debug_print_mode (m);

  fputs ("\n", stderr);

  /* For each pair of distinct modes in PRINT_TIEABLE_MODES, report the
     ones the target allows to be tied.  */
  for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
    {
      machine_mode mode1 = print_tieable_modes[m1];
      bool first_time = true;

      nl = (const char *)0;
      for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
	{
	  machine_mode mode2 = print_tieable_modes[m2];
	  if (mode1 != mode2 && rs6000_modes_tieable_p (mode1, mode2))
	    {
	      if (first_time)
		{
		  fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
		  nl = "\n";
		  first_time = false;
		}

	      fprintf (stderr, " %s", GET_MODE_NAME (mode2));
	    }
	}

      if (!first_time)
	fputs ("\n", stderr);
    }

  if (nl)
    fputs (nl, stderr);

  if (rs6000_recip_control)
    {
      fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);

      for (m = 0; m < NUM_MACHINE_MODES; ++m)
	if (rs6000_recip_bits[m])
	  {
	    fprintf (stderr,
		     "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
		     GET_MODE_NAME (m),
		     (RS6000_RECIP_AUTO_RE_P (m)
		      ? "auto"
		      : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
		     (RS6000_RECIP_AUTO_RSQRTE_P (m)
		      ? "auto"
		      : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
	  }

      fputs ("\n", stderr);
    }

  if (rs6000_cpu_index >= 0)
    {
      const char *name = processor_target_table[rs6000_cpu_index].name;
      HOST_WIDE_INT flags
	= processor_target_table[rs6000_cpu_index].target_enable;

      sprintf (flags_buffer, "-mcpu=%s flags", name);
      rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
    }
  else
    fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");

  if (rs6000_tune_index >= 0)
    {
      const char *name = processor_target_table[rs6000_tune_index].name;
      HOST_WIDE_INT flags
	= processor_target_table[rs6000_tune_index].target_enable;

      sprintf (flags_buffer, "-mtune=%s flags", name);
      rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
    }
  else
    fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");

  cl_target_option_save (&cl_opts, &global_options);
  rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
			    rs6000_isa_flags);

  rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
			    rs6000_isa_flags_explicit);

  rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
				rs6000_builtin_mask);

  rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);

  fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
	   OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");

  /* Scheduling knobs: unnamed enum values fall through to the numeric
     forms via COSTLY_NUM / NOP_NUM.  */
  switch (rs6000_sched_costly_dep)
    {
    case max_dep_latency:
      costly_str = "max_dep_latency";
      break;

    case no_dep_costly:
      costly_str = "no_dep_costly";
      break;

    case all_deps_costly:
      costly_str = "all_deps_costly";
      break;

    case true_store_to_load_dep_costly:
      costly_str = "true_store_to_load_dep_costly";
      break;

    case store_to_load_dep_costly:
      costly_str = "store_to_load_dep_costly";
      break;

    default:
      costly_str = costly_num;
      sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
      break;
    }

  fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);

  switch (rs6000_sched_insert_nops)
    {
    case sched_finish_regroup_exact:
      nop_str = "sched_finish_regroup_exact";
      break;

    case sched_finish_pad_groups:
      nop_str = "sched_finish_pad_groups";
      break;

    case sched_finish_none:
      nop_str = "sched_finish_none";
      break;

    default:
      nop_str = nop_num;
      sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
      break;
    }

  fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);

  switch (rs6000_sdata)
    {
    default:
    case SDATA_NONE:
      break;

    case SDATA_DATA:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
      break;

    case SDATA_SYSV:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
      break;

    case SDATA_EABI:
      fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
      break;

    }

  switch (rs6000_traceback)
    {
    case traceback_default:	trace_str = "default";	break;
    case traceback_none:	trace_str = "none";	break;
    case traceback_part:	trace_str = "part";	break;
    case traceback_full:	trace_str = "full";	break;
    default:			trace_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);

  switch (rs6000_current_cmodel)
    {
    case CMODEL_SMALL:	cmodel_str = "small";	break;
    case CMODEL_MEDIUM:	cmodel_str = "medium";	break;
    case CMODEL_LARGE:	cmodel_str = "large";	break;
    default:		cmodel_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);

  switch (rs6000_current_abi)
    {
    case ABI_NONE:	abi_str = "none";	break;
    case ABI_AIX:	abi_str = "aix";	break;
    case ABI_ELFv2:	abi_str = "ELFv2";	break;
    case ABI_V4:	abi_str = "V4";		break;
    case ABI_DARWIN:	abi_str = "darwin";	break;
    default:		abi_str = "unknown";	break;
    }

  fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);

  if (rs6000_altivec_abi)
    fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");

  if (rs6000_spe_abi)
    fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");

  if (rs6000_darwin64_abi)
    fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");

  if (rs6000_float_gprs)
    fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");

  fprintf (stderr, DEBUG_FMT_S, "fprs",
	   (TARGET_FPRS ? "true" : "false"));

  fprintf (stderr, DEBUG_FMT_S, "single_float",
	   (TARGET_SINGLE_FLOAT ? "true" : "false"));

  fprintf (stderr, DEBUG_FMT_S, "double_float",
	   (TARGET_DOUBLE_FLOAT ? "true" : "false"));

  fprintf (stderr, DEBUG_FMT_S, "soft_float",
	   (TARGET_SOFT_FLOAT ? "true" : "false"));

  fprintf (stderr, DEBUG_FMT_S, "e500_single",
	   (TARGET_E500_SINGLE ? "true" : "false"));

  fprintf (stderr, DEBUG_FMT_S, "e500_double",
	   (TARGET_E500_DOUBLE ? "true" : "false"));

  if (TARGET_LINK_STACK)
    fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");

  fprintf (stderr, DEBUG_FMT_S, "lra", TARGET_LRA ? "true" : "false");

  if (TARGET_P8_FUSION)
    {
      char options[80];

      strcpy (options, (TARGET_P9_FUSION) ? "power9" : "power8");
      if (TARGET_TOC_FUSION)
	strcat (options, ", toc");

      if (TARGET_P8_FUSION_SIGN)
	strcat (options, ", sign");

      fprintf (stderr, DEBUG_FMT_S, "fusion", options);
    }

  fprintf (stderr, DEBUG_FMT_S, "plt-format",
	   TARGET_SECURE_PLT ? "secure" : "bss");
  fprintf (stderr, DEBUG_FMT_S, "struct-return",
	   aix_struct_return ? "aix" : "sysv");
  fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
  fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
  fprintf (stderr, DEBUG_FMT_S, "align_branch",
	   tf[!!rs6000_align_branch_targets]);
  fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
  fprintf (stderr, DEBUG_FMT_D, "long_double_size",
	   rs6000_long_double_type_size);
  fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
	   (int)rs6000_sched_restricted_insns_priority);
  fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
	   (int)END_BUILTINS);
  fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
	   (int)RS6000_BUILTIN_COUNT);

  fprintf (stderr, DEBUG_FMT_D, "Enable float128 on VSX",
	   (int)TARGET_FLOAT128_ENABLE_TYPE);

  if (TARGET_VSX)
    fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
	     (int)VECTOR_ELEMENT_SCALAR_64BIT);

  if (TARGET_DIRECT_MOVE_128)
    fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
	     (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
}
2973 | ||
2974 | \f | |
/* Update the addr mask bits in reg_addr to help secondary reload and go if
   legitimate address support to figure out the appropriate addressing to
   use.  For every machine mode and every reload register class, compute
   which addressing forms (indexed, offset, pre-increment, etc.) are valid
   and record them in reg_addr[mode].addr_mask[class]; RELOAD_REG_ANY gets
   the union over all classes.  */

static void
rs6000_setup_reg_addr_masks (void)
{
  ssize_t rc, reg, m, nregs;
  addr_mask_type any_addr_mask, addr_mask;

  for (m = 0; m < NUM_MACHINE_MODES; ++m)
    {
      machine_mode m2 = (machine_mode) m;
      bool complex_p = false;
      bool small_int_p = (m2 == QImode || m2 == HImode || m2 == SImode);
      size_t msize;

      /* Complex modes are analyzed through their component mode.  */
      if (COMPLEX_MODE_P (m2))
	{
	  complex_p = true;
	  m2 = GET_MODE_INNER (m2);
	}

      msize = GET_MODE_SIZE (m2);

      /* SDmode is special in that we want to access it only via REG+REG
	 addressing on power7 and above, since we want to use the LFIWZX and
	 STFIWZX instructions to load it.  */
      bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);

      any_addr_mask = 0;
      for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
	{
	  addr_mask = 0;
	  reg = reload_reg_map[rc].reg;

	  /* Can mode values go in the GPR/FPR/Altivec registers?  */
	  if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
	    {
	      bool small_int_vsx_p = (small_int_p
				      && (rc == RELOAD_REG_FPR
					  || rc == RELOAD_REG_VMX));

	      nregs = rs6000_hard_regno_nregs[m][reg];
	      addr_mask |= RELOAD_REG_VALID;

	      /* Indicate if the mode takes more than 1 physical register.  If
		 it takes a single register, indicate it can do REG+REG
		 addressing.  Small integers in VSX registers can only do
		 REG+REG addressing.  */
	      if (small_int_vsx_p)
		addr_mask |= RELOAD_REG_INDEXED;
	      else if (nregs > 1 || m == BLKmode || complex_p)
		addr_mask |= RELOAD_REG_MULTIPLE;
	      else
		addr_mask |= RELOAD_REG_INDEXED;

	      /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
		 addressing.  Restrict addressing on SPE for 64-bit types
		 because of the SUBREG hackery used to address 64-bit floats in
		 '32-bit' GPRs.  If we allow scalars into Altivec registers,
		 don't allow PRE_INC, PRE_DEC, or PRE_MODIFY.  */

	      if (TARGET_UPDATE
		  && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
		  && msize <= 8
		  && !VECTOR_MODE_P (m2)
		  && !FLOAT128_VECTOR_P (m2)
		  && !complex_p
		  && !small_int_vsx_p
		  && (m2 != DFmode || !TARGET_UPPER_REGS_DF)
		  && (m2 != SFmode || !TARGET_UPPER_REGS_SF)
		  && !(TARGET_E500_DOUBLE && msize == 8))
		{
		  addr_mask |= RELOAD_REG_PRE_INCDEC;

		  /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
		     we don't allow PRE_MODIFY for some multi-register
		     operations.  */
		  switch (m)
		    {
		    default:
		      addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;

		    case E_DImode:
		      if (TARGET_POWERPC64)
			addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;

		    case E_DFmode:
		    case E_DDmode:
		      if (TARGET_DF_INSN)
			addr_mask |= RELOAD_REG_PRE_MODIFY;
		      break;
		    }
		}
	    }

	  /* GPR and FPR registers can do REG+OFFSET addressing, except
	     possibly for SDmode.  ISA 3.0 (i.e. power9) adds D-form addressing
	     for 64-bit scalars and 32-bit SFmode to altivec registers.  */
	  if ((addr_mask != 0) && !indexed_only_p
	      && msize <= 8
	      && (rc == RELOAD_REG_GPR
		  || ((msize == 8 || m2 == SFmode)
		      && (rc == RELOAD_REG_FPR
			  || (rc == RELOAD_REG_VMX
			      && TARGET_P9_DFORM_SCALAR)))))
	    addr_mask |= RELOAD_REG_OFFSET;

	  /* VSX registers can do REG+OFFSET addresssing if ISA 3.0
	     instructions are enabled.  The offset for 128-bit VSX registers is
	     only 12-bits.  While GPRs can handle the full offset range, VSX
	     registers can only handle the restricted range.  */
	  else if ((addr_mask != 0) && !indexed_only_p
		   && msize == 16 && TARGET_P9_DFORM_VECTOR
		   && (ALTIVEC_OR_VSX_VECTOR_MODE (m2)
		       || (m2 == TImode && TARGET_VSX_TIMODE)))
	    {
	      addr_mask |= RELOAD_REG_OFFSET;
	      if (rc == RELOAD_REG_FPR || rc == RELOAD_REG_VMX)
		addr_mask |= RELOAD_REG_QUAD_OFFSET;
	    }

	  /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
	     addressing on 128-bit types.  */
	  if (rc == RELOAD_REG_VMX && msize == 16
	      && (addr_mask & RELOAD_REG_VALID) != 0)
	    addr_mask |= RELOAD_REG_AND_M16;

	  reg_addr[m].addr_mask[rc] = addr_mask;
	  any_addr_mask |= addr_mask;
	}

      /* RELOAD_REG_ANY is the union of what every individual class can do.  */
      reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
    }
}
3113 | ||
3114 | \f | |
3115 | /* Initialize the various global tables that are based on register size. */ | |
3116 | static void | |
3117 | rs6000_init_hard_regno_mode_ok (bool global_init_p) | |
3118 | { | |
3119 | ssize_t r, m, c; | |
3120 | int align64; | |
3121 | int align32; | |
3122 | ||
3123 | /* Precalculate REGNO_REG_CLASS. */ | |
3124 | rs6000_regno_regclass[0] = GENERAL_REGS; | |
3125 | for (r = 1; r < 32; ++r) | |
3126 | rs6000_regno_regclass[r] = BASE_REGS; | |
3127 | ||
3128 | for (r = 32; r < 64; ++r) | |
3129 | rs6000_regno_regclass[r] = FLOAT_REGS; | |
3130 | ||
3131 | for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r) | |
3132 | rs6000_regno_regclass[r] = NO_REGS; | |
3133 | ||
3134 | for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r) | |
3135 | rs6000_regno_regclass[r] = ALTIVEC_REGS; | |
3136 | ||
3137 | rs6000_regno_regclass[CR0_REGNO] = CR0_REGS; | |
3138 | for (r = CR1_REGNO; r <= CR7_REGNO; ++r) | |
3139 | rs6000_regno_regclass[r] = CR_REGS; | |
3140 | ||
3141 | rs6000_regno_regclass[LR_REGNO] = LINK_REGS; | |
3142 | rs6000_regno_regclass[CTR_REGNO] = CTR_REGS; | |
3143 | rs6000_regno_regclass[CA_REGNO] = NO_REGS; | |
3144 | rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS; | |
3145 | rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS; | |
3146 | rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS; | |
3147 | rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS; | |
3148 | rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS; | |
3149 | rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS; | |
3150 | rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS; | |
3151 | rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS; | |
3152 | rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS; | |
3153 | ||
3154 | /* Precalculate register class to simpler reload register class. We don't | |
3155 | need all of the register classes that are combinations of different | |
3156 | classes, just the simple ones that have constraint letters. */ | |
3157 | for (c = 0; c < N_REG_CLASSES; c++) | |
3158 | reg_class_to_reg_type[c] = NO_REG_TYPE; | |
3159 | ||
3160 | reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE; | |
3161 | reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE; | |
3162 | reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE; | |
3163 | reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE; | |
3164 | reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE; | |
3165 | reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE; | |
3166 | reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE; | |
3167 | reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE; | |
3168 | reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE; | |
3169 | reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE; | |
3170 | reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE; | |
3171 | reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE; | |
3172 | ||
3173 | if (TARGET_VSX) | |
3174 | { | |
3175 | reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE; | |
3176 | reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE; | |
3177 | } | |
3178 | else | |
3179 | { | |
3180 | reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE; | |
3181 | reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE; | |
3182 | } | |
3183 | ||
3184 | /* Precalculate the valid memory formats as well as the vector information, | |
3185 | this must be set up before the rs6000_hard_regno_nregs_internal calls | |
3186 | below. */ | |
3187 | gcc_assert ((int)VECTOR_NONE == 0); | |
3188 | memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit)); | |
3189 | memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit)); | |
3190 | ||
3191 | gcc_assert ((int)CODE_FOR_nothing == 0); | |
3192 | memset ((void *) ®_addr[0], '\0', sizeof (reg_addr)); | |
3193 | ||
3194 | gcc_assert ((int)NO_REGS == 0); | |
3195 | memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints)); | |
3196 | ||
3197 | /* The VSX hardware allows native alignment for vectors, but control whether the compiler | |
3198 | believes it can use native alignment or still uses 128-bit alignment. */ | |
3199 | if (TARGET_VSX && !TARGET_VSX_ALIGN_128) | |
3200 | { | |
3201 | align64 = 64; | |
3202 | align32 = 32; | |
3203 | } | |
3204 | else | |
3205 | { | |
3206 | align64 = 128; | |
3207 | align32 = 128; | |
3208 | } | |
3209 | ||
3210 | /* KF mode (IEEE 128-bit in VSX registers). We do not have arithmetic, so | |
3211 | only set the memory modes. Include TFmode if -mabi=ieeelongdouble. */ | |
3212 | if (TARGET_FLOAT128_TYPE) | |
3213 | { | |
3214 | rs6000_vector_mem[KFmode] = VECTOR_VSX; | |
3215 | rs6000_vector_align[KFmode] = 128; | |
3216 | ||
3217 | if (FLOAT128_IEEE_P (TFmode)) | |
3218 | { | |
3219 | rs6000_vector_mem[TFmode] = VECTOR_VSX; | |
3220 | rs6000_vector_align[TFmode] = 128; | |
3221 | } | |
3222 | } | |
3223 | ||
3224 | /* V2DF mode, VSX only. */ | |
3225 | if (TARGET_VSX) | |
3226 | { | |
3227 | rs6000_vector_unit[V2DFmode] = VECTOR_VSX; | |
3228 | rs6000_vector_mem[V2DFmode] = VECTOR_VSX; | |
3229 | rs6000_vector_align[V2DFmode] = align64; | |
3230 | } | |
3231 | ||
3232 | /* V4SF mode, either VSX or Altivec. */ | |
3233 | if (TARGET_VSX) | |
3234 | { | |
3235 | rs6000_vector_unit[V4SFmode] = VECTOR_VSX; | |
3236 | rs6000_vector_mem[V4SFmode] = VECTOR_VSX; | |
3237 | rs6000_vector_align[V4SFmode] = align32; | |
3238 | } | |
3239 | else if (TARGET_ALTIVEC) | |
3240 | { | |
3241 | rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC; | |
3242 | rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC; | |
3243 | rs6000_vector_align[V4SFmode] = align32; | |
3244 | } | |
3245 | ||
3246 | /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads | |
3247 | and stores. */ | |
3248 | if (TARGET_ALTIVEC) | |
3249 | { | |
3250 | rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC; | |
3251 | rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC; | |
3252 | rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC; | |
3253 | rs6000_vector_align[V4SImode] = align32; | |
3254 | rs6000_vector_align[V8HImode] = align32; | |
3255 | rs6000_vector_align[V16QImode] = align32; | |
3256 | ||
3257 | if (TARGET_VSX) | |
3258 | { | |
3259 | rs6000_vector_mem[V4SImode] = VECTOR_VSX; | |
3260 | rs6000_vector_mem[V8HImode] = VECTOR_VSX; | |
3261 | rs6000_vector_mem[V16QImode] = VECTOR_VSX; | |
3262 | } | |
3263 | else | |
3264 | { | |
3265 | rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC; | |
3266 | rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC; | |
3267 | rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC; | |
3268 | } | |
3269 | } | |
3270 | ||
3271 | /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to | |
3272 | do insert/splat/extract. Altivec doesn't have 64-bit integer support. */ | |
3273 | if (TARGET_VSX) | |
3274 | { | |
3275 | rs6000_vector_mem[V2DImode] = VECTOR_VSX; | |
3276 | rs6000_vector_unit[V2DImode] | |
3277 | = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE; | |
3278 | rs6000_vector_align[V2DImode] = align64; | |
3279 | ||
3280 | rs6000_vector_mem[V1TImode] = VECTOR_VSX; | |
3281 | rs6000_vector_unit[V1TImode] | |
3282 | = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE; | |
3283 | rs6000_vector_align[V1TImode] = 128; | |
3284 | } | |
3285 | ||
3286 | /* DFmode, see if we want to use the VSX unit. Memory is handled | |
3287 | differently, so don't set rs6000_vector_mem. */ | |
3288 | if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE) | |
3289 | { | |
3290 | rs6000_vector_unit[DFmode] = VECTOR_VSX; | |
3291 | rs6000_vector_align[DFmode] = 64; | |
3292 | } | |
3293 | ||
3294 | /* SFmode, see if we want to use the VSX unit. */ | |
3295 | if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT) | |
3296 | { | |
3297 | rs6000_vector_unit[SFmode] = VECTOR_VSX; | |
3298 | rs6000_vector_align[SFmode] = 32; | |
3299 | } | |
3300 | ||
3301 | /* Allow TImode in VSX register and set the VSX memory macros. */ | |
3302 | if (TARGET_VSX && TARGET_VSX_TIMODE) | |
3303 | { | |
3304 | rs6000_vector_mem[TImode] = VECTOR_VSX; | |
3305 | rs6000_vector_align[TImode] = align64; | |
3306 | } | |
3307 | ||
3308 | /* TODO add SPE and paired floating point vector support. */ | |
3309 | ||
3310 | /* Register class constraints for the constraints that depend on compile | |
3311 | switches. When the VSX code was added, different constraints were added | |
3312 | based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all | |
3313 | of the VSX registers are used. The register classes for scalar floating | |
3314 | point types is set, based on whether we allow that type into the upper | |
3315 | (Altivec) registers. GCC has register classes to target the Altivec | |
3316 | registers for load/store operations, to select using a VSX memory | |
3317 | operation instead of the traditional floating point operation. The | |
3318 | constraints are: | |
3319 | ||
3320 | d - Register class to use with traditional DFmode instructions. | |
3321 | f - Register class to use with traditional SFmode instructions. | |
3322 | v - Altivec register. | |
3323 | wa - Any VSX register. | |
3324 | wc - Reserved to represent individual CR bits (used in LLVM). | |
3325 | wd - Preferred register class for V2DFmode. | |
3326 | wf - Preferred register class for V4SFmode. | |
3327 | wg - Float register for power6x move insns. | |
3328 | wh - FP register for direct move instructions. | |
3329 | wi - FP or VSX register to hold 64-bit integers for VSX insns. | |
3330 | wj - FP or VSX register to hold 64-bit integers for direct moves. | |
3331 | wk - FP or VSX register to hold 64-bit doubles for direct moves. | |
3332 | wl - Float register if we can do 32-bit signed int loads. | |
3333 | wm - VSX register for ISA 2.07 direct move operations. | |
3334 | wn - always NO_REGS. | |
3335 | wr - GPR if 64-bit mode is permitted. | |
3336 | ws - Register class to do ISA 2.06 DF operations. | |
3337 | wt - VSX register for TImode in VSX registers. | |
3338 | wu - Altivec register for ISA 2.07 VSX SF/SI load/stores. | |
3339 | wv - Altivec register for ISA 2.06 VSX DF/DI load/stores. | |
3340 | ww - Register class to do SF conversions in with VSX operations. | |
3341 | wx - Float register if we can do 32-bit int stores. | |
3342 | wy - Register class to do ISA 2.07 SF operations. | |
3343 | wz - Float register if we can do 32-bit unsigned int loads. | |
3344 | wH - Altivec register if SImode is allowed in VSX registers. | |
3345 | wI - VSX register if SImode is allowed in VSX registers. | |
3346 | wJ - VSX register if QImode/HImode are allowed in VSX registers. | |
3347 | wK - Altivec register if QImode/HImode are allowed in VSX registers. */ | |
3348 | ||
3349 | if (TARGET_HARD_FLOAT && TARGET_FPRS) | |
3350 | rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */ | |
3351 | ||
3352 | if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) | |
3353 | rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */ | |
3354 | ||
3355 | if (TARGET_VSX) | |
3356 | { | |
3357 | rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS; | |
3358 | rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */ | |
3359 | rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */ | |
3360 | ||
3361 | if (TARGET_VSX_TIMODE) | |
3362 | rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */ | |
3363 | ||
3364 | if (TARGET_UPPER_REGS_DF) /* DFmode */ | |
3365 | { | |
3366 | rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS; | |
3367 | rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS; | |
3368 | } | |
3369 | else | |
3370 | rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS; | |
3371 | ||
3372 | if (TARGET_UPPER_REGS_DI) /* DImode */ | |
3373 | rs6000_constraints[RS6000_CONSTRAINT_wi] = VSX_REGS; | |
3374 | else | |
3375 | rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; | |
3376 | } | |
3377 | ||
3378 | /* Add conditional constraints based on various options, to allow us to | |
3379 | collapse multiple insn patterns. */ | |
3380 | if (TARGET_ALTIVEC) | |
3381 | rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS; | |
3382 | ||
3383 | if (TARGET_MFPGPR) /* DFmode */ | |
3384 | rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS; | |
3385 | ||
3386 | if (TARGET_LFIWAX) | |
3387 | rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */ | |
3388 | ||
3389 | if (TARGET_DIRECT_MOVE) | |
3390 | { | |
3391 | rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS; | |
3392 | rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */ | |
3393 | = rs6000_constraints[RS6000_CONSTRAINT_wi]; | |
3394 | rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */ | |
3395 | = rs6000_constraints[RS6000_CONSTRAINT_ws]; | |
3396 | rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS; | |
3397 | } | |
3398 | ||
3399 | if (TARGET_POWERPC64) | |
3400 | { | |
3401 | rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS; | |
3402 | rs6000_constraints[RS6000_CONSTRAINT_wA] = BASE_REGS; | |
3403 | } | |
3404 | ||
3405 | if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */ | |
3406 | { | |
3407 | rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS; | |
3408 | rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS; | |
3409 | rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS; | |
3410 | } | |
3411 | else if (TARGET_P8_VECTOR) | |
3412 | { | |
3413 | rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS; | |
3414 | rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS; | |
3415 | } | |
3416 | else if (TARGET_VSX) | |
3417 | rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS; | |
3418 | ||
3419 | if (TARGET_STFIWX) | |
3420 | rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */ | |
3421 | ||
3422 | if (TARGET_LFIWZX) | |
3423 | rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */ | |
3424 | ||
3425 | if (TARGET_FLOAT128_TYPE) | |
3426 | { | |
3427 | rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */ | |
3428 | if (FLOAT128_IEEE_P (TFmode)) | |
3429 | rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */ | |
3430 | } | |
3431 | ||
3432 | /* Support for new D-form instructions. */ | |
3433 | if (TARGET_P9_DFORM_SCALAR) | |
3434 | rs6000_constraints[RS6000_CONSTRAINT_wb] = ALTIVEC_REGS; | |
3435 | ||
3436 | /* Support for ISA 3.0 (power9) vectors. */ | |
3437 | if (TARGET_P9_VECTOR) | |
3438 | rs6000_constraints[RS6000_CONSTRAINT_wo] = VSX_REGS; | |
3439 | ||
3440 | /* Support for new direct moves (ISA 3.0 + 64bit). */ | |
3441 | if (TARGET_DIRECT_MOVE_128) | |
3442 | rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS; | |
3443 | ||
3444 | /* Support small integers in VSX registers. */ | |
3445 | if (TARGET_VSX_SMALL_INTEGER) | |
3446 | { | |
3447 | rs6000_constraints[RS6000_CONSTRAINT_wH] = ALTIVEC_REGS; | |
3448 | rs6000_constraints[RS6000_CONSTRAINT_wI] = FLOAT_REGS; | |
3449 | if (TARGET_P9_VECTOR) | |
3450 | { | |
3451 | rs6000_constraints[RS6000_CONSTRAINT_wJ] = FLOAT_REGS; | |
3452 | rs6000_constraints[RS6000_CONSTRAINT_wK] = ALTIVEC_REGS; | |
3453 | } | |
3454 | } | |
3455 | ||
3456 | /* Set up the reload helper and direct move functions. */ | |
3457 | if (TARGET_VSX || TARGET_ALTIVEC) | |
3458 | { | |
3459 | if (TARGET_64BIT) | |
3460 | { | |
3461 | reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store; | |
3462 | reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load; | |
3463 | reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store; | |
3464 | reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load; | |
3465 | reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store; | |
3466 | reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load; | |
3467 | reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store; | |
3468 | reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load; | |
3469 | reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store; | |
3470 | reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load; | |
3471 | reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store; | |
3472 | reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load; | |
3473 | reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store; | |
3474 | reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load; | |
3475 | reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store; | |
3476 | reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load; | |
3477 | reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store; | |
3478 | reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load; | |
3479 | reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store; | |
3480 | reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load; | |
3481 | ||
3482 | if (FLOAT128_VECTOR_P (KFmode)) | |
3483 | { | |
3484 | reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_di_store; | |
3485 | reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_di_load; | |
3486 | } | |
3487 | ||
3488 | if (FLOAT128_VECTOR_P (TFmode)) | |
3489 | { | |
3490 | reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_di_store; | |
3491 | reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_di_load; | |
3492 | } | |
3493 | ||
3494 | /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are | |
3495 | available. */ | |
3496 | if (TARGET_NO_SDMODE_STACK) | |
3497 | { | |
3498 | reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store; | |
3499 | reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load; | |
3500 | } | |
3501 | ||
3502 | if (TARGET_VSX_TIMODE) | |
3503 | { | |
3504 | reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store; | |
3505 | reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load; | |
3506 | } | |
3507 | ||
3508 | if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128) | |
3509 | { | |
3510 | reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti; | |
3511 | reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti; | |
3512 | reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df; | |
3513 | reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di; | |
3514 | reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf; | |
3515 | reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si; | |
3516 | reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi; | |
3517 | reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi; | |
3518 | reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf; | |
3519 | ||
3520 | reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti; | |
3521 | reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti; | |
3522 | reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df; | |
3523 | reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di; | |
3524 | reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf; | |
3525 | reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si; | |
3526 | reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi; | |
3527 | reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi; | |
3528 | reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf; | |
3529 | ||
3530 | if (FLOAT128_VECTOR_P (KFmode)) | |
3531 | { | |
3532 | reg_addr[KFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxkf; | |
3533 | reg_addr[KFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprkf; | |
3534 | } | |
3535 | ||
3536 | if (FLOAT128_VECTOR_P (TFmode)) | |
3537 | { | |
3538 | reg_addr[TFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxtf; | |
3539 | reg_addr[TFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprtf; | |
3540 | } | |
3541 | } | |
3542 | } | |
3543 | else | |
3544 | { | |
3545 | reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store; | |
3546 | reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load; | |
3547 | reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store; | |
3548 | reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load; | |
3549 | reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store; | |
3550 | reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load; | |
3551 | reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store; | |
3552 | reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load; | |
3553 | reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store; | |
3554 | reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load; | |
3555 | reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store; | |
3556 | reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load; | |
3557 | reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store; | |
3558 | reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load; | |
3559 | reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store; | |
3560 | reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load; | |
3561 | reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store; | |
3562 | reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load; | |
3563 | reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store; | |
3564 | reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load; | |
3565 | ||
3566 | if (FLOAT128_VECTOR_P (KFmode)) | |
3567 | { | |
3568 | reg_addr[KFmode].reload_store = CODE_FOR_reload_kf_si_store; | |
3569 | reg_addr[KFmode].reload_load = CODE_FOR_reload_kf_si_load; | |
3570 | } | |
3571 | ||
3572 | if (FLOAT128_IEEE_P (TFmode)) | |
3573 | { | |
3574 | reg_addr[TFmode].reload_store = CODE_FOR_reload_tf_si_store; | |
3575 | reg_addr[TFmode].reload_load = CODE_FOR_reload_tf_si_load; | |
3576 | } | |
3577 | ||
3578 | /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are | |
3579 | available. */ | |
3580 | if (TARGET_NO_SDMODE_STACK) | |
3581 | { | |
3582 | reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store; | |
3583 | reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load; | |
3584 | } | |
3585 | ||
3586 | if (TARGET_VSX_TIMODE) | |
3587 | { | |
3588 | reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store; | |
3589 | reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load; | |
3590 | } | |
3591 | ||
3592 | if (TARGET_DIRECT_MOVE) | |
3593 | { | |
3594 | reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi; | |
3595 | reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd; | |
3596 | reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf; | |
3597 | } | |
3598 | } | |
3599 | ||
3600 | if (TARGET_UPPER_REGS_DF) | |
3601 | reg_addr[DFmode].scalar_in_vmx_p = true; | |
3602 | ||
3603 | if (TARGET_UPPER_REGS_DI) | |
3604 | reg_addr[DImode].scalar_in_vmx_p = true; | |
3605 | ||
3606 | if (TARGET_UPPER_REGS_SF) | |
3607 | reg_addr[SFmode].scalar_in_vmx_p = true; | |
3608 | ||
3609 | if (TARGET_VSX_SMALL_INTEGER) | |
3610 | { | |
3611 | reg_addr[SImode].scalar_in_vmx_p = true; | |
3612 | if (TARGET_P9_VECTOR) | |
3613 | { | |
3614 | reg_addr[HImode].scalar_in_vmx_p = true; | |
3615 | reg_addr[QImode].scalar_in_vmx_p = true; | |
3616 | } | |
3617 | } | |
3618 | } | |
3619 | ||
3620 | /* Setup the fusion operations. */ | |
3621 | if (TARGET_P8_FUSION) | |
3622 | { | |
3623 | reg_addr[QImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_qi; | |
3624 | reg_addr[HImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_hi; | |
3625 | reg_addr[SImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_si; | |
3626 | if (TARGET_64BIT) | |
3627 | reg_addr[DImode].fusion_gpr_ld = CODE_FOR_fusion_gpr_load_di; | |
3628 | } | |
3629 | ||
3630 | if (TARGET_P9_FUSION) | |
3631 | { | |
3632 | struct fuse_insns { | |
3633 | enum machine_mode mode; /* mode of the fused type. */ | |
3634 | enum machine_mode pmode; /* pointer mode. */ | |
3635 | enum rs6000_reload_reg_type rtype; /* register type. */ | |
3636 | enum insn_code load; /* load insn. */ | |
3637 | enum insn_code store; /* store insn. */ | |
3638 | }; | |
3639 | ||
3640 | static const struct fuse_insns addis_insns[] = { | |
0d4a1197 | 3641 | { E_SFmode, E_DImode, RELOAD_REG_FPR, |
83349046 SB |
3642 | CODE_FOR_fusion_vsx_di_sf_load, |
3643 | CODE_FOR_fusion_vsx_di_sf_store }, | |
3644 | ||
0d4a1197 | 3645 | { E_SFmode, E_SImode, RELOAD_REG_FPR, |
83349046 SB |
3646 | CODE_FOR_fusion_vsx_si_sf_load, |
3647 | CODE_FOR_fusion_vsx_si_sf_store }, | |
3648 | ||
0d4a1197 | 3649 | { E_DFmode, E_DImode, RELOAD_REG_FPR, |
83349046 SB |
3650 | CODE_FOR_fusion_vsx_di_df_load, |
3651 | CODE_FOR_fusion_vsx_di_df_store }, | |
3652 | ||
0d4a1197 | 3653 | { E_DFmode, E_SImode, RELOAD_REG_FPR, |
83349046 SB |
3654 | CODE_FOR_fusion_vsx_si_df_load, |
3655 | CODE_FOR_fusion_vsx_si_df_store }, | |
3656 | ||
0d4a1197 | 3657 | { E_DImode, E_DImode, RELOAD_REG_FPR, |
83349046 SB |
3658 | CODE_FOR_fusion_vsx_di_di_load, |
3659 | CODE_FOR_fusion_vsx_di_di_store }, | |
3660 | ||
0d4a1197 | 3661 | { E_DImode, E_SImode, RELOAD_REG_FPR, |
83349046 SB |
3662 | CODE_FOR_fusion_vsx_si_di_load, |
3663 | CODE_FOR_fusion_vsx_si_di_store }, | |
3664 | ||
0d4a1197 | 3665 | { E_QImode, E_DImode, RELOAD_REG_GPR, |
83349046 SB |
3666 | CODE_FOR_fusion_gpr_di_qi_load, |
3667 | CODE_FOR_fusion_gpr_di_qi_store }, | |
3668 | ||
0d4a1197 | 3669 | { E_QImode, E_SImode, RELOAD_REG_GPR, |
83349046 SB |
3670 | CODE_FOR_fusion_gpr_si_qi_load, |
3671 | CODE_FOR_fusion_gpr_si_qi_store }, | |
3672 | ||
0d4a1197 | 3673 | { E_HImode, E_DImode, RELOAD_REG_GPR, |
83349046 SB |
3674 | CODE_FOR_fusion_gpr_di_hi_load, |
3675 | CODE_FOR_fusion_gpr_di_hi_store }, | |
3676 | ||
0d4a1197 | 3677 | { E_HImode, E_SImode, RELOAD_REG_GPR, |
83349046 SB |
3678 | CODE_FOR_fusion_gpr_si_hi_load, |
3679 | CODE_FOR_fusion_gpr_si_hi_store }, | |
3680 | ||
0d4a1197 | 3681 | { E_SImode, E_DImode, RELOAD_REG_GPR, |
83349046 SB |
3682 | CODE_FOR_fusion_gpr_di_si_load, |
3683 | CODE_FOR_fusion_gpr_di_si_store }, | |
3684 | ||
0d4a1197 | 3685 | { E_SImode, E_SImode, RELOAD_REG_GPR, |
83349046 SB |
3686 | CODE_FOR_fusion_gpr_si_si_load, |
3687 | CODE_FOR_fusion_gpr_si_si_store }, | |
3688 | ||
0d4a1197 | 3689 | { E_SFmode, E_DImode, RELOAD_REG_GPR, |
83349046 SB |
3690 | CODE_FOR_fusion_gpr_di_sf_load, |
3691 | CODE_FOR_fusion_gpr_di_sf_store }, | |
3692 | ||
0d4a1197 | 3693 | { E_SFmode, E_SImode, RELOAD_REG_GPR, |
83349046 SB |
3694 | CODE_FOR_fusion_gpr_si_sf_load, |
3695 | CODE_FOR_fusion_gpr_si_sf_store }, | |
3696 | ||
0d4a1197 | 3697 | { E_DImode, E_DImode, RELOAD_REG_GPR, |
83349046 SB |
3698 | CODE_FOR_fusion_gpr_di_di_load, |
3699 | CODE_FOR_fusion_gpr_di_di_store }, | |
3700 | ||
0d4a1197 | 3701 | { E_DFmode, E_DImode, RELOAD_REG_GPR, |
83349046 SB |
3702 | CODE_FOR_fusion_gpr_di_df_load, |
3703 | CODE_FOR_fusion_gpr_di_df_store }, | |
3704 | }; | |
3705 | ||
b8506a8a | 3706 | machine_mode cur_pmode = Pmode; |
83349046 SB |
3707 | size_t i; |
3708 | ||
3709 | for (i = 0; i < ARRAY_SIZE (addis_insns); i++) | |
3710 | { | |
b8506a8a | 3711 | machine_mode xmode = addis_insns[i].mode; |
83349046 SB |
3712 | enum rs6000_reload_reg_type rtype = addis_insns[i].rtype; |
3713 | ||
3714 | if (addis_insns[i].pmode != cur_pmode) | |
3715 | continue; | |
3716 | ||
3717 | if (rtype == RELOAD_REG_FPR | |
3718 | && (!TARGET_HARD_FLOAT || !TARGET_FPRS)) | |
3719 | continue; | |
3720 | ||
3721 | reg_addr[xmode].fusion_addis_ld[rtype] = addis_insns[i].load; | |
3722 | reg_addr[xmode].fusion_addis_st[rtype] = addis_insns[i].store; | |
3723 | ||
3724 | if (rtype == RELOAD_REG_FPR && TARGET_P9_DFORM_SCALAR) | |
3725 | { | |
3726 | reg_addr[xmode].fusion_addis_ld[RELOAD_REG_VMX] | |
3727 | = addis_insns[i].load; | |
3728 | reg_addr[xmode].fusion_addis_st[RELOAD_REG_VMX] | |
3729 | = addis_insns[i].store; | |
3730 | } | |
3731 | } | |
3732 | } | |
3733 | ||
3734 | /* Note which types we support fusing TOC setup plus memory insn. We only do | |
3735 | fused TOCs for medium/large code models. */ | |
3736 | if (TARGET_P8_FUSION && TARGET_TOC_FUSION && TARGET_POWERPC64 | |
3737 | && (TARGET_CMODEL != CMODEL_SMALL)) | |
3738 | { | |
3739 | reg_addr[QImode].fused_toc = true; | |
3740 | reg_addr[HImode].fused_toc = true; | |
3741 | reg_addr[SImode].fused_toc = true; | |
3742 | reg_addr[DImode].fused_toc = true; | |
3743 | if (TARGET_HARD_FLOAT && TARGET_FPRS) | |
3744 | { | |
3745 | if (TARGET_SINGLE_FLOAT) | |
3746 | reg_addr[SFmode].fused_toc = true; | |
3747 | if (TARGET_DOUBLE_FLOAT) | |
3748 | reg_addr[DFmode].fused_toc = true; | |
3749 | } | |
3750 | } | |
3751 | ||
3752 | /* Precalculate HARD_REGNO_NREGS. */ | |
3753 | for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r) | |
3754 | for (m = 0; m < NUM_MACHINE_MODES; ++m) | |
3755 | rs6000_hard_regno_nregs[m][r] | |
3756 | = rs6000_hard_regno_nregs_internal (r, (machine_mode)m); | |
3757 | ||
f939c3e6 | 3758 | /* Precalculate TARGET_HARD_REGNO_MODE_OK. */ |
83349046 SB |
3759 | for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r) |
3760 | for (m = 0; m < NUM_MACHINE_MODES; ++m) | |
f939c3e6 | 3761 | if (rs6000_hard_regno_mode_ok_uncached (r, (machine_mode)m)) |
83349046 SB |
3762 | rs6000_hard_regno_mode_ok_p[m][r] = true; |
3763 | ||
3764 | /* Precalculate CLASS_MAX_NREGS sizes. */ | |
3765 | for (c = 0; c < LIM_REG_CLASSES; ++c) | |
3766 | { | |
3767 | int reg_size; | |
3768 | ||
3769 | if (TARGET_VSX && VSX_REG_CLASS_P (c)) | |
3770 | reg_size = UNITS_PER_VSX_WORD; | |
3771 | ||
3772 | else if (c == ALTIVEC_REGS) | |
3773 | reg_size = UNITS_PER_ALTIVEC_WORD; | |
3774 | ||
3775 | else if (c == FLOAT_REGS) | |
3776 | reg_size = UNITS_PER_FP_WORD; | |
3777 | ||
3778 | else | |
3779 | reg_size = UNITS_PER_WORD; | |
3780 | ||
3781 | for (m = 0; m < NUM_MACHINE_MODES; ++m) | |
3782 | { | |
3783 | machine_mode m2 = (machine_mode)m; | |
3784 | int reg_size2 = reg_size; | |
3785 | ||
3786 | /* TDmode & IBM 128-bit floating point always takes 2 registers, even | |
3787 | in VSX. */ | |
3788 | if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m)) | |
3789 | reg_size2 = UNITS_PER_FP_WORD; | |
3790 | ||
3791 | rs6000_class_max_nregs[m][c] | |
3792 | = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2; | |
3793 | } | |
3794 | } | |
3795 | ||
3796 | if (TARGET_E500_DOUBLE) | |
3797 | rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1; | |
3798 | ||
3799 | /* Calculate which modes to automatically generate code to use a the | |
3800 | reciprocal divide and square root instructions. In the future, possibly | |
3801 | automatically generate the instructions even if the user did not specify | |
3802 | -mrecip. The older machines double precision reciprocal sqrt estimate is | |
3803 | not accurate enough. */ | |
3804 | memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits)); | |
3805 | if (TARGET_FRES) | |
3806 | rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE; | |
3807 | if (TARGET_FRE) | |
3808 | rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE; | |
3809 | if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)) | |
3810 | rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE; | |
3811 | if (VECTOR_UNIT_VSX_P (V2DFmode)) | |
3812 | rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE; | |
3813 | ||
3814 | if (TARGET_FRSQRTES) | |
3815 | rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; | |
3816 | if (TARGET_FRSQRTE) | |
3817 | rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; | |
3818 | if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)) | |
3819 | rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; | |
3820 | if (VECTOR_UNIT_VSX_P (V2DFmode)) | |
3821 | rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE; | |
3822 | ||
3823 | if (rs6000_recip_control) | |
3824 | { | |
3825 | if (!flag_finite_math_only) | |
3826 | warning (0, "-mrecip requires -ffinite-math or -ffast-math"); | |
3827 | if (flag_trapping_math) | |
3828 | warning (0, "-mrecip requires -fno-trapping-math or -ffast-math"); | |
3829 | if (!flag_reciprocal_math) | |
3830 | warning (0, "-mrecip requires -freciprocal-math or -ffast-math"); | |
3831 | if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math) | |
3832 | { | |
3833 | if (RS6000_RECIP_HAVE_RE_P (SFmode) | |
3834 | && (rs6000_recip_control & RECIP_SF_DIV) != 0) | |
3835 | rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE; | |
3836 | ||
3837 | if (RS6000_RECIP_HAVE_RE_P (DFmode) | |
3838 | && (rs6000_recip_control & RECIP_DF_DIV) != 0) | |
3839 | rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE; | |
3840 | ||
3841 | if (RS6000_RECIP_HAVE_RE_P (V4SFmode) | |
3842 | && (rs6000_recip_control & RECIP_V4SF_DIV) != 0) | |
3843 | rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE; | |
3844 | ||
3845 | if (RS6000_RECIP_HAVE_RE_P (V2DFmode) | |
3846 | && (rs6000_recip_control & RECIP_V2DF_DIV) != 0) | |
3847 | rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE; | |
3848 | ||
3849 | if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode) | |
3850 | && (rs6000_recip_control & RECIP_SF_RSQRT) != 0) | |
3851 | rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; | |
3852 | ||
3853 | if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode) | |
3854 | && (rs6000_recip_control & RECIP_DF_RSQRT) != 0) | |
3855 | rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; | |
3856 | ||
3857 | if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode) | |
3858 | && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0) | |
3859 | rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; | |
3860 | ||
3861 | if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode) | |
3862 | && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0) | |
3863 | rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE; | |
3864 | } | |
3865 | } | |
3866 | ||
3867 | /* Update the addr mask bits in reg_addr to help secondary reload and go if | |
3868 | legitimate address support to figure out the appropriate addressing to | |
3869 | use. */ | |
3870 | rs6000_setup_reg_addr_masks (); | |
3871 | ||
3872 | if (global_init_p || TARGET_DEBUG_TARGET) | |
3873 | { | |
3874 | if (TARGET_DEBUG_REG) | |
3875 | rs6000_debug_reg_global (); | |
3876 | ||
3877 | if (TARGET_DEBUG_COST || TARGET_DEBUG_REG) | |
3878 | fprintf (stderr, | |
3879 | "SImode variable mult cost = %d\n" | |
3880 | "SImode constant mult cost = %d\n" | |
3881 | "SImode short constant mult cost = %d\n" | |
3882 | "DImode multipliciation cost = %d\n" | |
3883 | "SImode division cost = %d\n" | |
3884 | "DImode division cost = %d\n" | |
3885 | "Simple fp operation cost = %d\n" | |
3886 | "DFmode multiplication cost = %d\n" | |
3887 | "SFmode division cost = %d\n" | |
3888 | "DFmode division cost = %d\n" | |
3889 | "cache line size = %d\n" | |
3890 | "l1 cache size = %d\n" | |
3891 | "l2 cache size = %d\n" | |
3892 | "simultaneous prefetches = %d\n" | |
3893 | "\n", | |
3894 | rs6000_cost->mulsi, | |
3895 | rs6000_cost->mulsi_const, | |
3896 | rs6000_cost->mulsi_const9, | |
3897 | rs6000_cost->muldi, | |
3898 | rs6000_cost->divsi, | |
3899 | rs6000_cost->divdi, | |
3900 | rs6000_cost->fp, | |
3901 | rs6000_cost->dmul, | |
3902 | rs6000_cost->sdiv, | |
3903 | rs6000_cost->ddiv, | |
3904 | rs6000_cost->cache_line_size, | |
3905 | rs6000_cost->l1_cache_size, | |
3906 | rs6000_cost->l2_cache_size, | |
3907 | rs6000_cost->simultaneous_prefetches); | |
3908 | } | |
3909 | } | |
3910 | ||
#if TARGET_MACHO
/* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS.  */

static void
darwin_rs6000_override_options (void)
{
  /* AltiVec is always part of the Darwin ABI; it cannot (validly) be
     switched off.  */
  rs6000_altivec_abi = 1;
  TARGET_ALTIVEC_VRSAVE = 1;
  rs6000_current_abi = ABI_DARWIN;

  if (DEFAULT_ABI == ABI_DARWIN && TARGET_64BIT)
    darwin_one_byte_bool = 1;

  /* -m64 only makes sense on a PowerPC64 architecture; force it on,
     with a warning.  */
  if (TARGET_64BIT && !TARGET_POWERPC64)
    {
      rs6000_isa_flags |= OPTION_MASK_POWERPC64;
      warning (0, "-m64 requires PowerPC64 architecture, enabling");
    }

  if (flag_mkernel)
    {
      rs6000_default_long_calls = 1;
      rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
    }

  /* Did the user explicitly set -maltivec/-mno-altivec?  If so, never
     override that choice below.  */
  bool altivec_explicit
    = (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC) != 0;

  /* Darwin's 64-bit ABI includes Altivec, so -m64 implies -maltivec
     outside of kernel/kext builds.  */
  if (TARGET_64BIT && !flag_mkernel && !flag_apple_kext && !altivec_explicit)
    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;

  /* Unless the user (not the configurer) overrode it with -mcpu=G3 or
     -mno-altivec, 10.5+ targets default to G4 when not targeting the
     kernel.  */
  if (!flag_mkernel
      && !flag_apple_kext
      && !altivec_explicit
      && !global_options_set.x_rs6000_cpu_index
      && strverscmp (darwin_macosx_version_min, "10.5") >= 0)
    rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
}
#endif
3958 | ||
3959 | /* If not otherwise specified by a target, make 'long double' equivalent to | |
3960 | 'double'. */ | |
3961 | ||
3962 | #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE | |
3963 | #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64 | |
3964 | #endif | |
3965 | ||
3966 | /* Return the builtin mask of the various options used that could affect which | |
3967 | builtins were used. In the past we used target_flags, but we've run out of | |
3968 | bits, and some options like SPE and PAIRED are no longer in | |
3969 | target_flags. */ | |
3970 | ||
3971 | HOST_WIDE_INT | |
3972 | rs6000_builtin_mask_calculate (void) | |
3973 | { | |
3974 | return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0) | |
3975 | | ((TARGET_CMPB) ? RS6000_BTM_CMPB : 0) | |
3976 | | ((TARGET_VSX) ? RS6000_BTM_VSX : 0) | |
3977 | | ((TARGET_SPE) ? RS6000_BTM_SPE : 0) | |
3978 | | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0) | |
3979 | | ((TARGET_FRE) ? RS6000_BTM_FRE : 0) | |
3980 | | ((TARGET_FRES) ? RS6000_BTM_FRES : 0) | |
3981 | | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0) | |
3982 | | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0) | |
3983 | | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0) | |
3984 | | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0) | |
3985 | | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0) | |
3986 | | ((TARGET_P9_VECTOR) ? RS6000_BTM_P9_VECTOR : 0) | |
3987 | | ((TARGET_P9_MISC) ? RS6000_BTM_P9_MISC : 0) | |
3988 | | ((TARGET_MODULO) ? RS6000_BTM_MODULO : 0) | |
3989 | | ((TARGET_64BIT) ? RS6000_BTM_64BIT : 0) | |
3990 | | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0) | |
3991 | | ((TARGET_HTM) ? RS6000_BTM_HTM : 0) | |
3992 | | ((TARGET_DFP) ? RS6000_BTM_DFP : 0) | |
3993 | | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0) | |
3994 | | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0) | |
3995 | | ((TARGET_FLOAT128_TYPE) ? RS6000_BTM_FLOAT128 : 0)); | |
3996 | } | |
3997 | ||
3998 | /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered | |
3999 | to clobber the XER[CA] bit because clobbering that bit without telling | |
4000 | the compiler worked just fine with versions of GCC before GCC 5, and | |
4001 | breaking a lot of older code in ways that are hard to track down is | |
4002 | not such a great idea. */ | |
4003 | ||
4004 | static rtx_insn * | |
4005 | rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/, | |
4006 | vec<const char *> &/*constraints*/, | |
4007 | vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs) | |
4008 | { | |
4009 | clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO)); | |
4010 | SET_HARD_REG_BIT (clobbered_regs, CA_REGNO); | |
4011 | return NULL; | |
4012 | } | |
4013 | ||
4014 | /* Override command line options. | |
4015 | ||
4016 | Combine build-specific configuration information with options | |
4017 | specified on the command line to set various state variables which | |
4018 | influence code generation, optimization, and expansion of built-in | |
4019 | functions. Assure that command-line configuration preferences are | |
4020 | compatible with each other and with the build configuration; issue | |
4021 | warnings while adjusting configuration or error messages while | |
4022 | rejecting configuration. | |
4023 | ||
4024 | Upon entry to this function: | |
4025 | ||
4026 | This function is called once at the beginning of | |
4027 | compilation, and then again at the start and end of compiling | |
4028 | each section of code that has a different configuration, as | |
4029 | indicated, for example, by adding the | |
4030 | ||
4031 | __attribute__((__target__("cpu=power9"))) | |
4032 | ||
4033 | qualifier to a function definition or, for example, by bracketing | |
4034 | code between | |
4035 | ||
4036 | #pragma GCC target("altivec") | |
4037 | ||
4038 | and | |
4039 | ||
4040 | #pragma GCC reset_options | |
4041 | ||
4042 | directives. Parameter global_init_p is true for the initial | |
4043 | invocation, which initializes global variables, and false for all | |
4044 | subsequent invocations. | |
4045 | ||
4046 | ||
4047 | Various global state information is assumed to be valid. This | |
4048 | includes OPTION_TARGET_CPU_DEFAULT, representing the name of the | |
4049 | default CPU specified at build configure time, TARGET_DEFAULT, | |
4050 | representing the default set of option flags for the default | |
4051 | target, and global_options_set.x_rs6000_isa_flags, representing | |
4052 | which options were requested on the command line. | |
4053 | ||
4054 | Upon return from this function: | |
4055 | ||
4056 | rs6000_isa_flags_explicit has a non-zero bit for each flag that | |
4057 | was set by name on the command line. Additionally, if certain | |
4058 | attributes are automatically enabled or disabled by this function | |
4059 | in order to assure compatibility between options and | |
4060 | configuration, the flags associated with those attributes are | |
4061 | also set. By setting these "explicit bits", we avoid the risk | |
4062 | that other code might accidentally overwrite these particular | |
4063 | attributes with "default values". | |
4064 | ||
4065 | The various bits of rs6000_isa_flags are set to indicate the | |
4066 | target options that have been selected for the most current | |
4067 | compilation efforts. This has the effect of also turning on the | |
4068 | associated TARGET_XXX values since these are macros which are | |
4069 | generally defined to test the corresponding bit of the | |
4070 | rs6000_isa_flags variable. | |
4071 | ||
4072 | The variable rs6000_builtin_mask is set to represent the target | |
4073 | options for the most current compilation efforts, consistent with | |
4074 | the current contents of rs6000_isa_flags. This variable controls | |
4075 | expansion of built-in functions. | |
4076 | ||
4077 | Various other global variables and fields of global structures | |
4078 | (over 50 in all) are initialized to reflect the desired options | |
4079 | for the most current compilation efforts. */ | |
4080 | ||
4081 | static bool | |
4082 | rs6000_option_override_internal (bool global_init_p) | |
4083 | { | |
4084 | bool ret = true; | |
4085 | bool have_cpu = false; | |
4086 | ||
4087 | /* The default cpu requested at configure time, if any. */ | |
4088 | const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT; | |
4089 | ||
4090 | HOST_WIDE_INT set_masks; | |
4091 | HOST_WIDE_INT ignore_masks; | |
4092 | int cpu_index; | |
4093 | int tune_index; | |
4094 | struct cl_target_option *main_target_opt | |
4095 | = ((global_init_p || target_option_default_node == NULL) | |
4096 | ? NULL : TREE_TARGET_OPTION (target_option_default_node)); | |
4097 | ||
4098 | /* Print defaults. */ | |
4099 | if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p) | |
4100 | rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT); | |
4101 | ||
4102 | /* Remember the explicit arguments. */ | |
4103 | if (global_init_p) | |
4104 | rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags; | |
4105 | ||
4106 | /* On 64-bit Darwin, power alignment is ABI-incompatible with some C | |
4107 | library functions, so warn about it. The flag may be useful for | |
4108 | performance studies from time to time though, so don't disable it | |
4109 | entirely. */ | |
4110 | if (global_options_set.x_rs6000_alignment_flags | |
4111 | && rs6000_alignment_flags == MASK_ALIGN_POWER | |
4112 | && DEFAULT_ABI == ABI_DARWIN | |
4113 | && TARGET_64BIT) | |
4114 | warning (0, "-malign-power is not supported for 64-bit Darwin;" | |
4115 | " it is incompatible with the installed C and C++ libraries"); | |
4116 | ||
4117 | /* Numerous experiments show that IRA based loop pressure | |
4118 | calculation works better for RTL loop invariant motion on targets | |
4119 | with enough (>= 32) registers. It is an expensive optimization. | |
4120 | So it is on only for peak performance. */ | |
4121 | if (optimize >= 3 && global_init_p | |
4122 | && !global_options_set.x_flag_ira_loop_pressure) | |
4123 | flag_ira_loop_pressure = 1; | |
4124 | ||
4125 | /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in order | |
4126 | for tracebacks to be complete but not if any -fasynchronous-unwind-tables | |
4127 | options were already specified. */ | |
4128 | if (flag_sanitize & SANITIZE_USER_ADDRESS | |
4129 | && !global_options_set.x_flag_asynchronous_unwind_tables) | |
4130 | flag_asynchronous_unwind_tables = 1; | |
4131 | ||
4132 | /* Set the pointer size. */ | |
4133 | if (TARGET_64BIT) | |
4134 | { | |
501623d4 | 4135 | rs6000_pmode = DImode; |
83349046 SB |
4136 | rs6000_pointer_size = 64; |
4137 | } | |
4138 | else | |
4139 | { | |
501623d4 | 4140 | rs6000_pmode = SImode; |
83349046 SB |
4141 | rs6000_pointer_size = 32; |
4142 | } | |
4143 | ||
4144 | /* Some OSs don't support saving the high part of 64-bit registers on context | |
4145 | switch. Other OSs don't support saving Altivec registers. On those OSs, | |
4146 | we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings; | |
4147 | if the user wants either, the user must explicitly specify them and we | |
4148 | won't interfere with the user's specification. */ | |
4149 | ||
4150 | set_masks = POWERPC_MASKS; | |
4151 | #ifdef OS_MISSING_POWERPC64 | |
4152 | if (OS_MISSING_POWERPC64) | |
4153 | set_masks &= ~OPTION_MASK_POWERPC64; | |
4154 | #endif | |
4155 | #ifdef OS_MISSING_ALTIVEC | |
4156 | if (OS_MISSING_ALTIVEC) | |
4157 | set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX | |
4158 | | OTHER_VSX_VECTOR_MASKS); | |
4159 | #endif | |
4160 | ||
4161 | /* Don't override by the processor default if given explicitly. */ | |
4162 | set_masks &= ~rs6000_isa_flags_explicit; | |
4163 | ||
4164 | /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed | |
4165 | the cpu in a target attribute or pragma, but did not specify a tuning | |
4166 | option, use the cpu for the tuning option rather than the option specified | |
4167 | with -mtune on the command line. Process a '--with-cpu' configuration | |
4168 | request as an implicit --cpu. */ | |
4169 | if (rs6000_cpu_index >= 0) | |
4170 | { | |
4171 | cpu_index = rs6000_cpu_index; | |
4172 | have_cpu = true; | |
4173 | } | |
4174 | else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0) | |
4175 | { | |
4176 | rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index; | |
4177 | have_cpu = true; | |
4178 | } | |
4179 | else if (implicit_cpu) | |
4180 | { | |
4181 | rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu); | |
4182 | have_cpu = true; | |
4183 | } | |
4184 | else | |
4185 | { | |
4186 | /* PowerPC 64-bit LE requires at least ISA 2.07. */ | |
4187 | const char *default_cpu = ((!TARGET_POWERPC64) | |
4188 | ? "powerpc" | |
4189 | : ((BYTES_BIG_ENDIAN) | |
4190 | ? "powerpc64" | |
4191 | : "powerpc64le")); | |
4192 | ||
4193 | rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu); | |
4194 | have_cpu = false; | |
4195 | } | |
4196 | ||
4197 | gcc_assert (cpu_index >= 0); | |
4198 | ||
83349046 SB |
4199 | /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the |
4200 | compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits | |
4201 | with those from the cpu, except for options that were explicitly set. If | |
4202 | we don't have a cpu, do not override the target bits set in | |
4203 | TARGET_DEFAULT. */ | |
4204 | if (have_cpu) | |
4205 | { | |
4206 | rs6000_isa_flags &= ~set_masks; | |
4207 | rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable | |
4208 | & set_masks); | |
4209 | } | |
4210 | else | |
4211 | { | |
4212 | /* If no -mcpu=<xxx>, inherit any default options that were cleared via | |
4213 | POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize | |
4214 | target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched | |
4215 | to using rs6000_isa_flags, we need to do the initialization here. | |
4216 | ||
4217 | If there is a TARGET_DEFAULT, use that. Otherwise fall back to using | |
4218 | -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */ | |
4219 | HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT | |
4220 | : processor_target_table[cpu_index].target_enable); | |
4221 | rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit); | |
4222 | } | |
4223 | ||
4224 | if (rs6000_tune_index >= 0) | |
4225 | tune_index = rs6000_tune_index; | |
4226 | else if (have_cpu) | |
4227 | rs6000_tune_index = tune_index = cpu_index; | |
4228 | else | |
4229 | { | |
4230 | size_t i; | |
4231 | enum processor_type tune_proc | |
4232 | = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT); | |
4233 | ||
4234 | tune_index = -1; | |
4235 | for (i = 0; i < ARRAY_SIZE (processor_target_table); i++) | |
4236 | if (processor_target_table[i].processor == tune_proc) | |
4237 | { | |
4238 | rs6000_tune_index = tune_index = i; | |
4239 | break; | |
4240 | } | |
4241 | } | |
4242 | ||
4243 | gcc_assert (tune_index >= 0); | |
4244 | rs6000_cpu = processor_target_table[tune_index].processor; | |
4245 | ||
4246 | /* Pick defaults for SPE related control flags. Do this early to make sure | |
4247 | that the TARGET_ macros are representative ASAP. */ | |
4248 | { | |
4249 | int spe_capable_cpu = | |
4250 | (rs6000_cpu == PROCESSOR_PPC8540 | |
4251 | || rs6000_cpu == PROCESSOR_PPC8548); | |
4252 | ||
4253 | if (!global_options_set.x_rs6000_spe_abi) | |
4254 | rs6000_spe_abi = spe_capable_cpu; | |
4255 | ||
4256 | if (!global_options_set.x_rs6000_spe) | |
4257 | rs6000_spe = spe_capable_cpu; | |
4258 | ||
4259 | if (!global_options_set.x_rs6000_float_gprs) | |
4260 | rs6000_float_gprs = | |
4261 | (rs6000_cpu == PROCESSOR_PPC8540 ? 1 | |
4262 | : rs6000_cpu == PROCESSOR_PPC8548 ? 2 | |
4263 | : 0); | |
4264 | } | |
4265 | ||
4266 | if (global_options_set.x_rs6000_spe_abi | |
4267 | && rs6000_spe_abi | |
4268 | && !TARGET_SPE_ABI) | |
4269 | error ("not configured for SPE ABI"); | |
4270 | ||
4271 | if (global_options_set.x_rs6000_spe | |
4272 | && rs6000_spe | |
4273 | && !TARGET_SPE) | |
4274 | error ("not configured for SPE instruction set"); | |
4275 | ||
4276 | if (main_target_opt != NULL | |
4277 | && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi) | |
4278 | || (main_target_opt->x_rs6000_spe != rs6000_spe) | |
4279 | || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs))) | |
4280 | error ("target attribute or pragma changes SPE ABI"); | |
4281 | ||
4282 | if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3 | |
4283 | || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64 | |
4284 | || rs6000_cpu == PROCESSOR_PPCE5500) | |
4285 | { | |
4286 | if (TARGET_ALTIVEC) | |
4287 | error ("AltiVec not supported in this target"); | |
4288 | if (TARGET_SPE) | |
4289 | error ("SPE not supported in this target"); | |
4290 | } | |
4291 | if (rs6000_cpu == PROCESSOR_PPCE6500) | |
4292 | { | |
4293 | if (TARGET_SPE) | |
4294 | error ("SPE not supported in this target"); | |
4295 | } | |
4296 | ||
4297 | /* Disable Cell microcode if we are optimizing for the Cell | |
4298 | and not optimizing for size. */ | |
4299 | if (rs6000_gen_cell_microcode == -1) | |
4300 | rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL | |
4301 | && !optimize_size); | |
4302 | ||
4303 | /* If we are optimizing big endian systems for space and it's OK to | |
4304 | use instructions that would be microcoded on the Cell, use the | |
4305 | load/store multiple and string instructions. */ | |
4306 | if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode) | |
4307 | rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE | |
4308 | | OPTION_MASK_STRING); | |
4309 | ||
4310 | /* Don't allow -mmultiple or -mstring on little endian systems | |
4311 | unless the cpu is a 750, because the hardware doesn't support the | |
4312 | instructions used in little endian mode, and causes an alignment | |
4313 | trap. The 750 does not cause an alignment trap (except when the | |
4314 | target is unaligned). */ | |
4315 | ||
4316 | if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750) | |
4317 | { | |
4318 | if (TARGET_MULTIPLE) | |
4319 | { | |
4320 | rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE; | |
4321 | if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0) | |
4322 | warning (0, "-mmultiple is not supported on little endian systems"); | |
4323 | } | |
4324 | ||
4325 | if (TARGET_STRING) | |
4326 | { | |
4327 | rs6000_isa_flags &= ~OPTION_MASK_STRING; | |
4328 | if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0) | |
4329 | warning (0, "-mstring is not supported on little endian systems"); | |
4330 | } | |
4331 | } | |
4332 | ||
4333 | /* If little-endian, default to -mstrict-align on older processors. | |
4334 | Testing for htm matches power8 and later. */ | |
4335 | if (!BYTES_BIG_ENDIAN | |
4336 | && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM)) | |
4337 | rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN; | |
4338 | ||
4339 | /* -maltivec={le,be} implies -maltivec. */ | |
4340 | if (rs6000_altivec_element_order != 0) | |
4341 | rs6000_isa_flags |= OPTION_MASK_ALTIVEC; | |
4342 | ||
4343 | /* Disallow -maltivec=le in big endian mode for now. This is not | |
4344 | known to be useful for anyone. */ | |
4345 | if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1) | |
4346 | { | |
4347 | warning (0, N_("-maltivec=le not allowed for big-endian targets")); | |
4348 | rs6000_altivec_element_order = 0; | |
4349 | } | |
4350 | ||
4351 | /* Add some warnings for VSX. */ | |
4352 | if (TARGET_VSX) | |
4353 | { | |
4354 | const char *msg = NULL; | |
4355 | if (!TARGET_HARD_FLOAT || !TARGET_FPRS | |
4356 | || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT) | |
4357 | { | |
4358 | if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) | |
4359 | msg = N_("-mvsx requires hardware floating point"); | |
4360 | else | |
4361 | { | |
4362 | rs6000_isa_flags &= ~ OPTION_MASK_VSX; | |
4363 | rs6000_isa_flags_explicit |= OPTION_MASK_VSX; | |
4364 | } | |
4365 | } | |
4366 | else if (TARGET_PAIRED_FLOAT) | |
4367 | msg = N_("-mvsx and -mpaired are incompatible"); | |
4368 | else if (TARGET_AVOID_XFORM > 0) | |
4369 | msg = N_("-mvsx needs indexed addressing"); | |
4370 | else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit | |
4371 | & OPTION_MASK_ALTIVEC)) | |
4372 | { | |
4373 | if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) | |
4374 | msg = N_("-mvsx and -mno-altivec are incompatible"); | |
4375 | else | |
4376 | msg = N_("-mno-altivec disables vsx"); | |
4377 | } | |
4378 | ||
4379 | if (msg) | |
4380 | { | |
4381 | warning (0, msg); | |
4382 | rs6000_isa_flags &= ~ OPTION_MASK_VSX; | |
4383 | rs6000_isa_flags_explicit |= OPTION_MASK_VSX; | |
4384 | } | |
4385 | } | |
4386 | ||
4387 | /* If hard-float/altivec/vsx were explicitly turned off then don't allow | |
4388 | the -mcpu setting to enable options that conflict. */ | |
4389 | if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX) | |
4390 | && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT | |
4391 | | OPTION_MASK_ALTIVEC | |
4392 | | OPTION_MASK_VSX)) != 0) | |
4393 | rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO | |
4394 | | OPTION_MASK_DIRECT_MOVE) | |
4395 | & ~rs6000_isa_flags_explicit); | |
4396 | ||
4397 | if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) | |
4398 | rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags); | |
4399 | ||
4400 | /* Handle explicit -mno-{altivec,vsx,power8-vector,power9-vector} and turn | |
4401 | off all of the options that depend on those flags. */ | |
4402 | ignore_masks = rs6000_disable_incompatible_switches (); | |
4403 | ||
4404 | /* For the newer switches (vsx, dfp, etc.) set some of the older options, | |
4405 | unless the user explicitly used the -mno-<option> to disable the code. */ | |
4406 | if (TARGET_P9_VECTOR || TARGET_MODULO || TARGET_P9_DFORM_SCALAR | |
4407 | || TARGET_P9_DFORM_VECTOR || TARGET_P9_DFORM_BOTH > 0) | |
4408 | rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks); | |
4409 | else if (TARGET_P9_MINMAX) | |
4410 | { | |
4411 | if (have_cpu) | |
4412 | { | |
4413 | if (cpu_index == PROCESSOR_POWER9) | |
4414 | { | |
4415 | /* legacy behavior: allow -mcpu-power9 with certain | |
4416 | capabilities explicitly disabled. */ | |
4417 | rs6000_isa_flags |= (ISA_3_0_MASKS_SERVER & ~ignore_masks); | |
4418 | /* However, reject this automatic fix if certain | |
4419 | capabilities required for TARGET_P9_MINMAX support | |
4420 | have been explicitly disabled. */ | |
4421 | if (((OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF | |
4422 | | OPTION_MASK_UPPER_REGS_DF) & rs6000_isa_flags) | |
4423 | != (OPTION_MASK_VSX | OPTION_MASK_UPPER_REGS_SF | |
4424 | | OPTION_MASK_UPPER_REGS_DF)) | |
4425 | error ("-mpower9-minmax incompatible with explicitly disabled options"); | |
4426 | } | |
4427 | else | |
4428 | error ("Power9 target option is incompatible with -mcpu=<xxx> for " | |
4429 | "<xxx> less than power9"); | |
4430 | } | |
4431 | else if ((ISA_3_0_MASKS_SERVER & rs6000_isa_flags_explicit) | |
4432 | != (ISA_3_0_MASKS_SERVER & rs6000_isa_flags | |
4433 | & rs6000_isa_flags_explicit)) | |
4434 | /* Enforce that none of the ISA_3_0_MASKS_SERVER flags | |
4435 | were explicitly cleared. */ | |
4436 | error ("-mpower9-minmax incompatible with explicitly disabled options"); | |
4437 | else | |
4438 | rs6000_isa_flags |= ISA_3_0_MASKS_SERVER; | |
4439 | } | |
4440 | else if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO) | |
4441 | rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~ignore_masks); | |
4442 | else if (TARGET_VSX) | |
4443 | rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~ignore_masks); | |
4444 | else if (TARGET_POPCNTD) | |
4445 | rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~ignore_masks); | |
4446 | else if (TARGET_DFP) | |
4447 | rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~ignore_masks); | |
4448 | else if (TARGET_CMPB) | |
4449 | rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~ignore_masks); | |
4450 | else if (TARGET_FPRND) | |
4451 | rs6000_isa_flags |= (ISA_2_4_MASKS & ~ignore_masks); | |
4452 | else if (TARGET_POPCNTB) | |
4453 | rs6000_isa_flags |= (ISA_2_2_MASKS & ~ignore_masks); | |
4454 | else if (TARGET_ALTIVEC) | |
4455 | rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks); | |
4456 | ||
4457 | if (TARGET_CRYPTO && !TARGET_ALTIVEC) | |
4458 | { | |
4459 | if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO) | |
4460 | error ("-mcrypto requires -maltivec"); | |
4461 | rs6000_isa_flags &= ~OPTION_MASK_CRYPTO; | |
4462 | } | |
4463 | ||
4464 | if (TARGET_DIRECT_MOVE && !TARGET_VSX) | |
4465 | { | |
4466 | if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) | |
4467 | error ("-mdirect-move requires -mvsx"); | |
4468 | rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE; | |
4469 | } | |
4470 | ||
4471 | if (TARGET_P8_VECTOR && !TARGET_ALTIVEC) | |
4472 | { | |
4473 | if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) | |
4474 | error ("-mpower8-vector requires -maltivec"); | |
4475 | rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR; | |
4476 | } | |
4477 | ||
4478 | if (TARGET_P8_VECTOR && !TARGET_VSX) | |
4479 | { | |
4480 | if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) | |
4481 | && (rs6000_isa_flags_explicit & OPTION_MASK_VSX)) | |
4482 | error ("-mpower8-vector requires -mvsx"); | |
4483 | else if ((rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) == 0) | |
4484 | { | |
4485 | rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR; | |
4486 | if (rs6000_isa_flags_explicit & OPTION_MASK_VSX) | |
4487 | rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR; | |
4488 | } | |
4489 | else | |
4490 | { | |
4491 | /* OPTION_MASK_P8_VECTOR is explicit, and OPTION_MASK_VSX is | |
4492 | not explicit. */ | |
4493 | rs6000_isa_flags |= OPTION_MASK_VSX; | |
4494 | rs6000_isa_flags_explicit |= OPTION_MASK_VSX; | |
4495 | } | |
4496 | } | |
4497 | ||
4498 | if (TARGET_VSX_TIMODE && !TARGET_VSX) | |
4499 | { | |
4500 | if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) | |
4501 | error ("-mvsx-timode requires -mvsx"); | |
4502 | rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE; | |
4503 | } | |
4504 | ||
4505 | if (TARGET_DFP && !TARGET_HARD_FLOAT) | |
4506 | { | |
4507 | if (rs6000_isa_flags_explicit & OPTION_MASK_DFP) | |
4508 | error ("-mhard-dfp requires -mhard-float"); | |
4509 | rs6000_isa_flags &= ~OPTION_MASK_DFP; | |
4510 | } | |
4511 | ||
4512 | /* Allow an explicit -mupper-regs to set -mupper-regs-df, -mupper-regs-di, | |
4513 | and -mupper-regs-sf, depending on the cpu, unless the user explicitly also | |
4514 | set the individual option. */ | |
4515 | if (TARGET_UPPER_REGS > 0) | |
4516 | { | |
4517 | if (TARGET_VSX | |
4518 | && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)) | |
4519 | { | |
4520 | rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF; | |
4521 | rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF; | |
4522 | } | |
4523 | if (TARGET_VSX | |
4524 | && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)) | |
4525 | { | |
4526 | rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DI; | |
4527 | rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI; | |
4528 | } | |
4529 | if (TARGET_P8_VECTOR | |
4530 | && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)) | |
4531 | { | |
4532 | rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF; | |
4533 | rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF; | |
4534 | } | |
4535 | } | |
4536 | else if (TARGET_UPPER_REGS == 0) | |
4537 | { | |
4538 | if (TARGET_VSX | |
4539 | && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)) | |
4540 | { | |
4541 | rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF; | |
4542 | rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF; | |
4543 | } | |
4544 | if (TARGET_VSX | |
4545 | && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI)) | |
4546 | { | |
4547 | rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI; | |
4548 | rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DI; | |
4549 | } | |
4550 | if (TARGET_P8_VECTOR | |
4551 | && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)) | |
4552 | { | |
4553 | rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF; | |
4554 | rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF; | |
4555 | } | |
4556 | } | |
4557 | ||
4558 | if (TARGET_UPPER_REGS_DF && !TARGET_VSX) | |
4559 | { | |
4560 | if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF) | |
4561 | error ("-mupper-regs-df requires -mvsx"); | |
4562 | rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF; | |
4563 | } | |
4564 | ||
4565 | if (TARGET_UPPER_REGS_DI && !TARGET_VSX) | |
4566 | { | |
4567 | if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DI) | |
4568 | error ("-mupper-regs-di requires -mvsx"); | |
4569 | rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DI; | |
4570 | } | |
4571 | ||
4572 | if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR) | |
4573 | { | |
4574 | if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF) | |
4575 | error ("-mupper-regs-sf requires -mpower8-vector"); | |
4576 | rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF; | |
4577 | } | |
4578 | ||
4579 | /* The quad memory instructions only works in 64-bit mode. In 32-bit mode, | |
4580 | silently turn off quad memory mode. */ | |
4581 | if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64) | |
4582 | { | |
4583 | if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0) | |
4584 | warning (0, N_("-mquad-memory requires 64-bit mode")); | |
4585 | ||
4586 | if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0) | |
4587 | warning (0, N_("-mquad-memory-atomic requires 64-bit mode")); | |
4588 | ||
4589 | rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY | |
4590 | | OPTION_MASK_QUAD_MEMORY_ATOMIC); | |
4591 | } | |
4592 | ||
4593 | /* Non-atomic quad memory load/store are disabled for little endian, since | |
4594 | the words are reversed, but atomic operations can still be done by | |
4595 | swapping the words. */ | |
4596 | if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN) | |
4597 | { | |
4598 | if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0) | |
4599 | warning (0, N_("-mquad-memory is not available in little endian mode")); | |
4600 | ||
4601 | rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY; | |
4602 | } | |
4603 | ||
4604 | /* Assume if the user asked for normal quad memory instructions, they want | |
4605 | the atomic versions as well, unless they explicitly told us not to use quad | |
4606 | word atomic instructions. */ | |
4607 | if (TARGET_QUAD_MEMORY | |
4608 | && !TARGET_QUAD_MEMORY_ATOMIC | |
4609 | && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0)) | |
4610 | rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC; | |
4611 | ||
4612 | /* Enable power8 fusion if we are tuning for power8, even if we aren't | |
4613 | generating power8 instructions. */ | |
4614 | if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)) | |
4615 | rs6000_isa_flags |= (processor_target_table[tune_index].target_enable | |
4616 | & OPTION_MASK_P8_FUSION); | |
4617 | ||
4618 | /* Setting additional fusion flags turns on base fusion. */ | |
4619 | if (!TARGET_P8_FUSION && (TARGET_P8_FUSION_SIGN || TARGET_TOC_FUSION)) | |
4620 | { | |
4621 | if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION) | |
4622 | { | |
4623 | if (TARGET_P8_FUSION_SIGN) | |
4624 | error ("-mpower8-fusion-sign requires -mpower8-fusion"); | |
4625 | ||
4626 | if (TARGET_TOC_FUSION) | |
4627 | error ("-mtoc-fusion requires -mpower8-fusion"); | |
4628 | ||
4629 | rs6000_isa_flags &= ~OPTION_MASK_P8_FUSION; | |
4630 | } | |
4631 | else | |
4632 | rs6000_isa_flags |= OPTION_MASK_P8_FUSION; | |
4633 | } | |
4634 | ||
4635 | /* Power9 fusion is a superset over power8 fusion. */ | |
4636 | if (TARGET_P9_FUSION && !TARGET_P8_FUSION) | |
4637 | { | |
4638 | if (rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION) | |
4639 | { | |
4640 | /* We prefer to not mention undocumented options in | |
4641 | error messages. However, if users have managed to select | |
4642 | power9-fusion without selecting power8-fusion, they | |
4643 | already know about undocumented flags. */ | |
4644 | error ("-mpower9-fusion requires -mpower8-fusion"); | |
4645 | rs6000_isa_flags &= ~OPTION_MASK_P9_FUSION; | |
4646 | } | |
4647 | else | |
4648 | rs6000_isa_flags |= OPTION_MASK_P8_FUSION; | |
4649 | } | |
4650 | ||
4651 | /* Enable power9 fusion if we are tuning for power9, even if we aren't | |
4652 | generating power9 instructions. */ | |
4653 | if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_FUSION)) | |
4654 | rs6000_isa_flags |= (processor_target_table[tune_index].target_enable | |
4655 | & OPTION_MASK_P9_FUSION); | |
4656 | ||
4657 | /* Power8 does not fuse sign extended loads with the addis. If we are | |
4658 | optimizing at high levels for speed, convert a sign extended load into a | |
4659 | zero extending load, and an explicit sign extension. */ | |
4660 | if (TARGET_P8_FUSION | |
4661 | && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN) | |
4662 | && optimize_function_for_speed_p (cfun) | |
4663 | && optimize >= 3) | |
4664 | rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN; | |
4665 | ||
4666 | /* TOC fusion requires 64-bit and medium/large code model. */ | |
4667 | if (TARGET_TOC_FUSION && !TARGET_POWERPC64) | |
4668 | { | |
4669 | rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION; | |
4670 | if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0) | |
4671 | warning (0, N_("-mtoc-fusion requires 64-bit")); | |
4672 | } | |
4673 | ||
4674 | if (TARGET_TOC_FUSION && (TARGET_CMODEL == CMODEL_SMALL)) | |
4675 | { | |
4676 | rs6000_isa_flags &= ~OPTION_MASK_TOC_FUSION; | |
4677 | if ((rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION) != 0) | |
4678 | warning (0, N_("-mtoc-fusion requires medium/large code model")); | |
4679 | } | |
4680 | ||
4681 | /* Turn on -mtoc-fusion by default if p8-fusion and 64-bit medium/large code | |
4682 | model. */ | |
4683 | if (TARGET_P8_FUSION && !TARGET_TOC_FUSION && TARGET_POWERPC64 | |
4684 | && (TARGET_CMODEL != CMODEL_SMALL) | |
4685 | && !(rs6000_isa_flags_explicit & OPTION_MASK_TOC_FUSION)) | |
4686 | rs6000_isa_flags |= OPTION_MASK_TOC_FUSION; | |
4687 | ||
4688 | /* ISA 3.0 vector instructions include ISA 2.07. */ | |
4689 | if (TARGET_P9_VECTOR && !TARGET_P8_VECTOR) | |
4690 | { | |
4691 | /* We prefer to not mention undocumented options in | |
4692 | error messages. However, if users have managed to select | |
4693 | power9-vector without selecting power8-vector, they | |
4694 | already know about undocumented flags. */ | |
4695 | if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) && | |
4696 | (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)) | |
4697 | error ("-mpower9-vector requires -mpower8-vector"); | |
4698 | else if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) == 0) | |
4699 | { | |
4700 | rs6000_isa_flags &= ~OPTION_MASK_P9_VECTOR; | |
4701 | if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR) | |
4702 | rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR; | |
4703 | } | |
4704 | else | |
4705 | { | |
4706 | /* OPTION_MASK_P9_VECTOR is explicit and | |
4707 | OPTION_MASK_P8_VECTOR is not explicit. */ | |
4708 | rs6000_isa_flags |= OPTION_MASK_P8_VECTOR; | |
4709 | rs6000_isa_flags_explicit |= OPTION_MASK_P8_VECTOR; | |
4710 | } | |
4711 | } | |
4712 | ||
4713 | /* -mpower9-dform turns on both -mpower9-dform-scalar and | |
4714 | -mpower9-dform-vector. */ | |
4715 | if (TARGET_P9_DFORM_BOTH > 0) | |
4716 | { | |
4717 | if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR)) | |
4718 | rs6000_isa_flags |= OPTION_MASK_P9_DFORM_VECTOR; | |
4719 | ||
4720 | if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR)) | |
4721 | rs6000_isa_flags |= OPTION_MASK_P9_DFORM_SCALAR; | |
4722 | } | |
4723 | else if (TARGET_P9_DFORM_BOTH == 0) | |
4724 | { | |
4725 | if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR)) | |
4726 | rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_VECTOR; | |
4727 | ||
4728 | if (!(rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR)) | |
4729 | rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR; | |
4730 | } | |
4731 | ||
4732 | /* ISA 3.0 D-form instructions require p9-vector and upper-regs. */ | |
4733 | if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) && !TARGET_P9_VECTOR) | |
4734 | { | |
4735 | /* We prefer to not mention undocumented options in | |
4736 | error messages. However, if users have managed to select | |
4737 | power9-dform without selecting power9-vector, they | |
4738 | already know about undocumented flags. */ | |
4739 | if ((rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) | |
4740 | && (rs6000_isa_flags_explicit & (OPTION_MASK_P9_DFORM_SCALAR | |
4741 | | OPTION_MASK_P9_DFORM_VECTOR))) | |
4742 | error ("-mpower9-dform requires -mpower9-vector"); | |
4743 | else if (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) | |
4744 | { | |
4745 | rs6000_isa_flags &= | |
4746 | ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR); | |
4747 | rs6000_isa_flags_explicit |= | |
4748 | (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR); | |
4749 | } | |
4750 | else | |
4751 | { | |
4752 | /* We know that OPTION_MASK_P9_VECTOR is not explicit and | |
4753 | OPTION_MASK_P9_DFORM_SCALAR or OPTION_MASK_P9_DFORM_VECTOR | |
4754 | may be explicit. */ | |
4755 | rs6000_isa_flags |= OPTION_MASK_P9_VECTOR; | |
4756 | rs6000_isa_flags_explicit |= OPTION_MASK_P9_VECTOR; | |
4757 | } | |
4758 | } | |
4759 | ||
4760 | if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR) | |
4761 | && !TARGET_DIRECT_MOVE) | |
4762 | { | |
4763 | /* We prefer to not mention undocumented options in | |
4764 | error messages. However, if users have managed to select | |
4765 | power9-dform without selecting direct-move, they | |
4766 | already know about undocumented flags. */ | |
4767 | if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) | |
4768 | && ((rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR) || | |
4769 | (rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR) || | |
4770 | (TARGET_P9_DFORM_BOTH == 1))) | |
4771 | error ("-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar" | |
4772 | " require -mdirect-move"); | |
4773 | else if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) == 0) | |
4774 | { | |
4775 | rs6000_isa_flags |= OPTION_MASK_DIRECT_MOVE; | |
4776 | rs6000_isa_flags_explicit |= OPTION_MASK_DIRECT_MOVE; | |
4777 | } | |
4778 | else | |
4779 | { | |
4780 | rs6000_isa_flags &= | |
4781 | ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR); | |
4782 | rs6000_isa_flags_explicit |= | |
4783 | (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR); | |
4784 | } | |
4785 | } | |
4786 | ||
4787 | if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF) | |
4788 | { | |
4789 | /* We prefer to not mention undocumented options in | |
4790 | error messages. However, if users have managed to select | |
4791 | power9-dform without selecting upper-regs-df, they | |
4792 | already know about undocumented flags. */ | |
4793 | if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF) | |
4794 | error ("-mpower9-dform requires -mupper-regs-df"); | |
4795 | rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR; | |
4796 | } | |
4797 | ||
4798 | if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_SF) | |
4799 | { | |
4800 | if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF) | |
4801 | error ("-mpower9-dform requires -mupper-regs-sf"); | |
4802 | rs6000_isa_flags &= ~OPTION_MASK_P9_DFORM_SCALAR; | |
4803 | } | |
4804 | ||
4805 | /* Enable LRA by default. */ | |
4806 | if ((rs6000_isa_flags_explicit & OPTION_MASK_LRA) == 0) | |
4807 | rs6000_isa_flags |= OPTION_MASK_LRA; | |
4808 | ||
4809 | /* There have been bugs with -mvsx-timode that don't show up with -mlra, | |
4810 | but do show up with -mno-lra. Given -mlra will become the default once | |
4811 | PR 69847 is fixed, turn off the options with problems by default if | |
4812 | -mno-lra was used, and warn if the user explicitly asked for the option. | |
4813 | ||
4814 | Enable -mpower9-dform-vector by default if LRA and other power9 options. | |
4815 | Enable -mvsx-timode by default if LRA and VSX. */ | |
4816 | if (!TARGET_LRA) | |
4817 | { | |
4818 | if (TARGET_VSX_TIMODE) | |
4819 | { | |
4820 | if ((rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) != 0) | |
4821 | warning (0, "-mvsx-timode might need -mlra"); | |
4822 | ||
4823 | else | |
4824 | rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE; | |
4825 | } | |
4826 | } | |
4827 | ||
4828 | else | |
4829 | { | |
4830 | if (TARGET_VSX && !TARGET_VSX_TIMODE | |
4831 | && (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE) == 0) | |
4832 | rs6000_isa_flags |= OPTION_MASK_VSX_TIMODE; | |
4833 | } | |
4834 | ||
4835 | /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07 | |
4836 | support. If we only have ISA 2.06 support, and the user did not specify | |
4837 | the switch, leave it set to -1 so the movmisalign patterns are enabled, | |
4838 | but we don't enable the full vectorization support */ | |
4839 | if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE) | |
4840 | TARGET_ALLOW_MOVMISALIGN = 1; | |
4841 | ||
4842 | else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX) | |
4843 | { | |
4844 | if (TARGET_ALLOW_MOVMISALIGN > 0 | |
4845 | && global_options_set.x_TARGET_ALLOW_MOVMISALIGN) | |
4846 | error ("-mallow-movmisalign requires -mvsx"); | |
4847 | ||
4848 | TARGET_ALLOW_MOVMISALIGN = 0; | |
4849 | } | |
4850 | ||
4851 | /* Determine when unaligned vector accesses are permitted, and when | |
4852 | they are preferred over masked Altivec loads. Note that if | |
4853 | TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then | |
4854 | TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is | |
4855 | not true. */ | |
4856 | if (TARGET_EFFICIENT_UNALIGNED_VSX) | |
4857 | { | |
4858 | if (!TARGET_VSX) | |
4859 | { | |
4860 | if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX) | |
4861 | error ("-mefficient-unaligned-vsx requires -mvsx"); | |
4862 | ||
4863 | rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX; | |
4864 | } | |
4865 | ||
4866 | else if (!TARGET_ALLOW_MOVMISALIGN) | |
4867 | { | |
4868 | if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX) | |
4869 | error ("-mefficient-unaligned-vsx requires -mallow-movmisalign"); | |
4870 | ||
4871 | rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX; | |
4872 | } | |
4873 | } | |
4874 | ||
4875 | /* Check whether we should allow small integers into VSX registers. We | |
4876 | require direct move to prevent the register allocator from having to move | |
4877 | variables through memory to do moves. SImode can be used on ISA 2.07, | |
4878 | while HImode and QImode require ISA 3.0. */ | |
4879 | if (TARGET_VSX_SMALL_INTEGER | |
4880 | && (!TARGET_DIRECT_MOVE || !TARGET_P8_VECTOR || !TARGET_UPPER_REGS_DI)) | |
4881 | { | |
4882 | if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_SMALL_INTEGER) | |
4883 | error ("-mvsx-small-integer requires -mpower8-vector, " | |
4884 | "-mupper-regs-di, and -mdirect-move"); | |
4885 | ||
4886 | rs6000_isa_flags &= ~OPTION_MASK_VSX_SMALL_INTEGER; | |
4887 | } | |
4888 | ||
4889 | /* Set long double size before the IEEE 128-bit tests. */ | |
4890 | if (!global_options_set.x_rs6000_long_double_type_size) | |
4891 | { | |
4892 | if (main_target_opt != NULL | |
4893 | && (main_target_opt->x_rs6000_long_double_type_size | |
4894 | != RS6000_DEFAULT_LONG_DOUBLE_SIZE)) | |
4895 | error ("target attribute or pragma changes long double size"); | |
4896 | else | |
4897 | rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE; | |
4898 | } | |
4899 | ||
4900 | /* Set -mabi=ieeelongdouble on some old targets. Note, AIX and Darwin | |
4901 | explicitly redefine TARGET_IEEEQUAD to 0, so those systems will not | |
4902 | pick up this default. */ | |
4903 | #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD) | |
4904 | if (!global_options_set.x_rs6000_ieeequad) | |
4905 | rs6000_ieeequad = 1; | |
4906 | #endif | |
4907 | ||
4908 | /* Enable the default support for IEEE 128-bit floating point on Linux VSX | |
4909 | systems, but don't enable the __float128 keyword. */ | |
4910 | if (TARGET_VSX && TARGET_LONG_DOUBLE_128 | |
4911 | && (TARGET_FLOAT128_ENABLE_TYPE || TARGET_IEEEQUAD) | |
4912 | && ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) == 0)) | |
4913 | rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE; | |
4914 | ||
4915 | /* IEEE 128-bit floating point requires VSX support. */ | |
4916 | if (!TARGET_VSX) | |
4917 | { | |
4918 | if (TARGET_FLOAT128_KEYWORD) | |
4919 | { | |
4920 | if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0) | |
4921 | error ("-mfloat128 requires VSX support"); | |
4922 | ||
4923 | rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE | |
4924 | | OPTION_MASK_FLOAT128_KEYWORD | |
4925 | | OPTION_MASK_FLOAT128_HW); | |
4926 | } | |
4927 | ||
4928 | else if (TARGET_FLOAT128_TYPE) | |
4929 | { | |
4930 | if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_TYPE) != 0) | |
4931 | error ("-mfloat128-type requires VSX support"); | |
4932 | ||
4933 | rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE | |
4934 | | OPTION_MASK_FLOAT128_KEYWORD | |
4935 | | OPTION_MASK_FLOAT128_HW); | |
4936 | } | |
4937 | } | |
4938 | ||
4939 | /* -mfloat128 and -mfloat128-hardware internally require the underlying IEEE | |
4940 | 128-bit floating point support to be enabled. */ | |
4941 | if (!TARGET_FLOAT128_TYPE) | |
4942 | { | |
4943 | if (TARGET_FLOAT128_KEYWORD) | |
4944 | { | |
4945 | if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) != 0) | |
4946 | { | |
4947 | error ("-mfloat128 requires -mfloat128-type"); | |
4948 | rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE | |
4949 | | OPTION_MASK_FLOAT128_KEYWORD | |
4950 | | OPTION_MASK_FLOAT128_HW); | |
4951 | } | |
4952 | else | |
4953 | rs6000_isa_flags |= OPTION_MASK_FLOAT128_TYPE; | |
4954 | } | |
4955 | ||
4956 | if (TARGET_FLOAT128_HW) | |
4957 | { | |
4958 | if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0) | |
4959 | { | |
4960 | error ("-mfloat128-hardware requires -mfloat128-type"); | |
4961 | rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW; | |
4962 | } | |
4963 | else | |
4964 | rs6000_isa_flags &= ~(OPTION_MASK_FLOAT128_TYPE | |
4965 | | OPTION_MASK_FLOAT128_KEYWORD | |
4966 | | OPTION_MASK_FLOAT128_HW); | |
4967 | } | |
4968 | } | |
4969 | ||
4970 | /* If we have -mfloat128-type and full ISA 3.0 support, enable | |
4971 | -mfloat128-hardware by default. However, don't enable the __float128 | |
4972 | keyword. If the user explicitly turned on -mfloat128-hardware, enable the | |
4973 | -mfloat128 option as well if it was not already set. */ | |
4974 | if (TARGET_FLOAT128_TYPE && !TARGET_FLOAT128_HW | |
4975 | && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) == ISA_3_0_MASKS_IEEE | |
4976 | && !(rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW)) | |
4977 | rs6000_isa_flags |= OPTION_MASK_FLOAT128_HW; | |
4978 | ||
4979 | if (TARGET_FLOAT128_HW | |
4980 | && (rs6000_isa_flags & ISA_3_0_MASKS_IEEE) != ISA_3_0_MASKS_IEEE) | |
4981 | { | |
4982 | if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0) | |
4983 | error ("-mfloat128-hardware requires full ISA 3.0 support"); | |
4984 | ||
4985 | rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW; | |
4986 | } | |
4987 | ||
4988 | if (TARGET_FLOAT128_HW && !TARGET_64BIT) | |
4989 | { | |
4990 | if ((rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0) | |
4991 | error ("-mfloat128-hardware requires -m64"); | |
4992 | ||
4993 | rs6000_isa_flags &= ~OPTION_MASK_FLOAT128_HW; | |
4994 | } | |
4995 | ||
4996 | if (TARGET_FLOAT128_HW && !TARGET_FLOAT128_KEYWORD | |
4997 | && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_HW) != 0 | |
4998 | && (rs6000_isa_flags_explicit & OPTION_MASK_FLOAT128_KEYWORD) == 0) | |
4999 | rs6000_isa_flags |= OPTION_MASK_FLOAT128_KEYWORD; | |
5000 | ||
5001 | /* Print the options after updating the defaults. */ | |
5002 | if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) | |
5003 | rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags); | |
5004 | ||
5005 | /* E500mc does "better" if we inline more aggressively. Respect the | |
5006 | user's opinion, though. */ | |
5007 | if (rs6000_block_move_inline_limit == 0 | |
5008 | && (rs6000_cpu == PROCESSOR_PPCE500MC | |
5009 | || rs6000_cpu == PROCESSOR_PPCE500MC64 | |
5010 | || rs6000_cpu == PROCESSOR_PPCE5500 | |
5011 | || rs6000_cpu == PROCESSOR_PPCE6500)) | |
5012 | rs6000_block_move_inline_limit = 128; | |
5013 | ||
5014 | /* store_one_arg depends on expand_block_move to handle at least the | |
5015 | size of reg_parm_stack_space. */ | |
5016 | if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32)) | |
5017 | rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32); | |
5018 | ||
5019 | if (global_init_p) | |
5020 | { | |
5021 | /* If the appropriate debug option is enabled, replace the target hooks | |
5022 | with debug versions that call the real version and then prints | |
5023 | debugging information. */ | |
5024 | if (TARGET_DEBUG_COST) | |
5025 | { | |
5026 | targetm.rtx_costs = rs6000_debug_rtx_costs; | |
5027 | targetm.address_cost = rs6000_debug_address_cost; | |
5028 | targetm.sched.adjust_cost = rs6000_debug_adjust_cost; | |
5029 | } | |
5030 | ||
5031 | if (TARGET_DEBUG_ADDR) | |
5032 | { | |
5033 | targetm.legitimate_address_p = rs6000_debug_legitimate_address_p; | |
5034 | targetm.legitimize_address = rs6000_debug_legitimize_address; | |
5035 | rs6000_secondary_reload_class_ptr | |
5036 | = rs6000_debug_secondary_reload_class; | |
f15643d4 | 5037 | targetm.secondary_memory_needed |
83349046 | 5038 | = rs6000_debug_secondary_memory_needed; |
0d803030 RS |
5039 | targetm.can_change_mode_class |
5040 | = rs6000_debug_can_change_mode_class; | |
83349046 SB |
5041 | rs6000_preferred_reload_class_ptr |
5042 | = rs6000_debug_preferred_reload_class; | |
5043 | rs6000_legitimize_reload_address_ptr | |
5044 | = rs6000_debug_legitimize_reload_address; | |
5045 | rs6000_mode_dependent_address_ptr | |
5046 | = rs6000_debug_mode_dependent_address; | |
5047 | } | |
5048 | ||
5049 | if (rs6000_veclibabi_name) | |
5050 | { | |
5051 | if (strcmp (rs6000_veclibabi_name, "mass") == 0) | |
5052 | rs6000_veclib_handler = rs6000_builtin_vectorized_libmass; | |
5053 | else | |
5054 | { | |
5055 | error ("unknown vectorization library ABI type (%s) for " | |
5056 | "-mveclibabi= switch", rs6000_veclibabi_name); | |
5057 | ret = false; | |
5058 | } | |
5059 | } | |
5060 | } | |
5061 | ||
5062 | /* Disable VSX and Altivec silently if the user switched cpus to power7 in a | |
5063 | target attribute or pragma which automatically enables both options, | |
5064 | unless the altivec ABI was set. This is set by default for 64-bit, but | |
5065 | not for 32-bit. */ | |
5066 | if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi) | |
5067 | rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC | |
5068 | | OPTION_MASK_FLOAT128_TYPE | |
5069 | | OPTION_MASK_FLOAT128_KEYWORD) | |
5070 | & ~rs6000_isa_flags_explicit); | |
5071 | ||
5072 | /* Enable Altivec ABI for AIX -maltivec. */ | |
5073 | if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX)) | |
5074 | { | |
5075 | if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi) | |
5076 | error ("target attribute or pragma changes AltiVec ABI"); | |
5077 | else | |
5078 | rs6000_altivec_abi = 1; | |
5079 | } | |
5080 | ||
5081 | /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For | |
5082 | PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can | |
5083 | be explicitly overridden in either case. */ | |
5084 | if (TARGET_ELF) | |
5085 | { | |
5086 | if (!global_options_set.x_rs6000_altivec_abi | |
5087 | && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX)) | |
5088 | { | |
5089 | if (main_target_opt != NULL && | |
5090 | !main_target_opt->x_rs6000_altivec_abi) | |
5091 | error ("target attribute or pragma changes AltiVec ABI"); | |
5092 | else | |
5093 | rs6000_altivec_abi = 1; | |
5094 | } | |
5095 | } | |
5096 | ||
5097 | /* Set the Darwin64 ABI as default for 64-bit Darwin. | |
5098 | So far, the only darwin64 targets are also MACH-O. */ | |
5099 | if (TARGET_MACHO | |
5100 | && DEFAULT_ABI == ABI_DARWIN | |
5101 | && TARGET_64BIT) | |
5102 | { | |
5103 | if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi) | |
5104 | error ("target attribute or pragma changes darwin64 ABI"); | |
5105 | else | |
5106 | { | |
5107 | rs6000_darwin64_abi = 1; | |
5108 | /* Default to natural alignment, for better performance. */ | |
5109 | rs6000_alignment_flags = MASK_ALIGN_NATURAL; | |
5110 | } | |
5111 | } | |
5112 | ||
5113 | /* Place FP constants in the constant pool instead of TOC | |
5114 | if section anchors enabled. */ | |
5115 | if (flag_section_anchors | |
5116 | && !global_options_set.x_TARGET_NO_FP_IN_TOC) | |
5117 | TARGET_NO_FP_IN_TOC = 1; | |
5118 | ||
5119 | if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) | |
5120 | rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags); | |
5121 | ||
5122 | #ifdef SUBTARGET_OVERRIDE_OPTIONS | |
5123 | SUBTARGET_OVERRIDE_OPTIONS; | |
5124 | #endif | |
5125 | #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS | |
5126 | SUBSUBTARGET_OVERRIDE_OPTIONS; | |
5127 | #endif | |
5128 | #ifdef SUB3TARGET_OVERRIDE_OPTIONS | |
5129 | SUB3TARGET_OVERRIDE_OPTIONS; | |
5130 | #endif | |
5131 | ||
5132 | if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) | |
5133 | rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags); | |
5134 | ||
5135 | /* For the E500 family of cores, reset the single/double FP flags to let us | |
5136 | check that they remain constant across attributes or pragmas. Also, | |
5137 | clear a possible request for string instructions, not supported and which | |
5138 | we might have silently queried above for -Os. | |
5139 | ||
5140 | For other families, clear ISEL in case it was set implicitly. | |
5141 | */ | |
5142 | ||
5143 | switch (rs6000_cpu) | |
5144 | { | |
5145 | case PROCESSOR_PPC8540: | |
5146 | case PROCESSOR_PPC8548: | |
5147 | case PROCESSOR_PPCE500MC: | |
5148 | case PROCESSOR_PPCE500MC64: | |
5149 | case PROCESSOR_PPCE5500: | |
5150 | case PROCESSOR_PPCE6500: | |
5151 | ||
5152 | rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE; | |
5153 | rs6000_double_float = TARGET_E500_DOUBLE; | |
5154 | ||
5155 | rs6000_isa_flags &= ~OPTION_MASK_STRING; | |
5156 | ||
5157 | break; | |
5158 | ||
5159 | default: | |
5160 | ||
5161 | if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL)) | |
5162 | rs6000_isa_flags &= ~OPTION_MASK_ISEL; | |
5163 | ||
5164 | break; | |
5165 | } | |
5166 | ||
5167 | if (main_target_opt) | |
5168 | { | |
5169 | if (main_target_opt->x_rs6000_single_float != rs6000_single_float) | |
5170 | error ("target attribute or pragma changes single precision floating " | |
5171 | "point"); | |
5172 | if (main_target_opt->x_rs6000_double_float != rs6000_double_float) | |
5173 | error ("target attribute or pragma changes double precision floating " | |
5174 | "point"); | |
5175 | } | |
5176 | ||
5177 | /* Detect invalid option combinations with E500. */ | |
5178 | CHECK_E500_OPTIONS; | |
5179 | ||
5180 | rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4 | |
5181 | && rs6000_cpu != PROCESSOR_POWER5 | |
5182 | && rs6000_cpu != PROCESSOR_POWER6 | |
5183 | && rs6000_cpu != PROCESSOR_POWER7 | |
5184 | && rs6000_cpu != PROCESSOR_POWER8 | |
5185 | && rs6000_cpu != PROCESSOR_POWER9 | |
5186 | && rs6000_cpu != PROCESSOR_PPCA2 | |
5187 | && rs6000_cpu != PROCESSOR_CELL | |
5188 | && rs6000_cpu != PROCESSOR_PPC476); | |
5189 | rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4 | |
5190 | || rs6000_cpu == PROCESSOR_POWER5 | |
5191 | || rs6000_cpu == PROCESSOR_POWER7 | |
5192 | || rs6000_cpu == PROCESSOR_POWER8); | |
5193 | rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4 | |
5194 | || rs6000_cpu == PROCESSOR_POWER5 | |
5195 | || rs6000_cpu == PROCESSOR_POWER6 | |
5196 | || rs6000_cpu == PROCESSOR_POWER7 | |
5197 | || rs6000_cpu == PROCESSOR_POWER8 | |
5198 | || rs6000_cpu == PROCESSOR_POWER9 | |
5199 | || rs6000_cpu == PROCESSOR_PPCE500MC | |
5200 | || rs6000_cpu == PROCESSOR_PPCE500MC64 | |
5201 | || rs6000_cpu == PROCESSOR_PPCE5500 | |
5202 | || rs6000_cpu == PROCESSOR_PPCE6500); | |
5203 | ||
5204 | /* Allow debug switches to override the above settings. These are set to -1 | |
5205 | in powerpcspe.opt to indicate the user hasn't directly set the switch. */ | |
5206 | if (TARGET_ALWAYS_HINT >= 0) | |
5207 | rs6000_always_hint = TARGET_ALWAYS_HINT; | |
5208 | ||
5209 | if (TARGET_SCHED_GROUPS >= 0) | |
5210 | rs6000_sched_groups = TARGET_SCHED_GROUPS; | |
5211 | ||
5212 | if (TARGET_ALIGN_BRANCH_TARGETS >= 0) | |
5213 | rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS; | |
5214 | ||
5215 | rs6000_sched_restricted_insns_priority | |
5216 | = (rs6000_sched_groups ? 1 : 0); | |
5217 | ||
5218 | /* Handle -msched-costly-dep option. */ | |
5219 | rs6000_sched_costly_dep | |
5220 | = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly); | |
5221 | ||
5222 | if (rs6000_sched_costly_dep_str) | |
5223 | { | |
5224 | if (! strcmp (rs6000_sched_costly_dep_str, "no")) | |
5225 | rs6000_sched_costly_dep = no_dep_costly; | |
5226 | else if (! strcmp (rs6000_sched_costly_dep_str, "all")) | |
5227 | rs6000_sched_costly_dep = all_deps_costly; | |
5228 | else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load")) | |
5229 | rs6000_sched_costly_dep = true_store_to_load_dep_costly; | |
5230 | else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load")) | |
5231 | rs6000_sched_costly_dep = store_to_load_dep_costly; | |
5232 | else | |
5233 | rs6000_sched_costly_dep = ((enum rs6000_dependence_cost) | |
5234 | atoi (rs6000_sched_costly_dep_str)); | |
5235 | } | |
5236 | ||
5237 | /* Handle -minsert-sched-nops option. */ | |
5238 | rs6000_sched_insert_nops | |
5239 | = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none); | |
5240 | ||
5241 | if (rs6000_sched_insert_nops_str) | |
5242 | { | |
5243 | if (! strcmp (rs6000_sched_insert_nops_str, "no")) | |
5244 | rs6000_sched_insert_nops = sched_finish_none; | |
5245 | else if (! strcmp (rs6000_sched_insert_nops_str, "pad")) | |
5246 | rs6000_sched_insert_nops = sched_finish_pad_groups; | |
5247 | else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact")) | |
5248 | rs6000_sched_insert_nops = sched_finish_regroup_exact; | |
5249 | else | |
5250 | rs6000_sched_insert_nops = ((enum rs6000_nop_insertion) | |
5251 | atoi (rs6000_sched_insert_nops_str)); | |
5252 | } | |
5253 | ||
5254 | /* Handle stack protector */ | |
5255 | if (!global_options_set.x_rs6000_stack_protector_guard) | |
5256 | #ifdef TARGET_THREAD_SSP_OFFSET | |
5257 | rs6000_stack_protector_guard = SSP_TLS; | |
5258 | #else | |
5259 | rs6000_stack_protector_guard = SSP_GLOBAL; | |
5260 | #endif | |
5261 | ||
5262 | #ifdef TARGET_THREAD_SSP_OFFSET | |
5263 | rs6000_stack_protector_guard_offset = TARGET_THREAD_SSP_OFFSET; | |
5264 | rs6000_stack_protector_guard_reg = TARGET_64BIT ? 13 : 2; | |
5265 | #endif | |
5266 | ||
5267 | if (global_options_set.x_rs6000_stack_protector_guard_offset_str) | |
5268 | { | |
5269 | char *endp; | |
5270 | const char *str = rs6000_stack_protector_guard_offset_str; | |
5271 | ||
5272 | errno = 0; | |
5273 | long offset = strtol (str, &endp, 0); | |
5274 | if (!*str || *endp || errno) | |
5275 | error ("%qs is not a valid number " | |
5276 | "in -mstack-protector-guard-offset=", str); | |
5277 | ||
5278 | if (!IN_RANGE (offset, -0x8000, 0x7fff) | |
5279 | || (TARGET_64BIT && (offset & 3))) | |
5280 | error ("%qs is not a valid offset " | |
5281 | "in -mstack-protector-guard-offset=", str); | |
5282 | ||
5283 | rs6000_stack_protector_guard_offset = offset; | |
5284 | } | |
5285 | ||
5286 | if (global_options_set.x_rs6000_stack_protector_guard_reg_str) | |
5287 | { | |
5288 | const char *str = rs6000_stack_protector_guard_reg_str; | |
5289 | int reg = decode_reg_name (str); | |
5290 | ||
5291 | if (!IN_RANGE (reg, 1, 31)) | |
5292 | error ("%qs is not a valid base register " | |
5293 | "in -mstack-protector-guard-reg=", str); | |
5294 | ||
5295 | rs6000_stack_protector_guard_reg = reg; | |
5296 | } | |
5297 | ||
5298 | if (rs6000_stack_protector_guard == SSP_TLS | |
5299 | && !IN_RANGE (rs6000_stack_protector_guard_reg, 1, 31)) | |
5300 | error ("-mstack-protector-guard=tls needs a valid base register"); | |
5301 | ||
5302 | if (global_init_p) | |
5303 | { | |
5304 | #ifdef TARGET_REGNAMES | |
5305 | /* If the user desires alternate register names, copy in the | |
5306 | alternate names now. */ | |
5307 | if (TARGET_REGNAMES) | |
5308 | memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names)); | |
5309 | #endif | |
5310 | ||
5311 | /* Set aix_struct_return last, after the ABI is determined. | |
5312 | If -maix-struct-return or -msvr4-struct-return was explicitly | |
5313 | used, don't override with the ABI default. */ | |
5314 | if (!global_options_set.x_aix_struct_return) | |
5315 | aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET); | |
5316 | ||
5317 | #if 0 | |
5318 | /* IBM XL compiler defaults to unsigned bitfields. */ | |
5319 | if (TARGET_XL_COMPAT) | |
5320 | flag_signed_bitfields = 0; | |
5321 | #endif | |
5322 | ||
5323 | if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD) | |
5324 | REAL_MODE_FORMAT (TFmode) = &ibm_extended_format; | |
5325 | ||
5326 | ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1); | |
5327 | ||
5328 | /* We can only guarantee the availability of DI pseudo-ops when | |
5329 | assembling for 64-bit targets. */ | |
5330 | if (!TARGET_64BIT) | |
5331 | { | |
5332 | targetm.asm_out.aligned_op.di = NULL; | |
5333 | targetm.asm_out.unaligned_op.di = NULL; | |
5334 | } | |
5335 | ||
5336 | ||
5337 | /* Set branch target alignment, if not optimizing for size. */ | |
5338 | if (!optimize_size) | |
5339 | { | |
5340 | /* Cell wants to be aligned 8byte for dual issue. Titan wants to be | |
5341 | aligned 8byte to avoid misprediction by the branch predictor. */ | |
5342 | if (rs6000_cpu == PROCESSOR_TITAN | |
5343 | || rs6000_cpu == PROCESSOR_CELL) | |
5344 | { | |
c518c102 ML |
5345 | if (flag_align_functions && !str_align_functions) |
5346 | str_align_functions = "8"; | |
5347 | if (flag_align_jumps && !str_align_jumps) | |
5348 | str_align_jumps = "8"; | |
5349 | if (flag_align_loops && !str_align_loops) | |
5350 | str_align_loops = "8"; | |
83349046 SB |
5351 | } |
5352 | if (rs6000_align_branch_targets) | |
5353 | { | |
c518c102 ML |
5354 | if (flag_align_functions && !str_align_functions) |
5355 | str_align_functions = "16"; | |
5356 | if (flag_align_jumps && !str_align_jumps) | |
5357 | str_align_jumps = "16"; | |
5358 | if (flag_align_loops && !str_align_loops) | |
83349046 SB |
5359 | { |
5360 | can_override_loop_align = 1; | |
c518c102 | 5361 | str_align_loops = "16"; |
83349046 SB |
5362 | } |
5363 | } | |
c518c102 ML |
5364 | |
5365 | if (flag_align_jumps && !str_align_jumps) | |
5366 | str_align_jumps = "16"; | |
5367 | if (flag_align_loops && !str_align_loops) | |
5368 | str_align_loops = "16"; | |
83349046 SB |
5369 | } |
5370 | ||
5371 | /* Arrange to save and restore machine status around nested functions. */ | |
5372 | init_machine_status = rs6000_init_machine_status; | |
5373 | ||
5374 | /* We should always be splitting complex arguments, but we can't break | |
5375 | Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */ | |
5376 | if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) | |
5377 | targetm.calls.split_complex_arg = NULL; | |
5378 | ||
5379 | /* The AIX and ELFv1 ABIs define standard function descriptors. */ | |
5380 | if (DEFAULT_ABI == ABI_AIX) | |
5381 | targetm.calls.custom_function_descriptors = 0; | |
5382 | } | |
5383 | ||
5384 | /* Initialize rs6000_cost with the appropriate target costs. */ | |
5385 | if (optimize_size) | |
5386 | rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost; | |
5387 | else | |
5388 | switch (rs6000_cpu) | |
5389 | { | |
5390 | case PROCESSOR_RS64A: | |
5391 | rs6000_cost = &rs64a_cost; | |
5392 | break; | |
5393 | ||
5394 | case PROCESSOR_MPCCORE: | |
5395 | rs6000_cost = &mpccore_cost; | |
5396 | break; | |
5397 | ||
5398 | case PROCESSOR_PPC403: | |
5399 | rs6000_cost = &ppc403_cost; | |
5400 | break; | |
5401 | ||
5402 | case PROCESSOR_PPC405: | |
5403 | rs6000_cost = &ppc405_cost; | |
5404 | break; | |
5405 | ||
5406 | case PROCESSOR_PPC440: | |
5407 | rs6000_cost = &ppc440_cost; | |
5408 | break; | |
5409 | ||
5410 | case PROCESSOR_PPC476: | |
5411 | rs6000_cost = &ppc476_cost; | |
5412 | break; | |
5413 | ||
5414 | case PROCESSOR_PPC601: | |
5415 | rs6000_cost = &ppc601_cost; | |
5416 | break; | |
5417 | ||
5418 | case PROCESSOR_PPC603: | |
5419 | rs6000_cost = &ppc603_cost; | |
5420 | break; | |
5421 | ||
5422 | case PROCESSOR_PPC604: | |
5423 | rs6000_cost = &ppc604_cost; | |
5424 | break; | |
5425 | ||
5426 | case PROCESSOR_PPC604e: | |
5427 | rs6000_cost = &ppc604e_cost; | |
5428 | break; | |
5429 | ||
5430 | case PROCESSOR_PPC620: | |
5431 | rs6000_cost = &ppc620_cost; | |
5432 | break; | |
5433 | ||
5434 | case PROCESSOR_PPC630: | |
5435 | rs6000_cost = &ppc630_cost; | |
5436 | break; | |
5437 | ||
5438 | case PROCESSOR_CELL: | |
5439 | rs6000_cost = &ppccell_cost; | |
5440 | break; | |
5441 | ||
5442 | case PROCESSOR_PPC750: | |
5443 | case PROCESSOR_PPC7400: | |
5444 | rs6000_cost = &ppc750_cost; | |
5445 | break; | |
5446 | ||
5447 | case PROCESSOR_PPC7450: | |
5448 | rs6000_cost = &ppc7450_cost; | |
5449 | break; | |
5450 | ||
5451 | case PROCESSOR_PPC8540: | |
5452 | case PROCESSOR_PPC8548: | |
5453 | rs6000_cost = &ppc8540_cost; | |
5454 | break; | |
5455 | ||
5456 | case PROCESSOR_PPCE300C2: | |
5457 | case PROCESSOR_PPCE300C3: | |
5458 | rs6000_cost = &ppce300c2c3_cost; | |
5459 | break; | |
5460 | ||
5461 | case PROCESSOR_PPCE500MC: | |
5462 | rs6000_cost = &ppce500mc_cost; | |
5463 | break; | |
5464 | ||
5465 | case PROCESSOR_PPCE500MC64: | |
5466 | rs6000_cost = &ppce500mc64_cost; | |
5467 | break; | |
5468 | ||
5469 | case PROCESSOR_PPCE5500: | |
5470 | rs6000_cost = &ppce5500_cost; | |
5471 | break; | |
5472 | ||
5473 | case PROCESSOR_PPCE6500: | |
5474 | rs6000_cost = &ppce6500_cost; | |
5475 | break; | |
5476 | ||
5477 | case PROCESSOR_TITAN: | |
5478 | rs6000_cost = &titan_cost; | |
5479 | break; | |
5480 | ||
5481 | case PROCESSOR_POWER4: | |
5482 | case PROCESSOR_POWER5: | |
5483 | rs6000_cost = &power4_cost; | |
5484 | break; | |
5485 | ||
5486 | case PROCESSOR_POWER6: | |
5487 | rs6000_cost = &power6_cost; | |
5488 | break; | |
5489 | ||
5490 | case PROCESSOR_POWER7: | |
5491 | rs6000_cost = &power7_cost; | |
5492 | break; | |
5493 | ||
5494 | case PROCESSOR_POWER8: | |
5495 | rs6000_cost = &power8_cost; | |
5496 | break; | |
5497 | ||
5498 | case PROCESSOR_POWER9: | |
5499 | rs6000_cost = &power9_cost; | |
5500 | break; | |
5501 | ||
5502 | case PROCESSOR_PPCA2: | |
5503 | rs6000_cost = &ppca2_cost; | |
5504 | break; | |
5505 | ||
5506 | default: | |
5507 | gcc_unreachable (); | |
5508 | } | |
5509 | ||
5510 | if (global_init_p) | |
5511 | { | |
5512 | maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, | |
5513 | rs6000_cost->simultaneous_prefetches, | |
5514 | global_options.x_param_values, | |
5515 | global_options_set.x_param_values); | |
5516 | maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size, | |
5517 | global_options.x_param_values, | |
5518 | global_options_set.x_param_values); | |
5519 | maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, | |
5520 | rs6000_cost->cache_line_size, | |
5521 | global_options.x_param_values, | |
5522 | global_options_set.x_param_values); | |
5523 | maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size, | |
5524 | global_options.x_param_values, | |
5525 | global_options_set.x_param_values); | |
5526 | ||
5527 | /* Increase loop peeling limits based on performance analysis. */ | |
5528 | maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400, | |
5529 | global_options.x_param_values, | |
5530 | global_options_set.x_param_values); | |
5531 | maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400, | |
5532 | global_options.x_param_values, | |
5533 | global_options_set.x_param_values); | |
5534 | ||
5535 | /* Use the 'model' -fsched-pressure algorithm by default. */ | |
5536 | maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, | |
5537 | SCHED_PRESSURE_MODEL, | |
5538 | global_options.x_param_values, | |
5539 | global_options_set.x_param_values); | |
5540 | ||
5541 | /* If using typedef char *va_list, signal that | |
5542 | __builtin_va_start (&ap, 0) can be optimized to | |
5543 | ap = __builtin_next_arg (0). */ | |
5544 | if (DEFAULT_ABI != ABI_V4) | |
5545 | targetm.expand_builtin_va_start = NULL; | |
5546 | } | |
5547 | ||
5548 | /* Set up single/double float flags. | |
5549 | If TARGET_HARD_FLOAT is set, but neither single or double is set, | |
5550 | then set both flags. */ | |
5551 | if (TARGET_HARD_FLOAT && TARGET_FPRS | |
5552 | && rs6000_single_float == 0 && rs6000_double_float == 0) | |
5553 | rs6000_single_float = rs6000_double_float = 1; | |
5554 | ||
5555 | /* If not explicitly specified via option, decide whether to generate indexed | |
5556 | load/store instructions. A value of -1 indicates that the | |
5557 | initial value of this variable has not been overwritten. During | |
5558 | compilation, TARGET_AVOID_XFORM is either 0 or 1. */ | |
5559 | if (TARGET_AVOID_XFORM == -1) | |
5560 | /* Avoid indexed addressing when targeting Power6 in order to avoid the | |
5561 | DERAT mispredict penalty. However the LVE and STVE altivec instructions | |
5562 | need indexed accesses and the type used is the scalar type of the element | |
5563 | being loaded or stored. */ | |
5564 | TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB | |
5565 | && !TARGET_ALTIVEC); | |
5566 | ||
5567 | /* Set the -mrecip options. */ | |
5568 | if (rs6000_recip_name) | |
5569 | { | |
5570 | char *p = ASTRDUP (rs6000_recip_name); | |
5571 | char *q; | |
5572 | unsigned int mask, i; | |
5573 | bool invert; | |
5574 | ||
5575 | while ((q = strtok (p, ",")) != NULL) | |
5576 | { | |
5577 | p = NULL; | |
5578 | if (*q == '!') | |
5579 | { | |
5580 | invert = true; | |
5581 | q++; | |
5582 | } | |
5583 | else | |
5584 | invert = false; | |
5585 | ||
5586 | if (!strcmp (q, "default")) | |
5587 | mask = ((TARGET_RECIP_PRECISION) | |
5588 | ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION); | |
5589 | else | |
5590 | { | |
5591 | for (i = 0; i < ARRAY_SIZE (recip_options); i++) | |
5592 | if (!strcmp (q, recip_options[i].string)) | |
5593 | { | |
5594 | mask = recip_options[i].mask; | |
5595 | break; | |
5596 | } | |
5597 | ||
5598 | if (i == ARRAY_SIZE (recip_options)) | |
5599 | { | |
5600 | error ("unknown option for -mrecip=%s", q); | |
5601 | invert = false; | |
5602 | mask = 0; | |
5603 | ret = false; | |
5604 | } | |
5605 | } | |
5606 | ||
5607 | if (invert) | |
5608 | rs6000_recip_control &= ~mask; | |
5609 | else | |
5610 | rs6000_recip_control |= mask; | |
5611 | } | |
5612 | } | |
5613 | ||
5614 | /* Set the builtin mask of the various options used that could affect which | |
5615 | builtins were used. In the past we used target_flags, but we've run out | |
5616 | of bits, and some options like SPE and PAIRED are no longer in | |
5617 | target_flags. */ | |
5618 | rs6000_builtin_mask = rs6000_builtin_mask_calculate (); | |
5619 | if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET) | |
5620 | rs6000_print_builtin_options (stderr, 0, "builtin mask", | |
5621 | rs6000_builtin_mask); | |
5622 | ||
5623 | /* Initialize all of the registers. */ | |
5624 | rs6000_init_hard_regno_mode_ok (global_init_p); | |
5625 | ||
5626 | /* Save the initial options in case the user does function specific options */ | |
5627 | if (global_init_p) | |
5628 | target_option_default_node = target_option_current_node | |
5629 | = build_target_option_node (&global_options); | |
5630 | ||
5631 | /* If not explicitly specified via option, decide whether to generate the | |
5632 | extra blr's required to preserve the link stack on some cpus (eg, 476). */ | |
5633 | if (TARGET_LINK_STACK == -1) | |
5634 | SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic); | |
5635 | ||
5636 | return ret; | |
5637 | } | |
5638 | ||
/* Implement TARGET_OPTION_OVERRIDE.  On the RS/6000 this is used to
   define the target cpu type.  */

static void
rs6000_option_override (void)
{
  /* Global (not function-specific) option processing; the worker's
     boolean success result is deliberately ignored here.  */
  (void) rs6000_option_override_internal (true);
}
5647 | ||
5648 | \f | |
5649 | /* Implement targetm.vectorize.builtin_mask_for_load. */ | |
5650 | static tree | |
5651 | rs6000_builtin_mask_for_load (void) | |
5652 | { | |
5653 | /* Don't use lvsl/vperm for P8 and similarly efficient machines. */ | |
5654 | if ((TARGET_ALTIVEC && !TARGET_VSX) | |
5655 | || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX)) | |
5656 | return altivec_builtin_mask_for_load; | |
5657 | else | |
5658 | return 0; | |
5659 | } | |
5660 | ||
5661 | /* Implement LOOP_ALIGN. */ | |
e6de5335 | 5662 | align_flags |
83349046 SB |
5663 | rs6000_loop_align (rtx label) |
5664 | { | |
5665 | basic_block bb; | |
5666 | int ninsns; | |
5667 | ||
5668 | /* Don't override loop alignment if -falign-loops was specified. */ | |
5669 | if (!can_override_loop_align) | |
e6de5335 | 5670 | return align_loops; |
83349046 SB |
5671 | |
5672 | bb = BLOCK_FOR_INSN (label); | |
5673 | ninsns = num_loop_insns(bb->loop_father); | |
5674 | ||
5675 | /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */ | |
5676 | if (ninsns > 4 && ninsns <= 8 | |
5677 | && (rs6000_cpu == PROCESSOR_POWER4 | |
5678 | || rs6000_cpu == PROCESSOR_POWER5 | |
5679 | || rs6000_cpu == PROCESSOR_POWER6 | |
5680 | || rs6000_cpu == PROCESSOR_POWER7 | |
5681 | || rs6000_cpu == PROCESSOR_POWER8 | |
5682 | || rs6000_cpu == PROCESSOR_POWER9)) | |
e6de5335 | 5683 | return align_flags (5); |
83349046 | 5684 | else |
e6de5335 | 5685 | return align_loops; |
83349046 SB |
5686 | } |
5687 | ||
5688 | /* Return true iff, data reference of TYPE can reach vector alignment (16) | |
5689 | after applying N number of iterations. This routine does not determine | |
5690 | how may iterations are required to reach desired alignment. */ | |
5691 | ||
5692 | static bool | |
5693 | rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed) | |
5694 | { | |
5695 | if (is_packed) | |
5696 | return false; | |
5697 | ||
5698 | if (TARGET_32BIT) | |
5699 | { | |
5700 | if (rs6000_alignment_flags == MASK_ALIGN_NATURAL) | |
5701 | return true; | |
5702 | ||
5703 | if (rs6000_alignment_flags == MASK_ALIGN_POWER) | |
5704 | return true; | |
5705 | ||
5706 | return false; | |
5707 | } | |
5708 | else | |
5709 | { | |
5710 | if (TARGET_MACHO) | |
5711 | return false; | |
5712 | ||
5713 | /* Assuming that all other types are naturally aligned. CHECKME! */ | |
5714 | return true; | |
5715 | } | |
5716 | } | |
5717 | ||
5718 | /* Return true if the vector misalignment factor is supported by the | |
5719 | target. */ | |
5720 | static bool | |
5721 | rs6000_builtin_support_vector_misalignment (machine_mode mode, | |
5722 | const_tree type, | |
5723 | int misalignment, | |
5724 | bool is_packed) | |
5725 | { | |
5726 | if (TARGET_VSX) | |
5727 | { | |
5728 | if (TARGET_EFFICIENT_UNALIGNED_VSX) | |
5729 | return true; | |
5730 | ||
5731 | /* Return if movmisalign pattern is not supported for this mode. */ | |
5732 | if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing) | |
5733 | return false; | |
5734 | ||
5735 | if (misalignment == -1) | |
5736 | { | |
5737 | /* Misalignment factor is unknown at compile time but we know | |
5738 | it's word aligned. */ | |
5739 | if (rs6000_vector_alignment_reachable (type, is_packed)) | |
5740 | { | |
5741 | int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type)); | |
5742 | ||
5743 | if (element_size == 64 || element_size == 32) | |
5744 | return true; | |
5745 | } | |
5746 | ||
5747 | return false; | |
5748 | } | |
5749 | ||
5750 | /* VSX supports word-aligned vector. */ | |
5751 | if (misalignment % 4 == 0) | |
5752 | return true; | |
5753 | } | |
5754 | return false; | |
5755 | } | |
5756 | ||
/* Implement targetm.vectorize.builtin_vectorization_cost.

   Return the relative cost of one vectorizer statement of kind
   TYPE_OF_COST operating on vectors of type VECTYPE.  MISALIGN is the
   known byte misalignment of the access (-1 if unknown); it is only
   consulted for the unaligned load/store cases.  */
static int
rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				   tree vectype, int misalign)
{
  unsigned elements;
  tree elem_type;

  switch (type_of_cost)
    {
    /* Simple scalar and aligned vector operations all cost one unit.  */
    case scalar_stmt:
    case scalar_load:
    case scalar_store:
    case vector_stmt:
    case vector_load:
    case vector_store:
    case vec_to_scalar:
    case scalar_to_vec:
    case cond_branch_not_taken:
      return 1;

    case vec_perm:
      if (TARGET_VSX)
	return 3;
      else
	return 1;

    case vec_promote_demote:
      if (TARGET_VSX)
	return 4;
      else
	return 1;

    case cond_branch_taken:
      return 3;

    case unaligned_load:
    case vector_gather_load:
      if (TARGET_P9_VECTOR)
	return 3;

      if (TARGET_EFFICIENT_UNALIGNED_VSX)
	return 1;

      if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
	{
	  elements = TYPE_VECTOR_SUBPARTS (vectype);
	  if (elements == 2)
	    /* Double word aligned.  */
	    return 2;

	  if (elements == 4)
	    {
	      switch (misalign)
		{
		case 8:
		  /* Double word aligned.  */
		  return 2;

		case -1:
		  /* Unknown misalignment.  */
		case 4:
		case 12:
		  /* Word aligned.  */
		  return 22;

		default:
		  gcc_unreachable ();
		}
	    }
	}

      if (TARGET_ALTIVEC)
	/* Misaligned loads are not supported.  */
	gcc_unreachable ();

      return 2;

    case unaligned_store:
    case vector_scatter_store:
      if (TARGET_EFFICIENT_UNALIGNED_VSX)
	return 1;

      if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
	{
	  elements = TYPE_VECTOR_SUBPARTS (vectype);
	  if (elements == 2)
	    /* Double word aligned.  */
	    return 2;

	  if (elements == 4)
	    {
	      switch (misalign)
		{
		case 8:
		  /* Double word aligned.  */
		  return 2;

		case -1:
		  /* Unknown misalignment.  */
		case 4:
		case 12:
		  /* Word aligned.  */
		  return 23;

		default:
		  gcc_unreachable ();
		}
	    }
	}

      if (TARGET_ALTIVEC)
	/* Misaligned stores are not supported.  */
	gcc_unreachable ();

      return 2;

    case vec_construct:
      /* This is a rough approximation assuming non-constant elements
	 constructed into a vector via element insertion.  FIXME:
	 vec_construct is not granular enough for uniformly good
	 decisions.  If the initialization is a splat, this is
	 cheaper than we estimate.  Improve this someday.  */
      elem_type = TREE_TYPE (vectype);
      /* 32-bit vectors loaded into registers are stored as double
	 precision, so we need 2 permutes, 2 converts, and 1 merge
	 to construct a vector of short floats from them.  */
      if (SCALAR_FLOAT_TYPE_P (elem_type)
	  && TYPE_PRECISION (elem_type) == 32)
	return 5;
      /* On POWER9, integer vector types are built up in GPRs and then
	 use a direct move (2 cycles).  For POWER8 this is even worse,
	 as we need two direct moves and a merge, and the direct moves
	 are five cycles.  */
      else if (INTEGRAL_TYPE_P (elem_type))
	{
	  if (TARGET_P9_VECTOR)
	    return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 2;
	  else
	    return TYPE_VECTOR_SUBPARTS (vectype) - 1 + 11;
	}
      else
	/* V2DFmode doesn't need a direct move.  */
	return 2;

    default:
      gcc_unreachable ();
    }
}
5906 | ||
5907 | /* Implement targetm.vectorize.preferred_simd_mode. */ | |
5908 | ||
5909 | static machine_mode | |
005ba29c | 5910 | rs6000_preferred_simd_mode (scalar_mode mode) |
83349046 SB |
5911 | { |
5912 | if (TARGET_VSX) | |
5913 | switch (mode) | |
5914 | { | |
4e10a5a7 | 5915 | case E_DFmode: |
83349046 SB |
5916 | return V2DFmode; |
5917 | default:; | |
5918 | } | |
5919 | if (TARGET_ALTIVEC || TARGET_VSX) | |
5920 | switch (mode) | |
5921 | { | |
4e10a5a7 | 5922 | case E_SFmode: |
83349046 | 5923 | return V4SFmode; |
4e10a5a7 | 5924 | case E_TImode: |
83349046 | 5925 | return V1TImode; |
4e10a5a7 | 5926 | case E_DImode: |
83349046 | 5927 | return V2DImode; |
4e10a5a7 | 5928 | case E_SImode: |
83349046 | 5929 | return V4SImode; |
4e10a5a7 | 5930 | case E_HImode: |
83349046 | 5931 | return V8HImode; |
4e10a5a7 | 5932 | case E_QImode: |
83349046 SB |
5933 | return V16QImode; |
5934 | default:; | |
5935 | } | |
5936 | if (TARGET_SPE) | |
5937 | switch (mode) | |
5938 | { | |
4e10a5a7 | 5939 | case E_SFmode: |
83349046 | 5940 | return V2SFmode; |
4e10a5a7 | 5941 | case E_SImode: |
83349046 SB |
5942 | return V2SImode; |
5943 | default:; | |
5944 | } | |
5945 | if (TARGET_PAIRED_FLOAT | |
5946 | && mode == SFmode) | |
5947 | return V2SFmode; | |
5948 | return word_mode; | |
5949 | } | |
5950 | ||
/* Data accumulated by the vectorizer cost-model hooks for one loop
   (or basic-block SLP region, in which case loop_info is NULL --
   see rs6000_finish_cost).  */
typedef struct _rs6000_cost_data
{
  /* Loop being vectorized; NULL when costing basic-block SLP.  */
  struct loop *loop_info;
  /* Accumulated costs, indexed by enum vect_cost_model_location
     (vect_prologue, vect_body, vect_epilogue).  */
  unsigned cost[3];
} rs6000_cost_data;
5956 | ||
/* Test for likely overcommitment of vector hardware resources.  If a
   loop iteration is relatively large, and too large a percentage of
   instructions in the loop are vectorized, the cost model may not
   adequately reflect delays from unavailable vector resources.
   Penalize the loop body cost for this case.  */

static void
rs6000_density_test (rs6000_cost_data *data)
{
  const int DENSITY_PCT_THRESHOLD = 85;
  const int DENSITY_SIZE_THRESHOLD = 70;
  const int DENSITY_PENALTY = 10;
  struct loop *loop = data->loop_info;
  basic_block *bbs = get_loop_body (loop);
  int nbbs = loop->num_nodes;
  loop_vec_info loop_vinfo = loop_vec_info_for_loop (data->loop_info);
  int vec_cost = data->cost[vect_body], not_vec_cost = 0;
  int i, density_pct;

  /* Count the statements in the loop that will NOT be vectorized:
     those neither relevant to vectorization nor part of a recognized
     pattern.  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);

	  if (!STMT_VINFO_RELEVANT_P (stmt_info)
	      && !STMT_VINFO_IN_PATTERN_P (stmt_info))
	    not_vec_cost++;
	}
    }

  free (bbs);
  /* NOTE(review): assumes vec_cost + not_vec_cost > 0; presumably a
     loop reaching this point always has nonzero body cost -- confirm,
     otherwise this divides by zero.  */
  density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);

  /* Penalize only when the body is both dense with vector code and
     large in absolute terms.  */
  if (density_pct > DENSITY_PCT_THRESHOLD
      && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
    {
      data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "density %d%%, cost %d exceeds threshold, penalizing "
			 "loop body cost by %d%%", density_pct,
			 vec_cost + not_vec_cost, DENSITY_PENALTY);
    }
}
6006 | ||
/* Implement targetm.vectorize.init_cost.  */

/* For each vectorized loop, this var holds TRUE iff a non-memory vector
   instruction is needed by the vectorization.  Set by
   rs6000_add_stmt_cost and consulted by rs6000_finish_cost to detect
   pure copy loops.  */
static bool rs6000_vect_nonmem;
6012 | ||
6013 | static void * | |
6014 | rs6000_init_cost (struct loop *loop_info) | |
6015 | { | |
6016 | rs6000_cost_data *data = XNEW (struct _rs6000_cost_data); | |
6017 | data->loop_info = loop_info; | |
6018 | data->cost[vect_prologue] = 0; | |
6019 | data->cost[vect_body] = 0; | |
6020 | data->cost[vect_epilogue] = 0; | |
6021 | rs6000_vect_nonmem = false; | |
6022 | return data; | |
6023 | } | |
6024 | ||
6025 | /* Implement targetm.vectorize.add_stmt_cost. */ | |
6026 | ||
6027 | static unsigned | |
6028 | rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, | |
6029 | struct _stmt_vec_info *stmt_info, int misalign, | |
6030 | enum vect_cost_model_location where) | |
6031 | { | |
6032 | rs6000_cost_data *cost_data = (rs6000_cost_data*) data; | |
6033 | unsigned retval = 0; | |
6034 | ||
6035 | if (flag_vect_cost_model) | |
6036 | { | |
6037 | tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; | |
6038 | int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype, | |
6039 | misalign); | |
6040 | /* Statements in an inner loop relative to the loop being | |
6041 | vectorized are weighted more heavily. The value here is | |
6042 | arbitrary and could potentially be improved with analysis. */ | |
6043 | if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) | |
6044 | count *= 50; /* FIXME. */ | |
6045 | ||
6046 | retval = (unsigned) (count * stmt_cost); | |
6047 | cost_data->cost[where] += retval; | |
6048 | ||
6049 | /* Check whether we're doing something other than just a copy loop. | |
6050 | Not all such loops may be profitably vectorized; see | |
6051 | rs6000_finish_cost. */ | |
6052 | if ((kind == vec_to_scalar || kind == vec_perm | |
6053 | || kind == vec_promote_demote || kind == vec_construct | |
6054 | || kind == scalar_to_vec) | |
6055 | || (where == vect_body && kind == vector_stmt)) | |
6056 | rs6000_vect_nonmem = true; | |
6057 | } | |
6058 | ||
6059 | return retval; | |
6060 | } | |
6061 | ||
6062 | /* Implement targetm.vectorize.finish_cost. */ | |
6063 | ||
6064 | static void | |
6065 | rs6000_finish_cost (void *data, unsigned *prologue_cost, | |
6066 | unsigned *body_cost, unsigned *epilogue_cost) | |
6067 | { | |
6068 | rs6000_cost_data *cost_data = (rs6000_cost_data*) data; | |
6069 | ||
6070 | if (cost_data->loop_info) | |
6071 | rs6000_density_test (cost_data); | |
6072 | ||
6073 | /* Don't vectorize minimum-vectorization-factor, simple copy loops | |
6074 | that require versioning for any reason. The vectorization is at | |
6075 | best a wash inside the loop, and the versioning checks make | |
6076 | profitability highly unlikely and potentially quite harmful. */ | |
6077 | if (cost_data->loop_info) | |
6078 | { | |
6079 | loop_vec_info vec_info = loop_vec_info_for_loop (cost_data->loop_info); | |
6080 | if (!rs6000_vect_nonmem | |
6081 | && LOOP_VINFO_VECT_FACTOR (vec_info) == 2 | |
6082 | && LOOP_REQUIRES_VERSIONING (vec_info)) | |
6083 | cost_data->cost[vect_body] += 10000; | |
6084 | } | |
6085 | ||
6086 | *prologue_cost = cost_data->cost[vect_prologue]; | |
6087 | *body_cost = cost_data->cost[vect_body]; | |
6088 | *epilogue_cost = cost_data->cost[vect_epilogue]; | |
6089 | } | |
6090 | ||
/* Implement targetm.vectorize.destroy_cost_data.

   Release the record allocated by rs6000_init_cost.  */

static void
rs6000_destroy_cost_data (void *data)
{
  free (data);
}
6098 | ||
/* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
   library with vectorized intrinsics.

   Map scalar math builtin FN, used with result vector type TYPE_OUT and
   argument vector type TYPE_IN, to a declaration of the corresponding
   MASS SIMD routine ("<name>d2" for V2DF, "<name>f4" for V4SF), or
   return NULL_TREE when no suitable routine exists.  */

static tree
rs6000_builtin_vectorized_libmass (combined_fn fn, tree type_out,
				   tree type_in)
{
  char name[32];
  const char *suffix = NULL;
  tree fntype, new_fndecl, bdecl = NULL_TREE;
  int n_args = 1;
  const char *bname;
  machine_mode el_mode, in_mode;
  int n, in_n;

  /* Libmass is suitable for unsafe math only as it does not correctly support
     parts of IEEE with the required precision such as denormals.  Only support
     it if we have VSX to use the simd d2 or f4 functions.
     XXX: Add variable length support.  */
  if (!flag_unsafe_math_optimizations || !TARGET_VSX)
    return NULL_TREE;

  el_mode = TYPE_MODE (TREE_TYPE (type_out));
  n = TYPE_VECTOR_SUBPARTS (type_out);
  in_mode = TYPE_MODE (TREE_TYPE (type_in));
  in_n = TYPE_VECTOR_SUBPARTS (type_in);
  /* Argument and result must have matching element mode and count.  */
  if (el_mode != in_mode
      || n != in_n)
    return NULL_TREE;

  switch (fn)
    {
    /* Two-argument functions: record the arity, then fall through into
       the shared one-argument dispatch below.  */
    CASE_CFN_ATAN2:
    CASE_CFN_HYPOT:
    CASE_CFN_POW:
      n_args = 2;
      gcc_fallthrough ();

    CASE_CFN_ACOS:
    CASE_CFN_ACOSH:
    CASE_CFN_ASIN:
    CASE_CFN_ASINH:
    CASE_CFN_ATAN:
    CASE_CFN_ATANH:
    CASE_CFN_CBRT:
    CASE_CFN_COS:
    CASE_CFN_COSH:
    CASE_CFN_ERF:
    CASE_CFN_ERFC:
    CASE_CFN_EXP2:
    CASE_CFN_EXP:
    CASE_CFN_EXPM1:
    CASE_CFN_LGAMMA:
    CASE_CFN_LOG10:
    CASE_CFN_LOG1P:
    CASE_CFN_LOG2:
    CASE_CFN_LOG:
    CASE_CFN_SIN:
    CASE_CFN_SINH:
    CASE_CFN_SQRT:
    CASE_CFN_TAN:
    CASE_CFN_TANH:
      if (el_mode == DFmode && n == 2)
	{
	  bdecl = mathfn_built_in (double_type_node, fn);
	  suffix = "d2";				/* pow -> powd2 */
	}
      else if (el_mode == SFmode && n == 4)
	{
	  bdecl = mathfn_built_in (float_type_node, fn);
	  suffix = "4";					/* powf -> powf4 */
	}
      else
	return NULL_TREE;
      if (!bdecl)
	return NULL_TREE;
      break;

    default:
      return NULL_TREE;
    }

  gcc_assert (suffix != NULL);
  bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
  if (!bname)
    return NULL_TREE;

  /* Build the MASS name by stripping the "__builtin_" prefix and
     appending the vector-width suffix.  NOTE(review): presumably every
     stripped builtin name plus suffix fits in name[32] -- confirm if
     new entries are added to the switch above.  */
  strcpy (name, bname + sizeof ("__builtin_") - 1);
  strcat (name, suffix);

  if (n_args == 1)
    fntype = build_function_type_list (type_out, type_in, NULL);
  else if (n_args == 2)
    fntype = build_function_type_list (type_out, type_in, type_in, NULL);
  else
    gcc_unreachable ();

  /* Build a function declaration for the vectorized function.  */
  new_fndecl = build_decl (BUILTINS_LOCATION,
			   FUNCTION_DECL, get_identifier (name), fntype);
  TREE_PUBLIC (new_fndecl) = 1;
  DECL_EXTERNAL (new_fndecl) = 1;
  DECL_IS_NOVOPS (new_fndecl) = 1;
  TREE_READONLY (new_fndecl) = 1;

  return new_fndecl;
}
6206 | ||
6207 | /* Returns a function decl for a vectorized version of the builtin function | |
6208 | with builtin function code FN and the result vector type TYPE, or NULL_TREE | |
6209 | if it is not available. */ | |
6210 | ||
6211 | static tree | |
6212 | rs6000_builtin_vectorized_function (unsigned int fn, tree type_out, | |
6213 | tree type_in) | |
6214 | { | |
6215 | machine_mode in_mode, out_mode; | |
6216 | int in_n, out_n; | |
6217 | ||
6218 | if (TARGET_DEBUG_BUILTIN) | |
6219 | fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n", | |
6220 | combined_fn_name (combined_fn (fn)), | |
6221 | GET_MODE_NAME (TYPE_MODE (type_out)), | |
6222 | GET_MODE_NAME (TYPE_MODE (type_in))); | |
6223 | ||
6224 | if (TREE_CODE (type_out) != VECTOR_TYPE | |
6225 | || TREE_CODE (type_in) != VECTOR_TYPE | |
6226 | || !TARGET_VECTORIZE_BUILTINS) | |
6227 | return NULL_TREE; | |
6228 | ||
6229 | out_mode = TYPE_MODE (TREE_TYPE (type_out)); | |
6230 | out_n = TYPE_VECTOR_SUBPARTS (type_out); | |
6231 | in_mode = TYPE_MODE (TREE_TYPE (type_in)); | |
6232 | in_n = TYPE_VECTOR_SUBPARTS (type_in); | |
6233 | ||
6234 | switch (fn) | |
6235 | { | |
6236 | CASE_CFN_COPYSIGN: | |
6237 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6238 | && out_mode == DFmode && out_n == 2 | |
6239 | && in_mode == DFmode && in_n == 2) | |
6240 | return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP]; | |
6241 | if (VECTOR_UNIT_VSX_P (V4SFmode) | |
6242 | && out_mode == SFmode && out_n == 4 | |
6243 | && in_mode == SFmode && in_n == 4) | |
6244 | return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP]; | |
6245 | if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) | |
6246 | && out_mode == SFmode && out_n == 4 | |
6247 | && in_mode == SFmode && in_n == 4) | |
6248 | return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF]; | |
6249 | break; | |
6250 | CASE_CFN_CEIL: | |
6251 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6252 | && out_mode == DFmode && out_n == 2 | |
6253 | && in_mode == DFmode && in_n == 2) | |
6254 | return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP]; | |
6255 | if (VECTOR_UNIT_VSX_P (V4SFmode) | |
6256 | && out_mode == SFmode && out_n == 4 | |
6257 | && in_mode == SFmode && in_n == 4) | |
6258 | return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP]; | |
6259 | if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) | |
6260 | && out_mode == SFmode && out_n == 4 | |
6261 | && in_mode == SFmode && in_n == 4) | |
6262 | return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP]; | |
6263 | break; | |
6264 | CASE_CFN_FLOOR: | |
6265 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6266 | && out_mode == DFmode && out_n == 2 | |
6267 | && in_mode == DFmode && in_n == 2) | |
6268 | return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM]; | |
6269 | if (VECTOR_UNIT_VSX_P (V4SFmode) | |
6270 | && out_mode == SFmode && out_n == 4 | |
6271 | && in_mode == SFmode && in_n == 4) | |
6272 | return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM]; | |
6273 | if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) | |
6274 | && out_mode == SFmode && out_n == 4 | |
6275 | && in_mode == SFmode && in_n == 4) | |
6276 | return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM]; | |
6277 | break; | |
6278 | CASE_CFN_FMA: | |
6279 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6280 | && out_mode == DFmode && out_n == 2 | |
6281 | && in_mode == DFmode && in_n == 2) | |
6282 | return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP]; | |
6283 | if (VECTOR_UNIT_VSX_P (V4SFmode) | |
6284 | && out_mode == SFmode && out_n == 4 | |
6285 | && in_mode == SFmode && in_n == 4) | |
6286 | return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP]; | |
6287 | if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) | |
6288 | && out_mode == SFmode && out_n == 4 | |
6289 | && in_mode == SFmode && in_n == 4) | |
6290 | return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP]; | |
6291 | break; | |
6292 | CASE_CFN_TRUNC: | |
6293 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6294 | && out_mode == DFmode && out_n == 2 | |
6295 | && in_mode == DFmode && in_n == 2) | |
6296 | return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ]; | |
6297 | if (VECTOR_UNIT_VSX_P (V4SFmode) | |
6298 | && out_mode == SFmode && out_n == 4 | |
6299 | && in_mode == SFmode && in_n == 4) | |
6300 | return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ]; | |
6301 | if (VECTOR_UNIT_ALTIVEC_P (V4SFmode) | |
6302 | && out_mode == SFmode && out_n == 4 | |
6303 | && in_mode == SFmode && in_n == 4) | |
6304 | return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ]; | |
6305 | break; | |
6306 | CASE_CFN_NEARBYINT: | |
6307 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6308 | && flag_unsafe_math_optimizations | |
6309 | && out_mode == DFmode && out_n == 2 | |
6310 | && in_mode == DFmode && in_n == 2) | |
6311 | return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI]; | |
6312 | if (VECTOR_UNIT_VSX_P (V4SFmode) | |
6313 | && flag_unsafe_math_optimizations | |
6314 | && out_mode == SFmode && out_n == 4 | |
6315 | && in_mode == SFmode && in_n == 4) | |
6316 | return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI]; | |
6317 | break; | |
6318 | CASE_CFN_RINT: | |
6319 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6320 | && !flag_trapping_math | |
6321 | && out_mode == DFmode && out_n == 2 | |
6322 | && in_mode == DFmode && in_n == 2) | |
6323 | return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC]; | |
6324 | if (VECTOR_UNIT_VSX_P (V4SFmode) | |
6325 | && !flag_trapping_math | |
6326 | && out_mode == SFmode && out_n == 4 | |
6327 | && in_mode == SFmode && in_n == 4) | |
6328 | return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC]; | |
6329 | break; | |
6330 | default: | |
6331 | break; | |
6332 | } | |
6333 | ||
6334 | /* Generate calls to libmass if appropriate. */ | |
6335 | if (rs6000_veclib_handler) | |
6336 | return rs6000_veclib_handler (combined_fn (fn), type_out, type_in); | |
6337 | ||
6338 | return NULL_TREE; | |
6339 | } | |
6340 | ||
6341 | /* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION. */ | |
6342 | ||
6343 | static tree | |
6344 | rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out, | |
6345 | tree type_in) | |
6346 | { | |
6347 | machine_mode in_mode, out_mode; | |
6348 | int in_n, out_n; | |
6349 | ||
6350 | if (TARGET_DEBUG_BUILTIN) | |
6351 | fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n", | |
6352 | IDENTIFIER_POINTER (DECL_NAME (fndecl)), | |
6353 | GET_MODE_NAME (TYPE_MODE (type_out)), | |
6354 | GET_MODE_NAME (TYPE_MODE (type_in))); | |
6355 | ||
6356 | if (TREE_CODE (type_out) != VECTOR_TYPE | |
6357 | || TREE_CODE (type_in) != VECTOR_TYPE | |
6358 | || !TARGET_VECTORIZE_BUILTINS) | |
6359 | return NULL_TREE; | |
6360 | ||
6361 | out_mode = TYPE_MODE (TREE_TYPE (type_out)); | |
6362 | out_n = TYPE_VECTOR_SUBPARTS (type_out); | |
6363 | in_mode = TYPE_MODE (TREE_TYPE (type_in)); | |
6364 | in_n = TYPE_VECTOR_SUBPARTS (type_in); | |
6365 | ||
6366 | enum rs6000_builtins fn | |
6367 | = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); | |
6368 | switch (fn) | |
6369 | { | |
6370 | case RS6000_BUILTIN_RSQRTF: | |
6371 | if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) | |
6372 | && out_mode == SFmode && out_n == 4 | |
6373 | && in_mode == SFmode && in_n == 4) | |
6374 | return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP]; | |
6375 | break; | |
6376 | case RS6000_BUILTIN_RSQRT: | |
6377 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6378 | && out_mode == DFmode && out_n == 2 | |
6379 | && in_mode == DFmode && in_n == 2) | |
6380 | return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF]; | |
6381 | break; | |
6382 | case RS6000_BUILTIN_RECIPF: | |
6383 | if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) | |
6384 | && out_mode == SFmode && out_n == 4 | |
6385 | && in_mode == SFmode && in_n == 4) | |
6386 | return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP]; | |
6387 | break; | |
6388 | case RS6000_BUILTIN_RECIP: | |
6389 | if (VECTOR_UNIT_VSX_P (V2DFmode) | |
6390 | && out_mode == DFmode && out_n == 2 | |
6391 | && in_mode == DFmode && in_n == 2) | |
6392 | return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF]; | |
6393 | break; | |
6394 | default: | |
6395 | break; | |
6396 | } | |
6397 | return NULL_TREE; | |
6398 | } | |
6399 | \f | |
/* Default CPU string for rs6000*_file_start functions; initialized from
   TARGET_CPU_DEFAULT in rs6000_file_start.  */
static const char *rs6000_default_cpu;
6402 | ||
/* Do anything needed at the start of the asm file.  Emits a verbose-asm
   comment summarizing the rs6000 options in effect, and (for ELF targets
   without an explicit CPU) a .machine pseudo-op derived from the enabled
   ISA flags.  */

static void
rs6000_file_start (void)
{
  char buffer[80];
  const char *start = buffer;
  FILE *file = asm_out_file;

  rs6000_default_cpu = TARGET_CPU_DEFAULT;

  default_file_start ();

  if (flag_verbose_asm)
    {
      /* START initially points at the comment header in BUFFER; after the
	 first option is printed, START is reset to "" so the header is
	 emitted at most once.  */
      sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);

      if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
	{
	  fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
	  start = "";
	}

      if (global_options_set.x_rs6000_cpu_index)
	{
	  fprintf (file, "%s -mcpu=%s", start,
		   processor_target_table[rs6000_cpu_index].name);
	  start = "";
	}

      if (global_options_set.x_rs6000_tune_index)
	{
	  fprintf (file, "%s -mtune=%s", start,
		   processor_target_table[rs6000_tune_index].name);
	  start = "";
	}

      if (PPC405_ERRATUM77)
	{
	  fprintf (file, "%s PPC405CR_ERRATUM77", start);
	  start = "";
	}

#ifdef USING_ELFOS_H
      switch (rs6000_sdata)
	{
	case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
	case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
	case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
	case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
	}

      if (rs6000_sdata && g_switch_value)
	{
	  fprintf (file, "%s -G %d", start,
		   g_switch_value);
	  start = "";
	}
#endif

      /* *START == '\0' means at least one option was printed above, so
	 the comment line needs a terminating newline.  */
      if (*start == '\0')
	putc ('\n', file);
    }

#ifdef USING_ELFOS_H
  /* No configured or command-line CPU: pick the .machine directive from
     the highest ISA level implied by the enabled flag bits, newest
     first.  */
  if (!(rs6000_default_cpu && rs6000_default_cpu[0])
      && !global_options_set.x_rs6000_cpu_index)
    {
      fputs ("\t.machine ", asm_out_file);
      if ((rs6000_isa_flags & OPTION_MASK_MODULO) != 0)
	fputs ("power9\n", asm_out_file);
      else if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
	fputs ("power8\n", asm_out_file);
      else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
	fputs ("power7\n", asm_out_file);
      else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
	fputs ("power6\n", asm_out_file);
      else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
	fputs ("power5\n", asm_out_file);
      else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
	fputs ("power4\n", asm_out_file);
      else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
	fputs ("ppc64\n", asm_out_file);
      else
	fputs ("ppc\n", asm_out_file);
    }
#endif

  if (DEFAULT_ABI == ABI_ELFv2)
    fprintf (file, "\t.abiversion 2\n");
}
6494 | ||
6495 | \f | |
6496 | /* Return nonzero if this function is known to have a null epilogue. */ | |
6497 | ||
6498 | int | |
6499 | direct_return (void) | |
6500 | { | |
6501 | if (reload_completed) | |
6502 | { | |
6503 | rs6000_stack_t *info = rs6000_stack_info (); | |
6504 | ||
6505 | if (info->first_gp_reg_save == 32 | |
6506 | && info->first_fp_reg_save == 64 | |
6507 | && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1 | |
6508 | && ! info->lr_save_p | |
6509 | && ! info->cr_save_p | |
6510 | && info->vrsave_size == 0 | |
6511 | && ! info->push_p) | |
6512 | return 1; | |
6513 | } | |
6514 | ||
6515 | return 0; | |
6516 | } | |
6517 | ||
6518 | /* Return the number of instructions it takes to form a constant in an | |
6519 | integer register. */ | |
6520 | ||
6521 | int | |
6522 | num_insns_constant_wide (HOST_WIDE_INT value) | |
6523 | { | |
6524 | /* signed constant loadable with addi */ | |
6525 | if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000) | |
6526 | return 1; | |
6527 | ||
6528 | /* constant loadable with addis */ | |
6529 | else if ((value & 0xffff) == 0 | |
6530 | && (value >> 31 == -1 || value >> 31 == 0)) | |
6531 | return 1; | |
6532 | ||
6533 | else if (TARGET_POWERPC64) | |
6534 | { | |
6535 | HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000; | |
6536 | HOST_WIDE_INT high = value >> 31; | |
6537 | ||
6538 | if (high == 0 || high == -1) | |
6539 | return 2; | |
6540 | ||
6541 | high >>= 1; | |
6542 | ||
6543 | if (low == 0) | |
6544 | return num_insns_constant_wide (high) + 1; | |
6545 | else if (high == 0) | |
6546 | return num_insns_constant_wide (low) + 1; | |
6547 | else | |
6548 | return (num_insns_constant_wide (high) | |
6549 | + num_insns_constant_wide (low) + 1); | |
6550 | } | |
6551 | ||
6552 | else | |
6553 | return 2; | |
6554 | } | |
6555 | ||
/* Return the number of instructions needed to load constant OP of mode
   MODE into an integer register.  */

int
num_insns_constant (rtx op, machine_mode mode)
{
  HOST_WIDE_INT low, high;

  switch (GET_CODE (op))
    {
    case CONST_INT:
      /* A value wider than a sign-extended 32-bit quantity that matches
	 a rotate-and-mask pattern can be built in two instructions.  */
      if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
	  && rs6000_is_valid_and_mask (op, mode))
	return 2;
      else
	return num_insns_constant_wide (INTVAL (op));

    case CONST_WIDE_INT:
      {
	/* One count per element, plus an insn to combine each adjacent
	   pair of elements.  */
	int i;
	int ins = CONST_WIDE_INT_NUNITS (op) - 1;
	for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
	  ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
	return ins;
      }

    case CONST_DOUBLE:
      /* Single-word float constants are costed via their 32-bit target
	 image.  */
      if (mode == SFmode || mode == SDmode)
	{
	  long l;

	  if (DECIMAL_FLOAT_MODE_P (mode))
	    REAL_VALUE_TO_TARGET_DECIMAL32
	      (*CONST_DOUBLE_REAL_VALUE (op), l);
	  else
	    REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), l);
	  return num_insns_constant_wide ((HOST_WIDE_INT) l);
	}

      /* Otherwise split the 64-bit target image into high and low
	 words, honoring target word order.  */
      long l[2];
      if (DECIMAL_FLOAT_MODE_P (mode))
	REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (op), l);
      else
	REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (op), l);
      high = l[WORDS_BIG_ENDIAN == 0];
      low = l[WORDS_BIG_ENDIAN != 0];

      if (TARGET_32BIT)
	return (num_insns_constant_wide (low)
		+ num_insns_constant_wide (high));
      else
	{
	  /* On 64-bit, a sign-extended low word alone is enough.  */
	  if ((high == 0 && low >= 0)
	      || (high == -1 && low < 0))
	    return num_insns_constant_wide (low);

	  else if (rs6000_is_valid_and_mask (op, mode))
	    return 2;

	  else if (low == 0)
	    return num_insns_constant_wide (high) + 1;

	  else
	    return (num_insns_constant_wide (high)
		    + num_insns_constant_wide (low) + 1);
	}

    default:
      gcc_unreachable ();
    }
}
6624 | ||
6625 | /* Interpret element ELT of the CONST_VECTOR OP as an integer value. | |
6626 | If the mode of OP is MODE_VECTOR_INT, this simply returns the | |
6627 | corresponding element of the vector, but for V4SFmode and V2SFmode, | |
6628 | the corresponding "float" is interpreted as an SImode integer. */ | |
6629 | ||
6630 | HOST_WIDE_INT | |
6631 | const_vector_elt_as_int (rtx op, unsigned int elt) | |
6632 | { | |
6633 | rtx tmp; | |
6634 | ||
6635 | /* We can't handle V2DImode and V2DFmode vector constants here yet. */ | |
6636 | gcc_assert (GET_MODE (op) != V2DImode | |
6637 | && GET_MODE (op) != V2DFmode); | |
6638 | ||
6639 | tmp = CONST_VECTOR_ELT (op, elt); | |
6640 | if (GET_MODE (op) == V4SFmode | |
6641 | || GET_MODE (op) == V2SFmode) | |
6642 | tmp = gen_lowpart (SImode, tmp); | |
6643 | return INTVAL (tmp); | |
6644 | } | |
6645 | ||
/* Return true if OP can be synthesized with a particular vspltisb, vspltish
   or vspltisw instruction.  OP is a CONST_VECTOR.  Which instruction is used
   depends on STEP and COPIES, one of which will be 1.  If COPIES > 1,
   all items are set to the same value and contain COPIES replicas of the
   vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
   operand and the others are set to the value of the operand's msb.  */

static bool
vspltis_constant (rtx op, unsigned step, unsigned copies)
{
  machine_mode mode = GET_MODE (op);
  machine_mode inner = GET_MODE_INNER (mode);

  unsigned i;
  unsigned nunits;
  unsigned bitsize;
  unsigned mask;

  HOST_WIDE_INT val;
  HOST_WIDE_INT splat_val;
  HOST_WIDE_INT msb_val;

  /* 64-bit-element and 128-bit vectors are handled by other routines.  */
  if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
    return false;

  nunits = GET_MODE_NUNITS (mode);
  bitsize = GET_MODE_BITSIZE (inner);
  mask = GET_MODE_MASK (inner);

  /* VAL is the element the vsplt operand is taken from: the
     lowest-numbered element in register order.  */
  val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
  splat_val = val;
  msb_val = val >= 0 ? 0 : -1;

  /* Construct the value to be splatted, if possible.  If not, return
     false.  Each halving step checks that the upper half of SPLAT_VAL
     replicates its lower half.  */
  for (i = 2; i <= copies; i *= 2)
    {
      HOST_WIDE_INT small_val;
      bitsize /= 2;
      small_val = splat_val >> bitsize;
      mask >>= bitsize;
      if (splat_val != ((HOST_WIDE_INT)
			((unsigned HOST_WIDE_INT) small_val << bitsize)
			| (small_val & mask)))
	return false;
      splat_val = small_val;
    }

  /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw].  */
  if (EASY_VECTOR_15 (splat_val))
    ;

  /* Also check if we can splat, and then add the result to itself.  Do so if
     the value is positive, of if the splat instruction is using OP's mode;
     for splat_val < 0, the splat and the add should use the same mode.  */
  else if (EASY_VECTOR_15_ADD_SELF (splat_val)
	   && (splat_val >= 0 || (step == 1 && copies == 1)))
    ;

  /* Also check if are loading up the most significant bit which can be done by
     loading up -1 and shifting the value left by -1.  */
  else if (EASY_VECTOR_MSB (splat_val, inner))
    ;

  else
    return false;

  /* Check if VAL is present in every STEP-th element, and the
     other elements are filled with its most significant bit.  */
  for (i = 1; i < nunits; ++i)
    {
      HOST_WIDE_INT desired_val;
      unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
      if ((i & (step - 1)) == 0)
	desired_val = val;
      else
	desired_val = msb_val;

      if (desired_val != const_vector_elt_as_int (op, elt))
	return false;
    }

  return true;
}
6729 | ||
6730 | /* Like vsplitis_constant, but allow the value to be shifted left with a VSLDOI | |
6731 | instruction, filling in the bottom elements with 0 or -1. | |
6732 | ||
6733 | Return 0 if the constant cannot be generated with VSLDOI. Return positive | |
6734 | for the number of zeroes to shift in, or negative for the number of 0xff | |
6735 | bytes to shift in. | |
6736 | ||
6737 | OP is a CONST_VECTOR. */ | |
6738 | ||
6739 | int | |
6740 | vspltis_shifted (rtx op) | |
6741 | { | |
6742 | machine_mode mode = GET_MODE (op); | |
6743 | machine_mode inner = GET_MODE_INNER (mode); | |
6744 | ||
6745 | unsigned i, j; | |
6746 | unsigned nunits; | |
6747 | unsigned mask; | |
6748 | ||
6749 | HOST_WIDE_INT val; | |
6750 | ||
6751 | if (mode != V16QImode && mode != V8HImode && mode != V4SImode) | |
6752 | return false; | |
6753 | ||
6754 | /* We need to create pseudo registers to do the shift, so don't recognize | |
6755 | shift vector constants after reload. */ | |
6756 | if (!can_create_pseudo_p ()) | |
6757 | return false; | |
6758 | ||
6759 | nunits = GET_MODE_NUNITS (mode); | |
6760 | mask = GET_MODE_MASK (inner); | |
6761 | ||
6762 | val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1); | |
6763 | ||
6764 | /* Check if the value can really be the operand of a vspltis[bhw]. */ | |
6765 | if (EASY_VECTOR_15 (val)) | |
6766 | ; | |
6767 | ||
6768 | /* Also check if we are loading up the most significant bit which can be done | |
6769 | by loading up -1 and shifting the value left by -1. */ | |
6770 | else if (EASY_VECTOR_MSB (val, inner)) | |
6771 | ; | |
6772 | ||
6773 | else | |
6774 | return 0; | |
6775 | ||
6776 | /* Check if VAL is present in every STEP-th element until we find elements | |
6777 | that are 0 or all 1 bits. */ | |
6778 | for (i = 1; i < nunits; ++i) | |
6779 | { | |
6780 | unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i; | |
6781 | HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt); | |
6782 | ||
6783 | /* If the value isn't the splat value, check for the remaining elements | |
6784 | being 0/-1. */ | |
6785 | if (val != elt_val) | |
6786 | { | |
6787 | if (elt_val == 0) | |
6788 | { | |
6789 | for (j = i+1; j < nunits; ++j) | |
6790 | { | |
6791 | unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j; | |
6792 | if (const_vector_elt_as_int (op, elt2) != 0) | |
6793 | return 0; | |
6794 | } | |
6795 | ||
6796 | return (nunits - i) * GET_MODE_SIZE (inner); | |
6797 | } | |
6798 | ||
6799 | else if ((elt_val & mask) == mask) | |
6800 | { | |
6801 | for (j = i+1; j < nunits; ++j) | |
6802 | { | |
6803 | unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j; | |
6804 | if ((const_vector_elt_as_int (op, elt2) & mask) != mask) | |
6805 | return 0; | |
6806 | } | |
6807 | ||
6808 | return -((nunits - i) * GET_MODE_SIZE (inner)); | |
6809 | } | |
6810 | ||
6811 | else | |
6812 | return 0; | |
6813 | } | |
6814 | } | |
6815 | ||
6816 | /* If all elements are equal, we don't need to do VLSDOI. */ | |
6817 | return 0; | |
6818 | } | |
6819 | ||
6820 | ||
6821 | /* Return true if OP is of the given MODE and can be synthesized | |
6822 | with a vspltisb, vspltish or vspltisw. */ | |
6823 | ||
6824 | bool | |
6825 | easy_altivec_constant (rtx op, machine_mode mode) | |
6826 | { | |
6827 | unsigned step, copies; | |
6828 | ||
6829 | if (mode == VOIDmode) | |
6830 | mode = GET_MODE (op); | |
6831 | else if (mode != GET_MODE (op)) | |
6832 | return false; | |
6833 | ||
6834 | /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy | |
6835 | constants. */ | |
6836 | if (mode == V2DFmode) | |
6837 | return zero_constant (op, mode); | |
6838 | ||
6839 | else if (mode == V2DImode) | |
6840 | { | |
6841 | if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT | |
6842 | || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT) | |
6843 | return false; | |
6844 | ||
6845 | if (zero_constant (op, mode)) | |
6846 | return true; | |
6847 | ||
6848 | if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1 | |
6849 | && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1) | |
6850 | return true; | |
6851 | ||
6852 | return false; | |
6853 | } | |
6854 | ||
6855 | /* V1TImode is a special container for TImode. Ignore for now. */ | |
6856 | else if (mode == V1TImode) | |
6857 | return false; | |
6858 | ||
6859 | /* Start with a vspltisw. */ | |
6860 | step = GET_MODE_NUNITS (mode) / 4; | |
6861 | copies = 1; | |
6862 | ||
6863 | if (vspltis_constant (op, step, copies)) | |
6864 | return true; | |
6865 | ||
6866 | /* Then try with a vspltish. */ | |
6867 | if (step == 1) | |
6868 | copies <<= 1; | |
6869 | else | |
6870 | step >>= 1; | |
6871 | ||
6872 | if (vspltis_constant (op, step, copies)) | |
6873 | return true; | |
6874 | ||
6875 | /* And finally a vspltisb. */ | |
6876 | if (step == 1) | |
6877 | copies <<= 1; | |
6878 | else | |
6879 | step >>= 1; | |
6880 | ||
6881 | if (vspltis_constant (op, step, copies)) | |
6882 | return true; | |
6883 | ||
6884 | if (vspltis_shifted (op) != 0) | |
6885 | return true; | |
6886 | ||
6887 | return false; | |
6888 | } | |
6889 | ||
6890 | /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose | |
6891 | result is OP. Abort if it is not possible. */ | |
6892 | ||
6893 | rtx | |
6894 | gen_easy_altivec_constant (rtx op) | |
6895 | { | |
6896 | machine_mode mode = GET_MODE (op); | |
6897 | int nunits = GET_MODE_NUNITS (mode); | |
6898 | rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0); | |
6899 | unsigned step = nunits / 4; | |
6900 | unsigned copies = 1; | |
6901 | ||
6902 | /* Start with a vspltisw. */ | |
6903 | if (vspltis_constant (op, step, copies)) | |
6904 | return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val)); | |
6905 | ||
6906 | /* Then try with a vspltish. */ | |
6907 | if (step == 1) | |
6908 | copies <<= 1; | |
6909 | else | |
6910 | step >>= 1; | |
6911 | ||
6912 | if (vspltis_constant (op, step, copies)) | |
6913 | return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val)); | |
6914 | ||
6915 | /* And finally a vspltisb. */ | |
6916 | if (step == 1) | |
6917 | copies <<= 1; | |
6918 | else | |
6919 | step >>= 1; | |
6920 | ||
6921 | if (vspltis_constant (op, step, copies)) | |
6922 | return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val)); | |
6923 | ||
6924 | gcc_unreachable (); | |
6925 | } | |
6926 | ||
/* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
   instructions (xxspltib, vupkhsb/vextsb2w/vextb2d).

   Return the number of instructions needed (1 or 2) into the address pointed
   via NUM_INSNS_PTR.

   Return the constant that is being split via CONSTANT_PTR.  */

bool
xxspltib_constant_p (rtx op,
		     machine_mode mode,
		     int *num_insns_ptr,
		     int *constant_ptr)
{
  size_t nunits = GET_MODE_NUNITS (mode);
  size_t i;
  HOST_WIDE_INT value;
  rtx element;

  /* Set the returned values to out of bound values.  */
  *num_insns_ptr = -1;
  *constant_ptr = 256;

  if (!TARGET_P9_VECTOR)
    return false;

  if (mode == VOIDmode)
    mode = GET_MODE (op);

  else if (mode != GET_MODE (op) && GET_MODE (op) != VOIDmode)
    return false;

  /* Handle (vec_duplicate <constant>).  */
  if (GET_CODE (op) == VEC_DUPLICATE)
    {
      if (mode != V16QImode && mode != V8HImode && mode != V4SImode
	  && mode != V2DImode)
	return false;

      element = XEXP (op, 0);
      if (!CONST_INT_P (element))
	return false;

      /* The splat byte must fit in a signed 8-bit immediate.  */
      value = INTVAL (element);
      if (!IN_RANGE (value, -128, 127))
	return false;
    }

  /* Handle (const_vector [...]).  */
  else if (GET_CODE (op) == CONST_VECTOR)
    {
      if (mode != V16QImode && mode != V8HImode && mode != V4SImode
	  && mode != V2DImode)
	return false;

      element = CONST_VECTOR_ELT (op, 0);
      if (!CONST_INT_P (element))
	return false;

      value = INTVAL (element);
      if (!IN_RANGE (value, -128, 127))
	return false;

      /* All elements must be the same small integer.  */
      for (i = 1; i < nunits; i++)
	{
	  element = CONST_VECTOR_ELT (op, i);
	  if (!CONST_INT_P (element))
	    return false;

	  if (value != INTVAL (element))
	    return false;
	}
    }

  /* Handle integer constants being loaded into the upper part of the VSX
     register as a scalar.  If the value isn't 0/-1, only allow it if the mode
     can go in Altivec registers.  Prefer VSPLTISW/VUPKHSW over XXSPLITIB.  */
  else if (CONST_INT_P (op))
    {
      if (!SCALAR_INT_MODE_P (mode))
	return false;

      value = INTVAL (op);
      if (!IN_RANGE (value, -128, 127))
	return false;

      if (!IN_RANGE (value, -1, 0))
	{
	  if (!(reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID))
	    return false;

	  if (EASY_VECTOR_15 (value))
	    return false;
	}
    }

  else
    return false;

  /* See if we could generate vspltisw/vspltish directly instead of xxspltib +
     sign extend.  Special case 0/-1 to allow getting any VSX register instead
     of an Altivec register.  */
  if ((mode == V4SImode || mode == V8HImode) && !IN_RANGE (value, -1, 0)
      && EASY_VECTOR_15 (value))
    return false;

  /* Return # of instructions and the constant byte for XXSPLTIB.  */
  if (mode == V16QImode)
    *num_insns_ptr = 1;

  else if (IN_RANGE (value, -1, 0))
    *num_insns_ptr = 1;

  /* Wider elements with a non-0/-1 byte need xxspltib plus a
     sign-extension instruction.  */
  else
    *num_insns_ptr = 2;

  *constant_ptr = (int) value;
  return true;
}
7046 | ||
/* Return the assembler template for moving easy vector constant
   OPERANDS[1] into vector register OPERANDS[0], or "#" when the move
   must be handled by a splitter.  May rewrite OPERANDS[1] and set
   OPERANDS[2] to the immediate operands used by the returned
   template.  */

const char *
output_vec_const_move (rtx *operands)
{
  int cst, cst2, shift;
  machine_mode mode;
  rtx dest, vec;

  dest = operands[0];
  vec = operands[1];
  mode = GET_MODE (dest);

  if (TARGET_VSX)
    {
      bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
      int xxspltib_value = 256;
      int num_insns = -1;

      /* All-zero constant: pick the cheapest form the ISA offers.  */
      if (zero_constant (vec, mode))
	{
	  if (TARGET_P9_VECTOR)
	    return "xxspltib %x0,0";

	  else if (dest_vmx_p)
	    return "vspltisw %0,0";

	  else
	    return "xxlxor %x0,%x0,%x0";
	}

      /* All-ones constant.  */
      if (all_ones_constant (vec, mode))
	{
	  if (TARGET_P9_VECTOR)
	    return "xxspltib %x0,255";

	  else if (dest_vmx_p)
	    return "vspltisw %0,-1";

	  else if (TARGET_P8_VECTOR)
	    return "xxlorc %x0,%x0,%x0";

	  else
	    gcc_unreachable ();
	}

      if (TARGET_P9_VECTOR
	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
	{
	  if (num_insns == 1)
	    {
	      operands[2] = GEN_INT (xxspltib_value & 0xff);
	      return "xxspltib %x0,%2";
	    }

	  /* Two-instruction form (xxspltib + extend): split later.  */
	  return "#";
	}
    }

  if (TARGET_ALTIVEC)
    {
      rtx splat_vec;

      gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
      if (zero_constant (vec, mode))
	return "vspltisw %0,0";

      if (all_ones_constant (vec, mode))
	return "vspltisw %0,-1";

      /* Do we need to construct a value using VSLDOI?  */
      shift = vspltis_shifted (vec);
      if (shift != 0)
	return "#";

      /* Replace OPERANDS[1] with the scalar splat operand and emit the
	 vspltis variant matching the splat element width.  */
      splat_vec = gen_easy_altivec_constant (vec);
      gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
      operands[1] = XEXP (splat_vec, 0);
      if (!EASY_VECTOR_15 (INTVAL (operands[1])))
	return "#";

      switch (GET_MODE (splat_vec))
	{
	case E_V4SImode:
	  return "vspltisw %0,%1";

	case E_V8HImode:
	  return "vspltish %0,%1";

	case E_V16QImode:
	  return "vspltisb %0,%1";

	default:
	  gcc_unreachable ();
	}
    }

  gcc_assert (TARGET_SPE);

  /* Vector constant 0 is handled as a splitter of V2SI, and in the
     pattern of V1DI, V4HI, and V2SF.

     FIXME: We should probably return # and add post reload
     splitters for these, but this way is so easy ;-).  */
  cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
  cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
  operands[1] = CONST_VECTOR_ELT (vec, 0);
  operands[2] = CONST_VECTOR_ELT (vec, 1);
  if (cst == cst2)
    return "li %0,%1\n\tevmergelo %0,%0,%0";
  else if (WORDS_BIG_ENDIAN)
    return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
  else
    return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
}
7160 | ||
7161 | /* Initialize TARGET of vector PAIRED to VALS. */ | |
7162 | ||
7163 | void | |
7164 | paired_expand_vector_init (rtx target, rtx vals) | |
7165 | { | |
7166 | machine_mode mode = GET_MODE (target); | |
7167 | int n_elts = GET_MODE_NUNITS (mode); | |
7168 | int n_var = 0; | |
7169 | rtx x, new_rtx, tmp, constant_op, op1, op2; | |
7170 | int i; | |
7171 | ||
7172 | for (i = 0; i < n_elts; ++i) | |
7173 | { | |
7174 | x = XVECEXP (vals, 0, i); | |
7175 | if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x))) | |
7176 | ++n_var; | |
7177 | } | |
7178 | if (n_var == 0) | |
7179 | { | |
7180 | /* Load from constant pool. */ | |
7181 | emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); | |
7182 | return; | |
7183 | } | |
7184 | ||
7185 | if (n_var == 2) | |
7186 | { | |
7187 | /* The vector is initialized only with non-constants. */ | |
7188 | new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0), | |
7189 | XVECEXP (vals, 0, 1)); | |
7190 | ||
7191 | emit_move_insn (target, new_rtx); | |
7192 | return; | |
7193 | } | |
7194 | ||
7195 | /* One field is non-constant and the other one is a constant. Load the | |
7196 | constant from the constant pool and use ps_merge instruction to | |
7197 | construct the whole vector. */ | |
7198 | op1 = XVECEXP (vals, 0, 0); | |
7199 | op2 = XVECEXP (vals, 0, 1); | |
7200 | ||
7201 | constant_op = (CONSTANT_P (op1)) ? op1 : op2; | |
7202 | ||
7203 | tmp = gen_reg_rtx (GET_MODE (constant_op)); | |
7204 | emit_move_insn (tmp, constant_op); | |
7205 | ||
7206 | if (CONSTANT_P (op1)) | |
7207 | new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2); | |
7208 | else | |
7209 | new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp); | |
7210 | ||
7211 | emit_move_insn (target, new_rtx); | |
7212 | } | |
7213 | ||
7214 | void | |
7215 | paired_expand_vector_move (rtx operands[]) | |
7216 | { | |
7217 | rtx op0 = operands[0], op1 = operands[1]; | |
7218 | ||
7219 | emit_move_insn (op0, op1); | |
7220 | } | |
7221 | ||
7222 | /* Emit vector compare for code RCODE. DEST is destination, OP1 and | |
7223 | OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two | |
7224 | operands for the relation operation COND. This is a recursive | |
7225 | function. */ | |
7226 | ||
7227 | static void | |
7228 | paired_emit_vector_compare (enum rtx_code rcode, | |
7229 | rtx dest, rtx op0, rtx op1, | |
7230 | rtx cc_op0, rtx cc_op1) | |
7231 | { | |
7232 | rtx tmp = gen_reg_rtx (V2SFmode); | |
7233 | rtx tmp1, max, min; | |
7234 | ||
7235 | gcc_assert (TARGET_PAIRED_FLOAT); | |
7236 | gcc_assert (GET_MODE (op0) == GET_MODE (op1)); | |
7237 | ||
7238 | switch (rcode) | |
7239 | { | |
7240 | case LT: | |
7241 | case LTU: | |
7242 | paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1); | |
7243 | return; | |
7244 | case GE: | |
7245 | case GEU: | |
7246 | emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1)); | |
7247 | emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode))); | |
7248 | return; | |
7249 | case LE: | |
7250 | case LEU: | |
7251 | paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0); | |
7252 | return; | |
7253 | case GT: | |
7254 | paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1); | |
7255 | return; | |
7256 | case EQ: | |
7257 | tmp1 = gen_reg_rtx (V2SFmode); | |
7258 | max = gen_reg_rtx (V2SFmode); | |
7259 | min = gen_reg_rtx (V2SFmode); | |
7260 | gen_reg_rtx (V2SFmode); | |
7261 | ||
7262 | emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1)); | |
7263 | emit_insn (gen_selv2sf4 | |
7264 | (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode))); | |
7265 | emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0)); | |
7266 | emit_insn (gen_selv2sf4 | |
7267 | (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode))); | |
7268 | emit_insn (gen_subv2sf3 (tmp1, min, max)); | |
7269 | emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode))); | |
7270 | return; | |
7271 | case NE: | |
7272 | paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1); | |
7273 | return; | |
7274 | case UNLE: | |
7275 | paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1); | |
7276 | return; | |
7277 | case UNLT: | |
7278 | paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1); | |
7279 | return; | |
7280 | case UNGE: | |
7281 | paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1); | |
7282 | return; | |
7283 | case UNGT: | |
7284 | paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1); | |
7285 | return; | |
7286 | default: | |
7287 | gcc_unreachable (); | |
7288 | } | |
7289 | ||
7290 | return; | |
7291 | } | |
7292 | ||
7293 | /* Emit vector conditional expression. | |
7294 | DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands. | |
7295 | CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */ | |
7296 | ||
7297 | int | |
7298 | paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2, | |
7299 | rtx cond, rtx cc_op0, rtx cc_op1) | |
7300 | { | |
7301 | enum rtx_code rcode = GET_CODE (cond); | |
7302 | ||
7303 | if (!TARGET_PAIRED_FLOAT) | |
7304 | return 0; | |
7305 | ||
7306 | paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1); | |
7307 | ||
7308 | return 1; | |
7309 | } | |
7310 | ||
/* Initialize vector TARGET to VALS.  VALS is a PARALLEL of element rtxes;
   depending on which elements are constant and which are equal, pick the
   cheapest expansion: zero/splat/constant-pool, direct-move based
   sequences, or a stack-temp fallback.  */

void
rs6000_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true, all_const_zero = true;
  rtx x, mem;
  int i;

  /* Classify the elements: count the non-constant ones (remembering the
     index of the last one seen), and track whether every element is zero
     and whether all elements are identical.  */
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
	++n_var, one_var = i;
      else if (x != CONST0_RTX (inner_mode))
	all_const_zero = false;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  /* All elements constant: zero the register, splat an immediate, or load
     the vector from the constant pool.  */
  if (n_var == 0)
    {
      rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
      if ((int_vector_p || TARGET_VSX) && all_const_zero)
	{
	  /* Zero register.  */
	  emit_move_insn (target, CONST0_RTX (mode));
	  return;
	}
      else if (int_vector_p && easy_vector_constant (const_vec, mode))
	{
	  /* Splat immediate.  */
	  emit_insn (gen_rtx_SET (target, const_vec));
	  return;
	}
      else
	{
	  /* Load from constant pool.  */
	  emit_move_insn (target, const_vec);
	  return;
	}
    }

  /* Double word values on VSX can use xxpermdi or lxvdsx.  */
  if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
    {
      rtx op[2];
      size_t i;
      size_t num_elements = all_same ? 1 : 2;
      for (i = 0; i < num_elements; i++)
	{
	  op[i] = XVECEXP (vals, 0, i);
	  /* Just in case there is a SUBREG with a smaller mode, do a
	     conversion.  */
	  if (GET_MODE (op[i]) != inner_mode)
	    {
	      rtx tmp = gen_reg_rtx (inner_mode);
	      convert_move (tmp, op[i], 0);
	      op[i] = tmp;
	    }
	  /* Allow load with splat double word.  */
	  else if (MEM_P (op[i]))
	    {
	      if (!all_same)
		op[i] = force_reg (inner_mode, op[i]);
	    }
	  else if (!REG_P (op[i]))
	    op[i] = force_reg (inner_mode, op[i]);
	}

      if (all_same)
	{
	  if (mode == V2DFmode)
	    emit_insn (gen_vsx_splat_v2df (target, op[0]));
	  else
	    emit_insn (gen_vsx_splat_v2di (target, op[0]));
	}
      else
	{
	  if (mode == V2DFmode)
	    emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
	  else
	    emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
	}
      return;
    }

  /* Special case initializing vector int if we are on 64-bit systems with
     direct move or we have the ISA 3.0 instructions.  */
  if (mode == V4SImode && VECTOR_MEM_VSX_P (V4SImode)
      && TARGET_DIRECT_MOVE_64BIT)
    {
      if (all_same)
	{
	  rtx element0 = XVECEXP (vals, 0, 0);
	  if (MEM_P (element0))
	    element0 = rs6000_address_for_fpconvert (element0);
	  else
	    element0 = force_reg (SImode, element0);

	  if (TARGET_P9_VECTOR)
	    emit_insn (gen_vsx_splat_v4si (target, element0));
	  else
	    {
	      /* Pre-ISA-3.0: widen to DImode and splat via direct move.  */
	      rtx tmp = gen_reg_rtx (DImode);
	      emit_insn (gen_zero_extendsidi2 (tmp, element0));
	      emit_insn (gen_vsx_splat_v4si_di (target, tmp));
	    }
	  return;
	}
      else
	{
	  rtx elements[4];
	  size_t i;

	  for (i = 0; i < 4; i++)
	    {
	      elements[i] = XVECEXP (vals, 0, i);
	      if (!CONST_INT_P (elements[i]) && !REG_P (elements[i]))
		elements[i] = copy_to_mode_reg (SImode, elements[i]);
	    }

	  emit_insn (gen_vsx_init_v4si (target, elements[0], elements[1],
					elements[2], elements[3]));
	  return;
	}
    }

  /* With single precision floating point on VSX, know that internally single
     precision is actually represented as a double, and either make 2 V2DF
     vectors, and convert these vectors to single precision, or do one
     conversion, and splat the result to the other elements.  */
  if (mode == V4SFmode && VECTOR_MEM_VSX_P (V4SFmode))
    {
      if (all_same)
	{
	  rtx element0 = XVECEXP (vals, 0, 0);

	  if (TARGET_P9_VECTOR)
	    {
	      if (MEM_P (element0))
		element0 = rs6000_address_for_fpconvert (element0);

	      emit_insn (gen_vsx_splat_v4sf (target, element0));
	    }

	  else
	    {
	      /* Convert the scalar to vector-single layout, then splat
		 element 0 to all four lanes.  */
	      rtx freg = gen_reg_rtx (V4SFmode);
	      rtx sreg = force_reg (SFmode, element0);
	      rtx cvt = (TARGET_XSCVDPSPN
			 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
			 : gen_vsx_xscvdpsp_scalar (freg, sreg));

	      emit_insn (cvt);
	      emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
						      const0_rtx));
	    }
	}
      else
	{
	  rtx dbl_even = gen_reg_rtx (V2DFmode);
	  rtx dbl_odd  = gen_reg_rtx (V2DFmode);
	  rtx flt_even = gen_reg_rtx (V4SFmode);
	  rtx flt_odd  = gen_reg_rtx (V4SFmode);
	  rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
	  rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
	  rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
	  rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));

	  /* Use VMRGEW if we can instead of doing a permute.  */
	  if (TARGET_P8_VECTOR)
	    {
	      emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op2));
	      emit_insn (gen_vsx_concat_v2sf (dbl_odd, op1, op3));
	      emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
	      emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
	      if (BYTES_BIG_ENDIAN)
		emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_even, flt_odd));
	      else
		emit_insn (gen_p8_vmrgew_v4sf_direct (target, flt_odd, flt_even));
	    }
	  else
	    {
	      emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
	      emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
	      emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
	      emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
	      rs6000_expand_extract_even (target, flt_even, flt_odd);
	    }
	}
      return;
    }

  /* Special case initializing vector short/char that are splats if we are on
     64-bit systems with direct move.  */
  if (all_same && TARGET_DIRECT_MOVE_64BIT
      && (mode == V16QImode || mode == V8HImode))
    {
      rtx op0 = XVECEXP (vals, 0, 0);
      rtx di_tmp = gen_reg_rtx (DImode);

      if (!REG_P (op0))
	op0 = force_reg (GET_MODE_INNER (mode), op0);

      if (mode == V16QImode)
	{
	  emit_insn (gen_zero_extendqidi2 (di_tmp, op0));
	  emit_insn (gen_vsx_vspltb_di (target, di_tmp));
	  return;
	}

      if (mode == V8HImode)
	{
	  emit_insn (gen_zero_extendhidi2 (di_tmp, op0));
	  emit_insn (gen_vsx_vsplth_di (target, di_tmp));
	  return;
	}
    }

  /* Store value to stack temp.  Load vector element.  Splat.  However, splat
     of 64-bit items is not supported on Altivec.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
      emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
		      XVECEXP (vals, 0, 0));
      /* The UNSPEC_LVE marker pairs with the load so it is recognized as
	 the lvewx-style element load pattern.  */
      x = gen_rtx_UNSPEC (VOIDmode,
			  gen_rtvec (1, const0_rtx), UNSPEC_LVE);
      emit_insn (gen_rtx_PARALLEL (VOIDmode,
				   gen_rtvec (2,
					      gen_rtx_SET (target, mem),
					      x)));
      x = gen_rtx_VEC_SELECT (inner_mode, target,
			      gen_rtx_PARALLEL (VOIDmode,
						gen_rtvec (1, const0_rtx)));
      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite
     varying field.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      rs6000_expand_vector_init (target, copy);

      /* Insert variable.  */
      rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
7582 | ||
/* Set field ELT of TARGET to VAL.  Try a direct VSX set insn first; fall
   back to loading VAL through a stack temp and merging it into TARGET
   with a permute whose selector inserts the new element.  */

void
rs6000_expand_vector_set (rtx target, rtx val, int elt)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx reg = gen_reg_rtx (mode);
  rtx mask, mem, x;
  int width = GET_MODE_SIZE (inner_mode);
  int i;

  val = force_reg (GET_MODE (val), val);

  /* Prefer a single vsx_set_* insn when the mode/ISA supports one.  */
  if (VECTOR_MEM_VSX_P (mode))
    {
      rtx insn = NULL_RTX;
      rtx elt_rtx = GEN_INT (elt);

      if (mode == V2DFmode)
	insn = gen_vsx_set_v2df (target, target, val, elt_rtx);

      else if (mode == V2DImode)
	insn = gen_vsx_set_v2di (target, target, val, elt_rtx);

      /* ISA 3.0 small-integer element insert, 64-bit only.  */
      else if (TARGET_P9_VECTOR && TARGET_VSX_SMALL_INTEGER
	       && TARGET_UPPER_REGS_DI && TARGET_POWERPC64)
	{
	  if (mode == V4SImode)
	    insn = gen_vsx_set_v4si_p9 (target, target, val, elt_rtx);
	  else if (mode == V8HImode)
	    insn = gen_vsx_set_v8hi_p9 (target, target, val, elt_rtx);
	  else if (mode == V16QImode)
	    insn = gen_vsx_set_v16qi_p9 (target, target, val, elt_rtx);
	}

      if (insn)
	{
	  emit_insn (insn);
	  return;
	}
    }

  /* Simplify setting single element vectors like V1TImode.  */
  if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
    {
      emit_move_insn (target, gen_lowpart (mode, val));
      return;
    }

  /* Load single variable value.  Store VAL to a stack slot, then load it
     into vector register REG with the lvewx-style (UNSPEC_LVE) pattern.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
  emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
  x = gen_rtx_UNSPEC (VOIDmode,
		      gen_rtvec (1, const0_rtx), UNSPEC_LVE);
  emit_insn (gen_rtx_PARALLEL (VOIDmode,
			       gen_rtvec (2,
					  gen_rtx_SET (reg, mem),
					  x)));

  /* Linear sequence.  Selector bytes 0..15 initially copy TARGET
     unchanged.  */
  mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  for (i = 0; i < 16; ++i)
    XVECEXP (mask, 0, i) = GEN_INT (i);

  /* Set permute mask to insert element into target.  Bytes 0x10.. select
     from the second permute input (REG).  */
  for (i = 0; i < width; ++i)
    XVECEXP (mask, 0, elt*width + i)
      = GEN_INT (i + 0x10);
  x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));

  if (BYTES_BIG_ENDIAN)
    x = gen_rtx_UNSPEC (mode,
			gen_rtvec (3, target, reg,
				   force_reg (V16QImode, x)),
			UNSPEC_VPERM);
  else
    {
      if (TARGET_P9_VECTOR)
	x = gen_rtx_UNSPEC (mode,
			    gen_rtvec (3, target, reg,
				       force_reg (V16QImode, x)),
			    UNSPEC_VPERMR);
      else
	{
	  /* Invert selector.  We prefer to generate VNAND on P8 so
	     that future fusion opportunities can kick in, but must
	     generate VNOR elsewhere.  */
	  rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
	  rtx iorx = (TARGET_P8_VECTOR
		      ? gen_rtx_IOR (V16QImode, notx, notx)
		      : gen_rtx_AND (V16QImode, notx, notx));
	  rtx tmp = gen_reg_rtx (V16QImode);
	  emit_insn (gen_rtx_SET (tmp, iorx));

	  /* Permute with operands reversed and adjusted selector.  */
	  x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
			      UNSPEC_VPERM);
	}
    }

  emit_insn (gen_rtx_SET (target, x));
}
7686 | ||
7687 | /* Extract field ELT from VEC into TARGET. */ | |
7688 | ||
7689 | void | |
7690 | rs6000_expand_vector_extract (rtx target, rtx vec, rtx elt) | |
7691 | { | |
7692 | machine_mode mode = GET_MODE (vec); | |
7693 | machine_mode inner_mode = GET_MODE_INNER (mode); | |
7694 | rtx mem; | |
7695 | ||
7696 | if (VECTOR_MEM_VSX_P (mode) && CONST_INT_P (elt)) | |
7697 | { | |
7698 | switch (mode) | |
7699 | { | |
7700 | default: | |
7701 | break; | |
4e10a5a7 | 7702 | case E_V1TImode: |
83349046 SB |
7703 | gcc_assert (INTVAL (elt) == 0 && inner_mode == TImode); |
7704 | emit_move_insn (target, gen_lowpart (TImode, vec)); | |
7705 | break; | |
4e10a5a7 | 7706 | case E_V2DFmode: |
83349046 SB |
7707 | emit_insn (gen_vsx_extract_v2df (target, vec, elt)); |
7708 | return; | |
4e10a5a7 | 7709 | case E_V2DImode: |
83349046 SB |
7710 | emit_insn (gen_vsx_extract_v2di (target, vec, elt)); |
7711 | return; | |
4e10a5a7 | 7712 | case E_V4SFmode: |
83349046 SB |
7713 | emit_insn (gen_vsx_extract_v4sf (target, vec, elt)); |
7714 | return; | |
4e10a5a7 | 7715 | case E_V16QImode: |
83349046 SB |
7716 | if (TARGET_DIRECT_MOVE_64BIT) |
7717 | { | |
7718 | emit_insn (gen_vsx_extract_v16qi (target, vec, elt)); | |
7719 | return; | |
7720 | } | |
7721 | else | |
7722 | break; | |
4e10a5a7 | 7723 | case E_V8HImode: |
83349046 SB |
7724 | if (TARGET_DIRECT_MOVE_64BIT) |
7725 | { | |
7726 | emit_insn (gen_vsx_extract_v8hi (target, vec, elt)); | |
7727 | return; | |
7728 | } | |
7729 | else | |
7730 | break; | |
4e10a5a7 | 7731 | case E_V4SImode: |
83349046 SB |
7732 | if (TARGET_DIRECT_MOVE_64BIT) |
7733 | { | |
7734 | emit_insn (gen_vsx_extract_v4si (target, vec, elt)); | |
7735 | return; | |
7736 | } | |
7737 | break; | |
7738 | } | |
7739 | } | |
7740 | else if (VECTOR_MEM_VSX_P (mode) && !CONST_INT_P (elt) | |
7741 | && TARGET_DIRECT_MOVE_64BIT) | |
7742 | { | |
7743 | if (GET_MODE (elt) != DImode) | |
7744 | { | |
7745 | rtx tmp = gen_reg_rtx (DImode); | |
7746 | convert_move (tmp, elt, 0); | |
7747 | elt = tmp; | |
7748 | } | |
7749 | else if (!REG_P (elt)) | |
7750 | elt = force_reg (DImode, elt); | |
7751 | ||
7752 | switch (mode) | |
7753 | { | |
4e10a5a7 | 7754 | case E_V2DFmode: |
83349046 SB |
7755 | emit_insn (gen_vsx_extract_v2df_var (target, vec, elt)); |
7756 | return; | |
7757 | ||
4e10a5a7 | 7758 | case E_V2DImode: |
83349046 SB |
7759 | emit_insn (gen_vsx_extract_v2di_var (target, vec, elt)); |
7760 | return; | |
7761 | ||
4e10a5a7 | 7762 | case E_V4SFmode: |
83349046 SB |
7763 | emit_insn (gen_vsx_extract_v4sf_var (target, vec, elt)); |
7764 | return; | |
7765 | ||
4e10a5a7 | 7766 | case E_V4SImode: |
83349046 SB |
7767 | emit_insn (gen_vsx_extract_v4si_var (target, vec, elt)); |
7768 | return; | |
7769 | ||
4e10a5a7 | 7770 | case E_V8HImode: |
83349046 SB |
7771 | emit_insn (gen_vsx_extract_v8hi_var (target, vec, elt)); |
7772 | return; | |
7773 | ||
4e10a5a7 | 7774 | case E_V16QImode: |
83349046 SB |
7775 | emit_insn (gen_vsx_extract_v16qi_var (target, vec, elt)); |
7776 | return; | |
7777 | ||
7778 | default: | |
7779 | gcc_unreachable (); | |
7780 | } | |
7781 | } | |
7782 | ||
7783 | gcc_assert (CONST_INT_P (elt)); | |
7784 | ||
7785 | /* Allocate mode-sized buffer. */ | |
7786 | mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); | |
7787 | ||
7788 | emit_move_insn (mem, vec); | |
7789 | ||
7790 | /* Add offset to field within buffer matching vector element. */ | |
7791 | mem = adjust_address_nv (mem, inner_mode, | |
7792 | INTVAL (elt) * GET_MODE_SIZE (inner_mode)); | |
7793 | ||
7794 | emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0)); | |
7795 | } | |
7796 | ||
7797 | /* Helper function to return the register number of a RTX. */ | |
7798 | static inline int | |
7799 | regno_or_subregno (rtx op) | |
7800 | { | |
7801 | if (REG_P (op)) | |
7802 | return REGNO (op); | |
7803 | else if (SUBREG_P (op)) | |
7804 | return subreg_regno (op); | |
7805 | else | |
7806 | gcc_unreachable (); | |
7807 | } | |
7808 | ||
/* Adjust a memory address (MEM) of a vector type to point to a scalar field
   within the vector (ELEMENT) with a mode (SCALAR_MODE).  Use a base register
   temporary (BASE_TMP) to fixup the address.  Return the new memory address
   that is valid for reads or writes to a given register (SCALAR_REG).  */

rtx
rs6000_adjust_vec_address (rtx scalar_reg,
			   rtx mem,
			   rtx element,
			   rtx base_tmp,
			   machine_mode scalar_mode)
{
  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
  rtx addr = XEXP (mem, 0);
  rtx element_offset;
  rtx new_addr;
  bool valid_addr_p;

  /* Vector addresses should not have PRE_INC, PRE_DEC, or PRE_MODIFY.  */
  gcc_assert (GET_RTX_CLASS (GET_CODE (addr)) != RTX_AUTOINC);

  /* Calculate what we need to add to the address to get the element
     address.  A constant element becomes a constant byte offset; a
     variable element is shifted left by log2 (element size) into
     BASE_TMP.  */
  if (CONST_INT_P (element))
    element_offset = GEN_INT (INTVAL (element) * scalar_size);
  else
    {
      int byte_shift = exact_log2 (scalar_size);
      gcc_assert (byte_shift >= 0);

      if (byte_shift == 0)
	element_offset = element;

      else
	{
	  if (TARGET_POWERPC64)
	    emit_insn (gen_ashldi3 (base_tmp, element, GEN_INT (byte_shift)));
	  else
	    emit_insn (gen_ashlsi3 (base_tmp, element, GEN_INT (byte_shift)));

	  element_offset = base_tmp;
	}
    }

  /* Create the new address pointing to the element within the vector.  If we
     are adding 0, we don't have to change the address.  */
  if (element_offset == const0_rtx)
    new_addr = addr;

  /* A simple indirect address can be converted into a reg + offset
     address.  */
  else if (REG_P (addr) || SUBREG_P (addr))
    new_addr = gen_rtx_PLUS (Pmode, addr, element_offset);

  /* Optimize D-FORM addresses with constant offset with a constant element, to
     include the element offset in the address directly.  */
  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      rtx insn;

      gcc_assert (REG_P (op0) || SUBREG_P (op0));
      if (CONST_INT_P (op1) && CONST_INT_P (element_offset))
	{
	  /* Both offsets constant: fold them.  Keep the D-FORM only if the
	     sum fits in 16 bits (and is word-aligned for 8-byte scalars,
	     i.e. a valid DS-FORM displacement).  */
	  HOST_WIDE_INT offset = INTVAL (op1) + INTVAL (element_offset);
	  rtx offset_rtx = GEN_INT (offset);

	  if (IN_RANGE (offset, -32768, 32767)
	      && (scalar_size < 8 || (offset & 0x3) == 0))
	    new_addr = gen_rtx_PLUS (Pmode, op0, offset_rtx);
	  else
	    {
	      emit_move_insn (base_tmp, offset_rtx);
	      new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
	    }
	}
      else
	{
	  bool op1_reg_p = (REG_P (op1) || SUBREG_P (op1));
	  bool ele_reg_p = (REG_P (element_offset) || SUBREG_P (element_offset));

	  /* Note, ADDI requires the register being added to be a base
	     register.  If the register was R0, load it up into the temporary
	     and do the add.  */
	  if (op1_reg_p
	      && (ele_reg_p || reg_or_subregno (op1) != FIRST_GPR_REGNO))
	    {
	      insn = gen_add3_insn (base_tmp, op1, element_offset);
	      gcc_assert (insn != NULL_RTX);
	      emit_insn (insn);
	    }

	  else if (ele_reg_p
		   && reg_or_subregno (element_offset) != FIRST_GPR_REGNO)
	    {
	      insn = gen_add3_insn (base_tmp, element_offset, op1);
	      gcc_assert (insn != NULL_RTX);
	      emit_insn (insn);
	    }

	  else
	    {
	      emit_move_insn (base_tmp, op1);
	      emit_insn (gen_add2_insn (base_tmp, element_offset));
	    }

	  new_addr = gen_rtx_PLUS (Pmode, op0, base_tmp);
	}
    }

  /* Any other address form: materialize it in BASE_TMP first.  */
  else
    {
      emit_move_insn (base_tmp, addr);
      new_addr = gen_rtx_PLUS (Pmode, base_tmp, element_offset);
    }

  /* If we have a PLUS, we need to see whether the particular register class
     allows for D-FORM or X-FORM addressing.  */
  if (GET_CODE (new_addr) == PLUS)
    {
      rtx op1 = XEXP (new_addr, 1);
      addr_mask_type addr_mask;
      int scalar_regno = regno_or_subregno (scalar_reg);

      /* Pick the addressing-mode mask for the register class that will
	 hold the scalar.  */
      gcc_assert (scalar_regno < FIRST_PSEUDO_REGISTER);
      if (INT_REGNO_P (scalar_regno))
	addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_GPR];

      else if (FP_REGNO_P (scalar_regno))
	addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_FPR];

      else if (ALTIVEC_REGNO_P (scalar_regno))
	addr_mask = reg_addr[scalar_mode].addr_mask[RELOAD_REG_VMX];

      else
	gcc_unreachable ();

      /* reg+reg needs indexed (X-FORM) support, reg+const needs offset
	 (D-FORM) support.  */
      if (REG_P (op1) || SUBREG_P (op1))
	valid_addr_p = (addr_mask & RELOAD_REG_INDEXED) != 0;
      else
	valid_addr_p = (addr_mask & RELOAD_REG_OFFSET) != 0;
    }

  else if (REG_P (new_addr) || SUBREG_P (new_addr))
    valid_addr_p = true;

  else
    valid_addr_p = false;

  /* Not directly addressable: compute the full address into BASE_TMP and
     use it indirectly.  */
  if (!valid_addr_p)
    {
      emit_move_insn (base_tmp, new_addr);
      new_addr = base_tmp;
    }

  return change_address (mem, scalar_mode, new_addr);
}
7967 | ||
/* Split a variable vec_extract operation into the component instructions.
   DEST receives the scalar, SRC is the vector (register or memory),
   ELEMENT is the variable element number, and TMP_GPR/TMP_ALTIVEC are
   scratch registers provided by the splitter.  */

void
rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
			      rtx tmp_altivec)
{
  machine_mode mode = GET_MODE (src);
  machine_mode scalar_mode = GET_MODE (dest);
  unsigned scalar_size = GET_MODE_SIZE (scalar_mode);
  int byte_shift = exact_log2 (scalar_size);

  gcc_assert (byte_shift >= 0);

  /* If we are given a memory address, optimize to load just the element.  We
     don't have to adjust the vector element number on little endian
     systems.  */
  if (MEM_P (src))
    {
      gcc_assert (REG_P (tmp_gpr));
      emit_move_insn (dest, rs6000_adjust_vec_address (dest, src, element,
						       tmp_gpr, scalar_mode));
      return;
    }

  else if (REG_P (src) || SUBREG_P (src))
    {
      int bit_shift = byte_shift + 3;
      rtx element2;
      int dest_regno = regno_or_subregno (dest);
      int src_regno = regno_or_subregno (src);
      int element_regno = regno_or_subregno (element);

      gcc_assert (REG_P (tmp_gpr));

      /* See if we want to generate VEXTU{B,H,W}{L,R}X if the destination is in
	 a general purpose register.  */
      if (TARGET_P9_VECTOR
	  && (mode == V16QImode || mode == V8HImode || mode == V4SImode)
	  && INT_REGNO_P (dest_regno)
	  && ALTIVEC_REGNO_P (src_regno)
	  && INT_REGNO_P (element_regno))
	{
	  rtx dest_si = gen_rtx_REG (SImode, dest_regno);
	  rtx element_si = gen_rtx_REG (SImode, element_regno);

	  if (mode == V16QImode)
	    emit_insn (VECTOR_ELT_ORDER_BIG
		       ? gen_vextublx (dest_si, element_si, src)
		       : gen_vextubrx (dest_si, element_si, src));

	  else if (mode == V8HImode)
	    {
	      /* VEXTUH?X wants a byte index: element * 2.  */
	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const1_rtx));
	      emit_insn (VECTOR_ELT_ORDER_BIG
			 ? gen_vextuhlx (dest_si, tmp_gpr_si, src)
			 : gen_vextuhrx (dest_si, tmp_gpr_si, src));
	    }


	  else
	    {
	      /* V4SImode: VEXTUW?X wants a byte index: element * 4.  */
	      rtx tmp_gpr_si = gen_rtx_REG (SImode, REGNO (tmp_gpr));
	      emit_insn (gen_ashlsi3 (tmp_gpr_si, element_si, const2_rtx));
	      emit_insn (VECTOR_ELT_ORDER_BIG
			 ? gen_vextuwlx (dest_si, tmp_gpr_si, src)
			 : gen_vextuwrx (dest_si, tmp_gpr_si, src));
	    }

	  return;
	}


      gcc_assert (REG_P (tmp_altivec));

      /* For little endian, adjust element ordering.  For V2DI/V2DF, we can use
	 an XOR, otherwise we need to subtract.  The shift amount is so VSLO
	 will shift the element into the upper position (adding 3 to convert a
	 byte shift into a bit shift).  */
      if (scalar_size == 8)
	{
	  if (!VECTOR_ELT_ORDER_BIG)
	    {
	      emit_insn (gen_xordi3 (tmp_gpr, element, const1_rtx));
	      element2 = tmp_gpr;
	    }
	  else
	    element2 = element;

	  /* Generate RLDIC directly to shift left 6 bits and retrieve 1
	     bit.  */
	  emit_insn (gen_rtx_SET (tmp_gpr,
				  gen_rtx_AND (DImode,
					       gen_rtx_ASHIFT (DImode,
							       element2,
							       GEN_INT (6)),
					       GEN_INT (64))));
	}
      else
	{
	  if (!VECTOR_ELT_ORDER_BIG)
	    {
	      rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);

	      /* element2 = (nunits - 1) - (element & (nunits - 1)).  */
	      emit_insn (gen_anddi3 (tmp_gpr, element, num_ele_m1));
	      emit_insn (gen_subdi3 (tmp_gpr, num_ele_m1, tmp_gpr));
	      element2 = tmp_gpr;
	    }
	  else
	    element2 = element;

	  emit_insn (gen_ashldi3 (tmp_gpr, element2, GEN_INT (bit_shift)));
	}

      /* Get the value into the lower byte of the Altivec register where VSLO
	 expects it.  */
      if (TARGET_P9_VECTOR)
	emit_insn (gen_vsx_splat_v2di (tmp_altivec, tmp_gpr));
      else if (can_create_pseudo_p ())
	emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_gpr, tmp_gpr));
      else
	{
	  rtx tmp_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	  emit_move_insn (tmp_di, tmp_gpr);
	  emit_insn (gen_vsx_concat_v2di (tmp_altivec, tmp_di, tmp_di));
	}

      /* Do the VSLO to get the value into the final location.  */
      switch (mode)
	{
	case E_V2DFmode:
	  emit_insn (gen_vsx_vslo_v2df (dest, src, tmp_altivec));
	  return;

	case E_V2DImode:
	  emit_insn (gen_vsx_vslo_v2di (dest, src, tmp_altivec));
	  return;

	case E_V4SFmode:
	  {
	    /* SFmode result: shift as V2DI, then convert the double-layout
	       scalar down to single precision.  */
	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	    rtx tmp_altivec_v4sf = gen_rtx_REG (V4SFmode, REGNO (tmp_altivec));
	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
					  tmp_altivec));

	    emit_insn (gen_vsx_xscvspdp_scalar2 (dest, tmp_altivec_v4sf));
	    return;
	  }

	case E_V4SImode:
	case E_V8HImode:
	case E_V16QImode:
	  {
	    /* Small integers: shift into the high doubleword, move to a
	       GPR, then arithmetic-shift right to drop the garbage bits
	       below the element.  */
	    rtx tmp_altivec_di = gen_rtx_REG (DImode, REGNO (tmp_altivec));
	    rtx src_v2di = gen_rtx_REG (V2DImode, REGNO (src));
	    rtx tmp_gpr_di = gen_rtx_REG (DImode, REGNO (dest));
	    emit_insn (gen_vsx_vslo_v2di (tmp_altivec_di, src_v2di,
					  tmp_altivec));
	    emit_move_insn (tmp_gpr_di, tmp_altivec_di);
	    emit_insn (gen_ashrdi3 (tmp_gpr_di, tmp_gpr_di,
				    GEN_INT (64 - (8 * scalar_size))));
	    return;
	  }

	default:
	  gcc_unreachable ();
	}

      return;
    }
  else
    gcc_unreachable ();
}
8142 | ||
/* Helper function for rs6000_split_v4si_init to build up a DImode value from
   two SImode values.  DEST receives (SI1 << 32) | SI2; TMP is a scratch
   register used when SI2 must be zero-extended at run time.  DEST must not
   overlap SI1 or TMP.  */

static void
rs6000_split_v4si_init_di_reg (rtx dest, rtx si1, rtx si2, rtx tmp)
{
  const unsigned HOST_WIDE_INT mask_32bit = HOST_WIDE_INT_C (0xffffffff);

  /* Both halves constant: fold everything into one DImode move.  */
  if (CONST_INT_P (si1) && CONST_INT_P (si2))
    {
      unsigned HOST_WIDE_INT const1 = (UINTVAL (si1) & mask_32bit) << 32;
      unsigned HOST_WIDE_INT const2 = UINTVAL (si2) & mask_32bit;

      emit_move_insn (dest, GEN_INT (const1 | const2));
      return;
    }

  /* Put si1 into upper 32-bits of dest.  */
  if (CONST_INT_P (si1))
    emit_move_insn (dest, GEN_INT ((UINTVAL (si1) & mask_32bit) << 32));
  else
    {
      /* Generate RLDIC: shift left by 32 and mask, in one rtx so the
	 pattern can match a single rotate-and-clear insn.  */
      rtx si1_di = gen_rtx_REG (DImode, regno_or_subregno (si1));
      rtx shift_rtx = gen_rtx_ASHIFT (DImode, si1_di, GEN_INT (32));
      rtx mask_rtx = GEN_INT (mask_32bit << 32);
      rtx and_rtx = gen_rtx_AND (DImode, shift_rtx, mask_rtx);
      gcc_assert (!reg_overlap_mentioned_p (dest, si1));
      emit_insn (gen_rtx_SET (dest, and_rtx));
    }

  /* Put si2 into the temporary.  */
  gcc_assert (!reg_overlap_mentioned_p (dest, tmp));
  if (CONST_INT_P (si2))
    emit_move_insn (tmp, GEN_INT (UINTVAL (si2) & mask_32bit));
  else
    emit_insn (gen_zero_extendsidi2 (tmp, si2));

  /* Combine the two parts.  */
  emit_insn (gen_iordi3 (dest, dest, tmp));
  return;
}
8185 | ||
/* Split a V4SI initialization.  OPERANDS[0] is the destination (a GPR pair),
   OPERANDS[1..4] are the four SImode element values, and OPERANDS[5..6] are
   two scratch registers.  */

void
rs6000_split_v4si_init (rtx operands[])
{
  rtx dest = operands[0];

  /* Destination is a GPR, build up the two DImode parts in place.  */
  if (REG_P (dest) || SUBREG_P (dest))
    {
      int d_regno = regno_or_subregno (dest);
      rtx scalar1 = operands[1];
      rtx scalar2 = operands[2];
      rtx scalar3 = operands[3];
      rtx scalar4 = operands[4];
      rtx tmp1 = operands[5];
      rtx tmp2 = operands[6];

      /* Even though we only need one temporary (plus the destination, which
	 has an early clobber constraint, try to use two temporaries, one for
	 each double word created.  That way the 2nd insn scheduling pass can
	 rearrange things so the two parts are done in parallel.  */
      if (BYTES_BIG_ENDIAN)
	{
	  rtx di_lo = gen_rtx_REG (DImode, d_regno);
	  rtx di_hi = gen_rtx_REG (DImode, d_regno + 1);
	  rs6000_split_v4si_init_di_reg (di_lo, scalar1, scalar2, tmp1);
	  rs6000_split_v4si_init_di_reg (di_hi, scalar3, scalar4, tmp2);
	}
      else
	{
	  /* Little endian: both the doubleword order in the register pair
	     and the element order within each doubleword are reversed.  */
	  rtx di_lo = gen_rtx_REG (DImode, d_regno + 1);
	  rtx di_hi = gen_rtx_REG (DImode, d_regno);
	  gcc_assert (!VECTOR_ELT_ORDER_BIG);
	  rs6000_split_v4si_init_di_reg (di_lo, scalar4, scalar3, tmp1);
	  rs6000_split_v4si_init_di_reg (di_hi, scalar2, scalar1, tmp2);
	}
      return;
    }

  else
    gcc_unreachable ();
}
8229 | ||
8230 | /* Return TRUE if OP is an invalid SUBREG operation on the e500. */ | |
8231 | ||
8232 | bool | |
8233 | invalid_e500_subreg (rtx op, machine_mode mode) | |
8234 | { | |
8235 | if (TARGET_E500_DOUBLE) | |
8236 | { | |
8237 | /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or | |
8238 | subreg:TI and reg:TF. Decimal float modes are like integer | |
8239 | modes (only low part of each register used) for this | |
8240 | purpose. */ | |
8241 | if (GET_CODE (op) == SUBREG | |
8242 | && (mode == SImode || mode == DImode || mode == TImode | |
8243 | || mode == DDmode || mode == TDmode || mode == PTImode) | |
8244 | && REG_P (SUBREG_REG (op)) | |
8245 | && (GET_MODE (SUBREG_REG (op)) == DFmode | |
8246 | || GET_MODE (SUBREG_REG (op)) == TFmode | |
8247 | || GET_MODE (SUBREG_REG (op)) == IFmode | |
8248 | || GET_MODE (SUBREG_REG (op)) == KFmode)) | |
8249 | return true; | |
8250 | ||
8251 | /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and | |
8252 | reg:TI. */ | |
8253 | if (GET_CODE (op) == SUBREG | |
8254 | && (mode == DFmode || mode == TFmode || mode == IFmode | |
8255 | || mode == KFmode) | |
8256 | && REG_P (SUBREG_REG (op)) | |
8257 | && (GET_MODE (SUBREG_REG (op)) == DImode | |
8258 | || GET_MODE (SUBREG_REG (op)) == TImode | |
8259 | || GET_MODE (SUBREG_REG (op)) == PTImode | |
8260 | || GET_MODE (SUBREG_REG (op)) == DDmode | |
8261 | || GET_MODE (SUBREG_REG (op)) == TDmode)) | |
8262 | return true; | |
8263 | } | |
8264 | ||
8265 | if (TARGET_SPE | |
8266 | && GET_CODE (op) == SUBREG | |
8267 | && mode == SImode | |
8268 | && REG_P (SUBREG_REG (op)) | |
8269 | && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op)))) | |
8270 | return true; | |
8271 | ||
8272 | return false; | |
8273 | } | |
8274 | ||
8275 | /* Return alignment of TYPE. Existing alignment is ALIGN. HOW | |
8276 | selects whether the alignment is abi mandated, optional, or | |
8277 | both abi and optional alignment. */ | |
8278 | ||
8279 | unsigned int | |
8280 | rs6000_data_alignment (tree type, unsigned int align, enum data_align how) | |
8281 | { | |
8282 | if (how != align_opt) | |
8283 | { | |
8284 | if (TREE_CODE (type) == VECTOR_TYPE) | |
8285 | { | |
8286 | if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type))) | |
8287 | || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type)))) | |
8288 | { | |
8289 | if (align < 64) | |
8290 | align = 64; | |
8291 | } | |
8292 | else if (align < 128) | |
8293 | align = 128; | |
8294 | } | |
8295 | else if (TARGET_E500_DOUBLE | |
8296 | && TREE_CODE (type) == REAL_TYPE | |
8297 | && TYPE_MODE (type) == DFmode) | |
8298 | { | |
8299 | if (align < 64) | |
8300 | align = 64; | |
8301 | } | |
8302 | } | |
8303 | ||
8304 | if (how != align_abi) | |
8305 | { | |
8306 | if (TREE_CODE (type) == ARRAY_TYPE | |
8307 | && TYPE_MODE (TREE_TYPE (type)) == QImode) | |
8308 | { | |
8309 | if (align < BITS_PER_WORD) | |
8310 | align = BITS_PER_WORD; | |
8311 | } | |
8312 | } | |
8313 | ||
8314 | return align; | |
8315 | } | |
8316 | ||
e0bd6c9f RS |
8317 | /* Implement TARGET_SLOW_UNALIGNED_ACCESS. Altivec vector memory |
8318 | instructions simply ignore the low bits; SPE vector memory | |
8319 | instructions trap on unaligned accesses; VSX memory instructions are | |
8320 | aligned to 4 or 8 bytes. */ | |
8321 | ||
8322 | static bool | |
8323 | rs6000_slow_unaligned_access (machine_mode mode, unsigned int align) | |
8324 | { | |
8325 | return (STRICT_ALIGNMENT | |
8326 | || (!TARGET_EFFICIENT_UNALIGNED_VSX | |
8327 | && ((SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && align < 32) | |
8328 | || ((VECTOR_MODE_P (mode) || FLOAT128_VECTOR_P (mode)) | |
8329 | && (int) align < VECTOR_ALIGN (mode))))); | |
8330 | } | |
8331 | ||
83349046 SB |
8332 | /* Previous GCC releases forced all vector types to have 16-byte alignment. */ |
8333 | ||
8334 | bool | |
8335 | rs6000_special_adjust_field_align_p (tree type, unsigned int computed) | |
8336 | { | |
8337 | if (TARGET_ALTIVEC && TREE_CODE (type) == VECTOR_TYPE) | |
8338 | { | |
8339 | if (computed != 128) | |
8340 | { | |
8341 | static bool warned; | |
8342 | if (!warned && warn_psabi) | |
8343 | { | |
8344 | warned = true; | |
8345 | inform (input_location, | |
8346 | "the layout of aggregates containing vectors with" | |
8347 | " %d-byte alignment has changed in GCC 5", | |
8348 | computed / BITS_PER_UNIT); | |
8349 | } | |
8350 | } | |
8351 | /* In current GCC there is no special case. */ | |
8352 | return false; | |
8353 | } | |
8354 | ||
8355 | return false; | |
8356 | } | |
8357 | ||
8358 | /* AIX increases natural record alignment to doubleword if the first | |
8359 | field is an FP double while the FP fields remain word aligned. */ | |
8360 | ||
8361 | unsigned int | |
8362 | rs6000_special_round_type_align (tree type, unsigned int computed, | |
8363 | unsigned int specified) | |
8364 | { | |
8365 | unsigned int align = MAX (computed, specified); | |
8366 | tree field = TYPE_FIELDS (type); | |
8367 | ||
8368 | /* Skip all non field decls */ | |
8369 | while (field != NULL && TREE_CODE (field) != FIELD_DECL) | |
8370 | field = DECL_CHAIN (field); | |
8371 | ||
8372 | if (field != NULL && field != type) | |
8373 | { | |
8374 | type = TREE_TYPE (field); | |
8375 | while (TREE_CODE (type) == ARRAY_TYPE) | |
8376 | type = TREE_TYPE (type); | |
8377 | ||
8378 | if (type != error_mark_node && TYPE_MODE (type) == DFmode) | |
8379 | align = MAX (align, 64); | |
8380 | } | |
8381 | ||
8382 | return align; | |
8383 | } | |
8384 | ||
8385 | /* Darwin increases record alignment to the natural alignment of | |
8386 | the first field. */ | |
8387 | ||
8388 | unsigned int | |
8389 | darwin_rs6000_special_round_type_align (tree type, unsigned int computed, | |
8390 | unsigned int specified) | |
8391 | { | |
8392 | unsigned int align = MAX (computed, specified); | |
8393 | ||
8394 | if (TYPE_PACKED (type)) | |
8395 | return align; | |
8396 | ||
8397 | /* Find the first field, looking down into aggregates. */ | |
8398 | do { | |
8399 | tree field = TYPE_FIELDS (type); | |
8400 | /* Skip all non field decls */ | |
8401 | while (field != NULL && TREE_CODE (field) != FIELD_DECL) | |
8402 | field = DECL_CHAIN (field); | |
8403 | if (! field) | |
8404 | break; | |
8405 | /* A packed field does not contribute any extra alignment. */ | |
8406 | if (DECL_PACKED (field)) | |
8407 | return align; | |
8408 | type = TREE_TYPE (field); | |
8409 | while (TREE_CODE (type) == ARRAY_TYPE) | |
8410 | type = TREE_TYPE (type); | |
8411 | } while (AGGREGATE_TYPE_P (type)); | |
8412 | ||
8413 | if (! AGGREGATE_TYPE_P (type) && type != error_mark_node) | |
8414 | align = MAX (align, TYPE_ALIGN (type)); | |
8415 | ||
8416 | return align; | |
8417 | } | |
8418 | ||
/* Return 1 for an operand in small memory on V.4/eabi.  OP is either a
   SYMBOL_REF or (const (plus (symbol_ref) (const_int))).  Always 0 unless
   compiled for an ELF target with -msdata enabled and the V.4 ABI.  */

int
small_data_operand (rtx op ATTRIBUTE_UNUSED,
		    machine_mode mode ATTRIBUTE_UNUSED)
{
#if TARGET_ELF
  rtx sym_ref;

  /* No small-data section in these -msdata modes.  */
  if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
    return 0;

  if (DEFAULT_ABI != ABI_V4)
    return 0;

  /* Vector and float memory instructions have a limited offset on the
     SPE, so using a vector or float variable directly as an operand is
     not useful.  */
  if (TARGET_SPE
      && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
    return 0;

  if (GET_CODE (op) == SYMBOL_REF)
    sym_ref = op;

  /* Anything that is not sym or (const (plus sym const_int)) fails.  */
  else if (GET_CODE (op) != CONST
	   || GET_CODE (XEXP (op, 0)) != PLUS
	   || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
	   || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
    return 0;

  else
    {
      rtx sum = XEXP (op, 0);
      HOST_WIDE_INT summand;

      /* We have to be careful here, because it is the referenced address
	 that must be 32k from _SDA_BASE_, not just the symbol.  */
      summand = INTVAL (XEXP (sum, 1));
      if (summand < 0 || summand > g_switch_value)
	return 0;

      sym_ref = XEXP (sum, 0);
    }

  return SYMBOL_REF_SMALL_P (sym_ref);
#else
  return 0;
#endif
}
8469 | ||
8470 | /* Return true if either operand is a general purpose register. */ | |
8471 | ||
8472 | bool | |
8473 | gpr_or_gpr_p (rtx op0, rtx op1) | |
8474 | { | |
8475 | return ((REG_P (op0) && INT_REGNO_P (REGNO (op0))) | |
8476 | || (REG_P (op1) && INT_REGNO_P (REGNO (op1)))); | |
8477 | } | |
8478 | ||
8479 | /* Return true if this is a move direct operation between GPR registers and | |
8480 | floating point/VSX registers. */ | |
8481 | ||
8482 | bool | |
8483 | direct_move_p (rtx op0, rtx op1) | |
8484 | { | |
8485 | int regno0, regno1; | |
8486 | ||
8487 | if (!REG_P (op0) || !REG_P (op1)) | |
8488 | return false; | |
8489 | ||
8490 | if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR) | |
8491 | return false; | |
8492 | ||
8493 | regno0 = REGNO (op0); | |
8494 | regno1 = REGNO (op1); | |
8495 | if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER) | |
8496 | return false; | |
8497 | ||
8498 | if (INT_REGNO_P (regno0)) | |
8499 | return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1); | |
8500 | ||
8501 | else if (INT_REGNO_P (regno1)) | |
8502 | { | |
8503 | if (TARGET_MFPGPR && FP_REGNO_P (regno0)) | |
8504 | return true; | |
8505 | ||
8506 | else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0)) | |
8507 | return true; | |
8508 | } | |
8509 | ||
8510 | return false; | |
8511 | } | |
8512 | ||
8513 | /* Return true if the OFFSET is valid for the quad address instructions that | |
8514 | use d-form (register + offset) addressing. */ | |
8515 | ||
8516 | static inline bool | |
8517 | quad_address_offset_p (HOST_WIDE_INT offset) | |
8518 | { | |
8519 | return (IN_RANGE (offset, -32768, 32767) && ((offset) & 0xf) == 0); | |
8520 | } | |
8521 | ||
8522 | /* Return true if the ADDR is an acceptable address for a quad memory | |
8523 | operation of mode MODE (either LQ/STQ for general purpose registers, or | |
8524 | LXV/STXV for vector registers under ISA 3.0. GPR_P is true if this address | |
8525 | is intended for LQ/STQ. If it is false, the address is intended for the ISA | |
8526 | 3.0 LXV/STXV instruction. */ | |
8527 | ||
8528 | bool | |
8529 | quad_address_p (rtx addr, machine_mode mode, bool strict) | |
8530 | { | |
8531 | rtx op0, op1; | |
8532 | ||
8533 | if (GET_MODE_SIZE (mode) != 16) | |
8534 | return false; | |
8535 | ||
8536 | if (legitimate_indirect_address_p (addr, strict)) | |
8537 | return true; | |
8538 | ||
8539 | if (VECTOR_MODE_P (mode) && !mode_supports_vsx_dform_quad (mode)) | |
8540 | return false; | |
8541 | ||
8542 | if (GET_CODE (addr) != PLUS) | |
8543 | return false; | |
8544 | ||
8545 | op0 = XEXP (addr, 0); | |
8546 | if (!REG_P (op0) || !INT_REG_OK_FOR_BASE_P (op0, strict)) | |
8547 | return false; | |
8548 | ||
8549 | op1 = XEXP (addr, 1); | |
8550 | if (!CONST_INT_P (op1)) | |
8551 | return false; | |
8552 | ||
8553 | return quad_address_offset_p (INTVAL (op1)); | |
8554 | } | |
8555 | ||
8556 | /* Return true if this is a load or store quad operation. This function does | |
8557 | not handle the atomic quad memory instructions. */ | |
8558 | ||
8559 | bool | |
8560 | quad_load_store_p (rtx op0, rtx op1) | |
8561 | { | |
8562 | bool ret; | |
8563 | ||
8564 | if (!TARGET_QUAD_MEMORY) | |
8565 | ret = false; | |
8566 | ||
8567 | else if (REG_P (op0) && MEM_P (op1)) | |
8568 | ret = (quad_int_reg_operand (op0, GET_MODE (op0)) | |
8569 | && quad_memory_operand (op1, GET_MODE (op1)) | |
8570 | && !reg_overlap_mentioned_p (op0, op1)); | |
8571 | ||
8572 | else if (MEM_P (op0) && REG_P (op1)) | |
8573 | ret = (quad_memory_operand (op0, GET_MODE (op0)) | |
8574 | && quad_int_reg_operand (op1, GET_MODE (op1))); | |
8575 | ||
8576 | else | |
8577 | ret = false; | |
8578 | ||
8579 | if (TARGET_DEBUG_ADDR) | |
8580 | { | |
8581 | fprintf (stderr, "\n========== quad_load_store, return %s\n", | |
8582 | ret ? "true" : "false"); | |
8583 | debug_rtx (gen_rtx_SET (op0, op1)); | |
8584 | } | |
8585 | ||
8586 | return ret; | |
8587 | } | |
8588 | ||
8589 | /* Given an address, return a constant offset term if one exists. */ | |
8590 | ||
8591 | static rtx | |
8592 | address_offset (rtx op) | |
8593 | { | |
8594 | if (GET_CODE (op) == PRE_INC | |
8595 | || GET_CODE (op) == PRE_DEC) | |
8596 | op = XEXP (op, 0); | |
8597 | else if (GET_CODE (op) == PRE_MODIFY | |
8598 | || GET_CODE (op) == LO_SUM) | |
8599 | op = XEXP (op, 1); | |
8600 | ||
8601 | if (GET_CODE (op) == CONST) | |
8602 | op = XEXP (op, 0); | |
8603 | ||
8604 | if (GET_CODE (op) == PLUS) | |
8605 | op = XEXP (op, 1); | |
8606 | ||
8607 | if (CONST_INT_P (op)) | |
8608 | return op; | |
8609 | ||
8610 | return NULL_RTX; | |
8611 | } | |
8612 | ||
/* Return true if the MEM operand is a memory operand suitable for use
   with a (full width, possibly multiple) gpr load/store.  On
   powerpc64 this means the offset must be divisible by 4.
   Implements 'Y' constraint.

   Accept direct, indexed, offset, lo_sum and tocref.  Since this is
   a constraint function we know the operand has satisfied a suitable
   memory predicate.  Also accept some odd rtl generated by reload
   (see rs6000_legitimize_reload_address for various forms).  It is
   important that reload rtl be accepted by appropriate constraints
   but not by the operand predicate.

   Offsetting a lo_sum should not be allowed, except where we know by
   alignment that a 32k boundary is not crossed, but see the ???
   comment in rs6000_legitimize_reload_address.  Note that by
   "offsetting" here we mean a further offset to access parts of the
   MEM.  It's fine to have a lo_sum where the inner address is offset
   from a sym, since the same sym+offset will appear in the high part
   of the address calculation.  */

bool
mem_operand_gpr (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* No constant offset term: any address form already validated by the
     memory predicate is acceptable.  */
  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  /* On powerpc64 the offset must be a multiple of 4 (see above).  */
  if (TARGET_POWERPC64 && (offset & 3) != 0)
    return false;

  /* EXTRA is the number of bytes beyond the first word for a
     multi-register access; the offset of the last word must also be
     representable.  */
  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;

  /* Bias by 0x8000 so the signed 16-bit displacement range check can be
     done with one unsigned comparison, leaving room for EXTRA bytes.  */
  return offset + 0x8000 < 0x10000u - extra;
}
8659 | ||
/* As above, but for DS-FORM VSX insns.  Unlike mem_operand_gpr,
   enforce an offset divisible by 4 even for 32-bit.  */

bool
mem_operand_ds_form (rtx op, machine_mode mode)
{
  unsigned HOST_WIDE_INT offset;
  int extra;
  rtx addr = XEXP (op, 0);

  /* The whole access, not just the first word, must be offsettable.  */
  if (!offsettable_address_p (false, mode, addr))
    return false;

  /* No constant offset term means the address form is acceptable.  */
  op = address_offset (addr);
  if (op == NULL_RTX)
    return true;

  offset = INTVAL (op);
  /* DS-form displacements must be a multiple of 4.  */
  if ((offset & 3) != 0)
    return false;

  /* Bytes beyond the first word of a multi-register access.  */
  extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
  if (extra < 0)
    extra = 0;

  if (GET_CODE (addr) == LO_SUM)
    /* For lo_sum addresses, we must allow any offset except one that
       causes a wrap, so test only the low 16 bits.  */
    offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;

  /* Biased unsigned comparison against the signed 16-bit displacement
     range, leaving room for EXTRA bytes.  */
  return offset + 0x8000 < 0x10000u - extra;
}
8692 | \f | |
/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p.  */

/* Return true if MODE supports reg+offset (d-form) addressing; return
   false for modes restricted to reg+reg (indexed) addressing.  */

static bool
reg_offset_addressing_ok_p (machine_mode mode)
{
  switch (mode)
    {
    case E_V16QImode:
    case E_V8HImode:
    case E_V4SFmode:
    case E_V4SImode:
    case E_V2DFmode:
    case E_V2DImode:
    case E_V1TImode:
    case E_TImode:
    case E_TFmode:
    case E_KFmode:
      /* AltiVec/VSX vector modes.  Only reg+reg addressing was valid until the
	 ISA 3.0 vector d-form addressing mode was added.  While TImode is not
	 a vector mode, if we want to use the VSX registers to move it around,
	 we need to restrict ourselves to reg+reg addressing.  Similarly for
	 IEEE 128-bit floating point that is passed in a single vector
	 register.  */
      if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
	return mode_supports_vsx_dform_quad (mode);
      break;

    case E_V4HImode:
    case E_V2SImode:
    case E_V1DImode:
    case E_V2SFmode:
      /* Paired vector modes.  Only reg+reg addressing is valid.  */
      if (TARGET_PAIRED_FLOAT)
	return false;
      break;

    case E_SDmode:
      /* If we can do direct load/stores of SDmode, restrict it to reg+reg
	 addressing for the LFIWZX and STFIWX instructions.  */
      if (TARGET_NO_SDMODE_STACK)
	return false;
      break;

    default:
      break;
    }

  /* All remaining modes support reg+offset addressing.  */
  return true;
}
8742 | ||
8743 | static bool | |
8744 | virtual_stack_registers_memory_p (rtx op) | |
8745 | { | |
8746 | int regnum; | |
8747 | ||
8748 | if (GET_CODE (op) == REG) | |
8749 | regnum = REGNO (op); | |
8750 | ||
8751 | else if (GET_CODE (op) == PLUS | |
8752 | && GET_CODE (XEXP (op, 0)) == REG | |
8753 | && GET_CODE (XEXP (op, 1)) == CONST_INT) | |
8754 | regnum = REGNO (XEXP (op, 0)); | |
8755 | ||
8756 | else | |
8757 | return false; | |
8758 | ||
8759 | return (regnum >= FIRST_VIRTUAL_REGISTER | |
8760 | && regnum <= LAST_VIRTUAL_POINTER_REGISTER); | |
8761 | } | |
8762 | ||
/* Return true if a MODE sized memory accesses to OP plus OFFSET
   is known to not straddle a 32k boundary.  This function is used
   to determine whether -mcmodel=medium code can use TOC pointer
   relative addressing for OP.  This means the alignment of the TOC
   pointer must also be taken into account, and unfortunately that is
   only 8 bytes.  */

#ifndef POWERPC64_TOC_POINTER_ALIGNMENT
#define POWERPC64_TOC_POINTER_ALIGNMENT 8
#endif

static bool
offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
			     machine_mode mode)
{
  tree decl;
  unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;

  if (GET_CODE (op) != SYMBOL_REF)
    return false;

  /* ISA 3.0 vector d-form addressing is restricted, don't allow
     SYMBOL_REF.  */
  if (mode_supports_vsx_dform_quad (mode))
    return false;

  dsize = GET_MODE_SIZE (mode);
  decl = SYMBOL_REF_DECL (op);
  if (!decl)
    {
      /* No decl: unknown object size unless this is a section anchor or
	 constant pool entry.  */
      if (dsize == 0)
	return false;

      /* -fsection-anchors loses the original SYMBOL_REF_DECL when
	 replacing memory addresses with an anchor plus offset.  We
	 could find the decl by rummaging around in the block->objects
	 VEC for the given offset but that seems like too much work.  */
      dalign = BITS_PER_UNIT;
      if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
	  && SYMBOL_REF_ANCHOR_P (op)
	  && SYMBOL_REF_BLOCK (op) != NULL)
	{
	  struct object_block *block = SYMBOL_REF_BLOCK (op);

	  dalign = block->alignment;
	  offset += SYMBOL_REF_BLOCK_OFFSET (op);
	}
      else if (CONSTANT_POOL_ADDRESS_P (op))
	{
	  /* It would be nice to have get_pool_align()..  */
	  machine_mode cmode = get_pool_mode (op);

	  dalign = GET_MODE_ALIGNMENT (cmode);
	}
    }
  else if (DECL_P (decl))
    {
      dalign = DECL_ALIGN (decl);

      if (dsize == 0)
	{
	  /* Allow BLKmode when the entire object is known to not
	     cross a 32k boundary.  */
	  if (!DECL_SIZE_UNIT (decl))
	    return false;

	  if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
	    return false;

	  dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
	  if (dsize > 32768)
	    return false;

	  /* Cap the alignment by the TOC pointer's own alignment.  */
	  dalign /= BITS_PER_UNIT;
	  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
	    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
	  return dalign >= dsize;
	}
    }
  else
    gcc_unreachable ();

  /* Find how many bits of the alignment we know for this access.  */
  dalign /= BITS_PER_UNIT;
  if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
    dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
  /* The usable alignment of OP+OFFSET is limited by the lowest set bit
     of OFFSET: keep only the alignment bits below that bit.  */
  mask = dalign - 1;
  lsb = offset & -offset;
  mask &= lsb - 1;
  dalign = mask + 1;

  return dalign >= dsize;
}
8856 | ||
8857 | static bool | |
8858 | constant_pool_expr_p (rtx op) | |
8859 | { | |
8860 | rtx base, offset; | |
8861 | ||
8862 | split_const (op, &base, &offset); | |
8863 | return (GET_CODE (base) == SYMBOL_REF | |
8864 | && CONSTANT_POOL_ADDRESS_P (base) | |
8865 | && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode)); | |
8866 | } | |
8867 | ||
/* Set as a side effect of toc_relative_expr_p: the UNSPEC_TOCREL base
   and its constant offset, consumed by legitimate_constant_pool_address_p.  */
static const_rtx tocrel_base, tocrel_offset;

/* Return true if OP is a toc pointer relative address (the output
   of create_TOC_reference).  If STRICT, do not match non-split
   -mcmodel=large/medium toc pointer relative addresses.  */

bool
toc_relative_expr_p (const_rtx op, bool strict)
{
  if (!TARGET_TOC)
    return false;

  if (TARGET_CMODEL != CMODEL_SMALL)
    {
      /* When strict ensure we have everything tidy.  */
      if (strict
	  && !(GET_CODE (op) == LO_SUM
	       && REG_P (XEXP (op, 0))
	       && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict)))
	return false;

      /* When not strict, allow non-split TOC addresses and also allow
	 (lo_sum (high ..)) TOC addresses created during reload.  */
      if (GET_CODE (op) == LO_SUM)
	op = XEXP (op, 1);
    }

  /* Record the base and any constant addend for the caller before
     testing for the UNSPEC.  */
  tocrel_base = op;
  tocrel_offset = const0_rtx;
  if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
    {
      tocrel_base = XEXP (op, 0);
      tocrel_offset = XEXP (op, 1);
    }

  return (GET_CODE (tocrel_base) == UNSPEC
	  && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
}
8906 | ||
/* Return true if X is a constant pool address, and also for cmodel=medium
   if X is a toc-relative address known to be offsettable within MODE.
   Relies on toc_relative_expr_p setting tocrel_base/tocrel_offset.  */

bool
legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
				    bool strict)
{
  return (toc_relative_expr_p (x, strict)
	  && (TARGET_CMODEL != CMODEL_MEDIUM
	      || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
	      || mode == QImode
	      || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
					      INTVAL (tocrel_offset), mode)));
}
8921 | ||
8922 | static bool | |
8923 | legitimate_small_data_p (machine_mode mode, rtx x) | |
8924 | { | |
8925 | return (DEFAULT_ABI == ABI_V4 | |
8926 | && !flag_pic && !TARGET_TOC | |
8927 | && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST) | |
8928 | && small_data_operand (x, mode)); | |
8929 | } | |
8930 | ||
/* SPE offset addressing is limited to 5-bits worth of double words.  */
#define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)

/* Return true if X is a legitimate reg+offset address for MODE.  STRICT
   selects strict base-register checking.  If WORST_CASE, require that
   offsets for the further words of a multi-word access also be valid.  */

bool
rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
				    bool strict, bool worst_case)
{
  unsigned HOST_WIDE_INT offset;
  unsigned int extra;

  if (GET_CODE (x) != PLUS)
    return false;
  if (!REG_P (XEXP (x, 0)))
    return false;
  if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
    return false;
  /* ISA 3.0 quad d-form addresses have their own rules.  */
  if (mode_supports_vsx_dform_quad (mode))
    return quad_address_p (x, mode, strict);
  if (!reg_offset_addressing_ok_p (mode))
    return virtual_stack_registers_memory_p (x);
  if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
    return true;
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return false;

  offset = INTVAL (XEXP (x, 1));
  extra = 0;
  switch (mode)
    {
    case E_V4HImode:
    case E_V2SImode:
    case E_V1DImode:
    case E_V2SFmode:
      /* SPE vector modes.  */
      return SPE_CONST_OFFSET_OK (offset);

    case E_DFmode:
    case E_DDmode:
    case E_DImode:
      /* On e500v2, we may have:

	   (subreg:DF (mem:DI (plus (reg) (const_int))) 0).

         Which gets addressed with evldd instructions.  */
      if (TARGET_E500_DOUBLE)
	return SPE_CONST_OFFSET_OK (offset);

      /* If we are using VSX scalar loads, restrict ourselves to reg+reg
	 addressing.  */
      if (VECTOR_MEM_VSX_P (mode))
	return false;

      if (!worst_case)
	break;
      /* 32-bit: the second word needs offset+4 to be valid too.
	 64-bit: single insn, but the offset must be word aligned.  */
      if (!TARGET_POWERPC64)
	extra = 4;
      else if (offset & 3)
	return false;
      break;

    case E_TFmode:
    case E_IFmode:
    case E_KFmode:
    case E_TDmode:
    case E_TImode:
    case E_PTImode:
      if (TARGET_E500_DOUBLE)
	return (SPE_CONST_OFFSET_OK (offset)
		&& SPE_CONST_OFFSET_OK (offset + 8));

      /* 16-byte modes: the offset of the final word must be valid.  */
      extra = 8;
      if (!worst_case)
	break;
      if (!TARGET_POWERPC64)
	extra = 12;
      else if (offset & 3)
	return false;
      break;

    default:
      break;
    }

  /* Bias by 0x8000 so the signed 16-bit displacement range check is a
     single unsigned comparison, leaving room for EXTRA bytes.  */
  offset += 0x8000;
  return offset < 0x10000 - extra;
}
9017 | ||
9018 | bool | |
9019 | legitimate_indexed_address_p (rtx x, int strict) | |
9020 | { | |
9021 | rtx op0, op1; | |
9022 | ||
9023 | if (GET_CODE (x) != PLUS) | |
9024 | return false; | |
9025 | ||
9026 | op0 = XEXP (x, 0); | |
9027 | op1 = XEXP (x, 1); | |
9028 | ||
9029 | /* Recognize the rtl generated by reload which we know will later be | |
9030 | replaced with proper base and index regs. */ | |
9031 | if (!strict | |
9032 | && reload_in_progress | |
9033 | && (REG_P (op0) || GET_CODE (op0) == PLUS) | |
9034 | && REG_P (op1)) | |
9035 | return true; | |
9036 | ||
9037 | return (REG_P (op0) && REG_P (op1) | |
9038 | && ((INT_REG_OK_FOR_BASE_P (op0, strict) | |
9039 | && INT_REG_OK_FOR_INDEX_P (op1, strict)) | |
9040 | || (INT_REG_OK_FOR_BASE_P (op1, strict) | |
9041 | && INT_REG_OK_FOR_INDEX_P (op0, strict)))); | |
9042 | } | |
9043 | ||
9044 | bool | |
9045 | avoiding_indexed_address_p (machine_mode mode) | |
9046 | { | |
9047 | /* Avoid indexed addressing for modes that have non-indexed | |
9048 | load/store instruction forms. */ | |
9049 | return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode)); | |
9050 | } | |
9051 | ||
9052 | bool | |
9053 | legitimate_indirect_address_p (rtx x, int strict) | |
9054 | { | |
9055 | return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict); | |
9056 | } | |
9057 | ||
9058 | bool | |
9059 | macho_lo_sum_memory_operand (rtx x, machine_mode mode) | |
9060 | { | |
9061 | if (!TARGET_MACHO || !flag_pic | |
9062 | || mode != SImode || GET_CODE (x) != MEM) | |
9063 | return false; | |
9064 | x = XEXP (x, 0); | |
9065 | ||
9066 | if (GET_CODE (x) != LO_SUM) | |
9067 | return false; | |
9068 | if (GET_CODE (XEXP (x, 0)) != REG) | |
9069 | return false; | |
9070 | if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0)) | |
9071 | return false; | |
9072 | x = XEXP (x, 1); | |
9073 | ||
9074 | return CONSTANT_P (x); | |
9075 | } | |
9076 | ||
9077 | static bool | |
9078 | legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict) | |
9079 | { | |
9080 | if (GET_CODE (x) != LO_SUM) | |
9081 | return false; | |
9082 | if (GET_CODE (XEXP (x, 0)) != REG) | |
9083 | return false; | |
9084 | if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict)) | |
9085 | return false; | |
9086 | /* quad word addresses are restricted, and we can't use LO_SUM. */ | |
9087 | if (mode_supports_vsx_dform_quad (mode)) | |
9088 | return false; | |
9089 | /* Restrict addressing for DI because of our SUBREG hackery. */ | |
9090 | if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD) | |
9091 | return false; | |
9092 | x = XEXP (x, 1); | |
9093 | ||
9094 | if (TARGET_ELF || TARGET_MACHO) | |
9095 | { | |
9096 | bool large_toc_ok; | |
9097 | ||
9098 | if (DEFAULT_ABI == ABI_V4 && flag_pic) | |
9099 | return false; | |
9100 | /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls | |
9101 | push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS | |
9102 | recognizes some LO_SUM addresses as valid although this | |
9103 | function says opposite. In most cases, LRA through different | |
9104 | transformations can generate correct code for address reloads. | |
9105 | It can not manage only some LO_SUM cases. So we need to add | |
9106 | code analogous to one in rs6000_legitimize_reload_address for | |
9107 | LOW_SUM here saying that some addresses are still valid. */ | |
9108 | large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL | |
9109 | && small_toc_ref (x, VOIDmode)); | |
9110 | if (TARGET_TOC && ! large_toc_ok) | |
9111 | return false; | |
9112 | if (GET_MODE_NUNITS (mode) != 1) | |
9113 | return false; | |
9114 | if (GET_MODE_SIZE (mode) > UNITS_PER_WORD | |
9115 | && !(/* ??? Assume floating point reg based on mode? */ | |
9116 | TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT | |
9117 | && (mode == DFmode || mode == DDmode))) | |
9118 | return false; | |
9119 | ||
9120 | return CONSTANT_P (x) || large_toc_ok; | |
9121 | } | |
9122 | ||
9123 | return false; | |
9124 | } | |
9125 | ||
9126 | ||
9127 | /* Try machine-dependent ways of modifying an illegitimate address | |
9128 | to be legitimate. If we find one, return the new, valid address. | |
9129 | This is used from only one place: `memory_address' in explow.c. | |
9130 | ||
9131 | OLDX is the address as it was before break_out_memory_refs was | |
9132 | called. In some cases it is useful to look at this to decide what | |
9133 | needs to be done. | |
9134 | ||
9135 | It is always safe for this function to do nothing. It exists to | |
9136 | recognize opportunities to optimize the output. | |
9137 | ||
9138 | On RS/6000, first check for the sum of a register with a constant | |
9139 | integer that is out of range. If so, generate code to add the | |
9140 | constant with the low-order 16 bits masked to the register and force | |
9141 | this result into another register (this can be done with `cau'). | |
9142 | Then generate an address of REG+(CONST&0xffff), allowing for the | |
9143 | possibility of bit 16 being a one. | |
9144 | ||
9145 | Then check for the sum of a register and something not constant, try to | |
9146 | load the other things into a register and return the sum. */ | |
9147 | ||
9148 | static rtx | |
9149 | rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, | |
9150 | machine_mode mode) | |
9151 | { | |
9152 | unsigned int extra; | |
9153 | ||
9154 | if (!reg_offset_addressing_ok_p (mode) | |
9155 | || mode_supports_vsx_dform_quad (mode)) | |
9156 | { | |
9157 | if (virtual_stack_registers_memory_p (x)) | |
9158 | return x; | |
9159 | ||
9160 | /* In theory we should not be seeing addresses of the form reg+0, | |
9161 | but just in case it is generated, optimize it away. */ | |
9162 | if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx) | |
9163 | return force_reg (Pmode, XEXP (x, 0)); | |
9164 | ||
9165 | /* For TImode with load/store quad, restrict addresses to just a single | |
9166 | pointer, so it works with both GPRs and VSX registers. */ | |
9167 | /* Make sure both operands are registers. */ | |
9168 | else if (GET_CODE (x) == PLUS | |
9169 | && (mode != TImode || !TARGET_VSX_TIMODE)) | |
9170 | return gen_rtx_PLUS (Pmode, | |
9171 | force_reg (Pmode, XEXP (x, 0)), | |
9172 | force_reg (Pmode, XEXP (x, 1))); | |
9173 | else | |
9174 | return force_reg (Pmode, x); | |
9175 | } | |
9176 | if (GET_CODE (x) == SYMBOL_REF) | |
9177 | { | |
9178 | enum tls_model model = SYMBOL_REF_TLS_MODEL (x); | |
9179 | if (model != 0) | |
9180 | return rs6000_legitimize_tls_address (x, model); | |
9181 | } | |
9182 | ||
9183 | extra = 0; | |
9184 | switch (mode) | |
9185 | { | |
4e10a5a7 RS |
9186 | case E_TFmode: |
9187 | case E_TDmode: | |
9188 | case E_TImode: | |
9189 | case E_PTImode: | |
9190 | case E_IFmode: | |
9191 | case E_KFmode: | |
83349046 SB |
9192 | /* As in legitimate_offset_address_p we do not assume |
9193 | worst-case. The mode here is just a hint as to the registers | |
9194 | used. A TImode is usually in gprs, but may actually be in | |
9195 | fprs. Leave worst-case scenario for reload to handle via | |
9196 | insn constraints. PTImode is only GPRs. */ | |
9197 | extra = 8; | |
9198 | break; | |
9199 | default: | |
9200 | break; | |
9201 | } | |
9202 | ||
9203 | if (GET_CODE (x) == PLUS | |
9204 | && GET_CODE (XEXP (x, 0)) == REG | |
9205 | && GET_CODE (XEXP (x, 1)) == CONST_INT | |
9206 | && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000) | |
9207 | >= 0x10000 - extra) | |
9208 | && !(SPE_VECTOR_MODE (mode) | |
9209 | || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))) | |
9210 | { | |
9211 | HOST_WIDE_INT high_int, low_int; | |
9212 | rtx sum; | |
9213 | low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000; | |
9214 | if (low_int >= 0x8000 - extra) | |
9215 | low_int = 0; | |
9216 | high_int = INTVAL (XEXP (x, 1)) - low_int; | |
9217 | sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0), | |
9218 | GEN_INT (high_int)), 0); | |
9219 | return plus_constant (Pmode, sum, low_int); | |
9220 | } | |
9221 | else if (GET_CODE (x) == PLUS | |
9222 | && GET_CODE (XEXP (x, 0)) == REG | |
9223 | && GET_CODE (XEXP (x, 1)) != CONST_INT | |
9224 | && GET_MODE_NUNITS (mode) == 1 | |
9225 | && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD | |
9226 | || (/* ??? Assume floating point reg based on mode? */ | |
9227 | (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) | |
9228 | && (mode == DFmode || mode == DDmode))) | |
9229 | && !avoiding_indexed_address_p (mode)) | |
9230 | { | |
9231 | return gen_rtx_PLUS (Pmode, XEXP (x, 0), | |
9232 | force_reg (Pmode, force_operand (XEXP (x, 1), 0))); | |
9233 | } | |
9234 | else if (SPE_VECTOR_MODE (mode) | |
9235 | || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)) | |
9236 | { | |
9237 | if (mode == DImode) | |
9238 | return x; | |
9239 | /* We accept [reg + reg] and [reg + OFFSET]. */ | |
9240 | ||
9241 | if (GET_CODE (x) == PLUS) | |
9242 | { | |
9243 | rtx op1 = XEXP (x, 0); | |
9244 | rtx op2 = XEXP (x, 1); | |
9245 | rtx y; | |
9246 | ||
9247 | op1 = force_reg (Pmode, op1); | |
9248 | ||
9249 | if (GET_CODE (op2) != REG | |
9250 | && (GET_CODE (op2) != CONST_INT | |
9251 | || !SPE_CONST_OFFSET_OK (INTVAL (op2)) | |
9252 | || (GET_MODE_SIZE (mode) > 8 | |
9253 | && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8)))) | |
9254 | op2 = force_reg (Pmode, op2); | |
9255 | ||
9256 | /* We can't always do [reg + reg] for these, because [reg + | |
9257 | reg + offset] is not a legitimate addressing mode. */ | |
9258 | y = gen_rtx_PLUS (Pmode, op1, op2); | |
9259 | ||
9260 | if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2)) | |
9261 | return force_reg (Pmode, y); | |
9262 | else | |
9263 | return y; | |
9264 | } | |
9265 | ||
9266 | return force_reg (Pmode, x); | |
9267 | } | |
9268 | else if ((TARGET_ELF | |
9269 | #if TARGET_MACHO | |
9270 | || !MACHO_DYNAMIC_NO_PIC_P | |
9271 | #endif | |
9272 | ) | |
9273 | && TARGET_32BIT | |
9274 | && TARGET_NO_TOC | |
9275 | && ! flag_pic | |
9276 | && GET_CODE (x) != CONST_INT | |
9277 | && GET_CODE (x) != CONST_WIDE_INT | |
9278 | && GET_CODE (x) != CONST_DOUBLE | |
9279 | && CONSTANT_P (x) | |
9280 | && GET_MODE_NUNITS (mode) == 1 | |
9281 | && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD | |
9282 | || (/* ??? Assume floating point reg based on mode? */ | |
9283 | (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) | |
9284 | && (mode == DFmode || mode == DDmode)))) | |
9285 | { | |
9286 | rtx reg = gen_reg_rtx (Pmode); | |
9287 | if (TARGET_ELF) | |
9288 | emit_insn (gen_elf_high (reg, x)); | |
9289 | else | |
9290 | emit_insn (gen_macho_high (reg, x)); | |
9291 | return gen_rtx_LO_SUM (Pmode, reg, x); | |
9292 | } | |
9293 | else if (TARGET_TOC | |
9294 | && GET_CODE (x) == SYMBOL_REF | |
9295 | && constant_pool_expr_p (x) | |
9296 | && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode)) | |
9297 | return create_TOC_reference (x, NULL_RTX); | |
9298 | else | |
9299 | return x; | |
9300 | } | |
9301 | ||
9302 | /* Debug version of rs6000_legitimize_address. */ | |
9303 | static rtx | |
9304 | rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode) | |
9305 | { | |
9306 | rtx ret; | |
9307 | rtx_insn *insns; | |
9308 | ||
9309 | start_sequence (); | |
9310 | ret = rs6000_legitimize_address (x, oldx, mode); | |
9311 | insns = get_insns (); | |
9312 | end_sequence (); | |
9313 | ||
9314 | if (ret != x) | |
9315 | { | |
9316 | fprintf (stderr, | |
9317 | "\nrs6000_legitimize_address: mode %s, old code %s, " | |
9318 | "new code %s, modified\n", | |
9319 | GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)), | |
9320 | GET_RTX_NAME (GET_CODE (ret))); | |
9321 | ||
9322 | fprintf (stderr, "Original address:\n"); | |
9323 | debug_rtx (x); | |
9324 | ||
9325 | fprintf (stderr, "oldx:\n"); | |
9326 | debug_rtx (oldx); | |
9327 | ||
9328 | fprintf (stderr, "New address:\n"); | |
9329 | debug_rtx (ret); | |
9330 | ||
9331 | if (insns) | |
9332 | { | |
9333 | fprintf (stderr, "Insns added:\n"); | |
9334 | debug_rtx_list (insns, 20); | |
9335 | } | |
9336 | } | |
9337 | else | |
9338 | { | |
9339 | fprintf (stderr, | |
9340 | "\nrs6000_legitimize_address: mode %s, code %s, no change:\n", | |
9341 | GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x))); | |
9342 | ||
9343 | debug_rtx (x); | |
9344 | } | |
9345 | ||
9346 | if (insns) | |
9347 | emit_insn (insns); | |
9348 | ||
9349 | return ret; | |
9350 | } | |
9351 | ||
9352 | /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. | |
9353 | We need to emit DTP-relative relocations. */ | |
9354 | ||
9355 | static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; | |
9356 | static void | |
9357 | rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x) | |
9358 | { | |
9359 | switch (size) | |
9360 | { | |
9361 | case 4: | |
9362 | fputs ("\t.long\t", file); | |
9363 | break; | |
9364 | case 8: | |
9365 | fputs (DOUBLE_INT_ASM_OP, file); | |
9366 | break; | |
9367 | default: | |
9368 | gcc_unreachable (); | |
9369 | } | |
9370 | output_addr_const (file, x); | |
9371 | if (TARGET_ELF) | |
9372 | fputs ("@dtprel+0x8000", file); | |
9373 | else if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF) | |
9374 | { | |
9375 | switch (SYMBOL_REF_TLS_MODEL (x)) | |
9376 | { | |
9377 | case 0: | |
9378 | break; | |
9379 | case TLS_MODEL_LOCAL_EXEC: | |
9380 | fputs ("@le", file); | |
9381 | break; | |
9382 | case TLS_MODEL_INITIAL_EXEC: | |
9383 | fputs ("@ie", file); | |
9384 | break; | |
9385 | case TLS_MODEL_GLOBAL_DYNAMIC: | |
9386 | case TLS_MODEL_LOCAL_DYNAMIC: | |
9387 | fputs ("@m", file); | |
9388 | break; | |
9389 | default: | |
9390 | gcc_unreachable (); | |
9391 | } | |
9392 | } | |
9393 | } | |
9394 | ||
9395 | /* Return true if X is a symbol that refers to real (rather than emulated) | |
9396 | TLS. */ | |
9397 | ||
9398 | static bool | |
9399 | rs6000_real_tls_symbol_ref_p (rtx x) | |
9400 | { | |
9401 | return (GET_CODE (x) == SYMBOL_REF | |
9402 | && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL); | |
9403 | } | |
9404 | ||
9405 | /* In the name of slightly smaller debug output, and to cater to | |
9406 | general assembler lossage, recognize various UNSPEC sequences | |
9407 | and turn them back into a direct symbol reference. */ | |
9408 | ||
9409 | static rtx | |
9410 | rs6000_delegitimize_address (rtx orig_x) | |
9411 | { | |
9412 | rtx x, y, offset; | |
9413 | ||
9414 | orig_x = delegitimize_mem_from_attrs (orig_x); | |
9415 | x = orig_x; | |
9416 | if (MEM_P (x)) | |
9417 | x = XEXP (x, 0); | |
9418 | ||
9419 | y = x; | |
9420 | if (TARGET_CMODEL != CMODEL_SMALL | |
9421 | && GET_CODE (y) == LO_SUM) | |
9422 | y = XEXP (y, 1); | |
9423 | ||
9424 | offset = NULL_RTX; | |
9425 | if (GET_CODE (y) == PLUS | |
9426 | && GET_MODE (y) == Pmode | |
9427 | && CONST_INT_P (XEXP (y, 1))) | |
9428 | { | |
9429 | offset = XEXP (y, 1); | |
9430 | y = XEXP (y, 0); | |
9431 | } | |
9432 | ||
9433 | if (GET_CODE (y) == UNSPEC | |
9434 | && XINT (y, 1) == UNSPEC_TOCREL) | |
9435 | { | |
9436 | y = XVECEXP (y, 0, 0); | |
9437 | ||
9438 | #ifdef HAVE_AS_TLS | |
9439 | /* Do not associate thread-local symbols with the original | |
9440 | constant pool symbol. */ | |
9441 | if (TARGET_XCOFF | |
9442 | && GET_CODE (y) == SYMBOL_REF | |
9443 | && CONSTANT_POOL_ADDRESS_P (y) | |
9444 | && rs6000_real_tls_symbol_ref_p (get_pool_constant (y))) | |
9445 | return orig_x; | |
9446 | #endif | |
9447 | ||
9448 | if (offset != NULL_RTX) | |
9449 | y = gen_rtx_PLUS (Pmode, y, offset); | |
9450 | if (!MEM_P (orig_x)) | |
9451 | return y; | |
9452 | else | |
9453 | return replace_equiv_address_nv (orig_x, y); | |
9454 | } | |
9455 | ||
9456 | if (TARGET_MACHO | |
9457 | && GET_CODE (orig_x) == LO_SUM | |
9458 | && GET_CODE (XEXP (orig_x, 1)) == CONST) | |
9459 | { | |
9460 | y = XEXP (XEXP (orig_x, 1), 0); | |
9461 | if (GET_CODE (y) == UNSPEC | |
9462 | && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET) | |
9463 | return XVECEXP (y, 0, 0); | |
9464 | } | |
9465 | ||
9466 | return orig_x; | |
9467 | } | |
9468 | ||
9469 | /* Return true if X shouldn't be emitted into the debug info. | |
9470 | The linker doesn't like .toc section references from | |
9471 | .debug_* sections, so reject .toc section symbols. */ | |
9472 | ||
9473 | static bool | |
9474 | rs6000_const_not_ok_for_debug_p (rtx x) | |
9475 | { | |
6b10f174 JJ |
9476 | if (GET_CODE (x) == UNSPEC) |
9477 | return true; | |
83349046 SB |
9478 | if (GET_CODE (x) == SYMBOL_REF |
9479 | && CONSTANT_POOL_ADDRESS_P (x)) | |
9480 | { | |
9481 | rtx c = get_pool_constant (x); | |
9482 | machine_mode cmode = get_pool_mode (x); | |
9483 | if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode)) | |
9484 | return true; | |
9485 | } | |
9486 | ||
9487 | return false; | |
9488 | } | |
9489 | ||
9490 | ||
9491 | /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */ | |
9492 | ||
9493 | static bool | |
9494 | rs6000_legitimate_combined_insn (rtx_insn *insn) | |
9495 | { | |
9496 | int icode = INSN_CODE (insn); | |
9497 | ||
9498 | /* Reject creating doloop insns. Combine should not be allowed | |
9499 | to create these for a number of reasons: | |
9500 | 1) In a nested loop, if combine creates one of these in an | |
9501 | outer loop and the register allocator happens to allocate ctr | |
9502 | to the outer loop insn, then the inner loop can't use ctr. | |
9503 | Inner loops ought to be more highly optimized. | |
9504 | 2) Combine often wants to create one of these from what was | |
9505 | originally a three insn sequence, first combining the three | |
9506 | insns to two, then to ctrsi/ctrdi. When ctrsi/ctrdi is not | |
9507 | allocated ctr, the splitter takes use back to the three insn | |
9508 | sequence. It's better to stop combine at the two insn | |
9509 | sequence. | |
9510 | 3) Faced with not being able to allocate ctr for ctrsi/crtdi | |
9511 | insns, the register allocator sometimes uses floating point | |
9512 | or vector registers for the pseudo. Since ctrsi/ctrdi is a | |
9513 | jump insn and output reloads are not implemented for jumps, | |
9514 | the ctrsi/ctrdi splitters need to handle all possible cases. | |
9515 | That's a pain, and it gets to be seriously difficult when a | |
9516 | splitter that runs after reload needs memory to transfer from | |
9517 | a gpr to fpr. See PR70098 and PR71763 which are not fixed | |
9518 | for the difficult case. It's better to not create problems | |
9519 | in the first place. */ | |
9520 | if (icode != CODE_FOR_nothing | |
9521 | && (icode == CODE_FOR_ctrsi_internal1 | |
9522 | || icode == CODE_FOR_ctrdi_internal1 | |
9523 | || icode == CODE_FOR_ctrsi_internal2 | |
9524 | || icode == CODE_FOR_ctrdi_internal2 | |
9525 | || icode == CODE_FOR_ctrsi_internal3 | |
9526 | || icode == CODE_FOR_ctrdi_internal3 | |
9527 | || icode == CODE_FOR_ctrsi_internal4 | |
9528 | || icode == CODE_FOR_ctrdi_internal4)) | |
9529 | return false; | |
9530 | ||
9531 | return true; | |
9532 | } | |
9533 | ||
9534 | /* Construct the SYMBOL_REF for the tls_get_addr function. */ | |
9535 | ||
9536 | static GTY(()) rtx rs6000_tls_symbol; | |
9537 | static rtx | |
9538 | rs6000_tls_get_addr (void) | |
9539 | { | |
9540 | if (!rs6000_tls_symbol) | |
9541 | rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr"); | |
9542 | ||
9543 | return rs6000_tls_symbol; | |
9544 | } | |
9545 | ||
9546 | /* Construct the SYMBOL_REF for TLS GOT references. */ | |
9547 | ||
9548 | static GTY(()) rtx rs6000_got_symbol; | |
9549 | static rtx | |
9550 | rs6000_got_sym (void) | |
9551 | { | |
9552 | if (!rs6000_got_symbol) | |
9553 | { | |
9554 | rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); | |
9555 | SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL; | |
9556 | SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL; | |
9557 | } | |
9558 | ||
9559 | return rs6000_got_symbol; | |
9560 | } | |
9561 | ||
9562 | /* AIX Thread-Local Address support. */ | |
9563 | ||
9564 | static rtx | |
9565 | rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model) | |
9566 | { | |
9567 | rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr; | |
9568 | const char *name; | |
9569 | char *tlsname; | |
9570 | ||
9571 | name = XSTR (addr, 0); | |
9572 | /* Append TLS CSECT qualifier, unless the symbol already is qualified | |
9573 | or the symbol will be in TLS private data section. */ | |
9574 | if (name[strlen (name) - 1] != ']' | |
9575 | && (TREE_PUBLIC (SYMBOL_REF_DECL (addr)) | |
9576 | || bss_initializer_p (SYMBOL_REF_DECL (addr)))) | |
9577 | { | |
9578 | tlsname = XALLOCAVEC (char, strlen (name) + 4); | |
9579 | strcpy (tlsname, name); | |
9580 | strcat (tlsname, | |
9581 | bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]"); | |
9582 | tlsaddr = copy_rtx (addr); | |
9583 | XSTR (tlsaddr, 0) = ggc_strdup (tlsname); | |
9584 | } | |
9585 | else | |
9586 | tlsaddr = addr; | |
9587 | ||
9588 | /* Place addr into TOC constant pool. */ | |
9589 | sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr); | |
9590 | ||
9591 | /* Output the TOC entry and create the MEM referencing the value. */ | |
9592 | if (constant_pool_expr_p (XEXP (sym, 0)) | |
9593 | && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode)) | |
9594 | { | |
9595 | tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX); | |
9596 | mem = gen_const_mem (Pmode, tocref); | |
9597 | set_mem_alias_set (mem, get_TOC_alias_set ()); | |
9598 | } | |
9599 | else | |
9600 | return sym; | |
9601 | ||
9602 | /* Use global-dynamic for local-dynamic. */ | |
9603 | if (model == TLS_MODEL_GLOBAL_DYNAMIC | |
9604 | || model == TLS_MODEL_LOCAL_DYNAMIC) | |
9605 | { | |
9606 | /* Create new TOC reference for @m symbol. */ | |
9607 | name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0); | |
9608 | tlsname = XALLOCAVEC (char, strlen (name) + 1); | |
9609 | strcpy (tlsname, "*LCM"); | |
9610 | strcat (tlsname, name + 3); | |
9611 | rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname)); | |
9612 | SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL; | |
9613 | tocref = create_TOC_reference (modaddr, NULL_RTX); | |
9614 | rtx modmem = gen_const_mem (Pmode, tocref); | |
9615 | set_mem_alias_set (modmem, get_TOC_alias_set ()); | |
9616 | ||
9617 | rtx modreg = gen_reg_rtx (Pmode); | |
9618 | emit_insn (gen_rtx_SET (modreg, modmem)); | |
9619 | ||
9620 | tmpreg = gen_reg_rtx (Pmode); | |
9621 | emit_insn (gen_rtx_SET (tmpreg, mem)); | |
9622 | ||
9623 | dest = gen_reg_rtx (Pmode); | |
9624 | if (TARGET_32BIT) | |
9625 | emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg)); | |
9626 | else | |
9627 | emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg)); | |
9628 | return dest; | |
9629 | } | |
9630 | /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */ | |
9631 | else if (TARGET_32BIT) | |
9632 | { | |
9633 | tlsreg = gen_reg_rtx (SImode); | |
9634 | emit_insn (gen_tls_get_tpointer (tlsreg)); | |
9635 | } | |
9636 | else | |
9637 | tlsreg = gen_rtx_REG (DImode, 13); | |
9638 | ||
9639 | /* Load the TOC value into temporary register. */ | |
9640 | tmpreg = gen_reg_rtx (Pmode); | |
9641 | emit_insn (gen_rtx_SET (tmpreg, mem)); | |
9642 | set_unique_reg_note (get_last_insn (), REG_EQUAL, | |
9643 | gen_rtx_MINUS (Pmode, addr, tlsreg)); | |
9644 | ||
9645 | /* Add TOC symbol value to TLS pointer. */ | |
9646 | dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg)); | |
9647 | ||
9648 | return dest; | |
9649 | } | |
9650 | ||
9651 | /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute | |
9652 | this (thread-local) address. */ | |
9653 | ||
9654 | static rtx | |
9655 | rs6000_legitimize_tls_address (rtx addr, enum tls_model model) | |
9656 | { | |
9657 | rtx dest, insn; | |
9658 | ||
9659 | if (TARGET_XCOFF) | |
9660 | return rs6000_legitimize_tls_address_aix (addr, model); | |
9661 | ||
9662 | dest = gen_reg_rtx (Pmode); | |
9663 | if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16) | |
9664 | { | |
9665 | rtx tlsreg; | |
9666 | ||
9667 | if (TARGET_64BIT) | |
9668 | { | |
9669 | tlsreg = gen_rtx_REG (Pmode, 13); | |
9670 | insn = gen_tls_tprel_64 (dest, tlsreg, addr); | |
9671 | } | |
9672 | else | |
9673 | { | |
9674 | tlsreg = gen_rtx_REG (Pmode, 2); | |
9675 | insn = gen_tls_tprel_32 (dest, tlsreg, addr); | |
9676 | } | |
9677 | emit_insn (insn); | |
9678 | } | |
9679 | else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32) | |
9680 | { | |
9681 | rtx tlsreg, tmp; | |
9682 | ||
9683 | tmp = gen_reg_rtx (Pmode); | |
9684 | if (TARGET_64BIT) | |
9685 | { | |
9686 | tlsreg = gen_rtx_REG (Pmode, 13); | |
9687 | insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr); | |
9688 | } | |
9689 | else | |
9690 | { | |
9691 | tlsreg = gen_rtx_REG (Pmode, 2); | |
9692 | insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr); | |
9693 | } | |
9694 | emit_insn (insn); | |
9695 | if (TARGET_64BIT) | |
9696 | insn = gen_tls_tprel_lo_64 (dest, tmp, addr); | |
9697 | else | |
9698 | insn = gen_tls_tprel_lo_32 (dest, tmp, addr); | |
9699 | emit_insn (insn); | |
9700 | } | |
9701 | else | |
9702 | { | |
9703 | rtx r3, got, tga, tmp1, tmp2, call_insn; | |
9704 | ||
9705 | /* We currently use relocations like @got@tlsgd for tls, which | |
9706 | means the linker will handle allocation of tls entries, placing | |
9707 | them in the .got section. So use a pointer to the .got section, | |
9708 | not one to secondary TOC sections used by 64-bit -mminimal-toc, | |
9709 | or to secondary GOT sections used by 32-bit -fPIC. */ | |
9710 | if (TARGET_64BIT) | |
9711 | got = gen_rtx_REG (Pmode, 2); | |
9712 | else | |
9713 | { | |
9714 | if (flag_pic == 1) | |
9715 | got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); | |
9716 | else | |
9717 | { | |
9718 | rtx gsym = rs6000_got_sym (); | |
9719 | got = gen_reg_rtx (Pmode); | |
9720 | if (flag_pic == 0) | |
9721 | rs6000_emit_move (got, gsym, Pmode); | |
9722 | else | |
9723 | { | |
9724 | rtx mem, lab; | |
9725 | ||
9726 | tmp1 = gen_reg_rtx (Pmode); | |
9727 | tmp2 = gen_reg_rtx (Pmode); | |
9728 | mem = gen_const_mem (Pmode, tmp1); | |
9729 | lab = gen_label_rtx (); | |
9730 | emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab)); | |
9731 | emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO)); | |
9732 | if (TARGET_LINK_STACK) | |
9733 | emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4))); | |
9734 | emit_move_insn (tmp2, mem); | |
9735 | rtx_insn *last = emit_insn (gen_addsi3 (got, tmp1, tmp2)); | |
9736 | set_unique_reg_note (last, REG_EQUAL, gsym); | |
9737 | } | |
9738 | } | |
9739 | } | |
9740 | ||
9741 | if (model == TLS_MODEL_GLOBAL_DYNAMIC) | |
9742 | { | |
9743 | tga = rs6000_tls_get_addr (); | |
9744 | emit_library_call_value (tga, dest, LCT_CONST, Pmode, | |
db69559b | 9745 | const0_rtx, Pmode); |
83349046 SB |
9746 | |
9747 | r3 = gen_rtx_REG (Pmode, 3); | |
9748 | if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
9749 | { | |
9750 | if (TARGET_64BIT) | |
9751 | insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx); | |
9752 | else | |
9753 | insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx); | |
9754 | } | |
9755 | else if (DEFAULT_ABI == ABI_V4) | |
9756 | insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx); | |
9757 | else | |
9758 | gcc_unreachable (); | |
9759 | call_insn = last_call_insn (); | |
9760 | PATTERN (call_insn) = insn; | |
9761 | if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic) | |
9762 | use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), | |
9763 | pic_offset_table_rtx); | |
9764 | } | |
9765 | else if (model == TLS_MODEL_LOCAL_DYNAMIC) | |
9766 | { | |
9767 | tga = rs6000_tls_get_addr (); | |
9768 | tmp1 = gen_reg_rtx (Pmode); | |
9769 | emit_library_call_value (tga, tmp1, LCT_CONST, Pmode, | |
db69559b | 9770 | const0_rtx, Pmode); |
83349046 SB |
9771 | |
9772 | r3 = gen_rtx_REG (Pmode, 3); | |
9773 | if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
9774 | { | |
9775 | if (TARGET_64BIT) | |
9776 | insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx); | |
9777 | else | |
9778 | insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx); | |
9779 | } | |
9780 | else if (DEFAULT_ABI == ABI_V4) | |
9781 | insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx); | |
9782 | else | |
9783 | gcc_unreachable (); | |
9784 | call_insn = last_call_insn (); | |
9785 | PATTERN (call_insn) = insn; | |
9786 | if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic) | |
9787 | use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), | |
9788 | pic_offset_table_rtx); | |
9789 | ||
9790 | if (rs6000_tls_size == 16) | |
9791 | { | |
9792 | if (TARGET_64BIT) | |
9793 | insn = gen_tls_dtprel_64 (dest, tmp1, addr); | |
9794 | else | |
9795 | insn = gen_tls_dtprel_32 (dest, tmp1, addr); | |
9796 | } | |
9797 | else if (rs6000_tls_size == 32) | |
9798 | { | |
9799 | tmp2 = gen_reg_rtx (Pmode); | |
9800 | if (TARGET_64BIT) | |
9801 | insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr); | |
9802 | else | |
9803 | insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr); | |
9804 | emit_insn (insn); | |
9805 | if (TARGET_64BIT) | |
9806 | insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr); | |
9807 | else | |
9808 | insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr); | |
9809 | } | |
9810 | else | |
9811 | { | |
9812 | tmp2 = gen_reg_rtx (Pmode); | |
9813 | if (TARGET_64BIT) | |
9814 | insn = gen_tls_got_dtprel_64 (tmp2, got, addr); | |
9815 | else | |
9816 | insn = gen_tls_got_dtprel_32 (tmp2, got, addr); | |
9817 | emit_insn (insn); | |
9818 | insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1)); | |
9819 | } | |
9820 | emit_insn (insn); | |
9821 | } | |
9822 | else | |
9823 | { | |
9824 | /* IE, or 64-bit offset LE. */ | |
9825 | tmp2 = gen_reg_rtx (Pmode); | |
9826 | if (TARGET_64BIT) | |
9827 | insn = gen_tls_got_tprel_64 (tmp2, got, addr); | |
9828 | else | |
9829 | insn = gen_tls_got_tprel_32 (tmp2, got, addr); | |
9830 | emit_insn (insn); | |
9831 | if (TARGET_64BIT) | |
9832 | insn = gen_tls_tls_64 (dest, tmp2, addr); | |
9833 | else | |
9834 | insn = gen_tls_tls_32 (dest, tmp2, addr); | |
9835 | emit_insn (insn); | |
9836 | } | |
9837 | } | |
9838 | ||
9839 | return dest; | |
9840 | } | |
9841 | ||
9842 | /* Only create the global variable for the stack protect guard if we are using | |
9843 | the global flavor of that guard. */ | |
9844 | static tree | |
9845 | rs6000_init_stack_protect_guard (void) | |
9846 | { | |
9847 | if (rs6000_stack_protector_guard == SSP_GLOBAL) | |
9848 | return default_stack_protect_guard (); | |
9849 | ||
9850 | return NULL_TREE; | |
9851 | } | |
9852 | ||
9853 | /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ | |
9854 | ||
9855 | static bool | |
9856 | rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) | |
9857 | { | |
9858 | if (GET_CODE (x) == HIGH | |
9859 | && GET_CODE (XEXP (x, 0)) == UNSPEC) | |
9860 | return true; | |
9861 | ||
9862 | /* A TLS symbol in the TOC cannot contain a sum. */ | |
9863 | if (GET_CODE (x) == CONST | |
9864 | && GET_CODE (XEXP (x, 0)) == PLUS | |
9865 | && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF | |
9866 | && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0) | |
9867 | return true; | |
9868 | ||
9869 | /* Do not place an ELF TLS symbol in the constant pool. */ | |
9870 | return TARGET_ELF && tls_referenced_p (x); | |
9871 | } | |
9872 | ||
9873 | /* Return true iff the given SYMBOL_REF refers to a constant pool entry | |
9874 | that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF | |
9875 | can be addressed relative to the toc pointer. */ | |
9876 | ||
9877 | static bool | |
9878 | use_toc_relative_ref (rtx sym, machine_mode mode) | |
9879 | { | |
9880 | return ((constant_pool_expr_p (sym) | |
9881 | && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym), | |
9882 | get_pool_mode (sym))) | |
9883 | || (TARGET_CMODEL == CMODEL_MEDIUM | |
9884 | && SYMBOL_REF_LOCAL_P (sym) | |
9885 | && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT)); | |
9886 | } | |
9887 | ||
/* Our implementation of LEGITIMIZE_RELOAD_ADDRESS.  Returns a value to
   replace the input X, or the original X if no replacement is called for.
   The output parameter *WIN is 1 if the calling macro should goto WIN,
   0 if it should not.

   For RS/6000, we wish to handle large displacements off a base
   register by splitting the addend across an addiu/addis and the mem insn.
   This cuts number of extra insns needed from 3 to 1.

   On Darwin, we use this to generate code for floating point constants.
   A movsf_low is generated so we wind up with 2 instructions rather than 3.
   The Darwin code is inside #if TARGET_MACHO because only then are the
   machopic_* functions defined.  */
static rtx
rs6000_legitimize_reload_address (rtx x, machine_mode mode,
				  int opnum, int type,
				  int ind_levels ATTRIBUTE_UNUSED, int *win)
{
  /* Whether reg+offset (d-form) addressing is usable for MODE, and
     whether MODE uses the restricted ISA 3.0 quad (DQ-form) offsets.  */
  bool reg_offset_p = reg_offset_addressing_ok_p (mode);
  bool quad_offset_p = mode_supports_vsx_dform_quad (mode);

  /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
     DFmode/DImode MEM.  Ditto for ISA 3.0 vsx_splat_v4sf/v4si.  */
  if (reg_offset_p
      && opnum == 1
      && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
	  || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
	  || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
	      && TARGET_P9_VECTOR)
	  || (mode == SImode && recog_data.operand_mode[0] == V4SImode
	      && TARGET_P9_VECTOR)))
    reg_offset_p = false;

  /* We must recognize output that we have already generated ourselves.  */
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (x, 1)) == CONST_INT)
    {
      if (TARGET_DEBUG_ADDR)
	{
	  fprintf (stderr, "\nlegitimize_reload_address push_reload #1:\n");
	  debug_rtx (x);
	}
      /* Reload the inner (reg + high-part) sum into a base register;
	 the low-part constant stays in the address.  */
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      *win = 1;
      return x;
    }

  /* Likewise for (lo_sum (high ...) ...) output we have generated.  */
  if (GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 0)) == HIGH)
    {
      if (TARGET_DEBUG_ADDR)
	{
	  fprintf (stderr, "\nlegitimize_reload_address push_reload #2:\n");
	  debug_rtx (x);
	}
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      *win = 1;
      return x;
    }

#if TARGET_MACHO
  if (DEFAULT_ABI == ABI_DARWIN && flag_pic
      && GET_CODE (x) == LO_SUM
      && GET_CODE (XEXP (x, 0)) == PLUS
      && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
      && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
      && machopic_operand_p (XEXP (x, 1)))
    {
      /* Result of previous invocation of this function on Darwin
	 floating point constant.  */
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      *win = 1;
      return x;
    }
#endif

  /* Medium/large code model TOC reference: rewrite X as
     (lo_sum (high X) X) and reload the high part.  */
  if (TARGET_CMODEL != CMODEL_SMALL
      && reg_offset_p
      && !quad_offset_p
      && small_toc_ref (x, VOIDmode))
    {
      rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
      x = gen_rtx_LO_SUM (Pmode, hi, x);
      if (TARGET_DEBUG_ADDR)
	{
	  fprintf (stderr, "\nlegitimize_reload_address push_reload #3:\n");
	  debug_rtx (x);
	}
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      *win = 1;
      return x;
    }

  /* Large displacement off a hard base register: split the addend
     into a shifted high part (reloaded into the base) and a signed
     16-bit low part left in the mem.  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
      && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
      && CONST_INT_P (XEXP (x, 1))
      && reg_offset_p
      && !SPE_VECTOR_MODE (mode)
      && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
      && (quad_offset_p || !VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
    {
      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      /* Sign-extend the low 16 bits of VAL, then round VAL - LOW to a
	 sign-extended 32-bit high part.  */
      HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
      HOST_WIDE_INT high
	= (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;

      /* Check for 32-bit overflow or quad addresses with one of the
	 four least significant bits set.  */
      if (high + low != val
	  || (quad_offset_p && (low & 0xf)))
	{
	  *win = 0;
	  return x;
	}

      /* Reload the high part into a base reg; leave the low part
	 in the mem directly.  */

      x = gen_rtx_PLUS (GET_MODE (x),
			gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
				      GEN_INT (high)),
			GEN_INT (low));

      if (TARGET_DEBUG_ADDR)
	{
	  fprintf (stderr, "\nlegitimize_reload_address push_reload #4:\n");
	  debug_rtx (x);
	}
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      *win = 1;
      return x;
    }

  /* Bare symbol reference: turn it into (lo_sum (high sym) sym)
     (or the machopic equivalent) and reload the high part, for the
     ABIs and modes where the resulting lo_sum is usable.  */
  if (GET_CODE (x) == SYMBOL_REF
      && reg_offset_p
      && !quad_offset_p
      && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
      && !SPE_VECTOR_MODE (mode)
#if TARGET_MACHO
      && DEFAULT_ABI == ABI_DARWIN
      && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
      && machopic_symbol_defined_p (x)
#else
      && DEFAULT_ABI == ABI_V4
      && !flag_pic
#endif
      /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
	 The same goes for DImode without 64-bit gprs and DFmode and DDmode
	 without fprs.
	 ??? Assume floating point reg based on mode?  This assumption is
	 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
	 where reload ends up doing a DFmode load of a constant from
	 mem using two gprs.  Unfortunately, at this point reload
	 hasn't yet selected regs so poking around in reload data
	 won't help and even if we could figure out the regs reliably,
	 we'd still want to allow this transformation when the mem is
	 naturally aligned.  Since we say the address is good here, we
	 can't disable offsets from LO_SUMs in mem_operand_gpr.
	 FIXME: Allow offset from lo_sum for other modes too, when
	 mem is sufficiently aligned.

	 Also disallow this if the type can go in VMX/Altivec registers, since
	 those registers do not have d-form (reg+offset) address modes.  */
      && !reg_addr[mode].scalar_in_vmx_p
      && mode != TFmode
      && mode != TDmode
      && mode != IFmode
      && mode != KFmode
      && (mode != TImode || !TARGET_VSX_TIMODE)
      && mode != PTImode
      && (mode != DImode || TARGET_POWERPC64)
      && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
	  || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
    {
#if TARGET_MACHO
      if (flag_pic)
	{
	  rtx offset = machopic_gen_offset (x);
	  x = gen_rtx_LO_SUM (GET_MODE (x),
			      gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
					    gen_rtx_HIGH (Pmode, offset)),
			      offset);
	}
      else
#endif
	x = gen_rtx_LO_SUM (GET_MODE (x),
			    gen_rtx_HIGH (Pmode, x), x);

      if (TARGET_DEBUG_ADDR)
	{
	  fprintf (stderr, "\nlegitimize_reload_address push_reload #5:\n");
	  debug_rtx (x);
	}
      push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      *win = 1;
      return x;
    }

  /* Reload an offset address wrapped by an AND that represents the
     masking of the lower bits.  Strip the outer AND and let reload
     convert the offset address into an indirect address.  For VSX,
     force reload to create the address with an AND in a separate
     register, because we can't guarantee an altivec register will
     be used.  */
  if (VECTOR_MEM_ALTIVEC_P (mode)
      && GET_CODE (x) == AND
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && INTVAL (XEXP (x, 1)) == -16)
    {
      x = XEXP (x, 0);
      *win = 1;
      return x;
    }

  /* Symbols addressable relative to the TOC: materialize the TOC
     reference, reloading its high part for medium/large code models.  */
  if (TARGET_TOC
      && reg_offset_p
      && !quad_offset_p
      && GET_CODE (x) == SYMBOL_REF
      && use_toc_relative_ref (x, mode))
    {
      x = create_TOC_reference (x, NULL_RTX);
      if (TARGET_CMODEL != CMODEL_SMALL)
	{
	  if (TARGET_DEBUG_ADDR)
	    {
	      fprintf (stderr, "\nlegitimize_reload_address push_reload #6:\n");
	      debug_rtx (x);
	    }
	  push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
		       BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		       opnum, (enum reload_type) type);
	}
      *win = 1;
      return x;
    }
  /* No transformation applies; tell the caller to fall through.  */
  *win = 0;
  return x;
}
10147 | ||
10148 | /* Debug version of rs6000_legitimize_reload_address. */ | |
10149 | static rtx | |
10150 | rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode, | |
10151 | int opnum, int type, | |
10152 | int ind_levels, int *win) | |
10153 | { | |
10154 | rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type, | |
10155 | ind_levels, win); | |
10156 | fprintf (stderr, | |
10157 | "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, " | |
10158 | "type = %d, ind_levels = %d, win = %d, original addr:\n", | |
10159 | GET_MODE_NAME (mode), opnum, type, ind_levels, *win); | |
10160 | debug_rtx (x); | |
10161 | ||
10162 | if (x == ret) | |
10163 | fprintf (stderr, "Same address returned\n"); | |
10164 | else if (!ret) | |
10165 | fprintf (stderr, "NULL returned\n"); | |
10166 | else | |
10167 | { | |
10168 | fprintf (stderr, "New address:\n"); | |
10169 | debug_rtx (ret); | |
10170 | } | |
10171 | ||
10172 | return ret; | |
10173 | } | |
10174 | ||
/* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
   that is a valid memory address for an instruction.
   The MODE argument is the machine mode for the MEM expression
   that wants to use this address.

   On the RS/6000, there are four valid addresses: a SYMBOL_REF that
   refers to a constant pool entry of an address (or the sum of it
   plus a constant), a short (16-bit signed) constant plus a register,
   the sum of two registers, or a register indirect, possibly with an
   auto-increment.  For DFmode, DDmode and DImode with a constant plus
   register, we must ensure that both words are addressable or PowerPC64
   with offset word aligned.

   For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
   32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
   because adjacent memory cells are accessed by adding word-sized offsets
   during assembly output.  */
static bool
rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
{
  /* Whether reg+offset (d-form) addressing is usable for MODE, and
     whether MODE uses the restricted ISA 3.0 quad (DQ-form) offsets.  */
  bool reg_offset_p = reg_offset_addressing_ok_p (mode);
  bool quad_offset_p = mode_supports_vsx_dform_quad (mode);

  /* If this is an unaligned stvx/ldvx type address, discard the outer AND.  */
  if (VECTOR_MEM_ALTIVEC_P (mode)
      && GET_CODE (x) == AND
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && INTVAL (XEXP (x, 1)) == -16)
    x = XEXP (x, 0);

  /* ELF TLS symbols require special access sequences and are never a
     plain legitimate address.  */
  if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
    return 0;
  if (legitimate_indirect_address_p (x, reg_ok_strict))
    return 1;
  if (TARGET_UPDATE
      && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
      && mode_supports_pre_incdec_p (mode)
      && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
    return 1;
  /* Handle restricted vector d-form offsets in ISA 3.0.  */
  if (quad_offset_p)
    {
      if (quad_address_p (x, mode, reg_ok_strict))
	return 1;
    }
  else if (virtual_stack_registers_memory_p (x))
    return 1;

  else if (reg_offset_p)
    {
      if (legitimate_small_data_p (mode, x))
	return 1;
      if (legitimate_constant_pool_address_p (x, mode,
					     reg_ok_strict || lra_in_progress))
	return 1;
      /* An addis generated for TOC fusion.  */
      if (reg_addr[mode].fused_toc && GET_CODE (x) == UNSPEC
	  && XINT (x, 1) == UNSPEC_FUSION_ADDIS)
	return 1;
    }

  /* For TImode, if we have TImode in VSX registers, only allow register
     indirect addresses.  This will allow the values to go in either GPRs
     or VSX registers without reloading.  The vector types would tend to
     go into VSX registers, so we allow REG+REG, while TImode seems
     somewhat split, in that some uses are GPR based, and some VSX based.  */
  /* FIXME: We could loosen this by changing the following to
       if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
     but currently we cannot allow REG+REG addressing for TImode.  See
     PR72827 for complete details on how this ends up hoodwinking DSE.  */
  if (mode == TImode && TARGET_VSX_TIMODE)
    return 0;
  /* If not REG_OK_STRICT (before reload) let pass any stack offset.  */
  if (! reg_ok_strict
      && reg_offset_p
      && GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 0)) == REG
      && (XEXP (x, 0) == virtual_stack_vars_rtx
	  || XEXP (x, 0) == arg_pointer_rtx)
      && GET_CODE (XEXP (x, 1)) == CONST_INT)
    return 1;
  if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
    return 1;
  /* Indexed (reg+reg) addressing, for the modes where it is usable.  */
  if (!FLOAT128_2REG_P (mode)
      && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
	  || TARGET_POWERPC64
	  || (mode != DFmode && mode != DDmode)
	  || (TARGET_E500_DOUBLE && mode != DDmode))
      && (TARGET_POWERPC64 || mode != DImode)
      && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
      && mode != PTImode
      && !avoiding_indexed_address_p (mode)
      && legitimate_indexed_address_p (x, reg_ok_strict))
    return 1;
  /* PRE_MODIFY: update form where the new-address expression must
     itself use the same base register.  */
  if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
      && mode_supports_pre_modify_p (mode)
      && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
      && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
					      reg_ok_strict, false)
	  || (!avoiding_indexed_address_p (mode)
	      && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
      && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    return 1;
  if (reg_offset_p && !quad_offset_p
      && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
    return 1;
  return 0;
}
10282 | ||
10283 | /* Debug version of rs6000_legitimate_address_p. */ | |
10284 | static bool | |
10285 | rs6000_debug_legitimate_address_p (machine_mode mode, rtx x, | |
10286 | bool reg_ok_strict) | |
10287 | { | |
10288 | bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict); | |
10289 | fprintf (stderr, | |
10290 | "\nrs6000_legitimate_address_p: return = %s, mode = %s, " | |
10291 | "strict = %d, reload = %s, code = %s\n", | |
10292 | ret ? "true" : "false", | |
10293 | GET_MODE_NAME (mode), | |
10294 | reg_ok_strict, | |
10295 | (reload_completed | |
10296 | ? "after" | |
10297 | : (reload_in_progress ? "progress" : "before")), | |
10298 | GET_RTX_NAME (GET_CODE (x))); | |
10299 | debug_rtx (x); | |
10300 | ||
10301 | return ret; | |
10302 | } | |
10303 | ||
/* Implement TARGET_MODE_DEPENDENT_ADDRESS_P.  */

static bool
rs6000_mode_dependent_address_p (const_rtx addr,
				 addr_space_t as ATTRIBUTE_UNUSED)
{
  /* Dispatch through a global function pointer; presumably this lets
     the -mdebug variant be installed at option-processing time —
     confirm where rs6000_mode_dependent_address_ptr is assigned.  */
  return rs6000_mode_dependent_address_ptr (addr);
}
10312 | ||
10313 | /* Go to LABEL if ADDR (a legitimate address expression) | |
10314 | has an effect that depends on the machine mode it is used for. | |
10315 | ||
10316 | On the RS/6000 this is true of all integral offsets (since AltiVec | |
10317 | and VSX modes don't allow them) or is a pre-increment or decrement. | |
10318 | ||
10319 | ??? Except that due to conceptual problems in offsettable_address_p | |
10320 | we can't really report the problems of integral offsets. So leave | |
10321 | this assuming that the adjustable offset must be valid for the | |
10322 | sub-words of a TFmode operand, which is what we had before. */ | |
10323 | ||
10324 | static bool | |
10325 | rs6000_mode_dependent_address (const_rtx addr) | |
10326 | { | |
10327 | switch (GET_CODE (addr)) | |
10328 | { | |
10329 | case PLUS: | |
10330 | /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx | |
10331 | is considered a legitimate address before reload, so there | |
10332 | are no offset restrictions in that case. Note that this | |
10333 | condition is safe in strict mode because any address involving | |
10334 | virtual_stack_vars_rtx or arg_pointer_rtx would already have | |
10335 | been rejected as illegitimate. */ | |
10336 | if (XEXP (addr, 0) != virtual_stack_vars_rtx | |
10337 | && XEXP (addr, 0) != arg_pointer_rtx | |
10338 | && GET_CODE (XEXP (addr, 1)) == CONST_INT) | |
10339 | { | |
10340 | unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1)); | |
10341 | return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12); | |
10342 | } | |
10343 | break; | |
10344 | ||
10345 | case LO_SUM: | |
10346 | /* Anything in the constant pool is sufficiently aligned that | |
10347 | all bytes have the same high part address. */ | |
10348 | return !legitimate_constant_pool_address_p (addr, QImode, false); | |
10349 | ||
10350 | /* Auto-increment cases are now treated generically in recog.c. */ | |
10351 | case PRE_MODIFY: | |
10352 | return TARGET_UPDATE; | |
10353 | ||
10354 | /* AND is only allowed in Altivec loads. */ | |
10355 | case AND: | |
10356 | return true; | |
10357 | ||
10358 | default: | |
10359 | break; | |
10360 | } | |
10361 | ||
10362 | return false; | |
10363 | } | |
10364 | ||
10365 | /* Debug version of rs6000_mode_dependent_address. */ | |
10366 | static bool | |
10367 | rs6000_debug_mode_dependent_address (const_rtx addr) | |
10368 | { | |
10369 | bool ret = rs6000_mode_dependent_address (addr); | |
10370 | ||
10371 | fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n", | |
10372 | ret ? "true" : "false"); | |
10373 | debug_rtx (addr); | |
10374 | ||
10375 | return ret; | |
10376 | } | |
10377 | ||
10378 | /* Implement FIND_BASE_TERM. */ | |
10379 | ||
10380 | rtx | |
10381 | rs6000_find_base_term (rtx op) | |
10382 | { | |
10383 | rtx base; | |
10384 | ||
10385 | base = op; | |
10386 | if (GET_CODE (base) == CONST) | |
10387 | base = XEXP (base, 0); | |
10388 | if (GET_CODE (base) == PLUS) | |
10389 | base = XEXP (base, 0); | |
10390 | if (GET_CODE (base) == UNSPEC) | |
10391 | switch (XINT (base, 1)) | |
10392 | { | |
10393 | case UNSPEC_TOCREL: | |
10394 | case UNSPEC_MACHOPIC_OFFSET: | |
10395 | /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term | |
10396 | for aliasing purposes. */ | |
10397 | return XVECEXP (base, 0, 0); | |
10398 | } | |
10399 | ||
10400 | return op; | |
10401 | } | |
10402 | ||
/* More elaborate version of recog's offsettable_memref_p predicate
   that works around the ??? note of rs6000_mode_dependent_address.
   In particular it accepts

     (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))

   in 32-bit mode, that the recog predicate rejects.  */

static bool
rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
{
  bool worst_case;

  if (!MEM_P (op))
    return false;

  /* First mimic offsettable_memref_p.  */
  if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
    return true;

  /* offsettable_address_p invokes rs6000_mode_dependent_address, but
     the latter predicate knows nothing about the mode of the memory
     reference and, therefore, assumes that it is the largest supported
     mode (TFmode).  As a consequence, legitimate offsettable memory
     references are rejected.  rs6000_legitimate_offset_address_p contains
     the correct logic for the PLUS case of rs6000_mode_dependent_address,
     at least with a little bit of help here given that we know the
     actual registers used.  */
  /* NOTE(review): WORST_CASE apparently flags register/mode pairs whose
     access is stepped in word-sized pieces — confirm against
     rs6000_legitimate_offset_address_p's WORST_CASE parameter.  */
  worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
		|| GET_MODE_SIZE (reg_mode) == 4);
  return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
					     true, worst_case);
}
10436 | ||
10437 | /* Determine the reassociation width to be used in reassociate_bb. | |
10438 | This takes into account how many parallel operations we | |
10439 | can actually do of a given type, and also the latency. | |
10440 | P8: | |
10441 | int add/sub 6/cycle | |
10442 | mul 2/cycle | |
10443 | vect add/sub/mul 2/cycle | |
10444 | fp add/sub/mul 2/cycle | |
10445 | dfp 1/cycle | |
10446 | */ | |
10447 | ||
10448 | static int | |
10449 | rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED, | |
b8506a8a | 10450 | machine_mode mode) |
83349046 SB |
10451 | { |
10452 | switch (rs6000_cpu) | |
10453 | { | |
10454 | case PROCESSOR_POWER8: | |
10455 | case PROCESSOR_POWER9: | |
10456 | if (DECIMAL_FLOAT_MODE_P (mode)) | |
10457 | return 1; | |
10458 | if (VECTOR_MODE_P (mode)) | |
10459 | return 4; | |
10460 | if (INTEGRAL_MODE_P (mode)) | |
10461 | return opc == MULT_EXPR ? 4 : 6; | |
10462 | if (FLOAT_MODE_P (mode)) | |
10463 | return 4; | |
10464 | break; | |
10465 | default: | |
10466 | break; | |
10467 | } | |
10468 | return 1; | |
10469 | } | |
10470 | ||
/* Change register usage conditional on target flags.  */
static void
rs6000_conditional_register_usage (void)
{
  int i;

  if (TARGET_DEBUG_TARGET)
    fprintf (stderr, "rs6000_conditional_register_usage called\n");

  /* Set MQ register fixed (already call_used) so that it will not be
     allocated.  */
  fixed_regs[64] = 1;

  /* 64-bit AIX and Linux reserve GPR13 for thread-private data.  */
  if (TARGET_64BIT)
    fixed_regs[13] = call_used_regs[13]
      = call_really_used_regs[13] = 1;

  /* Conditionally disable FPRs.  */
  if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
    for (i = 32; i < 64; i++)
      fixed_regs[i] = call_used_regs[i]
	= call_really_used_regs[i] = 1;

  /* The TOC register is not killed across calls in a way that is
     visible to the compiler.  */
  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    call_really_used_regs[2] = 0;

  /* flag_pic == 2 (-fPIC) on SysV: the PIC offset table register is
     entirely off limits to the allocator.  */
  if (DEFAULT_ABI == ABI_V4 && flag_pic == 2)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  /* flag_pic == 1 (-fpic) on SysV: fixed and clobbered by calls.  */
  if (DEFAULT_ABI == ABI_V4 && flag_pic == 1)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  /* Darwin PIC likewise reserves the PIC offset table register.  */
  if (DEFAULT_ABI == ABI_DARWIN && flag_pic)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  /* -mminimal-toc dedicates the same register to the TOC anchor.  */
  if (TARGET_TOC && TARGET_MINIMAL_TOC)
    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
      = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;

  if (TARGET_SPE)
    {
      global_regs[SPEFSCR_REGNO] = 1;
      /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
	 registers in prologues and epilogues.  We no longer use r14
	 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
	 pool for link-compatibility with older versions of GCC.  Once
	 "old" code has died out, we can return r14 to the allocation
	 pool.  */
      fixed_regs[14]
	= call_used_regs[14]
	= call_really_used_regs[14] = 1;
    }

  /* Without AltiVec/VSX, keep every vector register (and VRSAVE) out
     of the allocation pool.  */
  if (!TARGET_ALTIVEC && !TARGET_VSX)
    {
      for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
	fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
      call_really_used_regs[VRSAVE_REGNO] = 1;
    }

  if (TARGET_ALTIVEC || TARGET_VSX)
    global_regs[VSCR_REGNO] = 1;

  if (TARGET_ALTIVEC_ABI)
    {
      /* The first 20 AltiVec registers are call-clobbered under the
	 AltiVec ABI.  */
      for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
	call_used_regs[i] = call_really_used_regs[i] = 1;

      /* AIX reserves VR20:31 in non-extended ABI mode.  */
      if (TARGET_XCOFF)
	for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
	  fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
    }
}
10552 | ||
10553 | \f | |
/* Output insns to set DEST equal to the constant SOURCE as a series of
   lis, ori and shl instructions and return TRUE.  */

bool
rs6000_emit_set_const (rtx dest, rtx source)
{
  machine_mode mode = GET_MODE (dest);
  rtx temp, set;
  rtx_insn *insn;
  HOST_WIDE_INT c;

  gcc_checking_assert (CONST_INT_P (source));
  c = INTVAL (source);
  switch (mode)
    {
    case E_QImode:
    case E_HImode:
      /* Narrow modes: the constant always fits in one move.  */
      emit_insn (gen_rtx_SET (dest, source));
      return true;

    case E_SImode:
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);

      /* Load the high 16 bits, then OR in the low 16 bits.  */
      emit_insn (gen_rtx_SET (copy_rtx (temp),
			      GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
      emit_insn (gen_rtx_SET (dest,
			      gen_rtx_IOR (SImode, copy_rtx (temp),
					   GEN_INT (c & 0xffff))));
      break;

    case E_DImode:
      if (!TARGET_POWERPC64)
	{
	  /* 32-bit target: set the two SImode subwords separately.  */
	  rtx hi, lo;

	  hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
				      DImode);
	  lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
				      DImode);
	  emit_move_insn (hi, GEN_INT (c >> 32));
	  /* Sign-extend the low 32 bits for the second move.  */
	  c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
	  emit_move_insn (lo, GEN_INT (c));
	}
      else
	rs6000_emit_set_long_const (dest, c);
      break;

    default:
      gcc_unreachable ();
    }

  /* Attach a REG_EQUAL note to the last insn so later passes still
     know the full constant value.  */
  insn = get_last_insn ();
  set = single_set (insn);
  if (! CONSTANT_P (SET_SRC (set)))
    set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));

  return true;
}
10612 | ||
/* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
   Output insns to set DEST equal to the constant C as a series of
   lis, ori and shl instructions.  */

static void
rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
{
  rtx temp;
  /* The four 16-bit groups of C, UD1 being the least significant.  */
  HOST_WIDE_INT ud1, ud2, ud3, ud4;

  ud1 = c & 0xffff;
  c = c >> 16;
  ud2 = c & 0xffff;
  c = c >> 16;
  ud3 = c & 0xffff;
  c = c >> 16;
  ud4 = c & 0xffff;

  /* C fits in a sign-extended 16-bit immediate: a single move.  */
  if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
      || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
    emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));

  /* C fits in a sign-extended 32-bit immediate: lis + optional ori.  */
  else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
	   || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
      if (ud1 != 0)
	emit_move_insn (dest,
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud1)));
    }
  /* Upper 32 bits are zero but bit 31 is set: build the 32-bit value
     and zero-extend it to clear the top half.  */
  else if (ud3 == 0 && ud4 == 0)
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      gcc_assert (ud2 & 0x8000);
      emit_move_insn (copy_rtx (temp),
		      GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
      if (ud1 != 0)
	emit_move_insn (copy_rtx (temp),
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud1)));
      emit_move_insn (dest,
		      gen_rtx_ZERO_EXTEND (DImode,
					   gen_lowpart (SImode,
							copy_rtx (temp))));
    }
  /* C is a sign-extended 48-bit value: build the upper 32 bits,
     shift left 16, then OR in UD1.  */
  else if ((ud4 == 0xffff && (ud3 & 0x8000))
	   || (ud4 == 0 && ! (ud3 & 0x8000)))
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      emit_move_insn (copy_rtx (temp),
		      GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
      if (ud2 != 0)
	emit_move_insn (copy_rtx (temp),
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud2)));
      emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
		      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
				      GEN_INT (16)));
      if (ud1 != 0)
	emit_move_insn (dest,
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud1)));
    }
  /* General case: build the high 32 bits, shift left 32, then OR in
     the remaining halfwords.  */
  else
    {
      temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);

      emit_move_insn (copy_rtx (temp),
		      GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
      if (ud3 != 0)
	emit_move_insn (copy_rtx (temp),
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud3)));

      emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
		      gen_rtx_ASHIFT (DImode, copy_rtx (temp),
				      GEN_INT (32)));
      if (ud2 != 0)
	emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud2 << 16)));
      if (ud1 != 0)
	emit_move_insn (dest,
			gen_rtx_IOR (DImode, copy_rtx (temp),
				     GEN_INT (ud1)));
    }
}
10706 | ||
10707 | /* Helper for the following. Get rid of [r+r] memory refs | |
10708 | in cases where it won't work (TImode, TFmode, TDmode, PTImode). */ | |
10709 | ||
10710 | static void | |
10711 | rs6000_eliminate_indexed_memrefs (rtx operands[2]) | |
10712 | { | |
10713 | if (reload_in_progress) | |
10714 | return; | |
10715 | ||
10716 | if (GET_CODE (operands[0]) == MEM | |
10717 | && GET_CODE (XEXP (operands[0], 0)) != REG | |
10718 | && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0), | |
10719 | GET_MODE (operands[0]), false)) | |
10720 | operands[0] | |
10721 | = replace_equiv_address (operands[0], | |
10722 | copy_addr_to_reg (XEXP (operands[0], 0))); | |
10723 | ||
10724 | if (GET_CODE (operands[1]) == MEM | |
10725 | && GET_CODE (XEXP (operands[1], 0)) != REG | |
10726 | && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0), | |
10727 | GET_MODE (operands[1]), false)) | |
10728 | operands[1] | |
10729 | = replace_equiv_address (operands[1], | |
10730 | copy_addr_to_reg (XEXP (operands[1], 0))); | |
10731 | } | |
10732 | ||
10733 | /* Generate a vector of constants to permute MODE for a little-endian | |
10734 | storage operation by swapping the two halves of a vector. */ | |
10735 | static rtvec | |
10736 | rs6000_const_vec (machine_mode mode) | |
10737 | { | |
10738 | int i, subparts; | |
10739 | rtvec v; | |
10740 | ||
10741 | switch (mode) | |
10742 | { | |
4e10a5a7 | 10743 | case E_V1TImode: |
83349046 SB |
10744 | subparts = 1; |
10745 | break; | |
4e10a5a7 RS |
10746 | case E_V2DFmode: |
10747 | case E_V2DImode: | |
83349046 SB |
10748 | subparts = 2; |
10749 | break; | |
4e10a5a7 RS |
10750 | case E_V4SFmode: |
10751 | case E_V4SImode: | |
83349046 SB |
10752 | subparts = 4; |
10753 | break; | |
4e10a5a7 | 10754 | case E_V8HImode: |
83349046 SB |
10755 | subparts = 8; |
10756 | break; | |
4e10a5a7 | 10757 | case E_V16QImode: |
83349046 SB |
10758 | subparts = 16; |
10759 | break; | |
10760 | default: | |
10761 | gcc_unreachable(); | |
10762 | } | |
10763 | ||
10764 | v = rtvec_alloc (subparts); | |
10765 | ||
10766 | for (i = 0; i < subparts / 2; ++i) | |
10767 | RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2); | |
10768 | for (i = subparts / 2; i < subparts; ++i) | |
10769 | RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2); | |
10770 | ||
10771 | return v; | |
10772 | } | |
10773 | ||
10774 | /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi | |
10775 | for a VSX load or store operation. */ | |
10776 | rtx | |
10777 | rs6000_gen_le_vsx_permute (rtx source, machine_mode mode) | |
10778 | { | |
10779 | /* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and | |
10780 | 128-bit integers if they are allowed in VSX registers. */ | |
10781 | if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode) | |
10782 | return gen_rtx_ROTATE (mode, source, GEN_INT (64)); | |
10783 | else | |
10784 | { | |
10785 | rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode)); | |
10786 | return gen_rtx_VEC_SELECT (mode, source, par); | |
10787 | } | |
10788 | } | |
10789 | ||
10790 | /* Emit a little-endian load from vector memory location SOURCE to VSX | |
10791 | register DEST in mode MODE. The load is done with two permuting | |
10792 | insn's that represent an lxvd2x and xxpermdi. */ | |
10793 | void | |
10794 | rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode) | |
10795 | { | |
10796 | rtx tmp, permute_mem, permute_reg; | |
10797 | ||
10798 | /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode, | |
10799 | V1TImode). */ | |
10800 | if (mode == TImode || mode == V1TImode) | |
10801 | { | |
10802 | mode = V2DImode; | |
10803 | dest = gen_lowpart (V2DImode, dest); | |
10804 | source = adjust_address (source, V2DImode, 0); | |
10805 | } | |
10806 | ||
10807 | tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest; | |
10808 | permute_mem = rs6000_gen_le_vsx_permute (source, mode); | |
10809 | permute_reg = rs6000_gen_le_vsx_permute (tmp, mode); | |
10810 | emit_insn (gen_rtx_SET (tmp, permute_mem)); | |
10811 | emit_insn (gen_rtx_SET (dest, permute_reg)); | |
10812 | } | |
10813 | ||
10814 | /* Emit a little-endian store to vector memory location DEST from VSX | |
10815 | register SOURCE in mode MODE. The store is done with two permuting | |
10816 | insn's that represent an xxpermdi and an stxvd2x. */ | |
10817 | void | |
10818 | rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode) | |
10819 | { | |
10820 | rtx tmp, permute_src, permute_tmp; | |
10821 | ||
10822 | /* This should never be called during or after reload, because it does | |
10823 | not re-permute the source register. It is intended only for use | |
10824 | during expand. */ | |
10825 | gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed); | |
10826 | ||
10827 | /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode, | |
10828 | V1TImode). */ | |
10829 | if (mode == TImode || mode == V1TImode) | |
10830 | { | |
10831 | mode = V2DImode; | |
10832 | dest = adjust_address (dest, V2DImode, 0); | |
10833 | source = gen_lowpart (V2DImode, source); | |
10834 | } | |
10835 | ||
10836 | tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source; | |
10837 | permute_src = rs6000_gen_le_vsx_permute (source, mode); | |
10838 | permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode); | |
10839 | emit_insn (gen_rtx_SET (tmp, permute_src)); | |
10840 | emit_insn (gen_rtx_SET (dest, permute_tmp)); | |
10841 | } | |
10842 | ||
10843 | /* Emit a sequence representing a little-endian VSX load or store, | |
10844 | moving data from SOURCE to DEST in mode MODE. This is done | |
10845 | separately from rs6000_emit_move to ensure it is called only | |
10846 | during expand. LE VSX loads and stores introduced later are | |
10847 | handled with a split. The expand-time RTL generation allows | |
10848 | us to optimize away redundant pairs of register-permutes. */ | |
10849 | void | |
10850 | rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode) | |
10851 | { | |
10852 | gcc_assert (!BYTES_BIG_ENDIAN | |
10853 | && VECTOR_MEM_VSX_P (mode) | |
10854 | && !TARGET_P9_VECTOR | |
10855 | && !gpr_or_gpr_p (dest, source) | |
10856 | && (MEM_P (source) ^ MEM_P (dest))); | |
10857 | ||
10858 | if (MEM_P (source)) | |
10859 | { | |
10860 | gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG); | |
10861 | rs6000_emit_le_vsx_load (dest, source, mode); | |
10862 | } | |
10863 | else | |
10864 | { | |
10865 | if (!REG_P (source)) | |
10866 | source = force_reg (mode, source); | |
10867 | rs6000_emit_le_vsx_store (dest, source, mode); | |
10868 | } | |
10869 | } | |
10870 | ||
10871 | /* Return whether a SFmode or SImode move can be done without converting one | |
10872 | mode to another. This arrises when we have: | |
10873 | ||
10874 | (SUBREG:SF (REG:SI ...)) | |
10875 | (SUBREG:SI (REG:SF ...)) | |
10876 | ||
10877 | and one of the values is in a floating point/vector register, where SFmode | |
10878 | scalars are stored in DFmode format. */ | |
10879 | ||
10880 | bool | |
10881 | valid_sf_si_move (rtx dest, rtx src, machine_mode mode) | |
10882 | { | |
10883 | if (TARGET_ALLOW_SF_SUBREG) | |
10884 | return true; | |
10885 | ||
10886 | if (mode != SFmode && GET_MODE_CLASS (mode) != MODE_INT) | |
10887 | return true; | |
10888 | ||
10889 | if (!SUBREG_P (src) || !sf_subreg_operand (src, mode)) | |
10890 | return true; | |
10891 | ||
10892 | /*. Allow (set (SUBREG:SI (REG:SF)) (SUBREG:SI (REG:SF))). */ | |
10893 | if (SUBREG_P (dest)) | |
10894 | { | |
10895 | rtx dest_subreg = SUBREG_REG (dest); | |
10896 | rtx src_subreg = SUBREG_REG (src); | |
10897 | return GET_MODE (dest_subreg) == GET_MODE (src_subreg); | |
10898 | } | |
10899 | ||
10900 | return false; | |
10901 | } | |
10902 | ||
10903 | ||
10904 | /* Helper function to change moves with: | |
10905 | ||
10906 | (SUBREG:SF (REG:SI)) and | |
10907 | (SUBREG:SI (REG:SF)) | |
10908 | ||
10909 | into separate UNSPEC insns. In the PowerPC architecture, scalar SFmode | |
10910 | values are stored as DFmode values in the VSX registers. We need to convert | |
10911 | the bits before we can use a direct move or operate on the bits in the | |
10912 | vector register as an integer type. | |
10913 | ||
10914 | Skip things like (set (SUBREG:SI (...) (SUBREG:SI (...)). */ | |
10915 | ||
10916 | static bool | |
10917 | rs6000_emit_move_si_sf_subreg (rtx dest, rtx source, machine_mode mode) | |
10918 | { | |
10919 | if (TARGET_DIRECT_MOVE_64BIT && !reload_in_progress && !reload_completed | |
10920 | && !lra_in_progress | |
10921 | && (!SUBREG_P (dest) || !sf_subreg_operand (dest, mode)) | |
10922 | && SUBREG_P (source) && sf_subreg_operand (source, mode)) | |
10923 | { | |
10924 | rtx inner_source = SUBREG_REG (source); | |
10925 | machine_mode inner_mode = GET_MODE (inner_source); | |
10926 | ||
10927 | if (mode == SImode && inner_mode == SFmode) | |
10928 | { | |
10929 | emit_insn (gen_movsi_from_sf (dest, inner_source)); | |
10930 | return true; | |
10931 | } | |
10932 | ||
10933 | if (mode == SFmode && inner_mode == SImode) | |
10934 | { | |
10935 | emit_insn (gen_movsf_from_si (dest, inner_source)); | |
10936 | return true; | |
10937 | } | |
10938 | } | |
10939 | ||
10940 | return false; | |
10941 | } | |
10942 | ||
10943 | /* Emit a move from SOURCE to DEST in mode MODE. */ | |
10944 | void | |
10945 | rs6000_emit_move (rtx dest, rtx source, machine_mode mode) | |
10946 | { | |
10947 | rtx operands[2]; | |
10948 | operands[0] = dest; | |
10949 | operands[1] = source; | |
10950 | ||
10951 | if (TARGET_DEBUG_ADDR) | |
10952 | { | |
10953 | fprintf (stderr, | |
10954 | "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, " | |
10955 | "reload_completed = %d, can_create_pseudos = %d.\ndest:\n", | |
10956 | GET_MODE_NAME (mode), | |
10957 | reload_in_progress, | |
10958 | reload_completed, | |
10959 | can_create_pseudo_p ()); | |
10960 | debug_rtx (dest); | |
10961 | fprintf (stderr, "source:\n"); | |
10962 | debug_rtx (source); | |
10963 | } | |
10964 | ||
10965 | /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */ | |
10966 | if (CONST_WIDE_INT_P (operands[1]) | |
10967 | && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT) | |
10968 | { | |
10969 | /* This should be fixed with the introduction of CONST_WIDE_INT. */ | |
10970 | gcc_unreachable (); | |
10971 | } | |
10972 | ||
10973 | /* See if we need to special case SImode/SFmode SUBREG moves. */ | |
10974 | if ((mode == SImode || mode == SFmode) && SUBREG_P (source) | |
10975 | && rs6000_emit_move_si_sf_subreg (dest, source, mode)) | |
10976 | return; | |
10977 | ||
10978 | /* Check if GCC is setting up a block move that will end up using FP | |
10979 | registers as temporaries. We must make sure this is acceptable. */ | |
10980 | if (GET_CODE (operands[0]) == MEM | |
10981 | && GET_CODE (operands[1]) == MEM | |
10982 | && mode == DImode | |
e0bd6c9f RS |
10983 | && (rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[0])) |
10984 | || rs6000_slow_unaligned_access (DImode, MEM_ALIGN (operands[1]))) | |
10985 | && ! (rs6000_slow_unaligned_access (SImode, | |
10986 | (MEM_ALIGN (operands[0]) > 32 | |
10987 | ? 32 : MEM_ALIGN (operands[0]))) | |
10988 | || rs6000_slow_unaligned_access (SImode, | |
10989 | (MEM_ALIGN (operands[1]) > 32 | |
10990 | ? 32 : MEM_ALIGN (operands[1])))) | |
83349046 SB |
10991 | && ! MEM_VOLATILE_P (operands [0]) |
10992 | && ! MEM_VOLATILE_P (operands [1])) | |
10993 | { | |
10994 | emit_move_insn (adjust_address (operands[0], SImode, 0), | |
10995 | adjust_address (operands[1], SImode, 0)); | |
10996 | emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4), | |
10997 | adjust_address (copy_rtx (operands[1]), SImode, 4)); | |
10998 | return; | |
10999 | } | |
11000 | ||
11001 | if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM | |
11002 | && !gpc_reg_operand (operands[1], mode)) | |
11003 | operands[1] = force_reg (mode, operands[1]); | |
11004 | ||
11005 | /* Recognize the case where operand[1] is a reference to thread-local | |
11006 | data and load its address to a register. */ | |
11007 | if (tls_referenced_p (operands[1])) | |
11008 | { | |
11009 | enum tls_model model; | |
11010 | rtx tmp = operands[1]; | |
11011 | rtx addend = NULL; | |
11012 | ||
11013 | if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS) | |
11014 | { | |
11015 | addend = XEXP (XEXP (tmp, 0), 1); | |
11016 | tmp = XEXP (XEXP (tmp, 0), 0); | |
11017 | } | |
11018 | ||
11019 | gcc_assert (GET_CODE (tmp) == SYMBOL_REF); | |
11020 | model = SYMBOL_REF_TLS_MODEL (tmp); | |
11021 | gcc_assert (model != 0); | |
11022 | ||
11023 | tmp = rs6000_legitimize_tls_address (tmp, model); | |
11024 | if (addend) | |
11025 | { | |
11026 | tmp = gen_rtx_PLUS (mode, tmp, addend); | |
11027 | tmp = force_operand (tmp, operands[0]); | |
11028 | } | |
11029 | operands[1] = tmp; | |
11030 | } | |
11031 | ||
11032 | /* Handle the case where reload calls us with an invalid address. */ | |
11033 | if (reload_in_progress && mode == Pmode | |
11034 | && (! general_operand (operands[1], mode) | |
11035 | || ! nonimmediate_operand (operands[0], mode))) | |
11036 | goto emit_set; | |
11037 | ||
11038 | /* 128-bit constant floating-point values on Darwin should really be loaded | |
11039 | as two parts. However, this premature splitting is a problem when DFmode | |
11040 | values can go into Altivec registers. */ | |
11041 | if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p | |
11042 | && GET_CODE (operands[1]) == CONST_DOUBLE) | |
11043 | { | |
11044 | rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0), | |
11045 | simplify_gen_subreg (DFmode, operands[1], mode, 0), | |
11046 | DFmode); | |
11047 | rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, | |
11048 | GET_MODE_SIZE (DFmode)), | |
11049 | simplify_gen_subreg (DFmode, operands[1], mode, | |
11050 | GET_MODE_SIZE (DFmode)), | |
11051 | DFmode); | |
11052 | return; | |
11053 | } | |
11054 | ||
11055 | if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX) | |
11056 | cfun->machine->sdmode_stack_slot = | |
11057 | eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX); | |
11058 | ||
11059 | ||
11060 | /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD), | |
11061 | p1:SD) if p1 is not of floating point class and p0 is spilled as | |
11062 | we can have no analogous movsd_store for this. */ | |
11063 | if (lra_in_progress && mode == DDmode | |
11064 | && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER | |
11065 | && reg_preferred_class (REGNO (operands[0])) == NO_REGS | |
11066 | && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1])) | |
11067 | && GET_MODE (SUBREG_REG (operands[1])) == SDmode) | |
11068 | { | |
11069 | enum reg_class cl; | |
11070 | int regno = REGNO (SUBREG_REG (operands[1])); | |
11071 | ||
11072 | if (regno >= FIRST_PSEUDO_REGISTER) | |
11073 | { | |
11074 | cl = reg_preferred_class (regno); | |
11075 | regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1]; | |
11076 | } | |
11077 | if (regno >= 0 && ! FP_REGNO_P (regno)) | |
11078 | { | |
11079 | mode = SDmode; | |
11080 | operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]); | |
11081 | operands[1] = SUBREG_REG (operands[1]); | |
11082 | } | |
11083 | } | |
11084 | if (lra_in_progress | |
11085 | && mode == SDmode | |
11086 | && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER | |
11087 | && reg_preferred_class (REGNO (operands[0])) == NO_REGS | |
11088 | && (REG_P (operands[1]) | |
11089 | || (GET_CODE (operands[1]) == SUBREG | |
11090 | && REG_P (SUBREG_REG (operands[1]))))) | |
11091 | { | |
11092 | int regno = REGNO (GET_CODE (operands[1]) == SUBREG | |
11093 | ? SUBREG_REG (operands[1]) : operands[1]); | |
11094 | enum reg_class cl; | |
11095 | ||
11096 | if (regno >= FIRST_PSEUDO_REGISTER) | |
11097 | { | |
11098 | cl = reg_preferred_class (regno); | |
11099 | gcc_assert (cl != NO_REGS); | |
11100 | regno = ira_class_hard_regs[cl][0]; | |
11101 | } | |
11102 | if (FP_REGNO_P (regno)) | |
11103 | { | |
11104 | if (GET_MODE (operands[0]) != DDmode) | |
11105 | operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0); | |
11106 | emit_insn (gen_movsd_store (operands[0], operands[1])); | |
11107 | } | |
11108 | else if (INT_REGNO_P (regno)) | |
11109 | emit_insn (gen_movsd_hardfloat (operands[0], operands[1])); | |
11110 | else | |
11111 | gcc_unreachable(); | |
11112 | return; | |
11113 | } | |
11114 | /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD | |
11115 | p:DD)) if p0 is not of floating point class and p1 is spilled as | |
11116 | we can have no analogous movsd_load for this. */ | |
11117 | if (lra_in_progress && mode == DDmode | |
11118 | && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0])) | |
11119 | && GET_MODE (SUBREG_REG (operands[0])) == SDmode | |
11120 | && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER | |
11121 | && reg_preferred_class (REGNO (operands[1])) == NO_REGS) | |
11122 | { | |
11123 | enum reg_class cl; | |
11124 | int regno = REGNO (SUBREG_REG (operands[0])); | |
11125 | ||
11126 | if (regno >= FIRST_PSEUDO_REGISTER) | |
11127 | { | |
11128 | cl = reg_preferred_class (regno); | |
11129 | regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0]; | |
11130 | } | |
11131 | if (regno >= 0 && ! FP_REGNO_P (regno)) | |
11132 | { | |
11133 | mode = SDmode; | |
11134 | operands[0] = SUBREG_REG (operands[0]); | |
11135 | operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]); | |
11136 | } | |
11137 | } | |
11138 | if (lra_in_progress | |
11139 | && mode == SDmode | |
11140 | && (REG_P (operands[0]) | |
11141 | || (GET_CODE (operands[0]) == SUBREG | |
11142 | && REG_P (SUBREG_REG (operands[0])))) | |
11143 | && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER | |
11144 | && reg_preferred_class (REGNO (operands[1])) == NO_REGS) | |
11145 | { | |
11146 | int regno = REGNO (GET_CODE (operands[0]) == SUBREG | |
11147 | ? SUBREG_REG (operands[0]) : operands[0]); | |
11148 | enum reg_class cl; | |
11149 | ||
11150 | if (regno >= FIRST_PSEUDO_REGISTER) | |
11151 | { | |
11152 | cl = reg_preferred_class (regno); | |
11153 | gcc_assert (cl != NO_REGS); | |
11154 | regno = ira_class_hard_regs[cl][0]; | |
11155 | } | |
11156 | if (FP_REGNO_P (regno)) | |
11157 | { | |
11158 | if (GET_MODE (operands[1]) != DDmode) | |
11159 | operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0); | |
11160 | emit_insn (gen_movsd_load (operands[0], operands[1])); | |
11161 | } | |
11162 | else if (INT_REGNO_P (regno)) | |
11163 | emit_insn (gen_movsd_hardfloat (operands[0], operands[1])); | |
11164 | else | |
11165 | gcc_unreachable(); | |
11166 | return; | |
11167 | } | |
11168 | ||
11169 | if (reload_in_progress | |
11170 | && mode == SDmode | |
11171 | && cfun->machine->sdmode_stack_slot != NULL_RTX | |
11172 | && MEM_P (operands[0]) | |
11173 | && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot) | |
11174 | && REG_P (operands[1])) | |
11175 | { | |
11176 | if (FP_REGNO_P (REGNO (operands[1]))) | |
11177 | { | |
11178 | rtx mem = adjust_address_nv (operands[0], DDmode, 0); | |
11179 | mem = eliminate_regs (mem, VOIDmode, NULL_RTX); | |
11180 | emit_insn (gen_movsd_store (mem, operands[1])); | |
11181 | } | |
11182 | else if (INT_REGNO_P (REGNO (operands[1]))) | |
11183 | { | |
11184 | rtx mem = operands[0]; | |
11185 | if (BYTES_BIG_ENDIAN) | |
11186 | mem = adjust_address_nv (mem, mode, 4); | |
11187 | mem = eliminate_regs (mem, VOIDmode, NULL_RTX); | |
11188 | emit_insn (gen_movsd_hardfloat (mem, operands[1])); | |
11189 | } | |
11190 | else | |
11191 | gcc_unreachable(); | |
11192 | return; | |
11193 | } | |
11194 | if (reload_in_progress | |
11195 | && mode == SDmode | |
11196 | && REG_P (operands[0]) | |
11197 | && MEM_P (operands[1]) | |
11198 | && cfun->machine->sdmode_stack_slot != NULL_RTX | |
11199 | && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot)) | |
11200 | { | |
11201 | if (FP_REGNO_P (REGNO (operands[0]))) | |
11202 | { | |
11203 | rtx mem = adjust_address_nv (operands[1], DDmode, 0); | |
11204 | mem = eliminate_regs (mem, VOIDmode, NULL_RTX); | |
11205 | emit_insn (gen_movsd_load (operands[0], mem)); | |
11206 | } | |
11207 | else if (INT_REGNO_P (REGNO (operands[0]))) | |
11208 | { | |
11209 | rtx mem = operands[1]; | |
11210 | if (BYTES_BIG_ENDIAN) | |
11211 | mem = adjust_address_nv (mem, mode, 4); | |
11212 | mem = eliminate_regs (mem, VOIDmode, NULL_RTX); | |
11213 | emit_insn (gen_movsd_hardfloat (operands[0], mem)); | |
11214 | } | |
11215 | else | |
11216 | gcc_unreachable(); | |
11217 | return; | |
11218 | } | |
11219 | ||
11220 | /* FIXME: In the long term, this switch statement should go away | |
11221 | and be replaced by a sequence of tests based on things like | |
11222 | mode == Pmode. */ | |
11223 | switch (mode) | |
11224 | { | |
4e10a5a7 RS |
11225 | case E_HImode: |
11226 | case E_QImode: | |
83349046 SB |
11227 | if (CONSTANT_P (operands[1]) |
11228 | && GET_CODE (operands[1]) != CONST_INT) | |
11229 | operands[1] = force_const_mem (mode, operands[1]); | |
11230 | break; | |
11231 | ||
4e10a5a7 RS |
11232 | case E_TFmode: |
11233 | case E_TDmode: | |
11234 | case E_IFmode: | |
11235 | case E_KFmode: | |
83349046 SB |
11236 | if (FLOAT128_2REG_P (mode)) |
11237 | rs6000_eliminate_indexed_memrefs (operands); | |
11238 | /* fall through */ | |
11239 | ||
4e10a5a7 RS |
11240 | case E_DFmode: |
11241 | case E_DDmode: | |
11242 | case E_SFmode: | |
11243 | case E_SDmode: | |
83349046 SB |
11244 | if (CONSTANT_P (operands[1]) |
11245 | && ! easy_fp_constant (operands[1], mode)) | |
11246 | operands[1] = force_const_mem (mode, operands[1]); | |
11247 | break; | |
11248 | ||
4e10a5a7 RS |
11249 | case E_V16QImode: |
11250 | case E_V8HImode: | |
11251 | case E_V4SFmode: | |
11252 | case E_V4SImode: | |
11253 | case E_V4HImode: | |
11254 | case E_V2SFmode: | |
11255 | case E_V2SImode: | |
11256 | case E_V1DImode: | |
11257 | case E_V2DFmode: | |
11258 | case E_V2DImode: | |
11259 | case E_V1TImode: | |
83349046 SB |
11260 | if (CONSTANT_P (operands[1]) |
11261 | && !easy_vector_constant (operands[1], mode)) | |
11262 | operands[1] = force_const_mem (mode, operands[1]); | |
11263 | break; | |
11264 | ||
4e10a5a7 RS |
11265 | case E_SImode: |
11266 | case E_DImode: | |
83349046 SB |
11267 | /* Use default pattern for address of ELF small data */ |
11268 | if (TARGET_ELF | |
11269 | && mode == Pmode | |
11270 | && DEFAULT_ABI == ABI_V4 | |
11271 | && (GET_CODE (operands[1]) == SYMBOL_REF | |
11272 | || GET_CODE (operands[1]) == CONST) | |
11273 | && small_data_operand (operands[1], mode)) | |
11274 | { | |
11275 | emit_insn (gen_rtx_SET (operands[0], operands[1])); | |
11276 | return; | |
11277 | } | |
11278 | ||
11279 | if (DEFAULT_ABI == ABI_V4 | |
11280 | && mode == Pmode && mode == SImode | |
11281 | && flag_pic == 1 && got_operand (operands[1], mode)) | |
11282 | { | |
11283 | emit_insn (gen_movsi_got (operands[0], operands[1])); | |
11284 | return; | |
11285 | } | |
11286 | ||
11287 | if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN) | |
11288 | && TARGET_NO_TOC | |
11289 | && ! flag_pic | |
11290 | && mode == Pmode | |
11291 | && CONSTANT_P (operands[1]) | |
11292 | && GET_CODE (operands[1]) != HIGH | |
11293 | && GET_CODE (operands[1]) != CONST_INT) | |
11294 | { | |
11295 | rtx target = (!can_create_pseudo_p () | |
11296 | ? operands[0] | |
11297 | : gen_reg_rtx (mode)); | |
11298 | ||
11299 | /* If this is a function address on -mcall-aixdesc, | |
11300 | convert it to the address of the descriptor. */ | |
11301 | if (DEFAULT_ABI == ABI_AIX | |
11302 | && GET_CODE (operands[1]) == SYMBOL_REF | |
11303 | && XSTR (operands[1], 0)[0] == '.') | |
11304 | { | |
11305 | const char *name = XSTR (operands[1], 0); | |
11306 | rtx new_ref; | |
11307 | while (*name == '.') | |
11308 | name++; | |
11309 | new_ref = gen_rtx_SYMBOL_REF (Pmode, name); | |
11310 | CONSTANT_POOL_ADDRESS_P (new_ref) | |
11311 | = CONSTANT_POOL_ADDRESS_P (operands[1]); | |
11312 | SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]); | |
11313 | SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]); | |
11314 | SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]); | |
11315 | operands[1] = new_ref; | |
11316 | } | |
11317 | ||
11318 | if (DEFAULT_ABI == ABI_DARWIN) | |
11319 | { | |
11320 | #if TARGET_MACHO | |
11321 | if (MACHO_DYNAMIC_NO_PIC_P) | |
11322 | { | |
11323 | /* Take care of any required data indirection. */ | |
11324 | operands[1] = rs6000_machopic_legitimize_pic_address ( | |
11325 | operands[1], mode, operands[0]); | |
11326 | if (operands[0] != operands[1]) | |
11327 | emit_insn (gen_rtx_SET (operands[0], operands[1])); | |
11328 | return; | |
11329 | } | |
11330 | #endif | |
11331 | emit_insn (gen_macho_high (target, operands[1])); | |
11332 | emit_insn (gen_macho_low (operands[0], target, operands[1])); | |
11333 | return; | |
11334 | } | |
11335 | ||
11336 | emit_insn (gen_elf_high (target, operands[1])); | |
11337 | emit_insn (gen_elf_low (operands[0], target, operands[1])); | |
11338 | return; | |
11339 | } | |
11340 | ||
11341 | /* If this is a SYMBOL_REF that refers to a constant pool entry, | |
11342 | and we have put it in the TOC, we just need to make a TOC-relative | |
11343 | reference to it. */ | |
11344 | if (TARGET_TOC | |
11345 | && GET_CODE (operands[1]) == SYMBOL_REF | |
11346 | && use_toc_relative_ref (operands[1], mode)) | |
11347 | operands[1] = create_TOC_reference (operands[1], operands[0]); | |
11348 | else if (mode == Pmode | |
11349 | && CONSTANT_P (operands[1]) | |
11350 | && GET_CODE (operands[1]) != HIGH | |
11351 | && ((GET_CODE (operands[1]) != CONST_INT | |
11352 | && ! easy_fp_constant (operands[1], mode)) | |
11353 | || (GET_CODE (operands[1]) == CONST_INT | |
11354 | && (num_insns_constant (operands[1], mode) | |
11355 | > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2))) | |
11356 | || (GET_CODE (operands[0]) == REG | |
11357 | && FP_REGNO_P (REGNO (operands[0])))) | |
11358 | && !toc_relative_expr_p (operands[1], false) | |
11359 | && (TARGET_CMODEL == CMODEL_SMALL | |
11360 | || can_create_pseudo_p () | |
11361 | || (REG_P (operands[0]) | |
11362 | && INT_REG_OK_FOR_BASE_P (operands[0], true)))) | |
11363 | { | |
11364 | ||
11365 | #if TARGET_MACHO | |
11366 | /* Darwin uses a special PIC legitimizer. */ | |
11367 | if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT) | |
11368 | { | |
11369 | operands[1] = | |
11370 | rs6000_machopic_legitimize_pic_address (operands[1], mode, | |
11371 | operands[0]); | |
11372 | if (operands[0] != operands[1]) | |
11373 | emit_insn (gen_rtx_SET (operands[0], operands[1])); | |
11374 | return; | |
11375 | } | |
11376 | #endif | |
11377 | ||
11378 | /* If we are to limit the number of things we put in the TOC and | |
11379 | this is a symbol plus a constant we can add in one insn, | |
11380 | just put the symbol in the TOC and add the constant. Don't do | |
11381 | this if reload is in progress. */ | |
11382 | if (GET_CODE (operands[1]) == CONST | |
11383 | && TARGET_NO_SUM_IN_TOC && ! reload_in_progress | |
11384 | && GET_CODE (XEXP (operands[1], 0)) == PLUS | |
11385 | && add_operand (XEXP (XEXP (operands[1], 0), 1), mode) | |
11386 | && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF | |
11387 | || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF) | |
11388 | && ! side_effects_p (operands[0])) | |
11389 | { | |
11390 | rtx sym = | |
11391 | force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0)); | |
11392 | rtx other = XEXP (XEXP (operands[1], 0), 1); | |
11393 | ||
11394 | sym = force_reg (mode, sym); | |
11395 | emit_insn (gen_add3_insn (operands[0], sym, other)); | |
11396 | return; | |
11397 | } | |
11398 | ||
11399 | operands[1] = force_const_mem (mode, operands[1]); | |
11400 | ||
11401 | if (TARGET_TOC | |
11402 | && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF | |
11403 | && use_toc_relative_ref (XEXP (operands[1], 0), mode)) | |
11404 | { | |
11405 | rtx tocref = create_TOC_reference (XEXP (operands[1], 0), | |
11406 | operands[0]); | |
11407 | operands[1] = gen_const_mem (mode, tocref); | |
11408 | set_mem_alias_set (operands[1], get_TOC_alias_set ()); | |
11409 | } | |
11410 | } | |
11411 | break; | |
11412 | ||
4e10a5a7 | 11413 | case E_TImode: |
83349046 SB |
11414 | if (!VECTOR_MEM_VSX_P (TImode)) |
11415 | rs6000_eliminate_indexed_memrefs (operands); | |
11416 | break; | |
11417 | ||
4e10a5a7 | 11418 | case E_PTImode: |
83349046 SB |
11419 | rs6000_eliminate_indexed_memrefs (operands); |
11420 | break; | |
11421 | ||
11422 | default: | |
11423 | fatal_insn ("bad move", gen_rtx_SET (dest, source)); | |
11424 | } | |
11425 | ||
11426 | /* Above, we may have called force_const_mem which may have returned | |
11427 | an invalid address. If we can, fix this up; otherwise, reload will | |
11428 | have to deal with it. */ | |
11429 | if (GET_CODE (operands[1]) == MEM && ! reload_in_progress) | |
11430 | operands[1] = validize_mem (operands[1]); | |
11431 | ||
11432 | emit_set: | |
11433 | emit_insn (gen_rtx_SET (operands[0], operands[1])); | |
11434 | } | |
11435 | ||
11436 | /* Return true if a structure, union or array containing FIELD should be | |
11437 | accessed using `BLKMODE'. | |
11438 | ||
11439 | For the SPE, simd types are V2SI, and gcc can be tempted to put the | |
11440 | entire thing in a DI and use subregs to access the internals. | |
11441 | store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the | |
11442 | back-end. Because a single GPR can hold a V2SI, but not a DI, the | |
11443 | best thing to do is set structs to BLKmode and avoid Severe Tire | |
11444 | Damage. | |
11445 | ||
11446 | On e500 v2, DF and DI modes suffer from the same anomaly. DF can | |
11447 | fit into 1, whereas DI still needs two. */ | |
11448 | ||
11449 | static bool | |
11450 | rs6000_member_type_forces_blk (const_tree field, machine_mode mode) | |
11451 | { | |
11452 | return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE) | |
11453 | || (TARGET_E500_DOUBLE && mode == DFmode)); | |
11454 | } | |
11455 | \f | |
11456 | /* Nonzero if we can use a floating-point register to pass this arg. */ | |
11457 | #define USE_FP_FOR_ARG_P(CUM,MODE) \ | |
11458 | (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \ | |
11459 | && (CUM)->fregno <= FP_ARG_MAX_REG \ | |
11460 | && TARGET_HARD_FLOAT && TARGET_FPRS) | |
11461 | ||
11462 | /* Nonzero if we can use an AltiVec register to pass this arg. */ | |
11463 | #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \ | |
11464 | (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \ | |
11465 | && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \ | |
11466 | && TARGET_ALTIVEC_ABI \ | |
11467 | && (NAMED)) | |
11468 | ||
11469 | /* Walk down the type tree of TYPE counting consecutive base elements. | |
11470 | If *MODEP is VOIDmode, then set it to the first valid floating point | |
11471 | or vector type. If a non-floating point or vector type is found, or | |
11472 | if a floating point or vector type that doesn't match a non-VOIDmode | |
11473 | *MODEP is found, then return -1, otherwise return the count in the | |
11474 | sub-tree. */ | |
11475 | ||
static int
rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      /* A scalar float is one element; its mode must agree with any
	 element mode already recorded in *MODEP.  */
      mode = TYPE_MODE (type);
      if (!SCALAR_FLOAT_MODE_P (mode))
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      /* Mode mismatch with an earlier element: not homogeneous.  */
      break;

    case COMPLEX_TYPE:
      /* A complex float counts as two elements of its component mode.  */
      mode = TYPE_MODE (TREE_TYPE (type));
      if (!SCALAR_FLOAT_MODE_P (mode))
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
	return -1;

      /* Use V4SImode as representative of all 128-bit vector types.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	/* Classify one array element, then scale by the number of
	   elements; the array bounds must be known host-width
	   constants for the multiplication below to make sense.  */
	count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	/* A struct's element count is the sum over its fields.  */
	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	/* A union's element count is the maximum over its members,
	   since they overlap.  */
	for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    default:
      /* Any other type code (integers, pointers, ...) disqualifies
	 the aggregate.  */
      break;
    }

  return -1;
}
11638 | ||
11639 | /* If an argument, whose type is described by TYPE and MODE, is a homogeneous | |
11640 | float or vector aggregate that shall be passed in FP/vector registers | |
11641 | according to the ELFv2 ABI, return the homogeneous element mode in | |
11642 | *ELT_MODE and the number of elements in *N_ELTS, and return TRUE. | |
11643 | ||
11644 | Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */ | |
11645 | ||
static bool
rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
				       machine_mode *elt_mode,
				       int *n_elts)
{
  /* Note that we do not accept complex types at the top level as
     homogeneous aggregates; these types are handled via the
     targetm.calls.split_complex_arg mechanism.  Complex types
     can be elements of homogeneous aggregates, however.  */
  if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
    {
      machine_mode field_mode = VOIDmode;
      int field_count = rs6000_aggregate_candidate (type, &field_mode);

      if (field_count > 0)
	{
	  /* Registers per element: FP scalars take their size rounded
	     up to 8-byte registers; vector elements take one each.  */
	  int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
			(GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);

	  /* The ELFv2 ABI allows homogeneous aggregates to occupy
	     up to AGGR_ARG_NUM_REG registers.  */
	  if (field_count * n_regs <= AGGR_ARG_NUM_REG)
	    {
	      if (elt_mode)
		*elt_mode = field_mode;
	      if (n_elts)
		*n_elts = field_count;
	      return true;
	    }
	}
    }

  /* Not a homogeneous aggregate: report the argument as a single
     element of its own mode.  */
  if (elt_mode)
    *elt_mode = mode;
  if (n_elts)
    *n_elts = 1;
  return false;
}
11684 | ||
11685 | /* Return a nonzero value to say to return the function value in | |
11686 | memory, just as large structures are always returned. TYPE will be | |
11687 | the data type of the value, and FNTYPE will be the type of the | |
11688 | function doing the returning, or @code{NULL} for libcalls. | |
11689 | ||
11690 | The AIX ABI for the RS/6000 specifies that all structures are | |
11691 | returned in memory. The Darwin ABI does the same. | |
11692 | ||
11693 | For the Darwin 64 Bit ABI, a function result can be returned in | |
11694 | registers or in memory, depending on the size of the return data | |
11695 | type. If it is returned in registers, the value occupies the same | |
11696 | registers as it would if it were the first and only function | |
11697 | argument. Otherwise, the function places its result in memory at | |
11698 | the location pointed to by GPR3. | |
11699 | ||
11700 | The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4, | |
11701 | but a draft put them in memory, and GCC used to implement the draft | |
11702 | instead of the final standard. Therefore, aix_struct_return | |
11703 | controls this instead of DEFAULT_ABI; V.4 targets needing backward | |
11704 | compatibility can change DRAFT_V4_STRUCT_RET to override the | |
11705 | default, and -m switches get the final word. See | |
11706 | rs6000_option_override_internal for more details. | |
11707 | ||
11708 | The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit | |
11709 | long double support is enabled. These values are returned in memory. | |
11710 | ||
11711 | int_size_in_bytes returns -1 for variable size objects, which go in | |
11712 | memory always. The cast to unsigned makes -1 > 8. */ | |
11713 | ||
static bool
rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  /* For the Darwin64 ABI, test if we can fit the return value in regs.  */
  if (TARGET_MACHO
      && rs6000_darwin64_abi
      && TREE_CODE (type) == RECORD_TYPE
      && int_size_in_bytes (type) > 0)
    {
      CUMULATIVE_ARGS valcum;
      rtx valret;

      valcum.words = 0;
      valcum.fregno = FP_ARG_MIN_REG;
      valcum.vregno = ALTIVEC_ARG_MIN_REG;
      /* Do a trial code generation as if this were going to be passed
	 as an argument; if any part goes in memory, we return NULL.  */
      valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
      if (valret)
	return false;
      /* Otherwise fall through to more conventional ABI rules.  */
    }

  /* The ELFv2 ABI returns homogeneous VFP aggregates in registers */
  if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
					     NULL, NULL))
    return false;

  /* The ELFv2 ABI returns aggregates up to 16B in registers */
  if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
      && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
    return false;

  /* int_size_in_bytes returns -1 for variable-size objects; the
     unsigned cast makes -1 compare as > 8, so they go in memory.  */
  if (AGGREGATE_TYPE_P (type)
      && (aix_struct_return
	  || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
    return true;

  /* Allow -maltivec -mabi=no-altivec without warning.  Altivec vector
     modes only exist for GCC vector types if -maltivec.  */
  if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
      && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
    return false;

  /* Return synthetic vectors in memory.  */
  if (TREE_CODE (type) == VECTOR_TYPE
      && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
    {
      static bool warned_for_return_big_vectors = false;
      if (!warned_for_return_big_vectors)
	{
	  warning (OPT_Wpsabi, "GCC vector returned by reference: "
		   "non-standard ABI extension with no compatibility guarantee");
	  warned_for_return_big_vectors = true;
	}
      return true;
    }

  /* 128-bit IEEE long double under the 32-bit SVR4 ABI is returned
     in memory.  */
  if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
      && FLOAT128_IEEE_P (TYPE_MODE (type)))
    return true;

  return false;
}
11778 | ||
11779 | /* Specify whether values returned in registers should be at the most | |
11780 | significant end of a register. We want aggregates returned by | |
11781 | value to match the way aggregates are passed to functions. */ | |
11782 | ||
11783 | static bool | |
11784 | rs6000_return_in_msb (const_tree valtype) | |
11785 | { | |
11786 | return (DEFAULT_ABI == ABI_ELFv2 | |
11787 | && BYTES_BIG_ENDIAN | |
11788 | && AGGREGATE_TYPE_P (valtype) | |
76b0cbf8 RS |
11789 | && rs6000_function_arg_padding (TYPE_MODE (valtype), |
11790 | valtype) == PAD_UPWARD); | |
83349046 SB |
11791 | } |
11792 | ||
#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Return TRUE if a call to function FNDECL may be one that
   potentially affects the function calling ABI of the object file.  */

static bool
call_ABI_of_interest (tree fndecl)
{
  /* Only relevant when emitting GNU attributes, and only once we have
     reached RTL expansion.  */
  if (!rs6000_gnu_attr || symtab->state != EXPANSION)
    return false;

  /* Libcalls are always interesting.  */
  if (fndecl == NULL_TREE)
    return true;

  /* Any call to an external function is interesting.  */
  if (DECL_EXTERNAL (fndecl))
    return true;

  /* A function emitted in this object file is interesting unless it
     can only ever be called directly from here.  */
  struct cgraph_node *c_node = cgraph_node::get (fndecl);
  c_node = c_node->ultimate_alias_target ();
  return !c_node->only_called_directly_p ();
}
#endif
11820 | ||
11821 | /* Initialize a variable CUM of type CUMULATIVE_ARGS | |
11822 | for a call to a function whose data type is FNTYPE. | |
11823 | For a library call, FNTYPE is 0 and RETURN_MODE the return value mode. | |
11824 | ||
11825 | For incoming args we set the number of arguments in the prototype large | |
11826 | so we never return a PARALLEL. */ | |
11827 | ||
void
init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
		      rtx libname ATTRIBUTE_UNUSED, int incoming,
		      int libcall, int n_named_args,
		      tree fndecl ATTRIBUTE_UNUSED,
		      machine_mode return_mode ATTRIBUTE_UNUSED)
{
  static CUMULATIVE_ARGS zero_cumulative;

  /* Start from an all-zero state, then fill in the register cursors.  */
  *cum = zero_cumulative;
  cum->words = 0;
  cum->fregno = FP_ARG_MIN_REG;
  cum->vregno = ALTIVEC_ARG_MIN_REG;
  cum->prototype = (fntype && prototype_p (fntype));
  cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
		      ? CALL_LIBCALL : CALL_NORMAL);
  cum->sysv_gregno = GP_ARG_MIN_REG;
  cum->stdarg = stdarg_p (fntype);
  cum->libcall = libcall;

  /* For incoming args the prototype arg count is set large (to
     n_named_args) so a PARALLEL is never returned; see the function
     comment above.  */
  cum->nargs_prototype = 0;
  if (incoming || cum->prototype)
    cum->nargs_prototype = n_named_args;

  /* Check for a longcall attribute.  */
  if ((!fntype && rs6000_default_long_calls)
      || (fntype
	  && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
	  && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
    cum->call_cookie |= CALL_LONG;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr, "\ninit_cumulative_args:");
      if (fntype)
	{
	  tree ret_type = TREE_TYPE (fntype);
	  fprintf (stderr, " ret code = %s,",
		   get_tree_code_name (TREE_CODE (ret_type)));
	}

      if (cum->call_cookie & CALL_LONG)
	fprintf (stderr, " longcall,");

      fprintf (stderr, " proto = %d, nargs = %d\n",
	       cum->prototype, cum->nargs_prototype);
    }

#ifdef HAVE_AS_GNU_ATTRIBUTE
  /* Track whether float/long double/vector values cross an ABI
     boundary, to drive .gnu_attribute tagging of the object file.  */
  if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4))
    {
      cum->escapes = call_ABI_of_interest (fndecl);
      if (cum->escapes)
	{
	  tree return_type;

	  if (fntype)
	    {
	      return_type = TREE_TYPE (fntype);
	      return_mode = TYPE_MODE (return_type);
	    }
	  else
	    return_type = lang_hooks.types.type_for_mode (return_mode, 0);

	  if (return_type != NULL)
	    {
	      /* Look through transparent aggregates to the real
		 returned value.  */
	      if (TREE_CODE (return_type) == RECORD_TYPE
		  && TYPE_TRANSPARENT_AGGR (return_type))
		{
		  return_type = TREE_TYPE (first_field (return_type));
		  return_mode = TYPE_MODE (return_type);
		}
	      if (AGGREGATE_TYPE_P (return_type)
		  && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
		      <= 8))
		rs6000_returns_struct = true;
	    }
	  if (SCALAR_FLOAT_MODE_P (return_mode))
	    {
	      rs6000_passes_float = true;
	      if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
		  && (FLOAT128_IBM_P (return_mode)
		      || FLOAT128_IEEE_P (return_mode)
		      || (return_type != NULL
			  && (TYPE_MAIN_VARIANT (return_type)
			      == long_double_type_node))))
		rs6000_passes_long_double = true;
	    }
	  if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
	      || SPE_VECTOR_MODE (return_mode))
	    rs6000_passes_vector = true;
	}
    }
#endif

  /* Returning a vector value requires AltiVec instructions when the
     AltiVec ABI is in use.  */
  if (fntype
      && !TARGET_ALTIVEC
      && TARGET_ALTIVEC_ABI
      && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
    {
      error ("cannot return value in vector register because"
	     " altivec instructions are disabled, use -maltivec"
	     " to enable them");
    }
}
11933 | \f | |
11934 | /* The mode the ABI uses for a word. This is not the same as word_mode | |
11935 | for -m32 -mpowerpc64. This is used to implement various target hooks. */ | |
11936 | ||
095a2d76 | 11937 | static scalar_int_mode |
83349046 SB |
11938 | rs6000_abi_word_mode (void) |
11939 | { | |
11940 | return TARGET_32BIT ? SImode : DImode; | |
11941 | } | |
11942 | ||
11943 | /* Implement the TARGET_OFFLOAD_OPTIONS hook. */ | |
11944 | static char * | |
11945 | rs6000_offload_options (void) | |
11946 | { | |
11947 | if (TARGET_64BIT) | |
11948 | return xstrdup ("-foffload-abi=lp64"); | |
11949 | else | |
11950 | return xstrdup ("-foffload-abi=ilp32"); | |
11951 | } | |
11952 | ||
11953 | /* On rs6000, function arguments are promoted, as are function return | |
11954 | values. */ | |
11955 | ||
static machine_mode
rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
			      machine_mode mode,
			      int *punsignedp ATTRIBUTE_UNUSED,
			      const_tree, int)
{
  /* PROMOTE_MODE is a macro that may update MODE and *PUNSIGNEDP in
     place; it also expands to a use of TYPE, so the ATTRIBUTE_UNUSED
     markers merely silence warnings on configurations whose macro
     expansion ignores those operands.  */
  PROMOTE_MODE (mode, *punsignedp, type);

  return mode;
}
11966 | ||
11967 | /* Return true if TYPE must be passed on the stack and not in registers. */ | |
11968 | ||
11969 | static bool | |
11970 | rs6000_must_pass_in_stack (machine_mode mode, const_tree type) | |
11971 | { | |
11972 | if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT) | |
11973 | return must_pass_in_stack_var_size (mode, type); | |
11974 | else | |
11975 | return must_pass_in_stack_var_size_or_pad (mode, type); | |
11976 | } | |
11977 | ||
11978 | static inline bool | |
11979 | is_complex_IBM_long_double (machine_mode mode) | |
11980 | { | |
11981 | return mode == ICmode || (!TARGET_IEEEQUAD && mode == TCmode); | |
11982 | } | |
11983 | ||
11984 | /* Whether ABI_V4 passes MODE args to a function in floating point | |
11985 | registers. */ | |
11986 | ||
11987 | static bool | |
11988 | abi_v4_pass_in_fpr (machine_mode mode) | |
11989 | { | |
11990 | if (!TARGET_FPRS || !TARGET_HARD_FLOAT) | |
11991 | return false; | |
11992 | if (TARGET_SINGLE_FLOAT && mode == SFmode) | |
11993 | return true; | |
11994 | if (TARGET_DOUBLE_FLOAT && mode == DFmode) | |
11995 | return true; | |
11996 | /* ABI_V4 passes complex IBM long double in 8 gprs. | |
11997 | Stupid, but we can't change the ABI now. */ | |
11998 | if (is_complex_IBM_long_double (mode)) | |
11999 | return false; | |
12000 | if (FLOAT128_2REG_P (mode)) | |
12001 | return true; | |
12002 | if (DECIMAL_FLOAT_MODE_P (mode)) | |
12003 | return true; | |
12004 | return false; | |
12005 | } | |
12006 | ||
76b0cbf8 | 12007 | /* Implement TARGET_FUNCTION_ARG_PADDING |
83349046 SB |
12008 | |
12009 | For the AIX ABI structs are always stored left shifted in their | |
12010 | argument slot. */ | |
12011 | ||
76b0cbf8 RS |
static pad_direction
rs6000_function_arg_padding (machine_mode mode, const_tree type)
{
  /* Subtargets may predefine these; default both to 0 so the
     compatibility logic below is used.  */
#ifndef AGGREGATE_PADDING_FIXED
#define AGGREGATE_PADDING_FIXED 0
#endif
#ifndef AGGREGATES_PAD_UPWARD_ALWAYS
#define AGGREGATES_PAD_UPWARD_ALWAYS 0
#endif

  if (!AGGREGATE_PADDING_FIXED)
    {
      /* GCC used to pass structures of the same size as integer types as
	 if they were in fact integers, ignoring TARGET_FUNCTION_ARG_PADDING.
	 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
	 passed padded downward, except that -mstrict-align further
	 muddied the water in that multi-component structures of 2 and 4
	 bytes in size were passed padded upward.

	 The following arranges for best compatibility with previous
	 versions of gcc, but removes the -mstrict-align dependency.  */
      if (BYTES_BIG_ENDIAN)
	{
	  HOST_WIDE_INT size = 0;

	  if (mode == BLKmode)
	    {
	      /* Variable-size types keep size == 0 and pad upward.  */
	      if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
		size = int_size_in_bytes (type);
	    }
	  else
	    size = GET_MODE_SIZE (mode);

	  if (size == 1 || size == 2 || size == 4)
	    return PAD_DOWNWARD;
	}
      return PAD_UPWARD;
    }

  if (AGGREGATES_PAD_UPWARD_ALWAYS)
    {
      if (type != 0 && AGGREGATE_TYPE_P (type))
	return PAD_UPWARD;
    }

  /* Fall back to the default.  */
  return default_function_arg_padding (mode, type);
}
12060 | ||
12061 | /* If defined, a C expression that gives the alignment boundary, in bits, | |
12062 | of an argument with the specified mode and type. If it is not defined, | |
12063 | PARM_BOUNDARY is used for all arguments. | |
12064 | ||
12065 | V.4 wants long longs and doubles to be double word aligned. Just | |
12066 | testing the mode size is a boneheaded way to do this as it means | |
12067 | that other types such as complex int are also double word aligned. | |
12068 | However, we're stuck with this because changing the ABI might break | |
12069 | existing library interfaces. | |
12070 | ||
12071 | Doubleword align SPE vectors. | |
12072 | Quadword align Altivec/VSX vectors. | |
12073 | Quadword align large synthetic vector types. */ | |
12074 | ||
static unsigned int
rs6000_function_arg_boundary (machine_mode mode, const_tree type)
{
  machine_mode elt_mode;
  int n_elts;

  /* For ELFv2 homogeneous aggregates, alignment follows the element
     mode rather than the aggregate's own (often BLK) mode.  */
  rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);

  /* V.4: doubleword-align 8-byte values and two-register 128-bit
     floats (but not complex IBM long double, which goes in GPRs).  */
  if (DEFAULT_ABI == ABI_V4
      && (GET_MODE_SIZE (mode) == 8
	  || (TARGET_HARD_FLOAT
	      && TARGET_FPRS
	      && !is_complex_IBM_long_double (mode)
	      && FLOAT128_2REG_P (mode))))
    return 64;
  else if (FLOAT128_VECTOR_P (mode))
    return 128;
  else if (SPE_VECTOR_MODE (mode)
	   || (type && TREE_CODE (type) == VECTOR_TYPE
	       && int_size_in_bytes (type) >= 8
	       && int_size_in_bytes (type) < 16))
    return 64;
  else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
	   || (type && TREE_CODE (type) == VECTOR_TYPE
	       && int_size_in_bytes (type) >= 16))
    return 128;

  /* Aggregate types that need > 8 byte alignment are quadword-aligned
     in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
     -mcompat-align-parm is used.  */
  if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
       || DEFAULT_ABI == ABI_ELFv2)
      && type && TYPE_ALIGN (type) > 64)
    {
      /* "Aggregate" means any AGGREGATE_TYPE except for single-element
	 or homogeneous float/vector aggregates here.  We already handled
	 vector aggregates above, but still need to check for float here. */
      bool aggregate_p = (AGGREGATE_TYPE_P (type)
			  && !SCALAR_FLOAT_MODE_P (elt_mode));

      /* We used to check for BLKmode instead of the above aggregate type
	 check.  Warn when this results in any difference to the ABI.  */
      if (aggregate_p != (mode == BLKmode))
	{
	  static bool warned;
	  if (!warned && warn_psabi)
	    {
	      warned = true;
	      inform (input_location,
		      "the ABI of passing aggregates with %d-byte alignment"
		      " has changed in GCC 5",
		      (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
	    }
	}

      if (aggregate_p)
	return 128;
    }

  /* Similar for the Darwin64 ABI.  Note that for historical reasons we
     implement the "aggregate type" check as a BLKmode check here; this
     means certain aggregate types are in fact not aligned.  */
  if (TARGET_MACHO && rs6000_darwin64_abi
      && mode == BLKmode
      && type && TYPE_ALIGN (type) > 64)
    return 128;

  return PARM_BOUNDARY;
}
12144 | ||
12145 | /* The offset in words to the start of the parameter save area. */ | |
12146 | ||
12147 | static unsigned int | |
12148 | rs6000_parm_offset (void) | |
12149 | { | |
12150 | return (DEFAULT_ABI == ABI_V4 ? 2 | |
12151 | : DEFAULT_ABI == ABI_ELFv2 ? 4 | |
12152 | : 6); | |
12153 | } | |
12154 | ||
12155 | /* For a function parm of MODE and TYPE, return the starting word in | |
12156 | the parameter area. NWORDS of the parameter area are already used. */ | |
12157 | ||
static unsigned int
rs6000_parm_start (machine_mode mode, const_tree type,
		   unsigned int nwords)
{
  unsigned int align;

  /* ALIGN is the argument's required alignment in words minus one;
     boundaries are powers of two, so it acts as a bit mask.  */
  align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
  /* Round the absolute word offset (area offset + NWORDS) up to the
     alignment, then convert back to an offset relative to NWORDS.  */
  return nwords + (-(rs6000_parm_offset () + nwords) & align);
}
12167 | ||
12168 | /* Compute the size (in words) of a function argument. */ | |
12169 | ||
12170 | static unsigned long | |
12171 | rs6000_arg_size (machine_mode mode, const_tree type) | |
12172 | { | |
12173 | unsigned long size; | |
12174 | ||
12175 | if (mode != BLKmode) | |
12176 | size = GET_MODE_SIZE (mode); | |
12177 | else | |
12178 | size = int_size_in_bytes (type); | |
12179 | ||
12180 | if (TARGET_32BIT) | |
12181 | return (size + 3) >> 2; | |
12182 | else | |
12183 | return (size + 7) >> 3; | |
12184 | } | |
12185 | \f | |
12186 | /* Use this to flush pending int fields. */ | |
12187 | ||
static void
rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
					  HOST_WIDE_INT bitpos, int final)
{
  unsigned int startbit, endbit;
  int intregs, intoffset;

  /* Handle the situations where a float is taking up the first half
     of the GPR, and the other half is empty (typically due to
     alignment restrictions).  We can detect this by a 8-byte-aligned
     int field, or by seeing that this is the final flush for this
     argument.  Count the word and continue on.  */
  if (cum->floats_in_gpr == 1
      && (cum->intoffset % 64 == 0
	  || (cum->intoffset == -1 && final)))
    {
      cum->words++;
      cum->floats_in_gpr = 0;
    }

  /* intoffset == -1 means there are no pending int fields to flush.  */
  if (cum->intoffset == -1)
    return;

  intoffset = cum->intoffset;
  cum->intoffset = -1;
  cum->floats_in_gpr = 0;

  if (intoffset % BITS_PER_WORD != 0)
    {
      unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
      if (!int_mode_for_size (bits, 0).exists ())
	{
	  /* We couldn't find an appropriate mode, which happens,
	     e.g., in packed structs when there are 3 bytes to load.
	     Back intoffset back to the beginning of the word in this
	     case.  */
	  intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
	}
    }

  /* Charge one GPR word per word spanned by [intoffset, bitpos).  */
  startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
  endbit = ROUND_UP (bitpos, BITS_PER_WORD);
  intregs = (endbit - startbit) / BITS_PER_WORD;
  cum->words += intregs;
  /* words should be unsigned. */
  if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
    {
      int pad = (endbit/BITS_PER_WORD) - cum->words;
      cum->words += pad;
    }
}
12239 | ||
12240 | /* The darwin64 ABI calls for us to recurse down through structs, | |
12241 | looking for elements passed in registers. Unfortunately, we have | |
12242 | to track int register count here also because of misalignments | |
12243 | in powerpc alignment mode. */ | |
12244 | ||
static void
rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
					    const_tree type,
					    HOST_WIDE_INT startbitpos)
{
  tree f;

  /* Visit each field; STARTBITPOS is the bit offset of TYPE within
     the outermost record being advanced over.  */
  for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
    if (TREE_CODE (f) == FIELD_DECL)
      {
	HOST_WIDE_INT bitpos = startbitpos;
	tree ftype = TREE_TYPE (f);
	machine_mode mode;
	if (ftype == error_mark_node)
	  continue;
	mode = TYPE_MODE (ftype);

	if (DECL_SIZE (f) != 0
	    && tree_fits_uhwi_p (bit_position (f)))
	  bitpos += int_bit_position (f);

	/* ??? FIXME: else assume zero offset.  */

	if (TREE_CODE (ftype) == RECORD_TYPE)
	  rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
	else if (USE_FP_FOR_ARG_P (cum, mode))
	  {
	    unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
	    /* Flush any pending int fields before claiming FP regs.  */
	    rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
	    cum->fregno += n_fpregs;
	    /* Single-precision floats present a special problem for
	       us, because they are smaller than an 8-byte GPR, and so
	       the structure-packing rules combined with the standard
	       varargs behavior mean that we want to pack float/float
	       and float/int combinations into a single register's
	       space.  This is complicated by the arg advance flushing,
	       which works on arbitrarily large groups of int-type
	       fields.  */
	    if (mode == SFmode)
	      {
		if (cum->floats_in_gpr == 1)
		  {
		    /* Two floats in a word; count the word and reset
		       the float count.  */
		    cum->words++;
		    cum->floats_in_gpr = 0;
		  }
		else if (bitpos % 64 == 0)
		  {
		    /* A float at the beginning of an 8-byte word;
		       count it and put off adjusting cum->words until
		       we see if a arg advance flush is going to do it
		       for us.  */
		    cum->floats_in_gpr++;
		  }
		else
		  {
		    /* The float is at the end of a word, preceded
		       by integer fields, so the arg advance flush
		       just above has already set cum->words and
		       everything is taken care of.  */
		  }
	      }
	    else
	      cum->words += n_fpregs;
	  }
	else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
	  {
	    rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
	    cum->vregno++;
	    cum->words += 2;
	  }
	else if (cum->intoffset == -1)
	  /* Start (or continue) a run of int-type fields; the actual
	     word accounting happens at the next flush.  */
	  cum->intoffset = bitpos;
      }
}
12321 | ||
12322 | /* Check for an item that needs to be considered specially under the darwin 64 | |
12323 | bit ABI. These are record types where the mode is BLK or the structure is | |
12324 | 8 bytes in size. */ | |
12325 | static int | |
12326 | rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type) | |
12327 | { | |
12328 | return rs6000_darwin64_abi | |
12329 | && ((mode == BLKmode | |
12330 | && TREE_CODE (type) == RECORD_TYPE | |
12331 | && int_size_in_bytes (type) > 0) | |
12332 | || (type && TREE_CODE (type) == RECORD_TYPE | |
12333 | && int_size_in_bytes (type) == 8)) ? 1 : 0; | |
12334 | } | |
12335 | ||
/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)

   DEPTH is the recursion level: 0 for a top-level argument, nonzero when
   called recursively (e.g. while walking aggregate fields), in which case
   the prototype-argument counter is not decremented.

   Note that for args passed by reference, function_arg will be called
   with MODE and TYPE set to that of the pointer to the arg, not the arg
   itself.  */

static void
rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
			       const_tree type, bool named, int depth)
{
  machine_mode elt_mode;
  int n_elts;

  /* For homogeneous aggregates this yields the element mode and count;
     otherwise ELT_MODE is just MODE and N_ELTS is 1.  */
  rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);

  /* Only tick off an argument if we're not recursing.  */
  if (depth == 0)
    cum->nargs_prototype--;

#ifdef HAVE_AS_GNU_ATTRIBUTE
  /* Record float/long-double/vector parameter passing for the GNU
     attribute section, but only when the values escape the compiler's
     view (cum->escapes).  */
  if (TARGET_ELF && (TARGET_64BIT || DEFAULT_ABI == ABI_V4)
      && cum->escapes)
    {
      if (SCALAR_FLOAT_MODE_P (mode))
	{
	  rs6000_passes_float = true;
	  if ((HAVE_LD_PPC_GNU_ATTR_LONG_DOUBLE || TARGET_64BIT)
	      && (FLOAT128_IBM_P (mode)
		  || FLOAT128_IEEE_P (mode)
		  || (type != NULL
		      && TYPE_MAIN_VARIANT (type) == long_double_type_node)))
	    rs6000_passes_long_double = true;
	}
      if ((named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
	  || (SPE_VECTOR_MODE (mode)
	      && !cum->stdarg
	      && cum->sysv_gregno <= GP_ARG_MAX_REG))
	rs6000_passes_vector = true;
    }
#endif

  /* Case 1: AltiVec/VSX vector argument (by mode, or a 16-byte vector
     type).  */
  if (TARGET_ALTIVEC_ABI
      && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
	  || (type && TREE_CODE (type) == VECTOR_TYPE
	      && int_size_in_bytes (type) == 16)))
    {
      bool stack = false;

      if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
	{
	  cum->vregno += n_elts;

	  if (!TARGET_ALTIVEC)
	    error ("cannot pass argument in vector register because"
		   " altivec instructions are disabled, use -maltivec"
		   " to enable them");

	  /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
	     even if it is going to be passed in a vector register.
	     Darwin does the same for variable-argument functions.  */
	  if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
	       && TARGET_64BIT)
	      || (cum->stdarg && DEFAULT_ABI != ABI_V4))
	    stack = true;
	}
      else
	stack = true;

      if (stack)
	{
	  int align;

	  /* Vector parameters must be 16-byte aligned.  In 32-bit
	     mode this means we need to take into account the offset
	     to the parameter save area.  In 64-bit mode, they just
	     have to start on an even word, since the parameter save
	     area is 16-byte aligned.  */
	  if (TARGET_32BIT)
	    align = -(rs6000_parm_offset () + cum->words) & 3;
	  else
	    align = cum->words & 1;
	  cum->words += align + rs6000_arg_size (mode, type);

	  if (TARGET_DEBUG_ARG)
	    {
	      fprintf (stderr, "function_adv: words = %2d, align=%d, ",
		       cum->words, align);
	      fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
		       cum->nargs_prototype, cum->prototype,
		       GET_MODE_NAME (mode));
	    }
	}
    }
  /* Case 2: SPE vector in a GPR; just consumes one sysv GPR slot.  */
  else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
	   && !cum->stdarg
	   && cum->sysv_gregno <= GP_ARG_MAX_REG)
    cum->sysv_gregno++;

  /* Case 3: Darwin 64-bit struct passed field-by-field.  */
  else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
    {
      int size = int_size_in_bytes (type);
      /* Variable sized types have size == -1 and are
	 treated as if consisting entirely of ints.
	 Pad to 16 byte boundary if needed.  */
      if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
	  && (cum->words % 2) != 0)
	cum->words++;
      /* For varargs, we can just go up by the size of the struct.  */
      if (!named)
	cum->words += (size + 7) / 8;
      else
	{
	  /* It is tempting to say int register count just goes up by
	     sizeof(type)/8, but this is wrong in a case such as
	     { int; double; int; } [powerpc alignment].  We have to
	     grovel through the fields for these too.  */
	  cum->intoffset = 0;
	  cum->floats_in_gpr = 0;
	  rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
	  rs6000_darwin64_record_arg_advance_flush (cum,
						    size * BITS_PER_UNIT, 1);
	}
      if (TARGET_DEBUG_ARG)
	{
	  fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
		   cum->words, TYPE_ALIGN (type), size);
	  fprintf (stderr,
		   "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
		   cum->nargs_prototype, cum->prototype,
		   GET_MODE_NAME (mode));
	}
    }
  /* Case 4: SysV (32-bit ELF) ABI.  */
  else if (DEFAULT_ABI == ABI_V4)
    {
      if (abi_v4_pass_in_fpr (mode))
	{
	  /* _Decimal128 must use an even/odd register pair.  This assumes
	     that the register number is odd when fregno is odd.  */
	  if (mode == TDmode && (cum->fregno % 2) == 1)
	    cum->fregno++;

	  if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
	      <= FP_ARG_V4_MAX_REG)
	    cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
	  else
	    {
	      /* Out of FPRs: the value goes on the stack, doubleword
		 aligned for 8/16-byte float modes.  */
	      cum->fregno = FP_ARG_V4_MAX_REG + 1;
	      if (mode == DFmode || FLOAT128_IBM_P (mode)
		  || mode == DDmode || mode == TDmode)
		cum->words += cum->words & 1;
	      cum->words += rs6000_arg_size (mode, type);
	    }
	}
      else
	{
	  int n_words = rs6000_arg_size (mode, type);
	  int gregno = cum->sysv_gregno;

	  /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
	     (r7,r8) or (r9,r10).  As does any other 2 word item such
	     as complex int due to a historical mistake.  */
	  if (n_words == 2)
	    gregno += (1 - gregno) & 1;

	  /* Multi-reg args are not split between registers and stack.  */
	  if (gregno + n_words - 1 > GP_ARG_MAX_REG)
	    {
	      /* Long long and SPE vectors are aligned on the stack.
		 So are other 2 word items such as complex int due to
		 a historical mistake.  */
	      if (n_words == 2)
		cum->words += cum->words & 1;
	      cum->words += n_words;
	    }

	  /* Note: continuing to accumulate gregno past when we've started
	     spilling to the stack indicates the fact that we've started
	     spilling to the stack to expand_builtin_saveregs.  */
	  cum->sysv_gregno = gregno + n_words;
	}

      if (TARGET_DEBUG_ARG)
	{
	  fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
		   cum->words, cum->fregno);
	  fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
		   cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
	  fprintf (stderr, "mode = %4s, named = %d\n",
		   GET_MODE_NAME (mode), named);
	}
    }
  /* Case 5: everything else (AIX/ELFv2/Darwin word-based convention —
     NOTE(review): inferred from the DEFAULT_ABI checks above; confirm).  */
  else
    {
      int n_words = rs6000_arg_size (mode, type);
      int start_words = cum->words;
      int align_words = rs6000_parm_start (mode, type, start_words);

      /* GPR/stack words are always consumed, even when the value also
	 lands in FPRs below.  */
      cum->words = align_words + n_words;

      if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
	{
	  /* _Decimal128 must be passed in an even/odd float register pair.
	     This assumes that the register number is odd when fregno is
	     odd.  */
	  if (elt_mode == TDmode && (cum->fregno % 2) == 1)
	    cum->fregno++;
	  cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
	}

      if (TARGET_DEBUG_ARG)
	{
	  fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
		   cum->words, cum->fregno);
	  fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
		   cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
	  fprintf (stderr, "named = %d, align = %d, depth = %d\n",
		   named, align_words - start_words, depth);
	}
    }
}
12558 | ||
12559 | static void | |
12560 | rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode, | |
12561 | const_tree type, bool named) | |
12562 | { | |
12563 | rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named, | |
12564 | 0); | |
12565 | } | |
12566 | ||
12567 | static rtx | |
12568 | spe_build_register_parallel (machine_mode mode, int gregno) | |
12569 | { | |
12570 | rtx r1, r3, r5, r7; | |
12571 | ||
12572 | switch (mode) | |
12573 | { | |
4e10a5a7 | 12574 | case E_DFmode: |
83349046 SB |
12575 | r1 = gen_rtx_REG (DImode, gregno); |
12576 | r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx); | |
12577 | return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1)); | |
12578 | ||
4e10a5a7 RS |
12579 | case E_DCmode: |
12580 | case E_TFmode: | |
83349046 SB |
12581 | r1 = gen_rtx_REG (DImode, gregno); |
12582 | r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx); | |
12583 | r3 = gen_rtx_REG (DImode, gregno + 2); | |
12584 | r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8)); | |
12585 | return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3)); | |
12586 | ||
4e10a5a7 | 12587 | case E_TCmode: |
83349046 SB |
12588 | r1 = gen_rtx_REG (DImode, gregno); |
12589 | r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx); | |
12590 | r3 = gen_rtx_REG (DImode, gregno + 2); | |
12591 | r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8)); | |
12592 | r5 = gen_rtx_REG (DImode, gregno + 4); | |
12593 | r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16)); | |
12594 | r7 = gen_rtx_REG (DImode, gregno + 6); | |
12595 | r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24)); | |
12596 | return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7)); | |
12597 | ||
12598 | default: | |
12599 | gcc_unreachable (); | |
12600 | } | |
12601 | } | |
12602 | ||
/* Determine where to put a SIMD argument on the SPE.  Returns a REG,
   a PARALLEL describing the register placement, or NULL_RTX when the
   argument goes entirely in memory.  */
static rtx
rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
			 const_tree type)
{
  int gregno = cum->sysv_gregno;

  /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
     are passed and returned in a pair of GPRs for ABI compatibility.  */
  if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
			     || mode == DCmode || mode == TCmode))
    {
      int n_words = rs6000_arg_size (mode, type);

      /* Doubles go in an odd/even register pair (r5/r6, etc).  */
      if (mode == DFmode)
	gregno += (1 - gregno) & 1;

      /* Multi-reg args are not split between registers and stack.  */
      if (gregno + n_words - 1 > GP_ARG_MAX_REG)
	return NULL_RTX;

      return spe_build_register_parallel (mode, gregno);
    }
  if (cum->stdarg)
    {
      int n_words = rs6000_arg_size (mode, type);

      /* SPE vectors are put in odd registers.  */
      if (n_words == 2 && (gregno & 1) == 0)
	gregno += 1;

      if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
	{
	  rtx r1, r2;
	  machine_mode m = SImode;

	  /* For varargs, describe the vector as two SImode halves at
	     byte offsets 0 and 4 of the argument slot.  */
	  r1 = gen_rtx_REG (m, gregno);
	  r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
	  r2 = gen_rtx_REG (m, gregno + 1);
	  r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
	  return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
	}
      else
	return NULL_RTX;
    }
  else
    {
      /* Non-varargs: the whole vector in a single GPR if one is left,
	 otherwise memory.  */
      if (gregno <= GP_ARG_MAX_REG)
	return gen_rtx_REG (mode, gregno);
      else
	return NULL_RTX;
    }
}
12657 | ||
/* A subroutine of rs6000_darwin64_record_arg.  Assign the bits of the
   structure between cum->intoffset and bitpos to integer registers,
   appending one EXPR_LIST per register to RVEC at index *K (which is
   advanced).  Resets cum->intoffset to -1; sets cum->use_stack if the
   run of integer fields does not fit in the remaining GPRs.  */

static void
rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
				  HOST_WIDE_INT bitpos, rtx rvec[], int *k)
{
  machine_mode mode;
  unsigned int regno;
  unsigned int startbit, endbit;
  int this_regno, intregs, intoffset;
  rtx reg;

  /* Nothing pending to flush.  */
  if (cum->intoffset == -1)
    return;

  intoffset = cum->intoffset;
  cum->intoffset = -1;

  /* If this is the trailing part of a word, try to only load that
     much into the register.  Otherwise load the whole register.  Note
     that in the latter case we may pick up unwanted bits.  It's not a
     problem at the moment but may wish to revisit.  */

  if (intoffset % BITS_PER_WORD != 0)
    {
      unsigned int bits = BITS_PER_WORD - intoffset % BITS_PER_WORD;
      if (!int_mode_for_size (bits, 0).exists (&mode))
	{
	  /* We couldn't find an appropriate mode, which happens,
	     e.g., in packed structs when there are 3 bytes to load.
	     Back intoffset back to the beginning of the word in this
	     case.  */
	  intoffset = ROUND_DOWN (intoffset, BITS_PER_WORD);
	  mode = word_mode;
	}
    }
  else
    mode = word_mode;

  /* Count how many whole words [startbit, endbit) covers, and which
     argument word (hence GPR) the run begins in.  */
  startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
  endbit = ROUND_UP (bitpos, BITS_PER_WORD);
  intregs = (endbit - startbit) / BITS_PER_WORD;
  this_regno = cum->words + intoffset / BITS_PER_WORD;

  if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
    cum->use_stack = 1;

  intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
  if (intregs <= 0)
    return;

  intoffset /= BITS_PER_UNIT;
  do
    {
      regno = GP_ARG_MIN_REG + this_regno;
      reg = gen_rtx_REG (mode, regno);
      rvec[(*k)++] =
	gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));

      this_regno += 1;
      /* Advance to the next word boundary; after the (possibly partial)
	 first chunk, all remaining chunks are full words.  */
      intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
      mode = word_mode;
      intregs -= 1;
    }
  while (intregs > 0);
}
12725 | ||
12726 | /* Recursive workhorse for the following. */ | |
12727 | ||
12728 | static void | |
12729 | rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type, | |
12730 | HOST_WIDE_INT startbitpos, rtx rvec[], | |
12731 | int *k) | |
12732 | { | |
12733 | tree f; | |
12734 | ||
12735 | for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f)) | |
12736 | if (TREE_CODE (f) == FIELD_DECL) | |
12737 | { | |
12738 | HOST_WIDE_INT bitpos = startbitpos; | |
12739 | tree ftype = TREE_TYPE (f); | |
12740 | machine_mode mode; | |
12741 | if (ftype == error_mark_node) | |
12742 | continue; | |
12743 | mode = TYPE_MODE (ftype); | |
12744 | ||
12745 | if (DECL_SIZE (f) != 0 | |
12746 | && tree_fits_uhwi_p (bit_position (f))) | |
12747 | bitpos += int_bit_position (f); | |
12748 | ||
12749 | /* ??? FIXME: else assume zero offset. */ | |
12750 | ||
12751 | if (TREE_CODE (ftype) == RECORD_TYPE) | |
12752 | rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k); | |
12753 | else if (cum->named && USE_FP_FOR_ARG_P (cum, mode)) | |
12754 | { | |
12755 | unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3; | |
12756 | #if 0 | |
12757 | switch (mode) | |
12758 | { | |
4e10a5a7 RS |
12759 | case E_SCmode: mode = SFmode; break; |
12760 | case E_DCmode: mode = DFmode; break; | |
12761 | case E_TCmode: mode = TFmode; break; | |
83349046 SB |
12762 | default: break; |
12763 | } | |
12764 | #endif | |
12765 | rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k); | |
12766 | if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1) | |
12767 | { | |
12768 | gcc_assert (cum->fregno == FP_ARG_MAX_REG | |
12769 | && (mode == TFmode || mode == TDmode)); | |
12770 | /* Long double or _Decimal128 split over regs and memory. */ | |
12771 | mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode; | |
12772 | cum->use_stack=1; | |
12773 | } | |
12774 | rvec[(*k)++] | |
12775 | = gen_rtx_EXPR_LIST (VOIDmode, | |
12776 | gen_rtx_REG (mode, cum->fregno++), | |
12777 | GEN_INT (bitpos / BITS_PER_UNIT)); | |
12778 | if (FLOAT128_2REG_P (mode)) | |
12779 | cum->fregno++; | |
12780 | } | |
12781 | else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1)) | |
12782 | { | |
12783 | rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k); | |
12784 | rvec[(*k)++] | |
12785 | = gen_rtx_EXPR_LIST (VOIDmode, | |
12786 | gen_rtx_REG (mode, cum->vregno++), | |
12787 | GEN_INT (bitpos / BITS_PER_UNIT)); | |
12788 | } | |
12789 | else if (cum->intoffset == -1) | |
12790 | cum->intoffset = bitpos; | |
12791 | } | |
12792 | } | |
12793 | ||
/* For the darwin64 ABI, we want to construct a PARALLEL consisting of
   the register(s) to be used for each field and subfield of a struct
   being passed by value, along with the offset of where the
   register's value may be found in the block.  FP fields go in FP
   register, vector fields go in vector registers, and everything
   else goes in int registers, packed as in memory.

   This code is also used for function return values.  RETVAL indicates
   whether this is the case.

   Much of this is taken from the SPARC V9 port, which has a similar
   calling convention.  */

static rtx
rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
			    bool named, bool retval)
{
  rtx rvec[FIRST_PSEUDO_REGISTER];
  int k = 1, kbase = 1;
  HOST_WIDE_INT typesize = int_size_in_bytes (type);
  /* This is a copy; modifications are not visible to our caller.  */
  CUMULATIVE_ARGS copy_cum = *orig_cum;
  CUMULATIVE_ARGS *cum = &copy_cum;

  /* Pad to 16 byte boundary if needed.  */
  if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
      && (cum->words % 2) != 0)
    cum->words++;

  /* Reset the per-record walk state used by the recursive worker.  */
  cum->intoffset = 0;
  cum->use_stack = 0;
  cum->named = named;

  /* Put entries into rvec[] for individual FP and vector fields, and
     for the chunks of memory that go in int regs.  Note we start at
     element 1; 0 is reserved for an indication of using memory, and
     may or may not be filled in below.  */
  rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
  rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);

  /* If any part of the struct went on the stack put all of it there.
     This hack is because the generic code for
     FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
     parts of the struct are not at the beginning.  */
  if (cum->use_stack)
    {
      if (retval)
	return NULL_RTX;    /* doesn't go in registers at all */
      kbase = 0;
      rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
    }
  if (k > 1 || cum->use_stack)
    return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
  else
    return NULL_RTX;
}
12850 | ||
12851 | /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */ | |
12852 | ||
12853 | static rtx | |
12854 | rs6000_mixed_function_arg (machine_mode mode, const_tree type, | |
12855 | int align_words) | |
12856 | { | |
12857 | int n_units; | |
12858 | int i, k; | |
12859 | rtx rvec[GP_ARG_NUM_REG + 1]; | |
12860 | ||
12861 | if (align_words >= GP_ARG_NUM_REG) | |
12862 | return NULL_RTX; | |
12863 | ||
12864 | n_units = rs6000_arg_size (mode, type); | |
12865 | ||
12866 | /* Optimize the simple case where the arg fits in one gpr, except in | |
12867 | the case of BLKmode due to assign_parms assuming that registers are | |
12868 | BITS_PER_WORD wide. */ | |
12869 | if (n_units == 0 | |
12870 | || (n_units == 1 && mode != BLKmode)) | |
12871 | return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words); | |
12872 | ||
12873 | k = 0; | |
12874 | if (align_words + n_units > GP_ARG_NUM_REG) | |
12875 | /* Not all of the arg fits in gprs. Say that it goes in memory too, | |
12876 | using a magic NULL_RTX component. | |
12877 | This is not strictly correct. Only some of the arg belongs in | |
12878 | memory, not all of it. However, the normal scheme using | |
12879 | function_arg_partial_nregs can result in unusual subregs, eg. | |
12880 | (subreg:SI (reg:DF) 4), which are not handled well. The code to | |
12881 | store the whole arg to memory is often more efficient than code | |
12882 | to store pieces, and we know that space is available in the right | |
12883 | place for the whole arg. */ | |
12884 | rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx); | |
12885 | ||
12886 | i = 0; | |
12887 | do | |
12888 | { | |
12889 | rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words); | |
12890 | rtx off = GEN_INT (i++ * 4); | |
12891 | rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); | |
12892 | } | |
12893 | while (++align_words < GP_ARG_NUM_REG && --n_units != 0); | |
12894 | ||
12895 | return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec)); | |
12896 | } | |
12897 | ||
/* We have an argument of MODE and TYPE that goes into FPRs or VRs,
   but must also be copied into the parameter save area starting at
   offset ALIGN_WORDS.  Fill in RVEC with the elements corresponding
   to the GPRs and/or memory.  Return the number of elements used.  */

static int
rs6000_psave_function_arg (machine_mode mode, const_tree type,
			   int align_words, rtx *rvec)
{
  int k = 0;

  if (align_words < GP_ARG_NUM_REG)
    {
      int n_words = rs6000_arg_size (mode, type);

      /* Three cases force a word-by-word description instead of a
	 single REG: partial spill to memory, BLKmode, or 64-bit regs
	 with a 32-bit ABI.  */
      if (align_words + n_words > GP_ARG_NUM_REG
	  || mode == BLKmode
	  || (TARGET_32BIT && TARGET_POWERPC64))
	{
	  /* If this is partially on the stack, then we only
	     include the portion actually in registers here.  */
	  machine_mode rmode = TARGET_32BIT ? SImode : DImode;
	  int i = 0;

	  if (align_words + n_words > GP_ARG_NUM_REG)
	    {
	      /* Not all of the arg fits in gprs.  Say that it goes in memory
		 too, using a magic NULL_RTX component.  Also see comment in
		 rs6000_mixed_function_arg for why the normal
		 function_arg_partial_nregs scheme doesn't work in this case.  */
	      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
	    }

	  /* One word-sized register per chunk, until we run out of
	     GPRs or out of argument words.  */
	  do
	    {
	      rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
	      rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
	      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
	    }
	  while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
	}
      else
	{
	  /* The whole arg fits in gprs.  */
	  rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
	  rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
	}
    }
  else
    {
      /* It's entirely in memory.  */
      rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
    }

  return k;
}
12954 | ||
12955 | /* RVEC is a vector of K components of an argument of mode MODE. | |
12956 | Construct the final function_arg return value from it. */ | |
12957 | ||
12958 | static rtx | |
12959 | rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k) | |
12960 | { | |
12961 | gcc_assert (k >= 1); | |
12962 | ||
12963 | /* Avoid returning a PARALLEL in the trivial cases. */ | |
12964 | if (k == 1) | |
12965 | { | |
12966 | if (XEXP (rvec[0], 0) == NULL_RTX) | |
12967 | return NULL_RTX; | |
12968 | ||
12969 | if (GET_MODE (XEXP (rvec[0], 0)) == mode) | |
12970 | return XEXP (rvec[0], 0); | |
12971 | } | |
12972 | ||
12973 | return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec)); | |
12974 | } | |
12975 | ||
12976 | /* Determine where to put an argument to a function. | |
12977 | Value is zero to push the argument on the stack, | |
12978 | or a hard register in which to store the argument. | |
12979 | ||
12980 | MODE is the argument's machine mode. | |
12981 | TYPE is the data type of the argument (as a tree). | |
12982 | This is null for libcalls where that information may | |
12983 | not be available. | |
12984 | CUM is a variable of type CUMULATIVE_ARGS which gives info about | |
12985 | the preceding args and about the function being called. It is | |
12986 | not modified in this routine. | |
12987 | NAMED is nonzero if this argument is a named parameter | |
12988 | (otherwise it is an extra parameter matching an ellipsis). | |
12989 | ||
12990 | On RS/6000 the first eight words of non-FP are normally in registers | |
12991 | and the rest are pushed. Under AIX, the first 13 FP args are in registers. | |
12992 | Under V.4, the first 8 FP args are in registers. | |
12993 | ||
12994 | If this is floating-point and no prototype is specified, we use | |
12995 | both an FP and integer register (or possibly FP reg and stack). Library | |
12996 | functions (when CALL_LIBCALL is set) always have the proper types for args, | |
12997 | so we can pass the FP value just in one register. emit_library_function | |
12998 | doesn't support PARALLEL anyway. | |
12999 | ||
13000 | Note that for args passed by reference, function_arg will be called | |
13001 | with MODE and TYPE set to that of the pointer to the arg, not the arg | |
13002 | itself. */ | |
13003 | ||
13004 | static rtx | |
13005 | rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode, | |
13006 | const_tree type, bool named) | |
13007 | { | |
13008 | CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); | |
13009 | enum rs6000_abi abi = DEFAULT_ABI; | |
13010 | machine_mode elt_mode; | |
13011 | int n_elts; | |
13012 | ||
13013 | /* Return a marker to indicate whether CR1 needs to set or clear the | |
13014 | bit that V.4 uses to say fp args were passed in registers. | |
13015 | Assume that we don't need the marker for software floating point, | |
13016 | or compiler generated library calls. */ | |
13017 | if (mode == VOIDmode) | |
13018 | { | |
13019 | if (abi == ABI_V4 | |
13020 | && (cum->call_cookie & CALL_LIBCALL) == 0 | |
13021 | && (cum->stdarg | |
13022 | || (cum->nargs_prototype < 0 | |
13023 | && (cum->prototype || TARGET_NO_PROTOTYPE)))) | |
13024 | { | |
13025 | /* For the SPE, we need to crxor CR6 always. */ | |
13026 | if (TARGET_SPE_ABI) | |
13027 | return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS); | |
13028 | else if (TARGET_HARD_FLOAT && TARGET_FPRS) | |
13029 | return GEN_INT (cum->call_cookie | |
13030 | | ((cum->fregno == FP_ARG_MIN_REG) | |
13031 | ? CALL_V4_SET_FP_ARGS | |
13032 | : CALL_V4_CLEAR_FP_ARGS)); | |
13033 | } | |
13034 | ||
13035 | return GEN_INT (cum->call_cookie & ~CALL_LIBCALL); | |
13036 | } | |
13037 | ||
13038 | rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts); | |
13039 | ||
13040 | if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type)) | |
13041 | { | |
13042 | rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false); | |
13043 | if (rslt != NULL_RTX) | |
13044 | return rslt; | |
13045 | /* Else fall through to usual handling. */ | |
13046 | } | |
13047 | ||
13048 | if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named)) | |
13049 | { | |
13050 | rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1]; | |
13051 | rtx r, off; | |
13052 | int i, k = 0; | |
13053 | ||
13054 | /* Do we also need to pass this argument in the parameter save area? | |
13055 | Library support functions for IEEE 128-bit are assumed to not need the | |
13056 | value passed both in GPRs and in vector registers. */ | |
13057 | if (TARGET_64BIT && !cum->prototype | |
13058 | && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode))) | |
13059 | { | |
13060 | int align_words = ROUND_UP (cum->words, 2); | |
13061 | k = rs6000_psave_function_arg (mode, type, align_words, rvec); | |
13062 | } | |
13063 | ||
13064 | /* Describe where this argument goes in the vector registers. */ | |
13065 | for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++) | |
13066 | { | |
13067 | r = gen_rtx_REG (elt_mode, cum->vregno + i); | |
13068 | off = GEN_INT (i * GET_MODE_SIZE (elt_mode)); | |
13069 | rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); | |
13070 | } | |
13071 | ||
13072 | return rs6000_finish_function_arg (mode, rvec, k); | |
13073 | } | |
13074 | else if (TARGET_ALTIVEC_ABI | |
13075 | && (ALTIVEC_OR_VSX_VECTOR_MODE (mode) | |
13076 | || (type && TREE_CODE (type) == VECTOR_TYPE | |
13077 | && int_size_in_bytes (type) == 16))) | |
13078 | { | |
13079 | if (named || abi == ABI_V4) | |
13080 | return NULL_RTX; | |
13081 | else | |
13082 | { | |
13083 | /* Vector parameters to varargs functions under AIX or Darwin | |
13084 | get passed in memory and possibly also in GPRs. */ | |
13085 | int align, align_words, n_words; | |
13086 | machine_mode part_mode; | |
13087 | ||
13088 | /* Vector parameters must be 16-byte aligned. In 32-bit | |
13089 | mode this means we need to take into account the offset | |
13090 | to the parameter save area. In 64-bit mode, they just | |
13091 | have to start on an even word, since the parameter save | |
13092 | area is 16-byte aligned. */ | |
13093 | if (TARGET_32BIT) | |
13094 | align = -(rs6000_parm_offset () + cum->words) & 3; | |
13095 | else | |
13096 | align = cum->words & 1; | |
13097 | align_words = cum->words + align; | |
13098 | ||
13099 | /* Out of registers? Memory, then. */ | |
13100 | if (align_words >= GP_ARG_NUM_REG) | |
13101 | return NULL_RTX; | |
13102 | ||
13103 | if (TARGET_32BIT && TARGET_POWERPC64) | |
13104 | return rs6000_mixed_function_arg (mode, type, align_words); | |
13105 | ||
13106 | /* The vector value goes in GPRs. Only the part of the | |
13107 | value in GPRs is reported here. */ | |
13108 | part_mode = mode; | |
13109 | n_words = rs6000_arg_size (mode, type); | |
13110 | if (align_words + n_words > GP_ARG_NUM_REG) | |
13111 | /* Fortunately, there are only two possibilities, the value | |
13112 | is either wholly in GPRs or half in GPRs and half not. */ | |
13113 | part_mode = DImode; | |
13114 | ||
13115 | return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words); | |
13116 | } | |
13117 | } | |
13118 | else if (TARGET_SPE_ABI && TARGET_SPE | |
13119 | && (SPE_VECTOR_MODE (mode) | |
13120 | || (TARGET_E500_DOUBLE && (mode == DFmode | |
13121 | || mode == DCmode | |
13122 | || mode == TFmode | |
13123 | || mode == TCmode)))) | |
13124 | return rs6000_spe_function_arg (cum, mode, type); | |
13125 | ||
13126 | else if (abi == ABI_V4) | |
13127 | { | |
13128 | if (abi_v4_pass_in_fpr (mode)) | |
13129 | { | |
13130 | /* _Decimal128 must use an even/odd register pair. This assumes | |
13131 | that the register number is odd when fregno is odd. */ | |
13132 | if (mode == TDmode && (cum->fregno % 2) == 1) | |
13133 | cum->fregno++; | |
13134 | ||
13135 | if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0) | |
13136 | <= FP_ARG_V4_MAX_REG) | |
13137 | return gen_rtx_REG (mode, cum->fregno); | |
13138 | else | |
13139 | return NULL_RTX; | |
13140 | } | |
13141 | else | |
13142 | { | |
13143 | int n_words = rs6000_arg_size (mode, type); | |
13144 | int gregno = cum->sysv_gregno; | |
13145 | ||
13146 | /* Long long and SPE vectors are put in (r3,r4), (r5,r6), | |
13147 | (r7,r8) or (r9,r10). As does any other 2 word item such | |
13148 | as complex int due to a historical mistake. */ | |
13149 | if (n_words == 2) | |
13150 | gregno += (1 - gregno) & 1; | |
13151 | ||
13152 | /* Multi-reg args are not split between registers and stack. */ | |
13153 | if (gregno + n_words - 1 > GP_ARG_MAX_REG) | |
13154 | return NULL_RTX; | |
13155 | ||
13156 | if (TARGET_32BIT && TARGET_POWERPC64) | |
13157 | return rs6000_mixed_function_arg (mode, type, | |
13158 | gregno - GP_ARG_MIN_REG); | |
13159 | return gen_rtx_REG (mode, gregno); | |
13160 | } | |
13161 | } | |
13162 | else | |
13163 | { | |
13164 | int align_words = rs6000_parm_start (mode, type, cum->words); | |
13165 | ||
13166 | /* _Decimal128 must be passed in an even/odd float register pair. | |
13167 | This assumes that the register number is odd when fregno is odd. */ | |
13168 | if (elt_mode == TDmode && (cum->fregno % 2) == 1) | |
13169 | cum->fregno++; | |
13170 | ||
13171 | if (USE_FP_FOR_ARG_P (cum, elt_mode)) | |
13172 | { | |
13173 | rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1]; | |
13174 | rtx r, off; | |
13175 | int i, k = 0; | |
13176 | unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3; | |
13177 | int fpr_words; | |
13178 | ||
13179 | /* Do we also need to pass this argument in the parameter | |
13180 | save area? */ | |
13181 | if (type && (cum->nargs_prototype <= 0 | |
13182 | || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
13183 | && TARGET_XL_COMPAT | |
13184 | && align_words >= GP_ARG_NUM_REG))) | |
13185 | k = rs6000_psave_function_arg (mode, type, align_words, rvec); | |
13186 | ||
13187 | /* Describe where this argument goes in the fprs. */ | |
13188 | for (i = 0; i < n_elts | |
13189 | && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++) | |
13190 | { | |
13191 | /* Check if the argument is split over registers and memory. | |
13192 | This can only ever happen for long double or _Decimal128; | |
13193 | complex types are handled via split_complex_arg. */ | |
13194 | machine_mode fmode = elt_mode; | |
13195 | if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1) | |
13196 | { | |
13197 | gcc_assert (FLOAT128_2REG_P (fmode)); | |
13198 | fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode; | |
13199 | } | |
13200 | ||
13201 | r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg); | |
13202 | off = GEN_INT (i * GET_MODE_SIZE (elt_mode)); | |
13203 | rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); | |
13204 | } | |
13205 | ||
13206 | /* If there were not enough FPRs to hold the argument, the rest | |
13207 | usually goes into memory. However, if the current position | |
13208 | is still within the register parameter area, a portion may | |
13209 | actually have to go into GPRs. | |
13210 | ||
13211 | Note that it may happen that the portion of the argument | |
13212 | passed in the first "half" of the first GPR was already | |
13213 | passed in the last FPR as well. | |
13214 | ||
13215 | For unnamed arguments, we already set up GPRs to cover the | |
13216 | whole argument in rs6000_psave_function_arg, so there is | |
13217 | nothing further to do at this point. */ | |
13218 | fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8); | |
13219 | if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG | |
13220 | && cum->nargs_prototype > 0) | |
13221 | { | |
13222 | static bool warned; | |
13223 | ||
13224 | machine_mode rmode = TARGET_32BIT ? SImode : DImode; | |
13225 | int n_words = rs6000_arg_size (mode, type); | |
13226 | ||
13227 | align_words += fpr_words; | |
13228 | n_words -= fpr_words; | |
13229 | ||
13230 | do | |
13231 | { | |
13232 | r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words); | |
13233 | off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode)); | |
13234 | rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off); | |
13235 | } | |
13236 | while (++align_words < GP_ARG_NUM_REG && --n_words != 0); | |
13237 | ||
13238 | if (!warned && warn_psabi) | |
13239 | { | |
13240 | warned = true; | |
13241 | inform (input_location, | |
13242 | "the ABI of passing homogeneous float aggregates" | |
13243 | " has changed in GCC 5"); | |
13244 | } | |
13245 | } | |
13246 | ||
13247 | return rs6000_finish_function_arg (mode, rvec, k); | |
13248 | } | |
13249 | else if (align_words < GP_ARG_NUM_REG) | |
13250 | { | |
13251 | if (TARGET_32BIT && TARGET_POWERPC64) | |
13252 | return rs6000_mixed_function_arg (mode, type, align_words); | |
13253 | ||
13254 | return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words); | |
13255 | } | |
13256 | else | |
13257 | return NULL_RTX; | |
13258 | } | |
13259 | } | |
13260 | \f | |
/* For an arg passed partly in registers and partly in memory, this is
   the number of bytes passed in registers.  For args passed entirely in
   registers or entirely in memory, zero.  When an arg is described by a
   PARALLEL, perhaps using more than one register type, this function
   returns the number of bytes used by the first element of the PARALLEL.  */

static int
rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
			  tree type, bool named)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
  bool passed_in_gprs = true;
  int ret = 0;
  int align_words;
  machine_mode elt_mode;
  int n_elts;

  /* Decompose a homogeneous aggregate into its element mode/count; for
     other arguments this yields MODE itself with a count of one.  */
  rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);

  /* The V.4 ABI never splits an argument between registers and memory.  */
  if (DEFAULT_ABI == ABI_V4)
    return 0;

  if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
    {
      /* If we are passing this arg in the fixed parameter save area (gprs or
	 memory) as well as VRs, we do not use the partial bytes mechanism;
	 instead, rs6000_function_arg will return a PARALLEL including a memory
	 element as necessary.  Library support functions for IEEE 128-bit are
	 assumed to not need the value passed both in GPRs and in vector
	 registers.  */
      if (TARGET_64BIT && !cum->prototype
	  && (!cum->libcall || !FLOAT128_VECTOR_P (elt_mode)))
	return 0;

      /* Otherwise, we pass in VRs only.  Check for partial copies.  */
      passed_in_gprs = false;
      if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
	ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
    }

  /* In this complicated case we just disable the partial_nregs code.  */
  if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
    return 0;

  align_words = rs6000_parm_start (mode, type, cum->words);

  if (USE_FP_FOR_ARG_P (cum, elt_mode))
    {
      /* Number of FPRs each element occupies (8 bytes per FPR).  */
      unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;

      /* If we are passing this arg in the fixed parameter save area
	 (gprs or memory) as well as FPRs, we do not use the partial
	 bytes mechanism; instead, rs6000_function_arg will return a
	 PARALLEL including a memory element as necessary.  */
      if (type
	  && (cum->nargs_prototype <= 0
	      || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
		  && TARGET_XL_COMPAT
		  && align_words >= GP_ARG_NUM_REG)))
	return 0;

      /* Otherwise, we pass in FPRs only.  Check for partial copies.  */
      passed_in_gprs = false;
      if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
	{
	  /* Compute number of bytes / words passed in FPRs.  If there
	     is still space available in the register parameter area
	     *after* that amount, a part of the argument will be passed
	     in GPRs.  In that case, the total amount passed in any
	     registers is equal to the amount that would have been passed
	     in GPRs if everything were passed there, so we fall back to
	     the GPR code below to compute the appropriate value.  */
	  int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
		     * MIN (8, GET_MODE_SIZE (elt_mode)));
	  int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);

	  if (align_words + fpr_words < GP_ARG_NUM_REG)
	    passed_in_gprs = true;
	  else
	    ret = fpr;
	}
    }

  /* A GPR argument is partial exactly when it starts inside the register
     parameter area but extends past its end.  */
  if (passed_in_gprs
      && align_words < GP_ARG_NUM_REG
      && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
    ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);

  if (ret != 0 && TARGET_DEBUG_ARG)
    fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);

  return ret;
}
13354 | \f | |
13355 | /* A C expression that indicates when an argument must be passed by | |
13356 | reference. If nonzero for an argument, a copy of that argument is | |
13357 | made in memory and a pointer to the argument is passed instead of | |
13358 | the argument itself. The pointer is passed in whatever way is | |
13359 | appropriate for passing a pointer to that type. | |
13360 | ||
13361 | Under V.4, aggregates and long double are passed by reference. | |
13362 | ||
13363 | As an extension to all 32-bit ABIs, AltiVec vectors are passed by | |
13364 | reference unless the AltiVec vector extension ABI is in force. | |
13365 | ||
13366 | As an extension to all ABIs, variable sized types are passed by | |
13367 | reference. */ | |
13368 | ||
13369 | static bool | |
13370 | rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED, | |
13371 | machine_mode mode, const_tree type, | |
13372 | bool named ATTRIBUTE_UNUSED) | |
13373 | { | |
13374 | if (!type) | |
13375 | return 0; | |
13376 | ||
13377 | if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD | |
13378 | && FLOAT128_IEEE_P (TYPE_MODE (type))) | |
13379 | { | |
13380 | if (TARGET_DEBUG_ARG) | |
13381 | fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n"); | |
13382 | return 1; | |
13383 | } | |
13384 | ||
13385 | if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type)) | |
13386 | { | |
13387 | if (TARGET_DEBUG_ARG) | |
13388 | fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n"); | |
13389 | return 1; | |
13390 | } | |
13391 | ||
13392 | if (int_size_in_bytes (type) < 0) | |
13393 | { | |
13394 | if (TARGET_DEBUG_ARG) | |
13395 | fprintf (stderr, "function_arg_pass_by_reference: variable size\n"); | |
13396 | return 1; | |
13397 | } | |
13398 | ||
13399 | /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector | |
13400 | modes only exist for GCC vector types if -maltivec. */ | |
13401 | if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode)) | |
13402 | { | |
13403 | if (TARGET_DEBUG_ARG) | |
13404 | fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n"); | |
13405 | return 1; | |
13406 | } | |
13407 | ||
13408 | /* Pass synthetic vectors in memory. */ | |
13409 | if (TREE_CODE (type) == VECTOR_TYPE | |
13410 | && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8)) | |
13411 | { | |
13412 | static bool warned_for_pass_big_vectors = false; | |
13413 | if (TARGET_DEBUG_ARG) | |
13414 | fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n"); | |
13415 | if (!warned_for_pass_big_vectors) | |
13416 | { | |
13417 | warning (OPT_Wpsabi, "GCC vector passed by reference: " | |
13418 | "non-standard ABI extension with no compatibility guarantee"); | |
13419 | warned_for_pass_big_vectors = true; | |
13420 | } | |
13421 | return 1; | |
13422 | } | |
13423 | ||
13424 | return 0; | |
13425 | } | |
13426 | ||
/* Process parameter of type TYPE after ARGS_SO_FAR parameters were
   already processed.  Return true if the parameter must be passed
   (fully or partially) on the stack.  */

static bool
rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
{
  machine_mode mode;
  int unsignedp;
  rtx entry_parm;

  /* Catch errors.  */
  if (type == NULL || type == error_mark_node)
    return true;

  /* Handle types with no storage requirement.  */
  if (TYPE_MODE (type) == VOIDmode)
    return false;

  /* Handle complex types.  Both components share the same element type,
     so recurse once per component; each successful call advances
     ARGS_SO_FAR past one component (the || short-circuits as soon as a
     component needs the stack, which is sufficient for our answer).  */
  if (TREE_CODE (type) == COMPLEX_TYPE)
    return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
	    || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));

  /* Handle transparent aggregates: pass as their first field's type.  */
  if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
      && TYPE_TRANSPARENT_AGGR (type))
    type = TREE_TYPE (first_field (type));

  /* See if this arg was passed by invisible reference; if so, what we
     actually pass is a pointer.  */
  if (pass_by_reference (get_cumulative_args (args_so_far),
			 TYPE_MODE (type), type, true))
    type = build_pointer_type (type);

  /* Find mode as it is passed by the ABI.  */
  unsignedp = TYPE_UNSIGNED (type);
  mode = promote_mode (type, TYPE_MODE (type), &unsignedp);

  /* If we must pass in stack, we need a stack.  */
  if (rs6000_must_pass_in_stack (mode, type))
    return true;

  /* If there is no incoming register, we need a stack.  */
  entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
  if (entry_parm == NULL)
    return true;

  /* Likewise if we need to pass both in registers and on the stack
     (a PARALLEL whose first element is the memory part).  */
  if (GET_CODE (entry_parm) == PARALLEL
      && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
    return true;

  /* Also true if we're partially in registers and partially not.  */
  if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
    return true;

  /* Update info on where next arg arrives in registers.  */
  rs6000_function_arg_advance (args_so_far, mode, type, true);
  return false;
}
13487 | ||
13488 | /* Return true if FUN has no prototype, has a variable argument | |
13489 | list, or passes any parameter in memory. */ | |
13490 | ||
13491 | static bool | |
13492 | rs6000_function_parms_need_stack (tree fun, bool incoming) | |
13493 | { | |
13494 | tree fntype, result; | |
13495 | CUMULATIVE_ARGS args_so_far_v; | |
13496 | cumulative_args_t args_so_far; | |
13497 | ||
13498 | if (!fun) | |
13499 | /* Must be a libcall, all of which only use reg parms. */ | |
13500 | return false; | |
13501 | ||
13502 | fntype = fun; | |
13503 | if (!TYPE_P (fun)) | |
13504 | fntype = TREE_TYPE (fun); | |
13505 | ||
13506 | /* Varargs functions need the parameter save area. */ | |
13507 | if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype)) | |
13508 | return true; | |
13509 | ||
13510 | INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX); | |
13511 | args_so_far = pack_cumulative_args (&args_so_far_v); | |
13512 | ||
13513 | /* When incoming, we will have been passed the function decl. | |
13514 | It is necessary to use the decl to handle K&R style functions, | |
13515 | where TYPE_ARG_TYPES may not be available. */ | |
13516 | if (incoming) | |
13517 | { | |
13518 | gcc_assert (DECL_P (fun)); | |
13519 | result = DECL_RESULT (fun); | |
13520 | } | |
13521 | else | |
13522 | result = TREE_TYPE (fntype); | |
13523 | ||
13524 | if (result && aggregate_value_p (result, fntype)) | |
13525 | { | |
13526 | if (!TYPE_P (result)) | |
13527 | result = TREE_TYPE (result); | |
13528 | result = build_pointer_type (result); | |
13529 | rs6000_parm_needs_stack (args_so_far, result); | |
13530 | } | |
13531 | ||
13532 | if (incoming) | |
13533 | { | |
13534 | tree parm; | |
13535 | ||
13536 | for (parm = DECL_ARGUMENTS (fun); | |
13537 | parm && parm != void_list_node; | |
13538 | parm = TREE_CHAIN (parm)) | |
13539 | if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm))) | |
13540 | return true; | |
13541 | } | |
13542 | else | |
13543 | { | |
13544 | function_args_iterator args_iter; | |
13545 | tree arg_type; | |
13546 | ||
13547 | FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter) | |
13548 | if (rs6000_parm_needs_stack (args_so_far, arg_type)) | |
13549 | return true; | |
13550 | } | |
13551 | ||
13552 | return false; | |
13553 | } | |
13554 | ||
13555 | /* Return the size of the REG_PARM_STACK_SPACE are for FUN. This is | |
13556 | usually a constant depending on the ABI. However, in the ELFv2 ABI | |
13557 | the register parameter area is optional when calling a function that | |
13558 | has a prototype is scope, has no variable argument list, and passes | |
13559 | all parameters in registers. */ | |
13560 | ||
13561 | int | |
13562 | rs6000_reg_parm_stack_space (tree fun, bool incoming) | |
13563 | { | |
13564 | int reg_parm_stack_space; | |
13565 | ||
13566 | switch (DEFAULT_ABI) | |
13567 | { | |
13568 | default: | |
13569 | reg_parm_stack_space = 0; | |
13570 | break; | |
13571 | ||
13572 | case ABI_AIX: | |
13573 | case ABI_DARWIN: | |
13574 | reg_parm_stack_space = TARGET_64BIT ? 64 : 32; | |
13575 | break; | |
13576 | ||
13577 | case ABI_ELFv2: | |
13578 | /* ??? Recomputing this every time is a bit expensive. Is there | |
13579 | a place to cache this information? */ | |
13580 | if (rs6000_function_parms_need_stack (fun, incoming)) | |
13581 | reg_parm_stack_space = TARGET_64BIT ? 64 : 32; | |
13582 | else | |
13583 | reg_parm_stack_space = 0; | |
13584 | break; | |
13585 | } | |
13586 | ||
13587 | return reg_parm_stack_space; | |
13588 | } | |
13589 | ||
13590 | static void | |
13591 | rs6000_move_block_from_reg (int regno, rtx x, int nregs) | |
13592 | { | |
13593 | int i; | |
13594 | machine_mode reg_mode = TARGET_32BIT ? SImode : DImode; | |
13595 | ||
13596 | if (nregs == 0) | |
13597 | return; | |
13598 | ||
13599 | for (i = 0; i < nregs; i++) | |
13600 | { | |
13601 | rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode)); | |
13602 | if (reload_completed) | |
13603 | { | |
13604 | if (! strict_memory_address_p (reg_mode, XEXP (tem, 0))) | |
13605 | tem = NULL_RTX; | |
13606 | else | |
13607 | tem = simplify_gen_subreg (reg_mode, x, BLKmode, | |
13608 | i * GET_MODE_SIZE (reg_mode)); | |
13609 | } | |
13610 | else | |
13611 | tem = replace_equiv_address (tem, XEXP (tem, 0)); | |
13612 | ||
13613 | gcc_assert (tem); | |
13614 | ||
13615 | emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i)); | |
13616 | } | |
13617 | } | |
13618 | \f | |
/* Perform any needed actions needed for a function that is receiving a
   variable number of arguments.

   CUM is as above.

   MODE and TYPE are the mode and type of the current parameter.

   PRETEND_SIZE is a variable that should be set to the amount of stack
   that must be pushed by the prolog to pretend that our caller pushed
   it.

   Normally, this macro will push all remaining incoming registers on the
   stack and set PRETEND_SIZE to the length of the registers pushed.  */

static void
setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
			tree type, int *pretend_size ATTRIBUTE_UNUSED,
			int no_rtl)
{
  CUMULATIVE_ARGS next_cum;
  int reg_size = TARGET_32BIT ? 4 : 8;
  rtx save_area = NULL_RTX, mem;
  int first_reg_offset;
  alias_set_type set;

  /* Skip the last named argument.  */
  next_cum = *get_cumulative_args (cum);
  rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);

  if (DEFAULT_ABI == ABI_V4)
    {
      first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;

      if (! no_rtl)
	{
	  int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
	  HOST_WIDE_INT offset = 0;

	  /* Try to optimize the size of the varargs save area.
	     The ABI requires that ap.reg_save_area is doubleword
	     aligned, but we don't need to allocate space for all
	     the bytes, only those to which we actually will save
	     anything.  */
	  if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
	    gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
	  if (TARGET_HARD_FLOAT && TARGET_FPRS
	      && next_cum.fregno <= FP_ARG_V4_MAX_REG
	      && cfun->va_list_fpr_size)
	    {
	      /* When GPRs are also saved, the FPR part starts at the
		 offset of the first unsaved FPR within the full area.  */
	      if (gpr_reg_num)
		fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
			   * UNITS_PER_FP_WORD;
	      /* Only allocate as many FPR slots as va_arg might read.  */
	      if (cfun->va_list_fpr_size
		  < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
		fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
	      else
		fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
			    * UNITS_PER_FP_WORD;
	    }
	  if (gpr_reg_num)
	    {
	      /* Keep the doubleword alignment the ABI requires for
		 ap.reg_save_area.  */
	      offset = -((first_reg_offset * reg_size) & ~7);
	      if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
		{
		  gpr_reg_num = cfun->va_list_gpr_size;
		  if (reg_size == 4 && (first_reg_offset & 1))
		    gpr_reg_num++;
		}
	      gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
	    }
	  else if (fpr_size)
	    /* No GPRs to save: place the FPR block where it would sit
	       past a full GPR area.  */
	    offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
		       * UNITS_PER_FP_WORD
		     - (int) (GP_ARG_NUM_REG * reg_size);

	  if (gpr_size + fpr_size)
	    {
	      rtx reg_save_area
		= assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
	      gcc_assert (GET_CODE (reg_save_area) == MEM);
	      reg_save_area = XEXP (reg_save_area, 0);
	      /* Fold the slot address into OFFSET so SAVE_AREA can be
		 expressed relative to virtual_stack_vars_rtx.  */
	      if (GET_CODE (reg_save_area) == PLUS)
		{
		  gcc_assert (XEXP (reg_save_area, 0)
			      == virtual_stack_vars_rtx);
		  gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
		  offset += INTVAL (XEXP (reg_save_area, 1));
		}
	      else
		gcc_assert (reg_save_area == virtual_stack_vars_rtx);
	    }

	  cfun->machine->varargs_save_offset = offset;
	  save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
	}
    }
  else
    {
      first_reg_offset = next_cum.words;
      save_area = crtl->args.internal_arg_pointer;

      if (targetm.calls.must_pass_in_stack (mode, type))
	first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
    }

  set = get_varargs_alias_set ();
  if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
      && cfun->va_list_gpr_size)
    {
      int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;

      if (va_list_gpr_counter_field)
	/* V4 va_list_gpr_size counts number of registers needed.  */
	n_gpr = cfun->va_list_gpr_size;
      else
	/* char * va_list instead counts number of bytes needed.  */
	n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;

      if (nregs > n_gpr)
	nregs = n_gpr;

      mem = gen_rtx_MEM (BLKmode,
			 plus_constant (Pmode, save_area,
					first_reg_offset * reg_size));
      MEM_NOTRAP_P (mem) = 1;
      set_mem_alias_set (mem, set);
      set_mem_align (mem, BITS_PER_WORD);

      rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
				  nregs);
    }

  /* Save FP registers if needed.  */
  if (DEFAULT_ABI == ABI_V4
      && TARGET_HARD_FLOAT && TARGET_FPRS
      && ! no_rtl
      && next_cum.fregno <= FP_ARG_V4_MAX_REG
      && cfun->va_list_fpr_size)
    {
      int fregno = next_cum.fregno, nregs;
      rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
      rtx lab = gen_label_rtx ();
      int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
					       * UNITS_PER_FP_WORD);

      /* Skip the FPR stores when CR1 is clear.  NOTE(review): presumably
	 the caller sets this CR field when FP arguments were passed in
	 FPRs, per the V.4 calling convention — confirm against the ABI.  */
      emit_jump_insn
	(gen_rtx_SET (pc_rtx,
		      gen_rtx_IF_THEN_ELSE (VOIDmode,
					    gen_rtx_NE (VOIDmode, cr1,
							const0_rtx),
					    gen_rtx_LABEL_REF (VOIDmode, lab),
					    pc_rtx)));

      for (nregs = 0;
	   fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
	   fregno++, off += UNITS_PER_FP_WORD, nregs++)
	{
	  mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
			     ? DFmode : SFmode,
			     plus_constant (Pmode, save_area, off));
	  MEM_NOTRAP_P (mem) = 1;
	  set_mem_alias_set (mem, set);
	  set_mem_align (mem, GET_MODE_ALIGNMENT (
			 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
			  ? DFmode : SFmode));
	  emit_move_insn (mem, gen_rtx_REG (
			  (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
			   ? DFmode : SFmode, fregno));
	}

      emit_label (lab);
    }
}
13792 | ||
13793 | /* Create the va_list data type. */ | |
13794 | ||
13795 | static tree | |
13796 | rs6000_build_builtin_va_list (void) | |
13797 | { | |
13798 | tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl; | |
13799 | ||
13800 | /* For AIX, prefer 'char *' because that's what the system | |
13801 | header files like. */ | |
13802 | if (DEFAULT_ABI != ABI_V4) | |
13803 | return build_pointer_type (char_type_node); | |
13804 | ||
13805 | record = (*lang_hooks.types.make_type) (RECORD_TYPE); | |
13806 | type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL, | |
13807 | get_identifier ("__va_list_tag"), record); | |
13808 | ||
13809 | f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"), | |
13810 | unsigned_char_type_node); | |
13811 | f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"), | |
13812 | unsigned_char_type_node); | |
13813 | /* Give the two bytes of padding a name, so that -Wpadded won't warn on | |
13814 | every user file. */ | |
13815 | f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL, | |
13816 | get_identifier ("reserved"), short_unsigned_type_node); | |
13817 | f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL, | |
13818 | get_identifier ("overflow_arg_area"), | |
13819 | ptr_type_node); | |
13820 | f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL, | |
13821 | get_identifier ("reg_save_area"), | |
13822 | ptr_type_node); | |
13823 | ||
13824 | va_list_gpr_counter_field = f_gpr; | |
13825 | va_list_fpr_counter_field = f_fpr; | |
13826 | ||
13827 | DECL_FIELD_CONTEXT (f_gpr) = record; | |
13828 | DECL_FIELD_CONTEXT (f_fpr) = record; | |
13829 | DECL_FIELD_CONTEXT (f_res) = record; | |
13830 | DECL_FIELD_CONTEXT (f_ovf) = record; | |
13831 | DECL_FIELD_CONTEXT (f_sav) = record; | |
13832 | ||
13833 | TYPE_STUB_DECL (record) = type_decl; | |
13834 | TYPE_NAME (record) = type_decl; | |
13835 | TYPE_FIELDS (record) = f_gpr; | |
13836 | DECL_CHAIN (f_gpr) = f_fpr; | |
13837 | DECL_CHAIN (f_fpr) = f_res; | |
13838 | DECL_CHAIN (f_res) = f_ovf; | |
13839 | DECL_CHAIN (f_ovf) = f_sav; | |
13840 | ||
13841 | layout_type (record); | |
13842 | ||
13843 | /* The correct type is an array type of one element. */ | |
13844 | return build_array_type (record, build_index_type (size_zero_node)); | |
13845 | } | |
13846 | ||
/* Implement va_start.  */

static void
rs6000_va_start (tree valist, rtx nextarg)
{
  HOST_WIDE_INT words, n_gpr, n_fpr;
  tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  /* Only SVR4 needs something special.  */
  if (DEFAULT_ABI != ABI_V4)
    {
      std_expand_builtin_va_start (valist, nextarg);
      return;
    }

  /* Fetch the fields of the __va_list_tag record, in the order laid
     out by rs6000_build_builtin_va_list.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_res = DECL_CHAIN (f_fpr);
  f_ovf = DECL_CHAIN (f_res);
  f_sav = DECL_CHAIN (f_ovf);

  /* Build COMPONENT_REFs for each field of *valist.  */
  valist = build_simple_mem_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */
  words = crtl->args.info.words;
  n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
	       GP_ARG_NUM_REG);
  n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
	       FP_ARG_NUM_REG);

  if (TARGET_DEBUG_ARG)
    fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
	     HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
	     words, n_gpr, n_fpr);

  /* Initialize the GPR counter field, if va_arg ever reads GPRs.  */
  if (cfun->va_list_gpr_size)
    {
      t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
		  build_int_cst (NULL_TREE, n_gpr));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Likewise the FPR counter field.  */
  if (cfun->va_list_fpr_size)
    {
      t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
		  build_int_cst (NULL_TREE, n_fpr));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

#ifdef HAVE_AS_GNU_ATTRIBUTE
      if (call_ABI_of_interest (cfun->decl))
	rs6000_passes_float = true;
#endif
    }

  /* Find the overflow area.  */
  t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
  if (words != 0)
    t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
  t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* If there were no va_arg invocations, don't set up the register
     save area.  */
  if (!cfun->va_list_gpr_size
      && !cfun->va_list_fpr_size
      && n_gpr < GP_ARG_NUM_REG
      && n_fpr < FP_ARG_V4_MAX_REG)
    return;

  /* Find the register save area (allocated by setup_incoming_varargs
     at varargs_save_offset from the virtual stack vars base).  */
  t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
  if (cfun->machine->varargs_save_offset)
    t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
  t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
13935 | ||
/* Implement va_arg (target hook TARGET_GIMPLIFY_VA_ARG_EXPR).

   VALIST is the va_list lvalue, TYPE the type of the argument being
   fetched.  Gimplified setup code is appended to PRE_P/POST_P; the
   return value is a tree whose value is the fetched argument.

   For non-V4 ABIs this mostly defers to std_gimplify_va_arg_expr
   (with special handling for split complex args and for zero-sized
   types under the Darwin/ELFv2/AIX alignment quirk).  The V4 path
   fetches the argument either from the register save area or, once
   the registers are exhausted, from the stack overflow area.  */

static tree
rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
			gimple_seq *post_p)
{
  tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, reg, t, u;
  int size, rsize, n_reg, sav_ofs, sav_scale;
  tree lab_false, lab_over, addr;
  int align;
  tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
  int regalign = 0;
  gimple *stmt;

  /* Pass-by-reference arguments: fetch the pointer, then dereference.  */
  if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
    {
      t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
      return build_va_arg_indirect_ref (t);
    }

  /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
     earlier version of gcc, with the property that it always applied alignment
     adjustments to the va-args (even for zero-sized types).  The cheapest way
     to deal with this is to replicate the effect of the part of
     std_gimplify_va_arg_expr that carries out the align adjust, for the case
     of relevance.
     We don't need to check for pass-by-reference because of the test above.
     We can return a simplified answer, since we know there's no offset to add.  */

  if (((TARGET_MACHO
	&& rs6000_darwin64_abi)
       || DEFAULT_ABI == ABI_ELFv2
       || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
      && integer_zerop (TYPE_SIZE (type)))
    {
      unsigned HOST_WIDE_INT align, boundary;
      tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
      align = PARM_BOUNDARY / BITS_PER_UNIT;
      boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
      if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
	boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
      boundary /= BITS_PER_UNIT;
      if (boundary > align)
	{
	  tree t;
	  /* This updates arg ptr by the amount that would be necessary
	     to align the zero-sized (but not zero-alignment) item.  */
	  t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
		      fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
	  gimplify_and_add (t, pre_p);

	  /* Round the bumped pointer down to a multiple of BOUNDARY.  */
	  t = fold_convert (sizetype, valist_tmp);
	  t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
		      fold_convert (TREE_TYPE (valist),
				    fold_build2 (BIT_AND_EXPR, sizetype, t,
						 size_int (-boundary))));
	  t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
	  gimplify_and_add (t, pre_p);
	}
      /* Since it is zero-sized there's no increment for the item itself.  */
      valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
      return build_va_arg_indirect_ref (valist_tmp);
    }

  if (DEFAULT_ABI != ABI_V4)
    {
      /* Complex args that the target splits are fetched one half at a
	 time when the element is narrower than a word.  */
      if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
	{
	  tree elem_type = TREE_TYPE (type);
	  machine_mode elem_mode = TYPE_MODE (elem_type);
	  int elem_size = GET_MODE_SIZE (elem_mode);

	  if (elem_size < UNITS_PER_WORD)
	    {
	      tree real_part, imag_part;
	      gimple_seq post = NULL;

	      real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
						  &post);
	      /* Copy the value into a temporary, lest the formal temporary
		 be reused out from under us.  */
	      real_part = get_initialized_tmp_var (real_part, pre_p, &post);
	      gimple_seq_add_seq (pre_p, post);

	      imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
						  post_p);

	      return build2 (COMPLEX_EXPR, type, real_part, imag_part);
	    }
	}

      return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
    }

  /* V4 ABI: locate the fields of the va_list structure.  */
  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_res = DECL_CHAIN (f_fpr);
  f_ovf = DECL_CHAIN (f_res);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
		f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
		f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
		f_sav, NULL_TREE);

  /* Size in bytes, rounded-up size in 32-bit words, and the padding
     needed to right-align the value within those words.  */
  size = int_size_in_bytes (type);
  rsize = (size + 3) / 4;
  int pad = 4 * rsize - size;
  align = 1;

  machine_mode mode = TYPE_MODE (type);
  if (abi_v4_pass_in_fpr (mode))
    {
      /* FP args go in FP registers, if present.  */
      reg = fpr;
      n_reg = (size + 7) / 8;
      sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
      sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
      if (mode != SFmode && mode != SDmode)
	align = 8;
    }
  else
    {
      /* Otherwise into GP registers.  */
      reg = gpr;
      n_reg = rsize;
      sav_ofs = 0;
      sav_scale = 4;
      if (n_reg == 2)
	align = 8;
    }

  /* Pull the value out of the saved registers....  */

  lab_over = NULL;
  addr = create_tmp_var (ptr_type_node, "addr");

  /*  AltiVec vectors never go in registers when -mabi=altivec.  */
  if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
    align = 16;
  else
    {
      lab_false = create_artificial_label (input_location);
      lab_over = create_artificial_label (input_location);

      /* Long long and SPE vectors are aligned in the registers.
	 As are any other 2 gpr item such as complex int due to a
	 historical mistake.  */
      u = reg;
      if (n_reg == 2 && reg == gpr)
	{
	  regalign = 1;
	  u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
		      build_int_cst (TREE_TYPE (reg), n_reg - 1));
	  u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
		      unshare_expr (reg), u);
	}
      /* _Decimal128 is passed in even/odd fpr pairs; the stored
	 reg number is 0 for f1, so we want to make it odd.  */
      else if (reg == fpr && mode == TDmode)
	{
	  t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
		      build_int_cst (TREE_TYPE (reg), 1));
	  u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
	}

      /* Branch to lab_false when fewer than N_REG registers remain
	 (8 arg registers per class in this ABI).  */
      t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
      t = build2 (GE_EXPR, boolean_type_node, u, t);
      u = build1 (GOTO_EXPR, void_type_node, lab_false);
      t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
      gimplify_and_add (t, pre_p);

      /* Compute the address within the register save area.  */
      t = sav;
      if (sav_ofs)
	t = fold_build_pointer_plus_hwi (sav, sav_ofs);

      u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
		  build_int_cst (TREE_TYPE (reg), n_reg));
      u = fold_convert (sizetype, u);
      u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
      t = fold_build_pointer_plus (t, u);

      /* _Decimal32 varargs are located in the second word of the 64-bit
	 FP register for 32-bit binaries.  */
      if (TARGET_32BIT
	  && TARGET_HARD_FLOAT && TARGET_FPRS
	  && mode == SDmode)
	t = fold_build_pointer_plus_hwi (t, size);

      /* Args are passed right-aligned.  */
      if (BYTES_BIG_ENDIAN)
	t = fold_build_pointer_plus_hwi (t, pad);

      gimplify_assign (addr, t, pre_p);

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      stmt = gimple_build_label (lab_false);
      gimple_seq_add_stmt (pre_p, stmt);

      if ((n_reg == 2 && !regalign) || n_reg > 2)
	{
	  /* Ensure that we don't find any more args in regs.
	     Alignment has taken care of for special cases.  */
	  gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
	}
    }

  /* ... otherwise out of the overflow area.  */

  /* Care for on-stack alignment if needed.  */
  t = ovf;
  if (align != 1)
    {
      t = fold_build_pointer_plus_hwi (t, align - 1);
      t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
		  build_int_cst (TREE_TYPE (t), -align));
    }

  /* Args are passed right-aligned.  */
  if (BYTES_BIG_ENDIAN)
    t = fold_build_pointer_plus_hwi (t, pad);

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  gimplify_assign (unshare_expr (addr), t, pre_p);

  /* Advance the overflow pointer past the fetched value.  */
  t = fold_build_pointer_plus_hwi (t, size);
  gimplify_assign (unshare_expr (ovf), t, pre_p);

  if (lab_over)
    {
      stmt = gimple_build_label (lab_over);
      gimple_seq_add_stmt (pre_p, stmt);
    }

  if (STRICT_ALIGNMENT
      && (TYPE_ALIGN (type)
	  > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
    {
      /* The value (of type complex double, for example) may not be
	 aligned in memory in the saved registers, so copy via a
	 temporary.  (This is the same code as used for SPARC.)  */
      tree tmp = create_tmp_var (type, "va_arg_tmp");
      tree dest_addr = build_fold_addr_expr (tmp);

      tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
				   3, dest_addr, addr, size_int (rsize * 4));
      TREE_ADDRESSABLE (tmp) = 1;

      gimplify_and_add (copy, pre_p);
      addr = dest_addr;
    }

  addr = fold_convert (ptrtype, addr);
  return build_va_arg_indirect_ref (addr);
}
14197 | ||
14198 | /* Builtins. */ | |
14199 | ||
14200 | static void | |
14201 | def_builtin (const char *name, tree type, enum rs6000_builtins code) | |
14202 | { | |
14203 | tree t; | |
14204 | unsigned classify = rs6000_builtin_info[(int)code].attr; | |
14205 | const char *attr_string = ""; | |
14206 | ||
14207 | gcc_assert (name != NULL); | |
14208 | gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT)); | |
14209 | ||
14210 | if (rs6000_builtin_decls[(int)code]) | |
14211 | fatal_error (input_location, | |
14212 | "internal error: builtin function %s already processed", name); | |
14213 | ||
14214 | rs6000_builtin_decls[(int)code] = t = | |
14215 | add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE); | |
14216 | ||
14217 | /* Set any special attributes. */ | |
14218 | if ((classify & RS6000_BTC_CONST) != 0) | |
14219 | { | |
14220 | /* const function, function only depends on the inputs. */ | |
14221 | TREE_READONLY (t) = 1; | |
14222 | TREE_NOTHROW (t) = 1; | |
14223 | attr_string = ", const"; | |
14224 | } | |
14225 | else if ((classify & RS6000_BTC_PURE) != 0) | |
14226 | { | |
14227 | /* pure function, function can read global memory, but does not set any | |
14228 | external state. */ | |
14229 | DECL_PURE_P (t) = 1; | |
14230 | TREE_NOTHROW (t) = 1; | |
14231 | attr_string = ", pure"; | |
14232 | } | |
14233 | else if ((classify & RS6000_BTC_FP) != 0) | |
14234 | { | |
14235 | /* Function is a math function. If rounding mode is on, then treat the | |
14236 | function as not reading global memory, but it can have arbitrary side | |
14237 | effects. If it is off, then assume the function is a const function. | |
14238 | This mimics the ATTR_MATHFN_FPROUNDING attribute in | |
14239 | builtin-attribute.def that is used for the math functions. */ | |
14240 | TREE_NOTHROW (t) = 1; | |
14241 | if (flag_rounding_math) | |
14242 | { | |
14243 | DECL_PURE_P (t) = 1; | |
14244 | DECL_IS_NOVOPS (t) = 1; | |
14245 | attr_string = ", fp, pure"; | |
14246 | } | |
14247 | else | |
14248 | { | |
14249 | TREE_READONLY (t) = 1; | |
14250 | attr_string = ", fp, const"; | |
14251 | } | |
14252 | } | |
14253 | else if ((classify & RS6000_BTC_ATTR_MASK) != 0) | |
14254 | gcc_unreachable (); | |
14255 | ||
14256 | if (TARGET_DEBUG_BUILTIN) | |
14257 | fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n", | |
14258 | (int)code, name, attr_string); | |
14259 | } | |
14260 | ||
/* The tables below are generated with the "X-macro" technique: for each
   table, every RS6000_BUILTIN_<kind> macro is first #undef'd, then all
   are redefined so that exactly ONE kind expands to a table entry
   "{ MASK, ICODE, NAME, ENUM }," and the rest expand to nothing.
   Including powerpcspe-builtin.def then instantiates an array holding
   just the builtins of that kind.  */

/* Simple ternary operations: VECd = foo (VECa, VECb, VECc).  */

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_3arg[] =
{
#include "powerpcspe-builtin.def"
};

/* DST operations: void foo (void *, const int, const char).  */

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_dst[] =
{
#include "powerpcspe-builtin.def"
};

/* Simple binary operations: VECc = foo (VECa, VECb).  */

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_2arg[] =
{
#include "powerpcspe-builtin.def"
};

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

/* AltiVec predicates.  */

static const struct builtin_description bdesc_altivec_preds[] =
{
#include "powerpcspe-builtin.def"
};

/* SPE predicates.  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_spe_predicates[] =
{
#include "powerpcspe-builtin.def"
};

/* SPE evsel predicates.  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_spe_evsel[] =
{
#include "powerpcspe-builtin.def"
};

/* PAIRED predicates.  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_paired_preds[] =
{
#include "powerpcspe-builtin.def"
};

/* ABS* operations.  */

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_abs[] =
{
#include "powerpcspe-builtin.def"
};

/* Simple unary operations: VECb = foo (unsigned literal) or VECb =
   foo (VECa).  */

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_1arg[] =
{
#include "powerpcspe-builtin.def"
};

/* Simple no-argument operations: result = __builtin_darn_32 () */

#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_0arg[] =
{
#include "powerpcspe-builtin.def"
};

/* HTM builtins.  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_0(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { MASK, ICODE, NAME, ENUM },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)

static const struct builtin_description bdesc_htm[] =
{
#include "powerpcspe-builtin.def"
};

/* Final cleanup: drop all kind macros except RS6000_BUILTIN_X, which
   remains defined (empty) for later use.  */
#undef RS6000_BUILTIN_0
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
14654 | ||
14655 | /* Return true if a builtin function is overloaded. */ | |
14656 | bool | |
14657 | rs6000_overloaded_builtin_p (enum rs6000_builtins fncode) | |
14658 | { | |
14659 | return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0; | |
14660 | } | |
14661 | ||
14662 | const char * | |
14663 | rs6000_overloaded_builtin_name (enum rs6000_builtins fncode) | |
14664 | { | |
14665 | return rs6000_builtin_info[(int)fncode].name; | |
14666 | } | |
14667 | ||
14668 | /* Expand an expression EXP that calls a builtin without arguments. */ | |
14669 | static rtx | |
14670 | rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target) | |
14671 | { | |
14672 | rtx pat; | |
14673 | machine_mode tmode = insn_data[icode].operand[0].mode; | |
14674 | ||
14675 | if (icode == CODE_FOR_nothing) | |
14676 | /* Builtin not supported on this processor. */ | |
14677 | return 0; | |
14678 | ||
14679 | if (target == 0 | |
14680 | || GET_MODE (target) != tmode | |
14681 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
14682 | target = gen_reg_rtx (tmode); | |
14683 | ||
14684 | pat = GEN_FCN (icode) (target); | |
14685 | if (! pat) | |
14686 | return 0; | |
14687 | emit_insn (pat); | |
14688 | ||
14689 | return target; | |
14690 | } | |
14691 | ||
14692 | ||
14693 | static rtx | |
14694 | rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp) | |
14695 | { | |
14696 | rtx pat; | |
14697 | tree arg0 = CALL_EXPR_ARG (exp, 0); | |
14698 | tree arg1 = CALL_EXPR_ARG (exp, 1); | |
14699 | rtx op0 = expand_normal (arg0); | |
14700 | rtx op1 = expand_normal (arg1); | |
14701 | machine_mode mode0 = insn_data[icode].operand[0].mode; | |
14702 | machine_mode mode1 = insn_data[icode].operand[1].mode; | |
14703 | ||
14704 | if (icode == CODE_FOR_nothing) | |
14705 | /* Builtin not supported on this processor. */ | |
14706 | return 0; | |
14707 | ||
14708 | /* If we got invalid arguments bail out before generating bad rtl. */ | |
14709 | if (arg0 == error_mark_node || arg1 == error_mark_node) | |
14710 | return const0_rtx; | |
14711 | ||
14712 | if (GET_CODE (op0) != CONST_INT | |
14713 | || INTVAL (op0) > 255 | |
14714 | || INTVAL (op0) < 0) | |
14715 | { | |
14716 | error ("argument 1 must be an 8-bit field value"); | |
14717 | return const0_rtx; | |
14718 | } | |
14719 | ||
14720 | if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) | |
14721 | op0 = copy_to_mode_reg (mode0, op0); | |
14722 | ||
14723 | if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) | |
14724 | op1 = copy_to_mode_reg (mode1, op1); | |
14725 | ||
14726 | pat = GEN_FCN (icode) (op0, op1); | |
14727 | if (! pat) | |
14728 | return const0_rtx; | |
14729 | emit_insn (pat); | |
14730 | ||
14731 | return NULL_RTX; | |
14732 | } | |
14733 | ||
/* Expand a unary builtin: emit insn ICODE computing TARGET from the single
   call argument of EXP.  Returns the result register, 0 when the builtin
   is unsupported or no pattern was produced, and const0_rtx / CONST0_RTX
   on invalid arguments.  */
static rtx
rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;

  if (icode == CODE_FOR_nothing)
    /* Builtin not supported on this processor.  */
    return 0;

  /* If we got invalid arguments bail out before generating bad rtl.  */
  if (arg0 == error_mark_node)
    return const0_rtx;

  /* The splat-immediate instructions take their operand as an immediate
     field, so it must be a compile-time constant in range.  */
  if (icode == CODE_FOR_altivec_vspltisb
      || icode == CODE_FOR_altivec_vspltish
      || icode == CODE_FOR_altivec_vspltisw
      || icode == CODE_FOR_spe_evsplatfi
      || icode == CODE_FOR_spe_evsplati)
    {
      /* Only allow 5-bit *signed* literals.  */
      if (GET_CODE (op0) != CONST_INT
	  || INTVAL (op0) > 15
	  || INTVAL (op0) < -16)
	{
	  error ("argument 1 must be a 5-bit signed literal");
	  return CONST0_RTX (tmode);
	}
    }

  /* Fall back to a fresh register when the caller's TARGET is missing,
     has the wrong mode, or fails the output predicate.  */
  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);

  pat = GEN_FCN (icode) (target, op0);
  if (! pat)
    return 0;
  emit_insn (pat);

  return target;
}
14782 | ||
14783 | static rtx | |
14784 | altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target) | |
14785 | { | |
14786 | rtx pat, scratch1, scratch2; | |
14787 | tree arg0 = CALL_EXPR_ARG (exp, 0); | |
14788 | rtx op0 = expand_normal (arg0); | |
14789 | machine_mode tmode = insn_data[icode].operand[0].mode; | |
14790 | machine_mode mode0 = insn_data[icode].operand[1].mode; | |
14791 | ||
14792 | /* If we have invalid arguments, bail out before generating bad rtl. */ | |
14793 | if (arg0 == error_mark_node) | |
14794 | return const0_rtx; | |
14795 | ||
14796 | if (target == 0 | |
14797 | || GET_MODE (target) != tmode | |
14798 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
14799 | target = gen_reg_rtx (tmode); | |
14800 | ||
14801 | if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) | |
14802 | op0 = copy_to_mode_reg (mode0, op0); | |
14803 | ||
14804 | scratch1 = gen_reg_rtx (mode0); | |
14805 | scratch2 = gen_reg_rtx (mode0); | |
14806 | ||
14807 | pat = GEN_FCN (icode) (target, op0, scratch1, scratch2); | |
14808 | if (! pat) | |
14809 | return 0; | |
14810 | emit_insn (pat); | |
14811 | ||
14812 | return target; | |
14813 | } | |
14814 | ||
/* Expand a binary builtin: emit insn ICODE computing TARGET from the two
   call arguments of EXP.  Several instruction forms require one operand to
   be a small literal immediate; those are validated here before any RTL is
   generated.  Returns the result register, 0 when the builtin is
   unsupported or no pattern was produced, and const0_rtx / CONST0_RTX on
   invalid arguments.  */
static rtx
rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (icode == CODE_FOR_nothing)
    /* Builtin not supported on this processor.  */
    return 0;

  /* If we got invalid arguments bail out before generating bad rtl.  */
  if (arg0 == error_mark_node || arg1 == error_mark_node)
    return const0_rtx;

  /* These AltiVec/SPE patterns encode their second operand as a 5-bit
     unsigned immediate field.  */
  if (icode == CODE_FOR_altivec_vcfux
      || icode == CODE_FOR_altivec_vcfsx
      || icode == CODE_FOR_altivec_vctsxs
      || icode == CODE_FOR_altivec_vctuxs
      || icode == CODE_FOR_altivec_vspltb
      || icode == CODE_FOR_altivec_vsplth
      || icode == CODE_FOR_altivec_vspltw
      || icode == CODE_FOR_spe_evaddiw
      || icode == CODE_FOR_spe_evldd
      || icode == CODE_FOR_spe_evldh
      || icode == CODE_FOR_spe_evldw
      || icode == CODE_FOR_spe_evlhhesplat
      || icode == CODE_FOR_spe_evlhhossplat
      || icode == CODE_FOR_spe_evlhhousplat
      || icode == CODE_FOR_spe_evlwhe
      || icode == CODE_FOR_spe_evlwhos
      || icode == CODE_FOR_spe_evlwhou
      || icode == CODE_FOR_spe_evlwhsplat
      || icode == CODE_FOR_spe_evlwwsplat
      || icode == CODE_FOR_spe_evrlwi
      || icode == CODE_FOR_spe_evslwi
      || icode == CODE_FOR_spe_evsrwis
      || icode == CODE_FOR_spe_evsubifw
      || icode == CODE_FOR_spe_evsrwiu)
    {
      /* Only allow 5-bit unsigned literals.  */
      STRIP_NOPS (arg1);
      if (TREE_CODE (arg1) != INTEGER_CST
	  || TREE_INT_CST_LOW (arg1) & ~0x1f)
	{
	  error ("argument 2 must be a 5-bit unsigned literal");
	  return CONST0_RTX (tmode);
	}
    }
  /* The DFP significance-test patterns take their *first* operand as a
     6-bit unsigned immediate.  */
  else if (icode == CODE_FOR_dfptstsfi_eq_dd
	   || icode == CODE_FOR_dfptstsfi_lt_dd
	   || icode == CODE_FOR_dfptstsfi_gt_dd
	   || icode == CODE_FOR_dfptstsfi_unordered_dd
	   || icode == CODE_FOR_dfptstsfi_eq_td
	   || icode == CODE_FOR_dfptstsfi_lt_td
	   || icode == CODE_FOR_dfptstsfi_gt_td
	   || icode == CODE_FOR_dfptstsfi_unordered_td)
    {
      /* Only allow 6-bit unsigned literals.  */
      STRIP_NOPS (arg0);
      if (TREE_CODE (arg0) != INTEGER_CST
	  || !IN_RANGE (TREE_INT_CST_LOW (arg0), 0, 63))
	{
	  error ("argument 1 must be a 6-bit unsigned literal");
	  return CONST0_RTX (tmode);
	}
    }
  /* The VSX test-data-class patterns take a 7-bit unsigned immediate as
     their second operand.  */
  else if (icode == CODE_FOR_xststdcdp
	   || icode == CODE_FOR_xststdcsp
	   || icode == CODE_FOR_xvtstdcdp
	   || icode == CODE_FOR_xvtstdcsp)
    {
      /* Only allow 7-bit unsigned literals.  */
      STRIP_NOPS (arg1);
      if (TREE_CODE (arg1) != INTEGER_CST
	  || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 127))
	{
	  error ("argument 2 must be a 7-bit unsigned literal");
	  return CONST0_RTX (tmode);
	}
    }

  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);

  return target;
}
14919 | ||
/* Expand an AltiVec predicate builtin.  The first call argument selects
   which CR6 condition to test (0..3); the remaining two are the vectors
   compared by insn ICODE.  Returns an SImode TARGET holding the 0/1 test
   result, or const0_rtx on invalid arguments.  */
static rtx
altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, scratch;
  tree cr6_form = CALL_EXPR_ARG (exp, 0);
  tree arg0 = CALL_EXPR_ARG (exp, 1);
  tree arg1 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode tmode = SImode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;
  int cr6_form_int;

  /* The CR6 selector must be a compile-time constant.  */
  if (TREE_CODE (cr6_form) != INTEGER_CST)
    {
      error ("argument 1 of __builtin_altivec_predicate must be a constant");
      return const0_rtx;
    }
  else
    cr6_form_int = TREE_INT_CST_LOW (cr6_form);

  gcc_assert (mode0 == mode1);

  /* If we have invalid arguments, bail out before generating bad rtl.  */
  if (arg0 == error_mark_node || arg1 == error_mark_node)
    return const0_rtx;

  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* Note that for many of the relevant operations (e.g. cmpne or
     cmpeq) with float or double operands, it makes more sense for the
     mode of the allocated scratch register to select a vector of
     integer.  But the choice to copy the mode of operand 0 was made
     long ago and there are no plans to change it.  */
  scratch = gen_reg_rtx (mode0);

  /* The comparison result itself is discarded; only the CR6 bits the
     compare sets as a side effect are consumed below.  */
  pat = GEN_FCN (icode) (scratch, op0, op1);
  if (! pat)
    return 0;
  emit_insn (pat);

  /* The vec_any* and vec_all* predicates use the same opcodes for two
     different operations, but the bits in CR6 will be different
     depending on what information we want.  So we have to play tricks
     with CR6 to get the right bits out.

     If you think this is disgusting, look at the specs for the
     AltiVec predicates.  */

  switch (cr6_form_int)
    {
    case 0:
      emit_insn (gen_cr6_test_for_zero (target));
      break;
    case 1:
      emit_insn (gen_cr6_test_for_zero_reverse (target));
      break;
    case 2:
      emit_insn (gen_cr6_test_for_lt (target));
      break;
    case 3:
      emit_insn (gen_cr6_test_for_lt_reverse (target));
      break;
    default:
      error ("argument 1 of __builtin_altivec_predicate is out of range");
      break;
    }

  return target;
}
14999 | ||
/* Expand a paired-single load builtin.  The two call arguments form the
   address (arg0 + arg1 when arg0 is nonzero, else just arg1 -- presumably
   offset and base pointer; confirm against the builtin prototypes).
   Returns the loaded value in TARGET, or const0_rtx on bad arguments.  */
static rtx
paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, addr;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = Pmode;
  machine_mode mode1 = Pmode;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);

  if (icode == CODE_FOR_nothing)
    /* Builtin not supported on this processor.  */
    return 0;

  /* If we got invalid arguments bail out before generating bad rtl.  */
  if (arg0 == error_mark_node || arg1 == error_mark_node)
    return const0_rtx;

  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  op1 = copy_to_mode_reg (mode1, op1);

  /* Omit the register-plus-register addition when the first operand is a
     literal zero.  */
  if (op0 == const0_rtx)
    {
      addr = gen_rtx_MEM (tmode, op1);
    }
  else
    {
      op0 = copy_to_mode_reg (mode0, op0);
      addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
    }

  pat = GEN_FCN (icode) (target, addr);

  if (! pat)
    return 0;
  emit_insn (pat);

  return target;
}
15045 | ||
/* Return a constant vector for use as a little-endian permute control vector
   to reverse the order of elements of the given vector mode.  The selector
   is returned as a V16QI value forced into a register.  */
static rtx
swap_selector_for_mode (machine_mode mode)
{
  /* These are little endian vectors, so their elements are reversed
     from what you would normally expect for a permute control vector.  */
  unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
  unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
  unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
  unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
  unsigned int *swaparray, i;
  rtx perm[16];

  /* Select the byte-permutation table matching the element width of MODE:
     swap2 for 8-byte elements down to swap16 (identity) for 1-byte ones.  */
  switch (mode)
    {
    case E_V2DFmode:
    case E_V2DImode:
      swaparray = swap2;
      break;
    case E_V4SFmode:
    case E_V4SImode:
      swaparray = swap4;
      break;
    case E_V8HImode:
      swaparray = swap8;
      break;
    case E_V16QImode:
      swaparray = swap16;
      break;
    default:
      gcc_unreachable ();
    }

  for (i = 0; i < 16; ++i)
    perm[i] = GEN_INT (swaparray[i]);

  return force_reg (V16QImode,
		    gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
}
15085 | ||
/* Generate code for an "lvxl", or "lve*x" built-in for a little endian target
   with -maltivec=be specified.  Issue the load followed by an element-
   reversing permute.  OP0 is the destination register, OP1 the memory
   source, and UNSPEC identifies the load variant.  */
void
altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
{
  /* Load into a temporary first; the UNSPEC rides alongside the SET in a
     PARALLEL to mark the special load semantics.  */
  rtx tmp = gen_reg_rtx (mode);
  rtx load = gen_rtx_SET (tmp, op1);
  rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
  rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
  rtx sel = swap_selector_for_mode (mode);
  rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);

  gcc_assert (REG_P (op0));
  emit_insn (par);
  /* Then permute the loaded value into element order OP0 expects.  */
  emit_insn (gen_rtx_SET (op0, vperm));
}
15103 | ||
/* Generate code for a "stvxl" built-in for a little endian target with
   -maltivec=be specified.  Issue the store preceded by an element-reversing
   permute.  OP0 is the memory destination, OP1 the source register, and
   UNSPEC identifies the store variant.  */
void
altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
{
  /* Permute the source into a temporary, then store the temporary with the
     UNSPEC attached in a PARALLEL to mark the special store semantics.  */
  rtx tmp = gen_reg_rtx (mode);
  rtx store = gen_rtx_SET (op0, tmp);
  rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
  rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
  rtx sel = swap_selector_for_mode (mode);
  rtx vperm;

  gcc_assert (REG_P (op1));
  vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
  emit_insn (gen_rtx_SET (tmp, vperm));
  emit_insn (par);
}
15122 | ||
/* Generate code for a "stve*x" built-in for a little endian target with
   -maltivec=be specified.  Issue the store preceded by an element-reversing
   permute.  Unlike altivec_expand_stvx_be, the stored value has the
   vector's *element* mode, wrapped in an UNSPEC around the permuted
   temporary.  */
void
altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
{
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx tmp = gen_reg_rtx (mode);
  rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
  rtx sel = swap_selector_for_mode (mode);
  rtx vperm;

  gcc_assert (REG_P (op1));
  /* Reverse the elements of OP1 into TMP, then emit the element store.  */
  vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
  emit_insn (gen_rtx_SET (tmp, vperm));
  emit_insn (gen_rtx_SET (op0, stvx));
}
15139 | ||
/* Expand an AltiVec load builtin.  The two call arguments form the address
   (arg0 + arg1 when arg0 is nonzero, else just arg1).  BLK requests a
   BLKmode memory reference instead of the insn's natural mode.  Returns
   the loaded value in TARGET, or const0_rtx on bad arguments.  */
static rtx
altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
{
  rtx pat, addr;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = Pmode;
  machine_mode mode1 = Pmode;
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);

  if (icode == CODE_FOR_nothing)
    /* Builtin not supported on this processor.  */
    return 0;

  /* If we got invalid arguments bail out before generating bad rtl.  */
  if (arg0 == error_mark_node || arg1 == error_mark_node)
    return const0_rtx;

  if (target == 0
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  op1 = copy_to_mode_reg (mode1, op1);

  /* For LVX, express the RTL accurately by ANDing the address with -16.
     LVXL and LVE*X expand to use UNSPECs to hide their special behavior,
     so the raw address is fine.  */
  if (icode == CODE_FOR_altivec_lvx_v2df_2op
      || icode == CODE_FOR_altivec_lvx_v2di_2op
      || icode == CODE_FOR_altivec_lvx_v4sf_2op
      || icode == CODE_FOR_altivec_lvx_v4si_2op
      || icode == CODE_FOR_altivec_lvx_v8hi_2op
      || icode == CODE_FOR_altivec_lvx_v16qi_2op)
    {
      rtx rawaddr;
      if (op0 == const0_rtx)
	rawaddr = op1;
      else
	{
	  op0 = copy_to_mode_reg (mode0, op0);
	  rawaddr = gen_rtx_PLUS (Pmode, op1, op0);
	}
      /* lvx ignores the low four address bits; model that explicitly.  */
      addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
      addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr);

      /* For -maltivec=be, emit the load and follow it up with a
	 permute to swap the elements.  */
      if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
	{
	  rtx temp = gen_reg_rtx (tmode);
	  emit_insn (gen_rtx_SET (temp, addr));

	  rtx sel = swap_selector_for_mode (tmode);
	  rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, temp, temp, sel),
				      UNSPEC_VPERM);
	  emit_insn (gen_rtx_SET (target, vperm));
	}
      else
	emit_insn (gen_rtx_SET (target, addr));
    }
  else
    {
      /* Non-LVX loads go through the insn pattern directly.  */
      if (op0 == const0_rtx)
	addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
      else
	{
	  op0 = copy_to_mode_reg (mode0, op0);
	  addr = gen_rtx_MEM (blk ? BLKmode : tmode,
			      gen_rtx_PLUS (Pmode, op1, op0));
	}

      pat = GEN_FCN (icode) (target, addr);
      if (! pat)
	return 0;
      emit_insn (pat);
    }

  return target;
}
15222 | ||
/* Expand an SPE store builtin: arg0 is the value, arg1/arg2 the address
   operands.  Emits the store for its side effect and returns NULL_RTX, or
   const0_rtx on invalid arguments.

   NOTE(review): the operand/mode indices are rotated relative to the rtx
   numbering -- op0 (the value) is checked against operand[2], op1 against
   operand[0], op2 against operand[1], matching the (op1, op2, op0) order
   passed to GEN_FCN.  This mirrors the stv insn patterns' operand layout;
   confirm against powerpcspe.md before touching.  */
static rtx
spe_expand_stv_builtin (enum insn_code icode, tree exp)
{
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx pat;
  machine_mode mode0 = insn_data[icode].operand[0].mode;
  machine_mode mode1 = insn_data[icode].operand[1].mode;
  machine_mode mode2 = insn_data[icode].operand[2].mode;

  /* Invalid arguments.  Bail before doing anything stoopid!  */
  if (arg0 == error_mark_node
      || arg1 == error_mark_node
      || arg2 == error_mark_node)
    return const0_rtx;

  if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
    op0 = copy_to_mode_reg (mode2, op0);
  if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
    op1 = copy_to_mode_reg (mode0, op1);
  if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
    op2 = copy_to_mode_reg (mode1, op2);

  pat = GEN_FCN (icode) (op1, op2, op0);
  if (pat)
    emit_insn (pat);
  return NULL_RTX;
}
15255 | ||
/* Expand a paired-single store builtin: arg0 is the value to store and
   arg1/arg2 form the address (arg1 + arg2 when arg1 is nonzero, else just
   arg2).  Emits the store for its side effect and returns NULL_RTX, or
   const0_rtx on invalid arguments.  */
static rtx
paired_expand_stv_builtin (enum insn_code icode, tree exp)
{
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx pat, addr;
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode1 = Pmode;
  machine_mode mode2 = Pmode;

  /* Invalid arguments.  Bail before doing anything stoopid!  */
  if (arg0 == error_mark_node
      || arg1 == error_mark_node
      || arg2 == error_mark_node)
    return const0_rtx;

  if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
    op0 = copy_to_mode_reg (tmode, op0);

  op2 = copy_to_mode_reg (mode2, op2);

  /* Skip the register addition when the offset operand is a literal
     zero.  */
  if (op1 == const0_rtx)
    {
      addr = gen_rtx_MEM (tmode, op2);
    }
  else
    {
      op1 = copy_to_mode_reg (mode1, op1);
      addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
    }

  pat = GEN_FCN (icode) (addr, op0);
  if (pat)
    emit_insn (pat);
  return NULL_RTX;
}
15296 | ||
/* Expand a stxvl-style builtin taking three operands (value, address,
   length).  Emits the insn for its side effect and returns NULL_RTX in
   all cases.

   NOTE(review): the predicates come from operand[1..3] while the modes
   come from operand[0..2] -- this matches the insn pattern's operand
   numbering; confirm against the stxvl define_insn before changing.  */
static rtx
altivec_expand_stxvl_builtin (enum insn_code icode, tree exp)
{
  rtx pat;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  machine_mode mode0 = insn_data[icode].operand[0].mode;
  machine_mode mode1 = insn_data[icode].operand[1].mode;
  machine_mode mode2 = insn_data[icode].operand[2].mode;

  if (icode == CODE_FOR_nothing)
    /* Builtin not supported on this processor.  */
    return NULL_RTX;

  /* If we got invalid arguments bail out before generating bad rtl.  */
  if (arg0 == error_mark_node
      || arg1 == error_mark_node
      || arg2 == error_mark_node)
    return NULL_RTX;

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);
  if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
    op2 = copy_to_mode_reg (mode2, op2);

  pat = GEN_FCN (icode) (op0, op1, op2);
  if (pat)
    emit_insn (pat);

  return NULL_RTX;
}
15334 | ||
/* Expand an AltiVec store builtin: arg0 is the value and arg1/arg2 form
   the address (arg1 + arg2 when arg1 is nonzero, else just arg2).  Emits
   the store for its side effect and returns NULL_RTX, or const0_rtx on
   invalid arguments.  */
static rtx
altivec_expand_stv_builtin (enum insn_code icode, tree exp)
{
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx pat, addr, rawaddr;
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode smode = insn_data[icode].operand[1].mode;
  machine_mode mode1 = Pmode;
  machine_mode mode2 = Pmode;

  /* Invalid arguments.  Bail before doing anything stoopid!  */
  if (arg0 == error_mark_node
      || arg1 == error_mark_node
      || arg2 == error_mark_node)
    return const0_rtx;

  op2 = copy_to_mode_reg (mode2, op2);

  /* For STVX, express the RTL accurately by ANDing the address with -16.
     STVXL and STVE*X expand to use UNSPECs to hide their special behavior,
     so the raw address is fine.  */
  if (icode == CODE_FOR_altivec_stvx_v2df_2op
      || icode == CODE_FOR_altivec_stvx_v2di_2op
      || icode == CODE_FOR_altivec_stvx_v4sf_2op
      || icode == CODE_FOR_altivec_stvx_v4si_2op
      || icode == CODE_FOR_altivec_stvx_v8hi_2op
      || icode == CODE_FOR_altivec_stvx_v16qi_2op)
    {
      if (op1 == const0_rtx)
	rawaddr = op2;
      else
	{
	  op1 = copy_to_mode_reg (mode1, op1);
	  rawaddr = gen_rtx_PLUS (Pmode, op2, op1);
	}

      /* stvx ignores the low four address bits; model that explicitly.  */
      addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16));
      addr = gen_rtx_MEM (tmode, addr);

      op0 = copy_to_mode_reg (tmode, op0);

      /* For -maltivec=be, emit a permute to swap the elements, followed
	 by the store.  */
      if (!BYTES_BIG_ENDIAN && VECTOR_ELT_ORDER_BIG)
	{
	  rtx temp = gen_reg_rtx (tmode);
	  rtx sel = swap_selector_for_mode (tmode);
	  rtx vperm = gen_rtx_UNSPEC (tmode, gen_rtvec (3, op0, op0, sel),
				      UNSPEC_VPERM);
	  emit_insn (gen_rtx_SET (temp, vperm));
	  emit_insn (gen_rtx_SET (addr, temp));
	}
      else
	emit_insn (gen_rtx_SET (addr, op0));
    }
  else
    {
      /* Non-STVX stores go through the insn pattern directly; the stored
	 value uses the pattern's source mode SMODE.  */
      if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
	op0 = copy_to_mode_reg (smode, op0);

      if (op1 == const0_rtx)
	addr = gen_rtx_MEM (tmode, op2);
      else
	{
	  op1 = copy_to_mode_reg (mode1, op1);
	  addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op2, op1));
	}

      pat = GEN_FCN (icode) (addr, op0);
      if (pat)
	emit_insn (pat);
    }

  return NULL_RTX;
}
15415 | ||
15416 | /* Return the appropriate SPR number associated with the given builtin. */ | |
15417 | static inline HOST_WIDE_INT | |
15418 | htm_spr_num (enum rs6000_builtins code) | |
15419 | { | |
15420 | if (code == HTM_BUILTIN_GET_TFHAR | |
15421 | || code == HTM_BUILTIN_SET_TFHAR) | |
15422 | return TFHAR_SPR; | |
15423 | else if (code == HTM_BUILTIN_GET_TFIAR | |
15424 | || code == HTM_BUILTIN_SET_TFIAR) | |
15425 | return TFIAR_SPR; | |
15426 | else if (code == HTM_BUILTIN_GET_TEXASR | |
15427 | || code == HTM_BUILTIN_SET_TEXASR) | |
15428 | return TEXASR_SPR; | |
15429 | gcc_assert (code == HTM_BUILTIN_GET_TEXASRU | |
15430 | || code == HTM_BUILTIN_SET_TEXASRU); | |
15431 | return TEXASRU_SPR; | |
15432 | } | |
15433 | ||
15434 | /* Return the appropriate SPR regno associated with the given builtin. */ | |
15435 | static inline HOST_WIDE_INT | |
15436 | htm_spr_regno (enum rs6000_builtins code) | |
15437 | { | |
15438 | if (code == HTM_BUILTIN_GET_TFHAR | |
15439 | || code == HTM_BUILTIN_SET_TFHAR) | |
15440 | return TFHAR_REGNO; | |
15441 | else if (code == HTM_BUILTIN_GET_TFIAR | |
15442 | || code == HTM_BUILTIN_SET_TFIAR) | |
15443 | return TFIAR_REGNO; | |
15444 | gcc_assert (code == HTM_BUILTIN_GET_TEXASR | |
15445 | || code == HTM_BUILTIN_SET_TEXASR | |
15446 | || code == HTM_BUILTIN_GET_TEXASRU | |
15447 | || code == HTM_BUILTIN_SET_TEXASRU); | |
15448 | return TEXASR_REGNO; | |
15449 | } | |
15450 | ||
15451 | /* Return the correct ICODE value depending on whether we are | |
15452 | setting or reading the HTM SPRs. */ | |
15453 | static inline enum insn_code | |
15454 | rs6000_htm_spr_icode (bool nonvoid) | |
15455 | { | |
15456 | if (nonvoid) | |
15457 | return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si; | |
15458 | else | |
15459 | return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si; | |
15460 | } | |
15461 | ||
/* Expand the HTM builtin in EXP and store the result in TARGET.
   Store true in *EXPANDEDP if we found a builtin to expand.  */
static rtx
htm_expand_builtin (tree exp, rtx target, bool * expandedp)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  /* NONVOID is true when the builtin produces a result.  */
  bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
  enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
  const struct builtin_description *d;
  size_t i;

  *expandedp = true;

  /* The doubleword abort builtins only exist on 64-bit targets.  */
  if (!TARGET_POWERPC64
      && (fcode == HTM_BUILTIN_TABORTDC
	  || fcode == HTM_BUILTIN_TABORTDCI))
    {
      size_t uns_fcode = (size_t)fcode;
      const char *name = rs6000_builtin_info[uns_fcode].name;
      error ("builtin %s is only valid in 64-bit mode", name);
      return const0_rtx;
    }

  /* Expand the HTM builtins.  */
  d = bdesc_htm;
  for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
    if (d->code == fcode)
      {
	/* OP accumulates the operands of the instruction pattern in
	   order; NOPNDS counts how many have been filled in so far.  */
	rtx op[MAX_HTM_OPERANDS], pat;
	int nopnds = 0;
	tree arg;
	call_expr_arg_iterator iter;
	unsigned attr = rs6000_builtin_info[fcode].attr;
	enum insn_code icode = d->icode;
	const struct insn_operand_data *insn_op;
	bool uses_spr = (attr & RS6000_BTC_SPR);
	rtx cr = NULL_RTX;

	/* SPR accessors share generic mfspr/mtspr patterns rather than
	   the per-builtin icode from the table.  */
	if (uses_spr)
	  icode = rs6000_htm_spr_icode (nonvoid);
	insn_op = &insn_data[icode].operand[0];

	if (nonvoid)
	  {
	    machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode;
	    if (!target
		|| GET_MODE (target) != tmode
		|| (uses_spr && !(*insn_op->predicate) (target, tmode)))
	      target = gen_reg_rtx (tmode);
	    /* For SPR reads the destination is operand 0 of the pattern;
	       for CR-producing builtins TARGET is filled in after the
	       insn is emitted (see below).  */
	    if (uses_spr)
	      op[nopnds++] = target;
	  }

	/* Expand each source-level argument into the next operand slot,
	   validating it against the pattern's predicate.  */
	FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
	  {
	    if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
	      return const0_rtx;

	    insn_op = &insn_data[icode].operand[nopnds];

	    op[nopnds] = expand_normal (arg);

	    if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
	      {
		/* Constraint "n" marks an operand that must be a literal
		   constant; anything else can simply be forced into a
		   register.  */
		if (!strcmp (insn_op->constraint, "n"))
		  {
		    /* Report the argument position as the user wrote it:
		       when the builtin returns a value, operand 0 is the
		       result, so source argument K sits in slot K.  */
		    int arg_num = (nonvoid) ? nopnds : nopnds + 1;
		    if (!CONST_INT_P (op[nopnds]))
		      error ("argument %d must be an unsigned literal", arg_num);
		    else
		      error ("argument %d is an unsigned literal that is "
			     "out of range", arg_num);
		    return const0_rtx;
		  }
		op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
	      }

	    nopnds++;
	  }

	/* Handle the builtins for extended mnemonics.  These accept
	   no arguments, but map to builtins that take arguments.  */
	switch (fcode)
	  {
	  case HTM_BUILTIN_TENDALL:  /* Alias for: tend. 1  */
	  case HTM_BUILTIN_TRESUME:  /* Alias for: tsr. 1  */
	    op[nopnds++] = GEN_INT (1);
	    if (flag_checking)
	      attr |= RS6000_BTC_UNARY;
	    break;
	  case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0  */
	    op[nopnds++] = GEN_INT (0);
	    if (flag_checking)
	      attr |= RS6000_BTC_UNARY;
	    break;
	  default:
	    break;
	  }

	/* If this builtin accesses SPRs, then pass in the appropriate
	   SPR number and SPR regno as the last two operands.  */
	if (uses_spr)
	  {
	    machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
	    op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
	    op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
	  }
	/* If this builtin accesses a CR, then pass in a scratch
	   CR as the last operand.  */
	else if (attr & RS6000_BTC_CR)
	  { cr = gen_reg_rtx (CCmode);
	    op[nopnds++] = cr;
	  }

	/* With checking enabled, verify the operand count we accumulated
	   matches what the builtin's attributes promise.  */
	if (flag_checking)
	  {
	    int expected_nopnds = 0;
	    if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
	      expected_nopnds = 1;
	    else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
	      expected_nopnds = 2;
	    else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
	      expected_nopnds = 3;
	    if (!(attr & RS6000_BTC_VOID))
	      expected_nopnds += 1;
	    if (uses_spr)
	      expected_nopnds += 2;

	    gcc_assert (nopnds == expected_nopnds
			&& nopnds <= MAX_HTM_OPERANDS);
	  }

	switch (nopnds)
	  {
	  case 1:
	    pat = GEN_FCN (icode) (op[0]);
	    break;
	  case 2:
	    pat = GEN_FCN (icode) (op[0], op[1]);
	    break;
	  case 3:
	    pat = GEN_FCN (icode) (op[0], op[1], op[2]);
	    break;
	  case 4:
	    pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
	    break;
	  default:
	    gcc_unreachable ();
	  }
	if (!pat)
	  return NULL_RTX;
	emit_insn (pat);

	if (attr & RS6000_BTC_CR)
	  {
	    if (fcode == HTM_BUILTIN_TBEGIN)
	      {
		/* Emit code to set TARGET to true or false depending on
		   whether the tbegin. instruction successfully or failed
		   to start a transaction.  We do this by placing the 1's
		   complement of CR's EQ bit into TARGET.  */
		rtx scratch = gen_reg_rtx (SImode);
		emit_insn (gen_rtx_SET (scratch,
					gen_rtx_EQ (SImode, cr,
						    const0_rtx)));
		emit_insn (gen_rtx_SET (target,
					gen_rtx_XOR (SImode, scratch,
						     GEN_INT (1))));
	      }
	    else
	      {
		/* Emit code to copy the 4-bit condition register field
		   CR into the least significant end of register TARGET.  */
		rtx scratch1 = gen_reg_rtx (SImode);
		rtx scratch2 = gen_reg_rtx (SImode);
		rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
		emit_insn (gen_movcc (subreg, cr));
		emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
		emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
	      }
	  }

	if (nonvoid)
	  return target;
	return const0_rtx;
      }

  /* FCODE is not an HTM builtin; let the caller try other families.  */
  *expandedp = false;
  return NULL_RTX;
}
15652 | ||
/* Expand the CPU builtin in FCODE and store the result in TARGET.  */

static rtx
cpu_expand_builtin (enum rs6000_builtins fcode, tree exp ATTRIBUTE_UNUSED,
		    rtx target)
{
  /* __builtin_cpu_init () is a nop, so expand to nothing.  */
  if (fcode == RS6000_BUILTIN_CPU_INIT)
    return const0_rtx;

  if (target == 0 || GET_MODE (target) != SImode)
    target = gen_reg_rtx (SImode);

#ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
  /* The argument arrives as &"string"[0]; strip the ADDR_EXPR wrapper
     to get at the STRING_CST itself.  */
  tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0);
  if (TREE_CODE (arg) != STRING_CST)
    {
      error ("builtin %s only accepts a string argument",
	     rs6000_builtin_info[(size_t) fcode].name);
      return const0_rtx;
    }

  if (fcode == RS6000_BUILTIN_CPU_IS)
    {
      /* Compare the platform id stored in the TCB against the id
	 corresponding to the requested CPU name.  */
      const char *cpu = TREE_STRING_POINTER (arg);
      rtx cpuid = NULL_RTX;
      for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++)
	if (strcmp (cpu, cpu_is_info[i].cpu) == 0)
	  {
	    /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM.  */
	    cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM);
	    break;
	  }
      if (cpuid == NULL_RTX)
	{
	  /* Invalid CPU argument.  */
	  error ("cpu %s is an invalid argument to builtin %s",
		 cpu, rs6000_builtin_info[(size_t) fcode].name);
	  return const0_rtx;
	}

      rtx platform = gen_reg_rtx (SImode);
      /* Load the platform word from the thread control block, addressed
	 relative to the thread-pointer register.  */
      rtx tcbmem = gen_const_mem (SImode,
				  gen_rtx_PLUS (Pmode,
						gen_rtx_REG (Pmode, TLS_REGNUM),
						GEN_INT (TCB_PLATFORM_OFFSET)));
      emit_move_insn (platform, tcbmem);
      emit_insn (gen_eqsi3 (target, platform, cpuid));
    }
  else if (fcode == RS6000_BUILTIN_CPU_SUPPORTS)
    {
      /* Test a single feature bit in the TCB's hwcap words.  */
      const char *hwcap = TREE_STRING_POINTER (arg);
      rtx mask = NULL_RTX;
      int hwcap_offset;
      for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++)
	if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0)
	  {
	    mask = GEN_INT (cpu_supports_info[i].mask);
	    hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id);
	    break;
	  }
      if (mask == NULL_RTX)
	{
	  /* Invalid HWCAP argument.  */
	  error ("hwcap %s is an invalid argument to builtin %s",
		 hwcap, rs6000_builtin_info[(size_t) fcode].name);
	  return const0_rtx;
	}

      rtx tcb_hwcap = gen_reg_rtx (SImode);
      rtx tcbmem = gen_const_mem (SImode,
				  gen_rtx_PLUS (Pmode,
						gen_rtx_REG (Pmode, TLS_REGNUM),
						GEN_INT (hwcap_offset)));
      emit_move_insn (tcb_hwcap, tcbmem);
      /* TARGET = ((hwcap_word & mask) == 0) ^ 1, i.e. 1 iff the feature
	 bit is set.  */
      rtx scratch1 = gen_reg_rtx (SImode);
      emit_insn (gen_rtx_SET (scratch1, gen_rtx_AND (SImode, tcb_hwcap, mask)));
      rtx scratch2 = gen_reg_rtx (SImode);
      emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx));
      emit_insn (gen_rtx_SET (target, gen_rtx_XOR (SImode, scratch2, const1_rtx)));
    }

  /* Record that we have expanded a CPU builtin, so that we can later
     emit a reference to the special symbol exported by LIBC to ensure we
     do not link against an old LIBC that doesn't support this feature.  */
  cpu_builtin_p = true;

#else
  /* For old LIBCs, always return FALSE.  */
  emit_move_insn (target, GEN_INT (0));
#endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */

  return target;
}
15747 | ||
15748 | static rtx | |
15749 | rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target) | |
15750 | { | |
15751 | rtx pat; | |
15752 | tree arg0 = CALL_EXPR_ARG (exp, 0); | |
15753 | tree arg1 = CALL_EXPR_ARG (exp, 1); | |
15754 | tree arg2 = CALL_EXPR_ARG (exp, 2); | |
15755 | rtx op0 = expand_normal (arg0); | |
15756 | rtx op1 = expand_normal (arg1); | |
15757 | rtx op2 = expand_normal (arg2); | |
15758 | machine_mode tmode = insn_data[icode].operand[0].mode; | |
15759 | machine_mode mode0 = insn_data[icode].operand[1].mode; | |
15760 | machine_mode mode1 = insn_data[icode].operand[2].mode; | |
15761 | machine_mode mode2 = insn_data[icode].operand[3].mode; | |
15762 | ||
15763 | if (icode == CODE_FOR_nothing) | |
15764 | /* Builtin not supported on this processor. */ | |
15765 | return 0; | |
15766 | ||
15767 | /* If we got invalid arguments bail out before generating bad rtl. */ | |
15768 | if (arg0 == error_mark_node | |
15769 | || arg1 == error_mark_node | |
15770 | || arg2 == error_mark_node) | |
15771 | return const0_rtx; | |
15772 | ||
15773 | /* Check and prepare argument depending on the instruction code. | |
15774 | ||
15775 | Note that a switch statement instead of the sequence of tests | |
15776 | would be incorrect as many of the CODE_FOR values could be | |
15777 | CODE_FOR_nothing and that would yield multiple alternatives | |
15778 | with identical values. We'd never reach here at runtime in | |
15779 | this case. */ | |
15780 | if (icode == CODE_FOR_altivec_vsldoi_v4sf | |
15781 | || icode == CODE_FOR_altivec_vsldoi_v2df | |
15782 | || icode == CODE_FOR_altivec_vsldoi_v4si | |
15783 | || icode == CODE_FOR_altivec_vsldoi_v8hi | |
15784 | || icode == CODE_FOR_altivec_vsldoi_v16qi) | |
15785 | { | |
15786 | /* Only allow 4-bit unsigned literals. */ | |
15787 | STRIP_NOPS (arg2); | |
15788 | if (TREE_CODE (arg2) != INTEGER_CST | |
15789 | || TREE_INT_CST_LOW (arg2) & ~0xf) | |
15790 | { | |
15791 | error ("argument 3 must be a 4-bit unsigned literal"); | |
15792 | return CONST0_RTX (tmode); | |
15793 | } | |
15794 | } | |
15795 | else if (icode == CODE_FOR_vsx_xxpermdi_v2df | |
15796 | || icode == CODE_FOR_vsx_xxpermdi_v2di | |
15797 | || icode == CODE_FOR_vsx_xxpermdi_v2df_be | |
15798 | || icode == CODE_FOR_vsx_xxpermdi_v2di_be | |
15799 | || icode == CODE_FOR_vsx_xxpermdi_v1ti | |
15800 | || icode == CODE_FOR_vsx_xxpermdi_v4sf | |
15801 | || icode == CODE_FOR_vsx_xxpermdi_v4si | |
15802 | || icode == CODE_FOR_vsx_xxpermdi_v8hi | |
15803 | || icode == CODE_FOR_vsx_xxpermdi_v16qi | |
15804 | || icode == CODE_FOR_vsx_xxsldwi_v16qi | |
15805 | || icode == CODE_FOR_vsx_xxsldwi_v8hi | |
15806 | || icode == CODE_FOR_vsx_xxsldwi_v4si | |
15807 | || icode == CODE_FOR_vsx_xxsldwi_v4sf | |
15808 | || icode == CODE_FOR_vsx_xxsldwi_v2di | |
15809 | || icode == CODE_FOR_vsx_xxsldwi_v2df) | |
15810 | { | |
15811 | /* Only allow 2-bit unsigned literals. */ | |
15812 | STRIP_NOPS (arg2); | |
15813 | if (TREE_CODE (arg2) != INTEGER_CST | |
15814 | || TREE_INT_CST_LOW (arg2) & ~0x3) | |
15815 | { | |
15816 | error ("argument 3 must be a 2-bit unsigned literal"); | |
15817 | return CONST0_RTX (tmode); | |
15818 | } | |
15819 | } | |
15820 | else if (icode == CODE_FOR_vsx_set_v2df | |
15821 | || icode == CODE_FOR_vsx_set_v2di | |
15822 | || icode == CODE_FOR_bcdadd | |
15823 | || icode == CODE_FOR_bcdadd_lt | |
15824 | || icode == CODE_FOR_bcdadd_eq | |
15825 | || icode == CODE_FOR_bcdadd_gt | |
15826 | || icode == CODE_FOR_bcdsub | |
15827 | || icode == CODE_FOR_bcdsub_lt | |
15828 | || icode == CODE_FOR_bcdsub_eq | |
15829 | || icode == CODE_FOR_bcdsub_gt) | |
15830 | { | |
15831 | /* Only allow 1-bit unsigned literals. */ | |
15832 | STRIP_NOPS (arg2); | |
15833 | if (TREE_CODE (arg2) != INTEGER_CST | |
15834 | || TREE_INT_CST_LOW (arg2) & ~0x1) | |
15835 | { | |
15836 | error ("argument 3 must be a 1-bit unsigned literal"); | |
15837 | return CONST0_RTX (tmode); | |
15838 | } | |
15839 | } | |
15840 | else if (icode == CODE_FOR_dfp_ddedpd_dd | |
15841 | || icode == CODE_FOR_dfp_ddedpd_td) | |
15842 | { | |
15843 | /* Only allow 2-bit unsigned literals where the value is 0 or 2. */ | |
15844 | STRIP_NOPS (arg0); | |
15845 | if (TREE_CODE (arg0) != INTEGER_CST | |
15846 | || TREE_INT_CST_LOW (arg2) & ~0x3) | |
15847 | { | |
15848 | error ("argument 1 must be 0 or 2"); | |
15849 | return CONST0_RTX (tmode); | |
15850 | } | |
15851 | } | |
15852 | else if (icode == CODE_FOR_dfp_denbcd_dd | |
15853 | || icode == CODE_FOR_dfp_denbcd_td) | |
15854 | { | |
15855 | /* Only allow 1-bit unsigned literals. */ | |
15856 | STRIP_NOPS (arg0); | |
15857 | if (TREE_CODE (arg0) != INTEGER_CST | |
15858 | || TREE_INT_CST_LOW (arg0) & ~0x1) | |
15859 | { | |
15860 | error ("argument 1 must be a 1-bit unsigned literal"); | |
15861 | return CONST0_RTX (tmode); | |
15862 | } | |
15863 | } | |
15864 | else if (icode == CODE_FOR_dfp_dscli_dd | |
15865 | || icode == CODE_FOR_dfp_dscli_td | |
15866 | || icode == CODE_FOR_dfp_dscri_dd | |
15867 | || icode == CODE_FOR_dfp_dscri_td) | |
15868 | { | |
15869 | /* Only allow 6-bit unsigned literals. */ | |
15870 | STRIP_NOPS (arg1); | |
15871 | if (TREE_CODE (arg1) != INTEGER_CST | |
15872 | || TREE_INT_CST_LOW (arg1) & ~0x3f) | |
15873 | { | |
15874 | error ("argument 2 must be a 6-bit unsigned literal"); | |
15875 | return CONST0_RTX (tmode); | |
15876 | } | |
15877 | } | |
15878 | else if (icode == CODE_FOR_crypto_vshasigmaw | |
15879 | || icode == CODE_FOR_crypto_vshasigmad) | |
15880 | { | |
15881 | /* Check whether the 2nd and 3rd arguments are integer constants and in | |
15882 | range and prepare arguments. */ | |
15883 | STRIP_NOPS (arg1); | |
8e6cdc90 | 15884 | if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (wi::to_wide (arg1), 2)) |
83349046 SB |
15885 | { |
15886 | error ("argument 2 must be 0 or 1"); | |
15887 | return CONST0_RTX (tmode); | |
15888 | } | |
15889 | ||
15890 | STRIP_NOPS (arg2); | |
8e6cdc90 RS |
15891 | if (TREE_CODE (arg2) != INTEGER_CST |
15892 | || wi::geu_p (wi::to_wide (arg2), 16)) | |
83349046 SB |
15893 | { |
15894 | error ("argument 3 must be in the range 0..15"); | |
15895 | return CONST0_RTX (tmode); | |
15896 | } | |
15897 | } | |
15898 | ||
15899 | if (target == 0 | |
15900 | || GET_MODE (target) != tmode | |
15901 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
15902 | target = gen_reg_rtx (tmode); | |
15903 | ||
15904 | if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) | |
15905 | op0 = copy_to_mode_reg (mode0, op0); | |
15906 | if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) | |
15907 | op1 = copy_to_mode_reg (mode1, op1); | |
15908 | if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) | |
15909 | op2 = copy_to_mode_reg (mode2, op2); | |
15910 | ||
15911 | if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4) | |
15912 | pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode)); | |
15913 | else | |
15914 | pat = GEN_FCN (icode) (target, op0, op1, op2); | |
15915 | if (! pat) | |
15916 | return 0; | |
15917 | emit_insn (pat); | |
15918 | ||
15919 | return target; | |
15920 | } | |
15921 | ||
15922 | /* Expand the lvx builtins. */ | |
15923 | static rtx | |
15924 | altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp) | |
15925 | { | |
15926 | tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); | |
15927 | unsigned int fcode = DECL_FUNCTION_CODE (fndecl); | |
15928 | tree arg0; | |
15929 | machine_mode tmode, mode0; | |
15930 | rtx pat, op0; | |
15931 | enum insn_code icode; | |
15932 | ||
15933 | switch (fcode) | |
15934 | { | |
15935 | case ALTIVEC_BUILTIN_LD_INTERNAL_16qi: | |
15936 | icode = CODE_FOR_vector_altivec_load_v16qi; | |
15937 | break; | |
15938 | case ALTIVEC_BUILTIN_LD_INTERNAL_8hi: | |
15939 | icode = CODE_FOR_vector_altivec_load_v8hi; | |
15940 | break; | |
15941 | case ALTIVEC_BUILTIN_LD_INTERNAL_4si: | |
15942 | icode = CODE_FOR_vector_altivec_load_v4si; | |
15943 | break; | |
15944 | case ALTIVEC_BUILTIN_LD_INTERNAL_4sf: | |
15945 | icode = CODE_FOR_vector_altivec_load_v4sf; | |
15946 | break; | |
15947 | case ALTIVEC_BUILTIN_LD_INTERNAL_2df: | |
15948 | icode = CODE_FOR_vector_altivec_load_v2df; | |
15949 | break; | |
15950 | case ALTIVEC_BUILTIN_LD_INTERNAL_2di: | |
15951 | icode = CODE_FOR_vector_altivec_load_v2di; | |
15952 | break; | |
15953 | case ALTIVEC_BUILTIN_LD_INTERNAL_1ti: | |
15954 | icode = CODE_FOR_vector_altivec_load_v1ti; | |
15955 | break; | |
15956 | default: | |
15957 | *expandedp = false; | |
15958 | return NULL_RTX; | |
15959 | } | |
15960 | ||
15961 | *expandedp = true; | |
15962 | ||
15963 | arg0 = CALL_EXPR_ARG (exp, 0); | |
15964 | op0 = expand_normal (arg0); | |
15965 | tmode = insn_data[icode].operand[0].mode; | |
15966 | mode0 = insn_data[icode].operand[1].mode; | |
15967 | ||
15968 | if (target == 0 | |
15969 | || GET_MODE (target) != tmode | |
15970 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
15971 | target = gen_reg_rtx (tmode); | |
15972 | ||
15973 | if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) | |
15974 | op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); | |
15975 | ||
15976 | pat = GEN_FCN (icode) (target, op0); | |
15977 | if (! pat) | |
15978 | return 0; | |
15979 | emit_insn (pat); | |
15980 | return target; | |
15981 | } | |
15982 | ||
15983 | /* Expand the stvx builtins. */ | |
15984 | static rtx | |
15985 | altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, | |
15986 | bool *expandedp) | |
15987 | { | |
15988 | tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); | |
15989 | unsigned int fcode = DECL_FUNCTION_CODE (fndecl); | |
15990 | tree arg0, arg1; | |
15991 | machine_mode mode0, mode1; | |
15992 | rtx pat, op0, op1; | |
15993 | enum insn_code icode; | |
15994 | ||
15995 | switch (fcode) | |
15996 | { | |
15997 | case ALTIVEC_BUILTIN_ST_INTERNAL_16qi: | |
15998 | icode = CODE_FOR_vector_altivec_store_v16qi; | |
15999 | break; | |
16000 | case ALTIVEC_BUILTIN_ST_INTERNAL_8hi: | |
16001 | icode = CODE_FOR_vector_altivec_store_v8hi; | |
16002 | break; | |
16003 | case ALTIVEC_BUILTIN_ST_INTERNAL_4si: | |
16004 | icode = CODE_FOR_vector_altivec_store_v4si; | |
16005 | break; | |
16006 | case ALTIVEC_BUILTIN_ST_INTERNAL_4sf: | |
16007 | icode = CODE_FOR_vector_altivec_store_v4sf; | |
16008 | break; | |
16009 | case ALTIVEC_BUILTIN_ST_INTERNAL_2df: | |
16010 | icode = CODE_FOR_vector_altivec_store_v2df; | |
16011 | break; | |
16012 | case ALTIVEC_BUILTIN_ST_INTERNAL_2di: | |
16013 | icode = CODE_FOR_vector_altivec_store_v2di; | |
16014 | break; | |
16015 | case ALTIVEC_BUILTIN_ST_INTERNAL_1ti: | |
16016 | icode = CODE_FOR_vector_altivec_store_v1ti; | |
16017 | break; | |
16018 | default: | |
16019 | *expandedp = false; | |
16020 | return NULL_RTX; | |
16021 | } | |
16022 | ||
16023 | arg0 = CALL_EXPR_ARG (exp, 0); | |
16024 | arg1 = CALL_EXPR_ARG (exp, 1); | |
16025 | op0 = expand_normal (arg0); | |
16026 | op1 = expand_normal (arg1); | |
16027 | mode0 = insn_data[icode].operand[0].mode; | |
16028 | mode1 = insn_data[icode].operand[1].mode; | |
16029 | ||
16030 | if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) | |
16031 | op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); | |
16032 | if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) | |
16033 | op1 = copy_to_mode_reg (mode1, op1); | |
16034 | ||
16035 | pat = GEN_FCN (icode) (op0, op1); | |
16036 | if (pat) | |
16037 | emit_insn (pat); | |
16038 | ||
16039 | *expandedp = true; | |
16040 | return NULL_RTX; | |
16041 | } | |
16042 | ||
16043 | /* Expand the dst builtins. */ | |
16044 | static rtx | |
16045 | altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, | |
16046 | bool *expandedp) | |
16047 | { | |
16048 | tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); | |
16049 | enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); | |
16050 | tree arg0, arg1, arg2; | |
16051 | machine_mode mode0, mode1; | |
16052 | rtx pat, op0, op1, op2; | |
16053 | const struct builtin_description *d; | |
16054 | size_t i; | |
16055 | ||
16056 | *expandedp = false; | |
16057 | ||
16058 | /* Handle DST variants. */ | |
16059 | d = bdesc_dst; | |
16060 | for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++) | |
16061 | if (d->code == fcode) | |
16062 | { | |
16063 | arg0 = CALL_EXPR_ARG (exp, 0); | |
16064 | arg1 = CALL_EXPR_ARG (exp, 1); | |
16065 | arg2 = CALL_EXPR_ARG (exp, 2); | |
16066 | op0 = expand_normal (arg0); | |
16067 | op1 = expand_normal (arg1); | |
16068 | op2 = expand_normal (arg2); | |
16069 | mode0 = insn_data[d->icode].operand[0].mode; | |
16070 | mode1 = insn_data[d->icode].operand[1].mode; | |
16071 | ||
16072 | /* Invalid arguments, bail out before generating bad rtl. */ | |
16073 | if (arg0 == error_mark_node | |
16074 | || arg1 == error_mark_node | |
16075 | || arg2 == error_mark_node) | |
16076 | return const0_rtx; | |
16077 | ||
16078 | *expandedp = true; | |
16079 | STRIP_NOPS (arg2); | |
16080 | if (TREE_CODE (arg2) != INTEGER_CST | |
16081 | || TREE_INT_CST_LOW (arg2) & ~0x3) | |
16082 | { | |
16083 | error ("argument to %qs must be a 2-bit unsigned literal", d->name); | |
16084 | return const0_rtx; | |
16085 | } | |
16086 | ||
16087 | if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0)) | |
16088 | op0 = copy_to_mode_reg (Pmode, op0); | |
16089 | if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1)) | |
16090 | op1 = copy_to_mode_reg (mode1, op1); | |
16091 | ||
16092 | pat = GEN_FCN (d->icode) (op0, op1, op2); | |
16093 | if (pat != 0) | |
16094 | emit_insn (pat); | |
16095 | ||
16096 | return NULL_RTX; | |
16097 | } | |
16098 | ||
16099 | return NULL_RTX; | |
16100 | } | |
16101 | ||
16102 | /* Expand vec_init builtin. */ | |
16103 | static rtx | |
16104 | altivec_expand_vec_init_builtin (tree type, tree exp, rtx target) | |
16105 | { | |
16106 | machine_mode tmode = TYPE_MODE (type); | |
16107 | machine_mode inner_mode = GET_MODE_INNER (tmode); | |
16108 | int i, n_elt = GET_MODE_NUNITS (tmode); | |
16109 | ||
16110 | gcc_assert (VECTOR_MODE_P (tmode)); | |
16111 | gcc_assert (n_elt == call_expr_nargs (exp)); | |
16112 | ||
16113 | if (!target || !register_operand (target, tmode)) | |
16114 | target = gen_reg_rtx (tmode); | |
16115 | ||
16116 | /* If we have a vector compromised of a single element, such as V1TImode, do | |
16117 | the initialization directly. */ | |
16118 | if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode)) | |
16119 | { | |
16120 | rtx x = expand_normal (CALL_EXPR_ARG (exp, 0)); | |
16121 | emit_move_insn (target, gen_lowpart (tmode, x)); | |
16122 | } | |
16123 | else | |
16124 | { | |
16125 | rtvec v = rtvec_alloc (n_elt); | |
16126 | ||
16127 | for (i = 0; i < n_elt; ++i) | |
16128 | { | |
16129 | rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); | |
16130 | RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); | |
16131 | } | |
16132 | ||
16133 | rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v)); | |
16134 | } | |
16135 | ||
16136 | return target; | |
16137 | } | |
16138 | ||
16139 | /* Return the integer constant in ARG. Constrain it to be in the range | |
16140 | of the subparts of VEC_TYPE; issue an error if not. */ | |
16141 | ||
16142 | static int | |
16143 | get_element_number (tree vec_type, tree arg) | |
16144 | { | |
16145 | unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; | |
16146 | ||
16147 | if (!tree_fits_uhwi_p (arg) | |
16148 | || (elt = tree_to_uhwi (arg), elt > max)) | |
16149 | { | |
16150 | error ("selector must be an integer constant in the range 0..%wi", max); | |
16151 | return 0; | |
16152 | } | |
16153 | ||
16154 | return elt; | |
16155 | } | |
16156 | ||
16157 | /* Expand vec_set builtin. */ | |
16158 | static rtx | |
16159 | altivec_expand_vec_set_builtin (tree exp) | |
16160 | { | |
16161 | machine_mode tmode, mode1; | |
16162 | tree arg0, arg1, arg2; | |
16163 | int elt; | |
16164 | rtx op0, op1; | |
16165 | ||
16166 | arg0 = CALL_EXPR_ARG (exp, 0); | |
16167 | arg1 = CALL_EXPR_ARG (exp, 1); | |
16168 | arg2 = CALL_EXPR_ARG (exp, 2); | |
16169 | ||
16170 | tmode = TYPE_MODE (TREE_TYPE (arg0)); | |
16171 | mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); | |
16172 | gcc_assert (VECTOR_MODE_P (tmode)); | |
16173 | ||
16174 | op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL); | |
16175 | op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL); | |
16176 | elt = get_element_number (TREE_TYPE (arg0), arg2); | |
16177 | ||
16178 | if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) | |
16179 | op1 = convert_modes (mode1, GET_MODE (op1), op1, true); | |
16180 | ||
16181 | op0 = force_reg (tmode, op0); | |
16182 | op1 = force_reg (mode1, op1); | |
16183 | ||
16184 | rs6000_expand_vector_set (op0, op1, elt); | |
16185 | ||
16186 | return op0; | |
16187 | } | |
16188 | ||
16189 | /* Expand vec_ext builtin. */ | |
16190 | static rtx | |
16191 | altivec_expand_vec_ext_builtin (tree exp, rtx target) | |
16192 | { | |
16193 | machine_mode tmode, mode0; | |
16194 | tree arg0, arg1; | |
16195 | rtx op0; | |
16196 | rtx op1; | |
16197 | ||
16198 | arg0 = CALL_EXPR_ARG (exp, 0); | |
16199 | arg1 = CALL_EXPR_ARG (exp, 1); | |
16200 | ||
16201 | op0 = expand_normal (arg0); | |
16202 | op1 = expand_normal (arg1); | |
16203 | ||
16204 | /* Call get_element_number to validate arg1 if it is a constant. */ | |
16205 | if (TREE_CODE (arg1) == INTEGER_CST) | |
16206 | (void) get_element_number (TREE_TYPE (arg0), arg1); | |
16207 | ||
16208 | tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); | |
16209 | mode0 = TYPE_MODE (TREE_TYPE (arg0)); | |
16210 | gcc_assert (VECTOR_MODE_P (mode0)); | |
16211 | ||
16212 | op0 = force_reg (mode0, op0); | |
16213 | ||
16214 | if (optimize || !target || !register_operand (target, tmode)) | |
16215 | target = gen_reg_rtx (tmode); | |
16216 | ||
16217 | rs6000_expand_vector_extract (target, op0, op1); | |
16218 | ||
16219 | return target; | |
16220 | } | |
16221 | ||
16222 | /* Expand the builtin in EXP and store the result in TARGET. Store | |
16223 | true in *EXPANDEDP if we found a builtin to expand. */ | |
16224 | static rtx | |
16225 | altivec_expand_builtin (tree exp, rtx target, bool *expandedp) | |
16226 | { | |
16227 | const struct builtin_description *d; | |
16228 | size_t i; | |
16229 | enum insn_code icode; | |
16230 | tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); | |
16231 | tree arg0, arg1, arg2; | |
16232 | rtx op0, pat; | |
16233 | machine_mode tmode, mode0; | |
16234 | enum rs6000_builtins fcode | |
16235 | = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); | |
16236 | ||
16237 | if (rs6000_overloaded_builtin_p (fcode)) | |
16238 | { | |
16239 | *expandedp = true; | |
16240 | error ("unresolved overload for Altivec builtin %qF", fndecl); | |
16241 | ||
16242 | /* Given it is invalid, just generate a normal call. */ | |
16243 | return expand_call (exp, target, false); | |
16244 | } | |
16245 | ||
16246 | target = altivec_expand_ld_builtin (exp, target, expandedp); | |
16247 | if (*expandedp) | |
16248 | return target; | |
16249 | ||
16250 | target = altivec_expand_st_builtin (exp, target, expandedp); | |
16251 | if (*expandedp) | |
16252 | return target; | |
16253 | ||
16254 | target = altivec_expand_dst_builtin (exp, target, expandedp); | |
16255 | if (*expandedp) | |
16256 | return target; | |
16257 | ||
16258 | *expandedp = true; | |
16259 | ||
16260 | switch (fcode) | |
16261 | { | |
16262 | case ALTIVEC_BUILTIN_STVX_V2DF: | |
16263 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df_2op, exp); | |
16264 | case ALTIVEC_BUILTIN_STVX_V2DI: | |
16265 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di_2op, exp); | |
16266 | case ALTIVEC_BUILTIN_STVX_V4SF: | |
16267 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf_2op, exp); | |
16268 | case ALTIVEC_BUILTIN_STVX: | |
16269 | case ALTIVEC_BUILTIN_STVX_V4SI: | |
16270 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si_2op, exp); | |
16271 | case ALTIVEC_BUILTIN_STVX_V8HI: | |
16272 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi_2op, exp); | |
16273 | case ALTIVEC_BUILTIN_STVX_V16QI: | |
16274 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi_2op, exp); | |
16275 | case ALTIVEC_BUILTIN_STVEBX: | |
16276 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp); | |
16277 | case ALTIVEC_BUILTIN_STVEHX: | |
16278 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp); | |
16279 | case ALTIVEC_BUILTIN_STVEWX: | |
16280 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp); | |
16281 | case ALTIVEC_BUILTIN_STVXL_V2DF: | |
16282 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp); | |
16283 | case ALTIVEC_BUILTIN_STVXL_V2DI: | |
16284 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp); | |
16285 | case ALTIVEC_BUILTIN_STVXL_V4SF: | |
16286 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp); | |
16287 | case ALTIVEC_BUILTIN_STVXL: | |
16288 | case ALTIVEC_BUILTIN_STVXL_V4SI: | |
16289 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp); | |
16290 | case ALTIVEC_BUILTIN_STVXL_V8HI: | |
16291 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp); | |
16292 | case ALTIVEC_BUILTIN_STVXL_V16QI: | |
16293 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp); | |
16294 | ||
16295 | case ALTIVEC_BUILTIN_STVLX: | |
16296 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp); | |
16297 | case ALTIVEC_BUILTIN_STVLXL: | |
16298 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp); | |
16299 | case ALTIVEC_BUILTIN_STVRX: | |
16300 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp); | |
16301 | case ALTIVEC_BUILTIN_STVRXL: | |
16302 | return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp); | |
16303 | ||
16304 | case P9V_BUILTIN_STXVL: | |
16305 | return altivec_expand_stxvl_builtin (CODE_FOR_stxvl, exp); | |
16306 | ||
16307 | case VSX_BUILTIN_STXVD2X_V1TI: | |
16308 | return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp); | |
16309 | case VSX_BUILTIN_STXVD2X_V2DF: | |
16310 | return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp); | |
16311 | case VSX_BUILTIN_STXVD2X_V2DI: | |
16312 | return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp); | |
16313 | case VSX_BUILTIN_STXVW4X_V4SF: | |
16314 | return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp); | |
16315 | case VSX_BUILTIN_STXVW4X_V4SI: | |
16316 | return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp); | |
16317 | case VSX_BUILTIN_STXVW4X_V8HI: | |
16318 | return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp); | |
16319 | case VSX_BUILTIN_STXVW4X_V16QI: | |
16320 | return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp); | |
16321 | ||
16322 | /* For the following on big endian, it's ok to use any appropriate | |
16323 | unaligned-supporting store, so use a generic expander. For | |
16324 | little-endian, the exact element-reversing instruction must | |
16325 | be used. */ | |
16326 | case VSX_BUILTIN_ST_ELEMREV_V2DF: | |
16327 | { | |
16328 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df | |
16329 | : CODE_FOR_vsx_st_elemrev_v2df); | |
16330 | return altivec_expand_stv_builtin (code, exp); | |
16331 | } | |
16332 | case VSX_BUILTIN_ST_ELEMREV_V2DI: | |
16333 | { | |
16334 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di | |
16335 | : CODE_FOR_vsx_st_elemrev_v2di); | |
16336 | return altivec_expand_stv_builtin (code, exp); | |
16337 | } | |
16338 | case VSX_BUILTIN_ST_ELEMREV_V4SF: | |
16339 | { | |
16340 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf | |
16341 | : CODE_FOR_vsx_st_elemrev_v4sf); | |
16342 | return altivec_expand_stv_builtin (code, exp); | |
16343 | } | |
16344 | case VSX_BUILTIN_ST_ELEMREV_V4SI: | |
16345 | { | |
16346 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si | |
16347 | : CODE_FOR_vsx_st_elemrev_v4si); | |
16348 | return altivec_expand_stv_builtin (code, exp); | |
16349 | } | |
16350 | case VSX_BUILTIN_ST_ELEMREV_V8HI: | |
16351 | { | |
16352 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi | |
16353 | : CODE_FOR_vsx_st_elemrev_v8hi); | |
16354 | return altivec_expand_stv_builtin (code, exp); | |
16355 | } | |
16356 | case VSX_BUILTIN_ST_ELEMREV_V16QI: | |
16357 | { | |
16358 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi | |
16359 | : CODE_FOR_vsx_st_elemrev_v16qi); | |
16360 | return altivec_expand_stv_builtin (code, exp); | |
16361 | } | |
16362 | ||
16363 | case ALTIVEC_BUILTIN_MFVSCR: | |
16364 | icode = CODE_FOR_altivec_mfvscr; | |
16365 | tmode = insn_data[icode].operand[0].mode; | |
16366 | ||
16367 | if (target == 0 | |
16368 | || GET_MODE (target) != tmode | |
16369 | || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) | |
16370 | target = gen_reg_rtx (tmode); | |
16371 | ||
16372 | pat = GEN_FCN (icode) (target); | |
16373 | if (! pat) | |
16374 | return 0; | |
16375 | emit_insn (pat); | |
16376 | return target; | |
16377 | ||
16378 | case ALTIVEC_BUILTIN_MTVSCR: | |
16379 | icode = CODE_FOR_altivec_mtvscr; | |
16380 | arg0 = CALL_EXPR_ARG (exp, 0); | |
16381 | op0 = expand_normal (arg0); | |
16382 | mode0 = insn_data[icode].operand[0].mode; | |
16383 | ||
16384 | /* If we got invalid arguments bail out before generating bad rtl. */ | |
16385 | if (arg0 == error_mark_node) | |
16386 | return const0_rtx; | |
16387 | ||
16388 | if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) | |
16389 | op0 = copy_to_mode_reg (mode0, op0); | |
16390 | ||
16391 | pat = GEN_FCN (icode) (op0); | |
16392 | if (pat) | |
16393 | emit_insn (pat); | |
16394 | return NULL_RTX; | |
16395 | ||
16396 | case ALTIVEC_BUILTIN_DSSALL: | |
16397 | emit_insn (gen_altivec_dssall ()); | |
16398 | return NULL_RTX; | |
16399 | ||
16400 | case ALTIVEC_BUILTIN_DSS: | |
16401 | icode = CODE_FOR_altivec_dss; | |
16402 | arg0 = CALL_EXPR_ARG (exp, 0); | |
16403 | STRIP_NOPS (arg0); | |
16404 | op0 = expand_normal (arg0); | |
16405 | mode0 = insn_data[icode].operand[0].mode; | |
16406 | ||
16407 | /* If we got invalid arguments bail out before generating bad rtl. */ | |
16408 | if (arg0 == error_mark_node) | |
16409 | return const0_rtx; | |
16410 | ||
16411 | if (TREE_CODE (arg0) != INTEGER_CST | |
16412 | || TREE_INT_CST_LOW (arg0) & ~0x3) | |
16413 | { | |
16414 | error ("argument to dss must be a 2-bit unsigned literal"); | |
16415 | return const0_rtx; | |
16416 | } | |
16417 | ||
16418 | if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) | |
16419 | op0 = copy_to_mode_reg (mode0, op0); | |
16420 | ||
16421 | emit_insn (gen_altivec_dss (op0)); | |
16422 | return NULL_RTX; | |
16423 | ||
16424 | case ALTIVEC_BUILTIN_VEC_INIT_V4SI: | |
16425 | case ALTIVEC_BUILTIN_VEC_INIT_V8HI: | |
16426 | case ALTIVEC_BUILTIN_VEC_INIT_V16QI: | |
16427 | case ALTIVEC_BUILTIN_VEC_INIT_V4SF: | |
16428 | case VSX_BUILTIN_VEC_INIT_V2DF: | |
16429 | case VSX_BUILTIN_VEC_INIT_V2DI: | |
16430 | case VSX_BUILTIN_VEC_INIT_V1TI: | |
16431 | return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); | |
16432 | ||
16433 | case ALTIVEC_BUILTIN_VEC_SET_V4SI: | |
16434 | case ALTIVEC_BUILTIN_VEC_SET_V8HI: | |
16435 | case ALTIVEC_BUILTIN_VEC_SET_V16QI: | |
16436 | case ALTIVEC_BUILTIN_VEC_SET_V4SF: | |
16437 | case VSX_BUILTIN_VEC_SET_V2DF: | |
16438 | case VSX_BUILTIN_VEC_SET_V2DI: | |
16439 | case VSX_BUILTIN_VEC_SET_V1TI: | |
16440 | return altivec_expand_vec_set_builtin (exp); | |
16441 | ||
16442 | case ALTIVEC_BUILTIN_VEC_EXT_V4SI: | |
16443 | case ALTIVEC_BUILTIN_VEC_EXT_V8HI: | |
16444 | case ALTIVEC_BUILTIN_VEC_EXT_V16QI: | |
16445 | case ALTIVEC_BUILTIN_VEC_EXT_V4SF: | |
16446 | case VSX_BUILTIN_VEC_EXT_V2DF: | |
16447 | case VSX_BUILTIN_VEC_EXT_V2DI: | |
16448 | case VSX_BUILTIN_VEC_EXT_V1TI: | |
16449 | return altivec_expand_vec_ext_builtin (exp, target); | |
16450 | ||
16451 | case P9V_BUILTIN_VEXTRACT4B: | |
16452 | case P9V_BUILTIN_VEC_VEXTRACT4B: | |
16453 | arg1 = CALL_EXPR_ARG (exp, 1); | |
16454 | STRIP_NOPS (arg1); | |
16455 | ||
16456 | /* Generate a normal call if it is invalid. */ | |
16457 | if (arg1 == error_mark_node) | |
16458 | return expand_call (exp, target, false); | |
16459 | ||
16460 | if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) > 12) | |
16461 | { | |
16462 | error ("second argument to vec_vextract4b must be 0..12"); | |
16463 | return expand_call (exp, target, false); | |
16464 | } | |
16465 | break; | |
16466 | ||
16467 | case P9V_BUILTIN_VINSERT4B: | |
16468 | case P9V_BUILTIN_VINSERT4B_DI: | |
16469 | case P9V_BUILTIN_VEC_VINSERT4B: | |
16470 | arg2 = CALL_EXPR_ARG (exp, 2); | |
16471 | STRIP_NOPS (arg2); | |
16472 | ||
16473 | /* Generate a normal call if it is invalid. */ | |
16474 | if (arg2 == error_mark_node) | |
16475 | return expand_call (exp, target, false); | |
16476 | ||
16477 | if (TREE_CODE (arg2) != INTEGER_CST || TREE_INT_CST_LOW (arg2) > 12) | |
16478 | { | |
16479 | error ("third argument to vec_vinsert4b must be 0..12"); | |
16480 | return expand_call (exp, target, false); | |
16481 | } | |
16482 | break; | |
16483 | ||
16484 | default: | |
16485 | break; | |
16486 | /* Fall through. */ | |
16487 | } | |
16488 | ||
16489 | /* Expand abs* operations. */ | |
16490 | d = bdesc_abs; | |
16491 | for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++) | |
16492 | if (d->code == fcode) | |
16493 | return altivec_expand_abs_builtin (d->icode, exp, target); | |
16494 | ||
16495 | /* Expand the AltiVec predicates. */ | |
16496 | d = bdesc_altivec_preds; | |
16497 | for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++) | |
16498 | if (d->code == fcode) | |
16499 | return altivec_expand_predicate_builtin (d->icode, exp, target); | |
16500 | ||
16501 | /* LV* are funky. We initialized them differently. */ | |
16502 | switch (fcode) | |
16503 | { | |
16504 | case ALTIVEC_BUILTIN_LVSL: | |
16505 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl, | |
16506 | exp, target, false); | |
16507 | case ALTIVEC_BUILTIN_LVSR: | |
16508 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr, | |
16509 | exp, target, false); | |
16510 | case ALTIVEC_BUILTIN_LVEBX: | |
16511 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx, | |
16512 | exp, target, false); | |
16513 | case ALTIVEC_BUILTIN_LVEHX: | |
16514 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx, | |
16515 | exp, target, false); | |
16516 | case ALTIVEC_BUILTIN_LVEWX: | |
16517 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx, | |
16518 | exp, target, false); | |
16519 | case ALTIVEC_BUILTIN_LVXL_V2DF: | |
16520 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df, | |
16521 | exp, target, false); | |
16522 | case ALTIVEC_BUILTIN_LVXL_V2DI: | |
16523 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di, | |
16524 | exp, target, false); | |
16525 | case ALTIVEC_BUILTIN_LVXL_V4SF: | |
16526 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf, | |
16527 | exp, target, false); | |
16528 | case ALTIVEC_BUILTIN_LVXL: | |
16529 | case ALTIVEC_BUILTIN_LVXL_V4SI: | |
16530 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si, | |
16531 | exp, target, false); | |
16532 | case ALTIVEC_BUILTIN_LVXL_V8HI: | |
16533 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi, | |
16534 | exp, target, false); | |
16535 | case ALTIVEC_BUILTIN_LVXL_V16QI: | |
16536 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi, | |
16537 | exp, target, false); | |
16538 | case ALTIVEC_BUILTIN_LVX_V2DF: | |
16539 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df_2op, | |
16540 | exp, target, false); | |
16541 | case ALTIVEC_BUILTIN_LVX_V2DI: | |
16542 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di_2op, | |
16543 | exp, target, false); | |
16544 | case ALTIVEC_BUILTIN_LVX_V4SF: | |
16545 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf_2op, | |
16546 | exp, target, false); | |
16547 | case ALTIVEC_BUILTIN_LVX: | |
16548 | case ALTIVEC_BUILTIN_LVX_V4SI: | |
16549 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si_2op, | |
16550 | exp, target, false); | |
16551 | case ALTIVEC_BUILTIN_LVX_V8HI: | |
16552 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi_2op, | |
16553 | exp, target, false); | |
16554 | case ALTIVEC_BUILTIN_LVX_V16QI: | |
16555 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi_2op, | |
16556 | exp, target, false); | |
16557 | case ALTIVEC_BUILTIN_LVLX: | |
16558 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx, | |
16559 | exp, target, true); | |
16560 | case ALTIVEC_BUILTIN_LVLXL: | |
16561 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl, | |
16562 | exp, target, true); | |
16563 | case ALTIVEC_BUILTIN_LVRX: | |
16564 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx, | |
16565 | exp, target, true); | |
16566 | case ALTIVEC_BUILTIN_LVRXL: | |
16567 | return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl, | |
16568 | exp, target, true); | |
16569 | case VSX_BUILTIN_LXVD2X_V1TI: | |
16570 | return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti, | |
16571 | exp, target, false); | |
16572 | case VSX_BUILTIN_LXVD2X_V2DF: | |
16573 | return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df, | |
16574 | exp, target, false); | |
16575 | case VSX_BUILTIN_LXVD2X_V2DI: | |
16576 | return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di, | |
16577 | exp, target, false); | |
16578 | case VSX_BUILTIN_LXVW4X_V4SF: | |
16579 | return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf, | |
16580 | exp, target, false); | |
16581 | case VSX_BUILTIN_LXVW4X_V4SI: | |
16582 | return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si, | |
16583 | exp, target, false); | |
16584 | case VSX_BUILTIN_LXVW4X_V8HI: | |
16585 | return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi, | |
16586 | exp, target, false); | |
16587 | case VSX_BUILTIN_LXVW4X_V16QI: | |
16588 | return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi, | |
16589 | exp, target, false); | |
16590 | /* For the following on big endian, it's ok to use any appropriate | |
16591 | unaligned-supporting load, so use a generic expander. For | |
16592 | little-endian, the exact element-reversing instruction must | |
16593 | be used. */ | |
16594 | case VSX_BUILTIN_LD_ELEMREV_V2DF: | |
16595 | { | |
16596 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df | |
16597 | : CODE_FOR_vsx_ld_elemrev_v2df); | |
16598 | return altivec_expand_lv_builtin (code, exp, target, false); | |
16599 | } | |
16600 | case VSX_BUILTIN_LD_ELEMREV_V2DI: | |
16601 | { | |
16602 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di | |
16603 | : CODE_FOR_vsx_ld_elemrev_v2di); | |
16604 | return altivec_expand_lv_builtin (code, exp, target, false); | |
16605 | } | |
16606 | case VSX_BUILTIN_LD_ELEMREV_V4SF: | |
16607 | { | |
16608 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf | |
16609 | : CODE_FOR_vsx_ld_elemrev_v4sf); | |
16610 | return altivec_expand_lv_builtin (code, exp, target, false); | |
16611 | } | |
16612 | case VSX_BUILTIN_LD_ELEMREV_V4SI: | |
16613 | { | |
16614 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si | |
16615 | : CODE_FOR_vsx_ld_elemrev_v4si); | |
16616 | return altivec_expand_lv_builtin (code, exp, target, false); | |
16617 | } | |
16618 | case VSX_BUILTIN_LD_ELEMREV_V8HI: | |
16619 | { | |
16620 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi | |
16621 | : CODE_FOR_vsx_ld_elemrev_v8hi); | |
16622 | return altivec_expand_lv_builtin (code, exp, target, false); | |
16623 | } | |
16624 | case VSX_BUILTIN_LD_ELEMREV_V16QI: | |
16625 | { | |
16626 | enum insn_code code = (BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi | |
16627 | : CODE_FOR_vsx_ld_elemrev_v16qi); | |
16628 | return altivec_expand_lv_builtin (code, exp, target, false); | |
16629 | } | |
16630 | break; | |
16631 | default: | |
16632 | break; | |
16633 | /* Fall through. */ | |
16634 | } | |
16635 | ||
16636 | *expandedp = false; | |
16637 | return NULL_RTX; | |
16638 | } | |
16639 | ||
16640 | /* Expand the builtin in EXP and store the result in TARGET. Store | |
16641 | true in *EXPANDEDP if we found a builtin to expand. */ | |
16642 | static rtx | |
16643 | paired_expand_builtin (tree exp, rtx target, bool * expandedp) | |
16644 | { | |
16645 | tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); | |
16646 | enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); | |
16647 | const struct builtin_description *d; | |
16648 | size_t i; | |
16649 | ||
16650 | *expandedp = true; | |
16651 | ||
16652 | switch (fcode) | |
16653 | { | |
16654 | case PAIRED_BUILTIN_STX: | |
16655 | return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp); | |
16656 | case PAIRED_BUILTIN_LX: | |
16657 | return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target); | |
16658 | default: | |
16659 | break; | |
16660 | /* Fall through. */ | |
16661 | } | |
16662 | ||
16663 | /* Expand the paired predicates. */ | |
16664 | d = bdesc_paired_preds; | |
16665 | for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++) | |
16666 | if (d->code == fcode) | |
16667 | return paired_expand_predicate_builtin (d->icode, exp, target); | |
16668 | ||
16669 | *expandedp = false; | |
16670 | return NULL_RTX; | |
16671 | } | |
16672 | ||
/* Binops that need to be initialized manually, but can be expanded
   automagically by rs6000_expand_binop_builtin.
   Each entry is { required target mask, insn code, builtin name,
   builtin function code }.  The *X forms are indexed (reg+reg)
   addressing; the others take a displacement.  */
static const struct builtin_description bdesc_2arg_spe[] =
{
  { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
  { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
  { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
  { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
  { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
};
16700 | ||
/* Expand the builtin in EXP and store the result in TARGET.  Store
   true in *EXPANDEDP if we found a builtin to expand.

   This expands the SPE builtins that are not simple unary and binary
   operations.  */
static rtx
spe_expand_builtin (tree exp, rtx target, bool *expandedp)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  tree arg1, arg0;
  enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
  enum insn_code icode;
  machine_mode tmode, mode0;
  rtx pat, op0;
  const struct builtin_description *d;
  size_t i;

  *expandedp = true;

  /* Syntax check for a 5-bit unsigned immediate.  */
  switch (fcode)
    {
    case SPE_BUILTIN_EVSTDD:
    case SPE_BUILTIN_EVSTDH:
    case SPE_BUILTIN_EVSTDW:
    case SPE_BUILTIN_EVSTWHE:
    case SPE_BUILTIN_EVSTWHO:
    case SPE_BUILTIN_EVSTWWE:
    case SPE_BUILTIN_EVSTWWO:
      /* NOTE(review): the operand checked is CALL_EXPR_ARG (exp, 2)
	 (the third argument) while the diagnostic says "argument 2" —
	 confirm the intended user-visible numbering.  */
      arg1 = CALL_EXPR_ARG (exp, 2);
      if (TREE_CODE (arg1) != INTEGER_CST
	  || TREE_INT_CST_LOW (arg1) & ~0x1f)
	{
	  error ("argument 2 must be a 5-bit unsigned literal");
	  return const0_rtx;
	}
      break;
    default:
      break;
    }

  /* The evsplat*i instructions are not quite generic.  */
  switch (fcode)
    {
    case SPE_BUILTIN_EVSPLATFI:
      return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
					 exp, target);
    case SPE_BUILTIN_EVSPLATI:
      return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
					 exp, target);
    default:
      break;
    }

  /* Scan the tables of manually-described SPE builtins: binops,
     predicates, then evsel forms.  */
  d = bdesc_2arg_spe;
  for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
    if (d->code == fcode)
      return rs6000_expand_binop_builtin (d->icode, exp, target);

  d = bdesc_spe_predicates;
  for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
    if (d->code == fcode)
      return spe_expand_predicate_builtin (d->icode, exp, target);

  d = bdesc_spe_evsel;
  for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
    if (d->code == fcode)
      return spe_expand_evsel_builtin (d->icode, exp, target);

  /* Vector stores and the SPEFSCR access builtins.  */
  switch (fcode)
    {
    case SPE_BUILTIN_EVSTDDX:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
    case SPE_BUILTIN_EVSTDHX:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
    case SPE_BUILTIN_EVSTDWX:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
    case SPE_BUILTIN_EVSTWHEX:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
    case SPE_BUILTIN_EVSTWHOX:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
    case SPE_BUILTIN_EVSTWWEX:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
    case SPE_BUILTIN_EVSTWWOX:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
    case SPE_BUILTIN_EVSTDD:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
    case SPE_BUILTIN_EVSTDH:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
    case SPE_BUILTIN_EVSTDW:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
    case SPE_BUILTIN_EVSTWHE:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
    case SPE_BUILTIN_EVSTWHO:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
    case SPE_BUILTIN_EVSTWWE:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
    case SPE_BUILTIN_EVSTWWO:
      return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
    case SPE_BUILTIN_MFSPEFSCR:
      /* Read SPEFSCR: no operands, result goes into TARGET (or a
	 fresh register when TARGET is absent or unsuitable).  */
      icode = CODE_FOR_spe_mfspefscr;
      tmode = insn_data[icode].operand[0].mode;

      if (target == 0
	  || GET_MODE (target) != tmode
	  || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);

      pat = GEN_FCN (icode) (target);
      if (! pat)
	return 0;
      emit_insn (pat);
      return target;
    case SPE_BUILTIN_MTSPEFSCR:
      /* Write SPEFSCR: single input operand, no result.  */
      icode = CODE_FOR_spe_mtspefscr;
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = expand_normal (arg0);
      mode0 = insn_data[icode].operand[0].mode;

      /* If we got invalid arguments bail out before generating bad rtl.  */
      if (arg0 == error_mark_node)
	return const0_rtx;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
	op0 = copy_to_mode_reg (mode0, op0);

      pat = GEN_FCN (icode) (op0);
      if (pat)
	emit_insn (pat);
      return NULL_RTX;
    default:
      break;
    }

  /* Not an SPE builtin handled here.  */
  *expandedp = false;
  return NULL_RTX;
}
16837 | ||
/* Expand a paired-single predicate builtin.  Argument 0 of EXP is a
   constant selecting which CR field bit of the comparison to extract
   (0 = LT, 1 = GT, 2 = EQ, 3 = UN); arguments 1 and 2 are the values
   compared by ICODE.  The selected bit is returned in TARGET as an
   SImode 0/1 value.  */
static rtx
paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, scratch, tmp;
  tree form = CALL_EXPR_ARG (exp, 0);
  tree arg0 = CALL_EXPR_ARG (exp, 1);
  tree arg1 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;
  int form_int;
  enum rtx_code code;

  if (TREE_CODE (form) != INTEGER_CST)
    {
      error ("argument 1 of __builtin_paired_predicate must be a constant");
      return const0_rtx;
    }
  else
    form_int = TREE_INT_CST_LOW (form);

  gcc_assert (mode0 == mode1);

  /* If we got invalid arguments bail out before generating bad rtl.  */
  if (arg0 == error_mark_node || arg1 == error_mark_node)
    return const0_rtx;

  if (target == 0
      || GET_MODE (target) != SImode
      || !(*insn_data[icode].operand[0].predicate) (target, SImode))
    target = gen_reg_rtx (SImode);
  if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* Emit the comparison into a CCFP scratch register.  */
  scratch = gen_reg_rtx (CCFPmode);

  pat = GEN_FCN (icode) (scratch, op0, op1);
  if (!pat)
    return const0_rtx;

  emit_insn (pat);

  switch (form_int)
    {
      /* LT bit.  */
    case 0:
      code = LT;
      break;
      /* GT bit.  */
    case 1:
      code = GT;
      break;
      /* EQ bit.  */
    case 2:
      code = EQ;
      break;
      /* UN bit.  */
    case 3:
      /* The unordered bit cannot be expressed as a comparison against
	 the CCFP register, so extract it with a dedicated pattern.  */
      emit_insn (gen_move_from_CR_ov_bit (target, scratch));
      return target;
    default:
      error ("argument 1 of __builtin_paired_predicate is out of range");
      return const0_rtx;
    }

  /* Materialize the chosen CR bit as a 0/1 SImode value.  */
  tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
  emit_move_insn (target, tmp);
  return target;
}
16909 | ||
/* Expand an SPE predicate builtin.  Argument 0 of EXP is a constant
   selecting the predicate variant (0 = all, 1 = any, 2 = upper,
   3 = lower); arguments 1 and 2 are compared using ICODE and the
   corresponding CR bit is returned in TARGET as an SImode 0/1
   value.  See the block comment below for the CR-bit layout.  */
static rtx
spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, scratch, tmp;
  tree form = CALL_EXPR_ARG (exp, 0);
  tree arg0 = CALL_EXPR_ARG (exp, 1);
  tree arg1 = CALL_EXPR_ARG (exp, 2);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;
  int form_int;
  enum rtx_code code;

  if (TREE_CODE (form) != INTEGER_CST)
    {
      error ("argument 1 of __builtin_spe_predicate must be a constant");
      return const0_rtx;
    }
  else
    form_int = TREE_INT_CST_LOW (form);

  gcc_assert (mode0 == mode1);

  /* If we got invalid arguments bail out before generating bad rtl.  */
  if (arg0 == error_mark_node || arg1 == error_mark_node)
    return const0_rtx;

  if (target == 0
      || GET_MODE (target) != SImode
      || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
    target = gen_reg_rtx (SImode);

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  /* Emit the single compare; the variants below just read different
     bits of the resulting CR field.  */
  scratch = gen_reg_rtx (CCmode);

  pat = GEN_FCN (icode) (scratch, op0, op1);
  if (! pat)
    return const0_rtx;
  emit_insn (pat);

  /* There are 4 variants for each predicate: _any_, _all_, _upper_,
     _lower_.  We use one compare, but look in different bits of the
     CR for each variant.

     There are 2 elements in each SPE simd type (upper/lower).  The CR
     bits are set as follows:

     BIT0  | BIT 1  | BIT 2   | BIT 3
     U     |   L    | (U | L) | (U & L)

     So, for an "all" relationship, BIT 3 would be set.
     For an "any" relationship, BIT 2 would be set.  Etc.

     Following traditional nomenclature, these bits map to:

     BIT0  | BIT 1  | BIT 2   | BIT 3
     LT    | GT     | EQ      | OV

     Later, we will generate rtl to look in the LT/EQ/EQ/OV bits.
  */

  switch (form_int)
    {
      /* All variant.  OV bit.  */
    case 0:
      /* We need to get to the OV bit, which is the ORDERED bit.  We
	 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
	 that's ugly and will make validate_condition_mode die.
	 So let's just use another pattern.  */
      emit_insn (gen_move_from_CR_ov_bit (target, scratch));
      return target;
      /* Any variant.  EQ bit.  */
    case 1:
      code = EQ;
      break;
      /* Upper variant.  LT bit.  */
    case 2:
      code = LT;
      break;
      /* Lower variant.  GT bit.  */
    case 3:
      code = GT;
      break;
    default:
      error ("argument 1 of __builtin_spe_predicate is out of range");
      return const0_rtx;
    }

  /* Materialize the chosen CR bit as a 0/1 SImode value.  */
  tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
  emit_move_insn (target, tmp);

  return target;
}
17007 | ||
/* The evsel builtins look like this:

     e = __builtin_spe_evsel_OP (a, b, c, d);

   and work like this:

     e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
     e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
*/

/* Expand one evsel builtin.  ICODE is the comparison pattern for OP;
   it compares operands 0 and 1 of EXP into a CC scratch, after which
   spe_evsel selects per-half between operands 2 and 3.  The result is
   returned in TARGET.  */
static rtx
spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
{
  rtx pat, scratch;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  tree arg2 = CALL_EXPR_ARG (exp, 2);
  tree arg3 = CALL_EXPR_ARG (exp, 3);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  rtx op2 = expand_normal (arg2);
  rtx op3 = expand_normal (arg3);
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;

  gcc_assert (mode0 == mode1);

  /* If we got invalid arguments bail out before generating bad rtl.  */
  if (arg0 == error_mark_node || arg1 == error_mark_node
      || arg2 == error_mark_node || arg3 == error_mark_node)
    return const0_rtx;

  if (target == 0
      || GET_MODE (target) != mode0
      || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
    target = gen_reg_rtx (mode0);

  /* NOTE(review): operand[1]'s predicate is reused for op1..op3 —
     presumably valid because all four data operands share the same
     mode (mode0 == mode1 is asserted above); confirm against the
     spe.md patterns.  */
  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode0, op1);
  if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
    op2 = copy_to_mode_reg (mode0, op2);
  if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
    op3 = copy_to_mode_reg (mode0, op3);

  /* Generate the compare.  */
  scratch = gen_reg_rtx (CCmode);
  pat = GEN_FCN (icode) (scratch, op0, op1);
  if (! pat)
    return const0_rtx;
  emit_insn (pat);

  /* Select per-half between op2 and op3 based on the compare result;
     V2SImode uses the integer pattern, otherwise the FP variant.  */
  if (mode0 == V2SImode)
    emit_insn (gen_spe_evsel (target, op2, op3, scratch));
  else
    emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));

  return target;
}
17067 | ||
17068 | /* Raise an error message for a builtin function that is called without the | |
17069 | appropriate target options being set. */ | |
17070 | ||
17071 | static void | |
17072 | rs6000_invalid_builtin (enum rs6000_builtins fncode) | |
17073 | { | |
17074 | size_t uns_fncode = (size_t)fncode; | |
17075 | const char *name = rs6000_builtin_info[uns_fncode].name; | |
17076 | HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask; | |
17077 | ||
17078 | gcc_assert (name != NULL); | |
17079 | if ((fnmask & RS6000_BTM_CELL) != 0) | |
17080 | error ("Builtin function %s is only valid for the cell processor", name); | |
17081 | else if ((fnmask & RS6000_BTM_VSX) != 0) | |
17082 | error ("Builtin function %s requires the -mvsx option", name); | |
17083 | else if ((fnmask & RS6000_BTM_HTM) != 0) | |
17084 | error ("Builtin function %s requires the -mhtm option", name); | |
17085 | else if ((fnmask & RS6000_BTM_ALTIVEC) != 0) | |
17086 | error ("Builtin function %s requires the -maltivec option", name); | |
17087 | else if ((fnmask & RS6000_BTM_PAIRED) != 0) | |
17088 | error ("Builtin function %s requires the -mpaired option", name); | |
17089 | else if ((fnmask & RS6000_BTM_SPE) != 0) | |
17090 | error ("Builtin function %s requires the -mspe option", name); | |
17091 | else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR)) | |
17092 | == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR)) | |
17093 | error ("Builtin function %s requires the -mhard-dfp and" | |
17094 | " -mpower8-vector options", name); | |
17095 | else if ((fnmask & RS6000_BTM_DFP) != 0) | |
17096 | error ("Builtin function %s requires the -mhard-dfp option", name); | |
17097 | else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0) | |
17098 | error ("Builtin function %s requires the -mpower8-vector option", name); | |
17099 | else if ((fnmask & (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT)) | |
17100 | == (RS6000_BTM_P9_VECTOR | RS6000_BTM_64BIT)) | |
17101 | error ("Builtin function %s requires the -mcpu=power9 and" | |
17102 | " -m64 options", name); | |
17103 | else if ((fnmask & RS6000_BTM_P9_VECTOR) != 0) | |
17104 | error ("Builtin function %s requires the -mcpu=power9 option", name); | |
17105 | else if ((fnmask & (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT)) | |
17106 | == (RS6000_BTM_P9_MISC | RS6000_BTM_64BIT)) | |
17107 | error ("Builtin function %s requires the -mcpu=power9 and" | |
17108 | " -m64 options", name); | |
17109 | else if ((fnmask & RS6000_BTM_P9_MISC) == RS6000_BTM_P9_MISC) | |
17110 | error ("Builtin function %s requires the -mcpu=power9 option", name); | |
17111 | else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128)) | |
17112 | == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128)) | |
17113 | error ("Builtin function %s requires the -mhard-float and" | |
17114 | " -mlong-double-128 options", name); | |
17115 | else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0) | |
17116 | error ("Builtin function %s requires the -mhard-float option", name); | |
17117 | else if ((fnmask & RS6000_BTM_FLOAT128) != 0) | |
17118 | error ("Builtin function %s requires the -mfloat128 option", name); | |
17119 | else | |
17120 | error ("Builtin function %s is not supported with the current options", | |
17121 | name); | |
17122 | } | |
17123 | ||
17124 | /* Target hook for early folding of built-ins, shamelessly stolen | |
17125 | from ia64.c. */ | |
17126 | ||
17127 | static tree | |
17128 | rs6000_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, | |
17129 | tree *args, bool ignore ATTRIBUTE_UNUSED) | |
17130 | { | |
17131 | if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD) | |
17132 | { | |
17133 | enum rs6000_builtins fn_code | |
17134 | = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); | |
17135 | switch (fn_code) | |
17136 | { | |
17137 | case RS6000_BUILTIN_NANQ: | |
17138 | case RS6000_BUILTIN_NANSQ: | |
17139 | { | |
17140 | tree type = TREE_TYPE (TREE_TYPE (fndecl)); | |
17141 | const char *str = c_getstr (*args); | |
17142 | int quiet = fn_code == RS6000_BUILTIN_NANQ; | |
17143 | REAL_VALUE_TYPE real; | |
17144 | ||
17145 | if (str && real_nan (&real, str, quiet, TYPE_MODE (type))) | |
17146 | return build_real (type, real); | |
17147 | return NULL_TREE; | |
17148 | } | |
17149 | case RS6000_BUILTIN_INFQ: | |
17150 | case RS6000_BUILTIN_HUGE_VALQ: | |
17151 | { | |
17152 | tree type = TREE_TYPE (TREE_TYPE (fndecl)); | |
17153 | REAL_VALUE_TYPE inf; | |
17154 | real_inf (&inf); | |
17155 | return build_real (type, inf); | |
17156 | } | |
17157 | default: | |
17158 | break; | |
17159 | } | |
17160 | } | |
17161 | #ifdef SUBTARGET_FOLD_BUILTIN | |
17162 | return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); | |
17163 | #else | |
17164 | return NULL_TREE; | |
17165 | #endif | |
17166 | } | |
17167 | ||
17168 | /* Fold a machine-dependent built-in in GIMPLE. (For folding into | |
17169 | a constant, use rs6000_fold_builtin.) */ | |
17170 | ||
17171 | bool | |
17172 | rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) | |
17173 | { | |
17174 | gimple *stmt = gsi_stmt (*gsi); | |
17175 | tree fndecl = gimple_call_fndecl (stmt); | |
17176 | gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD); | |
17177 | enum rs6000_builtins fn_code | |
17178 | = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl); | |
17179 | tree arg0, arg1, lhs; | |
17180 | ||
17181 | switch (fn_code) | |
17182 | { | |
17183 | /* Flavors of vec_add. We deliberately don't expand | |
17184 | P8V_BUILTIN_VADDUQM as it gets lowered from V1TImode to | |
17185 | TImode, resulting in much poorer code generation. */ | |
17186 | case ALTIVEC_BUILTIN_VADDUBM: | |
17187 | case ALTIVEC_BUILTIN_VADDUHM: | |
17188 | case ALTIVEC_BUILTIN_VADDUWM: | |
17189 | case P8V_BUILTIN_VADDUDM: | |
17190 | case ALTIVEC_BUILTIN_VADDFP: | |
17191 | case VSX_BUILTIN_XVADDDP: | |
17192 | { | |
17193 | arg0 = gimple_call_arg (stmt, 0); | |
17194 | arg1 = gimple_call_arg (stmt, 1); | |
17195 | lhs = gimple_call_lhs (stmt); | |
17196 | gimple *g = gimple_build_assign (lhs, PLUS_EXPR, arg0, arg1); | |
17197 | gimple_set_location (g, gimple_location (stmt)); | |
17198 | gsi_replace (gsi, g, true); | |
17199 | return true; | |
17200 | } | |
17201 | /* Flavors of vec_sub. We deliberately don't expand | |
17202 | P8V_BUILTIN_VSUBUQM. */ | |
17203 | case ALTIVEC_BUILTIN_VSUBUBM: | |
17204 | case ALTIVEC_BUILTIN_VSUBUHM: | |
17205 | case ALTIVEC_BUILTIN_VSUBUWM: | |
17206 | case P8V_BUILTIN_VSUBUDM: | |
17207 | case ALTIVEC_BUILTIN_VSUBFP: | |
17208 | case VSX_BUILTIN_XVSUBDP: | |
17209 | { | |
17210 | arg0 = gimple_call_arg (stmt, 0); | |
17211 | arg1 = gimple_call_arg (stmt, 1); | |
17212 | lhs = gimple_call_lhs (stmt); | |
17213 | gimple *g = gimple_build_assign (lhs, MINUS_EXPR, arg0, arg1); | |
17214 | gimple_set_location (g, gimple_location (stmt)); | |
17215 | gsi_replace (gsi, g, true); | |
17216 | return true; | |
17217 | } | |
17218 | case VSX_BUILTIN_XVMULSP: | |
17219 | case VSX_BUILTIN_XVMULDP: | |
17220 | { | |
17221 | arg0 = gimple_call_arg (stmt, 0); | |
17222 | arg1 = gimple_call_arg (stmt, 1); | |
17223 | lhs = gimple_call_lhs (stmt); | |
17224 | gimple *g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1); | |
17225 | gimple_set_location (g, gimple_location (stmt)); | |
17226 | gsi_replace (gsi, g, true); | |
17227 | return true; | |
17228 | } | |
17229 | /* Even element flavors of vec_mul (signed). */ | |
17230 | case ALTIVEC_BUILTIN_VMULESB: | |
17231 | case ALTIVEC_BUILTIN_VMULESH: | |
17232 | /* Even element flavors of vec_mul (unsigned). */ | |
17233 | case ALTIVEC_BUILTIN_VMULEUB: | |
17234 | case ALTIVEC_BUILTIN_VMULEUH: | |
17235 | { | |
17236 | arg0 = gimple_call_arg (stmt, 0); | |
17237 | arg1 = gimple_call_arg (stmt, 1); | |
17238 | lhs = gimple_call_lhs (stmt); | |
17239 | gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1); | |
17240 | gimple_set_location (g, gimple_location (stmt)); | |
17241 | gsi_replace (gsi, g, true); | |
17242 | return true; | |
17243 | } | |
17244 | /* Odd element flavors of vec_mul (signed). */ | |
17245 | case ALTIVEC_BUILTIN_VMULOSB: | |
17246 | case ALTIVEC_BUILTIN_VMULOSH: | |
17247 | /* Odd element flavors of vec_mul (unsigned). */ | |
17248 | case ALTIVEC_BUILTIN_VMULOUB: | |
17249 | case ALTIVEC_BUILTIN_VMULOUH: | |
17250 | { | |
17251 | arg0 = gimple_call_arg (stmt, 0); | |
17252 | arg1 = gimple_call_arg (stmt, 1); | |
17253 | lhs = gimple_call_lhs (stmt); | |
17254 | gimple *g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1); | |
17255 | gimple_set_location (g, gimple_location (stmt)); | |
17256 | gsi_replace (gsi, g, true); | |
17257 | return true; | |
17258 | } | |
17259 | /* Flavors of vec_div (Integer). */ | |
17260 | case VSX_BUILTIN_DIV_V2DI: | |
17261 | case VSX_BUILTIN_UDIV_V2DI: | |
17262 | { | |
17263 | arg0 = gimple_call_arg (stmt, 0); | |
17264 | arg1 = gimple_call_arg (stmt, 1); | |
17265 | lhs = gimple_call_lhs (stmt); | |
17266 | gimple *g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1); | |
17267 | gimple_set_location (g, gimple_location (stmt)); | |
17268 | gsi_replace (gsi, g, true); | |
17269 | return true; | |
17270 | } | |
17271 | /* Flavors of vec_div (Float). */ | |
17272 | case VSX_BUILTIN_XVDIVSP: | |
17273 | case VSX_BUILTIN_XVDIVDP: | |
17274 | { | |
17275 | arg0 = gimple_call_arg (stmt, 0); | |
17276 | arg1 = gimple_call_arg (stmt, 1); | |
17277 | lhs = gimple_call_lhs (stmt); | |
17278 | gimple *g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1); | |
17279 | gimple_set_location (g, gimple_location (stmt)); | |
17280 | gsi_replace (gsi, g, true); | |
17281 | return true; | |
17282 | } | |
17283 | /* Flavors of vec_and. */ | |
17284 | case ALTIVEC_BUILTIN_VAND: | |
17285 | { | |
17286 | arg0 = gimple_call_arg (stmt, 0); | |
17287 | arg1 = gimple_call_arg (stmt, 1); | |
17288 | lhs = gimple_call_lhs (stmt); | |
17289 | gimple *g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1); | |
17290 | gimple_set_location (g, gimple_location (stmt)); | |
17291 | gsi_replace (gsi, g, true); | |
17292 | return true; | |
17293 | } | |
17294 | /* Flavors of vec_andc. */ | |
17295 | case ALTIVEC_BUILTIN_VANDC: | |
17296 | { | |
17297 | arg0 = gimple_call_arg (stmt, 0); | |
17298 | arg1 = gimple_call_arg (stmt, 1); | |
17299 | lhs = gimple_call_lhs (stmt); | |
17300 | tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); | |
17301 | gimple *g = gimple_build_assign(temp, BIT_NOT_EXPR, arg1); | |
17302 | gimple_set_location (g, gimple_location (stmt)); | |
17303 | gsi_insert_before(gsi, g, GSI_SAME_STMT); | |
17304 | g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp); | |
17305 | gimple_set_location (g, gimple_location (stmt)); | |
17306 | gsi_replace (gsi, g, true); | |
17307 | return true; | |
17308 | } | |
17309 | /* Flavors of vec_nand. */ | |
17310 | case P8V_BUILTIN_VEC_NAND: | |
17311 | case P8V_BUILTIN_NAND_V16QI: | |
17312 | case P8V_BUILTIN_NAND_V8HI: | |
17313 | case P8V_BUILTIN_NAND_V4SI: | |
17314 | case P8V_BUILTIN_NAND_V4SF: | |
17315 | case P8V_BUILTIN_NAND_V2DF: | |
17316 | case P8V_BUILTIN_NAND_V2DI: | |
17317 | { | |
17318 | arg0 = gimple_call_arg (stmt, 0); | |
17319 | arg1 = gimple_call_arg (stmt, 1); | |
17320 | lhs = gimple_call_lhs (stmt); | |
17321 | tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); | |
17322 | gimple *g = gimple_build_assign(temp, BIT_AND_EXPR, arg0, arg1); | |
17323 | gimple_set_location (g, gimple_location (stmt)); | |
17324 | gsi_insert_before(gsi, g, GSI_SAME_STMT); | |
17325 | g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp); | |
17326 | gimple_set_location (g, gimple_location (stmt)); | |
17327 | gsi_replace (gsi, g, true); | |
17328 | return true; | |
17329 | } | |
17330 | /* Flavors of vec_or. */ | |
17331 | case ALTIVEC_BUILTIN_VOR: | |
17332 | { | |
17333 | arg0 = gimple_call_arg (stmt, 0); | |
17334 | arg1 = gimple_call_arg (stmt, 1); | |
17335 | lhs = gimple_call_lhs (stmt); | |
17336 | gimple *g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1); | |
17337 | gimple_set_location (g, gimple_location (stmt)); | |
17338 | gsi_replace (gsi, g, true); | |
17339 | return true; | |
17340 | } | |
17341 | /* flavors of vec_orc. */ | |
17342 | case P8V_BUILTIN_ORC_V16QI: | |
17343 | case P8V_BUILTIN_ORC_V8HI: | |
17344 | case P8V_BUILTIN_ORC_V4SI: | |
17345 | case P8V_BUILTIN_ORC_V4SF: | |
17346 | case P8V_BUILTIN_ORC_V2DF: | |
17347 | case P8V_BUILTIN_ORC_V2DI: | |
17348 | { | |
17349 | arg0 = gimple_call_arg (stmt, 0); | |
17350 | arg1 = gimple_call_arg (stmt, 1); | |
17351 | lhs = gimple_call_lhs (stmt); | |
17352 | tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); | |
17353 | gimple *g = gimple_build_assign(temp, BIT_NOT_EXPR, arg1); | |
17354 | gimple_set_location (g, gimple_location (stmt)); | |
17355 | gsi_insert_before(gsi, g, GSI_SAME_STMT); | |
17356 | g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp); | |
17357 | gimple_set_location (g, gimple_location (stmt)); | |
17358 | gsi_replace (gsi, g, true); | |
17359 | return true; | |
17360 | } | |
17361 | /* Flavors of vec_xor. */ | |
17362 | case ALTIVEC_BUILTIN_VXOR: | |
17363 | { | |
17364 | arg0 = gimple_call_arg (stmt, 0); | |
17365 | arg1 = gimple_call_arg (stmt, 1); | |
17366 | lhs = gimple_call_lhs (stmt); | |
17367 | gimple *g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1); | |
17368 | gimple_set_location (g, gimple_location (stmt)); | |
17369 | gsi_replace (gsi, g, true); | |
17370 | return true; | |
17371 | } | |
17372 | /* Flavors of vec_nor. */ | |
17373 | case ALTIVEC_BUILTIN_VNOR: | |
17374 | { | |
17375 | arg0 = gimple_call_arg (stmt, 0); | |
17376 | arg1 = gimple_call_arg (stmt, 1); | |
17377 | lhs = gimple_call_lhs (stmt); | |
17378 | tree temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); | |
17379 | gimple *g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1); | |
17380 | gimple_set_location (g, gimple_location (stmt)); | |
17381 | gsi_insert_before(gsi, g, GSI_SAME_STMT); | |
17382 | g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp); | |
17383 | gimple_set_location (g, gimple_location (stmt)); | |
17384 | gsi_replace (gsi, g, true); | |
17385 | return true; | |
17386 | } | |
17387 | default: | |
17388 | break; | |
17389 | } | |
17390 | ||
17391 | return false; | |
17392 | } | |
17393 | ||
17394 | /* Expand an expression EXP that calls a built-in function, | |
17395 | with result going to TARGET if that's convenient | |
17396 | (and in mode MODE if that's convenient). | |
17397 | SUBTARGET may be used as the target for computing one of EXP's operands. | |
17398 | IGNORE is nonzero if the value is to be ignored. */ | |
17399 | ||
static rtx
rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		       machine_mode mode ATTRIBUTE_UNUSED,
		       int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  enum rs6000_builtins fcode
    = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
  size_t uns_fcode = (size_t)fcode;
  const struct builtin_description *d;
  size_t i;
  rtx ret;
  bool success;
  /* MASK holds the target-feature bits this builtin requires; it is
     usable only when every bit is set in rs6000_builtin_mask.  */
  HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
  bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);

  if (TARGET_DEBUG_BUILTIN)
    {
      /* Dump builtin name, insn pattern name and operator class.  */
      enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
      const char *name1 = rs6000_builtin_info[uns_fcode].name;
      const char *name2 = ((icode != CODE_FOR_nothing)
			   ? get_insn_name ((int)icode)
			   : "nothing");
      const char *name3;

      switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
	{
	default: name3 = "unknown"; break;
	case RS6000_BTC_SPECIAL: name3 = "special"; break;
	case RS6000_BTC_UNARY: name3 = "unary"; break;
	case RS6000_BTC_BINARY: name3 = "binary"; break;
	case RS6000_BTC_TERNARY: name3 = "ternary"; break;
	case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
	case RS6000_BTC_ABS: name3 = "abs"; break;
	case RS6000_BTC_EVSEL: name3 = "evsel"; break;
	case RS6000_BTC_DST: name3 = "dst"; break;
	}


      fprintf (stderr,
	       "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
	       (name1) ? name1 : "---", fcode,
	       (name2) ? name2 : "---", (int)icode,
	       name3,
	       func_valid_p ? "" : ", not valid");
    }

  if (!func_valid_p)
    {
      rs6000_invalid_builtin (fcode);

      /* Given it is invalid, just generate a normal call.  */
      return expand_call (exp, target, ignore);
    }

  /* Builtins with hand-written expanders come first; everything else
     falls through to the per-subtarget expanders and the generic
     0/1/2/3-argument tables at the bottom.  */
  switch (fcode)
    {
    case RS6000_BUILTIN_RECIP:
      return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);

    case RS6000_BUILTIN_RECIPF:
      return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);

    case RS6000_BUILTIN_RSQRTF:
      return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);

    case RS6000_BUILTIN_RSQRT:
      return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);

    case POWER7_BUILTIN_BPERMD:
      return rs6000_expand_binop_builtin (((TARGET_64BIT)
					   ? CODE_FOR_bpermd_di
					   : CODE_FOR_bpermd_si), exp, target);

    case RS6000_BUILTIN_GET_TB:
      return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
					   target);

    case RS6000_BUILTIN_MFTB:
      return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
					    ? CODE_FOR_rs6000_mftb_di
					    : CODE_FOR_rs6000_mftb_si),
					   target);

    case RS6000_BUILTIN_MFFS:
      return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);

    case RS6000_BUILTIN_MTFSF:
      return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);

    case RS6000_BUILTIN_CPU_INIT:
    case RS6000_BUILTIN_CPU_IS:
    case RS6000_BUILTIN_CPU_SUPPORTS:
      return cpu_expand_builtin (fcode, exp, target);

    case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
    case ALTIVEC_BUILTIN_MASK_FOR_STORE:
      {
	/* lvsr for big endian, lvsl for little endian.  */
	int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
		     : (int) CODE_FOR_altivec_lvsl_direct);
	machine_mode tmode = insn_data[icode].operand[0].mode;
	machine_mode mode = insn_data[icode].operand[1].mode;
	tree arg;
	rtx op, addr, pat;

	gcc_assert (TARGET_ALTIVEC);

	arg = CALL_EXPR_ARG (exp, 0);
	gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
	op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
	addr = memory_address (mode, op);
	if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
	  op = addr;
	else
	  {
	    /* For the load case need to negate the address.  */
	    op = gen_reg_rtx (GET_MODE (addr));
	    emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
	  }
	op = gen_rtx_MEM (mode, op);

	if (target == 0
	    || GET_MODE (target) != tmode
	    || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
	  target = gen_reg_rtx (tmode);

	pat = GEN_FCN (icode) (target, op);
	if (!pat)
	  return 0;
	emit_insn (pat);

	return target;
      }

    case ALTIVEC_BUILTIN_VCFUX:
    case ALTIVEC_BUILTIN_VCFSX:
    case ALTIVEC_BUILTIN_VCTUXS:
    case ALTIVEC_BUILTIN_VCTSXS:
      /* FIXME: There's got to be a nicer way to handle this case than
	 constructing a new CALL_EXPR.  */
      /* These take an optional scale argument; supply an explicit zero
	 when it was omitted so expansion always sees two arguments.  */
      if (call_expr_nargs (exp) == 1)
	{
	  exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
				 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
	}
      break;

    default:
      break;
    }

  /* Try each enabled subtarget's expander in turn; SUCCESS tells us
     whether the builtin belonged to that subtarget.  */
  if (TARGET_ALTIVEC)
    {
      ret = altivec_expand_builtin (exp, target, &success);

      if (success)
	return ret;
    }
  if (TARGET_SPE)
    {
      ret = spe_expand_builtin (exp, target, &success);

      if (success)
	return ret;
    }
  if (TARGET_PAIRED_FLOAT)
    {
      ret = paired_expand_builtin (exp, target, &success);

      if (success)
	return ret;
    }
  if (TARGET_HTM)
    {
      ret = htm_expand_builtin (exp, target, &success);

      if (success)
	return ret;
    }

  unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
  /* RS6000_BTC_SPECIAL represents no-operand operators.  */
  gcc_assert (attr == RS6000_BTC_UNARY
	      || attr == RS6000_BTC_BINARY
	      || attr == RS6000_BTC_TERNARY
	      || attr == RS6000_BTC_SPECIAL);

  /* Handle simple unary operations.  */
  d = bdesc_1arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return rs6000_expand_unop_builtin (d->icode, exp, target);

  /* Handle simple binary operations.  */
  d = bdesc_2arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      return rs6000_expand_binop_builtin (d->icode, exp, target);

  /* Handle simple ternary operations.  */
  d = bdesc_3arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
    if (d->code == fcode)
      return rs6000_expand_ternop_builtin (d->icode, exp, target);

  /* Handle simple no-argument operations.  */
  d = bdesc_0arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
    if (d->code == fcode)
      return rs6000_expand_zeroop_builtin (d->icode, target);

  /* A valid builtin must have been handled by one of the cases above.  */
  gcc_unreachable ();
}
17613 | ||
17614 | /* Create a builtin vector type with a name. Taking care not to give | |
17615 | the canonical type a name. */ | |
17616 | ||
17617 | static tree | |
17618 | rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts) | |
17619 | { | |
17620 | tree result = build_vector_type (elt_type, num_elts); | |
17621 | ||
17622 | /* Copy so we don't give the canonical type a name. */ | |
17623 | result = build_variant_type_copy (result); | |
17624 | ||
17625 | add_builtin_type (name, result); | |
17626 | ||
17627 | return result; | |
17628 | } | |
17629 | ||
17630 | static void | |
17631 | rs6000_init_builtins (void) | |
17632 | { | |
17633 | tree tdecl; | |
17634 | tree ftype; | |
17635 | machine_mode mode; | |
17636 | ||
17637 | if (TARGET_DEBUG_BUILTIN) | |
17638 | fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n", | |
17639 | (TARGET_PAIRED_FLOAT) ? ", paired" : "", | |
17640 | (TARGET_SPE) ? ", spe" : "", | |
17641 | (TARGET_ALTIVEC) ? ", altivec" : "", | |
17642 | (TARGET_VSX) ? ", vsx" : ""); | |
17643 | ||
17644 | V2SI_type_node = build_vector_type (intSI_type_node, 2); | |
17645 | V2SF_type_node = build_vector_type (float_type_node, 2); | |
17646 | V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 ? "__vector long" | |
17647 | : "__vector long long", | |
17648 | intDI_type_node, 2); | |
17649 | V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2); | |
17650 | V4HI_type_node = build_vector_type (intHI_type_node, 4); | |
17651 | V4SI_type_node = rs6000_vector_type ("__vector signed int", | |
17652 | intSI_type_node, 4); | |
17653 | V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4); | |
17654 | V8HI_type_node = rs6000_vector_type ("__vector signed short", | |
17655 | intHI_type_node, 8); | |
17656 | V16QI_type_node = rs6000_vector_type ("__vector signed char", | |
17657 | intQI_type_node, 16); | |
17658 | ||
17659 | unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char", | |
17660 | unsigned_intQI_type_node, 16); | |
17661 | unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short", | |
17662 | unsigned_intHI_type_node, 8); | |
17663 | unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int", | |
17664 | unsigned_intSI_type_node, 4); | |
17665 | unsigned_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 | |
17666 | ? "__vector unsigned long" | |
17667 | : "__vector unsigned long long", | |
17668 | unsigned_intDI_type_node, 2); | |
17669 | ||
17670 | opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2); | |
17671 | opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2); | |
17672 | opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node); | |
17673 | opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4); | |
17674 | ||
17675 | const_str_type_node | |
17676 | = build_pointer_type (build_qualified_type (char_type_node, | |
17677 | TYPE_QUAL_CONST)); | |
17678 | ||
17679 | /* We use V1TI mode as a special container to hold __int128_t items that | |
17680 | must live in VSX registers. */ | |
17681 | if (intTI_type_node) | |
17682 | { | |
17683 | V1TI_type_node = rs6000_vector_type ("__vector __int128", | |
17684 | intTI_type_node, 1); | |
17685 | unsigned_V1TI_type_node | |
17686 | = rs6000_vector_type ("__vector unsigned __int128", | |
17687 | unsigned_intTI_type_node, 1); | |
17688 | } | |
17689 | ||
17690 | /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...' | |
17691 | types, especially in C++ land. Similarly, 'vector pixel' is distinct from | |
17692 | 'vector unsigned short'. */ | |
17693 | ||
17694 | bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node); | |
17695 | bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node); | |
17696 | bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node); | |
17697 | bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node); | |
17698 | pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node); | |
17699 | ||
17700 | long_integer_type_internal_node = long_integer_type_node; | |
17701 | long_unsigned_type_internal_node = long_unsigned_type_node; | |
17702 | long_long_integer_type_internal_node = long_long_integer_type_node; | |
17703 | long_long_unsigned_type_internal_node = long_long_unsigned_type_node; | |
17704 | intQI_type_internal_node = intQI_type_node; | |
17705 | uintQI_type_internal_node = unsigned_intQI_type_node; | |
17706 | intHI_type_internal_node = intHI_type_node; | |
17707 | uintHI_type_internal_node = unsigned_intHI_type_node; | |
17708 | intSI_type_internal_node = intSI_type_node; | |
17709 | uintSI_type_internal_node = unsigned_intSI_type_node; | |
17710 | intDI_type_internal_node = intDI_type_node; | |
17711 | uintDI_type_internal_node = unsigned_intDI_type_node; | |
17712 | intTI_type_internal_node = intTI_type_node; | |
17713 | uintTI_type_internal_node = unsigned_intTI_type_node; | |
17714 | float_type_internal_node = float_type_node; | |
17715 | double_type_internal_node = double_type_node; | |
17716 | long_double_type_internal_node = long_double_type_node; | |
17717 | dfloat64_type_internal_node = dfloat64_type_node; | |
17718 | dfloat128_type_internal_node = dfloat128_type_node; | |
17719 | void_type_internal_node = void_type_node; | |
17720 | ||
17721 | /* 128-bit floating point support. KFmode is IEEE 128-bit floating point. | |
17722 | IFmode is the IBM extended 128-bit format that is a pair of doubles. | |
17723 | TFmode will be either IEEE 128-bit floating point or the IBM double-double | |
17724 | format that uses a pair of doubles, depending on the switches and | |
17725 | defaults. | |
17726 | ||
17727 | We do not enable the actual __float128 keyword unless the user explicitly | |
17728 | asks for it, because the library support is not yet complete. | |
17729 | ||
17730 | If we don't support for either 128-bit IBM double double or IEEE 128-bit | |
17731 | floating point, we need make sure the type is non-zero or else self-test | |
17732 | fails during bootstrap. | |
17733 | ||
17734 | We don't register a built-in type for __ibm128 if the type is the same as | |
17735 | long double. Instead we add a #define for __ibm128 in | |
17736 | rs6000_cpu_cpp_builtins to long double. */ | |
17737 | if (TARGET_LONG_DOUBLE_128 && FLOAT128_IEEE_P (TFmode)) | |
17738 | { | |
17739 | ibm128_float_type_node = make_node (REAL_TYPE); | |
17740 | TYPE_PRECISION (ibm128_float_type_node) = 128; | |
17741 | SET_TYPE_MODE (ibm128_float_type_node, IFmode); | |
17742 | layout_type (ibm128_float_type_node); | |
17743 | ||
17744 | lang_hooks.types.register_builtin_type (ibm128_float_type_node, | |
17745 | "__ibm128"); | |
17746 | } | |
17747 | else | |
17748 | ibm128_float_type_node = long_double_type_node; | |
17749 | ||
17750 | if (TARGET_FLOAT128_KEYWORD) | |
17751 | { | |
17752 | ieee128_float_type_node = float128_type_node; | |
17753 | lang_hooks.types.register_builtin_type (ieee128_float_type_node, | |
17754 | "__float128"); | |
17755 | } | |
17756 | ||
17757 | else if (TARGET_FLOAT128_TYPE) | |
17758 | { | |
17759 | ieee128_float_type_node = make_node (REAL_TYPE); | |
17760 | TYPE_PRECISION (ibm128_float_type_node) = 128; | |
17761 | SET_TYPE_MODE (ieee128_float_type_node, KFmode); | |
17762 | layout_type (ieee128_float_type_node); | |
17763 | ||
17764 | /* If we are not exporting the __float128/_Float128 keywords, we need a | |
17765 | keyword to get the types created. Use __ieee128 as the dummy | |
17766 | keyword. */ | |
17767 | lang_hooks.types.register_builtin_type (ieee128_float_type_node, | |
17768 | "__ieee128"); | |
17769 | } | |
17770 | ||
17771 | else | |
17772 | ieee128_float_type_node = long_double_type_node; | |
17773 | ||
17774 | /* Initialize the modes for builtin_function_type, mapping a machine mode to | |
17775 | tree type node. */ | |
17776 | builtin_mode_to_type[QImode][0] = integer_type_node; | |
17777 | builtin_mode_to_type[HImode][0] = integer_type_node; | |
17778 | builtin_mode_to_type[SImode][0] = intSI_type_node; | |
17779 | builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node; | |
17780 | builtin_mode_to_type[DImode][0] = intDI_type_node; | |
17781 | builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node; | |
17782 | builtin_mode_to_type[TImode][0] = intTI_type_node; | |
17783 | builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node; | |
17784 | builtin_mode_to_type[SFmode][0] = float_type_node; | |
17785 | builtin_mode_to_type[DFmode][0] = double_type_node; | |
17786 | builtin_mode_to_type[IFmode][0] = ibm128_float_type_node; | |
17787 | builtin_mode_to_type[KFmode][0] = ieee128_float_type_node; | |
17788 | builtin_mode_to_type[TFmode][0] = long_double_type_node; | |
17789 | builtin_mode_to_type[DDmode][0] = dfloat64_type_node; | |
17790 | builtin_mode_to_type[TDmode][0] = dfloat128_type_node; | |
17791 | builtin_mode_to_type[V1TImode][0] = V1TI_type_node; | |
17792 | builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node; | |
17793 | builtin_mode_to_type[V2SImode][0] = V2SI_type_node; | |
17794 | builtin_mode_to_type[V2SFmode][0] = V2SF_type_node; | |
17795 | builtin_mode_to_type[V2DImode][0] = V2DI_type_node; | |
17796 | builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node; | |
17797 | builtin_mode_to_type[V2DFmode][0] = V2DF_type_node; | |
17798 | builtin_mode_to_type[V4HImode][0] = V4HI_type_node; | |
17799 | builtin_mode_to_type[V4SImode][0] = V4SI_type_node; | |
17800 | builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node; | |
17801 | builtin_mode_to_type[V4SFmode][0] = V4SF_type_node; | |
17802 | builtin_mode_to_type[V8HImode][0] = V8HI_type_node; | |
17803 | builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node; | |
17804 | builtin_mode_to_type[V16QImode][0] = V16QI_type_node; | |
17805 | builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node; | |
17806 | ||
17807 | tdecl = add_builtin_type ("__bool char", bool_char_type_node); | |
17808 | TYPE_NAME (bool_char_type_node) = tdecl; | |
17809 | ||
17810 | tdecl = add_builtin_type ("__bool short", bool_short_type_node); | |
17811 | TYPE_NAME (bool_short_type_node) = tdecl; | |
17812 | ||
17813 | tdecl = add_builtin_type ("__bool int", bool_int_type_node); | |
17814 | TYPE_NAME (bool_int_type_node) = tdecl; | |
17815 | ||
17816 | tdecl = add_builtin_type ("__pixel", pixel_type_node); | |
17817 | TYPE_NAME (pixel_type_node) = tdecl; | |
17818 | ||
17819 | bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char", | |
17820 | bool_char_type_node, 16); | |
17821 | bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short", | |
17822 | bool_short_type_node, 8); | |
17823 | bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int", | |
17824 | bool_int_type_node, 4); | |
17825 | bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 | |
17826 | ? "__vector __bool long" | |
17827 | : "__vector __bool long long", | |
17828 | bool_long_type_node, 2); | |
17829 | pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel", | |
17830 | pixel_type_node, 8); | |
17831 | ||
17832 | /* Paired and SPE builtins are only available if you build a compiler with | |
17833 | the appropriate options, so only create those builtins with the | |
17834 | appropriate compiler option. Create Altivec and VSX builtins on machines | |
17835 | with at least the general purpose extensions (970 and newer) to allow the | |
17836 | use of the target attribute. */ | |
17837 | if (TARGET_PAIRED_FLOAT) | |
17838 | paired_init_builtins (); | |
17839 | if (TARGET_SPE) | |
17840 | spe_init_builtins (); | |
17841 | if (TARGET_EXTRA_BUILTINS) | |
17842 | altivec_init_builtins (); | |
17843 | if (TARGET_HTM) | |
17844 | htm_init_builtins (); | |
17845 | ||
17846 | if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT) | |
17847 | rs6000_common_init_builtins (); | |
17848 | ||
17849 | ftype = build_function_type_list (ieee128_float_type_node, | |
17850 | const_str_type_node, NULL_TREE); | |
17851 | def_builtin ("__builtin_nanq", ftype, RS6000_BUILTIN_NANQ); | |
17852 | def_builtin ("__builtin_nansq", ftype, RS6000_BUILTIN_NANSQ); | |
17853 | ||
17854 | ftype = build_function_type_list (ieee128_float_type_node, NULL_TREE); | |
17855 | def_builtin ("__builtin_infq", ftype, RS6000_BUILTIN_INFQ); | |
17856 | def_builtin ("__builtin_huge_valq", ftype, RS6000_BUILTIN_HUGE_VALQ); | |
17857 | ||
17858 | ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode, | |
17859 | RS6000_BUILTIN_RECIP, "__builtin_recipdiv"); | |
17860 | def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP); | |
17861 | ||
17862 | ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode, | |
17863 | RS6000_BUILTIN_RECIPF, "__builtin_recipdivf"); | |
17864 | def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF); | |
17865 | ||
17866 | ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode, | |
17867 | RS6000_BUILTIN_RSQRT, "__builtin_rsqrt"); | |
17868 | def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT); | |
17869 | ||
17870 | ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode, | |
17871 | RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf"); | |
17872 | def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF); | |
17873 | ||
17874 | mode = (TARGET_64BIT) ? DImode : SImode; | |
17875 | ftype = builtin_function_type (mode, mode, mode, VOIDmode, | |
17876 | POWER7_BUILTIN_BPERMD, "__builtin_bpermd"); | |
17877 | def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD); | |
17878 | ||
17879 | ftype = build_function_type_list (unsigned_intDI_type_node, | |
17880 | NULL_TREE); | |
17881 | def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB); | |
17882 | ||
17883 | if (TARGET_64BIT) | |
17884 | ftype = build_function_type_list (unsigned_intDI_type_node, | |
17885 | NULL_TREE); | |
17886 | else | |
17887 | ftype = build_function_type_list (unsigned_intSI_type_node, | |
17888 | NULL_TREE); | |
17889 | def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB); | |
17890 | ||
17891 | ftype = build_function_type_list (double_type_node, NULL_TREE); | |
17892 | def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS); | |
17893 | ||
17894 | ftype = build_function_type_list (void_type_node, | |
17895 | intSI_type_node, double_type_node, | |
17896 | NULL_TREE); | |
17897 | def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF); | |
17898 | ||
17899 | ftype = build_function_type_list (void_type_node, NULL_TREE); | |
17900 | def_builtin ("__builtin_cpu_init", ftype, RS6000_BUILTIN_CPU_INIT); | |
17901 | ||
17902 | ftype = build_function_type_list (bool_int_type_node, const_ptr_type_node, | |
17903 | NULL_TREE); | |
17904 | def_builtin ("__builtin_cpu_is", ftype, RS6000_BUILTIN_CPU_IS); | |
17905 | def_builtin ("__builtin_cpu_supports", ftype, RS6000_BUILTIN_CPU_SUPPORTS); | |
17906 | ||
17907 | /* AIX libm provides clog as __clog. */ | |
17908 | if (TARGET_XCOFF && | |
17909 | (tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE) | |
17910 | set_user_assembler_name (tdecl, "__clog"); | |
17911 | ||
17912 | #ifdef SUBTARGET_INIT_BUILTINS | |
17913 | SUBTARGET_INIT_BUILTINS; | |
17914 | #endif | |
17915 | } | |
17916 | ||
17917 | /* Returns the rs6000 builtin decl for CODE. */ | |
17918 | ||
17919 | static tree | |
17920 | rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) | |
17921 | { | |
17922 | HOST_WIDE_INT fnmask; | |
17923 | ||
17924 | if (code >= RS6000_BUILTIN_COUNT) | |
17925 | return error_mark_node; | |
17926 | ||
17927 | fnmask = rs6000_builtin_info[code].mask; | |
17928 | if ((fnmask & rs6000_builtin_mask) != fnmask) | |
17929 | { | |
17930 | rs6000_invalid_builtin ((enum rs6000_builtins)code); | |
17931 | return error_mark_node; | |
17932 | } | |
17933 | ||
17934 | return rs6000_builtin_decls[code]; | |
17935 | } | |
17936 | ||
17937 | static void | |
17938 | spe_init_builtins (void) | |
17939 | { | |
17940 | tree puint_type_node = build_pointer_type (unsigned_type_node); | |
17941 | tree pushort_type_node = build_pointer_type (short_unsigned_type_node); | |
17942 | const struct builtin_description *d; | |
17943 | size_t i; | |
17944 | HOST_WIDE_INT builtin_mask = rs6000_builtin_mask; | |
17945 | ||
17946 | tree v2si_ftype_4_v2si | |
17947 | = build_function_type_list (opaque_V2SI_type_node, | |
17948 | opaque_V2SI_type_node, | |
17949 | opaque_V2SI_type_node, | |
17950 | opaque_V2SI_type_node, | |
17951 | opaque_V2SI_type_node, | |
17952 | NULL_TREE); | |
17953 | ||
17954 | tree v2sf_ftype_4_v2sf | |
17955 | = build_function_type_list (opaque_V2SF_type_node, | |
17956 | opaque_V2SF_type_node, | |
17957 | opaque_V2SF_type_node, | |
17958 | opaque_V2SF_type_node, | |
17959 | opaque_V2SF_type_node, | |
17960 | NULL_TREE); | |
17961 | ||
17962 | tree int_ftype_int_v2si_v2si | |
17963 | = build_function_type_list (integer_type_node, | |
17964 | integer_type_node, | |
17965 | opaque_V2SI_type_node, | |
17966 | opaque_V2SI_type_node, | |
17967 | NULL_TREE); | |
17968 | ||
17969 | tree int_ftype_int_v2sf_v2sf | |
17970 | = build_function_type_list (integer_type_node, | |
17971 | integer_type_node, | |
17972 | opaque_V2SF_type_node, | |
17973 | opaque_V2SF_type_node, | |
17974 | NULL_TREE); | |
17975 | ||
17976 | tree void_ftype_v2si_puint_int | |
17977 | = build_function_type_list (void_type_node, | |
17978 | opaque_V2SI_type_node, | |
17979 | puint_type_node, | |
17980 | integer_type_node, | |
17981 | NULL_TREE); | |
17982 | ||
17983 | tree void_ftype_v2si_puint_char | |
17984 | = build_function_type_list (void_type_node, | |
17985 | opaque_V2SI_type_node, | |
17986 | puint_type_node, | |
17987 | char_type_node, | |
17988 | NULL_TREE); | |
17989 | ||
17990 | tree void_ftype_v2si_pv2si_int | |
17991 | = build_function_type_list (void_type_node, | |
17992 | opaque_V2SI_type_node, | |
17993 | opaque_p_V2SI_type_node, | |
17994 | integer_type_node, | |
17995 | NULL_TREE); | |
17996 | ||
17997 | tree void_ftype_v2si_pv2si_char | |
17998 | = build_function_type_list (void_type_node, | |
17999 | opaque_V2SI_type_node, | |
18000 | opaque_p_V2SI_type_node, | |
18001 | char_type_node, | |
18002 | NULL_TREE); | |
18003 | ||
18004 | tree void_ftype_int | |
18005 | = build_function_type_list (void_type_node, integer_type_node, NULL_TREE); | |
18006 | ||
18007 | tree int_ftype_void | |
18008 | = build_function_type_list (integer_type_node, NULL_TREE); | |
18009 | ||
18010 | tree v2si_ftype_pv2si_int | |
18011 | = build_function_type_list (opaque_V2SI_type_node, | |
18012 | opaque_p_V2SI_type_node, | |
18013 | integer_type_node, | |
18014 | NULL_TREE); | |
18015 | ||
18016 | tree v2si_ftype_puint_int | |
18017 | = build_function_type_list (opaque_V2SI_type_node, | |
18018 | puint_type_node, | |
18019 | integer_type_node, | |
18020 | NULL_TREE); | |
18021 | ||
18022 | tree v2si_ftype_pushort_int | |
18023 | = build_function_type_list (opaque_V2SI_type_node, | |
18024 | pushort_type_node, | |
18025 | integer_type_node, | |
18026 | NULL_TREE); | |
18027 | ||
18028 | tree v2si_ftype_signed_char | |
18029 | = build_function_type_list (opaque_V2SI_type_node, | |
18030 | signed_char_type_node, | |
18031 | NULL_TREE); | |
18032 | ||
18033 | add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node); | |
18034 | ||
18035 | /* Initialize irregular SPE builtins. */ | |
18036 | ||
18037 | def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR); | |
18038 | def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR); | |
18039 | def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX); | |
18040 | def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX); | |
18041 | def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX); | |
18042 | def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX); | |
18043 | def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX); | |
18044 | def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX); | |
18045 | def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX); | |
18046 | def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD); | |
18047 | def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH); | |
18048 | def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW); | |
18049 | def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE); | |
18050 | def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO); | |
18051 | def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE); | |
18052 | def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO); | |
18053 | def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI); | |
18054 | def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI); | |
18055 | ||
18056 | /* Loads. */ | |
18057 | def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX); | |
18058 | def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX); | |
18059 | def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX); | |
18060 | def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX); | |
18061 | def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX); | |
18062 | def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX); | |
18063 | def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX); | |
18064 | def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX); | |
18065 | def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX); | |
18066 | def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX); | |
18067 | def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX); | |
18068 | def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD); | |
18069 | def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW); | |
18070 | def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH); | |
18071 | def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT); | |
18072 | def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT); | |
18073 | def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT); | |
18074 | def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE); | |
18075 | def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS); | |
18076 | def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU); | |
18077 | def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT); | |
18078 | def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT); | |
18079 | ||
18080 | /* Predicates. */ | |
18081 | d = bdesc_spe_predicates; | |
18082 | for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++) | |
18083 | { | |
18084 | tree type; | |
18085 | HOST_WIDE_INT mask = d->mask; | |
18086 | ||
18087 | if ((mask & builtin_mask) != mask) | |
18088 | { | |
18089 | if (TARGET_DEBUG_BUILTIN) | |
18090 | fprintf (stderr, "spe_init_builtins, skip predicate %s\n", | |
18091 | d->name); | |
18092 | continue; | |
18093 | } | |
18094 | ||
18095 | /* Cannot define builtin if the instruction is disabled. */ | |
18096 | gcc_assert (d->icode != CODE_FOR_nothing); | |
18097 | switch (insn_data[d->icode].operand[1].mode) | |
18098 | { | |
4e10a5a7 | 18099 | case E_V2SImode: |
83349046 SB |
18100 | type = int_ftype_int_v2si_v2si; |
18101 | break; | |
4e10a5a7 | 18102 | case E_V2SFmode: |
83349046 SB |
18103 | type = int_ftype_int_v2sf_v2sf; |
18104 | break; | |
18105 | default: | |
18106 | gcc_unreachable (); | |
18107 | } | |
18108 | ||
18109 | def_builtin (d->name, type, d->code); | |
18110 | } | |
18111 | ||
18112 | /* Evsel predicates. */ | |
18113 | d = bdesc_spe_evsel; | |
18114 | for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++) | |
18115 | { | |
18116 | tree type; | |
18117 | HOST_WIDE_INT mask = d->mask; | |
18118 | ||
18119 | if ((mask & builtin_mask) != mask) | |
18120 | { | |
18121 | if (TARGET_DEBUG_BUILTIN) | |
18122 | fprintf (stderr, "spe_init_builtins, skip evsel %s\n", | |
18123 | d->name); | |
18124 | continue; | |
18125 | } | |
18126 | ||
18127 | /* Cannot define builtin if the instruction is disabled. */ | |
18128 | gcc_assert (d->icode != CODE_FOR_nothing); | |
18129 | switch (insn_data[d->icode].operand[1].mode) | |
18130 | { | |
4e10a5a7 | 18131 | case E_V2SImode: |
83349046 SB |
18132 | type = v2si_ftype_4_v2si; |
18133 | break; | |
4e10a5a7 | 18134 | case E_V2SFmode: |
83349046 SB |
18135 | type = v2sf_ftype_4_v2sf; |
18136 | break; | |
18137 | default: | |
18138 | gcc_unreachable (); | |
18139 | } | |
18140 | ||
18141 | def_builtin (d->name, type, d->code); | |
18142 | } | |
18143 | } | |
18144 | ||
18145 | static void | |
18146 | paired_init_builtins (void) | |
18147 | { | |
18148 | const struct builtin_description *d; | |
18149 | size_t i; | |
18150 | HOST_WIDE_INT builtin_mask = rs6000_builtin_mask; | |
18151 | ||
18152 | tree int_ftype_int_v2sf_v2sf | |
18153 | = build_function_type_list (integer_type_node, | |
18154 | integer_type_node, | |
18155 | V2SF_type_node, | |
18156 | V2SF_type_node, | |
18157 | NULL_TREE); | |
18158 | tree pcfloat_type_node = | |
18159 | build_pointer_type (build_qualified_type | |
18160 | (float_type_node, TYPE_QUAL_CONST)); | |
18161 | ||
18162 | tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node, | |
18163 | long_integer_type_node, | |
18164 | pcfloat_type_node, | |
18165 | NULL_TREE); | |
18166 | tree void_ftype_v2sf_long_pcfloat = | |
18167 | build_function_type_list (void_type_node, | |
18168 | V2SF_type_node, | |
18169 | long_integer_type_node, | |
18170 | pcfloat_type_node, | |
18171 | NULL_TREE); | |
18172 | ||
18173 | ||
18174 | def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat, | |
18175 | PAIRED_BUILTIN_LX); | |
18176 | ||
18177 | ||
18178 | def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat, | |
18179 | PAIRED_BUILTIN_STX); | |
18180 | ||
18181 | /* Predicates. */ | |
18182 | d = bdesc_paired_preds; | |
18183 | for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++) | |
18184 | { | |
18185 | tree type; | |
18186 | HOST_WIDE_INT mask = d->mask; | |
18187 | ||
18188 | if ((mask & builtin_mask) != mask) | |
18189 | { | |
18190 | if (TARGET_DEBUG_BUILTIN) | |
18191 | fprintf (stderr, "paired_init_builtins, skip predicate %s\n", | |
18192 | d->name); | |
18193 | continue; | |
18194 | } | |
18195 | ||
18196 | /* Cannot define builtin if the instruction is disabled. */ | |
18197 | gcc_assert (d->icode != CODE_FOR_nothing); | |
18198 | ||
18199 | if (TARGET_DEBUG_BUILTIN) | |
18200 | fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n", | |
18201 | (int)i, get_insn_name (d->icode), (int)d->icode, | |
18202 | GET_MODE_NAME (insn_data[d->icode].operand[1].mode)); | |
18203 | ||
18204 | switch (insn_data[d->icode].operand[1].mode) | |
18205 | { | |
4e10a5a7 | 18206 | case E_V2SFmode: |
83349046 SB |
18207 | type = int_ftype_int_v2sf_v2sf; |
18208 | break; | |
18209 | default: | |
18210 | gcc_unreachable (); | |
18211 | } | |
18212 | ||
18213 | def_builtin (d->name, type, d->code); | |
18214 | } | |
18215 | } | |
18216 | ||
18217 | static void | |
18218 | altivec_init_builtins (void) | |
18219 | { | |
18220 | const struct builtin_description *d; | |
18221 | size_t i; | |
18222 | tree ftype; | |
18223 | tree decl; | |
18224 | HOST_WIDE_INT builtin_mask = rs6000_builtin_mask; | |
18225 | ||
18226 | tree pvoid_type_node = build_pointer_type (void_type_node); | |
18227 | ||
18228 | tree pcvoid_type_node | |
18229 | = build_pointer_type (build_qualified_type (void_type_node, | |
18230 | TYPE_QUAL_CONST)); | |
18231 | ||
18232 | tree int_ftype_opaque | |
18233 | = build_function_type_list (integer_type_node, | |
18234 | opaque_V4SI_type_node, NULL_TREE); | |
18235 | tree opaque_ftype_opaque | |
18236 | = build_function_type_list (integer_type_node, NULL_TREE); | |
18237 | tree opaque_ftype_opaque_int | |
18238 | = build_function_type_list (opaque_V4SI_type_node, | |
18239 | opaque_V4SI_type_node, integer_type_node, NULL_TREE); | |
18240 | tree opaque_ftype_opaque_opaque_int | |
18241 | = build_function_type_list (opaque_V4SI_type_node, | |
18242 | opaque_V4SI_type_node, opaque_V4SI_type_node, | |
18243 | integer_type_node, NULL_TREE); | |
18244 | tree opaque_ftype_opaque_opaque_opaque | |
18245 | = build_function_type_list (opaque_V4SI_type_node, | |
18246 | opaque_V4SI_type_node, opaque_V4SI_type_node, | |
18247 | opaque_V4SI_type_node, NULL_TREE); | |
18248 | tree opaque_ftype_opaque_opaque | |
18249 | = build_function_type_list (opaque_V4SI_type_node, | |
18250 | opaque_V4SI_type_node, opaque_V4SI_type_node, | |
18251 | NULL_TREE); | |
18252 | tree int_ftype_int_opaque_opaque | |
18253 | = build_function_type_list (integer_type_node, | |
18254 | integer_type_node, opaque_V4SI_type_node, | |
18255 | opaque_V4SI_type_node, NULL_TREE); | |
18256 | tree int_ftype_int_v4si_v4si | |
18257 | = build_function_type_list (integer_type_node, | |
18258 | integer_type_node, V4SI_type_node, | |
18259 | V4SI_type_node, NULL_TREE); | |
18260 | tree int_ftype_int_v2di_v2di | |
18261 | = build_function_type_list (integer_type_node, | |
18262 | integer_type_node, V2DI_type_node, | |
18263 | V2DI_type_node, NULL_TREE); | |
18264 | tree void_ftype_v4si | |
18265 | = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE); | |
18266 | tree v8hi_ftype_void | |
18267 | = build_function_type_list (V8HI_type_node, NULL_TREE); | |
18268 | tree void_ftype_void | |
18269 | = build_function_type_list (void_type_node, NULL_TREE); | |
18270 | tree void_ftype_int | |
18271 | = build_function_type_list (void_type_node, integer_type_node, NULL_TREE); | |
18272 | ||
18273 | tree opaque_ftype_long_pcvoid | |
18274 | = build_function_type_list (opaque_V4SI_type_node, | |
18275 | long_integer_type_node, pcvoid_type_node, | |
18276 | NULL_TREE); | |
18277 | tree v16qi_ftype_long_pcvoid | |
18278 | = build_function_type_list (V16QI_type_node, | |
18279 | long_integer_type_node, pcvoid_type_node, | |
18280 | NULL_TREE); | |
18281 | tree v8hi_ftype_long_pcvoid | |
18282 | = build_function_type_list (V8HI_type_node, | |
18283 | long_integer_type_node, pcvoid_type_node, | |
18284 | NULL_TREE); | |
18285 | tree v4si_ftype_long_pcvoid | |
18286 | = build_function_type_list (V4SI_type_node, | |
18287 | long_integer_type_node, pcvoid_type_node, | |
18288 | NULL_TREE); | |
18289 | tree v4sf_ftype_long_pcvoid | |
18290 | = build_function_type_list (V4SF_type_node, | |
18291 | long_integer_type_node, pcvoid_type_node, | |
18292 | NULL_TREE); | |
18293 | tree v2df_ftype_long_pcvoid | |
18294 | = build_function_type_list (V2DF_type_node, | |
18295 | long_integer_type_node, pcvoid_type_node, | |
18296 | NULL_TREE); | |
18297 | tree v2di_ftype_long_pcvoid | |
18298 | = build_function_type_list (V2DI_type_node, | |
18299 | long_integer_type_node, pcvoid_type_node, | |
18300 | NULL_TREE); | |
18301 | ||
18302 | tree void_ftype_opaque_long_pvoid | |
18303 | = build_function_type_list (void_type_node, | |
18304 | opaque_V4SI_type_node, long_integer_type_node, | |
18305 | pvoid_type_node, NULL_TREE); | |
18306 | tree void_ftype_v4si_long_pvoid | |
18307 | = build_function_type_list (void_type_node, | |
18308 | V4SI_type_node, long_integer_type_node, | |
18309 | pvoid_type_node, NULL_TREE); | |
18310 | tree void_ftype_v16qi_long_pvoid | |
18311 | = build_function_type_list (void_type_node, | |
18312 | V16QI_type_node, long_integer_type_node, | |
18313 | pvoid_type_node, NULL_TREE); | |
18314 | ||
18315 | tree void_ftype_v16qi_pvoid_long | |
18316 | = build_function_type_list (void_type_node, | |
18317 | V16QI_type_node, pvoid_type_node, | |
18318 | long_integer_type_node, NULL_TREE); | |
18319 | ||
18320 | tree void_ftype_v8hi_long_pvoid | |
18321 | = build_function_type_list (void_type_node, | |
18322 | V8HI_type_node, long_integer_type_node, | |
18323 | pvoid_type_node, NULL_TREE); | |
18324 | tree void_ftype_v4sf_long_pvoid | |
18325 | = build_function_type_list (void_type_node, | |
18326 | V4SF_type_node, long_integer_type_node, | |
18327 | pvoid_type_node, NULL_TREE); | |
18328 | tree void_ftype_v2df_long_pvoid | |
18329 | = build_function_type_list (void_type_node, | |
18330 | V2DF_type_node, long_integer_type_node, | |
18331 | pvoid_type_node, NULL_TREE); | |
18332 | tree void_ftype_v2di_long_pvoid | |
18333 | = build_function_type_list (void_type_node, | |
18334 | V2DI_type_node, long_integer_type_node, | |
18335 | pvoid_type_node, NULL_TREE); | |
18336 | tree int_ftype_int_v8hi_v8hi | |
18337 | = build_function_type_list (integer_type_node, | |
18338 | integer_type_node, V8HI_type_node, | |
18339 | V8HI_type_node, NULL_TREE); | |
18340 | tree int_ftype_int_v16qi_v16qi | |
18341 | = build_function_type_list (integer_type_node, | |
18342 | integer_type_node, V16QI_type_node, | |
18343 | V16QI_type_node, NULL_TREE); | |
18344 | tree int_ftype_int_v4sf_v4sf | |
18345 | = build_function_type_list (integer_type_node, | |
18346 | integer_type_node, V4SF_type_node, | |
18347 | V4SF_type_node, NULL_TREE); | |
18348 | tree int_ftype_int_v2df_v2df | |
18349 | = build_function_type_list (integer_type_node, | |
18350 | integer_type_node, V2DF_type_node, | |
18351 | V2DF_type_node, NULL_TREE); | |
18352 | tree v2di_ftype_v2di | |
18353 | = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); | |
18354 | tree v4si_ftype_v4si | |
18355 | = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); | |
18356 | tree v8hi_ftype_v8hi | |
18357 | = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE); | |
18358 | tree v16qi_ftype_v16qi | |
18359 | = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE); | |
18360 | tree v4sf_ftype_v4sf | |
18361 | = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); | |
18362 | tree v2df_ftype_v2df | |
18363 | = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); | |
18364 | tree void_ftype_pcvoid_int_int | |
18365 | = build_function_type_list (void_type_node, | |
18366 | pcvoid_type_node, integer_type_node, | |
18367 | integer_type_node, NULL_TREE); | |
18368 | ||
18369 | def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR); | |
18370 | def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR); | |
18371 | def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL); | |
18372 | def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS); | |
18373 | def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL); | |
18374 | def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR); | |
18375 | def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX); | |
18376 | def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX); | |
18377 | def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX); | |
18378 | def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL); | |
18379 | def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid, | |
18380 | ALTIVEC_BUILTIN_LVXL_V2DF); | |
18381 | def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid, | |
18382 | ALTIVEC_BUILTIN_LVXL_V2DI); | |
18383 | def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid, | |
18384 | ALTIVEC_BUILTIN_LVXL_V4SF); | |
18385 | def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid, | |
18386 | ALTIVEC_BUILTIN_LVXL_V4SI); | |
18387 | def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid, | |
18388 | ALTIVEC_BUILTIN_LVXL_V8HI); | |
18389 | def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid, | |
18390 | ALTIVEC_BUILTIN_LVXL_V16QI); | |
18391 | def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX); | |
18392 | def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid, | |
18393 | ALTIVEC_BUILTIN_LVX_V2DF); | |
18394 | def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid, | |
18395 | ALTIVEC_BUILTIN_LVX_V2DI); | |
18396 | def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid, | |
18397 | ALTIVEC_BUILTIN_LVX_V4SF); | |
18398 | def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid, | |
18399 | ALTIVEC_BUILTIN_LVX_V4SI); | |
18400 | def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid, | |
18401 | ALTIVEC_BUILTIN_LVX_V8HI); | |
18402 | def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid, | |
18403 | ALTIVEC_BUILTIN_LVX_V16QI); | |
18404 | def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX); | |
18405 | def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid, | |
18406 | ALTIVEC_BUILTIN_STVX_V2DF); | |
18407 | def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid, | |
18408 | ALTIVEC_BUILTIN_STVX_V2DI); | |
18409 | def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid, | |
18410 | ALTIVEC_BUILTIN_STVX_V4SF); | |
18411 | def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid, | |
18412 | ALTIVEC_BUILTIN_STVX_V4SI); | |
18413 | def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid, | |
18414 | ALTIVEC_BUILTIN_STVX_V8HI); | |
18415 | def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid, | |
18416 | ALTIVEC_BUILTIN_STVX_V16QI); | |
18417 | def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX); | |
18418 | def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL); | |
18419 | def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid, | |
18420 | ALTIVEC_BUILTIN_STVXL_V2DF); | |
18421 | def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid, | |
18422 | ALTIVEC_BUILTIN_STVXL_V2DI); | |
18423 | def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid, | |
18424 | ALTIVEC_BUILTIN_STVXL_V4SF); | |
18425 | def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid, | |
18426 | ALTIVEC_BUILTIN_STVXL_V4SI); | |
18427 | def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid, | |
18428 | ALTIVEC_BUILTIN_STVXL_V8HI); | |
18429 | def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid, | |
18430 | ALTIVEC_BUILTIN_STVXL_V16QI); | |
18431 | def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX); | |
18432 | def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX); | |
18433 | def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD); | |
18434 | def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE); | |
18435 | def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL); | |
18436 | def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL); | |
18437 | def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR); | |
18438 | def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX); | |
18439 | def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX); | |
18440 | def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX); | |
18441 | def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST); | |
18442 | def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE); | |
18443 | def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL); | |
18444 | def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX); | |
18445 | def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX); | |
18446 | def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX); | |
18447 | ||
18448 | def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid, | |
18449 | VSX_BUILTIN_LXVD2X_V2DF); | |
18450 | def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid, | |
18451 | VSX_BUILTIN_LXVD2X_V2DI); | |
18452 | def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid, | |
18453 | VSX_BUILTIN_LXVW4X_V4SF); | |
18454 | def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid, | |
18455 | VSX_BUILTIN_LXVW4X_V4SI); | |
18456 | def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid, | |
18457 | VSX_BUILTIN_LXVW4X_V8HI); | |
18458 | def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid, | |
18459 | VSX_BUILTIN_LXVW4X_V16QI); | |
18460 | def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid, | |
18461 | VSX_BUILTIN_STXVD2X_V2DF); | |
18462 | def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid, | |
18463 | VSX_BUILTIN_STXVD2X_V2DI); | |
18464 | def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid, | |
18465 | VSX_BUILTIN_STXVW4X_V4SF); | |
18466 | def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid, | |
18467 | VSX_BUILTIN_STXVW4X_V4SI); | |
18468 | def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid, | |
18469 | VSX_BUILTIN_STXVW4X_V8HI); | |
18470 | def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid, | |
18471 | VSX_BUILTIN_STXVW4X_V16QI); | |
18472 | ||
18473 | def_builtin ("__builtin_vsx_ld_elemrev_v2df", v2df_ftype_long_pcvoid, | |
18474 | VSX_BUILTIN_LD_ELEMREV_V2DF); | |
18475 | def_builtin ("__builtin_vsx_ld_elemrev_v2di", v2di_ftype_long_pcvoid, | |
18476 | VSX_BUILTIN_LD_ELEMREV_V2DI); | |
18477 | def_builtin ("__builtin_vsx_ld_elemrev_v4sf", v4sf_ftype_long_pcvoid, | |
18478 | VSX_BUILTIN_LD_ELEMREV_V4SF); | |
18479 | def_builtin ("__builtin_vsx_ld_elemrev_v4si", v4si_ftype_long_pcvoid, | |
18480 | VSX_BUILTIN_LD_ELEMREV_V4SI); | |
18481 | def_builtin ("__builtin_vsx_st_elemrev_v2df", void_ftype_v2df_long_pvoid, | |
18482 | VSX_BUILTIN_ST_ELEMREV_V2DF); | |
18483 | def_builtin ("__builtin_vsx_st_elemrev_v2di", void_ftype_v2di_long_pvoid, | |
18484 | VSX_BUILTIN_ST_ELEMREV_V2DI); | |
18485 | def_builtin ("__builtin_vsx_st_elemrev_v4sf", void_ftype_v4sf_long_pvoid, | |
18486 | VSX_BUILTIN_ST_ELEMREV_V4SF); | |
18487 | def_builtin ("__builtin_vsx_st_elemrev_v4si", void_ftype_v4si_long_pvoid, | |
18488 | VSX_BUILTIN_ST_ELEMREV_V4SI); | |
18489 | ||
18490 | if (TARGET_P9_VECTOR) | |
18491 | { | |
18492 | def_builtin ("__builtin_vsx_ld_elemrev_v8hi", v8hi_ftype_long_pcvoid, | |
18493 | VSX_BUILTIN_LD_ELEMREV_V8HI); | |
18494 | def_builtin ("__builtin_vsx_ld_elemrev_v16qi", v16qi_ftype_long_pcvoid, | |
18495 | VSX_BUILTIN_LD_ELEMREV_V16QI); | |
18496 | def_builtin ("__builtin_vsx_st_elemrev_v8hi", | |
18497 | void_ftype_v8hi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V8HI); | |
18498 | def_builtin ("__builtin_vsx_st_elemrev_v16qi", | |
18499 | void_ftype_v16qi_long_pvoid, VSX_BUILTIN_ST_ELEMREV_V16QI); | |
18500 | } | |
18501 | else | |
18502 | { | |
18503 | rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V8HI] | |
18504 | = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V8HI]; | |
18505 | rs6000_builtin_decls[(int) VSX_BUILTIN_LD_ELEMREV_V16QI] | |
18506 | = rs6000_builtin_decls[(int) VSX_BUILTIN_LXVW4X_V16QI]; | |
18507 | rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V8HI] | |
18508 | = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V8HI]; | |
18509 | rs6000_builtin_decls[(int) VSX_BUILTIN_ST_ELEMREV_V16QI] | |
18510 | = rs6000_builtin_decls[(int) VSX_BUILTIN_STXVW4X_V16QI]; | |
18511 | } | |
18512 | ||
18513 | def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid, | |
18514 | VSX_BUILTIN_VEC_LD); | |
18515 | def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid, | |
18516 | VSX_BUILTIN_VEC_ST); | |
18517 | def_builtin ("__builtin_vec_xl", opaque_ftype_long_pcvoid, | |
18518 | VSX_BUILTIN_VEC_XL); | |
18519 | def_builtin ("__builtin_vec_xst", void_ftype_opaque_long_pvoid, | |
18520 | VSX_BUILTIN_VEC_XST); | |
18521 | ||
18522 | def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP); | |
18523 | def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS); | |
18524 | def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE); | |
18525 | ||
18526 | def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD); | |
18527 | def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT); | |
18528 | def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT); | |
18529 | def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT); | |
18530 | def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW); | |
18531 | def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH); | |
18532 | def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB); | |
18533 | def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF); | |
18534 | def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX); | |
18535 | def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX); | |
18536 | def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS); | |
18537 | def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU); | |
18538 | ||
18539 | def_builtin ("__builtin_vec_adde", opaque_ftype_opaque_opaque_opaque, | |
18540 | ALTIVEC_BUILTIN_VEC_ADDE); | |
18541 | def_builtin ("__builtin_vec_addec", opaque_ftype_opaque_opaque_opaque, | |
18542 | ALTIVEC_BUILTIN_VEC_ADDEC); | |
18543 | def_builtin ("__builtin_vec_cmpne", opaque_ftype_opaque_opaque, | |
18544 | ALTIVEC_BUILTIN_VEC_CMPNE); | |
18545 | def_builtin ("__builtin_vec_mul", opaque_ftype_opaque_opaque, | |
18546 | ALTIVEC_BUILTIN_VEC_MUL); | |
18547 | ||
18548 | /* Cell builtins. */ | |
18549 | def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX); | |
18550 | def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL); | |
18551 | def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX); | |
18552 | def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL); | |
18553 | ||
18554 | def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX); | |
18555 | def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL); | |
18556 | def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX); | |
18557 | def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL); | |
18558 | ||
18559 | def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX); | |
18560 | def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL); | |
18561 | def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX); | |
18562 | def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL); | |
18563 | ||
18564 | def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX); | |
18565 | def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL); | |
18566 | def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX); | |
18567 | def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL); | |
18568 | ||
18569 | if (TARGET_P9_VECTOR) | |
18570 | def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long, | |
18571 | P9V_BUILTIN_STXVL); | |
18572 | ||
18573 | /* Add the DST variants. */ | |
18574 | d = bdesc_dst; | |
18575 | for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++) | |
18576 | { | |
18577 | HOST_WIDE_INT mask = d->mask; | |
18578 | ||
18579 | /* It is expected that these dst built-in functions may have | |
18580 | d->icode equal to CODE_FOR_nothing. */ | |
18581 | if ((mask & builtin_mask) != mask) | |
18582 | { | |
18583 | if (TARGET_DEBUG_BUILTIN) | |
18584 | fprintf (stderr, "altivec_init_builtins, skip dst %s\n", | |
18585 | d->name); | |
18586 | continue; | |
18587 | } | |
18588 | def_builtin (d->name, void_ftype_pcvoid_int_int, d->code); | |
18589 | } | |
18590 | ||
18591 | /* Initialize the predicates. */ | |
18592 | d = bdesc_altivec_preds; | |
18593 | for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++) | |
18594 | { | |
18595 | machine_mode mode1; | |
18596 | tree type; | |
18597 | HOST_WIDE_INT mask = d->mask; | |
18598 | ||
18599 | if ((mask & builtin_mask) != mask) | |
18600 | { | |
18601 | if (TARGET_DEBUG_BUILTIN) | |
18602 | fprintf (stderr, "altivec_init_builtins, skip predicate %s\n", | |
18603 | d->name); | |
18604 | continue; | |
18605 | } | |
18606 | ||
18607 | if (rs6000_overloaded_builtin_p (d->code)) | |
18608 | mode1 = VOIDmode; | |
18609 | else | |
18610 | { | |
18611 | /* Cannot define builtin if the instruction is disabled. */ | |
18612 | gcc_assert (d->icode != CODE_FOR_nothing); | |
18613 | mode1 = insn_data[d->icode].operand[1].mode; | |
18614 | } | |
18615 | ||
18616 | switch (mode1) | |
18617 | { | |
4e10a5a7 | 18618 | case E_VOIDmode: |
83349046 SB |
18619 | type = int_ftype_int_opaque_opaque; |
18620 | break; | |
4e10a5a7 | 18621 | case E_V2DImode: |
83349046 SB |
18622 | type = int_ftype_int_v2di_v2di; |
18623 | break; | |
4e10a5a7 | 18624 | case E_V4SImode: |
83349046 SB |
18625 | type = int_ftype_int_v4si_v4si; |
18626 | break; | |
4e10a5a7 | 18627 | case E_V8HImode: |
83349046 SB |
18628 | type = int_ftype_int_v8hi_v8hi; |
18629 | break; | |
4e10a5a7 | 18630 | case E_V16QImode: |
83349046 SB |
18631 | type = int_ftype_int_v16qi_v16qi; |
18632 | break; | |
4e10a5a7 | 18633 | case E_V4SFmode: |
83349046 SB |
18634 | type = int_ftype_int_v4sf_v4sf; |
18635 | break; | |
4e10a5a7 | 18636 | case E_V2DFmode: |
83349046 SB |
18637 | type = int_ftype_int_v2df_v2df; |
18638 | break; | |
18639 | default: | |
18640 | gcc_unreachable (); | |
18641 | } | |
18642 | ||
18643 | def_builtin (d->name, type, d->code); | |
18644 | } | |
18645 | ||
18646 | /* Initialize the abs* operators. */ | |
18647 | d = bdesc_abs; | |
18648 | for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++) | |
18649 | { | |
18650 | machine_mode mode0; | |
18651 | tree type; | |
18652 | HOST_WIDE_INT mask = d->mask; | |
18653 | ||
18654 | if ((mask & builtin_mask) != mask) | |
18655 | { | |
18656 | if (TARGET_DEBUG_BUILTIN) | |
18657 | fprintf (stderr, "altivec_init_builtins, skip abs %s\n", | |
18658 | d->name); | |
18659 | continue; | |
18660 | } | |
18661 | ||
18662 | /* Cannot define builtin if the instruction is disabled. */ | |
18663 | gcc_assert (d->icode != CODE_FOR_nothing); | |
18664 | mode0 = insn_data[d->icode].operand[0].mode; | |
18665 | ||
18666 | switch (mode0) | |
18667 | { | |
4e10a5a7 | 18668 | case E_V2DImode: |
83349046 SB |
18669 | type = v2di_ftype_v2di; |
18670 | break; | |
4e10a5a7 | 18671 | case E_V4SImode: |
83349046 SB |
18672 | type = v4si_ftype_v4si; |
18673 | break; | |
4e10a5a7 | 18674 | case E_V8HImode: |
83349046 SB |
18675 | type = v8hi_ftype_v8hi; |
18676 | break; | |
4e10a5a7 | 18677 | case E_V16QImode: |
83349046 SB |
18678 | type = v16qi_ftype_v16qi; |
18679 | break; | |
4e10a5a7 | 18680 | case E_V4SFmode: |
83349046 SB |
18681 | type = v4sf_ftype_v4sf; |
18682 | break; | |
4e10a5a7 | 18683 | case E_V2DFmode: |
83349046 SB |
18684 | type = v2df_ftype_v2df; |
18685 | break; | |
18686 | default: | |
18687 | gcc_unreachable (); | |
18688 | } | |
18689 | ||
18690 | def_builtin (d->name, type, d->code); | |
18691 | } | |
18692 | ||
18693 | /* Initialize target builtin that implements | |
18694 | targetm.vectorize.builtin_mask_for_load. */ | |
18695 | ||
18696 | decl = add_builtin_function ("__builtin_altivec_mask_for_load", | |
18697 | v16qi_ftype_long_pcvoid, | |
18698 | ALTIVEC_BUILTIN_MASK_FOR_LOAD, | |
18699 | BUILT_IN_MD, NULL, NULL_TREE); | |
18700 | TREE_READONLY (decl) = 1; | |
18701 | /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */ | |
18702 | altivec_builtin_mask_for_load = decl; | |
18703 | ||
18704 | /* Access to the vec_init patterns. */ | |
18705 | ftype = build_function_type_list (V4SI_type_node, integer_type_node, | |
18706 | integer_type_node, integer_type_node, | |
18707 | integer_type_node, NULL_TREE); | |
18708 | def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI); | |
18709 | ||
18710 | ftype = build_function_type_list (V8HI_type_node, short_integer_type_node, | |
18711 | short_integer_type_node, | |
18712 | short_integer_type_node, | |
18713 | short_integer_type_node, | |
18714 | short_integer_type_node, | |
18715 | short_integer_type_node, | |
18716 | short_integer_type_node, | |
18717 | short_integer_type_node, NULL_TREE); | |
18718 | def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI); | |
18719 | ||
18720 | ftype = build_function_type_list (V16QI_type_node, char_type_node, | |
18721 | char_type_node, char_type_node, | |
18722 | char_type_node, char_type_node, | |
18723 | char_type_node, char_type_node, | |
18724 | char_type_node, char_type_node, | |
18725 | char_type_node, char_type_node, | |
18726 | char_type_node, char_type_node, | |
18727 | char_type_node, char_type_node, | |
18728 | char_type_node, NULL_TREE); | |
18729 | def_builtin ("__builtin_vec_init_v16qi", ftype, | |
18730 | ALTIVEC_BUILTIN_VEC_INIT_V16QI); | |
18731 | ||
18732 | ftype = build_function_type_list (V4SF_type_node, float_type_node, | |
18733 | float_type_node, float_type_node, | |
18734 | float_type_node, NULL_TREE); | |
18735 | def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF); | |
18736 | ||
18737 | /* VSX builtins. */ | |
18738 | ftype = build_function_type_list (V2DF_type_node, double_type_node, | |
18739 | double_type_node, NULL_TREE); | |
18740 | def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF); | |
18741 | ||
18742 | ftype = build_function_type_list (V2DI_type_node, intDI_type_node, | |
18743 | intDI_type_node, NULL_TREE); | |
18744 | def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI); | |
18745 | ||
18746 | /* Access to the vec_set patterns. */ | |
18747 | ftype = build_function_type_list (V4SI_type_node, V4SI_type_node, | |
18748 | intSI_type_node, | |
18749 | integer_type_node, NULL_TREE); | |
18750 | def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI); | |
18751 | ||
18752 | ftype = build_function_type_list (V8HI_type_node, V8HI_type_node, | |
18753 | intHI_type_node, | |
18754 | integer_type_node, NULL_TREE); | |
18755 | def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI); | |
18756 | ||
18757 | ftype = build_function_type_list (V16QI_type_node, V16QI_type_node, | |
18758 | intQI_type_node, | |
18759 | integer_type_node, NULL_TREE); | |
18760 | def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI); | |
18761 | ||
18762 | ftype = build_function_type_list (V4SF_type_node, V4SF_type_node, | |
18763 | float_type_node, | |
18764 | integer_type_node, NULL_TREE); | |
18765 | def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF); | |
18766 | ||
18767 | ftype = build_function_type_list (V2DF_type_node, V2DF_type_node, | |
18768 | double_type_node, | |
18769 | integer_type_node, NULL_TREE); | |
18770 | def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF); | |
18771 | ||
18772 | ftype = build_function_type_list (V2DI_type_node, V2DI_type_node, | |
18773 | intDI_type_node, | |
18774 | integer_type_node, NULL_TREE); | |
18775 | def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI); | |
18776 | ||
18777 | /* Access to the vec_extract patterns. */ | |
18778 | ftype = build_function_type_list (intSI_type_node, V4SI_type_node, | |
18779 | integer_type_node, NULL_TREE); | |
18780 | def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI); | |
18781 | ||
18782 | ftype = build_function_type_list (intHI_type_node, V8HI_type_node, | |
18783 | integer_type_node, NULL_TREE); | |
18784 | def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI); | |
18785 | ||
18786 | ftype = build_function_type_list (intQI_type_node, V16QI_type_node, | |
18787 | integer_type_node, NULL_TREE); | |
18788 | def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI); | |
18789 | ||
18790 | ftype = build_function_type_list (float_type_node, V4SF_type_node, | |
18791 | integer_type_node, NULL_TREE); | |
18792 | def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF); | |
18793 | ||
18794 | ftype = build_function_type_list (double_type_node, V2DF_type_node, | |
18795 | integer_type_node, NULL_TREE); | |
18796 | def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF); | |
18797 | ||
18798 | ftype = build_function_type_list (intDI_type_node, V2DI_type_node, | |
18799 | integer_type_node, NULL_TREE); | |
18800 | def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI); | |
18801 | ||
18802 | ||
18803 | if (V1TI_type_node) | |
18804 | { | |
18805 | tree v1ti_ftype_long_pcvoid | |
18806 | = build_function_type_list (V1TI_type_node, | |
18807 | long_integer_type_node, pcvoid_type_node, | |
18808 | NULL_TREE); | |
18809 | tree void_ftype_v1ti_long_pvoid | |
18810 | = build_function_type_list (void_type_node, | |
18811 | V1TI_type_node, long_integer_type_node, | |
18812 | pvoid_type_node, NULL_TREE); | |
18813 | def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid, | |
18814 | VSX_BUILTIN_LXVD2X_V1TI); | |
18815 | def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid, | |
18816 | VSX_BUILTIN_STXVD2X_V1TI); | |
18817 | ftype = build_function_type_list (V1TI_type_node, intTI_type_node, | |
18818 | NULL_TREE, NULL_TREE); | |
18819 | def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI); | |
18820 | ftype = build_function_type_list (V1TI_type_node, V1TI_type_node, | |
18821 | intTI_type_node, | |
18822 | integer_type_node, NULL_TREE); | |
18823 | def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI); | |
18824 | ftype = build_function_type_list (intTI_type_node, V1TI_type_node, | |
18825 | integer_type_node, NULL_TREE); | |
18826 | def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI); | |
18827 | } | |
18828 | ||
18829 | } | |
18830 | ||
18831 | static void | |
18832 | htm_init_builtins (void) | |
18833 | { | |
18834 | HOST_WIDE_INT builtin_mask = rs6000_builtin_mask; | |
18835 | const struct builtin_description *d; | |
18836 | size_t i; | |
18837 | ||
18838 | d = bdesc_htm; | |
18839 | for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++) | |
18840 | { | |
18841 | tree op[MAX_HTM_OPERANDS], type; | |
18842 | HOST_WIDE_INT mask = d->mask; | |
18843 | unsigned attr = rs6000_builtin_info[d->code].attr; | |
18844 | bool void_func = (attr & RS6000_BTC_VOID); | |
18845 | int attr_args = (attr & RS6000_BTC_TYPE_MASK); | |
18846 | int nopnds = 0; | |
18847 | tree gpr_type_node; | |
18848 | tree rettype; | |
18849 | tree argtype; | |
18850 | ||
18851 | /* It is expected that these htm built-in functions may have | |
18852 | d->icode equal to CODE_FOR_nothing. */ | |
18853 | ||
18854 | if (TARGET_32BIT && TARGET_POWERPC64) | |
18855 | gpr_type_node = long_long_unsigned_type_node; | |
18856 | else | |
18857 | gpr_type_node = long_unsigned_type_node; | |
18858 | ||
18859 | if (attr & RS6000_BTC_SPR) | |
18860 | { | |
18861 | rettype = gpr_type_node; | |
18862 | argtype = gpr_type_node; | |
18863 | } | |
18864 | else if (d->code == HTM_BUILTIN_TABORTDC | |
18865 | || d->code == HTM_BUILTIN_TABORTDCI) | |
18866 | { | |
18867 | rettype = unsigned_type_node; | |
18868 | argtype = gpr_type_node; | |
18869 | } | |
18870 | else | |
18871 | { | |
18872 | rettype = unsigned_type_node; | |
18873 | argtype = unsigned_type_node; | |
18874 | } | |
18875 | ||
18876 | if ((mask & builtin_mask) != mask) | |
18877 | { | |
18878 | if (TARGET_DEBUG_BUILTIN) | |
18879 | fprintf (stderr, "htm_builtin, skip binary %s\n", d->name); | |
18880 | continue; | |
18881 | } | |
18882 | ||
18883 | if (d->name == 0) | |
18884 | { | |
18885 | if (TARGET_DEBUG_BUILTIN) | |
18886 | fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n", | |
18887 | (long unsigned) i); | |
18888 | continue; | |
18889 | } | |
18890 | ||
18891 | op[nopnds++] = (void_func) ? void_type_node : rettype; | |
18892 | ||
18893 | if (attr_args == RS6000_BTC_UNARY) | |
18894 | op[nopnds++] = argtype; | |
18895 | else if (attr_args == RS6000_BTC_BINARY) | |
18896 | { | |
18897 | op[nopnds++] = argtype; | |
18898 | op[nopnds++] = argtype; | |
18899 | } | |
18900 | else if (attr_args == RS6000_BTC_TERNARY) | |
18901 | { | |
18902 | op[nopnds++] = argtype; | |
18903 | op[nopnds++] = argtype; | |
18904 | op[nopnds++] = argtype; | |
18905 | } | |
18906 | ||
18907 | switch (nopnds) | |
18908 | { | |
18909 | case 1: | |
18910 | type = build_function_type_list (op[0], NULL_TREE); | |
18911 | break; | |
18912 | case 2: | |
18913 | type = build_function_type_list (op[0], op[1], NULL_TREE); | |
18914 | break; | |
18915 | case 3: | |
18916 | type = build_function_type_list (op[0], op[1], op[2], NULL_TREE); | |
18917 | break; | |
18918 | case 4: | |
18919 | type = build_function_type_list (op[0], op[1], op[2], op[3], | |
18920 | NULL_TREE); | |
18921 | break; | |
18922 | default: | |
18923 | gcc_unreachable (); | |
18924 | } | |
18925 | ||
18926 | def_builtin (d->name, type, d->code); | |
18927 | } | |
18928 | } | |
18929 | ||
18930 | /* Hash function for builtin functions with up to 3 arguments and a return | |
18931 | type. */ | |
18932 | hashval_t | |
18933 | builtin_hasher::hash (builtin_hash_struct *bh) | |
18934 | { | |
18935 | unsigned ret = 0; | |
18936 | int i; | |
18937 | ||
18938 | for (i = 0; i < 4; i++) | |
18939 | { | |
18940 | ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]); | |
18941 | ret = (ret * 2) + bh->uns_p[i]; | |
18942 | } | |
18943 | ||
18944 | return ret; | |
18945 | } | |
18946 | ||
18947 | /* Compare builtin hash entries H1 and H2 for equivalence. */ | |
18948 | bool | |
18949 | builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2) | |
18950 | { | |
18951 | return ((p1->mode[0] == p2->mode[0]) | |
18952 | && (p1->mode[1] == p2->mode[1]) | |
18953 | && (p1->mode[2] == p2->mode[2]) | |
18954 | && (p1->mode[3] == p2->mode[3]) | |
18955 | && (p1->uns_p[0] == p2->uns_p[0]) | |
18956 | && (p1->uns_p[1] == p2->uns_p[1]) | |
18957 | && (p1->uns_p[2] == p2->uns_p[2]) | |
18958 | && (p1->uns_p[3] == p2->uns_p[3])); | |
18959 | } | |
18960 | ||
18961 | /* Map types for builtin functions with an explicit return type and up to 3 | |
18962 | arguments. Functions with fewer than 3 arguments use VOIDmode as the type | |
18963 | of the argument. */ | |
18964 | static tree | |
18965 | builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0, | |
18966 | machine_mode mode_arg1, machine_mode mode_arg2, | |
18967 | enum rs6000_builtins builtin, const char *name) | |
18968 | { | |
18969 | struct builtin_hash_struct h; | |
18970 | struct builtin_hash_struct *h2; | |
18971 | int num_args = 3; | |
18972 | int i; | |
18973 | tree ret_type = NULL_TREE; | |
18974 | tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE }; | |
18975 | ||
18976 | /* Create builtin_hash_table. */ | |
18977 | if (builtin_hash_table == NULL) | |
18978 | builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500); | |
18979 | ||
18980 | h.type = NULL_TREE; | |
18981 | h.mode[0] = mode_ret; | |
18982 | h.mode[1] = mode_arg0; | |
18983 | h.mode[2] = mode_arg1; | |
18984 | h.mode[3] = mode_arg2; | |
18985 | h.uns_p[0] = 0; | |
18986 | h.uns_p[1] = 0; | |
18987 | h.uns_p[2] = 0; | |
18988 | h.uns_p[3] = 0; | |
18989 | ||
18990 | /* If the builtin is a type that produces unsigned results or takes unsigned | |
18991 | arguments, and it is returned as a decl for the vectorizer (such as | |
18992 | widening multiplies, permute), make sure the arguments and return value | |
18993 | are type correct. */ | |
18994 | switch (builtin) | |
18995 | { | |
18996 | /* unsigned 1 argument functions. */ | |
18997 | case CRYPTO_BUILTIN_VSBOX: | |
18998 | case P8V_BUILTIN_VGBBD: | |
18999 | case MISC_BUILTIN_CDTBCD: | |
19000 | case MISC_BUILTIN_CBCDTD: | |
19001 | h.uns_p[0] = 1; | |
19002 | h.uns_p[1] = 1; | |
19003 | break; | |
19004 | ||
19005 | /* unsigned 2 argument functions. */ | |
19006 | case ALTIVEC_BUILTIN_VMULEUB: | |
19007 | case ALTIVEC_BUILTIN_VMULEUH: | |
19008 | case ALTIVEC_BUILTIN_VMULOUB: | |
19009 | case ALTIVEC_BUILTIN_VMULOUH: | |
19010 | case CRYPTO_BUILTIN_VCIPHER: | |
19011 | case CRYPTO_BUILTIN_VCIPHERLAST: | |
19012 | case CRYPTO_BUILTIN_VNCIPHER: | |
19013 | case CRYPTO_BUILTIN_VNCIPHERLAST: | |
19014 | case CRYPTO_BUILTIN_VPMSUMB: | |
19015 | case CRYPTO_BUILTIN_VPMSUMH: | |
19016 | case CRYPTO_BUILTIN_VPMSUMW: | |
19017 | case CRYPTO_BUILTIN_VPMSUMD: | |
19018 | case CRYPTO_BUILTIN_VPMSUM: | |
19019 | case MISC_BUILTIN_ADDG6S: | |
19020 | case MISC_BUILTIN_DIVWEU: | |
19021 | case MISC_BUILTIN_DIVWEUO: | |
19022 | case MISC_BUILTIN_DIVDEU: | |
19023 | case MISC_BUILTIN_DIVDEUO: | |
19024 | case VSX_BUILTIN_UDIV_V2DI: | |
19025 | h.uns_p[0] = 1; | |
19026 | h.uns_p[1] = 1; | |
19027 | h.uns_p[2] = 1; | |
19028 | break; | |
19029 | ||
19030 | /* unsigned 3 argument functions. */ | |
19031 | case ALTIVEC_BUILTIN_VPERM_16QI_UNS: | |
19032 | case ALTIVEC_BUILTIN_VPERM_8HI_UNS: | |
19033 | case ALTIVEC_BUILTIN_VPERM_4SI_UNS: | |
19034 | case ALTIVEC_BUILTIN_VPERM_2DI_UNS: | |
19035 | case ALTIVEC_BUILTIN_VSEL_16QI_UNS: | |
19036 | case ALTIVEC_BUILTIN_VSEL_8HI_UNS: | |
19037 | case ALTIVEC_BUILTIN_VSEL_4SI_UNS: | |
19038 | case ALTIVEC_BUILTIN_VSEL_2DI_UNS: | |
19039 | case VSX_BUILTIN_VPERM_16QI_UNS: | |
19040 | case VSX_BUILTIN_VPERM_8HI_UNS: | |
19041 | case VSX_BUILTIN_VPERM_4SI_UNS: | |
19042 | case VSX_BUILTIN_VPERM_2DI_UNS: | |
19043 | case VSX_BUILTIN_XXSEL_16QI_UNS: | |
19044 | case VSX_BUILTIN_XXSEL_8HI_UNS: | |
19045 | case VSX_BUILTIN_XXSEL_4SI_UNS: | |
19046 | case VSX_BUILTIN_XXSEL_2DI_UNS: | |
19047 | case CRYPTO_BUILTIN_VPERMXOR: | |
19048 | case CRYPTO_BUILTIN_VPERMXOR_V2DI: | |
19049 | case CRYPTO_BUILTIN_VPERMXOR_V4SI: | |
19050 | case CRYPTO_BUILTIN_VPERMXOR_V8HI: | |
19051 | case CRYPTO_BUILTIN_VPERMXOR_V16QI: | |
19052 | case CRYPTO_BUILTIN_VSHASIGMAW: | |
19053 | case CRYPTO_BUILTIN_VSHASIGMAD: | |
19054 | case CRYPTO_BUILTIN_VSHASIGMA: | |
19055 | h.uns_p[0] = 1; | |
19056 | h.uns_p[1] = 1; | |
19057 | h.uns_p[2] = 1; | |
19058 | h.uns_p[3] = 1; | |
19059 | break; | |
19060 | ||
19061 | /* signed permute functions with unsigned char mask. */ | |
19062 | case ALTIVEC_BUILTIN_VPERM_16QI: | |
19063 | case ALTIVEC_BUILTIN_VPERM_8HI: | |
19064 | case ALTIVEC_BUILTIN_VPERM_4SI: | |
19065 | case ALTIVEC_BUILTIN_VPERM_4SF: | |
19066 | case ALTIVEC_BUILTIN_VPERM_2DI: | |
19067 | case ALTIVEC_BUILTIN_VPERM_2DF: | |
19068 | case VSX_BUILTIN_VPERM_16QI: | |
19069 | case VSX_BUILTIN_VPERM_8HI: | |
19070 | case VSX_BUILTIN_VPERM_4SI: | |
19071 | case VSX_BUILTIN_VPERM_4SF: | |
19072 | case VSX_BUILTIN_VPERM_2DI: | |
19073 | case VSX_BUILTIN_VPERM_2DF: | |
19074 | h.uns_p[3] = 1; | |
19075 | break; | |
19076 | ||
19077 | /* unsigned args, signed return. */ | |
19078 | case VSX_BUILTIN_XVCVUXDSP: | |
19079 | case VSX_BUILTIN_XVCVUXDDP_UNS: | |
19080 | case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF: | |
19081 | h.uns_p[1] = 1; | |
19082 | break; | |
19083 | ||
19084 | /* signed args, unsigned return. */ | |
19085 | case VSX_BUILTIN_XVCVDPUXDS_UNS: | |
19086 | case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI: | |
19087 | case MISC_BUILTIN_UNPACK_TD: | |
19088 | case MISC_BUILTIN_UNPACK_V1TI: | |
19089 | h.uns_p[0] = 1; | |
19090 | break; | |
19091 | ||
19092 | /* unsigned arguments for 128-bit pack instructions. */ | |
19093 | case MISC_BUILTIN_PACK_TD: | |
19094 | case MISC_BUILTIN_PACK_V1TI: | |
19095 | h.uns_p[1] = 1; | |
19096 | h.uns_p[2] = 1; | |
19097 | break; | |
19098 | ||
19099 | default: | |
19100 | break; | |
19101 | } | |
19102 | ||
19103 | /* Figure out how many args are present. */ | |
19104 | while (num_args > 0 && h.mode[num_args] == VOIDmode) | |
19105 | num_args--; | |
19106 | ||
19107 | ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]]; | |
19108 | if (!ret_type && h.uns_p[0]) | |
19109 | ret_type = builtin_mode_to_type[h.mode[0]][0]; | |
19110 | ||
19111 | if (!ret_type) | |
19112 | fatal_error (input_location, | |
19113 | "internal error: builtin function %s had an unexpected " | |
19114 | "return type %s", name, GET_MODE_NAME (h.mode[0])); | |
19115 | ||
19116 | for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++) | |
19117 | arg_type[i] = NULL_TREE; | |
19118 | ||
19119 | for (i = 0; i < num_args; i++) | |
19120 | { | |
19121 | int m = (int) h.mode[i+1]; | |
19122 | int uns_p = h.uns_p[i+1]; | |
19123 | ||
19124 | arg_type[i] = builtin_mode_to_type[m][uns_p]; | |
19125 | if (!arg_type[i] && uns_p) | |
19126 | arg_type[i] = builtin_mode_to_type[m][0]; | |
19127 | ||
19128 | if (!arg_type[i]) | |
19129 | fatal_error (input_location, | |
19130 | "internal error: builtin function %s, argument %d " | |
19131 | "had unexpected argument type %s", name, i, | |
19132 | GET_MODE_NAME (m)); | |
19133 | } | |
19134 | ||
19135 | builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT); | |
19136 | if (*found == NULL) | |
19137 | { | |
19138 | h2 = ggc_alloc<builtin_hash_struct> (); | |
19139 | *h2 = h; | |
19140 | *found = h2; | |
19141 | ||
19142 | h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1], | |
19143 | arg_type[2], NULL_TREE); | |
19144 | } | |
19145 | ||
19146 | return (*found)->type; | |
19147 | } | |
19148 | ||
19149 | static void | |
19150 | rs6000_common_init_builtins (void) | |
19151 | { | |
19152 | const struct builtin_description *d; | |
19153 | size_t i; | |
19154 | ||
19155 | tree opaque_ftype_opaque = NULL_TREE; | |
19156 | tree opaque_ftype_opaque_opaque = NULL_TREE; | |
19157 | tree opaque_ftype_opaque_opaque_opaque = NULL_TREE; | |
19158 | tree v2si_ftype = NULL_TREE; | |
19159 | tree v2si_ftype_qi = NULL_TREE; | |
19160 | tree v2si_ftype_v2si_qi = NULL_TREE; | |
19161 | tree v2si_ftype_int_qi = NULL_TREE; | |
19162 | HOST_WIDE_INT builtin_mask = rs6000_builtin_mask; | |
19163 | ||
19164 | if (!TARGET_PAIRED_FLOAT) | |
19165 | { | |
19166 | builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node; | |
19167 | builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node; | |
19168 | } | |
19169 | ||
19170 | /* Paired and SPE builtins are only available if you build a compiler with | |
19171 | the appropriate options, so only create those builtins with the | |
19172 | appropriate compiler option. Create Altivec and VSX builtins on machines | |
19173 | with at least the general purpose extensions (970 and newer) to allow the | |
19174 | use of the target attribute.. */ | |
19175 | ||
19176 | if (TARGET_EXTRA_BUILTINS) | |
19177 | builtin_mask |= RS6000_BTM_COMMON; | |
19178 | ||
19179 | /* Add the ternary operators. */ | |
19180 | d = bdesc_3arg; | |
19181 | for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++) | |
19182 | { | |
19183 | tree type; | |
19184 | HOST_WIDE_INT mask = d->mask; | |
19185 | ||
19186 | if ((mask & builtin_mask) != mask) | |
19187 | { | |
19188 | if (TARGET_DEBUG_BUILTIN) | |
19189 | fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name); | |
19190 | continue; | |
19191 | } | |
19192 | ||
19193 | if (rs6000_overloaded_builtin_p (d->code)) | |
19194 | { | |
19195 | if (! (type = opaque_ftype_opaque_opaque_opaque)) | |
19196 | type = opaque_ftype_opaque_opaque_opaque | |
19197 | = build_function_type_list (opaque_V4SI_type_node, | |
19198 | opaque_V4SI_type_node, | |
19199 | opaque_V4SI_type_node, | |
19200 | opaque_V4SI_type_node, | |
19201 | NULL_TREE); | |
19202 | } | |
19203 | else | |
19204 | { | |
19205 | enum insn_code icode = d->icode; | |
19206 | if (d->name == 0) | |
19207 | { | |
19208 | if (TARGET_DEBUG_BUILTIN) | |
19209 | fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n", | |
19210 | (long unsigned)i); | |
19211 | ||
19212 | continue; | |
19213 | } | |
19214 | ||
19215 | if (icode == CODE_FOR_nothing) | |
19216 | { | |
19217 | if (TARGET_DEBUG_BUILTIN) | |
19218 | fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n", | |
19219 | d->name); | |
19220 | ||
19221 | continue; | |
19222 | } | |
19223 | ||
19224 | type = builtin_function_type (insn_data[icode].operand[0].mode, | |
19225 | insn_data[icode].operand[1].mode, | |
19226 | insn_data[icode].operand[2].mode, | |
19227 | insn_data[icode].operand[3].mode, | |
19228 | d->code, d->name); | |
19229 | } | |
19230 | ||
19231 | def_builtin (d->name, type, d->code); | |
19232 | } | |
19233 | ||
19234 | /* Add the binary operators. */ | |
19235 | d = bdesc_2arg; | |
19236 | for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++) | |
19237 | { | |
19238 | machine_mode mode0, mode1, mode2; | |
19239 | tree type; | |
19240 | HOST_WIDE_INT mask = d->mask; | |
19241 | ||
19242 | if ((mask & builtin_mask) != mask) | |
19243 | { | |
19244 | if (TARGET_DEBUG_BUILTIN) | |
19245 | fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name); | |
19246 | continue; | |
19247 | } | |
19248 | ||
19249 | if (rs6000_overloaded_builtin_p (d->code)) | |
19250 | { | |
19251 | if (! (type = opaque_ftype_opaque_opaque)) | |
19252 | type = opaque_ftype_opaque_opaque | |
19253 | = build_function_type_list (opaque_V4SI_type_node, | |
19254 | opaque_V4SI_type_node, | |
19255 | opaque_V4SI_type_node, | |
19256 | NULL_TREE); | |
19257 | } | |
19258 | else | |
19259 | { | |
19260 | enum insn_code icode = d->icode; | |
19261 | if (d->name == 0) | |
19262 | { | |
19263 | if (TARGET_DEBUG_BUILTIN) | |
19264 | fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n", | |
19265 | (long unsigned)i); | |
19266 | ||
19267 | continue; | |
19268 | } | |
19269 | ||
19270 | if (icode == CODE_FOR_nothing) | |
19271 | { | |
19272 | if (TARGET_DEBUG_BUILTIN) | |
19273 | fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n", | |
19274 | d->name); | |
19275 | ||
19276 | continue; | |
19277 | } | |
19278 | ||
19279 | mode0 = insn_data[icode].operand[0].mode; | |
19280 | mode1 = insn_data[icode].operand[1].mode; | |
19281 | mode2 = insn_data[icode].operand[2].mode; | |
19282 | ||
19283 | if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode) | |
19284 | { | |
19285 | if (! (type = v2si_ftype_v2si_qi)) | |
19286 | type = v2si_ftype_v2si_qi | |
19287 | = build_function_type_list (opaque_V2SI_type_node, | |
19288 | opaque_V2SI_type_node, | |
19289 | char_type_node, | |
19290 | NULL_TREE); | |
19291 | } | |
19292 | ||
19293 | else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT | |
19294 | && mode2 == QImode) | |
19295 | { | |
19296 | if (! (type = v2si_ftype_int_qi)) | |
19297 | type = v2si_ftype_int_qi | |
19298 | = build_function_type_list (opaque_V2SI_type_node, | |
19299 | integer_type_node, | |
19300 | char_type_node, | |
19301 | NULL_TREE); | |
19302 | } | |
19303 | ||
19304 | else | |
19305 | type = builtin_function_type (mode0, mode1, mode2, VOIDmode, | |
19306 | d->code, d->name); | |
19307 | } | |
19308 | ||
19309 | def_builtin (d->name, type, d->code); | |
19310 | } | |
19311 | ||
19312 | /* Add the simple unary operators. */ | |
19313 | d = bdesc_1arg; | |
19314 | for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++) | |
19315 | { | |
19316 | machine_mode mode0, mode1; | |
19317 | tree type; | |
19318 | HOST_WIDE_INT mask = d->mask; | |
19319 | ||
19320 | if ((mask & builtin_mask) != mask) | |
19321 | { | |
19322 | if (TARGET_DEBUG_BUILTIN) | |
19323 | fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name); | |
19324 | continue; | |
19325 | } | |
19326 | ||
19327 | if (rs6000_overloaded_builtin_p (d->code)) | |
19328 | { | |
19329 | if (! (type = opaque_ftype_opaque)) | |
19330 | type = opaque_ftype_opaque | |
19331 | = build_function_type_list (opaque_V4SI_type_node, | |
19332 | opaque_V4SI_type_node, | |
19333 | NULL_TREE); | |
19334 | } | |
19335 | else | |
19336 | { | |
19337 | enum insn_code icode = d->icode; | |
19338 | if (d->name == 0) | |
19339 | { | |
19340 | if (TARGET_DEBUG_BUILTIN) | |
19341 | fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n", | |
19342 | (long unsigned)i); | |
19343 | ||
19344 | continue; | |
19345 | } | |
19346 | ||
19347 | if (icode == CODE_FOR_nothing) | |
19348 | { | |
19349 | if (TARGET_DEBUG_BUILTIN) | |
19350 | fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n", | |
19351 | d->name); | |
19352 | ||
19353 | continue; | |
19354 | } | |
19355 | ||
19356 | mode0 = insn_data[icode].operand[0].mode; | |
19357 | mode1 = insn_data[icode].operand[1].mode; | |
19358 | ||
19359 | if (mode0 == V2SImode && mode1 == QImode) | |
19360 | { | |
19361 | if (! (type = v2si_ftype_qi)) | |
19362 | type = v2si_ftype_qi | |
19363 | = build_function_type_list (opaque_V2SI_type_node, | |
19364 | char_type_node, | |
19365 | NULL_TREE); | |
19366 | } | |
19367 | ||
19368 | else | |
19369 | type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode, | |
19370 | d->code, d->name); | |
19371 | } | |
19372 | ||
19373 | def_builtin (d->name, type, d->code); | |
19374 | } | |
19375 | ||
19376 | /* Add the simple no-argument operators. */ | |
19377 | d = bdesc_0arg; | |
19378 | for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++) | |
19379 | { | |
19380 | machine_mode mode0; | |
19381 | tree type; | |
19382 | HOST_WIDE_INT mask = d->mask; | |
19383 | ||
19384 | if ((mask & builtin_mask) != mask) | |
19385 | { | |
19386 | if (TARGET_DEBUG_BUILTIN) | |
19387 | fprintf (stderr, "rs6000_builtin, skip no-argument %s\n", d->name); | |
19388 | continue; | |
19389 | } | |
19390 | if (rs6000_overloaded_builtin_p (d->code)) | |
19391 | { | |
19392 | if (!opaque_ftype_opaque) | |
19393 | opaque_ftype_opaque | |
19394 | = build_function_type_list (opaque_V4SI_type_node, NULL_TREE); | |
19395 | type = opaque_ftype_opaque; | |
19396 | } | |
19397 | else | |
19398 | { | |
19399 | enum insn_code icode = d->icode; | |
19400 | if (d->name == 0) | |
19401 | { | |
19402 | if (TARGET_DEBUG_BUILTIN) | |
19403 | fprintf (stderr, "rs6000_builtin, bdesc_0arg[%lu] no name\n", | |
19404 | (long unsigned) i); | |
19405 | continue; | |
19406 | } | |
19407 | if (icode == CODE_FOR_nothing) | |
19408 | { | |
19409 | if (TARGET_DEBUG_BUILTIN) | |
19410 | fprintf (stderr, | |
19411 | "rs6000_builtin, skip no-argument %s (no code)\n", | |
19412 | d->name); | |
19413 | continue; | |
19414 | } | |
19415 | mode0 = insn_data[icode].operand[0].mode; | |
19416 | if (mode0 == V2SImode) | |
19417 | { | |
19418 | /* code for SPE */ | |
19419 | if (! (type = v2si_ftype)) | |
19420 | { | |
19421 | v2si_ftype | |
19422 | = build_function_type_list (opaque_V2SI_type_node, | |
19423 | NULL_TREE); | |
19424 | type = v2si_ftype; | |
19425 | } | |
19426 | } | |
19427 | else | |
19428 | type = builtin_function_type (mode0, VOIDmode, VOIDmode, VOIDmode, | |
19429 | d->code, d->name); | |
19430 | } | |
19431 | def_builtin (d->name, type, d->code); | |
19432 | } | |
19433 | } | |
19434 | ||
/* Set up AIX/Darwin/64-bit Linux quad floating point routines for MODE,
   which is an IBM double-double (two DFmode values) long double mode.
   When not in XL-compatibility mode the GCC-supplied __gcc_q* libgcc
   routines are used; under -mxl-compat the IBM XL compiler's _xlq*
   arithmetic entry points are used instead (XL only provides the four
   basic arithmetic operations).  */
static void
init_float128_ibm (machine_mode mode)
{
  if (!TARGET_XL_COMPAT)
    {
      set_optab_libfunc (add_optab, mode, "__gcc_qadd");
      set_optab_libfunc (sub_optab, mode, "__gcc_qsub");
      set_optab_libfunc (smul_optab, mode, "__gcc_qmul");
      set_optab_libfunc (sdiv_optab, mode, "__gcc_qdiv");

      /* When hard FP (or E500 double) is available the negation,
	 comparison and conversion operations are open-coded, so only
	 register their soft-float library fallbacks otherwise.  */
      if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
	{
	  set_optab_libfunc (neg_optab, mode, "__gcc_qneg");
	  set_optab_libfunc (eq_optab, mode, "__gcc_qeq");
	  set_optab_libfunc (ne_optab, mode, "__gcc_qne");
	  set_optab_libfunc (gt_optab, mode, "__gcc_qgt");
	  set_optab_libfunc (ge_optab, mode, "__gcc_qge");
	  set_optab_libfunc (lt_optab, mode, "__gcc_qlt");
	  set_optab_libfunc (le_optab, mode, "__gcc_qle");

	  set_conv_libfunc (sext_optab, mode, SFmode, "__gcc_stoq");
	  set_conv_libfunc (sext_optab, mode, DFmode, "__gcc_dtoq");
	  set_conv_libfunc (trunc_optab, SFmode, mode, "__gcc_qtos");
	  set_conv_libfunc (trunc_optab, DFmode, mode, "__gcc_qtod");
	  set_conv_libfunc (sfix_optab, SImode, mode, "__gcc_qtoi");
	  set_conv_libfunc (ufix_optab, SImode, mode, "__gcc_qtou");
	  set_conv_libfunc (sfloat_optab, mode, SImode, "__gcc_itoq");
	  set_conv_libfunc (ufloat_optab, mode, SImode, "__gcc_utoq");
	}

      if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
	set_optab_libfunc (unord_optab, mode, "__gcc_qunord");
    }
  else
    {
      set_optab_libfunc (add_optab, mode, "_xlqadd");
      set_optab_libfunc (sub_optab, mode, "_xlqsub");
      set_optab_libfunc (smul_optab, mode, "_xlqmul");
      set_optab_libfunc (sdiv_optab, mode, "_xlqdiv");
    }

  /* Add various conversions for IFmode to use the traditional TFmode
     names.  */
  if (mode == IFmode)
    {
      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdtf2");
      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddtf2");
      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunctftd2");
      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunctfsd2");
      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd2");
      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdtf2");

      /* 128-bit integer conversions only exist for 64-bit targets.  */
      if (TARGET_POWERPC64)
	{
	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti");
	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunstfti");
	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattitf");
	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntitf");
	}
    }
}
19497 | ||
/* Set up IEEE 128-bit floating point routines.  Use different names if the
   arguments can be passed in a vector register.  The historical PowerPC
   implementation of IEEE 128-bit floating point used _q_<op> for the names, so
   continue to use that if we aren't using vector registers to pass IEEE
   128-bit floating point.  */

static void
init_float128_ieee (machine_mode mode)
{
  if (FLOAT128_VECTOR_P (mode))
    {
      /* __*kf* names from libgcc's software IEEE 128-bit support.  */
      set_optab_libfunc (add_optab, mode, "__addkf3");
      set_optab_libfunc (sub_optab, mode, "__subkf3");
      set_optab_libfunc (neg_optab, mode, "__negkf2");
      set_optab_libfunc (smul_optab, mode, "__mulkf3");
      set_optab_libfunc (sdiv_optab, mode, "__divkf3");
      set_optab_libfunc (sqrt_optab, mode, "__sqrtkf2");
      /* NOTE(review): "__abstkf2" looks like a typo for "__abskf2";
	 confirm against the libgcc symbol before relying on abs going
	 through a libcall.  */
      set_optab_libfunc (abs_optab, mode, "__abstkf2");

      set_optab_libfunc (eq_optab, mode, "__eqkf2");
      set_optab_libfunc (ne_optab, mode, "__nekf2");
      set_optab_libfunc (gt_optab, mode, "__gtkf2");
      set_optab_libfunc (ge_optab, mode, "__gekf2");
      set_optab_libfunc (lt_optab, mode, "__ltkf2");
      set_optab_libfunc (le_optab, mode, "__lekf2");
      set_optab_libfunc (unord_optab, mode, "__unordkf2");

      set_conv_libfunc (sext_optab, mode, SFmode, "__extendsfkf2");
      set_conv_libfunc (sext_optab, mode, DFmode, "__extenddfkf2");
      set_conv_libfunc (trunc_optab, SFmode, mode, "__trunckfsf2");
      set_conv_libfunc (trunc_optab, DFmode, mode, "__trunckfdf2");

      /* IBM double-double <-> IEEE 128-bit conversions reuse the tf names,
	 since IFmode/TFmode-as-IBM is the "tf" format here.  */
      set_conv_libfunc (sext_optab, mode, IFmode, "__extendtfkf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (sext_optab, mode, TFmode, "__extendtfkf2");

      set_conv_libfunc (trunc_optab, IFmode, mode, "__trunckftf2");
      if (mode != TFmode && FLOAT128_IBM_P (TFmode))
	set_conv_libfunc (trunc_optab, TFmode, mode, "__trunckftf2");

      /* Decimal float conversions.  */
      set_conv_libfunc (sext_optab, mode, SDmode, "__dpd_extendsdkf2");
      set_conv_libfunc (sext_optab, mode, DDmode, "__dpd_extendddkf2");
      set_conv_libfunc (trunc_optab, mode, TDmode, "__dpd_trunckftd2");
      set_conv_libfunc (trunc_optab, SDmode, mode, "__dpd_trunckfsd2");
      set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunckfdd2");
      set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtdkf2");

      /* Integer <-> IEEE 128-bit conversions.  */
      set_conv_libfunc (sfix_optab, SImode, mode, "__fixkfsi");
      set_conv_libfunc (ufix_optab, SImode, mode, "__fixunskfsi");
      set_conv_libfunc (sfix_optab, DImode, mode, "__fixkfdi");
      set_conv_libfunc (ufix_optab, DImode, mode, "__fixunskfdi");

      set_conv_libfunc (sfloat_optab, mode, SImode, "__floatsikf");
      set_conv_libfunc (ufloat_optab, mode, SImode, "__floatunsikf");
      set_conv_libfunc (sfloat_optab, mode, DImode, "__floatdikf");
      set_conv_libfunc (ufloat_optab, mode, DImode, "__floatundikf");

      /* 128-bit integer conversions only exist for 64-bit targets.  */
      if (TARGET_POWERPC64)
	{
	  set_conv_libfunc (sfix_optab, TImode, mode, "__fixkfti");
	  set_conv_libfunc (ufix_optab, TImode, mode, "__fixunskfti");
	  set_conv_libfunc (sfloat_optab, mode, TImode, "__floattikf");
	  set_conv_libfunc (ufloat_optab, mode, TImode, "__floatuntikf");
	}
    }

  else
    {
      /* Historical _q_<op> names used when IEEE 128-bit values are not
	 passed in vector registers.  */
      set_optab_libfunc (add_optab, mode, "_q_add");
      set_optab_libfunc (sub_optab, mode, "_q_sub");
      set_optab_libfunc (neg_optab, mode, "_q_neg");
      set_optab_libfunc (smul_optab, mode, "_q_mul");
      set_optab_libfunc (sdiv_optab, mode, "_q_div");
      if (TARGET_PPC_GPOPT)
	set_optab_libfunc (sqrt_optab, mode, "_q_sqrt");

      set_optab_libfunc (eq_optab, mode, "_q_feq");
      set_optab_libfunc (ne_optab, mode, "_q_fne");
      set_optab_libfunc (gt_optab, mode, "_q_fgt");
      set_optab_libfunc (ge_optab, mode, "_q_fge");
      set_optab_libfunc (lt_optab, mode, "_q_flt");
      set_optab_libfunc (le_optab, mode, "_q_fle");

      set_conv_libfunc (sext_optab, mode, SFmode, "_q_stoq");
      set_conv_libfunc (sext_optab, mode, DFmode, "_q_dtoq");
      set_conv_libfunc (trunc_optab, SFmode, mode, "_q_qtos");
      set_conv_libfunc (trunc_optab, DFmode, mode, "_q_qtod");
      set_conv_libfunc (sfix_optab, SImode, mode, "_q_qtoi");
      set_conv_libfunc (ufix_optab, SImode, mode, "_q_qtou");
      set_conv_libfunc (sfloat_optab, mode, SImode, "_q_itoq");
      set_conv_libfunc (ufloat_optab, mode, SImode, "_q_utoq");
    }
}
19591 | ||
19592 | static void | |
19593 | rs6000_init_libfuncs (void) | |
19594 | { | |
19595 | /* __float128 support. */ | |
19596 | if (TARGET_FLOAT128_TYPE) | |
19597 | { | |
19598 | init_float128_ibm (IFmode); | |
19599 | init_float128_ieee (KFmode); | |
19600 | } | |
19601 | ||
19602 | /* AIX/Darwin/64-bit Linux quad floating point routines. */ | |
19603 | if (TARGET_LONG_DOUBLE_128) | |
19604 | { | |
19605 | if (!TARGET_IEEEQUAD) | |
19606 | init_float128_ibm (TFmode); | |
19607 | ||
19608 | /* IEEE 128-bit including 32-bit SVR4 quad floating point routines. */ | |
19609 | else | |
19610 | init_float128_ieee (TFmode); | |
19611 | } | |
19612 | } | |
19613 | ||
19614 | \f | |
/* Expand a block clear operation, and return 1 if successful.  Return 0
   if we should let the compiler generate normal code (a memset call).

   operands[0] is the destination
   operands[1] is the length
   operands[3] is the alignment */

int
expand_block_clear (rtx operands[])
{
  rtx orig_dest = operands[0];
  rtx bytes_rtx = operands[1];
  rtx align_rtx = operands[3];
  bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
  HOST_WIDE_INT align;
  HOST_WIDE_INT bytes;
  int offset;
  int clear_bytes;
  int clear_step;

  /* If this is not a fixed size clear, punt and let memset be called.  */
  if (! constp)
    return 0;

  /* This must be a fixed size alignment  */
  gcc_assert (GET_CODE (align_rtx) == CONST_INT);
  align = INTVAL (align_rtx) * BITS_PER_UNIT;

  /* Anything to clear? */
  bytes = INTVAL (bytes_rtx);
  if (bytes <= 0)
    return 1;

  /* Use the builtin memset after a point, to avoid huge code bloat.
     When optimize_size, avoid any significant code bloat; calling
     memset is about 4 instructions, so allow for one instruction to
     load zero and three to do clearing.  */
  /* clear_step is the widest store (in bytes) the target can use.  */
  if (TARGET_ALTIVEC && align >= 128)
    clear_step = 16;
  else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
    clear_step = 8;
  else if (TARGET_SPE && align >= 64)
    clear_step = 8;
  else
    clear_step = 4;

  if (optimize_size && bytes > 3 * clear_step)
    return 0;
  if (! optimize_size && bytes > 8 * clear_step)
    return 0;

  /* Emit stores widest-first; each iteration picks the largest store
     that alignment and the remaining byte count permit.  */
  for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
    {
      machine_mode mode = BLKmode;
      rtx dest;

      if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
	{
	  clear_bytes = 16;
	  mode = V4SImode;
	}
      else if (bytes >= 8 && TARGET_SPE && align >= 64)
	{
	  clear_bytes = 8;
	  mode = V2SImode;
	}
      else if (bytes >= 8 && TARGET_POWERPC64
	       && (align >= 64 || !STRICT_ALIGNMENT))
	{
	  clear_bytes = 8;
	  mode = DImode;
	  if (offset == 0 && align < 64)
	    {
	      rtx addr;

	      /* If the address form is reg+offset with offset not a
		 multiple of four, reload into reg indirect form here
		 rather than waiting for reload.  This way we get one
		 reload, not one per store.  */
	      addr = XEXP (orig_dest, 0);
	      if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
		  && GET_CODE (XEXP (addr, 1)) == CONST_INT
		  && (INTVAL (XEXP (addr, 1)) & 3) != 0)
		{
		  addr = copy_addr_to_reg (addr);
		  orig_dest = replace_equiv_address (orig_dest, addr);
		}
	    }
	}
      else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
	{			/* move 4 bytes */
	  clear_bytes = 4;
	  mode = SImode;
	}
      else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
	{			/* move 2 bytes */
	  clear_bytes = 2;
	  mode = HImode;
	}
      else /* move 1 byte at a time */
	{
	  clear_bytes = 1;
	  mode = QImode;
	}

      dest = adjust_address (orig_dest, mode, offset);

      /* Store zero of the chosen width at the current offset.  */
      emit_move_insn (dest, CONST0_RTX (mode));
    }

  return 1;
}
19727 | ||
19728 | /* Emit a potentially record-form instruction, setting DST from SRC. | |
19729 | If DOT is 0, that is all; otherwise, set CCREG to the result of the | |
19730 | signed comparison of DST with zero. If DOT is 1, the generated RTL | |
19731 | doesn't care about the DST result; if DOT is 2, it does. If CCREG | |
19732 | is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and | |
19733 | a separate COMPARE. */ | |
19734 | ||
19735 | static void | |
19736 | rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg) | |
19737 | { | |
19738 | if (dot == 0) | |
19739 | { | |
19740 | emit_move_insn (dst, src); | |
19741 | return; | |
19742 | } | |
19743 | ||
19744 | if (cc_reg_not_cr0_operand (ccreg, CCmode)) | |
19745 | { | |
19746 | emit_move_insn (dst, src); | |
19747 | emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx)); | |
19748 | return; | |
19749 | } | |
19750 | ||
19751 | rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx)); | |
19752 | if (dot == 1) | |
19753 | { | |
19754 | rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst); | |
19755 | emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber))); | |
19756 | } | |
19757 | else | |
19758 | { | |
19759 | rtx set = gen_rtx_SET (dst, src); | |
19760 | emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set))); | |
19761 | } | |
19762 | } | |
19763 | ||
/* Figure out the correct instructions to generate to load data for
   block compare.  MODE is used for the read from memory, and
   data is zero extended if REG is wider than MODE.  If LE code
   is being generated, bswap loads are used.

   REG is the destination register to move the data into.
   MEM is the memory block being read.
   MODE is the mode of memory to use for the read.  */
static void
do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
{
  /* Dispatch on the destination width first, then the memory width.
     On little-endian, bytes are swapped to big-endian order so that the
     later subtract/compare orders bytes the same way on both ends.  */
  switch (GET_MODE (reg))
    {
    case E_DImode:
      switch (mode)
	{
	case E_QImode:
	  /* A single byte needs no swap.  */
	  emit_insn (gen_zero_extendqidi2 (reg, mem));
	  break;
	case E_HImode:
	  {
	    rtx src = mem;
	    if (!BYTES_BIG_ENDIAN)
	      {
		/* Byte-reverse into a temp, then zero-extend it.  */
		src = gen_reg_rtx (HImode);
		emit_insn (gen_bswaphi2 (src, mem));
	      }
	    emit_insn (gen_zero_extendhidi2 (reg, src));
	    break;
	  }
	case E_SImode:
	  {
	    rtx src = mem;
	    if (!BYTES_BIG_ENDIAN)
	      {
		src = gen_reg_rtx (SImode);
		emit_insn (gen_bswapsi2 (src, mem));
	      }
	    emit_insn (gen_zero_extendsidi2 (reg, src));
	  }
	  break;
	case E_DImode:
	  /* Same width: a (possibly byte-reversed) move suffices.  */
	  if (!BYTES_BIG_ENDIAN)
	    emit_insn (gen_bswapdi2 (reg, mem));
	  else
	    emit_insn (gen_movdi (reg, mem));
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case E_SImode:
      switch (mode)
	{
	case E_QImode:
	  emit_insn (gen_zero_extendqisi2 (reg, mem));
	  break;
	case E_HImode:
	  {
	    rtx src = mem;
	    if (!BYTES_BIG_ENDIAN)
	      {
		src = gen_reg_rtx (HImode);
		emit_insn (gen_bswaphi2 (src, mem));
	      }
	    emit_insn (gen_zero_extendhisi2 (reg, src));
	    break;
	  }
	case E_SImode:
	  if (!BYTES_BIG_ENDIAN)
	    emit_insn (gen_bswapsi2 (reg, mem));
	  else
	    emit_insn (gen_movsi (reg, mem));
	  break;
	case E_DImode:
	  /* DImode is larger than the destination reg so is not expected.  */
	  gcc_unreachable ();
	  break;
	default:
	  gcc_unreachable ();
	}
      break;
    default:
      gcc_unreachable ();
      break;
    }
}
19852 | ||
/* Select the mode to be used for reading the next chunk of bytes
   in the compare.

   OFFSET is the current read offset from the beginning of the block.
   BYTES is the number of bytes remaining to be read.
   ALIGN is the minimum alignment of the memory blocks being compared in bytes.
   WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
   the largest allowable mode.  */
static machine_mode
select_block_compare_mode (unsigned HOST_WIDE_INT offset,
			   unsigned HOST_WIDE_INT bytes,
			   unsigned HOST_WIDE_INT align, bool word_mode_ok)
{
  /* First see if we can do a whole load unit
     as that will be more efficient than a larger load + shift.  */

  /* If big, use biggest chunk.
     If exactly chunk size, use that size.
     If remainder can be done in one piece with shifting, do that.
     Do largest chunk possible without violating alignment rules.  */

  /* The most we can read without potential page crossing.  */
  unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align);

  /* NOTE: the order of these tests is the algorithm -- each later case
     assumes the earlier ones did not match.  */
  if (word_mode_ok && bytes >= UNITS_PER_WORD)
    return word_mode;
  else if (bytes == GET_MODE_SIZE (SImode))
    return SImode;
  else if (bytes == GET_MODE_SIZE (HImode))
    return HImode;
  else if (bytes == GET_MODE_SIZE (QImode))
    return QImode;
  else if (bytes < GET_MODE_SIZE (SImode)
	   && offset >= GET_MODE_SIZE (SImode) - bytes)
    /* This matches the case were we have SImode and 3 bytes
       and offset >= 1 and permits us to move back one and overlap
       with the previous read, thus avoiding having to shift
       unwanted bytes off of the input.  */
    return SImode;
  else if (word_mode_ok && bytes < UNITS_PER_WORD
	   && offset >= UNITS_PER_WORD-bytes)
    /* Similarly, if we can use DImode it will get matched here and
       can do an overlapping read that ends at the end of the block.  */
    return word_mode;
  else if (word_mode_ok && maxread >= UNITS_PER_WORD)
    /* It is safe to do all remaining in one load of largest size,
       possibly with a shift to get rid of unwanted bytes.  */
    return word_mode;
  else if (maxread >= GET_MODE_SIZE (SImode))
    /* It is safe to do all remaining in one SImode load,
       possibly with a shift to get rid of unwanted bytes.  */
    return SImode;
  else if (bytes > GET_MODE_SIZE (SImode))
    return SImode;
  else if (bytes > GET_MODE_SIZE (HImode))
    return HImode;

  /* final fallback is do one byte */
  return QImode;
}
19913 | ||
19914 | /* Compute the alignment of pointer+OFFSET where the original alignment | |
19915 | of pointer was BASE_ALIGN. */ | |
19916 | static unsigned HOST_WIDE_INT | |
19917 | compute_current_alignment (unsigned HOST_WIDE_INT base_align, | |
19918 | unsigned HOST_WIDE_INT offset) | |
19919 | { | |
19920 | if (offset == 0) | |
19921 | return base_align; | |
19922 | return min (base_align, offset & -offset); | |
19923 | } | |
19924 | ||
19925 | /* Expand a block compare operation, and return true if successful. | |
19926 | Return false if we should let the compiler generate normal code, | |
19927 | probably a memcmp call. | |
19928 | ||
19929 | OPERANDS[0] is the target (result). | |
19930 | OPERANDS[1] is the first source. | |
19931 | OPERANDS[2] is the second source. | |
19932 | OPERANDS[3] is the length. | |
19933 | OPERANDS[4] is the alignment. */ | |
19934 | bool | |
19935 | expand_block_compare (rtx operands[]) | |
19936 | { | |
19937 | rtx target = operands[0]; | |
19938 | rtx orig_src1 = operands[1]; | |
19939 | rtx orig_src2 = operands[2]; | |
19940 | rtx bytes_rtx = operands[3]; | |
19941 | rtx align_rtx = operands[4]; | |
19942 | HOST_WIDE_INT cmp_bytes = 0; | |
19943 | rtx src1 = orig_src1; | |
19944 | rtx src2 = orig_src2; | |
19945 | ||
19946 | /* This case is complicated to handle because the subtract | |
19947 | with carry instructions do not generate the 64-bit | |
19948 | carry and so we must emit code to calculate it ourselves. | |
19949 | We choose not to implement this yet. */ | |
19950 | if (TARGET_32BIT && TARGET_POWERPC64) | |
19951 | return false; | |
19952 | ||
19953 | /* If this is not a fixed size compare, just call memcmp. */ | |
19954 | if (!CONST_INT_P (bytes_rtx)) | |
19955 | return false; | |
19956 | ||
19957 | /* This must be a fixed size alignment. */ | |
19958 | if (!CONST_INT_P (align_rtx)) | |
19959 | return false; | |
19960 | ||
19961 | unsigned int base_align = UINTVAL (align_rtx) / BITS_PER_UNIT; | |
19962 | ||
e0bd6c9f RS |
19963 | /* rs6000_slow_unaligned_access -- don't do unaligned stuff. */ |
19964 | if (rs6000_slow_unaligned_access (word_mode, MEM_ALIGN (orig_src1)) | |
19965 | || rs6000_slow_unaligned_access (word_mode, MEM_ALIGN (orig_src2))) | |
83349046 SB |
19966 | return false; |
19967 | ||
19968 | gcc_assert (GET_MODE (target) == SImode); | |
19969 | ||
19970 | /* Anything to move? */ | |
19971 | unsigned HOST_WIDE_INT bytes = UINTVAL (bytes_rtx); | |
19972 | if (bytes == 0) | |
19973 | return true; | |
19974 | ||
19975 | /* The code generated for p7 and older is not faster than glibc | |
19976 | memcmp if alignment is small and length is not short, so bail | |
19977 | out to avoid those conditions. */ | |
19978 | if (!TARGET_EFFICIENT_OVERLAPPING_UNALIGNED | |
19979 | && ((base_align == 1 && bytes > 16) | |
19980 | || (base_align == 2 && bytes > 32))) | |
19981 | return false; | |
19982 | ||
19983 | rtx tmp_reg_src1 = gen_reg_rtx (word_mode); | |
19984 | rtx tmp_reg_src2 = gen_reg_rtx (word_mode); | |
19985 | /* P7/P8 code uses cond for subfc. but P9 uses | |
19986 | it for cmpld which needs CCUNSmode. */ | |
19987 | rtx cond; | |
19988 | if (TARGET_P9_MISC) | |
19989 | cond = gen_reg_rtx (CCUNSmode); | |
19990 | else | |
19991 | cond = gen_reg_rtx (CCmode); | |
19992 | ||
19993 | /* If we have an LE target without ldbrx and word_mode is DImode, | |
19994 | then we must avoid using word_mode. */ | |
19995 | int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX | |
19996 | && word_mode == DImode); | |
19997 | ||
19998 | /* Strategy phase. How many ops will this take and should we expand it? */ | |
19999 | ||
20000 | unsigned HOST_WIDE_INT offset = 0; | |
20001 | machine_mode load_mode = | |
20002 | select_block_compare_mode (offset, bytes, base_align, word_mode_ok); | |
20003 | unsigned int load_mode_size = GET_MODE_SIZE (load_mode); | |
20004 | ||
20005 | /* We don't want to generate too much code. */ | |
20006 | unsigned HOST_WIDE_INT max_bytes = | |
20007 | load_mode_size * (unsigned HOST_WIDE_INT) rs6000_block_compare_inline_limit; | |
20008 | if (!IN_RANGE (bytes, 1, max_bytes)) | |
20009 | return false; | |
20010 | ||
20011 | bool generate_6432_conversion = false; | |
20012 | rtx convert_label = NULL; | |
20013 | rtx final_label = NULL; | |
20014 | ||
20015 | /* Example of generated code for 18 bytes aligned 1 byte. | |
20016 | Compiled with -fno-reorder-blocks for clarity. | |
20017 | ldbrx 10,31,8 | |
20018 | ldbrx 9,7,8 | |
20019 | subfc. 9,9,10 | |
20020 | bne 0,.L6487 | |
20021 | addi 9,12,8 | |
20022 | addi 5,11,8 | |
20023 | ldbrx 10,0,9 | |
20024 | ldbrx 9,0,5 | |
20025 | subfc. 9,9,10 | |
20026 | bne 0,.L6487 | |
20027 | addi 9,12,16 | |
20028 | lhbrx 10,0,9 | |
20029 | addi 9,11,16 | |
20030 | lhbrx 9,0,9 | |
20031 | subf 9,9,10 | |
20032 | b .L6488 | |
20033 | .p2align 4,,15 | |
20034 | .L6487: #convert_label | |
20035 | popcntd 9,9 | |
20036 | subfe 10,10,10 | |
20037 | or 9,9,10 | |
20038 | .L6488: #final_label | |
20039 | extsw 10,9 | |
20040 | ||
20041 | We start off with DImode for two blocks that jump to the DI->SI conversion | |
20042 | if the difference is found there, then a final block of HImode that skips | |
20043 | the DI->SI conversion. */ | |
20044 | ||
20045 | while (bytes > 0) | |
20046 | { | |
20047 | unsigned int align = compute_current_alignment (base_align, offset); | |
20048 | if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED) | |
20049 | load_mode = select_block_compare_mode (offset, bytes, align, | |
20050 | word_mode_ok); | |
20051 | else | |
20052 | load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok); | |
20053 | load_mode_size = GET_MODE_SIZE (load_mode); | |
20054 | if (bytes >= load_mode_size) | |
20055 | cmp_bytes = load_mode_size; | |
20056 | else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED) | |
20057 | { | |
20058 | /* Move this load back so it doesn't go past the end. | |
20059 | P8/P9 can do this efficiently. */ | |
20060 | unsigned int extra_bytes = load_mode_size - bytes; | |
20061 | cmp_bytes = bytes; | |
20062 | if (extra_bytes < offset) | |
20063 | { | |
20064 | offset -= extra_bytes; | |
20065 | cmp_bytes = load_mode_size; | |
20066 | bytes = cmp_bytes; | |
20067 | } | |
20068 | } | |
20069 | else | |
20070 | /* P7 and earlier can't do the overlapping load trick fast, | |
20071 | so this forces a non-overlapping load and a shift to get | |
20072 | rid of the extra bytes. */ | |
20073 | cmp_bytes = bytes; | |
20074 | ||
20075 | src1 = adjust_address (orig_src1, load_mode, offset); | |
20076 | src2 = adjust_address (orig_src2, load_mode, offset); | |
20077 | ||
20078 | if (!REG_P (XEXP (src1, 0))) | |
20079 | { | |
20080 | rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0)); | |
20081 | src1 = replace_equiv_address (src1, src1_reg); | |
20082 | } | |
20083 | set_mem_size (src1, cmp_bytes); | |
20084 | ||
20085 | if (!REG_P (XEXP (src2, 0))) | |
20086 | { | |
20087 | rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0)); | |
20088 | src2 = replace_equiv_address (src2, src2_reg); | |
20089 | } | |
20090 | set_mem_size (src2, cmp_bytes); | |
20091 | ||
20092 | do_load_for_compare (tmp_reg_src1, src1, load_mode); | |
20093 | do_load_for_compare (tmp_reg_src2, src2, load_mode); | |
20094 | ||
20095 | if (cmp_bytes < load_mode_size) | |
20096 | { | |
20097 | /* Shift unneeded bytes off. */ | |
20098 | rtx sh = GEN_INT (BITS_PER_UNIT * (load_mode_size - cmp_bytes)); | |
20099 | if (word_mode == DImode) | |
20100 | { | |
20101 | emit_insn (gen_lshrdi3 (tmp_reg_src1, tmp_reg_src1, sh)); | |
20102 | emit_insn (gen_lshrdi3 (tmp_reg_src2, tmp_reg_src2, sh)); | |
20103 | } | |
20104 | else | |
20105 | { | |
20106 | emit_insn (gen_lshrsi3 (tmp_reg_src1, tmp_reg_src1, sh)); | |
20107 | emit_insn (gen_lshrsi3 (tmp_reg_src2, tmp_reg_src2, sh)); | |
20108 | } | |
20109 | } | |
20110 | ||
20111 | int remain = bytes - cmp_bytes; | |
20112 | if (GET_MODE_SIZE (GET_MODE (target)) > GET_MODE_SIZE (load_mode)) | |
20113 | { | |
20114 | /* Target is larger than load size so we don't need to | |
20115 | reduce result size. */ | |
20116 | ||
20117 | /* We previously did a block that need 64->32 conversion but | |
20118 | the current block does not, so a label is needed to jump | |
20119 | to the end. */ | |
20120 | if (generate_6432_conversion && !final_label) | |
20121 | final_label = gen_label_rtx (); | |
20122 | ||
20123 | if (remain > 0) | |
20124 | { | |
20125 | /* This is not the last block, branch to the end if the result | |
20126 | of this subtract is not zero. */ | |
20127 | if (!final_label) | |
20128 | final_label = gen_label_rtx (); | |
20129 | rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label); | |
20130 | rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2); | |
20131 | rtx cr = gen_reg_rtx (CCmode); | |
20132 | rs6000_emit_dot_insn (tmp_reg_src2, tmp, 2, cr); | |
20133 | emit_insn (gen_movsi (target, | |
20134 | gen_lowpart (SImode, tmp_reg_src2))); | |
20135 | rtx ne_rtx = gen_rtx_NE (VOIDmode, cr, const0_rtx); | |
20136 | rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx, | |
20137 | fin_ref, pc_rtx); | |
20138 | rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); | |
20139 | JUMP_LABEL (j) = final_label; | |
20140 | LABEL_NUSES (final_label) += 1; | |
20141 | } | |
20142 | else | |
20143 | { | |
20144 | if (word_mode == DImode) | |
20145 | { | |
20146 | emit_insn (gen_subdi3 (tmp_reg_src2, tmp_reg_src1, | |
20147 | tmp_reg_src2)); | |
20148 | emit_insn (gen_movsi (target, | |
20149 | gen_lowpart (SImode, tmp_reg_src2))); | |
20150 | } | |
20151 | else | |
20152 | emit_insn (gen_subsi3 (target, tmp_reg_src1, tmp_reg_src2)); | |
20153 | ||
20154 | if (final_label) | |
20155 | { | |
20156 | rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label); | |
20157 | rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref)); | |
20158 | JUMP_LABEL(j) = final_label; | |
20159 | LABEL_NUSES (final_label) += 1; | |
20160 | emit_barrier (); | |
20161 | } | |
20162 | } | |
20163 | } | |
20164 | else | |
20165 | { | |
20166 | /* Do we need a 64->32 conversion block? We need the 64->32 | |
20167 | conversion even if target size == load_mode size because | |
20168 | the subtract generates one extra bit. */ | |
20169 | generate_6432_conversion = true; | |
20170 | ||
20171 | if (remain > 0) | |
20172 | { | |
20173 | if (!convert_label) | |
20174 | convert_label = gen_label_rtx (); | |
20175 | ||
20176 | /* Compare to zero and branch to convert_label if not zero. */ | |
20177 | rtx cvt_ref = gen_rtx_LABEL_REF (VOIDmode, convert_label); | |
20178 | if (TARGET_P9_MISC) | |
20179 | { | |
20180 | /* Generate a compare, and convert with a setb later. */ | |
20181 | rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1, | |
20182 | tmp_reg_src2); | |
20183 | emit_insn (gen_rtx_SET (cond, cmp)); | |
20184 | } | |
20185 | else | |
20186 | /* Generate a subfc. and use the longer | |
20187 | sequence for conversion. */ | |
20188 | if (TARGET_64BIT) | |
20189 | emit_insn (gen_subfdi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2, | |
20190 | tmp_reg_src1, cond)); | |
20191 | else | |
20192 | emit_insn (gen_subfsi3_carry_dot2 (tmp_reg_src2, tmp_reg_src2, | |
20193 | tmp_reg_src1, cond)); | |
20194 | rtx ne_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx); | |
20195 | rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx, | |
20196 | cvt_ref, pc_rtx); | |
20197 | rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); | |
20198 | JUMP_LABEL(j) = convert_label; | |
20199 | LABEL_NUSES (convert_label) += 1; | |
20200 | } | |
20201 | else | |
20202 | { | |
20203 | /* Just do the subtract/compare. Since this is the last block | |
20204 | the convert code will be generated immediately following. */ | |
20205 | if (TARGET_P9_MISC) | |
20206 | { | |
20207 | rtx cmp = gen_rtx_COMPARE (CCUNSmode, tmp_reg_src1, | |
20208 | tmp_reg_src2); | |
20209 | emit_insn (gen_rtx_SET (cond, cmp)); | |
20210 | } | |
20211 | else | |
20212 | if (TARGET_64BIT) | |
20213 | emit_insn (gen_subfdi3_carry (tmp_reg_src2, tmp_reg_src2, | |
20214 | tmp_reg_src1)); | |
20215 | else | |
20216 | emit_insn (gen_subfsi3_carry (tmp_reg_src2, tmp_reg_src2, | |
20217 | tmp_reg_src1)); | |
20218 | } | |
20219 | } | |
20220 | ||
20221 | offset += cmp_bytes; | |
20222 | bytes -= cmp_bytes; | |
20223 | } | |
20224 | ||
20225 | if (generate_6432_conversion) | |
20226 | { | |
20227 | if (convert_label) | |
20228 | emit_label (convert_label); | |
20229 | ||
20230 | /* We need to produce DI result from sub, then convert to target SI | |
20231 | while maintaining <0 / ==0 / >0 properties. This sequence works: | |
20232 | subfc L,A,B | |
20233 | subfe H,H,H | |
20234 | popcntd L,L | |
20235 | rldimi L,H,6,0 | |
20236 | ||
20237 | This is an alternate one Segher cooked up if somebody | |
20238 | wants to expand this for something that doesn't have popcntd: | |
20239 | subfc L,a,b | |
20240 | subfe H,x,x | |
20241 | addic t,L,-1 | |
20242 | subfe v,t,L | |
20243 | or z,v,H | |
20244 | ||
20245 | And finally, p9 can just do this: | |
20246 | cmpld A,B | |
20247 | setb r */ | |
20248 | ||
20249 | if (TARGET_P9_MISC) | |
20250 | { | |
20251 | emit_insn (gen_setb_unsigned (target, cond)); | |
20252 | } | |
20253 | else | |
20254 | { | |
20255 | if (TARGET_64BIT) | |
20256 | { | |
20257 | rtx tmp_reg_ca = gen_reg_rtx (DImode); | |
20258 | emit_insn (gen_subfdi3_carry_in_xx (tmp_reg_ca)); | |
20259 | emit_insn (gen_popcntddi2 (tmp_reg_src2, tmp_reg_src2)); | |
20260 | emit_insn (gen_iordi3 (tmp_reg_src2, tmp_reg_src2, tmp_reg_ca)); | |
20261 | emit_insn (gen_movsi (target, gen_lowpart (SImode, tmp_reg_src2))); | |
20262 | } | |
20263 | else | |
20264 | { | |
20265 | rtx tmp_reg_ca = gen_reg_rtx (SImode); | |
20266 | emit_insn (gen_subfsi3_carry_in_xx (tmp_reg_ca)); | |
20267 | emit_insn (gen_popcntdsi2 (tmp_reg_src2, tmp_reg_src2)); | |
20268 | emit_insn (gen_iorsi3 (target, tmp_reg_src2, tmp_reg_ca)); | |
20269 | } | |
20270 | } | |
20271 | } | |
20272 | ||
20273 | if (final_label) | |
20274 | emit_label (final_label); | |
20275 | ||
20276 | gcc_assert (bytes == 0); | |
20277 | return true; | |
20278 | } | |
20279 | ||
20280 | /* Generate alignment check and branch code to set up for | |
20281 | strncmp when we don't have DI alignment. | |
20282 | STRNCMP_LABEL is the label to branch if there is a page crossing. | |
20283 | SRC is the string pointer to be examined. | |
20284 | BYTES is the max number of bytes to compare. */ | |
20285 | static void | |
20286 | expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes) | |
20287 | { | |
20288 | rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label); | |
20289 | rtx src_check = copy_addr_to_reg (XEXP (src, 0)); | |
20290 | if (GET_MODE (src_check) == SImode) | |
20291 | emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff))); | |
20292 | else | |
20293 | emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff))); | |
20294 | rtx cond = gen_reg_rtx (CCmode); | |
20295 | emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check, | |
20296 | GEN_INT (4096 - bytes))); | |
20297 | ||
20298 | rtx cmp_rtx = gen_rtx_LT (VOIDmode, cond, const0_rtx); | |
20299 | ||
20300 | rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx, | |
20301 | pc_rtx, lab_ref); | |
20302 | rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse)); | |
20303 | JUMP_LABEL (j) = strncmp_label; | |
20304 | LABEL_NUSES (strncmp_label) += 1; | |
20305 | } | |
20306 | ||
/* Expand a string compare operation with length, and return
   true if successful.  Return false if we should let the
   compiler generate normal code, probably a strncmp call.

   OPERANDS[0] is the target (result).
   OPERANDS[1] is the first source.
   OPERANDS[2] is the second source.
   If NO_LENGTH is zero, then:
   OPERANDS[3] is the length.
   OPERANDS[4] is the alignment in bytes.
   If NO_LENGTH is nonzero, then:
   OPERANDS[3] is the alignment in bytes.  */
bool
expand_strn_compare (rtx operands[], int no_length)
{
  rtx target = operands[0];
  rtx orig_src1 = operands[1];
  rtx orig_src2 = operands[2];
  rtx bytes_rtx, align_rtx;
  if (no_length)
    {
      bytes_rtx = NULL;
      align_rtx = operands[3];
    }
  else
    {
      bytes_rtx = operands[3];
      align_rtx = operands[4];
    }
  /* Number of bytes loaded/compared by the current inline chunk.  */
  unsigned HOST_WIDE_INT cmp_bytes = 0;
  rtx src1 = orig_src1;
  rtx src2 = orig_src2;

  /* If we have a length, it must be constant.  This simplifies things
     a bit as we don't have to generate code to check if we've exceeded
     the length.  Later this could be expanded to handle this case.  */
  if (!no_length && !CONST_INT_P (bytes_rtx))
    return false;

  /* This must be a fixed size alignment.  */
  if (!CONST_INT_P (align_rtx))
    return false;

  unsigned int base_align = UINTVAL (align_rtx);
  int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
  int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;

  /* rs6000_slow_unaligned_access -- don't do unaligned stuff.  */
  if (rs6000_slow_unaligned_access (word_mode, align1)
      || rs6000_slow_unaligned_access (word_mode, align2))
    return false;

  gcc_assert (GET_MODE (target) == SImode);

  /* If we have an LE target without ldbrx and word_mode is DImode,
     then we must avoid using word_mode.  */
  int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
		       && word_mode == DImode);

  unsigned int word_mode_size = GET_MODE_SIZE (word_mode);

  unsigned HOST_WIDE_INT offset = 0;
  unsigned HOST_WIDE_INT bytes; /* N from the strncmp args if available.  */
  unsigned HOST_WIDE_INT compare_length; /* How much to compare inline.  */
  if (no_length)
    /* Use this as a standin to determine the mode to use.  */
    bytes = rs6000_string_compare_inline_limit * word_mode_size;
  else
    bytes = UINTVAL (bytes_rtx);

  machine_mode load_mode =
    select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
  unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
  compare_length = rs6000_string_compare_inline_limit * load_mode_size;

  /* If we have equality at the end of the last compare and we have not
     found the end of the string, we need to call strcmp/strncmp to
     compare the remainder.  */
  bool equality_compare_rest = false;

  if (no_length)
    {
      bytes = compare_length;
      equality_compare_rest = true;
    }
  else
    {
      if (bytes <= compare_length)
	compare_length = bytes;
      else
	equality_compare_rest = true;
    }

  /* result_reg accumulates the signed byte difference.
     final_move_label is the join point that copies result_reg to
     TARGET; final_label follows everything, including the
     library-call paths that set TARGET directly.  */
  rtx result_reg = gen_reg_rtx (word_mode);
  rtx final_move_label = gen_label_rtx ();
  rtx final_label = gen_label_rtx ();
  rtx begin_compare_label = NULL;

  if (base_align < 8)
    {
      /* Generate code that checks distance to 4k boundary for this case.  */
      begin_compare_label = gen_label_rtx ();
      rtx strncmp_label = gen_label_rtx ();
      rtx jmp;

      /* Strncmp for power8 in glibc does this:
	 rldicl r8,r3,0,52
	 cmpldi cr7,r8,4096-16
	 bgt cr7,L(pagecross) */

      /* Make sure that the length we use for the alignment test and
	 the subsequent code generation are in agreement so we do not
	 go past the length we tested for a 4k boundary crossing.  */
      unsigned HOST_WIDE_INT align_test = compare_length;
      if (align_test < 8)
	{
	  align_test = HOST_WIDE_INT_1U << ceil_log2 (align_test);
	  base_align = align_test;
	}
      else
	{
	  align_test = ROUND_UP (align_test, 8);
	  base_align = 8;
	}

      if (align1 < 8)
	expand_strncmp_align_check (strncmp_label, src1, align_test);
      if (align2 < 8)
	expand_strncmp_align_check (strncmp_label, src2, align_test);

      /* Now generate the following sequence:
	 - branch to begin_compare
	 - strncmp_label
	 - call to strncmp
	 - branch to final_label
	 - begin_compare_label */

      rtx cmp_ref = gen_rtx_LABEL_REF (VOIDmode, begin_compare_label);
      jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, cmp_ref));
      JUMP_LABEL (jmp) = begin_compare_label;
      LABEL_NUSES (begin_compare_label) += 1;
      emit_barrier ();

      emit_label (strncmp_label);

      if (!REG_P (XEXP (src1, 0)))
	{
	  rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
	  src1 = replace_equiv_address (src1, src1_reg);
	}

      if (!REG_P (XEXP (src2, 0)))
	{
	  rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
	  src2 = replace_equiv_address (src2, src2_reg);
	}

      if (no_length)
	{
	  tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
	  emit_library_call_value (XEXP (DECL_RTL (fun), 0),
				   target, LCT_NORMAL, GET_MODE (target),
				   force_reg (Pmode, XEXP (src1, 0)), Pmode,
				   force_reg (Pmode, XEXP (src2, 0)), Pmode);
	}
      else
	{
	  /* -m32 -mpowerpc64 results in word_mode being DImode even
	     though otherwise it is 32-bit.  The length arg to strncmp
	     is a size_t which will be the same size as pointers.  */
	  rtx len_rtx;
	  if (TARGET_64BIT)
	    len_rtx = gen_reg_rtx (DImode);
	  else
	    len_rtx = gen_reg_rtx (SImode);

	  emit_move_insn (len_rtx, bytes_rtx);

	  tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
	  emit_library_call_value (XEXP (DECL_RTL (fun), 0),
				   target, LCT_NORMAL, GET_MODE (target),
				   force_reg (Pmode, XEXP (src1, 0)), Pmode,
				   force_reg (Pmode, XEXP (src2, 0)), Pmode,
				   len_rtx, GET_MODE (len_rtx));
	}

      rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
      jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
      JUMP_LABEL (jmp) = final_label;
      LABEL_NUSES (final_label) += 1;
      emit_barrier ();
      emit_label (begin_compare_label);
    }

  /* Created lazily; target of branches from chunks that detected a
     difference, where the cleanup sequence below computes the result.  */
  rtx cleanup_label = NULL;
  rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
  rtx tmp_reg_src2 = gen_reg_rtx (word_mode);

  /* Generate sequence of ld/ldbrx, cmpb to compare out
     to the length specified.  */
  unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
  while (bytes_to_compare > 0)
    {
      /* Compare sequence:
	 check each 8B with: ld/ld cmpd bne
	 If equal, use rldicr/cmpb to check for zero byte.
	 cleanup code at end:
	 cmpb get byte that differs
	 cmpb look for zero byte
	 orc combine
	 cntlzd get bit of first zero/diff byte
	 subfic convert for rldcl use
	 rldcl rldcl extract diff/zero byte
	 subf subtract for final result

	 The last compare can branch around the cleanup code if the
	 result is zero because the strings are exactly equal.  */
      unsigned int align = compute_current_alignment (base_align, offset);
      if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
	load_mode = select_block_compare_mode (offset, bytes_to_compare, align,
					       word_mode_ok);
      else
	load_mode = select_block_compare_mode (0, bytes_to_compare, align,
					       word_mode_ok);
      load_mode_size = GET_MODE_SIZE (load_mode);
      if (bytes_to_compare >= load_mode_size)
	cmp_bytes = load_mode_size;
      else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
	{
	  /* Move this load back so it doesn't go past the end.
	     P8/P9 can do this efficiently.  */
	  unsigned int extra_bytes = load_mode_size - bytes_to_compare;
	  cmp_bytes = bytes_to_compare;
	  if (extra_bytes < offset)
	    {
	      offset -= extra_bytes;
	      cmp_bytes = load_mode_size;
	      bytes_to_compare = cmp_bytes;
	    }
	}
      else
	/* P7 and earlier can't do the overlapping load trick fast,
	   so this forces a non-overlapping load and a shift to get
	   rid of the extra bytes.  */
	cmp_bytes = bytes_to_compare;

      src1 = adjust_address (orig_src1, load_mode, offset);
      src2 = adjust_address (orig_src2, load_mode, offset);

      if (!REG_P (XEXP (src1, 0)))
	{
	  rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
	  src1 = replace_equiv_address (src1, src1_reg);
	}
      set_mem_size (src1, cmp_bytes);

      if (!REG_P (XEXP (src2, 0)))
	{
	  rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
	  src2 = replace_equiv_address (src2, src2_reg);
	}
      set_mem_size (src2, cmp_bytes);

      do_load_for_compare (tmp_reg_src1, src1, load_mode);
      do_load_for_compare (tmp_reg_src2, src2, load_mode);

      /* We must always left-align the data we read, and
	 clear any bytes to the right that are beyond the string.
	 Otherwise the cmpb sequence won't produce the correct
	 results.  The beginning of the compare will be done
	 with word_mode so will not have any extra shifts or
	 clear rights.  */

      if (load_mode_size < word_mode_size)
	{
	  /* Rotate left first.  */
	  rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
	  if (word_mode == DImode)
	    {
	      emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh));
	      emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh));
	    }
	  else
	    {
	      emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh));
	      emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh));
	    }
	}

      if (cmp_bytes < word_mode_size)
	{
	  /* Now clear right.  This plus the rotate can be
	     turned into a rldicr instruction.  */
	  HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
	  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
	  if (word_mode == DImode)
	    {
	      emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
	      emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
	    }
	  else
	    {
	      emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
	      emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
	    }
	}

      /* Cases to handle.  A and B are chunks of the two strings.
	 1: Not end of comparison:
	 A != B: branch to cleanup code to compute result.
	 A == B: check for 0 byte, next block if not found.
	 2: End of the inline comparison:
	 A != B: branch to cleanup code to compute result.
	 A == B: check for 0 byte, call strcmp/strncmp
	 3: compared requested N bytes:
	 A == B: branch to result 0.
	 A != B: cleanup code to compute result.  */

      unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;

      rtx dst_label;
      if (remain > 0 || equality_compare_rest)
	{
	  /* Branch to cleanup code, otherwise fall through to do
	     more compares.  */
	  if (!cleanup_label)
	    cleanup_label = gen_label_rtx ();
	  dst_label = cleanup_label;
	}
      else
	/* Branch to end and produce result of 0.  */
	dst_label = final_move_label;

      rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
      rtx cond = gen_reg_rtx (CCmode);

      /* Always produce the 0 result, it is needed if
	 cmpb finds a 0 byte in this chunk.  */
      rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
      rs6000_emit_dot_insn (result_reg, tmp, 1, cond);

      rtx cmp_rtx;
      if (remain == 0 && !equality_compare_rest)
	cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
      else
	cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);

      rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
					 lab_ref, pc_rtx);
      rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
      JUMP_LABEL (j) = dst_label;
      LABEL_NUSES (dst_label) += 1;

      if (remain > 0 || equality_compare_rest)
	{
	  /* Generate a cmpb to test for a 0 byte and branch
	     to final result if found.  */
	  rtx cmpb_zero = gen_reg_rtx (word_mode);
	  rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
	  rtx condz = gen_reg_rtx (CCmode);
	  rtx zero_reg = gen_reg_rtx (word_mode);
	  if (word_mode == SImode)
	    {
	      emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
	      emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
	      if (cmp_bytes < word_mode_size)
		{
		  /* Don't want to look at zero bytes past end.  */
		  HOST_WIDE_INT mb =
		    BITS_PER_UNIT * (word_mode_size - cmp_bytes);
		  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
		  emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask));
		}
	    }
	  else
	    {
	      emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
	      emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
	      if (cmp_bytes < word_mode_size)
		{
		  /* Don't want to look at zero bytes past end.  */
		  HOST_WIDE_INT mb =
		    BITS_PER_UNIT * (word_mode_size - cmp_bytes);
		  rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
		  emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask));
		}
	    }

	  emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
	  rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
	  rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
					     lab_ref_fin, pc_rtx);
	  rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
	  JUMP_LABEL (j2) = final_move_label;
	  LABEL_NUSES (final_move_label) += 1;

	}

      offset += cmp_bytes;
      bytes_to_compare -= cmp_bytes;
    }

  if (equality_compare_rest)
    {
      /* Update pointers past what has been compared already.  */
      src1 = adjust_address (orig_src1, load_mode, offset);
      src2 = adjust_address (orig_src2, load_mode, offset);

      if (!REG_P (XEXP (src1, 0)))
	{
	  rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
	  src1 = replace_equiv_address (src1, src1_reg);
	}
      set_mem_size (src1, cmp_bytes);

      if (!REG_P (XEXP (src2, 0)))
	{
	  rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
	  src2 = replace_equiv_address (src2, src2_reg);
	}
      set_mem_size (src2, cmp_bytes);

      /* Construct call to strcmp/strncmp to compare the rest of the string.  */
      if (no_length)
	{
	  tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
	  emit_library_call_value (XEXP (DECL_RTL (fun), 0),
				   target, LCT_NORMAL, GET_MODE (target),
				   force_reg (Pmode, XEXP (src1, 0)), Pmode,
				   force_reg (Pmode, XEXP (src2, 0)), Pmode);
	}
      else
	{
	  rtx len_rtx;
	  if (TARGET_64BIT)
	    len_rtx = gen_reg_rtx (DImode);
	  else
	    len_rtx = gen_reg_rtx (SImode);

	  /* Only the not-yet-compared tail is passed to strncmp.  */
	  emit_move_insn (len_rtx, GEN_INT (bytes - compare_length));
	  tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
	  emit_library_call_value (XEXP (DECL_RTL (fun), 0),
				   target, LCT_NORMAL, GET_MODE (target),
				   force_reg (Pmode, XEXP (src1, 0)), Pmode,
				   force_reg (Pmode, XEXP (src2, 0)), Pmode,
				   len_rtx, GET_MODE (len_rtx));
	}

      rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
      rtx jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, fin_ref));
      JUMP_LABEL (jmp) = final_label;
      LABEL_NUSES (final_label) += 1;
      emit_barrier ();
    }

  if (cleanup_label)
    emit_label (cleanup_label);

  /* Generate the final sequence that identifies the differing
     byte and generates the final result, taking into account
     zero bytes:

     cmpb cmpb_result1, src1, src2
     cmpb cmpb_result2, src1, zero
     orc cmpb_result1, cmp_result1, cmpb_result2
     cntlzd get bit of first zero/diff byte
     addi convert for rldcl use
     rldcl rldcl extract diff/zero byte
     subf subtract for final result
  */

  rtx cmpb_diff = gen_reg_rtx (word_mode);
  rtx cmpb_zero = gen_reg_rtx (word_mode);
  rtx rot_amt = gen_reg_rtx (word_mode);
  rtx zero_reg = gen_reg_rtx (word_mode);

  rtx rot1_1 = gen_reg_rtx (word_mode);
  rtx rot1_2 = gen_reg_rtx (word_mode);
  rtx rot2_1 = gen_reg_rtx (word_mode);
  rtx rot2_2 = gen_reg_rtx (word_mode);

  if (word_mode == SImode)
    {
      emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
      emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
      emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
      emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
      emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
      emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
      emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)))<;
      emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
			      gen_lowpart (SImode, rot_amt)));
      emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
      emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
			      gen_lowpart (SImode, rot_amt)));
      emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
      emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
    }
  else
    {
      emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
      emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
      emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
      emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
      emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
      emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
      emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
      /* NOTE(review): the rotate-amount operand is taken as the SImode
	 lowpart of rot_amt -- presumably rotldi3's shift operand is
	 SImode; confirm against the insn pattern.  */
      emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
			      gen_lowpart (SImode, rot_amt)));
      emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
      emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
			      gen_lowpart (SImode, rot_amt)));
      emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
      emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
    }

  emit_label (final_move_label);
  emit_insn (gen_movsi (target,
			gen_lowpart (SImode, result_reg)));
  emit_label (final_label);
  return true;
}
20829 | ||
20830 | /* Expand a block move operation, and return 1 if successful. Return 0 | |
20831 | if we should let the compiler generate normal code. | |
20832 | ||
20833 | operands[0] is the destination | |
20834 | operands[1] is the source | |
20835 | operands[2] is the length | |
20836 | operands[3] is the alignment */ | |
20837 | ||
20838 | #define MAX_MOVE_REG 4 | |
20839 | ||
int
expand_block_move (rtx operands[])
{
  rtx orig_dest = operands[0];
  rtx orig_src	= operands[1];
  rtx bytes_rtx	= operands[2];
  rtx align_rtx = operands[3];
  int constp	= (GET_CODE (bytes_rtx) == CONST_INT);
  int align;
  int bytes;
  int offset;
  int move_bytes;
  rtx stores[MAX_MOVE_REG];	/* Pending store insns, flushed in batches.  */
  int num_reg = 0;

  /* If this is not a fixed size move, just call memcpy */
  if (! constp)
    return 0;

  /* This must be a fixed size alignment */
  gcc_assert (GET_CODE (align_rtx) == CONST_INT);
  align = INTVAL (align_rtx) * BITS_PER_UNIT;

  /* Anything to move? */
  bytes = INTVAL (bytes_rtx);
  if (bytes <= 0)
    return 1;

  /* Past this size an out-of-line memcpy is assumed to win.  */
  if (bytes > rs6000_block_move_inline_limit)
    return 0;

  /* Each iteration picks the widest move the remaining size and the
     alignment allow, largest strategies first.  */
  for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
    {
      union {
	rtx (*movmemsi) (rtx, rtx, rtx, rtx);
	rtx (*mov) (rtx, rtx);
      } gen_func;
      machine_mode mode = BLKmode;
      rtx src, dest;

      /* Altivec first, since it will be faster than a string move
	 when it applies, and usually not significantly larger.  */
      if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
	{
	  move_bytes = 16;
	  mode = V4SImode;
	  gen_func.mov = gen_movv4si;
	}
      else if (TARGET_SPE && bytes >= 8 && align >= 64)
	{
	  move_bytes = 8;
	  mode = V2SImode;
	  gen_func.mov = gen_movv2si;
	}
      /* String (lsw/stsw) variants need runs of call-clobbered GPRs
	 starting at r5; any fixed register in the run rules them out.  */
      else if (TARGET_STRING
	       && bytes > 24	/* move up to 32 bytes at a time */
	       && ! fixed_regs[5]
	       && ! fixed_regs[6]
	       && ! fixed_regs[7]
	       && ! fixed_regs[8]
	       && ! fixed_regs[9]
	       && ! fixed_regs[10]
	       && ! fixed_regs[11]
	       && ! fixed_regs[12])
	{
	  move_bytes = (bytes > 32) ? 32 : bytes;
	  gen_func.movmemsi = gen_movmemsi_8reg;
	}
      else if (TARGET_STRING
	       && bytes > 16	/* move up to 24 bytes at a time */
	       && ! fixed_regs[5]
	       && ! fixed_regs[6]
	       && ! fixed_regs[7]
	       && ! fixed_regs[8]
	       && ! fixed_regs[9]
	       && ! fixed_regs[10])
	{
	  move_bytes = (bytes > 24) ? 24 : bytes;
	  gen_func.movmemsi = gen_movmemsi_6reg;
	}
      else if (TARGET_STRING
	       && bytes > 8	/* move up to 16 bytes at a time */
	       && ! fixed_regs[5]
	       && ! fixed_regs[6]
	       && ! fixed_regs[7]
	       && ! fixed_regs[8])
	{
	  move_bytes = (bytes > 16) ? 16 : bytes;
	  gen_func.movmemsi = gen_movmemsi_4reg;
	}
      else if (bytes >= 8 && TARGET_POWERPC64
	       && (align >= 64 || !STRICT_ALIGNMENT))
	{
	  move_bytes = 8;
	  mode = DImode;
	  gen_func.mov = gen_movdi;
	  if (offset == 0 && align < 64)
	    {
	      rtx addr;

	      /* If the address form is reg+offset with offset not a
		 multiple of four, reload into reg indirect form here
		 rather than waiting for reload.  This way we get one
		 reload, not one per load and/or store.  */
	      addr = XEXP (orig_dest, 0);
	      if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
		  && GET_CODE (XEXP (addr, 1)) == CONST_INT
		  && (INTVAL (XEXP (addr, 1)) & 3) != 0)
		{
		  addr = copy_addr_to_reg (addr);
		  orig_dest = replace_equiv_address (orig_dest, addr);
		}
	      addr = XEXP (orig_src, 0);
	      if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
		  && GET_CODE (XEXP (addr, 1)) == CONST_INT
		  && (INTVAL (XEXP (addr, 1)) & 3) != 0)
		{
		  addr = copy_addr_to_reg (addr);
		  orig_src = replace_equiv_address (orig_src, addr);
		}
	    }
	}
      else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
	{			/* move up to 8 bytes at a time */
	  move_bytes = (bytes > 8) ? 8 : bytes;
	  gen_func.movmemsi = gen_movmemsi_2reg;
	}
      else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
	{			/* move 4 bytes */
	  move_bytes = 4;
	  mode = SImode;
	  gen_func.mov = gen_movsi;
	}
      else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
	{			/* move 2 bytes */
	  move_bytes = 2;
	  mode = HImode;
	  gen_func.mov = gen_movhi;
	}
      else if (TARGET_STRING && bytes > 1)
	{			/* move up to 4 bytes at a time */
	  move_bytes = (bytes > 4) ? 4 : bytes;
	  gen_func.movmemsi = gen_movmemsi_1reg;
	}
      else /* move 1 byte at a time */
	{
	  move_bytes = 1;
	  mode = QImode;
	  gen_func.mov = gen_movqi;
	}

      src = adjust_address (orig_src, mode, offset);
      dest = adjust_address (orig_dest, mode, offset);

      /* For register-sized moves, load now but queue the store so that
	 several loads can issue before their stores (see flush below).  */
      if (mode != BLKmode)
	{
	  rtx tmp_reg = gen_reg_rtx (mode);

	  emit_insn ((*gen_func.mov) (tmp_reg, src));
	  stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
	}

      /* Flush queued stores when the queue is full, at the end, or before
	 a string (BLKmode) move, which must not interleave with them.  */
      if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
	{
	  int i;
	  for (i = 0; i < num_reg; i++)
	    emit_insn (stores[i]);
	  num_reg = 0;
	}

      if (mode == BLKmode)
	{
	  /* Move the address into scratch registers.  The movmemsi
	     patterns require zero offset.  */
	  if (!REG_P (XEXP (src, 0)))
	    {
	      rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
	      src = replace_equiv_address (src, src_reg);
	    }
	  set_mem_size (src, move_bytes);

	  if (!REG_P (XEXP (dest, 0)))
	    {
	      rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
	      dest = replace_equiv_address (dest, dest_reg);
	    }
	  set_mem_size (dest, move_bytes);

	  emit_insn ((*gen_func.movmemsi) (dest, src,
					   GEN_INT (move_bytes & 31),
					   align_rtx));
	}
    }

  return 1;
}
21036 | ||
21037 | \f | |
21038 | /* Return a string to perform a load_multiple operation. | |
21039 | operands[0] is the vector. | |
21040 | operands[1] is the source address. | |
21041 | operands[2] is the first destination register. */ | |
21042 | ||
const char *
rs6000_output_load_multiple (rtx operands[3])
{
  /* We have to handle the case where the pseudo used to contain the address
     is assigned to one of the output registers.  */
  int i, j;
  int words = XVECLEN (operands[0], 0);
  rtx xop[10];

  /* A single word needs no load-multiple; use a plain lwz.  */
  if (XVECLEN (operands[0], 0) == 1)
    return "lwz %2,0(%1)";

  for (i = 0; i < words; i++)
    if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
      {
	if (i == words-1)
	  {
	    /* Address register overlaps the LAST destination: lswi the
	       first words-1 registers, then load the final word (which
	       clobbers the address register) with a separate lwz.  */
	    xop[0] = GEN_INT (4 * (words-1));
	    xop[1] = operands[1];
	    xop[2] = operands[2];
	    output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
	    return "";
	  }
	else if (i == 0)
	  {
	    /* Address register overlaps the FIRST destination: step the
	       address past word 0, lswi the remaining words into the
	       following registers, then load word 0 last.  */
	    xop[0] = GEN_INT (4 * (words-1));
	    xop[1] = operands[1];
	    xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
	    output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
	    return "";
	  }
	else
	  {
	    /* Overlap in the middle: fall back to individual lwz loads
	       for every other word, loading the overlapping word last so
	       the address stays live until the end.  */
	    for (j = 0; j < words; j++)
	      if (j != i)
		{
		  xop[0] = GEN_INT (j * 4);
		  xop[1] = operands[1];
		  xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
		  output_asm_insn ("lwz %2,%0(%1)", xop);
		}
	    xop[0] = GEN_INT (i * 4);
	    xop[1] = operands[1];
	    output_asm_insn ("lwz %1,%0(%1)", xop);
	    return "";
	  }
      }

  /* No overlap: a single load-string-word-immediate does the job.  */
  return "lswi %2,%1,%N0";
}
21093 | ||
21094 | \f | |
21095 | /* A validation routine: say whether CODE, a condition code, and MODE | |
21096 | match. The other alternatives either don't make sense or should | |
21097 | never be generated. */ | |
21098 | ||
21099 | void | |
21100 | validate_condition_mode (enum rtx_code code, machine_mode mode) | |
21101 | { | |
21102 | gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE | |
21103 | || GET_RTX_CLASS (code) == RTX_COMM_COMPARE) | |
21104 | && GET_MODE_CLASS (mode) == MODE_CC); | |
21105 | ||
21106 | /* These don't make sense. */ | |
21107 | gcc_assert ((code != GT && code != LT && code != GE && code != LE) | |
21108 | || mode != CCUNSmode); | |
21109 | ||
21110 | gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU) | |
21111 | || mode == CCUNSmode); | |
21112 | ||
21113 | gcc_assert (mode == CCFPmode | |
21114 | || (code != ORDERED && code != UNORDERED | |
21115 | && code != UNEQ && code != LTGT | |
21116 | && code != UNGT && code != UNLT | |
21117 | && code != UNGE && code != UNLE)); | |
21118 | ||
21119 | /* These should never be generated except for | |
21120 | flag_finite_math_only. */ | |
21121 | gcc_assert (mode != CCFPmode | |
21122 | || flag_finite_math_only | |
21123 | || (code != LE && code != GE | |
21124 | && code != UNEQ && code != LTGT | |
21125 | && code != UNGT && code != UNLT)); | |
21126 | ||
21127 | /* These are invalid; the information is not there. */ | |
21128 | gcc_assert (mode != CCEQmode || code == EQ || code == NE); | |
21129 | } | |
21130 | ||
21131 | \f | |
21132 | /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, | |
21133 | rldicl, rldicr, or rldic instruction in mode MODE. If so, if E is | |
21134 | not zero, store there the bit offset (counted from the right) where | |
21135 | the single stretch of 1 bits begins; and similarly for B, the bit | |
21136 | offset where it ends. */ | |
21137 | ||
21138 | bool | |
21139 | rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode) | |
21140 | { | |
21141 | unsigned HOST_WIDE_INT val = INTVAL (mask); | |
21142 | unsigned HOST_WIDE_INT bit; | |
21143 | int nb, ne; | |
21144 | int n = GET_MODE_PRECISION (mode); | |
21145 | ||
21146 | if (mode != DImode && mode != SImode) | |
21147 | return false; | |
21148 | ||
21149 | if (INTVAL (mask) >= 0) | |
21150 | { | |
21151 | bit = val & -val; | |
21152 | ne = exact_log2 (bit); | |
21153 | nb = exact_log2 (val + bit); | |
21154 | } | |
21155 | else if (val + 1 == 0) | |
21156 | { | |
21157 | nb = n; | |
21158 | ne = 0; | |
21159 | } | |
21160 | else if (val & 1) | |
21161 | { | |
21162 | val = ~val; | |
21163 | bit = val & -val; | |
21164 | nb = exact_log2 (bit); | |
21165 | ne = exact_log2 (val + bit); | |
21166 | } | |
21167 | else | |
21168 | { | |
21169 | bit = val & -val; | |
21170 | ne = exact_log2 (bit); | |
21171 | if (val + bit == 0) | |
21172 | nb = n; | |
21173 | else | |
21174 | nb = 0; | |
21175 | } | |
21176 | ||
21177 | nb--; | |
21178 | ||
21179 | if (nb < 0 || ne < 0 || nb >= n || ne >= n) | |
21180 | return false; | |
21181 | ||
21182 | if (b) | |
21183 | *b = nb; | |
21184 | if (e) | |
21185 | *e = ne; | |
21186 | ||
21187 | return true; | |
21188 | } | |
21189 | ||
21190 | /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl, | |
21191 | or rldicr instruction, to implement an AND with it in mode MODE. */ | |
21192 | ||
21193 | bool | |
21194 | rs6000_is_valid_and_mask (rtx mask, machine_mode mode) | |
21195 | { | |
21196 | int nb, ne; | |
21197 | ||
21198 | if (!rs6000_is_valid_mask (mask, &nb, &ne, mode)) | |
21199 | return false; | |
21200 | ||
21201 | /* For DImode, we need a rldicl, rldicr, or a rlwinm with mask that | |
21202 | does not wrap. */ | |
21203 | if (mode == DImode) | |
21204 | return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb)); | |
21205 | ||
21206 | /* For SImode, rlwinm can do everything. */ | |
21207 | if (mode == SImode) | |
21208 | return (nb < 32 && ne < 32); | |
21209 | ||
21210 | return false; | |
21211 | } | |
21212 | ||
21213 | /* Return the instruction template for an AND with mask in mode MODE, with | |
21214 | operands OPERANDS. If DOT is true, make it a record-form instruction. */ | |
21215 | ||
21216 | const char * | |
21217 | rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot) | |
21218 | { | |
21219 | int nb, ne; | |
21220 | ||
21221 | if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode)) | |
21222 | gcc_unreachable (); | |
21223 | ||
21224 | if (mode == DImode && ne == 0) | |
21225 | { | |
21226 | operands[3] = GEN_INT (63 - nb); | |
21227 | if (dot) | |
21228 | return "rldicl. %0,%1,0,%3"; | |
21229 | return "rldicl %0,%1,0,%3"; | |
21230 | } | |
21231 | ||
21232 | if (mode == DImode && nb == 63) | |
21233 | { | |
21234 | operands[3] = GEN_INT (63 - ne); | |
21235 | if (dot) | |
21236 | return "rldicr. %0,%1,0,%3"; | |
21237 | return "rldicr %0,%1,0,%3"; | |
21238 | } | |
21239 | ||
21240 | if (nb < 32 && ne < 32) | |
21241 | { | |
21242 | operands[3] = GEN_INT (31 - nb); | |
21243 | operands[4] = GEN_INT (31 - ne); | |
21244 | if (dot) | |
21245 | return "rlwinm. %0,%1,0,%3,%4"; | |
21246 | return "rlwinm %0,%1,0,%3,%4"; | |
21247 | } | |
21248 | ||
21249 | gcc_unreachable (); | |
21250 | } | |
21251 | ||
21252 | /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm, | |
21253 | rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with | |
21254 | shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */ | |
21255 | ||
bool
rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
{
  int nb, ne;

  /* The mask must be a single (possibly wrapped) run of ones.  */
  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  int n = GET_MODE_PRECISION (mode);

  /* SH stays -1 for a variable shift amount.  */
  int sh = -1;

  if (CONST_INT_P (XEXP (shift, 1)))
    {
      sh = INTVAL (XEXP (shift, 1));
      if (sh < 0 || sh >= n)
	return false;
    }

  rtx_code code = GET_CODE (shift);

  /* Convert any shift by 0 to a rotate, to simplify below code.  */
  if (sh == 0)
    code = ROTATE;

  /* Convert rotate to simple shift if we can, to make analysis simpler.  */
  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
    code = ASHIFT;
  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
    {
      code = LSHIFTRT;
      sh = n - sh;
    }

  /* DImode rotates need rld*.  */
  if (mode == DImode && code == ROTATE)
    return (nb == 63 || ne == 0 || ne == sh);

  /* SImode rotates need rlw*.  */
  if (mode == SImode && code == ROTATE)
    return (nb < 32 && ne < 32 && sh < 32);

  /* Wrap-around masks are only okay for rotates.  */
  if (ne > nb)
    return false;

  /* Variable shifts are only okay for rotates.  */
  if (sh < 0)
    return false;

  /* Don't allow ASHIFT if the mask is wrong for that.  */
  if (code == ASHIFT && ne < sh)
    return false;

  /* If we can do it with an rlw*, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (nb < 32 && ne < 32 && sh < 32
      && !(code == LSHIFTRT && nb >= 32 - sh))
    return true;

  /* If we can do it with an rld*, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (code == LSHIFTRT)
    sh = 64 - sh;
  if (nb == 63 || ne == 0 || ne == sh)
    return !(code == LSHIFTRT && nb >= sh);

  return false;
}
21324 | ||
21325 | /* Return the instruction template for a shift with mask in mode MODE, with | |
21326 | operands OPERANDS. If DOT is true, make it a record-form instruction. */ | |
21327 | ||
const char *
rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
{
  int nb, ne;

  if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
    gcc_unreachable ();

  /* Run reaches bit 0: rotate-left and clear-left (rldcl/rldicl).  */
  if (mode == DImode && ne == 0)
    {
      /* An LSHIFTRT by S is a 64-bit rotate by 64-S; adjust the count.  */
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (64 - INTVAL (operands[2]));
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rld%I2cl. %0,%1,%2,%3";
      return "rld%I2cl %0,%1,%2,%3";
    }

  /* Run reaches bit 63: rotate-left and clear-right (rldcr/rldicr).  */
  if (mode == DImode && nb == 63)
    {
      operands[3] = GEN_INT (63 - ne);
      if (dot)
	return "rld%I2cr. %0,%1,%2,%3";
      return "rld%I2cr %0,%1,%2,%3";
    }

  /* Left shift whose mask starts exactly at the shift count:
     rotate-left and clear (rldc/rldic).  */
  if (mode == DImode
      && GET_CODE (operands[4]) != LSHIFTRT
      && CONST_INT_P (operands[2])
      && ne == INTVAL (operands[2]))
    {
      operands[3] = GEN_INT (63 - nb);
      if (dot)
	return "rld%I2c. %0,%1,%2,%3";
      return "rld%I2c %0,%1,%2,%3";
    }

  /* Mask fits in the low 32 bits: rlwnm/rlwinm.  */
  if (nb < 32 && ne < 32)
    {
      /* A 32-bit LSHIFTRT by S is a rotate by 32-S; adjust the count.  */
      if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
	operands[2] = GEN_INT (32 - INTVAL (operands[2]));
      operands[3] = GEN_INT (31 - nb);
      operands[4] = GEN_INT (31 - ne);
      /* This insn can also be a 64-bit rotate with mask that really makes
	 it just a shift right (with mask); the %h below are to adjust for
	 that situation (shift count is >= 32 in that case).  */
      if (dot)
	return "rlw%I2nm. %0,%1,%h2,%3,%4";
      return "rlw%I2nm %0,%1,%h2,%3,%4";
    }

  gcc_unreachable ();
}
21381 | ||
21382 | /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or | |
21383 | rldimi instruction, to implement an insert with shift SHIFT (a ROTATE, | |
21384 | ASHIFT, or LSHIFTRT) in mode MODE. */ | |
21385 | ||
bool
rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
{
  int nb, ne;

  /* The mask must be a single (possibly wrapped) run of ones.  */
  if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
    return false;

  int n = GET_MODE_PRECISION (mode);

  /* Inserts require a constant shift amount.  */
  int sh = INTVAL (XEXP (shift, 1));
  if (sh < 0 || sh >= n)
    return false;

  rtx_code code = GET_CODE (shift);

  /* Convert any shift by 0 to a rotate, to simplify below code.  */
  if (sh == 0)
    code = ROTATE;

  /* Convert rotate to simple shift if we can, to make analysis simpler.  */
  if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
    code = ASHIFT;
  if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
    {
      code = LSHIFTRT;
      sh = n - sh;
    }

  /* DImode rotates need rldimi.  */
  if (mode == DImode && code == ROTATE)
    return (ne == sh);

  /* SImode rotates need rlwimi.  */
  if (mode == SImode && code == ROTATE)
    return (nb < 32 && ne < 32 && sh < 32);

  /* Wrap-around masks are only okay for rotates.  */
  if (ne > nb)
    return false;

  /* Don't allow ASHIFT if the mask is wrong for that.  */
  if (code == ASHIFT && ne < sh)
    return false;

  /* If we can do it with an rlwimi, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (nb < 32 && ne < 32 && sh < 32
      && !(code == LSHIFTRT && nb >= 32 - sh))
    return true;

  /* If we can do it with an rldimi, we can do it.  Don't allow LSHIFTRT
     if the mask is wrong for that.  */
  if (code == LSHIFTRT)
    sh = 64 - sh;
  if (ne == sh)
    return !(code == LSHIFTRT && nb >= sh);

  return false;
}
21446 | ||
21447 | /* Return the instruction template for an insert with mask in mode MODE, with | |
21448 | operands OPERANDS. If DOT is true, make it a record-form instruction. */ | |
21449 | ||
21450 | const char * | |
21451 | rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot) | |
21452 | { | |
21453 | int nb, ne; | |
21454 | ||
21455 | if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode)) | |
21456 | gcc_unreachable (); | |
21457 | ||
21458 | /* Prefer rldimi because rlwimi is cracked. */ | |
21459 | if (TARGET_POWERPC64 | |
21460 | && (!dot || mode == DImode) | |
21461 | && GET_CODE (operands[4]) != LSHIFTRT | |
21462 | && ne == INTVAL (operands[2])) | |
21463 | { | |
21464 | operands[3] = GEN_INT (63 - nb); | |
21465 | if (dot) | |
21466 | return "rldimi. %0,%1,%2,%3"; | |
21467 | return "rldimi %0,%1,%2,%3"; | |
21468 | } | |
21469 | ||
21470 | if (nb < 32 && ne < 32) | |
21471 | { | |
21472 | if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2])) | |
21473 | operands[2] = GEN_INT (32 - INTVAL (operands[2])); | |
21474 | operands[3] = GEN_INT (31 - nb); | |
21475 | operands[4] = GEN_INT (31 - ne); | |
21476 | if (dot) | |
21477 | return "rlwimi. %0,%1,%2,%3,%4"; | |
21478 | return "rlwimi %0,%1,%2,%3,%4"; | |
21479 | } | |
21480 | ||
21481 | gcc_unreachable (); | |
21482 | } | |
21483 | ||
21484 | /* Return whether an AND with C (a CONST_INT) in mode MODE can be done | |
21485 | using two machine instructions. */ | |
21486 | ||
21487 | bool | |
21488 | rs6000_is_valid_2insn_and (rtx c, machine_mode mode) | |
21489 | { | |
21490 | /* There are two kinds of AND we can handle with two insns: | |
21491 | 1) those we can do with two rl* insn; | |
21492 | 2) ori[s];xori[s]. | |
21493 | ||
21494 | We do not handle that last case yet. */ | |
21495 | ||
21496 | /* If there is just one stretch of ones, we can do it. */ | |
21497 | if (rs6000_is_valid_mask (c, NULL, NULL, mode)) | |
21498 | return true; | |
21499 | ||
21500 | /* Otherwise, fill in the lowest "hole"; if we can do the result with | |
21501 | one insn, we can do the whole thing with two. */ | |
21502 | unsigned HOST_WIDE_INT val = INTVAL (c); | |
21503 | unsigned HOST_WIDE_INT bit1 = val & -val; | |
21504 | unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val; | |
21505 | unsigned HOST_WIDE_INT val1 = (val + bit1) & val; | |
21506 | unsigned HOST_WIDE_INT bit3 = val1 & -val1; | |
21507 | return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode); | |
21508 | } | |
21509 | ||
21510 | /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS. | |
21511 | If EXPAND is true, split rotate-and-mask instructions we generate to | |
21512 | their constituent parts as well (this is used during expand); if DOT | |
21513 | is 1, make the last insn a record-form instruction clobbering the | |
21514 | destination GPR and setting the CC reg (from operands[3]); if 2, set | |
21515 | that GPR as well as the CC reg. */ | |
21516 | ||
void
rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
{
  /* A record-form final insn only makes sense after expand.  */
  gcc_assert (!(expand && dot));

  unsigned HOST_WIDE_INT val = INTVAL (operands[2]);

  /* If it is one stretch of ones, it is DImode; shift left, mask, then
     shift right.  This generates better code than doing the masks without
     shifts, or shifting first right and then left.  */
  int nb, ne;
  if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
    {
      gcc_assert (mode == DImode);

      /* Shift the run of ones up against bit 63 so the AND mask cannot
	 wrap, then shift the result back down.  */
      int shift = 63 - nb;
      if (expand)
	{
	  /* Emit the three steps as separate insns.  */
	  rtx tmp1 = gen_reg_rtx (DImode);
	  rtx tmp2 = gen_reg_rtx (DImode);
	  emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
	  emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
	  emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
	}
      else
	{
	  /* Emit combined shift-and-mask RTL, ending in a (possibly
	     record-form) rotate.  */
	  rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
	  tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
	  emit_move_insn (operands[0], tmp);
	  tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
	  rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
	}
      return;
    }

  /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
     that does the rest.  */
  unsigned HOST_WIDE_INT bit1 = val & -val;
  unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
  unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
  unsigned HOST_WIDE_INT bit3 = val1 & -val1;

  unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
  unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;

  /* mask2 must always be a one-insn mask, by construction.  */
  gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));

  /* Two "no-rotate"-and-mask instructions, for SImode.  */
  if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
    {
      gcc_assert (mode == SImode);

      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
      rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
      emit_move_insn (reg, tmp);
      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
      return;
    }

  gcc_assert (mode == DImode);

  /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
     insns; we have to do the first in SImode, because it wraps.  */
  if (mask2 <= 0xffffffff
      && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
    {
      rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
      rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
			     GEN_INT (mask1));
      rtx reg_low = gen_lowpart (SImode, reg);
      emit_move_insn (reg_low, tmp);
      tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
      return;
    }

  /* Two rld* insns: rotate, clear the hole in the middle (which now is
     at the top end), rotate back and clear the other hole.  */
  int right = exact_log2 (bit3);
  int left = 64 - right;

  /* Rotate the mask too.  */
  mask1 = (mask1 >> right) | ((bit2 - 1) << left);

  if (expand)
    {
      rtx tmp1 = gen_reg_rtx (DImode);
      rtx tmp2 = gen_reg_rtx (DImode);
      rtx tmp3 = gen_reg_rtx (DImode);
      emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
      emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
      emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
      emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
    }
  else
    {
      rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
      emit_move_insn (operands[0], tmp);
      tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
      tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
      rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
    }
}
21622 | \f | |
21623 | /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates | |
21624 | for lfq and stfq insns iff the registers are hard registers. */ | |
21625 | ||
21626 | int | |
21627 | registers_ok_for_quad_peep (rtx reg1, rtx reg2) | |
21628 | { | |
21629 | /* We might have been passed a SUBREG. */ | |
21630 | if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG) | |
21631 | return 0; | |
21632 | ||
21633 | /* We might have been passed non floating point registers. */ | |
21634 | if (!FP_REGNO_P (REGNO (reg1)) | |
21635 | || !FP_REGNO_P (REGNO (reg2))) | |
21636 | return 0; | |
21637 | ||
21638 | return (REGNO (reg1) == REGNO (reg2) - 1); | |
21639 | } | |
21640 | ||
21641 | /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn. | |
21642 | addr1 and addr2 must be in consecutive memory locations | |
21643 | (addr2 == addr1 + 8). */ | |
21644 | ||
21645 | int | |
21646 | mems_ok_for_quad_peep (rtx mem1, rtx mem2) | |
21647 | { | |
21648 | rtx addr1, addr2; | |
21649 | unsigned int reg1, reg2; | |
21650 | int offset1, offset2; | |
21651 | ||
21652 | /* The mems cannot be volatile. */ | |
21653 | if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2)) | |
21654 | return 0; | |
21655 | ||
21656 | addr1 = XEXP (mem1, 0); | |
21657 | addr2 = XEXP (mem2, 0); | |
21658 | ||
21659 | /* Extract an offset (if used) from the first addr. */ | |
21660 | if (GET_CODE (addr1) == PLUS) | |
21661 | { | |
21662 | /* If not a REG, return zero. */ | |
21663 | if (GET_CODE (XEXP (addr1, 0)) != REG) | |
21664 | return 0; | |
21665 | else | |
21666 | { | |
21667 | reg1 = REGNO (XEXP (addr1, 0)); | |
21668 | /* The offset must be constant! */ | |
21669 | if (GET_CODE (XEXP (addr1, 1)) != CONST_INT) | |
21670 | return 0; | |
21671 | offset1 = INTVAL (XEXP (addr1, 1)); | |
21672 | } | |
21673 | } | |
21674 | else if (GET_CODE (addr1) != REG) | |
21675 | return 0; | |
21676 | else | |
21677 | { | |
21678 | reg1 = REGNO (addr1); | |
21679 | /* This was a simple (mem (reg)) expression. Offset is 0. */ | |
21680 | offset1 = 0; | |
21681 | } | |
21682 | ||
21683 | /* And now for the second addr. */ | |
21684 | if (GET_CODE (addr2) == PLUS) | |
21685 | { | |
21686 | /* If not a REG, return zero. */ | |
21687 | if (GET_CODE (XEXP (addr2, 0)) != REG) | |
21688 | return 0; | |
21689 | else | |
21690 | { | |
21691 | reg2 = REGNO (XEXP (addr2, 0)); | |
21692 | /* The offset must be constant. */ | |
21693 | if (GET_CODE (XEXP (addr2, 1)) != CONST_INT) | |
21694 | return 0; | |
21695 | offset2 = INTVAL (XEXP (addr2, 1)); | |
21696 | } | |
21697 | } | |
21698 | else if (GET_CODE (addr2) != REG) | |
21699 | return 0; | |
21700 | else | |
21701 | { | |
21702 | reg2 = REGNO (addr2); | |
21703 | /* This was a simple (mem (reg)) expression. Offset is 0. */ | |
21704 | offset2 = 0; | |
21705 | } | |
21706 | ||
21707 | /* Both of these must have the same base register. */ | |
21708 | if (reg1 != reg2) | |
21709 | return 0; | |
21710 | ||
21711 | /* The offset for the second addr must be 8 more than the first addr. */ | |
21712 | if (offset2 != offset1 + 8) | |
21713 | return 0; | |
21714 | ||
21715 | /* All the tests passed. addr1 and addr2 are valid for lfq or stfq | |
21716 | instructions. */ | |
21717 | return 1; | |
21718 | } | |
21719 | \f | |
21720 | ||
/* Return a memory rtx (a stack slot) that reload can use to bounce a value
   of mode MODE between register files that have no direct copy path.  For
   SDmode (unless -mno-sdmode-stack), reuse the slot cached in
   cfun->machine->sdmode_stack_slot, applying register elimination to its
   address the first time it is handed out.  */

rtx
rs6000_secondary_memory_needed_rtx (machine_mode mode)
{
  /* True once the cached SDmode slot's address has had eliminable registers
     (e.g. the soft frame pointer) rewritten.  NOTE(review): this is
     function-local static state, so it persists across all functions being
     compiled -- presumably reset elsewhere or only meaningful within one
     function's reload pass; confirm against callers.  */
  static bool eliminated = false;
  rtx ret;

  if (mode != SDmode || TARGET_NO_SDMODE_STACK)
    /* Generic case: allocate a fresh stack slot of the mode's size.  */
    ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
  else
    {
      /* Reuse the SDmode slot that was allocated earlier for this
	 function.  */
      rtx mem = cfun->machine->sdmode_stack_slot;
      gcc_assert (mem != NULL_RTX);

      if (!eliminated)
	{
	  /* Rewrite the slot's address once so it no longer refers to
	     eliminable registers, and cache the rewritten MEM.  */
	  mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
	  cfun->machine->sdmode_stack_slot = mem;
	  eliminated = true;
	}
      ret = mem;
    }

  /* Optional -mdebug=addr trace of the slot handed back to reload.  */
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
	       GET_MODE_NAME (mode));
      if (!ret)
	fprintf (stderr, "\tNULL_RTX\n");
      else
	debug_rtx (ret);
    }

  return ret;
}
21755 | ||
94e23f53 RS |
21756 | /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. For SDmode values we |
21757 | need to use DDmode, in all other cases we can use the same mode. */ | |
21758 | static machine_mode | |
83349046 SB |
21759 | rs6000_secondary_memory_needed_mode (machine_mode mode) |
21760 | { | |
21761 | if (lra_in_progress && mode == SDmode) | |
21762 | return DDmode; | |
21763 | return mode; | |
21764 | } | |
21765 | ||
21766 | static tree | |
21767 | rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED) | |
21768 | { | |
21769 | /* Don't walk into types. */ | |
21770 | if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp)) | |
21771 | { | |
21772 | *walk_subtrees = 0; | |
21773 | return NULL_TREE; | |
21774 | } | |
21775 | ||
21776 | switch (TREE_CODE (*tp)) | |
21777 | { | |
21778 | case VAR_DECL: | |
21779 | case PARM_DECL: | |
21780 | case FIELD_DECL: | |
21781 | case RESULT_DECL: | |
21782 | case SSA_NAME: | |
21783 | case REAL_CST: | |
21784 | case MEM_REF: | |
21785 | case VIEW_CONVERT_EXPR: | |
21786 | if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode) | |
21787 | return *tp; | |
21788 | break; | |
21789 | default: | |
21790 | break; | |
21791 | } | |
21792 | ||
21793 | return NULL_TREE; | |
21794 | } | |
21795 | ||
21796 | /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work | |
21797 | on traditional floating point registers, and the VMRGOW/VMRGEW instructions | |
21798 | only work on the traditional altivec registers, note if an altivec register | |
21799 | was chosen. */ | |
21800 | ||
21801 | static enum rs6000_reg_type | |
21802 | register_to_reg_type (rtx reg, bool *is_altivec) | |
21803 | { | |
21804 | HOST_WIDE_INT regno; | |
21805 | enum reg_class rclass; | |
21806 | ||
21807 | if (GET_CODE (reg) == SUBREG) | |
21808 | reg = SUBREG_REG (reg); | |
21809 | ||
21810 | if (!REG_P (reg)) | |
21811 | return NO_REG_TYPE; | |
21812 | ||
21813 | regno = REGNO (reg); | |
21814 | if (regno >= FIRST_PSEUDO_REGISTER) | |
21815 | { | |
21816 | if (!lra_in_progress && !reload_in_progress && !reload_completed) | |
21817 | return PSEUDO_REG_TYPE; | |
21818 | ||
21819 | regno = true_regnum (reg); | |
21820 | if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER) | |
21821 | return PSEUDO_REG_TYPE; | |
21822 | } | |
21823 | ||
21824 | gcc_assert (regno >= 0); | |
21825 | ||
21826 | if (is_altivec && ALTIVEC_REGNO_P (regno)) | |
21827 | *is_altivec = true; | |
21828 | ||
21829 | rclass = rs6000_regno_regclass[regno]; | |
21830 | return reg_class_to_reg_type[(int)rclass]; | |
21831 | } | |
21832 | ||
21833 | /* Helper function to return the cost of adding a TOC entry address. */ | |
21834 | ||
21835 | static inline int | |
21836 | rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask) | |
21837 | { | |
21838 | int ret; | |
21839 | ||
21840 | if (TARGET_CMODEL != CMODEL_SMALL) | |
21841 | ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2; | |
21842 | ||
21843 | else | |
21844 | ret = (TARGET_MINIMAL_TOC) ? 6 : 3; | |
21845 | ||
21846 | return ret; | |
21847 | } | |
21848 | ||
/* Helper function for rs6000_secondary_reload to determine whether the memory
   address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
   needs reloading.  Return negative if the memory is not handled by the memory
   helper functions and to try a different reload method, 0 if no additional
   instructions are need, and positive to give the extra cost for the
   memory.  */

static int
rs6000_secondary_reload_memory (rtx addr,
				enum reg_class rclass,
				machine_mode mode)
{
  int extra_cost = 0;
  rtx reg, and_arg, plus_arg0, plus_arg1;
  addr_mask_type addr_mask;
  const char *type = NULL;		/* addressing form, for tracing.  */
  const char *fail_msg = NULL;		/* failure reason, for tracing.  */

  /* Select the set of valid addressing forms for MODE in the reload
     register file implied by RCLASS.  */
  if (GPR_REG_CLASS_P (rclass))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (rclass == FLOAT_REGS)
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (rclass == ALTIVEC_REGS)
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

  /* For the combined VSX_REGS, turn off Altivec AND -16.  */
  else if (rclass == VSX_REGS)
    addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
		 & ~RELOAD_REG_AND_M16);

  /* If the register allocator hasn't made up its mind yet on the register
     class to use, settle on defaults to use.  */
  else if (rclass == NO_REGS)
    {
      addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
		   & ~RELOAD_REG_AND_M16);

      /* When the register files disagree, drop the forms that are not
	 uniformly supported.  */
      if ((addr_mask & RELOAD_REG_MULTIPLE) != 0)
	addr_mask &= ~(RELOAD_REG_INDEXED
		       | RELOAD_REG_PRE_INCDEC
		       | RELOAD_REG_PRE_MODIFY);
    }

  else
    addr_mask = 0;

  /* If the register isn't valid in this register class, just return now.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
    {
      if (TARGET_DEBUG_ADDR)
	{
	  fprintf (stderr,
		   "rs6000_secondary_reload_memory: mode = %s, class = %s, "
		   "not valid in class\n",
		   GET_MODE_NAME (mode), reg_class_names[rclass]);
	  debug_rtx (addr);
	}

      return -1;
    }

  switch (GET_CODE (addr))
    {
      /* Does the register class support auto update forms for this mode?  We
	 don't need a scratch register, since the powerpc only supports
	 PRE_INC, PRE_DEC, and PRE_MODIFY.  */
    case PRE_INC:
    case PRE_DEC:
      reg = XEXP (addr, 0);
      /* NOTE(review): this passes ADDR (the PRE_INC/PRE_DEC rtx itself) to
	 base_reg_operand, whereas the PRE_MODIFY case below passes REG --
	 confirm whether testing ADDR is intended here.  */
      if (!base_reg_operand (addr, GET_MODE (reg)))
	{
	  fail_msg = "no base register #1";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
	{
	  extra_cost = 1;
	  type = "update";
	}
      break;

    case PRE_MODIFY:
      reg = XEXP (addr, 0);
      plus_arg1 = XEXP (addr, 1);
      /* PRE_MODIFY must have the shape (pre_modify reg (plus reg X)).  */
      if (!base_reg_operand (reg, GET_MODE (reg))
	  || GET_CODE (plus_arg1) != PLUS
	  || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
	{
	  fail_msg = "bad PRE_MODIFY";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
	{
	  extra_cost = 1;
	  type = "update";
	}
      break;

      /* Do we need to simulate AND -16 to clear the bottom address bits used
	 in VMX load/stores?  Only allow the AND for vector sizes.  */
    case AND:
      and_arg = XEXP (addr, 0);
      if (GET_MODE_SIZE (mode) != 16
	  || GET_CODE (XEXP (addr, 1)) != CONST_INT
	  || INTVAL (XEXP (addr, 1)) != -16)
	{
	  fail_msg = "bad Altivec AND #1";
	  extra_cost = -1;
	}

      /* NOTE(review): not chained with 'else' -- when the size/constant
	 check above fails and RCLASS is not ALTIVEC_REGS, this block can
	 overwrite the extra_cost = -1 just set; confirm the fall-through
	 is intended.  */
      if (rclass != ALTIVEC_REGS)
	{
	  if (legitimate_indirect_address_p (and_arg, false))
	    extra_cost = 1;

	  else if (legitimate_indexed_address_p (and_arg, false))
	    extra_cost = 2;

	  else
	    {
	      fail_msg = "bad Altivec AND #2";
	      extra_cost = -1;
	    }

	  type = "and";
	}
      break;

      /* If this is an indirect address, make sure it is a base register.  */
    case REG:
    case SUBREG:
      if (!legitimate_indirect_address_p (addr, false))
	{
	  extra_cost = 1;
	  type = "move";
	}
      break;

      /* If this is an indexed address, make sure the register class can handle
	 indexed addresses for this mode.  */
    case PLUS:
      plus_arg0 = XEXP (addr, 0);
      plus_arg1 = XEXP (addr, 1);

      /* (plus (plus (reg) (constant)) (constant)) is generated during
	 push_reload processing, so handle it now.  */
      if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      extra_cost = 1;
	      type = "offset";
	    }
	}

      /* (plus (plus (reg) (constant)) (reg)) is also generated during
	 push_reload processing, so handle it now.  */
      else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      extra_cost = 1;
	      type = "indexed #2";
	    }
	}

      else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
	{
	  fail_msg = "no base register #2";
	  extra_cost = -1;
	}

      /* reg+reg: the class must support indexed addressing for MODE.  */
      else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0
	      || !legitimate_indexed_address_p (addr, false))
	    {
	      extra_cost = 1;
	      type = "indexed";
	    }
	}

      /* Quad (DQ-form) addressing only allows particular offsets.  */
      else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0
	       && CONST_INT_P (plus_arg1))
	{
	  if (!quad_address_offset_p (INTVAL (plus_arg1)))
	    {
	      extra_cost = 1;
	      type = "vector d-form offset";
	    }
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      extra_cost = 1;
	      type = "offset #2";
	    }
	}

      else
	{
	  fail_msg = "bad PLUS";
	  extra_cost = -1;
	}

      break;

    case LO_SUM:
      /* Quad offsets are restricted and can't handle normal addresses.  */
      if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum";
	}

      else if (!legitimate_lo_sum_address_p (mode, addr, false))
	{
	  fail_msg = "bad LO_SUM";
	  extra_cost = -1;
	}

      /* NOTE(review): not chained with 'else' -- this check can overwrite
	 the extra_cost = -1 set in either branch above with 1; confirm
	 that is intended.  */
      if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	{
	  extra_cost = 1;
	  type = "lo_sum";
	}
      break;

      /* Static addresses need to create a TOC entry.  */
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum #2";
	}

      else
	{
	  type = "address";
	  extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
	}
      break;

      /* TOC references look like offsetable memory.  */
    case UNSPEC:
      if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
	{
	  fail_msg = "bad UNSPEC";
	  extra_cost = -1;
	}

      else if ((addr_mask & RELOAD_REG_QUAD_OFFSET) != 0)
	{
	  extra_cost = -1;
	  type = "vector d-form lo_sum #3";
	}

      else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	{
	  extra_cost = 1;
	  type = "toc reference";
	}
      break;

    default:
      {
	fail_msg = "bad address";
	extra_cost = -1;
      }
    }

  /* Optional -mdebug=addr trace of the decision.  */
  if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
    {
      if (extra_cost < 0)
	fprintf (stderr,
		 "rs6000_secondary_reload_memory error: mode = %s, "
		 "class = %s, addr_mask = '%s', %s\n",
		 GET_MODE_NAME (mode),
		 reg_class_names[rclass],
		 rs6000_debug_addr_mask (addr_mask, false),
		 (fail_msg != NULL) ? fail_msg : "<bad address>");

      else
	fprintf (stderr,
		 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
		 "addr_mask = '%s', extra cost = %d, %s\n",
		 GET_MODE_NAME (mode),
		 reg_class_names[rclass],
		 rs6000_debug_addr_mask (addr_mask, false),
		 extra_cost,
		 (type) ? type : "<none>");

      debug_rtx (addr);
    }

  return extra_cost;
}
22155 | ||
22156 | /* Helper function for rs6000_secondary_reload to return true if a move to a | |
22157 | different register classe is really a simple move. */ | |
22158 | ||
22159 | static bool | |
22160 | rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type, | |
22161 | enum rs6000_reg_type from_type, | |
22162 | machine_mode mode) | |
22163 | { | |
22164 | int size = GET_MODE_SIZE (mode); | |
22165 | ||
22166 | /* Add support for various direct moves available. In this function, we only | |
22167 | look at cases where we don't need any extra registers, and one or more | |
22168 | simple move insns are issued. Originally small integers are not allowed | |
22169 | in FPR/VSX registers. Single precision binary floating is not a simple | |
22170 | move because we need to convert to the single precision memory layout. | |
22171 | The 4-byte SDmode can be moved. TDmode values are disallowed since they | |
22172 | need special direct move handling, which we do not support yet. */ | |
22173 | if (TARGET_DIRECT_MOVE | |
22174 | && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE) | |
22175 | || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE))) | |
22176 | { | |
22177 | if (TARGET_POWERPC64) | |
22178 | { | |
22179 | /* ISA 2.07: MTVSRD or MVFVSRD. */ | |
22180 | if (size == 8) | |
22181 | return true; | |
22182 | ||
22183 | /* ISA 3.0: MTVSRDD or MFVSRD + MFVSRLD. */ | |
22184 | if (size == 16 && TARGET_P9_VECTOR && mode != TDmode) | |
22185 | return true; | |
22186 | } | |
22187 | ||
22188 | /* ISA 2.07: MTVSRWZ or MFVSRWZ. */ | |
22189 | if (TARGET_VSX_SMALL_INTEGER) | |
22190 | { | |
22191 | if (mode == SImode) | |
22192 | return true; | |
22193 | ||
22194 | if (TARGET_P9_VECTOR && (mode == HImode || mode == QImode)) | |
22195 | return true; | |
22196 | } | |
22197 | ||
22198 | /* ISA 2.07: MTVSRWZ or MFVSRWZ. */ | |
22199 | if (mode == SDmode) | |
22200 | return true; | |
22201 | } | |
22202 | ||
22203 | /* Power6+: MFTGPR or MFFGPR. */ | |
22204 | else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8 | |
22205 | && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE) | |
22206 | || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE))) | |
22207 | return true; | |
22208 | ||
22209 | /* Move to/from SPR. */ | |
22210 | else if ((size == 4 || (TARGET_POWERPC64 && size == 8)) | |
22211 | && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE) | |
22212 | || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE))) | |
22213 | return true; | |
22214 | ||
22215 | return false; | |
22216 | } | |
22217 | ||
/* Direct move helper function for rs6000_secondary_reload, handle all of the
   special direct moves that involve allocating an extra register.  Return
   true if such a helper insn exists; the helper's insn code and its extra
   cost are reported through SRI (when non-null), or CODE_FOR_nothing
   otherwise.  */

static bool
rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
				     enum rs6000_reg_type from_type,
				     machine_mode mode,
				     secondary_reload_info *sri,
				     bool altivec_p)
{
  bool ret = false;
  enum insn_code icode = CODE_FOR_nothing;
  int cost = 0;
  int size = GET_MODE_SIZE (mode);

  if (TARGET_POWERPC64 && size == 16)
    {
      /* Handle moving 128-bit values from GPRs to VSX point registers on
	 ISA 2.07 (power8, power9) when running in 64-bit mode using
	 XXPERMDI to glue the two 64-bit values back together.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
	{
	  cost = 3;			/* 2 mtvsrd's, 1 xxpermdi.  */
	  icode = reg_addr[mode].reload_vsx_gpr;
	}

      /* Handle moving 128-bit values from VSX point registers to GPRs on
	 ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to
	 the bottom 64-bit value.  */
      else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	{
	  cost = 3;			/* 2 mfvsrd's, 1 xxpermdi.  */
	  icode = reg_addr[mode].reload_gpr_vsx;
	}
    }

  else if (TARGET_POWERPC64 && mode == SFmode)
    {
      /* SFmode moves also need a format conversion (the value lives in the
	 VSX register in double-precision layout).  */
      if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
	{
	  cost = 3;			/* xscvdpspn, mfvsrd, and.  */
	  icode = reg_addr[mode].reload_gpr_vsx;
	}

      else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
	{
	  cost = 2;			/* mtvsrz, xscvspdpn.  */
	  icode = reg_addr[mode].reload_vsx_gpr;
	}
    }

  else if (!TARGET_POWERPC64 && size == 8)
    {
      /* Handle moving 64-bit values from GPRs to floating point registers on
	 ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
	 32-bit values back together.  Altivec register classes must be handled
	 specially since a different instruction is used, and the secondary
	 reload support requires a single instruction class in the scratch
	 register constraint.  However, right now TFmode is not allowed in
	 Altivec registers, so the pattern will never match.  */
      if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
	{
	  cost = 3;			/* 2 mtvsrwz's, 1 fmrgow.  */
	  icode = reg_addr[mode].reload_fpr_gpr;
	}
    }

  if (icode != CODE_FOR_nothing)
    {
      /* A helper exists; report the insn and its cost back to reload.  */
      ret = true;
      if (sri)
	{
	  sri->icode = icode;
	  sri->extra_cost = cost;
	}
    }

  return ret;
}
22299 | ||
22300 | /* Return whether a move between two register classes can be done either | |
22301 | directly (simple move) or via a pattern that uses a single extra temporary | |
22302 | (using ISA 2.07's direct move in this case. */ | |
22303 | ||
22304 | static bool | |
22305 | rs6000_secondary_reload_move (enum rs6000_reg_type to_type, | |
22306 | enum rs6000_reg_type from_type, | |
22307 | machine_mode mode, | |
22308 | secondary_reload_info *sri, | |
22309 | bool altivec_p) | |
22310 | { | |
22311 | /* Fall back to load/store reloads if either type is not a register. */ | |
22312 | if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE) | |
22313 | return false; | |
22314 | ||
22315 | /* If we haven't allocated registers yet, assume the move can be done for the | |
22316 | standard register types. */ | |
22317 | if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE) | |
22318 | || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type)) | |
22319 | || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type))) | |
22320 | return true; | |
22321 | ||
22322 | /* Moves to the same set of registers is a simple move for non-specialized | |
22323 | registers. */ | |
22324 | if (to_type == from_type && IS_STD_REG_TYPE (to_type)) | |
22325 | return true; | |
22326 | ||
22327 | /* Check whether a simple move can be done directly. */ | |
22328 | if (rs6000_secondary_reload_simple_move (to_type, from_type, mode)) | |
22329 | { | |
22330 | if (sri) | |
22331 | { | |
22332 | sri->icode = CODE_FOR_nothing; | |
22333 | sri->extra_cost = 0; | |
22334 | } | |
22335 | return true; | |
22336 | } | |
22337 | ||
22338 | /* Now check if we can do it in a few steps. */ | |
22339 | return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri, | |
22340 | altivec_p); | |
22341 | } | |
22342 | ||
/* Inform reload about cases where moving X with a mode MODE to a register in
   RCLASS requires an extra scratch or immediate register.  Return the class
   needed for the immediate register.

   For VSX and Altivec, we may need a register to convert sp+offset into
   reg+sp.

   For misaligned 64-bit gpr loads and stores we need a register to
   convert an offset address to indirect.  */

static reg_class_t
rs6000_secondary_reload (bool in_p,
			 rtx x,
			 reg_class_t rclass_i,
			 machine_mode mode,
			 secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;
  reg_class_t ret = ALL_REGS;		/* sentinel: must be replaced below.  */
  enum insn_code icode;
  bool default_p = false;		/* fall back to the generic hook?  */
  bool done_p = false;			/* a case below made the decision.  */

  /* Allow subreg of memory before/during reload.  */
  bool memory_p = (MEM_P (x)
		   || (!reload_completed && GET_CODE (x) == SUBREG
		       && MEM_P (SUBREG_REG (x))));

  sri->icode = CODE_FOR_nothing;
  sri->t_icode = CODE_FOR_nothing;
  sri->extra_cost = 0;
  /* Reload helper insn registered for MODE, if any.  */
  icode = ((in_p)
	   ? reg_addr[mode].reload_load
	   : reg_addr[mode].reload_store);

  /* Register-to-register moves: see if a direct move handles it.  */
  if (REG_P (x) || register_operand (x, mode))
    {
      enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
      bool altivec_p = (rclass == ALTIVEC_REGS);
      enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);

      if (!in_p)
	std::swap (to_type, from_type);

      /* Can we do a direct move of some sort?  */
      if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
					altivec_p))
	{
	  icode = (enum insn_code)sri->icode;
	  default_p = false;
	  done_p = true;
	  ret = NO_REGS;
	}
    }

  /* Make sure 0.0 is not reloaded or forced into memory.  */
  if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
    {
      ret = NO_REGS;
      default_p = false;
      done_p = true;
    }

  /* If this is a scalar floating point value and we want to load it into the
     traditional Altivec registers, do it via a move via a traditional floating
     point register, unless we have D-form addressing.  Also make sure that
     non-zero constants use a FPR.  */
  if (!done_p && reg_addr[mode].scalar_in_vmx_p
      && !mode_supports_vmx_dform (mode)
      && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
      && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
    {
      ret = FLOAT_REGS;
      default_p = false;
      done_p = true;
    }

  /* Handle reload of load/stores if we have reload helper functions.  */
  if (!done_p && icode != CODE_FOR_nothing && memory_p)
    {
      int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
						       mode);

      /* Negative means the address form is not handled by the helpers.  */
      if (extra_cost >= 0)
	{
	  done_p = true;
	  ret = NO_REGS;
	  if (extra_cost > 0)
	    {
	      sri->extra_cost = extra_cost;
	      sri->icode = icode;
	    }
	}
    }

  /* Handle unaligned loads and stores of integer registers.  */
  if (!done_p && TARGET_POWERPC64
      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
      && memory_p
      && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
    {
      rtx addr = XEXP (x, 0);
      rtx off = address_offset (addr);

      if (off != NULL_RTX)
	{
	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
	  unsigned HOST_WIDE_INT offset = INTVAL (off);

	  /* We need a secondary reload when our legitimate_address_p
	     says the address is good (as otherwise the entire address
	     will be reloaded), and the offset is not a multiple of
	     four or we have an address wrap.  Address wrap will only
	     occur for LO_SUMs since legitimate_offset_address_p
	     rejects addresses for 16-byte mems that will wrap.  */
	  if (GET_CODE (addr) == LO_SUM
	      ? (1 /* legitimate_address_p allows any offset for lo_sum */
		 && ((offset & 3) != 0
		     || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
	      : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
		 && (offset & 3) != 0))
	    {
	      /* -m32 -mpowerpc64 needs to use a 32-bit scratch register.  */
	      if (in_p)
		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
			      : CODE_FOR_reload_di_load);
	      else
		sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
			      : CODE_FOR_reload_di_store);
	      sri->extra_cost = 2;
	      ret = NO_REGS;
	      done_p = true;
	    }
	  else
	    default_p = true;
	}
      else
	default_p = true;
    }

  /* Same idea for multi-word values in 32-bit mode.  */
  if (!done_p && !TARGET_POWERPC64
      && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
      && memory_p
      && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
    {
      rtx addr = XEXP (x, 0);
      rtx off = address_offset (addr);

      if (off != NULL_RTX)
	{
	  unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
	  unsigned HOST_WIDE_INT offset = INTVAL (off);

	  /* We need a secondary reload when our legitimate_address_p
	     says the address is good (as otherwise the entire address
	     will be reloaded), and we have a wrap.

	     legitimate_lo_sum_address_p allows LO_SUM addresses to
	     have any offset so test for wrap in the low 16 bits.

	     legitimate_offset_address_p checks for the range
	     [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
	     for mode size of 16.  We wrap at [0x7ffc,0x7fff] and
	     [0x7ff4,0x7fff] respectively, so test for the
	     intersection of these ranges, [0x7ffc,0x7fff] and
	     [0x7ff4,0x7ff7] respectively.

	     Note that the address we see here may have been
	     manipulated by legitimize_reload_address.  */
	  if (GET_CODE (addr) == LO_SUM
	      ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
	      : offset - (0x8000 - extra) < UNITS_PER_WORD)
	    {
	      if (in_p)
		sri->icode = CODE_FOR_reload_si_load;
	      else
		sri->icode = CODE_FOR_reload_si_store;
	      sri->extra_cost = 2;
	      ret = NO_REGS;
	      done_p = true;
	    }
	  else
	    default_p = true;
	}
      else
	default_p = true;
    }

  if (!done_p)
    default_p = true;

  if (default_p)
    ret = default_secondary_reload (in_p, x, rclass, mode, sri);

  /* The ALL_REGS sentinel must have been replaced by now.  */
  gcc_assert (ret != ALL_REGS);

  /* Optional -mdebug=addr trace of the final decision.  */
  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr,
	       "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
	       "mode = %s",
	       reg_class_names[ret],
	       in_p ? "true" : "false",
	       reg_class_names[rclass],
	       GET_MODE_NAME (mode));

      if (reload_completed)
	fputs (", after reload", stderr);

      if (!done_p)
	fputs (", done_p not set", stderr);

      if (default_p)
	fputs (", default secondary reload", stderr);

      if (sri->icode != CODE_FOR_nothing)
	fprintf (stderr, ", reload func = %s, extra cost = %d",
		 insn_data[sri->icode].name, sri->extra_cost);

      else if (sri->extra_cost > 0)
	fprintf (stderr, ", extra cost = %d", sri->extra_cost);

      fputs ("\n", stderr);
      debug_rtx (x);
    }

  return ret;
}
22571 | ||
22572 | /* Better tracing for rs6000_secondary_reload_inner. */ | |
22573 | ||
22574 | static void | |
22575 | rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch, | |
22576 | bool store_p) | |
22577 | { | |
22578 | rtx set, clobber; | |
22579 | ||
22580 | gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX); | |
22581 | ||
22582 | fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line, | |
22583 | store_p ? "store" : "load"); | |
22584 | ||
22585 | if (store_p) | |
22586 | set = gen_rtx_SET (mem, reg); | |
22587 | else | |
22588 | set = gen_rtx_SET (reg, mem); | |
22589 | ||
22590 | clobber = gen_rtx_CLOBBER (VOIDmode, scratch); | |
22591 | debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber))); | |
22592 | } | |
22593 | ||
/* Forward declaration so ATTRIBUTE_NORETURN is visible at the many call
   sites in rs6000_secondary_reload_inner below.  */
static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
  ATTRIBUTE_NORETURN;

/* Report an internal inconsistency while fixing up a secondary reload:
   trace the failing reload (LINE is the caller's __LINE__; REG, MEM,
   SCRATCH, and STORE_P describe the reload) and then abort via
   gcc_unreachable.  Never returns.  */

static void
rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
			      bool store_p)
{
  rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
  gcc_unreachable ();
}
22604 | ||
/* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
   reload helper functions.  These were identified in
   rs6000_secondary_reload_memory, and if reload decided to use the secondary
   reload, it calls the insns:
	reload_<RELOAD:mode>_<P:mptrsize>_store
	reload_<RELOAD:mode>_<P:mptrsize>_load

   which in turn calls this function, to do whatever is necessary to create
   valid addresses.

   REG is the register being reloaded, MEM the memory operand whose address
   may need fixing, SCRATCH a base register reload allocated for us, and
   STORE_P says whether the reload is a store (true) or a load (false).  */

void
rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  machine_mode mode = GET_MODE (reg);
  addr_mask_type addr_mask;
  rtx addr;
  rtx new_addr;
  rtx op_reg, op0, op1;
  rtx and_op;
  rtx cc_clobber;
  rtvec rv;

  /* Sanity check the operands; anything unexpected is an internal error.  */
  if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
      || !base_reg_operand (scratch, GET_MODE (scratch)))
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  /* Pick the addressing-capability mask that matches the register bank
     REG lives in (GPR, FPR, or Altivec/VMX).  */
  if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];

  else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];

  else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
    addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];

  else
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  /* Make sure the mode is valid in this register class.  */
  if ((addr_mask & RELOAD_REG_VALID) == 0)
    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

  if (TARGET_DEBUG_ADDR)
    rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);

  /* NEW_ADDR tracks the (possibly rewritten) address; if it still equals
     ADDR at the end, the original memory operand is used unchanged.  */
  new_addr = addr = XEXP (mem, 0);
  switch (GET_CODE (addr))
    {
      /* Does the register class support auto update forms for this mode?  If
	 not, do the update now.  We don't need a scratch register, since the
	 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY.  */
    case PRE_INC:
    case PRE_DEC:
      op_reg = XEXP (addr, 0);
      if (!base_reg_operand (op_reg, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
	{
	  /* Emit the increment/decrement explicitly, then address through
	     the updated base register.  */
	  emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
	  new_addr = op_reg;
	}
      break;

    case PRE_MODIFY:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode)
	  || GET_CODE (op1) != PLUS
	  || !rtx_equal_p (op0, XEXP (op1, 0)))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
	{
	  /* Emit the base-register update separately.  NOTE(review):
	     NEW_ADDR is set to REG (the reloaded value) rather than OP0
	     (the updated base register); OP0 looks like the intended
	     address — confirm against upstream before relying on this.  */
	  emit_insn (gen_rtx_SET (op0, op1));
	  new_addr = reg;
	}
      break;

      /* Do we need to simulate AND -16 to clear the bottom address bits used
	 in VMX load/stores?  */
    case AND:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if ((addr_mask & RELOAD_REG_AND_M16) == 0)
	{
	  if (REG_P (op0) || GET_CODE (op0) == SUBREG)
	    op_reg = op0;

	  /* NOTE(review): this arm tests/copies OP1 while OP0 holds the
	     address part of the AND; the asymmetry with the branch above
	     looks suspicious — verify against upstream.  */
	  else if (GET_CODE (op1) == PLUS)
	    {
	      emit_insn (gen_rtx_SET (scratch, op1));
	      op_reg = scratch;
	    }

	  else
	    rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

	  /* Emit the AND into SCRATCH as a parallel with a CC scratch
	     clobber, matching the machine's andi./and patterns.  */
	  and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
	  cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
	  rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
	  emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
	  new_addr = scratch;
	}
      break;

      /* If this is an indirect address, make sure it is a base register.  */
    case REG:
    case SUBREG:
      if (!base_reg_operand (addr, GET_MODE (addr)))
	{
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;
	}
      break;

      /* If this is an indexed address, make sure the register class can handle
	 indexed addresses for this mode.  */
    case PLUS:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      /* Force reg+reg into the scratch base register.  */
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* ISA 3.0 quad-word (lxv/stxv) offsets must be a multiple of 16;
	 anything else has to be materialized in the scratch register.  */
      else if (mode_supports_vsx_dform_quad (mode) && CONST_INT_P (op1))
	{
	  if (((addr_mask & RELOAD_REG_QUAD_OFFSET) == 0)
	      || !quad_address_p (addr, mode, false))
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      break;

    case LO_SUM:
      op0 = XEXP (addr, 0);
      op1 = XEXP (addr, 1);
      if (!base_reg_operand (op0, Pmode))
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      else if (int_reg_operand (op1, Pmode))
	{
	  if ((addr_mask & RELOAD_REG_INDEXED) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      /* Quad offsets are restricted and can't handle normal addresses.  */
      else if (mode_supports_vsx_dform_quad (mode))
	{
	  emit_insn (gen_rtx_SET (scratch, addr));
	  new_addr = scratch;
	}

      /* Make sure the register class can handle offset addresses.  */
      else if (legitimate_lo_sum_address_p (mode, addr, false))
	{
	  if ((addr_mask & RELOAD_REG_OFFSET) == 0)
	    {
	      emit_insn (gen_rtx_SET (scratch, addr));
	      new_addr = scratch;
	    }
	}

      else
	rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);

      break;

      /* Symbolic addresses are loaded into the scratch register first.  */
    case SYMBOL_REF:
    case CONST:
    case LABEL_REF:
      rs6000_emit_move (scratch, addr, Pmode);
      new_addr = scratch;
      break;

    default:
      rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
    }

  /* Adjust the address if it changed.  */
  if (addr != new_addr)
    {
      mem = replace_equiv_address_nv (mem, new_addr);
      if (TARGET_DEBUG_ADDR)
	fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
    }

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));

  return;
}
22828 | ||
/* Convert reloads involving 64-bit gprs and misaligned offset
   addressing, or multiple 32-bit gprs and offsets that are too large,
   to use indirect addressing.

   REG is the GPR being reloaded, MEM the memory operand, SCRATCH a base
   register allocated by reload, and STORE_P says whether this reload is a
   store (true) or a load (false).  */

void
rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
{
  int regno = true_regnum (reg);
  enum reg_class rclass;
  rtx addr;
  rtx scratch_or_premodify = scratch;

  if (TARGET_DEBUG_ADDR)
    {
      fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
	       store_p ? "store" : "load");
      fprintf (stderr, "reg:\n");
      debug_rtx (reg);
      fprintf (stderr, "mem:\n");
      debug_rtx (mem);
      fprintf (stderr, "scratch:\n");
      debug_rtx (scratch);
    }

  /* REG must resolve to a hard GPR and MEM must really be a memory ref.  */
  gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
  gcc_assert (GET_CODE (mem) == MEM);
  rclass = REGNO_REG_CLASS (regno);
  gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
  addr = XEXP (mem, 0);

  if (GET_CODE (addr) == PRE_MODIFY)
    {
      /* For PRE_MODIFY the update register itself serves as the base, so
	 use it instead of SCRATCH.  */
      gcc_assert (REG_P (XEXP (addr, 0))
		  && GET_CODE (XEXP (addr, 1)) == PLUS
		  && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
      scratch_or_premodify = XEXP (addr, 0);
      if (!HARD_REGISTER_P (scratch_or_premodify))
	/* If we have a pseudo here then reload will have arranged
	   to have it replaced, but only in the original insn.
	   Use the replacement here too.  */
	scratch_or_premodify = find_replacement (&XEXP (addr, 0));

      /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
	 expressions from the original insn, without unsharing them.
	 Any RTL that points into the original insn will of course
	 have register replacements applied.  That is why we don't
	 need to look for replacements under the PLUS.  */
      addr = XEXP (addr, 1);
    }
  gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);

  /* Materialize the full address in the base register, then access the
     memory indirectly through it.  */
  rs6000_emit_move (scratch_or_premodify, addr, Pmode);

  mem = replace_equiv_address_nv (mem, scratch_or_premodify);

  /* Now create the move.  */
  if (store_p)
    emit_insn (gen_rtx_SET (mem, reg));
  else
    emit_insn (gen_rtx_SET (reg, mem));

  return;
}
22892 | ||
/* Allocate a 64-bit stack slot to be used for copying SDmode values through if
   this function has any SDmode references.  If we are on a power7 or later, we
   don't need the 64-bit stack slot since the LFIWZX and STIFWX instructions
   can load/store the value.  */

static void
rs6000_alloc_sdmode_stack_slot (void)
{
  tree t;
  basic_block bb;
  gimple_stmt_iterator gsi;

  /* The slot is allocated at most once per function.  */
  gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
  /* We use a different approach for dealing with the secondary
     memory in LRA.  */
  if (ira_use_lra_p)
    return;

  if (TARGET_NO_SDMODE_STACK)
    return;

  /* Scan every gimple statement for an SDmode reference; the first hit
     allocates the slot and finishes the job.  The slot is allocated in
     DDmode (64 bits) but accessed in SDmode (32 bits), hence the
     adjust_address_nv.  */
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
	if (ret)
	  {
	    rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
	    cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
								  SDmode, 0);
	    return;
	  }
      }

  /* Check for any SDmode parameters of the function.  */
  for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
    {
      if (TREE_TYPE (t) == error_mark_node)
	continue;

      if (TYPE_MODE (TREE_TYPE (t)) == SDmode
	  || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
	{
	  rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
	  cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
								SDmode, 0);
	  return;
	}
    }
}
22943 | ||
22944 | static void | |
22945 | rs6000_instantiate_decls (void) | |
22946 | { | |
22947 | if (cfun->machine->sdmode_stack_slot != NULL_RTX) | |
22948 | instantiate_decl_rtl (cfun->machine->sdmode_stack_slot); | |
22949 | } | |
22950 | ||
/* Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS; but on some machines
   in some cases it is preferable to use a more restrictive class.

   On the RS/6000, we have to return NO_REGS when we want to reload a
   floating-point CONST_DOUBLE to force it to be copied to memory.

   We also don't want to reload integer values into floating-point
   registers if we can at all help it.  In fact, this can
   cause reload to die, if it tries to generate a reload of CTR
   into a FP register and discovers it doesn't have the memory location
   required.

   ??? Would it be a good idea to have reload do the converse, that is
   try to reload floating modes into FP registers if possible?
 */

static enum reg_class
rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
{
  machine_mode mode = GET_MODE (x);
  bool is_constant = CONSTANT_P (x);

  /* If a mode can't go in FPR/ALTIVEC/VSX registers, don't return a preferred
     reload class for it.  */
  if ((rclass == ALTIVEC_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
    return NO_REGS;

  if ((rclass == FLOAT_REGS || rclass == VSX_REGS)
      && (reg_addr[mode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_VALID) == 0)
    return NO_REGS;

  /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS.  Do not allow
     the reloading of address expressions using PLUS into floating point
     registers.  */
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
    {
      if (is_constant)
	{
	  /* Zero is always allowed in all VSX registers.  */
	  if (x == CONST0_RTX (mode))
	    return rclass;

	  /* If this is a vector constant that can be formed with a few Altivec
	     instructions, we want altivec registers.  */
	  if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
	    return ALTIVEC_REGS;

	  /* If this is an integer constant that can easily be loaded into
	     vector registers, allow it.  */
	  if (CONST_INT_P (x))
	    {
	      HOST_WIDE_INT value = INTVAL (x);

	      /* ISA 2.07 can generate -1 in all registers with XXLORC.  ISA
		 2.06 can generate it in the Altivec registers with
		 VSPLTI<x>.  */
	      if (value == -1)
		{
		  if (TARGET_P8_VECTOR)
		    return rclass;
		  else if (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
		    return ALTIVEC_REGS;
		  else
		    return NO_REGS;
		}

	      /* ISA 3.0 can load -128..127 using the XXSPLTIB instruction and
		 a sign extend in the Altivec registers.  */
	      if (IN_RANGE (value, -128, 127) && TARGET_P9_VECTOR
		  && TARGET_VSX_SMALL_INTEGER
		  && (rclass == ALTIVEC_REGS || rclass == VSX_REGS))
		return ALTIVEC_REGS;
	    }

	  /* Force constant to memory.  */
	  return NO_REGS;
	}

      /* D-form addressing can easily reload the value.  */
      if (mode_supports_vmx_dform (mode)
	  || mode_supports_vsx_dform_quad (mode))
	return rclass;

      /* If this is a scalar floating point value and we don't have D-form
	 addressing, prefer the traditional floating point registers so that we
	 can use D-form (register+offset) addressing.  */
      if (rclass == VSX_REGS
	  && (mode == SFmode || GET_MODE_SIZE (mode) == 8))
	return FLOAT_REGS;

      /* Prefer the Altivec registers if Altivec is handling the vector
	 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
	 loads.  */
      if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
	  || mode == V1TImode)
	return ALTIVEC_REGS;

      return rclass;
    }

  /* Constants and addresses (PLUS) want GPRs — narrow to the smallest
     integer class the requested class allows.  */
  if (is_constant || GET_CODE (x) == PLUS)
    {
      if (reg_class_subset_p (GENERAL_REGS, rclass))
	return GENERAL_REGS;
      if (reg_class_subset_p (BASE_REGS, rclass))
	return BASE_REGS;
      return NO_REGS;
    }

  /* Integer-mode values are best reloaded into GPRs.  */
  if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
    return GENERAL_REGS;

  return rclass;
}
23068 | ||
23069 | /* Debug version of rs6000_preferred_reload_class. */ | |
23070 | static enum reg_class | |
23071 | rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass) | |
23072 | { | |
23073 | enum reg_class ret = rs6000_preferred_reload_class (x, rclass); | |
23074 | ||
23075 | fprintf (stderr, | |
23076 | "\nrs6000_preferred_reload_class, return %s, rclass = %s, " | |
23077 | "mode = %s, x:\n", | |
23078 | reg_class_names[ret], reg_class_names[rclass], | |
23079 | GET_MODE_NAME (GET_MODE (x))); | |
23080 | debug_rtx (x); | |
23081 | ||
23082 | return ret; | |
23083 | } | |
23084 | ||
23085 | /* If we are copying between FP or AltiVec registers and anything else, we need | |
23086 | a memory location. The exception is when we are targeting ppc64 and the | |
23087 | move to/from fpr to gpr instructions are available. Also, under VSX, you | |
23088 | can copy vector registers from the FP register set to the Altivec register | |
23089 | set and vice versa. */ | |
23090 | ||
23091 | static bool | |
f15643d4 RS |
23092 | rs6000_secondary_memory_needed (machine_mode mode, |
23093 | reg_class_t from_class, | |
23094 | reg_class_t to_class) | |
83349046 SB |
23095 | { |
23096 | enum rs6000_reg_type from_type, to_type; | |
23097 | bool altivec_p = ((from_class == ALTIVEC_REGS) | |
23098 | || (to_class == ALTIVEC_REGS)); | |
23099 | ||
23100 | /* If a simple/direct move is available, we don't need secondary memory */ | |
23101 | from_type = reg_class_to_reg_type[(int)from_class]; | |
23102 | to_type = reg_class_to_reg_type[(int)to_class]; | |
23103 | ||
23104 | if (rs6000_secondary_reload_move (to_type, from_type, mode, | |
23105 | (secondary_reload_info *)0, altivec_p)) | |
23106 | return false; | |
23107 | ||
23108 | /* If we have a floating point or vector register class, we need to use | |
23109 | memory to transfer the data. */ | |
23110 | if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type)) | |
23111 | return true; | |
23112 | ||
23113 | return false; | |
23114 | } | |
23115 | ||
23116 | /* Debug version of rs6000_secondary_memory_needed. */ | |
23117 | static bool | |
f15643d4 RS |
23118 | rs6000_debug_secondary_memory_needed (machine_mode mode, |
23119 | reg_class_t from_class, | |
23120 | reg_class_t to_class) | |
83349046 | 23121 | { |
f15643d4 | 23122 | bool ret = rs6000_secondary_memory_needed (mode, from_class, to_class); |
83349046 SB |
23123 | |
23124 | fprintf (stderr, | |
23125 | "rs6000_secondary_memory_needed, return: %s, from_class = %s, " | |
23126 | "to_class = %s, mode = %s\n", | |
23127 | ret ? "true" : "false", | |
23128 | reg_class_names[from_class], | |
23129 | reg_class_names[to_class], | |
23130 | GET_MODE_NAME (mode)); | |
23131 | ||
23132 | return ret; | |
23133 | } | |
23134 | ||
/* Return the register class of a scratch register needed to copy IN into
   or out of a register in RCLASS in MODE.  If it can be done directly,
   NO_REGS is returned.  */

static enum reg_class
rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
			       rtx in)
{
  int regno;

  if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
#if TARGET_MACHO
		     && MACHOPIC_INDIRECT
#endif
		     ))
    {
      /* We cannot copy a symbolic operand directly into anything
	 other than BASE_REGS for TARGET_ELF.  So indicate that a
	 register from BASE_REGS is needed as an intermediate
	 register.

	 On Darwin, pic addresses require a load from memory, which
	 needs a base register.  */
      if (rclass != BASE_REGS
	  && (GET_CODE (in) == SYMBOL_REF
	      || GET_CODE (in) == HIGH
	      || GET_CODE (in) == LABEL_REF
	      || GET_CODE (in) == CONST))
	return BASE_REGS;
    }

  /* Resolve IN to a hard register number; -1 means memory, a constant, or
     a pseudo without an assigned hard register.  */
  if (GET_CODE (in) == REG)
    {
      regno = REGNO (in);
      if (regno >= FIRST_PSEUDO_REGISTER)
	{
	  regno = true_regnum (in);
	  if (regno >= FIRST_PSEUDO_REGISTER)
	    regno = -1;
	}
    }
  else if (GET_CODE (in) == SUBREG)
    {
      regno = true_regnum (in);
      if (regno >= FIRST_PSEUDO_REGISTER)
	regno = -1;
    }
  else
    regno = -1;

  /* If we have VSX register moves, prefer moving scalar values between
     Altivec registers and GPR by going via an FPR (and then via memory)
     instead of reloading the secondary memory address for Altivec moves.  */
  if (TARGET_VSX
      && GET_MODE_SIZE (mode) < 16
      && !mode_supports_vmx_dform (mode)
      && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
	   && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
	  || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
	      && (regno >= 0 && INT_REGNO_P (regno)))))
    return FLOAT_REGS;

  /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
     into anything.  */
  if (rclass == GENERAL_REGS || rclass == BASE_REGS
      || (regno >= 0 && INT_REGNO_P (regno)))
    return NO_REGS;

  /* Constants, memory, and VSX registers can go into VSX registers (both the
     traditional floating point and the altivec registers).  */
  if (rclass == VSX_REGS
      && (regno == -1 || VSX_REGNO_P (regno)))
    return NO_REGS;

  /* Constants, memory, and FP registers can go into FP registers.  */
  if ((regno == -1 || FP_REGNO_P (regno))
      && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
    return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;

  /* Memory, and AltiVec registers can go into AltiVec registers.  */
  if ((regno == -1 || ALTIVEC_REGNO_P (regno))
      && rclass == ALTIVEC_REGS)
    return NO_REGS;

  /* We can copy among the CR registers.  */
  if ((rclass == CR_REGS || rclass == CR0_REGS)
      && regno >= 0 && CR_REGNO_P (regno))
    return NO_REGS;

  /* Otherwise, we need GENERAL_REGS.  */
  return GENERAL_REGS;
}
23227 | ||
23228 | /* Debug version of rs6000_secondary_reload_class. */ | |
23229 | static enum reg_class | |
23230 | rs6000_debug_secondary_reload_class (enum reg_class rclass, | |
23231 | machine_mode mode, rtx in) | |
23232 | { | |
23233 | enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in); | |
23234 | fprintf (stderr, | |
23235 | "\nrs6000_secondary_reload_class, return %s, rclass = %s, " | |
23236 | "mode = %s, input rtx:\n", | |
23237 | reg_class_names[ret], reg_class_names[rclass], | |
23238 | GET_MODE_NAME (mode)); | |
23239 | debug_rtx (in); | |
23240 | ||
23241 | return ret; | |
23242 | } | |
23243 | ||
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  Return true if a value of mode
   FROM held in a register of class RCLASS may also be accessed in mode TO
   (i.e. whether such a subreg/mode punning is allowed).  */

static bool
rs6000_can_change_mode_class (machine_mode from,
			      machine_mode to,
			      reg_class_t rclass)
{
  unsigned from_size = GET_MODE_SIZE (from);
  unsigned to_size = GET_MODE_SIZE (to);

  /* Size-changing accesses are the delicate case for FP/VSX registers.  */
  if (from_size != to_size)
    {
      enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;

      if (reg_classes_intersect_p (xclass, rclass))
	{
	  unsigned to_nregs = hard_regno_nregs (FIRST_FPR_REGNO, to);
	  unsigned from_nregs = hard_regno_nregs (FIRST_FPR_REGNO, from);
	  bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
	  bool from_float128_vector_p = FLOAT128_VECTOR_P (from);

	  /* Don't allow 64-bit types to overlap with 128-bit types that take a
	     single register under VSX because the scalar part of the register
	     is in the upper 64-bits, and not the lower 64-bits.  Types like
	     TFmode/TDmode that take 2 scalar register can overlap.  128-bit
	     IEEE floating point can't overlap, and neither can small
	     values.  */

	  if (to_float128_vector_p && from_float128_vector_p)
	    return true;

	  else if (to_float128_vector_p || from_float128_vector_p)
	    return false;

	  /* TDmode in floating-mode registers must always go into a register
	     pair with the most significant word in the even-numbered register
	     to match ISA requirements.  In little-endian mode, this does not
	     match subreg numbering, so we cannot allow subregs.  */
	  if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
	    return false;

	  if (from_size < 8 || to_size < 8)
	    return false;

	  if (from_size == 8 && (8 * to_nregs) != to_size)
	    return false;

	  if (to_size == 8 && (8 * from_nregs) != from_size)
	    return false;

	  return true;
	}
      else
	return true;
    }

  /* E500 double: DF/TF/IF/KF/DD/TD/DI values in GPRs may not be punned
     with other modes (exactly one side being such a mode is rejected).  */
  if (TARGET_E500_DOUBLE
      && ((((to) == DFmode) + ((from) == DFmode)) == 1
	  || (((to) == TFmode) + ((from) == TFmode)) == 1
	  || (((to) == IFmode) + ((from) == IFmode)) == 1
	  || (((to) == KFmode) + ((from) == KFmode)) == 1
	  || (((to) == DDmode) + ((from) == DDmode)) == 1
	  || (((to) == TDmode) + ((from) == TDmode)) == 1
	  || (((to) == DImode) + ((from) == DImode)) == 1))
    return false;

  /* Since the VSX register set includes traditional floating point registers
     and altivec registers, just check for the size being different instead of
     trying to check whether the modes are vector modes.  Otherwise it won't
     allow say DF and DI to change classes.  For types like TFmode and TDmode
     that take 2 64-bit registers, rather than a single 128-bit register, don't
     allow subregs of those types to other 128 bit types.  */
  if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
    {
      unsigned num_regs = (from_size + 15) / 16;
      if (hard_regno_nregs (FIRST_FPR_REGNO, to) > num_regs
	  || hard_regno_nregs (FIRST_FPR_REGNO, from) > num_regs)
	return false;

      return (from_size == 8 || from_size == 16);
    }

  if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
      && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
    return false;

  if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
      && reg_classes_intersect_p (GENERAL_REGS, rclass))
    return false;

  return true;
}
23336 | ||
0d803030 | 23337 | /* Debug version of rs6000_can_change_mode_class. */ |
83349046 | 23338 | static bool |
0d803030 RS |
23339 | rs6000_debug_can_change_mode_class (machine_mode from, |
23340 | machine_mode to, | |
23341 | reg_class_t rclass) | |
83349046 | 23342 | { |
0d803030 | 23343 | bool ret = rs6000_can_change_mode_class (from, to, rclass); |
83349046 SB |
23344 | |
23345 | fprintf (stderr, | |
0d803030 | 23346 | "rs6000_can_change_mode_class, return %s, from = %s, " |
83349046 SB |
23347 | "to = %s, rclass = %s\n", |
23348 | ret ? "true" : "false", | |
23349 | GET_MODE_NAME (from), GET_MODE_NAME (to), | |
23350 | reg_class_names[rclass]); | |
23351 | ||
23352 | return ret; | |
23353 | } | |
23354 | \f | |
23355 | /* Return a string to do a move operation of 128 bits of data. */ | |
23356 | ||
23357 | const char * | |
23358 | rs6000_output_move_128bit (rtx operands[]) | |
23359 | { | |
23360 | rtx dest = operands[0]; | |
23361 | rtx src = operands[1]; | |
23362 | machine_mode mode = GET_MODE (dest); | |
23363 | int dest_regno; | |
23364 | int src_regno; | |
23365 | bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p; | |
23366 | bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p; | |
23367 | ||
23368 | if (REG_P (dest)) | |
23369 | { | |
23370 | dest_regno = REGNO (dest); | |
23371 | dest_gpr_p = INT_REGNO_P (dest_regno); | |
23372 | dest_fp_p = FP_REGNO_P (dest_regno); | |
23373 | dest_vmx_p = ALTIVEC_REGNO_P (dest_regno); | |
23374 | dest_vsx_p = dest_fp_p | dest_vmx_p; | |
23375 | } | |
23376 | else | |
23377 | { | |
23378 | dest_regno = -1; | |
23379 | dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false; | |
23380 | } | |
23381 | ||
23382 | if (REG_P (src)) | |
23383 | { | |
23384 | src_regno = REGNO (src); | |
23385 | src_gpr_p = INT_REGNO_P (src_regno); | |
23386 | src_fp_p = FP_REGNO_P (src_regno); | |
23387 | src_vmx_p = ALTIVEC_REGNO_P (src_regno); | |
23388 | src_vsx_p = src_fp_p | src_vmx_p; | |
23389 | } | |
23390 | else | |
23391 | { | |
23392 | src_regno = -1; | |
23393 | src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false; | |
23394 | } | |
23395 | ||
23396 | /* Register moves. */ | |
23397 | if (dest_regno >= 0 && src_regno >= 0) | |
23398 | { | |
23399 | if (dest_gpr_p) | |
23400 | { | |
23401 | if (src_gpr_p) | |
23402 | return "#"; | |
23403 | ||
23404 | if (TARGET_DIRECT_MOVE_128 && src_vsx_p) | |
23405 | return (WORDS_BIG_ENDIAN | |
23406 | ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1" | |
23407 | : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1"); | |
23408 | ||
23409 | else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p) | |
23410 | return "#"; | |
23411 | } | |
23412 | ||
23413 | else if (TARGET_VSX && dest_vsx_p) | |
23414 | { | |
23415 | if (src_vsx_p) | |
23416 | return "xxlor %x0,%x1,%x1"; | |
23417 | ||
23418 | else if (TARGET_DIRECT_MOVE_128 && src_gpr_p) | |
23419 | return (WORDS_BIG_ENDIAN | |
23420 | ? "mtvsrdd %x0,%1,%L1" | |
23421 | : "mtvsrdd %x0,%L1,%1"); | |
23422 | ||
23423 | else if (TARGET_DIRECT_MOVE && src_gpr_p) | |
23424 | return "#"; | |
23425 | } | |
23426 | ||
23427 | else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p) | |
23428 | return "vor %0,%1,%1"; | |
23429 | ||
23430 | else if (dest_fp_p && src_fp_p) | |
23431 | return "#"; | |
23432 | } | |
23433 | ||
23434 | /* Loads. */ | |
23435 | else if (dest_regno >= 0 && MEM_P (src)) | |
23436 | { | |
23437 | if (dest_gpr_p) | |
23438 | { | |
23439 | if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src)) | |
23440 | return "lq %0,%1"; | |
23441 | else | |
23442 | return "#"; | |
23443 | } | |
23444 | ||
23445 | else if (TARGET_ALTIVEC && dest_vmx_p | |
23446 | && altivec_indexed_or_indirect_operand (src, mode)) | |
23447 | return "lvx %0,%y1"; | |
23448 | ||
23449 | else if (TARGET_VSX && dest_vsx_p) | |
23450 | { | |
23451 | if (mode_supports_vsx_dform_quad (mode) | |
23452 | && quad_address_p (XEXP (src, 0), mode, true)) | |
23453 | return "lxv %x0,%1"; | |
23454 | ||
23455 | else if (TARGET_P9_VECTOR) | |
23456 | return "lxvx %x0,%y1"; | |
23457 | ||
23458 | else if (mode == V16QImode || mode == V8HImode || mode == V4SImode) | |
23459 | return "lxvw4x %x0,%y1"; | |
23460 | ||
23461 | else | |
23462 | return "lxvd2x %x0,%y1"; | |
23463 | } | |
23464 | ||
23465 | else if (TARGET_ALTIVEC && dest_vmx_p) | |
23466 | return "lvx %0,%y1"; | |
23467 | ||
23468 | else if (dest_fp_p) | |
23469 | return "#"; | |
23470 | } | |
23471 | ||
23472 | /* Stores. */ | |
23473 | else if (src_regno >= 0 && MEM_P (dest)) | |
23474 | { | |
23475 | if (src_gpr_p) | |
23476 | { | |
23477 | if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src)) | |
23478 | return "stq %1,%0"; | |
23479 | else | |
23480 | return "#"; | |
23481 | } | |
23482 | ||
23483 | else if (TARGET_ALTIVEC && src_vmx_p | |
23484 | && altivec_indexed_or_indirect_operand (src, mode)) | |
23485 | return "stvx %1,%y0"; | |
23486 | ||
23487 | else if (TARGET_VSX && src_vsx_p) | |
23488 | { | |
23489 | if (mode_supports_vsx_dform_quad (mode) | |
23490 | && quad_address_p (XEXP (dest, 0), mode, true)) | |
23491 | return "stxv %x1,%0"; | |
23492 | ||
23493 | else if (TARGET_P9_VECTOR) | |
23494 | return "stxvx %x1,%y0"; | |
23495 | ||
23496 | else if (mode == V16QImode || mode == V8HImode || mode == V4SImode) | |
23497 | return "stxvw4x %x1,%y0"; | |
23498 | ||
23499 | else | |
23500 | return "stxvd2x %x1,%y0"; | |
23501 | } | |
23502 | ||
23503 | else if (TARGET_ALTIVEC && src_vmx_p) | |
23504 | return "stvx %1,%y0"; | |
23505 | ||
23506 | else if (src_fp_p) | |
23507 | return "#"; | |
23508 | } | |
23509 | ||
23510 | /* Constants. */ | |
23511 | else if (dest_regno >= 0 | |
23512 | && (GET_CODE (src) == CONST_INT | |
23513 | || GET_CODE (src) == CONST_WIDE_INT | |
23514 | || GET_CODE (src) == CONST_DOUBLE | |
23515 | || GET_CODE (src) == CONST_VECTOR)) | |
23516 | { | |
23517 | if (dest_gpr_p) | |
23518 | return "#"; | |
23519 | ||
23520 | else if ((dest_vmx_p && TARGET_ALTIVEC) | |
23521 | || (dest_vsx_p && TARGET_VSX)) | |
23522 | return output_vec_const_move (operands); | |
23523 | } | |
23524 | ||
23525 | fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src)); | |
23526 | } | |
23527 | ||
23528 | /* Validate a 128-bit move. */ | |
23529 | bool | |
23530 | rs6000_move_128bit_ok_p (rtx operands[]) | |
23531 | { | |
23532 | machine_mode mode = GET_MODE (operands[0]); | |
23533 | return (gpc_reg_operand (operands[0], mode) | |
23534 | || gpc_reg_operand (operands[1], mode)); | |
23535 | } | |
23536 | ||
23537 | /* Return true if a 128-bit move needs to be split. */ | |
23538 | bool | |
23539 | rs6000_split_128bit_ok_p (rtx operands[]) | |
23540 | { | |
23541 | if (!reload_completed) | |
23542 | return false; | |
23543 | ||
23544 | if (!gpr_or_gpr_p (operands[0], operands[1])) | |
23545 | return false; | |
23546 | ||
23547 | if (quad_load_store_p (operands[0], operands[1])) | |
23548 | return false; | |
23549 | ||
23550 | return true; | |
23551 | } | |
23552 | ||
23553 | \f | |
23554 | /* Given a comparison operation, return the bit number in CCR to test. We | |
23555 | know this is a valid comparison. | |
23556 | ||
23557 | SCC_P is 1 if this is for an scc. That means that %D will have been | |
23558 | used instead of %C, so the bits will be in different places. | |
23559 | ||
23560 | Return -1 if OP isn't a valid comparison for some reason. */ | |
23561 | ||
23562 | int | |
23563 | ccr_bit (rtx op, int scc_p) | |
23564 | { | |
23565 | enum rtx_code code = GET_CODE (op); | |
23566 | machine_mode cc_mode; | |
23567 | int cc_regnum; | |
23568 | int base_bit; | |
23569 | rtx reg; | |
23570 | ||
23571 | if (!COMPARISON_P (op)) | |
23572 | return -1; | |
23573 | ||
23574 | reg = XEXP (op, 0); | |
23575 | ||
23576 | gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg))); | |
23577 | ||
23578 | cc_mode = GET_MODE (reg); | |
23579 | cc_regnum = REGNO (reg); | |
23580 | base_bit = 4 * (cc_regnum - CR0_REGNO); | |
23581 | ||
23582 | validate_condition_mode (code, cc_mode); | |
23583 | ||
23584 | /* When generating a sCOND operation, only positive conditions are | |
23585 | allowed. */ | |
23586 | gcc_assert (!scc_p | |
23587 | || code == EQ || code == GT || code == LT || code == UNORDERED | |
23588 | || code == GTU || code == LTU); | |
23589 | ||
23590 | switch (code) | |
23591 | { | |
23592 | case NE: | |
23593 | return scc_p ? base_bit + 3 : base_bit + 2; | |
23594 | case EQ: | |
23595 | return base_bit + 2; | |
23596 | case GT: case GTU: case UNLE: | |
23597 | return base_bit + 1; | |
23598 | case LT: case LTU: case UNGE: | |
23599 | return base_bit; | |
23600 | case ORDERED: case UNORDERED: | |
23601 | return base_bit + 3; | |
23602 | ||
23603 | case GE: case GEU: | |
23604 | /* If scc, we will have done a cror to put the bit in the | |
23605 | unordered position. So test that bit. For integer, this is ! LT | |
23606 | unless this is an scc insn. */ | |
23607 | return scc_p ? base_bit + 3 : base_bit; | |
23608 | ||
23609 | case LE: case LEU: | |
23610 | return scc_p ? base_bit + 3 : base_bit + 1; | |
23611 | ||
23612 | default: | |
23613 | gcc_unreachable (); | |
23614 | } | |
23615 | } | |
23616 | \f | |
23617 | /* Return the GOT register. */ | |
23618 | ||
23619 | rtx | |
23620 | rs6000_got_register (rtx value ATTRIBUTE_UNUSED) | |
23621 | { | |
23622 | /* The second flow pass currently (June 1999) can't update | |
23623 | regs_ever_live without disturbing other parts of the compiler, so | |
23624 | update it here to make the prolog/epilogue code happy. */ | |
23625 | if (!can_create_pseudo_p () | |
23626 | && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM)) | |
23627 | df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true); | |
23628 | ||
23629 | crtl->uses_pic_offset_table = 1; | |
23630 | ||
23631 | return pic_offset_table_rtx; | |
23632 | } | |
23633 | \f | |
23634 | static rs6000_stack_t stack_info; | |
23635 | ||
23636 | /* Function to init struct machine_function. | |
23637 | This will be called, via a pointer variable, | |
23638 | from push_function_context. */ | |
23639 | ||
23640 | static struct machine_function * | |
23641 | rs6000_init_machine_status (void) | |
23642 | { | |
23643 | stack_info.reload_completed = 0; | |
23644 | return ggc_cleared_alloc<machine_function> (); | |
23645 | } | |
23646 | \f | |
23647 | #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode) | |
23648 | ||
23649 | /* Write out a function code label. */ | |
23650 | ||
23651 | void | |
23652 | rs6000_output_function_entry (FILE *file, const char *fname) | |
23653 | { | |
23654 | if (fname[0] != '.') | |
23655 | { | |
23656 | switch (DEFAULT_ABI) | |
23657 | { | |
23658 | default: | |
23659 | gcc_unreachable (); | |
23660 | ||
23661 | case ABI_AIX: | |
23662 | if (DOT_SYMBOLS) | |
23663 | putc ('.', file); | |
23664 | else | |
23665 | ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L."); | |
23666 | break; | |
23667 | ||
23668 | case ABI_ELFv2: | |
23669 | case ABI_V4: | |
23670 | case ABI_DARWIN: | |
23671 | break; | |
23672 | } | |
23673 | } | |
23674 | ||
23675 | RS6000_OUTPUT_BASENAME (file, fname); | |
23676 | } | |
23677 | ||
/* Print an operand.  Recognize special options, documented below.
   FILE is the assembler output stream, X the operand rtx, and CODE the
   modifier character from a "%<code><n>" template escape (0 for a plain
   "%<n>").  Unrecognized combinations are reported through
   output_operand_lossage rather than aborting.  */

#if TARGET_ELF
#define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
#define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
#else
#define SMALL_DATA_RELOC "sda21"
#define SMALL_DATA_REG 0
#endif

void
print_operand (FILE *file, rtx x, int code)
{
  int i;
  unsigned HOST_WIDE_INT uval;

  switch (code)
    {
      /* %a is output_address.  */

      /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
	 output_operand.  */

    case 'D':
      /* Like 'J' but get to the GT bit only.  */
      gcc_assert (REG_P (x));

      /* Bit 1 is GT bit.  */
      i = 4 * (REGNO (x) - CR0_REGNO) + 1;

      /* Add one for shift count in rlinm for scc.  */
      fprintf (file, "%d", i + 1);
      return;

    case 'e':
      /* If the low 16 bits are 0, but some other bit is set, write 's'.  */
      if (! INT_P (x))
	{
	  output_operand_lossage ("invalid %%e value");
	  return;
	}

      uval = INTVAL (x);
      if ((uval & 0xffff) == 0 && uval != 0)
	putc ('s', file);
      return;

    case 'E':
      /* X is a CR register.  Print the number of the EQ bit of the CR.  */
      if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%E value");
      else
	fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
      return;

    case 'f':
      /* X is a CR register.  Print the shift count needed to move it
	 to the high-order four bits.  */
      if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%f value");
      else
	fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
      return;

    case 'F':
      /* Similar, but print the count for the rotate in the opposite
	 direction.  */
      if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%F value");
      else
	fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
      return;

    case 'G':
      /* X is a constant integer.  If it is negative, print "m",
	 otherwise print "z".  This is to make an aze or ame insn.  */
      if (GET_CODE (x) != CONST_INT)
	output_operand_lossage ("invalid %%G value");
      else if (INTVAL (x) >= 0)
	putc ('z', file);
      else
	putc ('m', file);
      return;

    case 'h':
      /* If constant, output low-order five bits.  Otherwise, write
	 normally.  */
      if (INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
      else
	print_operand (file, x, 0);
      return;

    case 'H':
      /* If constant, output low-order six bits.  Otherwise, write
	 normally.  */
      if (INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
      else
	print_operand (file, x, 0);
      return;

    case 'I':
      /* Print `i' if this is a constant, else nothing.  */
      if (INT_P (x))
	putc ('i', file);
      return;

    case 'j':
      /* Write the bit number in CCR for jump.  */
      i = ccr_bit (x, 0);
      if (i == -1)
	output_operand_lossage ("invalid %%j code");
      else
	fprintf (file, "%d", i);
      return;

    case 'J':
      /* Similar, but add one for shift count in rlinm for scc and pass
	 scc flag to `ccr_bit'.  */
      i = ccr_bit (x, 1);
      if (i == -1)
	output_operand_lossage ("invalid %%J code");
      else
	/* If we want bit 31, write a shift count of zero, not 32.  */
	fprintf (file, "%d", i == 31 ? 0 : i + 1);
      return;

    case 'k':
      /* X must be a constant.  Write the 1's complement of the
	 constant.  */
      if (! INT_P (x))
	output_operand_lossage ("invalid %%k value");
      else
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
      return;

    case 'K':
      /* X must be a symbolic constant on ELF.  Write an
	 expression suitable for an 'addi' that adds in the low 16
	 bits of the MEM.  */
      if (GET_CODE (x) == CONST)
	{
	  if (GET_CODE (XEXP (x, 0)) != PLUS
	      || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
		  && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
	      || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
	    output_operand_lossage ("invalid %%K value");
	}
      print_operand_address (file, x);
      fputs ("@l", file);
      return;

      /* %l is output_asm_label.  */

    case 'L':
      /* Write second word of DImode or DFmode reference.  Works on register
	 or non-indexed memory only.  */
      if (REG_P (x))
	fputs (reg_names[REGNO (x) + 1], file);
      else if (MEM_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  /* Handle possible auto-increment.  Since it is pre-increment and
	     we have already done it, we can just use an offset of word.  */
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
						 UNITS_PER_WORD));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
						 UNITS_PER_WORD));
	  else
	    output_address (mode, XEXP (adjust_address_nv (x, SImode,
							   UNITS_PER_WORD),
					0));

	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);
	}
      return;

    case 'N':
      /* Write the number of elements in the vector times 4.  */
      if (GET_CODE (x) != PARALLEL)
	output_operand_lossage ("invalid %%N value");
      else
	fprintf (file, "%d", XVECLEN (x, 0) * 4);
      return;

    case 'O':
      /* Similar, but subtract 1 first.  */
      if (GET_CODE (x) != PARALLEL)
	output_operand_lossage ("invalid %%O value");
      else
	fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
      return;

    case 'p':
      /* X is a CONST_INT that is a power of two.  Output the logarithm.  */
      if (! INT_P (x)
	  || INTVAL (x) < 0
	  || (i = exact_log2 (INTVAL (x))) < 0)
	output_operand_lossage ("invalid %%p value");
      else
	fprintf (file, "%d", i);
      return;

    case 'P':
      /* The operand must be an indirect memory reference.  The result
	 is the register name.  */
      if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
	  || REGNO (XEXP (x, 0)) >= 32)
	output_operand_lossage ("invalid %%P value");
      else
	fputs (reg_names[REGNO (XEXP (x, 0))], file);
      return;

    case 'q':
      /* This outputs the logical code corresponding to a boolean
	 expression.  The expression may have one or both operands
	 negated (if one, only the first one).  For condition register
	 logical operations, it will also treat the negated
	 CR codes as NOTs, but not handle NOTs of them.  */
      {
	const char *const *t = 0;
	const char *s;
	enum rtx_code code = GET_CODE (x);
	/* Rows: AND/IOR/XOR.  Columns: plain, first operand negated,
	   both operands negated.  */
	static const char * const tbl[3][3] = {
	  { "and", "andc", "nor" },
	  { "or", "orc", "nand" },
	  { "xor", "eqv", "xor" } };

	if (code == AND)
	  t = tbl[0];
	else if (code == IOR)
	  t = tbl[1];
	else if (code == XOR)
	  t = tbl[2];
	else
	  output_operand_lossage ("invalid %%q value");

	if (GET_CODE (XEXP (x, 0)) != NOT)
	  s = t[0];
	else
	  {
	    if (GET_CODE (XEXP (x, 1)) == NOT)
	      s = t[2];
	    else
	      s = t[1];
	  }

	fputs (s, file);
      }
      return;

    case 'Q':
      /* On machines with mfcrf, emit a "," and fall through to 'R' so the
	 single-field mask is appended; otherwise print nothing.  */
      if (! TARGET_MFCRF)
	return;
      fputc (',', file);
      /* FALLTHRU */

    case 'R':
      /* X is a CR register.  Print the mask for `mtcrf'.  */
      if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%R value");
      else
	fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
      return;

    case 's':
      /* Low 5 bits of 32 - value.  */
      if (! INT_P (x))
	output_operand_lossage ("invalid %%s value");
      else
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
      return;

    case 't':
      /* Like 'J' but get to the OVERFLOW/UNORDERED bit.  */
      gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);

      /* Bit 3 is OV bit.  */
      i = 4 * (REGNO (x) - CR0_REGNO) + 3;

      /* If we want bit 31, write a shift count of zero, not 32.  */
      fprintf (file, "%d", i == 31 ? 0 : i + 1);
      return;

    case 'T':
      /* Print the symbolic name of a branch target register.  */
      if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
				  && REGNO (x) != CTR_REGNO))
	output_operand_lossage ("invalid %%T value");
      else if (REGNO (x) == LR_REGNO)
	fputs ("lr", file);
      else
	fputs ("ctr", file);
      return;

    case 'u':
      /* High-order or low-order 16 bits of constant, whichever is non-zero,
	 for use in unsigned operand.  */
      if (! INT_P (x))
	{
	  output_operand_lossage ("invalid %%u value");
	  return;
	}

      uval = INTVAL (x);
      if ((uval & 0xffff) == 0)
	uval >>= 16;

      fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
      return;

    case 'v':
      /* High-order 16 bits of constant for use in signed operand.  */
      if (! INT_P (x))
	output_operand_lossage ("invalid %%v value");
      else
	fprintf (file, HOST_WIDE_INT_PRINT_HEX,
		 (INTVAL (x) >> 16) & 0xffff);
      return;

    case 'U':
      /* Print `u' if this has an auto-increment or auto-decrement.  */
      if (MEM_P (x)
	  && (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC
	      || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
	putc ('u', file);
      return;

    case 'V':
      /* Print the trap code for this operand.  The number in each trailing
	 comment is the corresponding TO-field encoding.  */
      switch (GET_CODE (x))
	{
	case EQ:
	  fputs ("eq", file);   /* 4 */
	  break;
	case NE:
	  fputs ("ne", file);   /* 24 */
	  break;
	case LT:
	  fputs ("lt", file);   /* 16 */
	  break;
	case LE:
	  fputs ("le", file);   /* 20 */
	  break;
	case GT:
	  fputs ("gt", file);   /* 8 */
	  break;
	case GE:
	  fputs ("ge", file);   /* 12 */
	  break;
	case LTU:
	  fputs ("llt", file);  /* 2 */
	  break;
	case LEU:
	  fputs ("lle", file);  /* 6 */
	  break;
	case GTU:
	  fputs ("lgt", file);  /* 1 */
	  break;
	case GEU:
	  fputs ("lge", file);  /* 5 */
	  break;
	default:
	  gcc_unreachable ();
	}
      break;

    case 'w':
      /* If constant, low-order 16 bits of constant, signed.  Otherwise, write
	 normally.  */
      if (INT_P (x))
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
      else
	print_operand (file, x, 0);
      return;

    case 'x':
      /* X is a FPR or Altivec register used in a VSX context.  */
      if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
	output_operand_lossage ("invalid %%x value");
      else
	{
	  /* VSX numbering: FPRs map to VSRs 0-31, AltiVec regs to 32-63.  */
	  int reg = REGNO (x);
	  int vsx_reg = (FP_REGNO_P (reg)
			 ? reg - 32
			 : reg - FIRST_ALTIVEC_REGNO + 32);

#ifdef TARGET_REGNAMES
	  if (TARGET_REGNAMES)
	    fprintf (file, "%%vs%d", vsx_reg);
	  else
#endif
	    fprintf (file, "%d", vsx_reg);
	}
      return;

    case 'X':
      /* Print 'x' if the memory operand has an indexed (reg+reg) address,
	 possibly inside a PRE_MODIFY; used to pick the -x insn form.  */
      if (MEM_P (x)
	  && (legitimate_indexed_address_p (XEXP (x, 0), 0)
	      || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
		  && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
	putc ('x', file);
      return;

    case 'Y':
      /* Like 'L', for third word of TImode/PTImode.  */
      if (REG_P (x))
	fputs (reg_names[REGNO (x) + 2], file);
      else if (MEM_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 8));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 8));
	  else
	    output_address (mode, XEXP (adjust_address_nv (x, SImode, 8), 0));
	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);
	}
      return;

    case 'z':
      /* X is a SYMBOL_REF.  Write out the name preceded by a
	 period and without any trailing data in brackets.  Used for function
	 names.  If we are configured for System V (or the embedded ABI) on
	 the PowerPC, do not emit the period, since those systems do not use
	 TOCs and the like.  */
      gcc_assert (GET_CODE (x) == SYMBOL_REF);

      /* For macho, check to see if we need a stub.  */
      if (TARGET_MACHO)
	{
	  const char *name = XSTR (x, 0);
#if TARGET_MACHO
	  if (darwin_emit_branch_islands
	      && MACHOPIC_INDIRECT
	      && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
	    name = machopic_indirection_name (x, /*stub_p=*/true);
#endif
	  assemble_name (file, name);
	}
      else if (!DOT_SYMBOLS)
	assemble_name (file, XSTR (x, 0));
      else
	rs6000_output_function_entry (file, XSTR (x, 0));
      return;

    case 'Z':
      /* Like 'L', for last word of TImode/PTImode.  */
      if (REG_P (x))
	fputs (reg_names[REGNO (x) + 3], file);
      else if (MEM_P (x))
	{
	  machine_mode mode = GET_MODE (x);
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC
	      || GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 12));
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (mode, plus_constant (Pmode,
						 XEXP (XEXP (x, 0), 0), 12));
	  else
	    output_address (mode, XEXP (adjust_address_nv (x, SImode, 12), 0));
	  if (small_data_operand (x, GET_MODE (x)))
	    fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		     reg_names[SMALL_DATA_REG]);
	}
      return;

      /* Print AltiVec or SPE memory operand.  */
    case 'y':
      {
	rtx tmp;

	gcc_assert (MEM_P (x));

	tmp = XEXP (x, 0);

	/* Ugly hack because %y is overloaded.  */
	if ((TARGET_SPE || TARGET_E500_DOUBLE)
	    && (GET_MODE_SIZE (GET_MODE (x)) == 8
		|| FLOAT128_2REG_P (GET_MODE (x))
		|| GET_MODE (x) == TImode
		|| GET_MODE (x) == PTImode))
	  {
	    /* Handle [reg].  */
	    if (REG_P (tmp))
	      {
		fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
		break;
	      }
	    /* Handle [reg+UIMM].  */
	    else if (GET_CODE (tmp) == PLUS &&
		     GET_CODE (XEXP (tmp, 1)) == CONST_INT)
	      {
		/* NB: this local deliberately shadows the X parameter.  */
		int x;

		gcc_assert (REG_P (XEXP (tmp, 0)));

		x = INTVAL (XEXP (tmp, 1));
		fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
		break;
	      }

	    /* Fall through.  Must be [reg+reg].  */
	  }
	/* Strip the AND -16 alignment mask from AltiVec addresses, or the
	   PRE_MODIFY wrapper from VSX addresses, to expose the base.  */
	if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
	    && GET_CODE (tmp) == AND
	    && GET_CODE (XEXP (tmp, 1)) == CONST_INT
	    && INTVAL (XEXP (tmp, 1)) == -16)
	  tmp = XEXP (tmp, 0);
	else if (VECTOR_MEM_VSX_P (GET_MODE (x))
		 && GET_CODE (tmp) == PRE_MODIFY)
	  tmp = XEXP (tmp, 1);
	if (REG_P (tmp))
	  fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
	else
	  {
	    if (GET_CODE (tmp) != PLUS
		|| !REG_P (XEXP (tmp, 0))
		|| !REG_P (XEXP (tmp, 1)))
	      {
		output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
		break;
	      }

	    /* r0 reads as literal zero in the RA position, so put it in
	       the RB position instead.  */
	    if (REGNO (XEXP (tmp, 0)) == 0)
	      fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
		       reg_names[ REGNO (XEXP (tmp, 0)) ]);
	    else
	      fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
		       reg_names[ REGNO (XEXP (tmp, 1)) ]);
	  }
	break;
      }

    case 0:
      /* No modifier: print the operand in its natural form.  */
      if (REG_P (x))
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (MEM_P (x))
	{
	  /* We need to handle PRE_INC and PRE_DEC here, since we need to
	     know the width from the mode.  */
	  if (GET_CODE (XEXP (x, 0)) == PRE_INC)
	    fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
	  else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
	    fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
		     reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
	  else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
	    output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1));
	  else
	    output_address (GET_MODE (x), XEXP (x, 0));
	}
      else
	{
	  if (toc_relative_expr_p (x, false))
	    /* This hack along with a corresponding hack in
	       rs6000_output_addr_const_extra arranges to output addends
	       where the assembler expects to find them.  eg.
	       (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
	       without this hack would be output as "x@toc+4".  We
	       want "x+4@toc".  */
	    output_addr_const (file, CONST_CAST_RTX (tocrel_base));
	  else
	    output_addr_const (file, x);
	}
      return;

    case '&':
      /* Print the name of some local-dynamic TLS base symbol, if any.  */
      if (const char *name = get_some_local_dynamic_name ())
	assemble_name (file, name);
      else
	output_operand_lossage ("'%%&' used without any "
				"local dynamic TLS references");
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
    }
}
24272 | \f | |
/* Print the address of an operand to FILE in assembler syntax.
   X is the address rtx; the accepted forms are a bare register,
   a symbolic constant, reg+reg, reg+const, LO_SUM (Mach-O/ELF low-part
   relocations), and TOC-relative expressions.  Anything else aborts.  */

void
print_operand_address (FILE *file, rtx x)
{
  if (REG_P (x))
    /* Bare register: zero displacement.  */
    fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
  else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
	   || GET_CODE (x) == LABEL_REF)
    {
      /* Symbolic address: only valid here for small-data references
	 (relative to the small-data base register) when a TOC is in use.  */
      output_addr_const (file, x);
      if (small_data_operand (x, GET_MODE (x)))
	fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
		 reg_names[SMALL_DATA_REG]);
      else
	gcc_assert (!TARGET_TOC);
    }
  else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
	   && REG_P (XEXP (x, 1)))
    {
      /* Indexed reg+reg.  r0 in the RA slot reads as zero, so swap it
	 into the RB slot.  */
      if (REGNO (XEXP (x, 0)) == 0)
	fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
		 reg_names[ REGNO (XEXP (x, 0)) ]);
      else
	fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
		 reg_names[ REGNO (XEXP (x, 1)) ]);
    }
  else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
	   && GET_CODE (XEXP (x, 1)) == CONST_INT)
    /* Displacement form: "disp(base)".  */
    fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
	     INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
#if TARGET_MACHO
  else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
	   && CONSTANT_P (XEXP (x, 1)))
    {
      /* Mach-O low-part relocation: "lo16(sym)(base)".  */
      fprintf (file, "lo16(");
      output_addr_const (file, XEXP (x, 1));
      fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
    }
#endif
#if TARGET_ELF
  else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
	   && CONSTANT_P (XEXP (x, 1)))
    {
      /* ELF low-part relocation: "sym@l(base)".  */
      output_addr_const (file, XEXP (x, 1));
      fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
    }
#endif
  else if (toc_relative_expr_p (x, false))
    {
      /* This hack along with a corresponding hack in
	 rs6000_output_addr_const_extra arranges to output addends
	 where the assembler expects to find them.  eg.
	 (lo_sum (reg 9)
	 .       (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
	 without this hack would be output as "x@toc+8@l(9)".  We
	 want "x+8@toc@l(9)".  */
      output_addr_const (file, CONST_CAST_RTX (tocrel_base));
      if (GET_CODE (x) == LO_SUM)
	fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
      else
	fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
    }
  else
    gcc_unreachable ();
}
24339 | \f | |
/* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA.

   Handle the rs6000-specific UNSPECs that can appear inside an address
   constant: UNSPEC_TOCREL (a TOC-relative reference) and, on Darwin,
   UNSPEC_MACHOPIC_OFFSET.  Returns true if X was handled here, false to
   let the generic code print it.  */

static bool
rs6000_output_addr_const_extra (FILE *file, rtx x)
{
  if (GET_CODE (x) == UNSPEC)
    switch (XINT (x, 1))
      {
      case UNSPEC_TOCREL:
	/* Operand 0 is the symbol, operand 1 must be the TOC register.  */
	gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
			     && REG_P (XVECEXP (x, 0, 1))
			     && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
	output_addr_const (file, XVECEXP (x, 0, 0));
	/* Print the addend recorded by the print_operand/-_address hack
	   before the @toc modifier (see the comments there).  */
	if (x == tocrel_base && tocrel_offset != const0_rtx)
	  {
	    if (INTVAL (tocrel_offset) >= 0)
	      fprintf (file, "+");
	    output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
	  }
	if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
	  {
	    /* Emit "sym-.LCTOC1" style references; remember that the
	       TOC base label must be emitted.  */
	    putc ('-', file);
	    assemble_name (file, toc_label_name);
	    need_toc_init = 1;
	  }
	else if (TARGET_ELF)
	  fputs ("@toc", file);
	return true;

#if TARGET_MACHO
      case UNSPEC_MACHOPIC_OFFSET:
	/* Darwin PIC: print "sym-<pic base>".  */
	output_addr_const (file, XVECEXP (x, 0, 0));
	putc ('-', file);
	machopic_output_function_base_name (file);
	return true;
#endif
      }
  return false;
}
24379 | \f | |
/* Target hook for assembling integer objects.  The PowerPC version has
   to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
   is defined.  It also needs to handle DI-mode objects on 64-bit
   targets.  */

static bool
rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
#ifdef RELOCATABLE_NEEDS_FIXUP
  /* Special handling for SI values.  */
  if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
    {
      /* Guards against re-entering this path from the recursive
	 output_addr_const call below.  */
      static int recurse = 0;

      /* For -mrelocatable, we mark all addresses that need to be fixed up in
	 the .fixup section.  Since the TOC section is already relocated, we
	 don't need to mark it here.  We used to skip the text section, but it
	 should never be valid for relocated addresses to be placed in the text
	 section.  */
      if (DEFAULT_ABI == ABI_V4
	  && (TARGET_RELOCATABLE || flag_pic > 1)
	  && in_section != toc_section
	  && !recurse
	  && !CONST_SCALAR_INT_P (x)
	  && CONSTANT_P (x))
	{
	  char buf[256];

	  recurse = 1;
	  /* Emit a local label, the relocated word itself, then record
	     the label's address in the .fixup section.  */
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
	  fixuplabelno++;
	  ASM_OUTPUT_LABEL (asm_out_file, buf);
	  fprintf (asm_out_file, "\t.long\t(");
	  output_addr_const (asm_out_file, x);
	  fprintf (asm_out_file, ")@fixup\n");
	  fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
	  ASM_OUTPUT_ALIGN (asm_out_file, 2);
	  fprintf (asm_out_file, "\t.long\t");
	  assemble_name (asm_out_file, buf);
	  fprintf (asm_out_file, "\n\t.previous\n");
	  recurse = 0;
	  return true;
	}
      /* Remove initial .'s to turn a -mcall-aixdesc function
	 address into the address of the descriptor, not the function
	 itself.  */
      else if (GET_CODE (x) == SYMBOL_REF
	       && XSTR (x, 0)[0] == '.'
	       && DEFAULT_ABI == ABI_AIX)
	{
	  const char *name = XSTR (x, 0);
	  while (*name == '.')
	    name++;

	  fprintf (asm_out_file, "\t.long\t%s\n", name);
	  return true;
	}
    }
#endif /* RELOCATABLE_NEEDS_FIXUP */
  /* Everything else gets the generic treatment.  */
  return default_assemble_integer (x, size, aligned_p);
}
24441 | ||
#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
/* Emit an assembler directive to set symbol visibility for DECL to
   VISIBILITY_TYPE.  */

static void
rs6000_assemble_visibility (tree decl, int vis)
{
  /* XCOFF has no visibility directives.  */
  if (TARGET_XCOFF)
    return;

  /* Functions need to have their entry point symbol visibility set as
     well as their descriptor symbol visibility.  */
  if (DEFAULT_ABI == ABI_AIX
      && DOT_SYMBOLS
      && TREE_CODE (decl) == FUNCTION_DECL)
    {
      /* Indexed by visibility value; VISIBILITY_DEFAULT needs no
	 directive, hence the NULL slot.  */
      static const char * const visibility_types[] = {
	NULL, "protected", "hidden", "internal"
      };

      const char *name, *type;

      name = ((* targetm.strip_name_encoding)
	      (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
      type = visibility_types[vis];

      /* One directive for the descriptor symbol, one for the
	 dot-prefixed entry point symbol.  */
      fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
      fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
    }
  else
    default_assemble_visibility (decl, vis);
}
#endif
24475 | \f | |
24476 | enum rtx_code | |
24477 | rs6000_reverse_condition (machine_mode mode, enum rtx_code code) | |
24478 | { | |
24479 | /* Reversal of FP compares takes care -- an ordered compare | |
24480 | becomes an unordered compare and vice versa. */ | |
24481 | if (mode == CCFPmode | |
24482 | && (!flag_finite_math_only | |
24483 | || code == UNLT || code == UNLE || code == UNGT || code == UNGE | |
24484 | || code == UNEQ || code == LTGT)) | |
24485 | return reverse_condition_maybe_unordered (code); | |
24486 | else | |
24487 | return reverse_condition (code); | |
24488 | } | |
24489 | ||
24490 | /* Generate a compare for CODE. Return a brand-new rtx that | |
24491 | represents the result of the compare. */ | |
24492 | ||
24493 | static rtx | |
24494 | rs6000_generate_compare (rtx cmp, machine_mode mode) | |
24495 | { | |
24496 | machine_mode comp_mode; | |
24497 | rtx compare_result; | |
24498 | enum rtx_code code = GET_CODE (cmp); | |
24499 | rtx op0 = XEXP (cmp, 0); | |
24500 | rtx op1 = XEXP (cmp, 1); | |
24501 | ||
24502 | if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode)) | |
24503 | comp_mode = CCmode; | |
24504 | else if (FLOAT_MODE_P (mode)) | |
24505 | comp_mode = CCFPmode; | |
24506 | else if (code == GTU || code == LTU | |
24507 | || code == GEU || code == LEU) | |
24508 | comp_mode = CCUNSmode; | |
24509 | else if ((code == EQ || code == NE) | |
24510 | && unsigned_reg_p (op0) | |
24511 | && (unsigned_reg_p (op1) | |
24512 | || (CONST_INT_P (op1) && INTVAL (op1) != 0))) | |
24513 | /* These are unsigned values, perhaps there will be a later | |
24514 | ordering compare that can be shared with this one. */ | |
24515 | comp_mode = CCUNSmode; | |
24516 | else | |
24517 | comp_mode = CCmode; | |
24518 | ||
24519 | /* If we have an unsigned compare, make sure we don't have a signed value as | |
24520 | an immediate. */ | |
24521 | if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT | |
24522 | && INTVAL (op1) < 0) | |
24523 | { | |
24524 | op0 = copy_rtx_if_shared (op0); | |
24525 | op1 = force_reg (GET_MODE (op0), op1); | |
24526 | cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1); | |
24527 | } | |
24528 | ||
24529 | /* First, the compare. */ | |
24530 | compare_result = gen_reg_rtx (comp_mode); | |
24531 | ||
24532 | /* E500 FP compare instructions on the GPRs. Yuck! */ | |
24533 | if ((!TARGET_FPRS && TARGET_HARD_FLOAT) | |
24534 | && FLOAT_MODE_P (mode)) | |
24535 | { | |
24536 | rtx cmp, or_result, compare_result2; | |
24537 | machine_mode op_mode = GET_MODE (op0); | |
24538 | bool reverse_p; | |
24539 | ||
24540 | if (op_mode == VOIDmode) | |
24541 | op_mode = GET_MODE (op1); | |
24542 | ||
24543 | /* First reverse the condition codes that aren't directly supported. */ | |
24544 | switch (code) | |
24545 | { | |
24546 | case NE: | |
24547 | case UNLT: | |
24548 | case UNLE: | |
24549 | case UNGT: | |
24550 | case UNGE: | |
24551 | code = reverse_condition_maybe_unordered (code); | |
24552 | reverse_p = true; | |
24553 | break; | |
24554 | ||
24555 | case EQ: | |
24556 | case LT: | |
24557 | case LE: | |
24558 | case GT: | |
24559 | case GE: | |
24560 | reverse_p = false; | |
24561 | break; | |
24562 | ||
24563 | default: | |
24564 | gcc_unreachable (); | |
24565 | } | |
24566 | ||
24567 | /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only. | |
24568 | This explains the following mess. */ | |
24569 | ||
24570 | switch (code) | |
24571 | { | |
24572 | case EQ: | |
24573 | switch (op_mode) | |
24574 | { | |
4e10a5a7 | 24575 | case E_SFmode: |
83349046 SB |
24576 | cmp = (flag_finite_math_only && !flag_trapping_math) |
24577 | ? gen_tstsfeq_gpr (compare_result, op0, op1) | |
24578 | : gen_cmpsfeq_gpr (compare_result, op0, op1); | |
24579 | break; | |
24580 | ||
4e10a5a7 | 24581 | case E_DFmode: |
83349046 SB |
24582 | cmp = (flag_finite_math_only && !flag_trapping_math) |
24583 | ? gen_tstdfeq_gpr (compare_result, op0, op1) | |
24584 | : gen_cmpdfeq_gpr (compare_result, op0, op1); | |
24585 | break; | |
24586 | ||
4e10a5a7 RS |
24587 | case E_TFmode: |
24588 | case E_IFmode: | |
24589 | case E_KFmode: | |
83349046 SB |
24590 | cmp = (flag_finite_math_only && !flag_trapping_math) |
24591 | ? gen_tsttfeq_gpr (compare_result, op0, op1) | |
24592 | : gen_cmptfeq_gpr (compare_result, op0, op1); | |
24593 | break; | |
24594 | ||
24595 | default: | |
24596 | gcc_unreachable (); | |
24597 | } | |
24598 | break; | |
24599 | ||
24600 | case GT: | |
24601 | case GE: | |
24602 | switch (op_mode) | |
24603 | { | |
4e10a5a7 | 24604 | case E_SFmode: |
83349046 SB |
24605 | cmp = (flag_finite_math_only && !flag_trapping_math) |
24606 | ? gen_tstsfgt_gpr (compare_result, op0, op1) | |
24607 | : gen_cmpsfgt_gpr (compare_result, op0, op1); | |
24608 | break; | |
24609 | ||
4e10a5a7 | 24610 | case E_DFmode: |
83349046 SB |
24611 | cmp = (flag_finite_math_only && !flag_trapping_math) |
24612 | ? gen_tstdfgt_gpr (compare_result, op0, op1) | |
24613 | : gen_cmpdfgt_gpr (compare_result, op0, op1); | |
24614 | break; | |
24615 | ||
4e10a5a7 RS |
24616 | case E_TFmode: |
24617 | case E_IFmode: | |
24618 | case E_KFmode: | |
83349046 SB |
24619 | cmp = (flag_finite_math_only && !flag_trapping_math) |
24620 | ? gen_tsttfgt_gpr (compare_result, op0, op1) | |
24621 | : gen_cmptfgt_gpr (compare_result, op0, op1); | |
24622 | break; | |
24623 | ||
24624 | default: | |
24625 | gcc_unreachable (); | |
24626 | } | |
24627 | break; | |
24628 | ||
24629 | case LT: | |
24630 | case LE: | |
24631 | switch (op_mode) | |
24632 | { | |
4e10a5a7 | 24633 | case E_SFmode: |
83349046 SB |
24634 | cmp = (flag_finite_math_only && !flag_trapping_math) |
24635 | ? gen_tstsflt_gpr (compare_result, op0, op1) | |
24636 | : gen_cmpsflt_gpr (compare_result, op0, op1); | |
24637 | break; | |
24638 | ||
4e10a5a7 | 24639 | case E_DFmode: |
83349046 SB |
24640 | cmp = (flag_finite_math_only && !flag_trapping_math) |
24641 | ? gen_tstdflt_gpr (compare_result, op0, op1) | |
24642 | : gen_cmpdflt_gpr (compare_result, op0, op1); | |
24643 | break; | |
24644 | ||
4e10a5a7 RS |
24645 | case E_TFmode: |
24646 | case E_IFmode: | |
24647 | case E_KFmode: | |
83349046 SB |
24648 | cmp = (flag_finite_math_only && !flag_trapping_math) |
24649 | ? gen_tsttflt_gpr (compare_result, op0, op1) | |
24650 | : gen_cmptflt_gpr (compare_result, op0, op1); | |
24651 | break; | |
24652 | ||
24653 | default: | |
24654 | gcc_unreachable (); | |
24655 | } | |
24656 | break; | |
24657 | ||
24658 | default: | |
24659 | gcc_unreachable (); | |
24660 | } | |
24661 | ||
24662 | /* Synthesize LE and GE from LT/GT || EQ. */ | |
24663 | if (code == LE || code == GE) | |
24664 | { | |
24665 | emit_insn (cmp); | |
24666 | ||
24667 | compare_result2 = gen_reg_rtx (CCFPmode); | |
24668 | ||
24669 | /* Do the EQ. */ | |
24670 | switch (op_mode) | |
24671 | { | |
4e10a5a7 | 24672 | case E_SFmode: |
83349046 SB |
24673 | cmp = (flag_finite_math_only && !flag_trapping_math) |
24674 | ? gen_tstsfeq_gpr (compare_result2, op0, op1) | |
24675 | : gen_cmpsfeq_gpr (compare_result2, op0, op1); | |
24676 | break; | |
24677 | ||
4e10a5a7 | 24678 | case E_DFmode: |
83349046 SB |
24679 | cmp = (flag_finite_math_only && !flag_trapping_math) |
24680 | ? gen_tstdfeq_gpr (compare_result2, op0, op1) | |
24681 | : gen_cmpdfeq_gpr (compare_result2, op0, op1); | |
24682 | break; | |
24683 | ||
4e10a5a7 RS |
24684 | case E_TFmode: |
24685 | case E_IFmode: | |
24686 | case E_KFmode: | |
83349046 SB |
24687 | cmp = (flag_finite_math_only && !flag_trapping_math) |
24688 | ? gen_tsttfeq_gpr (compare_result2, op0, op1) | |
24689 | : gen_cmptfeq_gpr (compare_result2, op0, op1); | |
24690 | break; | |
24691 | ||
24692 | default: | |
24693 | gcc_unreachable (); | |
24694 | } | |
24695 | ||
24696 | emit_insn (cmp); | |
24697 | ||
24698 | /* OR them together. */ | |
24699 | or_result = gen_reg_rtx (CCFPmode); | |
24700 | cmp = gen_e500_cr_ior_compare (or_result, compare_result, | |
24701 | compare_result2); | |
24702 | compare_result = or_result; | |
24703 | } | |
24704 | ||
24705 | code = reverse_p ? NE : EQ; | |
24706 | ||
24707 | emit_insn (cmp); | |
24708 | } | |
24709 | ||
24710 | /* IEEE 128-bit support in VSX registers when we do not have hardware | |
24711 | support. */ | |
24712 | else if (!TARGET_FLOAT128_HW && FLOAT128_VECTOR_P (mode)) | |
24713 | { | |
24714 | rtx libfunc = NULL_RTX; | |
24715 | bool check_nan = false; | |
24716 | rtx dest; | |
24717 | ||
24718 | switch (code) | |
24719 | { | |
24720 | case EQ: | |
24721 | case NE: | |
24722 | libfunc = optab_libfunc (eq_optab, mode); | |
24723 | break; | |
24724 | ||
24725 | case GT: | |
24726 | case GE: | |
24727 | libfunc = optab_libfunc (ge_optab, mode); | |
24728 | break; | |
24729 | ||
24730 | case LT: | |
24731 | case LE: | |
24732 | libfunc = optab_libfunc (le_optab, mode); | |
24733 | break; | |
24734 | ||
24735 | case UNORDERED: | |
24736 | case ORDERED: | |
24737 | libfunc = optab_libfunc (unord_optab, mode); | |
24738 | code = (code == UNORDERED) ? NE : EQ; | |
24739 | break; | |
24740 | ||
24741 | case UNGE: | |
24742 | case UNGT: | |
24743 | check_nan = true; | |
24744 | libfunc = optab_libfunc (ge_optab, mode); | |
24745 | code = (code == UNGE) ? GE : GT; | |
24746 | break; | |
24747 | ||
24748 | case UNLE: | |
24749 | case UNLT: | |
24750 | check_nan = true; | |
24751 | libfunc = optab_libfunc (le_optab, mode); | |
24752 | code = (code == UNLE) ? LE : LT; | |
24753 | break; | |
24754 | ||
24755 | case UNEQ: | |
24756 | case LTGT: | |
24757 | check_nan = true; | |
24758 | libfunc = optab_libfunc (eq_optab, mode); | |
24759 | code = (code = UNEQ) ? EQ : NE; | |
24760 | break; | |
24761 | ||
24762 | default: | |
24763 | gcc_unreachable (); | |
24764 | } | |
24765 | ||
24766 | gcc_assert (libfunc); | |
24767 | ||
24768 | if (!check_nan) | |
24769 | dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST, | |
db69559b | 24770 | SImode, op0, mode, op1, mode); |
83349046 SB |
24771 | |
24772 | /* The library signals an exception for signalling NaNs, so we need to | |
24773 | handle isgreater, etc. by first checking isordered. */ | |
24774 | else | |
24775 | { | |
24776 | rtx ne_rtx, normal_dest, unord_dest; | |
24777 | rtx unord_func = optab_libfunc (unord_optab, mode); | |
24778 | rtx join_label = gen_label_rtx (); | |
24779 | rtx join_ref = gen_rtx_LABEL_REF (VOIDmode, join_label); | |
24780 | rtx unord_cmp = gen_reg_rtx (comp_mode); | |
24781 | ||
24782 | ||
24783 | /* Test for either value being a NaN. */ | |
24784 | gcc_assert (unord_func); | |
24785 | unord_dest = emit_library_call_value (unord_func, NULL_RTX, LCT_CONST, | |
db69559b | 24786 | SImode, op0, mode, op1, mode); |
83349046 SB |
24787 | |
24788 | /* Set value (0) if either value is a NaN, and jump to the join | |
24789 | label. */ | |
24790 | dest = gen_reg_rtx (SImode); | |
24791 | emit_move_insn (dest, const1_rtx); | |
24792 | emit_insn (gen_rtx_SET (unord_cmp, | |
24793 | gen_rtx_COMPARE (comp_mode, unord_dest, | |
24794 | const0_rtx))); | |
24795 | ||
24796 | ne_rtx = gen_rtx_NE (comp_mode, unord_cmp, const0_rtx); | |
24797 | emit_jump_insn (gen_rtx_SET (pc_rtx, | |
24798 | gen_rtx_IF_THEN_ELSE (VOIDmode, ne_rtx, | |
24799 | join_ref, | |
24800 | pc_rtx))); | |
24801 | ||
24802 | /* Do the normal comparison, knowing that the values are not | |
24803 | NaNs. */ | |
24804 | normal_dest = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST, | |
db69559b | 24805 | SImode, op0, mode, op1, mode); |
83349046 SB |
24806 | |
24807 | emit_insn (gen_cstoresi4 (dest, | |
24808 | gen_rtx_fmt_ee (code, SImode, normal_dest, | |
24809 | const0_rtx), | |
24810 | normal_dest, const0_rtx)); | |
24811 | ||
24812 | /* Join NaN and non-Nan paths. Compare dest against 0. */ | |
24813 | emit_label (join_label); | |
24814 | code = NE; | |
24815 | } | |
24816 | ||
24817 | emit_insn (gen_rtx_SET (compare_result, | |
24818 | gen_rtx_COMPARE (comp_mode, dest, const0_rtx))); | |
24819 | } | |
24820 | ||
24821 | else | |
24822 | { | |
24823 | /* Generate XLC-compatible TFmode compare as PARALLEL with extra | |
24824 | CLOBBERs to match cmptf_internal2 pattern. */ | |
24825 | if (comp_mode == CCFPmode && TARGET_XL_COMPAT | |
24826 | && FLOAT128_IBM_P (GET_MODE (op0)) | |
24827 | && TARGET_HARD_FLOAT && TARGET_FPRS) | |
24828 | emit_insn (gen_rtx_PARALLEL (VOIDmode, | |
24829 | gen_rtvec (10, | |
24830 | gen_rtx_SET (compare_result, | |
24831 | gen_rtx_COMPARE (comp_mode, op0, op1)), | |
24832 | gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), | |
24833 | gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), | |
24834 | gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), | |
24835 | gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), | |
24836 | gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), | |
24837 | gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), | |
24838 | gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), | |
24839 | gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)), | |
24840 | gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode))))); | |
24841 | else if (GET_CODE (op1) == UNSPEC | |
24842 | && XINT (op1, 1) == UNSPEC_SP_TEST) | |
24843 | { | |
24844 | rtx op1b = XVECEXP (op1, 0, 0); | |
24845 | comp_mode = CCEQmode; | |
24846 | compare_result = gen_reg_rtx (CCEQmode); | |
24847 | if (TARGET_64BIT) | |
24848 | emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b)); | |
24849 | else | |
24850 | emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b)); | |
24851 | } | |
24852 | else | |
24853 | emit_insn (gen_rtx_SET (compare_result, | |
24854 | gen_rtx_COMPARE (comp_mode, op0, op1))); | |
24855 | } | |
24856 | ||
24857 | /* Some kinds of FP comparisons need an OR operation; | |
24858 | under flag_finite_math_only we don't bother. */ | |
24859 | if (FLOAT_MODE_P (mode) | |
24860 | && (!FLOAT128_IEEE_P (mode) || TARGET_FLOAT128_HW) | |
24861 | && !flag_finite_math_only | |
24862 | && !(TARGET_HARD_FLOAT && !TARGET_FPRS) | |
24863 | && (code == LE || code == GE | |
24864 | || code == UNEQ || code == LTGT | |
24865 | || code == UNGT || code == UNLT)) | |
24866 | { | |
24867 | enum rtx_code or1, or2; | |
24868 | rtx or1_rtx, or2_rtx, compare2_rtx; | |
24869 | rtx or_result = gen_reg_rtx (CCEQmode); | |
24870 | ||
24871 | switch (code) | |
24872 | { | |
24873 | case LE: or1 = LT; or2 = EQ; break; | |
24874 | case GE: or1 = GT; or2 = EQ; break; | |
24875 | case UNEQ: or1 = UNORDERED; or2 = EQ; break; | |
24876 | case LTGT: or1 = LT; or2 = GT; break; | |
24877 | case UNGT: or1 = UNORDERED; or2 = GT; break; | |
24878 | case UNLT: or1 = UNORDERED; or2 = LT; break; | |
24879 | default: gcc_unreachable (); | |
24880 | } | |
24881 | validate_condition_mode (or1, comp_mode); | |
24882 | validate_condition_mode (or2, comp_mode); | |
24883 | or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx); | |
24884 | or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx); | |
24885 | compare2_rtx = gen_rtx_COMPARE (CCEQmode, | |
24886 | gen_rtx_IOR (SImode, or1_rtx, or2_rtx), | |
24887 | const_true_rtx); | |
24888 | emit_insn (gen_rtx_SET (or_result, compare2_rtx)); | |
24889 | ||
24890 | compare_result = or_result; | |
24891 | code = EQ; | |
24892 | } | |
24893 | ||
24894 | validate_condition_mode (code, GET_MODE (compare_result)); | |
24895 | ||
24896 | return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx); | |
24897 | } | |
24898 | ||
24899 | \f | |
24900 | /* Return the diagnostic message string if the binary operation OP is | |
24901 | not permitted on TYPE1 and TYPE2, NULL otherwise. */ | |
24902 | ||
24903 | static const char* | |
24904 | rs6000_invalid_binary_op (int op ATTRIBUTE_UNUSED, | |
24905 | const_tree type1, | |
24906 | const_tree type2) | |
24907 | { | |
b8506a8a RS |
24908 | machine_mode mode1 = TYPE_MODE (type1); |
24909 | machine_mode mode2 = TYPE_MODE (type2); | |
83349046 SB |
24910 | |
24911 | /* For complex modes, use the inner type. */ | |
24912 | if (COMPLEX_MODE_P (mode1)) | |
24913 | mode1 = GET_MODE_INNER (mode1); | |
24914 | ||
24915 | if (COMPLEX_MODE_P (mode2)) | |
24916 | mode2 = GET_MODE_INNER (mode2); | |
24917 | ||
24918 | /* Don't allow IEEE 754R 128-bit binary floating point and IBM extended | |
24919 | double to intermix unless -mfloat128-convert. */ | |
24920 | if (mode1 == mode2) | |
24921 | return NULL; | |
24922 | ||
24923 | if (!TARGET_FLOAT128_CVT) | |
24924 | { | |
24925 | if ((mode1 == KFmode && mode2 == IFmode) | |
24926 | || (mode1 == IFmode && mode2 == KFmode)) | |
24927 | return N_("__float128 and __ibm128 cannot be used in the same " | |
24928 | "expression"); | |
24929 | ||
24930 | if (TARGET_IEEEQUAD | |
24931 | && ((mode1 == IFmode && mode2 == TFmode) | |
24932 | || (mode1 == TFmode && mode2 == IFmode))) | |
24933 | return N_("__ibm128 and long double cannot be used in the same " | |
24934 | "expression"); | |
24935 | ||
24936 | if (!TARGET_IEEEQUAD | |
24937 | && ((mode1 == KFmode && mode2 == TFmode) | |
24938 | || (mode1 == TFmode && mode2 == KFmode))) | |
24939 | return N_("__float128 and long double cannot be used in the same " | |
24940 | "expression"); | |
24941 | } | |
24942 | ||
24943 | return NULL; | |
24944 | } | |
24945 | ||
24946 | \f | |
/* Expand floating point conversion to/from __float128 and __ibm128.

   DEST and SRC are the destination and source operands; UNSIGNED_P
   selects the unsigned variant for integer <-> float conversions.
   Dispatches to a hardware conversion insn (when TARGET_FLOAT128_HW),
   a plain register move (same representation), or a libcall.  */

void
rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
{
  machine_mode dest_mode = GET_MODE (dest);
  machine_mode src_mode = GET_MODE (src);
  convert_optab cvt = unknown_optab;
  bool do_move = false;
  rtx libfunc = NULL_RTX;
  rtx dest2;
  typedef rtx (*rtx_2func_t) (rtx, rtx);
  rtx_2func_t hw_convert = (rtx_2func_t)0;
  size_t kf_or_tf;	/* Index into hw_conversions: 0 = KFmode, 1 = TFmode.  */

  /* Table of hardware conversion generators, one row per IEEE 128-bit
     mode (KFmode, TFmode).  */
  struct hw_conv_t {
    rtx_2func_t	from_df;
    rtx_2func_t from_sf;
    rtx_2func_t from_si_sign;
    rtx_2func_t from_si_uns;
    rtx_2func_t from_di_sign;
    rtx_2func_t from_di_uns;
    rtx_2func_t to_df;
    rtx_2func_t to_sf;
    rtx_2func_t to_si_sign;
    rtx_2func_t to_si_uns;
    rtx_2func_t to_di_sign;
    rtx_2func_t to_di_uns;
  } hw_conversions[2] = {
    /* conversions to/from KFmode */
    {
      gen_extenddfkf2_hw,		/* KFmode <- DFmode.  */
      gen_extendsfkf2_hw,		/* KFmode <- SFmode.  */
      gen_float_kfsi2_hw,		/* KFmode <- SImode (signed).  */
      gen_floatuns_kfsi2_hw,		/* KFmode <- SImode (unsigned).  */
      gen_float_kfdi2_hw,		/* KFmode <- DImode (signed).  */
      gen_floatuns_kfdi2_hw,		/* KFmode <- DImode (unsigned).  */
      gen_trunckfdf2_hw,		/* DFmode <- KFmode.  */
      gen_trunckfsf2_hw,		/* SFmode <- KFmode.  */
      gen_fix_kfsi2_hw,			/* SImode <- KFmode (signed).  */
      gen_fixuns_kfsi2_hw,		/* SImode <- KFmode (unsigned).  */
      gen_fix_kfdi2_hw,			/* DImode <- KFmode (signed).  */
      gen_fixuns_kfdi2_hw,		/* DImode <- KFmode (unsigned).  */
    },

    /* conversions to/from TFmode */
    {
      gen_extenddftf2_hw,		/* TFmode <- DFmode.  */
      gen_extendsftf2_hw,		/* TFmode <- SFmode.  */
      gen_float_tfsi2_hw,		/* TFmode <- SImode (signed).  */
      gen_floatuns_tfsi2_hw,		/* TFmode <- SImode (unsigned).  */
      gen_float_tfdi2_hw,		/* TFmode <- DImode (signed).  */
      gen_floatuns_tfdi2_hw,		/* TFmode <- DImode (unsigned).  */
      gen_trunctfdf2_hw,		/* DFmode <- TFmode.  */
      gen_trunctfsf2_hw,		/* SFmode <- TFmode.  */
      gen_fix_tfsi2_hw,			/* SImode <- TFmode (signed).  */
      gen_fixuns_tfsi2_hw,		/* SImode <- TFmode (unsigned).  */
      gen_fix_tfdi2_hw,			/* DImode <- TFmode (signed).  */
      gen_fixuns_tfdi2_hw,		/* DImode <- TFmode (unsigned).  */
    },
  };

  if (dest_mode == src_mode)
    gcc_unreachable ();

  /* Eliminate memory operations.  */
  if (MEM_P (src))
    src = force_reg (src_mode, src);

  if (MEM_P (dest))
    {
      /* Convert into a temporary register, then store.  */
      rtx tmp = gen_reg_rtx (dest_mode);
      rs6000_expand_float128_convert (tmp, src, unsigned_p);
      rs6000_emit_move (dest, tmp, dest_mode);
      return;
    }

  /* Convert to IEEE 128-bit floating point.  */
  if (FLOAT128_IEEE_P (dest_mode))
    {
      if (dest_mode == KFmode)
	kf_or_tf = 0;
      else if (dest_mode == TFmode)
	kf_or_tf = 1;
      else
	gcc_unreachable ();

      switch (src_mode)
	{
	case E_DFmode:
	  cvt = sext_optab;
	  hw_convert = hw_conversions[kf_or_tf].from_df;
	  break;

	case E_SFmode:
	  cvt = sext_optab;
	  hw_convert = hw_conversions[kf_or_tf].from_sf;
	  break;

	case E_KFmode:
	case E_IFmode:
	case E_TFmode:
	  /* IBM format source needs a real conversion; IEEE <-> IEEE
	     (KFmode/TFmode) share a representation, so just move.  */
	  if (FLOAT128_IBM_P (src_mode))
	    cvt = sext_optab;
	  else
	    do_move = true;
	  break;

	case E_SImode:
	  if (unsigned_p)
	    {
	      cvt = ufloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_si_uns;
	    }
	  else
	    {
	      cvt = sfloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_si_sign;
	    }
	  break;

	case E_DImode:
	  if (unsigned_p)
	    {
	      cvt = ufloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_di_uns;
	    }
	  else
	    {
	      cvt = sfloat_optab;
	      hw_convert = hw_conversions[kf_or_tf].from_di_sign;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Convert from IEEE 128-bit floating point.  */
  else if (FLOAT128_IEEE_P (src_mode))
    {
      if (src_mode == KFmode)
	kf_or_tf = 0;
      else if (src_mode == TFmode)
	kf_or_tf = 1;
      else
	gcc_unreachable ();

      switch (dest_mode)
	{
	case E_DFmode:
	  cvt = trunc_optab;
	  hw_convert = hw_conversions[kf_or_tf].to_df;
	  break;

	case E_SFmode:
	  cvt = trunc_optab;
	  hw_convert = hw_conversions[kf_or_tf].to_sf;
	  break;

	case E_KFmode:
	case E_IFmode:
	case E_TFmode:
	  /* Same note as above: IBM format destination needs a
	     conversion, IEEE <-> IEEE is a move.  */
	  if (FLOAT128_IBM_P (dest_mode))
	    cvt = trunc_optab;
	  else
	    do_move = true;
	  break;

	case E_SImode:
	  if (unsigned_p)
	    {
	      cvt = ufix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_si_uns;
	    }
	  else
	    {
	      cvt = sfix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_si_sign;
	    }
	  break;

	case E_DImode:
	  if (unsigned_p)
	    {
	      cvt = ufix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_di_uns;
	    }
	  else
	    {
	      cvt = sfix_optab;
	      hw_convert = hw_conversions[kf_or_tf].to_di_sign;
	    }
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* Both IBM format.  */
  else if (FLOAT128_IBM_P (dest_mode) && FLOAT128_IBM_P (src_mode))
    do_move = true;

  else
    gcc_unreachable ();

  /* Handle conversion between TFmode/KFmode.  */
  if (do_move)
    emit_move_insn (dest, gen_lowpart (dest_mode, src));

  /* Handle conversion if we have hardware support.  */
  else if (TARGET_FLOAT128_HW && hw_convert)
    emit_insn ((hw_convert) (dest, src));

  /* Call an external function to do the conversion.  */
  else if (cvt != unknown_optab)
    {
      libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
      gcc_assert (libfunc != NULL_RTX);

      dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode,
				       src, src_mode);

      gcc_assert (dest2 != NULL_RTX);
      if (!rtx_equal_p (dest, dest2))
	emit_move_insn (dest, dest2);
    }

  else
    gcc_unreachable ();

  return;
}
25182 | ||
25183 | \f | |
/* Emit the RTL for an sISEL pattern.  */

void
rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
{
  /* Set operands[0] to 1 if the comparison operands[1] holds, else to 0,
     via an isel-based integer conditional move.  */
  rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
}
25191 | ||
25192 | /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH | |
25193 | can be used as that dest register. Return the dest register. */ | |
25194 | ||
25195 | rtx | |
25196 | rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch) | |
25197 | { | |
25198 | if (op2 == const0_rtx) | |
25199 | return op1; | |
25200 | ||
25201 | if (GET_CODE (scratch) == SCRATCH) | |
25202 | scratch = gen_reg_rtx (mode); | |
25203 | ||
25204 | if (logical_operand (op2, mode)) | |
25205 | emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2))); | |
25206 | else | |
25207 | emit_insn (gen_rtx_SET (scratch, | |
25208 | gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2)))); | |
25209 | ||
25210 | return scratch; | |
25211 | } | |
25212 | ||
/* Expand an scc-style pattern: set register operands[0] to 1 if the
   comparison operands[1] (generated in mode MODE) holds, and to 0
   otherwise.  */

void
rs6000_emit_sCOND (machine_mode mode, rtx operands[])
{
  rtx condition_rtx;
  machine_mode op_mode;
  enum rtx_code cond_code;
  rtx result = operands[0];

  condition_rtx = rs6000_generate_compare (operands[1], mode);
  cond_code = GET_CODE (condition_rtx);

  /* E500 (SPE) hard float without FPRs: the FP comparison result lives in
     the GT bit of a CR field; copy (and for NE first flip) that bit into
     RESULT.  */
  if (FLOAT_MODE_P (mode)
      && !TARGET_FPRS && TARGET_HARD_FLOAT)
    {
      rtx t;

      PUT_MODE (condition_rtx, SImode);
      t = XEXP (condition_rtx, 0);

      gcc_assert (cond_code == NE || cond_code == EQ);

      if (cond_code == NE)
	emit_insn (gen_e500_flip_gt_bit (t, t));

      emit_insn (gen_move_from_CR_gt_bit (result, t));
      return;
    }

  /* These codes cannot be read out of a single CR bit directly; compute the
     reversed comparison into a fresh CCEQ register and then test that for
     EQ against zero.  */
  if (cond_code == NE
      || cond_code == GE || cond_code == LE
      || cond_code == GEU || cond_code == LEU
      || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
    {
      rtx not_result = gen_reg_rtx (CCEQmode);
      rtx not_op, rev_cond_rtx;
      machine_mode cc_mode;

      cc_mode = GET_MODE (XEXP (condition_rtx, 0));

      rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
				     SImode, XEXP (condition_rtx, 0), const0_rtx);
      not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
      emit_insn (gen_rtx_SET (not_result, not_op));
      condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
    }

  /* Work out the mode of the compared operands; when one side is a
     VOIDmode constant, take the mode from the other side.  */
  op_mode = GET_MODE (XEXP (operands[1], 0));
  if (op_mode == VOIDmode)
    op_mode = GET_MODE (XEXP (operands[1], 1));

  if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
    {
      PUT_MODE (condition_rtx, DImode);
      convert_move (result, condition_rtx, 0);
    }
  else
    {
      PUT_MODE (condition_rtx, SImode);
      emit_insn (gen_rtx_SET (result, condition_rtx));
    }
}
25274 | ||
25275 | /* Emit a branch of kind CODE to location LOC. */ | |
25276 | ||
25277 | void | |
25278 | rs6000_emit_cbranch (machine_mode mode, rtx operands[]) | |
25279 | { | |
25280 | rtx condition_rtx, loc_ref; | |
25281 | ||
25282 | condition_rtx = rs6000_generate_compare (operands[0], mode); | |
25283 | loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]); | |
25284 | emit_jump_insn (gen_rtx_SET (pc_rtx, | |
25285 | gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx, | |
25286 | loc_ref, pc_rtx))); | |
25287 | } | |
25288 | ||
/* Return the string to output a conditional branch to LABEL, which is
   the operand template of the label, or NULL if the branch is really a
   conditional return.

   OP is the conditional expression.  XEXP (OP, 0) is assumed to be a
   condition code register and its mode specifies what kind of
   comparison we made.

   REVERSED is nonzero if we should reverse the sense of the comparison.

   INSN is the insn.

   NOTE: the returned string lives in a static buffer and is only valid
   until the next call.  */

char *
output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
{
  static char string[64];
  enum rtx_code code = GET_CODE (op);
  rtx cc_reg = XEXP (op, 0);
  machine_mode mode = GET_MODE (cc_reg);
  int cc_regno = REGNO (cc_reg) - CR0_REGNO;
  /* Length 8 means the target is out of range for a short conditional
     branch, so we emit an inverted branch around an unconditional one.  */
  int need_longbranch = label != NULL && get_attr_length (insn) == 8;
  int really_reversed = reversed ^ need_longbranch;
  char *s = string;
  const char *ccode;
  const char *pred;
  rtx note;

  validate_condition_mode (code, mode);

  /* Work out which way this really branches.  We could use
     reverse_condition_maybe_unordered here always but this
     makes the resulting assembler clearer.  */
  if (really_reversed)
    {
      /* Reversal of FP compares takes care -- an ordered compare
	 becomes an unordered compare and vice versa.  */
      if (mode == CCFPmode)
	code = reverse_condition_maybe_unordered (code);
      else
	code = reverse_condition (code);
    }

  if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
    {
      /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
	 to the GT bit.  */
      switch (code)
	{
	case EQ:
	  /* Opposite of GT.  */
	  code = GT;
	  break;

	case NE:
	  code = UNLE;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  switch (code)
    {
      /* Not all of these are actually distinct opcodes, but
	 we distinguish them for clarity of the resulting assembler.  */
    case NE: case LTGT:
      ccode = "ne"; break;
    case EQ: case UNEQ:
      ccode = "eq"; break;
    case GE: case GEU:
      ccode = "ge"; break;
    case GT: case GTU: case UNGT:
      ccode = "gt"; break;
    case LE: case LEU:
      ccode = "le"; break;
    case LT: case LTU: case UNLT:
      ccode = "lt"; break;
    case UNORDERED: ccode = "un"; break;
    case ORDERED: ccode = "nu"; break;
    case UNGE: ccode = "nl"; break;
    case UNLE: ccode = "ng"; break;
    default:
      gcc_unreachable ();
    }

  /* Maybe we have a guess as to how likely the branch is.  */
  pred = "";
  note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
  if (note != NULL_RTX)
    {
      /* PROB is the difference from 50%.  */
      int prob = profile_probability::from_reg_br_prob_note (XINT (note, 0))
		   .to_reg_br_prob_base () - REG_BR_PROB_BASE / 2;

      /* Only hint for highly probable/improbable branches on newer cpus when
	 we have real profile data, as static prediction overrides processor
	 dynamic prediction.  For older cpus we may as well always hint, but
	 assume not taken for branches that are very close to 50% as a
	 mispredicted taken branch is more expensive than a
	 mispredicted not-taken branch.  */
      if (rs6000_always_hint
	  || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
	      && (profile_status_for_fn (cfun) != PROFILE_GUESSED)
	      && br_prob_note_reliable_p (note)))
	{
	  /* "+" predicts taken, "-" predicts not taken; flip the sense for
	     a long branch since the short branch is then inverted.  */
	  if (abs (prob) > REG_BR_PROB_BASE / 20
	      && ((prob > 0) ^ need_longbranch))
	    pred = "+";
	  else
	    pred = "-";
	}
    }

  /* No label means a conditional return (b<cc>lr).  */
  if (label == NULL)
    s += sprintf (s, "b%slr%s ", ccode, pred);
  else
    s += sprintf (s, "b%s%s ", ccode, pred);

  /* We need to escape any '%' characters in the reg_names string.
     Assume they'd only be the first character....  */
  if (reg_names[cc_regno + CR0_REGNO][0] == '%')
    *s++ = '%';
  s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);

  if (label != NULL)
    {
      /* If the branch distance was too far, we may have to use an
	 unconditional branch to go the distance.  */
      if (need_longbranch)
	s += sprintf (s, ",$+8\n\tb %s", label);
      else
	s += sprintf (s, ",%s", label);
    }

  return string;
}
25426 | ||
25427 | /* Return the string to flip the GT bit on a CR. */ | |
25428 | char * | |
25429 | output_e500_flip_gt_bit (rtx dst, rtx src) | |
25430 | { | |
25431 | static char string[64]; | |
25432 | int a, b; | |
25433 | ||
25434 | gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst)) | |
25435 | && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src))); | |
25436 | ||
25437 | /* GT bit. */ | |
25438 | a = 4 * (REGNO (dst) - CR0_REGNO) + 1; | |
25439 | b = 4 * (REGNO (src) - CR0_REGNO) + 1; | |
25440 | ||
25441 | sprintf (string, "crnot %d,%d", a, b); | |
25442 | return string; | |
25443 | } | |
25444 | ||
25445 | /* Return insn for VSX or Altivec comparisons. */ | |
25446 | ||
25447 | static rtx | |
25448 | rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1) | |
25449 | { | |
25450 | rtx mask; | |
25451 | machine_mode mode = GET_MODE (op0); | |
25452 | ||
25453 | switch (code) | |
25454 | { | |
25455 | default: | |
25456 | break; | |
25457 | ||
25458 | case GE: | |
25459 | if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT) | |
25460 | return NULL_RTX; | |
25461 | /* FALLTHRU */ | |
25462 | ||
25463 | case EQ: | |
25464 | case GT: | |
25465 | case GTU: | |
25466 | case ORDERED: | |
25467 | case UNORDERED: | |
25468 | case UNEQ: | |
25469 | case LTGT: | |
25470 | mask = gen_reg_rtx (mode); | |
25471 | emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1))); | |
25472 | return mask; | |
25473 | } | |
25474 | ||
25475 | return NULL_RTX; | |
25476 | } | |
25477 | ||
/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is expected destination mode.  This is a recursive function.
   Returns the mask register, or NULL_RTX when the comparison cannot be
   synthesized.  */

static rtx
rs6000_emit_vector_compare (enum rtx_code rcode,
			    rtx op0, rtx op1,
			    machine_mode dmode)
{
  rtx mask;
  bool swap_operands = false;
  bool try_again = false;

  gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* See if the comparison works as is.  */
  mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
  if (mask)
    return mask;

  switch (rcode)
    {
    case LT:
      /* A LT B is B GT A with the operands swapped.  */
      rcode = GT;
      swap_operands = true;
      try_again = true;
      break;
    case LTU:
      rcode = GTU;
      swap_operands = true;
      try_again = true;
      break;
    case NE:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      /* Invert condition and try again.
	 e.g., A != B becomes ~(A==B).  */
      {
	enum rtx_code rev_code;
	enum insn_code nor_code;
	rtx mask2;

	rev_code = reverse_condition_maybe_unordered (rcode);
	if (rev_code == UNKNOWN)
	  return NULL_RTX;

	/* We need a one's-complement insn to invert the inner mask.  */
	nor_code = optab_handler (one_cmpl_optab, dmode);
	if (nor_code == CODE_FOR_nothing)
	  return NULL_RTX;

	/* Recurse for the reversed comparison, then invert its mask.  */
	mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
	if (!mask2)
	  return NULL_RTX;

	mask = gen_reg_rtx (dmode);
	emit_insn (GEN_FCN (nor_code) (mask, mask2));
	return mask;
      }
      break;
    case GE:
    case GEU:
    case LE:
    case LEU:
      /* Try GT/GTU/LT/LTU OR EQ */
      {
	rtx c_rtx, eq_rtx;
	enum insn_code ior_code;
	enum rtx_code new_code;

	switch (rcode)
	  {
	  case GE:
	    new_code = GT;
	    break;

	  case GEU:
	    new_code = GTU;
	    break;

	  case LE:
	    new_code = LT;
	    break;

	  case LEU:
	    new_code = LTU;
	    break;

	  default:
	    gcc_unreachable ();
	  }

	ior_code = optab_handler (ior_optab, dmode);
	if (ior_code == CODE_FOR_nothing)
	  return NULL_RTX;

	/* Recurse for the strict comparison and the equality, then OR
	   the two masks together.  */
	c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
	if (!c_rtx)
	  return NULL_RTX;

	eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
	if (!eq_rtx)
	  return NULL_RTX;

	mask = gen_reg_rtx (dmode);
	emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
	return mask;
      }
      break;
    default:
      return NULL_RTX;
    }

  if (try_again)
    {
      if (swap_operands)
	std::swap (op0, op1);

      mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
      if (mask)
	return mask;
    }

  /* You only get two chances.  */
  return NULL_RTX;
}
25605 | ||
/* Emit vector conditional expression.  DEST is destination.  OP_TRUE and
   OP_FALSE are two VEC_COND_EXPR operands.  CC_OP0 and CC_OP1 are the two
   operands for the relation operation COND.  Returns 1 on success, 0 if
   the target has no suitable vector support.  */

int
rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
			      rtx cond, rtx cc_op0, rtx cc_op1)
{
  machine_mode dest_mode = GET_MODE (dest);
  machine_mode mask_mode = GET_MODE (cc_op0);
  enum rtx_code rcode = GET_CODE (cond);
  machine_mode cc_mode = CCmode;
  rtx mask;
  rtx cond2;
  bool invert_move = false;

  if (VECTOR_UNIT_NONE_P (dest_mode))
    return 0;

  /* The comparison mask and the result must have the same layout.  */
  gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
	      && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));

  switch (rcode)
    {
      /* Swap operands if we can, and fall back to doing the operation as
	 specified, and doing a NOR to invert the test.  */
    case NE:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
      /* Invert condition and try again.
	 e.g., A  = (B != C) ? D : E becomes A = (B == C) ? E : D.  */
      invert_move = true;
      rcode = reverse_condition_maybe_unordered (rcode);
      if (rcode == UNKNOWN)
	return 0;
      break;

    case GE:
    case LE:
      if (GET_MODE_CLASS (mask_mode) == MODE_VECTOR_INT)
	{
	  /* Invert condition to avoid compound test.  */
	  invert_move = true;
	  rcode = reverse_condition (rcode);
	}
      break;

    case GTU:
    case GEU:
    case LTU:
    case LEU:
      /* Mark unsigned tests with CCUNSmode.  */
      cc_mode = CCUNSmode;

      /* Invert condition to avoid compound test if necessary.  */
      if (rcode == GEU || rcode == LEU)
	{
	  invert_move = true;
	  rcode = reverse_condition (rcode);
	}
      break;

    default:
      break;
    }

  /* Get the vector mask for the given relational operations.  */
  mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);

  if (!mask)
    return 0;

  if (invert_move)
    std::swap (op_true, op_false);

  /* Optimize vec1 == vec2, to know the mask generates -1/0.  */
  if (GET_MODE_CLASS (dest_mode) == MODE_VECTOR_INT
      && (GET_CODE (op_true) == CONST_VECTOR
	  || GET_CODE (op_false) == CONST_VECTOR))
    {
      rtx constant_0 = CONST0_RTX (dest_mode);
      rtx constant_m1 = CONSTM1_RTX (dest_mode);

      /* Selecting -1/0 means the mask itself is the answer.  */
      if (op_true == constant_m1 && op_false == constant_0)
	{
	  emit_move_insn (dest, mask);
	  return 1;
	}

      /* Selecting 0/-1 is just the complement of the mask.  */
      else if (op_true == constant_0 && op_false == constant_m1)
	{
	  emit_insn (gen_rtx_SET (dest, gen_rtx_NOT (dest_mode, mask)));
	  return 1;
	}

      /* If we can't use the vector comparison directly, perhaps we can use
	 the mask for the true or false fields, instead of loading up a
	 constant.  */
      if (op_true == constant_m1)
	op_true = mask;

      if (op_false == constant_0)
	op_false = mask;
    }

  if (!REG_P (op_true) && !SUBREG_P (op_true))
    op_true = force_reg (dest_mode, op_true);

  if (!REG_P (op_false) && !SUBREG_P (op_false))
    op_false = force_reg (dest_mode, op_false);

  /* Element-wise select on MASK != 0; each mask element is all-ones or
     all-zeros.  */
  cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
			  CONST0_RTX (dest_mode));
  emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (dest_mode,
						cond2,
						op_true,
						op_false)));
  return 1;
}
25728 | ||
25729 | /* ISA 3.0 (power9) minmax subcase to emit a XSMAXCDP or XSMINCDP instruction | |
25730 | for SF/DF scalars. Move TRUE_COND to DEST if OP of the operands of the last | |
25731 | comparison is nonzero/true, FALSE_COND if it is zero/false. Return 0 if the | |
25732 | hardware has no such operation. */ | |
25733 | ||
25734 | static int | |
25735 | rs6000_emit_p9_fp_minmax (rtx dest, rtx op, rtx true_cond, rtx false_cond) | |
25736 | { | |
25737 | enum rtx_code code = GET_CODE (op); | |
25738 | rtx op0 = XEXP (op, 0); | |
25739 | rtx op1 = XEXP (op, 1); | |
25740 | machine_mode compare_mode = GET_MODE (op0); | |
25741 | machine_mode result_mode = GET_MODE (dest); | |
25742 | bool max_p = false; | |
25743 | ||
25744 | if (result_mode != compare_mode) | |
25745 | return 0; | |
25746 | ||
25747 | if (code == GE || code == GT) | |
25748 | max_p = true; | |
25749 | else if (code == LE || code == LT) | |
25750 | max_p = false; | |
25751 | else | |
25752 | return 0; | |
25753 | ||
25754 | if (rtx_equal_p (op0, true_cond) && rtx_equal_p (op1, false_cond)) | |
25755 | ; | |
25756 | ||
25757 | else if (rtx_equal_p (op1, true_cond) && rtx_equal_p (op0, false_cond)) | |
25758 | max_p = !max_p; | |
25759 | ||
25760 | else | |
25761 | return 0; | |
25762 | ||
25763 | rs6000_emit_minmax (dest, max_p ? SMAX : SMIN, op0, op1); | |
25764 | return 1; | |
25765 | } | |
25766 | ||
25767 | /* ISA 3.0 (power9) conditional move subcase to emit XSCMP{EQ,GE,GT,NE}DP and | |
25768 | XXSEL instructions for SF/DF scalars. Move TRUE_COND to DEST if OP of the | |
25769 | operands of the last comparison is nonzero/true, FALSE_COND if it is | |
25770 | zero/false. Return 0 if the hardware has no such operation. */ | |
25771 | ||
25772 | static int | |
25773 | rs6000_emit_p9_fp_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) | |
25774 | { | |
25775 | enum rtx_code code = GET_CODE (op); | |
25776 | rtx op0 = XEXP (op, 0); | |
25777 | rtx op1 = XEXP (op, 1); | |
25778 | machine_mode result_mode = GET_MODE (dest); | |
25779 | rtx compare_rtx; | |
25780 | rtx cmove_rtx; | |
25781 | rtx clobber_rtx; | |
25782 | ||
25783 | if (!can_create_pseudo_p ()) | |
25784 | return 0; | |
25785 | ||
25786 | switch (code) | |
25787 | { | |
25788 | case EQ: | |
25789 | case GE: | |
25790 | case GT: | |
25791 | break; | |
25792 | ||
25793 | case NE: | |
25794 | case LT: | |
25795 | case LE: | |
25796 | code = swap_condition (code); | |
25797 | std::swap (op0, op1); | |
25798 | break; | |
25799 | ||
25800 | default: | |
25801 | return 0; | |
25802 | } | |
25803 | ||
25804 | /* Generate: [(parallel [(set (dest) | |
25805 | (if_then_else (op (cmp1) (cmp2)) | |
25806 | (true) | |
25807 | (false))) | |
25808 | (clobber (scratch))])]. */ | |
25809 | ||
25810 | compare_rtx = gen_rtx_fmt_ee (code, CCFPmode, op0, op1); | |
25811 | cmove_rtx = gen_rtx_SET (dest, | |
25812 | gen_rtx_IF_THEN_ELSE (result_mode, | |
25813 | compare_rtx, | |
25814 | true_cond, | |
25815 | false_cond)); | |
25816 | ||
25817 | clobber_rtx = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode)); | |
25818 | emit_insn (gen_rtx_PARALLEL (VOIDmode, | |
25819 | gen_rtvec (2, cmove_rtx, clobber_rtx))); | |
25820 | ||
25821 | return 1; | |
25822 | } | |
25823 | ||
/* Emit a conditional move: move TRUE_COND to DEST if OP of the
   operands of the last comparison is nonzero/true, FALSE_COND if it
   is zero/false.  Return 0 if the hardware has no such operation.  */

int
rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
{
  enum rtx_code code = GET_CODE (op);
  rtx op0 = XEXP (op, 0);
  rtx op1 = XEXP (op, 1);
  machine_mode compare_mode = GET_MODE (op0);
  machine_mode result_mode = GET_MODE (dest);
  rtx temp;
  bool is_against_zero;

  /* These modes should always match.  */
  if (GET_MODE (op1) != compare_mode
      /* In the isel case however, we can use a compare immediate, so
	 op1 may be a small constant.  */
      && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
    return 0;
  if (GET_MODE (true_cond) != result_mode)
    return 0;
  if (GET_MODE (false_cond) != result_mode)
    return 0;

  /* See if we can use the ISA 3.0 (power9) min/max/compare functions.  */
  if (TARGET_P9_MINMAX
      && (compare_mode == SFmode || compare_mode == DFmode)
      && (result_mode == SFmode || result_mode == DFmode))
    {
      if (rs6000_emit_p9_fp_minmax (dest, op, true_cond, false_cond))
	return 1;

      if (rs6000_emit_p9_fp_cmove (dest, op, true_cond, false_cond))
	return 1;
    }

  /* Don't allow using floating point comparisons for integer results for
     now.  */
  if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
    return 0;

  /* First, work out if the hardware can do this at all, or
     if it's too slow....  */
  if (!FLOAT_MODE_P (compare_mode))
    {
      if (TARGET_ISEL)
	return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
      return 0;
    }
  else if (TARGET_HARD_FLOAT && !TARGET_FPRS
	   && SCALAR_FLOAT_MODE_P (compare_mode))
    return 0;

  is_against_zero = op1 == CONST0_RTX (compare_mode);

  /* A floating-point subtract might overflow, underflow, or produce
     an inexact result, thus changing the floating-point flags, so it
     can't be generated if we care about that.  It's safe if one side
     of the construct is zero, since then no subtract will be
     generated.  */
  if (SCALAR_FLOAT_MODE_P (compare_mode)
      && flag_trapping_math && ! is_against_zero)
    return 0;

  /* Eliminate half of the comparisons by switching operands, this
     makes the remaining code simpler.  */
  if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
      || code == LTGT || code == LT || code == UNLE)
    {
      code = reverse_condition_maybe_unordered (code);
      temp = true_cond;
      true_cond = false_cond;
      false_cond = temp;
    }

  /* UNEQ and LTGT take four instructions for a comparison with zero,
     it'll probably be faster to use a branch here too.  */
  if (code == UNEQ && HONOR_NANS (compare_mode))
    return 0;

  /* We're going to try to implement comparisons by performing
     a subtract, then comparing against zero.  Unfortunately,
     Inf - Inf is NaN which is not zero, and so if we don't
     know that the operand is finite and the comparison
     would treat EQ different to UNORDERED, we can't do it.  */
  if (HONOR_INFINITIES (compare_mode)
      && code != GT && code != UNGE
      && (GET_CODE (op1) != CONST_DOUBLE
	  || real_isinf (CONST_DOUBLE_REAL_VALUE (op1)))
      /* Constructs of the form (a OP b ? a : b) are safe.  */
      && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
	  || (! rtx_equal_p (op0, true_cond)
	      && ! rtx_equal_p (op1, true_cond))))
    return 0;

  /* At this point we know we can use fsel.  */

  /* Reduce the comparison to a comparison against zero.  */
  if (! is_against_zero)
    {
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
      op0 = temp;
      op1 = CONST0_RTX (compare_mode);
    }

  /* If we don't care about NaNs we can reduce some of the comparisons
     down to faster ones.  */
  if (! HONOR_NANS (compare_mode))
    switch (code)
      {
      case GT:
	code = LE;
	temp = true_cond;
	true_cond = false_cond;
	false_cond = temp;
	break;
      case UNGE:
	code = GE;
	break;
      case UNEQ:
	code = EQ;
	break;
      default:
	break;
      }

  /* Now, reduce everything down to a GE.  */
  switch (code)
    {
    case GE:
      break;

    case LE:
      /* a LE 0 <-> -a GE 0.  */
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    case ORDERED:
      /* abs(a) GE 0 is false only when a is a NaN.  */
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
      op0 = temp;
      break;

    case EQ:
      /* a EQ 0 <-> -abs(a) GE 0.  */
      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp,
			      gen_rtx_NEG (compare_mode,
					   gen_rtx_ABS (compare_mode, op0))));
      op0 = temp;
      break;

    case UNGE:
      /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
      temp = gen_reg_rtx (result_mode);
      emit_insn (gen_rtx_SET (temp,
			      gen_rtx_IF_THEN_ELSE (result_mode,
						    gen_rtx_GE (VOIDmode,
								op0, op1),
						    true_cond, false_cond)));
      false_cond = true_cond;
      true_cond = temp;

      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    case GT:
      /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
      temp = gen_reg_rtx (result_mode);
      emit_insn (gen_rtx_SET (temp,
			      gen_rtx_IF_THEN_ELSE (result_mode,
						    gen_rtx_GE (VOIDmode,
								op0, op1),
						    true_cond, false_cond)));
      true_cond = false_cond;
      false_cond = temp;

      temp = gen_reg_rtx (compare_mode);
      emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
      op0 = temp;
      break;

    default:
      gcc_unreachable ();
    }

  /* Emit the final fsel-style conditional move on op0 GE 0.  */
  emit_insn (gen_rtx_SET (dest,
			  gen_rtx_IF_THEN_ELSE (result_mode,
						gen_rtx_GE (VOIDmode,
							    op0, op1),
						true_cond, false_cond)));
  return 1;
}
26022 | ||
26023 | /* Same as above, but for ints (isel). */ | |
26024 | ||
26025 | static int | |
26026 | rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) | |
26027 | { | |
26028 | rtx condition_rtx, cr; | |
26029 | machine_mode mode = GET_MODE (dest); | |
26030 | enum rtx_code cond_code; | |
26031 | rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx); | |
26032 | bool signedp; | |
26033 | ||
26034 | if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode)) | |
26035 | return 0; | |
26036 | ||
26037 | /* We still have to do the compare, because isel doesn't do a | |
26038 | compare, it just looks at the CRx bits set by a previous compare | |
26039 | instruction. */ | |
26040 | condition_rtx = rs6000_generate_compare (op, mode); | |
26041 | cond_code = GET_CODE (condition_rtx); | |
26042 | cr = XEXP (condition_rtx, 0); | |
26043 | signedp = GET_MODE (cr) == CCmode; | |
26044 | ||
26045 | isel_func = (mode == SImode | |
26046 | ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si) | |
26047 | : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di)); | |
26048 | ||
26049 | switch (cond_code) | |
26050 | { | |
26051 | case LT: case GT: case LTU: case GTU: case EQ: | |
26052 | /* isel handles these directly. */ | |
26053 | break; | |
26054 | ||
26055 | default: | |
26056 | /* We need to swap the sense of the comparison. */ | |
26057 | { | |
26058 | std::swap (false_cond, true_cond); | |
26059 | PUT_CODE (condition_rtx, reverse_condition (cond_code)); | |
26060 | } | |
26061 | break; | |
26062 | } | |
26063 | ||
26064 | false_cond = force_reg (mode, false_cond); | |
26065 | if (true_cond != const0_rtx) | |
26066 | true_cond = force_reg (mode, true_cond); | |
26067 | ||
26068 | emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr)); | |
26069 | ||
26070 | return 1; | |
26071 | } | |
26072 | ||
26073 | const char * | |
26074 | output_isel (rtx *operands) | |
26075 | { | |
26076 | enum rtx_code code; | |
26077 | ||
26078 | code = GET_CODE (operands[1]); | |
26079 | ||
26080 | if (code == GE || code == GEU || code == LE || code == LEU || code == NE) | |
26081 | { | |
26082 | gcc_assert (GET_CODE (operands[2]) == REG | |
26083 | && GET_CODE (operands[3]) == REG); | |
26084 | PUT_CODE (operands[1], reverse_condition (code)); | |
26085 | return "isel %0,%3,%2,%j1"; | |
26086 | } | |
26087 | ||
26088 | return "isel %0,%2,%3,%j1"; | |
26089 | } | |
26090 | ||
/* Emit code to compute the minimum or maximum (CODE is SMIN/SMAX/UMIN/UMAX)
   of OP0 and OP1 into DEST.  Vector modes with AltiVec/VSX support (and
   SFmode when VSX handles DFmode) have direct min/max instructions;
   otherwise synthesize the operation as a conditional move on a GE/GEU
   comparison.  */
void
rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);
  enum rtx_code c;
  rtx target;

  /* VSX/altivec have direct min/max insns.  */
  if ((code == SMAX || code == SMIN)
      && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
	  || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
    {
      emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
      return;
    }

  /* Signed variants compare with GE, unsigned with GEU.  */
  if (code == SMAX || code == SMIN)
    c = GE;
  else
    c = GEU;

  /* max: dest = (op0 >= op1) ? op0 : op1; min swaps the two arms.  */
  if (code == SMAX || code == UMAX)
    target = emit_conditional_move (dest, c, op0, op1, mode,
				    op0, op1, mode, 0);
  else
    target = emit_conditional_move (dest, c, op0, op1, mode,
				    op1, op0, mode, 0);
  gcc_assert (target);
  if (target != dest)
    emit_move_insn (dest, target);
}
26122 | ||
/* Split a signbit operation on 64-bit machines with direct move.  Also allow
   for the value to come from memory or if it is already loaded into a GPR.
   DEST receives the sign bit (0 or 1) of the IEEE 128-bit value SRC.  */

void
rs6000_split_signbit (rtx dest, rtx src)
{
  machine_mode d_mode = GET_MODE (dest);
  machine_mode s_mode = GET_MODE (src);
  /* Work on the DImode view of DEST; the sign bit lives in the
     most-significant doubleword of the 128-bit value.  */
  rtx dest_di = (d_mode == DImode) ? dest : gen_lowpart (DImode, dest);
  rtx shift_reg = dest_di;

  gcc_assert (FLOAT128_IEEE_P (s_mode) && TARGET_POWERPC64);

  if (MEM_P (src))
    {
      /* Load only the doubleword holding the sign bit: offset 0 for
	 big endian, offset 8 for little endian.  */
      rtx mem = (WORDS_BIG_ENDIAN
		 ? adjust_address (src, DImode, 0)
		 : adjust_address (src, DImode, 8));
      emit_insn (gen_rtx_SET (dest_di, mem));
    }

  else
    {
      unsigned int r = reg_or_subregno (src);

      /* Already in GPRs: shift directly from the register that holds the
	 high doubleword (the second of the pair on little endian).  */
      if (INT_REGNO_P (r))
	shift_reg = gen_rtx_REG (DImode, r + (BYTES_BIG_ENDIAN == 0));

      else
	{
	  /* Generate the special mfvsrd instruction to get it in a GPR.  */
	  gcc_assert (VSX_REGNO_P (r));
	  if (s_mode == KFmode)
	    emit_insn (gen_signbitkf2_dm2 (dest_di, src));
	  else
	    emit_insn (gen_signbittf2_dm2 (dest_di, src));
	}
    }

  /* Move the sign bit down to bit 0.  */
  emit_insn (gen_lshrdi3 (dest_di, shift_reg, GEN_INT (63)));
  return;
}
26165 | ||
26166 | /* A subroutine of the atomic operation splitters. Jump to LABEL if | |
26167 | COND is true. Mark the jump as unlikely to be taken. */ | |
26168 | ||
26169 | static void | |
26170 | emit_unlikely_jump (rtx cond, rtx label) | |
26171 | { | |
83349046 SB |
26172 | rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx); |
26173 | rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x)); | |
5fa396ad | 26174 | add_reg_br_prob_note (insn, profile_probability::very_unlikely ()); |
83349046 SB |
26175 | } |
26176 | ||
/* A subroutine of the atomic operation splitters.  Emit a load-locked
   instruction in MODE.  For QI/HImode, possibly use a pattern that includes
   the zero_extend operation.  */

static void
emit_load_locked (machine_mode mode, rtx reg, rtx mem)
{
  /* Generator function for the load-locked pattern of the chosen mode.  */
  rtx (*fn) (rtx, rtx) = NULL;

  switch (mode)
    {
    case E_QImode:
      fn = gen_load_lockedqi;
      break;
    case E_HImode:
      fn = gen_load_lockedhi;
      break;
    case E_SImode:
      /* An SImode destination with a narrower memory operand selects the
	 zero-extending QI/HI variants mentioned above.  */
      if (GET_MODE (mem) == QImode)
	fn = gen_load_lockedqi_si;
      else if (GET_MODE (mem) == HImode)
	fn = gen_load_lockedhi_si;
      else
	fn = gen_load_lockedsi;
      break;
    case E_DImode:
      fn = gen_load_lockeddi;
      break;
    case E_TImode:
      fn = gen_load_lockedti;
      break;
    default:
      gcc_unreachable ();
    }
  emit_insn (fn (reg, mem));
}
26213 | ||
/* A subroutine of the atomic operation splitters.  Emit a store-conditional
   instruction in MODE.  RES receives the CC result of the conditional
   store of VAL into MEM.  */

static void
emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
{
  /* Generator function for the store-conditional pattern of the mode.  */
  rtx (*fn) (rtx, rtx, rtx) = NULL;

  switch (mode)
    {
    case E_QImode:
      fn = gen_store_conditionalqi;
      break;
    case E_HImode:
      fn = gen_store_conditionalhi;
      break;
    case E_SImode:
      fn = gen_store_conditionalsi;
      break;
    case E_DImode:
      fn = gen_store_conditionaldi;
      break;
    case E_TImode:
      fn = gen_store_conditionalti;
      break;
    default:
      gcc_unreachable ();
    }

  /* Emit sync before stwcx. to address PPC405 Erratum.  */
  if (PPC405_ERRATUM77)
    emit_insn (gen_hwsync ());

  emit_insn (fn (res, mem, val));
}
26249 | ||
/* Expand barriers before and after a load_locked/store_cond sequence.
   Returns MEM, possibly rewritten so its address is a form the larx/stcx.
   patterns accept (plain register or reg+reg), and emits the leading
   fence required by memory model MODEL.  */

static rtx
rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
{
  rtx addr = XEXP (mem, 0);
  int strict_p = (reload_in_progress || reload_completed);

  /* Force any address that is neither indirect nor indexed into a base
     register first.  */
  if (!legitimate_indirect_address_p (addr, strict_p)
      && !legitimate_indexed_address_p (addr, strict_p))
    {
      addr = force_reg (Pmode, addr);
      mem = replace_equiv_address_nv (mem, addr);
    }

  /* lwsync for release semantics, hwsync (full barrier) for sequential
     consistency; nothing for relaxed/consume/acquire before the load.  */
  switch (model)
    {
    case MEMMODEL_RELAXED:
    case MEMMODEL_CONSUME:
    case MEMMODEL_ACQUIRE:
      break;
    case MEMMODEL_RELEASE:
    case MEMMODEL_ACQ_REL:
      emit_insn (gen_lwsync ());
      break;
    case MEMMODEL_SEQ_CST:
      emit_insn (gen_hwsync ());
      break;
    default:
      gcc_unreachable ();
    }
  return mem;
}
26283 | ||
26284 | static void | |
26285 | rs6000_post_atomic_barrier (enum memmodel model) | |
26286 | { | |
26287 | switch (model) | |
26288 | { | |
26289 | case MEMMODEL_RELAXED: | |
26290 | case MEMMODEL_CONSUME: | |
26291 | case MEMMODEL_RELEASE: | |
26292 | break; | |
26293 | case MEMMODEL_ACQUIRE: | |
26294 | case MEMMODEL_ACQ_REL: | |
26295 | case MEMMODEL_SEQ_CST: | |
26296 | emit_insn (gen_isync ()); | |
26297 | break; | |
26298 | default: | |
26299 | gcc_unreachable (); | |
26300 | } | |
26301 | } | |
26302 | ||
/* A subroutine of the various atomic expanders.  For sub-word operations,
   we must adjust things to operate on SImode.  Given the original MEM,
   return a new aligned memory.  Also build and return the quantities by
   which to shift and mask.  */

static rtx
rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
{
  rtx addr, align, shift, mask, mem;
  HOST_WIDE_INT shift_mask;
  machine_mode mode = GET_MODE (orig_mem);

  /* For smaller modes, we have to implement this via SImode.  The maximum
     bit offset of the subword within an aligned word is 0x18 (24) for a
     byte and 0x10 (16) for a halfword.  */
  shift_mask = (mode == QImode ? 0x18 : 0x10);

  addr = XEXP (orig_mem, 0);
  addr = force_reg (GET_MODE (addr), addr);

  /* Aligned memory containing subword.  Generate a new memory.  We
     do not want any of the existing MEM_ATTR data, as we're now
     accessing memory outside the original object.  */
  align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
  mem = gen_rtx_MEM (SImode, align);
  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
  if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
    set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);

  /* Shift amount for subword relative to aligned word: the low address
     bits scaled by 8 (bits per byte), masked to the valid range.  */
  shift = gen_reg_rtx (SImode);
  addr = gen_lowpart (SImode, addr);
  rtx tmp = gen_reg_rtx (SImode);
  emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
  emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
  /* Big endian counts the subword from the opposite end of the word.  */
  if (BYTES_BIG_ENDIAN)
    shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
				 shift, 1, OPTAB_LIB_WIDEN);
  *pshift = shift;

  /* Mask for insertion: the subword's mode mask shifted into position.  */
  mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
			      shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
  *pmask = mask;

  return mem;
}
26349 | ||
/* A subroutine of the various atomic expanders.  For sub-word operands,
   combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo holding
   (OLDVAL & ~MASK) | NEWVAL.  NEWVAL is expected to be already shifted
   into position under MASK (as done by the callers in this file).  */

static rtx
rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
{
  rtx x;

  /* x = oldval & ~mask: clear the subword field.  */
  x = gen_reg_rtx (SImode);
  emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
					  gen_rtx_NOT (SImode, mask),
					  oldval)));

  /* Merge in the new field bits.  */
  x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);

  return x;
}
26367 | ||
/* A subroutine of the various atomic expanders.  For sub-word operands,
   extract WIDE to NARROW via SHIFT: shift the field down to bit 0 and
   move its low part into NARROW.  */

static void
rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
{
  wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
			      wide, 1, OPTAB_LIB_WIDEN);
  emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
}
26378 | ||
/* Expand an atomic compare and swap operation.
   Operands: 0 = bool success output, 1 = old-value output, 2 = memory,
   3 = expected value, 4 = desired value, 5 = weak flag,
   6 = success memory model, 7 = failure memory model.  */

void
rs6000_expand_atomic_compare_and_swap (rtx operands[])
{
  rtx boolval, retval, mem, oldval, newval, cond;
  rtx label1, label2, x, mask, shift;
  machine_mode mode, orig_mode;
  enum memmodel mod_s, mod_f;
  bool is_weak;

  boolval = operands[0];
  retval = operands[1];
  mem = operands[2];
  oldval = operands[3];
  newval = operands[4];
  is_weak = (INTVAL (operands[5]) != 0);
  mod_s = memmodel_base (INTVAL (operands[6]));
  mod_f = memmodel_base (INTVAL (operands[7]));
  orig_mode = mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  if (mode == QImode || mode == HImode)
    {
      /* Before power8, we didn't have access to lbarx/lharx, so generate a
	 lwarx and shift/mask operations.  With power8, we need to do the
	 comparison in SImode, but the store is still done in QI/HImode.  */
      oldval = convert_modes (SImode, mode, oldval, 1);

      if (!TARGET_SYNC_HI_QI)
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask OLDVAL into position with the word.  */
	  oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  /* Shift and mask NEWVAL into position within the word.  */
	  newval = convert_modes (SImode, mode, newval, 1);
	  newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
					NULL_RTX, 1, OPTAB_LIB_WIDEN);
	}

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }
  else if (reg_overlap_mentioned_p (retval, oldval))
    oldval = copy_to_reg (oldval);

  if (mode != TImode && !reg_or_short_operand (oldval, mode))
    oldval = copy_to_mode_reg (mode, oldval);

  if (reg_overlap_mentioned_p (retval, newval))
    newval = copy_to_reg (newval);

  mem = rs6000_pre_atomic_barrier (mem, mod_s);

  /* A strong CAS loops back to the load-locked when the conditional
     store fails; a weak CAS simply reports failure.  */
  label1 = NULL_RTX;
  if (!is_weak)
    {
      label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
      emit_label (XEXP (label1, 0));
    }
  label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());

  emit_load_locked (mode, retval, mem);

  x = retval;
  if (mask)
    x = expand_simple_binop (SImode, AND, retval, mask,
			     NULL_RTX, 1, OPTAB_LIB_WIDEN);

  cond = gen_reg_rtx (CCmode);
  /* If we have TImode, synthesize a comparison.  */
  if (mode != TImode)
    x = gen_rtx_COMPARE (CCmode, x, oldval);
  else
    {
      /* XOR the two doubleword halves and OR the results; the 128-bit
	 values are equal iff the OR is zero.  */
      rtx xor1_result = gen_reg_rtx (DImode);
      rtx xor2_result = gen_reg_rtx (DImode);
      rtx or_result = gen_reg_rtx (DImode);
      rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
      rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
      rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
      rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);

      emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
      emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
      emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
      x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
    }

  emit_insn (gen_rtx_SET (cond, x));

  /* Bail out to LABEL2 when the loaded value differs from OLDVAL.  */
  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label2);

  x = newval;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, newval, mask);

  emit_store_conditional (orig_mode, cond, mem, x);

  if (!is_weak)
    {
      x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
      emit_unlikely_jump (x, label1);
    }

  /* A non-relaxed failure model joins before the trailing barrier so the
     failure path is also fenced; a relaxed one skips the barrier.  */
  if (!is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  rs6000_post_atomic_barrier (mod_s);

  if (is_mm_relaxed (mod_f))
    emit_label (XEXP (label2, 0));

  if (shift)
    rs6000_finish_atomic_subword (operands[1], retval, shift);
  else if (mode != GET_MODE (operands[1]))
    convert_move (operands[1], retval, 1);

  /* In all cases, CR0 contains EQ on success, and NE on failure.  */
  x = gen_rtx_EQ (SImode, cond, const0_rtx);
  emit_insn (gen_rtx_SET (boolval, x));
}
26506 | ||
/* Expand an atomic exchange operation.
   Operands: 0 = old-value output, 1 = memory, 2 = new value,
   3 = memory model.  */

void
rs6000_expand_atomic_exchange (rtx operands[])
{
  rtx retval, mem, val, cond;
  machine_mode mode;
  enum memmodel model;
  rtx label, x, mask, shift;

  retval = operands[0];
  mem = operands[1];
  val = operands[2];
  model = memmodel_base (INTVAL (operands[3]));
  mode = GET_MODE (mem);

  mask = shift = NULL_RTX;
  /* Without lbarx/lharx, do QI/HI exchange as a masked SImode sequence.  */
  if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
    {
      mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

      /* Shift and mask VAL into position with the word.  */
      val = convert_modes (SImode, mode, val, 1);
      val = expand_simple_binop (SImode, ASHIFT, val, shift,
				 NULL_RTX, 1, OPTAB_LIB_WIDEN);

      /* Prepare to adjust the return value.  */
      retval = gen_reg_rtx (SImode);
      mode = SImode;
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  /* Load-locked/store-conditional loop: retry until the store sticks.  */
  label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
  emit_label (XEXP (label, 0));

  emit_load_locked (mode, retval, mem);

  x = val;
  if (mask)
    x = rs6000_mask_atomic_subword (retval, val, mask);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    rs6000_finish_atomic_subword (operands[0], retval, shift);
}
26560 | ||
/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
   to perform.  MEM is the memory on which to operate.  VAL is the second
   operand of the binary operator.  BEFORE and AFTER are optional locations to
   return the value of MEM either before or after the operation.  MODEL_RTX
   is a CONST_INT containing the memory model to use.  */

void
rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
			 rtx orig_before, rtx orig_after, rtx model_rtx)
{
  enum memmodel model = memmodel_base (INTVAL (model_rtx));
  machine_mode mode = GET_MODE (mem);
  machine_mode store_mode = mode;
  rtx label, x, cond, mask, shift;
  rtx before = orig_before, after = orig_after;

  mask = shift = NULL_RTX;
  /* On power8, we want to use SImode for the operation.  On previous systems,
     use the operation in a subword and shift/mask to get the proper byte or
     halfword.  */
  if (mode == QImode || mode == HImode)
    {
      if (TARGET_SYNC_HI_QI)
	{
	  val = convert_modes (SImode, mode, val, 1);

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  mode = SImode;
	}
      else
	{
	  mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);

	  /* Shift and mask VAL into position with the word.  */
	  val = convert_modes (SImode, mode, val, 1);
	  val = expand_simple_binop (SImode, ASHIFT, val, shift,
				     NULL_RTX, 1, OPTAB_LIB_WIDEN);

	  switch (code)
	    {
	    case IOR:
	    case XOR:
	      /* We've already zero-extended VAL.  That is sufficient to
		 make certain that it does not affect other bits.  */
	      mask = NULL;
	      break;

	    case AND:
	      /* If we make certain that all of the other bits in VAL are
		 set, that will be sufficient to not affect other bits.  */
	      x = gen_rtx_NOT (SImode, mask);
	      x = gen_rtx_IOR (SImode, x, val);
	      emit_insn (gen_rtx_SET (val, x));
	      mask = NULL;
	      break;

	    case NOT:
	    case PLUS:
	    case MINUS:
	      /* These will all affect bits outside the field and need
		 adjustment via MASK within the loop.  */
	      break;

	    default:
	      gcc_unreachable ();
	    }

	  /* Prepare to adjust the return value.  */
	  before = gen_reg_rtx (SImode);
	  if (after)
	    after = gen_reg_rtx (SImode);
	  store_mode = mode = SImode;
	}
    }

  mem = rs6000_pre_atomic_barrier (mem, model);

  /* Top of the load-locked/store-conditional retry loop.  */
  label = gen_label_rtx ();
  emit_label (label);
  label = gen_rtx_LABEL_REF (VOIDmode, label);

  if (before == NULL_RTX)
    before = gen_reg_rtx (mode);

  emit_load_locked (mode, before, mem);

  if (code == NOT)
    {
      /* NOT here is fetch-nand: after = ~(before & val).  */
      x = expand_simple_binop (mode, AND, before, val,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      after = expand_simple_unop (mode, NOT, x, after, 1);
    }
  else
    {
      after = expand_simple_binop (mode, code, before, val,
				   after, 1, OPTAB_LIB_WIDEN);
    }

  x = after;
  if (mask)
    {
      /* Re-insert the updated subword field into the containing word.  */
      x = expand_simple_binop (SImode, AND, after, mask,
			       NULL_RTX, 1, OPTAB_LIB_WIDEN);
      x = rs6000_mask_atomic_subword (before, x, mask);
    }
  else if (store_mode != mode)
    x = convert_modes (store_mode, mode, x, 1);

  cond = gen_reg_rtx (CCmode);
  emit_store_conditional (store_mode, cond, mem, x);

  x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
  emit_unlikely_jump (x, label);

  rs6000_post_atomic_barrier (model);

  if (shift)
    {
      /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
	 then do the calculations in a SImode register.  */
      if (orig_before)
	rs6000_finish_atomic_subword (orig_before, before, shift);
      if (orig_after)
	rs6000_finish_atomic_subword (orig_after, after, shift);
    }
  else if (store_mode != mode)
    {
      /* QImode/HImode on machines with lbarx/lharx where we do the native
	 operation and then do the calculations in a SImode register.  */
      if (orig_before)
	convert_move (orig_before, before, 1);
      if (orig_after)
	convert_move (orig_after, after, 1);
    }
  else if (orig_after && after != orig_after)
    emit_move_insn (orig_after, after);
}
26701 | ||
/* Emit instructions to move SRC to DST.  Called by splitters for
   multi-register moves.  It will emit at most one instruction for
   each register that is accessed; that is, it won't emit li/lis pairs
   (or equivalent for 64-bit code).  One of SRC or DST must be a hard
   register.  */

void
rs6000_split_multireg_move (rtx dst, rtx src)
{
  /* The register number of the first register being moved.  */
  int reg;
  /* The mode that is to be moved.  */
  machine_mode mode;
  /* The mode that the move is being done in, and its size.  */
  machine_mode reg_mode;
  int reg_mode_size;
  /* The number of registers that will be moved.  */
  int nregs;

  reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
  mode = GET_MODE (dst);
  nregs = hard_regno_nregs (reg, mode);
  /* Pick the widest per-register chunk the register class can hold.  */
  if (FP_REGNO_P (reg))
    reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
	((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
  else if (ALTIVEC_REGNO_P (reg))
    reg_mode = V16QImode;
  else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
    reg_mode = DFmode;
  else
    reg_mode = word_mode;
  reg_mode_size = GET_MODE_SIZE (reg_mode);

  gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));

  /* TDmode residing in FP registers is special, since the ISA requires that
     the lower-numbered word of a register pair is always the most significant
     word, even in little-endian mode.  This does not match the usual subreg
     semantics, so we cannot use simplify_gen_subreg in those cases.  Access
     the appropriate constituent registers "by hand" in little-endian mode.

     Note we do not need to check for destructive overlap here since TDmode
     can only reside in even/odd register pairs.  */
  if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
    {
      rtx p_src, p_dst;
      int i;

      for (i = 0; i < nregs; i++)
	{
	  if (REG_P (src) && FP_REGNO_P (REGNO (src)))
	    p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
	  else
	    p_src = simplify_gen_subreg (reg_mode, src, mode,
					 i * reg_mode_size);

	  if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
	    p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
	  else
	    p_dst = simplify_gen_subreg (reg_mode, dst, mode,
					 i * reg_mode_size);

	  emit_insn (gen_rtx_SET (p_dst, p_src));
	}

      return;
    }

  if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
    {
      /* Move register range backwards, if we might have destructive
	 overlap.  */
      int i;
      for (i = nregs - 1; i >= 0; i--)
	emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
						     i * reg_mode_size),
				simplify_gen_subreg (reg_mode, src, mode,
						     i * reg_mode_size)));
    }
  else
    {
      int i;
      int j = -1;
      /* True when the first subword has already been transferred by a
	 load/store-with-update instruction.  */
      bool used_update = false;
      /* Deferred insn undoing a temporary base-register adjustment.  */
      rtx restore_basereg = NULL_RTX;

      if (MEM_P (src) && INT_REGNO_P (reg))
	{
	  rtx breg;

	  if (GET_CODE (XEXP (src, 0)) == PRE_INC
	      || GET_CODE (XEXP (src, 0)) == PRE_DEC)
	    {
	      /* Apply the pre-increment/decrement up front, then address
		 the memory plainly through the updated base register.  */
	      rtx delta_rtx;
	      breg = XEXP (XEXP (src, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
	      emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      src = replace_equiv_address (src, breg);
	    }
	  else if (! rs6000_offsettable_memref_p (src, reg_mode))
	    {
	      if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (src, 0), 0);
		  if (TARGET_UPDATE)
		    {
		      /* Load the first subword with a load-with-update,
			 which performs the PRE_MODIFY as a side effect.  */
		      rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
		      emit_insn (gen_rtx_SET (ndst,
					      gen_rtx_MEM (reg_mode,
							   XEXP (src, 0))));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (basereg,
					    XEXP (XEXP (src, 0), 1)));
		  src = replace_equiv_address (src, basereg);
		}
	      else
		{
		  /* Compute the full address into the first destination
		     register and address memory through it.  */
		  rtx basereg = gen_rtx_REG (Pmode, reg);
		  emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
		  src = replace_equiv_address (src, basereg);
		}
	    }

	  breg = XEXP (src, 0);
	  if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
	    breg = XEXP (breg, 0);

	  /* If the base register we are using to address memory is
	     also a destination reg, then change that register last.  */
	  if (REG_P (breg)
	      && REGNO (breg) >= REGNO (dst)
	      && REGNO (breg) < REGNO (dst) + nregs)
	    j = REGNO (breg) - REGNO (dst);
	}
      else if (MEM_P (dst) && INT_REGNO_P (reg))
	{
	  rtx breg;

	  if (GET_CODE (XEXP (dst, 0)) == PRE_INC
	      || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
	    {
	      rtx delta_rtx;
	      breg = XEXP (XEXP (dst, 0), 0);
	      delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
			   ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
			   : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));

	      /* We have to update the breg before doing the store.
		 Use store with update, if available.  */

	      if (TARGET_UPDATE)
		{
		  rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		  emit_insn (TARGET_32BIT
			     ? (TARGET_POWERPC64
				? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
				: gen_movsi_update (breg, breg, delta_rtx, nsrc))
			     : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
		  used_update = true;
		}
	      else
		emit_insn (gen_add3_insn (breg, breg, delta_rtx));
	      dst = replace_equiv_address (dst, breg);
	    }
	  else if (!rs6000_offsettable_memref_p (dst, reg_mode)
		   && GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    {
	      if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
		{
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  if (TARGET_UPDATE)
		    {
		      /* Store the first subword with store-with-update,
			 performing the PRE_MODIFY as a side effect.  */
		      rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
		      emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
							   XEXP (dst, 0)),
					      nsrc));
		      used_update = true;
		    }
		  else
		    emit_insn (gen_rtx_SET (basereg,
					    XEXP (XEXP (dst, 0), 1)));
		  dst = replace_equiv_address (dst, basereg);
		}
	      else
		{
		  /* reg+reg address: fold the offset into the base
		     register for the duration of the move and undo it
		     afterwards via RESTORE_BASEREG.  */
		  rtx basereg = XEXP (XEXP (dst, 0), 0);
		  rtx offsetreg = XEXP (XEXP (dst, 0), 1);
		  gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
			      && REG_P (basereg)
			      && REG_P (offsetreg)
			      && REGNO (basereg) != REGNO (offsetreg));
		  /* r0 as a base means zero in PowerPC addressing; make
		     sure the base register is not r0.  */
		  if (REGNO (basereg) == 0)
		    {
		      rtx tmp = offsetreg;
		      offsetreg = basereg;
		      basereg = tmp;
		    }
		  emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
		  restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
		  dst = replace_equiv_address (dst, basereg);
		}
	    }
	  else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
	    gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
	}

      for (i = 0; i < nregs; i++)
	{
	  /* Calculate index to next subword.  */
	  ++j;
	  if (j == nregs)
	    j = 0;

	  /* If compiler already emitted move of first word by
	     store with update, no need to do anything.  */
	  if (j == 0 && used_update)
	    continue;

	  emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
						       j * reg_mode_size),
				  simplify_gen_subreg (reg_mode, src, mode,
						       j * reg_mode_size)));
	}
      if (restore_basereg != NULL_RTX)
	emit_insn (restore_basereg);
    }
}
26933 | ||
26934 | \f | |
26935 | /* This page contains routines that are used to determine what the | |
26936 | function prologue and epilogue code will do and write them out. */ | |
26937 | ||
26938 | static inline bool | |
26939 | save_reg_p (int r) | |
26940 | { | |
26941 | return !call_used_regs[r] && df_regs_ever_live_p (r); | |
26942 | } | |
26943 | ||
/* Determine whether the gp REG is really used.  Returns true if REG must
   be treated as live: either it is the PIC offset table register under
   conditions that require it to be set up, or it is a call-saved register
   that is live (or claimed live by an eh_return function).  */

static bool
rs6000_reg_live_or_pic_offset_p (int reg)
{
  /* We need to mark the PIC offset register live for the same conditions
     as it is set up, or otherwise it won't be saved before we clobber it.  */

  if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM && !TARGET_SINGLE_PIC_BASE)
    {
      /* -mminimal-toc needs the register whenever the constant pool is
	 used, eh_return is in play, or it is otherwise live.  */
      if (TARGET_TOC && TARGET_MINIMAL_TOC
	  && (crtl->calls_eh_return
	      || df_regs_ever_live_p (reg)
	      || !constant_pool_empty_p ()))
	return true;

      if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
	  && flag_pic)
	return true;
    }

  /* If the function calls eh_return, claim used all the registers that would
     be checked for liveness otherwise.  */

  return ((crtl->calls_eh_return || df_regs_ever_live_p (reg))
	  && !call_used_regs[reg]);
}
26971 | ||
26972 | /* Return the first fixed-point register that is required to be | |
26973 | saved. 32 if none. */ | |
26974 | ||
26975 | int | |
26976 | first_reg_to_save (void) | |
26977 | { | |
26978 | int first_reg; | |
26979 | ||
26980 | /* Find lowest numbered live register. */ | |
26981 | for (first_reg = 13; first_reg <= 31; first_reg++) | |
26982 | if (save_reg_p (first_reg)) | |
26983 | break; | |
26984 | ||
26985 | if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM | |
26986 | && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0) | |
26987 | || (DEFAULT_ABI == ABI_DARWIN && flag_pic) | |
26988 | || (TARGET_TOC && TARGET_MINIMAL_TOC)) | |
26989 | && rs6000_reg_live_or_pic_offset_p (RS6000_PIC_OFFSET_TABLE_REGNUM)) | |
26990 | first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM; | |
26991 | ||
26992 | #if TARGET_MACHO | |
26993 | if (flag_pic | |
26994 | && crtl->uses_pic_offset_table | |
26995 | && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM) | |
26996 | return RS6000_PIC_OFFSET_TABLE_REGNUM; | |
26997 | #endif | |
26998 | ||
26999 | return first_reg; | |
27000 | } | |
27001 | ||
27002 | /* Similar, for FP regs. */ | |
27003 | ||
27004 | int | |
27005 | first_fp_reg_to_save (void) | |
27006 | { | |
27007 | int first_reg; | |
27008 | ||
27009 | /* Find lowest numbered live register. */ | |
27010 | for (first_reg = 14 + 32; first_reg <= 63; first_reg++) | |
27011 | if (save_reg_p (first_reg)) | |
27012 | break; | |
27013 | ||
27014 | return first_reg; | |
27015 | } | |
27016 | ||
27017 | /* Similar, for AltiVec regs. */ | |
27018 | ||
27019 | static int | |
27020 | first_altivec_reg_to_save (void) | |
27021 | { | |
27022 | int i; | |
27023 | ||
27024 | /* Stack frame remains as is unless we are in AltiVec ABI. */ | |
27025 | if (! TARGET_ALTIVEC_ABI) | |
27026 | return LAST_ALTIVEC_REGNO + 1; | |
27027 | ||
27028 | /* On Darwin, the unwind routines are compiled without | |
27029 | TARGET_ALTIVEC, and use save_world to save/restore the | |
27030 | altivec registers when necessary. */ | |
27031 | if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return | |
27032 | && ! TARGET_ALTIVEC) | |
27033 | return FIRST_ALTIVEC_REGNO + 20; | |
27034 | ||
27035 | /* Find lowest numbered live register. */ | |
27036 | for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i) | |
27037 | if (save_reg_p (i)) | |
27038 | break; | |
27039 | ||
27040 | return i; | |
27041 | } | |
27042 | ||
27043 | /* Return a 32-bit mask of the AltiVec registers we need to set in | |
27044 | VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in | |
27045 | the 32-bit word is 0. */ | |
27046 | ||
27047 | static unsigned int | |
27048 | compute_vrsave_mask (void) | |
27049 | { | |
27050 | unsigned int i, mask = 0; | |
27051 | ||
27052 | /* On Darwin, the unwind routines are compiled without | |
27053 | TARGET_ALTIVEC, and use save_world to save/restore the | |
27054 | call-saved altivec registers when necessary. */ | |
27055 | if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return | |
27056 | && ! TARGET_ALTIVEC) | |
27057 | mask |= 0xFFF; | |
27058 | ||
27059 | /* First, find out if we use _any_ altivec registers. */ | |
27060 | for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i) | |
27061 | if (df_regs_ever_live_p (i)) | |
27062 | mask |= ALTIVEC_REG_BIT (i); | |
27063 | ||
27064 | if (mask == 0) | |
27065 | return mask; | |
27066 | ||
27067 | /* Next, remove the argument registers from the set. These must | |
27068 | be in the VRSAVE mask set by the caller, so we don't need to add | |
27069 | them in again. More importantly, the mask we compute here is | |
27070 | used to generate CLOBBERs in the set_vrsave insn, and we do not | |
27071 | wish the argument registers to die. */ | |
27072 | for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++) | |
27073 | mask &= ~ALTIVEC_REG_BIT (i); | |
27074 | ||
27075 | /* Similarly, remove the return value from the set. */ | |
27076 | { | |
27077 | bool yes = false; | |
27078 | diddle_return_value (is_altivec_return_reg, &yes); | |
27079 | if (yes) | |
27080 | mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN); | |
27081 | } | |
27082 | ||
27083 | return mask; | |
27084 | } | |
27085 | ||
/* For a very restricted set of circumstances, we can cut down the
   size of prologues/epilogues by calling our own save/restore-the-world
   routines.  Sets INFO->world_save_p and, when enabled, forces the
   related save fields (vrsave, LR) on.  */

static void
compute_save_world_info (rs6000_stack_t *info)
{
  /* Tentatively enable the flag so the WORLD_SAVE_P test below reads
     true, then apply the real conditions.  */
  info->world_save_p = 1;
  info->world_save_p
    = (WORLD_SAVE_P (info)
       && DEFAULT_ABI == ABI_DARWIN
       && !cfun->has_nonlocal_label
       && info->first_fp_reg_save == FIRST_SAVED_FP_REGNO
       && info->first_gp_reg_save == FIRST_SAVED_GP_REGNO
       && info->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
       && info->cr_save_p);

  /* This will not work in conjunction with sibcalls.  Make sure there
     are none.  (This check is expensive, but seldom executed.) */
  if (WORLD_SAVE_P (info))
    {
      rtx_insn *insn;
      for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
	if (CALL_P (insn) && SIBLING_CALL_P (insn))
	  {
	    info->world_save_p = 0;
	    break;
	  }
    }

  if (WORLD_SAVE_P (info))
    {
      /* Even if we're not touching VRsave, make sure there's room on the
	 stack for it, if it looks like we're calling SAVE_WORLD, which
	 will attempt to save it. */
      info->vrsave_size  = 4;

      /* If we are going to save the world, we need to save the link register too.  */
      info->lr_save_p = 1;

      /* "Save" the VRsave register too if we're saving the world.  */
      if (info->vrsave_mask == 0)
	info->vrsave_mask = compute_vrsave_mask ();

      /* Because the Darwin register save/restore routines only handle
	 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
	 check.  */
      gcc_assert (info->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
		  && (info->first_altivec_reg_save
		      >= FIRST_SAVED_ALTIVEC_REGNO));
    }

  return;
}
27140 | ||
27141 | ||
27142 | static void | |
27143 | is_altivec_return_reg (rtx reg, void *xyes) | |
27144 | { | |
27145 | bool *yes = (bool *) xyes; | |
27146 | if (REGNO (reg) == ALTIVEC_ARG_RETURN) | |
27147 | *yes = true; | |
27148 | } | |
27149 | ||
27150 | \f | |
27151 | /* Return whether REG is a global user reg or has been specifed by | |
27152 | -ffixed-REG. We should not restore these, and so cannot use | |
27153 | lmw or out-of-line restore functions if there are any. We also | |
27154 | can't save them (well, emit frame notes for them), because frame | |
27155 | unwinding during exception handling will restore saved registers. */ | |
27156 | ||
27157 | static bool | |
27158 | fixed_reg_p (int reg) | |
27159 | { | |
27160 | /* Ignore fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] when the | |
27161 | backend sets it, overriding anything the user might have given. */ | |
27162 | if (reg == RS6000_PIC_OFFSET_TABLE_REGNUM | |
27163 | && ((DEFAULT_ABI == ABI_V4 && flag_pic) | |
27164 | || (DEFAULT_ABI == ABI_DARWIN && flag_pic) | |
27165 | || (TARGET_TOC && TARGET_MINIMAL_TOC))) | |
27166 | return false; | |
27167 | ||
27168 | return fixed_regs[reg]; | |
27169 | } | |
27170 | ||
/* Determine the strategy for saving/restoring registers.  */

/* Flag bits returned by rs6000_savres_strategy.  The SAVE_/REST_
   *_INLINE_* bits select in-line stores/loads over the out-of-line
   save/restore library routines; the *_MULTIPLE bits select the
   store-multiple/load-multiple instructions for GPRs.  */
enum {
  SAVE_MULTIPLE = 0x1,
  SAVE_INLINE_GPRS = 0x2,
  SAVE_INLINE_FPRS = 0x4,
  /* The out-of-line save routines also store LR for us.  */
  SAVE_NOINLINE_GPRS_SAVES_LR = 0x8,
  SAVE_NOINLINE_FPRS_SAVES_LR = 0x10,
  SAVE_INLINE_VRS = 0x20,
  REST_MULTIPLE = 0x100,
  REST_INLINE_GPRS = 0x200,
  REST_INLINE_FPRS = 0x400,
  REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x800,
  REST_INLINE_VRS = 0x1000
};
27186 | ||
/* Compute a bitmask of the SAVE_ and REST_ flags above describing how
   the prologue/epilogue should save and restore registers.  INFO is
   the stack frame description computed so far; USING_STATIC_CHAIN_P
   says whether r11 is occupied by the static chain.  */

static int
rs6000_savres_strategy (rs6000_stack_t *info,
			bool using_static_chain_p)
{
  int strategy = 0;

  /* Select between in-line and out-of-line save and restore of regs.
     First, all the obvious cases where we don't use out-of-line.  */
  if (crtl->calls_eh_return
      || cfun->machine->ra_need_lr)
    strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
		 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
		 | SAVE_INLINE_VRS | REST_INLINE_VRS);

  /* Nothing of each class to save at all: trivially in-line.  */
  if (info->first_gp_reg_save == 32)
    strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;

  if (info->first_fp_reg_save == 64
      /* The out-of-line FP routines use double-precision stores;
	 we can't use those routines if we don't have such stores.  */
      || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT))
    strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;

  if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1)
    strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;

  /* Define cutoff for using out-of-line functions to save registers.  */
  if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
    {
      if (!optimize_size)
	{
	  strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
	  strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
	  strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
	}
      else
	{
	  /* Prefer out-of-line restore if it will exit.  */
	  if (info->first_fp_reg_save > 61)
	    strategy |= SAVE_INLINE_FPRS;
	  if (info->first_gp_reg_save > 29)
	    {
	      if (info->first_fp_reg_save == 64)
		strategy |= SAVE_INLINE_GPRS;
	      else
		strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
	    }
	  if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
	    strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
	}
    }
  else if (DEFAULT_ABI == ABI_DARWIN)
    {
      if (info->first_fp_reg_save > 60)
	strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
      if (info->first_gp_reg_save > 29)
	strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
      strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
    }
  else
    {
      gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
      if ((flag_shrink_wrap_separate && optimize_function_for_speed_p (cfun))
	  || info->first_fp_reg_save > 61)
	strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
      strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
      strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
    }

  /* Don't bother to try to save things out-of-line if r11 is occupied
     by the static chain.  It would require too much fiddling and the
     static chain is rarely used anyway.  FPRs are saved w.r.t the stack
     pointer on Darwin, and AIX uses r1 or r12.  */
  if (using_static_chain_p
      && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
    strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
		 | SAVE_INLINE_GPRS
		 | SAVE_INLINE_VRS);

  /* Saving CR interferes with the exit routines used on the SPE, so
     just punt here.  */
  if (TARGET_SPE_ABI
      && info->spe_64bit_regs_used
      && info->cr_save_p)
    strategy |= REST_INLINE_GPRS;

  /* We can only use the out-of-line routines to restore fprs if we've
     saved all the registers from first_fp_reg_save in the prologue.
     Otherwise, we risk loading garbage.  Of course, if we have saved
     out-of-line then we know we haven't skipped any fprs.  */
  if ((strategy & SAVE_INLINE_FPRS)
      && !(strategy & REST_INLINE_FPRS))
    {
      int i;

      for (i = info->first_fp_reg_save; i < 64; i++)
	if (fixed_regs[i] || !save_reg_p (i))
	  {
	    strategy |= REST_INLINE_FPRS;
	    break;
	  }
    }

  /* Similarly, for altivec regs.  */
  if ((strategy & SAVE_INLINE_VRS)
      && !(strategy & REST_INLINE_VRS))
    {
      int i;

      for (i = info->first_altivec_reg_save; i < LAST_ALTIVEC_REGNO + 1; i++)
	if (fixed_regs[i] || !save_reg_p (i))
	  {
	    strategy |= REST_INLINE_VRS;
	    break;
	  }
    }

  /* info->lr_save_p isn't yet set if the only reason lr needs to be
     saved is an out-of-line save or restore.  Set up the value for
     the next test (excluding out-of-line gprs).  */
  bool lr_save_p = (info->lr_save_p
		    || !(strategy & SAVE_INLINE_FPRS)
		    || !(strategy & SAVE_INLINE_VRS)
		    || !(strategy & REST_INLINE_FPRS)
		    || !(strategy & REST_INLINE_VRS));

  if (TARGET_MULTIPLE
      && !TARGET_POWERPC64
      && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
      && info->first_gp_reg_save < 31
      && !(flag_shrink_wrap
	   && flag_shrink_wrap_separate
	   && optimize_function_for_speed_p (cfun)))
    {
      /* Prefer store multiple for saves over out-of-line routines,
	 since the store-multiple instruction will always be smaller.  */
      strategy |= SAVE_INLINE_GPRS | SAVE_MULTIPLE;

      /* The situation is more complicated with load multiple.  We'd
	 prefer to use the out-of-line routines for restores, since the
	 "exit" out-of-line routines can handle the restore of LR and the
	 frame teardown.  However it doesn't make sense to use the
	 out-of-line routine if that is the only reason we'd need to save
	 LR, and we can't use the "exit" out-of-line gpr restore if we
	 have saved some fprs; In those cases it is advantageous to use
	 load multiple when available.  */
      if (info->first_fp_reg_save != 64 || !lr_save_p)
	strategy |= REST_INLINE_GPRS | REST_MULTIPLE;
    }

  /* Using the "exit" out-of-line routine does not improve code size
     if using it would require lr to be saved and if only saving one
     or two gprs.  */
  else if (!lr_save_p && info->first_gp_reg_save > 29)
    strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;

  /* We can only use load multiple or the out-of-line routines to
     restore gprs if we've saved all the registers from
     first_gp_reg_save.  Otherwise, we risk loading garbage.
     Of course, if we have saved out-of-line or used stmw then we know
     we haven't skipped any gprs.  */
  if ((strategy & (SAVE_INLINE_GPRS | SAVE_MULTIPLE)) == SAVE_INLINE_GPRS
      && (strategy & (REST_INLINE_GPRS | REST_MULTIPLE)) != REST_INLINE_GPRS)
    {
      int i;

      for (i = info->first_gp_reg_save; i < 32; i++)
	if (fixed_reg_p (i) || !save_reg_p (i))
	  {
	    strategy |= REST_INLINE_GPRS;
	    strategy &= ~REST_MULTIPLE;
	    break;
	  }
    }

  /* Record when the chosen out-of-line routines handle LR themselves.  */
  if (TARGET_ELF && TARGET_64BIT)
    {
      if (!(strategy & SAVE_INLINE_FPRS))
	strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
      else if (!(strategy & SAVE_INLINE_GPRS)
	       && info->first_fp_reg_save == 64)
	strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
    }
  else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
    strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;

  if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
    strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;

  return strategy;
}
27378 | ||
27379 | /* Calculate the stack information for the current function. This is | |
27380 | complicated by having two separate calling sequences, the AIX calling | |
27381 | sequence and the V.4 calling sequence. | |
27382 | ||
27383 | AIX (and Darwin/Mac OS X) stack frames look like: | |
27384 | 32-bit 64-bit | |
27385 | SP----> +---------------------------------------+ | |
27386 | | back chain to caller | 0 0 | |
27387 | +---------------------------------------+ | |
27388 | | saved CR | 4 8 (8-11) | |
27389 | +---------------------------------------+ | |
27390 | | saved LR | 8 16 | |
27391 | +---------------------------------------+ | |
27392 | | reserved for compilers | 12 24 | |
27393 | +---------------------------------------+ | |
27394 | | reserved for binders | 16 32 | |
27395 | +---------------------------------------+ | |
27396 | | saved TOC pointer | 20 40 | |
27397 | +---------------------------------------+ | |
27398 | | Parameter save area (+padding*) (P) | 24 48 | |
27399 | +---------------------------------------+ | |
27400 | | Alloca space (A) | 24+P etc. | |
27401 | +---------------------------------------+ | |
27402 | | Local variable space (L) | 24+P+A | |
27403 | +---------------------------------------+ | |
27404 | | Float/int conversion temporary (X) | 24+P+A+L | |
27405 | +---------------------------------------+ | |
27406 | | Save area for AltiVec registers (W) | 24+P+A+L+X | |
27407 | +---------------------------------------+ | |
27408 | | AltiVec alignment padding (Y) | 24+P+A+L+X+W | |
27409 | +---------------------------------------+ | |
27410 | | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y | |
27411 | +---------------------------------------+ | |
27412 | | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z | |
27413 | +---------------------------------------+ | |
27414 | | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G | |
27415 | +---------------------------------------+ | |
27416 | old SP->| back chain to caller's caller | | |
27417 | +---------------------------------------+ | |
27418 | ||
27419 | * If the alloca area is present, the parameter save area is | |
27420 | padded so that the former starts 16-byte aligned. | |
27421 | ||
27422 | The required alignment for AIX configurations is two words (i.e., 8 | |
27423 | or 16 bytes). | |
27424 | ||
27425 | The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like: | |
27426 | ||
27427 | SP----> +---------------------------------------+ | |
27428 | | Back chain to caller | 0 | |
27429 | +---------------------------------------+ | |
27430 | | Save area for CR | 8 | |
27431 | +---------------------------------------+ | |
27432 | | Saved LR | 16 | |
27433 | +---------------------------------------+ | |
27434 | | Saved TOC pointer | 24 | |
27435 | +---------------------------------------+ | |
27436 | | Parameter save area (+padding*) (P) | 32 | |
27437 | +---------------------------------------+ | |
27438 | | Alloca space (A) | 32+P | |
27439 | +---------------------------------------+ | |
27440 | | Local variable space (L) | 32+P+A | |
27441 | +---------------------------------------+ | |
27442 | | Save area for AltiVec registers (W) | 32+P+A+L | |
27443 | +---------------------------------------+ | |
27444 | | AltiVec alignment padding (Y) | 32+P+A+L+W | |
27445 | +---------------------------------------+ | |
27446 | | Save area for GP registers (G) | 32+P+A+L+W+Y | |
27447 | +---------------------------------------+ | |
27448 | | Save area for FP registers (F) | 32+P+A+L+W+Y+G | |
27449 | +---------------------------------------+ | |
27450 | old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F | |
27451 | +---------------------------------------+ | |
27452 | ||
27453 | * If the alloca area is present, the parameter save area is | |
27454 | padded so that the former starts 16-byte aligned. | |
27455 | ||
27456 | V.4 stack frames look like: | |
27457 | ||
27458 | SP----> +---------------------------------------+ | |
27459 | | back chain to caller | 0 | |
27460 | +---------------------------------------+ | |
27461 | | caller's saved LR | 4 | |
27462 | +---------------------------------------+ | |
27463 | | Parameter save area (+padding*) (P) | 8 | |
27464 | +---------------------------------------+ | |
27465 | | Alloca space (A) | 8+P | |
27466 | +---------------------------------------+ | |
27467 | | Varargs save area (V) | 8+P+A | |
27468 | +---------------------------------------+ | |
27469 | | Local variable space (L) | 8+P+A+V | |
27470 | +---------------------------------------+ | |
27471 | | Float/int conversion temporary (X) | 8+P+A+V+L | |
27472 | +---------------------------------------+ | |
27473 | | Save area for AltiVec registers (W) | 8+P+A+V+L+X | |
27474 | +---------------------------------------+ | |
27475 | | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W | |
27476 | +---------------------------------------+ | |
27477 | | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y | |
27478 | +---------------------------------------+ | |
27479 | | SPE: area for 64-bit GP registers | | |
27480 | +---------------------------------------+ | |
27481 | | SPE alignment padding | | |
27482 | +---------------------------------------+ | |
27483 | | saved CR (C) | 8+P+A+V+L+X+W+Y+Z | |
27484 | +---------------------------------------+ | |
27485 | | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C | |
27486 | +---------------------------------------+ | |
27487 | | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G | |
27488 | +---------------------------------------+ | |
27489 | old SP->| back chain to caller's caller | | |
27490 | +---------------------------------------+ | |
27491 | ||
27492 | * If the alloca area is present and the required alignment is | |
27493 | 16 bytes, the parameter save area is padded so that the | |
27494 | alloca area starts 16-byte aligned. | |
27495 | ||
27496 | The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is | |
27497 | given. (But note below and in sysv4.h that we require only 8 and | |
27498 | may round up the size of our stack frame anyways. The historical | |
27499 | reason is early versions of powerpc-linux which didn't properly | |
27500 | align the stack at program startup. A happy side-effect is that | |
27501 | -mno-eabi libraries can be used with -meabi programs.) | |
27502 | ||
27503 | The EABI configuration defaults to the V.4 layout. However, | |
27504 | the stack alignment requirements may differ. If -mno-eabi is not | |
27505 | given, the required stack alignment is 8 bytes; if -mno-eabi is | |
27506 | given, the required alignment is 16 bytes. (But see V.4 comment | |
27507 | above.) */ | |
27508 | ||
27509 | #ifndef ABI_STACK_BOUNDARY | |
27510 | #define ABI_STACK_BOUNDARY STACK_BOUNDARY | |
27511 | #endif | |
27512 | ||
27513 | static rs6000_stack_t * | |
27514 | rs6000_stack_info (void) | |
27515 | { | |
27516 | /* We should never be called for thunks, we are not set up for that. */ | |
27517 | gcc_assert (!cfun->is_thunk); | |
27518 | ||
27519 | rs6000_stack_t *info = &stack_info; | |
27520 | int reg_size = TARGET_32BIT ? 4 : 8; | |
27521 | int ehrd_size; | |
27522 | int ehcr_size; | |
27523 | int save_align; | |
27524 | int first_gp; | |
27525 | HOST_WIDE_INT non_fixed_size; | |
27526 | bool using_static_chain_p; | |
27527 | ||
27528 | if (reload_completed && info->reload_completed) | |
27529 | return info; | |
27530 | ||
27531 | memset (info, 0, sizeof (*info)); | |
27532 | info->reload_completed = reload_completed; | |
27533 | ||
27534 | if (TARGET_SPE) | |
27535 | { | |
27536 | /* Cache value so we don't rescan instruction chain over and over. */ | |
27537 | if (cfun->machine->spe_insn_chain_scanned_p == 0) | |
27538 | cfun->machine->spe_insn_chain_scanned_p | |
27539 | = spe_func_has_64bit_regs_p () + 1; | |
27540 | info->spe_64bit_regs_used = cfun->machine->spe_insn_chain_scanned_p - 1; | |
27541 | } | |
27542 | ||
27543 | /* Select which calling sequence. */ | |
27544 | info->abi = DEFAULT_ABI; | |
27545 | ||
27546 | /* Calculate which registers need to be saved & save area size. */ | |
27547 | info->first_gp_reg_save = first_reg_to_save (); | |
27548 | /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM, | |
27549 | even if it currently looks like we won't. Reload may need it to | |
27550 | get at a constant; if so, it will have already created a constant | |
27551 | pool entry for it. */ | |
27552 | if (((TARGET_TOC && TARGET_MINIMAL_TOC) | |
27553 | || (flag_pic == 1 && DEFAULT_ABI == ABI_V4) | |
27554 | || (flag_pic && DEFAULT_ABI == ABI_DARWIN)) | |
27555 | && crtl->uses_const_pool | |
27556 | && info->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM) | |
27557 | first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM; | |
27558 | else | |
27559 | first_gp = info->first_gp_reg_save; | |
27560 | ||
27561 | info->gp_size = reg_size * (32 - first_gp); | |
27562 | ||
27563 | /* For the SPE, we have an additional upper 32-bits on each GPR. | |
27564 | Ideally we should save the entire 64-bits only when the upper | |
27565 | half is used in SIMD instructions. Since we only record | |
27566 | registers live (not the size they are used in), this proves | |
27567 | difficult because we'd have to traverse the instruction chain at | |
27568 | the right time, taking reload into account. This is a real pain, | |
27569 | so we opt to save the GPRs in 64-bits always if but one register | |
27570 | gets used in 64-bits. Otherwise, all the registers in the frame | |
27571 | get saved in 32-bits. | |
27572 | ||
27573 | So... since when we save all GPRs (except the SP) in 64-bits, the | |
27574 | traditional GP save area will be empty. */ | |
27575 | if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0) | |
27576 | info->gp_size = 0; | |
27577 | ||
27578 | info->first_fp_reg_save = first_fp_reg_to_save (); | |
27579 | info->fp_size = 8 * (64 - info->first_fp_reg_save); | |
27580 | ||
27581 | info->first_altivec_reg_save = first_altivec_reg_to_save (); | |
27582 | info->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1 | |
27583 | - info->first_altivec_reg_save); | |
27584 | ||
27585 | /* Does this function call anything? */ | |
27586 | info->calls_p = (!crtl->is_leaf || cfun->machine->ra_needs_full_frame); | |
27587 | ||
27588 | /* Determine if we need to save the condition code registers. */ | |
27589 | if (save_reg_p (CR2_REGNO) | |
27590 | || save_reg_p (CR3_REGNO) | |
27591 | || save_reg_p (CR4_REGNO)) | |
27592 | { | |
27593 | info->cr_save_p = 1; | |
27594 | if (DEFAULT_ABI == ABI_V4) | |
27595 | info->cr_size = reg_size; | |
27596 | } | |
27597 | ||
27598 | /* If the current function calls __builtin_eh_return, then we need | |
27599 | to allocate stack space for registers that will hold data for | |
27600 | the exception handler. */ | |
27601 | if (crtl->calls_eh_return) | |
27602 | { | |
27603 | unsigned int i; | |
27604 | for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i) | |
27605 | continue; | |
27606 | ||
27607 | /* SPE saves EH registers in 64-bits. */ | |
27608 | ehrd_size = i * (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0 | |
27609 | ? UNITS_PER_SPE_WORD : UNITS_PER_WORD); | |
27610 | } | |
27611 | else | |
27612 | ehrd_size = 0; | |
27613 | ||
27614 | /* In the ELFv2 ABI, we also need to allocate space for separate | |
27615 | CR field save areas if the function calls __builtin_eh_return. */ | |
27616 | if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return) | |
27617 | { | |
27618 | /* This hard-codes that we have three call-saved CR fields. */ | |
27619 | ehcr_size = 3 * reg_size; | |
27620 | /* We do *not* use the regular CR save mechanism. */ | |
27621 | info->cr_save_p = 0; | |
27622 | } | |
27623 | else | |
27624 | ehcr_size = 0; | |
27625 | ||
27626 | /* Determine various sizes. */ | |
27627 | info->reg_size = reg_size; | |
27628 | info->fixed_size = RS6000_SAVE_AREA; | |
27629 | info->vars_size = RS6000_ALIGN (get_frame_size (), 8); | |
27630 | if (cfun->calls_alloca) | |
27631 | info->parm_size = | |
27632 | RS6000_ALIGN (crtl->outgoing_args_size + info->fixed_size, | |
27633 | STACK_BOUNDARY / BITS_PER_UNIT) - info->fixed_size; | |
27634 | else | |
27635 | info->parm_size = RS6000_ALIGN (crtl->outgoing_args_size, | |
27636 | TARGET_ALTIVEC ? 16 : 8); | |
27637 | if (FRAME_GROWS_DOWNWARD) | |
27638 | info->vars_size | |
27639 | += RS6000_ALIGN (info->fixed_size + info->vars_size + info->parm_size, | |
27640 | ABI_STACK_BOUNDARY / BITS_PER_UNIT) | |
27641 | - (info->fixed_size + info->vars_size + info->parm_size); | |
27642 | ||
27643 | if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0) | |
27644 | info->spe_gp_size = 8 * (32 - first_gp); | |
27645 | ||
27646 | if (TARGET_ALTIVEC_ABI) | |
27647 | info->vrsave_mask = compute_vrsave_mask (); | |
27648 | ||
27649 | if (TARGET_ALTIVEC_VRSAVE && info->vrsave_mask) | |
27650 | info->vrsave_size = 4; | |
27651 | ||
27652 | compute_save_world_info (info); | |
27653 | ||
27654 | /* Calculate the offsets. */ | |
27655 | switch (DEFAULT_ABI) | |
27656 | { | |
27657 | case ABI_NONE: | |
27658 | default: | |
27659 | gcc_unreachable (); | |
27660 | ||
27661 | case ABI_AIX: | |
27662 | case ABI_ELFv2: | |
27663 | case ABI_DARWIN: | |
27664 | info->fp_save_offset = -info->fp_size; | |
27665 | info->gp_save_offset = info->fp_save_offset - info->gp_size; | |
27666 | ||
27667 | if (TARGET_ALTIVEC_ABI) | |
27668 | { | |
27669 | info->vrsave_save_offset = info->gp_save_offset - info->vrsave_size; | |
27670 | ||
27671 | /* Align stack so vector save area is on a quadword boundary. | |
27672 | The padding goes above the vectors. */ | |
27673 | if (info->altivec_size != 0) | |
27674 | info->altivec_padding_size = info->vrsave_save_offset & 0xF; | |
27675 | ||
27676 | info->altivec_save_offset = info->vrsave_save_offset | |
27677 | - info->altivec_padding_size | |
27678 | - info->altivec_size; | |
27679 | gcc_assert (info->altivec_size == 0 | |
27680 | || info->altivec_save_offset % 16 == 0); | |
27681 | ||
27682 | /* Adjust for AltiVec case. */ | |
27683 | info->ehrd_offset = info->altivec_save_offset - ehrd_size; | |
27684 | } | |
27685 | else | |
27686 | info->ehrd_offset = info->gp_save_offset - ehrd_size; | |
27687 | ||
27688 | info->ehcr_offset = info->ehrd_offset - ehcr_size; | |
27689 | info->cr_save_offset = reg_size; /* first word when 64-bit. */ | |
27690 | info->lr_save_offset = 2*reg_size; | |
27691 | break; | |
27692 | ||
27693 | case ABI_V4: | |
27694 | info->fp_save_offset = -info->fp_size; | |
27695 | info->gp_save_offset = info->fp_save_offset - info->gp_size; | |
27696 | info->cr_save_offset = info->gp_save_offset - info->cr_size; | |
27697 | ||
27698 | if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0) | |
27699 | { | |
27700 | /* Align stack so SPE GPR save area is aligned on a | |
27701 | double-word boundary. */ | |
27702 | if (info->spe_gp_size != 0 && info->cr_save_offset != 0) | |
27703 | info->spe_padding_size = 8 - (-info->cr_save_offset % 8); | |
27704 | else | |
27705 | info->spe_padding_size = 0; | |
27706 | ||
27707 | info->spe_gp_save_offset = info->cr_save_offset | |
27708 | - info->spe_padding_size | |
27709 | - info->spe_gp_size; | |
27710 | ||
27711 | /* Adjust for SPE case. */ | |
27712 | info->ehrd_offset = info->spe_gp_save_offset; | |
27713 | } | |
27714 | else if (TARGET_ALTIVEC_ABI) | |
27715 | { | |
27716 | info->vrsave_save_offset = info->cr_save_offset - info->vrsave_size; | |
27717 | ||
27718 | /* Align stack so vector save area is on a quadword boundary. */ | |
27719 | if (info->altivec_size != 0) | |
27720 | info->altivec_padding_size = 16 - (-info->vrsave_save_offset % 16); | |
27721 | ||
27722 | info->altivec_save_offset = info->vrsave_save_offset | |
27723 | - info->altivec_padding_size | |
27724 | - info->altivec_size; | |
27725 | ||
27726 | /* Adjust for AltiVec case. */ | |
27727 | info->ehrd_offset = info->altivec_save_offset; | |
27728 | } | |
27729 | else | |
27730 | info->ehrd_offset = info->cr_save_offset; | |
27731 | ||
27732 | info->ehrd_offset -= ehrd_size; | |
27733 | info->lr_save_offset = reg_size; | |
27734 | } | |
27735 | ||
27736 | save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8; | |
27737 | info->save_size = RS6000_ALIGN (info->fp_size | |
27738 | + info->gp_size | |
27739 | + info->altivec_size | |
27740 | + info->altivec_padding_size | |
27741 | + info->spe_gp_size | |
27742 | + info->spe_padding_size | |
27743 | + ehrd_size | |
27744 | + ehcr_size | |
27745 | + info->cr_size | |
27746 | + info->vrsave_size, | |
27747 | save_align); | |
27748 | ||
27749 | non_fixed_size = info->vars_size + info->parm_size + info->save_size; | |
27750 | ||
27751 | info->total_size = RS6000_ALIGN (non_fixed_size + info->fixed_size, | |
27752 | ABI_STACK_BOUNDARY / BITS_PER_UNIT); | |
27753 | ||
27754 | /* Determine if we need to save the link register. */ | |
27755 | if (info->calls_p | |
27756 | || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
27757 | && crtl->profile | |
27758 | && !TARGET_PROFILE_KERNEL) | |
27759 | || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca) | |
27760 | #ifdef TARGET_RELOCATABLE | |
27761 | || (DEFAULT_ABI == ABI_V4 | |
27762 | && (TARGET_RELOCATABLE || flag_pic > 1) | |
27763 | && !constant_pool_empty_p ()) | |
27764 | #endif | |
27765 | || rs6000_ra_ever_killed ()) | |
27766 | info->lr_save_p = 1; | |
27767 | ||
27768 | using_static_chain_p = (cfun->static_chain_decl != NULL_TREE | |
27769 | && df_regs_ever_live_p (STATIC_CHAIN_REGNUM) | |
27770 | && call_used_regs[STATIC_CHAIN_REGNUM]); | |
27771 | info->savres_strategy = rs6000_savres_strategy (info, using_static_chain_p); | |
27772 | ||
27773 | if (!(info->savres_strategy & SAVE_INLINE_GPRS) | |
27774 | || !(info->savres_strategy & SAVE_INLINE_FPRS) | |
27775 | || !(info->savres_strategy & SAVE_INLINE_VRS) | |
27776 | || !(info->savres_strategy & REST_INLINE_GPRS) | |
27777 | || !(info->savres_strategy & REST_INLINE_FPRS) | |
27778 | || !(info->savres_strategy & REST_INLINE_VRS)) | |
27779 | info->lr_save_p = 1; | |
27780 | ||
27781 | if (info->lr_save_p) | |
27782 | df_set_regs_ever_live (LR_REGNO, true); | |
27783 | ||
27784 | /* Determine if we need to allocate any stack frame: | |
27785 | ||
27786 | For AIX we need to push the stack if a frame pointer is needed | |
27787 | (because the stack might be dynamically adjusted), if we are | |
27788 | debugging, if we make calls, or if the sum of fp_save, gp_save, | |
27789 | and local variables are more than the space needed to save all | |
27790 | non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8 | |
27791 | + 18*8 = 288 (GPR13 reserved). | |
27792 | ||
27793 | For V.4 we don't have the stack cushion that AIX uses, but assume | |
27794 | that the debugger can handle stackless frames. */ | |
27795 | ||
27796 | if (info->calls_p) | |
27797 | info->push_p = 1; | |
27798 | ||
27799 | else if (DEFAULT_ABI == ABI_V4) | |
27800 | info->push_p = non_fixed_size != 0; | |
27801 | ||
27802 | else if (frame_pointer_needed) | |
27803 | info->push_p = 1; | |
27804 | ||
27805 | else if (TARGET_XCOFF && write_symbols != NO_DEBUG) | |
27806 | info->push_p = 1; | |
27807 | ||
27808 | else | |
27809 | info->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288); | |
27810 | ||
27811 | return info; | |
27812 | } | |
27813 | ||
/* Return true if the current function uses any GPRs in 64-bit SIMD
   mode, i.e. whether the prologue/epilogue must save and restore
   GPRs as full 64-bit values rather than 32-bit ones.  */

static bool
spe_func_has_64bit_regs_p (void)
{
  rtx_insn *insns, *insn;

  /* Functions that save and restore all the call-saved registers will
     need to save/restore the registers in 64-bits.  */
  if (crtl->calls_eh_return
      || cfun->calls_setjmp
      || crtl->has_nonlocal_goto)
    return true;

  insns = get_insns ();

  /* Scan the whole insn chain for a SET whose source has an SPE vector
     mode, or (for -me500-double) a mode kept in a GPR pair.  */
  for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
	{
	  rtx i;

	  /* FIXME: This should be implemented with attributes...

		 (set_attr "spe64" "true")....then,
		 if (get_spe64(insn)) return true;

	     It's the only reliable way to do the stuff below.  */

	  i = PATTERN (insn);
	  /* NOTE(review): only top-level SET patterns are inspected;
	     SETs buried inside a PARALLEL are not examined here.  */
	  if (GET_CODE (i) == SET)
	    {
	      machine_mode mode = GET_MODE (SET_SRC (i));

	      if (SPE_VECTOR_MODE (mode))
		return true;
	      if (TARGET_E500_DOUBLE
		  && (mode == DFmode || FLOAT128_2REG_P (mode)))
		return true;
	    }
	}
    }

  return false;
}
27860 | ||
/* Dump the stack frame layout described by INFO to stderr, for
   debugging.  If INFO is null, compute the layout for the current
   function first via rs6000_stack_info.  Most fields are printed
   only when nonzero (or different from their no-save default), so
   absent lines mean "not used by this frame".  */

static void
debug_stack_info (rs6000_stack_t *info)
{
  const char *abi_string;

  if (! info)
    info = rs6000_stack_info ();

  fprintf (stderr, "\nStack information for function %s:\n",
	   ((current_function_decl && DECL_NAME (current_function_decl))
	    ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
	    : "<unknown>"));

  /* Translate the ABI enumerator into a human-readable tag.  */
  switch (info->abi)
    {
    default:		 abi_string = "Unknown";	break;
    case ABI_NONE:	 abi_string = "NONE";		break;
    case ABI_AIX:	 abi_string = "AIX";		break;
    case ABI_ELFv2:	 abi_string = "ELFv2";		break;
    case ABI_DARWIN:	 abi_string = "Darwin";		break;
    case ABI_V4:	 abi_string = "V.4";		break;
    }

  fprintf (stderr, "\tABI                 = %5s\n", abi_string);

  if (TARGET_ALTIVEC_ABI)
    fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");

  if (TARGET_SPE_ABI)
    fprintf (stderr, "\tSPE ABI extensions enabled.\n");

  /* 32 means "no GPR saved"; anything lower is the first saved GPR.  */
  if (info->first_gp_reg_save != 32)
    fprintf (stderr, "\tfirst_gp_reg_save   = %5d\n", info->first_gp_reg_save);

  /* 64 means "no FPR saved".  */
  if (info->first_fp_reg_save != 64)
    fprintf (stderr, "\tfirst_fp_reg_save   = %5d\n", info->first_fp_reg_save);

  if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
    fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
	     info->first_altivec_reg_save);

  if (info->lr_save_p)
    fprintf (stderr, "\tlr_save_p           = %5d\n", info->lr_save_p);

  if (info->cr_save_p)
    fprintf (stderr, "\tcr_save_p           = %5d\n", info->cr_save_p);

  if (info->vrsave_mask)
    fprintf (stderr, "\tvrsave_mask         = 0x%x\n", info->vrsave_mask);

  if (info->push_p)
    fprintf (stderr, "\tpush_p              = %5d\n", info->push_p);

  if (info->calls_p)
    fprintf (stderr, "\tcalls_p             = %5d\n", info->calls_p);

  /* Save-area offsets (relative to the frame).  */
  if (info->gp_size)
    fprintf (stderr, "\tgp_save_offset      = %5d\n", info->gp_save_offset);

  if (info->fp_size)
    fprintf (stderr, "\tfp_save_offset      = %5d\n", info->fp_save_offset);

  if (info->altivec_size)
    fprintf (stderr, "\taltivec_save_offset = %5d\n",
	     info->altivec_save_offset);

  if (info->spe_gp_size)
    fprintf (stderr, "\tspe_gp_save_offset  = %5d\n",
	     info->spe_gp_save_offset);

  if (info->vrsave_size)
    fprintf (stderr, "\tvrsave_save_offset  = %5d\n",
	     info->vrsave_save_offset);

  if (info->lr_save_p)
    fprintf (stderr, "\tlr_save_offset      = %5d\n", info->lr_save_offset);

  if (info->cr_save_p)
    fprintf (stderr, "\tcr_save_offset      = %5d\n", info->cr_save_offset);

  if (info->varargs_save_offset)
    fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);

  /* Aggregate sizes.  total_size and vars_size are HOST_WIDE_INT.  */
  if (info->total_size)
    fprintf (stderr, "\ttotal_size          = " HOST_WIDE_INT_PRINT_DEC"\n",
	     info->total_size);

  if (info->vars_size)
    fprintf (stderr, "\tvars_size           = " HOST_WIDE_INT_PRINT_DEC"\n",
	     info->vars_size);

  if (info->parm_size)
    fprintf (stderr, "\tparm_size           = %5d\n", info->parm_size);

  if (info->fixed_size)
    fprintf (stderr, "\tfixed_size          = %5d\n", info->fixed_size);

  if (info->gp_size)
    fprintf (stderr, "\tgp_size             = %5d\n", info->gp_size);

  if (info->spe_gp_size)
    fprintf (stderr, "\tspe_gp_size         = %5d\n", info->spe_gp_size);

  if (info->fp_size)
    fprintf (stderr, "\tfp_size             = %5d\n", info->fp_size);

  if (info->altivec_size)
    fprintf (stderr, "\taltivec_size        = %5d\n", info->altivec_size);

  if (info->vrsave_size)
    fprintf (stderr, "\tvrsave_size         = %5d\n", info->vrsave_size);

  if (info->altivec_padding_size)
    fprintf (stderr, "\taltivec_padding_size= %5d\n",
	     info->altivec_padding_size);

  if (info->spe_padding_size)
    fprintf (stderr, "\tspe_padding_size    = %5d\n",
	     info->spe_padding_size);

  if (info->cr_size)
    fprintf (stderr, "\tcr_size             = %5d\n", info->cr_size);

  if (info->save_size)
    fprintf (stderr, "\tsave_size           = %5d\n", info->save_size);

  /* reg_size is 4 for 32-bit, 8 for 64-bit; only the latter is shown.  */
  if (info->reg_size != 4)
    fprintf (stderr, "\treg_size            = %5d\n", info->reg_size);

  fprintf (stderr, "\tsave-strategy       =  %04x\n", info->savres_strategy);

  fprintf (stderr, "\n");
}
27994 | ||
/* Implement RETURN_ADDR_RTX: return an rtx for the return address of
   the frame COUNT steps up from the current one.  FRAME is the rtx of
   that frame's frame pointer, from which the saved return address is
   loaded at RETURN_ADDRESS_OFFSET.  For COUNT == 0 in a non-PIC
   function the value still lives in LR, so return its initial value
   instead of a memory load.  */

rtx
rs6000_return_addr (int count, rtx frame)
{
  /* Currently we don't optimize very well between prolog and body
     code and for PIC code the code can be actually quite bad, so
     don't try to be too clever here.  */
  if (count != 0
      || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
    {
      /* Force the full frame to be laid out so the saved LR slot and
	 the back chain actually exist.  */
      cfun->machine->ra_needs_full_frame = 1;

      /* Load the back chain through FRAME, then load the return
	 address at its fixed ABI offset from the back chain.  */
      return
	gen_rtx_MEM
	  (Pmode,
	   memory_address
	     (Pmode,
	      plus_constant (Pmode,
			     copy_to_reg
			       (gen_rtx_MEM (Pmode,
					     memory_address (Pmode, frame))),
			     RETURN_ADDRESS_OFFSET)));
    }

  cfun->machine->ra_need_lr = 1;
  return get_hard_reg_initial_val (Pmode, LR_REGNO);
}
28021 | ||
/* Say whether a function is a candidate for sibcall handling or not.
   DECL is the callee's FUNCTION_DECL, or NULL for an indirect call;
   EXP is the CALL_EXPR.  Implements TARGET_FUNCTION_OK_FOR_SIBCALL.  */

static bool
rs6000_function_ok_for_sibcall (tree decl, tree exp)
{
  tree fntype;

  /* For an indirect call, recover the function type from the call
     expression's function pointer.  */
  if (decl)
    fntype = TREE_TYPE (decl);
  else
    fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

  /* We can't do it if the called function has more vector parameters
     than the current function; there's nowhere to put the VRsave code.  */
  if (TARGET_ALTIVEC_ABI
      && TARGET_ALTIVEC_VRSAVE
      && !(decl && decl == current_function_decl))
    {
      function_args_iterator args_iter;
      tree type;
      int nvreg = 0;

      /* Functions with vector parameters are required to have a
	 prototype, so the argument type info must be available
	 here.  Count the callee's vector args...  */
      FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
	if (TREE_CODE (type) == VECTOR_TYPE
	    && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
	  nvreg++;

      /* ...and subtract the caller's; a positive balance means the
	 callee needs vector registers the caller didn't set up.  */
      FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
	if (TREE_CODE (type) == VECTOR_TYPE
	    && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
	  nvreg--;

      if (nvreg > 0)
	return false;
    }

  /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
     functions, because the callee may have a different TOC pointer to
     the caller and there's no way to ensure we restore the TOC when
     we return.  With the secure-plt SYSV ABI we can't make non-local
     calls when -fpic/PIC because the plt call stubs use r30.  */
  if (DEFAULT_ABI == ABI_DARWIN
      || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
	  && decl
	  && !DECL_EXTERNAL (decl)
	  && !DECL_WEAK (decl)
	  && (*targetm.binds_local_p) (decl))
      || (DEFAULT_ABI == ABI_V4
	  && (!TARGET_SECURE_PLT
	      || !flag_pic
	      || (decl
		  && (*targetm.binds_local_p) (decl)))))
    {
      tree attr_list = TYPE_ATTRIBUTES (fntype);

      /* "longcall" calls go through a pointer and can't be sibcalls,
	 unless overridden by "shortcall".  */
      if (!lookup_attribute ("longcall", attr_list)
	  || lookup_attribute ("shortcall", attr_list))
	return true;
    }

  return false;
}
28087 | ||
/* Return 1 if the return address (LR) may be clobbered somewhere in
   the current function outside its prologue/epilogue, so that it must
   be saved; return 0 otherwise.  Once rs6000_emit_eh_reg_restore has
   frozen the decision, the cached lr_save_state wins.  */

static int
rs6000_ra_ever_killed (void)
{
  rtx_insn *top;
  rtx reg;
  rtx_insn *insn;

  /* Thunks never need LR saved by this function's frame.  */
  if (cfun->is_thunk)
    return 0;

  /* lr_save_state: 0 = undecided, 1 = "no", 2 = "yes" (value - 1).  */
  if (cfun->machine->lr_save_state)
    return cfun->machine->lr_save_state - 1;

  /* regs_ever_live has LR marked as used if any sibcalls are present,
     but this should not force saving and restoring in the
     pro/epilogue.  Likewise, reg_set_between_p thinks a sibcall
     clobbers LR, so that is inappropriate.  */

  /* Also, the prologue can generate a store into LR that
     doesn't really count, like this:

       move LR->R0
       bcl to set PIC register
       move LR->R31
       move R0->LR

     When we're called from the epilogue, we need to avoid counting
     this as a store.  */

  push_topmost_sequence ();
  top = get_insns ();
  pop_topmost_sequence ();
  reg = gen_rtx_REG (Pmode, LR_REGNO);

  for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
    {
      if (INSN_P (insn))
	{
	  if (CALL_P (insn))
	    {
	      /* A real call clobbers LR; a sibcall does not count.  */
	      if (!SIBLING_CALL_P (insn))
		return 1;
	    }
	  else if (find_regno_note (insn, REG_INC, LR_REGNO))
	    return 1;
	  else if (set_of (reg, insn) != NULL_RTX
		   && !prologue_epilogue_contains (insn))
	    return 1;
	}
    }
  return 0;
}
28140 | \f | |
/* Emit instructions needed to load the TOC register.
   This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
   a constant pool; or for SVR4 -fpic.  FROMPROLOG is nonzero when
   called while emitting the prologue, in which case only hard
   registers may be used as temporaries.  */

void
rs6000_emit_load_toc_table (int fromprolog)
{
  rtx dest;
  dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);

  /* Secure-plt SVR4 PIC: materialize the GOT/TOC address from a
     local label via LR.  */
  if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
    {
      char buf[30];
      rtx lab, tmp1, tmp2, got;

      lab = gen_label_rtx ();
      ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
      lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
      /* -fPIC uses the TOC base label; -fpic uses the GOT symbol.  */
      if (flag_pic == 2)
	{
	  got = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
	  need_toc_init = 1;
	}
      else
	got = rs6000_got_sym ();
      tmp1 = tmp2 = dest;
      /* Outside the prologue we may use pseudos as scratch.  */
      if (!fromprolog)
	{
	  tmp1 = gen_reg_rtx (Pmode);
	  tmp2 = gen_reg_rtx (Pmode);
	}
      emit_insn (gen_load_toc_v4_PIC_1 (lab));
      emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
      emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
      emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
    }
  /* Plain SVR4 -fpic: the load_toc_v4_pic_si pattern leaves the GOT
     address in LR.  */
  else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
    {
      emit_insn (gen_load_toc_v4_pic_si ());
      emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
    }
  /* SVR4 -fPIC: compute the TOC base as "address of local label plus
     the label-to-TOC delta", loaded or computed into TEMP0.  */
  else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
    {
      char buf[30];
      rtx temp0 = (fromprolog
		   ? gen_rtx_REG (Pmode, 0)
		   : gen_reg_rtx (Pmode));

      if (fromprolog)
	{
	  rtx symF, symL;

	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
	  symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));

	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
	  symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));

	  emit_insn (gen_load_toc_v4_PIC_1 (symF));
	  emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
	  emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
	}
      else
	{
	  rtx tocsym, lab;

	  tocsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));
	  need_toc_init = 1;
	  lab = gen_label_rtx ();
	  emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
	  emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
	  /* With -mlink-stack the bcl return address is 4 bytes past
	     the label, so compensate.  */
	  if (TARGET_LINK_STACK)
	    emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
	  emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
	}
      emit_insn (gen_addsi3 (dest, temp0, dest));
    }
  else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
    {
      /* This is for AIX code running in non-PIC ELF32.  The TOC base
	 is a link-time constant, loaded with lis/addi.  */
      rtx realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (toc_label_name));

      need_toc_init = 1;
      emit_insn (gen_elf_high (dest, realsym));
      emit_insn (gen_elf_low (dest, dest, realsym));
    }
  else
    {
      /* AIX / ELFv2: reload r2 from the fixed TOC slot in the frame.  */
      gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);

      if (TARGET_32BIT)
	emit_insn (gen_load_toc_aix_si (dest));
      else
	emit_insn (gen_load_toc_aix_di (dest));
    }
}
28237 | ||
/* Emit instructions to restore the link register after determining where
   its value has been stored.  SOURCE holds the value to put back into
   LR (or its stack slot); SCRATCH is a register we may clobber to
   recompute the frame address when it is not a small constant offset
   from the stack pointer.  */

void
rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
{
  rs6000_stack_t *info = rs6000_stack_info ();
  rtx operands[2];

  operands[0] = source;
  operands[1] = scratch;

  if (info->lr_save_p)
    {
      rtx frame_rtx = stack_pointer_rtx;
      HOST_WIDE_INT sp_offset = 0;
      rtx tmp;

      /* If the frame base can't be addressed as sp + 16-bit offset,
	 load the back chain into SCRATCH and address from there.  */
      if (frame_pointer_needed
	  || cfun->calls_alloca
	  || info->total_size > 32767)
	{
	  tmp = gen_frame_mem (Pmode, frame_rtx);
	  emit_move_insn (operands[1], tmp);
	  frame_rtx = operands[1];
	}
      else if (info->push_p)
	sp_offset = info->total_size;

      /* Store SOURCE into the LR save slot of the caller's frame.  */
      tmp = plus_constant (Pmode, frame_rtx,
			   info->lr_save_offset + sp_offset);
      tmp = gen_frame_mem (Pmode, tmp);
      emit_move_insn (tmp, operands[0]);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);

  /* Freeze lr_save_p.  We've just emitted rtl that depends on the
     state of lr_save_p so any change from here on would be a bug.  In
     particular, stop rs6000_ra_ever_killed from considering the SET
     of lr we may have added just above.  */
  cfun->machine->lr_save_state = info->lr_save_p + 1;
}
28281 | ||
28282 | static GTY(()) alias_set_type set = -1; | |
28283 | ||
28284 | alias_set_type | |
28285 | get_TOC_alias_set (void) | |
28286 | { | |
28287 | if (set == -1) | |
28288 | set = new_alias_set (); | |
28289 | return set; | |
28290 | } | |
28291 | ||
/* This returns nonzero if the current function uses the TOC.  This is
   determined by the presence of (use (unspec ... UNSPEC_TOC)), which
   is generated by the ABI_V4 load_toc_* patterns.  */
#if TARGET_ELF
static int
uses_TOC (void)
{
  rtx_insn *insn;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      rtx pat;
      int i;

      if (!INSN_P (insn))
	continue;

      /* The load_toc_* patterns wrap the USE inside a PARALLEL, so
	 only PARALLEL patterns need inspection.  */
      pat = PATTERN (insn);
      if (GET_CODE (pat) != PARALLEL)
	continue;

      for (i = 0; i < XVECLEN (pat, 0); i++)
	{
	  rtx elt = XVECEXP (pat, 0, i);

	  if (GET_CODE (elt) != USE)
	    continue;

	  elt = XEXP (elt, 0);
	  if (GET_CODE (elt) == UNSPEC && XINT (elt, 1) == UNSPEC_TOC)
	    return 1;
	}
    }

  return 0;
}
#endif
28323 | ||
/* Return an rtx that addresses SYMBOL relative to the TOC register,
   as an UNSPEC_TOCREL.  For -mcmodel=small (or before register
   allocation) the tocrel is returned directly; otherwise it is split
   into a HIGH/LO_SUM pair, with the HIGH part optionally forced into
   LARGETOC_REG when that register is supplied.  */

rtx
create_TOC_reference (rtx symbol, rtx largetoc_reg)
{
  rtx tocrel, tocreg, hi;

  if (TARGET_DEBUG_ADDR)
    {
      if (GET_CODE (symbol) == SYMBOL_REF)
	fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
		 XSTR (symbol, 0));
      else
	{
	  fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
		   GET_RTX_NAME (GET_CODE (symbol)));
	  debug_rtx (symbol);
	}
    }

  /* After register allocation, make sure the TOC register is marked
     live since we are about to use it.  */
  if (!can_create_pseudo_p ())
    df_set_regs_ever_live (TOC_REGISTER, true);

  tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
  tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
  if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
    return tocrel;

  /* Medium/large code model: split into high and low parts.  */
  hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
  if (largetoc_reg != NULL)
    {
      emit_move_insn (largetoc_reg, hi);
      hi = largetoc_reg;
    }
  return gen_rtx_LO_SUM (Pmode, hi, tocrel);
}
28358 | ||
28359 | /* Issue assembly directives that create a reference to the given DWARF | |
28360 | FRAME_TABLE_LABEL from the current function section. */ | |
28361 | void | |
28362 | rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label) | |
28363 | { | |
28364 | fprintf (asm_out_file, "\t.ref %s\n", | |
28365 | (* targetm.strip_name_encoding) (frame_table_label)); | |
28366 | } | |
28367 | \f | |
/* This ties together stack memory (MEM with an alias set of frame_alias_set)
   and the change to the stack pointer.  FP is the frame base register in
   use; HARD_FRAME_NEEDED says whether the hard frame pointer must also
   participate.  Emits a single stack_tie insn that blocks the scheduler
   from moving frame accesses across stack pointer updates.  */

static void
rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
{
  rtvec p;
  int i;
  rtx regs[3];

  /* Collect the distinct base registers to tie: always sp, plus the
     hard frame pointer and/or FP when they differ.  */
  i = 0;
  regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  if (hard_frame_needed)
    regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
  if (!(REGNO (fp) == STACK_POINTER_REGNUM
	|| (hard_frame_needed
	    && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
    regs[i++] = fp;

  /* Build one (set (mem:BLK (reg)) 0) per base register.  */
  p = rtvec_alloc (i);
  while (--i >= 0)
    {
      rtx mem = gen_frame_mem (BLKmode, regs[i]);
      RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
    }

  emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
}
28396 | ||
/* Emit the correct code for allocating stack space, as insns.
   If COPY_REG, make sure a copy of the old frame is left there.
   The generated code may use hard register 0 as a temporary.
   SIZE is the number of bytes to allocate; COPY_OFF is an offset to
   add when saving the old stack pointer into COPY_REG.  Returns the
   stack-update insn (marked frame-related), or 0 if SIZE does not
   fit in the mode.  */

static rtx_insn *
rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
{
  rtx_insn *insn;
  rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx tmp_reg = gen_rtx_REG (Pmode, 0);
  rtx todec = gen_int_mode (-size, Pmode);
  rtx par, set, mem;

  /* -size must survive the round-trip through Pmode; otherwise the
     frame is too large to represent.  */
  if (INTVAL (todec) != -size)
    {
      warning (0, "stack frame too large");
      emit_insn (gen_trap ());
      return 0;
    }

  /* -fstack-limit-register / -fstack-limit-symbol: trap if the new
     stack pointer would drop below the limit.  */
  if (crtl->limit_stack)
    {
      if (REG_P (stack_limit_rtx)
	  && REGNO (stack_limit_rtx) > 1
	  && REGNO (stack_limit_rtx) <= 31)
	{
	  emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
	  emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
				    const0_rtx));
	}
      else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
	       && TARGET_32BIT
	       && DEFAULT_ABI == ABI_V4
	       && !flag_pic)
	{
	  /* Materialize symbol+size with lis/addi, then compare.  */
	  rtx toload = gen_rtx_CONST (VOIDmode,
				      gen_rtx_PLUS (Pmode,
						    stack_limit_rtx,
						    GEN_INT (size)));

	  emit_insn (gen_elf_high (tmp_reg, toload));
	  emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
	  emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
				    const0_rtx));
	}
      else
	warning (0, "stack limit expression is not supported");
    }

  /* Preserve the old stack pointer (optionally biased by COPY_OFF)
     before it is decremented.  */
  if (copy_reg)
    {
      if (copy_off != 0)
	emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
      else
	emit_move_insn (copy_reg, stack_reg);
    }

  /* Decrements too large for a 16-bit immediate go through r0.  */
  if (size > 32767)
    {
      /* Need a note here so that try_split doesn't get confused.  */
      if (get_last_insn () == NULL_RTX)
	emit_note (NOTE_INSN_DELETED);
      insn = emit_move_insn (tmp_reg, todec);
      try_split (PATTERN (insn), insn, 0);
      todec = tmp_reg;
    }

  /* Atomically store the back chain and update the stack pointer
     (stwu/stdu semantics).  */
  insn = emit_insn (TARGET_32BIT
		    ? gen_movsi_update_stack (stack_reg, stack_reg,
					      todec, stack_reg)
		    : gen_movdi_di_update_stack (stack_reg, stack_reg,
						 todec, stack_reg));
  /* Since we didn't use gen_frame_mem to generate the MEM, grab
     it now and set the alias set/attributes.  The above gen_*_update
     calls will generate a PARALLEL with the MEM set being the first
     operation.  */
  par = PATTERN (insn);
  gcc_assert (GET_CODE (par) == PARALLEL);
  set = XVECEXP (par, 0, 0);
  gcc_assert (GET_CODE (set) == SET);
  mem = SET_DEST (set);
  gcc_assert (MEM_P (mem));
  MEM_NOTRAP_P (mem) = 1;
  set_mem_alias_set (mem, get_frame_alias_set ());

  /* Record a simple sp = sp - size expression for the unwinder.  */
  RTX_FRAME_RELATED_P (insn) = 1;
  add_reg_note (insn, REG_FRAME_RELATED_EXPR,
		gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
						      GEN_INT (-size))));
  return insn;
}
28488 | ||
/* Interval between consecutive stack probes, from the target's
   STACK_CHECK_PROBE_INTERVAL_EXP setting.  */
#define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)

/* The probing loop addresses probes with a 16-bit displacement, so the
   interval must fit.  */
#if PROBE_INTERVAL > 32768
#error Cannot use indexed addressing mode for stack probing
#endif

/* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
   inclusive.  These are offsets from the current stack pointer.  */

static void
rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
{
  /* See if we have a constant small number of probes to generate.  If so,
     that's the easy case.  */
  if (first + size <= 32768)
    {
      HOST_WIDE_INT i;

      /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
	 it exceeds SIZE.  If only one probe is needed, this will not
	 generate any code.  Then probe at FIRST + SIZE.  */
      for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
	emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
					 -(first + i)));

      emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
				       -(first + size)));
    }

  /* Otherwise, do the same as above, but in a loop.  Note that we must be
     extra careful with variables wrapping around because we might be at
     the very top (or the very bottom) of the address space and we have
     to be able to handle this case properly; in particular, we use an
     equality test for the loop condition.  */
  else
    {
      HOST_WIDE_INT rounded_size;
      /* r12 walks the probed addresses; r0 holds the loop bound.  */
      rtx r12 = gen_rtx_REG (Pmode, 12);
      rtx r0 = gen_rtx_REG (Pmode, 0);

      /* Sanity check for the addressing mode we're going to use.  */
      gcc_assert (first <= 32768);

      /* Step 1: round SIZE to the previous multiple of the interval.  */

      rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);


      /* Step 2: compute initial and final value of the loop counter.  */

      /* TEST_ADDR = SP + FIRST.  */
      emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
						  -first)));

      /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE.  */
      if (rounded_size > 32768)
	{
	  emit_move_insn (r0, GEN_INT (-rounded_size));
	  emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
	}
      else
	emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
						   -rounded_size)));


      /* Step 3: the loop

	 do
	   {
	     TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
	     probe at TEST_ADDR
	   }
	 while (TEST_ADDR != LAST_ADDR)

	 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
	 until it is equal to ROUNDED_SIZE.  */

      if (TARGET_64BIT)
	emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
      else
	emit_insn (gen_probe_stack_rangesi (r12, r12, r0));


      /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
	 that SIZE is equal to ROUNDED_SIZE.  */

      if (size != rounded_size)
	emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
    }
}
28579 | ||
/* Probe a range of stack addresses from REG1 to REG2 inclusive.  These are
   absolute addresses.  This is the output routine for the probe_stack_range
   insns (see gen_probe_stack_range{si,di} above); it prints the loop as
   text and returns the empty template expected by the output machinery.  */

const char *
output_probe_stack_range (rtx reg1, rtx reg2)
{
  /* Counter making each expansion's loop label unique.  */
  static int labelno = 0;
  char loop_lab[32];
  rtx xops[2];

  ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);

  /* Loop.  */
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);

  /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL.  */
  xops[0] = reg1;
  xops[1] = GEN_INT (-PROBE_INTERVAL);
  output_asm_insn ("addi %0,%0,%1", xops);

  /* Probe at TEST_ADDR.  A word store is used in both 32- and 64-bit
     modes; the stored value (r0) is irrelevant, only the touch matters.  */
  xops[1] = gen_rtx_REG (Pmode, 0);
  output_asm_insn ("stw %1,0(%0)", xops);

  /* Test if TEST_ADDR == LAST_ADDR, using a doubleword compare when
     pointers are 64 bits wide.  */
  xops[1] = reg2;
  if (TARGET_64BIT)
    output_asm_insn ("cmpd 0,%0,%1", xops);
  else
    output_asm_insn ("cmpw 0,%0,%1", xops);

  /* Branch back to the loop label while TEST_ADDR != LAST_ADDR.  */
  fputs ("\tbne 0,", asm_out_file);
  assemble_name_raw (asm_out_file, loop_lab);
  fputc ('\n', asm_out_file);

  return "";
}
28618 | ||
/* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
   with (plus:P (reg 1) VAL), and with REG2 replaced with REPL2 if REG2
   is not NULL.  It would be nice if dwarf2out_frame_debug_expr could
   deduce these equivalences by itself so it wasn't necessary to hold
   its hand so much.  Don't be tempted to always supply d2_f_d_e with
   the actual cfa register, ie. r31 when we are using a hard frame
   pointer.  That fails when saving regs off r1, and sched moves the
   r31 setup past the reg saves.  Returns INSN, now marked frame
   related.  */

static rtx_insn *
rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
		      rtx reg2, rtx repl2)
{
  rtx repl;

  if (REGNO (reg) == STACK_POINTER_REGNUM)
    {
      /* REG already is the stack pointer; no replacement expression is
	 needed, and VAL must be zero in that case.  */
      gcc_checking_assert (val == 0);
      repl = NULL_RTX;
    }
  else
    /* Describe REG to the unwinder as sp + VAL.  */
    repl = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
			 GEN_INT (val));

  rtx pat = PATTERN (insn);
  if (!repl && !reg2)
    {
      /* No need for any replacement.  Just set RTX_FRAME_RELATED_P.  */
      if (GET_CODE (pat) == PARALLEL)
	for (int i = 0; i < XVECLEN (pat, 0); i++)
	  if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
	    {
	      rtx set = XVECEXP (pat, 0, i);

	      /* If this PARALLEL has been emitted for out-of-line
		 register save functions, or store multiple, then omit
		 eh_frame info for any user-defined global regs.  If
		 eh_frame info is supplied, frame unwinding will
		 restore a user reg.  */
	      if (!REG_P (SET_SRC (set))
		  || !fixed_reg_p (REGNO (SET_SRC (set))))
		RTX_FRAME_RELATED_P (set) = 1;
	    }
      RTX_FRAME_RELATED_P (insn) = 1;
      return insn;
    }

  /* We expect that 'pat' is either a SET or a PARALLEL containing
     SETs (and possibly other stuff).  In a PARALLEL, all the SETs
     are important so they all have to be marked RTX_FRAME_RELATED_P.
     Call simplify_replace_rtx on the SETs rather than the whole insn
     so as to leave the other stuff alone (for example USE of r12).  */

  set_used_flags (pat);
  if (GET_CODE (pat) == SET)
    {
      if (repl)
	pat = simplify_replace_rtx (pat, reg, repl);
      if (reg2)
	pat = simplify_replace_rtx (pat, reg2, repl2);
    }
  else if (GET_CODE (pat) == PARALLEL)
    {
      /* Shallow-copy the PARALLEL and its vector so the substitutions
	 below do not clobber the insn's own pattern.  */
      pat = shallow_copy_rtx (pat);
      XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));

      for (int i = 0; i < XVECLEN (pat, 0); i++)
	if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
	  {
	    rtx set = XVECEXP (pat, 0, i);

	    if (repl)
	      set = simplify_replace_rtx (set, reg, repl);
	    if (reg2)
	      set = simplify_replace_rtx (set, reg2, repl2);
	    XVECEXP (pat, 0, i) = set;

	    /* Omit eh_frame info for any user-defined global regs.  */
	    if (!REG_P (SET_SRC (set))
		|| !fixed_reg_p (REGNO (SET_SRC (set))))
	      RTX_FRAME_RELATED_P (set) = 1;
	  }
    }
  else
    gcc_unreachable ();

  RTX_FRAME_RELATED_P (insn) = 1;
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));

  return insn;
}
28710 | ||
/* Returns an insn that has a vrsave set operation with the
   appropriate CLOBBERs.  REG holds the new VRSAVE value; INFO supplies
   the mask of AltiVec registers involved; EPILOGUEP is nonzero when
   this is being emitted in an epilogue.  */

static rtx
generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
{
  int nclobs, i;
  rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
  rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);

  /* Element 0 is the VRSAVE update itself, expressed as a volatile
     unspec so it cannot be deleted or reordered.  */
  clobs[0]
    = gen_rtx_SET (vrsave,
		   gen_rtx_UNSPEC_VOLATILE (SImode,
					    gen_rtvec (2, reg, vrsave),
					    UNSPECV_SET_VRSAVE));

  nclobs = 1;

  /* We need to clobber the registers in the mask so the scheduler
     does not move sets to VRSAVE before sets of AltiVec registers.

     However, if the function receives nonlocal gotos, reload will set
     all call saved registers live.  We will end up with:

     (set (reg 999) (mem))
     (parallel [ (set (reg vrsave) (unspec blah))
		 (clobber (reg 999))])

     The clobber will cause the store into reg 999 to be dead, and
     flow will attempt to delete an epilogue insn.  In this case, we
     need an unspec use/set of the register.  */

  for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
    if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
      {
	if (!epiloguep || call_used_regs [i])
	  clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
					     gen_rtx_REG (V4SImode, i));
	else
	  {
	    rtx reg = gen_rtx_REG (V4SImode, i);

	    /* NOTE(review): bare unspec number 27 — presumably a
	       dedicated "register is live through here" unspec;
	       confirm against the UNSPEC list in rs6000.md.  */
	    clobs[nclobs++]
	      = gen_rtx_SET (reg,
			     gen_rtx_UNSPEC (V4SImode,
					     gen_rtvec (1, reg), 27));
	  }
      }

  /* Wrap the VRSAVE set and all clobbers/uses in one PARALLEL.  */
  insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));

  for (i = 0; i < nclobs; ++i)
    XVECEXP (insn, 0, i) = clobs[i];

  return insn;
}
28767 | ||
28768 | static rtx | |
28769 | gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store) | |
28770 | { | |
28771 | rtx addr, mem; | |
28772 | ||
28773 | addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset)); | |
28774 | mem = gen_frame_mem (GET_MODE (reg), addr); | |
28775 | return gen_rtx_SET (store ? mem : reg, store ? reg : mem); | |
28776 | } | |
28777 | ||
28778 | static rtx | |
28779 | gen_frame_load (rtx reg, rtx frame_reg, int offset) | |
28780 | { | |
28781 | return gen_frame_set (reg, frame_reg, offset, false); | |
28782 | } | |
28783 | ||
28784 | static rtx | |
28785 | gen_frame_store (rtx reg, rtx frame_reg, int offset) | |
28786 | { | |
28787 | return gen_frame_set (reg, frame_reg, offset, true); | |
28788 | } | |
28789 | ||
/* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
   Save REGNO into [FRAME_REG + OFFSET] in mode MODE.  FRAME_REG_TO_SP
   is the offset from FRAME_REG to the stack pointer, used so the
   unwind note can describe the save address relative to sp.  Returns
   the emitted, frame-related insn.  */

static rtx_insn *
emit_frame_save (rtx frame_reg, machine_mode mode,
		 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
{
  rtx reg;

  /* Some cases that need register indexed addressing.  Callers must
     not hand us those; a plain reg+offset store is emitted below.  */
  gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
			 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
			 || (TARGET_E500_DOUBLE && mode == DFmode)
			 || (TARGET_SPE_ABI
			     && SPE_VECTOR_MODE (mode)
			     && !SPE_CONST_OFFSET_OK (offset))));

  reg = gen_rtx_REG (mode, regno);
  rtx_insn *insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
  return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
			       NULL_RTX, NULL_RTX);
}
28812 | ||
28813 | /* Emit an offset memory reference suitable for a frame store, while | |
28814 | converting to a valid addressing mode. */ | |
28815 | ||
28816 | static rtx | |
28817 | gen_frame_mem_offset (machine_mode mode, rtx reg, int offset) | |
28818 | { | |
28819 | rtx int_rtx, offset_rtx; | |
28820 | ||
28821 | int_rtx = GEN_INT (offset); | |
28822 | ||
28823 | if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset)) | |
28824 | || (TARGET_E500_DOUBLE && mode == DFmode)) | |
28825 | { | |
28826 | offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH); | |
28827 | emit_move_insn (offset_rtx, int_rtx); | |
28828 | } | |
28829 | else | |
28830 | offset_rtx = int_rtx; | |
28831 | ||
28832 | return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx)); | |
28833 | } | |
28834 | ||
#ifndef TARGET_FIX_AND_CONTINUE
#define TARGET_FIX_AND_CONTINUE 0
#endif

/* It's really GPR 13 or 14, FPR 14 and VR 20.  We need the smallest.  */
#define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
#define LAST_SAVRES_REGISTER 31
#define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)

/* Selector bits describing an out-of-line save/restore routine:
   SAVRES_LR — the routine also handles the link register;
   SAVRES_SAVE — saving (set) vs restoring (clear);
   SAVRES_REG — two-bit mask holding the register class below.  */
enum {
  SAVRES_LR = 0x1,
  SAVRES_SAVE = 0x2,
  SAVRES_REG = 0x0c,
  SAVRES_GPR = 0,
  SAVRES_FPR = 4,
  SAVRES_VR = 8
};

/* Cache of SYMBOL_REFs for the routines, indexed by first register
   and selector value (selector range checked to be 0..12).  */
static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];

/* Temporary holding space for an out-of-line register save/restore
   routine name.  */
static char savres_routine_name[30];
28858 | ||
/* Return the name for an out-of-line register save/restore routine.
   We are saving/restoring GPRs if GPR is true.  REGNO is the first
   register handled by the routine; SEL is a mask of SAVRES_* bits.
   The result points into the static savres_routine_name buffer and is
   overwritten by the next call.  */

static char *
rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
{
  const char *prefix = "";
  const char *suffix = "";

  /* Different targets are supposed to define
     {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
     routine name could be defined with:

     sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)

     This is a nice idea in practice, but in reality, things are
     complicated in several ways:

     - ELF targets have save/restore routines for GPRs.

     - SPE targets use different prefixes for 32/64-bit registers, and
       neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.

     - PPC64 ELF targets have routines for save/restore of GPRs that
       differ in what they do with the link register, so having a set
       prefix doesn't work.  (We only use one of the save routines at
       the moment, though.)

     - PPC32 elf targets have "exit" versions of the restore routines
       that restore the link register and can save some extra space.
       These require an extra suffix.  (There are also "tail" versions
       of the restore routines and "GOT" versions of the save routines,
       but we don't generate those at present.  Same problems apply,
       though.)

     We deal with all this by synthesizing our own prefix/suffix and
     using that for the simple sprintf call shown above.  */
  if (TARGET_SPE)
    {
      /* No floating point saves on the SPE.  */
      gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);

      if ((sel & SAVRES_SAVE))
	prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
      else
	prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";

      if ((sel & SAVRES_LR))
	suffix = "_x";
    }
  else if (DEFAULT_ABI == ABI_V4)
    {
      /* 64-bit SVR4 shares the AIX/ELFv2 routine names.  */
      if (TARGET_64BIT)
	goto aix_names;

      if ((sel & SAVRES_REG) == SAVRES_GPR)
	prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
      else if ((sel & SAVRES_REG) == SAVRES_FPR)
	prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
      else if ((sel & SAVRES_REG) == SAVRES_VR)
	prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
      else
	abort ();

      if ((sel & SAVRES_LR))
	suffix = "_x";
    }
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    {
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
      /* No out-of-line save/restore routines for GPRs on AIX.  */
      gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
#endif

    aix_names:
      if ((sel & SAVRES_REG) == SAVRES_GPR)
	prefix = ((sel & SAVRES_SAVE)
		  ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
		  : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
      else if ((sel & SAVRES_REG) == SAVRES_FPR)
	{
#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
	  if ((sel & SAVRES_LR))
	    prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
	  else
#endif
	    {
	      prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
	      suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
	    }
	}
      else if ((sel & SAVRES_REG) == SAVRES_VR)
	prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
      else
	abort ();
    }

  if (DEFAULT_ABI == ABI_DARWIN)
    {
      /* The Darwin approach is (slightly) different, in order to be
	 compatible with code generated by the system toolchain.  There is a
	 single symbol for the start of save sequence, and the code here
	 embeds an offset into that code on the basis of the first register
	 to be saved.  */
      prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
      if ((sel & SAVRES_REG) == SAVRES_GPR)
	sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
		 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
		 (regno - 13) * 4, prefix, regno);
      else if ((sel & SAVRES_REG) == SAVRES_FPR)
	sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
		 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
      else if ((sel & SAVRES_REG) == SAVRES_VR)
	sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
		 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
      else
	abort ();
    }
  else
    sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);

  return savres_routine_name;
}
28982 | ||
/* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
   We are saving/restoring GPRs if GPR is true.  SEL is a mask of SAVRES_*
   bits.  Symbols are cached in savres_routine_syms so each routine name is
   created only once.  */

static rtx
rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
{
  /* First register the routine handles, taken from the register class
     selected by SEL; FPR/VR numbers are biased to fit the cache index.  */
  int regno = ((sel & SAVRES_REG) == SAVRES_GPR
	       ? info->first_gp_reg_save
	       : (sel & SAVRES_REG) == SAVRES_FPR
	       ? info->first_fp_reg_save - 32
	       : (sel & SAVRES_REG) == SAVRES_VR
	       ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
	       : -1);
  rtx sym;
  int select = sel;

  /* On the SPE, we never have any FPRs, but we do have 32/64-bit
     versions of the gpr routines.  */
  if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
      && info->spe_64bit_regs_used)
    select ^= SAVRES_FPR ^ SAVRES_GPR;

  /* Don't generate bogus routine names.  */
  gcc_assert (FIRST_SAVRES_REGISTER <= regno
	      && regno <= LAST_SAVRES_REGISTER
	      && select >= 0 && select <= 12);

  sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];

  if (sym == NULL)
    {
      char *name;

      name = rs6000_savres_routine_name (info, regno, sel);

      /* The name lives in a static buffer, so make a permanent copy
	 before hanging a SYMBOL_REF off it.  */
      sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
	= gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
      SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
    }

  return sym;
}
29025 | ||
29026 | /* Emit a sequence of insns, including a stack tie if needed, for | |
29027 | resetting the stack pointer. If UPDT_REGNO is not 1, then don't | |
29028 | reset the stack pointer, but move the base of the frame into | |
29029 | reg UPDT_REGNO for use by out-of-line register restore routines. */ | |
29030 | ||
29031 | static rtx | |
29032 | rs6000_emit_stack_reset (rs6000_stack_t *info, | |
29033 | rtx frame_reg_rtx, HOST_WIDE_INT frame_off, | |
29034 | unsigned updt_regno) | |
29035 | { | |
29036 | /* If there is nothing to do, don't do anything. */ | |
29037 | if (frame_off == 0 && REGNO (frame_reg_rtx) == updt_regno) | |
29038 | return NULL_RTX; | |
29039 | ||
29040 | rtx updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno); | |
29041 | ||
29042 | /* This blockage is needed so that sched doesn't decide to move | |
29043 | the sp change before the register restores. */ | |
29044 | if (DEFAULT_ABI == ABI_V4 | |
29045 | || (TARGET_SPE_ABI | |
29046 | && info->spe_64bit_regs_used != 0 | |
29047 | && info->first_gp_reg_save != 32)) | |
29048 | return emit_insn (gen_stack_restore_tie (updt_reg_rtx, frame_reg_rtx, | |
29049 | GEN_INT (frame_off))); | |
29050 | ||
29051 | /* If we are restoring registers out-of-line, we will be using the | |
29052 | "exit" variants of the restore routines, which will reset the | |
29053 | stack for us. But we do need to point updt_reg into the | |
29054 | right place for those routines. */ | |
29055 | if (frame_off != 0) | |
29056 | return emit_insn (gen_add3_insn (updt_reg_rtx, | |
29057 | frame_reg_rtx, GEN_INT (frame_off))); | |
29058 | else | |
29059 | return emit_move_insn (updt_reg_rtx, frame_reg_rtx); | |
29060 | ||
29061 | return NULL_RTX; | |
29062 | } | |
29063 | ||
29064 | /* Return the register number used as a pointer by out-of-line | |
29065 | save/restore functions. */ | |
29066 | ||
29067 | static inline unsigned | |
29068 | ptr_regno_for_savres (int sel) | |
29069 | { | |
29070 | if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
29071 | return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12; | |
29072 | return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11; | |
29073 | } | |
29074 | ||
/* Construct a parallel rtx describing the effect of a call to an
   out-of-line register save/restore routine, and emit the insn
   or jump_insn as appropriate.  SAVE_AREA_OFFSET is the frame offset
   of the first register slot relative to FRAME_REG_RTX; LR_OFFSET is
   where LR (via r0) is stored when SEL has SAVRES_LR set on a save.
   Returns the emitted insn.  */

static rtx_insn *
rs6000_emit_savres_rtx (rs6000_stack_t *info,
			rtx frame_reg_rtx, int save_area_offset, int lr_offset,
			machine_mode reg_mode, int sel)
{
  int i;
  /* OFFSET counts elements already filled into the rtvec P.  */
  int offset, start_reg, end_reg, n_regs, use_reg;
  int reg_size = GET_MODE_SIZE (reg_mode);
  rtx sym;
  rtvec p;
  rtx par;
  rtx_insn *insn;

  offset = 0;
  /* First and one-past-last register of the class selected by SEL.  */
  start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
	       ? info->first_gp_reg_save
	       : (sel & SAVRES_REG) == SAVRES_FPR
	       ? info->first_fp_reg_save
	       : (sel & SAVRES_REG) == SAVRES_VR
	       ? info->first_altivec_reg_save
	       : -1);
  end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
	     ? 32
	     : (sel & SAVRES_REG) == SAVRES_FPR
	     ? 64
	     : (sel & SAVRES_REG) == SAVRES_VR
	     ? LAST_ALTIVEC_REGNO + 1
	     : -1);
  n_regs = end_reg - start_reg;
  /* Room for: optional return, LR clobber, routine USE, pointer reg
     (plus an extra USE for VR), the register sets, and an optional
     LR store.  */
  p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
		   + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
		   + n_regs);

  /* An "exit" restore routine returns for us, so the insn is a jump.  */
  if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
    RTVEC_ELT (p, offset++) = ret_rtx;

  /* The call clobbers LR.  */
  RTVEC_ELT (p, offset++)
    = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));

  sym = rs6000_savres_routine_sym (info, sel);
  RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);

  use_reg = ptr_regno_for_savres (sel);
  if ((sel & SAVRES_REG) == SAVRES_VR)
    {
      /* Vector regs are saved/restored using [reg+reg] addressing.  */
      RTVEC_ELT (p, offset++)
	= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
      RTVEC_ELT (p, offset++)
	= gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
    }
  else
    RTVEC_ELT (p, offset++)
      = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));

  /* One frame set per register, at consecutive reg_size offsets.  */
  for (i = 0; i < end_reg - start_reg; i++)
    RTVEC_ELT (p, i + offset)
      = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
		       frame_reg_rtx, save_area_offset + reg_size * i,
		       (sel & SAVRES_SAVE) != 0);

  /* A save routine with LR handling also stores r0 (holding LR).  */
  if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
    RTVEC_ELT (p, i + offset)
      = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);

  par = gen_rtx_PARALLEL (VOIDmode, p);

  if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
    {
      insn = emit_jump_insn (par);
      JUMP_LABEL (insn) = ret_rtx;
    }
  else
    insn = emit_insn (par);
  return insn;
}
29155 | ||
/* Emit code to store CR fields that need to be saved into REG.  */

static void
rs6000_emit_move_from_cr (rtx reg)
{
  /* Only the ELFv2 ABI allows storing only selected fields.  */
  if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
    {
      int i, cr_reg[8], count = 0;

      /* Collect CR fields that must be saved.  */
      for (i = 0; i < 8; i++)
	if (save_reg_p (CR0_REGNO + i))
	  cr_reg[count++] = i;

      /* If it's just a single one, use mfcrf.  */
      if (count == 1)
	{
	  rtvec p = rtvec_alloc (1);
	  rtvec r = rtvec_alloc (2);
	  /* Operand 0 is the CR field; operand 1 is its one-hot field
	     mask (field i maps to mask bit 7-i).  */
	  RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
	  RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
	  RTVEC_ELT (p, 0)
	    = gen_rtx_SET (reg,
			   gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));

	  emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
	  return;
	}

      /* ??? It might be better to handle count == 2 / 3 cases here
	 as well, using logical operations to combine the values.  */
    }

  /* Fall back to moving the whole CR at once.  */
  emit_insn (gen_movesi_from_cr (reg));
}
29192 | ||
/* Return whether the split-stack arg pointer (r12) is used.  */

static bool
split_stack_arg_pointer_used_p (void)
{
  /* If the pseudo holding the arg pointer is no longer a pseudo,
     then the arg pointer is used.  */
  if (cfun->machine->split_stack_arg_pointer != NULL_RTX
      && (!REG_P (cfun->machine->split_stack_arg_pointer)
	  || (REGNO (cfun->machine->split_stack_arg_pointer)
	      < FIRST_PSEUDO_REGISTER)))
    return true;

  /* Unfortunately we also need to do some code scanning, since
     r12 may have been substituted for the pseudo.  Only the first
     block after the entry block is scanned.  */
  rtx_insn *insn;
  basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
  FOR_BB_INSNS (bb, insn)
    if (NONDEBUG_INSN_P (insn))
      {
	/* A call destroys r12.  */
	if (CALL_P (insn))
	  return false;

	/* A use of r12 before any redefinition means the incoming
	   value is live.  */
	df_ref use;
	FOR_EACH_INSN_USE (use, insn)
	  {
	    rtx x = DF_REF_REG (use);
	    if (REG_P (x) && REGNO (x) == 12)
	      return true;
	  }
	/* A definition of r12 kills the incoming value.  */
	df_ref def;
	FOR_EACH_INSN_DEF (def, insn)
	  {
	    rtx x = DF_REF_REG (def);
	    if (REG_P (x) && REGNO (x) == 12)
	      return false;
	  }
      }
  /* Fall back on liveness at the end of the block.  */
  return bitmap_bit_p (DF_LR_OUT (bb), 12);
}
29234 | ||
29235 | /* Return whether we need to emit an ELFv2 global entry point prologue. */ | |
29236 | ||
29237 | static bool | |
29238 | rs6000_global_entry_point_needed_p (void) | |
29239 | { | |
29240 | /* Only needed for the ELFv2 ABI. */ | |
29241 | if (DEFAULT_ABI != ABI_ELFv2) | |
29242 | return false; | |
29243 | ||
29244 | /* With -msingle-pic-base, we assume the whole program shares the same | |
29245 | TOC, so no global entry point prologues are needed anywhere. */ | |
29246 | if (TARGET_SINGLE_PIC_BASE) | |
29247 | return false; | |
29248 | ||
29249 | /* Ensure we have a global entry point for thunks. ??? We could | |
29250 | avoid that if the target routine doesn't need a global entry point, | |
29251 | but we do not know whether this is the case at this point. */ | |
29252 | if (cfun->is_thunk) | |
29253 | return true; | |
29254 | ||
29255 | /* For regular functions, rs6000_emit_prologue sets this flag if the | |
29256 | routine ever uses the TOC pointer. */ | |
29257 | return cfun->machine->r2_setup_needed; | |
29258 | } | |
29259 | ||
/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.  Returns the
   set of frame components that may be shrink-wrapped separately, or
   NULL when separate shrink-wrapping is not supported for this
   function (world saves, SPE).  */
static sbitmap
rs6000_get_separate_components (void)
{
  rs6000_stack_t *info = rs6000_stack_info ();

  if (WORLD_SAVE_P (info))
    return NULL;

  if (TARGET_SPE_ABI)
    return NULL;

  /* Separate components only work with individual saves/restores,
     never with store/load-multiple.  */
  gcc_assert (!(info->savres_strategy & SAVE_MULTIPLE)
	      && !(info->savres_strategy & REST_MULTIPLE));

  /* Component 0 is the save/restore of LR (done via GPR0).
     Components 13..31 are the save/restore of GPR13..GPR31.
     Components 46..63 are the save/restore of FPR14..FPR31.  */

  cfun->machine->n_components = 64;

  sbitmap components = sbitmap_alloc (cfun->machine->n_components);
  bitmap_clear (components);

  int reg_size = TARGET_32BIT ? 4 : 8;
  int fp_reg_size = 8;

  /* The GPRs we need saved to the frame.  Only registers whose slot
     is reachable with a 16-bit signed offset qualify.  */
  if ((info->savres_strategy & SAVE_INLINE_GPRS)
      && (info->savres_strategy & REST_INLINE_GPRS))
    {
      int offset = info->gp_save_offset;
      if (info->push_p)
	offset += info->total_size;

      for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
	{
	  if (IN_RANGE (offset, -0x8000, 0x7fff)
	      && rs6000_reg_live_or_pic_offset_p (regno))
	    bitmap_set_bit (components, regno);

	  offset += reg_size;
	}
    }

  /* Don't mess with the hard frame pointer.  */
  if (frame_pointer_needed)
    bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);

  /* Don't mess with the fixed TOC register.  */
  if ((TARGET_TOC && TARGET_MINIMAL_TOC)
      || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
      || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
    bitmap_clear_bit (components, RS6000_PIC_OFFSET_TABLE_REGNUM);

  /* The FPRs we need saved to the frame.  */
  if ((info->savres_strategy & SAVE_INLINE_FPRS)
      && (info->savres_strategy & REST_INLINE_FPRS))
    {
      int offset = info->fp_save_offset;
      if (info->push_p)
	offset += info->total_size;

      for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
	{
	  if (IN_RANGE (offset, -0x8000, 0x7fff) && save_reg_p (regno))
	    bitmap_set_bit (components, regno);

	  offset += fp_reg_size;
	}
    }

  /* Optimize LR save and restore if we can.  This is component 0.  Any
     out-of-line register save/restore routines need LR.  */
  if (info->lr_save_p
      && !(flag_pic && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
      && (info->savres_strategy & SAVE_INLINE_GPRS)
      && (info->savres_strategy & REST_INLINE_GPRS)
      && (info->savres_strategy & SAVE_INLINE_FPRS)
      && (info->savres_strategy & REST_INLINE_FPRS)
      && (info->savres_strategy & SAVE_INLINE_VRS)
      && (info->savres_strategy & REST_INLINE_VRS))
    {
      int offset = info->lr_save_offset;
      if (info->push_p)
	offset += info->total_size;
      if (IN_RANGE (offset, -0x8000, 0x7fff))
	bitmap_set_bit (components, 0);
    }

  return components;
}
29352 | ||
/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB.  Returns the set of
   components (as defined by rs6000_get_separate_components) needed in
   basic block BB.  */
static sbitmap
rs6000_components_for_bb (basic_block bb)
{
  rs6000_stack_t *info = rs6000_stack_info ();

  bitmap in = DF_LIVE_IN (bb);
  bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
  bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;

  sbitmap components = sbitmap_alloc (cfun->machine->n_components);
  bitmap_clear (components);

  /* A register is used in a bb if it is in the IN, GEN, or KILL sets.  */

  /* GPRs.  Component number equals register number.  */
  for (unsigned regno = info->first_gp_reg_save; regno < 32; regno++)
    if (bitmap_bit_p (in, regno)
	|| bitmap_bit_p (gen, regno)
	|| bitmap_bit_p (kill, regno))
      bitmap_set_bit (components, regno);

  /* FPRs.  Component number equals register number.  */
  for (unsigned regno = info->first_fp_reg_save; regno < 64; regno++)
    if (bitmap_bit_p (in, regno)
	|| bitmap_bit_p (gen, regno)
	|| bitmap_bit_p (kill, regno))
      bitmap_set_bit (components, regno);

  /* The link register is component 0.  */
  if (bitmap_bit_p (in, LR_REGNO)
      || bitmap_bit_p (gen, LR_REGNO)
      || bitmap_bit_p (kill, LR_REGNO))
    bitmap_set_bit (components, 0);

  return components;
}
29390 | ||
29391 | /* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS. */ | |
29392 | static void | |
29393 | rs6000_disqualify_components (sbitmap components, edge e, | |
29394 | sbitmap edge_components, bool /*is_prologue*/) | |
29395 | { | |
29396 | /* Our LR pro/epilogue code moves LR via R0, so R0 had better not be | |
29397 | live where we want to place that code. */ | |
29398 | if (bitmap_bit_p (edge_components, 0) | |
29399 | && bitmap_bit_p (DF_LIVE_IN (e->dest), 0)) | |
29400 | { | |
29401 | if (dump_file) | |
29402 | fprintf (dump_file, "Disqualifying LR because GPR0 is live " | |
29403 | "on entry to bb %d\n", e->dest->index); | |
29404 | bitmap_clear_bit (components, 0); | |
29405 | } | |
29406 | } | |
29407 | ||
29408 | /* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS. */ | |
29409 | static void | |
29410 | rs6000_emit_prologue_components (sbitmap components) | |
29411 | { | |
29412 | rs6000_stack_t *info = rs6000_stack_info (); | |
29413 | rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed | |
29414 | ? HARD_FRAME_POINTER_REGNUM | |
29415 | : STACK_POINTER_REGNUM); | |
29416 | ||
29417 | machine_mode reg_mode = Pmode; | |
29418 | int reg_size = TARGET_32BIT ? 4 : 8; | |
29419 | machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) | |
29420 | ? DFmode : SFmode; | |
29421 | int fp_reg_size = 8; | |
29422 | ||
29423 | /* Prologue for LR. */ | |
29424 | if (bitmap_bit_p (components, 0)) | |
29425 | { | |
29426 | rtx reg = gen_rtx_REG (reg_mode, 0); | |
29427 | rtx_insn *insn = emit_move_insn (reg, gen_rtx_REG (reg_mode, LR_REGNO)); | |
29428 | RTX_FRAME_RELATED_P (insn) = 1; | |
29429 | add_reg_note (insn, REG_CFA_REGISTER, NULL); | |
29430 | ||
29431 | int offset = info->lr_save_offset; | |
29432 | if (info->push_p) | |
29433 | offset += info->total_size; | |
29434 | ||
29435 | insn = emit_insn (gen_frame_store (reg, ptr_reg, offset)); | |
29436 | RTX_FRAME_RELATED_P (insn) = 1; | |
29437 | rtx lr = gen_rtx_REG (reg_mode, LR_REGNO); | |
29438 | rtx mem = copy_rtx (SET_DEST (single_set (insn))); | |
29439 | add_reg_note (insn, REG_CFA_OFFSET, gen_rtx_SET (mem, lr)); | |
29440 | } | |
29441 | ||
29442 | /* Prologue for the GPRs. */ | |
29443 | int offset = info->gp_save_offset; | |
29444 | if (info->push_p) | |
29445 | offset += info->total_size; | |
29446 | ||
29447 | for (int i = info->first_gp_reg_save; i < 32; i++) | |
29448 | { | |
29449 | if (bitmap_bit_p (components, i)) | |
29450 | { | |
29451 | rtx reg = gen_rtx_REG (reg_mode, i); | |
29452 | rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset)); | |
29453 | RTX_FRAME_RELATED_P (insn) = 1; | |
29454 | rtx set = copy_rtx (single_set (insn)); | |
29455 | add_reg_note (insn, REG_CFA_OFFSET, set); | |
29456 | } | |
29457 | ||
29458 | offset += reg_size; | |
29459 | } | |
29460 | ||
29461 | /* Prologue for the FPRs. */ | |
29462 | offset = info->fp_save_offset; | |
29463 | if (info->push_p) | |
29464 | offset += info->total_size; | |
29465 | ||
29466 | for (int i = info->first_fp_reg_save; i < 64; i++) | |
29467 | { | |
29468 | if (bitmap_bit_p (components, i)) | |
29469 | { | |
29470 | rtx reg = gen_rtx_REG (fp_reg_mode, i); | |
29471 | rtx_insn *insn = emit_insn (gen_frame_store (reg, ptr_reg, offset)); | |
29472 | RTX_FRAME_RELATED_P (insn) = 1; | |
29473 | rtx set = copy_rtx (single_set (insn)); | |
29474 | add_reg_note (insn, REG_CFA_OFFSET, set); | |
29475 | } | |
29476 | ||
29477 | offset += fp_reg_size; | |
29478 | } | |
29479 | } | |
29480 | ||
29481 | /* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS. */ | |
29482 | static void | |
29483 | rs6000_emit_epilogue_components (sbitmap components) | |
29484 | { | |
29485 | rs6000_stack_t *info = rs6000_stack_info (); | |
29486 | rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed | |
29487 | ? HARD_FRAME_POINTER_REGNUM | |
29488 | : STACK_POINTER_REGNUM); | |
29489 | ||
29490 | machine_mode reg_mode = Pmode; | |
29491 | int reg_size = TARGET_32BIT ? 4 : 8; | |
29492 | ||
29493 | machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) | |
29494 | ? DFmode : SFmode; | |
29495 | int fp_reg_size = 8; | |
29496 | ||
29497 | /* Epilogue for the FPRs. */ | |
29498 | int offset = info->fp_save_offset; | |
29499 | if (info->push_p) | |
29500 | offset += info->total_size; | |
29501 | ||
29502 | for (int i = info->first_fp_reg_save; i < 64; i++) | |
29503 | { | |
29504 | if (bitmap_bit_p (components, i)) | |
29505 | { | |
29506 | rtx reg = gen_rtx_REG (fp_reg_mode, i); | |
29507 | rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset)); | |
29508 | RTX_FRAME_RELATED_P (insn) = 1; | |
29509 | add_reg_note (insn, REG_CFA_RESTORE, reg); | |
29510 | } | |
29511 | ||
29512 | offset += fp_reg_size; | |
29513 | } | |
29514 | ||
29515 | /* Epilogue for the GPRs. */ | |
29516 | offset = info->gp_save_offset; | |
29517 | if (info->push_p) | |
29518 | offset += info->total_size; | |
29519 | ||
29520 | for (int i = info->first_gp_reg_save; i < 32; i++) | |
29521 | { | |
29522 | if (bitmap_bit_p (components, i)) | |
29523 | { | |
29524 | rtx reg = gen_rtx_REG (reg_mode, i); | |
29525 | rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset)); | |
29526 | RTX_FRAME_RELATED_P (insn) = 1; | |
29527 | add_reg_note (insn, REG_CFA_RESTORE, reg); | |
29528 | } | |
29529 | ||
29530 | offset += reg_size; | |
29531 | } | |
29532 | ||
29533 | /* Epilogue for LR. */ | |
29534 | if (bitmap_bit_p (components, 0)) | |
29535 | { | |
29536 | int offset = info->lr_save_offset; | |
29537 | if (info->push_p) | |
29538 | offset += info->total_size; | |
29539 | ||
29540 | rtx reg = gen_rtx_REG (reg_mode, 0); | |
29541 | rtx_insn *insn = emit_insn (gen_frame_load (reg, ptr_reg, offset)); | |
29542 | ||
29543 | rtx lr = gen_rtx_REG (Pmode, LR_REGNO); | |
29544 | insn = emit_move_insn (lr, reg); | |
29545 | RTX_FRAME_RELATED_P (insn) = 1; | |
29546 | add_reg_note (insn, REG_CFA_RESTORE, lr); | |
29547 | } | |
29548 | } | |
29549 | ||
29550 | /* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS. */ | |
29551 | static void | |
29552 | rs6000_set_handled_components (sbitmap components) | |
29553 | { | |
29554 | rs6000_stack_t *info = rs6000_stack_info (); | |
29555 | ||
29556 | for (int i = info->first_gp_reg_save; i < 32; i++) | |
29557 | if (bitmap_bit_p (components, i)) | |
29558 | cfun->machine->gpr_is_wrapped_separately[i] = true; | |
29559 | ||
29560 | for (int i = info->first_fp_reg_save; i < 64; i++) | |
29561 | if (bitmap_bit_p (components, i)) | |
29562 | cfun->machine->fpr_is_wrapped_separately[i - 32] = true; | |
29563 | ||
29564 | if (bitmap_bit_p (components, 0)) | |
29565 | cfun->machine->lr_is_wrapped_separately = true; | |
29566 | } | |
29567 | ||
29568 | /* Emit function prologue as insns. */ | |
29569 | ||
29570 | void | |
29571 | rs6000_emit_prologue (void) | |
29572 | { | |
29573 | rs6000_stack_t *info = rs6000_stack_info (); | |
29574 | machine_mode reg_mode = Pmode; | |
29575 | int reg_size = TARGET_32BIT ? 4 : 8; | |
29576 | machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) | |
29577 | ? DFmode : SFmode; | |
29578 | int fp_reg_size = 8; | |
29579 | rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); | |
29580 | rtx frame_reg_rtx = sp_reg_rtx; | |
29581 | unsigned int cr_save_regno; | |
29582 | rtx cr_save_rtx = NULL_RTX; | |
29583 | rtx_insn *insn; | |
29584 | int strategy; | |
29585 | int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE | |
29586 | && df_regs_ever_live_p (STATIC_CHAIN_REGNUM) | |
29587 | && call_used_regs[STATIC_CHAIN_REGNUM]); | |
29588 | int using_split_stack = (flag_split_stack | |
29589 | && (lookup_attribute ("no_split_stack", | |
29590 | DECL_ATTRIBUTES (cfun->decl)) | |
29591 | == NULL)); | |
29592 | ||
29593 | /* Offset to top of frame for frame_reg and sp respectively. */ | |
29594 | HOST_WIDE_INT frame_off = 0; | |
29595 | HOST_WIDE_INT sp_off = 0; | |
29596 | /* sp_adjust is the stack adjusting instruction, tracked so that the | |
29597 | insn setting up the split-stack arg pointer can be emitted just | |
29598 | prior to it, when r12 is not used here for other purposes. */ | |
29599 | rtx_insn *sp_adjust = 0; | |
29600 | ||
29601 | #if CHECKING_P | |
29602 | /* Track and check usage of r0, r11, r12. */ | |
29603 | int reg_inuse = using_static_chain_p ? 1 << 11 : 0; | |
29604 | #define START_USE(R) do \ | |
29605 | { \ | |
29606 | gcc_assert ((reg_inuse & (1 << (R))) == 0); \ | |
29607 | reg_inuse |= 1 << (R); \ | |
29608 | } while (0) | |
29609 | #define END_USE(R) do \ | |
29610 | { \ | |
29611 | gcc_assert ((reg_inuse & (1 << (R))) != 0); \ | |
29612 | reg_inuse &= ~(1 << (R)); \ | |
29613 | } while (0) | |
29614 | #define NOT_INUSE(R) do \ | |
29615 | { \ | |
29616 | gcc_assert ((reg_inuse & (1 << (R))) == 0); \ | |
29617 | } while (0) | |
29618 | #else | |
29619 | #define START_USE(R) do {} while (0) | |
29620 | #define END_USE(R) do {} while (0) | |
29621 | #define NOT_INUSE(R) do {} while (0) | |
29622 | #endif | |
29623 | ||
29624 | if (DEFAULT_ABI == ABI_ELFv2 | |
29625 | && !TARGET_SINGLE_PIC_BASE) | |
29626 | { | |
29627 | cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM); | |
29628 | ||
29629 | /* With -mminimal-toc we may generate an extra use of r2 below. */ | |
29630 | if (TARGET_TOC && TARGET_MINIMAL_TOC | |
29631 | && !constant_pool_empty_p ()) | |
29632 | cfun->machine->r2_setup_needed = true; | |
29633 | } | |
29634 | ||
29635 | ||
29636 | if (flag_stack_usage_info) | |
29637 | current_function_static_stack_size = info->total_size; | |
29638 | ||
9c1b56c4 | 29639 | if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK |
329c9aee | 29640 | || flag_stack_clash_protection) |
83349046 SB |
29641 | { |
29642 | HOST_WIDE_INT size = info->total_size; | |
29643 | ||
29644 | if (crtl->is_leaf && !cfun->calls_alloca) | |
29645 | { | |
8c1dd970 JL |
29646 | if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) |
29647 | rs6000_emit_probe_stack_range (get_stack_check_protect (), | |
29648 | size - get_stack_check_protect ()); | |
83349046 SB |
29649 | } |
29650 | else if (size > 0) | |
8c1dd970 | 29651 | rs6000_emit_probe_stack_range (get_stack_check_protect (), size); |
83349046 SB |
29652 | } |
29653 | ||
29654 | if (TARGET_FIX_AND_CONTINUE) | |
29655 | { | |
29656 | /* gdb on darwin arranges to forward a function from the old | |
29657 | address by modifying the first 5 instructions of the function | |
29658 | to branch to the overriding function. This is necessary to | |
29659 | permit function pointers that point to the old function to | |
29660 | actually forward to the new function. */ | |
29661 | emit_insn (gen_nop ()); | |
29662 | emit_insn (gen_nop ()); | |
29663 | emit_insn (gen_nop ()); | |
29664 | emit_insn (gen_nop ()); | |
29665 | emit_insn (gen_nop ()); | |
29666 | } | |
29667 | ||
29668 | if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0) | |
29669 | { | |
29670 | reg_mode = V2SImode; | |
29671 | reg_size = 8; | |
29672 | } | |
29673 | ||
29674 | /* Handle world saves specially here. */ | |
29675 | if (WORLD_SAVE_P (info)) | |
29676 | { | |
29677 | int i, j, sz; | |
29678 | rtx treg; | |
29679 | rtvec p; | |
29680 | rtx reg0; | |
29681 | ||
29682 | /* save_world expects lr in r0. */ | |
29683 | reg0 = gen_rtx_REG (Pmode, 0); | |
29684 | if (info->lr_save_p) | |
29685 | { | |
29686 | insn = emit_move_insn (reg0, | |
29687 | gen_rtx_REG (Pmode, LR_REGNO)); | |
29688 | RTX_FRAME_RELATED_P (insn) = 1; | |
29689 | } | |
29690 | ||
29691 | /* The SAVE_WORLD and RESTORE_WORLD routines make a number of | |
29692 | assumptions about the offsets of various bits of the stack | |
29693 | frame. */ | |
29694 | gcc_assert (info->gp_save_offset == -220 | |
29695 | && info->fp_save_offset == -144 | |
29696 | && info->lr_save_offset == 8 | |
29697 | && info->cr_save_offset == 4 | |
29698 | && info->push_p | |
29699 | && info->lr_save_p | |
29700 | && (!crtl->calls_eh_return | |
29701 | || info->ehrd_offset == -432) | |
29702 | && info->vrsave_save_offset == -224 | |
29703 | && info->altivec_save_offset == -416); | |
29704 | ||
29705 | treg = gen_rtx_REG (SImode, 11); | |
29706 | emit_move_insn (treg, GEN_INT (-info->total_size)); | |
29707 | ||
29708 | /* SAVE_WORLD takes the caller's LR in R0 and the frame size | |
29709 | in R11. It also clobbers R12, so beware! */ | |
29710 | ||
29711 | /* Preserve CR2 for save_world prologues */ | |
29712 | sz = 5; | |
29713 | sz += 32 - info->first_gp_reg_save; | |
29714 | sz += 64 - info->first_fp_reg_save; | |
29715 | sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1; | |
29716 | p = rtvec_alloc (sz); | |
29717 | j = 0; | |
29718 | RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode, | |
29719 | gen_rtx_REG (SImode, | |
29720 | LR_REGNO)); | |
29721 | RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode, | |
29722 | gen_rtx_SYMBOL_REF (Pmode, | |
29723 | "*save_world")); | |
29724 | /* We do floats first so that the instruction pattern matches | |
29725 | properly. */ | |
29726 | for (i = 0; i < 64 - info->first_fp_reg_save; i++) | |
29727 | RTVEC_ELT (p, j++) | |
29728 | = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT | |
29729 | ? DFmode : SFmode, | |
29730 | info->first_fp_reg_save + i), | |
29731 | frame_reg_rtx, | |
29732 | info->fp_save_offset + frame_off + 8 * i); | |
29733 | for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++) | |
29734 | RTVEC_ELT (p, j++) | |
29735 | = gen_frame_store (gen_rtx_REG (V4SImode, | |
29736 | info->first_altivec_reg_save + i), | |
29737 | frame_reg_rtx, | |
29738 | info->altivec_save_offset + frame_off + 16 * i); | |
29739 | for (i = 0; i < 32 - info->first_gp_reg_save; i++) | |
29740 | RTVEC_ELT (p, j++) | |
29741 | = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i), | |
29742 | frame_reg_rtx, | |
29743 | info->gp_save_offset + frame_off + reg_size * i); | |
29744 | ||
29745 | /* CR register traditionally saved as CR2. */ | |
29746 | RTVEC_ELT (p, j++) | |
29747 | = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO), | |
29748 | frame_reg_rtx, info->cr_save_offset + frame_off); | |
29749 | /* Explain about use of R0. */ | |
29750 | if (info->lr_save_p) | |
29751 | RTVEC_ELT (p, j++) | |
29752 | = gen_frame_store (reg0, | |
29753 | frame_reg_rtx, info->lr_save_offset + frame_off); | |
29754 | /* Explain what happens to the stack pointer. */ | |
29755 | { | |
29756 | rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg); | |
29757 | RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval); | |
29758 | } | |
29759 | ||
29760 | insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); | |
29761 | rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, | |
29762 | treg, GEN_INT (-info->total_size)); | |
29763 | sp_off = frame_off = info->total_size; | |
29764 | } | |
29765 | ||
29766 | strategy = info->savres_strategy; | |
29767 | ||
29768 | /* For V.4, update stack before we do any saving and set back pointer. */ | |
29769 | if (! WORLD_SAVE_P (info) | |
29770 | && info->push_p | |
29771 | && (DEFAULT_ABI == ABI_V4 | |
29772 | || crtl->calls_eh_return)) | |
29773 | { | |
29774 | bool need_r11 = (TARGET_SPE | |
29775 | ? (!(strategy & SAVE_INLINE_GPRS) | |
29776 | && info->spe_64bit_regs_used == 0) | |
29777 | : (!(strategy & SAVE_INLINE_FPRS) | |
29778 | || !(strategy & SAVE_INLINE_GPRS) | |
29779 | || !(strategy & SAVE_INLINE_VRS))); | |
29780 | int ptr_regno = -1; | |
29781 | rtx ptr_reg = NULL_RTX; | |
29782 | int ptr_off = 0; | |
29783 | ||
29784 | if (info->total_size < 32767) | |
29785 | frame_off = info->total_size; | |
29786 | else if (need_r11) | |
29787 | ptr_regno = 11; | |
29788 | else if (info->cr_save_p | |
29789 | || info->lr_save_p | |
29790 | || info->first_fp_reg_save < 64 | |
29791 | || info->first_gp_reg_save < 32 | |
29792 | || info->altivec_size != 0 | |
29793 | || info->vrsave_size != 0 | |
29794 | || crtl->calls_eh_return) | |
29795 | ptr_regno = 12; | |
29796 | else | |
29797 | { | |
29798 | /* The prologue won't be saving any regs so there is no need | |
29799 | to set up a frame register to access any frame save area. | |
29800 | We also won't be using frame_off anywhere below, but set | |
29801 | the correct value anyway to protect against future | |
29802 | changes to this function. */ | |
29803 | frame_off = info->total_size; | |
29804 | } | |
29805 | if (ptr_regno != -1) | |
29806 | { | |
29807 | /* Set up the frame offset to that needed by the first | |
29808 | out-of-line save function. */ | |
29809 | START_USE (ptr_regno); | |
29810 | ptr_reg = gen_rtx_REG (Pmode, ptr_regno); | |
29811 | frame_reg_rtx = ptr_reg; | |
29812 | if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0) | |
29813 | gcc_checking_assert (info->fp_save_offset + info->fp_size == 0); | |
29814 | else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32) | |
29815 | ptr_off = info->gp_save_offset + info->gp_size; | |
29816 | else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0) | |
29817 | ptr_off = info->altivec_save_offset + info->altivec_size; | |
29818 | frame_off = -ptr_off; | |
29819 | } | |
29820 | sp_adjust = rs6000_emit_allocate_stack (info->total_size, | |
29821 | ptr_reg, ptr_off); | |
29822 | if (REGNO (frame_reg_rtx) == 12) | |
29823 | sp_adjust = 0; | |
29824 | sp_off = info->total_size; | |
29825 | if (frame_reg_rtx != sp_reg_rtx) | |
29826 | rs6000_emit_stack_tie (frame_reg_rtx, false); | |
29827 | } | |
29828 | ||
29829 | /* If we use the link register, get it into r0. */ | |
29830 | if (!WORLD_SAVE_P (info) && info->lr_save_p | |
29831 | && !cfun->machine->lr_is_wrapped_separately) | |
29832 | { | |
29833 | rtx addr, reg, mem; | |
29834 | ||
29835 | reg = gen_rtx_REG (Pmode, 0); | |
29836 | START_USE (0); | |
29837 | insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO)); | |
29838 | RTX_FRAME_RELATED_P (insn) = 1; | |
29839 | ||
29840 | if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR | |
29841 | | SAVE_NOINLINE_FPRS_SAVES_LR))) | |
29842 | { | |
29843 | addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, | |
29844 | GEN_INT (info->lr_save_offset + frame_off)); | |
29845 | mem = gen_rtx_MEM (Pmode, addr); | |
29846 | /* This should not be of rs6000_sr_alias_set, because of | |
29847 | __builtin_return_address. */ | |
29848 | ||
29849 | insn = emit_move_insn (mem, reg); | |
29850 | rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, | |
29851 | NULL_RTX, NULL_RTX); | |
29852 | END_USE (0); | |
29853 | } | |
29854 | } | |
29855 | ||
29856 | /* If we need to save CR, put it into r12 or r11. Choose r12 except when | |
29857 | r12 will be needed by out-of-line gpr restore. */ | |
29858 | cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
29859 | && !(strategy & (SAVE_INLINE_GPRS | |
29860 | | SAVE_NOINLINE_GPRS_SAVES_LR)) | |
29861 | ? 11 : 12); | |
29862 | if (!WORLD_SAVE_P (info) | |
29863 | && info->cr_save_p | |
29864 | && REGNO (frame_reg_rtx) != cr_save_regno | |
29865 | && !(using_static_chain_p && cr_save_regno == 11) | |
29866 | && !(using_split_stack && cr_save_regno == 12 && sp_adjust)) | |
29867 | { | |
29868 | cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno); | |
29869 | START_USE (cr_save_regno); | |
29870 | rs6000_emit_move_from_cr (cr_save_rtx); | |
29871 | } | |
29872 | ||
29873 | /* Do any required saving of fpr's. If only one or two to save, do | |
29874 | it ourselves. Otherwise, call function. */ | |
29875 | if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS)) | |
29876 | { | |
29877 | int offset = info->fp_save_offset + frame_off; | |
29878 | for (int i = info->first_fp_reg_save; i < 64; i++) | |
29879 | { | |
29880 | if (save_reg_p (i) | |
29881 | && !cfun->machine->fpr_is_wrapped_separately[i - 32]) | |
29882 | emit_frame_save (frame_reg_rtx, fp_reg_mode, i, offset, | |
29883 | sp_off - frame_off); | |
29884 | ||
29885 | offset += fp_reg_size; | |
29886 | } | |
29887 | } | |
29888 | else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64) | |
29889 | { | |
29890 | bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0; | |
29891 | int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0); | |
29892 | unsigned ptr_regno = ptr_regno_for_savres (sel); | |
29893 | rtx ptr_reg = frame_reg_rtx; | |
29894 | ||
29895 | if (REGNO (frame_reg_rtx) == ptr_regno) | |
29896 | gcc_checking_assert (frame_off == 0); | |
29897 | else | |
29898 | { | |
29899 | ptr_reg = gen_rtx_REG (Pmode, ptr_regno); | |
29900 | NOT_INUSE (ptr_regno); | |
29901 | emit_insn (gen_add3_insn (ptr_reg, | |
29902 | frame_reg_rtx, GEN_INT (frame_off))); | |
29903 | } | |
29904 | insn = rs6000_emit_savres_rtx (info, ptr_reg, | |
29905 | info->fp_save_offset, | |
29906 | info->lr_save_offset, | |
29907 | DFmode, sel); | |
29908 | rs6000_frame_related (insn, ptr_reg, sp_off, | |
29909 | NULL_RTX, NULL_RTX); | |
29910 | if (lr) | |
29911 | END_USE (0); | |
29912 | } | |
29913 | ||
29914 | /* Save GPRs. This is done as a PARALLEL if we are using | |
29915 | the store-multiple instructions. */ | |
29916 | if (!WORLD_SAVE_P (info) | |
29917 | && TARGET_SPE_ABI | |
29918 | && info->spe_64bit_regs_used != 0 | |
29919 | && info->first_gp_reg_save != 32) | |
29920 | { | |
29921 | int i; | |
29922 | rtx spe_save_area_ptr; | |
29923 | HOST_WIDE_INT save_off; | |
29924 | int ool_adjust = 0; | |
29925 | ||
29926 | /* Determine whether we can address all of the registers that need | |
29927 | to be saved with an offset from frame_reg_rtx that fits in | |
29928 | the small const field for SPE memory instructions. */ | |
29929 | int spe_regs_addressable | |
29930 | = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off | |
29931 | + reg_size * (32 - info->first_gp_reg_save - 1)) | |
29932 | && (strategy & SAVE_INLINE_GPRS)); | |
29933 | ||
29934 | if (spe_regs_addressable) | |
29935 | { | |
29936 | spe_save_area_ptr = frame_reg_rtx; | |
29937 | save_off = frame_off; | |
29938 | } | |
29939 | else | |
29940 | { | |
29941 | /* Make r11 point to the start of the SPE save area. We need | |
29942 | to be careful here if r11 is holding the static chain. If | |
29943 | it is, then temporarily save it in r0. */ | |
29944 | HOST_WIDE_INT offset; | |
29945 | ||
29946 | if (!(strategy & SAVE_INLINE_GPRS)) | |
29947 | ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO); | |
29948 | offset = info->spe_gp_save_offset + frame_off - ool_adjust; | |
29949 | spe_save_area_ptr = gen_rtx_REG (Pmode, 11); | |
29950 | save_off = frame_off - offset; | |
29951 | ||
29952 | if (using_static_chain_p) | |
29953 | { | |
29954 | rtx r0 = gen_rtx_REG (Pmode, 0); | |
29955 | ||
29956 | START_USE (0); | |
29957 | gcc_assert (info->first_gp_reg_save > 11); | |
29958 | ||
29959 | emit_move_insn (r0, spe_save_area_ptr); | |
29960 | } | |
29961 | else if (REGNO (frame_reg_rtx) != 11) | |
29962 | START_USE (11); | |
29963 | ||
29964 | emit_insn (gen_addsi3 (spe_save_area_ptr, | |
29965 | frame_reg_rtx, GEN_INT (offset))); | |
29966 | if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11) | |
29967 | frame_off = -info->spe_gp_save_offset + ool_adjust; | |
29968 | } | |
29969 | ||
29970 | if ((strategy & SAVE_INLINE_GPRS)) | |
29971 | { | |
29972 | for (i = 0; i < 32 - info->first_gp_reg_save; i++) | |
29973 | if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i)) | |
29974 | emit_frame_save (spe_save_area_ptr, reg_mode, | |
29975 | info->first_gp_reg_save + i, | |
29976 | (info->spe_gp_save_offset + save_off | |
29977 | + reg_size * i), | |
29978 | sp_off - save_off); | |
29979 | } | |
29980 | else | |
29981 | { | |
29982 | insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr, | |
29983 | info->spe_gp_save_offset + save_off, | |
29984 | 0, reg_mode, | |
29985 | SAVRES_SAVE | SAVRES_GPR); | |
29986 | ||
29987 | rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off, | |
29988 | NULL_RTX, NULL_RTX); | |
29989 | } | |
29990 | ||
29991 | /* Move the static chain pointer back. */ | |
29992 | if (!spe_regs_addressable) | |
29993 | { | |
29994 | if (using_static_chain_p) | |
29995 | { | |
29996 | emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0)); | |
29997 | END_USE (0); | |
29998 | } | |
29999 | else if (REGNO (frame_reg_rtx) != 11) | |
30000 | END_USE (11); | |
30001 | } | |
30002 | } | |
30003 | else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS)) | |
30004 | { | |
30005 | bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0; | |
30006 | int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0); | |
30007 | unsigned ptr_regno = ptr_regno_for_savres (sel); | |
30008 | rtx ptr_reg = frame_reg_rtx; | |
30009 | bool ptr_set_up = REGNO (ptr_reg) == ptr_regno; | |
30010 | int end_save = info->gp_save_offset + info->gp_size; | |
30011 | int ptr_off; | |
30012 | ||
30013 | if (ptr_regno == 12) | |
30014 | sp_adjust = 0; | |
30015 | if (!ptr_set_up) | |
30016 | ptr_reg = gen_rtx_REG (Pmode, ptr_regno); | |
30017 | ||
30018 | /* Need to adjust r11 (r12) if we saved any FPRs. */ | |
30019 | if (end_save + frame_off != 0) | |
30020 | { | |
30021 | rtx offset = GEN_INT (end_save + frame_off); | |
30022 | ||
30023 | if (ptr_set_up) | |
30024 | frame_off = -end_save; | |
30025 | else | |
30026 | NOT_INUSE (ptr_regno); | |
30027 | emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset)); | |
30028 | } | |
30029 | else if (!ptr_set_up) | |
30030 | { | |
30031 | NOT_INUSE (ptr_regno); | |
30032 | emit_move_insn (ptr_reg, frame_reg_rtx); | |
30033 | } | |
30034 | ptr_off = -end_save; | |
30035 | insn = rs6000_emit_savres_rtx (info, ptr_reg, | |
30036 | info->gp_save_offset + ptr_off, | |
30037 | info->lr_save_offset + ptr_off, | |
30038 | reg_mode, sel); | |
30039 | rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off, | |
30040 | NULL_RTX, NULL_RTX); | |
30041 | if (lr) | |
30042 | END_USE (0); | |
30043 | } | |
30044 | else if (!WORLD_SAVE_P (info) && (strategy & SAVE_MULTIPLE)) | |
30045 | { | |
30046 | rtvec p; | |
30047 | int i; | |
30048 | p = rtvec_alloc (32 - info->first_gp_reg_save); | |
30049 | for (i = 0; i < 32 - info->first_gp_reg_save; i++) | |
30050 | RTVEC_ELT (p, i) | |
30051 | = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i), | |
30052 | frame_reg_rtx, | |
30053 | info->gp_save_offset + frame_off + reg_size * i); | |
30054 | insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); | |
30055 | rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, | |
30056 | NULL_RTX, NULL_RTX); | |
30057 | } | |
30058 | else if (!WORLD_SAVE_P (info)) | |
30059 | { | |
30060 | int offset = info->gp_save_offset + frame_off; | |
30061 | for (int i = info->first_gp_reg_save; i < 32; i++) | |
30062 | { | |
30063 | if (rs6000_reg_live_or_pic_offset_p (i) | |
30064 | && !cfun->machine->gpr_is_wrapped_separately[i]) | |
30065 | emit_frame_save (frame_reg_rtx, reg_mode, i, offset, | |
30066 | sp_off - frame_off); | |
30067 | ||
30068 | offset += reg_size; | |
30069 | } | |
30070 | } | |
30071 | ||
30072 | if (crtl->calls_eh_return) | |
30073 | { | |
30074 | unsigned int i; | |
30075 | rtvec p; | |
30076 | ||
30077 | for (i = 0; ; ++i) | |
30078 | { | |
30079 | unsigned int regno = EH_RETURN_DATA_REGNO (i); | |
30080 | if (regno == INVALID_REGNUM) | |
30081 | break; | |
30082 | } | |
30083 | ||
30084 | p = rtvec_alloc (i); | |
30085 | ||
30086 | for (i = 0; ; ++i) | |
30087 | { | |
30088 | unsigned int regno = EH_RETURN_DATA_REGNO (i); | |
30089 | if (regno == INVALID_REGNUM) | |
30090 | break; | |
30091 | ||
30092 | rtx set | |
30093 | = gen_frame_store (gen_rtx_REG (reg_mode, regno), | |
30094 | sp_reg_rtx, | |
30095 | info->ehrd_offset + sp_off + reg_size * (int) i); | |
30096 | RTVEC_ELT (p, i) = set; | |
30097 | RTX_FRAME_RELATED_P (set) = 1; | |
30098 | } | |
30099 | ||
30100 | insn = emit_insn (gen_blockage ()); | |
30101 | RTX_FRAME_RELATED_P (insn) = 1; | |
30102 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p)); | |
30103 | } | |
30104 | ||
30105 | /* In AIX ABI we need to make sure r2 is really saved. */ | |
30106 | if (TARGET_AIX && crtl->calls_eh_return) | |
30107 | { | |
30108 | rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump; | |
30109 | rtx join_insn, note; | |
30110 | rtx_insn *save_insn; | |
30111 | long toc_restore_insn; | |
30112 | ||
30113 | tmp_reg = gen_rtx_REG (Pmode, 11); | |
30114 | tmp_reg_si = gen_rtx_REG (SImode, 11); | |
30115 | if (using_static_chain_p) | |
30116 | { | |
30117 | START_USE (0); | |
30118 | emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg); | |
30119 | } | |
30120 | else | |
30121 | START_USE (11); | |
30122 | emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO)); | |
30123 | /* Peek at instruction to which this function returns. If it's | |
30124 | restoring r2, then we know we've already saved r2. We can't | |
30125 | unconditionally save r2 because the value we have will already | |
30126 | be updated if we arrived at this function via a plt call or | |
30127 | toc adjusting stub. */ | |
30128 | emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg)); | |
30129 | toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000) | |
30130 | + RS6000_TOC_SAVE_SLOT); | |
30131 | hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode); | |
30132 | emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi)); | |
30133 | compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO); | |
30134 | validate_condition_mode (EQ, CCUNSmode); | |
30135 | lo = gen_int_mode (toc_restore_insn & 0xffff, SImode); | |
30136 | emit_insn (gen_rtx_SET (compare_result, | |
30137 | gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo))); | |
30138 | toc_save_done = gen_label_rtx (); | |
30139 | jump = gen_rtx_IF_THEN_ELSE (VOIDmode, | |
30140 | gen_rtx_EQ (VOIDmode, compare_result, | |
30141 | const0_rtx), | |
30142 | gen_rtx_LABEL_REF (VOIDmode, toc_save_done), | |
30143 | pc_rtx); | |
30144 | jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump)); | |
30145 | JUMP_LABEL (jump) = toc_save_done; | |
30146 | LABEL_NUSES (toc_save_done) += 1; | |
30147 | ||
30148 | save_insn = emit_frame_save (frame_reg_rtx, reg_mode, | |
30149 | TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT, | |
30150 | sp_off - frame_off); | |
30151 | ||
30152 | emit_label (toc_save_done); | |
30153 | ||
30154 | /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll | |
30155 | have a CFG that has different saves along different paths. | |
30156 | Move the note to a dummy blockage insn, which describes that | |
30157 | R2 is unconditionally saved after the label. */ | |
30158 | /* ??? An alternate representation might be a special insn pattern | |
30159 | containing both the branch and the store. That might let the | |
30160 | code that minimizes the number of DW_CFA_advance opcodes better | |
30161 | freedom in placing the annotations. */ | |
30162 | note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL); | |
30163 | if (note) | |
30164 | remove_note (save_insn, note); | |
30165 | else | |
30166 | note = alloc_reg_note (REG_FRAME_RELATED_EXPR, | |
30167 | copy_rtx (PATTERN (save_insn)), NULL_RTX); | |
30168 | RTX_FRAME_RELATED_P (save_insn) = 0; | |
30169 | ||
30170 | join_insn = emit_insn (gen_blockage ()); | |
30171 | REG_NOTES (join_insn) = note; | |
30172 | RTX_FRAME_RELATED_P (join_insn) = 1; | |
30173 | ||
30174 | if (using_static_chain_p) | |
30175 | { | |
30176 | emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0)); | |
30177 | END_USE (0); | |
30178 | } | |
30179 | else | |
30180 | END_USE (11); | |
30181 | } | |
30182 | ||
30183 | /* Save CR if we use any that must be preserved. */ | |
30184 | if (!WORLD_SAVE_P (info) && info->cr_save_p) | |
30185 | { | |
30186 | rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, | |
30187 | GEN_INT (info->cr_save_offset + frame_off)); | |
30188 | rtx mem = gen_frame_mem (SImode, addr); | |
30189 | ||
30190 | /* If we didn't copy cr before, do so now using r0. */ | |
30191 | if (cr_save_rtx == NULL_RTX) | |
30192 | { | |
30193 | START_USE (0); | |
30194 | cr_save_rtx = gen_rtx_REG (SImode, 0); | |
30195 | rs6000_emit_move_from_cr (cr_save_rtx); | |
30196 | } | |
30197 | ||
30198 | /* Saving CR requires a two-instruction sequence: one instruction | |
30199 | to move the CR to a general-purpose register, and a second | |
30200 | instruction that stores the GPR to memory. | |
30201 | ||
30202 | We do not emit any DWARF CFI records for the first of these, | |
30203 | because we cannot properly represent the fact that CR is saved in | |
30204 | a register. One reason is that we cannot express that multiple | |
30205 | CR fields are saved; another reason is that on 64-bit, the size | |
30206 | of the CR register in DWARF (4 bytes) differs from the size of | |
30207 | a general-purpose register. | |
30208 | ||
30209 | This means if any intervening instruction were to clobber one of | |
30210 | the call-saved CR fields, we'd have incorrect CFI. To prevent | |
30211 | this from happening, we mark the store to memory as a use of | |
30212 | those CR fields, which prevents any such instruction from being | |
30213 | scheduled in between the two instructions. */ | |
30214 | rtx crsave_v[9]; | |
30215 | int n_crsave = 0; | |
30216 | int i; | |
30217 | ||
30218 | crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx); | |
30219 | for (i = 0; i < 8; i++) | |
30220 | if (save_reg_p (CR0_REGNO + i)) | |
30221 | crsave_v[n_crsave++] | |
30222 | = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i)); | |
30223 | ||
30224 | insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, | |
30225 | gen_rtvec_v (n_crsave, crsave_v))); | |
30226 | END_USE (REGNO (cr_save_rtx)); | |
30227 | ||
30228 | /* Now, there's no way that dwarf2out_frame_debug_expr is going to | |
30229 | understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)', | |
30230 | so we need to construct a frame expression manually. */ | |
30231 | RTX_FRAME_RELATED_P (insn) = 1; | |
30232 | ||
30233 | /* Update address to be stack-pointer relative, like | |
30234 | rs6000_frame_related would do. */ | |
30235 | addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM), | |
30236 | GEN_INT (info->cr_save_offset + sp_off)); | |
30237 | mem = gen_frame_mem (SImode, addr); | |
30238 | ||
30239 | if (DEFAULT_ABI == ABI_ELFv2) | |
30240 | { | |
30241 | /* In the ELFv2 ABI we generate separate CFI records for each | |
30242 | CR field that was actually saved. They all point to the | |
30243 | same 32-bit stack slot. */ | |
30244 | rtx crframe[8]; | |
30245 | int n_crframe = 0; | |
30246 | ||
30247 | for (i = 0; i < 8; i++) | |
30248 | if (save_reg_p (CR0_REGNO + i)) | |
30249 | { | |
30250 | crframe[n_crframe] | |
30251 | = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i)); | |
30252 | ||
30253 | RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1; | |
30254 | n_crframe++; | |
30255 | } | |
30256 | ||
30257 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, | |
30258 | gen_rtx_PARALLEL (VOIDmode, | |
30259 | gen_rtvec_v (n_crframe, crframe))); | |
30260 | } | |
30261 | else | |
30262 | { | |
30263 | /* In other ABIs, by convention, we use a single CR regnum to | |
30264 | represent the fact that all call-saved CR fields are saved. | |
30265 | We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */ | |
30266 | rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO)); | |
30267 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, set); | |
30268 | } | |
30269 | } | |
30270 | ||
30271 | /* In the ELFv2 ABI we need to save all call-saved CR fields into | |
30272 | *separate* slots if the routine calls __builtin_eh_return, so | |
30273 | that they can be independently restored by the unwinder. */ | |
30274 | if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return) | |
30275 | { | |
30276 | int i, cr_off = info->ehcr_offset; | |
30277 | rtx crsave; | |
30278 | ||
30279 | /* ??? We might get better performance by using multiple mfocrf | |
30280 | instructions. */ | |
30281 | crsave = gen_rtx_REG (SImode, 0); | |
30282 | emit_insn (gen_movesi_from_cr (crsave)); | |
30283 | ||
30284 | for (i = 0; i < 8; i++) | |
30285 | if (!call_used_regs[CR0_REGNO + i]) | |
30286 | { | |
30287 | rtvec p = rtvec_alloc (2); | |
30288 | RTVEC_ELT (p, 0) | |
30289 | = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off); | |
30290 | RTVEC_ELT (p, 1) | |
30291 | = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i)); | |
30292 | ||
30293 | insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); | |
30294 | ||
30295 | RTX_FRAME_RELATED_P (insn) = 1; | |
30296 | add_reg_note (insn, REG_FRAME_RELATED_EXPR, | |
30297 | gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i), | |
30298 | sp_reg_rtx, cr_off + sp_off)); | |
30299 | ||
30300 | cr_off += reg_size; | |
30301 | } | |
30302 | } | |
30303 | ||
30304 | /* Update stack and set back pointer unless this is V.4, | |
30305 | for which it was done previously. */ | |
30306 | if (!WORLD_SAVE_P (info) && info->push_p | |
30307 | && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return)) | |
30308 | { | |
30309 | rtx ptr_reg = NULL; | |
30310 | int ptr_off = 0; | |
30311 | ||
30312 | /* If saving altivec regs we need to be able to address all save | |
30313 | locations using a 16-bit offset. */ | |
30314 | if ((strategy & SAVE_INLINE_VRS) == 0 | |
30315 | || (info->altivec_size != 0 | |
30316 | && (info->altivec_save_offset + info->altivec_size - 16 | |
30317 | + info->total_size - frame_off) > 32767) | |
30318 | || (info->vrsave_size != 0 | |
30319 | && (info->vrsave_save_offset | |
30320 | + info->total_size - frame_off) > 32767)) | |
30321 | { | |
30322 | int sel = SAVRES_SAVE | SAVRES_VR; | |
30323 | unsigned ptr_regno = ptr_regno_for_savres (sel); | |
30324 | ||
30325 | if (using_static_chain_p | |
30326 | && ptr_regno == STATIC_CHAIN_REGNUM) | |
30327 | ptr_regno = 12; | |
30328 | if (REGNO (frame_reg_rtx) != ptr_regno) | |
30329 | START_USE (ptr_regno); | |
30330 | ptr_reg = gen_rtx_REG (Pmode, ptr_regno); | |
30331 | frame_reg_rtx = ptr_reg; | |
30332 | ptr_off = info->altivec_save_offset + info->altivec_size; | |
30333 | frame_off = -ptr_off; | |
30334 | } | |
30335 | else if (REGNO (frame_reg_rtx) == 1) | |
30336 | frame_off = info->total_size; | |
30337 | sp_adjust = rs6000_emit_allocate_stack (info->total_size, | |
30338 | ptr_reg, ptr_off); | |
30339 | if (REGNO (frame_reg_rtx) == 12) | |
30340 | sp_adjust = 0; | |
30341 | sp_off = info->total_size; | |
30342 | if (frame_reg_rtx != sp_reg_rtx) | |
30343 | rs6000_emit_stack_tie (frame_reg_rtx, false); | |
30344 | } | |
30345 | ||
30346 | /* Set frame pointer, if needed. */ | |
30347 | if (frame_pointer_needed) | |
30348 | { | |
30349 | insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM), | |
30350 | sp_reg_rtx); | |
30351 | RTX_FRAME_RELATED_P (insn) = 1; | |
30352 | } | |
30353 | ||
30354 | /* Save AltiVec registers if needed. Save here because the red zone does | |
30355 | not always include AltiVec registers. */ | |
30356 | if (!WORLD_SAVE_P (info) | |
30357 | && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0) | |
30358 | { | |
30359 | int end_save = info->altivec_save_offset + info->altivec_size; | |
30360 | int ptr_off; | |
30361 | /* Oddly, the vector save/restore functions point r0 at the end | |
30362 | of the save area, then use r11 or r12 to load offsets for | |
30363 | [reg+reg] addressing. */ | |
30364 | rtx ptr_reg = gen_rtx_REG (Pmode, 0); | |
30365 | int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR); | |
30366 | rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); | |
30367 | ||
30368 | gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12); | |
30369 | NOT_INUSE (0); | |
30370 | if (scratch_regno == 12) | |
30371 | sp_adjust = 0; | |
30372 | if (end_save + frame_off != 0) | |
30373 | { | |
30374 | rtx offset = GEN_INT (end_save + frame_off); | |
30375 | ||
30376 | emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset)); | |
30377 | } | |
30378 | else | |
30379 | emit_move_insn (ptr_reg, frame_reg_rtx); | |
30380 | ||
30381 | ptr_off = -end_save; | |
30382 | insn = rs6000_emit_savres_rtx (info, scratch_reg, | |
30383 | info->altivec_save_offset + ptr_off, | |
30384 | 0, V4SImode, SAVRES_SAVE | SAVRES_VR); | |
30385 | rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off, | |
30386 | NULL_RTX, NULL_RTX); | |
30387 | if (REGNO (frame_reg_rtx) == REGNO (scratch_reg)) | |
30388 | { | |
30389 | /* The oddity mentioned above clobbered our frame reg. */ | |
30390 | emit_move_insn (frame_reg_rtx, ptr_reg); | |
30391 | frame_off = ptr_off; | |
30392 | } | |
30393 | } | |
30394 | else if (!WORLD_SAVE_P (info) | |
30395 | && info->altivec_size != 0) | |
30396 | { | |
30397 | int i; | |
30398 | ||
30399 | for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) | |
30400 | if (info->vrsave_mask & ALTIVEC_REG_BIT (i)) | |
30401 | { | |
30402 | rtx areg, savereg, mem; | |
30403 | HOST_WIDE_INT offset; | |
30404 | ||
30405 | offset = (info->altivec_save_offset + frame_off | |
30406 | + 16 * (i - info->first_altivec_reg_save)); | |
30407 | ||
30408 | savereg = gen_rtx_REG (V4SImode, i); | |
30409 | ||
30410 | if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset)) | |
30411 | { | |
30412 | mem = gen_frame_mem (V4SImode, | |
30413 | gen_rtx_PLUS (Pmode, frame_reg_rtx, | |
30414 | GEN_INT (offset))); | |
30415 | insn = emit_insn (gen_rtx_SET (mem, savereg)); | |
30416 | areg = NULL_RTX; | |
30417 | } | |
30418 | else | |
30419 | { | |
30420 | NOT_INUSE (0); | |
30421 | areg = gen_rtx_REG (Pmode, 0); | |
30422 | emit_move_insn (areg, GEN_INT (offset)); | |
30423 | ||
30424 | /* AltiVec addressing mode is [reg+reg]. */ | |
30425 | mem = gen_frame_mem (V4SImode, | |
30426 | gen_rtx_PLUS (Pmode, frame_reg_rtx, areg)); | |
30427 | ||
30428 | /* Rather than emitting a generic move, force use of the stvx | |
30429 | instruction, which we always want on ISA 2.07 (power8) systems. | |
30430 | In particular we don't want xxpermdi/stxvd2x for little | |
30431 | endian. */ | |
30432 | insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg)); | |
30433 | } | |
30434 | ||
30435 | rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, | |
30436 | areg, GEN_INT (offset)); | |
30437 | } | |
30438 | } | |
30439 | ||
30440 | /* VRSAVE is a bit vector representing which AltiVec registers | |
30441 | are used. The OS uses this to determine which vector | |
30442 | registers to save on a context switch. We need to save | |
30443 | VRSAVE on the stack frame, add whatever AltiVec registers we | |
30444 | used in this function, and do the corresponding magic in the | |
30445 | epilogue. */ | |
30446 | ||
30447 | if (!WORLD_SAVE_P (info) | |
30448 | && info->vrsave_size != 0) | |
30449 | { | |
30450 | rtx reg, vrsave; | |
30451 | int offset; | |
30452 | int save_regno; | |
30453 | ||
30454 | /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might | |
30455 | be using r12 as frame_reg_rtx and r11 as the static chain | |
30456 | pointer for nested functions. */ | |
30457 | save_regno = 12; | |
30458 | if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
30459 | && !using_static_chain_p) | |
30460 | save_regno = 11; | |
30461 | else if (using_split_stack || REGNO (frame_reg_rtx) == 12) | |
30462 | { | |
30463 | save_regno = 11; | |
30464 | if (using_static_chain_p) | |
30465 | save_regno = 0; | |
30466 | } | |
30467 | ||
30468 | NOT_INUSE (save_regno); | |
30469 | reg = gen_rtx_REG (SImode, save_regno); | |
30470 | vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO); | |
30471 | if (TARGET_MACHO) | |
30472 | emit_insn (gen_get_vrsave_internal (reg)); | |
30473 | else | |
30474 | emit_insn (gen_rtx_SET (reg, vrsave)); | |
30475 | ||
30476 | /* Save VRSAVE. */ | |
30477 | offset = info->vrsave_save_offset + frame_off; | |
30478 | insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset)); | |
30479 | ||
30480 | /* Include the registers in the mask. */ | |
30481 | emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask))); | |
30482 | ||
30483 | insn = emit_insn (generate_set_vrsave (reg, info, 0)); | |
30484 | } | |
30485 | ||
30486 | /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */ | |
30487 | if (!TARGET_SINGLE_PIC_BASE | |
30488 | && ((TARGET_TOC && TARGET_MINIMAL_TOC | |
30489 | && !constant_pool_empty_p ()) | |
30490 | || (DEFAULT_ABI == ABI_V4 | |
30491 | && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT)) | |
30492 | && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM)))) | |
30493 | { | |
30494 | /* If emit_load_toc_table will use the link register, we need to save | |
30495 | it. We use R12 for this purpose because emit_load_toc_table | |
30496 | can use register 0. This allows us to use a plain 'blr' to return | |
30497 | from the procedure more often. */ | |
30498 | int save_LR_around_toc_setup = (TARGET_ELF | |
30499 | && DEFAULT_ABI == ABI_V4 | |
30500 | && flag_pic | |
30501 | && ! info->lr_save_p | |
30502 | && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0); | |
30503 | if (save_LR_around_toc_setup) | |
30504 | { | |
30505 | rtx lr = gen_rtx_REG (Pmode, LR_REGNO); | |
30506 | rtx tmp = gen_rtx_REG (Pmode, 12); | |
30507 | ||
30508 | sp_adjust = 0; | |
30509 | insn = emit_move_insn (tmp, lr); | |
30510 | RTX_FRAME_RELATED_P (insn) = 1; | |
30511 | ||
30512 | rs6000_emit_load_toc_table (TRUE); | |
30513 | ||
30514 | insn = emit_move_insn (lr, tmp); | |
30515 | add_reg_note (insn, REG_CFA_RESTORE, lr); | |
30516 | RTX_FRAME_RELATED_P (insn) = 1; | |
30517 | } | |
30518 | else | |
30519 | rs6000_emit_load_toc_table (TRUE); | |
30520 | } | |
30521 | ||
30522 | #if TARGET_MACHO | |
30523 | if (!TARGET_SINGLE_PIC_BASE | |
30524 | && DEFAULT_ABI == ABI_DARWIN | |
30525 | && flag_pic && crtl->uses_pic_offset_table) | |
30526 | { | |
30527 | rtx lr = gen_rtx_REG (Pmode, LR_REGNO); | |
30528 | rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME); | |
30529 | ||
30530 | /* Save and restore LR locally around this call (in R0). */ | |
30531 | if (!info->lr_save_p) | |
30532 | emit_move_insn (gen_rtx_REG (Pmode, 0), lr); | |
30533 | ||
30534 | emit_insn (gen_load_macho_picbase (src)); | |
30535 | ||
30536 | emit_move_insn (gen_rtx_REG (Pmode, | |
30537 | RS6000_PIC_OFFSET_TABLE_REGNUM), | |
30538 | lr); | |
30539 | ||
30540 | if (!info->lr_save_p) | |
30541 | emit_move_insn (lr, gen_rtx_REG (Pmode, 0)); | |
30542 | } | |
30543 | #endif | |
30544 | ||
30545 | /* If we need to, save the TOC register after doing the stack setup. | |
30546 | Do not emit eh frame info for this save. The unwinder wants info, | |
30547 | conceptually attached to instructions in this function, about | |
30548 | register values in the caller of this function. This R2 may have | |
30549 | already been changed from the value in the caller. | |
30550 | We don't attempt to write accurate DWARF EH frame info for R2 | |
30551 | because code emitted by gcc for a (non-pointer) function call | |
30552 | doesn't save and restore R2. Instead, R2 is managed out-of-line | |
30553 | by a linker generated plt call stub when the function resides in | |
30554 | a shared library. This behavior is costly to describe in DWARF, | |
30555 | both in terms of the size of DWARF info and the time taken in the | |
30556 | unwinder to interpret it. R2 changes, apart from the | |
30557 | calls_eh_return case earlier in this function, are handled by | |
30558 | linux-unwind.h frob_update_context. */ | |
30559 | if (rs6000_save_toc_in_prologue_p ()) | |
30560 | { | |
30561 | rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM); | |
30562 | emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT)); | |
30563 | } | |
30564 | ||
30565 | if (using_split_stack && split_stack_arg_pointer_used_p ()) | |
30566 | { | |
30567 | /* Set up the arg pointer (r12) for -fsplit-stack code. If | |
30568 | __morestack was called, it left the arg pointer to the old | |
30569 | stack in r29. Otherwise, the arg pointer is the top of the | |
30570 | current frame. */ | |
30571 | cfun->machine->split_stack_argp_used = true; | |
30572 | if (sp_adjust) | |
30573 | { | |
30574 | rtx r12 = gen_rtx_REG (Pmode, 12); | |
30575 | rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx); | |
30576 | emit_insn_before (set_r12, sp_adjust); | |
30577 | } | |
30578 | else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12) | |
30579 | { | |
30580 | rtx r12 = gen_rtx_REG (Pmode, 12); | |
30581 | if (frame_off == 0) | |
30582 | emit_move_insn (r12, frame_reg_rtx); | |
30583 | else | |
30584 | emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off))); | |
30585 | } | |
30586 | if (info->push_p) | |
30587 | { | |
30588 | rtx r12 = gen_rtx_REG (Pmode, 12); | |
30589 | rtx r29 = gen_rtx_REG (Pmode, 29); | |
30590 | rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO); | |
30591 | rtx not_more = gen_label_rtx (); | |
30592 | rtx jump; | |
30593 | ||
30594 | jump = gen_rtx_IF_THEN_ELSE (VOIDmode, | |
30595 | gen_rtx_GEU (VOIDmode, cr7, const0_rtx), | |
30596 | gen_rtx_LABEL_REF (VOIDmode, not_more), | |
30597 | pc_rtx); | |
30598 | jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump)); | |
30599 | JUMP_LABEL (jump) = not_more; | |
30600 | LABEL_NUSES (not_more) += 1; | |
30601 | emit_move_insn (r12, r29); | |
30602 | emit_label (not_more); | |
30603 | } | |
30604 | } | |
30605 | } | |
30606 | ||
30607 | /* Output .extern statements for the save/restore routines we use. */ | |
30608 | ||
30609 | static void | |
30610 | rs6000_output_savres_externs (FILE *file) | |
30611 | { | |
30612 | rs6000_stack_t *info = rs6000_stack_info (); | |
30613 | ||
30614 | if (TARGET_DEBUG_STACK) | |
30615 | debug_stack_info (info); | |
30616 | ||
30617 | /* Write .extern for any function we will call to save and restore | |
30618 | fp values. */ | |
30619 | if (info->first_fp_reg_save < 64 | |
30620 | && !TARGET_MACHO | |
30621 | && !TARGET_ELF) | |
30622 | { | |
30623 | char *name; | |
30624 | int regno = info->first_fp_reg_save - 32; | |
30625 | ||
30626 | if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0) | |
30627 | { | |
30628 | bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0; | |
30629 | int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0); | |
30630 | name = rs6000_savres_routine_name (info, regno, sel); | |
30631 | fprintf (file, "\t.extern %s\n", name); | |
30632 | } | |
30633 | if ((info->savres_strategy & REST_INLINE_FPRS) == 0) | |
30634 | { | |
30635 | bool lr = (info->savres_strategy | |
30636 | & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0; | |
30637 | int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0); | |
30638 | name = rs6000_savres_routine_name (info, regno, sel); | |
30639 | fprintf (file, "\t.extern %s\n", name); | |
30640 | } | |
30641 | } | |
30642 | } | |
30643 | ||
/* Write function prologue text to FILE.  Most prologue code is emitted
   as insns by rs6000_emit_prologue; this hook only emits the pieces
   that must be literal assembler text: save/restore externs, the ELFv2
   global-entry r2 setup, and -mprofile-kernel mcount calls.  */

static void
rs6000_output_function_prologue (FILE *file)
{
  if (!cfun->is_thunk)
    rs6000_output_savres_externs (file);

  /* ELFv2 ABI r2 setup code and local entry point.  This must follow
     immediately after the global entry point label.  */
  if (rs6000_global_entry_point_needed_p ())
    {
      const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);

      /* Label the global entry point so the r2 setup sequence below can
	 reference its own address.  */
      (*targetm.asm_out.internal_label) (file, "LCF", rs6000_pic_labelno);

      if (TARGET_CMODEL != CMODEL_LARGE)
	{
	  /* In the small and medium code models, we assume the TOC is
	     less than 2 GB away from the text section, so it can be
	     computed via the following two-instruction sequence.  */
	  char buf[256];

	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
	  fprintf (file, "0:\taddis 2,12,.TOC.-");
	  assemble_name (file, buf);
	  fprintf (file, "@ha\n");
	  fprintf (file, "\taddi 2,2,.TOC.-");
	  assemble_name (file, buf);
	  fprintf (file, "@l\n");
	}
      else
	{
	  /* In the large code model, we allow arbitrary offsets between the
	     TOC and the text section, so we have to load the offset from
	     memory.  The data field is emitted directly before the global
	     entry point in rs6000_elf_declare_function_name.  */
	  char buf[256];

#ifdef HAVE_AS_ENTRY_MARKERS
	  /* If supported by the linker, emit a marker relocation.  If the
	     total code size of the final executable or shared library
	     happens to fit into 2 GB after all, the linker will replace
	     this code sequence with the sequence for the small or medium
	     code model.  */
	  fprintf (file, "\t.reloc .,R_PPC64_ENTRY\n");
#endif
	  fprintf (file, "\tld 2,");
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
	  assemble_name (file, buf);
	  fprintf (file, "-");
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
	  assemble_name (file, buf);
	  fprintf (file, "(12)\n");
	  fprintf (file, "\tadd 2,2,12\n");
	}

      /* Mark the local entry point, expressed as its distance from the
	 function's start, per the ELFv2 ABI.  */
      fputs ("\t.localentry\t", file);
      assemble_name (file, name);
      fputs (",.-", file);
      assemble_name (file, name);
      fputs ("\n", file);
    }

  /* Output -mprofile-kernel code.  This needs to be done here instead of
     in output_function_profile since it must go after the ELFv2 ABI
     local entry point.  */
  if (TARGET_PROFILE_KERNEL && crtl->profile)
    {
      gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
      gcc_assert (!TARGET_32BIT);

      asm_fprintf (file, "\tmflr %s\n", reg_names[0]);

      /* In the ELFv2 ABI we have no compiler stack word.  It must be
	 the responsibility of _mcount to preserve the static chain
	 register if required.  */
      if (DEFAULT_ABI != ABI_ELFv2
	  && cfun->static_chain_decl != NULL)
	{
	  /* Save the static chain around the call to _mcount.  */
	  asm_fprintf (file, "\tstd %s,24(%s)\n",
		       reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
	  fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
	  asm_fprintf (file, "\tld %s,24(%s)\n",
		       reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
	}
      else
	fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
    }

  rs6000_pic_labelno++;
}
30736 | ||
30737 | /* -mprofile-kernel code calls mcount before the function prolog, | |
30738 | so a profiled leaf function should stay a leaf function. */ | |
30739 | static bool | |
30740 | rs6000_keep_leaf_when_profiled () | |
30741 | { | |
30742 | return TARGET_PROFILE_KERNEL; | |
30743 | } | |
30744 | ||
30745 | /* Non-zero if vmx regs are restored before the frame pop, zero if | |
30746 | we restore after the pop when possible. */ | |
30747 | #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0 | |
30748 | ||
30749 | /* Restoring cr is a two step process: loading a reg from the frame | |
30750 | save, then moving the reg to cr. For ABI_V4 we must let the | |
30751 | unwinder know that the stack location is no longer valid at or | |
30752 | before the stack deallocation, but we can't emit a cfa_restore for | |
30753 | cr at the stack deallocation like we do for other registers. | |
30754 | The trouble is that it is possible for the move to cr to be | |
30755 | scheduled after the stack deallocation. So say exactly where cr | |
30756 | is located on each of the two insns. */ | |
30757 | ||
30758 | static rtx | |
30759 | load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func) | |
30760 | { | |
30761 | rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset); | |
30762 | rtx reg = gen_rtx_REG (SImode, regno); | |
30763 | rtx_insn *insn = emit_move_insn (reg, mem); | |
30764 | ||
30765 | if (!exit_func && DEFAULT_ABI == ABI_V4) | |
30766 | { | |
30767 | rtx cr = gen_rtx_REG (SImode, CR2_REGNO); | |
30768 | rtx set = gen_rtx_SET (reg, cr); | |
30769 | ||
30770 | add_reg_note (insn, REG_CFA_REGISTER, set); | |
30771 | RTX_FRAME_RELATED_P (insn) = 1; | |
30772 | } | |
30773 | return reg; | |
30774 | } | |
30775 | ||
/* Reload CR from REG, a GPR holding the saved CR image.  When
   USING_MFCR_MULTIPLE is nonzero and more than one CR field needs
   restoring, emit one PARALLEL that restores all saved fields at once;
   otherwise restore each saved field with its own move-to-CR insn.
   When EXIT_FUNC is false, attach the CFA_RESTORE notes the unwinder
   needs (per-field for ELFv2, a single CR2 note for other ABIs).  */

static void
restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
{
  int count = 0;
  int i;

  if (using_mfcr_multiple)
    {
      /* Count how many call-saved CR fields actually need restoring.  */
      for (i = 0; i < 8; i++)
	if (save_reg_p (CR0_REGNO + i))
	  count++;
      gcc_assert (count);
    }

  if (using_mfcr_multiple && count > 1)
    {
      rtx_insn *insn;
      rtvec p;
      int ndx;

      p = rtvec_alloc (count);

      ndx = 0;
      for (i = 0; i < 8; i++)
	if (save_reg_p (CR0_REGNO + i))
	  {
	    /* Each element sets one CR field from REG; the second
	       operand is a one-hot mask selecting field I (bit 7-i).  */
	    rtvec r = rtvec_alloc (2);
	    RTVEC_ELT (r, 0) = reg;
	    RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
	    RTVEC_ELT (p, ndx) =
	      gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
			   gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
	    ndx++;
	  }
      insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
      gcc_assert (ndx == count);

      /* For the ELFv2 ABI we generate a CFA_RESTORE for each
	 CR field separately.  */
      if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
	{
	  for (i = 0; i < 8; i++)
	    if (save_reg_p (CR0_REGNO + i))
	      add_reg_note (insn, REG_CFA_RESTORE,
			    gen_rtx_REG (SImode, CR0_REGNO + i));

	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
  else
    for (i = 0; i < 8; i++)
      if (save_reg_p (CR0_REGNO + i))
	{
	  rtx insn = emit_insn (gen_movsi_to_cr_one
				 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));

	  /* For the ELFv2 ABI we generate a CFA_RESTORE for each
	     CR field separately, attached to the insn that in fact
	     restores this particular CR field.  */
	  if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
	    {
	      add_reg_note (insn, REG_CFA_RESTORE,
			    gen_rtx_REG (SImode, CR0_REGNO + i));

	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}

  /* For other ABIs, we just generate a single CFA_RESTORE for CR2.  */
  if (!exit_func && DEFAULT_ABI != ABI_ELFv2
      && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
    {
      /* Attach the note to whatever insn was emitted last above.  */
      rtx_insn *insn = get_last_insn ();
      rtx cr = gen_rtx_REG (SImode, CR2_REGNO);

      add_reg_note (insn, REG_CFA_RESTORE, cr);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
30857 | ||
30858 | /* Like cr, the move to lr instruction can be scheduled after the | |
30859 | stack deallocation, but unlike cr, its stack frame save is still | |
30860 | valid. So we only need to emit the cfa_restore on the correct | |
30861 | instruction. */ | |
30862 | ||
30863 | static void | |
30864 | load_lr_save (int regno, rtx frame_reg_rtx, int offset) | |
30865 | { | |
30866 | rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset); | |
30867 | rtx reg = gen_rtx_REG (Pmode, regno); | |
30868 | ||
30869 | emit_move_insn (reg, mem); | |
30870 | } | |
30871 | ||
30872 | static void | |
30873 | restore_saved_lr (int regno, bool exit_func) | |
30874 | { | |
30875 | rtx reg = gen_rtx_REG (Pmode, regno); | |
30876 | rtx lr = gen_rtx_REG (Pmode, LR_REGNO); | |
30877 | rtx_insn *insn = emit_move_insn (lr, reg); | |
30878 | ||
30879 | if (!exit_func && flag_shrink_wrap) | |
30880 | { | |
30881 | add_reg_note (insn, REG_CFA_RESTORE, lr); | |
30882 | RTX_FRAME_RELATED_P (insn) = 1; | |
30883 | } | |
30884 | } | |
30885 | ||
30886 | static rtx | |
30887 | add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores) | |
30888 | { | |
30889 | if (DEFAULT_ABI == ABI_ELFv2) | |
30890 | { | |
30891 | int i; | |
30892 | for (i = 0; i < 8; i++) | |
30893 | if (save_reg_p (CR0_REGNO + i)) | |
30894 | { | |
30895 | rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i); | |
30896 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr, | |
30897 | cfa_restores); | |
30898 | } | |
30899 | } | |
30900 | else if (info->cr_save_p) | |
30901 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, | |
30902 | gen_rtx_REG (SImode, CR2_REGNO), | |
30903 | cfa_restores); | |
30904 | ||
30905 | if (info->lr_save_p) | |
30906 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, | |
30907 | gen_rtx_REG (Pmode, LR_REGNO), | |
30908 | cfa_restores); | |
30909 | return cfa_restores; | |
30910 | } | |
30911 | ||
/* Return true if OFFSET from the stack pointer can be clobbered by
   signals.  V.4 doesn't have any stack cushion; the AIX ABIs have a
   220-byte (32-bit) or 288-byte (64-bit) region below the stack
   pointer that is not clobbered by signals.  */

static inline bool
30916 | static inline bool | |
30917 | offset_below_red_zone_p (HOST_WIDE_INT offset) | |
30918 | { | |
30919 | return offset < (DEFAULT_ABI == ABI_V4 | |
30920 | ? 0 | |
30921 | : TARGET_32BIT ? -220 : -288); | |
30922 | } | |
30923 | ||
30924 | /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */ | |
30925 | ||
30926 | static void | |
30927 | emit_cfa_restores (rtx cfa_restores) | |
30928 | { | |
30929 | rtx_insn *insn = get_last_insn (); | |
30930 | rtx *loc = ®_NOTES (insn); | |
30931 | ||
30932 | while (*loc) | |
30933 | loc = &XEXP (*loc, 1); | |
30934 | *loc = cfa_restores; | |
30935 | RTX_FRAME_RELATED_P (insn) = 1; | |
30936 | } | |
30937 | ||
30938 | /* Emit function epilogue as insns. */ | |
30939 | ||
30940 | void | |
30941 | rs6000_emit_epilogue (int sibcall) | |
30942 | { | |
30943 | rs6000_stack_t *info; | |
30944 | int restoring_GPRs_inline; | |
30945 | int restoring_FPRs_inline; | |
30946 | int using_load_multiple; | |
30947 | int using_mtcr_multiple; | |
30948 | int use_backchain_to_restore_sp; | |
30949 | int restore_lr; | |
30950 | int strategy; | |
30951 | HOST_WIDE_INT frame_off = 0; | |
30952 | rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1); | |
30953 | rtx frame_reg_rtx = sp_reg_rtx; | |
30954 | rtx cfa_restores = NULL_RTX; | |
30955 | rtx insn; | |
30956 | rtx cr_save_reg = NULL_RTX; | |
30957 | machine_mode reg_mode = Pmode; | |
30958 | int reg_size = TARGET_32BIT ? 4 : 8; | |
30959 | machine_mode fp_reg_mode = (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) | |
30960 | ? DFmode : SFmode; | |
30961 | int fp_reg_size = 8; | |
30962 | int i; | |
30963 | bool exit_func; | |
30964 | unsigned ptr_regno; | |
30965 | ||
30966 | info = rs6000_stack_info (); | |
30967 | ||
30968 | if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0) | |
30969 | { | |
30970 | reg_mode = V2SImode; | |
30971 | reg_size = 8; | |
30972 | } | |
30973 | ||
30974 | strategy = info->savres_strategy; | |
30975 | using_load_multiple = strategy & REST_MULTIPLE; | |
30976 | restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS); | |
30977 | restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS); | |
30978 | using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601 | |
30979 | || rs6000_cpu == PROCESSOR_PPC603 | |
30980 | || rs6000_cpu == PROCESSOR_PPC750 | |
30981 | || optimize_size); | |
30982 | /* Restore via the backchain when we have a large frame, since this | |
30983 | is more efficient than an addis, addi pair. The second condition | |
30984 | here will not trigger at the moment; We don't actually need a | |
30985 | frame pointer for alloca, but the generic parts of the compiler | |
30986 | give us one anyway. */ | |
30987 | use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p | |
30988 | ? info->lr_save_offset | |
30989 | : 0) > 32767 | |
30990 | || (cfun->calls_alloca | |
30991 | && !frame_pointer_needed)); | |
30992 | restore_lr = (info->lr_save_p | |
30993 | && (restoring_FPRs_inline | |
30994 | || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR)) | |
30995 | && (restoring_GPRs_inline | |
30996 | || info->first_fp_reg_save < 64) | |
30997 | && !cfun->machine->lr_is_wrapped_separately); | |
30998 | ||
30999 | ||
31000 | if (WORLD_SAVE_P (info)) | |
31001 | { | |
31002 | int i, j; | |
31003 | char rname[30]; | |
31004 | const char *alloc_rname; | |
31005 | rtvec p; | |
31006 | ||
31007 | /* eh_rest_world_r10 will return to the location saved in the LR | |
31008 | stack slot (which is not likely to be our caller.) | |
31009 | Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8. | |
31010 | rest_world is similar, except any R10 parameter is ignored. | |
31011 | The exception-handling stuff that was here in 2.95 is no | |
31012 | longer necessary. */ | |
31013 | ||
31014 | p = rtvec_alloc (9 | |
31015 | + 32 - info->first_gp_reg_save | |
31016 | + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save | |
31017 | + 63 + 1 - info->first_fp_reg_save); | |
31018 | ||
31019 | strcpy (rname, ((crtl->calls_eh_return) ? | |
31020 | "*eh_rest_world_r10" : "*rest_world")); | |
31021 | alloc_rname = ggc_strdup (rname); | |
31022 | ||
31023 | j = 0; | |
31024 | RTVEC_ELT (p, j++) = ret_rtx; | |
31025 | RTVEC_ELT (p, j++) | |
31026 | = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname)); | |
31027 | /* The instruction pattern requires a clobber here; | |
31028 | it is shared with the restVEC helper. */ | |
31029 | RTVEC_ELT (p, j++) | |
31030 | = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11)); | |
31031 | ||
31032 | { | |
31033 | /* CR register traditionally saved as CR2. */ | |
31034 | rtx reg = gen_rtx_REG (SImode, CR2_REGNO); | |
31035 | RTVEC_ELT (p, j++) | |
31036 | = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset); | |
31037 | if (flag_shrink_wrap) | |
31038 | { | |
31039 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, | |
31040 | gen_rtx_REG (Pmode, LR_REGNO), | |
31041 | cfa_restores); | |
31042 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); | |
31043 | } | |
31044 | } | |
31045 | ||
31046 | for (i = 0; i < 32 - info->first_gp_reg_save; i++) | |
31047 | { | |
31048 | rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i); | |
31049 | RTVEC_ELT (p, j++) | |
31050 | = gen_frame_load (reg, | |
31051 | frame_reg_rtx, info->gp_save_offset + reg_size * i); | |
31052 | if (flag_shrink_wrap) | |
31053 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); | |
31054 | } | |
31055 | for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++) | |
31056 | { | |
31057 | rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i); | |
31058 | RTVEC_ELT (p, j++) | |
31059 | = gen_frame_load (reg, | |
31060 | frame_reg_rtx, info->altivec_save_offset + 16 * i); | |
31061 | if (flag_shrink_wrap) | |
31062 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); | |
31063 | } | |
31064 | for (i = 0; info->first_fp_reg_save + i <= 63; i++) | |
31065 | { | |
31066 | rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT | |
31067 | ? DFmode : SFmode), | |
31068 | info->first_fp_reg_save + i); | |
31069 | RTVEC_ELT (p, j++) | |
31070 | = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i); | |
31071 | if (flag_shrink_wrap) | |
31072 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); | |
31073 | } | |
31074 | RTVEC_ELT (p, j++) | |
31075 | = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0)); | |
31076 | RTVEC_ELT (p, j++) | |
31077 | = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12)); | |
31078 | RTVEC_ELT (p, j++) | |
31079 | = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7)); | |
31080 | RTVEC_ELT (p, j++) | |
31081 | = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8)); | |
31082 | RTVEC_ELT (p, j++) | |
31083 | = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10)); | |
31084 | insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p)); | |
31085 | ||
31086 | if (flag_shrink_wrap) | |
31087 | { | |
31088 | REG_NOTES (insn) = cfa_restores; | |
31089 | add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx); | |
31090 | RTX_FRAME_RELATED_P (insn) = 1; | |
31091 | } | |
31092 | return; | |
31093 | } | |
31094 | ||
31095 | /* frame_reg_rtx + frame_off points to the top of this stack frame. */ | |
31096 | if (info->push_p) | |
31097 | frame_off = info->total_size; | |
31098 | ||
31099 | /* Restore AltiVec registers if we must do so before adjusting the | |
31100 | stack. */ | |
31101 | if (info->altivec_size != 0 | |
31102 | && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP | |
31103 | || (DEFAULT_ABI != ABI_V4 | |
31104 | && offset_below_red_zone_p (info->altivec_save_offset)))) | |
31105 | { | |
31106 | int i; | |
31107 | int scratch_regno = ptr_regno_for_savres (SAVRES_VR); | |
31108 | ||
31109 | gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12); | |
31110 | if (use_backchain_to_restore_sp) | |
31111 | { | |
31112 | int frame_regno = 11; | |
31113 | ||
31114 | if ((strategy & REST_INLINE_VRS) == 0) | |
31115 | { | |
31116 | /* Of r11 and r12, select the one not clobbered by an | |
31117 | out-of-line restore function for the frame register. */ | |
31118 | frame_regno = 11 + 12 - scratch_regno; | |
31119 | } | |
31120 | frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno); | |
31121 | emit_move_insn (frame_reg_rtx, | |
31122 | gen_rtx_MEM (Pmode, sp_reg_rtx)); | |
31123 | frame_off = 0; | |
31124 | } | |
31125 | else if (frame_pointer_needed) | |
31126 | frame_reg_rtx = hard_frame_pointer_rtx; | |
31127 | ||
31128 | if ((strategy & REST_INLINE_VRS) == 0) | |
31129 | { | |
31130 | int end_save = info->altivec_save_offset + info->altivec_size; | |
31131 | int ptr_off; | |
31132 | rtx ptr_reg = gen_rtx_REG (Pmode, 0); | |
31133 | rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); | |
31134 | ||
31135 | if (end_save + frame_off != 0) | |
31136 | { | |
31137 | rtx offset = GEN_INT (end_save + frame_off); | |
31138 | ||
31139 | emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset)); | |
31140 | } | |
31141 | else | |
31142 | emit_move_insn (ptr_reg, frame_reg_rtx); | |
31143 | ||
31144 | ptr_off = -end_save; | |
31145 | insn = rs6000_emit_savres_rtx (info, scratch_reg, | |
31146 | info->altivec_save_offset + ptr_off, | |
31147 | 0, V4SImode, SAVRES_VR); | |
31148 | } | |
31149 | else | |
31150 | { | |
31151 | for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) | |
31152 | if (info->vrsave_mask & ALTIVEC_REG_BIT (i)) | |
31153 | { | |
31154 | rtx addr, areg, mem, insn; | |
31155 | rtx reg = gen_rtx_REG (V4SImode, i); | |
31156 | HOST_WIDE_INT offset | |
31157 | = (info->altivec_save_offset + frame_off | |
31158 | + 16 * (i - info->first_altivec_reg_save)); | |
31159 | ||
31160 | if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset)) | |
31161 | { | |
31162 | mem = gen_frame_mem (V4SImode, | |
31163 | gen_rtx_PLUS (Pmode, frame_reg_rtx, | |
31164 | GEN_INT (offset))); | |
31165 | insn = gen_rtx_SET (reg, mem); | |
31166 | } | |
31167 | else | |
31168 | { | |
31169 | areg = gen_rtx_REG (Pmode, 0); | |
31170 | emit_move_insn (areg, GEN_INT (offset)); | |
31171 | ||
31172 | /* AltiVec addressing mode is [reg+reg]. */ | |
31173 | addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg); | |
31174 | mem = gen_frame_mem (V4SImode, addr); | |
31175 | ||
31176 | /* Rather than emitting a generic move, force use of the | |
31177 | lvx instruction, which we always want. In particular we | |
31178 | don't want lxvd2x/xxpermdi for little endian. */ | |
31179 | insn = gen_altivec_lvx_v4si_internal (reg, mem); | |
31180 | } | |
31181 | ||
31182 | (void) emit_insn (insn); | |
31183 | } | |
31184 | } | |
31185 | ||
31186 | for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) | |
31187 | if (((strategy & REST_INLINE_VRS) == 0 | |
31188 | || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0) | |
31189 | && (flag_shrink_wrap | |
31190 | || (offset_below_red_zone_p | |
31191 | (info->altivec_save_offset | |
31192 | + 16 * (i - info->first_altivec_reg_save))))) | |
31193 | { | |
31194 | rtx reg = gen_rtx_REG (V4SImode, i); | |
31195 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); | |
31196 | } | |
31197 | } | |
31198 | ||
31199 | /* Restore VRSAVE if we must do so before adjusting the stack. */ | |
31200 | if (info->vrsave_size != 0 | |
31201 | && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP | |
31202 | || (DEFAULT_ABI != ABI_V4 | |
31203 | && offset_below_red_zone_p (info->vrsave_save_offset)))) | |
31204 | { | |
31205 | rtx reg; | |
31206 | ||
31207 | if (frame_reg_rtx == sp_reg_rtx) | |
31208 | { | |
31209 | if (use_backchain_to_restore_sp) | |
31210 | { | |
31211 | frame_reg_rtx = gen_rtx_REG (Pmode, 11); | |
31212 | emit_move_insn (frame_reg_rtx, | |
31213 | gen_rtx_MEM (Pmode, sp_reg_rtx)); | |
31214 | frame_off = 0; | |
31215 | } | |
31216 | else if (frame_pointer_needed) | |
31217 | frame_reg_rtx = hard_frame_pointer_rtx; | |
31218 | } | |
31219 | ||
31220 | reg = gen_rtx_REG (SImode, 12); | |
31221 | emit_insn (gen_frame_load (reg, frame_reg_rtx, | |
31222 | info->vrsave_save_offset + frame_off)); | |
31223 | ||
31224 | emit_insn (generate_set_vrsave (reg, info, 1)); | |
31225 | } | |
31226 | ||
31227 | insn = NULL_RTX; | |
31228 | /* If we have a large stack frame, restore the old stack pointer | |
31229 | using the backchain. */ | |
31230 | if (use_backchain_to_restore_sp) | |
31231 | { | |
31232 | if (frame_reg_rtx == sp_reg_rtx) | |
31233 | { | |
31234 | /* Under V.4, don't reset the stack pointer until after we're done | |
31235 | loading the saved registers. */ | |
31236 | if (DEFAULT_ABI == ABI_V4) | |
31237 | frame_reg_rtx = gen_rtx_REG (Pmode, 11); | |
31238 | ||
31239 | insn = emit_move_insn (frame_reg_rtx, | |
31240 | gen_rtx_MEM (Pmode, sp_reg_rtx)); | |
31241 | frame_off = 0; | |
31242 | } | |
31243 | else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP | |
31244 | && DEFAULT_ABI == ABI_V4) | |
31245 | /* frame_reg_rtx has been set up by the altivec restore. */ | |
31246 | ; | |
31247 | else | |
31248 | { | |
31249 | insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx); | |
31250 | frame_reg_rtx = sp_reg_rtx; | |
31251 | } | |
31252 | } | |
31253 | /* If we have a frame pointer, we can restore the old stack pointer | |
31254 | from it. */ | |
31255 | else if (frame_pointer_needed) | |
31256 | { | |
31257 | frame_reg_rtx = sp_reg_rtx; | |
31258 | if (DEFAULT_ABI == ABI_V4) | |
31259 | frame_reg_rtx = gen_rtx_REG (Pmode, 11); | |
31260 | /* Prevent reordering memory accesses against stack pointer restore. */ | |
31261 | else if (cfun->calls_alloca | |
31262 | || offset_below_red_zone_p (-info->total_size)) | |
31263 | rs6000_emit_stack_tie (frame_reg_rtx, true); | |
31264 | ||
31265 | insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx, | |
31266 | GEN_INT (info->total_size))); | |
31267 | frame_off = 0; | |
31268 | } | |
31269 | else if (info->push_p | |
31270 | && DEFAULT_ABI != ABI_V4 | |
31271 | && !crtl->calls_eh_return) | |
31272 | { | |
31273 | /* Prevent reordering memory accesses against stack pointer restore. */ | |
31274 | if (cfun->calls_alloca | |
31275 | || offset_below_red_zone_p (-info->total_size)) | |
31276 | rs6000_emit_stack_tie (frame_reg_rtx, false); | |
31277 | insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, | |
31278 | GEN_INT (info->total_size))); | |
31279 | frame_off = 0; | |
31280 | } | |
31281 | if (insn && frame_reg_rtx == sp_reg_rtx) | |
31282 | { | |
31283 | if (cfa_restores) | |
31284 | { | |
31285 | REG_NOTES (insn) = cfa_restores; | |
31286 | cfa_restores = NULL_RTX; | |
31287 | } | |
31288 | add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx); | |
31289 | RTX_FRAME_RELATED_P (insn) = 1; | |
31290 | } | |
31291 | ||
31292 | /* Restore AltiVec registers if we have not done so already. */ | |
31293 | if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP | |
31294 | && info->altivec_size != 0 | |
31295 | && (DEFAULT_ABI == ABI_V4 | |
31296 | || !offset_below_red_zone_p (info->altivec_save_offset))) | |
31297 | { | |
31298 | int i; | |
31299 | ||
31300 | if ((strategy & REST_INLINE_VRS) == 0) | |
31301 | { | |
31302 | int end_save = info->altivec_save_offset + info->altivec_size; | |
31303 | int ptr_off; | |
31304 | rtx ptr_reg = gen_rtx_REG (Pmode, 0); | |
31305 | int scratch_regno = ptr_regno_for_savres (SAVRES_VR); | |
31306 | rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno); | |
31307 | ||
31308 | if (end_save + frame_off != 0) | |
31309 | { | |
31310 | rtx offset = GEN_INT (end_save + frame_off); | |
31311 | ||
31312 | emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset)); | |
31313 | } | |
31314 | else | |
31315 | emit_move_insn (ptr_reg, frame_reg_rtx); | |
31316 | ||
31317 | ptr_off = -end_save; | |
31318 | insn = rs6000_emit_savres_rtx (info, scratch_reg, | |
31319 | info->altivec_save_offset + ptr_off, | |
31320 | 0, V4SImode, SAVRES_VR); | |
31321 | if (REGNO (frame_reg_rtx) == REGNO (scratch_reg)) | |
31322 | { | |
31323 | /* Frame reg was clobbered by out-of-line save. Restore it | |
31324 | from ptr_reg, and if we are calling out-of-line gpr or | |
31325 | fpr restore set up the correct pointer and offset. */ | |
31326 | unsigned newptr_regno = 1; | |
31327 | if (!restoring_GPRs_inline) | |
31328 | { | |
31329 | bool lr = info->gp_save_offset + info->gp_size == 0; | |
31330 | int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0); | |
31331 | newptr_regno = ptr_regno_for_savres (sel); | |
31332 | end_save = info->gp_save_offset + info->gp_size; | |
31333 | } | |
31334 | else if (!restoring_FPRs_inline) | |
31335 | { | |
31336 | bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR); | |
31337 | int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0); | |
31338 | newptr_regno = ptr_regno_for_savres (sel); | |
31339 | end_save = info->fp_save_offset + info->fp_size; | |
31340 | } | |
31341 | ||
31342 | if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno) | |
31343 | frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno); | |
31344 | ||
31345 | if (end_save + ptr_off != 0) | |
31346 | { | |
31347 | rtx offset = GEN_INT (end_save + ptr_off); | |
31348 | ||
31349 | frame_off = -end_save; | |
31350 | if (TARGET_32BIT) | |
31351 | emit_insn (gen_addsi3_carry (frame_reg_rtx, | |
31352 | ptr_reg, offset)); | |
31353 | else | |
31354 | emit_insn (gen_adddi3_carry (frame_reg_rtx, | |
31355 | ptr_reg, offset)); | |
31356 | } | |
31357 | else | |
31358 | { | |
31359 | frame_off = ptr_off; | |
31360 | emit_move_insn (frame_reg_rtx, ptr_reg); | |
31361 | } | |
31362 | } | |
31363 | } | |
31364 | else | |
31365 | { | |
31366 | for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) | |
31367 | if (info->vrsave_mask & ALTIVEC_REG_BIT (i)) | |
31368 | { | |
31369 | rtx addr, areg, mem, insn; | |
31370 | rtx reg = gen_rtx_REG (V4SImode, i); | |
31371 | HOST_WIDE_INT offset | |
31372 | = (info->altivec_save_offset + frame_off | |
31373 | + 16 * (i - info->first_altivec_reg_save)); | |
31374 | ||
31375 | if (TARGET_P9_DFORM_VECTOR && quad_address_offset_p (offset)) | |
31376 | { | |
31377 | mem = gen_frame_mem (V4SImode, | |
31378 | gen_rtx_PLUS (Pmode, frame_reg_rtx, | |
31379 | GEN_INT (offset))); | |
31380 | insn = gen_rtx_SET (reg, mem); | |
31381 | } | |
31382 | else | |
31383 | { | |
31384 | areg = gen_rtx_REG (Pmode, 0); | |
31385 | emit_move_insn (areg, GEN_INT (offset)); | |
31386 | ||
31387 | /* AltiVec addressing mode is [reg+reg]. */ | |
31388 | addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg); | |
31389 | mem = gen_frame_mem (V4SImode, addr); | |
31390 | ||
31391 | /* Rather than emitting a generic move, force use of the | |
31392 | lvx instruction, which we always want. In particular we | |
31393 | don't want lxvd2x/xxpermdi for little endian. */ | |
31394 | insn = gen_altivec_lvx_v4si_internal (reg, mem); | |
31395 | } | |
31396 | ||
31397 | (void) emit_insn (insn); | |
31398 | } | |
31399 | } | |
31400 | ||
31401 | for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) | |
31402 | if (((strategy & REST_INLINE_VRS) == 0 | |
31403 | || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0) | |
31404 | && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)) | |
31405 | { | |
31406 | rtx reg = gen_rtx_REG (V4SImode, i); | |
31407 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); | |
31408 | } | |
31409 | } | |
31410 | ||
31411 | /* Restore VRSAVE if we have not done so already. */ | |
31412 | if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP | |
31413 | && info->vrsave_size != 0 | |
31414 | && (DEFAULT_ABI == ABI_V4 | |
31415 | || !offset_below_red_zone_p (info->vrsave_save_offset))) | |
31416 | { | |
31417 | rtx reg; | |
31418 | ||
31419 | reg = gen_rtx_REG (SImode, 12); | |
31420 | emit_insn (gen_frame_load (reg, frame_reg_rtx, | |
31421 | info->vrsave_save_offset + frame_off)); | |
31422 | ||
31423 | emit_insn (generate_set_vrsave (reg, info, 1)); | |
31424 | } | |
31425 | ||
31426 | /* If we exit by an out-of-line restore function on ABI_V4 then that | |
31427 | function will deallocate the stack, so we don't need to worry | |
31428 | about the unwinder restoring cr from an invalid stack frame | |
31429 | location. */ | |
31430 | exit_func = (!restoring_FPRs_inline | |
31431 | || (!restoring_GPRs_inline | |
31432 | && info->first_fp_reg_save == 64)); | |
31433 | ||
31434 | /* In the ELFv2 ABI we need to restore all call-saved CR fields from | |
31435 | *separate* slots if the routine calls __builtin_eh_return, so | |
31436 | that they can be independently restored by the unwinder. */ | |
31437 | if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return) | |
31438 | { | |
31439 | int i, cr_off = info->ehcr_offset; | |
31440 | ||
31441 | for (i = 0; i < 8; i++) | |
31442 | if (!call_used_regs[CR0_REGNO + i]) | |
31443 | { | |
31444 | rtx reg = gen_rtx_REG (SImode, 0); | |
31445 | emit_insn (gen_frame_load (reg, frame_reg_rtx, | |
31446 | cr_off + frame_off)); | |
31447 | ||
31448 | insn = emit_insn (gen_movsi_to_cr_one | |
31449 | (gen_rtx_REG (CCmode, CR0_REGNO + i), reg)); | |
31450 | ||
31451 | if (!exit_func && flag_shrink_wrap) | |
31452 | { | |
31453 | add_reg_note (insn, REG_CFA_RESTORE, | |
31454 | gen_rtx_REG (SImode, CR0_REGNO + i)); | |
31455 | ||
31456 | RTX_FRAME_RELATED_P (insn) = 1; | |
31457 | } | |
31458 | ||
31459 | cr_off += reg_size; | |
31460 | } | |
31461 | } | |
31462 | ||
31463 | /* Get the old lr if we saved it. If we are restoring registers | |
31464 | out-of-line, then the out-of-line routines can do this for us. */ | |
31465 | if (restore_lr && restoring_GPRs_inline) | |
31466 | load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off); | |
31467 | ||
31468 | /* Get the old cr if we saved it. */ | |
31469 | if (info->cr_save_p) | |
31470 | { | |
31471 | unsigned cr_save_regno = 12; | |
31472 | ||
31473 | if (!restoring_GPRs_inline) | |
31474 | { | |
31475 | /* Ensure we don't use the register used by the out-of-line | |
31476 | gpr register restore below. */ | |
31477 | bool lr = info->gp_save_offset + info->gp_size == 0; | |
31478 | int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0); | |
31479 | int gpr_ptr_regno = ptr_regno_for_savres (sel); | |
31480 | ||
31481 | if (gpr_ptr_regno == 12) | |
31482 | cr_save_regno = 11; | |
31483 | gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno); | |
31484 | } | |
31485 | else if (REGNO (frame_reg_rtx) == 12) | |
31486 | cr_save_regno = 11; | |
31487 | ||
31488 | cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx, | |
31489 | info->cr_save_offset + frame_off, | |
31490 | exit_func); | |
31491 | } | |
31492 | ||
31493 | /* Set LR here to try to overlap restores below. */ | |
31494 | if (restore_lr && restoring_GPRs_inline) | |
31495 | restore_saved_lr (0, exit_func); | |
31496 | ||
31497 | /* Load exception handler data registers, if needed. */ | |
31498 | if (crtl->calls_eh_return) | |
31499 | { | |
31500 | unsigned int i, regno; | |
31501 | ||
31502 | if (TARGET_AIX) | |
31503 | { | |
31504 | rtx reg = gen_rtx_REG (reg_mode, 2); | |
31505 | emit_insn (gen_frame_load (reg, frame_reg_rtx, | |
31506 | frame_off + RS6000_TOC_SAVE_SLOT)); | |
31507 | } | |
31508 | ||
31509 | for (i = 0; ; ++i) | |
31510 | { | |
31511 | rtx mem; | |
31512 | ||
31513 | regno = EH_RETURN_DATA_REGNO (i); | |
31514 | if (regno == INVALID_REGNUM) | |
31515 | break; | |
31516 | ||
31517 | /* Note: possible use of r0 here to address SPE regs. */ | |
31518 | mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx, | |
31519 | info->ehrd_offset + frame_off | |
31520 | + reg_size * (int) i); | |
31521 | ||
31522 | emit_move_insn (gen_rtx_REG (reg_mode, regno), mem); | |
31523 | } | |
31524 | } | |
31525 | ||
31526 | /* Restore GPRs. This is done as a PARALLEL if we are using | |
31527 | the load-multiple instructions. */ | |
31528 | if (TARGET_SPE_ABI | |
31529 | && info->spe_64bit_regs_used | |
31530 | && info->first_gp_reg_save != 32) | |
31531 | { | |
31532 | /* Determine whether we can address all of the registers that need | |
31533 | to be saved with an offset from frame_reg_rtx that fits in | |
31534 | the small const field for SPE memory instructions. */ | |
31535 | int spe_regs_addressable | |
31536 | = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off | |
31537 | + reg_size * (32 - info->first_gp_reg_save - 1)) | |
31538 | && restoring_GPRs_inline); | |
31539 | ||
31540 | if (!spe_regs_addressable) | |
31541 | { | |
31542 | int ool_adjust = 0; | |
31543 | rtx old_frame_reg_rtx = frame_reg_rtx; | |
31544 | /* Make r11 point to the start of the SPE save area. We worried about | |
31545 | not clobbering it when we were saving registers in the prologue. | |
31546 | There's no need to worry here because the static chain is passed | |
31547 | anew to every function. */ | |
31548 | ||
31549 | if (!restoring_GPRs_inline) | |
31550 | ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO); | |
31551 | frame_reg_rtx = gen_rtx_REG (Pmode, 11); | |
31552 | emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx, | |
31553 | GEN_INT (info->spe_gp_save_offset | |
31554 | + frame_off | |
31555 | - ool_adjust))); | |
31556 | /* Keep the invariant that frame_reg_rtx + frame_off points | |
31557 | at the top of the stack frame. */ | |
31558 | frame_off = -info->spe_gp_save_offset + ool_adjust; | |
31559 | } | |
31560 | ||
31561 | if (restoring_GPRs_inline) | |
31562 | { | |
31563 | HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off; | |
31564 | ||
31565 | for (i = 0; i < 32 - info->first_gp_reg_save; i++) | |
31566 | if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i)) | |
31567 | { | |
31568 | rtx offset, addr, mem, reg; | |
31569 | ||
31570 | /* We're doing all this to ensure that the immediate offset | |
31571 | fits into the immediate field of 'evldd'. */ | |
31572 | gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i)); | |
31573 | ||
31574 | offset = GEN_INT (spe_offset + reg_size * i); | |
31575 | addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset); | |
31576 | mem = gen_rtx_MEM (V2SImode, addr); | |
31577 | reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i); | |
31578 | ||
31579 | emit_move_insn (reg, mem); | |
31580 | } | |
31581 | } | |
31582 | else | |
31583 | rs6000_emit_savres_rtx (info, frame_reg_rtx, | |
31584 | info->spe_gp_save_offset + frame_off, | |
31585 | info->lr_save_offset + frame_off, | |
31586 | reg_mode, | |
31587 | SAVRES_GPR | SAVRES_LR); | |
31588 | } | |
31589 | else if (!restoring_GPRs_inline) | |
31590 | { | |
31591 | /* We are jumping to an out-of-line function. */ | |
31592 | rtx ptr_reg; | |
31593 | int end_save = info->gp_save_offset + info->gp_size; | |
31594 | bool can_use_exit = end_save == 0; | |
31595 | int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0); | |
31596 | int ptr_off; | |
31597 | ||
31598 | /* Emit stack reset code if we need it. */ | |
31599 | ptr_regno = ptr_regno_for_savres (sel); | |
31600 | ptr_reg = gen_rtx_REG (Pmode, ptr_regno); | |
31601 | if (can_use_exit) | |
31602 | rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno); | |
31603 | else if (end_save + frame_off != 0) | |
31604 | emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, | |
31605 | GEN_INT (end_save + frame_off))); | |
31606 | else if (REGNO (frame_reg_rtx) != ptr_regno) | |
31607 | emit_move_insn (ptr_reg, frame_reg_rtx); | |
31608 | if (REGNO (frame_reg_rtx) == ptr_regno) | |
31609 | frame_off = -end_save; | |
31610 | ||
31611 | if (can_use_exit && info->cr_save_p) | |
31612 | restore_saved_cr (cr_save_reg, using_mtcr_multiple, true); | |
31613 | ||
31614 | ptr_off = -end_save; | |
31615 | rs6000_emit_savres_rtx (info, ptr_reg, | |
31616 | info->gp_save_offset + ptr_off, | |
31617 | info->lr_save_offset + ptr_off, | |
31618 | reg_mode, sel); | |
31619 | } | |
31620 | else if (using_load_multiple) | |
31621 | { | |
31622 | rtvec p; | |
31623 | p = rtvec_alloc (32 - info->first_gp_reg_save); | |
31624 | for (i = 0; i < 32 - info->first_gp_reg_save; i++) | |
31625 | RTVEC_ELT (p, i) | |
31626 | = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i), | |
31627 | frame_reg_rtx, | |
31628 | info->gp_save_offset + frame_off + reg_size * i); | |
31629 | emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); | |
31630 | } | |
31631 | else | |
31632 | { | |
31633 | int offset = info->gp_save_offset + frame_off; | |
31634 | for (i = info->first_gp_reg_save; i < 32; i++) | |
31635 | { | |
31636 | if (rs6000_reg_live_or_pic_offset_p (i) | |
31637 | && !cfun->machine->gpr_is_wrapped_separately[i]) | |
31638 | { | |
31639 | rtx reg = gen_rtx_REG (reg_mode, i); | |
31640 | emit_insn (gen_frame_load (reg, frame_reg_rtx, offset)); | |
31641 | } | |
31642 | ||
31643 | offset += reg_size; | |
31644 | } | |
31645 | } | |
31646 | ||
31647 | if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) | |
31648 | { | |
31649 | /* If the frame pointer was used then we can't delay emitting | |
31650 | a REG_CFA_DEF_CFA note. This must happen on the insn that | |
31651 | restores the frame pointer, r31. We may have already emitted | |
31652 | a REG_CFA_DEF_CFA note, but that's OK; A duplicate is | |
31653 | discarded by dwarf2cfi.c/dwarf2out.c, and in any case would | |
31654 | be harmless if emitted. */ | |
31655 | if (frame_pointer_needed) | |
31656 | { | |
31657 | insn = get_last_insn (); | |
31658 | add_reg_note (insn, REG_CFA_DEF_CFA, | |
31659 | plus_constant (Pmode, frame_reg_rtx, frame_off)); | |
31660 | RTX_FRAME_RELATED_P (insn) = 1; | |
31661 | } | |
31662 | ||
31663 | /* Set up cfa_restores. We always need these when | |
31664 | shrink-wrapping. If not shrink-wrapping then we only need | |
31665 | the cfa_restore when the stack location is no longer valid. | |
31666 | The cfa_restores must be emitted on or before the insn that | |
31667 | invalidates the stack, and of course must not be emitted | |
31668 | before the insn that actually does the restore. The latter | |
31669 | is why it is a bad idea to emit the cfa_restores as a group | |
31670 | on the last instruction here that actually does a restore: | |
31671 | That insn may be reordered with respect to others doing | |
31672 | restores. */ | |
31673 | if (flag_shrink_wrap | |
31674 | && !restoring_GPRs_inline | |
31675 | && info->first_fp_reg_save == 64) | |
31676 | cfa_restores = add_crlr_cfa_restore (info, cfa_restores); | |
31677 | ||
31678 | for (i = info->first_gp_reg_save; i < 32; i++) | |
31679 | if (!restoring_GPRs_inline | |
31680 | || using_load_multiple | |
31681 | || rs6000_reg_live_or_pic_offset_p (i)) | |
31682 | { | |
31683 | if (cfun->machine->gpr_is_wrapped_separately[i]) | |
31684 | continue; | |
31685 | ||
31686 | rtx reg = gen_rtx_REG (reg_mode, i); | |
31687 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); | |
31688 | } | |
31689 | } | |
31690 | ||
31691 | if (!restoring_GPRs_inline | |
31692 | && info->first_fp_reg_save == 64) | |
31693 | { | |
31694 | /* We are jumping to an out-of-line function. */ | |
31695 | if (cfa_restores) | |
31696 | emit_cfa_restores (cfa_restores); | |
31697 | return; | |
31698 | } | |
31699 | ||
31700 | if (restore_lr && !restoring_GPRs_inline) | |
31701 | { | |
31702 | load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off); | |
31703 | restore_saved_lr (0, exit_func); | |
31704 | } | |
31705 | ||
31706 | /* Restore fpr's if we need to do it without calling a function. */ | |
31707 | if (restoring_FPRs_inline) | |
31708 | { | |
31709 | int offset = info->fp_save_offset + frame_off; | |
31710 | for (i = info->first_fp_reg_save; i < 64; i++) | |
31711 | { | |
31712 | if (save_reg_p (i) | |
31713 | && !cfun->machine->fpr_is_wrapped_separately[i - 32]) | |
31714 | { | |
31715 | rtx reg = gen_rtx_REG (fp_reg_mode, i); | |
31716 | emit_insn (gen_frame_load (reg, frame_reg_rtx, offset)); | |
31717 | if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap) | |
31718 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, | |
31719 | cfa_restores); | |
31720 | } | |
31721 | ||
31722 | offset += fp_reg_size; | |
31723 | } | |
31724 | } | |
31725 | ||
31726 | /* If we saved cr, restore it here. Just those that were used. */ | |
31727 | if (info->cr_save_p) | |
31728 | restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func); | |
31729 | ||
31730 | /* If this is V.4, unwind the stack pointer after all of the loads | |
31731 | have been done, or set up r11 if we are restoring fp out of line. */ | |
31732 | ptr_regno = 1; | |
31733 | if (!restoring_FPRs_inline) | |
31734 | { | |
31735 | bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0; | |
31736 | int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0); | |
31737 | ptr_regno = ptr_regno_for_savres (sel); | |
31738 | } | |
31739 | ||
31740 | insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno); | |
31741 | if (REGNO (frame_reg_rtx) == ptr_regno) | |
31742 | frame_off = 0; | |
31743 | ||
31744 | if (insn && restoring_FPRs_inline) | |
31745 | { | |
31746 | if (cfa_restores) | |
31747 | { | |
31748 | REG_NOTES (insn) = cfa_restores; | |
31749 | cfa_restores = NULL_RTX; | |
31750 | } | |
31751 | add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx); | |
31752 | RTX_FRAME_RELATED_P (insn) = 1; | |
31753 | } | |
31754 | ||
31755 | if (crtl->calls_eh_return) | |
31756 | { | |
31757 | rtx sa = EH_RETURN_STACKADJ_RTX; | |
31758 | emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa)); | |
31759 | } | |
31760 | ||
31761 | if (!sibcall && restoring_FPRs_inline) | |
31762 | { | |
31763 | if (cfa_restores) | |
31764 | { | |
31765 | /* We can't hang the cfa_restores off a simple return, | |
31766 | since the shrink-wrap code sometimes uses an existing | |
31767 | return. This means there might be a path from | |
31768 | pre-prologue code to this return, and dwarf2cfi code | |
31769 | wants the eh_frame unwinder state to be the same on | |
31770 | all paths to any point. So we need to emit the | |
31771 | cfa_restores before the return. For -m64 we really | |
31772 | don't need epilogue cfa_restores at all, except for | |
31773 | this irritating dwarf2cfi with shrink-wrap | |
31774 | requirement; The stack red-zone means eh_frame info | |
31775 | from the prologue telling the unwinder to restore | |
31776 | from the stack is perfectly good right to the end of | |
31777 | the function. */ | |
31778 | emit_insn (gen_blockage ()); | |
31779 | emit_cfa_restores (cfa_restores); | |
31780 | cfa_restores = NULL_RTX; | |
31781 | } | |
31782 | ||
31783 | emit_jump_insn (targetm.gen_simple_return ()); | |
31784 | } | |
31785 | ||
31786 | if (!sibcall && !restoring_FPRs_inline) | |
31787 | { | |
31788 | bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0; | |
31789 | rtvec p = rtvec_alloc (3 + !!lr + 64 - info->first_fp_reg_save); | |
31790 | int elt = 0; | |
31791 | RTVEC_ELT (p, elt++) = ret_rtx; | |
31792 | if (lr) | |
31793 | RTVEC_ELT (p, elt++) | |
31794 | = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO)); | |
31795 | ||
31796 | /* We have to restore more than two FP registers, so branch to the | |
31797 | restore function. It will return to our caller. */ | |
31798 | int i; | |
31799 | int reg; | |
31800 | rtx sym; | |
31801 | ||
31802 | if (flag_shrink_wrap) | |
31803 | cfa_restores = add_crlr_cfa_restore (info, cfa_restores); | |
31804 | ||
31805 | sym = rs6000_savres_routine_sym (info, SAVRES_FPR | (lr ? SAVRES_LR : 0)); | |
31806 | RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, sym); | |
31807 | reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11; | |
31808 | RTVEC_ELT (p, elt++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg)); | |
31809 | ||
31810 | for (i = 0; i < 64 - info->first_fp_reg_save; i++) | |
31811 | { | |
31812 | rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i); | |
31813 | ||
31814 | RTVEC_ELT (p, elt++) | |
31815 | = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i); | |
31816 | if (flag_shrink_wrap) | |
31817 | cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); | |
31818 | } | |
31819 | ||
31820 | emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p)); | |
31821 | } | |
31822 | ||
31823 | if (cfa_restores) | |
31824 | { | |
31825 | if (sibcall) | |
31826 | /* Ensure the cfa_restores are hung off an insn that won't | |
31827 | be reordered above other restores. */ | |
31828 | emit_insn (gen_blockage ()); | |
31829 | ||
31830 | emit_cfa_restores (cfa_restores); | |
31831 | } | |
31832 | } | |
31833 | ||
31834 | /* Write function epilogue. */ | |
31835 | ||
static void
rs6000_output_function_epilogue (FILE *file)
{
#if TARGET_MACHO
  macho_branch_islands ();

  {
    rtx_insn *insn = get_last_insn ();
    rtx_insn *deleted_debug_label = NULL;

    /* Mach-O doesn't support labels at the end of objects, so if
       it looks like we might want one, take special action.

       First, collect any sequence of deleted debug labels.  */
    while (insn
	   && NOTE_P (insn)
	   && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
      {
	/* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
	   notes only, instead set their CODE_LABEL_NUMBER to -1,
	   otherwise there would be code generation differences
	   in between -g and -g0.  */
	if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  deleted_debug_label = insn;
	insn = PREV_INSN (insn);
      }

    /* Second, if we have:
       label:
	 barrier
       then this needs to be detected, so skip past the barrier.  */

    if (insn && BARRIER_P (insn))
      insn = PREV_INSN (insn);

    /* Up to now we've only seen notes or barriers.  */
    if (insn)
      {
	if (LABEL_P (insn)
	    || (NOTE_P (insn)
		&& NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))
	  /* Trailing label: <barrier>.  Emit a nop so the label is not
	     at the very end of the object.  */
	  fputs ("\tnop\n", file);
	else
	  {
	    /* Lastly, see if we have a completely empty function body.  */
	    while (insn && ! INSN_P (insn))
	      insn = PREV_INSN (insn);
	    /* If we don't find any insns, we've got an empty function body;
	       I.e. completely empty - without a return or branch.  This is
	       taken as the case where a function body has been removed
	       because it contains an inline __builtin_unreachable().  GCC
	       states that reaching __builtin_unreachable() means UB so we're
	       not obliged to do anything special; however, we want
	       non-zero-sized function bodies.  To meet this, and help the
	       user out, let's trap the case.  */
	    if (insn == NULL)
	      fputs ("\ttrap\n", file);
	  }
      }
    else if (deleted_debug_label)
      /* Only notes/barriers seen: neutralize the deleted debug labels so
	 -g and -g0 compile to identical code.  */
      for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
	if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
	  CODE_LABEL_NUMBER (insn) = -1;
  }
#endif

  /* Output a traceback table here.  See /usr/include/sys/debug.h for info
     on its format.

     We don't output a traceback table if -finhibit-size-directive was
     used.  The documentation for -finhibit-size-directive reads
     ``don't output a @code{.size} assembler directive, or anything
     else that would cause trouble if the function is split in the
     middle, and the two halves are placed at locations far apart in
     memory.''  The traceback table has this property, since it
     includes the offset from the start of the function to the
     traceback table itself.

     System V.4 Powerpc's (and the embedded ABI derived from it) use a
     different traceback table.  */
  if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
      && ! flag_inhibit_size_directive
      && rs6000_traceback != traceback_none && !cfun->is_thunk)
    {
      const char *fname = NULL;
      const char *language_string = lang_hooks.name;
      int fixed_parms = 0, float_parms = 0, parm_info = 0;
      int i;
      int optional_tbtab;
      rs6000_stack_t *info = rs6000_stack_info ();

      if (rs6000_traceback == traceback_full)
	optional_tbtab = 1;
      else if (rs6000_traceback == traceback_part)
	optional_tbtab = 0;
      else
	optional_tbtab = !optimize_size && !TARGET_ELF;

      if (optional_tbtab)
	{
	  fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
	  while (*fname == '.')	/* V.4 encodes . in the name */
	    fname++;

	  /* Need label immediately before tbtab, so we can compute
	     its offset from the function start.  */
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
	  ASM_OUTPUT_LABEL (file, fname);
	}

      /* The .tbtab pseudo-op can only be used for the first eight
	 expressions, since it can't handle the possibly variable
	 length fields that follow.  However, if you omit the optional
	 fields, the assembler outputs zeros for all optional fields
	 anyways, giving each variable length field is minimum length
	 (as defined in sys/debug.h).  Thus we can not use the .tbtab
	 pseudo-op at all.  */

      /* An all-zero word flags the start of the tbtab, for debuggers
	 that have to find it by searching forward from the entry
	 point or from the current pc.  */
      fputs ("\t.long 0\n", file);

      /* Tbtab format type.  Use format type 0.  */
      fputs ("\t.byte 0,", file);

      /* Language type.  Unfortunately, there does not seem to be any
	 official way to discover the language being compiled, so we
	 use language_string.
	 C is 0.  Fortran is 1.  Ada is 3.  C++ is 9.
	 Java is 13.  Objective-C is 14.  Objective-C++ isn't assigned
	 a number, so for now use 9.  LTO, Go, D and JIT aren't assigned
	 numbers either, so for now use 0.  */
      if (lang_GNU_C ()
	  || ! strcmp (language_string, "GNU GIMPLE")
	  || ! strcmp (language_string, "GNU Go")
	  || ! strcmp (language_string, "GNU D")
	  || ! strcmp (language_string, "libgccjit"))
	i = 0;
      else if (! strcmp (language_string, "GNU F77")
	       || lang_GNU_Fortran ())
	i = 1;
      else if (! strcmp (language_string, "GNU Ada"))
	i = 3;
      else if (lang_GNU_CXX ()
	       || ! strcmp (language_string, "GNU Objective-C++"))
	i = 9;
      else if (! strcmp (language_string, "GNU Java"))
	i = 13;
      else if (! strcmp (language_string, "GNU Objective-C"))
	i = 14;
      else
	gcc_unreachable ();
      fprintf (file, "%d,", i);

      /* 8 single bit fields: global linkage (not set for C extern linkage,
	 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
	 from start of procedure stored in tbtab, internal function, function
	 has controlled storage, function has no toc, function uses fp,
	 function logs/aborts fp operations.  */
      /* Assume that fp operations are used if any fp reg must be saved.  */
      fprintf (file, "%d,",
	       (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));

      /* 6 bitfields: function is interrupt handler, name present in
	 proc table, function calls alloca, on condition directives
	 (controls stack walks, 3 bits), saves condition reg, saves
	 link reg.  */
      /* The `function calls alloca' bit seems to be set whenever reg 31 is
	 set up as a frame pointer, even when there is no alloca call.  */
      fprintf (file, "%d,",
	       ((optional_tbtab << 6)
		| ((optional_tbtab & frame_pointer_needed) << 5)
		| (info->cr_save_p << 1)
		| (info->lr_save_p)));

      /* 3 bitfields: saves backchain, fixup code, number of fpr saved
	 (6 bits).  */
      fprintf (file, "%d,",
	       (info->push_p << 7) | (64 - info->first_fp_reg_save));

      /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits).  */
      fprintf (file, "%d,", (32 - first_reg_to_save ()));

      if (optional_tbtab)
	{
	  /* Compute the parameter info from the function decl argument
	     list.  */
	  tree decl;
	  int next_parm_info_bit = 31;

	  for (decl = DECL_ARGUMENTS (current_function_decl);
	       decl; decl = DECL_CHAIN (decl))
	    {
	      rtx parameter = DECL_INCOMING_RTL (decl);
	      machine_mode mode = GET_MODE (parameter);

	      /* Only parameters arriving in a register are described;
		 stack-passed ones are skipped entirely.  */
	      if (GET_CODE (parameter) == REG)
		{
		  if (SCALAR_FLOAT_MODE_P (mode))
		    {
		      int bits;

		      float_parms++;

		      switch (mode)
			{
			case E_SFmode:
			case E_SDmode:
			  bits = 0x2;
			  break;

			case E_DFmode:
			case E_DDmode:
			case E_TFmode:
			case E_TDmode:
			case E_IFmode:
			case E_KFmode:
			  bits = 0x3;
			  break;

			default:
			  gcc_unreachable ();
			}

		      /* If only one bit will fit, don't or in this entry.  */
		      if (next_parm_info_bit > 0)
			parm_info |= (bits << (next_parm_info_bit - 1));
		      next_parm_info_bit -= 2;
		    }
		  else
		    {
		      fixed_parms += ((GET_MODE_SIZE (mode)
				       + (UNITS_PER_WORD - 1))
				      / UNITS_PER_WORD);
		      next_parm_info_bit -= 1;
		    }
		}
	    }
	}

      /* Number of fixed point parameters.  */
      /* This is actually the number of words of fixed point parameters; thus
	 an 8 byte struct counts as 2; and thus the maximum value is 8.  */
      fprintf (file, "%d,", fixed_parms);

      /* 2 bitfields: number of floating point parameters (7 bits), parameters
	 all on stack.  */
      /* This is actually the number of fp registers that hold parameters;
	 and thus the maximum value is 13.  */
      /* Set parameters on stack bit if parameters are not in their original
	 registers, regardless of whether they are on the stack?  Xlc
	 seems to set the bit when not optimizing.  */
      fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));

      if (optional_tbtab)
	{
	  /* Optional fields follow.  Some are variable length.  */

	  /* Parameter types, left adjusted bit fields: 0 fixed, 10 single
	     float, 11 double float.  */
	  /* There is an entry for each parameter in a register, in the order
	     that they occur in the parameter list.  Any intervening arguments
	     on the stack are ignored.  If the list overflows a long (max
	     possible length 34 bits) then completely leave off all elements
	     that don't fit.  */
	  /* Only emit this long if there was at least one parameter.  */
	  if (fixed_parms || float_parms)
	    fprintf (file, "\t.long %d\n", parm_info);

	  /* Offset from start of code to tb table.  */
	  fputs ("\t.long ", file);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
	  RS6000_OUTPUT_BASENAME (file, fname);
	  putc ('-', file);
	  rs6000_output_function_entry (file, fname);
	  putc ('\n', file);

	  /* Interrupt handler mask.  */
	  /* Omit this long, since we never set the interrupt handler bit
	     above.  */

	  /* Number of CTL (controlled storage) anchors.  */
	  /* Omit this long, since the has_ctl bit is never set above.  */

	  /* Displacement into stack of each CTL anchor.  */
	  /* Omit this list of longs, because there are no CTL anchors.  */

	  /* Length of function name.  */
	  if (*fname == '*')
	    ++fname;
	  fprintf (file, "\t.short %d\n", (int) strlen (fname));

	  /* Function name.  */
	  assemble_string (fname, strlen (fname));

	  /* Register for alloca automatic storage; this is always reg 31.
	     Only emit this if the alloca bit was set above.  */
	  if (frame_pointer_needed)
	    fputs ("\t.byte 31\n", file);

	  fputs ("\t.align 2\n", file);
	}
    }

  /* Arrange to define .LCTOC1 label, if not already done.  */
  if (need_toc_init)
    {
      need_toc_init = 0;
      if (!toc_initialized)
	{
	  switch_to_section (toc_section);
	  switch_to_section (current_function_section ());
	}
    }
}
32153 | ||
32154 | /* -fsplit-stack support. */ | |
32155 | ||
32156 | /* A SYMBOL_REF for __morestack. */ | |
32157 | static GTY(()) rtx morestack_ref; | |
32158 | ||
32159 | static rtx | |
32160 | gen_add3_const (rtx rt, rtx ra, long c) | |
32161 | { | |
32162 | if (TARGET_64BIT) | |
32163 | return gen_adddi3 (rt, ra, GEN_INT (c)); | |
32164 | else | |
32165 | return gen_addsi3 (rt, ra, GEN_INT (c)); | |
32166 | } | |
32167 | ||
32168 | /* Emit -fsplit-stack prologue, which goes before the regular function | |
32169 | prologue (at local entry point in the case of ELFv2). */ | |
32170 | ||
void
rs6000_expand_split_stack_prologue (void)
{
  rs6000_stack_t *info = rs6000_stack_info ();
  unsigned HOST_WIDE_INT allocate;
  long alloc_hi, alloc_lo;
  rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
  rtx_insn *insn;

  gcc_assert (flag_split_stack && reload_completed);

  /* A leaf frame that allocates nothing needs no stack check.  */
  if (!info->push_p)
    return;

  /* The split-stack scheme needs r29; diagnose a conflicting
     -ffixed/global register declaration.  */
  if (global_regs[29])
    {
      error ("-fsplit-stack uses register r29");
      inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
	      "conflicts with %qD", global_regs_decl[29]);
    }

  allocate = info->total_size;
  if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
    {
      sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
      return;
    }
  if (morestack_ref == NULL_RTX)
    {
      morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
      SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
					   | SYMBOL_FLAG_FUNCTION);
    }

  r0 = gen_rtx_REG (Pmode, 0);
  r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  r12 = gen_rtx_REG (Pmode, 12);
  emit_insn (gen_load_split_stack_limit (r0));
  /* Always emit two insns here to calculate the requested stack,
     so that the linker can edit them when adjusting size for calling
     non-split-stack code.  */
  alloc_hi = (-allocate + 0x8000) & ~0xffffL;
  alloc_lo = -allocate - alloc_hi;
  if (alloc_hi != 0)
    {
      emit_insn (gen_add3_const (r12, r1, alloc_hi));
      if (alloc_lo != 0)
	emit_insn (gen_add3_const (r12, r12, alloc_lo));
      else
	emit_insn (gen_nop ());
    }
  else
    {
      emit_insn (gen_add3_const (r12, r1, alloc_lo));
      emit_insn (gen_nop ());
    }

  /* Compare the requested new stack pointer (r12) against the limit
     (r0); if there is enough room, branch around the __morestack call.  */
  compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
  emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
  ok_label = gen_label_rtx ();
  jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
			       gen_rtx_GEU (VOIDmode, compare, const0_rtx),
			       gen_rtx_LABEL_REF (VOIDmode, ok_label),
			       pc_rtx);
  insn = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
  JUMP_LABEL (insn) = ok_label;
  /* Mark the jump as very likely to be taken.  */
  add_reg_br_prob_note (insn, profile_probability::very_likely ());

  /* Save LR around the __morestack call; both stores are marked
     frame-related so the unwinder can find the saved LR.  */
  lr = gen_rtx_REG (Pmode, LR_REGNO);
  insn = emit_move_insn (r0, lr);
  RTX_FRAME_RELATED_P (insn) = 1;
  insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
  RTX_FRAME_RELATED_P (insn) = 1;

  insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
				   const0_rtx, const0_rtx));
  call_fusage = NULL_RTX;
  use_reg (&call_fusage, r12);
  /* Say the call uses r0, even though it doesn't, to stop regrename
     from twiddling with the insns saving lr, trashing args for cfun.
     The insns restoring lr are similarly protected by making
     split_stack_return use r0.  */
  use_reg (&call_fusage, r0);
  add_function_usage_to (insn, call_fusage);
  /* Indicate that this function can't jump to non-local gotos.  */
  make_reg_eh_region_note_nothrow_nononlocal (insn);
  emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
  insn = emit_move_insn (lr, r0);
  add_reg_note (insn, REG_CFA_RESTORE, lr);
  RTX_FRAME_RELATED_P (insn) = 1;
  emit_insn (gen_split_stack_return ());

  emit_label (ok_label);
  LABEL_NUSES (ok_label) = 1;
}
32267 | ||
32268 | /* Return the internal arg pointer used for function incoming | |
32269 | arguments. When -fsplit-stack, the arg pointer is r12 so we need | |
32270 | to copy it to a pseudo in order for it to be preserved over calls | |
32271 | and suchlike. We'd really like to use a pseudo here for the | |
32272 | internal arg pointer but data-flow analysis is not prepared to | |
32273 | accept pseudos as live at the beginning of a function. */ | |
32274 | ||
32275 | static rtx | |
32276 | rs6000_internal_arg_pointer (void) | |
32277 | { | |
32278 | if (flag_split_stack | |
32279 | && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl)) | |
32280 | == NULL)) | |
32281 | ||
32282 | { | |
32283 | if (cfun->machine->split_stack_arg_pointer == NULL_RTX) | |
32284 | { | |
32285 | rtx pat; | |
32286 | ||
32287 | cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode); | |
32288 | REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1; | |
32289 | ||
32290 | /* Put the pseudo initialization right after the note at the | |
32291 | beginning of the function. */ | |
32292 | pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer, | |
32293 | gen_rtx_REG (Pmode, 12)); | |
32294 | push_topmost_sequence (); | |
32295 | emit_insn_after (pat, get_insns ()); | |
32296 | pop_topmost_sequence (); | |
32297 | } | |
32298 | return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer, | |
32299 | FIRST_PARM_OFFSET (current_function_decl)); | |
32300 | } | |
32301 | return virtual_incoming_args_rtx; | |
32302 | } | |
32303 | ||
32304 | /* We may have to tell the dataflow pass that the split stack prologue | |
32305 | is initializing a register. */ | |
32306 | ||
32307 | static void | |
32308 | rs6000_live_on_entry (bitmap regs) | |
32309 | { | |
32310 | if (flag_split_stack) | |
32311 | bitmap_set_bit (regs, 12); | |
32312 | } | |
32313 | ||
32314 | /* Emit -fsplit-stack dynamic stack allocation space check. */ | |
32315 | ||
void
rs6000_split_stack_space_check (rtx size, rtx label)
{
  rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx limit = gen_reg_rtx (Pmode);
  rtx requested = gen_reg_rtx (Pmode);
  rtx cmp = gen_reg_rtx (CCUNSmode);
  rtx jump;

  emit_insn (gen_load_split_stack_limit (limit));
  /* requested = sp - size, via an immediate add when SIZE is constant.  */
  if (CONST_INT_P (size))
    emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
  else
    {
      size = force_reg (Pmode, size);
      emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
    }
  /* Branch to LABEL when requested >= limit (unsigned), i.e. when the
     allocation fits in the current stack segment.  */
  emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
  jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
			       gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
			       gen_rtx_LABEL_REF (VOIDmode, label),
			       pc_rtx);
  jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
  JUMP_LABEL (jump) = label;
}
32341 | \f | |
32342 | /* A C compound statement that outputs the assembler code for a thunk | |
32343 | function, used to implement C++ virtual function calls with | |
32344 | multiple inheritance. The thunk acts as a wrapper around a virtual | |
32345 | function, adjusting the implicit object parameter before handing | |
32346 | control off to the real function. | |
32347 | ||
32348 | First, emit code to add the integer DELTA to the location that | |
32349 | contains the incoming first argument. Assume that this argument | |
32350 | contains a pointer, and is the one used to pass the `this' pointer | |
32351 | in C++. This is the incoming argument *before* the function | |
32352 | prologue, e.g. `%o0' on a sparc. The addition must preserve the | |
32353 | values of all other incoming arguments. | |
32354 | ||
32355 | After the addition, emit code to jump to FUNCTION, which is a | |
32356 | `FUNCTION_DECL'. This is a direct pure jump, not a call, and does | |
32357 | not touch the return address. Hence returning from FUNCTION will | |
32358 | return to whoever called the current `thunk'. | |
32359 | ||
32360 | The effect must be as if FUNCTION had been called directly with the | |
32361 | adjusted first argument. This macro is responsible for emitting | |
32362 | all of the code for a thunk function; output_function_prologue() | |
32363 | and output_function_epilogue() are not invoked. | |
32364 | ||
32365 | The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already | |
32366 | been extracted from it.) It might possibly be useful on some | |
32367 | targets, but probably not. | |
32368 | ||
32369 | If you do not define this macro, the target-independent code in the | |
32370 | C++ frontend will generate a less efficient heavyweight thunk that | |
32371 | calls FUNCTION instead of jumping to it. The generic approach does | |
32372 | not support varargs. */ | |
32373 | ||
static void
rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
			HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
			tree function)
{
  rtx this_rtx, funexp;
  rtx_insn *insn;

  /* Pretend register allocation has completed so we can emit hard-reg
     RTL and run final() directly; reset at the end.  */
  reload_completed = 1;
  epilogue_completed = 1;

  /* Mark the end of the (empty) prologue.  */
  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Find the "this" pointer.  If the function returns a structure,
     the structure return pointer is in r3 and "this" moves to r4.  */
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    this_rtx = gen_rtx_REG (Pmode, 4);
  else
    this_rtx = gen_rtx_REG (Pmode, 3);

  /* Apply the constant offset, if required.  */
  if (delta)
    emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));

  /* Apply the offset from the vtable, if required.  */
  if (vcall_offset)
    {
      rtx vcall_offset_rtx = GEN_INT (vcall_offset);
      rtx tmp = gen_rtx_REG (Pmode, 12);

      emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
      /* Offsets that don't fit in a 16-bit signed displacement need an
	 explicit add before the load.  */
      if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
	{
	  emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
	  emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
	}
      else
	{
	  rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);

	  emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
	}
      emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
    }

  /* Generate a tail call to the target function.  */
  if (!TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);

#if TARGET_MACHO
  if (MACHOPIC_INDIRECT)
    funexp = machopic_indirect_call_target (funexp);
#endif

  /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
     generate sibcall RTL explicitly.  */
  insn = emit_call_insn (
	   gen_rtx_PARALLEL (VOIDmode,
			     gen_rtvec (3,
					gen_rtx_CALL (VOIDmode,
						      funexp, const0_rtx),
					gen_rtx_USE (VOIDmode, const0_rtx),
					simple_return_rtx)));
  SIBLING_CALL_P (insn) = 1;
  emit_barrier ();

  /* Run just enough of rest_of_compilation to get the insns emitted.
     There's not really enough bulk here to make other passes such as
     instruction scheduling worth while.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */
  insn = get_insns ();
  shorten_branches (insn);
  final_start_function (insn, file, 1);
  final (insn, file, 1);
  final_end_function ();

  reload_completed = 0;
  epilogue_completed = 0;
}
32459 | \f | |
32460 | /* A quick summary of the various types of 'constant-pool tables' | |
32461 | under PowerPC: | |
32462 | ||
32463 | Target Flags Name One table per | |
32464 | AIX (none) AIX TOC object file | |
32465 | AIX -mfull-toc AIX TOC object file | |
32466 | AIX -mminimal-toc AIX minimal TOC translation unit | |
32467 | SVR4/EABI (none) SVR4 SDATA object file | |
32468 | SVR4/EABI -fpic SVR4 pic object file | |
32469 | SVR4/EABI -fPIC SVR4 PIC translation unit | |
32470 | SVR4/EABI -mrelocatable EABI TOC function | |
32471 | SVR4/EABI -maix AIX TOC object file | |
32472 | SVR4/EABI -maix -mminimal-toc | |
32473 | AIX minimal TOC translation unit | |
32474 | ||
32475 | Name Reg. Set by entries contains: | |
32476 | made by addrs? fp? sum? | |
32477 | ||
32478 | AIX TOC 2 crt0 as Y option option | |
32479 | AIX minimal TOC 30 prolog gcc Y Y option | |
32480 | SVR4 SDATA 13 crt0 gcc N Y N | |
32481 | SVR4 pic 30 prolog ld Y not yet N | |
32482 | SVR4 PIC 30 prolog gcc Y option option | |
32483 | EABI TOC 30 prolog gcc Y option option | |
32484 | ||
32485 | */ | |
32486 | ||
32487 | /* Hash functions for the hash table. */ | |
32488 | ||
/* Return a hash value for constant K, recursing through K's operands
   according to the rtx format string.  Used by the TOC hash table, so
   the exact multipliers (613, 1231) determine existing hash values and
   must not change.  */

static unsigned
rs6000_hash_constant (rtx k)
{
  enum rtx_code code = GET_CODE (k);
  machine_mode mode = GET_MODE (k);
  unsigned result = (code << 3) ^ mode;
  const char *format;
  int flen, fidx;

  format = GET_RTX_FORMAT (code);
  flen = strlen (format);
  fidx = 0;

  /* A few codes get special treatment before the generic
     format-driven walk below.  */
  switch (code)
    {
    case LABEL_REF:
      return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));

    case CONST_WIDE_INT:
      {
	int i;
	flen = CONST_WIDE_INT_NUNITS (k);
	for (i = 0; i < flen; i++)
	  result = result * 613 + CONST_WIDE_INT_ELT (k, i);
	return result;
      }

    case CONST_DOUBLE:
      if (mode != VOIDmode)
	return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
      flen = 2;
      break;

    case CODE_LABEL:
      /* Skip the first operands; only hash from index 3 on.  */
      fidx = 3;
      break;

    default:
      break;
    }

  for (; fidx < flen; fidx++)
    switch (format[fidx])
      {
      case 's':
	{
	  unsigned i, len;
	  const char *str = XSTR (k, fidx);
	  len = strlen (str);
	  result = result * 613 + len;
	  for (i = 0; i < len; i++)
	    result = result * 613 + (unsigned) str[i];
	  break;
	}
      case 'u':
      case 'e':
	result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
	break;
      case 'i':
      case 'n':
	result = result * 613 + (unsigned) XINT (k, fidx);
	break;
      case 'w':
	if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
	  result = result * 613 + (unsigned) XWINT (k, fidx);
	else
	  {
	    /* Fold a wide int into the hash one `unsigned' chunk at a
	       time.  */
	    size_t i;
	    for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
	      result = result * 613 + (unsigned) (XWINT (k, fidx)
						  >> CHAR_BIT * i);
	  }
	break;
      case '0':
	break;
      default:
	gcc_unreachable ();
      }

  return result;
}
32570 | ||
32571 | hashval_t | |
32572 | toc_hasher::hash (toc_hash_struct *thc) | |
32573 | { | |
32574 | return rs6000_hash_constant (thc->key) ^ thc->key_mode; | |
32575 | } | |
32576 | ||
32577 | /* Compare H1 and H2 for equivalence. */ | |
32578 | ||
32579 | bool | |
32580 | toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2) | |
32581 | { | |
32582 | rtx r1 = h1->key; | |
32583 | rtx r2 = h2->key; | |
32584 | ||
32585 | if (h1->key_mode != h2->key_mode) | |
32586 | return 0; | |
32587 | ||
32588 | return rtx_equal_p (r1, r2); | |
32589 | } | |
32590 | ||
32591 | /* These are the names given by the C++ front-end to vtables, and | |
32592 | vtable-like objects. Ideally, this logic should not be here; | |
32593 | instead, there should be some programmatic way of inquiring as | |
32594 | to whether or not an object is a vtable. */ | |
32595 | ||
/* The macro argument is now used hygienically; the body previously
   referenced a variable literally named `name', so the macro only
   worked when the caller's argument happened to be spelled `name'.  */
#define VTABLE_NAME_P(NAME)				\
  (strncmp ("_vt.", (NAME), strlen ("_vt.")) == 0	\
   || strncmp ("_ZTV", (NAME), strlen ("_ZTV")) == 0	\
   || strncmp ("_ZTT", (NAME), strlen ("_ZTT")) == 0	\
   || strncmp ("_ZTI", (NAME), strlen ("_ZTI")) == 0	\
   || strncmp ("_ZTC", (NAME), strlen ("_ZTC")) == 0)
32602 | ||
32603 | #ifdef NO_DOLLAR_IN_LABEL | |
32604 | /* Return a GGC-allocated character string translating dollar signs in | |
32605 | input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */ | |
32606 | ||
32607 | const char * | |
32608 | rs6000_xcoff_strip_dollar (const char *name) | |
32609 | { | |
32610 | char *strip, *p; | |
32611 | const char *q; | |
32612 | size_t len; | |
32613 | ||
32614 | q = (const char *) strchr (name, '$'); | |
32615 | ||
32616 | if (q == 0 || q == name) | |
32617 | return name; | |
32618 | ||
32619 | len = strlen (name); | |
32620 | strip = XALLOCAVEC (char, len + 1); | |
32621 | strcpy (strip, name); | |
32622 | p = strip + (q - name); | |
32623 | while (p) | |
32624 | { | |
32625 | *p = '_'; | |
32626 | p = strchr (p + 1, '$'); | |
32627 | } | |
32628 | ||
32629 | return ggc_alloc_string (strip, len); | |
32630 | } | |
32631 | #endif | |
32632 | ||
32633 | void | |
32634 | rs6000_output_symbol_ref (FILE *file, rtx x) | |
32635 | { | |
32636 | const char *name = XSTR (x, 0); | |
32637 | ||
32638 | /* Currently C++ toc references to vtables can be emitted before it | |
32639 | is decided whether the vtable is public or private. If this is | |
32640 | the case, then the linker will eventually complain that there is | |
32641 | a reference to an unknown section. Thus, for vtables only, | |
32642 | we emit the TOC reference to reference the identifier and not the | |
32643 | symbol. */ | |
32644 | if (VTABLE_NAME_P (name)) | |
32645 | { | |
32646 | RS6000_OUTPUT_BASENAME (file, name); | |
32647 | } | |
32648 | else | |
32649 | assemble_name (file, name); | |
32650 | } | |
32651 | ||
32652 | /* Output a TOC entry. We derive the entry name from what is being | |
32653 | written. */ | |
32654 | ||
32655 | void | |
32656 | output_toc (FILE *file, rtx x, int labelno, machine_mode mode) | |
32657 | { | |
32658 | char buf[256]; | |
32659 | const char *name = buf; | |
32660 | rtx base = x; | |
32661 | HOST_WIDE_INT offset = 0; | |
32662 | ||
32663 | gcc_assert (!TARGET_NO_TOC); | |
32664 | ||
32665 | /* When the linker won't eliminate them, don't output duplicate | |
32666 | TOC entries (this happens on AIX if there is any kind of TOC, | |
32667 | and on SVR4 under -fPIC or -mrelocatable). Don't do this for | |
32668 | CODE_LABELs. */ | |
32669 | if (TARGET_TOC && GET_CODE (x) != LABEL_REF) | |
32670 | { | |
32671 | struct toc_hash_struct *h; | |
32672 | ||
32673 | /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE | |
32674 | time because GGC is not initialized at that point. */ | |
32675 | if (toc_hash_table == NULL) | |
32676 | toc_hash_table = hash_table<toc_hasher>::create_ggc (1021); | |
32677 | ||
32678 | h = ggc_alloc<toc_hash_struct> (); | |
32679 | h->key = x; | |
32680 | h->key_mode = mode; | |
32681 | h->labelno = labelno; | |
32682 | ||
32683 | toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT); | |
32684 | if (*found == NULL) | |
32685 | *found = h; | |
32686 | else /* This is indeed a duplicate. | |
32687 | Set this label equal to that label. */ | |
32688 | { | |
32689 | fputs ("\t.set ", file); | |
32690 | ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC"); | |
32691 | fprintf (file, "%d,", labelno); | |
32692 | ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC"); | |
32693 | fprintf (file, "%d\n", ((*found)->labelno)); | |
32694 | ||
32695 | #ifdef HAVE_AS_TLS | |
32696 | if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF | |
32697 | && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC | |
32698 | || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)) | |
32699 | { | |
32700 | fputs ("\t.set ", file); | |
32701 | ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM"); | |
32702 | fprintf (file, "%d,", labelno); | |
32703 | ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM"); | |
32704 | fprintf (file, "%d\n", ((*found)->labelno)); | |
32705 | } | |
32706 | #endif | |
32707 | return; | |
32708 | } | |
32709 | } | |
32710 | ||
32711 | /* If we're going to put a double constant in the TOC, make sure it's | |
32712 | aligned properly when strict alignment is on. */ | |
32713 | if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x)) | |
32714 | && STRICT_ALIGNMENT | |
32715 | && GET_MODE_BITSIZE (mode) >= 64 | |
32716 | && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) { | |
32717 | ASM_OUTPUT_ALIGN (file, 3); | |
32718 | } | |
32719 | ||
32720 | (*targetm.asm_out.internal_label) (file, "LC", labelno); | |
32721 | ||
32722 | /* Handle FP constants specially. Note that if we have a minimal | |
32723 | TOC, things we put here aren't actually in the TOC, so we can allow | |
32724 | FP constants. */ | |
32725 | if (GET_CODE (x) == CONST_DOUBLE && | |
32726 | (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode | |
32727 | || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode)) | |
32728 | { | |
32729 | long k[4]; | |
32730 | ||
32731 | if (DECIMAL_FLOAT_MODE_P (GET_MODE (x))) | |
32732 | REAL_VALUE_TO_TARGET_DECIMAL128 (*CONST_DOUBLE_REAL_VALUE (x), k); | |
32733 | else | |
32734 | REAL_VALUE_TO_TARGET_LONG_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k); | |
32735 | ||
32736 | if (TARGET_64BIT) | |
32737 | { | |
32738 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32739 | fputs (DOUBLE_INT_ASM_OP, file); | |
32740 | else | |
32741 | fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],", | |
32742 | k[0] & 0xffffffff, k[1] & 0xffffffff, | |
32743 | k[2] & 0xffffffff, k[3] & 0xffffffff); | |
32744 | fprintf (file, "0x%lx%08lx,0x%lx%08lx\n", | |
32745 | k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff, | |
32746 | k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff, | |
32747 | k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff, | |
32748 | k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff); | |
32749 | return; | |
32750 | } | |
32751 | else | |
32752 | { | |
32753 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32754 | fputs ("\t.long ", file); | |
32755 | else | |
32756 | fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],", | |
32757 | k[0] & 0xffffffff, k[1] & 0xffffffff, | |
32758 | k[2] & 0xffffffff, k[3] & 0xffffffff); | |
32759 | fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n", | |
32760 | k[0] & 0xffffffff, k[1] & 0xffffffff, | |
32761 | k[2] & 0xffffffff, k[3] & 0xffffffff); | |
32762 | return; | |
32763 | } | |
32764 | } | |
32765 | else if (GET_CODE (x) == CONST_DOUBLE && | |
32766 | (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode)) | |
32767 | { | |
32768 | long k[2]; | |
32769 | ||
32770 | if (DECIMAL_FLOAT_MODE_P (GET_MODE (x))) | |
32771 | REAL_VALUE_TO_TARGET_DECIMAL64 (*CONST_DOUBLE_REAL_VALUE (x), k); | |
32772 | else | |
32773 | REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), k); | |
32774 | ||
32775 | if (TARGET_64BIT) | |
32776 | { | |
32777 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32778 | fputs (DOUBLE_INT_ASM_OP, file); | |
32779 | else | |
32780 | fprintf (file, "\t.tc FD_%lx_%lx[TC],", | |
32781 | k[0] & 0xffffffff, k[1] & 0xffffffff); | |
32782 | fprintf (file, "0x%lx%08lx\n", | |
32783 | k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff, | |
32784 | k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff); | |
32785 | return; | |
32786 | } | |
32787 | else | |
32788 | { | |
32789 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32790 | fputs ("\t.long ", file); | |
32791 | else | |
32792 | fprintf (file, "\t.tc FD_%lx_%lx[TC],", | |
32793 | k[0] & 0xffffffff, k[1] & 0xffffffff); | |
32794 | fprintf (file, "0x%lx,0x%lx\n", | |
32795 | k[0] & 0xffffffff, k[1] & 0xffffffff); | |
32796 | return; | |
32797 | } | |
32798 | } | |
32799 | else if (GET_CODE (x) == CONST_DOUBLE && | |
32800 | (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode)) | |
32801 | { | |
32802 | long l; | |
32803 | ||
32804 | if (DECIMAL_FLOAT_MODE_P (GET_MODE (x))) | |
32805 | REAL_VALUE_TO_TARGET_DECIMAL32 (*CONST_DOUBLE_REAL_VALUE (x), l); | |
32806 | else | |
32807 | REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), l); | |
32808 | ||
32809 | if (TARGET_64BIT) | |
32810 | { | |
32811 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32812 | fputs (DOUBLE_INT_ASM_OP, file); | |
32813 | else | |
32814 | fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff); | |
32815 | if (WORDS_BIG_ENDIAN) | |
32816 | fprintf (file, "0x%lx00000000\n", l & 0xffffffff); | |
32817 | else | |
32818 | fprintf (file, "0x%lx\n", l & 0xffffffff); | |
32819 | return; | |
32820 | } | |
32821 | else | |
32822 | { | |
32823 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32824 | fputs ("\t.long ", file); | |
32825 | else | |
32826 | fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff); | |
32827 | fprintf (file, "0x%lx\n", l & 0xffffffff); | |
32828 | return; | |
32829 | } | |
32830 | } | |
32831 | else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT) | |
32832 | { | |
32833 | unsigned HOST_WIDE_INT low; | |
32834 | HOST_WIDE_INT high; | |
32835 | ||
32836 | low = INTVAL (x) & 0xffffffff; | |
32837 | high = (HOST_WIDE_INT) INTVAL (x) >> 32; | |
32838 | ||
32839 | /* TOC entries are always Pmode-sized, so when big-endian | |
32840 | smaller integer constants in the TOC need to be padded. | |
32841 | (This is still a win over putting the constants in | |
32842 | a separate constant pool, because then we'd have | |
32843 | to have both a TOC entry _and_ the actual constant.) | |
32844 | ||
32845 | For a 32-bit target, CONST_INT values are loaded and shifted | |
32846 | entirely within `low' and can be stored in one TOC entry. */ | |
32847 | ||
32848 | /* It would be easy to make this work, but it doesn't now. */ | |
32849 | gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode)); | |
32850 | ||
32851 | if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode)) | |
32852 | { | |
32853 | low |= high << 32; | |
32854 | low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode); | |
32855 | high = (HOST_WIDE_INT) low >> 32; | |
32856 | low &= 0xffffffff; | |
32857 | } | |
32858 | ||
32859 | if (TARGET_64BIT) | |
32860 | { | |
32861 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32862 | fputs (DOUBLE_INT_ASM_OP, file); | |
32863 | else | |
32864 | fprintf (file, "\t.tc ID_%lx_%lx[TC],", | |
32865 | (long) high & 0xffffffff, (long) low & 0xffffffff); | |
32866 | fprintf (file, "0x%lx%08lx\n", | |
32867 | (long) high & 0xffffffff, (long) low & 0xffffffff); | |
32868 | return; | |
32869 | } | |
32870 | else | |
32871 | { | |
32872 | if (POINTER_SIZE < GET_MODE_BITSIZE (mode)) | |
32873 | { | |
32874 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32875 | fputs ("\t.long ", file); | |
32876 | else | |
32877 | fprintf (file, "\t.tc ID_%lx_%lx[TC],", | |
32878 | (long) high & 0xffffffff, (long) low & 0xffffffff); | |
32879 | fprintf (file, "0x%lx,0x%lx\n", | |
32880 | (long) high & 0xffffffff, (long) low & 0xffffffff); | |
32881 | } | |
32882 | else | |
32883 | { | |
32884 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32885 | fputs ("\t.long ", file); | |
32886 | else | |
32887 | fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff); | |
32888 | fprintf (file, "0x%lx\n", (long) low & 0xffffffff); | |
32889 | } | |
32890 | return; | |
32891 | } | |
32892 | } | |
32893 | ||
32894 | if (GET_CODE (x) == CONST) | |
32895 | { | |
32896 | gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS | |
32897 | && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT); | |
32898 | ||
32899 | base = XEXP (XEXP (x, 0), 0); | |
32900 | offset = INTVAL (XEXP (XEXP (x, 0), 1)); | |
32901 | } | |
32902 | ||
32903 | switch (GET_CODE (base)) | |
32904 | { | |
32905 | case SYMBOL_REF: | |
32906 | name = XSTR (base, 0); | |
32907 | break; | |
32908 | ||
32909 | case LABEL_REF: | |
32910 | ASM_GENERATE_INTERNAL_LABEL (buf, "L", | |
32911 | CODE_LABEL_NUMBER (XEXP (base, 0))); | |
32912 | break; | |
32913 | ||
32914 | case CODE_LABEL: | |
32915 | ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base)); | |
32916 | break; | |
32917 | ||
32918 | default: | |
32919 | gcc_unreachable (); | |
32920 | } | |
32921 | ||
32922 | if (TARGET_ELF || TARGET_MINIMAL_TOC) | |
32923 | fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file); | |
32924 | else | |
32925 | { | |
32926 | fputs ("\t.tc ", file); | |
32927 | RS6000_OUTPUT_BASENAME (file, name); | |
32928 | ||
32929 | if (offset < 0) | |
32930 | fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset); | |
32931 | else if (offset) | |
32932 | fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset); | |
32933 | ||
32934 | /* Mark large TOC symbols on AIX with [TE] so they are mapped | |
32935 | after other TOC symbols, reducing overflow of small TOC access | |
32936 | to [TC] symbols. */ | |
32937 | fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL | |
32938 | ? "[TE]," : "[TC],", file); | |
32939 | } | |
32940 | ||
32941 | /* Currently C++ toc references to vtables can be emitted before it | |
32942 | is decided whether the vtable is public or private. If this is | |
32943 | the case, then the linker will eventually complain that there is | |
32944 | a TOC reference to an unknown section. Thus, for vtables only, | |
32945 | we emit the TOC reference to reference the symbol and not the | |
32946 | section. */ | |
32947 | if (VTABLE_NAME_P (name)) | |
32948 | { | |
32949 | RS6000_OUTPUT_BASENAME (file, name); | |
32950 | if (offset < 0) | |
32951 | fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset); | |
32952 | else if (offset > 0) | |
32953 | fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset); | |
32954 | } | |
32955 | else | |
32956 | output_addr_const (file, x); | |
32957 | ||
32958 | #if HAVE_AS_TLS | |
32959 | if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF) | |
32960 | { | |
32961 | switch (SYMBOL_REF_TLS_MODEL (base)) | |
32962 | { | |
32963 | case 0: | |
32964 | break; | |
32965 | case TLS_MODEL_LOCAL_EXEC: | |
32966 | fputs ("@le", file); | |
32967 | break; | |
32968 | case TLS_MODEL_INITIAL_EXEC: | |
32969 | fputs ("@ie", file); | |
32970 | break; | |
32971 | /* Use global-dynamic for local-dynamic. */ | |
32972 | case TLS_MODEL_GLOBAL_DYNAMIC: | |
32973 | case TLS_MODEL_LOCAL_DYNAMIC: | |
32974 | putc ('\n', file); | |
32975 | (*targetm.asm_out.internal_label) (file, "LCM", labelno); | |
32976 | fputs ("\t.tc .", file); | |
32977 | RS6000_OUTPUT_BASENAME (file, name); | |
32978 | fputs ("[TC],", file); | |
32979 | output_addr_const (file, x); | |
32980 | fputs ("@m", file); | |
32981 | break; | |
32982 | default: | |
32983 | gcc_unreachable (); | |
32984 | } | |
32985 | } | |
32986 | #endif | |
32987 | ||
32988 | putc ('\n', file); | |
32989 | } | |
32990 | \f | |
32991 | /* Output an assembler pseudo-op to write an ASCII string of N characters | |
32992 | starting at P to FILE. | |
32993 | ||
32994 | On the RS/6000, we have to do this using the .byte operation and | |
32995 | write out special characters outside the quoted string. | |
32996 | Also, the assembler is broken; very long strings are truncated, | |
32997 | so we must artificially break them up early. */ | |
32998 | ||
/* Output an assembler pseudo-op to write an ASCII string of N characters
   starting at P to FILE.

   On the RS/6000, we have to do this using the .byte operation and
   write out special characters outside the quoted string.
   Also, the assembler is broken; very long strings are truncated,
   so we must artificially break them up early.  */

void
output_ascii (FILE *file, const char *p, int n)
{
  const char *open_string = "\t.byte \"";
  const char *open_decimal = "\t.byte ";
  const char *pending_close = NULL;
  int quoted_count = 0;

  for (int i = 0; i < n; i++)
    {
      char ch = *p++;

      if (ch >= ' ' && ch < 0177)
	{
	  /* Printable character: start (or extend) a quoted run.  */
	  if (open_string)
	    fputs (open_string, file);
	  putc (ch, file);

	  /* Write two quotes to get one.  */
	  if (ch == '"')
	    {
	      putc (ch, file);
	      ++quoted_count;
	    }

	  open_string = NULL;
	  open_decimal = "\"\n\t.byte ";
	  pending_close = "\"\n";
	  ++quoted_count;

	  /* Break very long quoted runs early; the assembler
	     truncates them otherwise.  */
	  if (quoted_count >= 512)
	    {
	      fputs (pending_close, file);

	      open_string = "\t.byte \"";
	      open_decimal = "\t.byte ";
	      pending_close = NULL;
	      quoted_count = 0;
	    }
	}
      else
	{
	  /* Non-printable character: emit it as a decimal .byte.  */
	  if (open_decimal)
	    fputs (open_decimal, file);
	  fprintf (file, "%d", ch);

	  open_string = "\n\t.byte \"";
	  open_decimal = ", ";
	  pending_close = "\n";
	  quoted_count = 0;
	}
    }

  /* Now close the string if we have written one.  Then end the line.  */
  if (pending_close)
    fputs (pending_close, file);
}
33057 | \f | |
33058 | /* Generate a unique section name for FILENAME for a section type | |
33059 | represented by SECTION_DESC. Output goes into BUF. | |
33060 | ||
33061 | SECTION_DESC can be any string, as long as it is different for each | |
33062 | possible section type. | |
33063 | ||
33064 | We name the section in the same manner as xlc. The name begins with an | |
33065 | underscore followed by the filename (after stripping any leading directory | |
33066 | names) with the last period replaced by the string SECTION_DESC. If | |
33067 | FILENAME does not contain a period, SECTION_DESC is appended to the end of | |
33068 | the name. */ | |
33069 | ||
33070 | void | |
33071 | rs6000_gen_section_name (char **buf, const char *filename, | |
33072 | const char *section_desc) | |
33073 | { | |
33074 | const char *q, *after_last_slash, *last_period = 0; | |
33075 | char *p; | |
33076 | int len; | |
33077 | ||
33078 | after_last_slash = filename; | |
33079 | for (q = filename; *q; q++) | |
33080 | { | |
33081 | if (*q == '/') | |
33082 | after_last_slash = q + 1; | |
33083 | else if (*q == '.') | |
33084 | last_period = q; | |
33085 | } | |
33086 | ||
33087 | len = strlen (after_last_slash) + strlen (section_desc) + 2; | |
33088 | *buf = (char *) xmalloc (len); | |
33089 | ||
33090 | p = *buf; | |
33091 | *p++ = '_'; | |
33092 | ||
33093 | for (q = after_last_slash; *q; q++) | |
33094 | { | |
33095 | if (q == last_period) | |
33096 | { | |
33097 | strcpy (p, section_desc); | |
33098 | p += strlen (section_desc); | |
33099 | break; | |
33100 | } | |
33101 | ||
33102 | else if (ISALNUM (*q)) | |
33103 | *p++ = *q; | |
33104 | } | |
33105 | ||
33106 | if (last_period == 0) | |
33107 | strcpy (p, section_desc); | |
33108 | else | |
33109 | *p = '\0'; | |
33110 | } | |
33111 | \f | |
/* Emit profile function.  LABELNO numbers the per-function profile
   counter label ("LP<labelno>"); it is unused when counters are
   disabled or the ABI does not pass one.  */

void
output_profile_hook (int labelno ATTRIBUTE_UNUSED)
{
  /* Non-standard profiling for kernels, which just saves LR then calls
     _mcount without worrying about arg saves.  The idea is to change
     the function prologue as little as possible as it isn't easy to
     account for arg save/restore code added just for _mcount.  */
  if (TARGET_PROFILE_KERNEL)
    return;

  if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    {
#ifndef NO_PROFILE_COUNTERS
# define NO_PROFILE_COUNTERS 0
#endif
      if (NO_PROFILE_COUNTERS)
	/* No counter: call _mcount with no arguments.  */
	emit_library_call (init_one_libfunc (RS6000_MCOUNT),
			   LCT_NORMAL, VOIDmode);
      else
	{
	  char buf[30];
	  const char *label_name;
	  rtx fun;

	  /* Pass the address of the "LP<labelno>" counter label as the
	     single argument to _mcount.  */
	  ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
	  label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
	  fun = gen_rtx_SYMBOL_REF (Pmode, label_name);

	  emit_library_call (init_one_libfunc (RS6000_MCOUNT),
			     LCT_NORMAL, VOIDmode, fun, Pmode);
	}
    }
  else if (DEFAULT_ABI == ABI_DARWIN)
    {
      const char *mcount_name = RS6000_MCOUNT;
      int caller_addr_regno = LR_REGNO;

      /* Be conservative and always set this, at least for now.  */
      crtl->uses_pic_offset_table = 1;

#if TARGET_MACHO
      /* For PIC code, set up a stub and collect the caller's address
	 from r0, which is where the prologue puts it.  */
      if (MACHOPIC_INDIRECT
	  && crtl->uses_pic_offset_table)
	caller_addr_regno = 0;
#endif
      /* Pass the caller's return address to _mcount.  */
      emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
			 LCT_NORMAL, VOIDmode,
			 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
    }
}
33166 | ||
/* Write function profiler code to FILE for profile counter label
   number LABELNO.  Only the 32-bit SVR4 (ABI_V4) ABI emits anything
   here; AIX, ELFv2 and Darwin do their work in output_profile_hook.
   The emitted sequence saves LR, materializes the address of the
   "LP<labelno>" counter label (PIC-model dependent), and calls
   _mcount.  */

void
output_function_profiler (FILE *file, int labelno)
{
  char buf[100];

  switch (DEFAULT_ABI)
    {
    default:
      gcc_unreachable ();

    case ABI_V4:
      if (!TARGET_32BIT)
	{
	  warning (0, "no profiling of 64-bit code for this ABI");
	  return;
	}
      ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
      /* Save the return address; every variant below stores r0 at
	 4(r1).  */
      fprintf (file, "\tmflr %s\n", reg_names[0]);
      if (NO_PROFILE_COUNTERS)
	{
	  /* Counters disabled: just save LR, no label address needed.  */
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	}
      else if (TARGET_SECURE_PLT && flag_pic)
	{
	  /* Secure-PLT PIC: compute the label address relative to a
	     pc-obtained base in r12.  */
	  if (TARGET_LINK_STACK)
	    {
	      char name[32];
	      get_ppc476_thunk_name (name);
	      asm_fprintf (file, "\tbl %s\n", name);
	    }
	  else
	    asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
	  asm_fprintf (file, "\taddis %s,%s,",
		       reg_names[12], reg_names[12]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
	}
      else if (flag_pic == 1)
	{
	  /* Small-model PIC: load the label address from the GOT.  */
	  fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
	  asm_fprintf (file, "\tlwz %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "@got(%s)\n", reg_names[12]);
	}
      else if (flag_pic > 1)
	{
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  /* Now, we need to get the address of the label.  */
	  if (TARGET_LINK_STACK)
	    {
	      char name[32];
	      get_ppc476_thunk_name (name);
	      asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
	      assemble_name (file, buf);
	      fputs ("-.\n1:", file);
	      asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
	      asm_fprintf (file, "\taddi %s,%s,4\n",
			   reg_names[11], reg_names[11]);
	    }
	  else
	    {
	      fputs ("\tbcl 20,31,1f\n\t.long ", file);
	      assemble_name (file, buf);
	      fputs ("-.\n1:", file);
	      asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
	    }
	  /* Add the pc-relative displacement (stored as a .long above)
	     to the obtained pc to form the absolute label address.  */
	  asm_fprintf (file, "\tlwz %s,0(%s)\n",
		       reg_names[0], reg_names[11]);
	  asm_fprintf (file, "\tadd %s,%s,%s\n",
		       reg_names[0], reg_names[0], reg_names[11]);
	}
      else
	{
	  /* Non-PIC: form the label address with lis/la.  */
	  asm_fprintf (file, "\tlis %s,", reg_names[12]);
	  assemble_name (file, buf);
	  fputs ("@ha\n", file);
	  asm_fprintf (file, "\tstw %s,4(%s)\n",
		       reg_names[0], reg_names[1]);
	  asm_fprintf (file, "\tla %s,", reg_names[0]);
	  assemble_name (file, buf);
	  asm_fprintf (file, "@l(%s)\n", reg_names[12]);
	}

      /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH.  */
      fprintf (file, "\tbl %s%s\n",
	       RS6000_MCOUNT, flag_pic ? "@plt" : "");
      break;

    case ABI_AIX:
    case ABI_ELFv2:
    case ABI_DARWIN:
      /* Don't do anything, done in output_profile_hook ().  */
      break;
    }
}
33274 | ||
33275 | \f | |
33276 | ||
/* The following variable value is the last issued insn.  */

static rtx_insn *last_scheduled_insn;

/* The following variable helps to balance issuing of load and
   store instructions.  */

static int load_store_pendulum;

/* The following variable helps pair divide insns during scheduling.  */
static int divide_cnt;
/* The following variable helps pair and alternate vector and vector load
   insns during scheduling.  */
static int vec_pairing;
33291 | ||
33292 | ||
33293 | /* Power4 load update and store update instructions are cracked into a | |
33294 | load or store and an integer insn which are executed in the same cycle. | |
33295 | Branches have their own dispatch slot which does not count against the | |
33296 | GCC issue rate, but it changes the program flow so there are no other | |
33297 | instructions to issue in this cycle. */ | |
33298 | ||
33299 | static int | |
33300 | rs6000_variable_issue_1 (rtx_insn *insn, int more) | |
33301 | { | |
33302 | last_scheduled_insn = insn; | |
33303 | if (GET_CODE (PATTERN (insn)) == USE | |
33304 | || GET_CODE (PATTERN (insn)) == CLOBBER) | |
33305 | { | |
33306 | cached_can_issue_more = more; | |
33307 | return cached_can_issue_more; | |
33308 | } | |
33309 | ||
33310 | if (insn_terminates_group_p (insn, current_group)) | |
33311 | { | |
33312 | cached_can_issue_more = 0; | |
33313 | return cached_can_issue_more; | |
33314 | } | |
33315 | ||
33316 | /* If no reservation, but reach here */ | |
33317 | if (recog_memoized (insn) < 0) | |
33318 | return more; | |
33319 | ||
33320 | if (rs6000_sched_groups) | |
33321 | { | |
33322 | if (is_microcoded_insn (insn)) | |
33323 | cached_can_issue_more = 0; | |
33324 | else if (is_cracked_insn (insn)) | |
33325 | cached_can_issue_more = more > 2 ? more - 2 : 0; | |
33326 | else | |
33327 | cached_can_issue_more = more - 1; | |
33328 | ||
33329 | return cached_can_issue_more; | |
33330 | } | |
33331 | ||
33332 | if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn)) | |
33333 | return 0; | |
33334 | ||
33335 | cached_can_issue_more = more - 1; | |
33336 | return cached_can_issue_more; | |
33337 | } | |
33338 | ||
33339 | static int | |
33340 | rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more) | |
33341 | { | |
33342 | int r = rs6000_variable_issue_1 (insn, more); | |
33343 | if (verbose) | |
33344 | fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r); | |
33345 | return r; | |
33346 | } | |
33347 | ||
33348 | /* Adjust the cost of a scheduling dependency. Return the new cost of | |
33349 | a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ | |
33350 | ||
33351 | static int | |
33352 | rs6000_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, | |
33353 | unsigned int) | |
33354 | { | |
33355 | enum attr_type attr_type; | |
33356 | ||
33357 | if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) | |
33358 | return cost; | |
33359 | ||
33360 | switch (dep_type) | |
33361 | { | |
33362 | case REG_DEP_TRUE: | |
33363 | { | |
33364 | /* Data dependency; DEP_INSN writes a register that INSN reads | |
33365 | some cycles later. */ | |
33366 | ||
33367 | /* Separate a load from a narrower, dependent store. */ | |
33368 | if ((rs6000_sched_groups || rs6000_cpu_attr == CPU_POWER9) | |
33369 | && GET_CODE (PATTERN (insn)) == SET | |
33370 | && GET_CODE (PATTERN (dep_insn)) == SET | |
33371 | && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM | |
33372 | && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM | |
33373 | && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1))) | |
33374 | > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0))))) | |
33375 | return cost + 14; | |
33376 | ||
33377 | attr_type = get_attr_type (insn); | |
33378 | ||
33379 | switch (attr_type) | |
33380 | { | |
33381 | case TYPE_JMPREG: | |
33382 | /* Tell the first scheduling pass about the latency between | |
33383 | a mtctr and bctr (and mtlr and br/blr). The first | |
33384 | scheduling pass will not know about this latency since | |
33385 | the mtctr instruction, which has the latency associated | |
33386 | to it, will be generated by reload. */ | |
33387 | return 4; | |
33388 | case TYPE_BRANCH: | |
33389 | /* Leave some extra cycles between a compare and its | |
33390 | dependent branch, to inhibit expensive mispredicts. */ | |
33391 | if ((rs6000_cpu_attr == CPU_PPC603 | |
33392 | || rs6000_cpu_attr == CPU_PPC604 | |
33393 | || rs6000_cpu_attr == CPU_PPC604E | |
33394 | || rs6000_cpu_attr == CPU_PPC620 | |
33395 | || rs6000_cpu_attr == CPU_PPC630 | |
33396 | || rs6000_cpu_attr == CPU_PPC750 | |
33397 | || rs6000_cpu_attr == CPU_PPC7400 | |
33398 | || rs6000_cpu_attr == CPU_PPC7450 | |
33399 | || rs6000_cpu_attr == CPU_PPCE5500 | |
33400 | || rs6000_cpu_attr == CPU_PPCE6500 | |
33401 | || rs6000_cpu_attr == CPU_POWER4 | |
33402 | || rs6000_cpu_attr == CPU_POWER5 | |
33403 | || rs6000_cpu_attr == CPU_POWER7 | |
33404 | || rs6000_cpu_attr == CPU_POWER8 | |
33405 | || rs6000_cpu_attr == CPU_POWER9 | |
33406 | || rs6000_cpu_attr == CPU_CELL) | |
33407 | && recog_memoized (dep_insn) | |
33408 | && (INSN_CODE (dep_insn) >= 0)) | |
33409 | ||
33410 | switch (get_attr_type (dep_insn)) | |
33411 | { | |
33412 | case TYPE_CMP: | |
33413 | case TYPE_FPCOMPARE: | |
33414 | case TYPE_CR_LOGICAL: | |
33415 | case TYPE_DELAYED_CR: | |
33416 | return cost + 2; | |
33417 | case TYPE_EXTS: | |
33418 | case TYPE_MUL: | |
33419 | if (get_attr_dot (dep_insn) == DOT_YES) | |
33420 | return cost + 2; | |
33421 | else | |
33422 | break; | |
33423 | case TYPE_SHIFT: | |
33424 | if (get_attr_dot (dep_insn) == DOT_YES | |
33425 | && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO) | |
33426 | return cost + 2; | |
33427 | else | |
33428 | break; | |
33429 | default: | |
33430 | break; | |
33431 | } | |
33432 | break; | |
33433 | ||
33434 | case TYPE_STORE: | |
33435 | case TYPE_FPSTORE: | |
33436 | if ((rs6000_cpu == PROCESSOR_POWER6) | |
33437 | && recog_memoized (dep_insn) | |
33438 | && (INSN_CODE (dep_insn) >= 0)) | |
33439 | { | |
33440 | ||
33441 | if (GET_CODE (PATTERN (insn)) != SET) | |
33442 | /* If this happens, we have to extend this to schedule | |
33443 | optimally. Return default for now. */ | |
33444 | return cost; | |
33445 | ||
33446 | /* Adjust the cost for the case where the value written | |
33447 | by a fixed point operation is used as the address | |
33448 | gen value on a store. */ | |
33449 | switch (get_attr_type (dep_insn)) | |
33450 | { | |
33451 | case TYPE_LOAD: | |
33452 | case TYPE_CNTLZ: | |
33453 | { | |
33454 | if (! rs6000_store_data_bypass_p (dep_insn, insn)) | |
33455 | return get_attr_sign_extend (dep_insn) | |
33456 | == SIGN_EXTEND_YES ? 6 : 4; | |
33457 | break; | |
33458 | } | |
33459 | case TYPE_SHIFT: | |
33460 | { | |
33461 | if (! rs6000_store_data_bypass_p (dep_insn, insn)) | |
33462 | return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ? | |
33463 | 6 : 3; | |
33464 | break; | |
33465 | } | |
33466 | case TYPE_INTEGER: | |
33467 | case TYPE_ADD: | |
33468 | case TYPE_LOGICAL: | |
33469 | case TYPE_EXTS: | |
33470 | case TYPE_INSERT: | |
33471 | { | |
33472 | if (! rs6000_store_data_bypass_p (dep_insn, insn)) | |
33473 | return 3; | |
33474 | break; | |
33475 | } | |
33476 | case TYPE_STORE: | |
33477 | case TYPE_FPLOAD: | |
33478 | case TYPE_FPSTORE: | |
33479 | { | |
33480 | if (get_attr_update (dep_insn) == UPDATE_YES | |
33481 | && ! rs6000_store_data_bypass_p (dep_insn, insn)) | |
33482 | return 3; | |
33483 | break; | |
33484 | } | |
33485 | case TYPE_MUL: | |
33486 | { | |
33487 | if (! rs6000_store_data_bypass_p (dep_insn, insn)) | |
33488 | return 17; | |
33489 | break; | |
33490 | } | |
33491 | case TYPE_DIV: | |
33492 | { | |
33493 | if (! rs6000_store_data_bypass_p (dep_insn, insn)) | |
33494 | return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57; | |
33495 | break; | |
33496 | } | |
33497 | default: | |
33498 | break; | |
33499 | } | |
33500 | } | |
33501 | break; | |
33502 | ||
33503 | case TYPE_LOAD: | |
33504 | if ((rs6000_cpu == PROCESSOR_POWER6) | |
33505 | && recog_memoized (dep_insn) | |
33506 | && (INSN_CODE (dep_insn) >= 0)) | |
33507 | { | |
33508 | ||
33509 | /* Adjust the cost for the case where the value written | |
33510 | by a fixed point instruction is used within the address | |
33511 | gen portion of a subsequent load(u)(x) */ | |
33512 | switch (get_attr_type (dep_insn)) | |
33513 | { | |
33514 | case TYPE_LOAD: | |
33515 | case TYPE_CNTLZ: | |
33516 | { | |
33517 | if (set_to_load_agen (dep_insn, insn)) | |
33518 | return get_attr_sign_extend (dep_insn) | |
33519 | == SIGN_EXTEND_YES ? 6 : 4; | |
33520 | break; | |
33521 | } | |
33522 | case TYPE_SHIFT: | |
33523 | { | |
33524 | if (set_to_load_agen (dep_insn, insn)) | |
33525 | return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ? | |
33526 | 6 : 3; | |
33527 | break; | |
33528 | } | |
33529 | case TYPE_INTEGER: | |
33530 | case TYPE_ADD: | |
33531 | case TYPE_LOGICAL: | |
33532 | case TYPE_EXTS: | |
33533 | case TYPE_INSERT: | |
33534 | { | |
33535 | if (set_to_load_agen (dep_insn, insn)) | |
33536 | return 3; | |
33537 | break; | |
33538 | } | |
33539 | case TYPE_STORE: | |
33540 | case TYPE_FPLOAD: | |
33541 | case TYPE_FPSTORE: | |
33542 | { | |
33543 | if (get_attr_update (dep_insn) == UPDATE_YES | |
33544 | && set_to_load_agen (dep_insn, insn)) | |
33545 | return 3; | |
33546 | break; | |
33547 | } | |
33548 | case TYPE_MUL: | |
33549 | { | |
33550 | if (set_to_load_agen (dep_insn, insn)) | |
33551 | return 17; | |
33552 | break; | |
33553 | } | |
33554 | case TYPE_DIV: | |
33555 | { | |
33556 | if (set_to_load_agen (dep_insn, insn)) | |
33557 | return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57; | |
33558 | break; | |
33559 | } | |
33560 | default: | |
33561 | break; | |
33562 | } | |
33563 | } | |
33564 | break; | |
33565 | ||
33566 | case TYPE_FPLOAD: | |
33567 | if ((rs6000_cpu == PROCESSOR_POWER6) | |
33568 | && get_attr_update (insn) == UPDATE_NO | |
33569 | && recog_memoized (dep_insn) | |
33570 | && (INSN_CODE (dep_insn) >= 0) | |
33571 | && (get_attr_type (dep_insn) == TYPE_MFFGPR)) | |
33572 | return 2; | |
33573 | ||
33574 | default: | |
33575 | break; | |
33576 | } | |
33577 | ||
33578 | /* Fall out to return default cost. */ | |
33579 | } | |
33580 | break; | |
33581 | ||
33582 | case REG_DEP_OUTPUT: | |
33583 | /* Output dependency; DEP_INSN writes a register that INSN writes some | |
33584 | cycles later. */ | |
33585 | if ((rs6000_cpu == PROCESSOR_POWER6) | |
33586 | && recog_memoized (dep_insn) | |
33587 | && (INSN_CODE (dep_insn) >= 0)) | |
33588 | { | |
33589 | attr_type = get_attr_type (insn); | |
33590 | ||
33591 | switch (attr_type) | |
33592 | { | |
33593 | case TYPE_FP: | |
33594 | case TYPE_FPSIMPLE: | |
33595 | if (get_attr_type (dep_insn) == TYPE_FP | |
33596 | || get_attr_type (dep_insn) == TYPE_FPSIMPLE) | |
33597 | return 1; | |
33598 | break; | |
33599 | case TYPE_FPLOAD: | |
33600 | if (get_attr_update (insn) == UPDATE_NO | |
33601 | && get_attr_type (dep_insn) == TYPE_MFFGPR) | |
33602 | return 2; | |
33603 | break; | |
33604 | default: | |
33605 | break; | |
33606 | } | |
33607 | } | |
33608 | /* Fall through, no cost for output dependency. */ | |
33609 | /* FALLTHRU */ | |
33610 | ||
33611 | case REG_DEP_ANTI: | |
33612 | /* Anti dependency; DEP_INSN reads a register that INSN writes some | |
33613 | cycles later. */ | |
33614 | return 0; | |
33615 | ||
33616 | default: | |
33617 | gcc_unreachable (); | |
33618 | } | |
33619 | ||
33620 | return cost; | |
33621 | } | |
33622 | ||
33623 | /* Debug version of rs6000_adjust_cost. */ | |
33624 | ||
33625 | static int | |
33626 | rs6000_debug_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, | |
33627 | int cost, unsigned int dw) | |
33628 | { | |
33629 | int ret = rs6000_adjust_cost (insn, dep_type, dep_insn, cost, dw); | |
33630 | ||
33631 | if (ret != cost) | |
33632 | { | |
33633 | const char *dep; | |
33634 | ||
33635 | switch (dep_type) | |
33636 | { | |
33637 | default: dep = "unknown depencency"; break; | |
33638 | case REG_DEP_TRUE: dep = "data dependency"; break; | |
33639 | case REG_DEP_OUTPUT: dep = "output dependency"; break; | |
33640 | case REG_DEP_ANTI: dep = "anti depencency"; break; | |
33641 | } | |
33642 | ||
33643 | fprintf (stderr, | |
33644 | "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, " | |
33645 | "%s, insn:\n", ret, cost, dep); | |
33646 | ||
33647 | debug_rtx (insn); | |
33648 | } | |
33649 | ||
33650 | return ret; | |
33651 | } | |
33652 | ||
33653 | /* The function returns a true if INSN is microcoded. | |
33654 | Return false otherwise. */ | |
33655 | ||
33656 | static bool | |
33657 | is_microcoded_insn (rtx_insn *insn) | |
33658 | { | |
33659 | if (!insn || !NONDEBUG_INSN_P (insn) | |
33660 | || GET_CODE (PATTERN (insn)) == USE | |
33661 | || GET_CODE (PATTERN (insn)) == CLOBBER) | |
33662 | return false; | |
33663 | ||
33664 | if (rs6000_cpu_attr == CPU_CELL) | |
33665 | return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS; | |
33666 | ||
33667 | if (rs6000_sched_groups | |
33668 | && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5)) | |
33669 | { | |
33670 | enum attr_type type = get_attr_type (insn); | |
33671 | if ((type == TYPE_LOAD | |
33672 | && get_attr_update (insn) == UPDATE_YES | |
33673 | && get_attr_sign_extend (insn) == SIGN_EXTEND_YES) | |
33674 | || ((type == TYPE_LOAD || type == TYPE_STORE) | |
33675 | && get_attr_update (insn) == UPDATE_YES | |
33676 | && get_attr_indexed (insn) == INDEXED_YES) | |
33677 | || type == TYPE_MFCR) | |
33678 | return true; | |
33679 | } | |
33680 | ||
33681 | return false; | |
33682 | } | |
33683 | ||
/* The function returns true if INSN is cracked into 2 instructions
   by the processor (and therefore occupies 2 issue slots).  Only
   meaningful for POWER4/POWER5 dispatch-group scheduling; on all
   other targets this returns false.  */

static bool
is_cracked_insn (rtx_insn *insn)
{
  /* Non-insns, USEs and CLOBBERs occupy no issue slots at all.  */
  if (!insn || !NONDEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  if (rs6000_sched_groups
      && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
    {
      enum attr_type type = get_attr_type (insn);
      /* The cracked insn forms on POWER4/POWER5: sign-extending
	 non-update loads, non-indexed update-form loads and stores,
	 update-form FP loads/stores, delayed CR operations,
	 record-form ("dot") sign-extends, non-variable record-form
	 shifts, record-form multiplies, all divides, and 32-bit
	 insert operations.  */
      if ((type == TYPE_LOAD
	   && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	   && get_attr_update (insn) == UPDATE_NO)
	  || (type == TYPE_LOAD
	      && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_NO)
	  || (type == TYPE_STORE
	      && get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_NO)
	  || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
	      && get_attr_update (insn) == UPDATE_YES)
	  || type == TYPE_DELAYED_CR
	  || (type == TYPE_EXTS
	      && get_attr_dot (insn) == DOT_YES)
	  || (type == TYPE_SHIFT
	      && get_attr_dot (insn) == DOT_YES
	      && get_attr_var_shift (insn) == VAR_SHIFT_NO)
	  || (type == TYPE_MUL
	      && get_attr_dot (insn) == DOT_YES)
	  || type == TYPE_DIV
	  || (type == TYPE_INSERT
	      && get_attr_size (insn) == SIZE_32))
	return true;
    }

  return false;
}
33727 | ||
33728 | /* The function returns true if INSN can be issued only from | |
33729 | the branch slot. */ | |
33730 | ||
33731 | static bool | |
33732 | is_branch_slot_insn (rtx_insn *insn) | |
33733 | { | |
33734 | if (!insn || !NONDEBUG_INSN_P (insn) | |
33735 | || GET_CODE (PATTERN (insn)) == USE | |
33736 | || GET_CODE (PATTERN (insn)) == CLOBBER) | |
33737 | return false; | |
33738 | ||
33739 | if (rs6000_sched_groups) | |
33740 | { | |
33741 | enum attr_type type = get_attr_type (insn); | |
33742 | if (type == TYPE_BRANCH || type == TYPE_JMPREG) | |
33743 | return true; | |
33744 | return false; | |
33745 | } | |
33746 | ||
33747 | return false; | |
33748 | } | |
33749 | ||
33750 | /* The function returns true if out_inst sets a value that is | |
33751 | used in the address generation computation of in_insn */ | |
33752 | static bool | |
33753 | set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn) | |
33754 | { | |
33755 | rtx out_set, in_set; | |
33756 | ||
33757 | /* For performance reasons, only handle the simple case where | |
33758 | both loads are a single_set. */ | |
33759 | out_set = single_set (out_insn); | |
33760 | if (out_set) | |
33761 | { | |
33762 | in_set = single_set (in_insn); | |
33763 | if (in_set) | |
33764 | return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set)); | |
33765 | } | |
33766 | ||
33767 | return false; | |
33768 | } | |
33769 | ||
33770 | /* Try to determine base/offset/size parts of the given MEM. | |
33771 | Return true if successful, false if all the values couldn't | |
33772 | be determined. | |
33773 | ||
33774 | This function only looks for REG or REG+CONST address forms. | |
33775 | REG+REG address form will return false. */ | |
33776 | ||
33777 | static bool | |
33778 | get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset, | |
33779 | HOST_WIDE_INT *size) | |
33780 | { | |
33781 | rtx addr_rtx; | |
33782 | if MEM_SIZE_KNOWN_P (mem) | |
33783 | *size = MEM_SIZE (mem); | |
33784 | else | |
33785 | return false; | |
33786 | ||
33787 | addr_rtx = (XEXP (mem, 0)); | |
33788 | if (GET_CODE (addr_rtx) == PRE_MODIFY) | |
33789 | addr_rtx = XEXP (addr_rtx, 1); | |
33790 | ||
33791 | *offset = 0; | |
33792 | while (GET_CODE (addr_rtx) == PLUS | |
33793 | && CONST_INT_P (XEXP (addr_rtx, 1))) | |
33794 | { | |
33795 | *offset += INTVAL (XEXP (addr_rtx, 1)); | |
33796 | addr_rtx = XEXP (addr_rtx, 0); | |
33797 | } | |
33798 | if (!REG_P (addr_rtx)) | |
33799 | return false; | |
33800 | ||
33801 | *base = addr_rtx; | |
33802 | return true; | |
33803 | } | |
33804 | ||
33805 | /* The function returns true if the target storage location of | |
33806 | mem1 is adjacent to the target storage location of mem2 */ | |
33807 | /* Return 1 if memory locations are adjacent. */ | |
33808 | ||
33809 | static bool | |
33810 | adjacent_mem_locations (rtx mem1, rtx mem2) | |
33811 | { | |
33812 | rtx reg1, reg2; | |
33813 | HOST_WIDE_INT off1, size1, off2, size2; | |
33814 | ||
33815 | if (get_memref_parts (mem1, ®1, &off1, &size1) | |
33816 | && get_memref_parts (mem2, ®2, &off2, &size2)) | |
33817 | return ((REGNO (reg1) == REGNO (reg2)) | |
33818 | && ((off1 + size1 == off2) | |
33819 | || (off2 + size2 == off1))); | |
33820 | ||
33821 | return false; | |
33822 | } | |
33823 | ||
33824 | /* This function returns true if it can be determined that the two MEM | |
33825 | locations overlap by at least 1 byte based on base reg/offset/size. */ | |
33826 | ||
33827 | static bool | |
33828 | mem_locations_overlap (rtx mem1, rtx mem2) | |
33829 | { | |
33830 | rtx reg1, reg2; | |
33831 | HOST_WIDE_INT off1, size1, off2, size2; | |
33832 | ||
33833 | if (get_memref_parts (mem1, ®1, &off1, &size1) | |
33834 | && get_memref_parts (mem2, ®2, &off2, &size2)) | |
33835 | return ((REGNO (reg1) == REGNO (reg2)) | |
33836 | && (((off1 <= off2) && (off1 + size1 > off2)) | |
33837 | || ((off2 <= off1) && (off2 + size2 > off1)))); | |
33838 | ||
33839 | return false; | |
33840 | } | |
33841 | ||
/* A C statement (sans semicolon) to update the integer scheduling
   priority INSN_PRIORITY (INSN).  Increase the priority to execute the
   INSN earlier, reduce the priority to execute INSN later.  Do not
   define this macro if you do not need to adjust the scheduling
   priorities of insns.  */

static int
rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
{
  rtx load_mem, str_mem;
  /* On machines (like the 750) which have asymmetric integer units,
     where one integer unit can do multiply and divides and the other
     can't, reduce the priority of multiply/divide so it is scheduled
     before other integer operations.  */

  /* NOTE(review): this PPC750 heuristic has been disabled (#if 0)
     for a long time; kept for reference only.  */
#if 0
  if (! INSN_P (insn))
    return priority;

  if (GET_CODE (PATTERN (insn)) == USE)
    return priority;

  switch (rs6000_cpu_attr) {
  case CPU_PPC750:
    switch (get_attr_type (insn))
      {
      default:
	break;

      case TYPE_MUL:
      case TYPE_DIV:
	fprintf (stderr, "priority was %#x (%d) before adjustment\n",
		 priority, priority);
	if (priority >= 0 && priority < 0x01000000)
	  priority >>= 3;
	break;
      }
  }
#endif

  /* Insns restricted to the first dispatch slot get boosted per the
     -msched-restricted-insns-priority setting (1 = absolute priority,
     2 = minimal bump) once register allocation is done.  */
  if (insn_must_be_first_in_group (insn)
      && reload_completed
      && current_sched_info->sched_max_insns_priority
      && rs6000_sched_restricted_insns_priority)
    {

      /* Prioritize insns that can be dispatched only in the first
	 dispatch slot.  */
      if (rs6000_sched_restricted_insns_priority == 1)
	/* Attach highest priority to insn.  This means that in
	   haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
	   precede 'priority' (critical path) considerations.  */
	return current_sched_info->sched_max_insns_priority;
      else if (rs6000_sched_restricted_insns_priority == 2)
	/* Increase priority of insn by a minimal amount.  This means that in
	   haifa-sched.c:ready_sort(), only 'priority' (critical path)
	   considerations precede dispatch-slot restriction considerations.  */
	return (priority + 1);
    }

  /* load_store_pendulum is the POWER6 load/store balance counter
     maintained elsewhere in this file; -2 means two stores were just
     issued, 2 means two loads were.  */
  if (rs6000_cpu == PROCESSOR_POWER6
      && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
	  || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
    /* Attach highest priority to insn if the scheduler has just issued two
       stores and this instruction is a load, or two loads and this instruction
       is a store.  Power6 wants loads and stores scheduled alternately
       when possible */
    return current_sched_info->sched_max_insns_priority;

  return priority;
}
33913 | ||
33914 | /* Return true if the instruction is nonpipelined on the Cell. */ | |
33915 | static bool | |
33916 | is_nonpipeline_insn (rtx_insn *insn) | |
33917 | { | |
33918 | enum attr_type type; | |
33919 | if (!insn || !NONDEBUG_INSN_P (insn) | |
33920 | || GET_CODE (PATTERN (insn)) == USE | |
33921 | || GET_CODE (PATTERN (insn)) == CLOBBER) | |
33922 | return false; | |
33923 | ||
33924 | type = get_attr_type (insn); | |
33925 | if (type == TYPE_MUL | |
33926 | || type == TYPE_DIV | |
33927 | || type == TYPE_SDIV | |
33928 | || type == TYPE_DDIV | |
33929 | || type == TYPE_SSQRT | |
33930 | || type == TYPE_DSQRT | |
33931 | || type == TYPE_MFCR | |
33932 | || type == TYPE_MFCRF | |
33933 | || type == TYPE_MFJMPR) | |
33934 | { | |
33935 | return true; | |
33936 | } | |
33937 | return false; | |
33938 | } | |
33939 | ||
33940 | ||
/* Return how many instructions the machine can issue per cycle.
   Implements TARGET_SCHED_ISSUE_RATE.  */

static int
rs6000_issue_rate (void)
{
  /* Unless scheduling for register pressure, use issue rate of 1 for
     first scheduling pass to decrease degradation.  */
  if (!reload_completed && !flag_sched_pressure)
    return 1;

  /* Per-CPU issue widths; anything unlisted is conservatively 1.  */
  switch (rs6000_cpu_attr) {
  case CPU_RS64A:
  case CPU_PPC601: /* ? */
  case CPU_PPC7450:
    return 3;
  case CPU_PPC440:
  case CPU_PPC603:
  case CPU_PPC750:
  case CPU_PPC7400:
  case CPU_PPC8540:
  case CPU_PPC8548:
  case CPU_CELL:
  case CPU_PPCE300C2:
  case CPU_PPCE300C3:
  case CPU_PPCE500MC:
  case CPU_PPCE500MC64:
  case CPU_PPCE5500:
  case CPU_PPCE6500:
  case CPU_TITAN:
    return 2;
  case CPU_PPC476:
  case CPU_PPC604:
  case CPU_PPC604E:
  case CPU_PPC620:
  case CPU_PPC630:
    return 4;
  case CPU_POWER4:
  case CPU_POWER5:
  case CPU_POWER6:
  case CPU_POWER7:
    return 5;
  case CPU_POWER8:
    return 7;
  case CPU_POWER9:
    return 6;
  default:
    return 1;
  }
}
33990 | ||
33991 | /* Return how many instructions to look ahead for better insn | |
33992 | scheduling. */ | |
33993 | ||
33994 | static int | |
33995 | rs6000_use_sched_lookahead (void) | |
33996 | { | |
33997 | switch (rs6000_cpu_attr) | |
33998 | { | |
33999 | case CPU_PPC8540: | |
34000 | case CPU_PPC8548: | |
34001 | return 4; | |
34002 | ||
34003 | case CPU_CELL: | |
34004 | return (reload_completed ? 8 : 0); | |
34005 | ||
34006 | default: | |
34007 | return 0; | |
34008 | } | |
34009 | } | |
34010 | ||
34011 | /* We are choosing insn from the ready queue. Return zero if INSN can be | |
34012 | chosen. */ | |
34013 | static int | |
34014 | rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index) | |
34015 | { | |
34016 | if (ready_index == 0) | |
34017 | return 0; | |
34018 | ||
34019 | if (rs6000_cpu_attr != CPU_CELL) | |
34020 | return 0; | |
34021 | ||
34022 | gcc_assert (insn != NULL_RTX && INSN_P (insn)); | |
34023 | ||
34024 | if (!reload_completed | |
34025 | || is_nonpipeline_insn (insn) | |
34026 | || is_microcoded_insn (insn)) | |
34027 | return 1; | |
34028 | ||
34029 | return 0; | |
34030 | } | |
34031 | ||
/* Determine if PAT refers to memory.  If so, set MEM_REF to the MEM rtx
   and return true.  Walks the whole rtx recursively; the first MEM
   found (in reverse operand order) wins.  */

static bool
find_mem_ref (rtx pat, rtx *mem_ref)
{
  const char * fmt;
  int i, j;

  /* stack_tie does not produce any real memory traffic.  */
  if (tie_operand (pat, VOIDmode))
    return false;

  if (GET_CODE (pat) == MEM)
    {
      *mem_ref = pat;
      return true;
    }

  /* Recursively process the pattern.  */
  fmt = GET_RTX_FORMAT (GET_CODE (pat));

  for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
    {
      /* 'e' is a single sub-expression, 'E' a vector of them.  */
      if (fmt[i] == 'e')
	{
	  if (find_mem_ref (XEXP (pat, i), mem_ref))
	    return true;
	}
      else if (fmt[i] == 'E')
	for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
	  {
	    if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
	      return true;
	  }
    }

  return false;
}
34071 | ||
34072 | /* Determine if PAT is a PATTERN of a load insn. */ | |
34073 | ||
34074 | static bool | |
34075 | is_load_insn1 (rtx pat, rtx *load_mem) | |
34076 | { | |
34077 | if (!pat || pat == NULL_RTX) | |
34078 | return false; | |
34079 | ||
34080 | if (GET_CODE (pat) == SET) | |
34081 | return find_mem_ref (SET_SRC (pat), load_mem); | |
34082 | ||
34083 | if (GET_CODE (pat) == PARALLEL) | |
34084 | { | |
34085 | int i; | |
34086 | ||
34087 | for (i = 0; i < XVECLEN (pat, 0); i++) | |
34088 | if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem)) | |
34089 | return true; | |
34090 | } | |
34091 | ||
34092 | return false; | |
34093 | } | |
34094 | ||
34095 | /* Determine if INSN loads from memory. */ | |
34096 | ||
34097 | static bool | |
34098 | is_load_insn (rtx insn, rtx *load_mem) | |
34099 | { | |
34100 | if (!insn || !INSN_P (insn)) | |
34101 | return false; | |
34102 | ||
34103 | if (CALL_P (insn)) | |
34104 | return false; | |
34105 | ||
34106 | return is_load_insn1 (PATTERN (insn), load_mem); | |
34107 | } | |
34108 | ||
34109 | /* Determine if PAT is a PATTERN of a store insn. */ | |
34110 | ||
34111 | static bool | |
34112 | is_store_insn1 (rtx pat, rtx *str_mem) | |
34113 | { | |
34114 | if (!pat || pat == NULL_RTX) | |
34115 | return false; | |
34116 | ||
34117 | if (GET_CODE (pat) == SET) | |
34118 | return find_mem_ref (SET_DEST (pat), str_mem); | |
34119 | ||
34120 | if (GET_CODE (pat) == PARALLEL) | |
34121 | { | |
34122 | int i; | |
34123 | ||
34124 | for (i = 0; i < XVECLEN (pat, 0); i++) | |
34125 | if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem)) | |
34126 | return true; | |
34127 | } | |
34128 | ||
34129 | return false; | |
34130 | } | |
34131 | ||
34132 | /* Determine if INSN stores to memory. */ | |
34133 | ||
34134 | static bool | |
34135 | is_store_insn (rtx insn, rtx *str_mem) | |
34136 | { | |
34137 | if (!insn || !INSN_P (insn)) | |
34138 | return false; | |
34139 | ||
34140 | return is_store_insn1 (PATTERN (insn), str_mem); | |
34141 | } | |
34142 | ||
34143 | /* Return whether TYPE is a Power9 pairable vector instruction type. */ | |
34144 | ||
34145 | static bool | |
34146 | is_power9_pairable_vec_type (enum attr_type type) | |
34147 | { | |
34148 | switch (type) | |
34149 | { | |
34150 | case TYPE_VECSIMPLE: | |
34151 | case TYPE_VECCOMPLEX: | |
34152 | case TYPE_VECDIV: | |
34153 | case TYPE_VECCMP: | |
34154 | case TYPE_VECPERM: | |
34155 | case TYPE_VECFLOAT: | |
34156 | case TYPE_VECFDIV: | |
34157 | case TYPE_VECDOUBLE: | |
34158 | return true; | |
34159 | default: | |
34160 | break; | |
34161 | } | |
34162 | return false; | |
34163 | } | |
34164 | ||
/* Returns whether the dependence between INSN and NEXT is considered
   costly by the given target.  Costly dependences are kept out of the
   same dispatch group.  COST is the dependence latency, DISTANCE the
   number of insns already between producer and consumer.  Driven by
   the -msched-costly-dep setting (rs6000_sched_costly_dep).  */

static bool
rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
{
  rtx insn;
  rtx next;
  rtx load_mem, str_mem;

  /* If the flag is not enabled - no dependence is considered costly;
     allow all dependent insns in the same group.
     This is the most aggressive option.  */
  if (rs6000_sched_costly_dep == no_dep_costly)
    return false;

  /* If the flag is set to 1 - a dependence is always considered costly;
     do not allow dependent instructions in the same group.
     This is the most conservative option.  */
  if (rs6000_sched_costly_dep == all_deps_costly)
    return true;

  /* Producer and consumer of the dependence.  */
  insn = DEP_PRO (dep);
  next = DEP_CON (dep);

  if (rs6000_sched_costly_dep == store_to_load_dep_costly
      && is_load_insn (next, &load_mem)
      && is_store_insn (insn, &str_mem))
    /* Prevent load after store in the same group.  */
    return true;

  if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
      && is_load_insn (next, &load_mem)
      && is_store_insn (insn, &str_mem)
      && DEP_TYPE (dep) == REG_DEP_TRUE
      && mem_locations_overlap(str_mem, load_mem))
    /* Prevent load after store in the same group if it is a true
       dependence.  */
    return true;

  /* The flag is set to X; dependences with latency >= X are considered costly,
     and will not be scheduled in the same group.  */
  if (rs6000_sched_costly_dep <= max_dep_latency
      && ((cost - distance) >= (int)rs6000_sched_costly_dep))
    return true;

  return false;
}
34213 | ||
34214 | /* Return the next insn after INSN that is found before TAIL is reached, | |
34215 | skipping any "non-active" insns - insns that will not actually occupy | |
34216 | an issue slot. Return NULL_RTX if such an insn is not found. */ | |
34217 | ||
34218 | static rtx_insn * | |
34219 | get_next_active_insn (rtx_insn *insn, rtx_insn *tail) | |
34220 | { | |
34221 | if (insn == NULL_RTX || insn == tail) | |
34222 | return NULL; | |
34223 | ||
34224 | while (1) | |
34225 | { | |
34226 | insn = NEXT_INSN (insn); | |
34227 | if (insn == NULL_RTX || insn == tail) | |
34228 | return NULL; | |
34229 | ||
34230 | if (CALL_P (insn) | |
34231 | || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) | |
34232 | || (NONJUMP_INSN_P (insn) | |
34233 | && GET_CODE (PATTERN (insn)) != USE | |
34234 | && GET_CODE (PATTERN (insn)) != CLOBBER | |
34235 | && INSN_CODE (insn) != CODE_FOR_stack_tie)) | |
34236 | break; | |
34237 | } | |
34238 | return insn; | |
34239 | } | |
34240 | ||
34241 | /* Do Power9 specific sched_reorder2 reordering of ready list. */ | |
34242 | ||
34243 | static int | |
34244 | power9_sched_reorder2 (rtx_insn **ready, int lastpos) | |
34245 | { | |
34246 | int pos; | |
34247 | int i; | |
34248 | rtx_insn *tmp; | |
34249 | enum attr_type type, type2; | |
34250 | ||
34251 | type = get_attr_type (last_scheduled_insn); | |
34252 | ||
34253 | /* Try to issue fixed point divides back-to-back in pairs so they will be | |
34254 | routed to separate execution units and execute in parallel. */ | |
34255 | if (type == TYPE_DIV && divide_cnt == 0) | |
34256 | { | |
34257 | /* First divide has been scheduled. */ | |
34258 | divide_cnt = 1; | |
34259 | ||
34260 | /* Scan the ready list looking for another divide, if found move it | |
34261 | to the end of the list so it is chosen next. */ | |
34262 | pos = lastpos; | |
34263 | while (pos >= 0) | |
34264 | { | |
34265 | if (recog_memoized (ready[pos]) >= 0 | |
34266 | && get_attr_type (ready[pos]) == TYPE_DIV) | |
34267 | { | |
34268 | tmp = ready[pos]; | |
34269 | for (i = pos; i < lastpos; i++) | |
34270 | ready[i] = ready[i + 1]; | |
34271 | ready[lastpos] = tmp; | |
34272 | break; | |
34273 | } | |
34274 | pos--; | |
34275 | } | |
34276 | } | |
34277 | else | |
34278 | { | |
34279 | /* Last insn was the 2nd divide or not a divide, reset the counter. */ | |
34280 | divide_cnt = 0; | |
34281 | ||
34282 | /* The best dispatch throughput for vector and vector load insns can be | |
34283 | achieved by interleaving a vector and vector load such that they'll | |
34284 | dispatch to the same superslice. If this pairing cannot be achieved | |
34285 | then it is best to pair vector insns together and vector load insns | |
34286 | together. | |
34287 | ||
34288 | To aid in this pairing, vec_pairing maintains the current state with | |
34289 | the following values: | |
34290 | ||
34291 | 0 : Initial state, no vecload/vector pairing has been started. | |
34292 | ||
34293 | 1 : A vecload or vector insn has been issued and a candidate for | |
34294 | pairing has been found and moved to the end of the ready | |
34295 | list. */ | |
34296 | if (type == TYPE_VECLOAD) | |
34297 | { | |
34298 | /* Issued a vecload. */ | |
34299 | if (vec_pairing == 0) | |
34300 | { | |
34301 | int vecload_pos = -1; | |
34302 | /* We issued a single vecload, look for a vector insn to pair it | |
34303 | with. If one isn't found, try to pair another vecload. */ | |
34304 | pos = lastpos; | |
34305 | while (pos >= 0) | |
34306 | { | |
34307 | if (recog_memoized (ready[pos]) >= 0) | |
34308 | { | |
34309 | type2 = get_attr_type (ready[pos]); | |
34310 | if (is_power9_pairable_vec_type (type2)) | |
34311 | { | |
34312 | /* Found a vector insn to pair with, move it to the | |
34313 | end of the ready list so it is scheduled next. */ | |
34314 | tmp = ready[pos]; | |
34315 | for (i = pos; i < lastpos; i++) | |
34316 | ready[i] = ready[i + 1]; | |
34317 | ready[lastpos] = tmp; | |
34318 | vec_pairing = 1; | |
34319 | return cached_can_issue_more; | |
34320 | } | |
34321 | else if (type2 == TYPE_VECLOAD && vecload_pos == -1) | |
34322 | /* Remember position of first vecload seen. */ | |
34323 | vecload_pos = pos; | |
34324 | } | |
34325 | pos--; | |
34326 | } | |
34327 | if (vecload_pos >= 0) | |
34328 | { | |
34329 | /* Didn't find a vector to pair with but did find a vecload, | |
34330 | move it to the end of the ready list. */ | |
34331 | tmp = ready[vecload_pos]; | |
34332 | for (i = vecload_pos; i < lastpos; i++) | |
34333 | ready[i] = ready[i + 1]; | |
34334 | ready[lastpos] = tmp; | |
34335 | vec_pairing = 1; | |
34336 | return cached_can_issue_more; | |
34337 | } | |
34338 | } | |
34339 | } | |
34340 | else if (is_power9_pairable_vec_type (type)) | |
34341 | { | |
34342 | /* Issued a vector operation. */ | |
34343 | if (vec_pairing == 0) | |
34344 | { | |
34345 | int vec_pos = -1; | |
34346 | /* We issued a single vector insn, look for a vecload to pair it | |
34347 | with. If one isn't found, try to pair another vector. */ | |
34348 | pos = lastpos; | |
34349 | while (pos >= 0) | |
34350 | { | |
34351 | if (recog_memoized (ready[pos]) >= 0) | |
34352 | { | |
34353 | type2 = get_attr_type (ready[pos]); | |
34354 | if (type2 == TYPE_VECLOAD) | |
34355 | { | |
34356 | /* Found a vecload insn to pair with, move it to the | |
34357 | end of the ready list so it is scheduled next. */ | |
34358 | tmp = ready[pos]; | |
34359 | for (i = pos; i < lastpos; i++) | |
34360 | ready[i] = ready[i + 1]; | |
34361 | ready[lastpos] = tmp; | |
34362 | vec_pairing = 1; | |
34363 | return cached_can_issue_more; | |
34364 | } | |
34365 | else if (is_power9_pairable_vec_type (type2) | |
34366 | && vec_pos == -1) | |
34367 | /* Remember position of first vector insn seen. */ | |
34368 | vec_pos = pos; | |
34369 | } | |
34370 | pos--; | |
34371 | } | |
34372 | if (vec_pos >= 0) | |
34373 | { | |
34374 | /* Didn't find a vecload to pair with but did find a vector | |
34375 | insn, move it to the end of the ready list. */ | |
34376 | tmp = ready[vec_pos]; | |
34377 | for (i = vec_pos; i < lastpos; i++) | |
34378 | ready[i] = ready[i + 1]; | |
34379 | ready[lastpos] = tmp; | |
34380 | vec_pairing = 1; | |
34381 | return cached_can_issue_more; | |
34382 | } | |
34383 | } | |
34384 | } | |
34385 | ||
34386 | /* We've either finished a vec/vecload pair, couldn't find an insn to | |
34387 | continue the current pair, or the last insn had nothing to do with | |
34388 | with pairing. In any case, reset the state. */ | |
34389 | vec_pairing = 0; | |
34390 | } | |
34391 | ||
34392 | return cached_can_issue_more; | |
34393 | } | |
34394 | ||
/* We are about to begin issuing insns for this clock cycle.
   Scheduler reorder hook: may reorder the READY list before issue begins,
   and returns how many insns may be issued this cycle (the issue rate).  */

static int
rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
		      rtx_insn **ready ATTRIBUTE_UNUSED,
		      int *pn_ready ATTRIBUTE_UNUSED,
		      int clock_var ATTRIBUTE_UNUSED)
{
  int n_ready = *pn_ready;

  if (sched_verbose)
    fprintf (dump, "// rs6000_sched_reorder :\n");

  /* On Cell, if the next insn to issue (the last ready-list entry) is a
     nonpipelined insn and the entry before it is a recognized insn, swap
     the two so the nonpipelined insn is not issued first.  */
  if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
    {
      if (is_nonpipeline_insn (ready[n_ready - 1])
	  && (recog_memoized (ready[n_ready - 2]) > 0))
	/* Simply swap first two insns.  */
	std::swap (ready[n_ready - 1], ready[n_ready - 2]);
    }

  /* Reset the Power6 load/store balance tracker at the start of each
     cycle; rs6000_sched_reorder2 updates it per issued insn.  */
  if (rs6000_cpu == PROCESSOR_POWER6)
    load_store_pendulum = 0;

  return rs6000_issue_rate ();
}
34423 | ||
/* Like rs6000_sched_reorder, but called after issuing each insn.
   Applies Power6 load/store balancing and Power9 vector pairing to the
   READY list, and returns the cached remaining issue count.  */

static int
rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
		       int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
{
  if (sched_verbose)
    fprintf (dump, "// rs6000_sched_reorder2 :\n");

  /* For Power6, we need to handle some special cases to try and keep the
     store queue from overflowing and triggering expensive flushes.

     This code monitors how load and store instructions are being issued
     and skews the ready list one way or the other to increase the likelihood
     that a desired instruction is issued at the proper time.

     A couple of things are done.  First, we maintain a "load_store_pendulum"
     to track the current state of load/store issue.

       - If the pendulum is at zero, then no loads or stores have been
	 issued in the current cycle so we do nothing.

       - If the pendulum is 1, then a single load has been issued in this
	 cycle and we attempt to locate another load in the ready list to
	 issue with it.

       - If the pendulum is -2, then two stores have already been
	 issued in this cycle, so we increase the priority of the first load
	 in the ready list to increase its likelihood of being chosen first
	 in the next cycle.

       - If the pendulum is -1, then a single store has been issued in this
	 cycle and we attempt to locate another store in the ready list to
	 issue with it, preferring a store to an adjacent memory location to
	 facilitate store pairing in the store queue.

       - If the pendulum is 2, then two loads have already been
	 issued in this cycle, so we increase the priority of the first store
	 in the ready list to increase its likelihood of being chosen first
	 in the next cycle.

       - If the pendulum < -2 or > 2, then do nothing.

     Note: This code covers the most common scenarios.  There exist non
	   load/store instructions which make use of the LSU and which
	   would need to be accounted for to strictly model the behavior
	   of the machine.  Those instructions are currently unaccounted
	   for to help minimize compile time overhead of this code.  */
  if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
    {
      int pos;
      int i;
      rtx_insn *tmp;
      rtx load_mem, str_mem;

      if (is_store_insn (last_scheduled_insn, &str_mem))
	/* Issuing a store, swing the load_store_pendulum to the left.  */
	load_store_pendulum--;
      else if (is_load_insn (last_scheduled_insn, &load_mem))
	/* Issuing a load, swing the load_store_pendulum to the right.  */
	load_store_pendulum++;
      else
	return cached_can_issue_more;

      /* If the pendulum is balanced, or there is only one instruction on
	 the ready list, then all is well, so return.  */
      if ((load_store_pendulum == 0) || (*pn_ready <= 1))
	return cached_can_issue_more;

      if (load_store_pendulum == 1)
	{
	  /* A load has been issued in this cycle.  Scan the ready list
	     for another load to issue with it.  Note the head of the
	     ready list is the highest-indexed entry.  */
	  pos = *pn_ready-1;

	  while (pos >= 0)
	    {
	      if (is_load_insn (ready[pos], &load_mem))
		{
		  /* Found a load.  Move it to the head of the ready list,
		     and adjust its priority so that it is more likely to
		     stay there.  */
		  tmp = ready[pos];
		  for (i=pos; i<*pn_ready-1; i++)
		    ready[i] = ready[i + 1];
		  ready[*pn_ready-1] = tmp;

		  if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
		    INSN_PRIORITY (tmp)++;
		  break;
		}
	      pos--;
	    }
	}
      else if (load_store_pendulum == -2)
	{
	  /* Two stores have been issued in this cycle.  Increase the
	     priority of the first load in the ready list to favor it for
	     issuing in the next cycle.  */
	  pos = *pn_ready-1;

	  while (pos >= 0)
	    {
	      if (is_load_insn (ready[pos], &load_mem)
		  && !sel_sched_p ()
		  && INSN_PRIORITY_KNOWN (ready[pos]))
		{
		  INSN_PRIORITY (ready[pos])++;

		  /* Adjust the pendulum to account for the fact that a load
		     was found and increased in priority.  This is to prevent
		     increasing the priority of multiple loads.  */
		  load_store_pendulum--;

		  break;
		}
	      pos--;
	    }
	}
      else if (load_store_pendulum == -1)
	{
	  /* A store has been issued in this cycle.  Scan the ready list for
	     another store to issue with it, preferring a store to an adjacent
	     memory location.  */
	  int first_store_pos = -1;

	  pos = *pn_ready-1;

	  while (pos >= 0)
	    {
	      if (is_store_insn (ready[pos], &str_mem))
		{
		  rtx str_mem2;
		  /* Maintain the index of the first store found on the
		     list.  */
		  if (first_store_pos == -1)
		    first_store_pos = pos;

		  if (is_store_insn (last_scheduled_insn, &str_mem2)
		      && adjacent_mem_locations (str_mem, str_mem2))
		    {
		      /* Found an adjacent store.  Move it to the head of the
			 ready list, and adjust its priority so that it is
			 more likely to stay there.  */
		      tmp = ready[pos];
		      for (i=pos; i<*pn_ready-1; i++)
			ready[i] = ready[i + 1];
		      ready[*pn_ready-1] = tmp;

		      if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
			INSN_PRIORITY (tmp)++;

		      first_store_pos = -1;

		      break;
		    };
		}
	      pos--;
	    }

	  if (first_store_pos >= 0)
	    {
	      /* An adjacent store wasn't found, but a non-adjacent store was,
		 so move the non-adjacent store to the front of the ready
		 list, and adjust its priority so that it is more likely to
		 stay there.  */
	      tmp = ready[first_store_pos];
	      for (i=first_store_pos; i<*pn_ready-1; i++)
		ready[i] = ready[i + 1];
	      ready[*pn_ready-1] = tmp;
	      if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
		INSN_PRIORITY (tmp)++;
	    }
	}
      else if (load_store_pendulum == 2)
	{
	  /* Two loads have been issued in this cycle.  Increase the priority
	     of the first store in the ready list to favor it for issuing in
	     the next cycle.  */
	  pos = *pn_ready-1;

	  while (pos >= 0)
	    {
	      if (is_store_insn (ready[pos], &str_mem)
		  && !sel_sched_p ()
		  && INSN_PRIORITY_KNOWN (ready[pos]))
		{
		  INSN_PRIORITY (ready[pos])++;

		  /* Adjust the pendulum to account for the fact that a store
		     was found and increased in priority.  This is to prevent
		     increasing the priority of multiple stores.  */
		  load_store_pendulum++;

		  break;
		}
	      pos--;
	    }
	}
    }

  /* Do Power9 dependent reordering if necessary.  */
  if (rs6000_cpu == PROCESSOR_POWER9 && last_scheduled_insn
      && recog_memoized (last_scheduled_insn) >= 0)
    return power9_sched_reorder2 (ready, *pn_ready - 1);

  return cached_can_issue_more;
}
34633 | ||
34634 | /* Return whether the presence of INSN causes a dispatch group termination | |
34635 | of group WHICH_GROUP. | |
34636 | ||
34637 | If WHICH_GROUP == current_group, this function will return true if INSN | |
34638 | causes the termination of the current group (i.e, the dispatch group to | |
34639 | which INSN belongs). This means that INSN will be the last insn in the | |
34640 | group it belongs to. | |
34641 | ||
34642 | If WHICH_GROUP == previous_group, this function will return true if INSN | |
34643 | causes the termination of the previous group (i.e, the dispatch group that | |
34644 | precedes the group to which INSN belongs). This means that INSN will be | |
34645 | the first insn in the group it belongs to). */ | |
34646 | ||
34647 | static bool | |
34648 | insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group) | |
34649 | { | |
34650 | bool first, last; | |
34651 | ||
34652 | if (! insn) | |
34653 | return false; | |
34654 | ||
34655 | first = insn_must_be_first_in_group (insn); | |
34656 | last = insn_must_be_last_in_group (insn); | |
34657 | ||
34658 | if (first && last) | |
34659 | return true; | |
34660 | ||
34661 | if (which_group == current_group) | |
34662 | return last; | |
34663 | else if (which_group == previous_group) | |
34664 | return first; | |
34665 | ||
34666 | return false; | |
34667 | } | |
34668 | ||
34669 | ||
/* Return true if INSN must be the first insn in the dispatch group it
   belongs to on the current processor.  The per-processor switch lists
   the insn attribute types (and attribute combinations) that force the
   dispatcher to start a new group; microcoded and cracked insns are
   also first-in-group on Power4/Power5.  */

static bool
insn_must_be_first_in_group (rtx_insn *insn)
{
  enum attr_type type;

  /* Notes, debug insns, and USE/CLOBBER markers never constrain
     grouping.  */
  if (!insn
      || NOTE_P (insn)
      || DEBUG_INSN_P (insn)
      || GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return false;

  switch (rs6000_cpu)
    {
    case PROCESSOR_POWER5:
      if (is_cracked_insn (insn))
	return true;
      /* FALLTHRU */
    case PROCESSOR_POWER4:
      if (is_microcoded_insn (insn))
	return true;

      if (!rs6000_sched_groups)
	return false;

      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_MFCR:
	case TYPE_MFCRF:
	case TYPE_MTCR:
	case TYPE_DELAYED_CR:
	case TYPE_CR_LOGICAL:
	case TYPE_MTJMPR:
	case TYPE_MFJMPR:
	case TYPE_DIV:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	case TYPE_ISYNC:
	case TYPE_SYNC:
	  return true;
	default:
	  break;
	}
      break;
    case PROCESSOR_POWER6:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_EXTS:
	case TYPE_CNTLZ:
	case TYPE_TRAP:
	case TYPE_MUL:
	case TYPE_INSERT:
	case TYPE_FPCOMPARE:
	case TYPE_MFCR:
	case TYPE_MTCR:
	case TYPE_MFJMPR:
	case TYPE_MTJMPR:
	case TYPE_ISYNC:
	case TYPE_SYNC:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	  return true;
	case TYPE_SHIFT:
	  /* Only non-dot or non-variable shifts; others fall through to
	     the default (no constraint).  */
	  if (get_attr_dot (insn) == DOT_NO
	      || get_attr_var_shift (insn) == VAR_SHIFT_NO)
	    return true;
	  else
	    break;
	case TYPE_DIV:
	  if (get_attr_size (insn) == SIZE_32)
	    return true;
	  else
	    break;
	case TYPE_LOAD:
	case TYPE_STORE:
	case TYPE_FPLOAD:
	case TYPE_FPSTORE:
	  /* Update-form memory accesses start a group on Power6.  */
	  if (get_attr_update (insn) == UPDATE_YES)
	    return true;
	  else
	    break;
	default:
	  break;
	}
      break;
    case PROCESSOR_POWER7:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_CR_LOGICAL:
	case TYPE_MFCR:
	case TYPE_MFCRF:
	case TYPE_MTCR:
	case TYPE_DIV:
	case TYPE_ISYNC:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	case TYPE_MFJMPR:
	case TYPE_MTJMPR:
	  return true;
	case TYPE_MUL:
	case TYPE_SHIFT:
	case TYPE_EXTS:
	  /* Record-form (dot) variants only.  */
	  if (get_attr_dot (insn) == DOT_YES)
	    return true;
	  else
	    break;
	case TYPE_LOAD:
	  if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	      || get_attr_update (insn) == UPDATE_YES)
	    return true;
	  else
	    break;
	case TYPE_STORE:
	case TYPE_FPLOAD:
	case TYPE_FPSTORE:
	  if (get_attr_update (insn) == UPDATE_YES)
	    return true;
	  else
	    break;
	default:
	  break;
	}
      break;
    case PROCESSOR_POWER8:
      type = get_attr_type (insn);

      switch (type)
	{
	case TYPE_CR_LOGICAL:
	case TYPE_DELAYED_CR:
	case TYPE_MFCR:
	case TYPE_MFCRF:
	case TYPE_MTCR:
	case TYPE_SYNC:
	case TYPE_ISYNC:
	case TYPE_LOAD_L:
	case TYPE_STORE_C:
	case TYPE_VECSTORE:
	case TYPE_MFJMPR:
	case TYPE_MTJMPR:
	  return true;
	case TYPE_SHIFT:
	case TYPE_EXTS:
	case TYPE_MUL:
	  if (get_attr_dot (insn) == DOT_YES)
	    return true;
	  else
	    break;
	case TYPE_LOAD:
	  if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
	      || get_attr_update (insn) == UPDATE_YES)
	    return true;
	  else
	    break;
	case TYPE_STORE:
	  /* Indexed update-form stores only, on Power8.  */
	  if (get_attr_update (insn) == UPDATE_YES
	      && get_attr_indexed (insn) == INDEXED_YES)
	    return true;
	  else
	    break;
	default:
	  break;
	}
      break;
    default:
      break;
    }

  return false;
}
34846 | ||
34847 | static bool | |
34848 | insn_must_be_last_in_group (rtx_insn *insn) | |
34849 | { | |
34850 | enum attr_type type; | |
34851 | ||
34852 | if (!insn | |
34853 | || NOTE_P (insn) | |
34854 | || DEBUG_INSN_P (insn) | |
34855 | || GET_CODE (PATTERN (insn)) == USE | |
34856 | || GET_CODE (PATTERN (insn)) == CLOBBER) | |
34857 | return false; | |
34858 | ||
34859 | switch (rs6000_cpu) { | |
34860 | case PROCESSOR_POWER4: | |
34861 | case PROCESSOR_POWER5: | |
34862 | if (is_microcoded_insn (insn)) | |
34863 | return true; | |
34864 | ||
34865 | if (is_branch_slot_insn (insn)) | |
34866 | return true; | |
34867 | ||
34868 | break; | |
34869 | case PROCESSOR_POWER6: | |
34870 | type = get_attr_type (insn); | |
34871 | ||
34872 | switch (type) | |
34873 | { | |
34874 | case TYPE_EXTS: | |
34875 | case TYPE_CNTLZ: | |
34876 | case TYPE_TRAP: | |
34877 | case TYPE_MUL: | |
34878 | case TYPE_FPCOMPARE: | |
34879 | case TYPE_MFCR: | |
34880 | case TYPE_MTCR: | |
34881 | case TYPE_MFJMPR: | |
34882 | case TYPE_MTJMPR: | |
34883 | case TYPE_ISYNC: | |
34884 | case TYPE_SYNC: | |
34885 | case TYPE_LOAD_L: | |
34886 | case TYPE_STORE_C: | |
34887 | return true; | |
34888 | case TYPE_SHIFT: | |
34889 | if (get_attr_dot (insn) == DOT_NO | |
34890 | || get_attr_var_shift (insn) == VAR_SHIFT_NO) | |
34891 | return true; | |
34892 | else | |
34893 | break; | |
34894 | case TYPE_DIV: | |
34895 | if (get_attr_size (insn) == SIZE_32) | |
34896 | return true; | |
34897 | else | |
34898 | break; | |
34899 | default: | |
34900 | break; | |
34901 | } | |
34902 | break; | |
34903 | case PROCESSOR_POWER7: | |
34904 | type = get_attr_type (insn); | |
34905 | ||
34906 | switch (type) | |
34907 | { | |
34908 | case TYPE_ISYNC: | |
34909 | case TYPE_SYNC: | |
34910 | case TYPE_LOAD_L: | |
34911 | case TYPE_STORE_C: | |
34912 | return true; | |
34913 | case TYPE_LOAD: | |
34914 | if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES | |
34915 | && get_attr_update (insn) == UPDATE_YES) | |
34916 | return true; | |
34917 | else | |
34918 | break; | |
34919 | case TYPE_STORE: | |
34920 | if (get_attr_update (insn) == UPDATE_YES | |
34921 | && get_attr_indexed (insn) == INDEXED_YES) | |
34922 | return true; | |
34923 | else | |
34924 | break; | |
34925 | default: | |
34926 | break; | |
34927 | } | |
34928 | break; | |
34929 | case PROCESSOR_POWER8: | |
34930 | type = get_attr_type (insn); | |
34931 | ||
34932 | switch (type) | |
34933 | { | |
34934 | case TYPE_MFCR: | |
34935 | case TYPE_MTCR: | |
34936 | case TYPE_ISYNC: | |
34937 | case TYPE_SYNC: | |
34938 | case TYPE_LOAD_L: | |
34939 | case TYPE_STORE_C: | |
34940 | return true; | |
34941 | case TYPE_LOAD: | |
34942 | if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES | |
34943 | && get_attr_update (insn) == UPDATE_YES) | |
34944 | return true; | |
34945 | else | |
34946 | break; | |
34947 | case TYPE_STORE: | |
34948 | if (get_attr_update (insn) == UPDATE_YES | |
34949 | && get_attr_indexed (insn) == INDEXED_YES) | |
34950 | return true; | |
34951 | else | |
34952 | break; | |
34953 | default: | |
34954 | break; | |
34955 | } | |
34956 | break; | |
34957 | default: | |
34958 | break; | |
34959 | } | |
34960 | ||
34961 | return false; | |
34962 | } | |
34963 | ||
/* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
   dispatch group) from the insns in GROUP_INSNS.  Return false otherwise.

   GROUP_INSNS holds up to issue_rate insns of the current group; vacant
   slots are NULL.  */

static bool
is_costly_group (rtx *group_insns, rtx next_insn)
{
  int i;
  int issue_rate = rs6000_issue_rate ();

  for (i = 0; i < issue_rate; i++)
    {
      sd_iterator_def sd_it;
      dep_t dep;
      rtx insn = group_insns[i];

      if (!insn)
	continue;

      /* Walk this group insn's forward resolved dependences looking for
	 a costly dependence whose consumer is NEXT_INSN.  */
      FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
	{
	  rtx next = DEP_CON (dep);

	  if (next == next_insn
	      && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
	    return true;
	}
    }

  return false;
}
34994 | ||
/* Utility of the function redefine_groups.
   Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
   in the same dispatch group.  If so, insert nops before NEXT_INSN, in order
   to keep it "far" (in a separate group) from GROUP_INSNS, following
   one of the following schemes, depending on the value of the flag
   -minsert_sched_nops = X:
   (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
       in order to force NEXT_INSN into a separate group.
   (2) X < sched_finish_regroup_exact: insert exactly X nops.
   GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
   insertion (has a group just ended, how many vacant issue slots remain in
   the last group, and how many dispatch groups were encountered so far).
   Returns the updated CAN_ISSUE_MORE.  */

static int
force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
		 rtx_insn *next_insn, bool *group_end, int can_issue_more,
		 int *group_count)
{
  rtx nop;
  bool force;
  int issue_rate = rs6000_issue_rate ();
  bool end = *group_end;
  int i;

  if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
    return can_issue_more;

  if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
    return can_issue_more;

  /* Only act when NEXT_INSN has a costly dependence on an insn already
     placed in the current group.  */
  force = is_costly_group (group_insns, next_insn);
  if (!force)
    return can_issue_more;

  if (sched_verbose > 6)
    fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
	     *group_count ,can_issue_more);

  if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
    {
      if (*group_end)
	can_issue_more = 0;

      /* Since only a branch can be issued in the last issue_slot, it is
	 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
	 a branch.  If next_insn is a branch, we insert 'can_issue_more' nops;
	 in this case the last nop will start a new group and the branch
	 will be forced to the new group.  */
      if (can_issue_more && !is_branch_slot_insn (next_insn))
	can_issue_more--;

      /* Do we have a special group ending nop?  */
      if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
	  || rs6000_cpu_attr == CPU_POWER8)
	{
	  /* One group-ending nop terminates the group regardless of how
	     many slots remain.  */
	  nop = gen_group_ending_nop ();
	  emit_insn_before (nop, next_insn);
	  can_issue_more = 0;
	}
      else
	while (can_issue_more > 0)
	  {
	    nop = gen_nop ();
	    emit_insn_before (nop, next_insn);
	    can_issue_more--;
	  }

      *group_end = true;
      return 0;
    }

  if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
    {
      int n_nops = rs6000_sched_insert_nops;

      /* Nops can't be issued from the branch slot, so the effective
	 issue_rate for nops is 'issue_rate - 1'.  */
      if (can_issue_more == 0)
	can_issue_more = issue_rate;
      can_issue_more--;
      if (can_issue_more == 0)
	{
	  can_issue_more = issue_rate - 1;
	  (*group_count)++;
	  end = true;
	  for (i = 0; i < issue_rate; i++)
	    {
	      group_insns[i] = 0;
	    }
	}

      while (n_nops > 0)
	{
	  nop = gen_nop ();
	  emit_insn_before (nop, next_insn);
	  if (can_issue_more == issue_rate - 1) /* new group begins */
	    end = false;
	  can_issue_more--;
	  if (can_issue_more == 0)
	    {
	      can_issue_more = issue_rate - 1;
	      (*group_count)++;
	      end = true;
	      for (i = 0; i < issue_rate; i++)
		{
		  group_insns[i] = 0;
		}
	    }
	  n_nops--;
	}

      /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1').  */
      can_issue_more++;

      /* Is next_insn going to start a new group?  */
      *group_end
	= (end
	   || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
	   || (can_issue_more <= 2 && is_cracked_insn (next_insn))
	   || (can_issue_more < issue_rate &&
	       insn_terminates_group_p (next_insn, previous_group)));
      if (*group_end && end)
	(*group_count)--;

      if (sched_verbose > 6)
	fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
		 *group_count, can_issue_more);
      return can_issue_more;
    }

  return can_issue_more;
}
35127 | ||
/* This function tries to synch the dispatch groups that the compiler "sees"
   with the dispatch groups that the processor dispatcher is expected to
   form in practice.  It tries to achieve this synchronization by forcing the
   estimated processor grouping on the compiler (as opposed to the function
   'pad_groups' which tries to force the scheduler's grouping on the
   processor).

   The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
   examines the (estimated) dispatch groups that will be formed by the
   processor dispatcher.  It marks these group boundaries to reflect the
   estimated processor grouping, overriding the grouping that the scheduler
   had marked.  Depending on the value of the flag '-minsert-sched-nops'
   this function can force certain insns into separate groups or force a
   certain distance between them by inserting nops, for example, if there
   exists a "costly dependence" between the insns.

   The function estimates the group boundaries that the processor will form
   as follows: It keeps track of how many vacant issue slots are available
   after each insn.  A subsequent insn will start a new group if one of the
   following 4 cases applies:
   - no more vacant issue slots remain in the current dispatch group.
   - only the last issue slot, which is the branch slot, is vacant, but the
     next insn is not a branch.
   - only the last 2 or less issue slots, including the branch slot, are
     vacant, which means that a cracked insn (which occupies two issue slots)
     can't be issued in this group.
   - less than 'issue_rate' slots are vacant, and the next insn always needs
     to start a new group.

   Returns the number of dispatch groups found.  */

static int
redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
		 rtx_insn *tail)
{
  rtx_insn *insn, *next_insn;
  int issue_rate;
  int can_issue_more;
  int slot, i;
  bool group_end;
  int group_count = 0;
  rtx *group_insns;

  /* Initialize.  */
  issue_rate = rs6000_issue_rate ();
  group_insns = XALLOCAVEC (rtx, issue_rate);
  for (i = 0; i < issue_rate; i++)
    {
      group_insns[i] = 0;
    }
  can_issue_more = issue_rate;
  slot = 0;
  insn = get_next_active_insn (prev_head_insn, tail);
  group_end = false;

  while (insn != NULL_RTX)
    {
      /* Record INSN in its issue slot of the current group.  */
      slot = (issue_rate - can_issue_more);
      group_insns[slot] = insn;
      can_issue_more =
	rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
      if (insn_terminates_group_p (insn, current_group))
	can_issue_more = 0;

      next_insn = get_next_active_insn (insn, tail);
      if (next_insn == NULL_RTX)
	return group_count + 1;

      /* Is next_insn going to start a new group?  */
      group_end
	= (can_issue_more == 0
	   || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
	   || (can_issue_more <= 2 && is_cracked_insn (next_insn))
	   || (can_issue_more < issue_rate &&
	       insn_terminates_group_p (next_insn, previous_group)));

      /* Possibly insert nops to keep a costly-dependent NEXT_INSN in a
	 separate group; updates group_end/group_count accordingly.  */
      can_issue_more = force_new_group (sched_verbose, dump, group_insns,
					next_insn, &group_end, can_issue_more,
					&group_count);

      if (group_end)
	{
	  group_count++;
	  can_issue_more = 0;
	  for (i = 0; i < issue_rate; i++)
	    {
	      group_insns[i] = 0;
	    }
	}

      /* TImode on an insn marks it as starting a new dispatch group;
	 rewrite the marks to match the estimated processor grouping.  */
      if (GET_MODE (next_insn) == TImode && can_issue_more)
	PUT_MODE (next_insn, VOIDmode);
      else if (!can_issue_more && GET_MODE (next_insn) != TImode)
	PUT_MODE (next_insn, TImode);

      insn = next_insn;
      if (can_issue_more == 0)
	can_issue_more = issue_rate;
    } /* while */

  return group_count;
}
35227 | ||
/* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
   dispatch group boundaries that the scheduler had marked.  Pad with nops
   any dispatch groups which have vacant issue slots, in order to force the
   scheduler's grouping on the processor dispatcher.  The function
   returns the number of dispatch groups found.  */

static int
pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
	    rtx_insn *tail)
{
  rtx_insn *insn, *next_insn;
  rtx nop;
  int issue_rate;
  int can_issue_more;
  int group_end;
  int group_count = 0;

  /* Initialize issue_rate.  */
  issue_rate = rs6000_issue_rate ();
  can_issue_more = issue_rate;

  insn = get_next_active_insn (prev_head_insn, tail);
  next_insn = get_next_active_insn (insn, tail);

  while (insn != NULL_RTX)
    {
      can_issue_more =
      	rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);

      /* A TImode mark on NEXT_INSN indicates the scheduler placed a group
	 boundary between INSN and NEXT_INSN.  */
      group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);

      if (next_insn == NULL_RTX)
	break;

      if (group_end)
	{
	  /* If the scheduler had marked group termination at this location
	     (between insn and next_insn), and neither insn nor next_insn will
	     force group termination, pad the group with nops to force group
	     termination.  */
	  if (can_issue_more
	      && (rs6000_sched_insert_nops == sched_finish_pad_groups)
	      && !insn_terminates_group_p (insn, current_group)
	      && !insn_terminates_group_p (next_insn, previous_group))
	    {
	      /* The branch slot cannot be filled by a nop.  */
	      if (!is_branch_slot_insn (next_insn))
		can_issue_more--;

	      while (can_issue_more)
		{
		  nop = gen_nop ();
		  emit_insn_before (nop, next_insn);
		  can_issue_more--;
		}
	    }

	  can_issue_more = issue_rate;
	  group_count++;
	}

      insn = next_insn;
      next_insn = get_next_active_insn (insn, tail);
    }

  return group_count;
}
35294 | ||
35295 | /* We're beginning a new block. Initialize data structures as necessary. */ | |
35296 | ||
35297 | static void | |
35298 | rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED, | |
35299 | int sched_verbose ATTRIBUTE_UNUSED, | |
35300 | int max_ready ATTRIBUTE_UNUSED) | |
35301 | { | |
35302 | last_scheduled_insn = NULL; | |
35303 | load_store_pendulum = 0; | |
35304 | divide_cnt = 0; | |
35305 | vec_pairing = 0; | |
35306 | } | |
35307 | ||
/* The following function is called at the end of scheduling BB.
   After reload, it inserts nops at insn group bundling.  */

static void
rs6000_sched_finish (FILE *dump, int sched_verbose)
{
  int n_groups;

  if (sched_verbose)
    fprintf (dump, "=== Finishing schedule.\n");

  /* Nop insertion only makes sense after reload and only for CPUs that
     dispatch insns in groups (rs6000_sched_groups).  */
  if (reload_completed && rs6000_sched_groups)
    {
      /* Do not run sched_finish hook when selective scheduling enabled.  */
      if (sel_sched_p ())
	return;

      if (rs6000_sched_insert_nops == sched_finish_none)
	return;

      /* Either pad the groups the scheduler already formed, or recompute
	 the grouping from scratch, inserting nops as needed.  */
      if (rs6000_sched_insert_nops == sched_finish_pad_groups)
	n_groups = pad_groups (dump, sched_verbose,
			       current_sched_info->prev_head,
			       current_sched_info->next_tail);
      else
	n_groups = redefine_groups (dump, sched_verbose,
				    current_sched_info->prev_head,
				    current_sched_info->next_tail);

      if (sched_verbose >= 6)
	{
	  fprintf (dump, "ngroups = %d\n", n_groups);
	  print_rtl (dump, current_sched_info->prev_head);
	  fprintf (dump, "Done finish_sched\n");
	}
    }
}
35345 | ||
/* Snapshot of the scheduler's per-context state.  Each field mirrors the
   file-scope variable of the same name; see rs6000_init_sched_context and
   rs6000_set_sched_context, which copy between the two.  */
struct rs6000_sched_context
{
  short cached_can_issue_more;		/* Mirrors cached_can_issue_more.  */
  rtx_insn *last_scheduled_insn;	/* Mirrors last_scheduled_insn.  */
  int load_store_pendulum;		/* Mirrors load_store_pendulum.  */
  int divide_cnt;			/* Mirrors divide_cnt.  */
  int vec_pairing;			/* Mirrors vec_pairing.  */
};

typedef struct rs6000_sched_context rs6000_sched_context_def;
typedef rs6000_sched_context_def *rs6000_sched_context_t;
35357 | ||
35358 | /* Allocate store for new scheduling context. */ | |
35359 | static void * | |
35360 | rs6000_alloc_sched_context (void) | |
35361 | { | |
35362 | return xmalloc (sizeof (rs6000_sched_context_def)); | |
35363 | } | |
35364 | ||
/* If CLEAN_P is true then initializes _SC with clean data,
   and from the global context otherwise.  */
static void
rs6000_init_sched_context (void *_sc, bool clean_p)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  if (clean_p)
    {
      /* Fresh context: all scheduling state starts out zeroed.  */
      sc->cached_can_issue_more = 0;
      sc->last_scheduled_insn = NULL;
      sc->load_store_pendulum = 0;
      sc->divide_cnt = 0;
      sc->vec_pairing = 0;
    }
  else
    {
      /* Snapshot the current global scheduling state into the context.  */
      sc->cached_can_issue_more = cached_can_issue_more;
      sc->last_scheduled_insn = last_scheduled_insn;
      sc->load_store_pendulum = load_store_pendulum;
      sc->divide_cnt = divide_cnt;
      sc->vec_pairing = vec_pairing;
    }
}
35389 | ||
/* Sets the global scheduling context to the one pointed to by _SC.
   Inverse of the !clean_p path of rs6000_init_sched_context.  */
static void
rs6000_set_sched_context (void *_sc)
{
  rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;

  gcc_assert (sc != NULL);

  /* Restore every piece of saved scheduling state.  */
  cached_can_issue_more = sc->cached_can_issue_more;
  last_scheduled_insn = sc->last_scheduled_insn;
  load_store_pendulum = sc->load_store_pendulum;
  divide_cnt = sc->divide_cnt;
  vec_pairing = sc->vec_pairing;
}
35404 | ||
/* Free _SC, a context previously obtained from rs6000_alloc_sched_context.  */
static void
rs6000_free_sched_context (void *_sc)
{
  /* A null context here indicates a caller bug.  */
  gcc_assert (_sc != NULL);
  free (_sc);
}
35413 | ||
/* Return true if INSN may be scheduled speculatively.  Divide and
   square-root insns are excluded; presumably because of their long
   latencies — NOTE(review): confirm rationale.  */
static bool
rs6000_sched_can_speculate_insn (rtx_insn *insn)
{
  switch (get_attr_type (insn))
    {
    case TYPE_DIV:
    case TYPE_SDIV:
    case TYPE_DDIV:
    case TYPE_VECDIV:
    case TYPE_SSQRT:
    case TYPE_DSQRT:
      return false;

    default:
      return true;
    }
}
35431 | \f | |
35432 | /* Length in units of the trampoline for entering a nested function. */ | |
35433 | ||
35434 | int | |
35435 | rs6000_trampoline_size (void) | |
35436 | { | |
35437 | int ret = 0; | |
35438 | ||
35439 | switch (DEFAULT_ABI) | |
35440 | { | |
35441 | default: | |
35442 | gcc_unreachable (); | |
35443 | ||
35444 | case ABI_AIX: | |
35445 | ret = (TARGET_32BIT) ? 12 : 24; | |
35446 | break; | |
35447 | ||
35448 | case ABI_ELFv2: | |
35449 | gcc_assert (!TARGET_32BIT); | |
35450 | ret = 32; | |
35451 | break; | |
35452 | ||
35453 | case ABI_DARWIN: | |
35454 | case ABI_V4: | |
35455 | ret = (TARGET_32BIT) ? 40 : 48; | |
35456 | break; | |
35457 | } | |
35458 | ||
35459 | return ret; | |
35460 | } | |
35461 | ||
35462 | /* Emit RTL insns to initialize the variable parts of a trampoline. | |
35463 | FNADDR is an RTX for the address of the function's pure code. | |
35464 | CXT is an RTX for the static chain value for the function. */ | |
35465 | ||
35466 | static void | |
35467 | rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt) | |
35468 | { | |
35469 | int regsize = (TARGET_32BIT) ? 4 : 8; | |
35470 | rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); | |
35471 | rtx ctx_reg = force_reg (Pmode, cxt); | |
35472 | rtx addr = force_reg (Pmode, XEXP (m_tramp, 0)); | |
35473 | ||
35474 | switch (DEFAULT_ABI) | |
35475 | { | |
35476 | default: | |
35477 | gcc_unreachable (); | |
35478 | ||
35479 | /* Under AIX, just build the 3 word function descriptor */ | |
35480 | case ABI_AIX: | |
35481 | { | |
35482 | rtx fnmem, fn_reg, toc_reg; | |
35483 | ||
35484 | if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS) | |
35485 | error ("You cannot take the address of a nested function if you use " | |
35486 | "the -mno-pointers-to-nested-functions option."); | |
35487 | ||
35488 | fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr)); | |
35489 | fn_reg = gen_reg_rtx (Pmode); | |
35490 | toc_reg = gen_reg_rtx (Pmode); | |
35491 | ||
35492 | /* Macro to shorten the code expansions below. */ | |
35493 | # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET) | |
35494 | ||
35495 | m_tramp = replace_equiv_address (m_tramp, addr); | |
35496 | ||
35497 | emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0)); | |
35498 | emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize)); | |
35499 | emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg); | |
35500 | emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg); | |
35501 | emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg); | |
35502 | ||
35503 | # undef MEM_PLUS | |
35504 | } | |
35505 | break; | |
35506 | ||
35507 | /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */ | |
35508 | case ABI_ELFv2: | |
35509 | case ABI_DARWIN: | |
35510 | case ABI_V4: | |
35511 | emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"), | |
db69559b | 35512 | LCT_NORMAL, VOIDmode, |
83349046 SB |
35513 | addr, Pmode, |
35514 | GEN_INT (rs6000_trampoline_size ()), SImode, | |
35515 | fnaddr, Pmode, | |
35516 | ctx_reg, Pmode); | |
35517 | break; | |
35518 | } | |
35519 | } | |
35520 | ||
35521 | \f | |
/* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
   identifier as an argument, so the front end shouldn't look it up.
   Only the "altivec" attribute does (its argument is e.g. vector__).  */

static bool
rs6000_attribute_takes_identifier_p (const_tree attr_id)
{
  return is_attribute_p ("altivec", attr_id);
}
35530 | ||
/* Handle the "altivec" attribute.  The attribute may have
   arguments as follows:

	__attribute__((altivec(vector__)))
	__attribute__((altivec(pixel__)))	(always followed by 'unsigned short')
	__attribute__((altivec(bool__)))	(always followed by 'unsigned')

  and may appear more than once (e.g., 'vector bool char') in a
  given declaration.  */

static tree
rs6000_handle_altivec_attribute (tree *node,
				 tree name ATTRIBUTE_UNUSED,
				 tree args,
				 int flags ATTRIBUTE_UNUSED,
				 bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;
  int unsigned_p;
  /* First character of the attribute argument selects the flavor:
     'v' (vector__), 'b' (bool__), 'p' (pixel__); '?' when absent.  */
  char altivec_type
    = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
	&& TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
       ? *IDENTIFIER_POINTER (TREE_VALUE (args))
       : '?');

  /* Strip pointers, function/method types and arrays to reach the
     element type the attribute is really applied to.  */
  while (POINTER_TYPE_P (type)
	 || TREE_CODE (type) == FUNCTION_TYPE
	 || TREE_CODE (type) == METHOD_TYPE
	 || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);

  /* Check for invalid AltiVec type qualifiers.  */
  if (type == long_double_type_node)
    error ("use of %<long double%> in AltiVec types is invalid");
  else if (type == boolean_type_node)
    error ("use of boolean types in AltiVec types is invalid");
  else if (TREE_CODE (type) == COMPLEX_TYPE)
    error ("use of %<complex%> in AltiVec types is invalid");
  else if (DECIMAL_FLOAT_MODE_P (mode))
    error ("use of decimal floating point types in AltiVec types is invalid");
  else if (!TARGET_VSX)
    {
      /* Without VSX, 64-bit element types are not supported.  */
      if (type == long_unsigned_type_node || type == long_integer_type_node)
	{
	  if (TARGET_64BIT)
	    error ("use of %<long%> in AltiVec types is invalid for "
		   "64-bit code without -mvsx");
	  else if (rs6000_warn_altivec_long)
	    warning (0, "use of %<long%> in AltiVec types is deprecated; "
		     "use %<int%>");
	}
      else if (type == long_long_unsigned_type_node
	       || type == long_long_integer_type_node)
	error ("use of %<long long%> in AltiVec types is invalid without "
	       "-mvsx");
      else if (type == double_type_node)
	error ("use of %<double%> in AltiVec types is invalid without -mvsx");
    }

  /* Map the (flavor, element mode) pair to the vector type node.  */
  switch (altivec_type)
    {
    case 'v':
      unsigned_p = TYPE_UNSIGNED (type);
      switch (mode)
	{
	case E_TImode:
	  result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
	  break;
	case E_DImode:
	  result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
	  break;
	case E_SImode:
	  result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
	  break;
	case E_HImode:
	  result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
	  break;
	case E_QImode:
	  result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
	  break;
	case E_SFmode: result = V4SF_type_node; break;
	case E_DFmode: result = V2DF_type_node; break;
	  /* If the user says 'vector int bool', we may be handed the 'bool'
	     attribute _before_ the 'vector' attribute, and so select the
	     proper type in the 'b' case below.  */
	case E_V4SImode: case E_V8HImode: case E_V16QImode: case E_V4SFmode:
	case E_V2DImode: case E_V2DFmode:
	  result = type;
	  /* FALLTHRU (into default, which only breaks).  */
	default: break;
	}
      break;
    case 'b':
      switch (mode)
	{
	case E_DImode: case E_V2DImode: result = bool_V2DI_type_node; break;
	case E_SImode: case E_V4SImode: result = bool_V4SI_type_node; break;
	case E_HImode: case E_V8HImode: result = bool_V8HI_type_node; break;
	case E_QImode: case E_V16QImode: result = bool_V16QI_type_node;
	  /* FALLTHRU (into default, which only breaks).  */
	default: break;
	}
      break;
    case 'p':
      switch (mode)
	{
	case E_V8HImode: result = pixel_V8HI_type_node;
	  /* FALLTHRU (into default, which only breaks).  */
	default: break;
	}
      /* FALLTHRU (into default, which only breaks).  */
    default: break;
    }

  /* Propagate qualifiers attached to the element type
     onto the vector type.  */
  if (result && result != type && TYPE_QUALS (type))
    result = build_qualified_type (result, TYPE_QUALS (type));

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
35656 | ||
/* AltiVec defines four built-in scalar types that serve as vector
   elements; we must teach the compiler how to mangle them.  Returns the
   mangled-name fragment, or NULL to fall back to default C++ mangling.  */

static const char *
rs6000_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  /* Only scalar void/bool/integer/real types need special treatment.  */
  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  /* The AltiVec element types get vendor-extended mangled names.  */
  if (type == bool_char_type_node) return "U6__boolc";
  if (type == bool_short_type_node) return "U6__bools";
  if (type == pixel_type_node) return "u7__pixel";
  if (type == bool_int_type_node) return "U6__booli";
  if (type == bool_long_type_node) return "U6__booll";

  /* Use a unique name for __float128 rather than trying to use "e" or "g". Use
     "g" for IBM extended double, no matter whether it is long double (using
     -mabi=ibmlongdouble) or the distinct __ibm128 type.  */
  if (TARGET_FLOAT128_TYPE)
    {
      if (type == ieee128_float_type_node)
	return "U10__float128";

      if (type == ibm128_float_type_node)
	return "g";

      if (type == long_double_type_node && TARGET_LONG_DOUBLE_128)
	return (TARGET_IEEEQUAD) ? "U10__float128" : "g";
    }

  /* Mangle IBM extended float long double as `g' (__float128) on
     powerpc*-linux where long-double-64 previously was the default.  */
  if (TYPE_MAIN_VARIANT (type) == long_double_type_node
      && TARGET_ELF
      && TARGET_LONG_DOUBLE_128
      && !TARGET_IEEEQUAD)
    return "g";

  /* For all other types, use normal C++ mangling.  */
  return NULL;
}
35701 | ||
35702 | /* Handle a "longcall" or "shortcall" attribute; arguments as in | |
35703 | struct attribute_spec.handler. */ | |
35704 | ||
35705 | static tree | |
35706 | rs6000_handle_longcall_attribute (tree *node, tree name, | |
35707 | tree args ATTRIBUTE_UNUSED, | |
35708 | int flags ATTRIBUTE_UNUSED, | |
35709 | bool *no_add_attrs) | |
35710 | { | |
35711 | if (TREE_CODE (*node) != FUNCTION_TYPE | |
35712 | && TREE_CODE (*node) != FIELD_DECL | |
35713 | && TREE_CODE (*node) != TYPE_DECL) | |
35714 | { | |
35715 | warning (OPT_Wattributes, "%qE attribute only applies to functions", | |
35716 | name); | |
35717 | *no_add_attrs = true; | |
35718 | } | |
35719 | ||
35720 | return NULL_TREE; | |
35721 | } | |
35722 | ||
/* Set longcall attributes on all functions declared when
   rs6000_default_long_calls is true.  */
static void
rs6000_set_default_type_attributes (tree type)
{
  /* Prepend a "longcall" attribute to function and method types.  */
  if (rs6000_default_long_calls
      && (TREE_CODE (type) == FUNCTION_TYPE
	  || TREE_CODE (type) == METHOD_TYPE))
    TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
					NULL_TREE,
					TYPE_ATTRIBUTES (type));

#if TARGET_MACHO
  /* Darwin has additional default attributes of its own.  */
  darwin_set_default_type_attributes (type);
#endif
}
35739 | ||
35740 | /* Return a reference suitable for calling a function with the | |
35741 | longcall attribute. */ | |
35742 | ||
35743 | rtx | |
35744 | rs6000_longcall_ref (rtx call_ref) | |
35745 | { | |
35746 | const char *call_name; | |
35747 | tree node; | |
35748 | ||
35749 | if (GET_CODE (call_ref) != SYMBOL_REF) | |
35750 | return call_ref; | |
35751 | ||
35752 | /* System V adds '.' to the internal name, so skip them. */ | |
35753 | call_name = XSTR (call_ref, 0); | |
35754 | if (*call_name == '.') | |
35755 | { | |
35756 | while (*call_name == '.') | |
35757 | call_name++; | |
35758 | ||
35759 | node = get_identifier (call_name); | |
35760 | call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node)); | |
35761 | } | |
35762 | ||
35763 | return force_reg (Pmode, call_ref); | |
35764 | } | |
35765 | \f | |
#ifndef TARGET_USE_MS_BITFIELD_LAYOUT
#define TARGET_USE_MS_BITFIELD_LAYOUT 0
#endif

/* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
   struct attribute_spec.handler.  The two attributes are mutually
   exclusive and only apply to struct/union types.  */
static tree
rs6000_handle_struct_attribute (tree *node, tree name,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree *type = NULL;
  /* For a TYPE_DECL, the attribute really applies to the declared type.  */
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) == TYPE_DECL)
	type = &TREE_TYPE (*node);
    }
  else
    type = node;

  if (!(type && (TREE_CODE (*type) == RECORD_TYPE
		 || TREE_CODE (*type) == UNION_TYPE)))
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }

  /* Reject ms_struct on a type already marked gcc_struct, and vice
     versa.  */
  else if ((is_attribute_p ("ms_struct", name)
	    && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
	   || ((is_attribute_p ("gcc_struct", name)
		&& lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
    {
      warning (OPT_Wattributes, "%qE incompatible attribute ignored",
               name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
35805 | ||
35806 | static bool | |
35807 | rs6000_ms_bitfield_layout_p (const_tree record_type) | |
35808 | { | |
35809 | return (TARGET_USE_MS_BITFIELD_LAYOUT && | |
35810 | !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) | |
35811 | || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)); | |
35812 | } | |
35813 | \f | |
#ifdef USING_ELFOS_H

/* A get_unnamed_section callback, used for switching to toc_section.  */

static void
rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
      && TARGET_MINIMAL_TOC)
    {
      /* Minimal-TOC AIX-style: the first switch emits the LCTOC anchor in
	 the full TOC section plus the LCTOC1 label in the minimal TOC
	 section; later switches just re-enter the minimal TOC section.  */
      if (!toc_initialized)
	{
	  fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
	  fprintf (asm_out_file, "\t.tc ");
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
	  fprintf (asm_out_file, "\n");

	  fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
	  /* The +32768 bias presumably lets 16-bit signed offsets reach the
	     whole 64K TOC — NOTE(review): confirm.  */
	  fprintf (asm_out_file, " = .+32768\n");
	  toc_initialized = 1;
	}
      else
	fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
    }
  else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
    {
      /* Full TOC: just switch sections, aligning on first use.  */
      fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
      if (!toc_initialized)
	{
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  toc_initialized = 1;
	}
    }
  else
    {
      /* Other ABIs: minimal TOC section with the biased LCTOC1 anchor.  */
      fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
      if (!toc_initialized)
	{
	  ASM_OUTPUT_ALIGN (asm_out_file, TARGET_64BIT ? 3 : 2);
	  ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
	  fprintf (asm_out_file, " = .+32768\n");
	  toc_initialized = 1;
	}
    }
}
35864 | ||
/* Implement TARGET_ASM_INIT_SECTIONS.  Create the TOC section (with its
   lazy-initializing switch callback above) and the small-data sdata2
   section.  */

static void
rs6000_elf_asm_init_sections (void)
{
  toc_section
    = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);

  sdata2_section
    = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
			   SDATA2_SECTION_ASM_OP);
}
35877 | ||
35878 | /* Implement TARGET_SELECT_RTX_SECTION. */ | |
35879 | ||
35880 | static section * | |
35881 | rs6000_elf_select_rtx_section (machine_mode mode, rtx x, | |
35882 | unsigned HOST_WIDE_INT align) | |
35883 | { | |
35884 | if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode)) | |
35885 | return toc_section; | |
35886 | else | |
35887 | return default_elf_select_rtx_section (mode, x, align); | |
35888 | } | |
35889 | \f | |
/* For a SYMBOL_REF, set generic flags and then perform some
   target-specific processing.

   When the AIX ABI is requested on a non-AIX system, replace the
   function name with the real name (with a leading .) rather than the
   function descriptor name.  This saves a lot of overriding code to
   read the prefixes.  */

static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
static void
rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (first
      && TREE_CODE (decl) == FUNCTION_DECL
      && !TARGET_AIX
      && DEFAULT_ABI == ABI_AIX)
    {
      rtx sym_ref = XEXP (rtl, 0);
      size_t len = strlen (XSTR (sym_ref, 0));
      /* Build "." + name (+2: one for the dot, one for the NUL).  */
      char *str = XALLOCAVEC (char, len + 2);
      str[0] = '.';
      memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
      XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
    }
}
35917 | ||
/* Return true if SECTION is named TEMPL exactly, or is a subsection of
   it (TEMPL followed by '.', as in ".sdata.foo" under ".sdata").  */
static inline bool
compare_section_name (const char *section, const char *templ)
{
  size_t len = strlen (templ);

  if (strncmp (section, templ, len) != 0)
    return false;
  return section[len] == '\0' || section[len] == '.';
}
35927 | ||
/* Return true if DECL should be placed in the small-data area: either it
   was explicitly placed in a small-data section, or it is a small enough
   object (per -G/g_switch_value).  */
bool
rs6000_elf_in_small_data_p (const_tree decl)
{
  if (rs6000_sdata == SDATA_NONE)
    return false;

  /* We want to merge strings, so we never consider them small data.  */
  if (TREE_CODE (decl) == STRING_CST)
    return false;

  /* Functions are never in the small data area.  */
  if (TREE_CODE (decl) == FUNCTION_DECL)
    return false;

  if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
    {
      /* Honor an explicit placement in a small-data section (or one of
	 its subsections).  */
      const char *section = DECL_SECTION_NAME (decl);
      if (compare_section_name (section, ".sdata")
	  || compare_section_name (section, ".sdata2")
	  || compare_section_name (section, ".gnu.linkonce.s")
	  || compare_section_name (section, ".sbss")
	  || compare_section_name (section, ".sbss2")
	  || compare_section_name (section, ".gnu.linkonce.sb")
	  || strcmp (section, ".PPC.EMB.sdata0") == 0
	  || strcmp (section, ".PPC.EMB.sbss0") == 0)
	return true;
    }
  else
    {
      /* No explicit section: decide by object size against -G.  */
      HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));

      if (size > 0
	  && size <= g_switch_value
	  /* If it's not public, and we're not going to reference it there,
	     there's no need to put it in the small data section.  */
	  && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
	return true;
    }

  return false;
}

#endif /* USING_ELFOS_H */
35971 | \f | |
/* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P.  Constants that belong in
   the TOC (special pool entries) must not go in object blocks.  */

static bool
rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
{
  return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
}
35979 | ||
/* Do not place thread-local symbols refs in the object blocks.  */

static bool
rs6000_use_blocks_for_decl_p (const_tree decl)
{
  return !DECL_THREAD_LOCAL_P (decl);
}
35987 | \f | |
35988 | /* Return a REG that occurs in ADDR with coefficient 1. | |
35989 | ADDR can be effectively incremented by incrementing REG. | |
35990 | ||
35991 | r0 is special and we must not select it as an address | |
35992 | register by this routine since our caller will try to | |
35993 | increment the returned register via an "la" instruction. */ | |
35994 | ||
35995 | rtx | |
35996 | find_addr_reg (rtx addr) | |
35997 | { | |
35998 | while (GET_CODE (addr) == PLUS) | |
35999 | { | |
36000 | if (GET_CODE (XEXP (addr, 0)) == REG | |
36001 | && REGNO (XEXP (addr, 0)) != 0) | |
36002 | addr = XEXP (addr, 0); | |
36003 | else if (GET_CODE (XEXP (addr, 1)) == REG | |
36004 | && REGNO (XEXP (addr, 1)) != 0) | |
36005 | addr = XEXP (addr, 1); | |
36006 | else if (CONSTANT_P (XEXP (addr, 0))) | |
36007 | addr = XEXP (addr, 1); | |
36008 | else if (CONSTANT_P (XEXP (addr, 1))) | |
36009 | addr = XEXP (addr, 0); | |
36010 | else | |
36011 | gcc_unreachable (); | |
36012 | } | |
36013 | gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0); | |
36014 | return addr; | |
36015 | } | |
36016 | ||
/* Report an internal compiler error for an invalid address, pointing
   at the offending insn OP.  */
void
rs6000_fatal_bad_address (rtx op)
{
  fatal_insn ("bad address", op);
}
36022 | ||
#if TARGET_MACHO

/* Record of a far call needing a branch island: the callee's identifier,
   the identifier of the island's label, and the call's source line number
   (used for stabs line info when emitting the island).  */
typedef struct branch_island_d {
  tree function_name;	/* Identifier of the function being called.  */
  tree label_name;	/* Identifier of the branch-island label.  */
  int line_number;	/* Source line of the call site.  */
} branch_island;


/* Branch islands still to be emitted for the current function.  */
static vec<branch_island, va_gc> *branch_islands;
36033 | ||
36034 | /* Remember to generate a branch island for far calls to the given | |
36035 | function. */ | |
36036 | ||
36037 | static void | |
36038 | add_compiler_branch_island (tree label_name, tree function_name, | |
36039 | int line_number) | |
36040 | { | |
36041 | branch_island bi = {function_name, label_name, line_number}; | |
36042 | vec_safe_push (branch_islands, bi); | |
36043 | } | |
36044 | ||
/* Generate far-jump branch islands for everything recorded in
   branch_islands.  Invoked immediately after the last instruction of
   the epilogue has been emitted; the branch islands must be appended
   to, and contiguous with, the function body.  Mach-O stubs are
   generated in machopic_output_stub().  */

static void
macho_branch_islands (void)
{
  char tmp_buf[512];

  /* Pop and emit islands until none remain.  */
  while (!vec_safe_is_empty (branch_islands))
    {
      branch_island *bi = &branch_islands->last ();
      const char *label = IDENTIFIER_POINTER (bi->label_name);
      const char *name = IDENTIFIER_POINTER (bi->function_name);
      char name_buf[512];
      /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF().  */
      if (name[0] == '*' || name[0] == '&')
	strcpy (name_buf, name+1);
      else
	{
	  name_buf[0] = '_';
	  strcpy (name_buf+1, name);
	}
      /* Build the island's asm text: the label, then a long-branch
	 sequence loading the target address into r12 and bctr-ing.  */
      strcpy (tmp_buf, "\n");
      strcat (tmp_buf, label);
#if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
      if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
	dbxout_stabd (N_SLINE, bi->line_number);
#endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
      if (flag_pic)
	{
	  /* PIC: materialize the pc in r11 (via a thunk when the link
	     stack is in use, else via bcl 20,31), then form the target
	     address pc-relative in r12.  */
	  if (TARGET_LINK_STACK)
	    {
	      char name[32];
	      get_ppc476_thunk_name (name);
	      strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
	      strcat (tmp_buf, name);
	      strcat (tmp_buf, "\n");
	      strcat (tmp_buf, label);
	      strcat (tmp_buf, "_pic:\n\tmflr r11\n");
	    }
	  else
	    {
	      strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
	      strcat (tmp_buf, label);
	      strcat (tmp_buf, "_pic\n");
	      strcat (tmp_buf, label);
	      strcat (tmp_buf, "_pic:\n\tmflr r11\n");
	    }

	  strcat (tmp_buf, "\taddis r11,r11,ha16(");
	  strcat (tmp_buf, name_buf);
	  strcat (tmp_buf, " - ");
	  strcat (tmp_buf, label);
	  strcat (tmp_buf, "_pic)\n");

	  strcat (tmp_buf, "\tmtlr r0\n");

	  strcat (tmp_buf, "\taddi r12,r11,lo16(");
	  strcat (tmp_buf, name_buf);
	  strcat (tmp_buf, " - ");
	  strcat (tmp_buf, label);
	  strcat (tmp_buf, "_pic)\n");

	  strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
	}
      else
	{
	  /* Non-PIC: load the absolute address of the target into r12.  */
	  strcat (tmp_buf, ":\nlis r12,hi16(");
	  strcat (tmp_buf, name_buf);
	  strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
	  strcat (tmp_buf, name_buf);
	  strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
	}
      output_asm_insn (tmp_buf, 0);
#if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
      if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
	dbxout_stabd (N_SLINE, bi->line_number);
#endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
      branch_islands->pop ();
    }
}
36129 | ||
36130 | /* NO_PREVIOUS_DEF checks in the link list whether the function name is | |
36131 | already there or not. */ | |
36132 | ||
36133 | static int | |
36134 | no_previous_def (tree function_name) | |
36135 | { | |
36136 | branch_island *bi; | |
36137 | unsigned ix; | |
36138 | ||
36139 | FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi) | |
36140 | if (function_name == bi->function_name) | |
36141 | return 0; | |
36142 | return 1; | |
36143 | } | |
36144 | ||
36145 | /* GET_PREV_LABEL gets the label name from the previous definition of | |
36146 | the function. */ | |
36147 | ||
36148 | static tree | |
36149 | get_prev_label (tree function_name) | |
36150 | { | |
36151 | branch_island *bi; | |
36152 | unsigned ix; | |
36153 | ||
36154 | FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi) | |
36155 | if (function_name == bi->function_name) | |
36156 | return bi->label_name; | |
36157 | return NULL_TREE; | |
36158 | } | |
36159 | ||
/* INSN is either a function call or a millicode call.  It may have an
   unconditional jump in its delay slot.

   CALL_DEST is the routine we are calling.

   Returns the assembler template for the call.  NOTE: the result lives
   in a static buffer, so it is only valid until the next call.  */

char *
output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
	     int cookie_operand_number)
{
  static char buf[256];
  /* Long calls on Darwin to a known symbol go through a branch island;
     everything else is a plain 'bl'.  */
  if (darwin_emit_branch_islands
      && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
      && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
    {
      tree labelname;
      tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));

      if (no_previous_def (funname))
	{
	  /* First long call to this function: create a fresh internal
	     label and record a branch island for it.  */
	  rtx label_rtx = gen_label_rtx ();
	  char *label_buf, temp_buf[256];
	  ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
				       CODE_LABEL_NUMBER (label_rtx));
	  /* Strip the '*' prefix some targets prepend to internal labels.  */
	  label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
	  labelname = get_identifier (label_buf);
	  add_compiler_branch_island (labelname, funname, insn_line (insn));
	}
      else
	/* Reuse the island already recorded for this function.  */
	labelname = get_prev_label (funname);

      /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
	 instruction will reach 'foo', otherwise link as 'bl L42'".
	 "L42" should be a 'branch island', that will do a far jump to
	 'foo'.  Branch islands are generated in
	 macho_branch_islands().  */
      /* %.246s caps the label so the fixed part always fits in buf[256].  */
      sprintf (buf, "jbsr %%z%d,%.246s",
	       dest_operand_number, IDENTIFIER_POINTER (labelname));
    }
  else
    sprintf (buf, "bl %%z%d", dest_operand_number);
  return buf;
}
36202 | ||
/* Generate PIC and indirect symbol stubs.

   Emit to FILE the Mach-O lazy-binding stub for SYMB, labelled STUB.
   Two flavors are produced: a PIC stub (flag_pic == 2) that computes the
   lazy pointer's address relative to a local label, and a non-PIC stub
   that loads it via absolute hi16/lo16 halves.  Both end by jumping
   through r12; the lazy pointer itself initially points at
   dyld_stub_binding_helper.  */

void
machopic_output_stub (FILE *file, const char *symb, const char *stub)
{
  unsigned int length;
  char *symbol_name, *lazy_ptr_name;
  char *local_label_0;
  /* Per-file counter used to make each stub's local label unique.  */
  static int label = 0;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);


  length = strlen (symb);
  symbol_name = XALLOCAVEC (char, length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  lazy_ptr_name = XALLOCAVEC (char, length + 32);
  GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);

  if (flag_pic == 2)
    switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
  else
    switch_to_section (darwin_sections[machopic_symbol_stub1_section]);

  if (flag_pic == 2)
    {
      fprintf (file, "\t.align 5\n");

      fprintf (file, "%s:\n", stub);
      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

      label++;
      local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
      sprintf (local_label_0, "\"L%011d$spb\"", label);

      /* Save LR, then obtain the stub's own address in r11 (either via
	 the ppc476 link-stack thunk or a bcl that falls through).  */
      fprintf (file, "\tmflr r0\n");
      if (TARGET_LINK_STACK)
	{
	  char name[32];
	  get_ppc476_thunk_name (name);
	  fprintf (file, "\tbl %s\n", name);
	  fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
	}
      else
	{
	  fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
	  fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
	}
      /* r12 = *lazy_ptr, computed PC-relative from local_label_0.  */
      fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
	       lazy_ptr_name, local_label_0);
      fprintf (file, "\tmtlr r0\n");
      fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
	       (TARGET_64BIT ? "ldu" : "lwzu"),
	       lazy_ptr_name, local_label_0);
      fprintf (file, "\tmtctr r12\n");
      fprintf (file, "\tbctr\n");
    }
  else
    {
      fprintf (file, "\t.align 4\n");

      fprintf (file, "%s:\n", stub);
      fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

      /* Non-PIC: load the lazy pointer through its absolute address.  */
      fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
      fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
	       (TARGET_64BIT ? "ldu" : "lwzu"),
	       lazy_ptr_name);
      fprintf (file, "\tmtctr r12\n");
      fprintf (file, "\tbctr\n");
    }

  /* Emit the lazy pointer itself, initialized to the dyld binding
     helper; dyld rewrites it to the real target on first use.  */
  switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "%sdyld_stub_binding_helper\n",
	   (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
}
36283 | ||
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go into a reg.  This is REG if non
   zero, otherwise we allocate register(s) as necessary.  */

/* True iff X fits in a signed 16-bit immediate.  */
#define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)

rtx
rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
					rtx reg)
{
  rtx base, offset;

  /* Only allocate a fresh pseudo while that is still allowed.  */
  if (reg == NULL && ! reload_in_progress && ! reload_completed)
    reg = gen_reg_rtx (Pmode);

  if (GET_CODE (orig) == CONST)
    {
      rtx reg_temp;

      /* Already (const (plus pic_offset_table ...)): nothing to do.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
	return orig;

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      /* Use a different reg for the intermediate value, as
	 it will be marked UNCHANGING.  */
      reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
      /* Legitimize the two PLUS operands recursively.  */
      base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
						     Pmode, reg_temp);
      offset =
	rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
						Pmode, reg);

      if (GET_CODE (offset) == CONST_INT)
	{
	  if (SMALL_INT (offset))
	    /* Small constant: fold it straight into the address.  */
	    return plus_constant (Pmode, base, INTVAL (offset));
	  else if (! reload_in_progress && ! reload_completed)
	    offset = force_reg (Pmode, offset);
	  else
	    {
	      /* During/after reload we cannot make new pseudos, so
		 spill the whole constant to memory instead.  */
 	      rtx mem = force_const_mem (Pmode, orig);
	      return machopic_legitimize_pic_address (mem, Pmode, reg);
	    }
	}
      return gen_rtx_PLUS (Pmode, base, offset);
    }

  /* Fall back on generic machopic code.  */
  return machopic_legitimize_pic_address (orig, mode, reg);
}
36337 | ||
36338 | /* Output a .machine directive for the Darwin assembler, and call | |
36339 | the generic start_file routine. */ | |
36340 | ||
36341 | static void | |
36342 | rs6000_darwin_file_start (void) | |
36343 | { | |
36344 | static const struct | |
36345 | { | |
36346 | const char *arg; | |
36347 | const char *name; | |
36348 | HOST_WIDE_INT if_set; | |
36349 | } mapping[] = { | |
36350 | { "ppc64", "ppc64", MASK_64BIT }, | |
36351 | { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 }, | |
36352 | { "power4", "ppc970", 0 }, | |
36353 | { "G5", "ppc970", 0 }, | |
36354 | { "7450", "ppc7450", 0 }, | |
36355 | { "7400", "ppc7400", MASK_ALTIVEC }, | |
36356 | { "G4", "ppc7400", 0 }, | |
36357 | { "750", "ppc750", 0 }, | |
36358 | { "740", "ppc750", 0 }, | |
36359 | { "G3", "ppc750", 0 }, | |
36360 | { "604e", "ppc604e", 0 }, | |
36361 | { "604", "ppc604", 0 }, | |
36362 | { "603e", "ppc603", 0 }, | |
36363 | { "603", "ppc603", 0 }, | |
36364 | { "601", "ppc601", 0 }, | |
36365 | { NULL, "ppc", 0 } }; | |
36366 | const char *cpu_id = ""; | |
36367 | size_t i; | |
36368 | ||
36369 | rs6000_file_start (); | |
36370 | darwin_file_start (); | |
36371 | ||
36372 | /* Determine the argument to -mcpu=. Default to G3 if not specified. */ | |
36373 | ||
36374 | if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0') | |
36375 | cpu_id = rs6000_default_cpu; | |
36376 | ||
36377 | if (global_options_set.x_rs6000_cpu_index) | |
36378 | cpu_id = processor_target_table[rs6000_cpu_index].name; | |
36379 | ||
36380 | /* Look through the mapping array. Pick the first name that either | |
36381 | matches the argument, has a bit set in IF_SET that is also set | |
36382 | in the target flags, or has a NULL name. */ | |
36383 | ||
36384 | i = 0; | |
36385 | while (mapping[i].arg != NULL | |
36386 | && strcmp (mapping[i].arg, cpu_id) != 0 | |
36387 | && (mapping[i].if_set & rs6000_isa_flags) == 0) | |
36388 | i++; | |
36389 | ||
36390 | fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name); | |
36391 | } | |
36392 | ||
36393 | #endif /* TARGET_MACHO */ | |
36394 | ||
36395 | #if TARGET_ELF | |
36396 | static int | |
36397 | rs6000_elf_reloc_rw_mask (void) | |
36398 | { | |
36399 | if (flag_pic) | |
36400 | return 3; | |
36401 | else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) | |
36402 | return 2; | |
36403 | else | |
36404 | return 0; | |
36405 | } | |
36406 | ||
36407 | /* Record an element in the table of global constructors. SYMBOL is | |
36408 | a SYMBOL_REF of the function to be called; PRIORITY is a number | |
36409 | between 0 and MAX_INIT_PRIORITY. | |
36410 | ||
36411 | This differs from default_named_section_asm_out_constructor in | |
36412 | that we have special handling for -mrelocatable. */ | |
36413 | ||
36414 | static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED; | |
36415 | static void | |
36416 | rs6000_elf_asm_out_constructor (rtx symbol, int priority) | |
36417 | { | |
36418 | const char *section = ".ctors"; | |
36419 | char buf[18]; | |
36420 | ||
36421 | if (priority != DEFAULT_INIT_PRIORITY) | |
36422 | { | |
36423 | sprintf (buf, ".ctors.%.5u", | |
36424 | /* Invert the numbering so the linker puts us in the proper | |
36425 | order; constructors are run from right to left, and the | |
36426 | linker sorts in increasing order. */ | |
36427 | MAX_INIT_PRIORITY - priority); | |
36428 | section = buf; | |
36429 | } | |
36430 | ||
36431 | switch_to_section (get_section (section, SECTION_WRITE, NULL)); | |
36432 | assemble_align (POINTER_SIZE); | |
36433 | ||
36434 | if (DEFAULT_ABI == ABI_V4 | |
36435 | && (TARGET_RELOCATABLE || flag_pic > 1)) | |
36436 | { | |
36437 | fputs ("\t.long (", asm_out_file); | |
36438 | output_addr_const (asm_out_file, symbol); | |
36439 | fputs (")@fixup\n", asm_out_file); | |
36440 | } | |
36441 | else | |
36442 | assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); | |
36443 | } | |
36444 | ||
36445 | static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED; | |
36446 | static void | |
36447 | rs6000_elf_asm_out_destructor (rtx symbol, int priority) | |
36448 | { | |
36449 | const char *section = ".dtors"; | |
36450 | char buf[18]; | |
36451 | ||
36452 | if (priority != DEFAULT_INIT_PRIORITY) | |
36453 | { | |
36454 | sprintf (buf, ".dtors.%.5u", | |
36455 | /* Invert the numbering so the linker puts us in the proper | |
36456 | order; constructors are run from right to left, and the | |
36457 | linker sorts in increasing order. */ | |
36458 | MAX_INIT_PRIORITY - priority); | |
36459 | section = buf; | |
36460 | } | |
36461 | ||
36462 | switch_to_section (get_section (section, SECTION_WRITE, NULL)); | |
36463 | assemble_align (POINTER_SIZE); | |
36464 | ||
36465 | if (DEFAULT_ABI == ABI_V4 | |
36466 | && (TARGET_RELOCATABLE || flag_pic > 1)) | |
36467 | { | |
36468 | fputs ("\t.long (", asm_out_file); | |
36469 | output_addr_const (asm_out_file, symbol); | |
36470 | fputs (")@fixup\n", asm_out_file); | |
36471 | } | |
36472 | else | |
36473 | assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1); | |
36474 | } | |
36475 | ||
/* Emit the assembler directives that declare function NAME (for DECL)
   into FILE.  Handles the 64-bit AIX-style function-descriptor ABI,
   the -mrelocatable V4 TOC fixup word, the large-code-model TOC
   offset, and the AIX descriptor layout, before finally emitting the
   function label itself.  */

void
rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
{
  /* 64-bit non-ELFv2: emit an .opd function descriptor whose first
     word is the code entry point.  */
  if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
    {
      fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
      ASM_OUTPUT_LABEL (file, name);
      fputs (DOUBLE_INT_ASM_OP, file);
      rs6000_output_function_entry (file, name);
      fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
      if (DOT_SYMBOLS)
	{
	  /* With dot-symbols, the descriptor is NAME (size 24) and the
	     code entry is .NAME, declared as the actual function.  */
	  fputs ("\t.size\t", file);
	  assemble_name (file, name);
	  fputs (",24\n\t.type\t.", file);
	  assemble_name (file, name);
	  fputs (",@function\n", file);
	  if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
	    {
	      fputs ("\t.globl\t.", file);
	      assemble_name (file, name);
	      putc ('\n', file);
	    }
	}
      else
	ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
      ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
      rs6000_output_function_entry (file, name);
      fputs (":\n", file);
      return;
    }

  /* -mrelocatable V4 without secure PLT: emit a word giving the
     TOC - LCF offset so the code can relocate its TOC pointer, but
     only when the function actually uses the TOC.  */
  if (DEFAULT_ABI == ABI_V4
      && (TARGET_RELOCATABLE || flag_pic > 1)
      && !TARGET_SECURE_PLT
      && (!constant_pool_empty_p () || crtl->profile)
      && uses_TOC ())
    {
      char buf[256];

      (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);

      fprintf (file, "\t.long ");
      assemble_name (file, toc_label_name);
      need_toc_init = 1;
      putc ('-', file);
      ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
      assemble_name (file, buf);
      putc ('\n', file);
    }

  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));

  /* Large code model: emit the .TOC.-LCF displacement used by the
     global entry point prologue.  */
  if (TARGET_CMODEL == CMODEL_LARGE && rs6000_global_entry_point_needed_p ())
    {
      char buf[256];

      (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);

      fprintf (file, "\t.quad .TOC.-");
      ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
      assemble_name (file, buf);
      putc ('\n', file);
    }

  /* 32-bit AIX ABI: emit a three-word function descriptor in the
     minimal TOC section (entry, GOT, environment).  */
  if (DEFAULT_ABI == ABI_AIX)
    {
      const char *desc_name, *orig_name;

      orig_name = (*targetm.strip_name_encoding) (name);
      desc_name = orig_name;
      /* The descriptor name drops any leading dots from the code name.  */
      while (*desc_name == '.')
	desc_name++;

      if (TREE_PUBLIC (decl))
	fprintf (file, "\t.globl %s\n", desc_name);

      fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
      fprintf (file, "%s:\n", desc_name);
      fprintf (file, "\t.long %s\n", orig_name);
      fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
      fputs ("\t.long 0\n", file);
      fprintf (file, "\t.previous\n");
    }
  ASM_OUTPUT_LABEL (file, name);
}
36563 | ||
/* File-end hook for ELF targets: emit .gnu_attribute records
   describing the FP/vector/struct-return ABI actually used, an
   executable-stack marker where required, the split-stack note, and
   the CPU-builtin TCB verification symbol.  */

static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
static void
rs6000_elf_file_end (void)
{
#ifdef HAVE_AS_GNU_ATTRIBUTE
  /* ??? The value emitted depends on options active at file end.
     Assume anyone using #pragma or attributes that might change
     options knows what they are doing.  */
  if ((TARGET_64BIT || DEFAULT_ABI == ABI_V4)
      && rs6000_passes_float)
    {
      /* Tag 4 encodes the scalar FP ABI in the low two bits (1 =
	 double in FPRs/SPE, 3 = single only, 2 = soft-float) and the
	 long-double ABI in the next two.  Bitwise | is deliberate:
	 the operands are flag tests, not short-circuit conditions.  */
      int fp;

      if (TARGET_DF_FPR | TARGET_DF_SPE)
	fp = 1;
      else if (TARGET_SF_FPR | TARGET_SF_SPE)
	fp = 3;
      else
	fp = 2;
      if (rs6000_passes_long_double)
	{
	  if (!TARGET_LONG_DOUBLE_128)
	    fp |= 2 * 4;
	  else if (TARGET_IEEEQUAD)
	    fp |= 3 * 4;
	  else
	    fp |= 1 * 4;
	}
      fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n", fp);
    }
  if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
    {
      /* Tag 8: vector ABI in use; tag 12: struct-return convention.  */
      if (rs6000_passes_vector)
	fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
		 (TARGET_ALTIVEC_ABI ? 2
		  : TARGET_SPE_ABI ? 3
		  : 1));
      if (rs6000_returns_struct)
	fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
		 aix_struct_return ? 2 : 1);
    }
#endif
#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
  if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
    file_end_indicate_exec_stack ();
#endif

  if (flag_split_stack)
    file_end_indicate_split_stack ();

  if (cpu_builtin_p)
    {
      /* We have expanded a CPU builtin, so we need to emit a reference to
	 the special symbol that LIBC uses to declare it supports the
	 AT_PLATFORM and AT_HWCAP/AT_HWCAP2 in the TCB feature.  */
      switch_to_section (data_section);
      fprintf (asm_out_file, "\t.align %u\n", TARGET_32BIT ? 2 : 3);
      fprintf (asm_out_file, "\t%s %s\n",
	       TARGET_32BIT ? ".long" : ".quad", tcb_verification_symbol);
    }
}
36625 | #endif | |
36626 | ||
36627 | #if TARGET_XCOFF | |
36628 | ||
36629 | #ifndef HAVE_XCOFF_DWARF_EXTRAS | |
36630 | #define HAVE_XCOFF_DWARF_EXTRAS 0 | |
36631 | #endif | |
36632 | ||
/* Implement TARGET_DEBUG_UNWIND_INFO: XCOFF emits no debug-only
   unwind information.  */
static enum unwind_info_type
rs6000_xcoff_debug_unwind_info (void)
{
  return UI_NONE;
}
36638 | ||
36639 | static void | |
36640 | rs6000_xcoff_asm_output_anchor (rtx symbol) | |
36641 | { | |
36642 | char buffer[100]; | |
36643 | ||
36644 | sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC, | |
36645 | SYMBOL_REF_BLOCK_OFFSET (symbol)); | |
36646 | fprintf (asm_out_file, "%s", SET_ASM_OP); | |
36647 | RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0)); | |
36648 | fprintf (asm_out_file, ","); | |
36649 | RS6000_OUTPUT_BASENAME (asm_out_file, buffer); | |
36650 | fprintf (asm_out_file, "\n"); | |
36651 | } | |
36652 | ||
36653 | static void | |
36654 | rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name) | |
36655 | { | |
36656 | fputs (GLOBAL_ASM_OP, stream); | |
36657 | RS6000_OUTPUT_BASENAME (stream, name); | |
36658 | putc ('\n', stream); | |
36659 | } | |
36660 | ||
/* A get_unnamed_decl callback, used for read-only sections.  PTR
   points to the section string variable.  */

static void
rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
{
  /* DIRECTIVE is a pointer to the section-name string; [RO] marks the
     csect as read-only storage class.  */
  fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
	   *(const char *const *) directive,
	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}
36671 | ||
/* Likewise for read-write sections.  */

static void
rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
{
  /* [RW] marks the csect as read-write storage class.  */
  fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
	   *(const char *const *) directive,
	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}
36681 | ||
/* Likewise for thread-local sections; [TL] is the XCOFF TLS storage
   class.  */
static void
rs6000_xcoff_output_tls_section_asm_op (const void *directive)
{
  fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
	   *(const char *const *) directive,
	   XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
}
36689 | ||
/* A get_unnamed_section callback, used for switching to toc_section.  */

static void
rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
  if (TARGET_MINIMAL_TOC)
    {
      /* toc_section is always selected at least once from
	 rs6000_xcoff_file_start, so this is guaranteed to
	 always be defined once and only once in each file.  */
      if (!toc_initialized)
	{
	  fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
	  fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
	  toc_initialized = 1;
	}
      /* Subsequent switches just re-enter the toc_table csect
	 (",3" requests 8-byte alignment in 64-bit mode).  */
      fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
	       (TARGET_32BIT ? "" : ",3"));
    }
  else
    fputs ("\t.toc\n", asm_out_file);
}
36712 | ||
/* Implement TARGET_ASM_INIT_SECTIONS.  */

static void
rs6000_xcoff_asm_init_sections (void)
{
  /* Each unnamed section carries a pointer to its name string; the
     matching *_asm_op callback emits the .csect/.toc directive.  */
  read_only_data_section
    = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
			   &xcoff_read_only_section_name);

  private_data_section
    = get_unnamed_section (SECTION_WRITE,
			   rs6000_xcoff_output_readwrite_section_asm_op,
			   &xcoff_private_data_section_name);

  tls_data_section
    = get_unnamed_section (SECTION_TLS,
			   rs6000_xcoff_output_tls_section_asm_op,
			   &xcoff_tls_data_section_name);

  /* Private TLS data shares the private-data section name but uses
     the TLS csect directive.  */
  tls_private_data_section
    = get_unnamed_section (SECTION_TLS,
			   rs6000_xcoff_output_tls_section_asm_op,
			   &xcoff_private_data_section_name);

  read_only_private_data_section
    = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
			   &xcoff_private_data_section_name);

  toc_section
    = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);

  readonly_data_section = read_only_data_section;
}
36746 | ||
/* Implement TARGET_ASM_RELOC_RW_MASK: AIX code is always PIC, so
   relocations are allowed in both read-only and read-write data.  */
static int
rs6000_xcoff_reloc_rw_mask (void)
{
  return 3;
}
36752 | ||
36753 | static void | |
36754 | rs6000_xcoff_asm_named_section (const char *name, unsigned int flags, | |
36755 | tree decl ATTRIBUTE_UNUSED) | |
36756 | { | |
36757 | int smclass; | |
36758 | static const char * const suffix[5] = { "PR", "RO", "RW", "TL", "XO" }; | |
36759 | ||
36760 | if (flags & SECTION_EXCLUDE) | |
36761 | smclass = 4; | |
36762 | else if (flags & SECTION_DEBUG) | |
36763 | { | |
36764 | fprintf (asm_out_file, "\t.dwsect %s\n", name); | |
36765 | return; | |
36766 | } | |
36767 | else if (flags & SECTION_CODE) | |
36768 | smclass = 0; | |
36769 | else if (flags & SECTION_TLS) | |
36770 | smclass = 3; | |
36771 | else if (flags & SECTION_WRITE) | |
36772 | smclass = 2; | |
36773 | else | |
36774 | smclass = 1; | |
36775 | ||
36776 | fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n", | |
36777 | (flags & SECTION_CODE) ? "." : "", | |
36778 | name, suffix[smclass], flags & SECTION_ENTSIZE); | |
36779 | } | |
36780 | ||
/* True iff DECL is a function or variable placed in a named section.  */
#define IN_NAMED_SECTION(DECL) \
  ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
   && DECL_SECTION_NAME (DECL) != NULL)

/* Implement TARGET_ASM_SELECT_SECTION for XCOFF: choose a section for
   DECL based on alignment, read-only-ness, TLS, and visibility.  */
static section *
rs6000_xcoff_select_section (tree decl, int reloc,
			     unsigned HOST_WIDE_INT align)
{
  /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
     named section.  */
  if (align > BIGGEST_ALIGNMENT)
    {
      resolve_unique_section (decl, reloc, true);
      if (IN_NAMED_SECTION (decl))
	return get_named_section (decl, NULL, reloc);
    }

  if (decl_readonly_section (decl, reloc))
    {
      /* Public read-only data gets the shared [RO] csect; private
	 data goes in the per-file private read-only csect.  */
      if (TREE_PUBLIC (decl))
	return read_only_data_section;
      else
	return read_only_private_data_section;
    }
  else
    {
#if HAVE_AS_TLS
      if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
	{
	  if (TREE_PUBLIC (decl))
	    return tls_data_section;
	  else if (bss_initializer_p (decl))
	    {
	      /* Convert to COMMON to emit in BSS.  */
	      DECL_COMMON (decl) = 1;
	      return tls_comm_section;
	    }
	  else
	    return tls_private_data_section;
	}
      else
#endif
	if (TREE_PUBLIC (decl))
	  return data_section;
	else
	  return private_data_section;
    }
}
36829 | ||
/* Implement TARGET_ASM_UNIQUE_SECTION for XCOFF: give DECL its own
   named section (its stripped assembler name) only for public,
   initialized, non-common data; everything else is left to
   rs6000_xcoff_select_section.  */
static void
rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
{
  const char *name;

  /* Use select_section for private data and uninitialized data with
     alignment <= BIGGEST_ALIGNMENT.  */
  /* NOTE(review): the clause order is load-bearing — the NULL check on
     DECL_INITIAL precedes the initializer_zerop call further down.  */
  if (!TREE_PUBLIC (decl)
      || DECL_COMMON (decl)
      || (DECL_INITIAL (decl) == NULL_TREE
	  && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
      || DECL_INITIAL (decl) == error_mark_node
      || (flag_zero_initialized_in_bss
	  && initializer_zerop (DECL_INITIAL (decl))))
    return;

  name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
  name = (*targetm.strip_name_encoding) (name);
  set_decl_section_name (decl, name);
}
36850 | ||
36851 | /* Select section for constant in constant pool. | |
36852 | ||
36853 | On RS/6000, all constants are in the private read-only data area. | |
36854 | However, if this is being placed in the TOC it must be output as a | |
36855 | toc entry. */ | |
36856 | ||
36857 | static section * | |
36858 | rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x, | |
36859 | unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED) | |
36860 | { | |
36861 | if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode)) | |
36862 | return toc_section; | |
36863 | else | |
36864 | return read_only_private_data_section; | |
36865 | } | |
36866 | ||
/* Remove any trailing [DS] or the like from the symbol name.

   NAME may carry a leading '*' marker, which is skipped, and may end
   with a four-character XCOFF mapping-class suffix such as "[DS]";
   when it does, return a fresh GC-allocated copy with the suffix
   removed, otherwise return NAME unchanged.  */

static const char *
rs6000_xcoff_strip_name_encoding (const char *name)
{
  size_t len;
  if (*name == '*')
    name++;
  len = strlen (name);
  /* Require at least the four characters of a "[XX]" suffix before
     stripping: without this check an empty name reads name[-1] out of
     bounds, and a shorter name ending in ']' makes len - 4 wrap.  */
  if (len > 3 && name[len - 1] == ']')
    return ggc_alloc_string (name, len - 4);
  else
    return name;
}
36881 | ||
/* Section attributes.  AIX is always PIC.  */

static unsigned int
rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int align;
  unsigned int flags = default_section_type_flags (decl, name, reloc);

  /* Align to at least UNIT size.  */
  if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
    align = MIN_UNITS_PER_WORD;
  else
    /* Increase alignment of large objects if not already stricter.  */
    align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
		 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
		 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);

  /* Smuggle log2(align) through the ENTSIZE bits; it is decoded by
     rs6000_xcoff_asm_named_section when the .csect is emitted.  */
  return flags | (exact_log2 (align) & SECTION_ENTSIZE);
}
36901 | ||
/* Output at beginning of assembler file.

   Initialize the section names for the RS/6000 at this point.

   Specify filename, including full path, to assembler.

   We want to go into the TOC section so at least one .toc will be emitted.
   Also, in order to output proper .bs/.es pairs, we need at least one static
   [RW] section emitted.

   Finally, declare mcount when profiling to make the assembler happy.  */

static void
rs6000_xcoff_file_start (void)
{
  /* Derive per-file csect names from the input filename so objects
     from different files do not clash.  */
  rs6000_gen_section_name (&xcoff_bss_section_name,
			   main_input_filename, ".bss_");
  rs6000_gen_section_name (&xcoff_private_data_section_name,
			   main_input_filename, ".rw_");
  rs6000_gen_section_name (&xcoff_read_only_section_name,
			   main_input_filename, ".ro_");
  rs6000_gen_section_name (&xcoff_tls_data_section_name,
			   main_input_filename, ".tls_");
  rs6000_gen_section_name (&xcoff_tbss_section_name,
			   main_input_filename, ".tbss_[UL]");

  fputs ("\t.file\t", asm_out_file);
  output_quoted_string (asm_out_file, main_input_filename);
  fputc ('\n', asm_out_file);
  /* Touch the sections in the order described above; end up in .text.  */
  if (write_symbols != NO_DEBUG)
    switch_to_section (private_data_section);
  switch_to_section (toc_section);
  switch_to_section (text_section);
  if (profile_flag)
    fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
  rs6000_file_start ();
}
36939 | ||
36940 | /* Output at end of assembler file. | |
36941 | On the RS/6000, referencing data should automatically pull in text. */ | |
36942 | ||
36943 | static void | |
36944 | rs6000_xcoff_file_end (void) | |
36945 | { | |
36946 | switch_to_section (text_section); | |
36947 | fputs ("_section_.text:\n", asm_out_file); | |
36948 | switch_to_section (data_section); | |
36949 | fputs (TARGET_32BIT | |
36950 | ? "\t.long _section_.text\n" : "\t.llong _section_.text\n", | |
36951 | asm_out_file); | |
36952 | } | |
36953 | ||
/* Context passed through for_node_and_aliases to rs6000_declare_alias.  */
struct declare_alias_data
{
  FILE *file;                  /* Assembler output stream.  */
  bool function_descriptor;    /* True when declaring the descriptor
				  (dot-prefixed) variant of a function.  */
};
36959 | ||
/* Declare alias N.  A helper function for for_node_and_aliases.

   D points at a struct declare_alias_data.  Emits the .globl/.lglobl
   (and .rename, when the name contains '$') directives plus the label
   for alias N on XCOFF.  Always returns false so the symtab walk
   continues.  */

static bool
rs6000_declare_alias (struct symtab_node *n, void *d)
{
  struct declare_alias_data *data = (struct declare_alias_data *)d;
  /* Main symbol is output specially, because varasm machinery does part of
     the job for us - we do not need to declare .globl/lglobs and such.  */
  if (!n->alias || n->weakref)
    return false;

  if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
    return false;

  /* Prevent assemble_alias from trying to use .set pseudo operation
     that does not behave as expected by the middle-end.  */
  TREE_ASM_WRITTEN (n->decl) = true;

  const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
  char *buffer = (char *) alloca (strlen (name) + 2);
  char *p;
  int dollar_inside = 0;

  /* '$' is not valid in XCOFF symbol names; replace each with '_' and
     remember that a .rename back to the true name is needed.  */
  strcpy (buffer, name);
  p = strchr (buffer, '$');
  while (p) {
    *p = '_';
    dollar_inside++;
    p = strchr (p + 1, '$');
  }
  if (TREE_PUBLIC (n->decl))
    {
      if (!RS6000_WEAK || !DECL_WEAK (n->decl))
	{
	  if (dollar_inside) {
	      if (data->function_descriptor)
		fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
	      fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
	    }
	  /* Functions get both the dot-prefixed entry symbol and the
	     descriptor symbol declared global.  */
	  if (data->function_descriptor)
	    {
	      fputs ("\t.globl .", data->file);
	      RS6000_OUTPUT_BASENAME (data->file, buffer);
	      putc ('\n', data->file);
	    }
	  fputs ("\t.globl ", data->file);
	  RS6000_OUTPUT_BASENAME (data->file, buffer);
	  putc ('\n', data->file);
	}
#ifdef ASM_WEAKEN_DECL
      else if (DECL_WEAK (n->decl) && !data->function_descriptor)
	ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
#endif
    }
  else
    {
      /* Non-public aliases use .lglobl instead of .globl; otherwise
	 the emission mirrors the public branch above.  */
      if (dollar_inside)
	{
	  if (data->function_descriptor)
	    fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
	  fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
	}
      if (data->function_descriptor)
	{
	  fputs ("\t.lglobl .", data->file);
	  RS6000_OUTPUT_BASENAME (data->file, buffer);
	  putc ('\n', data->file);
	}
      fputs ("\t.lglobl ", data->file);
      RS6000_OUTPUT_BASENAME (data->file, buffer);
      putc ('\n', data->file);
    }
  /* Finally, emit the label for the alias itself.  */
  if (data->function_descriptor)
    fputs (".", data->file);
  RS6000_OUTPUT_BASENAME (data->file, buffer);
  fputs (":\n", data->file);
  return false;
}
37038 | ||
37039 | ||
#ifdef HAVE_GAS_HIDDEN
/* Helper function to calculate visibility of a DECL
   and return the value as a const string.  */

static const char *
rs6000_xcoff_visibility (tree decl)
{
  /* Indexed directly by enum symbol_visibility; entry 0 (default
     visibility) is the empty string so callers may append the result
     unconditionally.  */
  static const char * const visibility_types[] = {
    "", ",protected", ",hidden", ",internal"
  };

  enum symbol_visibility vis = DECL_VISIBILITY (decl);
  return visibility_types[vis];
}
#endif
37055 | ||
37056 | ||
/* This macro produces the initial definition of a function name.
   On the RS/6000, we need to place an extra '.' in the function name and
   output the function descriptor.
   Dollar signs are converted to underscores.

   The csect for the function will have already been created when
   text_section was selected.  We do have to go back to that csect, however.

   The third and fourth parameters to the .function pseudo-op (16 and 044)
   are placeholders which no longer have any use.

   Because AIX assembler's .set command has unexpected semantics, we output
   all aliases as alternative labels in front of the definition.  */

void
rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
{
  char *buffer = (char *) alloca (strlen (name) + 1);
  char *p;
  int dollar_inside = 0;
  struct declare_alias_data data = {file, false};

  /* Rewrite '$' (rejected by the AIX assembler) to '_', remembering to
     .rename the mangled name back to the original.  */
  strcpy (buffer, name);
  p = strchr (buffer, '$');
  while (p) {
    *p = '_';
    dollar_inside++;
    p = strchr (p + 1, '$');
  }
  if (TREE_PUBLIC (decl))
    {
      if (!RS6000_WEAK || !DECL_WEAK (decl))
	{
	  if (dollar_inside) {
	      fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
	      fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
	    }
	  fputs ("\t.globl .", file);
	  RS6000_OUTPUT_BASENAME (file, buffer);
#ifdef HAVE_GAS_HIDDEN
	  fputs (rs6000_xcoff_visibility (decl), file);
#endif
	  putc ('\n', file);
	}
    }
  else
    {
      if (dollar_inside) {
	  fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
	  fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
	}
      fputs ("\t.lglobl .", file);
      RS6000_OUTPUT_BASENAME (file, buffer);
      putc ('\n', file);
    }
  /* Open the [DS] (descriptor) csect, emit the descriptor label, and
     the descriptor-name aliases (first alias pass).  */
  fputs ("\t.csect ", file);
  RS6000_OUTPUT_BASENAME (file, buffer);
  fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
  RS6000_OUTPUT_BASENAME (file, buffer);
  fputs (":\n", file);
  symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
							&data, true);
  /* Descriptor contents: the '.'-prefixed code address and TOC anchor.  */
  fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
  RS6000_OUTPUT_BASENAME (file, buffer);
  fputs (", TOC[tc0], 0\n", file);
  /* Force switch_to_section to re-emit the section directive.  */
  in_section = NULL;
  switch_to_section (function_section (decl));
  putc ('.', file);
  RS6000_OUTPUT_BASENAME (file, buffer);
  fputs (":\n", file);
  /* Second alias pass: the '.'-prefixed entry point names.  */
  data.function_descriptor = true;
  symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
							&data, true);
  if (!DECL_IGNORED_P (decl))
    {
      if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
	xcoffout_declare_function (file, decl, buffer);
      else if (write_symbols == DWARF2_DEBUG)
	{
	  name = (*targetm.strip_name_encoding) (name);
	  fprintf (file, "\t.function .%s,.%s,2,0\n", name, name);
	}
    }
  return;
}
37142 | ||
37143 | ||
/* Output assembly language to globalize a symbol from a DECL,
   possibly with visibility.  */

void
rs6000_xcoff_asm_globalize_decl_name (FILE *stream, tree decl)
{
  const char *name = XSTR (XEXP (DECL_RTL (decl), 0), 0);
  fputs (GLOBAL_ASM_OP, stream);
  RS6000_OUTPUT_BASENAME (stream, name);
#ifdef HAVE_GAS_HIDDEN
  /* Appends ",hidden"/",protected"/... for non-default visibility,
     empty string otherwise.  */
  fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  putc ('\n', stream);
}
37158 | ||
/* Output assembly language to define a symbol as COMMON from a DECL,
   possibly with visibility.  SIZE is in bytes; ALIGN appears to be in
   bits (it is divided by BITS_PER_UNIT below).  */

void
rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
					     tree decl ATTRIBUTE_UNUSED,
					     const char *name,
					     unsigned HOST_WIDE_INT size,
					     unsigned HOST_WIDE_INT align)
{
  /* Third operand of .comm is log2 of the alignment in bytes:
     default 4-byte (2), 8-byte (3) when the object is larger than a
     word, otherwise derived from the requested ALIGN.  */
  unsigned HOST_WIDE_INT align2 = 2;

  if (align > 32)
    align2 = floor_log2 (align / BITS_PER_UNIT);
  else if (size > 4)
    align2 = 3;

  fputs (COMMON_ASM_OP, stream);
  RS6000_OUTPUT_BASENAME (stream, name);

  fprintf (stream,
	   "," HOST_WIDE_INT_PRINT_UNSIGNED "," HOST_WIDE_INT_PRINT_UNSIGNED,
	   size, align2);

#ifdef HAVE_GAS_HIDDEN
  /* DECL is only unused when HAVE_GAS_HIDDEN is not defined.  */
  fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  putc ('\n', stream);
}
37188 | ||
/* This macro produces the initial definition of a object (variable) name.
   Because AIX assembler's .set command has unexpected semantics, we output
   all aliases as alternative labels in front of the definition.  */

void
rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
{
  struct declare_alias_data data = {file, false};
  /* Emit the object's own label, then one alternative label per alias.  */
  RS6000_OUTPUT_BASENAME (file, name);
  fputs (":\n", file);
  symtab_node::get_create (decl)->call_for_symbol_and_aliases (rs6000_declare_alias,
							       &data, true);
}
37202 | ||
37203 | /* Overide the default 'SYMBOL-.' syntax with AIX compatible 'SYMBOL-$'. */ | |
37204 | ||
37205 | void | |
37206 | rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label) | |
37207 | { | |
37208 | fputs (integer_asm_op (size, FALSE), file); | |
37209 | assemble_name (file, label); | |
37210 | fputs ("-$", file); | |
37211 | } | |
37212 | ||
37213 | /* Output a symbol offset relative to the dbase for the current object. | |
37214 | We use __gcc_unwind_dbase as an arbitrary base for dbase and assume | |
37215 | signed offsets. | |
37216 | ||
37217 | __gcc_unwind_dbase is embedded in all executables/libraries through | |
37218 | libgcc/config/rs6000/crtdbase.S. */ | |
37219 | ||
37220 | void | |
37221 | rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label) | |
37222 | { | |
37223 | fputs (integer_asm_op (size, FALSE), file); | |
37224 | assemble_name (file, label); | |
37225 | fputs("-__gcc_unwind_dbase", file); | |
37226 | } | |
37227 | ||
#ifdef HAVE_AS_TLS
/* XCOFF version of the section-info encoding hook: adjust SYMBOL_REF
   flags and append an XCOFF mapping class to the names of extern
   decls.  */

static void
rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
{
  rtx symbol;
  int flags;
  const char *symname;

  default_encode_section_info (decl, rtl, first);

  /* Careful not to prod global register variables.  */
  if (!MEM_P (rtl))
    return;
  symbol = XEXP (rtl, 0);
  if (GET_CODE (symbol) != SYMBOL_REF)
    return;

  flags = SYMBOL_REF_FLAGS (symbol);

  /* Drop the block-info flag for thread-local variables.  */
  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
    flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;

  SYMBOL_REF_FLAGS (symbol) = flags;

  /* Append mapping class to extern decls: "[DS]" for functions,
     "[UA]" for variables.  Skip names that already end in ']' (a
     mapping class is already present).  */
  symname = XSTR (symbol, 0);
  if (decl /* sync condition with assemble_external () */
      && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
      && ((TREE_CODE (decl) == VAR_DECL && !DECL_THREAD_LOCAL_P (decl))
	  || TREE_CODE (decl) == FUNCTION_DECL)
      && symname[strlen (symname) - 1] != ']')
    {
      /* +5 covers the 4-character suffix plus the terminating NUL.  */
      char *newname = (char *) alloca (strlen (symname) + 5);
      strcpy (newname, symname);
      strcat (newname, (TREE_CODE (decl) == FUNCTION_DECL
			? "[DS]" : "[UA]"));
      XSTR (symbol, 0) = ggc_strdup (newname);
    }
}
#endif /* HAVE_AS_TLS */
#endif /* TARGET_XCOFF */
37269 | ||
/* Emit a .weak directive for NAME and, for AIX-ABI functions when dot
   symbols are in use, for the '.'-prefixed entry point as well.  If VAL
   is non-NULL also emit a definition (.set) equating NAME to VAL.
   DECL may be NULL.  */

void
rs6000_asm_weaken_decl (FILE *stream, tree decl,
			const char *name, const char *val)
{
  fputs ("\t.weak\t", stream);
  RS6000_OUTPUT_BASENAME (stream, name);
  if (decl && TREE_CODE (decl) == FUNCTION_DECL
      && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
    {
      if (TARGET_XCOFF)
	fputs ("[DS]", stream);
#if TARGET_XCOFF && HAVE_GAS_HIDDEN
      if (TARGET_XCOFF)
	fputs (rs6000_xcoff_visibility (decl), stream);
#endif
      /* Also weaken the '.'-prefixed entry point symbol.  */
      fputs ("\n\t.weak\t.", stream);
      RS6000_OUTPUT_BASENAME (stream, name);
    }
#if TARGET_XCOFF && HAVE_GAS_HIDDEN
  if (TARGET_XCOFF)
    fputs (rs6000_xcoff_visibility (decl), stream);
#endif
  fputc ('\n', stream);
  if (val)
    {
#ifdef ASM_OUTPUT_DEF
      ASM_OUTPUT_DEF (stream, name, val);
#endif
      if (decl && TREE_CODE (decl) == FUNCTION_DECL
	  && DEFAULT_ABI == ABI_AIX && DOT_SYMBOLS)
	{
	  /* Mirror the equation for the '.' entry point symbols.  */
	  fputs ("\t.set\t.", stream);
	  RS6000_OUTPUT_BASENAME (stream, name);
	  fputs (",.", stream);
	  RS6000_OUTPUT_BASENAME (stream, val);
	  fputc ('\n', stream);
	}
    }
}
37309 | ||
37310 | ||
37311 | /* Return true if INSN should not be copied. */ | |
37312 | ||
37313 | static bool | |
37314 | rs6000_cannot_copy_insn_p (rtx_insn *insn) | |
37315 | { | |
37316 | return recog_memoized (insn) >= 0 | |
37317 | && get_attr_cannot_copy (insn); | |
37318 | } | |
37319 | ||
/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  */

static bool
rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
		  int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  int code = GET_CODE (x);

  switch (code)
    {
      /* On the RS/6000, if it is valid in the insn, it is free.  */
    case CONST_INT:
      if (((outer_code == SET
	    || outer_code == PLUS
	    || outer_code == MINUS)
	   && (satisfies_constraint_I (x)
	       || satisfies_constraint_L (x)))
	  || (outer_code == AND
	      && (satisfies_constraint_K (x)
		  || (mode == SImode
		      ? satisfies_constraint_L (x)
		      : satisfies_constraint_J (x))))
	  || ((outer_code == IOR || outer_code == XOR)
	      && (satisfies_constraint_K (x)
		  || (mode == SImode
		      ? satisfies_constraint_L (x)
		      : satisfies_constraint_J (x))))
	  || outer_code == ASHIFT
	  || outer_code == ASHIFTRT
	  || outer_code == LSHIFTRT
	  || outer_code == ROTATE
	  || outer_code == ROTATERT
	  || outer_code == ZERO_EXTRACT
	  || (outer_code == MULT
	      && satisfies_constraint_I (x))
	  || ((outer_code == DIV || outer_code == UDIV
	       || outer_code == MOD || outer_code == UMOD)
	      && exact_log2 (INTVAL (x)) >= 0)
	  || (outer_code == COMPARE
	      && (satisfies_constraint_I (x)
		  || satisfies_constraint_K (x)))
	  || ((outer_code == EQ || outer_code == NE)
	      && (satisfies_constraint_I (x)
		  || satisfies_constraint_K (x)
		  || (mode == SImode
		      ? satisfies_constraint_L (x)
		      : satisfies_constraint_J (x))))
	  || (outer_code == GTU
	      && satisfies_constraint_I (x))
	  || (outer_code == LTU
	      && satisfies_constraint_P (x)))
	{
	  *total = 0;
	  return true;
	}
      /* Constants that need one extra instruction to materialize.  */
      else if ((outer_code == PLUS
		&& reg_or_add_cint_operand (x, VOIDmode))
	       || (outer_code == MINUS
		   && reg_or_sub_cint_operand (x, VOIDmode))
	       || ((outer_code == SET
		    || outer_code == IOR
		    || outer_code == XOR)
		   && (INTVAL (x)
		       & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      /* FALLTHRU */

    case CONST_DOUBLE:
    case CONST_WIDE_INT:
    case CONST:
    case HIGH:
    case SYMBOL_REF:
      *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
      return true;

    case MEM:
      /* When optimizing for size, MEM should be slightly more expensive
	 than generating address, e.g., (plus (reg) (const)).
	 L1 cache latency is about two instructions.  */
      *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
      if (rs6000_slow_unaligned_access (mode, MEM_ALIGN (x)))
	*total += COSTS_N_INSNS (100);
      return true;

    case LABEL_REF:
      *total = 0;
      return true;

    case PLUS:
    case MINUS:
      if (FLOAT_MODE_P (mode))
	*total = rs6000_cost->fp;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case MULT:
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
	  && satisfies_constraint_I (XEXP (x, 1)))
	{
	  /* Multiplies by small (9-bit) constants may be cheaper.  */
	  if (INTVAL (XEXP (x, 1)) >= -256
	      && INTVAL (XEXP (x, 1)) <= 255)
	    *total = rs6000_cost->mulsi_const9;
	  else
	    *total = rs6000_cost->mulsi_const;
	}
      else if (mode == SFmode)
	*total = rs6000_cost->fp;
      else if (FLOAT_MODE_P (mode))
	*total = rs6000_cost->dmul;
      else if (mode == DImode)
	*total = rs6000_cost->muldi;
      else
	*total = rs6000_cost->mulsi;
      return false;

    case FMA:
      if (mode == SFmode)
	*total = rs6000_cost->fp;
      else
	*total = rs6000_cost->dmul;
      break;			/* Falls out to "return false" below.  */

    case DIV:
    case MOD:
      if (FLOAT_MODE_P (mode))
	{
	  *total = mode == DFmode ? rs6000_cost->ddiv
				  : rs6000_cost->sdiv;
	  return false;
	}
      /* FALLTHRU */

    case UDIV:
    case UMOD:
      /* Division by a power of two is just shifts.  */
      if (GET_CODE (XEXP (x, 1)) == CONST_INT
	  && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
	{
	  if (code == DIV || code == MOD)
	    /* Shift, addze */
	    *total = COSTS_N_INSNS (2);
	  else
	    /* Shift */
	    *total = COSTS_N_INSNS (1);
	}
      else
	{
	  if (GET_MODE (XEXP (x, 1)) == DImode)
	    *total = rs6000_cost->divdi;
	  else
	    *total = rs6000_cost->divsi;
	}
      /* Add in shift and subtract for MOD unless we have a mod instruction. */
      if (!TARGET_MODULO && (code == MOD || code == UMOD))
	*total += COSTS_N_INSNS (2);
      return false;

    case CTZ:
      *total = COSTS_N_INSNS (TARGET_CTZ ? 1 : 4);
      return false;

    case FFS:
      *total = COSTS_N_INSNS (4);
      return false;

    case POPCOUNT:
      *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
      return false;

    case PARITY:
      *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
      return false;

    case NOT:
      /* NOT folds into nand/nor/eqv-style insns for free.  */
      if (outer_code == AND || outer_code == IOR || outer_code == XOR)
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case AND:
      if (CONST_INT_P (XEXP (x, 1)))
	{
	  rtx left = XEXP (x, 0);
	  rtx_code left_code = GET_CODE (left);

	  /* rotate-and-mask: 1 insn.  */
	  if ((left_code == ROTATE
	       || left_code == ASHIFT
	       || left_code == LSHIFTRT)
	      && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
	    {
	      *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
	      if (!CONST_INT_P (XEXP (left, 1)))
		*total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
	      *total += COSTS_N_INSNS (1);
	      return true;
	    }

	  /* rotate-and-mask (no rotate), andi., andis.: 1 insn.  */
	  HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
	  if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
	      || (val & 0xffff) == val
	      || (val & 0xffff0000) == val
	      || ((val & 0xffff) == 0 && mode == SImode))
	    {
	      *total = rtx_cost (left, mode, AND, 0, speed);
	      *total += COSTS_N_INSNS (1);
	      return true;
	    }

	  /* 2 insns.  */
	  if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
	    {
	      *total = rtx_cost (left, mode, AND, 0, speed);
	      *total += COSTS_N_INSNS (2);
	      return true;
	    }
	}

      *total = COSTS_N_INSNS (1);
      return false;

    case IOR:
      /* FIXME */
      *total = COSTS_N_INSNS (1);
      return true;

    case CLZ:
    case XOR:
    case ZERO_EXTRACT:
      *total = COSTS_N_INSNS (1);
      return false;

    case ASHIFT:
      /* The EXTSWSLI instruction is a combined instruction.  Don't count both
	 the sign extend and shift separately within the insn.  */
      if (TARGET_EXTSWSLI && mode == DImode
	  && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
	  && GET_MODE (XEXP (XEXP (x, 0), 0)) == SImode)
	{
	  *total = 0;
	  return false;
	}
      /* fall through */

    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATE:
    case ROTATERT:
      /* Handle mul_highpart.  */
      if (outer_code == TRUNCATE
	  && GET_CODE (XEXP (x, 0)) == MULT)
	{
	  if (mode == DImode)
	    *total = rs6000_cost->muldi;
	  else
	    *total = rs6000_cost->mulsi;
	  return true;
	}
      else if (outer_code == AND)
	/* Covered by the rotate-and-mask costing in case AND.  */
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      /* Extending loads are free (lbz/lhz/lha etc.).  */
      if (GET_CODE (XEXP (x, 0)) == MEM)
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case COMPARE:
    case NEG:
    case ABS:
      if (!FLOAT_MODE_P (mode))
	{
	  *total = COSTS_N_INSNS (1);
	  return false;
	}
      /* FALLTHRU */

    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
    case FLOAT_TRUNCATE:
      *total = rs6000_cost->fp;
      return false;

    case FLOAT_EXTEND:
      if (mode == DFmode)
	*total = rs6000_cost->sfdf_convert;
      else
	*total = rs6000_cost->fp;
      return false;

    case UNSPEC:
      switch (XINT (x, 1))
	{
	case UNSPEC_FRSP:
	  *total = rs6000_cost->fp;
	  return true;

	default:
	  break;
	}
      break;

    case CALL:
    case IF_THEN_ELSE:
      if (!speed)
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      else if (FLOAT_MODE_P (mode)
	       && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
	{
	  *total = rs6000_cost->fp;
	  return false;
	}
      break;

    case NE:
    case EQ:
    case GTU:
    case LTU:
      /* Carry bit requires mode == Pmode.
	 NEG or PLUS already counted so only add one.  */
      if (mode == Pmode
	  && (outer_code == NEG || outer_code == PLUS))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      if (outer_code == SET)
	{
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      if (TARGET_ISEL && !TARGET_MFCRF)
		*total = COSTS_N_INSNS (8);
	      else
		*total = COSTS_N_INSNS (2);
	      return true;
	    }
	  else
	    {
	      *total = COSTS_N_INSNS (3);
	      return false;
	    }
	}
      /* FALLTHRU */

    case GT:
    case LT:
    case UNORDERED:
      if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
	{
	  if (TARGET_ISEL && !TARGET_MFCRF)
	    *total = COSTS_N_INSNS (8);
	  else
	    *total = COSTS_N_INSNS (2);
	  return true;
	}
      /* CC COMPARE.  */
      if (outer_code == COMPARE)
	{
	  *total = 0;
	  return true;
	}
      break;

    default:
      break;
    }

  return false;
}
37706 | ||
/* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost.
   Computes the cost normally, dumps the inputs, result, and the rtx to
   stderr, then returns the wrapped hook's result unchanged.  */

static bool
rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
			int opno, int *total, bool speed)
{
  bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);

  fprintf (stderr,
	   "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
	   "opno = %d, total = %d, speed = %s, x:\n",
	   ret ? "complete" : "scan inner",
	   GET_MODE_NAME (mode),
	   GET_RTX_NAME (outer_code),
	   opno,
	   *total,
	   speed ? "true" : "false");

  debug_rtx (x);

  return ret;
}
37729 | ||
/* Debug form of ADDRESS_COST that is selected if -mdebug=cost.
   Computes the address cost normally, dumps it and the rtx to stderr,
   and returns the cost unchanged.  */

static int
rs6000_debug_address_cost (rtx x, machine_mode mode,
			   addr_space_t as, bool speed)
{
  int ret = TARGET_ADDRESS_COST (x, mode, as, speed);

  fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
	   ret, speed ? "true" : "false");
  debug_rtx (x);

  return ret;
}
37744 | ||
37745 | ||
/* A C expression returning the cost of moving data from a register of class
   CLASS1 to one of CLASS2.  */

static int
rs6000_register_move_cost (machine_mode mode,
			   reg_class_t from, reg_class_t to)
{
  int ret;

  /* dbg_cost_ctrl counts recursion depth so that, under -mdebug=cost,
     only the outermost call prints.  */
  if (TARGET_DEBUG_COST)
    dbg_cost_ctrl++;

  /* Moves from/to GENERAL_REGS.  */
  if (reg_classes_intersect_p (to, GENERAL_REGS)
      || reg_classes_intersect_p (from, GENERAL_REGS))
    {
      /* RCLASS is the non-GPR side of the move (or FROM if both
	 intersect GENERAL_REGS).  */
      reg_class_t rclass = from;

      if (! reg_classes_intersect_p (to, GENERAL_REGS))
	rclass = to;

      /* GPR <-> FP/VMX/VSX has no direct path here; cost it as a store
	 plus a load.  */
      if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
	ret = (rs6000_memory_move_cost (mode, rclass, false)
	       + rs6000_memory_move_cost (mode, GENERAL_REGS, false));

      /* It's more expensive to move CR_REGS than CR0_REGS because of the
	 shift.  */
      else if (rclass == CR_REGS)
	ret = 4;

      /* For those processors that have slow LR/CTR moves, make them more
	 expensive than memory in order to bias spills to memory .*/
      else if ((rs6000_cpu == PROCESSOR_POWER6
		|| rs6000_cpu == PROCESSOR_POWER7
		|| rs6000_cpu == PROCESSOR_POWER8
		|| rs6000_cpu == PROCESSOR_POWER9)
	       && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
	ret = 6 * hard_regno_nregs (0, mode);

      else
	/* A move will cost one instruction per GPR moved.  */
	ret = 2 * hard_regno_nregs (0, mode);
    }

  /* If we have VSX, we can easily move between FPR or Altivec registers.  */
  else if (VECTOR_MEM_VSX_P (mode)
	   && reg_classes_intersect_p (to, VSX_REGS)
	   && reg_classes_intersect_p (from, VSX_REGS))
    ret = 2 * hard_regno_nregs (FIRST_FPR_REGNO, mode);

  /* Moving between two similar registers is just one instruction.  */
  else if (reg_classes_intersect_p (to, from))
    ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;

  /* Everything else has to go through GENERAL_REGS.  */
  else
    ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
	   + rs6000_register_move_cost (mode, from, GENERAL_REGS));

  if (TARGET_DEBUG_COST)
    {
      if (dbg_cost_ctrl == 1)
	fprintf (stderr,
		 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
		 ret, GET_MODE_NAME (mode), reg_class_names[from],
		 reg_class_names[to]);
      dbg_cost_ctrl--;
    }

  return ret;
}
37817 | ||
/* A C expressions returning the cost of moving data of MODE from a register to
   or from memory.  */

static int
rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
			 bool in ATTRIBUTE_UNUSED)
{
  int ret;

  /* Recursion-depth guard: under -mdebug=cost only the outermost call
     prints.  */
  if (TARGET_DEBUG_COST)
    dbg_cost_ctrl++;

  /* Cost scales with how many hard registers MODE occupies in the
     target bank; regno 0 is a GPR, 32 the first FPR on rs6000.  */
  if (reg_classes_intersect_p (rclass, GENERAL_REGS))
    ret = 4 * hard_regno_nregs (0, mode);
  else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
	    || reg_classes_intersect_p (rclass, VSX_REGS)))
    ret = 4 * hard_regno_nregs (32, mode);
  else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
    ret = 4 * hard_regno_nregs (FIRST_ALTIVEC_REGNO, mode);
  else
    /* Anything else must be staged through GENERAL_REGS first.  */
    ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);

  if (TARGET_DEBUG_COST)
    {
      if (dbg_cost_ctrl == 1)
	fprintf (stderr,
		 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
		 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
      dbg_cost_ctrl--;
    }

  return ret;
}
37851 | ||
37852 | /* Returns a code for a target-specific builtin that implements | |
37853 | reciprocal of the function, or NULL_TREE if not available. */ | |
37854 | ||
37855 | static tree | |
37856 | rs6000_builtin_reciprocal (tree fndecl) | |
37857 | { | |
37858 | switch (DECL_FUNCTION_CODE (fndecl)) | |
37859 | { | |
37860 | case VSX_BUILTIN_XVSQRTDP: | |
37861 | if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode)) | |
37862 | return NULL_TREE; | |
37863 | ||
37864 | return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF]; | |
37865 | ||
37866 | case VSX_BUILTIN_XVSQRTSP: | |
37867 | if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode)) | |
37868 | return NULL_TREE; | |
37869 | ||
37870 | return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF]; | |
37871 | ||
37872 | default: | |
37873 | return NULL_TREE; | |
37874 | } | |
37875 | } | |
37876 | ||
37877 | /* Load up a constant. If the mode is a vector mode, splat the value across | |
37878 | all of the vector elements. */ | |
37879 | ||
37880 | static rtx | |
37881 | rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst) | |
37882 | { | |
37883 | rtx reg; | |
37884 | ||
37885 | if (mode == SFmode || mode == DFmode) | |
37886 | { | |
37887 | rtx d = const_double_from_real_value (dconst, mode); | |
37888 | reg = force_reg (mode, d); | |
37889 | } | |
37890 | else if (mode == V4SFmode) | |
37891 | { | |
37892 | rtx d = const_double_from_real_value (dconst, SFmode); | |
37893 | rtvec v = gen_rtvec (4, d, d, d, d); | |
37894 | reg = gen_reg_rtx (mode); | |
37895 | rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v)); | |
37896 | } | |
37897 | else if (mode == V2DFmode) | |
37898 | { | |
37899 | rtx d = const_double_from_real_value (dconst, DFmode); | |
37900 | rtvec v = gen_rtvec (2, d, d); | |
37901 | reg = gen_reg_rtx (mode); | |
37902 | rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v)); | |
37903 | } | |
37904 | else | |
37905 | gcc_unreachable (); | |
37906 | ||
37907 | return reg; | |
37908 | } | |
37909 | ||
37910 | /* Generate an FMA instruction. */ | |
37911 | ||
37912 | static void | |
37913 | rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a) | |
37914 | { | |
37915 | machine_mode mode = GET_MODE (target); | |
37916 | rtx dst; | |
37917 | ||
37918 | dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0); | |
37919 | gcc_assert (dst != NULL); | |
37920 | ||
37921 | if (dst != target) | |
37922 | emit_move_insn (target, dst); | |
37923 | } | |
37924 | ||
37925 | /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */ | |
37926 | ||
37927 | static void | |
37928 | rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a) | |
37929 | { | |
37930 | machine_mode mode = GET_MODE (dst); | |
37931 | rtx r; | |
37932 | ||
37933 | /* This is a tad more complicated, since the fnma_optab is for | |
37934 | a different expression: fma(-m1, m2, a), which is the same | |
37935 | thing except in the case of signed zeros. | |
37936 | ||
37937 | Fortunately we know that if FMA is supported that FNMSUB is | |
37938 | also supported in the ISA. Just expand it directly. */ | |
37939 | ||
37940 | gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing); | |
37941 | ||
37942 | r = gen_rtx_NEG (mode, a); | |
37943 | r = gen_rtx_FMA (mode, m1, m2, r); | |
37944 | r = gen_rtx_NEG (mode, r); | |
37945 | emit_insn (gen_rtx_SET (dst, r)); | |
37946 | } | |
37947 | ||
/* Newton-Raphson approximation of floating point divide DST = N/D.  If NOTE_P,
   add a reg_note saying that this was a division.  Support both scalar and
   vector divide.  Assumes no trapping math and finite arguments.  */

void
rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
{
  machine_mode mode = GET_MODE (dst);
  rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
  int i;

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes++;

  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  one = rs6000_load_constant_and_splat (mode, dconst1);

  /* x0 = 1./d estimate, via the hardware reciprocal-estimate unspec.  */
  x0 = gen_reg_rtx (mode);
  emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
					      UNSPEC_FRES)));

  /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i).  */
  if (passes > 1) {

    /* e0 = 1. - d * x0  */
    e0 = gen_reg_rtx (mode);
    rs6000_emit_nmsub (e0, d, x0, one);

    /* x1 = x0 + e0 * x0  */
    x1 = gen_reg_rtx (mode);
    rs6000_emit_madd (x1, e0, x0, x0);

    /* Refine the error term and estimate once per remaining pass,
       threading the previous pass's results through xprev/eprev.  */
    for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
	 ++i, xprev = xnext, eprev = enext) {

      /* enext = eprev * eprev  */
      enext = gen_reg_rtx (mode);
      emit_insn (gen_mul (enext, eprev, eprev));

      /* xnext = xprev + enext * xprev  */
      xnext = gen_reg_rtx (mode);
      rs6000_emit_madd (xnext, enext, xprev, xprev);
    }

  } else
    xprev = x0;

  /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i).  */

  /* u = n * xprev  */
  u = gen_reg_rtx (mode);
  emit_insn (gen_mul (u, n, xprev));

  /* v = n - (d * u)  */
  v = gen_reg_rtx (mode);
  rs6000_emit_nmsub (v, d, u, n);

  /* dst = (v * xprev) + u  */
  rs6000_emit_madd (dst, v, xprev, u);

  /* Record the mathematical meaning of the sequence so later RTL
     passes can simplify or CSE it as a plain division.  */
  if (note_p)
    add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
}
38022 | ||
/* Goldschmidt's Algorithm for single/double-precision floating point
   sqrt and rsqrt.  Assumes no trapping math and finite arguments.
   DST receives sqrt(SRC), or 1/sqrt(SRC) when RECIP is true.  */

void
rs6000_emit_swsqrt (rtx dst, rtx src, bool recip)
{
  machine_mode mode = GET_MODE (src);
  rtx e = gen_reg_rtx (mode);
  rtx g = gen_reg_rtx (mode);
  rtx h = gen_reg_rtx (mode);

  /* Low precision estimates guarantee 5 bits of accuracy.  High
     precision estimates guarantee 14 bits of accuracy.  SFmode
     requires 23 bits of accuracy.  DFmode requires 52 bits of
     accuracy.  Each pass at least doubles the accuracy, leading
     to the following.  */
  int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
  if (mode == DFmode || mode == V2DFmode)
    passes++;

  int i;
  rtx mhalf;
  enum insn_code code = optab_handler (smul_optab, mode);
  insn_gen_fn gen_mul = GEN_FCN (code);

  gcc_assert (code != CODE_FOR_nothing);

  mhalf = rs6000_load_constant_and_splat (mode, dconsthalf);

  /* e = rsqrt estimate */
  emit_insn (gen_rtx_SET (e, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
					     UNSPEC_RSQRT)));

  /* If (src == 0.0) filter infinity to prevent NaN for sqrt(0.0).  */
  if (!recip)
    {
      rtx zero = force_reg (mode, CONST0_RTX (mode));

      if (mode == SFmode)
	{
	  /* Scalar case: e = (src > 0) ? e : 0 via a conditional move.  */
	  rtx target = emit_conditional_move (e, GT, src, zero, mode,
					      e, zero, mode, 0);
	  if (target != e)
	    emit_move_insn (e, target);
	}
      else
	{
	  /* Vector case: per-element select between e and zero.  */
	  rtx cond = gen_rtx_GT (VOIDmode, e, zero);
	  rs6000_emit_vector_cond_expr (e, e, zero, cond, src, zero);
	}
    }

  /* g = sqrt estimate.  */
  emit_insn (gen_mul (g, e, src));
  /* h = 1/(2*sqrt) estimate.  */
  emit_insn (gen_mul (h, e, mhalf));

  if (recip)
    {
      if (passes == 1)
	{
	  rtx t = gen_reg_rtx (mode);
	  rs6000_emit_nmsub (t, g, h, mhalf);
	  /* Apply correction directly to 1/rsqrt estimate.  */
	  rs6000_emit_madd (dst, e, t, e);
	}
      else
	{
	  /* Iteratively refine the g (sqrt) and h (1/(2*sqrt))
	     estimates together, then recover rsqrt as 2*h.  */
	  for (i = 0; i < passes; i++)
	    {
	      rtx t1 = gen_reg_rtx (mode);
	      rtx g1 = gen_reg_rtx (mode);
	      rtx h1 = gen_reg_rtx (mode);

	      rs6000_emit_nmsub (t1, g, h, mhalf);
	      rs6000_emit_madd (g1, g, t1, g);
	      rs6000_emit_madd (h1, h, t1, h);

	      g = g1;
	      h = h1;
	    }
	  /* Multiply by 2 for 1/rsqrt.  */
	  emit_insn (gen_add3_insn (dst, h, h));
	}
    }
  else
    {
      /* One final correction step yields the sqrt result.  */
      rtx t = gen_reg_rtx (mode);
      rs6000_emit_nmsub (t, g, h, mhalf);
      rs6000_emit_madd (dst, g, t, g);
    }

  return;
}
38117 | ||
/* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
   (Power7) targets.  DST is the target, and SRC is the argument operand.
   DST's mode selects the SImode or DImode sequence.  */

void
rs6000_emit_popcount (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp1, tmp2;

  /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can.  */
  if (TARGET_POPCNTD)
    {
      if (mode == SImode)
	emit_insn (gen_popcntdsi2 (dst, src));
      else
	emit_insn (gen_popcntddi2 (dst, src));
      return;
    }

  /* Otherwise: popcntb gives a per-byte population count; multiplying
     by 0x0101...01 accumulates the byte counts into the most
     significant byte, which the final right shift extracts.  */
  tmp1 = gen_reg_rtx (mode);

  if (mode == SImode)
    {
      emit_insn (gen_popcntbsi2 (tmp1, src));
      tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
			  NULL_RTX, 0);
      tmp2 = force_reg (SImode, tmp2);
      emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
    }
  else
    {
      emit_insn (gen_popcntbdi2 (tmp1, src));
      tmp2 = expand_mult (DImode, tmp1,
			  GEN_INT ((HOST_WIDE_INT)
				   0x01010101 << 32 | 0x01010101),
			  NULL_RTX, 0);
      tmp2 = force_reg (DImode, tmp2);
      emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
    }
}
38158 | ||
38159 | ||
/* Emit parity intrinsic on TARGET_POPCNTB targets.  DST is the
   target, and SRC is the argument operand.  The parity is the low
   bit of the population count; two strategies are used depending on
   the cost of multiply on this CPU.  */

void
rs6000_emit_parity (rtx dst, rtx src)
{
  machine_mode mode = GET_MODE (dst);
  rtx tmp;

  tmp = gen_reg_rtx (mode);

  /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can.  */
  if (TARGET_CMPB)
    {
      if (mode == SImode)
	{
	  emit_insn (gen_popcntbsi2 (tmp, src));
	  emit_insn (gen_paritysi2_cmpb (dst, tmp));
	}
      else
	{
	  emit_insn (gen_popcntbdi2 (tmp, src));
	  emit_insn (gen_paritydi2_cmpb (dst, tmp));
	}
      return;
    }

  if (mode == SImode)
    {
      /* Is mult+shift >= shift+xor+shift+xor?  */
      if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
	{
	  /* Fold the per-byte counts from popcntb together with a
	     shift/xor tree instead of the multiply in
	     rs6000_emit_popcount.  */
	  rtx tmp1, tmp2, tmp3, tmp4;

	  tmp1 = gen_reg_rtx (SImode);
	  emit_insn (gen_popcntbsi2 (tmp1, src));

	  tmp2 = gen_reg_rtx (SImode);
	  emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
	  tmp3 = gen_reg_rtx (SImode);
	  emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));

	  tmp4 = gen_reg_rtx (SImode);
	  emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
	  emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
	}
      else
	rs6000_emit_popcount (tmp, src);
      /* Parity is the low bit of the (partial) population count.  */
      emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
    }
  else
    {
      /* Is mult+shift >= shift+xor+shift+xor+shift+xor?  */
      if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
	{
	  rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;

	  tmp1 = gen_reg_rtx (DImode);
	  emit_insn (gen_popcntbdi2 (tmp1, src));

	  tmp2 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
	  tmp3 = gen_reg_rtx (DImode);
	  emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));

	  tmp4 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
	  tmp5 = gen_reg_rtx (DImode);
	  emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));

	  tmp6 = gen_reg_rtx (DImode);
	  emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
	  emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
	}
      else
	rs6000_emit_popcount (tmp, src);
      emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
    }
}
38239 | ||
/* Expand an Altivec constant permutation for little endian mode.
   OP0 and OP1 are the input vectors and TARGET is the output vector.
   SEL specifies the constant permutation vector.

   There are two issues: First, the two input operands must be
   swapped so that together they form a double-wide array in LE
   order.  Second, the vperm instruction has surprising behavior
   in LE mode: it interprets the elements of the source vectors
   in BE mode ("left to right") and interprets the elements of
   the destination vector in LE mode ("right to left").  To
   correct for this, we must subtract each element of the permute
   control vector from 31.

   For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
   with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
   We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
   serve as the permute control vector.  Then, in BE mode,

     vperm 9,10,11,12

   places the desired result in vr9.  However, in LE mode the
   vector contents will be

     vr10 = 00000003 00000002 00000001 00000000
     vr11 = 00000007 00000006 00000005 00000004

   The result of the vperm using the same permute control vector is

     vr9  = 05000000 07000000 01000000 03000000

   That is, the leftmost 4 bytes of vr10 are interpreted as the
   source for the rightmost 4 bytes of vr9, and so on.

   If we change the permute control vector to

     vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}

   and issue

     vperm 9,11,10,12

   we get the desired

   vr9  = 00000006 00000004 00000002 00000000.  */

static void
altivec_expand_vec_perm_const_le (rtx target, rtx op0, rtx op1,
				  const vec_perm_indices &sel)
{
  unsigned int i;
  rtx perm[16];
  rtx constv, unspec;

  /* Unpack and adjust the constant selector: reflect each index
     through 31 to compensate for vperm's BE element numbering.  */
  for (i = 0; i < 16; ++i)
    {
      unsigned int elt = 31 - (sel[i] & 31);
      perm[i] = GEN_INT (elt);
    }

  /* Expand to a permute, swapping the inputs and using the
     adjusted selector.  */
  if (!REG_P (op0))
    op0 = force_reg (V16QImode, op0);
  if (!REG_P (op1))
    op1 = force_reg (V16QImode, op1);

  constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
  constv = force_reg (V16QImode, constv);
  unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
			   UNSPEC_VPERM);
  if (!REG_P (target))
    {
      rtx tmp = gen_reg_rtx (V16QImode);
      emit_move_insn (tmp, unspec);
      unspec = tmp;
    }

  emit_move_insn (target, unspec);
}
38320 | ||
/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
   permute control vector.  But here it's not a constant, so we must
   generate a vector NAND or NOR to do the adjustment.
   OPERANDS is {target, op0, op1, selector}.  */

void
altivec_expand_vec_perm_le (rtx operands[4])
{
  rtx notx, iorx, unspec;
  rtx target = operands[0];
  rtx op0 = operands[1];
  rtx op1 = operands[2];
  rtx sel = operands[3];
  rtx tmp = target;
  rtx norreg = gen_reg_rtx (V16QImode);
  machine_mode mode = GET_MODE (target);

  /* Get everything in regs so the pattern matches.  */
  if (!REG_P (op0))
    op0 = force_reg (mode, op0);
  if (!REG_P (op1))
    op1 = force_reg (mode, op1);
  if (!REG_P (sel))
    sel = force_reg (V16QImode, sel);
  if (!REG_P (target))
    tmp = gen_reg_rtx (mode);

  if (TARGET_P9_VECTOR)
    {
      /* ISA 3.0 vpermr does the LE adjustment itself.  */
      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op0, op1, sel),
			       UNSPEC_VPERMR);
    }
  else
    {
      /* Invert the selector with a VNAND if available, else a VNOR.
	 The VNAND is preferred for future fusion opportunities.  */
      notx = gen_rtx_NOT (V16QImode, sel);
      iorx = (TARGET_P8_VECTOR
	      ? gen_rtx_IOR (V16QImode, notx, notx)
	      : gen_rtx_AND (V16QImode, notx, notx));
      emit_insn (gen_rtx_SET (norreg, iorx));

      /* Permute with operands reversed and adjusted selector.  */
      unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
			       UNSPEC_VPERM);
    }

  /* Copy into target, possibly by way of a register.  */
  if (!REG_P (target))
    {
      emit_move_insn (tmp, unspec);
      unspec = tmp;
    }

  emit_move_insn (target, unspec);
}
38376 | ||
/* Expand an Altivec constant permutation.  Return true if we match
   an efficient implementation; false to fall back to VPERM.

   OP0 and OP1 are the input vectors and TARGET is the output vector.
   SEL specifies the constant permutation vector.  */

static bool
altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
			       const vec_perm_indices &sel)
{
  /* Table of single instructions that each implement one fixed 16-byte
     permutation, guarded by the ISA flag that provides the insn.  */
  struct altivec_perm_insn {
    HOST_WIDE_INT mask;
    enum insn_code impl;
    unsigned char perm[16];
  };
  static const struct altivec_perm_insn patterns[] = {
    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
      {  1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
    { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
      {  2,  3,  6,  7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
       : CODE_FOR_altivec_vmrglb_direct),
      {  0, 16,  1, 17,  2, 18,  3, 19,  4, 20,  5, 21,  6, 22,  7, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
       : CODE_FOR_altivec_vmrglh_direct),
      {  0,  1, 16, 17,  2,  3, 18, 19,  4,  5, 20, 21,  6,  7, 22, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
       : CODE_FOR_altivec_vmrglw_direct),
      {  0,  1,  2,  3, 16, 17, 18, 19,  4,  5,  6,  7, 20, 21, 22, 23 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
       : CODE_FOR_altivec_vmrghb_direct),
      {  8, 24,  9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
       : CODE_FOR_altivec_vmrghh_direct),
      {  8,  9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
    { OPTION_MASK_ALTIVEC,
      (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
       : CODE_FOR_altivec_vmrghw_direct),
      {  8,  9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
    { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
      {  0,  1,  2,  3, 16, 17, 18, 19,  8,  9, 10, 11, 24, 25, 26, 27 } },
    { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
      {  4,  5,  6,  7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
  };

  unsigned int i, j, elt, which;
  unsigned char perm[16];
  rtx x;
  bool one_vec;

  /* Unpack the constant selector.  WHICH accumulates a bitmask:
     bit 0 set if any index selects from OP0, bit 1 if from OP1.  */
  for (i = which = 0; i < 16; ++i)
    {
      elt = sel[i] & 31;
      which |= (elt < 16 ? 1 : 2);
      perm[i] = elt;
    }

  /* Simplify the constant selector based on operands.  */
  switch (which)
    {
    default:
      gcc_unreachable ();

    case 3:
      one_vec = false;
      if (!rtx_equal_p (op0, op1))
	break;
      /* FALLTHRU */

    case 2:
      for (i = 0; i < 16; ++i)
	perm[i] &= 15;
      op0 = op1;
      one_vec = true;
      break;

    case 1:
      op1 = op0;
      one_vec = true;
      break;
    }

  /* Look for splat patterns.  */
  if (one_vec)
    {
      elt = perm[0];

      /* All sixteen bytes equal: byte splat (vspltb).  */
      for (i = 0; i < 16; ++i)
	if (perm[i] != elt)
	  break;
      if (i == 16)
	{
	  if (!BYTES_BIG_ENDIAN)
	    elt = 15 - elt;
	  emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
	  return true;
	}

      /* Eight identical halfword pairs: halfword splat (vsplth).  */
      if (elt % 2 == 0)
	{
	  for (i = 0; i < 16; i += 2)
	    if (perm[i] != elt || perm[i + 1] != elt + 1)
	      break;
	  if (i == 16)
	    {
	      int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
	      x = gen_reg_rtx (V8HImode);
	      emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
						    GEN_INT (field)));
	      emit_move_insn (target, gen_lowpart (V16QImode, x));
	      return true;
	    }
	}

      /* Four identical word quadruples: word splat (vspltw).  */
      if (elt % 4 == 0)
	{
	  for (i = 0; i < 16; i += 4)
	    if (perm[i] != elt
		|| perm[i + 1] != elt + 1
		|| perm[i + 2] != elt + 2
		|| perm[i + 3] != elt + 3)
	      break;
	  if (i == 16)
	    {
	      int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
	      x = gen_reg_rtx (V4SImode);
	      emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
						    GEN_INT (field)));
	      emit_move_insn (target, gen_lowpart (V16QImode, x));
	      return true;
	    }
	}
    }

  /* Look for merge and pack patterns.  */
  for (j = 0; j < ARRAY_SIZE (patterns); ++j)
    {
      bool swapped;

      if ((patterns[j].mask & rs6000_isa_flags) == 0)
	continue;

      elt = patterns[j].perm[0];
      if (perm[0] == elt)
	swapped = false;
      else if (perm[0] == elt + 16)
	swapped = true;
      else
	continue;
      for (i = 1; i < 16; ++i)
	{
	  elt = patterns[j].perm[i];
	  if (swapped)
	    elt = (elt >= 16 ? elt - 16 : elt + 16);
	  else if (one_vec && elt >= 16)
	    elt -= 16;
	  if (perm[i] != elt)
	    break;
	}
      if (i == 16)
	{
	  enum insn_code icode = patterns[j].impl;
	  machine_mode omode = insn_data[icode].operand[0].mode;
	  machine_mode imode = insn_data[icode].operand[1].mode;

	  /* For little-endian, don't use vpkuwum and vpkuhum if the
	     underlying vector type is not V4SI and V8HI, respectively.
	     For example, using vpkuwum with a V8HI picks up the even
	     halfwords (BE numbering) when the even halfwords (LE
	     numbering) are what we need.  */
	  if (!BYTES_BIG_ENDIAN
	      && icode == CODE_FOR_altivec_vpkuwum_direct
	      && ((GET_CODE (op0) == REG
		   && GET_MODE (op0) != V4SImode)
		  || (GET_CODE (op0) == SUBREG
		      && GET_MODE (XEXP (op0, 0)) != V4SImode)))
	    continue;
	  if (!BYTES_BIG_ENDIAN
	      && icode == CODE_FOR_altivec_vpkuhum_direct
	      && ((GET_CODE (op0) == REG
		   && GET_MODE (op0) != V8HImode)
		  || (GET_CODE (op0) == SUBREG
		      && GET_MODE (XEXP (op0, 0)) != V8HImode)))
	    continue;

	  /* For little-endian, the two input operands must be swapped
	     (or swapped back) to ensure proper right-to-left numbering
	     from 0 to 2N-1.  */
	  if (swapped ^ !BYTES_BIG_ENDIAN)
	    std::swap (op0, op1);
	  if (imode != V16QImode)
	    {
	      op0 = gen_lowpart (imode, op0);
	      op1 = gen_lowpart (imode, op1);
	    }
	  if (omode == V16QImode)
	    x = target;
	  else
	    x = gen_reg_rtx (omode);
	  emit_insn (GEN_FCN (icode) (x, op0, op1));
	  if (omode != V16QImode)
	    emit_move_insn (target, gen_lowpart (V16QImode, x));
	  return true;
	}
    }

  /* No single-instruction match; on LE a generic vperm with an
     adjusted control vector still counts as handled.  */
  if (!BYTES_BIG_ENDIAN)
    {
      altivec_expand_vec_perm_const_le (target, op0, op1, sel);
      return true;
    }

  return false;
}
38597 | ||
/* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
   Return true if we match an efficient implementation.

   PERM0/PERM1 are the two selector indices in the range 0..3: the low
   bit picks the element within an operand, bit 1 picks the operand.
   TARGET may be NULL to merely test whether the permutation is
   expandable.  */

static bool
rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
				unsigned char perm0, unsigned char perm1)
{
  rtx x;

  /* If both selectors come from the same operand, fold to single op.  */
  if ((perm0 & 2) == (perm1 & 2))
    {
      if (perm0 & 2)
	op0 = op1;
      else
	op1 = op0;
    }
  /* If both operands are equal, fold to simpler permutation.  */
  if (rtx_equal_p (op0, op1))
    {
      perm0 = perm0 & 1;
      perm1 = (perm1 & 1) + 2;
    }
  /* If the first selector comes from the second operand, swap.  */
  else if (perm0 & 2)
    {
      if (perm1 & 2)
	return false;
      perm0 -= 2;
      perm1 += 2;
      std::swap (op0, op1);
    }
  /* If the second selector does not come from the second operand, fail.  */
  else if ((perm1 & 2) == 0)
    return false;

  /* Success!  */
  if (target != NULL)
    {
      machine_mode vmode, dmode;
      rtvec v;

      vmode = GET_MODE (target);
      gcc_assert (GET_MODE_NUNITS (vmode) == 2);
      /* Build a double-wide vector of the concatenated operands and
	 select the two requested elements from it.  */
      dmode = mode_for_vector (GET_MODE_INNER (vmode), 4).require ();
      x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
      v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
      x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
      emit_insn (gen_rtx_SET (target, x));
    }
  return true;
}
38650 | ||
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  A NULL TARGET means we
   are only testing whether the permutation described by SEL can be
   expanded; otherwise emit it into TARGET.  */

static bool
rs6000_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
				 rtx op1, const vec_perm_indices &sel)
{
  bool testing_p = !target;

  /* AltiVec (and thus VSX) can handle arbitrary permutations.  */
  if (TARGET_ALTIVEC && testing_p)
    return true;

  /* Check for ps_merge*, evmerge* or xxperm* insns.  */
  if ((vmode == V2SFmode && TARGET_PAIRED_FLOAT)
      || (vmode == V2SImode && TARGET_SPE)
      || ((vmode == V2DFmode || vmode == V2DImode)
	  && VECTOR_MEM_VSX_P (vmode)))
    {
      if (testing_p)
	{
	  /* Use scratch virtual registers so expansion can proceed
	     without a real destination.  */
	  op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
	  op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
	}
      if (rs6000_expand_vec_perm_const_1 (target, op0, op1, sel[0], sel[1]))
	return true;
    }

  if (TARGET_ALTIVEC)
    {
      /* Force the target-independent code to lower to V16QImode.  */
      if (vmode != V16QImode)
	return false;
      if (altivec_expand_vec_perm_const (target, op0, op1, sel))
	return true;
    }

  return false;
}
38689 | ||
f151c9e1 RS |
38690 | /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. |
38691 | OP0 and OP1 are the input vectors and TARGET is the output vector. | |
38692 | PERM specifies the constant permutation vector. */ | |
83349046 SB |
38693 | |
38694 | static void | |
38695 | rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1, | |
f151c9e1 | 38696 | machine_mode vmode, const vec_perm_builder &perm) |
83349046 | 38697 | { |
f151c9e1 | 38698 | rtx x = expand_vec_perm_const (vmode, op0, op1, perm, BLKmode, target); |
83349046 SB |
38699 | if (x != target) |
38700 | emit_move_insn (target, x); | |
38701 | } | |
38702 | ||
38703 | /* Expand an extract even operation. */ | |
38704 | ||
38705 | void | |
38706 | rs6000_expand_extract_even (rtx target, rtx op0, rtx op1) | |
38707 | { | |
38708 | machine_mode vmode = GET_MODE (target); | |
38709 | unsigned i, nelt = GET_MODE_NUNITS (vmode); | |
e3342de4 | 38710 | vec_perm_builder perm (nelt, nelt, 1); |
83349046 SB |
38711 | |
38712 | for (i = 0; i < nelt; i++) | |
f151c9e1 | 38713 | perm.quick_push (i * 2); |
83349046 | 38714 | |
f151c9e1 | 38715 | rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm); |
83349046 SB |
38716 | } |
38717 | ||
38718 | /* Expand a vector interleave operation. */ | |
38719 | ||
38720 | void | |
38721 | rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp) | |
38722 | { | |
38723 | machine_mode vmode = GET_MODE (target); | |
38724 | unsigned i, high, nelt = GET_MODE_NUNITS (vmode); | |
e3342de4 | 38725 | vec_perm_builder perm (nelt, nelt, 1); |
83349046 SB |
38726 | |
38727 | high = (highp ? 0 : nelt / 2); | |
38728 | for (i = 0; i < nelt / 2; i++) | |
38729 | { | |
f151c9e1 RS |
38730 | perm.quick_push (i + high); |
38731 | perm.quick_push (i + nelt + high); | |
83349046 SB |
38732 | } |
38733 | ||
f151c9e1 | 38734 | rs6000_do_expand_vec_perm (target, op0, op1, vmode, perm); |
83349046 SB |
38735 | } |
38736 | ||
/* Scale a V2DF vector SRC by two to the SCALE and place in TGT.
   Implemented by building a {2^scale, 2^scale} constant vector and
   multiplying.  */
void
rs6000_scale_v2df (rtx tgt, rtx src, int scale)
{
  HOST_WIDE_INT hwi_scale (scale);
  REAL_VALUE_TYPE r_pow;
  rtvec v = rtvec_alloc (2);
  rtx elt;
  rtx scale_vec = gen_reg_rtx (V2DFmode);
  /* r_pow = 2.0 ** scale.  */
  (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
  elt = const_double_from_real_value (r_pow, DFmode);
  RTVEC_ELT (v, 0) = elt;
  RTVEC_ELT (v, 1) = elt;
  rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
  emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
}
38753 | ||
/* Return an RTX representing where to find the function value of a
   function returning MODE.  MODE is a complex mode; the value comes
   back either in a single register or split across a register pair,
   depending on the inner element size and the ABI register class.  */
static rtx
rs6000_complex_function_value (machine_mode mode)
{
  unsigned int regno;
  rtx r1, r2;
  machine_mode inner = GET_MODE_INNER (mode);
  unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);

  /* IEEE 128-bit complex values are returned in Altivec registers.  */
  if (TARGET_FLOAT128_TYPE
      && (mode == KCmode
	  || (mode == TCmode && TARGET_IEEEQUAD)))
    regno = ALTIVEC_ARG_RETURN;

  else if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
    regno = FP_ARG_RETURN;

  else
    {
      regno = GP_ARG_RETURN;

      /* 32-bit is OK since it'll go in r3/r4.  */
      if (TARGET_32BIT && inner_bytes >= 4)
	return gen_rtx_REG (mode, regno);
    }

  if (inner_bytes >= 8)
    return gen_rtx_REG (mode, regno);

  /* Otherwise describe the real and imaginary parts as two separate
     registers at explicit byte offsets.  */
  r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
			  const0_rtx);
  r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
			  GEN_INT (inner_bytes));
  return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
}
38790 | ||
38791 | /* Return an rtx describing a return value of MODE as a PARALLEL | |
38792 | in N_ELTS registers, each of mode ELT_MODE, starting at REGNO, | |
38793 | stride REG_STRIDE. */ | |
38794 | ||
38795 | static rtx | |
38796 | rs6000_parallel_return (machine_mode mode, | |
38797 | int n_elts, machine_mode elt_mode, | |
38798 | unsigned int regno, unsigned int reg_stride) | |
38799 | { | |
38800 | rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); | |
38801 | ||
38802 | int i; | |
38803 | for (i = 0; i < n_elts; i++) | |
38804 | { | |
38805 | rtx r = gen_rtx_REG (elt_mode, regno); | |
38806 | rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode)); | |
38807 | XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off); | |
38808 | regno += reg_stride; | |
38809 | } | |
38810 | ||
38811 | return par; | |
38812 | } | |
38813 | ||
38814 | /* Target hook for TARGET_FUNCTION_VALUE. | |
38815 | ||
38816 | On the SPE, both FPs and vectors are returned in r3. | |
38817 | ||
38818 | On RS/6000 an integer value is in r3 and a floating-point value is in | |
38819 | fp1, unless -msoft-float. */ | |
38820 | ||
38821 | static rtx | |
38822 | rs6000_function_value (const_tree valtype, | |
38823 | const_tree fn_decl_or_type ATTRIBUTE_UNUSED, | |
38824 | bool outgoing ATTRIBUTE_UNUSED) | |
38825 | { | |
38826 | machine_mode mode; | |
38827 | unsigned int regno; | |
38828 | machine_mode elt_mode; | |
38829 | int n_elts; | |
38830 | ||
38831 | /* Special handling for structs in darwin64. */ | |
38832 | if (TARGET_MACHO | |
38833 | && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype)) | |
38834 | { | |
38835 | CUMULATIVE_ARGS valcum; | |
38836 | rtx valret; | |
38837 | ||
38838 | valcum.words = 0; | |
38839 | valcum.fregno = FP_ARG_MIN_REG; | |
38840 | valcum.vregno = ALTIVEC_ARG_MIN_REG; | |
38841 | /* Do a trial code generation as if this were going to be passed as | |
38842 | an argument; if any part goes in memory, we return NULL. */ | |
38843 | valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true); | |
38844 | if (valret) | |
38845 | return valret; | |
38846 | /* Otherwise fall through to standard ABI rules. */ | |
38847 | } | |
38848 | ||
38849 | mode = TYPE_MODE (valtype); | |
38850 | ||
38851 | /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */ | |
38852 | if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts)) | |
38853 | { | |
38854 | int first_reg, n_regs; | |
38855 | ||
38856 | if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode)) | |
38857 | { | |
38858 | /* _Decimal128 must use even/odd register pairs. */ | |
38859 | first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; | |
38860 | n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3; | |
38861 | } | |
38862 | else | |
38863 | { | |
38864 | first_reg = ALTIVEC_ARG_RETURN; | |
38865 | n_regs = 1; | |
38866 | } | |
38867 | ||
38868 | return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs); | |
38869 | } | |
38870 | ||
38871 | /* Some return value types need be split in -mpowerpc64, 32bit ABI. */ | |
38872 | if (TARGET_32BIT && TARGET_POWERPC64) | |
38873 | switch (mode) | |
38874 | { | |
38875 | default: | |
38876 | break; | |
4e10a5a7 RS |
38877 | case E_DImode: |
38878 | case E_SCmode: | |
38879 | case E_DCmode: | |
38880 | case E_TCmode: | |
83349046 SB |
38881 | int count = GET_MODE_SIZE (mode) / 4; |
38882 | return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1); | |
38883 | } | |
38884 | ||
38885 | if ((INTEGRAL_TYPE_P (valtype) | |
38886 | && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64)) | |
38887 | || POINTER_TYPE_P (valtype)) | |
38888 | mode = TARGET_32BIT ? SImode : DImode; | |
38889 | ||
38890 | if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS) | |
38891 | /* _Decimal128 must use an even/odd register pair. */ | |
38892 | regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; | |
38893 | else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS | |
38894 | && !FLOAT128_VECTOR_P (mode) | |
38895 | && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT)) | |
38896 | regno = FP_ARG_RETURN; | |
38897 | else if (TREE_CODE (valtype) == COMPLEX_TYPE | |
38898 | && targetm.calls.split_complex_arg) | |
38899 | return rs6000_complex_function_value (mode); | |
38900 | /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same | |
38901 | return register is used in both cases, and we won't see V2DImode/V2DFmode | |
38902 | for pure altivec, combine the two cases. */ | |
38903 | else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode)) | |
38904 | && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI | |
38905 | && ALTIVEC_OR_VSX_VECTOR_MODE (mode)) | |
38906 | regno = ALTIVEC_ARG_RETURN; | |
38907 | else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT | |
38908 | && (mode == DFmode || mode == DCmode | |
38909 | || FLOAT128_IBM_P (mode) || mode == TCmode)) | |
38910 | return spe_build_register_parallel (mode, GP_ARG_RETURN); | |
38911 | else | |
38912 | regno = GP_ARG_RETURN; | |
38913 | ||
38914 | return gen_rtx_REG (mode, regno); | |
38915 | } | |
38916 | ||
38917 | /* Define how to find the value returned by a library function | |
38918 | assuming the value has mode MODE. */ | |
38919 | rtx | |
38920 | rs6000_libcall_value (machine_mode mode) | |
38921 | { | |
38922 | unsigned int regno; | |
38923 | ||
38924 | /* Long long return value need be split in -mpowerpc64, 32bit ABI. */ | |
38925 | if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode) | |
38926 | return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1); | |
38927 | ||
38928 | if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS) | |
38929 | /* _Decimal128 must use an even/odd register pair. */ | |
38930 | regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN; | |
38931 | else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) | |
38932 | && TARGET_HARD_FLOAT && TARGET_FPRS | |
38933 | && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT)) | |
38934 | regno = FP_ARG_RETURN; | |
38935 | /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same | |
38936 | return register is used in both cases, and we won't see V2DImode/V2DFmode | |
38937 | for pure altivec, combine the two cases. */ | |
38938 | else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) | |
38939 | && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI) | |
38940 | regno = ALTIVEC_ARG_RETURN; | |
38941 | else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg) | |
38942 | return rs6000_complex_function_value (mode); | |
38943 | else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT | |
38944 | && (mode == DFmode || mode == DCmode | |
38945 | || FLOAT128_IBM_P (mode) || mode == TCmode)) | |
38946 | return spe_build_register_parallel (mode, GP_ARG_RETURN); | |
38947 | else | |
38948 | regno = GP_ARG_RETURN; | |
38949 | ||
38950 | return gen_rtx_REG (mode, regno); | |
38951 | } | |
38952 | ||
38953 | ||
38954 | /* Return true if we use LRA instead of reload pass. */ | |
38955 | static bool | |
38956 | rs6000_lra_p (void) | |
38957 | { | |
38958 | return TARGET_LRA; | |
38959 | } | |
38960 | ||
38961 | /* Compute register pressure classes. We implement the target hook to avoid | |
38962 | IRA picking something like NON_SPECIAL_REGS as a pressure class, which can | |
38963 | lead to incorrect estimates of number of available registers and therefor | |
38964 | increased register pressure/spill. */ | |
38965 | static int | |
38966 | rs6000_compute_pressure_classes (enum reg_class *pressure_classes) | |
38967 | { | |
38968 | int n; | |
38969 | ||
38970 | n = 0; | |
38971 | pressure_classes[n++] = GENERAL_REGS; | |
38972 | if (TARGET_VSX) | |
38973 | pressure_classes[n++] = VSX_REGS; | |
38974 | else | |
38975 | { | |
38976 | if (TARGET_ALTIVEC) | |
38977 | pressure_classes[n++] = ALTIVEC_REGS; | |
38978 | if (TARGET_HARD_FLOAT && TARGET_FPRS) | |
38979 | pressure_classes[n++] = FLOAT_REGS; | |
38980 | } | |
38981 | pressure_classes[n++] = CR_REGS; | |
38982 | pressure_classes[n++] = SPECIAL_REGS; | |
38983 | ||
38984 | return n; | |
38985 | } | |
38986 | ||
38987 | /* Given FROM and TO register numbers, say whether this elimination is allowed. | |
38988 | Frame pointer elimination is automatically handled. | |
38989 | ||
38990 | For the RS/6000, if frame pointer elimination is being done, we would like | |
38991 | to convert ap into fp, not sp. | |
38992 | ||
38993 | We need r30 if -mminimal-toc was specified, and there are constant pool | |
38994 | references. */ | |
38995 | ||
38996 | static bool | |
38997 | rs6000_can_eliminate (const int from, const int to) | |
38998 | { | |
38999 | return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM | |
39000 | ? ! frame_pointer_needed | |
39001 | : from == RS6000_PIC_OFFSET_TABLE_REGNUM | |
39002 | ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC | |
39003 | || constant_pool_empty_p () | |
39004 | : true); | |
39005 | } | |
39006 | ||
39007 | /* Define the offset between two registers, FROM to be eliminated and its | |
39008 | replacement TO, at the start of a routine. */ | |
39009 | HOST_WIDE_INT | |
39010 | rs6000_initial_elimination_offset (int from, int to) | |
39011 | { | |
39012 | rs6000_stack_t *info = rs6000_stack_info (); | |
39013 | HOST_WIDE_INT offset; | |
39014 | ||
39015 | if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) | |
39016 | offset = info->push_p ? 0 : -info->total_size; | |
39017 | else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) | |
39018 | { | |
39019 | offset = info->push_p ? 0 : -info->total_size; | |
39020 | if (FRAME_GROWS_DOWNWARD) | |
39021 | offset += info->fixed_size + info->vars_size + info->parm_size; | |
39022 | } | |
39023 | else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) | |
39024 | offset = FRAME_GROWS_DOWNWARD | |
39025 | ? info->fixed_size + info->vars_size + info->parm_size | |
39026 | : 0; | |
39027 | else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) | |
39028 | offset = info->total_size; | |
39029 | else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) | |
39030 | offset = info->push_p ? info->total_size : 0; | |
39031 | else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM) | |
39032 | offset = 0; | |
39033 | else | |
39034 | gcc_unreachable (); | |
39035 | ||
39036 | return offset; | |
39037 | } | |
39038 | ||
39039 | static rtx | |
39040 | rs6000_dwarf_register_span (rtx reg) | |
39041 | { | |
39042 | rtx parts[8]; | |
39043 | int i, words; | |
39044 | unsigned regno = REGNO (reg); | |
39045 | machine_mode mode = GET_MODE (reg); | |
39046 | ||
39047 | if (TARGET_SPE | |
39048 | && regno < 32 | |
39049 | && (SPE_VECTOR_MODE (GET_MODE (reg)) | |
39050 | || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) | |
39051 | && mode != SFmode && mode != SDmode && mode != SCmode))) | |
39052 | ; | |
39053 | else | |
39054 | return NULL_RTX; | |
39055 | ||
39056 | regno = REGNO (reg); | |
39057 | ||
39058 | /* The duality of the SPE register size wreaks all kinds of havoc. | |
39059 | This is a way of distinguishing r0 in 32-bits from r0 in | |
39060 | 64-bits. */ | |
39061 | words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD; | |
39062 | gcc_assert (words <= 4); | |
39063 | for (i = 0; i < words; i++, regno++) | |
39064 | { | |
39065 | if (BYTES_BIG_ENDIAN) | |
39066 | { | |
39067 | parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO); | |
39068 | parts[2 * i + 1] = gen_rtx_REG (SImode, regno); | |
39069 | } | |
39070 | else | |
39071 | { | |
39072 | parts[2 * i] = gen_rtx_REG (SImode, regno); | |
39073 | parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO); | |
39074 | } | |
39075 | } | |
39076 | ||
39077 | return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts)); | |
39078 | } | |
39079 | ||
39080 | /* Fill in sizes for SPE register high parts in table used by unwinder. */ | |
39081 | ||
39082 | static void | |
39083 | rs6000_init_dwarf_reg_sizes_extra (tree address) | |
39084 | { | |
39085 | if (TARGET_SPE) | |
39086 | { | |
39087 | int i; | |
39088 | machine_mode mode = TYPE_MODE (char_type_node); | |
39089 | rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL); | |
39090 | rtx mem = gen_rtx_MEM (BLKmode, addr); | |
39091 | rtx value = gen_int_mode (4, mode); | |
39092 | ||
39093 | for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++) | |
39094 | { | |
39095 | int column = DWARF_REG_TO_UNWIND_COLUMN | |
39096 | (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true)); | |
39097 | HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode); | |
39098 | ||
39099 | emit_move_insn (adjust_address (mem, mode, offset), value); | |
39100 | } | |
39101 | } | |
39102 | ||
39103 | if (TARGET_MACHO && ! TARGET_ALTIVEC) | |
39104 | { | |
39105 | int i; | |
39106 | machine_mode mode = TYPE_MODE (char_type_node); | |
39107 | rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL); | |
39108 | rtx mem = gen_rtx_MEM (BLKmode, addr); | |
39109 | rtx value = gen_int_mode (16, mode); | |
39110 | ||
39111 | /* On Darwin, libgcc may be built to run on both G3 and G4/5. | |
39112 | The unwinder still needs to know the size of Altivec registers. */ | |
39113 | ||
39114 | for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++) | |
39115 | { | |
39116 | int column = DWARF_REG_TO_UNWIND_COLUMN | |
39117 | (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true)); | |
39118 | HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode); | |
39119 | ||
39120 | emit_move_insn (adjust_address (mem, mode, offset), value); | |
39121 | } | |
39122 | } | |
39123 | } | |
39124 | ||
39125 | /* Map internal gcc register numbers to debug format register numbers. | |
39126 | FORMAT specifies the type of debug register number to use: | |
39127 | 0 -- debug information, except for frame-related sections | |
39128 | 1 -- DWARF .debug_frame section | |
39129 | 2 -- DWARF .eh_frame section */ | |
39130 | ||
39131 | unsigned int | |
39132 | rs6000_dbx_register_number (unsigned int regno, unsigned int format) | |
39133 | { | |
39134 | /* We never use the GCC internal number for SPE high registers. | |
39135 | Those are mapped to the 1200..1231 range for all debug formats. */ | |
39136 | if (SPE_HIGH_REGNO_P (regno)) | |
39137 | return regno - FIRST_SPE_HIGH_REGNO + 1200; | |
39138 | ||
39139 | /* Except for the above, we use the internal number for non-DWARF | |
39140 | debug information, and also for .eh_frame. */ | |
39141 | if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2) | |
39142 | return regno; | |
39143 | ||
39144 | /* On some platforms, we use the standard DWARF register | |
39145 | numbering for .debug_info and .debug_frame. */ | |
39146 | #ifdef RS6000_USE_DWARF_NUMBERING | |
39147 | if (regno <= 63) | |
39148 | return regno; | |
39149 | if (regno == LR_REGNO) | |
39150 | return 108; | |
39151 | if (regno == CTR_REGNO) | |
39152 | return 109; | |
39153 | /* Special handling for CR for .debug_frame: rs6000_emit_prologue has | |
39154 | translated any combination of CR2, CR3, CR4 saves to a save of CR2. | |
39155 | The actual code emitted saves the whole of CR, so we map CR2_REGNO | |
39156 | to the DWARF reg for CR. */ | |
39157 | if (format == 1 && regno == CR2_REGNO) | |
39158 | return 64; | |
39159 | if (CR_REGNO_P (regno)) | |
39160 | return regno - CR0_REGNO + 86; | |
39161 | if (regno == CA_REGNO) | |
39162 | return 101; /* XER */ | |
39163 | if (ALTIVEC_REGNO_P (regno)) | |
39164 | return regno - FIRST_ALTIVEC_REGNO + 1124; | |
39165 | if (regno == VRSAVE_REGNO) | |
39166 | return 356; | |
39167 | if (regno == VSCR_REGNO) | |
39168 | return 67; | |
39169 | if (regno == SPE_ACC_REGNO) | |
39170 | return 99; | |
39171 | if (regno == SPEFSCR_REGNO) | |
39172 | return 612; | |
39173 | #endif | |
39174 | return regno; | |
39175 | } | |
39176 | ||
39177 | /* target hook eh_return_filter_mode */ | |
095a2d76 | 39178 | static scalar_int_mode |
83349046 SB |
39179 | rs6000_eh_return_filter_mode (void) |
39180 | { | |
39181 | return TARGET_32BIT ? SImode : word_mode; | |
39182 | } | |
39183 | ||
39184 | /* Target hook for scalar_mode_supported_p. */ | |
39185 | static bool | |
18e2a8b8 | 39186 | rs6000_scalar_mode_supported_p (scalar_mode mode) |
83349046 SB |
39187 | { |
39188 | /* -m32 does not support TImode. This is the default, from | |
39189 | default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the | |
39190 | same ABI as for -m32. But default_scalar_mode_supported_p allows | |
39191 | integer modes of precision 2 * BITS_PER_WORD, which matches TImode | |
39192 | for -mpowerpc64. */ | |
39193 | if (TARGET_32BIT && mode == TImode) | |
39194 | return false; | |
39195 | ||
39196 | if (DECIMAL_FLOAT_MODE_P (mode)) | |
39197 | return default_decimal_float_supported_p (); | |
39198 | else if (TARGET_FLOAT128_TYPE && (mode == KFmode || mode == IFmode)) | |
39199 | return true; | |
39200 | else | |
39201 | return default_scalar_mode_supported_p (mode); | |
39202 | } | |
39203 | ||
39204 | /* Target hook for vector_mode_supported_p. */ | |
39205 | static bool | |
39206 | rs6000_vector_mode_supported_p (machine_mode mode) | |
39207 | { | |
39208 | ||
39209 | if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode)) | |
39210 | return true; | |
39211 | ||
39212 | if (TARGET_SPE && SPE_VECTOR_MODE (mode)) | |
39213 | return true; | |
39214 | ||
39215 | /* There is no vector form for IEEE 128-bit. If we return true for IEEE | |
39216 | 128-bit, the compiler might try to widen IEEE 128-bit to IBM | |
39217 | double-double. */ | |
39218 | else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode)) | |
39219 | return true; | |
39220 | ||
39221 | else | |
39222 | return false; | |
39223 | } | |
39224 | ||
39225 | /* Target hook for floatn_mode. */ | |
1ce87609 | 39226 | static opt_scalar_float_mode |
83349046 SB |
39227 | rs6000_floatn_mode (int n, bool extended) |
39228 | { | |
39229 | if (extended) | |
39230 | { | |
39231 | switch (n) | |
39232 | { | |
39233 | case 32: | |
39234 | return DFmode; | |
39235 | ||
39236 | case 64: | |
39237 | if (TARGET_FLOAT128_KEYWORD) | |
39238 | return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode; | |
39239 | else | |
1ce87609 | 39240 | return opt_scalar_float_mode (); |
83349046 SB |
39241 | |
39242 | case 128: | |
1ce87609 | 39243 | return opt_scalar_float_mode (); |
83349046 SB |
39244 | |
39245 | default: | |
39246 | /* Those are the only valid _FloatNx types. */ | |
39247 | gcc_unreachable (); | |
39248 | } | |
39249 | } | |
39250 | else | |
39251 | { | |
39252 | switch (n) | |
39253 | { | |
39254 | case 32: | |
39255 | return SFmode; | |
39256 | ||
39257 | case 64: | |
39258 | return DFmode; | |
39259 | ||
39260 | case 128: | |
39261 | if (TARGET_FLOAT128_KEYWORD) | |
39262 | return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode; | |
39263 | else | |
1ce87609 | 39264 | return opt_scalar_float_mode (); |
83349046 SB |
39265 | |
39266 | default: | |
1ce87609 | 39267 | return opt_scalar_float_mode (); |
83349046 SB |
39268 | } |
39269 | } | |
39270 | ||
39271 | } | |
39272 | ||
39273 | /* Target hook for c_mode_for_suffix. */ | |
39274 | static machine_mode | |
39275 | rs6000_c_mode_for_suffix (char suffix) | |
39276 | { | |
39277 | if (TARGET_FLOAT128_TYPE) | |
39278 | { | |
39279 | if (suffix == 'q' || suffix == 'Q') | |
39280 | return (FLOAT128_IEEE_P (TFmode)) ? TFmode : KFmode; | |
39281 | ||
39282 | /* At the moment, we are not defining a suffix for IBM extended double. | |
39283 | If/when the default for -mabi=ieeelongdouble is changed, and we want | |
39284 | to support __ibm128 constants in legacy library code, we may need to | |
39285 | re-evalaute this decision. Currently, c-lex.c only supports 'w' and | |
39286 | 'q' as machine dependent suffixes. The x86_64 port uses 'w' for | |
39287 | __float80 constants. */ | |
39288 | } | |
39289 | ||
39290 | return VOIDmode; | |
39291 | } | |
39292 | ||
39293 | /* Target hook for invalid_arg_for_unprototyped_fn. */ | |
39294 | static const char * | |
39295 | invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val) | |
39296 | { | |
39297 | return (!rs6000_darwin64_abi | |
39298 | && typelist == 0 | |
39299 | && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE | |
39300 | && (funcdecl == NULL_TREE | |
39301 | || (TREE_CODE (funcdecl) == FUNCTION_DECL | |
39302 | && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD))) | |
39303 | ? N_("AltiVec argument passed to unprototyped function") | |
39304 | : NULL; | |
39305 | } | |
39306 | ||
39307 | /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register | |
39308 | setup by using __stack_chk_fail_local hidden function instead of | |
39309 | calling __stack_chk_fail directly. Otherwise it is better to call | |
39310 | __stack_chk_fail directly. */ | |
39311 | ||
39312 | static tree ATTRIBUTE_UNUSED | |
39313 | rs6000_stack_protect_fail (void) | |
39314 | { | |
39315 | return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic) | |
39316 | ? default_hidden_stack_protect_fail () | |
39317 | : default_external_stack_protect_fail (); | |
39318 | } | |
39319 | ||
39320 | void | |
39321 | rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED, | |
39322 | int num_operands ATTRIBUTE_UNUSED) | |
39323 | { | |
39324 | if (rs6000_warn_cell_microcode) | |
39325 | { | |
39326 | const char *temp; | |
39327 | int insn_code_number = recog_memoized (insn); | |
39328 | location_t location = INSN_LOCATION (insn); | |
39329 | ||
39330 | /* Punt on insns we cannot recognize. */ | |
39331 | if (insn_code_number < 0) | |
39332 | return; | |
39333 | ||
39334 | /* get_insn_template can modify recog_data, so save and restore it. */ | |
39335 | struct recog_data_d recog_data_save = recog_data; | |
39336 | for (int i = 0; i < recog_data.n_operands; i++) | |
39337 | recog_data.operand[i] = copy_rtx (recog_data.operand[i]); | |
39338 | temp = get_insn_template (insn_code_number, insn); | |
39339 | recog_data = recog_data_save; | |
39340 | ||
39341 | if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS) | |
39342 | warning_at (location, OPT_mwarn_cell_microcode, | |
39343 | "emitting microcode insn %s\t[%s] #%d", | |
39344 | temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn)); | |
39345 | else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL) | |
39346 | warning_at (location, OPT_mwarn_cell_microcode, | |
39347 | "emitting conditional microcode insn %s\t[%s] #%d", | |
39348 | temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn)); | |
39349 | } | |
39350 | } | |
39351 | ||
39352 | /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ | |
39353 | ||
39354 | #if TARGET_ELF | |
39355 | static unsigned HOST_WIDE_INT | |
39356 | rs6000_asan_shadow_offset (void) | |
39357 | { | |
39358 | return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29); | |
39359 | } | |
39360 | #endif | |
39361 | \f | |
39362 | /* Mask options that we want to support inside of attribute((target)) and | |
39363 | #pragma GCC target operations. Note, we do not include things like | |
39364 | 64/32-bit, endianness, hard/soft floating point, etc. that would have | |
39365 | different calling sequences. */ | |
39366 | ||
39367 | struct rs6000_opt_mask { | |
39368 | const char *name; /* option name */ | |
39369 | HOST_WIDE_INT mask; /* mask to set */ | |
39370 | bool invert; /* invert sense of mask */ | |
39371 | bool valid_target; /* option is a target option */ | |
39372 | }; | |
39373 | ||
39374 | static struct rs6000_opt_mask const rs6000_opt_masks[] = | |
39375 | { | |
39376 | { "altivec", OPTION_MASK_ALTIVEC, false, true }, | |
39377 | { "cmpb", OPTION_MASK_CMPB, false, true }, | |
39378 | { "crypto", OPTION_MASK_CRYPTO, false, true }, | |
39379 | { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true }, | |
39380 | { "dlmzb", OPTION_MASK_DLMZB, false, true }, | |
39381 | { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX, | |
39382 | false, true }, | |
39383 | { "float128", OPTION_MASK_FLOAT128_KEYWORD, false, false }, | |
39384 | { "float128-type", OPTION_MASK_FLOAT128_TYPE, false, false }, | |
39385 | { "float128-hardware", OPTION_MASK_FLOAT128_HW, false, false }, | |
39386 | { "fprnd", OPTION_MASK_FPRND, false, true }, | |
39387 | { "hard-dfp", OPTION_MASK_DFP, false, true }, | |
39388 | { "htm", OPTION_MASK_HTM, false, true }, | |
39389 | { "isel", OPTION_MASK_ISEL, false, true }, | |
39390 | { "mfcrf", OPTION_MASK_MFCRF, false, true }, | |
39391 | { "mfpgpr", OPTION_MASK_MFPGPR, false, true }, | |
39392 | { "modulo", OPTION_MASK_MODULO, false, true }, | |
39393 | { "mulhw", OPTION_MASK_MULHW, false, true }, | |
39394 | { "multiple", OPTION_MASK_MULTIPLE, false, true }, | |
39395 | { "popcntb", OPTION_MASK_POPCNTB, false, true }, | |
39396 | { "popcntd", OPTION_MASK_POPCNTD, false, true }, | |
39397 | { "power8-fusion", OPTION_MASK_P8_FUSION, false, true }, | |
39398 | { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true }, | |
39399 | { "power8-vector", OPTION_MASK_P8_VECTOR, false, true }, | |
39400 | { "power9-dform-scalar", OPTION_MASK_P9_DFORM_SCALAR, false, true }, | |
39401 | { "power9-dform-vector", OPTION_MASK_P9_DFORM_VECTOR, false, true }, | |
39402 | { "power9-fusion", OPTION_MASK_P9_FUSION, false, true }, | |
39403 | { "power9-minmax", OPTION_MASK_P9_MINMAX, false, true }, | |
39404 | { "power9-misc", OPTION_MASK_P9_MISC, false, true }, | |
39405 | { "power9-vector", OPTION_MASK_P9_VECTOR, false, true }, | |
39406 | { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true }, | |
39407 | { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true }, | |
39408 | { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true }, | |
39409 | { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true }, | |
39410 | { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true }, | |
39411 | { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true }, | |
39412 | { "string", OPTION_MASK_STRING, false, true }, | |
39413 | { "toc-fusion", OPTION_MASK_TOC_FUSION, false, true }, | |
39414 | { "update", OPTION_MASK_NO_UPDATE, true , true }, | |
39415 | { "upper-regs-di", OPTION_MASK_UPPER_REGS_DI, false, true }, | |
39416 | { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true }, | |
39417 | { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true }, | |
39418 | { "vsx", OPTION_MASK_VSX, false, true }, | |
39419 | { "vsx-small-integer", OPTION_MASK_VSX_SMALL_INTEGER, false, true }, | |
39420 | { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true }, | |
39421 | #ifdef OPTION_MASK_64BIT | |
39422 | #if TARGET_AIX_OS | |
39423 | { "aix64", OPTION_MASK_64BIT, false, false }, | |
39424 | { "aix32", OPTION_MASK_64BIT, true, false }, | |
39425 | #else | |
39426 | { "64", OPTION_MASK_64BIT, false, false }, | |
39427 | { "32", OPTION_MASK_64BIT, true, false }, | |
39428 | #endif | |
39429 | #endif | |
39430 | #ifdef OPTION_MASK_EABI | |
39431 | { "eabi", OPTION_MASK_EABI, false, false }, | |
39432 | #endif | |
39433 | #ifdef OPTION_MASK_LITTLE_ENDIAN | |
39434 | { "little", OPTION_MASK_LITTLE_ENDIAN, false, false }, | |
39435 | { "big", OPTION_MASK_LITTLE_ENDIAN, true, false }, | |
39436 | #endif | |
39437 | #ifdef OPTION_MASK_RELOCATABLE | |
39438 | { "relocatable", OPTION_MASK_RELOCATABLE, false, false }, | |
39439 | #endif | |
39440 | #ifdef OPTION_MASK_STRICT_ALIGN | |
39441 | { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false }, | |
39442 | #endif | |
39443 | { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false }, | |
39444 | { "string", OPTION_MASK_STRING, false, false }, | |
39445 | }; | |
39446 | ||
39447 | /* Builtin mask mapping for printing the flags. */ | |
39448 | static struct rs6000_opt_mask const rs6000_builtin_mask_names[] = | |
39449 | { | |
39450 | { "altivec", RS6000_BTM_ALTIVEC, false, false }, | |
39451 | { "vsx", RS6000_BTM_VSX, false, false }, | |
39452 | { "spe", RS6000_BTM_SPE, false, false }, | |
39453 | { "paired", RS6000_BTM_PAIRED, false, false }, | |
39454 | { "fre", RS6000_BTM_FRE, false, false }, | |
39455 | { "fres", RS6000_BTM_FRES, false, false }, | |
39456 | { "frsqrte", RS6000_BTM_FRSQRTE, false, false }, | |
39457 | { "frsqrtes", RS6000_BTM_FRSQRTES, false, false }, | |
39458 | { "popcntd", RS6000_BTM_POPCNTD, false, false }, | |
39459 | { "cell", RS6000_BTM_CELL, false, false }, | |
39460 | { "power8-vector", RS6000_BTM_P8_VECTOR, false, false }, | |
39461 | { "power9-vector", RS6000_BTM_P9_VECTOR, false, false }, | |
39462 | { "power9-misc", RS6000_BTM_P9_MISC, false, false }, | |
39463 | { "crypto", RS6000_BTM_CRYPTO, false, false }, | |
39464 | { "htm", RS6000_BTM_HTM, false, false }, | |
39465 | { "hard-dfp", RS6000_BTM_DFP, false, false }, | |
39466 | { "hard-float", RS6000_BTM_HARD_FLOAT, false, false }, | |
39467 | { "long-double-128", RS6000_BTM_LDBL128, false, false }, | |
39468 | { "float128", RS6000_BTM_FLOAT128, false, false }, | |
39469 | }; | |
39470 | ||
39471 | /* Option variables that we want to support inside attribute((target)) and | |
39472 | #pragma GCC target operations. */ | |
39473 | ||
39474 | struct rs6000_opt_var { | |
39475 | const char *name; /* option name */ | |
39476 | size_t global_offset; /* offset of the option in global_options. */ | |
39477 | size_t target_offset; /* offset of the option in target options. */ | |
39478 | }; | |
39479 | ||
/* Table of boolean target option variables settable via attribute((target))
   or #pragma GCC target.  Looked up by rs6000_inner_target_options after the
   rs6000_opt_masks table fails to match.  */

static struct rs6000_opt_var const rs6000_opt_vars[] =
{
  { "friz",
    offsetof (struct gcc_options, x_TARGET_FRIZ),
    offsetof (struct cl_target_option, x_TARGET_FRIZ), },
  { "avoid-indexed-addresses",
    offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
    offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
  { "paired",
    offsetof (struct gcc_options, x_rs6000_paired_float),
    offsetof (struct cl_target_option, x_rs6000_paired_float), },
  { "longcall",
    offsetof (struct gcc_options, x_rs6000_default_long_calls),
    offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
  { "optimize-swaps",
    offsetof (struct gcc_options, x_rs6000_optimize_swaps),
    offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
  { "allow-movmisalign",
    offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
    offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
  { "allow-df-permute",
    offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
    offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
  { "sched-groups",
    offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
    offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
  { "always-hint",
    offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
    offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
  { "align-branch-targets",
    offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
    offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
  { "vectorize-builtins",
    offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
    offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
  { "tls-markers",
    offsetof (struct gcc_options, x_tls_markers),
    offsetof (struct cl_target_option, x_tls_markers), },
  { "sched-prolog",
    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
  /* Note: "sched-epilog" deliberately shares x_TARGET_SCHED_PROLOG with
     "sched-prolog"; one variable controls scheduling of both sequences.  */
  { "sched-epilog",
    offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
    offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
  { "gen-cell-microcode",
    offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
    offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
  { "warn-cell-microcode",
    offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
    offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
};
39531 | ||
/* Inner function to handle attribute((target("..."))) and #pragma GCC target
   parsing.  Return true if there were no errors.

   ARGS is either a STRING_CST holding a comma-separated option list, or a
   TREE_LIST of such strings (processed recursively).  ATTR_P selects the
   wording of diagnostics (attribute vs. pragma).

   Side effects: updates rs6000_cpu_index / rs6000_tune_index, the
   rs6000_isa_flags / rs6000_isa_flags_explicit masks, and individual option
   variables inside global_options.  Callers are expected to save/restore the
   target option state around this call.  */

static bool
rs6000_inner_target_options (tree args, bool attr_p)
{
  bool ret = true;

  if (args == NULL_TREE)
    ;

  else if (TREE_CODE (args) == STRING_CST)
    {
      /* ASTRDUP gives a scratch copy because strtok writes NULs into it.  */
      char *p = ASTRDUP (TREE_STRING_POINTER (args));
      char *q;

      while ((q = strtok (p, ",")) != NULL)
	{
	  bool error_p = false;
	  bool not_valid_p = false;	/* matched, but not usable here.  */
	  const char *cpu_opt = NULL;

	  /* NULL tells strtok to continue scanning the same string.  */
	  p = NULL;
	  if (strncmp (q, "cpu=", 4) == 0)
	    {
	      int cpu_index = rs6000_cpu_name_lookup (q+4);
	      if (cpu_index >= 0)
		rs6000_cpu_index = cpu_index;
	      else
		{
		  error_p = true;
		  cpu_opt = q+4;
		}
	    }
	  else if (strncmp (q, "tune=", 5) == 0)
	    {
	      int tune_index = rs6000_cpu_name_lookup (q+5);
	      if (tune_index >= 0)
		rs6000_tune_index = tune_index;
	      else
		{
		  error_p = true;
		  cpu_opt = q+5;
		}
	    }
	  else
	    {
	      size_t i;
	      bool invert = false;
	      char *r = q;

	      /* Assume failure until a table lookup succeeds.  */
	      error_p = true;
	      if (strncmp (r, "no-", 3) == 0)
		{
		  invert = true;
		  r += 3;
		}

	      /* First try the ISA mask table.  */
	      for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
		if (strcmp (r, rs6000_opt_masks[i].name) == 0)
		  {
		    HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;

		    if (!rs6000_opt_masks[i].valid_target)
		      not_valid_p = true;
		    else
		      {
			error_p = false;
			/* Record that the user set this explicitly.  */
			rs6000_isa_flags_explicit |= mask;

			/* VSX needs altivec, so -mvsx automagically sets
			   altivec and disables -mavoid-indexed-addresses.  */
			if (!invert)
			  {
			    if (mask == OPTION_MASK_VSX)
			      {
				mask |= OPTION_MASK_ALTIVEC;
				TARGET_AVOID_XFORM = 0;
			      }
			  }

			/* Some table entries have inverted sense.  */
			if (rs6000_opt_masks[i].invert)
			  invert = !invert;

			if (invert)
			  rs6000_isa_flags &= ~mask;
			else
			  rs6000_isa_flags |= mask;
		      }
		    break;
		  }

	      /* Fall back to the variable-offset table, which writes the
		 boolean directly into global_options by byte offset.  */
	      if (error_p && !not_valid_p)
		{
		  for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
		    if (strcmp (r, rs6000_opt_vars[i].name) == 0)
		      {
			size_t j = rs6000_opt_vars[i].global_offset;
			*((int *) ((char *)&global_options + j)) = !invert;
			error_p = false;
			not_valid_p = false;
			break;
		      }
		}
	    }

	  if (error_p)
	    {
	      const char *eprefix, *esuffix;

	      ret = false;
	      if (attr_p)
		{
		  eprefix = "__attribute__((__target__(";
		  esuffix = ")))";
		}
	      else
		{
		  eprefix = "#pragma GCC target ";
		  esuffix = "";
		}

	      if (cpu_opt)
		error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
		       q, esuffix);
	      else if (not_valid_p)
		error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
	      else
		error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
	    }
	}
    }

  else if (TREE_CODE (args) == TREE_LIST)
    {
      /* Process every string in the list; keep going after an error so all
	 diagnostics are emitted, but remember the failure.  */
      do
	{
	  tree value = TREE_VALUE (args);
	  if (value)
	    {
	      bool ret2 = rs6000_inner_target_options (value, attr_p);
	      if (!ret2)
		ret = false;
	    }
	  args = TREE_CHAIN (args);
	}
      while (args != NULL_TREE);
    }

  else
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  return ret;
}
39689 | ||
39690 | /* Print out the target options as a list for -mdebug=target. */ | |
39691 | ||
39692 | static void | |
39693 | rs6000_debug_target_options (tree args, const char *prefix) | |
39694 | { | |
39695 | if (args == NULL_TREE) | |
39696 | fprintf (stderr, "%s<NULL>", prefix); | |
39697 | ||
39698 | else if (TREE_CODE (args) == STRING_CST) | |
39699 | { | |
39700 | char *p = ASTRDUP (TREE_STRING_POINTER (args)); | |
39701 | char *q; | |
39702 | ||
39703 | while ((q = strtok (p, ",")) != NULL) | |
39704 | { | |
39705 | p = NULL; | |
39706 | fprintf (stderr, "%s\"%s\"", prefix, q); | |
39707 | prefix = ", "; | |
39708 | } | |
39709 | } | |
39710 | ||
39711 | else if (TREE_CODE (args) == TREE_LIST) | |
39712 | { | |
39713 | do | |
39714 | { | |
39715 | tree value = TREE_VALUE (args); | |
39716 | if (value) | |
39717 | { | |
39718 | rs6000_debug_target_options (value, prefix); | |
39719 | prefix = ", "; | |
39720 | } | |
39721 | args = TREE_CHAIN (args); | |
39722 | } | |
39723 | while (args != NULL_TREE); | |
39724 | } | |
39725 | ||
39726 | else | |
39727 | gcc_unreachable (); | |
39728 | ||
39729 | return; | |
39730 | } | |
39731 | ||
39732 | \f | |
39733 | /* Hook to validate attribute((target("..."))). */ | |
39734 | ||
39735 | static bool | |
39736 | rs6000_valid_attribute_p (tree fndecl, | |
39737 | tree ARG_UNUSED (name), | |
39738 | tree args, | |
39739 | int flags) | |
39740 | { | |
39741 | struct cl_target_option cur_target; | |
39742 | bool ret; | |
39743 | tree old_optimize = build_optimization_node (&global_options); | |
39744 | tree new_target, new_optimize; | |
39745 | tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); | |
39746 | ||
39747 | gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE)); | |
39748 | ||
39749 | if (TARGET_DEBUG_TARGET) | |
39750 | { | |
39751 | tree tname = DECL_NAME (fndecl); | |
39752 | fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n"); | |
39753 | if (tname) | |
39754 | fprintf (stderr, "function: %.*s\n", | |
39755 | (int) IDENTIFIER_LENGTH (tname), | |
39756 | IDENTIFIER_POINTER (tname)); | |
39757 | else | |
39758 | fprintf (stderr, "function: unknown\n"); | |
39759 | ||
39760 | fprintf (stderr, "args:"); | |
39761 | rs6000_debug_target_options (args, " "); | |
39762 | fprintf (stderr, "\n"); | |
39763 | ||
39764 | if (flags) | |
39765 | fprintf (stderr, "flags: 0x%x\n", flags); | |
39766 | ||
39767 | fprintf (stderr, "--------------------\n"); | |
39768 | } | |
39769 | ||
39770 | old_optimize = build_optimization_node (&global_options); | |
39771 | func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); | |
39772 | ||
39773 | /* If the function changed the optimization levels as well as setting target | |
39774 | options, start with the optimizations specified. */ | |
39775 | if (func_optimize && func_optimize != old_optimize) | |
39776 | cl_optimization_restore (&global_options, | |
39777 | TREE_OPTIMIZATION (func_optimize)); | |
39778 | ||
39779 | /* The target attributes may also change some optimization flags, so update | |
39780 | the optimization options if necessary. */ | |
39781 | cl_target_option_save (&cur_target, &global_options); | |
39782 | rs6000_cpu_index = rs6000_tune_index = -1; | |
39783 | ret = rs6000_inner_target_options (args, true); | |
39784 | ||
39785 | /* Set up any additional state. */ | |
39786 | if (ret) | |
39787 | { | |
39788 | ret = rs6000_option_override_internal (false); | |
39789 | new_target = build_target_option_node (&global_options); | |
39790 | } | |
39791 | else | |
39792 | new_target = NULL; | |
39793 | ||
39794 | new_optimize = build_optimization_node (&global_options); | |
39795 | ||
39796 | if (!new_target) | |
39797 | ret = false; | |
39798 | ||
39799 | else if (fndecl) | |
39800 | { | |
39801 | DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; | |
39802 | ||
39803 | if (old_optimize != new_optimize) | |
39804 | DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; | |
39805 | } | |
39806 | ||
39807 | cl_target_option_restore (&global_options, &cur_target); | |
39808 | ||
39809 | if (old_optimize != new_optimize) | |
39810 | cl_optimization_restore (&global_options, | |
39811 | TREE_OPTIMIZATION (old_optimize)); | |
39812 | ||
39813 | return ret; | |
39814 | } | |
39815 | ||
39816 | \f | |
/* Hook to validate the current #pragma GCC target and set the state, and
   update the macros based on what was changed.  If ARGS is NULL, then
   POP_TARGET is used to reset the options.  Returns false if the pragma did
   not parse or the resulting option combination was rejected.  */

bool
rs6000_pragma_target_parse (tree args, tree pop_target)
{
  /* Capture the pre-pragma target node first so the macro diffing below can
     compare old vs. new state.  */
  tree prev_tree = build_target_option_node (&global_options);
  tree cur_tree;
  struct cl_target_option *prev_opt, *cur_opt;
  HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
  HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;

  if (TARGET_DEBUG_TARGET)
    {
      fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
      fprintf (stderr, "args:");
      rs6000_debug_target_options (args, " ");
      fprintf (stderr, "\n");

      if (pop_target)
	{
	  fprintf (stderr, "pop_target:\n");
	  debug_tree (pop_target);
	}
      else
	fprintf (stderr, "pop_target: <NULL>\n");

      fprintf (stderr, "--------------------\n");
    }

  if (! args)
    {
      /* #pragma GCC pop/reset: restore either the explicitly popped state or
	 the command-line default.  */
      cur_tree = ((pop_target)
		  ? pop_target
		  : target_option_default_node);
      cl_target_option_restore (&global_options,
				TREE_TARGET_OPTION (cur_tree));
    }
  else
    {
      /* Parse the pragma string(s), re-derive dependent options, and build
	 the new target node; any failure leaves the pragma rejected.  */
      rs6000_cpu_index = rs6000_tune_index = -1;
      if (!rs6000_inner_target_options (args, false)
	  || !rs6000_option_override_internal (false)
	  || (cur_tree = build_target_option_node (&global_options))
	     == NULL_TREE)
	{
	  if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
	    fprintf (stderr, "invalid pragma\n");

	  return false;
	}
    }

  target_option_current_node = cur_tree;

  /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
     change the macros that are defined.  */
  if (rs6000_target_modify_macros_ptr)
    {
      prev_opt    = TREE_TARGET_OPTION (prev_tree);
      prev_bumask = prev_opt->x_rs6000_builtin_mask;
      prev_flags  = prev_opt->x_rs6000_isa_flags;

      cur_opt     = TREE_TARGET_OPTION (cur_tree);
      cur_flags   = cur_opt->x_rs6000_isa_flags;
      cur_bumask  = cur_opt->x_rs6000_builtin_mask;

      diff_bumask = (prev_bumask ^ cur_bumask);
      diff_flags  = (prev_flags ^ cur_flags);

      /* Only touch macros whose controlling bits actually changed.  */
      if ((diff_flags != 0) || (diff_bumask != 0))
	{
	  /* Delete old macros.  */
	  rs6000_target_modify_macros_ptr (false,
					   prev_flags & diff_flags,
					   prev_bumask & diff_bumask);

	  /* Define new macros.  */
	  rs6000_target_modify_macros_ptr (true,
					   cur_flags & diff_flags,
					   cur_bumask & diff_bumask);
	}
    }

  return true;
}
39904 | ||
39905 | \f | |
/* Remember the last target of rs6000_set_current_function, so repeated calls
   for the same FNDECL can return without re-switching option state.  GTY
   marks it as a GC root since it holds a tree across collections.  */
static GTY(()) tree rs6000_previous_fndecl;
39908 | ||
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  Switches global option state (and
   the per-target globals) to match FNDECL's target attributes, if any.  */
static void
rs6000_set_current_function (tree fndecl)
{
  /* Target node of the previously processed function, if any.  */
  tree old_tree = (rs6000_previous_fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
		   : NULL_TREE);

  /* Target node of the function we are switching to, if any.  */
  tree new_tree = (fndecl
		   ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
		   : NULL_TREE);

  if (TARGET_DEBUG_TARGET)
    {
      bool print_final = false;
      fprintf (stderr, "\n==================== rs6000_set_current_function");

      if (fndecl)
	fprintf (stderr, ", fndecl %s (%p)",
		 (DECL_NAME (fndecl)
		  ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
		  : "<unknown>"), (void *)fndecl);

      if (rs6000_previous_fndecl)
	fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);

      fprintf (stderr, "\n");
      if (new_tree)
	{
	  fprintf (stderr, "\nnew fndecl target specific options:\n");
	  debug_tree (new_tree);
	  print_final = true;
	}

      if (old_tree)
	{
	  fprintf (stderr, "\nold fndecl target specific options:\n");
	  debug_tree (old_tree);
	  print_final = true;
	}

      if (print_final)
	fprintf (stderr, "--------------------\n");
    }

  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl && fndecl != rs6000_previous_fndecl)
    {
      rs6000_previous_fndecl = fndecl;
      if (old_tree == new_tree)
	/* Same target state as before; nothing to switch.  */
	;

      else if (new_tree && new_tree != target_option_default_node)
	{
	  /* Switch to the new function's saved target state, restoring (or
	     lazily creating) the matching per-target globals.  */
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  if (TREE_TARGET_GLOBALS (new_tree))
	    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
	  else
	    TREE_TARGET_GLOBALS (new_tree)
	      = save_target_globals_default_opts ();
	}

      else if (old_tree && old_tree != target_option_default_node)
	{
	  /* The previous function had special options but this one does not:
	     fall back to the current top-level target state.  */
	  new_tree = target_option_current_node;
	  cl_target_option_restore (&global_options,
				    TREE_TARGET_OPTION (new_tree));
	  if (TREE_TARGET_GLOBALS (new_tree))
	    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
	  else if (new_tree == target_option_default_node)
	    restore_target_globals (&default_target_globals);
	  else
	    TREE_TARGET_GLOBALS (new_tree)
	      = save_target_globals_default_opts ();
	}
    }
}
39991 | ||
39992 | \f | |
39993 | /* Save the current options */ | |
39994 | ||
39995 | static void | |
39996 | rs6000_function_specific_save (struct cl_target_option *ptr, | |
39997 | struct gcc_options *opts) | |
39998 | { | |
39999 | ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags; | |
40000 | ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit; | |
40001 | } | |
40002 | ||
40003 | /* Restore the current options */ | |
40004 | ||
40005 | static void | |
40006 | rs6000_function_specific_restore (struct gcc_options *opts, | |
40007 | struct cl_target_option *ptr) | |
40008 | ||
40009 | { | |
40010 | opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags; | |
40011 | opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit; | |
40012 | (void) rs6000_option_override_internal (false); | |
40013 | } | |
40014 | ||
40015 | /* Print the current options */ | |
40016 | ||
40017 | static void | |
40018 | rs6000_function_specific_print (FILE *file, int indent, | |
40019 | struct cl_target_option *ptr) | |
40020 | { | |
40021 | rs6000_print_isa_options (file, indent, "Isa options set", | |
40022 | ptr->x_rs6000_isa_flags); | |
40023 | ||
40024 | rs6000_print_isa_options (file, indent, "Isa options explicit", | |
40025 | ptr->x_rs6000_isa_flags_explicit); | |
40026 | } | |
40027 | ||
40028 | /* Helper function to print the current isa or misc options on a line. */ | |
40029 | ||
40030 | static void | |
40031 | rs6000_print_options_internal (FILE *file, | |
40032 | int indent, | |
40033 | const char *string, | |
40034 | HOST_WIDE_INT flags, | |
40035 | const char *prefix, | |
40036 | const struct rs6000_opt_mask *opts, | |
40037 | size_t num_elements) | |
40038 | { | |
40039 | size_t i; | |
40040 | size_t start_column = 0; | |
40041 | size_t cur_column; | |
40042 | size_t max_column = 120; | |
40043 | size_t prefix_len = strlen (prefix); | |
40044 | size_t comma_len = 0; | |
40045 | const char *comma = ""; | |
40046 | ||
40047 | if (indent) | |
40048 | start_column += fprintf (file, "%*s", indent, ""); | |
40049 | ||
40050 | if (!flags) | |
40051 | { | |
40052 | fprintf (stderr, DEBUG_FMT_S, string, "<none>"); | |
40053 | return; | |
40054 | } | |
40055 | ||
40056 | start_column += fprintf (stderr, DEBUG_FMT_WX, string, flags); | |
40057 | ||
40058 | /* Print the various mask options. */ | |
40059 | cur_column = start_column; | |
40060 | for (i = 0; i < num_elements; i++) | |
40061 | { | |
40062 | bool invert = opts[i].invert; | |
40063 | const char *name = opts[i].name; | |
40064 | const char *no_str = ""; | |
40065 | HOST_WIDE_INT mask = opts[i].mask; | |
40066 | size_t len = comma_len + prefix_len + strlen (name); | |
40067 | ||
40068 | if (!invert) | |
40069 | { | |
40070 | if ((flags & mask) == 0) | |
40071 | { | |
40072 | no_str = "no-"; | |
40073 | len += sizeof ("no-") - 1; | |
40074 | } | |
40075 | ||
40076 | flags &= ~mask; | |
40077 | } | |
40078 | ||
40079 | else | |
40080 | { | |
40081 | if ((flags & mask) != 0) | |
40082 | { | |
40083 | no_str = "no-"; | |
40084 | len += sizeof ("no-") - 1; | |
40085 | } | |
40086 | ||
40087 | flags |= mask; | |
40088 | } | |
40089 | ||
40090 | cur_column += len; | |
40091 | if (cur_column > max_column) | |
40092 | { | |
40093 | fprintf (stderr, ", \\\n%*s", (int)start_column, ""); | |
40094 | cur_column = start_column + len; | |
40095 | comma = ""; | |
40096 | } | |
40097 | ||
40098 | fprintf (file, "%s%s%s%s", comma, prefix, no_str, name); | |
40099 | comma = ", "; | |
40100 | comma_len = sizeof (", ") - 1; | |
40101 | } | |
40102 | ||
40103 | fputs ("\n", file); | |
40104 | } | |
40105 | ||
40106 | /* Helper function to print the current isa options on a line. */ | |
40107 | ||
40108 | static void | |
40109 | rs6000_print_isa_options (FILE *file, int indent, const char *string, | |
40110 | HOST_WIDE_INT flags) | |
40111 | { | |
40112 | rs6000_print_options_internal (file, indent, string, flags, "-m", | |
40113 | &rs6000_opt_masks[0], | |
40114 | ARRAY_SIZE (rs6000_opt_masks)); | |
40115 | } | |
40116 | ||
40117 | static void | |
40118 | rs6000_print_builtin_options (FILE *file, int indent, const char *string, | |
40119 | HOST_WIDE_INT flags) | |
40120 | { | |
40121 | rs6000_print_options_internal (file, indent, string, flags, "", | |
40122 | &rs6000_builtin_mask_names[0], | |
40123 | ARRAY_SIZE (rs6000_builtin_mask_names)); | |
40124 | } | |
40125 | ||
/* If the user used -mno-vsx, we need turn off all of the implicit ISA 2.06,
   2.07, and 3.0 options that relate to the vector unit (-mdirect-move,
   -mvsx-timode, -mupper-regs-df).

   If the user used -mno-power8-vector, we need to turn off all of the implicit
   ISA 2.07 and 3.0 options that relate to the vector unit.

   If the user used -mno-power9-vector, we need to turn off all of the implicit
   ISA 3.0 options that relate to the vector unit.

   This function does not handle explicit options such as the user specifying
   -mdirect-move.  These are handled in rs6000_option_override_internal, and
   the appropriate error is given if needed.

   We return a mask of all of the implicit options that should not be enabled
   by default.  */

static HOST_WIDE_INT
rs6000_disable_incompatible_switches (void)
{
  HOST_WIDE_INT ignore_masks = rs6000_isa_flags_explicit;
  size_t i, j;

  /* Table ordered from the newest/most-dependent option to the oldest, so a
     "no-" on an older option also sweeps up what the newer entries imply.  */
  static const struct {
    const HOST_WIDE_INT no_flag;	/* flag explicitly turned off.  */
    const HOST_WIDE_INT dep_flags;	/* flags that depend on this option.  */
    const char *const name;		/* name of the switch.  */
  } flags[] = {
    { OPTION_MASK_P9_VECTOR,	OTHER_P9_VECTOR_MASKS,	"power9-vector"	},
    { OPTION_MASK_P8_VECTOR,	OTHER_P8_VECTOR_MASKS,	"power8-vector"	},
    { OPTION_MASK_VSX,		OTHER_VSX_VECTOR_MASKS,	"vsx"		},
  };

  for (i = 0; i < ARRAY_SIZE (flags); i++)
    {
      HOST_WIDE_INT no_flag = flags[i].no_flag;

      /* Only act when the option is off AND the user said so explicitly.  */
      if ((rs6000_isa_flags & no_flag) == 0
	  && (rs6000_isa_flags_explicit & no_flag) != 0)
	{
	  HOST_WIDE_INT dep_flags = flags[i].dep_flags;
	  /* Dependent options the user ALSO explicitly enabled conflict with
	     the explicit disable; report each by name.  */
	  HOST_WIDE_INT set_flags = (rs6000_isa_flags_explicit
				     & rs6000_isa_flags
				     & dep_flags);

	  if (set_flags)
	    {
	      for (j = 0; j < ARRAY_SIZE (rs6000_opt_masks); j++)
		if ((set_flags & rs6000_opt_masks[j].mask) != 0)
		  {
		    set_flags &= ~rs6000_opt_masks[j].mask;
		    error ("-mno-%s turns off -m%s",
			   flags[i].name,
			   rs6000_opt_masks[j].name);
		  }

	      /* Every conflicting bit must have a named table entry.  */
	      gcc_assert (!set_flags);
	    }

	  /* Silently drop the implicitly-enabled dependents and keep them
	     from being re-enabled by default later.  */
	  rs6000_isa_flags &= ~dep_flags;
	  ignore_masks |= no_flag | dep_flags;
	}
    }

  /* -mpower9-dform is tracked by a separate variable, not an ISA mask, so it
     needs its own check against an explicit -mno-power9-vector.  */
  if (!TARGET_P9_VECTOR
      && (rs6000_isa_flags_explicit & OPTION_MASK_P9_VECTOR) != 0
      && TARGET_P9_DFORM_BOTH > 0)
    {
      error ("-mno-power9-vector turns off -mpower9-dform");
      TARGET_P9_DFORM_BOTH = 0;
    }

  return ignore_masks;
}
40200 | ||
40201 | \f | |
40202 | /* Hook to determine if one function can safely inline another. */ | |
40203 | ||
40204 | static bool | |
40205 | rs6000_can_inline_p (tree caller, tree callee) | |
40206 | { | |
40207 | bool ret = false; | |
40208 | tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); | |
40209 | tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); | |
40210 | ||
40211 | /* If callee has no option attributes, then it is ok to inline. */ | |
40212 | if (!callee_tree) | |
40213 | ret = true; | |
40214 | ||
40215 | /* If caller has no option attributes, but callee does then it is not ok to | |
40216 | inline. */ | |
40217 | else if (!caller_tree) | |
40218 | ret = false; | |
40219 | ||
40220 | else | |
40221 | { | |
40222 | struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); | |
40223 | struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); | |
40224 | ||
40225 | /* Callee's options should a subset of the caller's, i.e. a vsx function | |
40226 | can inline an altivec function but a non-vsx function can't inline a | |
40227 | vsx function. */ | |
40228 | if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags) | |
40229 | == callee_opts->x_rs6000_isa_flags) | |
40230 | ret = true; | |
40231 | } | |
40232 | ||
40233 | if (TARGET_DEBUG_TARGET) | |
40234 | fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n", | |
40235 | (DECL_NAME (caller) | |
40236 | ? IDENTIFIER_POINTER (DECL_NAME (caller)) | |
40237 | : "<unknown>"), | |
40238 | (DECL_NAME (callee) | |
40239 | ? IDENTIFIER_POINTER (DECL_NAME (callee)) | |
40240 | : "<unknown>"), | |
40241 | (ret ? "can" : "cannot")); | |
40242 | ||
40243 | return ret; | |
40244 | } | |
40245 | \f | |
40246 | /* Allocate a stack temp and fixup the address so it meets the particular | |
40247 | memory requirements (either offetable or REG+REG addressing). */ | |
40248 | ||
40249 | rtx | |
40250 | rs6000_allocate_stack_temp (machine_mode mode, | |
40251 | bool offsettable_p, | |
40252 | bool reg_reg_p) | |
40253 | { | |
40254 | rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode)); | |
40255 | rtx addr = XEXP (stack, 0); | |
40256 | int strict_p = (reload_in_progress || reload_completed); | |
40257 | ||
40258 | if (!legitimate_indirect_address_p (addr, strict_p)) | |
40259 | { | |
40260 | if (offsettable_p | |
40261 | && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true)) | |
40262 | stack = replace_equiv_address (stack, copy_addr_to_reg (addr)); | |
40263 | ||
40264 | else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p)) | |
40265 | stack = replace_equiv_address (stack, copy_addr_to_reg (addr)); | |
40266 | } | |
40267 | ||
40268 | return stack; | |
40269 | } | |
40270 | ||
/* Given a memory reference, if it is not a reg or reg+reg addressing, convert
   to such a form to deal with memory reference instructions like STFIWX that
   only take reg+reg addressing.  May emit instructions (register adds, moves)
   before the current insertion point; returns the possibly-rewritten MEM.  */

rtx
rs6000_address_for_fpconvert (rtx x)
{
  int strict_p = (reload_in_progress || reload_completed);
  rtx addr;

  gcc_assert (MEM_P (x));
  addr = XEXP (x, 0);
  if (! legitimate_indirect_address_p (addr, strict_p)
      && ! legitimate_indexed_address_p (addr, strict_p))
    {
      if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
	{
	  /* Perform the pre-increment/decrement explicitly up front, then
	     address through the updated base register.  */
	  rtx reg = XEXP (addr, 0);
	  HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
	  rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
	  gcc_assert (REG_P (reg));
	  emit_insn (gen_add3_insn (reg, reg, size_rtx));
	  addr = reg;
	}
      else if (GET_CODE (addr) == PRE_MODIFY)
	{
	  /* Likewise for PRE_MODIFY: apply the side-effect expression first.
	     The expression is required to be reg-plus-something.  */
	  rtx reg = XEXP (addr, 0);
	  rtx expr = XEXP (addr, 1);
	  gcc_assert (REG_P (reg));
	  gcc_assert (GET_CODE (expr) == PLUS);
	  emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
	  addr = reg;
	}

      /* Force whatever address remains into a single register (plain
	 indirect form), keeping the MEM's attributes.  */
      x = replace_equiv_address (x, copy_addr_to_reg (addr));
    }

  return x;
}
40310 | ||
40311 | /* Given a memory reference, if it is not in the form for altivec memory | |
40312 | reference instructions (i.e. reg or reg+reg addressing with AND of -16), | |
40313 | convert to the altivec format. */ | |
40314 | ||
40315 | rtx | |
40316 | rs6000_address_for_altivec (rtx x) | |
40317 | { | |
40318 | gcc_assert (MEM_P (x)); | |
40319 | if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x))) | |
40320 | { | |
40321 | rtx addr = XEXP (x, 0); | |
40322 | int strict_p = (reload_in_progress || reload_completed); | |
40323 | ||
40324 | if (!legitimate_indexed_address_p (addr, strict_p) | |
40325 | && !legitimate_indirect_address_p (addr, strict_p)) | |
40326 | addr = copy_to_mode_reg (Pmode, addr); | |
40327 | ||
40328 | addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16)); | |
40329 | x = change_address (x, GET_MODE (x), addr); | |
40330 | } | |
40331 | ||
40332 | return x; | |
40333 | } | |
40334 | ||
40335 | /* Implement TARGET_LEGITIMATE_CONSTANT_P. | |
40336 | ||
40337 | On the RS/6000, all integer constants are acceptable, most won't be valid | |
40338 | for particular insns, though. Only easy FP constants are acceptable. */ | |
40339 | ||
40340 | static bool | |
40341 | rs6000_legitimate_constant_p (machine_mode mode, rtx x) | |
40342 | { | |
40343 | if (TARGET_ELF && tls_referenced_p (x)) | |
40344 | return false; | |
40345 | ||
40346 | return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR) | |
40347 | || GET_MODE (x) == VOIDmode | |
40348 | || (TARGET_POWERPC64 && mode == DImode) | |
40349 | || easy_fp_constant (x, mode) | |
40350 | || easy_vector_constant (x, mode)); | |
40351 | } | |
40352 | ||
40353 | \f | |
40354 | /* Return TRUE iff the sequence ending in LAST sets the static chain. */ | |
40355 | ||
40356 | static bool | |
40357 | chain_already_loaded (rtx_insn *last) | |
40358 | { | |
40359 | for (; last != NULL; last = PREV_INSN (last)) | |
40360 | { | |
40361 | if (NONJUMP_INSN_P (last)) | |
40362 | { | |
40363 | rtx patt = PATTERN (last); | |
40364 | ||
40365 | if (GET_CODE (patt) == SET) | |
40366 | { | |
40367 | rtx lhs = XEXP (patt, 0); | |
40368 | ||
40369 | if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM) | |
40370 | return true; | |
40371 | } | |
40372 | } | |
40373 | } | |
40374 | return false; | |
40375 | } | |
40376 | ||
/* Expand code to perform a call under the AIX or ELFv2 ABI.

   VALUE is NULL_RTX for a call returning no value, otherwise the
   register receiving the result.  FUNC_DESC is the call target: a
   SYMBOL_REF for a direct call, otherwise an address (under AIX, the
   address of a three-word function descriptor).  FLAG is the extra
   operand of the CALL rtx, and COOKIE holds the CALL_* flag bits
   (e.g. CALL_LONG).  Emits the call insn plus any TOC save/restore
   and static-chain setup the ABI requires.  */

void
rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
{
  /* True when FUNC_DESC is statically known to be an actual function
     symbol rather than a descriptor pointer.  */
  const bool direct_call_p
    = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
  rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
  rtx toc_load = NULL_RTX;
  rtx toc_restore = NULL_RTX;
  rtx func_addr;
  rtx abi_reg = NULL_RTX;
  rtx call[4];
  int n_call;
  rtx insn;

  /* Handle longcall attributes.  */
  if (INTVAL (cookie) & CALL_LONG)
    func_desc = rs6000_longcall_ref (func_desc);

  /* Handle indirect calls.  */
  if (GET_CODE (func_desc) != SYMBOL_REF
      || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
    {
      /* Save the TOC into its reserved slot before the call,
	 and prepare to restore it after the call.  */
      rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
      rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
      rtx stack_toc_mem = gen_frame_mem (Pmode,
					 gen_rtx_PLUS (Pmode, stack_ptr,
						       stack_toc_offset));
      /* The restore is expressed as an UNSPEC over just the slot offset
	 so later passes cannot combine or delete it.  */
      rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
					     gen_rtvec (1, stack_toc_offset),
					     UNSPEC_TOCSLOT);
      toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);

      /* Can we optimize saving the TOC in the prologue or
	 do we need to do it at every call?  */
      if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
	cfun->machine->save_toc_in_prologue = true;
      else
	{
	  /* Volatile so the per-call save is never deleted as dead.  */
	  MEM_VOLATILE_P (stack_toc_mem) = 1;
	  emit_move_insn (stack_toc_mem, toc_reg);
	}

      if (DEFAULT_ABI == ABI_ELFv2)
	{
	  /* A function pointer in the ELFv2 ABI is just a plain address, but
	     the ABI requires it to be loaded into r12 before the call.  */
	  func_addr = gen_rtx_REG (Pmode, 12);
	  emit_move_insn (func_addr, func_desc);
	  abi_reg = func_addr;
	}
      else
	{
	  /* A function pointer under AIX is a pointer to a data area whose
	     first word contains the actual address of the function, whose
	     second word contains a pointer to its TOC, and whose third word
	     contains a value to place in the static chain register (r11).
	     Note that if we load the static chain, our "trampoline" need
	     not have any executable code.  */

	  /* Load up address of the actual function.  */
	  func_desc = force_reg (Pmode, func_desc);
	  func_addr = gen_reg_rtx (Pmode);
	  emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));

	  /* Prepare to load the TOC of the called function.  Note that the
	     TOC load must happen immediately before the actual call so
	     that unwinding the TOC registers works correctly.  See the
	     comment in frob_update_context.  */
	  rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
	  rtx func_toc_mem = gen_rtx_MEM (Pmode,
					  gen_rtx_PLUS (Pmode, func_desc,
							func_toc_offset));
	  toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);

	  /* If we have a static chain, load it up.  But, if the call was
	     originally direct, the 3rd word has not been written since no
	     trampoline has been built, so we ought not to load it, lest we
	     override a static chain value.  */
	  if (!direct_call_p
	      && TARGET_POINTERS_TO_NESTED_FUNCTIONS
	      && !chain_already_loaded (get_current_sequence ()->next->last))
	    {
	      rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
	      rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
	      rtx func_sc_mem = gen_rtx_MEM (Pmode,
					     gen_rtx_PLUS (Pmode, func_desc,
							   func_sc_offset));
	      emit_move_insn (sc_reg, func_sc_mem);
	      abi_reg = sc_reg;
	    }
	}
    }
  else
    {
      /* Direct calls use the TOC: for local calls, the callee will
	 assume the TOC register is set; for non-local calls, the
	 PLT stub needs the TOC register.  */
      abi_reg = toc_reg;
      func_addr = func_desc;
    }

  /* Create the call.  The CALL (possibly wrapped in a SET of the return
     value), any TOC load/restore, and the LR clobber are collected into
     one PARALLEL so they form a single insn.  */
  call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
  if (value != NULL_RTX)
    call[0] = gen_rtx_SET (value, call[0]);
  n_call = 1;

  if (toc_load)
    call[n_call++] = toc_load;
  if (toc_restore)
    call[n_call++] = toc_restore;

  call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));

  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
  insn = emit_call_insn (insn);

  /* Mention all registers defined by the ABI to hold information
     as uses in CALL_INSN_FUNCTION_USAGE.  */
  if (abi_reg)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
}
40503 | ||
40504 | /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */ | |
40505 | ||
40506 | void | |
40507 | rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie) | |
40508 | { | |
40509 | rtx call[2]; | |
40510 | rtx insn; | |
40511 | ||
40512 | gcc_assert (INTVAL (cookie) == 0); | |
40513 | ||
40514 | /* Create the call. */ | |
40515 | call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag); | |
40516 | if (value != NULL_RTX) | |
40517 | call[0] = gen_rtx_SET (value, call[0]); | |
40518 | ||
40519 | call[1] = simple_return_rtx; | |
40520 | ||
40521 | insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call)); | |
40522 | insn = emit_call_insn (insn); | |
40523 | ||
40524 | /* Note use of the TOC register. */ | |
40525 | use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM)); | |
40526 | } | |
40527 | ||
40528 | /* Return whether we need to always update the saved TOC pointer when we update | |
40529 | the stack pointer. */ | |
40530 | ||
40531 | static bool | |
40532 | rs6000_save_toc_in_prologue_p (void) | |
40533 | { | |
40534 | return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue); | |
40535 | } | |
40536 | ||
40537 | #ifdef HAVE_GAS_HIDDEN | |
40538 | # define USE_HIDDEN_LINKONCE 1 | |
40539 | #else | |
40540 | # define USE_HIDDEN_LINKONCE 0 | |
40541 | #endif | |
40542 | ||
40543 | /* Fills in the label name that should be used for a 476 link stack thunk. */ | |
40544 | ||
40545 | void | |
40546 | get_ppc476_thunk_name (char name[32]) | |
40547 | { | |
40548 | gcc_assert (TARGET_LINK_STACK); | |
40549 | ||
40550 | if (USE_HIDDEN_LINKONCE) | |
40551 | sprintf (name, "__ppc476.get_thunk"); | |
40552 | else | |
40553 | ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0); | |
40554 | } | |
40555 | ||
/* This function emits the simple thunk routine that is used to preserve
   the link stack on the 476 cpu.  The thunk body is a single "blr";
   NOTE(review): presumably installed as the TARGET_ASM_CODE_END hook so
   it runs once at the end of assembly output — confirm at the hook
   definition site.  */

static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
static void
rs6000_code_end (void)
{
  char name[32];
  tree decl;

  if (!TARGET_LINK_STACK)
    return;

  get_ppc476_thunk_name (name);

  /* Build a minimal FUNCTION_DECL for the thunk so it can be emitted
     through the normal function-output machinery.  */
  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;

#if RS6000_WEAK
  if (USE_HIDDEN_LINKONCE && !TARGET_XCOFF)
    {
      /* Emit the thunk as a weak, hidden comdat symbol so that multiple
	 objects defining it collapse to one copy at link time.  */
      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
      targetm.asm_out.unique_section (decl, 0);
      switch_to_section (get_named_section (decl, NULL, 0));
      DECL_WEAK (decl) = 1;
      ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
      targetm.asm_out.globalize_label (asm_out_file, name);
      targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
      ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
    }
  else
#endif
    {
      /* Fallback: plain local label in the text section.  */
      switch_to_section (text_section);
      ASM_OUTPUT_LABEL (asm_out_file, name);
    }

  /* Set up just enough function state for final_start_function.  */
  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  first_function_block_is_cold = false;
  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), asm_out_file, 1);

  /* The entire thunk body: return to the caller.  */
  fputs ("\tblr\n", asm_out_file);

  /* Tear the temporary function context back down.  */
  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);
  set_cfun (NULL);
  current_function_decl = NULL;
}
40613 | ||
40614 | /* Add r30 to hard reg set if the prologue sets it up and it is not | |
40615 | pic_offset_table_rtx. */ | |
40616 | ||
40617 | static void | |
40618 | rs6000_set_up_by_prologue (struct hard_reg_set_container *set) | |
40619 | { | |
40620 | if (!TARGET_SINGLE_PIC_BASE | |
40621 | && TARGET_TOC | |
40622 | && TARGET_MINIMAL_TOC | |
40623 | && !constant_pool_empty_p ()) | |
40624 | add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM); | |
40625 | if (cfun->machine->split_stack_argp_used) | |
40626 | add_to_hard_reg_set (&set->set, Pmode, 12); | |
40627 | } | |
40628 | ||
40629 | \f | |
/* Helper function for rs6000_split_logical to emit a logical instruction after
   spliting the operation to single GPR registers.

   DEST is the destination register.
   OP1 and OP2 are the input source registers.
   CODE is the base operation (AND, IOR, XOR, NOT).
   MODE is the machine mode.
   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */

static void
rs6000_split_logical_inner (rtx dest,
			    rtx op1,
			    rtx op2,
			    enum rtx_code code,
			    machine_mode mode,
			    bool complement_final_p,
			    bool complement_op1_p,
			    bool complement_op2_p)
{
  rtx bool_rtx;

  /* Optimize AND of 0/0xffffffff and IOR/XOR of 0.  These shortcuts only
     apply when no operand is complemented.  */
  if (op2 && GET_CODE (op2) == CONST_INT
      && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
      && !complement_final_p && !complement_op1_p && !complement_op2_p)
    {
      HOST_WIDE_INT mask = GET_MODE_MASK (mode);
      HOST_WIDE_INT value = INTVAL (op2) & mask;

      /* Optimize AND of 0 to just set 0.  Optimize AND of -1 to be a move.  */
      if (code == AND)
	{
	  if (value == 0)
	    {
	      emit_insn (gen_rtx_SET (dest, const0_rtx));
	      return;
	    }

	  else if (value == mask)
	    {
	      if (!rtx_equal_p (dest, op1))
		emit_insn (gen_rtx_SET (dest, op1));
	      return;
	    }
	}

      /* Optimize IOR/XOR of 0 to be a simple move.  Split large operations
	 into separate ORI/ORIS or XORI/XORIS instructions.  */
      else if (code == IOR || code == XOR)
	{
	  if (value == 0)
	    {
	      if (!rtx_equal_p (dest, op1))
		emit_insn (gen_rtx_SET (dest, op1));
	      return;
	    }
	}
    }

  /* Plain SImode AND goes through the named pattern so a CC-clobber
     scratch is allocated for it.  */
  if (code == AND && mode == SImode
      && !complement_final_p && !complement_op1_p && !complement_op2_p)
    {
      emit_insn (gen_andsi3 (dest, op1, op2));
      return;
    }

  if (complement_op1_p)
    op1 = gen_rtx_NOT (mode, op1);

  if (complement_op2_p)
    op2 = gen_rtx_NOT (mode, op2);

  /* For canonical RTL, if only one arm is inverted it is the first.  */
  if (!complement_op1_p && complement_op2_p)
    std::swap (op1, op2);

  bool_rtx = ((code == NOT)
	      ? gen_rtx_NOT (mode, op1)
	      : gen_rtx_fmt_ee (code, mode, op1, op2));

  if (complement_final_p)
    bool_rtx = gen_rtx_NOT (mode, bool_rtx);

  emit_insn (gen_rtx_SET (dest, bool_rtx));
}
40717 | ||
/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system.  These
   operations are split immediately during RTL generation to allow for more
   optimizations of the AND/IOR/XOR.

   OPERANDS is an array containing the destination and two input operands.
   CODE is the base operation (AND, IOR, XOR, NOT).
   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */

static void
rs6000_split_logical_di (rtx operands[3],
			 enum rtx_code code,
			 bool complement_final_p,
			 bool complement_op1_p,
			 bool complement_op2_p)
{
  const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
  const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
  const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
  enum hi_lo { hi = 0, lo = 1 };
  rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
  size_t i;

  /* Split destination and first input into their SImode halves.  */
  op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
  op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
  op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
  op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);

  if (code == NOT)
    op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
  else
    {
      if (GET_CODE (operands[2]) != CONST_INT)
	{
	  op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
	  op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
	}
      else
	{
	  /* Constant second operand: split the 64-bit value into two
	     sign-extended 32-bit constants.  */
	  HOST_WIDE_INT value = INTVAL (operands[2]);
	  HOST_WIDE_INT value_hi_lo[2];

	  /* Complement forms never reach here with a constant operand.  */
	  gcc_assert (!complement_final_p);
	  gcc_assert (!complement_op1_p);
	  gcc_assert (!complement_op2_p);

	  value_hi_lo[hi] = value >> 32;
	  value_hi_lo[lo] = value & lower_32bits;

	  for (i = 0; i < 2; i++)
	    {
	      HOST_WIDE_INT sub_value = value_hi_lo[i];

	      /* Sign-extend the 32-bit half into HOST_WIDE_INT.  */
	      if (sub_value & sign_bit)
		sub_value |= upper_32bits;

	      op2_hi_lo[i] = GEN_INT (sub_value);

	      /* If this is an AND instruction, check to see if we need to load
		 the value in a register.  */
	      if (code == AND && sub_value != -1 && sub_value != 0
		  && !and_operand (op2_hi_lo[i], SImode))
		op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
	    }
	}
    }

  for (i = 0; i < 2; i++)
    {
      /* Split large IOR/XOR operations whose constant does not fit one
	 ORI/XORI into an ORIS/XORIS on the high 16 bits followed by
	 ORI/XORI on the low 16 bits.  */
      if ((code == IOR || code == XOR)
	  && GET_CODE (op2_hi_lo[i]) == CONST_INT
	  && !complement_final_p
	  && !complement_op1_p
	  && !complement_op2_p
	  && !logical_const_operand (op2_hi_lo[i], SImode))
	{
	  HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
	  HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
	  HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
	  rtx tmp = gen_reg_rtx (SImode);

	  /* Make sure the constant is sign extended.  */
	  if ((hi_16bits & sign_bit) != 0)
	    hi_16bits |= upper_32bits;

	  rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
				      code, SImode, false, false, false);

	  rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
				      code, SImode, false, false, false);
	}
      else
	rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
				    code, SImode, complement_final_p,
				    complement_op1_p, complement_op2_p);
    }

  return;
}
40822 | ||
/* Split the insns that make up boolean operations operating on multiple GPR
   registers.  The boolean MD patterns ensure that the inputs either are
   exactly the same as the output registers, or there is no overlap.

   OPERANDS is an array containing the destination and two input operands.
   CODE is the base operation (AND, IOR, XOR, NOT).
   If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
   If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
   If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.  */

void
rs6000_split_logical (rtx operands[3],
		      enum rtx_code code,
		      bool complement_final_p,
		      bool complement_op1_p,
		      bool complement_op2_p)
{
  machine_mode mode = GET_MODE (operands[0]);
  machine_mode sub_mode;
  rtx op0, op1, op2;
  int sub_size, regno0, regno1, nregs, i;

  /* If this is DImode, use the specialized version that can run before
     register allocation.  */
  if (mode == DImode && !TARGET_POWERPC64)
    {
      rs6000_split_logical_di (operands, code, complement_final_p,
			       complement_op1_p, complement_op2_p);
      return;
    }

  op0 = operands[0];
  op1 = operands[1];
  op2 = (code == NOT) ? NULL_RTX : operands[2];
  sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
  sub_size = GET_MODE_SIZE (sub_mode);
  regno0 = REGNO (op0);
  regno1 = REGNO (op1);

  /* This path runs post-reload only: operands must be hard GPRs so that
     simplify_subreg can slice them into word-sized pieces.  */
  gcc_assert (reload_completed);
  gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
  gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));

  nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
  gcc_assert (nregs > 1);

  if (op2 && REG_P (op2))
    gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));

  /* Emit one word-mode logical op per constituent register.  */
  for (i = 0; i < nregs; i++)
    {
      int offset = i * sub_size;
      rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
      rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
      rtx sub_op2 = ((code == NOT)
		     ? NULL_RTX
		     : simplify_subreg (sub_mode, op2, mode, offset));

      rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
				  complement_final_p, complement_op1_p,
				  complement_op2_p);
    }

  return;
}
40888 | ||
40889 | \f | |
/* Return true if the peephole2 can combine a load involving a combination of
   an addis instruction and a load with an offset that can be fused together on
   a power8.

   ADDIS_REG is the register set by the addis, ADDIS_VALUE the value it
   computes, TARGET the register the load writes, and MEM the memory
   reference (possibly wrapped in a sign/zero extend).  */

bool
fusion_gpr_load_p (rtx addis_reg,	/* register set via addis.  */
		   rtx addis_value,	/* addis value.  */
		   rtx target,		/* target register that is loaded.  */
		   rtx mem)		/* bottom part of the memory addr.  */
{
  rtx addr;
  rtx base_reg;

  /* Validate arguments.  */
  if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
    return false;

  if (!base_reg_operand (target, GET_MODE (target)))
    return false;

  if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
    return false;

  /* Allow sign/zero extension.  */
  if (GET_CODE (mem) == ZERO_EXTEND
      || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
    mem = XEXP (mem, 0);

  if (!MEM_P (mem))
    return false;

  if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
    return false;

  addr = XEXP (mem, 0);			/* either PLUS or LO_SUM.  */
  if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
    return false;

  /* Validate that the register used to load the high value is either the
     register being loaded, or we can safely replace its use.

     This function is only called from the peephole2 pass and we assume that
     there are 2 instructions in the peephole (addis and load), so we want to
     check if the target register was not used in the memory address and the
     register to hold the addis result is dead after the peephole.  */
  if (REGNO (addis_reg) != REGNO (target))
    {
      if (reg_mentioned_p (target, mem))
	return false;

      if (!peep2_reg_dead_p (2, addis_reg))
	return false;

      /* If the target register being loaded is the stack pointer, we must
         avoid loading any other value into it, even temporarily.  */
      if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
	return false;
    }

  /* Finally, the addis result must actually be the base of the load.  */
  base_reg = XEXP (addr, 0);
  return REGNO (addis_reg) == REGNO (base_reg);
}
40952 | ||
/* During the peephole2 pass, adjust and expand the insns for a load fusion
   sequence.  We adjust the addis register to use the target register.  If the
   load sign extends, we adjust the code to do the zero extending load, and an
   explicit sign extension later since the fusion only covers zero extending
   loads.

   The operands are:
	operands[0]	register set with addis (to be replaced with target)
	operands[1]	value set via addis
	operands[2]	target register being loaded
	operands[3]	D-form memory reference using operands[0].  */

void
expand_fusion_gpr_load (rtx *operands)
{
  rtx addis_value = operands[1];
  rtx target = operands[2];
  rtx orig_mem = operands[3];
  rtx new_addr, new_mem, orig_addr, offset;
  enum rtx_code plus_or_lo_sum;
  machine_mode target_mode = GET_MODE (target);
  machine_mode extend_mode = target_mode;
  machine_mode ptr_mode = Pmode;
  enum rtx_code extend = UNKNOWN;

  /* Strip an extension wrapper; remember which kind so a sign extend can
     be re-emitted explicitly after the fused (zero-extending) load.  */
  if (GET_CODE (orig_mem) == ZERO_EXTEND
      || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
    {
      extend = GET_CODE (orig_mem);
      orig_mem = XEXP (orig_mem, 0);
      target_mode = GET_MODE (orig_mem);
    }

  gcc_assert (MEM_P (orig_mem));

  orig_addr = XEXP (orig_mem, 0);
  plus_or_lo_sum = GET_CODE (orig_addr);
  gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);

  /* Rebuild the address with ADDIS_VALUE in place of the addis register,
     keeping the same PLUS/LO_SUM shape and the same low offset.  */
  offset = XEXP (orig_addr, 1);
  new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
  new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);

  /* The fused load itself always zero-extends.  */
  if (extend != UNKNOWN)
    new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);

  /* Wrap in UNSPEC_FUSION_GPR so only the fusion patterns match it.  */
  new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
			    UNSPEC_FUSION_GPR);
  emit_insn (gen_rtx_SET (target, new_mem));

  if (extend == SIGN_EXTEND)
    {
      /* Sign-extend the narrow subreg of the loaded value in place.  */
      int sub_off = ((BYTES_BIG_ENDIAN)
		     ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
		     : 0);
      rtx sign_reg
	= simplify_subreg (target_mode, target, extend_mode, sub_off);

      emit_insn (gen_rtx_SET (target,
			      gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
    }

  return;
}
41017 | ||
/* Emit the addis instruction that will be part of a fused instruction
   sequence.

   TARGET is the register the addis sets; ADDIS_VALUE is the high-part
   value being computed (a constant, PLUS, or HIGH of a TOC reference).
   COMMENT and MODE_NAME are appended as an assembler comment to aid
   debugging of the emitted sequence.  Aborts via fatal_insn if no addis
   form matches ADDIS_VALUE.  */

void
emit_fusion_addis (rtx target, rtx addis_value, const char *comment,
		   const char *mode_name)
{
  rtx fuse_ops[10];
  char insn_template[80];
  const char *addis_str = NULL;
  const char *comment_str = ASM_COMMENT_START;

  if (*comment_str == ' ')
    comment_str++;

  /* Emit the addis instruction.  Each arm below recognizes one shape of
     ADDIS_VALUE and selects the matching template string; the fuse_ops
     slots are the template's %-operands.  */
  fuse_ops[0] = target;
  if (satisfies_constraint_L (addis_value))
    {
      /* Plain constant that fits the "L" (shifted 16-bit) constraint.  */
      fuse_ops[1] = addis_value;
      addis_str = "lis %0,%v1";
    }

  else if (GET_CODE (addis_value) == PLUS)
    {
      rtx op0 = XEXP (addis_value, 0);
      rtx op1 = XEXP (addis_value, 1);

      if (REG_P (op0) && CONST_INT_P (op1)
	  && satisfies_constraint_L (op1))
	{
	  /* Register plus shifted 16-bit constant.  */
	  fuse_ops[1] = op0;
	  fuse_ops[2] = op1;
	  addis_str = "addis %0,%1,%v2";
	}
    }

  else if (GET_CODE (addis_value) == HIGH)
    {
      rtx value = XEXP (addis_value, 0);
      if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
	{
	  /* High part of a TOC-relative symbol reference.  */
	  fuse_ops[1] = XVECEXP (value, 0, 0);		/* symbol ref.  */
	  fuse_ops[2] = XVECEXP (value, 0, 1);		/* TOC register.  */
	  if (TARGET_ELF)
	    addis_str = "addis %0,%2,%1@toc@ha";

	  else if (TARGET_XCOFF)
	    addis_str = "addis %0,%1@u(%2)";

	  else
	    gcc_unreachable ();
	}

      else if (GET_CODE (value) == PLUS)
	{
	  rtx op0 = XEXP (value, 0);
	  rtx op1 = XEXP (value, 1);

	  if (GET_CODE (op0) == UNSPEC
	      && XINT (op0, 1) == UNSPEC_TOCREL
	      && CONST_INT_P (op1))
	    {
	      /* TOC-relative symbol plus constant offset.  */
	      fuse_ops[1] = XVECEXP (op0, 0, 0);	/* symbol ref.  */
	      fuse_ops[2] = XVECEXP (op0, 0, 1);	/* TOC register.  */
	      fuse_ops[3] = op1;
	      if (TARGET_ELF)
		addis_str = "addis %0,%2,%1+%3@toc@ha";

	      else if (TARGET_XCOFF)
		addis_str = "addis %0,%1+%3@u(%2)";

	      else
		gcc_unreachable ();
	    }
	}

      else if (satisfies_constraint_L (value))
	{
	  fuse_ops[1] = value;
	  addis_str = "lis %0,%v1";
	}

      else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
	{
	  /* 32-bit ELF: generic constant high part.  */
	  fuse_ops[1] = value;
	  addis_str = "lis %0,%1@ha";
	}
    }

  if (!addis_str)
    fatal_insn ("Could not generate addis value for fusion", addis_value);

  /* Append the debugging comment after the instruction text.  */
  sprintf (insn_template, "%s\t\t%s %s, type %s", addis_str, comment_str,
	   comment, mode_name);
  output_asm_insn (insn_template, fuse_ops);
}
41115 | ||
/* Emit a D-form load or store instruction that is the second instruction
   of a fusion sequence.

   LOAD_STORE_REG is the register loaded or stored, ADDIS_REG the base
   register set by the preceding addis, OFFSET the low part of the
   address (a constant or a TOC-relative expression), and INSN_STR the
   mnemonic ("lwz", "ld", "stw", ...).  Aborts via fatal_insn if OFFSET
   has no recognized form.  */

void
emit_fusion_load_store (rtx load_store_reg, rtx addis_reg, rtx offset,
			const char *insn_str)
{
  rtx fuse_ops[10];
  char insn_template[80];

  fuse_ops[0] = load_store_reg;
  fuse_ops[1] = addis_reg;

  if (CONST_INT_P (offset) && satisfies_constraint_I (offset))
    {
      /* Plain 16-bit displacement.  */
      sprintf (insn_template, "%s %%0,%%2(%%1)", insn_str);
      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == UNSPEC
	   && XINT (offset, 1) == UNSPEC_TOCREL)
    {
      /* Low part of a TOC-relative symbol.  */
      if (TARGET_ELF)
	sprintf (insn_template, "%s %%0,%%2@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
	sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      else
	gcc_unreachable ();

      fuse_ops[2] = XVECEXP (offset, 0, 0);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (GET_CODE (offset) == PLUS
	   && GET_CODE (XEXP (offset, 0)) == UNSPEC
	   && XINT (XEXP (offset, 0), 1) == UNSPEC_TOCREL
	   && CONST_INT_P (XEXP (offset, 1)))
    {
      /* TOC-relative symbol plus constant offset.  */
      rtx tocrel_unspec = XEXP (offset, 0);
      if (TARGET_ELF)
	sprintf (insn_template, "%s %%0,%%2+%%3@toc@l(%%1)", insn_str);

      else if (TARGET_XCOFF)
	sprintf (insn_template, "%s %%0,%%2+%%3@l(%%1)", insn_str);

      else
	gcc_unreachable ();

      fuse_ops[2] = XVECEXP (tocrel_unspec, 0, 0);
      fuse_ops[3] = XEXP (offset, 1);
      output_asm_insn (insn_template, fuse_ops);
    }

  else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (offset))
    {
      /* 32-bit ELF: generic constant low part.  */
      sprintf (insn_template, "%s %%0,%%2@l(%%1)", insn_str);

      fuse_ops[2] = offset;
      output_asm_insn (insn_template, fuse_ops);
    }

  else
    fatal_insn ("Unable to generate load/store offset for fusion", offset);

  return;
}
41185 | ||
41186 | /* Wrap a TOC address that can be fused to indicate that special fusion | |
41187 | processing is needed. */ | |
41188 | ||
41189 | rtx | |
41190 | fusion_wrap_memory_address (rtx old_mem) | |
41191 | { | |
41192 | rtx old_addr = XEXP (old_mem, 0); | |
41193 | rtvec v = gen_rtvec (1, old_addr); | |
41194 | rtx new_addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_FUSION_ADDIS); | |
41195 | return replace_equiv_address_nv (old_mem, new_addr, false); | |
41196 | } | |
41197 | ||
41198 | /* Given an address, convert it into the addis and load offset parts. Addresses | |
41199 | created during the peephole2 process look like: | |
41200 | (lo_sum (high (unspec [(sym)] UNSPEC_TOCREL)) | |
41201 | (unspec [(...)] UNSPEC_TOCREL)) | |
41202 | ||
41203 | Addresses created via toc fusion look like: | |
41204 | (unspec [(unspec [(...)] UNSPEC_TOCREL)] UNSPEC_FUSION_ADDIS)) */ | |
41205 | ||
41206 | static void | |
41207 | fusion_split_address (rtx addr, rtx *p_hi, rtx *p_lo) | |
41208 | { | |
41209 | rtx hi, lo; | |
41210 | ||
41211 | if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_FUSION_ADDIS) | |
41212 | { | |
41213 | lo = XVECEXP (addr, 0, 0); | |
41214 | hi = gen_rtx_HIGH (Pmode, lo); | |
41215 | } | |
41216 | else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM) | |
41217 | { | |
41218 | hi = XEXP (addr, 0); | |
41219 | lo = XEXP (addr, 1); | |
41220 | } | |
41221 | else | |
41222 | gcc_unreachable (); | |
41223 | ||
41224 | *p_hi = hi; | |
41225 | *p_lo = lo; | |
41226 | } | |
41227 | ||
/* Return a string to fuse an addis instruction with a gpr load to the same
   register that we loaded up the addis instruction.  The address that is used
   is the logical address that was formed during peephole2:
	(lo_sum (high) (low-part))

   Or the address is the TOC address that is wrapped before register allocation:
	(unspec [(addr) (toc-reg)] UNSPEC_FUSION_ADDIS)

   The code is complicated, so we call output_asm_insn directly, and just
   return "".  */

const char *
emit_fusion_gpr_load (rtx target, rtx mem)
{
  rtx addis_value;
  rtx addr;
  rtx load_offset;
  const char *load_str = NULL;
  const char *mode_name = NULL;
  machine_mode mode;

  /* Strip a zero-extend wrapper; the fused load zero-extends anyway.  */
  if (GET_CODE (mem) == ZERO_EXTEND)
    mem = XEXP (mem, 0);

  gcc_assert (REG_P (target) && MEM_P (mem));

  /* Split the address into the addis (high) part and the load offset.  */
  addr = XEXP (mem, 0);
  fusion_split_address (addr, &addis_value, &load_offset);

  /* Now emit the load instruction to the same register.  Select the
     mnemonic (and a comment tag) from the access mode.  */
  mode = GET_MODE (mem);
  switch (mode)
    {
    case E_QImode:
      mode_name = "char";
      load_str = "lbz";
      break;

    case E_HImode:
      mode_name = "short";
      load_str = "lhz";
      break;

    case E_SImode:
    case E_SFmode:
      mode_name = (mode == SFmode) ? "float" : "int";
      load_str = "lwz";
      break;

    case E_DImode:
    case E_DFmode:
      /* 64-bit loads require a 64-bit target.  */
      gcc_assert (TARGET_POWERPC64);
      mode_name = (mode == DFmode) ? "double" : "long";
      load_str = "ld";
      break;

    default:
      fatal_insn ("Bad GPR fusion", gen_rtx_SET (target, mem));
    }

  /* Emit the addis instruction.  */
  emit_fusion_addis (target, addis_value, "gpr load fusion", mode_name);

  /* Emit the D-form load instruction.  */
  emit_fusion_load_store (target, target, load_offset, load_str);

  return "";
}
41296 | \f | |
41297 | ||
/* Return true if the peephole2 can combine a load/store involving a
   combination of an addis instruction and the memory operation.  This was
   added to the ISA 3.0 (power9) hardware.  */

bool
fusion_p9_p (rtx addis_reg,		/* register set via addis.  */
	     rtx addis_value,		/* addis value.  */
	     rtx dest,			/* destination (memory or register).  */
	     rtx src)			/* source (register or memory).  */
{
  rtx addr, mem, offset;
  machine_mode mode = GET_MODE (src);

  /* Validate arguments.  */
  if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
    return false;

  if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
    return false;

  /* Ignore extend operations that are part of the load.  */
  if (GET_CODE (src) == FLOAT_EXTEND || GET_CODE (src) == ZERO_EXTEND)
    src = XEXP (src, 0);

  /* Test for memory<-register or register<-memory.  */
  if (fpr_reg_operand (src, mode) || int_reg_operand (src, mode))
    {
      /* Store case: the destination must be memory.  */
      if (!MEM_P (dest))
	return false;

      mem = dest;
    }

  else if (MEM_P (src))
    {
      /* Load case: the destination must be an FPR or a GPR.  */
      if (!fpr_reg_operand (dest, mode) && !int_reg_operand (dest, mode))
	return false;

      mem = src;
    }

  else
    return false;

  addr = XEXP (mem, 0);			/* either PLUS or LO_SUM.  */
  if (GET_CODE (addr) == PLUS)
    {
      /* reg+offset form: the base must be exactly the addis target and
	 the offset must fit a signed 16-bit immediate (constraint I).  */
      if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
	return false;

      return satisfies_constraint_I (XEXP (addr, 1));
    }

  else if (GET_CODE (addr) == LO_SUM)
    {
      if (!rtx_equal_p (addis_reg, XEXP (addr, 0)))
	return false;

      offset = XEXP (addr, 1);
      /* AIX and 64-bit ELF: only small TOC references qualify.  */
      if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64))
	return small_toc_ref (offset, GET_MODE (offset));

      /* 32-bit ELF: any constant low part is acceptable.  */
      else if (TARGET_ELF && !TARGET_POWERPC64)
	return CONSTANT_P (offset);
    }

  return false;
}
41366 | ||
/* During the peephole2 pass, adjust and expand the insns for an extended fusion
   load sequence.

   The operands are:
	operands[0]	register set with addis
	operands[1]	value set via addis
	operands[2]	target register being loaded
	operands[3]	D-form memory reference using operands[0].

   This is similar to the fusion introduced with power8, except it scales to
   both loads/stores and does not require the result register to be the same as
   the base register.  At the moment, we only do this if register set with addis
   is dead.  */

void
expand_fusion_p9_load (rtx *operands)
{
  rtx tmp_reg = operands[0];
  rtx addis_value = operands[1];
  rtx target = operands[2];
  rtx orig_mem = operands[3];
  rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn;
  enum rtx_code plus_or_lo_sum;
  machine_mode target_mode = GET_MODE (target);
  machine_mode extend_mode = target_mode;
  machine_mode ptr_mode = Pmode;
  enum rtx_code extend = UNKNOWN;

  /* If the memory is wrapped in an extension, strip it and operate on the
     inner MEM.  EXTEND_MODE keeps the mode of the full (extended) result;
     TARGET_MODE becomes the memory's narrower mode.  */
  if (GET_CODE (orig_mem) == FLOAT_EXTEND || GET_CODE (orig_mem) == ZERO_EXTEND)
    {
      extend = GET_CODE (orig_mem);
      orig_mem = XEXP (orig_mem, 0);
      target_mode = GET_MODE (orig_mem);
    }

  gcc_assert (MEM_P (orig_mem));

  orig_addr = XEXP (orig_mem, 0);
  plus_or_lo_sum = GET_CODE (orig_addr);
  gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);

  /* Rebuild the address with the addis value directly as the base,
     preserving the original PLUS/LO_SUM form and the offset.  */
  offset = XEXP (orig_addr, 1);
  new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
  new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);

  /* Re-apply the extension around the rebuilt MEM.  */
  if (extend != UNKNOWN)
    new_mem = gen_rtx_fmt_e (extend, extend_mode, new_mem);

  /* Wrap the source in UNSPEC_FUSION_P9 so the output patterns know to
     emit the fused addis + D-form sequence.  */
  new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
			    UNSPEC_FUSION_P9);

  /* The addis scratch register is clobbered by the fused sequence.  */
  set = gen_rtx_SET (target, new_mem);
  clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
  emit_insn (insn);

  return;
}
41425 | ||
/* During the peephole2 pass, adjust and expand the insns for an extended fusion
   store sequence.

   The operands are:
	operands[0]	register set with addis
	operands[1]	value set via addis
	operands[2]	target D-form memory being stored to
	operands[3]	register being stored

   This is similar to the fusion introduced with power8, except it scales to
   both loads/stores and does not require the result register to be the same as
   the base register.  At the moment, we only do this if register set with addis
   is dead.  */

void
expand_fusion_p9_store (rtx *operands)
{
  rtx tmp_reg = operands[0];
  rtx addis_value = operands[1];
  rtx orig_mem = operands[2];
  rtx src = operands[3];
  rtx new_addr, new_mem, orig_addr, offset, set, clobber, insn, new_src;
  enum rtx_code plus_or_lo_sum;
  machine_mode target_mode = GET_MODE (orig_mem);
  machine_mode ptr_mode = Pmode;

  gcc_assert (MEM_P (orig_mem));

  orig_addr = XEXP (orig_mem, 0);
  plus_or_lo_sum = GET_CODE (orig_addr);
  gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);

  /* Rebuild the address with the addis value directly as the base,
     preserving the original PLUS/LO_SUM form and the offset.  */
  offset = XEXP (orig_addr, 1);
  new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
  new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);

  /* Wrap the stored value in UNSPEC_FUSION_P9 so the output patterns
     know to emit the fused addis + D-form sequence.  */
  new_src = gen_rtx_UNSPEC (target_mode, gen_rtvec (1, src),
			    UNSPEC_FUSION_P9);

  /* The addis scratch register is clobbered by the fused sequence.  */
  set = gen_rtx_SET (new_mem, new_src);
  clobber = gen_rtx_CLOBBER (VOIDmode, tmp_reg);
  insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber));
  emit_insn (insn);

  return;
}
41472 | ||
/* Return a string to fuse an addis instruction with a load using extended
   fusion.  The address that is used is the logical address that was formed
   during peephole2: (lo_sum (high) (low-part))

   REG is the destination register, MEM the (possibly extended) memory
   reference, and TMP_REG the scratch register for the addis.

   The code is complicated, so we call output_asm_insn directly, and just
   return "".  */

const char *
emit_fusion_p9_load (rtx reg, rtx mem, rtx tmp_reg)
{
  machine_mode mode = GET_MODE (reg);
  rtx hi;
  rtx lo;
  rtx addr;
  const char *load_string;
  int r;

  /* Strip an extension; the extending load mnemonic handles it, so the
     relevant mode is the memory's.  */
  if (GET_CODE (mem) == FLOAT_EXTEND || GET_CODE (mem) == ZERO_EXTEND)
    {
      mem = XEXP (mem, 0);
      mode = GET_MODE (mem);
    }

  /* Only low-part subregs (byte offset 0) are expected here.  */
  if (GET_CODE (reg) == SUBREG)
    {
      gcc_assert (SUBREG_BYTE (reg) == 0);
      reg = SUBREG_REG (reg);
    }

  if (!REG_P (reg))
    fatal_insn ("emit_fusion_p9_load, bad reg #1", reg);

  /* Select the load mnemonic from the register bank and the mode.  */
  r = REGNO (reg);
  if (FP_REGNO_P (r))
    {
      if (mode == SFmode)
	load_string = "lfs";
      else if (mode == DFmode || mode == DImode)
	load_string = "lfd";
      else
	gcc_unreachable ();
    }
  else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
    {
      /* ISA 3.0 D-form scalar loads into the upper VSX registers.  */
      if (mode == SFmode)
	load_string = "lxssp";
      else if (mode == DFmode || mode == DImode)
	load_string = "lxsd";
      else
	gcc_unreachable ();
    }
  else if (INT_REGNO_P (r))
    {
      switch (mode)
	{
	case E_QImode:
	  load_string = "lbz";
	  break;
	case E_HImode:
	  load_string = "lhz";
	  break;
	case E_SImode:
	case E_SFmode:
	  load_string = "lwz";
	  break;
	case E_DImode:
	case E_DFmode:
	  /* 64-bit GPR loads require a 64-bit target.  */
	  if (!TARGET_POWERPC64)
	    gcc_unreachable ();
	  load_string = "ld";
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    fatal_insn ("emit_fusion_p9_load, bad reg #2", reg);

  if (!MEM_P (mem))
    fatal_insn ("emit_fusion_p9_load not MEM", mem);

  /* Split the address into the addis (high) part and the D-form offset.  */
  addr = XEXP (mem, 0);
  fusion_split_address (addr, &hi, &lo);

  /* Emit the addis instruction.  */
  emit_fusion_addis (tmp_reg, hi, "power9 load fusion", GET_MODE_NAME (mode));

  /* Emit the D-form load instruction.  */
  emit_fusion_load_store (reg, tmp_reg, lo, load_string);

  return "";
}
41565 | ||
/* Return a string to fuse an addis instruction with a store using extended
   fusion.  The address that is used is the logical address that was formed
   during peephole2: (lo_sum (high) (low-part))

   MEM is the D-form memory destination, REG the register being stored, and
   TMP_REG the scratch register for the addis.

   The code is complicated, so we call output_asm_insn directly, and just
   return "".  */

const char *
emit_fusion_p9_store (rtx mem, rtx reg, rtx tmp_reg)
{
  machine_mode mode = GET_MODE (reg);
  rtx hi;
  rtx lo;
  rtx addr;
  const char *store_string;
  int r;

  /* Only low-part subregs (byte offset 0) are expected here.  */
  if (GET_CODE (reg) == SUBREG)
    {
      gcc_assert (SUBREG_BYTE (reg) == 0);
      reg = SUBREG_REG (reg);
    }

  if (!REG_P (reg))
    fatal_insn ("emit_fusion_p9_store, bad reg #1", reg);

  /* Select the store mnemonic from the register bank and the mode.  */
  r = REGNO (reg);
  if (FP_REGNO_P (r))
    {
      /* NOTE(review): unlike emit_fusion_p9_load, DImode in an FP
	 register is not accepted here — presumably DImode stores never
	 reach this path in an FPR; confirm against the store patterns.  */
      if (mode == SFmode)
	store_string = "stfs";
      else if (mode == DFmode)
	store_string = "stfd";
      else
	gcc_unreachable ();
    }
  else if (ALTIVEC_REGNO_P (r) && TARGET_P9_DFORM_SCALAR)
    {
      /* ISA 3.0 D-form scalar stores from the upper VSX registers.  */
      if (mode == SFmode)
	store_string = "stxssp";
      else if (mode == DFmode || mode == DImode)
	store_string = "stxsd";
      else
	gcc_unreachable ();
    }
  else if (INT_REGNO_P (r))
    {
      switch (mode)
	{
	case E_QImode:
	  store_string = "stb";
	  break;
	case E_HImode:
	  store_string = "sth";
	  break;
	case E_SImode:
	case E_SFmode:
	  store_string = "stw";
	  break;
	case E_DImode:
	case E_DFmode:
	  /* 64-bit GPR stores require a 64-bit target.  */
	  if (!TARGET_POWERPC64)
	    gcc_unreachable ();
	  store_string = "std";
	  break;
	default:
	  gcc_unreachable ();
	}
    }
  else
    fatal_insn ("emit_fusion_p9_store, bad reg #2", reg);

  if (!MEM_P (mem))
    fatal_insn ("emit_fusion_p9_store not MEM", mem);

  /* Split the address into the addis (high) part and the D-form offset.  */
  addr = XEXP (mem, 0);
  fusion_split_address (addr, &hi, &lo);

  /* Emit the addis instruction.  */
  emit_fusion_addis (tmp_reg, hi, "power9 store fusion", GET_MODE_NAME (mode));

  /* Emit the D-form store instruction.  */
  emit_fusion_load_store (reg, tmp_reg, lo, store_string);

  return "";
}
41652 | ||
41653 | \f | |
41654 | /* Analyze vector computations and remove unnecessary doubleword | |
41655 | swaps (xxswapdi instructions). This pass is performed only | |
41656 | for little-endian VSX code generation. | |
41657 | ||
41658 | For this specific case, loads and stores of 4x32 and 2x64 vectors | |
41659 | are inefficient. These are implemented using the lvxd2x and | |
41660 | stvxd2x instructions, which invert the order of doublewords in | |
41661 | a vector register. Thus the code generation inserts an xxswapdi | |
41662 | after each such load, and prior to each such store. (For spill | |
41663 | code after register assignment, an additional xxswapdi is inserted | |
41664 | following each store in order to return a hard register to its | |
41665 | unpermuted value.) | |
41666 | ||
41667 | The extra xxswapdi instructions reduce performance. This can be | |
41668 | particularly bad for vectorized code. The purpose of this pass | |
41669 | is to reduce the number of xxswapdi instructions required for | |
41670 | correctness. | |
41671 | ||
41672 | The primary insight is that much code that operates on vectors | |
41673 | does not care about the relative order of elements in a register, | |
41674 | so long as the correct memory order is preserved. If we have | |
41675 | a computation where all input values are provided by lvxd2x/xxswapdi | |
41676 | sequences, all outputs are stored using xxswapdi/stvxd2x sequences, | |
41677 | and all intermediate computations are pure SIMD (independent of | |
41678 | element order), then all the xxswapdi's associated with the loads | |
41679 | and stores may be removed. | |
41680 | ||
41681 | This pass uses some of the infrastructure and logical ideas from | |
41682 | the "web" pass in web.c. We create maximal webs of computations | |
41683 | fitting the description above using union-find. Each such web is | |
41684 | then optimized by removing its unnecessary xxswapdi instructions. | |
41685 | ||
41686 | The pass is placed prior to global optimization so that we can | |
41687 | perform the optimization in the safest and simplest way possible; | |
41688 | that is, by replacing each xxswapdi insn with a register copy insn. | |
41689 | Subsequent forward propagation will remove copies where possible. | |
41690 | ||
41691 | There are some operations sensitive to element order for which we | |
41692 | can still allow the operation, provided we modify those operations. | |
41693 | These include CONST_VECTORs, for which we must swap the first and | |
41694 | second halves of the constant vector; and SUBREGs, for which we | |
41695 | must adjust the byte offset to account for the swapped doublewords. | |
41696 | A remaining opportunity would be non-immediate-form splats, for | |
41697 | which we should adjust the selected lane of the input. We should | |
41698 | also make code generation adjustments for sum-across operations, | |
41699 | since this is a common vectorizer reduction. | |
41700 | ||
41701 | Because we run prior to the first split, we can see loads and stores | |
41702 | here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla | |
41703 | vector loads and stores that have not yet been split into a permuting | |
41704 | load/store and a swap. (One way this can happen is with a builtin | |
41705 | call to vec_vsx_{ld,st}.) We can handle these as well, but rather | |
41706 | than deleting a swap, we convert the load/store into a permuting | |
41707 | load/store (which effectively removes the swap). */ | |
41708 | ||
41709 | /* Notes on Permutes | |
41710 | ||
41711 | We do not currently handle computations that contain permutes. There | |
41712 | is a general transformation that can be performed correctly, but it | |
41713 | may introduce more expensive code than it replaces. To handle these | |
41714 | would require a cost model to determine when to perform the optimization. | |
41715 | This commentary records how this could be done if desired. | |
41716 | ||
41717 | The most general permute is something like this (example for V16QI): | |
41718 | ||
41719 | (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI)) | |
41720 | (parallel [(const_int a0) (const_int a1) | |
41721 | ... | |
41722 | (const_int a14) (const_int a15)])) | |
41723 | ||
41724 | where a0,...,a15 are in [0,31] and select elements from op1 and op2 | |
41725 | to produce in the result. | |
41726 | ||
41727 | Regardless of mode, we can convert the PARALLEL to a mask of 16 | |
41728 | byte-element selectors. Let's call this M, with M[i] representing | |
41729 | the ith byte-element selector value. Then if we swap doublewords | |
41730 | throughout the computation, we can get correct behavior by replacing | |
41731 | M with M' as follows: | |
41732 | ||
41733 | M'[i] = { (M[i]+8)%16 : M[i] in [0,15] | |
41734 | { ((M[i]+8)%16)+16 : M[i] in [16,31] | |
41735 | ||
41736 | This seems promising at first, since we are just replacing one mask | |
41737 | with another. But certain masks are preferable to others. If M | |
41738 | is a mask that matches a vmrghh pattern, for example, M' certainly | |
41739 | will not. Instead of a single vmrghh, we would generate a load of | |
41740 | M' and a vperm. So we would need to know how many xxswapd's we can | |
41741 | remove as a result of this transformation to determine if it's | |
41742 | profitable; and preferably the logic would need to be aware of all | |
41743 | the special preferable masks. | |
41744 | ||
41745 | Another form of permute is an UNSPEC_VPERM, in which the mask is | |
41746 | already in a register. In some cases, this mask may be a constant | |
41747 | that we can discover with ud-chains, in which case the above | |
41748 | transformation is ok. However, the common usage here is for the | |
41749 | mask to be produced by an UNSPEC_LVSL, in which case the mask | |
41750 | cannot be known at compile time. In such a case we would have to | |
41751 | generate several instructions to compute M' as above at run time, | |
41752 | and a cost model is needed again. | |
41753 | ||
41754 | However, when the mask M for an UNSPEC_VPERM is loaded from the | |
41755 | constant pool, we can replace M with M' as above at no cost | |
41756 | beyond adding a constant pool entry. */ | |
41757 | ||
/* This is based on the union-find logic in web.c.  web_entry_base is
   defined in df.h.  One swap_web_entry is allocated per insn UID; the
   one-bit flags classify the insn for the doubleword-swap removal pass
   above.  */
class swap_web_entry : public web_entry_base
{
 public:
  /* Pointer to the insn.  */
  rtx_insn *insn;
  /* Set if insn contains a mention of a vector register.  All other
     fields are undefined if this field is unset.  */
  unsigned int is_relevant : 1;
  /* Set if insn is a load.  */
  unsigned int is_load : 1;
  /* Set if insn is a store.  */
  unsigned int is_store : 1;
  /* Set if insn is a doubleword swap.  This can either be a register swap
     or a permuting load or store (test is_load and is_store for this).  */
  unsigned int is_swap : 1;
  /* Set if the insn has a live-in use of a parameter register.  */
  unsigned int is_live_in : 1;
  /* Set if the insn has a live-out def of a return register.  */
  unsigned int is_live_out : 1;
  /* Set if the insn contains a subreg reference of a vector register.  */
  unsigned int contains_subreg : 1;
  /* Set if the insn contains a 128-bit integer operand.  */
  unsigned int is_128_int : 1;
  /* Set if this is a call-insn.  */
  unsigned int is_call : 1;
  /* Set if this insn does not perform a vector operation for which
     element order matters, or if we know how to fix it up if it does.
     Undefined if is_swap is set.  */
  unsigned int is_swappable : 1;
  /* A nonzero value indicates what kind of special handling for this
     insn is required if doublewords are swapped.  Holds one of the
     special_handling_values below.  Undefined if is_swappable is not
     set.  */
  unsigned int special_handling : 4;
  /* Set if the web represented by this entry cannot be optimized.  */
  unsigned int web_not_optimizable : 1;
  /* Set if this insn should be deleted.  */
  unsigned int will_delete : 1;
};
41798 | ||
/* Values for the special_handling field of a swap_web_entry, describing
   the fix-up needed for an insn when the doublewords of its web are
   swapped.  */
enum special_handling_values {
  SH_NONE = 0,		/* No special handling required.  */
  SH_CONST_VECTOR,	/* Swap the two halves of a constant vector.  */
  SH_SUBREG,		/* Adjust a subreg's byte offset for the swap.  */
  SH_NOSWAP_LD,		/* Convert a vanilla load into a permuting load.  */
  SH_NOSWAP_ST,		/* Convert a vanilla store into a permuting store.  */
  SH_EXTRACT,		/* Change the selected lane of a vec_extract.  */
  SH_SPLAT,		/* Change the selected lane of a splat.  */
  SH_XXPERMDI,		/* Special handling for xxpermdi (presumably the
			   selector is adjusted — see the handler).  */
  SH_CONCAT,		/* Special handling for vec_concat.  */
  SH_VPERM		/* Adjust a vperm mask loaded from the constant
			   pool.  */
};
41811 | ||
41812 | /* Union INSN with all insns containing definitions that reach USE. | |
41813 | Detect whether USE is live-in to the current function. */ | |
41814 | static void | |
41815 | union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use) | |
41816 | { | |
41817 | struct df_link *link = DF_REF_CHAIN (use); | |
41818 | ||
41819 | if (!link) | |
41820 | insn_entry[INSN_UID (insn)].is_live_in = 1; | |
41821 | ||
41822 | while (link) | |
41823 | { | |
41824 | if (DF_REF_IS_ARTIFICIAL (link->ref)) | |
41825 | insn_entry[INSN_UID (insn)].is_live_in = 1; | |
41826 | ||
41827 | if (DF_REF_INSN_INFO (link->ref)) | |
41828 | { | |
41829 | rtx def_insn = DF_REF_INSN (link->ref); | |
41830 | (void)unionfind_union (insn_entry + INSN_UID (insn), | |
41831 | insn_entry + INSN_UID (def_insn)); | |
41832 | } | |
41833 | ||
41834 | link = link->next; | |
41835 | } | |
41836 | } | |
41837 | ||
41838 | /* Union INSN with all insns containing uses reached from DEF. | |
41839 | Detect whether DEF is live-out from the current function. */ | |
41840 | static void | |
41841 | union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def) | |
41842 | { | |
41843 | struct df_link *link = DF_REF_CHAIN (def); | |
41844 | ||
41845 | if (!link) | |
41846 | insn_entry[INSN_UID (insn)].is_live_out = 1; | |
41847 | ||
41848 | while (link) | |
41849 | { | |
41850 | /* This could be an eh use or some other artificial use; | |
41851 | we treat these all the same (killing the optimization). */ | |
41852 | if (DF_REF_IS_ARTIFICIAL (link->ref)) | |
41853 | insn_entry[INSN_UID (insn)].is_live_out = 1; | |
41854 | ||
41855 | if (DF_REF_INSN_INFO (link->ref)) | |
41856 | { | |
41857 | rtx use_insn = DF_REF_INSN (link->ref); | |
41858 | (void)unionfind_union (insn_entry + INSN_UID (insn), | |
41859 | insn_entry + INSN_UID (use_insn)); | |
41860 | } | |
41861 | ||
41862 | link = link->next; | |
41863 | } | |
41864 | } | |
41865 | ||
41866 | /* Return 1 iff INSN is a load insn, including permuting loads that | |
41867 | represent an lvxd2x instruction; else return 0. */ | |
41868 | static unsigned int | |
41869 | insn_is_load_p (rtx insn) | |
41870 | { | |
41871 | rtx body = PATTERN (insn); | |
41872 | ||
41873 | if (GET_CODE (body) == SET) | |
41874 | { | |
41875 | if (GET_CODE (SET_SRC (body)) == MEM) | |
41876 | return 1; | |
41877 | ||
41878 | if (GET_CODE (SET_SRC (body)) == VEC_SELECT | |
41879 | && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM) | |
41880 | return 1; | |
41881 | ||
41882 | return 0; | |
41883 | } | |
41884 | ||
41885 | if (GET_CODE (body) != PARALLEL) | |
41886 | return 0; | |
41887 | ||
41888 | rtx set = XVECEXP (body, 0, 0); | |
41889 | ||
41890 | if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM) | |
41891 | return 1; | |
41892 | ||
41893 | return 0; | |
41894 | } | |
41895 | ||
41896 | /* Return 1 iff INSN is a store insn, including permuting stores that | |
41897 | represent an stvxd2x instruction; else return 0. */ | |
41898 | static unsigned int | |
41899 | insn_is_store_p (rtx insn) | |
41900 | { | |
41901 | rtx body = PATTERN (insn); | |
41902 | if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM) | |
41903 | return 1; | |
41904 | if (GET_CODE (body) != PARALLEL) | |
41905 | return 0; | |
41906 | rtx set = XVECEXP (body, 0, 0); | |
41907 | if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM) | |
41908 | return 1; | |
41909 | return 0; | |
41910 | } | |
41911 | ||
41912 | /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap, | |
41913 | a permuting load, or a permuting store. */ | |
41914 | static unsigned int | |
41915 | insn_is_swap_p (rtx insn) | |
41916 | { | |
41917 | rtx body = PATTERN (insn); | |
41918 | if (GET_CODE (body) != SET) | |
41919 | return 0; | |
41920 | rtx rhs = SET_SRC (body); | |
41921 | if (GET_CODE (rhs) != VEC_SELECT) | |
41922 | return 0; | |
41923 | rtx parallel = XEXP (rhs, 1); | |
41924 | if (GET_CODE (parallel) != PARALLEL) | |
41925 | return 0; | |
41926 | unsigned int len = XVECLEN (parallel, 0); | |
41927 | if (len != 2 && len != 4 && len != 8 && len != 16) | |
41928 | return 0; | |
41929 | for (unsigned int i = 0; i < len / 2; ++i) | |
41930 | { | |
41931 | rtx op = XVECEXP (parallel, 0, i); | |
41932 | if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i) | |
41933 | return 0; | |
41934 | } | |
41935 | for (unsigned int i = len / 2; i < len; ++i) | |
41936 | { | |
41937 | rtx op = XVECEXP (parallel, 0, i); | |
41938 | if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2) | |
41939 | return 0; | |
41940 | } | |
41941 | return 1; | |
41942 | } | |
41943 | ||
/* Return TRUE if insn is a swap fed by a load from the constant pool.  */
static bool
const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
{
  /* Only a register swap (not itself a load) can match.  */
  unsigned uid = INSN_UID (insn);
  if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
    return false;

  /* Find the unique use in the swap and locate its def.  If the def
     isn't unique, punt.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      if (!def_link || def_link->next)
	return false;

      /* The feeding insn must be a permuting load (load + swap).  */
      rtx def_insn = DF_REF_INSN (def_link->ref);
      unsigned uid2 = INSN_UID (def_insn);
      if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
	return false;

      rtx body = PATTERN (def_insn);
      if (GET_CODE (body) != SET
	  || GET_CODE (SET_SRC (body)) != VEC_SELECT
	  || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
	return false;

      /* Chase the base register of the load back to its definition,
	 which must be a unique TOC-relative expression naming a
	 constant-pool symbol.  */
      rtx mem = XEXP (SET_SRC (body), 0);
      rtx base_reg = XEXP (mem, 0);

      df_ref base_use;
      insn_info = DF_INSN_INFO_GET (def_insn);
      FOR_EACH_INSN_INFO_USE (base_use, insn_info)
	{
	  if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
	    continue;

	  struct df_link *base_def_link = DF_REF_CHAIN (base_use);
	  if (!base_def_link || base_def_link->next)
	    return false;

	  rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
	  rtx tocrel_body = PATTERN (tocrel_insn);
	  rtx base, offset;
	  if (GET_CODE (tocrel_body) != SET)
	    return false;
	  /* There is an extra level of indirection for small/large
	     code models.  */
	  rtx tocrel_expr = SET_SRC (tocrel_body);
	  if (GET_CODE (tocrel_expr) == MEM)
	    tocrel_expr = XEXP (tocrel_expr, 0);
	  if (!toc_relative_expr_p (tocrel_expr, false))
	    return false;
	  /* NOTE(review): tocrel_base is a file-scope variable that
	     appears to be set as a side effect of toc_relative_expr_p
	     above — confirm that coupling when modifying this code.  */
	  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
	  if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
	    return false;
	}
    }
  return true;
}
42006 | ||
42007 | /* Return TRUE iff OP matches a V2DF reduction pattern. See the | |
42008 | definition of vsx_reduc_<VEC_reduc_name>_v2df in vsx.md. */ | |
42009 | static bool | |
42010 | v2df_reduction_p (rtx op) | |
42011 | { | |
42012 | if (GET_MODE (op) != V2DFmode) | |
42013 | return false; | |
42014 | ||
42015 | enum rtx_code code = GET_CODE (op); | |
42016 | if (code != PLUS && code != SMIN && code != SMAX) | |
42017 | return false; | |
42018 | ||
42019 | rtx concat = XEXP (op, 0); | |
42020 | if (GET_CODE (concat) != VEC_CONCAT) | |
42021 | return false; | |
42022 | ||
42023 | rtx select0 = XEXP (concat, 0); | |
42024 | rtx select1 = XEXP (concat, 1); | |
42025 | if (GET_CODE (select0) != VEC_SELECT || GET_CODE (select1) != VEC_SELECT) | |
42026 | return false; | |
42027 | ||
42028 | rtx reg0 = XEXP (select0, 0); | |
42029 | rtx reg1 = XEXP (select1, 0); | |
42030 | if (!rtx_equal_p (reg0, reg1) || !REG_P (reg0)) | |
42031 | return false; | |
42032 | ||
42033 | rtx parallel0 = XEXP (select0, 1); | |
42034 | rtx parallel1 = XEXP (select1, 1); | |
42035 | if (GET_CODE (parallel0) != PARALLEL || GET_CODE (parallel1) != PARALLEL) | |
42036 | return false; | |
42037 | ||
42038 | if (!rtx_equal_p (XVECEXP (parallel0, 0, 0), const1_rtx) | |
42039 | || !rtx_equal_p (XVECEXP (parallel1, 0, 0), const0_rtx)) | |
42040 | return false; | |
42041 | ||
42042 | return true; | |
42043 | } | |
42044 | ||
/* Return 1 iff OP is an operand that will not be affected by having
   vector doublewords swapped in memory.  *SPECIAL is set to an SH_*
   code when OP is only safe with additional fixup (the caller is
   expected to pass in *SPECIAL == SH_NONE; two conflicting kinds of
   special handling in one expression cause a 0 return).  */
static unsigned int
rtx_is_swappable_p (rtx op, unsigned int *special)
{
  enum rtx_code code = GET_CODE (op);
  int i, j;
  rtx parallel;

  switch (code)
    {
    /* These either reference no vector lanes at all, or (REG) are
       fully lane-symmetric, so they are trivially safe.  */
    case LABEL_REF:
    case SYMBOL_REF:
    case CLOBBER:
    case REG:
      return 1;

    /* Lane-order-sensitive constructs we make no attempt to fix.  */
    case VEC_CONCAT:
    case ASM_INPUT:
    case ASM_OPERANDS:
      return 0;

    case CONST_VECTOR:
      {
	/* Safe provided we swap the two halves of the constant
	   itself; see swap_const_vector_halves.  */
	*special = SH_CONST_VECTOR;
	return 1;
      }

    case VEC_DUPLICATE:
      /* Opportunity: If XEXP (op, 0) has the same mode as the result,
	 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
	 it represents a vector splat for which we can do special
	 handling.  */
      if (GET_CODE (XEXP (op, 0)) == CONST_INT)
	return 1;
      else if (REG_P (XEXP (op, 0))
	       && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
	/* This catches V2DF and V2DI splat, at a minimum.  */
	return 1;
      else if (GET_CODE (XEXP (op, 0)) == TRUNCATE
	       && REG_P (XEXP (XEXP (op, 0), 0))
	       && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
	/* This catches splat of a truncated value.  */
	return 1;
      else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
	/* If the duplicated item is from a select, defer to the select
	   processing to see if we can change the lane for the splat.  */
	return rtx_is_swappable_p (XEXP (op, 0), special);
      else
	return 0;

    case VEC_SELECT:
      /* A vec_extract operation is ok if we change the lane.  */
      if (GET_CODE (XEXP (op, 0)) == REG
	  && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
	  && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
	  && XVECLEN (parallel, 0) == 1
	  && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
	{
	  *special = SH_EXTRACT;
	  return 1;
	}
      /* An XXPERMDI is ok if we adjust the lanes.  Note that if the
	 XXPERMDI is a swap operation, it will be identified by
	 insn_is_swap_p and therefore we won't get here.  */
      else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
	       && (GET_MODE (XEXP (op, 0)) == V4DFmode
		   || GET_MODE (XEXP (op, 0)) == V4DImode)
	       && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
	       && XVECLEN (parallel, 0) == 2
	       && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
	       && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
	{
	  *special = SH_XXPERMDI;
	  return 1;
	}
      else if (v2df_reduction_p (op))
	return 1;
      else
	return 0;

    case UNSPEC:
      {
	/* Various operations are unsafe for this optimization, at least
	   without significant additional work.  Permutes are obviously
	   problematic, as both the permute control vector and the ordering
	   of the target values are invalidated by doubleword swapping.
	   Vector pack and unpack modify the number of vector lanes.
	   Merge-high/low will not operate correctly on swapped operands.
	   Vector shifts across element boundaries are clearly uncool,
	   as are vector select and concatenate operations.  Vector
	   sum-across instructions define one operand with a specific
	   order-dependent element, so additional fixup code would be
	   needed to make those work.  Vector set and non-immediate-form
	   vector splat are element-order sensitive.  A few of these
	   cases might be workable with special handling if required.
	   Adding cost modeling would be appropriate in some cases.  */
	int val = XINT (op, 1);
	switch (val)
	  {
	  default:
	    break;
	  case UNSPEC_VMRGH_DIRECT:
	  case UNSPEC_VMRGL_DIRECT:
	  case UNSPEC_VPACK_SIGN_SIGN_SAT:
	  case UNSPEC_VPACK_SIGN_UNS_SAT:
	  case UNSPEC_VPACK_UNS_UNS_MOD:
	  case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
	  case UNSPEC_VPACK_UNS_UNS_SAT:
	  case UNSPEC_VPERM:
	  case UNSPEC_VPERM_UNS:
	  case UNSPEC_VPERMHI:
	  case UNSPEC_VPERMSI:
	  case UNSPEC_VPKPX:
	  case UNSPEC_VSLDOI:
	  case UNSPEC_VSLO:
	  case UNSPEC_VSRO:
	  case UNSPEC_VSUM2SWS:
	  case UNSPEC_VSUM4S:
	  case UNSPEC_VSUM4UBS:
	  case UNSPEC_VSUMSWS:
	  case UNSPEC_VSUMSWS_DIRECT:
	  case UNSPEC_VSX_CONCAT:
	  case UNSPEC_VSX_SET:
	  case UNSPEC_VSX_SLDWI:
	  case UNSPEC_VUNPACK_HI_SIGN:
	  case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
	  case UNSPEC_VUNPACK_LO_SIGN:
	  case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
	  case UNSPEC_VUPKHPX:
	  case UNSPEC_VUPKHS_V4SF:
	  case UNSPEC_VUPKHU_V4SF:
	  case UNSPEC_VUPKLPX:
	  case UNSPEC_VUPKLS_V4SF:
	  case UNSPEC_VUPKLU_V4SF:
	  case UNSPEC_VSX_CVDPSPN:
	  case UNSPEC_VSX_CVSPDP:
	  case UNSPEC_VSX_CVSPDPN:
	  case UNSPEC_VSX_EXTRACT:
	  case UNSPEC_VSX_VSLO:
	  case UNSPEC_VSX_VEC_INIT:
	    return 0;
	  case UNSPEC_VSPLT_DIRECT:
	  case UNSPEC_VSX_XXSPLTD:
	    /* Direct splats are fine if we retarget the source lane.  */
	    *special = SH_SPLAT;
	    return 1;
	  case UNSPEC_REDUC_PLUS:
	  case UNSPEC_REDUC:
	    return 1;
	  }
      }

    default:
      break;
    }

  /* No special case applied; OP is safe iff every sub-expression is.
     Recurse over the operand format string, combining each operand's
     verdict and propagating at most one kind of special handling.  */
  const char *fmt = GET_RTX_FORMAT (code);
  int ok = 1;

  for (i = 0; i < GET_RTX_LENGTH (code); ++i)
    if (fmt[i] == 'e' || fmt[i] == 'u')
      {
	unsigned int special_op = SH_NONE;
	ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
	if (special_op == SH_NONE)
	  continue;
	/* Ensure we never have two kinds of special handling
	   for the same insn.  */
	if (*special != SH_NONE && *special != special_op)
	  return 0;
	*special = special_op;
      }
    else if (fmt[i] == 'E')
      for (j = 0; j < XVECLEN (op, i); ++j)
	{
	  unsigned int special_op = SH_NONE;
	  ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
	  if (special_op == SH_NONE)
	    continue;
	  /* Ensure we never have two kinds of special handling
	     for the same insn.  */
	  if (*special != SH_NONE && *special != special_op)
	    return 0;
	  *special = special_op;
	}

  return ok;
}
42233 | ||
/* Return 1 iff INSN is an operand that will not be affected by
   having vector doublewords swapped in memory (in which case
   *SPECIAL is unchanged), or that can be modified to be correct
   if vector doublewords are swapped in memory (in which case
   *SPECIAL is changed to a value indicating how).  */
static unsigned int
insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
		     unsigned int *special)
{
  /* Calls are always bad.  */
  if (GET_CODE (insn) == CALL_INSN)
    return 0;

  /* Loads and stores seen here are not permuting, but we can still
     fix them up by converting them to permuting ones.  Exceptions:
     UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
     body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
     for the SET source.  Also we must now make an exception for lvx
     and stvx when they are not in the UNSPEC_LVX/STVX form (with the
     explicit "& -16") since this leads to unrecognizable insns.  */
  rtx body = PATTERN (insn);
  int i = INSN_UID (insn);

  if (insn_entry[i].is_load)
    {
      if (GET_CODE (body) == SET)
	{
	  rtx rhs = SET_SRC (body);
	  /* Even without a swap, the RHS might be a vec_select for, say,
	     a byte-reversing load.  */
	  if (GET_CODE (rhs) != MEM)
	    return 0;
	  /* An "& -16" address marks an lvx-style access; punt per the
	     comment above.  */
	  if (GET_CODE (XEXP (rhs, 0)) == AND)
	    return 0;

	  *special = SH_NOSWAP_LD;
	  return 1;
	}
      else
	return 0;
    }

  if (insn_entry[i].is_store)
    {
      /* An UNSPEC source identifies stve-style stores; see above.  */
      if (GET_CODE (body) == SET
	  && GET_CODE (SET_SRC (body)) != UNSPEC)
	{
	  rtx lhs = SET_DEST (body);
	  /* Even without a swap, the LHS might be a vec_select for, say,
	     a byte-reversing store.  */
	  if (GET_CODE (lhs) != MEM)
	    return 0;
	  /* An "& -16" address marks an stvx-style access; punt.  */
	  if (GET_CODE (XEXP (lhs, 0)) == AND)
	    return 0;

	  *special = SH_NOSWAP_ST;
	  return 1;
	}
      else
	return 0;
    }

  /* A convert to single precision can be left as is provided that
     all of its uses are in xxspltw instructions that splat BE element
     zero.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == UNSPEC
      && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
    {
      df_ref def;
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);

      FOR_EACH_INSN_INFO_DEF (def, insn_info)
	{
	  struct df_link *link = DF_REF_CHAIN (def);
	  /* A def with no uses tells us nothing; be conservative.  */
	  if (!link)
	    return 0;

	  /* Every use must be an xxspltw of element zero.  */
	  for (; link; link = link->next) {
	    rtx use_insn = DF_REF_INSN (link->ref);
	    rtx use_body = PATTERN (use_insn);
	    if (GET_CODE (use_body) != SET
		|| GET_CODE (SET_SRC (use_body)) != UNSPEC
		|| XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
		|| XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
	      return 0;
	  }
	}

      return 1;
    }

  /* A concatenation of two doublewords is ok if we reverse the
     order of the inputs.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == VEC_CONCAT
      && (GET_MODE (SET_SRC (body)) == V2DFmode
	  || GET_MODE (SET_SRC (body)) == V2DImode))
    {
      *special = SH_CONCAT;
      return 1;
    }

  /* V2DF reductions are always swappable.  */
  if (GET_CODE (body) == PARALLEL)
    {
      rtx expr = XVECEXP (body, 0, 0);
      if (GET_CODE (expr) == SET
	  && v2df_reduction_p (SET_SRC (expr)))
	return 1;
    }

  /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
     constant pool.  */
  if (GET_CODE (body) == SET
      && GET_CODE (SET_SRC (body)) == UNSPEC
      && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
      && XVECLEN (SET_SRC (body), 0) == 3
      && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
    {
      rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      df_ref use;
      FOR_EACH_INSN_INFO_USE (use, insn_info)
	if (rtx_equal_p (DF_REF_REG (use), mask_reg))
	  {
	    struct df_link *def_link = DF_REF_CHAIN (use);
	    /* The mask must have exactly one reaching definition
	       (punt on multiple defs), and that def must be a
	       recognized constant-pool load sequence.  */
	    if (def_link && !def_link->next &&
		const_load_sequence_p (insn_entry,
				       DF_REF_INSN (def_link->ref)))
	      {
		*special = SH_VPERM;
		return 1;
	      }
	  }
    }

  /* Otherwise check the operands for vector lane violations.  */
  return rtx_is_swappable_p (body, special);
}
42375 | ||
42376 | enum chain_purpose { FOR_LOADS, FOR_STORES }; | |
42377 | ||
42378 | /* Return true if the UD or DU chain headed by LINK is non-empty, | |
42379 | and every entry on the chain references an insn that is a | |
42380 | register swap. Furthermore, if PURPOSE is FOR_LOADS, each such | |
42381 | register swap must have only permuting loads as reaching defs. | |
42382 | If PURPOSE is FOR_STORES, each such register swap must have only | |
42383 | register swaps or permuting stores as reached uses. */ | |
42384 | static bool | |
42385 | chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link, | |
42386 | enum chain_purpose purpose) | |
42387 | { | |
42388 | if (!link) | |
42389 | return false; | |
42390 | ||
42391 | for (; link; link = link->next) | |
42392 | { | |
42393 | if (!ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (DF_REF_REG (link->ref)))) | |
42394 | continue; | |
42395 | ||
42396 | if (DF_REF_IS_ARTIFICIAL (link->ref)) | |
42397 | return false; | |
42398 | ||
42399 | rtx reached_insn = DF_REF_INSN (link->ref); | |
42400 | unsigned uid = INSN_UID (reached_insn); | |
42401 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn); | |
42402 | ||
42403 | if (!insn_entry[uid].is_swap || insn_entry[uid].is_load | |
42404 | || insn_entry[uid].is_store) | |
42405 | return false; | |
42406 | ||
42407 | if (purpose == FOR_LOADS) | |
42408 | { | |
42409 | df_ref use; | |
42410 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
42411 | { | |
42412 | struct df_link *swap_link = DF_REF_CHAIN (use); | |
42413 | ||
42414 | while (swap_link) | |
42415 | { | |
42416 | if (DF_REF_IS_ARTIFICIAL (link->ref)) | |
42417 | return false; | |
42418 | ||
42419 | rtx swap_def_insn = DF_REF_INSN (swap_link->ref); | |
42420 | unsigned uid2 = INSN_UID (swap_def_insn); | |
42421 | ||
42422 | /* Only permuting loads are allowed. */ | |
42423 | if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load) | |
42424 | return false; | |
42425 | ||
42426 | swap_link = swap_link->next; | |
42427 | } | |
42428 | } | |
42429 | } | |
42430 | else if (purpose == FOR_STORES) | |
42431 | { | |
42432 | df_ref def; | |
42433 | FOR_EACH_INSN_INFO_DEF (def, insn_info) | |
42434 | { | |
42435 | struct df_link *swap_link = DF_REF_CHAIN (def); | |
42436 | ||
42437 | while (swap_link) | |
42438 | { | |
42439 | if (DF_REF_IS_ARTIFICIAL (link->ref)) | |
42440 | return false; | |
42441 | ||
42442 | rtx swap_use_insn = DF_REF_INSN (swap_link->ref); | |
42443 | unsigned uid2 = INSN_UID (swap_use_insn); | |
42444 | ||
42445 | /* Permuting stores or register swaps are allowed. */ | |
42446 | if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load) | |
42447 | return false; | |
42448 | ||
42449 | swap_link = swap_link->next; | |
42450 | } | |
42451 | } | |
42452 | } | |
42453 | } | |
42454 | ||
42455 | return true; | |
42456 | } | |
42457 | ||
42458 | /* Mark the xxswapdi instructions associated with permuting loads and | |
42459 | stores for removal. Note that we only flag them for deletion here, | |
42460 | as there is a possibility of a swap being reached from multiple | |
42461 | loads, etc. */ | |
42462 | static void | |
42463 | mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i) | |
42464 | { | |
42465 | rtx insn = insn_entry[i].insn; | |
42466 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
42467 | ||
42468 | if (insn_entry[i].is_load) | |
42469 | { | |
42470 | df_ref def; | |
42471 | FOR_EACH_INSN_INFO_DEF (def, insn_info) | |
42472 | { | |
42473 | struct df_link *link = DF_REF_CHAIN (def); | |
42474 | ||
42475 | /* We know by now that these are swaps, so we can delete | |
42476 | them confidently. */ | |
42477 | while (link) | |
42478 | { | |
42479 | rtx use_insn = DF_REF_INSN (link->ref); | |
42480 | insn_entry[INSN_UID (use_insn)].will_delete = 1; | |
42481 | link = link->next; | |
42482 | } | |
42483 | } | |
42484 | } | |
42485 | else if (insn_entry[i].is_store) | |
42486 | { | |
42487 | df_ref use; | |
42488 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
42489 | { | |
42490 | /* Ignore uses for addressability. */ | |
42491 | machine_mode mode = GET_MODE (DF_REF_REG (use)); | |
42492 | if (!ALTIVEC_OR_VSX_VECTOR_MODE (mode)) | |
42493 | continue; | |
42494 | ||
42495 | struct df_link *link = DF_REF_CHAIN (use); | |
42496 | ||
42497 | /* We know by now that these are swaps, so we can delete | |
42498 | them confidently. */ | |
42499 | while (link) | |
42500 | { | |
42501 | rtx def_insn = DF_REF_INSN (link->ref); | |
42502 | insn_entry[INSN_UID (def_insn)].will_delete = 1; | |
42503 | link = link->next; | |
42504 | } | |
42505 | } | |
42506 | } | |
42507 | } | |
42508 | ||
/* *OP_PTR is either a CONST_VECTOR or an expression containing one.
   Swap the first half of the vector with the second in the first
   case.  Recurse to find it in the second.  */
static void
swap_const_vector_halves (rtx *op_ptr)
{
  int i;
  rtx op = *op_ptr;
  enum rtx_code code = GET_CODE (op);
  if (GET_CODE (op) == CONST_VECTOR)
    {
      /* Rebuild the constant with its two halves exchanged: push
	 elements [units/2, units) first, then [0, units/2), and
	 replace *OP_PTR with the new shared constant.  */
      int units = GET_MODE_NUNITS (GET_MODE (op));
      rtx_vector_builder builder (GET_MODE (op), units, 1);
      for (i = 0; i < units / 2; ++i)
	builder.quick_push (CONST_VECTOR_ELT (op, i + units / 2));
      for (i = 0; i < units / 2; ++i)
	builder.quick_push (CONST_VECTOR_ELT (op, i));
      *op_ptr = builder.build ();
    }
  else
    {
      /* Not a constant vector itself: walk every sub-rtx (including
	 rtx vectors) looking for one to swap.  */
      int j;
      const char *fmt = GET_RTX_FORMAT (code);
      for (i = 0; i < GET_RTX_LENGTH (code); ++i)
	if (fmt[i] == 'e' || fmt[i] == 'u')
	  swap_const_vector_halves (&XEXP (op, i));
	else if (fmt[i] == 'E')
	  for (j = 0; j < XVECLEN (op, i); ++j)
	    swap_const_vector_halves (&XVECEXP (op, i, j));
    }
}
42540 | ||
42541 | /* Find all subregs of a vector expression that perform a narrowing, | |
42542 | and adjust the subreg index to account for doubleword swapping. */ | |
42543 | static void | |
42544 | adjust_subreg_index (rtx op) | |
42545 | { | |
42546 | enum rtx_code code = GET_CODE (op); | |
42547 | if (code == SUBREG | |
42548 | && (GET_MODE_SIZE (GET_MODE (op)) | |
42549 | < GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))) | |
42550 | { | |
42551 | unsigned int index = SUBREG_BYTE (op); | |
42552 | if (index < 8) | |
42553 | index += 8; | |
42554 | else | |
42555 | index -= 8; | |
42556 | SUBREG_BYTE (op) = index; | |
42557 | } | |
42558 | ||
42559 | const char *fmt = GET_RTX_FORMAT (code); | |
42560 | int i,j; | |
42561 | for (i = 0; i < GET_RTX_LENGTH (code); ++i) | |
42562 | if (fmt[i] == 'e' || fmt[i] == 'u') | |
42563 | adjust_subreg_index (XEXP (op, i)); | |
42564 | else if (fmt[i] == 'E') | |
42565 | for (j = 0; j < XVECLEN (op, i); ++j) | |
42566 | adjust_subreg_index (XVECEXP (op, i, j)); | |
42567 | } | |
42568 | ||
42569 | /* Convert the non-permuting load INSN to a permuting one. */ | |
42570 | static void | |
42571 | permute_load (rtx_insn *insn) | |
42572 | { | |
42573 | rtx body = PATTERN (insn); | |
42574 | rtx mem_op = SET_SRC (body); | |
42575 | rtx tgt_reg = SET_DEST (body); | |
42576 | machine_mode mode = GET_MODE (tgt_reg); | |
42577 | int n_elts = GET_MODE_NUNITS (mode); | |
42578 | int half_elts = n_elts / 2; | |
42579 | rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); | |
42580 | int i, j; | |
42581 | for (i = 0, j = half_elts; i < half_elts; ++i, ++j) | |
42582 | XVECEXP (par, 0, i) = GEN_INT (j); | |
42583 | for (i = half_elts, j = 0; j < half_elts; ++i, ++j) | |
42584 | XVECEXP (par, 0, i) = GEN_INT (j); | |
42585 | rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par); | |
42586 | SET_SRC (body) = sel; | |
42587 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
42588 | df_insn_rescan (insn); | |
42589 | ||
42590 | if (dump_file) | |
42591 | fprintf (dump_file, "Replacing load %d with permuted load\n", | |
42592 | INSN_UID (insn)); | |
42593 | } | |
42594 | ||
42595 | /* Convert the non-permuting store INSN to a permuting one. */ | |
42596 | static void | |
42597 | permute_store (rtx_insn *insn) | |
42598 | { | |
42599 | rtx body = PATTERN (insn); | |
42600 | rtx src_reg = SET_SRC (body); | |
42601 | machine_mode mode = GET_MODE (src_reg); | |
42602 | int n_elts = GET_MODE_NUNITS (mode); | |
42603 | int half_elts = n_elts / 2; | |
42604 | rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts)); | |
42605 | int i, j; | |
42606 | for (i = 0, j = half_elts; i < half_elts; ++i, ++j) | |
42607 | XVECEXP (par, 0, i) = GEN_INT (j); | |
42608 | for (i = half_elts, j = 0; j < half_elts; ++i, ++j) | |
42609 | XVECEXP (par, 0, i) = GEN_INT (j); | |
42610 | rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par); | |
42611 | SET_SRC (body) = sel; | |
42612 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
42613 | df_insn_rescan (insn); | |
42614 | ||
42615 | if (dump_file) | |
42616 | fprintf (dump_file, "Replacing store %d with permuted store\n", | |
42617 | INSN_UID (insn)); | |
42618 | } | |
42619 | ||
42620 | /* Given OP that contains a vector extract operation, adjust the index | |
42621 | of the extracted lane to account for the doubleword swap. */ | |
42622 | static void | |
42623 | adjust_extract (rtx_insn *insn) | |
42624 | { | |
42625 | rtx pattern = PATTERN (insn); | |
42626 | if (GET_CODE (pattern) == PARALLEL) | |
42627 | pattern = XVECEXP (pattern, 0, 0); | |
42628 | rtx src = SET_SRC (pattern); | |
42629 | /* The vec_select may be wrapped in a vec_duplicate for a splat, so | |
42630 | account for that. */ | |
42631 | rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src; | |
42632 | rtx par = XEXP (sel, 1); | |
42633 | int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1; | |
42634 | int lane = INTVAL (XVECEXP (par, 0, 0)); | |
42635 | lane = lane >= half_elts ? lane - half_elts : lane + half_elts; | |
42636 | XVECEXP (par, 0, 0) = GEN_INT (lane); | |
42637 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
42638 | df_insn_rescan (insn); | |
42639 | ||
42640 | if (dump_file) | |
42641 | fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn)); | |
42642 | } | |
42643 | ||
42644 | /* Given OP that contains a vector direct-splat operation, adjust the index | |
42645 | of the source lane to account for the doubleword swap. */ | |
42646 | static void | |
42647 | adjust_splat (rtx_insn *insn) | |
42648 | { | |
42649 | rtx body = PATTERN (insn); | |
42650 | rtx unspec = XEXP (body, 1); | |
42651 | int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1; | |
42652 | int lane = INTVAL (XVECEXP (unspec, 0, 1)); | |
42653 | lane = lane >= half_elts ? lane - half_elts : lane + half_elts; | |
42654 | XVECEXP (unspec, 0, 1) = GEN_INT (lane); | |
42655 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
42656 | df_insn_rescan (insn); | |
42657 | ||
42658 | if (dump_file) | |
42659 | fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn)); | |
42660 | } | |
42661 | ||
42662 | /* Given OP that contains an XXPERMDI operation (that is not a doubleword | |
42663 | swap), reverse the order of the source operands and adjust the indices | |
42664 | of the source lanes to account for doubleword reversal. */ | |
42665 | static void | |
42666 | adjust_xxpermdi (rtx_insn *insn) | |
42667 | { | |
42668 | rtx set = PATTERN (insn); | |
42669 | rtx select = XEXP (set, 1); | |
42670 | rtx concat = XEXP (select, 0); | |
42671 | rtx src0 = XEXP (concat, 0); | |
42672 | XEXP (concat, 0) = XEXP (concat, 1); | |
42673 | XEXP (concat, 1) = src0; | |
42674 | rtx parallel = XEXP (select, 1); | |
42675 | int lane0 = INTVAL (XVECEXP (parallel, 0, 0)); | |
42676 | int lane1 = INTVAL (XVECEXP (parallel, 0, 1)); | |
42677 | int new_lane0 = 3 - lane1; | |
42678 | int new_lane1 = 3 - lane0; | |
42679 | XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0); | |
42680 | XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1); | |
42681 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
42682 | df_insn_rescan (insn); | |
42683 | ||
42684 | if (dump_file) | |
42685 | fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn)); | |
42686 | } | |
42687 | ||
42688 | /* Given OP that contains a VEC_CONCAT operation of two doublewords, | |
42689 | reverse the order of those inputs. */ | |
42690 | static void | |
42691 | adjust_concat (rtx_insn *insn) | |
42692 | { | |
42693 | rtx set = PATTERN (insn); | |
42694 | rtx concat = XEXP (set, 1); | |
42695 | rtx src0 = XEXP (concat, 0); | |
42696 | XEXP (concat, 0) = XEXP (concat, 1); | |
42697 | XEXP (concat, 1) = src0; | |
42698 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
42699 | df_insn_rescan (insn); | |
42700 | ||
42701 | if (dump_file) | |
42702 | fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn)); | |
42703 | } | |
42704 | ||
/* Given an UNSPEC_VPERM insn, modify the mask loaded from the
   constant pool to reflect swapped doublewords.  */
static void
adjust_vperm (rtx_insn *insn)
{
  /* We previously determined that the UNSPEC_VPERM was fed by a
     swap of a swapping load of a TOC-relative constant pool symbol.
     Find the MEM in the swapping load and replace it with a MEM for
     the adjusted mask constant.  */
  rtx set = PATTERN (insn);
  /* Operand 2 of the UNSPEC_VPERM is the permute control (mask)
     register.  */
  rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);

  /* Find the swap.  insn_is_swappable_p already verified there is
     exactly one reaching definition for the mask, hence the
     asserts on the def chains below.  */
  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
  df_ref use;
  rtx_insn *swap_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    if (rtx_equal_p (DF_REF_REG (use), mask_reg))
      {
	struct df_link *def_link = DF_REF_CHAIN (use);
	gcc_assert (def_link && !def_link->next);
	swap_insn = DF_REF_INSN (def_link->ref);
	break;
      }
  gcc_assert (swap_insn);

  /* Find the load that feeds the swap.  */
  insn_info = DF_INSN_INFO_GET (swap_insn);
  rtx_insn *load_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      gcc_assert (def_link && !def_link->next);
      load_insn = DF_REF_INSN (def_link->ref);
      break;
    }
  gcc_assert (load_insn);

  /* Find the TOC-relative symbol access that feeds the load.  */
  insn_info = DF_INSN_INFO_GET (load_insn);
  rtx_insn *tocrel_insn = 0;
  FOR_EACH_INSN_INFO_USE (use, insn_info)
    {
      struct df_link *def_link = DF_REF_CHAIN (use);
      gcc_assert (def_link && !def_link->next);
      tocrel_insn = DF_REF_INSN (def_link->ref);
      break;
    }
  gcc_assert (tocrel_insn);

  /* Find the embedded CONST_VECTOR.  We have to call toc_relative_expr_p
     to set tocrel_base; otherwise it would be unnecessary as we've
     already established it will return true.  */
  rtx base, offset;
  rtx tocrel_expr = SET_SRC (PATTERN (tocrel_insn));
  /* There is an extra level of indirection for small/large code models.  */
  if (GET_CODE (tocrel_expr) == MEM)
    tocrel_expr = XEXP (tocrel_expr, 0);
  if (!toc_relative_expr_p (tocrel_expr, false))
    gcc_unreachable ();
  split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
  rtx const_vector = get_pool_constant (base);
  /* With the extra indirection, get_pool_constant will produce the
     real constant from the reg_equal expression, so get the real
     constant.  */
  if (GET_CODE (const_vector) == SYMBOL_REF)
    const_vector = get_pool_constant (const_vector);
  gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);

  /* Create an adjusted mask from the initial mask.  Each byte selector
     is rotated by 8 mod 16 while staying within the same source
     vector (selectors 0-15 pick from the first input, 16-31 from the
     second).  */
  unsigned int new_mask[16], i, val;
  for (i = 0; i < 16; ++i) {
    val = INTVAL (XVECEXP (const_vector, 0, i));
    if (val < 16)
      new_mask[i] = (val + 8) % 16;
    else
      new_mask[i] = ((val + 8) % 16) + 16;
  }

  /* Create a new CONST_VECTOR and a MEM that references it.  */
  rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
  for (i = 0; i < 16; ++i)
    XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
  rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
  rtx new_mem = force_const_mem (V16QImode, new_const_vector);
  /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
     can't recognize.  Force the SYMBOL_REF into a register.  */
  if (!REG_P (XEXP (new_mem, 0))) {
    rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
    XEXP (new_mem, 0) = base_reg;
    /* Move the newly created insn ahead of the load insn.  force_reg
       emitted it at the current insertion point, so pull it out of
       the stream and splice it in just before the load.  */
    rtx_insn *force_insn = get_last_insn ();
    remove_insn (force_insn);
    rtx_insn *before_load_insn = PREV_INSN (load_insn);
    add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
    df_insn_rescan (before_load_insn);
    df_insn_rescan (force_insn);
  }

  /* Replace the MEM in the load instruction and rescan it.  */
  XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
  INSN_CODE (load_insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (load_insn);

  if (dump_file)
    fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
}
42812 | ||
42813 | /* The insn described by INSN_ENTRY[I] can be swapped, but only | |
42814 | with special handling. Take care of that here. */ | |
42815 | static void | |
42816 | handle_special_swappables (swap_web_entry *insn_entry, unsigned i) | |
42817 | { | |
42818 | rtx_insn *insn = insn_entry[i].insn; | |
42819 | rtx body = PATTERN (insn); | |
42820 | ||
42821 | switch (insn_entry[i].special_handling) | |
42822 | { | |
42823 | default: | |
42824 | gcc_unreachable (); | |
42825 | case SH_CONST_VECTOR: | |
42826 | { | |
42827 | /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */ | |
42828 | gcc_assert (GET_CODE (body) == SET); | |
3877c560 | 42829 | swap_const_vector_halves (&SET_SRC (body)); |
83349046 SB |
42830 | if (dump_file) |
42831 | fprintf (dump_file, "Swapping constant halves in insn %d\n", i); | |
42832 | break; | |
42833 | } | |
42834 | case SH_SUBREG: | |
42835 | /* A subreg of the same size is already safe. For subregs that | |
42836 | select a smaller portion of a reg, adjust the index for | |
42837 | swapped doublewords. */ | |
42838 | adjust_subreg_index (body); | |
42839 | if (dump_file) | |
42840 | fprintf (dump_file, "Adjusting subreg in insn %d\n", i); | |
42841 | break; | |
42842 | case SH_NOSWAP_LD: | |
42843 | /* Convert a non-permuting load to a permuting one. */ | |
42844 | permute_load (insn); | |
42845 | break; | |
42846 | case SH_NOSWAP_ST: | |
42847 | /* Convert a non-permuting store to a permuting one. */ | |
42848 | permute_store (insn); | |
42849 | break; | |
42850 | case SH_EXTRACT: | |
42851 | /* Change the lane on an extract operation. */ | |
42852 | adjust_extract (insn); | |
42853 | break; | |
42854 | case SH_SPLAT: | |
42855 | /* Change the lane on a direct-splat operation. */ | |
42856 | adjust_splat (insn); | |
42857 | break; | |
42858 | case SH_XXPERMDI: | |
42859 | /* Change the lanes on an XXPERMDI operation. */ | |
42860 | adjust_xxpermdi (insn); | |
42861 | break; | |
42862 | case SH_CONCAT: | |
42863 | /* Reverse the order of a concatenation operation. */ | |
42864 | adjust_concat (insn); | |
42865 | break; | |
42866 | case SH_VPERM: | |
42867 | /* Change the mask loaded from the constant pool for a VPERM. */ | |
42868 | adjust_vperm (insn); | |
42869 | break; | |
42870 | } | |
42871 | } | |
42872 | ||
42873 | /* Find the insn from the Ith table entry, which is known to be a | |
42874 | register swap Y = SWAP(X). Replace it with a copy Y = X. */ | |
42875 | static void | |
42876 | replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i) | |
42877 | { | |
42878 | rtx_insn *insn = insn_entry[i].insn; | |
42879 | rtx body = PATTERN (insn); | |
42880 | rtx src_reg = XEXP (SET_SRC (body), 0); | |
42881 | rtx copy = gen_rtx_SET (SET_DEST (body), src_reg); | |
42882 | rtx_insn *new_insn = emit_insn_before (copy, insn); | |
42883 | set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn)); | |
42884 | df_insn_rescan (new_insn); | |
42885 | ||
42886 | if (dump_file) | |
42887 | { | |
42888 | unsigned int new_uid = INSN_UID (new_insn); | |
42889 | fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid); | |
42890 | } | |
42891 | ||
42892 | df_insn_delete (insn); | |
42893 | remove_insn (insn); | |
42894 | insn->set_deleted (); | |
42895 | } | |
42896 | ||
42897 | /* Dump the swap table to DUMP_FILE. */ | |
42898 | static void | |
42899 | dump_swap_insn_table (swap_web_entry *insn_entry) | |
42900 | { | |
42901 | int e = get_max_uid (); | |
42902 | fprintf (dump_file, "\nRelevant insns with their flag settings\n\n"); | |
42903 | ||
42904 | for (int i = 0; i < e; ++i) | |
42905 | if (insn_entry[i].is_relevant) | |
42906 | { | |
42907 | swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred (); | |
42908 | fprintf (dump_file, "%6d %6d ", i, | |
42909 | pred_entry && pred_entry->insn | |
42910 | ? INSN_UID (pred_entry->insn) : 0); | |
42911 | if (insn_entry[i].is_load) | |
42912 | fputs ("load ", dump_file); | |
42913 | if (insn_entry[i].is_store) | |
42914 | fputs ("store ", dump_file); | |
42915 | if (insn_entry[i].is_swap) | |
42916 | fputs ("swap ", dump_file); | |
42917 | if (insn_entry[i].is_live_in) | |
42918 | fputs ("live-in ", dump_file); | |
42919 | if (insn_entry[i].is_live_out) | |
42920 | fputs ("live-out ", dump_file); | |
42921 | if (insn_entry[i].contains_subreg) | |
42922 | fputs ("subreg ", dump_file); | |
42923 | if (insn_entry[i].is_128_int) | |
42924 | fputs ("int128 ", dump_file); | |
42925 | if (insn_entry[i].is_call) | |
42926 | fputs ("call ", dump_file); | |
42927 | if (insn_entry[i].is_swappable) | |
42928 | { | |
42929 | fputs ("swappable ", dump_file); | |
42930 | if (insn_entry[i].special_handling == SH_CONST_VECTOR) | |
42931 | fputs ("special:constvec ", dump_file); | |
42932 | else if (insn_entry[i].special_handling == SH_SUBREG) | |
42933 | fputs ("special:subreg ", dump_file); | |
42934 | else if (insn_entry[i].special_handling == SH_NOSWAP_LD) | |
42935 | fputs ("special:load ", dump_file); | |
42936 | else if (insn_entry[i].special_handling == SH_NOSWAP_ST) | |
42937 | fputs ("special:store ", dump_file); | |
42938 | else if (insn_entry[i].special_handling == SH_EXTRACT) | |
42939 | fputs ("special:extract ", dump_file); | |
42940 | else if (insn_entry[i].special_handling == SH_SPLAT) | |
42941 | fputs ("special:splat ", dump_file); | |
42942 | else if (insn_entry[i].special_handling == SH_XXPERMDI) | |
42943 | fputs ("special:xxpermdi ", dump_file); | |
42944 | else if (insn_entry[i].special_handling == SH_CONCAT) | |
42945 | fputs ("special:concat ", dump_file); | |
42946 | else if (insn_entry[i].special_handling == SH_VPERM) | |
42947 | fputs ("special:vperm ", dump_file); | |
42948 | } | |
42949 | if (insn_entry[i].web_not_optimizable) | |
42950 | fputs ("unoptimizable ", dump_file); | |
42951 | if (insn_entry[i].will_delete) | |
42952 | fputs ("delete ", dump_file); | |
42953 | fputs ("\n", dump_file); | |
42954 | } | |
42955 | fputs ("\n", dump_file); | |
42956 | } | |
42957 | ||
42958 | /* Return RTX with its address canonicalized to (reg) or (+ reg reg). | |
42959 | Here RTX is an (& addr (const_int -16)). Always return a new copy | |
42960 | to avoid problems with combine. */ | |
42961 | static rtx | |
42962 | alignment_with_canonical_addr (rtx align) | |
42963 | { | |
42964 | rtx canon; | |
42965 | rtx addr = XEXP (align, 0); | |
42966 | ||
42967 | if (REG_P (addr)) | |
42968 | canon = addr; | |
42969 | ||
42970 | else if (GET_CODE (addr) == PLUS) | |
42971 | { | |
42972 | rtx addrop0 = XEXP (addr, 0); | |
42973 | rtx addrop1 = XEXP (addr, 1); | |
42974 | ||
42975 | if (!REG_P (addrop0)) | |
42976 | addrop0 = force_reg (GET_MODE (addrop0), addrop0); | |
42977 | ||
42978 | if (!REG_P (addrop1)) | |
42979 | addrop1 = force_reg (GET_MODE (addrop1), addrop1); | |
42980 | ||
42981 | canon = gen_rtx_PLUS (GET_MODE (addr), addrop0, addrop1); | |
42982 | } | |
42983 | ||
42984 | else | |
42985 | canon = force_reg (GET_MODE (addr), addr); | |
42986 | ||
42987 | return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16)); | |
42988 | } | |
42989 | ||
42990 | /* Check whether an rtx is an alignment mask, and if so, return | |
42991 | a fully-expanded rtx for the masking operation. */ | |
42992 | static rtx | |
42993 | alignment_mask (rtx_insn *insn) | |
42994 | { | |
42995 | rtx body = PATTERN (insn); | |
42996 | ||
42997 | if (GET_CODE (body) != SET | |
42998 | || GET_CODE (SET_SRC (body)) != AND | |
42999 | || !REG_P (XEXP (SET_SRC (body), 0))) | |
43000 | return 0; | |
43001 | ||
43002 | rtx mask = XEXP (SET_SRC (body), 1); | |
43003 | ||
43004 | if (GET_CODE (mask) == CONST_INT) | |
43005 | { | |
43006 | if (INTVAL (mask) == -16) | |
43007 | return alignment_with_canonical_addr (SET_SRC (body)); | |
43008 | else | |
43009 | return 0; | |
43010 | } | |
43011 | ||
43012 | if (!REG_P (mask)) | |
43013 | return 0; | |
43014 | ||
43015 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
43016 | df_ref use; | |
43017 | rtx real_mask = 0; | |
43018 | ||
43019 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
43020 | { | |
43021 | if (!rtx_equal_p (DF_REF_REG (use), mask)) | |
43022 | continue; | |
43023 | ||
43024 | struct df_link *def_link = DF_REF_CHAIN (use); | |
43025 | if (!def_link || def_link->next) | |
43026 | return 0; | |
43027 | ||
43028 | rtx_insn *const_insn = DF_REF_INSN (def_link->ref); | |
43029 | rtx const_body = PATTERN (const_insn); | |
43030 | if (GET_CODE (const_body) != SET) | |
43031 | return 0; | |
43032 | ||
43033 | real_mask = SET_SRC (const_body); | |
43034 | ||
43035 | if (GET_CODE (real_mask) != CONST_INT | |
43036 | || INTVAL (real_mask) != -16) | |
43037 | return 0; | |
43038 | } | |
43039 | ||
43040 | if (real_mask == 0) | |
43041 | return 0; | |
43042 | ||
43043 | return alignment_with_canonical_addr (SET_SRC (body)); | |
43044 | } | |
43045 | ||
43046 | /* Given INSN that's a load or store based at BASE_REG, look for a | |
43047 | feeding computation that aligns its address on a 16-byte boundary. */ | |
43048 | static rtx | |
43049 | find_alignment_op (rtx_insn *insn, rtx base_reg) | |
43050 | { | |
43051 | df_ref base_use; | |
43052 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
43053 | rtx and_operation = 0; | |
43054 | ||
43055 | FOR_EACH_INSN_INFO_USE (base_use, insn_info) | |
43056 | { | |
43057 | if (!rtx_equal_p (DF_REF_REG (base_use), base_reg)) | |
43058 | continue; | |
43059 | ||
43060 | struct df_link *base_def_link = DF_REF_CHAIN (base_use); | |
43061 | if (!base_def_link || base_def_link->next) | |
43062 | break; | |
43063 | ||
43064 | /* With stack-protector code enabled, and possibly in other | |
43065 | circumstances, there may not be an associated insn for | |
43066 | the def. */ | |
43067 | if (DF_REF_IS_ARTIFICIAL (base_def_link->ref)) | |
43068 | break; | |
43069 | ||
43070 | rtx_insn *and_insn = DF_REF_INSN (base_def_link->ref); | |
43071 | and_operation = alignment_mask (and_insn); | |
43072 | if (and_operation != 0) | |
43073 | break; | |
43074 | } | |
43075 | ||
43076 | return and_operation; | |
43077 | } | |
43078 | ||
43079 | struct del_info { bool replace; rtx_insn *replace_insn; }; | |
43080 | ||
43081 | /* If INSN is the load for an lvx pattern, put it in canonical form. */ | |
43082 | static void | |
43083 | recombine_lvx_pattern (rtx_insn *insn, del_info *to_delete) | |
43084 | { | |
43085 | rtx body = PATTERN (insn); | |
43086 | gcc_assert (GET_CODE (body) == SET | |
43087 | && GET_CODE (SET_SRC (body)) == VEC_SELECT | |
43088 | && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM); | |
43089 | ||
43090 | rtx mem = XEXP (SET_SRC (body), 0); | |
43091 | rtx base_reg = XEXP (mem, 0); | |
43092 | ||
43093 | rtx and_operation = find_alignment_op (insn, base_reg); | |
43094 | ||
43095 | if (and_operation != 0) | |
43096 | { | |
43097 | df_ref def; | |
43098 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
43099 | FOR_EACH_INSN_INFO_DEF (def, insn_info) | |
43100 | { | |
43101 | struct df_link *link = DF_REF_CHAIN (def); | |
43102 | if (!link || link->next) | |
43103 | break; | |
43104 | ||
43105 | rtx_insn *swap_insn = DF_REF_INSN (link->ref); | |
43106 | if (!insn_is_swap_p (swap_insn) | |
43107 | || insn_is_load_p (swap_insn) | |
43108 | || insn_is_store_p (swap_insn)) | |
43109 | break; | |
43110 | ||
43111 | /* Expected lvx pattern found. Change the swap to | |
43112 | a copy, and propagate the AND operation into the | |
43113 | load. */ | |
43114 | to_delete[INSN_UID (swap_insn)].replace = true; | |
43115 | to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn; | |
43116 | ||
43117 | XEXP (mem, 0) = and_operation; | |
43118 | SET_SRC (body) = mem; | |
43119 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
43120 | df_insn_rescan (insn); | |
43121 | ||
43122 | if (dump_file) | |
43123 | fprintf (dump_file, "lvx opportunity found at %d\n", | |
43124 | INSN_UID (insn)); | |
43125 | } | |
43126 | } | |
43127 | } | |
43128 | ||
43129 | /* If INSN is the store for an stvx pattern, put it in canonical form. */ | |
43130 | static void | |
43131 | recombine_stvx_pattern (rtx_insn *insn, del_info *to_delete) | |
43132 | { | |
43133 | rtx body = PATTERN (insn); | |
43134 | gcc_assert (GET_CODE (body) == SET | |
43135 | && GET_CODE (SET_DEST (body)) == MEM | |
43136 | && GET_CODE (SET_SRC (body)) == VEC_SELECT); | |
43137 | rtx mem = SET_DEST (body); | |
43138 | rtx base_reg = XEXP (mem, 0); | |
43139 | ||
43140 | rtx and_operation = find_alignment_op (insn, base_reg); | |
43141 | ||
43142 | if (and_operation != 0) | |
43143 | { | |
43144 | rtx src_reg = XEXP (SET_SRC (body), 0); | |
43145 | df_ref src_use; | |
43146 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
43147 | FOR_EACH_INSN_INFO_USE (src_use, insn_info) | |
43148 | { | |
43149 | if (!rtx_equal_p (DF_REF_REG (src_use), src_reg)) | |
43150 | continue; | |
43151 | ||
43152 | struct df_link *link = DF_REF_CHAIN (src_use); | |
43153 | if (!link || link->next) | |
43154 | break; | |
43155 | ||
43156 | rtx_insn *swap_insn = DF_REF_INSN (link->ref); | |
43157 | if (!insn_is_swap_p (swap_insn) | |
43158 | || insn_is_load_p (swap_insn) | |
43159 | || insn_is_store_p (swap_insn)) | |
43160 | break; | |
43161 | ||
43162 | /* Expected stvx pattern found. Change the swap to | |
43163 | a copy, and propagate the AND operation into the | |
43164 | store. */ | |
43165 | to_delete[INSN_UID (swap_insn)].replace = true; | |
43166 | to_delete[INSN_UID (swap_insn)].replace_insn = swap_insn; | |
43167 | ||
43168 | XEXP (mem, 0) = and_operation; | |
43169 | SET_SRC (body) = src_reg; | |
43170 | INSN_CODE (insn) = -1; /* Force re-recognition. */ | |
43171 | df_insn_rescan (insn); | |
43172 | ||
43173 | if (dump_file) | |
43174 | fprintf (dump_file, "stvx opportunity found at %d\n", | |
43175 | INSN_UID (insn)); | |
43176 | } | |
43177 | } | |
43178 | } | |
43179 | ||
43180 | /* Look for patterns created from builtin lvx and stvx calls, and | |
43181 | canonicalize them to be properly recognized as such. */ | |
43182 | static void | |
43183 | recombine_lvx_stvx_patterns (function *fun) | |
43184 | { | |
43185 | int i; | |
43186 | basic_block bb; | |
43187 | rtx_insn *insn; | |
43188 | ||
43189 | int num_insns = get_max_uid (); | |
43190 | del_info *to_delete = XCNEWVEC (del_info, num_insns); | |
43191 | ||
43192 | FOR_ALL_BB_FN (bb, fun) | |
43193 | FOR_BB_INSNS (bb, insn) | |
43194 | { | |
43195 | if (!NONDEBUG_INSN_P (insn)) | |
43196 | continue; | |
43197 | ||
43198 | if (insn_is_load_p (insn) && insn_is_swap_p (insn)) | |
43199 | recombine_lvx_pattern (insn, to_delete); | |
43200 | else if (insn_is_store_p (insn) && insn_is_swap_p (insn)) | |
43201 | recombine_stvx_pattern (insn, to_delete); | |
43202 | } | |
43203 | ||
43204 | /* Turning swaps into copies is delayed until now, to avoid problems | |
43205 | with deleting instructions during the insn walk. */ | |
43206 | for (i = 0; i < num_insns; i++) | |
43207 | if (to_delete[i].replace) | |
43208 | { | |
43209 | rtx swap_body = PATTERN (to_delete[i].replace_insn); | |
43210 | rtx src_reg = XEXP (SET_SRC (swap_body), 0); | |
43211 | rtx copy = gen_rtx_SET (SET_DEST (swap_body), src_reg); | |
43212 | rtx_insn *new_insn = emit_insn_before (copy, | |
43213 | to_delete[i].replace_insn); | |
43214 | set_block_for_insn (new_insn, | |
43215 | BLOCK_FOR_INSN (to_delete[i].replace_insn)); | |
43216 | df_insn_rescan (new_insn); | |
43217 | df_insn_delete (to_delete[i].replace_insn); | |
43218 | remove_insn (to_delete[i].replace_insn); | |
43219 | to_delete[i].replace_insn->set_deleted (); | |
43220 | } | |
43221 | ||
43222 | free (to_delete); | |
43223 | } | |
43224 | ||
43225 | /* Main entry point for this pass. */ | |
43226 | unsigned int | |
43227 | rs6000_analyze_swaps (function *fun) | |
43228 | { | |
43229 | swap_web_entry *insn_entry; | |
43230 | basic_block bb; | |
43231 | rtx_insn *insn, *curr_insn = 0; | |
43232 | ||
43233 | /* Dataflow analysis for use-def chains. */ | |
43234 | df_set_flags (DF_RD_PRUNE_DEAD_DEFS); | |
43235 | df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN); | |
43236 | df_analyze (); | |
43237 | df_set_flags (DF_DEFER_INSN_RESCAN); | |
43238 | ||
43239 | /* Pre-pass to recombine lvx and stvx patterns so we don't lose info. */ | |
43240 | recombine_lvx_stvx_patterns (fun); | |
43241 | ||
43242 | /* Allocate structure to represent webs of insns. */ | |
43243 | insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ()); | |
43244 | ||
43245 | /* Walk the insns to gather basic data. */ | |
43246 | FOR_ALL_BB_FN (bb, fun) | |
43247 | FOR_BB_INSNS_SAFE (bb, insn, curr_insn) | |
43248 | { | |
43249 | unsigned int uid = INSN_UID (insn); | |
43250 | if (NONDEBUG_INSN_P (insn)) | |
43251 | { | |
43252 | insn_entry[uid].insn = insn; | |
43253 | ||
43254 | if (GET_CODE (insn) == CALL_INSN) | |
43255 | insn_entry[uid].is_call = 1; | |
43256 | ||
43257 | /* Walk the uses and defs to see if we mention vector regs. | |
43258 | Record any constraints on optimization of such mentions. */ | |
43259 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
43260 | df_ref mention; | |
43261 | FOR_EACH_INSN_INFO_USE (mention, insn_info) | |
43262 | { | |
43263 | /* We use DF_REF_REAL_REG here to get inside any subregs. */ | |
43264 | machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention)); | |
43265 | ||
43266 | /* If a use gets its value from a call insn, it will be | |
43267 | a hard register and will look like (reg:V4SI 3 3). | |
43268 | The df analysis creates two mentions for GPR3 and GPR4, | |
43269 | both DImode. We must recognize this and treat it as a | |
43270 | vector mention to ensure the call is unioned with this | |
43271 | use. */ | |
43272 | if (mode == DImode && DF_REF_INSN_INFO (mention)) | |
43273 | { | |
43274 | rtx feeder = DF_REF_INSN (mention); | |
43275 | /* FIXME: It is pretty hard to get from the df mention | |
43276 | to the mode of the use in the insn. We arbitrarily | |
43277 | pick a vector mode here, even though the use might | |
43278 | be a real DImode. We can be too conservative | |
43279 | (create a web larger than necessary) because of | |
43280 | this, so consider eventually fixing this. */ | |
43281 | if (GET_CODE (feeder) == CALL_INSN) | |
43282 | mode = V4SImode; | |
43283 | } | |
43284 | ||
43285 | if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode) | |
43286 | { | |
43287 | insn_entry[uid].is_relevant = 1; | |
43288 | if (mode == TImode || mode == V1TImode | |
43289 | || FLOAT128_VECTOR_P (mode)) | |
43290 | insn_entry[uid].is_128_int = 1; | |
43291 | if (DF_REF_INSN_INFO (mention)) | |
43292 | insn_entry[uid].contains_subreg | |
43293 | = !rtx_equal_p (DF_REF_REG (mention), | |
43294 | DF_REF_REAL_REG (mention)); | |
43295 | union_defs (insn_entry, insn, mention); | |
43296 | } | |
43297 | } | |
43298 | FOR_EACH_INSN_INFO_DEF (mention, insn_info) | |
43299 | { | |
43300 | /* We use DF_REF_REAL_REG here to get inside any subregs. */ | |
43301 | machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention)); | |
43302 | ||
43303 | /* If we're loading up a hard vector register for a call, | |
43304 | it looks like (set (reg:V4SI 9 9) (...)). The df | |
43305 | analysis creates two mentions for GPR9 and GPR10, both | |
43306 | DImode. So relying on the mode from the mentions | |
43307 | isn't sufficient to ensure we union the call into the | |
43308 | web with the parameter setup code. */ | |
43309 | if (mode == DImode && GET_CODE (insn) == SET | |
43310 | && ALTIVEC_OR_VSX_VECTOR_MODE (GET_MODE (SET_DEST (insn)))) | |
43311 | mode = GET_MODE (SET_DEST (insn)); | |
43312 | ||
43313 | if (ALTIVEC_OR_VSX_VECTOR_MODE (mode) || mode == TImode) | |
43314 | { | |
43315 | insn_entry[uid].is_relevant = 1; | |
43316 | if (mode == TImode || mode == V1TImode | |
43317 | || FLOAT128_VECTOR_P (mode)) | |
43318 | insn_entry[uid].is_128_int = 1; | |
43319 | if (DF_REF_INSN_INFO (mention)) | |
43320 | insn_entry[uid].contains_subreg | |
43321 | = !rtx_equal_p (DF_REF_REG (mention), | |
43322 | DF_REF_REAL_REG (mention)); | |
43323 | /* REG_FUNCTION_VALUE_P is not valid for subregs. */ | |
43324 | else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention))) | |
43325 | insn_entry[uid].is_live_out = 1; | |
43326 | union_uses (insn_entry, insn, mention); | |
43327 | } | |
43328 | } | |
43329 | ||
43330 | if (insn_entry[uid].is_relevant) | |
43331 | { | |
43332 | /* Determine if this is a load or store. */ | |
43333 | insn_entry[uid].is_load = insn_is_load_p (insn); | |
43334 | insn_entry[uid].is_store = insn_is_store_p (insn); | |
43335 | ||
43336 | /* Determine if this is a doubleword swap. If not, | |
43337 | determine whether it can legally be swapped. */ | |
43338 | if (insn_is_swap_p (insn)) | |
43339 | insn_entry[uid].is_swap = 1; | |
43340 | else | |
43341 | { | |
43342 | unsigned int special = SH_NONE; | |
43343 | insn_entry[uid].is_swappable | |
43344 | = insn_is_swappable_p (insn_entry, insn, &special); | |
43345 | if (special != SH_NONE && insn_entry[uid].contains_subreg) | |
43346 | insn_entry[uid].is_swappable = 0; | |
43347 | else if (special != SH_NONE) | |
43348 | insn_entry[uid].special_handling = special; | |
43349 | else if (insn_entry[uid].contains_subreg) | |
43350 | insn_entry[uid].special_handling = SH_SUBREG; | |
43351 | } | |
43352 | } | |
43353 | } | |
43354 | } | |
43355 | ||
43356 | if (dump_file) | |
43357 | { | |
43358 | fprintf (dump_file, "\nSwap insn entry table when first built\n"); | |
43359 | dump_swap_insn_table (insn_entry); | |
43360 | } | |
43361 | ||
43362 | /* Record unoptimizable webs. */ | |
43363 | unsigned e = get_max_uid (), i; | |
43364 | for (i = 0; i < e; ++i) | |
43365 | { | |
43366 | if (!insn_entry[i].is_relevant) | |
43367 | continue; | |
43368 | ||
43369 | swap_web_entry *root | |
43370 | = (swap_web_entry*)(&insn_entry[i])->unionfind_root (); | |
43371 | ||
43372 | if (insn_entry[i].is_live_in || insn_entry[i].is_live_out | |
43373 | || (insn_entry[i].contains_subreg | |
43374 | && insn_entry[i].special_handling != SH_SUBREG) | |
43375 | || insn_entry[i].is_128_int || insn_entry[i].is_call | |
43376 | || !(insn_entry[i].is_swappable || insn_entry[i].is_swap)) | |
43377 | root->web_not_optimizable = 1; | |
43378 | ||
43379 | /* If we have loads or stores that aren't permuting then the | |
43380 | optimization isn't appropriate. */ | |
43381 | else if ((insn_entry[i].is_load || insn_entry[i].is_store) | |
43382 | && !insn_entry[i].is_swap && !insn_entry[i].is_swappable) | |
43383 | root->web_not_optimizable = 1; | |
43384 | ||
43385 | /* If we have permuting loads or stores that are not accompanied | |
43386 | by a register swap, the optimization isn't appropriate. */ | |
43387 | else if (insn_entry[i].is_load && insn_entry[i].is_swap) | |
43388 | { | |
43389 | rtx insn = insn_entry[i].insn; | |
43390 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
43391 | df_ref def; | |
43392 | ||
43393 | FOR_EACH_INSN_INFO_DEF (def, insn_info) | |
43394 | { | |
43395 | struct df_link *link = DF_REF_CHAIN (def); | |
43396 | ||
43397 | if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS)) | |
43398 | { | |
43399 | root->web_not_optimizable = 1; | |
43400 | break; | |
43401 | } | |
43402 | } | |
43403 | } | |
43404 | else if (insn_entry[i].is_store && insn_entry[i].is_swap) | |
43405 | { | |
43406 | rtx insn = insn_entry[i].insn; | |
43407 | struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
43408 | df_ref use; | |
43409 | ||
43410 | FOR_EACH_INSN_INFO_USE (use, insn_info) | |
43411 | { | |
43412 | struct df_link *link = DF_REF_CHAIN (use); | |
43413 | ||
43414 | if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES)) | |
43415 | { | |
43416 | root->web_not_optimizable = 1; | |
43417 | break; | |
43418 | } | |
43419 | } | |
43420 | } | |
43421 | } | |
43422 | ||
43423 | if (dump_file) | |
43424 | { | |
43425 | fprintf (dump_file, "\nSwap insn entry table after web analysis\n"); | |
43426 | dump_swap_insn_table (insn_entry); | |
43427 | } | |
43428 | ||
43429 | /* For each load and store in an optimizable web (which implies | |
43430 | the loads and stores are permuting), find the associated | |
43431 | register swaps and mark them for removal. Due to various | |
43432 | optimizations we may mark the same swap more than once. Also | |
43433 | perform special handling for swappable insns that require it. */ | |
43434 | for (i = 0; i < e; ++i) | |
43435 | if ((insn_entry[i].is_load || insn_entry[i].is_store) | |
43436 | && insn_entry[i].is_swap) | |
43437 | { | |
43438 | swap_web_entry* root_entry | |
43439 | = (swap_web_entry*)((&insn_entry[i])->unionfind_root ()); | |
43440 | if (!root_entry->web_not_optimizable) | |
43441 | mark_swaps_for_removal (insn_entry, i); | |
43442 | } | |
43443 | else if (insn_entry[i].is_swappable && insn_entry[i].special_handling) | |
43444 | { | |
43445 | swap_web_entry* root_entry | |
43446 | = (swap_web_entry*)((&insn_entry[i])->unionfind_root ()); | |
43447 | if (!root_entry->web_not_optimizable) | |
43448 | handle_special_swappables (insn_entry, i); | |
43449 | } | |
43450 | ||
43451 | /* Now delete the swaps marked for removal. */ | |
43452 | for (i = 0; i < e; ++i) | |
43453 | if (insn_entry[i].will_delete) | |
43454 | replace_swap_with_copy (insn_entry, i); | |
43455 | ||
43456 | /* Clean up. */ | |
43457 | free (insn_entry); | |
43458 | return 0; | |
43459 | } | |
43460 | ||
43461 | const pass_data pass_data_analyze_swaps = | |
43462 | { | |
43463 | RTL_PASS, /* type */ | |
43464 | "swaps", /* name */ | |
43465 | OPTGROUP_NONE, /* optinfo_flags */ | |
43466 | TV_NONE, /* tv_id */ | |
43467 | 0, /* properties_required */ | |
43468 | 0, /* properties_provided */ | |
43469 | 0, /* properties_destroyed */ | |
43470 | 0, /* todo_flags_start */ | |
43471 | TODO_df_finish, /* todo_flags_finish */ | |
43472 | }; | |
43473 | ||
43474 | class pass_analyze_swaps : public rtl_opt_pass | |
43475 | { | |
43476 | public: | |
43477 | pass_analyze_swaps(gcc::context *ctxt) | |
43478 | : rtl_opt_pass(pass_data_analyze_swaps, ctxt) | |
43479 | {} | |
43480 | ||
43481 | /* opt_pass methods: */ | |
43482 | virtual bool gate (function *) | |
43483 | { | |
43484 | return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX | |
43485 | && !TARGET_P9_VECTOR && rs6000_optimize_swaps); | |
43486 | } | |
43487 | ||
43488 | virtual unsigned int execute (function *fun) | |
43489 | { | |
43490 | return rs6000_analyze_swaps (fun); | |
43491 | } | |
43492 | ||
43493 | opt_pass *clone () | |
43494 | { | |
43495 | return new pass_analyze_swaps (m_ctxt); | |
43496 | } | |
43497 | ||
43498 | }; // class pass_analyze_swaps | |
43499 | ||
43500 | rtl_opt_pass * | |
43501 | make_pass_analyze_swaps (gcc::context *ctxt) | |
43502 | { | |
43503 | return new pass_analyze_swaps (ctxt); | |
43504 | } | |
43505 | ||
43506 | #ifdef RS6000_GLIBC_ATOMIC_FENV | |
43507 | /* Function declarations for rs6000_atomic_assign_expand_fenv. */ | |
43508 | static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl; | |
43509 | #endif | |
43510 | ||
43511 | /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */ | |
43512 | ||
43513 | static void | |
43514 | rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) | |
43515 | { | |
43516 | if (!TARGET_HARD_FLOAT || !TARGET_FPRS) | |
43517 | { | |
43518 | #ifdef RS6000_GLIBC_ATOMIC_FENV | |
43519 | if (atomic_hold_decl == NULL_TREE) | |
43520 | { | |
43521 | atomic_hold_decl | |
43522 | = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, | |
43523 | get_identifier ("__atomic_feholdexcept"), | |
43524 | build_function_type_list (void_type_node, | |
43525 | double_ptr_type_node, | |
43526 | NULL_TREE)); | |
43527 | TREE_PUBLIC (atomic_hold_decl) = 1; | |
43528 | DECL_EXTERNAL (atomic_hold_decl) = 1; | |
43529 | } | |
43530 | ||
43531 | if (atomic_clear_decl == NULL_TREE) | |
43532 | { | |
43533 | atomic_clear_decl | |
43534 | = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, | |
43535 | get_identifier ("__atomic_feclearexcept"), | |
43536 | build_function_type_list (void_type_node, | |
43537 | NULL_TREE)); | |
43538 | TREE_PUBLIC (atomic_clear_decl) = 1; | |
43539 | DECL_EXTERNAL (atomic_clear_decl) = 1; | |
43540 | } | |
43541 | ||
43542 | tree const_double = build_qualified_type (double_type_node, | |
43543 | TYPE_QUAL_CONST); | |
43544 | tree const_double_ptr = build_pointer_type (const_double); | |
43545 | if (atomic_update_decl == NULL_TREE) | |
43546 | { | |
43547 | atomic_update_decl | |
43548 | = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, | |
43549 | get_identifier ("__atomic_feupdateenv"), | |
43550 | build_function_type_list (void_type_node, | |
43551 | const_double_ptr, | |
43552 | NULL_TREE)); | |
43553 | TREE_PUBLIC (atomic_update_decl) = 1; | |
43554 | DECL_EXTERNAL (atomic_update_decl) = 1; | |
43555 | } | |
43556 | ||
43557 | tree fenv_var = create_tmp_var_raw (double_type_node); | |
43558 | TREE_ADDRESSABLE (fenv_var) = 1; | |
43559 | tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var); | |
43560 | ||
43561 | *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr); | |
43562 | *clear = build_call_expr (atomic_clear_decl, 0); | |
43563 | *update = build_call_expr (atomic_update_decl, 1, | |
43564 | fold_convert (const_double_ptr, fenv_addr)); | |
43565 | #endif | |
43566 | return; | |
43567 | } | |
43568 | ||
43569 | tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS]; | |
43570 | tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF]; | |
43571 | tree call_mffs = build_call_expr (mffs, 0); | |
43572 | ||
43573 | /* Generates the equivalent of feholdexcept (&fenv_var) | |
43574 | ||
43575 | *fenv_var = __builtin_mffs (); | |
43576 | double fenv_hold; | |
43577 | *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL; | |
43578 | __builtin_mtfsf (0xff, fenv_hold); */ | |
43579 | ||
43580 | /* Mask to clear everything except for the rounding modes and non-IEEE | |
43581 | arithmetic flag. */ | |
43582 | const unsigned HOST_WIDE_INT hold_exception_mask = | |
43583 | HOST_WIDE_INT_C (0xffffffff00000007); | |
43584 | ||
43585 | tree fenv_var = create_tmp_var_raw (double_type_node); | |
43586 | ||
43587 | tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs); | |
43588 | ||
43589 | tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var); | |
43590 | tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu, | |
43591 | build_int_cst (uint64_type_node, | |
43592 | hold_exception_mask)); | |
43593 | ||
43594 | tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, | |
43595 | fenv_llu_and); | |
43596 | ||
43597 | tree hold_mtfsf = build_call_expr (mtfsf, 2, | |
43598 | build_int_cst (unsigned_type_node, 0xff), | |
43599 | fenv_hold_mtfsf); | |
43600 | ||
43601 | *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf); | |
43602 | ||
43603 | /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT): | |
43604 | ||
43605 | double fenv_clear = __builtin_mffs (); | |
43606 | *(uint64_t)&fenv_clear &= 0xffffffff00000000LL; | |
43607 | __builtin_mtfsf (0xff, fenv_clear); */ | |
43608 | ||
43609 | /* Mask to clear everything except for the rounding modes and non-IEEE | |
43610 | arithmetic flag. */ | |
43611 | const unsigned HOST_WIDE_INT clear_exception_mask = | |
43612 | HOST_WIDE_INT_C (0xffffffff00000000); | |
43613 | ||
43614 | tree fenv_clear = create_tmp_var_raw (double_type_node); | |
43615 | ||
43616 | tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs); | |
43617 | ||
43618 | tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear); | |
43619 | tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, | |
43620 | fenv_clean_llu, | |
43621 | build_int_cst (uint64_type_node, | |
43622 | clear_exception_mask)); | |
43623 | ||
43624 | tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, | |
43625 | fenv_clear_llu_and); | |
43626 | ||
43627 | tree clear_mtfsf = build_call_expr (mtfsf, 2, | |
43628 | build_int_cst (unsigned_type_node, 0xff), | |
43629 | fenv_clear_mtfsf); | |
43630 | ||
43631 | *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf); | |
43632 | ||
43633 | /* Generates the equivalent of feupdateenv (&fenv_var) | |
43634 | ||
43635 | double old_fenv = __builtin_mffs (); | |
43636 | double fenv_update; | |
43637 | *(uint64_t*)&fenv_update = (*(uint64_t*)&old & 0xffffffff1fffff00LL) | | |
43638 | (*(uint64_t*)fenv_var 0x1ff80fff); | |
43639 | __builtin_mtfsf (0xff, fenv_update); */ | |
43640 | ||
43641 | const unsigned HOST_WIDE_INT update_exception_mask = | |
43642 | HOST_WIDE_INT_C (0xffffffff1fffff00); | |
43643 | const unsigned HOST_WIDE_INT new_exception_mask = | |
43644 | HOST_WIDE_INT_C (0x1ff80fff); | |
43645 | ||
43646 | tree old_fenv = create_tmp_var_raw (double_type_node); | |
43647 | tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs); | |
43648 | ||
43649 | tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv); | |
43650 | tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu, | |
43651 | build_int_cst (uint64_type_node, | |
43652 | update_exception_mask)); | |
43653 | ||
43654 | tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu, | |
43655 | build_int_cst (uint64_type_node, | |
43656 | new_exception_mask)); | |
43657 | ||
43658 | tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node, | |
43659 | old_llu_and, new_llu_and); | |
43660 | ||
43661 | tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node, | |
43662 | new_llu_mask); | |
43663 | ||
43664 | tree update_mtfsf = build_call_expr (mtfsf, 2, | |
43665 | build_int_cst (unsigned_type_node, 0xff), | |
43666 | fenv_update_mtfsf); | |
43667 | ||
43668 | *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf); | |
43669 | } | |
43670 | ||
43671 | /* Implement the TARGET_OPTAB_SUPPORTED_P hook. */ | |
43672 | ||
43673 | static bool | |
43674 | rs6000_optab_supported_p (int op, machine_mode mode1, machine_mode, | |
43675 | optimization_type opt_type) | |
43676 | { | |
43677 | switch (op) | |
43678 | { | |
43679 | case rsqrt_optab: | |
43680 | return (opt_type == OPTIMIZE_FOR_SPEED | |
43681 | && RS6000_RECIP_AUTO_RSQRTE_P (mode1)); | |
43682 | ||
43683 | default: | |
43684 | return true; | |
43685 | } | |
43686 | } | |
58e17cf8 RS |
43687 | |
43688 | /* Implement TARGET_CONSTANT_ALIGNMENT. */ | |
43689 | ||
43690 | static HOST_WIDE_INT | |
43691 | rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align) | |
43692 | { | |
43693 | if (TREE_CODE (exp) == STRING_CST | |
43694 | && (STRICT_ALIGNMENT || !optimize_size)) | |
43695 | return MAX (align, BITS_PER_WORD); | |
43696 | return align; | |
43697 | } | |
2a31c321 RS |
43698 | |
43699 | /* Implement TARGET_STARTING_FRAME_OFFSET. */ | |
43700 | ||
43701 | static HOST_WIDE_INT | |
43702 | rs6000_starting_frame_offset (void) | |
43703 | { | |
43704 | if (FRAME_GROWS_DOWNWARD) | |
43705 | return 0; | |
43706 | return RS6000_STARTING_FRAME_OFFSET; | |
43707 | } | |
83349046 SB |
43708 | \f |
43709 | struct gcc_target targetm = TARGET_INITIALIZER; | |
43710 | ||
43711 | #include "gt-powerpcspe.h" |