/* Subroutines used for code generation on IBM S/390 and zSeries
   Copyright (C) 1999-2022 Free Software Foundation, Inc.
   Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                  Ulrich Weigand (uweigand@de.ibm.com) and
                  Andreas Krebbel (Andreas.Krebbel@de.ibm.com).

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#define IN_TARGET_CODE 1

#include "coretypes.h"
#include "target-globals.h"
#include "stringpool.h"
#include "diagnostic-core.h"
#include "diagnostic.h"
#include "fold-const.h"
#include "print-tree.h"
#include "stor-layout.h"
#include "conditions.h"
#include "insn-attr.h"
#include "cfgcleanup.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-pass.h"
#include "tm-constrs.h"
#include "symbol-summary.h"
#include "ipa-fnsummary.h"
#include "sched-int.h"

/* This file should be included last.  */
#include "target-def.h"
static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);

/* Remember the last target of s390_set_current_function.  */
static GTY(()) tree s390_previous_fndecl;
/* Define the specific costs for a given cpu.  */

struct processor_costs
{
  /* multiplication */
  const int m;        /* cost of an M instruction.  */
  const int mghi;     /* cost of an MGHI instruction.  */
  const int mh;       /* cost of an MH instruction.  */
  const int mhi;      /* cost of an MHI instruction.  */
  const int ml;       /* cost of an ML instruction.  */
  const int mr;       /* cost of an MR instruction.  */
  const int ms;       /* cost of an MS instruction.  */
  const int msg;      /* cost of an MSG instruction.  */
  const int msgf;     /* cost of an MSGF instruction.  */
  const int msgfr;    /* cost of an MSGFR instruction.  */
  const int msgr;     /* cost of an MSGR instruction.  */
  const int msr;      /* cost of an MSR instruction.  */
  const int mult_df;  /* cost of multiplication in DFmode.  */
  const int mxbr;     /* cost of an MXBR instruction.  */
  /* square root */
  const int sqxbr;    /* cost of square root in TFmode.  */
  const int sqdbr;    /* cost of square root in DFmode.  */
  const int sqebr;    /* cost of square root in SFmode.  */
  /* multiply and add */
  const int madbr;    /* cost of multiply and add in DFmode.  */
  const int maebr;    /* cost of multiply and add in SFmode.  */
  /* division */
  const int dxbr;     /* cost of a DXBR instruction.  */
  const int ddbr;     /* cost of a DDBR instruction.  */
  const int debr;     /* cost of a DEBR instruction.  */
  const int dlgr;     /* cost of a DLGR instruction.  */
  const int dlr;      /* cost of a DLR instruction.  */
  const int dr;       /* cost of a DR instruction.  */
  const int dsgfr;    /* cost of a DSGFR instruction.  */
  const int dsgr;     /* cost of a DSGR instruction.  */
};

#define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
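
/* For illustration: with the tables below, a DFmode multiplication on
   a z900 is costed as s390_cost->mult_df == COSTS_N_INSNS (7), i.e.
   seven times the cost of a simple instruction.  */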
struct processor_costs z900_cost =
{
  COSTS_N_INSNS (5),     /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (4),     /* MHI   */
  COSTS_N_INSNS (5),     /* ML    */
  COSTS_N_INSNS (5),     /* MR    */
  COSTS_N_INSNS (4),     /* MS    */
  COSTS_N_INSNS (15),    /* MSG   */
  COSTS_N_INSNS (7),     /* MSGF  */
  COSTS_N_INSNS (7),     /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (7),     /* multiplication in DFmode */
  COSTS_N_INSNS (13),    /* MXBR  */
  COSTS_N_INSNS (136),   /* SQXBR */
  COSTS_N_INSNS (44),    /* SQDBR */
  COSTS_N_INSNS (35),    /* SQEBR */
  COSTS_N_INSNS (18),    /* MADBR */
  COSTS_N_INSNS (13),    /* MAEBR */
  COSTS_N_INSNS (134),   /* DXBR  */
  COSTS_N_INSNS (30),    /* DDBR  */
  COSTS_N_INSNS (27),    /* DEBR  */
  COSTS_N_INSNS (220),   /* DLGR  */
  COSTS_N_INSNS (34),    /* DLR   */
  COSTS_N_INSNS (34),    /* DR    */
  COSTS_N_INSNS (32),    /* DSGFR */
  COSTS_N_INSNS (32),    /* DSGR  */
};
struct processor_costs z990_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR  */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR  */
  COSTS_N_INSNS (40),    /* DDBR  */
  COSTS_N_INSNS (26),    /* DEBR  */
  COSTS_N_INSNS (176),   /* DLGR  */
  COSTS_N_INSNS (31),    /* DLR   */
  COSTS_N_INSNS (31),    /* DR    */
  COSTS_N_INSNS (31),    /* DSGFR */
  COSTS_N_INSNS (31),    /* DSGR  */
};
struct processor_costs z9_109_cost =
{
  COSTS_N_INSNS (4),     /* M     */
  COSTS_N_INSNS (2),     /* MGHI  */
  COSTS_N_INSNS (2),     /* MH    */
  COSTS_N_INSNS (2),     /* MHI   */
  COSTS_N_INSNS (4),     /* ML    */
  COSTS_N_INSNS (4),     /* MR    */
  COSTS_N_INSNS (5),     /* MS    */
  COSTS_N_INSNS (6),     /* MSG   */
  COSTS_N_INSNS (4),     /* MSGF  */
  COSTS_N_INSNS (4),     /* MSGFR */
  COSTS_N_INSNS (4),     /* MSGR  */
  COSTS_N_INSNS (4),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (28),    /* MXBR  */
  COSTS_N_INSNS (130),   /* SQXBR */
  COSTS_N_INSNS (66),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (60),    /* DXBR  */
  COSTS_N_INSNS (40),    /* DDBR  */
  COSTS_N_INSNS (26),    /* DEBR  */
  COSTS_N_INSNS (30),    /* DLGR  */
  COSTS_N_INSNS (23),    /* DLR   */
  COSTS_N_INSNS (23),    /* DR    */
  COSTS_N_INSNS (24),    /* DSGFR */
  COSTS_N_INSNS (24),    /* DSGR  */
};
struct processor_costs z10_cost =
{
  COSTS_N_INSNS (10),    /* M     */
  COSTS_N_INSNS (10),    /* MGHI  */
  COSTS_N_INSNS (10),    /* MH    */
  COSTS_N_INSNS (10),    /* MHI   */
  COSTS_N_INSNS (10),    /* ML    */
  COSTS_N_INSNS (10),    /* MR    */
  COSTS_N_INSNS (10),    /* MS    */
  COSTS_N_INSNS (10),    /* MSG   */
  COSTS_N_INSNS (10),    /* MSGF  */
  COSTS_N_INSNS (10),    /* MSGFR */
  COSTS_N_INSNS (10),    /* MSGR  */
  COSTS_N_INSNS (10),    /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (50),    /* MXBR  */
  COSTS_N_INSNS (120),   /* SQXBR */
  COSTS_N_INSNS (52),    /* SQDBR */
  COSTS_N_INSNS (38),    /* SQEBR */
  COSTS_N_INSNS (1),     /* MADBR */
  COSTS_N_INSNS (1),     /* MAEBR */
  COSTS_N_INSNS (111),   /* DXBR  */
  COSTS_N_INSNS (39),    /* DDBR  */
  COSTS_N_INSNS (32),    /* DEBR  */
  COSTS_N_INSNS (160),   /* DLGR  */
  COSTS_N_INSNS (71),    /* DLR   */
  COSTS_N_INSNS (71),    /* DR    */
  COSTS_N_INSNS (71),    /* DSGFR */
  COSTS_N_INSNS (71),    /* DSGR  */
};
struct processor_costs z196_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (101),   /* DXBR B+101 */
  COSTS_N_INSNS (29),    /* DDBR  */
  COSTS_N_INSNS (22),    /* DEBR  */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};
struct processor_costs zEC12_cost =
{
  COSTS_N_INSNS (7),     /* M     */
  COSTS_N_INSNS (5),     /* MGHI  */
  COSTS_N_INSNS (5),     /* MH    */
  COSTS_N_INSNS (5),     /* MHI   */
  COSTS_N_INSNS (7),     /* ML    */
  COSTS_N_INSNS (7),     /* MR    */
  COSTS_N_INSNS (6),     /* MS    */
  COSTS_N_INSNS (8),     /* MSG   */
  COSTS_N_INSNS (6),     /* MSGF  */
  COSTS_N_INSNS (6),     /* MSGFR */
  COSTS_N_INSNS (8),     /* MSGR  */
  COSTS_N_INSNS (6),     /* MSR   */
  COSTS_N_INSNS (1),     /* multiplication in DFmode */
  COSTS_N_INSNS (40),    /* MXBR B+40 */
  COSTS_N_INSNS (100),   /* SQXBR B+100 */
  COSTS_N_INSNS (42),    /* SQDBR B+42 */
  COSTS_N_INSNS (28),    /* SQEBR B+28 */
  COSTS_N_INSNS (1),     /* MADBR B */
  COSTS_N_INSNS (1),     /* MAEBR B */
  COSTS_N_INSNS (131),   /* DXBR B+131 */
  COSTS_N_INSNS (29),    /* DDBR  */
  COSTS_N_INSNS (22),    /* DEBR  */
  COSTS_N_INSNS (160),   /* DLGR cracked */
  COSTS_N_INSNS (160),   /* DLR cracked */
  COSTS_N_INSNS (160),   /* DR expanded */
  COSTS_N_INSNS (160),   /* DSGFR cracked */
  COSTS_N_INSNS (160),   /* DSGR cracked */
};
const struct s390_processor processor_table[] =
{
  { "z900",   "z900",   PROCESSOR_2064_Z900,   &z900_cost,   5  },
  { "z990",   "z990",   PROCESSOR_2084_Z990,   &z990_cost,   6  },
  { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7  },
  { "z9-ec",  "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost, 7  },
  { "z10",    "z10",    PROCESSOR_2097_Z10,    &z10_cost,    8  },
  { "z196",   "z196",   PROCESSOR_2817_Z196,   &z196_cost,   9  },
  { "zEC12",  "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost,  10 },
  { "z13",    "z13",    PROCESSOR_2964_Z13,    &zEC12_cost,  11 },
  { "z14",    "arch12", PROCESSOR_3906_Z14,    &zEC12_cost,  12 },
  { "z15",    "arch13", PROCESSOR_8561_Z15,    &zEC12_cost,  13 },
  { "arch14", "arch14", PROCESSOR_ARCH14,      &zEC12_cost,  14 },
  { "native", "",       PROCESSOR_NATIVE,      NULL,         0  }
};
extern int reload_completed;

/* Kept up to date using the SCHED_VARIABLE_ISSUE hook.  */
static rtx_insn *last_scheduled_insn;
#define MAX_SCHED_UNITS 4
static int last_scheduled_unit_distance[MAX_SCHED_UNITS][NUM_SIDES];
/* Estimate of the number of cycles a long-running insn occupies an
   execution unit.  */
static int fxd_longrunning[NUM_SIDES];
static int fpd_longrunning[NUM_SIDES];
/* The maximum score added for an instruction whose unit hasn't been
   in use for MAX_SCHED_MIX_DISTANCE steps.  Increase this value to
   give instruction mix scheduling more priority over instruction
   grouping.  */
#define MAX_SCHED_MIX_SCORE 2

/* The maximum distance up to which individual scores will be
   calculated.  Everything beyond this gives MAX_SCHED_MIX_SCORE.
   Increase this with the OOO window size of the machine.  */
#define MAX_SCHED_MIX_DISTANCE 70
/* Structure used to hold the components of a S/390 memory
   address.  A legitimate address on S/390 is of the general
   form
	base + index + displacement
   where any of the components is optional.

   base and index are registers of the class ADDR_REGS,
   displacement is an unsigned 12-bit immediate constant.  */
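
/* For illustration: the memory operand of "l %r1,100(%r2,%r3)"
   decomposes into base %r3, index %r2 and displacement 100, while
   "l %r1,0(%r2)" uses only a base register.  */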
/* The max number of insns of backend generated memset/memcpy/memcmp
   loops.  This value is used in the unroll adjust hook to detect such
   loops.  Current max is 9 coming from the memcmp loop.  */
#define BLOCK_MEM_OPS_LOOP_INSNS 9
/* A few accessor macros for struct cfun->machine->s390_frame_layout.  */

#define cfun_frame_layout (cfun->machine->frame_layout)
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
  ? cfun_frame_layout.fpr_bitmap & 0x0f \
  : cfun_frame_layout.fpr_bitmap & 0x03))
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
  (1 << (REGNO - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
  (1 << (REGNO - FPR0_REGNUM))))
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]
/* Number of GPRs and FPRs used for argument passing.  */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT ? 4 : 2)
#define VEC_ARG_NUM_REG 8
/* A couple of shortcuts.  */
#define CONST_OK_FOR_J(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")

#define REGNO_PAIR_OK(REGNO, MODE) \
  (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
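
/* For illustration (assumed behavior for exposition): a mode that
   needs two GPRs, e.g. TImode, is only acceptable in an even/odd
   register pair, so REGNO_PAIR_OK rejects an odd starting REGNO for
   such a mode while accepting an even one.  */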
/* That's the read ahead of the dynamic branch prediction unit in
   bytes on a z10 (or higher) CPU.  */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
/* Masks per jump target register indicating which thunks need to be
   generated.  */
static GTY(()) int indirect_branch_prez10thunk_mask = 0;
static GTY(()) int indirect_branch_z10thunk_mask = 0;
#define INDIRECT_BRANCH_NUM_OPTIONS 4

enum s390_indirect_branch_option
  {
    s390_opt_indirect_branch_jump = 0,
    s390_opt_indirect_branch_call,
    s390_opt_function_return_reg,
    s390_opt_function_return_mem
  };

static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
  { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] = \
  { ".s390_indirect_jump", ".s390_indirect_call",
    ".s390_return_reg", ".s390_return_mem" };
/* Return true if the return address of the current function is taken
   from a stack save slot.  */

bool
s390_return_addr_from_memory ()
{
  return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
}
/* Return nonzero if it's OK to use fused multiply-add for MODE.  */
bool
s390_fma_allowed_p (machine_mode mode)
{
  if (TARGET_VXE && mode == TFmode)
    return flag_vx_long_double_fma;

  return true;
}
/* Indicate which ABI has been used for passing vector args.
   0 - no vector type arguments have been passed where the ABI is relevant
   1 - the old ABI has been used
   2 - a vector type argument has been passed either in a vector register
       or on the stack by value  */
static int s390_vector_abi = 0;
/* Set the vector ABI marker if TYPE is subject to the vector ABI
   switch.  The vector ABI affects only vector data types.  There are
   two aspects of the vector ABI relevant here:

   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
      ABI and natural alignment with the old.

   2. vectors <= 16 bytes are passed in VRs or by value on the stack
      with the new ABI but by reference on the stack with the old.

   If ARG_P is true TYPE is used for a function argument or return
   value.  The ABI marker then is set for all vector data types.  If
   ARG_P is false only type 1 vectors are being checked.  */
static void
s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
{
  static hash_set<const_tree> visited_types_hash;

  if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
    return;

  if (visited_types_hash.contains (type))
    return;

  visited_types_hash.add (type);

  if (VECTOR_TYPE_P (type))
    {
      int type_size = int_size_in_bytes (type);

      /* Outside arguments only the alignment is changing and this
	 only happens for vector types >= 16 bytes.  */
      if (!arg_p && type_size < 16)
	return;

      /* In arguments vector types > 16 are passed as before (GCC
	 never enforced the bigger alignment for arguments which was
	 required by the old vector ABI).  However, it might still be
	 ABI relevant due to the changed alignment if it is a struct
	 member.  */
      if (arg_p && type_size > 16 && !in_struct_p)
	return;

      s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
    }
  else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
    {
      /* ARRAY_TYPE: Since with neither of the ABIs we have more than
	 natural alignment there will never be ABI dependent padding
	 in an array type.  That's why we do not set in_struct_p to
	 true here.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
    }
  else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree arg_chain;

      /* Check the return type.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);

      for (arg_chain = TYPE_ARG_TYPES (type);
	   arg_chain;
	   arg_chain = TREE_CHAIN (arg_chain))
	s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
    }
  else if (RECORD_OR_UNION_TYPE_P (type))
    {
      tree field;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
	}
    }
}
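
/* For illustration: a declaration such as
     vector double foo (vector double);
   makes both the argument and the return type hit the VECTOR_TYPE_P
   case above, setting s390_vector_abi to 2 when TARGET_VX_ABI is in
   effect and to 1 otherwise.  */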
/* System z builtins.  */

#include "s390-builtins.h"

const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
    0
  };

const unsigned int
bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
#include "s390-builtins.def"
    0
  };

const unsigned int
opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
  {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
#include "s390-builtins.def"
    0
  };

tree s390_builtin_types[BT_MAX];
tree s390_builtin_fn_types[BT_FN_MAX];
tree s390_builtin_decls[S390_BUILTIN_MAX +
			S390_OVERLOADED_BUILTIN_MAX +
			S390_OVERLOADED_BUILTIN_VAR_MAX];

static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
  CODE_FOR_nothing
};
static void
s390_init_builtins (void)
{
  /* These definitions are being used in s390-builtins.def.  */
  tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
				       NULL, NULL);
  tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
  tree c_uint64_type_node;

  /* The uint64_type_node from tree.c is not compatible to the C99
     uint64_t data type.  What we want is c_uint64_type_node from
     c-common.c.  But since backend code is not supposed to interface
     with the frontend we recreate it here.  */
  if (TARGET_64BIT)
    c_uint64_type_node = long_unsigned_type_node;
  else
    c_uint64_type_node = long_long_unsigned_type_node;

#undef DEF_TYPE
#define DEF_TYPE(INDEX, NODE, CONST_P) \
  if (s390_builtin_types[INDEX] == NULL) \
    s390_builtin_types[INDEX] = (!CONST_P) ? \
      (NODE) : build_type_variant ((NODE), 1, 0);

#undef DEF_POINTER_TYPE
#define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
  if (s390_builtin_types[INDEX] == NULL) \
    s390_builtin_types[INDEX] = \
      build_pointer_type (s390_builtin_types[INDEX_BASE]);

#undef DEF_DISTINCT_TYPE
#define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
  if (s390_builtin_types[INDEX] == NULL) \
    s390_builtin_types[INDEX] = \
      build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);

#undef DEF_VECTOR_TYPE
#define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
  if (s390_builtin_types[INDEX] == NULL) \
    s390_builtin_types[INDEX] = \
      build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_OPAQUE_VECTOR_TYPE
#define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
  if (s390_builtin_types[INDEX] == NULL) \
    s390_builtin_types[INDEX] = \
      build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);

#undef DEF_FN_TYPE
#define DEF_FN_TYPE(INDEX, args...) \
  if (s390_builtin_fn_types[INDEX] == NULL) \
    s390_builtin_fn_types[INDEX] = \
      build_function_type_list (args, NULL_TREE);
#undef DEF_OV_TYPE
#define DEF_OV_TYPE(...)
#include "s390-builtin-types.def"

#undef B_DEF
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
  if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
    s390_builtin_decls[S390_BUILTIN_##NAME] = \
      add_builtin_function ("__builtin_" #NAME, \
			    s390_builtin_fn_types[FNTYPE], \
			    S390_BUILTIN_##NAME, \
			    BUILT_IN_MD, \
			    NULL, \
			    ATTRS);
#undef OB_DEF
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
  if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
      == NULL) \
    s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
      add_builtin_function ("__builtin_" #NAME, \
			    s390_builtin_fn_types[FNTYPE], \
			    S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
			    BUILT_IN_MD, \
			    NULL, \
			    0);
#undef OB_DEF_VAR
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
}
/* Return true if ARG is appropriate as argument number ARGNUM of
   builtin DECL.  The operand flags from s390-builtins.def have to
   be passed as OP_FLAGS.  */
bool
s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
{
  if (O_UIMM_P (op_flags))
    {
      unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 4 };
      unsigned HOST_WIDE_INT bitmasks[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 };
      unsigned HOST_WIDE_INT bitwidth = bitwidths[op_flags - O_U1];
      unsigned HOST_WIDE_INT bitmask = bitmasks[op_flags - O_U1];

      gcc_assert (ARRAY_SIZE (bitwidths) == (O_M12 - O_U1 + 1));
      gcc_assert (ARRAY_SIZE (bitmasks) == (O_M12 - O_U1 + 1));

      if (!tree_fits_uhwi_p (arg)
	  || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1
	  || (bitmask && tree_to_uhwi (arg) & ~bitmask))
	{
	  if (bitmask)
	    {
	      gcc_assert (bitmask < 16);
	      char values[120] = "";

	      for (unsigned HOST_WIDE_INT i = 0; i <= bitmask; i++)
		{
		  char buf[5];
		  int ret = snprintf (buf, 5, HOST_WIDE_INT_PRINT_UNSIGNED,
				      i & bitmask);
		  gcc_assert (ret < 5);
		  strcat (values, buf);
		  if (i < bitmask)
		    strcat (values, ", ");
		}
	      error ("constant argument %d for builtin %qF is invalid (%s)",
		     argnum, decl, values);
	    }
	  else
	    error ("constant argument %d for builtin %qF is out of range (0..%wu)",
		   argnum, decl, (HOST_WIDE_INT_1U << bitwidth) - 1);

	  return false;
	}
    }

  if (O_SIMM_P (op_flags))
    {
      int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
      int bitwidth = bitwidths[op_flags - O_S2];

      if (!tree_fits_shwi_p (arg)
	  || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
	  || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
	{
	  error ("constant argument %d for builtin %qF is out of range "
		 "(%wd..%wd)", argnum, decl,
		 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
		 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
	  return false;
	}
    }

  return true;
}
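
/* For illustration: an O_U4 operand uses bitwidth 4 from the table
   above, so values 0..15 are accepted and 16 would trigger the
   "out of range (0..15)" diagnostic.  */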
/* Expand an expression EXP that calls a built-in function,
   with result going to TARGET if that's convenient
   (and in mode MODE if that's convenient).
   SUBTARGET may be used as the target for computing one of EXP's operands.
   IGNORE is nonzero if the value is to be ignored.  */

#define MAX_ARGS 6

static rtx
s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
		     machine_mode mode ATTRIBUTE_UNUSED,
		     int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
  enum insn_code icode;
  rtx op[MAX_ARGS], pat;
  int arity;
  bool nonvoid;
  tree arg;
  call_expr_arg_iterator iter;
  unsigned int all_op_flags = opflags_for_builtin (fcode);
  machine_mode last_vec_mode = VOIDmode;

  if (TARGET_DEBUG_ARG)
    {
      fprintf (stderr,
	       "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
	       (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
	       bflags_for_builtin (fcode));
    }
  if (S390_USE_TARGET_ATTRIBUTE)
    {
      unsigned int bflags;

      bflags = bflags_for_builtin (fcode);
      if ((bflags & B_HTM) && !TARGET_HTM)
	{
	  error ("builtin %qF is not supported without %<-mhtm%> "
		 "(default with %<-march=zEC12%> and higher)", fndecl);
	  return const0_rtx;
	}
      if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
	{
	  error ("builtin %qF requires %<-mvx%> "
		 "(default with %<-march=z13%> and higher)", fndecl);
	  return const0_rtx;
	}

      if ((bflags & B_VXE) && !TARGET_VXE)
	{
	  error ("builtin %qF requires z14 or higher", fndecl);
	  return const0_rtx;
	}

      if ((bflags & B_VXE2) && !TARGET_VXE2)
	{
	  error ("builtin %qF requires z15 or higher", fndecl);
	  return const0_rtx;
	}

      if ((bflags & B_NNPA) && !TARGET_NNPA)
	{
	  error ("builtin %qF requires arch14 or higher", fndecl);
	  return const0_rtx;
	}
    }
  if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
      && fcode < S390_ALL_BUILTIN_MAX)
    /* This is an overloaded builtin which should have been resolved
       already.  */
    gcc_unreachable ();
  else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
    {
      icode = code_for_builtin[fcode];
      /* Set a flag in the machine specific cfun part in order to support
	 saving/restoring of FPRs.  */
      if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
	cfun->machine->tbegin_p = true;
    }
  else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
    {
      error ("unresolved overloaded builtin");
      return const0_rtx;
    }
  else
    internal_error ("bad builtin fcode");

  if (icode == 0)
    internal_error ("bad builtin icode");

  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
  if (nonvoid)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);

      /* There are builtins (e.g. vec_promote) with no vector
	 arguments but an element selector.  So we have to also look
	 at the vector return type when emitting the modulo
	 operation.  */
      if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
	last_vec_mode = insn_data[icode].operand[0].mode;
    }
  arity = 0;
  FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
    {
      rtx tmp_rtx;
      const struct insn_operand_data *insn_op;
      unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);

      all_op_flags = all_op_flags >> O_SHIFT;

      if (arg == error_mark_node)
	return NULL_RTX;
      if (arity >= MAX_ARGS)
	return NULL_RTX;

      if (O_IMM_P (op_flags)
	  && TREE_CODE (arg) != INTEGER_CST)
	{
	  error ("constant value required for builtin %qF argument %d",
		 fndecl, arity + 1);
	  return const0_rtx;
	}

      if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
	return const0_rtx;

      insn_op = &insn_data[icode].operand[arity + nonvoid];
      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);

      /* expand_expr truncates constants to the target mode only if it
	 is "convenient".  However, our checks below rely on this
	 being done.  */
      if (CONST_INT_P (op[arity])
	  && SCALAR_INT_MODE_P (insn_op->mode)
	  && GET_MODE (op[arity]) != insn_op->mode)
	op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
						 insn_op->mode));

      /* Wrap the expanded RTX for pointer types into a MEM expr with
	 the proper mode.  This allows us to use e.g. (match_operand
	 "memory_operand"..) in the insn patterns instead of (mem
	 (match_operand "address_operand)).  This is helpful for
	 patterns not just accepting MEMs.  */
      if (POINTER_TYPE_P (TREE_TYPE (arg))
	  && insn_op->predicate != address_operand)
	op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);

      /* Expand the modulo operation required on element selectors.  */
      if (op_flags == O_ELEM)
	{
	  gcc_assert (last_vec_mode != VOIDmode);
	  op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
					     op[arity],
					     GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
					     NULL_RTX, 1, OPTAB_DIRECT);
	}

      /* Record the vector mode used for an element selector.  This assumes:
	 1. There is no builtin with two different vector modes and an element selector.
	 2. The element selector comes after the vector type it is referring to.
	 This is currently true for all the builtins but FIXME we
	 should better check for that.  */
      if (VECTOR_MODE_P (insn_op->mode))
	last_vec_mode = insn_op->mode;

      if (insn_op->predicate (op[arity], insn_op->mode))
	{
	  arity++;
	  continue;
	}

      /* A memory operand is rejected by the memory_operand predicate.
	 Try making the address legal by copying it into a register.  */
      if (MEM_P (op[arity])
	  && insn_op->predicate == memory_operand
	  && (GET_MODE (XEXP (op[arity], 0)) == Pmode
	      || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
	{
	  op[arity] = replace_equiv_address (op[arity],
					     copy_to_mode_reg (Pmode,
							       XEXP (op[arity], 0)));
	}
      /* Some of the builtins require different modes/types than the
	 pattern in order to implement a specific API.  Instead of
	 adding many expanders which do the mode change we do it here.
	 E.g. s390_vec_add_u128 required to have vector unsigned char
	 arguments is mapped to addti3.  */
      else if (insn_op->mode != VOIDmode
	       && GET_MODE (op[arity]) != VOIDmode
	       && GET_MODE (op[arity]) != insn_op->mode
	       && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
						   GET_MODE (op[arity]), 0))
		   != NULL_RTX))
	{
	  op[arity] = tmp_rtx;
	}

      /* The predicate rejects the operand although the mode is fine.
	 Copy the operand to register.  */
      if (!insn_op->predicate (op[arity], insn_op->mode)
	  && (GET_MODE (op[arity]) == insn_op->mode
	      || GET_MODE (op[arity]) == VOIDmode
	      || (insn_op->predicate == address_operand
		  && GET_MODE (op[arity]) == Pmode)))
	{
	  /* An address_operand usually has VOIDmode in the expander
	     so we cannot use this.  */
	  machine_mode target_mode =
	    (insn_op->predicate == address_operand
	     ? (machine_mode) Pmode : insn_op->mode);
	  op[arity] = copy_to_mode_reg (target_mode, op[arity]);
	}

      if (!insn_op->predicate (op[arity], insn_op->mode))
	{
	  error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
	  return const0_rtx;
	}
      arity++;
    }
  switch (arity)
    {
    case 0:
      pat = GEN_FCN (icode) (target);
      break;
    case 1:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0]);
      else
	pat = GEN_FCN (icode) (op[0]);
      break;
    case 2:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1]);
      else
	pat = GEN_FCN (icode) (op[0], op[1]);
      break;
    case 3:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;
    case 4:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;
    case 5:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;
    case 6:
      if (nonvoid)
	pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
      else
	pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;
    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  if (nonvoid)
    return target;
  else
    return const0_rtx;
}
static const int s390_hotpatch_hw_max = 1000000;
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;
/* Check whether the hotpatch attribute is applied to a function and, if it has
   an argument, the argument is valid.  */

static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree expr;
  tree expr2;
  int err;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }
  if (args != NULL && TREE_CHAIN (args) != NULL)
    {
      expr = TREE_VALUE (args);
      expr2 = TREE_VALUE (TREE_CHAIN (args));
    }
  if (args == NULL || TREE_CHAIN (args) == NULL)
    err = 1;
  else if (TREE_CODE (expr) != INTEGER_CST
	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
	   || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
    err = 1;
  else if (TREE_CODE (expr2) != INTEGER_CST
	   || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
	   || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
    err = 1;
  else
    err = 0;
  if (err)
    {
      error ("requested %qE attribute is not a comma separated pair of"
	     " non-negative integer constants or too large (max. %d)", name,
	     s390_hotpatch_hw_max);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
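
/* For illustration: the attribute is used as e.g.
     void foo (void) __attribute__ ((hotpatch (1, 2)));
   requesting one halfword of padding before and two halfwords after
   the function label.  */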
/* Expand the s390_vector_bool type attribute.  */

static tree
s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  machine_mode mode;

  while (POINTER_TYPE_P (type)
	 || TREE_CODE (type) == FUNCTION_TYPE
	 || TREE_CODE (type) == METHOD_TYPE
	 || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);
  switch (mode)
    {
    case E_DImode: case E_V2DImode:
      result = s390_builtin_types[BT_BV2DI];
      break;
    case E_SImode: case E_V4SImode:
      result = s390_builtin_types[BT_BV4SI];
      break;
    case E_HImode: case E_V8HImode:
      result = s390_builtin_types[BT_BV8HI];
      break;
    case E_QImode: case E_V16QImode:
      result = s390_builtin_types[BT_BV16QI];
      break;
    default:
      break;
    }

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (result)
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
/* Check syntax of function decl attributes having a string type value.  */

static tree
s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
			      tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED,
			      bool *no_add_attrs)
{
  tree cst;

  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  cst = TREE_VALUE (args);

  if (TREE_CODE (cst) != STRING_CST)
    {
      warning (OPT_Wattributes,
	       "%qE attribute requires a string constant argument",
	       name);
      *no_add_attrs = true;
    }

  if (is_attribute_p ("indirect_branch", name)
      || is_attribute_p ("indirect_branch_call", name)
      || is_attribute_p ("function_return", name)
      || is_attribute_p ("function_return_reg", name)
      || is_attribute_p ("function_return_mem", name))
    {
      if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
	  && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
	  && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
	{
	  warning (OPT_Wattributes,
		   "argument to %qE attribute is not "
		   "(keep|thunk|thunk-extern)", name);
	  *no_add_attrs = true;
	}
    }

  if (is_attribute_p ("indirect_branch_jump", name)
      && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
      && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
    {
      warning (OPT_Wattributes,
	       "argument to %qE attribute is not "
	       "(keep|thunk|thunk-inline|thunk-extern)", name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
static const struct attribute_spec s390_attribute_table[] = {
  { "hotpatch", 2, 2, true, false, false, false,
    s390_handle_hotpatch_attribute, NULL },
  { "s390_vector_bool", 0, 0, false, true, false, true,
    s390_handle_vectorbool_attribute, NULL },
  { "indirect_branch", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_jump", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "indirect_branch_call", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_reg", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },
  { "function_return_mem", 1, 1, true, false, false, false,
    s390_handle_string_attribute, NULL },

  /* End element.  */
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
/* Return the alignment for LABEL.  We default to the -falign-labels
   value except for the literal pool base label.  */
static int
s390_label_align (rtx_insn *label)
{
  rtx_insn *prev_insn = prev_active_insn (label);
  rtx set, src;

  if (prev_insn == NULL_RTX)
    goto old;

  set = single_set (prev_insn);

  if (set == NULL_RTX)
    goto old;

  src = SET_SRC (set);

  /* Don't align literal pool base labels.  */
  if (GET_CODE (src) == UNSPEC
      && XINT (src, 1) == UNSPEC_MAIN_BASE)
    return 0;

 old:
  return align_labels.levels[0].log;
}
static GTY(()) rtx got_symbol;

/* Return the GOT table symbol.  The symbol will be created when the
   function is invoked for the first time.  */

static rtx
s390_got_symbol (void)
{
  if (!got_symbol)
    {
      got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
      SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
    }

  return got_symbol;
}
static scalar_int_mode
s390_libgcc_cmp_return_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static scalar_int_mode
s390_libgcc_shift_count_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}

static scalar_int_mode
s390_unwind_word_mode (void)
{
  return TARGET_64BIT ? DImode : SImode;
}
/* Return true if the back end supports mode MODE.  */
static bool
s390_scalar_mode_supported_p (scalar_mode mode)
{
  /* In contrast to the default implementation reject TImode constants on 31bit
     TARGET_ZARCH for ABI compliance.  */
  if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
    return false;

  if (DECIMAL_FLOAT_MODE_P (mode))
    return default_decimal_float_supported_p ();

  return default_scalar_mode_supported_p (mode);
}

/* Return true if the back end supports vector mode MODE.  */
static bool
s390_vector_mode_supported_p (machine_mode mode)
{
  machine_mode inner;

  if (!VECTOR_MODE_P (mode)
      || !TARGET_VX
      || GET_MODE_SIZE (mode) > 16)
    return false;

  inner = GET_MODE_INNER (mode);

  switch (inner)
    {
    case E_QImode:
    case E_HImode:
    case E_SImode:
    case E_DImode:
    case E_TImode:
    case E_SFmode:
    case E_DFmode:
    case E_TFmode:
      return true;
    default:
      return false;
    }
}
/* Set the has_landing_pad_p flag in struct machine_function to VALUE.  */

void
s390_set_has_landing_pad_p (bool value)
{
  cfun->machine->has_landing_pad_p = value;
}
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return
   VOIDmode.  */

static machine_mode
s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  if (m1 == m2)
    return m1;

  switch (m1)
    {
    case E_CCZmode:
      if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
	  || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
	return m2;

      return VOIDmode;

    case E_CCSmode:
    case E_CCUmode:
    case E_CCTmode:
    case E_CCSRmode:
    case E_CCURmode:
    case E_CCZ1mode:
      if (m2 == CCZmode)
	return m1;

      return VOIDmode;

    default:
      return VOIDmode;
    }
}
/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.  */

static bool
s390_match_ccmode_set (rtx set, machine_mode req_mode)
{
  machine_mode set_mode;

  gcc_assert (GET_CODE (set) == SET);

  /* These modes are supposed to be used only in CC consumer
     patterns.  */
  gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
	      && req_mode != CCVFALLmode && req_mode != CCVFANYmode);

  if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
    return true;

  set_mode = GET_MODE (SET_DEST (set));
  switch (set_mode)
    {
    case E_CCZ1mode:
    case E_CCSmode:
    case E_CCSRmode:
    case E_CCSFPSmode:
    case E_CCUmode:
    case E_CCURmode:
    case E_CCOmode:
    case E_CCLmode:
    case E_CCL1mode:
    case E_CCL2mode:
    case E_CCL3mode:
    case E_CCT1mode:
    case E_CCT2mode:
    case E_CCT3mode:
    case E_CCVEQmode:
    case E_CCVIHmode:
    case E_CCVIHUmode:
    case E_CCVFHmode:
    case E_CCVFHEmode:
      if (req_mode != set_mode)
	return false;
      break;

    case E_CCZmode:
      if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
	  && req_mode != CCSRmode && req_mode != CCURmode
	  && req_mode != CCZ1mode)
	return false;
      break;

    case E_CCAPmode:
    case E_CCANmode:
      if (req_mode != CCAmode)
	return false;
      break;

    default:
      gcc_unreachable ();
    }

  return (GET_MODE (SET_SRC (set)) == set_mode);
}
/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.  */

bool
s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
{
  int i;

  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
  if (req_mode == VOIDmode)
    return false;

  if (GET_CODE (PATTERN (insn)) == SET)
    return s390_match_ccmode_set (PATTERN (insn), req_mode);

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
	rtx set = XVECEXP (PATTERN (insn), 0, i);
	if (GET_CODE (set) == SET)
	  if (!s390_match_ccmode_set (set, req_mode))
	    return false;
      }

  return true;
}
/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx), it is false
   if the instruction cannot (TM).  */

machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  int bit0, bit1;

  /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
  if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
    return VOIDmode;

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0) */
  if (INTVAL (op2) == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
  if (INTVAL (op2) == INTVAL (op1))
    return CCT3mode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
     int a;
     if ((a & (16 + 128)) == 16)  -> CCT1
     if ((a & (16 + 128)) == 128) -> CCT2  */
  if (mixed)
    {
      bit1 = exact_log2 (INTVAL (op2));
      bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
      if (bit0 != -1 && bit1 != -1)
	return bit0 > bit1 ? CCT1mode : CCT2mode;
    }

  return VOIDmode;
}
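
/* For illustration: for (compare (and a 144) 16), i.e. op1 == 144
   and op2 == 16, the mixed case applies: bit1 = log2 (16) = 4 and
   bit0 = log2 (144 ^ 16) = 7, so CCT1mode is returned, matching the
   "(a & (16 + 128)) == 16 -> CCT1" example above.  */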
/* Given a comparison code OP (EQ, NE, etc.) and the operands
   OP0 and OP1 of a COMPARE, return the mode to be used for the
   comparison.  */

machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
  switch (code)
    {
    case EQ:
    case NE:
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCAPmode;
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	  && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
	return CCAPmode;
      if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
	   || GET_CODE (op1) == NEG)
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCLmode;

      if (GET_CODE (op0) == AND)
	{
	  /* Check whether we can potentially do it via TM.  */
	  machine_mode ccmode;
	  ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
	  if (ccmode != VOIDmode)
	    {
	      /* Relax CCTmode to CCZmode to allow fall-back to AND
		 if that turns out to be beneficial.  */
	      return ccmode == CCTmode ? CCZmode : ccmode;
	    }
	}

      if (register_operand (op0, HImode)
	  && GET_CODE (op1) == CONST_INT
	  && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
	return CCT3mode;
      if (register_operand (op0, QImode)
	  && GET_CODE (op1) == CONST_INT
	  && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
	return CCT3mode;

      return CCZmode;

    case LE:
    case LT:
    case GE:
    case GT:
      /* The only overflow condition of NEG and ABS happens when
	 -INT_MAX is used as parameter, which stays negative.  So
	 we have an overflow from a positive value to a negative.
	 Using CCAP mode the resulting cc can be used for comparisons.  */
      if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCAPmode;

      /* If constants are involved in an add instruction it is possible to use
	 the resulting cc for comparisons with zero.  Knowing the sign of the
	 constant the overflow behavior gets predictable.  e.g.:
	   int a, b; if ((b = a + c) > 0)
	 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
      if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
	  && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
	      || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
		  /* Avoid INT32_MIN on 32 bit.  */
		  && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
	{
	  if (INTVAL (XEXP((op0), 1)) < 0)
	    return CCANmode;
	  else
	    return CCAPmode;
	}

      /* Fall through.  */
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if (HONOR_NANS (op0) || HONOR_NANS (op1))
	return CCSFPSmode;
      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	  && GET_CODE (op1) != CONST_INT)
	return CCSRmode;
      return CCSmode;

    case LTU:
    case GEU:
      if (GET_CODE (op0) == PLUS
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCL1mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	  && GET_CODE (op1) != CONST_INT)
	return CCURmode;
      return CCUmode;

    case LEU:
    case GTU:
      if (GET_CODE (op0) == MINUS
	  && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
	return CCL2mode;

      if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
	  && GET_CODE (op1) != CONST_INT)
	return CCURmode;
      return CCUmode;

    default:
      gcc_unreachable ();
    }
}
/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
   that we can implement more efficiently.  */

static void
s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  if (op0_preserve_value)
    return;

  /* Convert ZERO_EXTRACT back to AND to enable TM patterns.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == ZERO_EXTRACT
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && GET_CODE (XEXP (*op0, 2)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
      HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
      HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));

      if (len > 0 && len < modesize
	  && pos >= 0 && pos + len <= modesize
	  && modesize <= HOST_BITS_PER_WIDE_INT)
	{
	  unsigned HOST_WIDE_INT block;
	  block = (HOST_WIDE_INT_1U << len) - 1;
	  block <<= modesize - pos - len;

	  *op0 = gen_rtx_AND (GET_MODE (inner), inner,
			      gen_int_mode (block, GET_MODE (inner)));
	}
    }

  /* Narrow AND of memory against immediate to enable TM.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && GET_CODE (*op0) == AND
      && GET_CODE (XEXP (*op0, 1)) == CONST_INT
      && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
    {
      rtx inner = XEXP (*op0, 0);
      rtx mask = XEXP (*op0, 1);

      /* Ignore paradoxical SUBREGs if all extra bits are masked out.  */
      if (GET_CODE (inner) == SUBREG
	  && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
	  && (GET_MODE_SIZE (GET_MODE (inner))
	      >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
	  && ((INTVAL (mask)
	       & GET_MODE_MASK (GET_MODE (inner))
	       & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
	      == 0))
	inner = SUBREG_REG (inner);

      /* Do not change volatile MEMs.  */
      if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
	{
	  int part = s390_single_part (XEXP (*op0, 1),
				       GET_MODE (inner), QImode, 0);
	  if (part >= 0)
	    {
	      mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
	      inner = adjust_address_nv (inner, QImode, part);
	      *op0 = gen_rtx_AND (QImode, inner, mask);
	    }
	}
    }

  /* Narrow comparisons against 0xffff to HImode if possible.  */
  if ((*code == EQ || *code == NE)
      && GET_CODE (*op1) == CONST_INT
      && INTVAL (*op1) == 0xffff
      && SCALAR_INT_MODE_P (GET_MODE (*op0))
      && (nonzero_bits (*op0, GET_MODE (*op0))
	  & ~HOST_WIDE_INT_UC (0xffff)) == 0)
    {
      *op0 = gen_lowpart (HImode, *op0);
      *op1 = constm1_rtx;
    }

  /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && *op1 == const0_rtx)
    {
      enum rtx_code new_code = UNKNOWN;
      switch (*code)
	{
	case EQ: new_code = EQ;  break;
	case NE: new_code = NE;  break;
	case LT: new_code = GTU; break;
	case GT: new_code = LTU; break;
	case LE: new_code = GEU; break;
	case GE: new_code = LEU; break;
	default: break;
	}

      if (new_code != UNKNOWN)
	{
	  *op0 = XVECEXP (*op0, 0, 0);
	  *code = (int) new_code;
	}
    }

  /* Remove redundant UNSPEC_CC_TO_INT conversions if possible.  */
  if (GET_CODE (*op0) == UNSPEC
      && XINT (*op0, 1) == UNSPEC_CC_TO_INT
      && XVECLEN (*op0, 0) == 1
      && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
      && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
      && CONST_INT_P (*op1))
    {
      enum rtx_code new_code = UNKNOWN;
      switch (GET_MODE (XVECEXP (*op0, 0, 0)))
	{
	case E_CCZmode:
	case E_CCRAWmode:
	  switch (*code)
	    {
	    case EQ: new_code = EQ;  break;
	    case NE: new_code = NE;  break;
	    default: break;
	    }
	  break;
	default: break;
	}

      if (new_code != UNKNOWN)
	{
	  /* For CCRAWmode put the required cc mask into the second
	     operand.  */
	  if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
	      && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
	    *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
	  *op0 = XVECEXP (*op0, 0, 0);
	  *code = (int) new_code;
	}
    }

  /* Simplify cascaded EQ, NE with const0_rtx.  */
  if ((*code == NE || *code == EQ)
      && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
      && GET_MODE (*op0) == SImode
      && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
      && REG_P (XEXP (*op0, 0))
      && XEXP (*op0, 1) == const0_rtx
      && *op1 == const0_rtx)
    {
      if ((*code == EQ && GET_CODE (*op0) == NE)
	  || (*code == NE && GET_CODE (*op0) == EQ))
	*code = EQ;
      else
	*code = NE;
      *op0 = XEXP (*op0, 0);
    }

  /* Prefer register over memory as first operand.  */
  if (MEM_P (*op0) && REG_P (*op1))
    {
      rtx tem = *op0; *op0 = *op1; *op1 = tem;
      *code = (int) swap_condition ((enum rtx_code) *code);
    }

  /* A comparison result is compared against zero.  Replace it with
     the (perhaps inverted) original comparison.
     This probably should be done by simplify_relational_operation.  */
  if ((*code == EQ || *code == NE)
      && *op1 == const0_rtx
      && COMPARISON_P (*op0)
      && CC_REG_P (XEXP (*op0, 0)))
    {
      enum rtx_code new_code;

      if (*code == EQ)
	new_code = reversed_comparison_code_parts (GET_CODE (*op0),
						   XEXP (*op0, 0),
						   XEXP (*op0, 1), NULL);
      else
	new_code = GET_CODE (*op0);

      if (new_code != UNKNOWN)
	{
	  *code = new_code;
	  *op1 = XEXP (*op0, 1);
	  *op0 = XEXP (*op0, 0);
	}
    }

  /* ~a==b -> ~(a^b)==0   ~a!=b -> ~(a^b)!=0 */
  if (TARGET_Z15
      && (*code == EQ || *code == NE)
      && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
      && GET_CODE (*op0) == NOT)
    {
      machine_mode mode = GET_MODE (*op0);
      *op0 = gen_rtx_XOR (mode, XEXP (*op0, 0), *op1);
      *op0 = gen_rtx_NOT (mode, *op0);
      *op1 = const0_rtx;
    }

  /* a&b == -1 -> ~a|~b == 0   a|b == -1 -> ~a&~b == 0 */
  if (TARGET_Z15
      && (*code == EQ || *code == NE)
      && (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR)
      && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
      && CONST_INT_P (*op1)
      && *op1 == constm1_rtx)
    {
      machine_mode mode = GET_MODE (*op0);
      rtx op00 = gen_rtx_NOT (mode, XEXP (*op0, 0));
      rtx op01 = gen_rtx_NOT (mode, XEXP (*op0, 1));

      if (GET_CODE (*op0) == AND)
	*op0 = gen_rtx_IOR (mode, op00, op01);
      else
	*op0 = gen_rtx_AND (mode, op00, op01);

      *op1 = const0_rtx;
    }
}
/* Emit a compare instruction suitable to implement the comparison
   OP0 CODE OP1.  Return the correct condition RTL to be placed in
   the IF_THEN_ELSE of the conditional branch testing the result.  */

rtx
s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode mode = s390_select_ccmode (code, op0, op1);
  rtx cc;

  /* Force OP1 into register in order to satisfy VXE TFmode patterns.  */
  if (TARGET_VXE && GET_MODE (op1) == TFmode)
    op1 = force_reg (TFmode, op1);

  if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
    {
      /* Do not output a redundant compare instruction if a
	 compare_and_swap pattern already computed the result and the
	 machine modes are compatible.  */
      gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
		  == GET_MODE (op0));
      cc = op0;
    }
  else
    {
      cc = gen_rtx_REG (mode, CC_REGNUM);
      emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
    }

  return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
}
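
/* For illustration: a typical caller does
     rtx cond = s390_emit_compare (GT, a, b);
     s390_emit_jump (label, cond);
   to emit a compare followed by a conditional branch taken when
   a > b.  */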
/* If MEM is not a legitimate compare-and-swap memory operand, return a new
   MEM, whose address is a pseudo containing the original MEM's address.  */

static rtx
s390_legitimize_cs_operand (rtx mem)
{
  rtx tmp;

  if (!contains_symbol_ref_p (mem))
    return mem;
  tmp = gen_reg_rtx (Pmode);
  emit_move_insn (tmp, copy_rtx (XEXP (mem, 0)));
  return change_address (mem, VOIDmode, tmp);
}
/* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
   matches CMP.
   Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
   conditional branch testing the result.  */

static rtx
s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
			    rtx cmp, rtx new_rtx, machine_mode ccmode)
{
  rtx cc;

  mem = s390_legitimize_cs_operand (mem);
  cc = gen_rtx_REG (ccmode, CC_REGNUM);
  switch (GET_MODE (mem))
    {
    case E_SImode:
      emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
							 new_rtx, cc));
      break;
    case E_DImode:
      emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
							 new_rtx, cc));
      break;
    case E_TImode:
      emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
							 new_rtx, cc));
      break;
    default:
      gcc_unreachable ();
    }
  return s390_emit_compare (code, cc, const0_rtx);
}
/* Emit a jump instruction to TARGET and return it.  If COND is
   NULL_RTX, emit an unconditional jump, else a conditional jump under
   condition COND.  */

rtx_insn *
s390_emit_jump (rtx target, rtx cond)
{
  rtx insn;

  target = gen_rtx_LABEL_REF (VOIDmode, target);
  if (cond)
    target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);

  insn = gen_rtx_SET (pc_rtx, target);
  return emit_jump_insn (insn);
}
/* Return branch condition mask to implement a branch
   specified by CODE.  Return -1 for invalid comparisons.  */

static int
s390_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;
  const int CC3 = 1 << 0;

  gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
  gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
  gcc_assert (XEXP (code, 1) == const0_rtx
	      || (GET_MODE (XEXP (code, 0)) == CCRAWmode
		  && CONST_INT_P (XEXP (code, 1))));

  switch (GET_MODE (XEXP (code, 0)))
    {
    case E_CCZmode:
    case E_CCZ1mode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0;
	case NE: return CC1 | CC2 | CC3;
	default: return -1;
	}
      break;

    case E_CCT1mode:
      switch (GET_CODE (code))
	{
	case EQ: return CC1;
	case NE: return CC0 | CC2 | CC3;
	default: return -1;
	}
      break;

    case E_CCT2mode:
      switch (GET_CODE (code))
	{
	case EQ: return CC2;
	case NE: return CC0 | CC1 | CC3;
	default: return -1;
	}
      break;

    case E_CCT3mode:
      switch (GET_CODE (code))
	{
	case EQ: return CC3;
	case NE: return CC0 | CC1 | CC2;
	default: return -1;
	}
      break;

    case E_CCLmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0 | CC2;
	case NE: return CC1 | CC3;
	default: return -1;
	}
      break;

    case E_CCL1mode:
      switch (GET_CODE (code))
	{
	case LTU: return CC2 | CC3;  /* carry */
	case GEU: return CC0 | CC1;  /* no carry */
	default: return -1;
	}
      break;

    case E_CCL2mode:
      switch (GET_CODE (code))
	{
	case GTU: return CC0 | CC1;  /* borrow */
	case LEU: return CC2 | CC3;  /* no borrow */
	default: return -1;
	}
      break;

    case E_CCL3mode:
      switch (GET_CODE (code))
	{
	case EQ:  return CC0 | CC2;
	case NE:  return CC1 | CC3;
	case LTU: return CC1;
	case GTU: return CC3;
	case LEU: return CC1 | CC2;
	case GEU: return CC2 | CC3;
	default: return -1;
	}

    case E_CCUmode:
      switch (GET_CODE (code))
	{
	case EQ:  return CC0;
	case NE:  return CC1 | CC2 | CC3;
	case LTU: return CC1;
	case GTU: return CC2;
	case LEU: return CC0 | CC1;
	case GEU: return CC0 | CC2;
	default: return -1;
	}
      break;

    case E_CCURmode:
      switch (GET_CODE (code))
	{
	case EQ:  return CC0;
	case NE:  return CC2 | CC1 | CC3;
	case LTU: return CC2;
	case GTU: return CC1;
	case LEU: return CC0 | CC2;
	case GEU: return CC0 | CC1;
	default: return -1;
	}
      break;

    case E_CCAPmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0;
	case NE: return CC1 | CC2 | CC3;
	case LT: return CC1 | CC3;
	case GT: return CC2;
	case LE: return CC0 | CC1 | CC3;
	case GE: return CC0 | CC2;
	default: return -1;
	}
      break;

    case E_CCANmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0;
	case NE: return CC1 | CC2 | CC3;
	case LT: return CC1;
	case GT: return CC2 | CC3;
	case LE: return CC0 | CC1;
	case GE: return CC0 | CC2 | CC3;
	default: return -1;
	}
      break;

    case E_CCOmode:
      switch (GET_CODE (code))
	{
	case EQ: return CC0 | CC1 | CC2;
	case NE: return CC3;
	default: return -1;
	}
      break;

    case E_CCSmode:
    case E_CCSFPSmode:
      switch (GET_CODE (code))
	{
	case EQ:        return CC0;
	case NE:        return CC1 | CC2 | CC3;
	case LT:        return CC1;
	case GT:        return CC2;
	case LE:        return CC0 | CC1;
	case GE:        return CC0 | CC2;
	case UNORDERED: return CC3;
	case ORDERED:   return CC0 | CC1 | CC2;
	case UNEQ:      return CC0 | CC3;
	case UNLT:      return CC1 | CC3;
	case UNGT:      return CC2 | CC3;
	case UNLE:      return CC0 | CC1 | CC3;
	case UNGE:      return CC0 | CC2 | CC3;
	case LTGT:      return CC1 | CC2;
	default: return -1;
	}
      break;

    case E_CCSRmode:
      switch (GET_CODE (code))
	{
	case EQ:        return CC0;
	case NE:        return CC2 | CC1 | CC3;
	case LT:        return CC2;
	case GT:        return CC1;
	case LE:        return CC0 | CC2;
	case GE:        return CC0 | CC1;
	case UNORDERED: return CC3;
	case ORDERED:   return CC0 | CC2 | CC1;
	case UNEQ:      return CC0 | CC3;
	case UNLT:      return CC2 | CC3;
	case UNGT:      return CC1 | CC3;
	case UNLE:      return CC0 | CC2 | CC3;
	case UNGE:      return CC0 | CC1 | CC3;
	case LTGT:      return CC2 | CC1;
	default: return -1;
	}
      break;

      /* Vector comparison modes.  */
      /* CC2 will never be set.  It however is part of the negated
	 masks.  */
    case E_CCVIALLmode:
      switch (GET_CODE (code))
	{
	case EQ:
	case GTU:
	case GT:
	case GE: return CC0;
	  /* The inverted modes are in fact *any* modes.  */
	case NE:
	case LEU:
	case LE:
	case LT: return CC3 | CC1 | CC2;
	default: return -1;
	}

    case E_CCVIANYmode:
      switch (GET_CODE (code))
	{
	case EQ:
	case GTU:
	case GT:
	case GE: return CC0 | CC1;
	  /* The inverted modes are in fact *all* modes.  */
	case NE:
	case LEU:
	case LE:
	case LT: return CC3 | CC2;
	default: return -1;
	}

    case E_CCVFALLmode:
      switch (GET_CODE (code))
	{
	case EQ:
	case GT:
	case GE: return CC0;
	  /* The inverted modes are in fact *any* modes.  */
	case NE:
	case UNLE:
	case UNLT: return CC3 | CC1 | CC2;
	default: return -1;
	}

    case E_CCVFANYmode:
      switch (GET_CODE (code))
	{
	case EQ:
	case GT:
	case GE: return CC0 | CC1;
	  /* The inverted modes are in fact *all* modes.  */
	case NE:
	case UNLE:
	case UNLT: return CC3 | CC2;
	default: return -1;
	}

    case E_CCRAWmode:
      switch (GET_CODE (code))
	{
	case EQ:
	  return INTVAL (XEXP (code, 1));
	case NE:
	  return (INTVAL (XEXP (code, 1))) ^ 0xf;
	default:
	  return -1;
	}

    default:
      return -1;
    }
}
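
/* For illustration: for (eq (reg:CCZ 33) (const_int 0)) the mask is
   CC0 == 8 (the branch is taken only for condition code 0), while ne
   yields CC1 | CC2 | CC3 == 7, its bitwise complement within the
   4-bit mask.  */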
/* Return branch condition mask to implement a compare and branch
   specified by CODE.  Return -1 for invalid comparisons.  */

int
s390_compare_and_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;

  switch (GET_CODE (code))
    {
    case EQ:
      return CC0;
    case NE:
      return CC1 | CC2;
    case LT:
    case LTU:
      return CC1;
    case GT:
    case GTU:
      return CC2;
    case LE:
    case LEU:
      return CC0 | CC1;
    case GE:
    case GEU:
      return CC0 | CC2;
    default:
      gcc_unreachable ();
    }
  return -1;
}

/* If INV is false, return assembler mnemonic string to implement
   a branch specified by CODE.  If INV is true, return mnemonic
   for the corresponding inverted branch.  */

const char *
s390_branch_condition_mnemonic (rtx code, int inv)
{
  int mask;

  static const char *const mnemonic[16] =
    {
      NULL, "o", "h", "nle",
      "l", "nhe", "lh", "ne",
      "e", "nlh", "he", "nl",
      "le", "nh", "no", NULL
    };

  if (GET_CODE (XEXP (code, 0)) == REG
      && REGNO (XEXP (code, 0)) == CC_REGNUM
      && (XEXP (code, 1) == const0_rtx
	  || (GET_MODE (XEXP (code, 0)) == CCRAWmode
	      && CONST_INT_P (XEXP (code, 1)))))
    mask = s390_branch_condition_mask (code);
  else
    mask = s390_compare_and_branch_condition_mask (code);

  gcc_assert (mask >= 0);

  if (inv)
    mask ^= 15;

  gcc_assert (mask >= 1 && mask <= 14);

  return mnemonic[mask];
}
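
/* Worked example (illustrative, not from the original sources): an EQ
   test against the CC register yields mask CC0 == 8, so mnemonic[8],
   i.e. "e", is returned; the inverted branch uses 8 ^ 15 == 7,
   i.e. mnemonic[7] == "ne".  */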

/* Return the part of op which has a value different from def.
   The size of the part is determined by mode.
   Use this function only if you already know that op really
   contains such a part.  */

unsigned HOST_WIDE_INT
s390_extract_part (rtx op, machine_mode mode, int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
  int part_bits = GET_MODE_BITSIZE (mode);
  unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
  int i;

  for (i = 0; i < max_parts; i++)
    {
      if (i == 0)
	value = UINTVAL (op);
      else
	value >>= part_bits;

      if ((value & part_mask) != (def & part_mask))
	return value & part_mask;
    }

  gcc_unreachable ();
}
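
/* Worked example (illustrative, not from the original sources): for
   op == 0xff00, mode == QImode and def == 0 the lowest byte is 0x00,
   which equals DEF; the next byte is 0xff, so 0xff is returned.  */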

/* If OP is an integer constant of mode MODE with exactly one
   part of mode PART_MODE unequal to DEF, return the number of that
   part.  Otherwise, return -1.  */

int
s390_single_part (rtx op,
		  machine_mode mode,
		  machine_mode part_mode,
		  int def)
{
  unsigned HOST_WIDE_INT value = 0;
  int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
  unsigned HOST_WIDE_INT part_mask
    = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
  int i, part = -1;

  if (GET_CODE (op) != CONST_INT)
    return -1;

  for (i = 0; i < n_parts; i++)
    {
      if (i == 0)
	value = UINTVAL (op);
      else
	value >>= GET_MODE_BITSIZE (part_mode);

      if ((value & part_mask) != (def & part_mask))
	{
	  if (part != -1)
	    return -1;
	  else
	    part = i;
	}
    }

  return part == -1 ? -1 : n_parts - 1 - part;
}
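
/* Worked example (illustrative, not from the original sources): for
   op == 0x120000, mode == DImode, part_mode == HImode and def == 0
   there are four halfword parts and only the one holding 0x12 differs
   from DEF.  The loop finds it at i == 1 counting from the low end, so
   n_parts - 1 - part == 4 - 1 - 1 == 2 is returned, i.e. the part
   number counted from the most significant part.  */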

/* Return true if IN contains a contiguous bitfield in the lower SIZE
   bits and no other bits are set in (the lower SIZE bits of) IN.

   PSTART and PEND can be used to obtain the start and end
   position (inclusive) of the bitfield relative to 64
   bits.  *PSTART / *PEND gives the position of the first/last bit
   of the bitfield counting from the highest order bit starting
   with zero.  */

bool
s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
				  int *pstart, int *pend)
{
  int start;
  int end = -1;
  int lowbit = HOST_BITS_PER_WIDE_INT - 1;
  int highbit = HOST_BITS_PER_WIDE_INT - size;
  unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;

  gcc_assert (!!pstart == !!pend);
  for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
    if (end == -1)
      {
	/* Look for the rightmost bit of a contiguous range of ones.  */
	if (bitmask & in)
	  /* Found it.  */
	  end = start;
      }
    else
      {
	/* Look for the first zero bit after the range of ones.  */
	if (! (bitmask & in))
	  /* Found it.  */
	  break;
      }
  /* We're one past the last one-bit.  */
  start++;

  if (end == -1)
    /* No one bits found.  */
    return false;

  if (start > highbit)
    {
      unsigned HOST_WIDE_INT mask;

      /* Calculate a mask for all bits beyond the contiguous bits.  */
      mask = ((~HOST_WIDE_INT_0U >> highbit)
	      & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
      if (mask & in)
	/* There are more bits set beyond the first range of one bits.  */
	return false;
    }

  if (pstart)
    {
      *pstart = start;
      *pend = end;
    }

  return true;
}
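
/* Worked example (illustrative, not from the original sources):
   in == 0x0ff0 with size == 16 is accepted with *pstart == 52 and
   *pend == 59 (counted from the highest order bit of the 64-bit
   value), while in == 0x0f0f is rejected because a second range of
   ones follows the first zero bit.  */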

/* Same as s390_contiguous_bitmask_nowrap_p but also returns true
   if ~IN contains a contiguous bitfield.  In that case, *END is <
   *START.

   If WRAP_P is true, a bitmask that wraps around is also tested.
   When a wraparound occurs *START is greater than *END (in
   non-null pointers), and the uppermost (64 - SIZE) bits are thus
   part of the range.  If WRAP_P is false, no wraparound is
   tested.  */

bool
s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
			   int size, int *start, int *end)
{
  int bs = HOST_BITS_PER_WIDE_INT;
  bool b;

  gcc_assert (!!start == !!end);
  if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
    /* This cannot be expressed as a contiguous bitmask.  Exit early because
       the second call of s390_contiguous_bitmask_nowrap_p would accept this as
       a valid bitmask.  */
    return false;
  b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
  if (b)
    return true;
  if (!wrap_p)
    return false;
  b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
  if (b && start)
    {
      int s = *start;
      int e = *end;

      gcc_assert (s >= 1);
      *start = ((e + 1) & (bs - 1));
      *end = ((s - 1 + bs) & (bs - 1));
    }

  return b;
}
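
/* Worked example (illustrative, not from the original sources):
   in == 0xc000000000000003 with size == 64 and wrap_p == true is
   accepted as a wraparound mask since ~in is contiguous; the results
   are *start == 62 and *end == 1, i.e. *start > *end.  */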

/* Return true if OP contains the same contiguous bitfield in *all*
   its elements.  START and END can be used to obtain the start and
   end position of the bitfield.

   START/STOP give the position of the first/last bit of the bitfield
   counting from the lowest order bit starting with zero.  In order to
   use these values for S/390 instructions this has to be converted to
   "bits big endian" style.  */

bool
s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
{
  unsigned HOST_WIDE_INT mask;
  int size;
  rtx elt;
  bool b;

  /* Handle floats by bitcasting them to ints.  */
  op = gen_lowpart (related_int_vector_mode (GET_MODE (op)).require (), op);

  gcc_assert (!!start == !!end);
  if (!const_vec_duplicate_p (op, &elt)
      || !CONST_INT_P (elt))
    return false;

  size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));

  /* We cannot deal with V1TI/V1TF.  This would require a vgmq.  */
  if (size > 64)
    return false;

  mask = UINTVAL (elt);

  b = s390_contiguous_bitmask_p (mask, true, size, start, end);
  if (b && start)
    {
      *start -= (HOST_BITS_PER_WIDE_INT - size);
      *end -= (HOST_BITS_PER_WIDE_INT - size);
    }

  return b;
}

/* Return true if C consists only of byte chunks being either 0 or
   0xff.  If MASK is !=NULL a byte mask is generated which is
   appropriate for the vector generate byte mask instruction.  */

bool
s390_bytemask_vector_p (rtx op, unsigned *mask)
{
  int i;
  unsigned tmp_mask = 0;
  int nunit, unit_size;

  if (!VECTOR_MODE_P (GET_MODE (op))
      || GET_CODE (op) != CONST_VECTOR
      || !CONST_INT_P (XVECEXP (op, 0, 0)))
    return false;

  nunit = GET_MODE_NUNITS (GET_MODE (op));
  unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));

  for (i = 0; i < nunit; i++)
    {
      unsigned HOST_WIDE_INT c;
      int j;

      if (!CONST_INT_P (XVECEXP (op, 0, i)))
	return false;

      c = UINTVAL (XVECEXP (op, 0, i));
      for (j = 0; j < unit_size; j++)
	{
	  if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
	    return false;
	  tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
	  c = c >> BITS_PER_UNIT;
	}
    }

  if (mask != NULL)
    *mask = tmp_mask;

  return true;
}
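
/* Worked example (illustrative, not from the original sources): the
   V4HImode constant { 0x00ff, 0xff00, 0xffff, 0x0000 } consists only
   of 0x00/0xff bytes, so true is returned and the generated byte mask
   is 0x6c -- one mask bit per byte, most significant element first.  */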

/* Check whether a rotate of ROTL followed by an AND of CONTIG is
   equivalent to a shift followed by the AND.  In particular, CONTIG
   should not overlap the (rotated) bit 0/bit 63 gap.  Negative values
   for ROTL indicate a rotate to the right.  */

bool
s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
{
  int start, end;
  bool ok;

  ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
  gcc_assert (ok);

  if (rotl >= 0)
    return (64 - end >= rotl);
  else
    {
      /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
	 DImode.  */
      rotl = -rotl + (64 - bitsize);
      return (start >= rotl);
    }
}

/* Check whether we can (and want to) split a double-word
   move in mode MODE from SRC to DST into two single-word
   moves, moving the subword FIRST_SUBWORD first.  */

bool
s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
{
  /* Floating point and vector registers cannot be split.  */
  if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
    return false;

  /* Non-offsettable memory references cannot be split.  */
  if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
      || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
    return false;

  /* Moving the first subword must not clobber a register
     needed to move the second subword.  */
  if (register_operand (dst, mode))
    {
      rtx subreg = operand_subword (dst, first_subword, 0, mode);
      if (reg_overlap_mentioned_p (subreg, src))
	return false;
    }

  return true;
}

/* Return true if it can be proven that [MEM1, MEM1 + SIZE]
   and [MEM2, MEM2 + SIZE] do overlap and false
   otherwise.  */

bool
s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
{
  rtx addr1, addr2, addr_delta;
  HOST_WIDE_INT delta;

  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
    return true;

  if (size == 0)
    return false;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);

  /* This overlapping check is used by peepholes merging memory block operations.
     Overlapping operations would otherwise be recognized by the S/390 hardware
     and would fall back to a slower implementation.  Allowing overlapping
     operations would lead to slow code but not to wrong code.  Therefore we are
     somewhat optimistic if we cannot prove that the memory blocks are
     overlapping.
     That's why we return false here although this may accept operations on
     overlapping memory areas.  */
  if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
    return false;

  delta = INTVAL (addr_delta);

  if (delta == 0
      || (delta > 0 && delta < size)
      || (delta < 0 && -delta < size))
    return true;

  return false;
}
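
/* Worked example (illustrative, not from the original sources): for
   mem1 at (plus r1 16) and mem2 at (plus r1 20) with size == 8 the
   address delta is 4 < 8, so the blocks provably overlap and true is
   returned.  With mem2 at (plus r1 24) the delta equals the size and
   false is returned.  */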

/* Check whether the address of memory reference MEM2 equals exactly
   the address of memory reference MEM1 plus DELTA.  Return true if
   we can prove this to be the case, false otherwise.  */

bool
s390_offset_p (rtx mem1, rtx mem2, rtx delta)
{
  rtx addr1, addr2, addr_delta;

  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
    return false;

  addr1 = XEXP (mem1, 0);
  addr2 = XEXP (mem2, 0);

  addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
  if (!addr_delta || !rtx_equal_p (addr_delta, delta))
    return false;

  return true;
}

/* Expand logical operator CODE in mode MODE with operands OPERANDS.  */

void
s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
			      rtx *operands)
{
  machine_mode wmode = mode;
  rtx dst = operands[0];
  rtx src1 = operands[1];
  rtx src2 = operands[2];
  rtx op, clob, tem;

  /* If we cannot handle the operation directly, use a temp register.  */
  if (!s390_logical_operator_ok_p (operands))
    dst = gen_reg_rtx (mode);

  /* QImode and HImode patterns make sense only if we have a destination
     in memory.  Otherwise perform the operation in SImode.  */
  if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
    wmode = SImode;

  /* Widen operands if required.  */
  if (mode != wmode)
    {
      if (GET_CODE (dst) == SUBREG
	  && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
	dst = tem;
      else if (REG_P (dst))
	dst = gen_rtx_SUBREG (wmode, dst, 0);
      else
	dst = gen_reg_rtx (wmode);

      if (GET_CODE (src1) == SUBREG
	  && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
	src1 = tem;
      else if (GET_MODE (src1) != VOIDmode)
	src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);

      if (GET_CODE (src2) == SUBREG
	  && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
	src2 = tem;
      else if (GET_MODE (src2) != VOIDmode)
	src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
    }

  /* Emit the instruction.  */
  op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
  clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
  emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));

  /* Fix up the destination if needed.  */
  if (dst != operands[0])
    emit_move_insn (operands[0], gen_lowpart (mode, dst));
}

/* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR).  */

bool
s390_logical_operator_ok_p (rtx *operands)
{
  /* If the destination operand is in memory, it needs to coincide
     with one of the source operands.  After reload, it has to be
     the first source operand.  */
  if (GET_CODE (operands[0]) == MEM)
    return rtx_equal_p (operands[0], operands[1])
	   || (!reload_completed && rtx_equal_p (operands[0], operands[2]));

  return true;
}

/* Narrow logical operation CODE of memory operand MEMOP with immediate
   operand IMMOP to switch from SS to SI type instructions.  */

void
s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
{
  int def = code == AND ? -1 : 0;
  HOST_WIDE_INT mask;
  int part;

  gcc_assert (GET_CODE (*memop) == MEM);
  gcc_assert (!MEM_VOLATILE_P (*memop));

  mask = s390_extract_part (*immop, QImode, def);
  part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
  gcc_assert (part >= 0);

  *memop = adjust_address (*memop, QImode, part);
  *immop = gen_int_mode (mask, QImode);
}
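
/* Worked example (illustrative, not from the original sources): an
   AND of an SImode memory operand with 0xffff00ff (DEF == -1) has
   exactly one QImode part different from 0xff, namely part 2 counted
   from the most significant byte.  MEMOP is narrowed to that byte and
   IMMOP becomes 0, so a single ni instruction suffices.  */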

/* How to allocate a 'struct machine_function'.  */

static struct machine_function *
s390_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}

/* Map for smallest class containing reg regno.  */

const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
{ GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,	/*  0 */
  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,	/*  4 */
  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,	/*  8 */
  ADDR_REGS,    ADDR_REGS, ADDR_REGS, ADDR_REGS,	/* 12 */
  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,		/* 16 */
  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,		/* 20 */
  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,		/* 24 */
  FP_REGS,      FP_REGS,   FP_REGS,   FP_REGS,		/* 28 */
  ADDR_REGS,    CC_REGS,   ADDR_REGS, ADDR_REGS,	/* 32 */
  ACCESS_REGS,  ACCESS_REGS, VEC_REGS, VEC_REGS,	/* 36 */
  VEC_REGS,     VEC_REGS,  VEC_REGS,  VEC_REGS,		/* 40 */
  VEC_REGS,     VEC_REGS,  VEC_REGS,  VEC_REGS,		/* 44 */
  VEC_REGS,     VEC_REGS,  VEC_REGS,  VEC_REGS,		/* 48 */
  VEC_REGS,     VEC_REGS					/* 52 */
};

/* Return attribute type of insn.  */

static enum attr_type
s390_safe_attr_type (rtx_insn *insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_type (insn);
  else
    return TYPE_NONE;
}

/* Return attribute relative_long of insn.  */

static bool
s390_safe_relative_long_p (rtx_insn *insn)
{
  if (recog_memoized (insn) >= 0)
    return get_attr_relative_long (insn) == RELATIVE_LONG_YES;
  return false;
}

/* Return true if DISP is a valid short displacement.  */

static bool
s390_short_displacement (rtx disp)
{
  /* No displacement is OK.  */
  if (!disp)
    return true;

  /* Without the long displacement facility we don't need to
     distinguish between long and short displacement.  */
  if (!TARGET_LONG_DISPLACEMENT)
    return true;

  /* Integer displacement in range.  */
  if (GET_CODE (disp) == CONST_INT)
    return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;

  /* GOT offset is not OK, the GOT can be large.  */
  if (GET_CODE (disp) == CONST
      && GET_CODE (XEXP (disp, 0)) == UNSPEC
      && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
	  || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
    return false;

  /* All other symbolic constants are literal pool references,
     which are OK as the literal pool must be small.  */
  if (GET_CODE (disp) == CONST)
    return true;

  return false;
}
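
/* Worked example (illustrative, not from the original sources): with
   the long displacement facility enabled, (const_int 4095) is a valid
   short displacement while (const_int 4096) and any negative
   displacement are not.  */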

/* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp').
   If successful, also determines the
   following characteristics of `ref': `is_ptr' - whether it can be an
   LA argument, `is_base_ptr' - whether the resulting base is a well-known
   base register (stack/frame pointer, etc), `is_pool_ptr` - whether it is
   considered a literal pool pointer for purposes of avoiding two different
   literal pool pointers per insn during or after reload (`B' constraint).  */

static bool
s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr,
				  bool *is_base_ptr, bool *is_pool_ptr)
{
  if (!*ref)
    return true;

  if (GET_CODE (*ref) == UNSPEC)
    switch (XINT (*ref, 1))
      {
      case UNSPEC_LTREF:
	if (!*disp)
	  *disp = gen_rtx_UNSPEC (Pmode,
				  gen_rtvec (1, XVECEXP (*ref, 0, 0)),
				  UNSPEC_LTREL_OFFSET);
	else
	  return false;

	*ref = XVECEXP (*ref, 0, 1);
	break;

      default:
	return false;
      }

  if (!REG_P (*ref) || GET_MODE (*ref) != Pmode)
    return false;

  if (REGNO (*ref) == STACK_POINTER_REGNUM
      || REGNO (*ref) == FRAME_POINTER_REGNUM
      || ((reload_completed || reload_in_progress)
	  && frame_pointer_needed
	  && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM)
      || REGNO (*ref) == ARG_POINTER_REGNUM
      || (flag_pic
	  && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM))
    *is_ptr = *is_base_ptr = true;

  if ((reload_completed || reload_in_progress)
      && *ref == cfun->machine->base_reg)
    *is_ptr = *is_base_ptr = *is_pool_ptr = true;

  return true;
}

/* Decompose a RTL expression ADDR for a memory address into
   its components, returned in OUT.

   Returns false if ADDR is not a valid memory address, true
   otherwise.  If OUT is NULL, don't return the components,
   but check for validity only.

   Note: Only addresses in canonical form are recognized.
   LEGITIMIZE_ADDRESS should convert non-canonical forms to the
   canonical form so that they will be recognized.  */

static bool
s390_decompose_address (rtx addr, struct s390_address *out)
{
  HOST_WIDE_INT offset = 0;
  rtx base = NULL_RTX;
  rtx indx = NULL_RTX;
  rtx disp = NULL_RTX;
  rtx orig_disp;
  bool pointer = false;
  bool base_ptr = false;
  bool indx_ptr = false;
  bool literal_pool = false;

  /* We may need to substitute the literal pool base register into the address
     below.  However, at this point we do not know which register is going to
     be used as base, so we substitute the arg pointer register.  This is going
     to be treated as holding a pointer below -- it shouldn't be used for any
     other purpose.  */
  rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);

  /* Decompose address into base + index + displacement.  */

  if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
    base = addr;

  else if (GET_CODE (addr) == PLUS)
    {
      rtx op0 = XEXP (addr, 0);
      rtx op1 = XEXP (addr, 1);
      enum rtx_code code0 = GET_CODE (op0);
      enum rtx_code code1 = GET_CODE (op1);

      if (code0 == REG || code0 == UNSPEC)
	{
	  if (code1 == REG || code1 == UNSPEC)
	    {
	      indx = op0;	/* index + base */
	      base = op1;
	    }
	  else
	    {
	      base = op0;	/* base + displacement */
	      disp = op1;
	    }
	}
      else if (code0 == PLUS)
	{
	  indx = XEXP (op0, 0);	/* index + base + disp */
	  base = XEXP (op0, 1);
	  disp = op1;
	}
      else
	return false;
    }

  else
    disp = addr;		/* displacement */

  /* Extract integer part of displacement.  */
  orig_disp = disp;
  if (disp)
    {
      if (GET_CODE (disp) == CONST_INT)
	{
	  offset = INTVAL (disp);
	  disp = NULL_RTX;
	}
      else if (GET_CODE (disp) == CONST
	       && GET_CODE (XEXP (disp, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
	{
	  offset = INTVAL (XEXP (XEXP (disp, 0), 1));
	  disp = XEXP (XEXP (disp, 0), 0);
	}
    }

  /* Strip off CONST here to avoid special case tests later.  */
  if (disp && GET_CODE (disp) == CONST)
    disp = XEXP (disp, 0);

  /* We can convert literal pool addresses to
     displacements by basing them off the base register.  */
  if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
    {
      if (base || indx)
	return false;

      base = fake_pool_base, literal_pool = true;

      /* Mark up the displacement.  */
      disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
			     UNSPEC_LTREL_OFFSET);
    }

  /* Validate base register.  */
  if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr,
					 &literal_pool))
    return false;

  /* Validate index register.  */
  if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr,
					 &literal_pool))
    return false;

  /* Prefer to use pointer as base, not index.  */
  if (base && indx && !base_ptr
      && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
    {
      rtx tmp = base;
      base = indx;
      indx = tmp;
    }

  /* Validate displacement.  */
  if (!disp)
    {
      /* If virtual registers are involved, the displacement will change later
	 anyway as the virtual registers get eliminated.  This could make a
	 valid displacement invalid, but it is more likely to make an invalid
	 displacement valid, because we sometimes access the register save area
	 via negative offsets to one of those registers.
	 Thus we don't check the displacement for validity here.  If after
	 elimination the displacement turns out to be invalid after all,
	 this is fixed up by reload in any case.  */
      /* LRA maintains always displacements up to date and we need to
	 know the displacement is right during all LRA not only at the
	 final elimination.  */
      if (lra_in_progress
	  || (base != arg_pointer_rtx
	      && indx != arg_pointer_rtx
	      && base != return_address_pointer_rtx
	      && indx != return_address_pointer_rtx
	      && base != frame_pointer_rtx
	      && indx != frame_pointer_rtx
	      && base != virtual_stack_vars_rtx
	      && indx != virtual_stack_vars_rtx))
	if (!DISP_IN_RANGE (offset))
	  return false;
    }
  else
    {
      /* All the special cases are pointers.  */
      pointer = true;

      /* In the small-PIC case, the linker converts @GOT
	 and @GOTNTPOFF offsets to possible displacements.  */
      if (GET_CODE (disp) == UNSPEC
	  && (XINT (disp, 1) == UNSPEC_GOT
	      || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
	  && flag_pic == 1)
	;

      /* Accept pool label offsets.  */
      else if (GET_CODE (disp) == UNSPEC
	       && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
	;

      /* Accept literal pool references.  */
      else if (GET_CODE (disp) == UNSPEC
	       && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
	{
	  /* In case CSE pulled a non literal pool reference out of
	     the pool we have to reject the address.  This is
	     especially important when loading the GOT pointer on non
	     zarch CPUs.  In this case the literal pool contains an lt
	     relative offset to the _GLOBAL_OFFSET_TABLE_ label which
	     will most likely exceed the displacement.  */
	  if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
	      || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
	    return false;

	  orig_disp = gen_rtx_CONST (Pmode, disp);
	  if (offset)
	    {
	      /* If we have an offset, make sure it does not
		 exceed the size of the constant pool entry.
		 Otherwise we might generate an out-of-range
		 displacement for the base register form.  */
	      rtx sym = XVECEXP (disp, 0, 0);
	      if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
		return false;

	      orig_disp = plus_constant (Pmode, orig_disp, offset);
	    }
	}

      else
	return false;
    }

  if (!base && !indx)
    pointer = true;

  if (out)
    {
      out->base = base;
      out->indx = indx;
      out->disp = orig_disp;
      out->pointer = pointer;
      out->literal_pool = literal_pool;
    }

  return true;
}
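
/* Worked example (illustrative, not from the original sources):
   (plus (reg 2) (const_int 400)) decomposes into base == r2,
   indx == NULL and disp == 400, while
   (plus (plus (reg 3) (reg 2)) (const_int 8)) yields indx == r3,
   base == r2 and disp == 8.  */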

/* Decompose a RTL expression OP for an address style operand into its
   components, and return the base register in BASE and the offset in
   OFFSET.  While OP looks like an address it is never supposed to be
   used as such.

   Return true if OP is a valid address operand, false if not.  */

bool
s390_decompose_addrstyle_without_index (rtx op, rtx *base,
					HOST_WIDE_INT *offset)
{
  rtx off = NULL_RTX;

  /* We can have an integer constant, an address register,
     or a sum of the two.  */
  if (CONST_SCALAR_INT_P (op))
    {
      off = op;
      op = NULL_RTX;
    }
  if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
    {
      off = XEXP (op, 1);
      op = XEXP (op, 0);
    }
  while (op && GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  if (op && GET_CODE (op) != REG)
    return false;

  if (offset)
    {
      if (off == NULL_RTX)
	*offset = 0;
      else if (CONST_INT_P (off))
	*offset = INTVAL (off);
      else if (CONST_WIDE_INT_P (off))
	/* The offset will anyway be cut down to 12 bits so take just
	   the lowest order chunk of the wide int.  */
	*offset = CONST_WIDE_INT_ELT (off, 0);
    }
  if (base)
    *base = op;

  return true;
}

/* Check that OP is a valid shift count operand.
   It should be of the following structure:
     (subreg (and (plus (reg imm_op)) 2^k-1) 7)
   where subreg, and and plus are optional.

   If IMPLICIT_MASK is > 0 and OP contains an and, it is checked whether
   IMPLICIT_MASK and the immediate match.  Otherwise, no checking is
   performed.  */
bool
s390_valid_shift_count (rtx op, HOST_WIDE_INT implicit_mask)
{
  /* Strip subregs.  */
  while (GET_CODE (op) == SUBREG && subreg_lowpart_p (op))
    op = SUBREG_REG (op);

  /* Check for an and with proper constant.  */
  if (GET_CODE (op) == AND)
    {
      rtx op1 = XEXP (op, 0);
      rtx imm = XEXP (op, 1);

      if (GET_CODE (op1) == SUBREG && subreg_lowpart_p (op1))
	op1 = XEXP (op1, 0);

      if (!(register_operand (op1, GET_MODE (op1)) || GET_CODE (op1) == PLUS))
	return false;

      if (!immediate_operand (imm, GET_MODE (imm)))
	return false;

      HOST_WIDE_INT val = INTVAL (imm);
      if (implicit_mask > 0
	  && (val & implicit_mask) != implicit_mask)
	return false;

      op = op1;
    }

  /* Check the rest.  */
  return s390_decompose_addrstyle_without_index (op, NULL, NULL);
}

/* Return true if CODE is a valid address without index.  */

bool
s390_legitimate_address_without_index_p (rtx op)
{
  struct s390_address addr;

  if (!s390_decompose_address (XEXP (op, 0), &addr))
    return false;
  if (addr.indx)
    return false;

  return true;
}

/* Return TRUE if ADDR is an operand valid for a load/store relative
   instruction.  Be aware that the alignment of the operand needs to
   be checked separately.
   Valid addresses are single references or a sum of a reference and a
   constant integer.  Return these parts in SYMREF and ADDEND.  You can
   pass NULL in REF and/or ADDEND if you are not interested in these
   values.  */

static bool
s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
{
  HOST_WIDE_INT tmpaddend = 0;

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == PLUS)
    {
      if (!CONST_INT_P (XEXP (addr, 1)))
	return false;

      tmpaddend = INTVAL (XEXP (addr, 1));
      addr = XEXP (addr, 0);
    }

  if (GET_CODE (addr) == SYMBOL_REF
      || (GET_CODE (addr) == UNSPEC
	  && (XINT (addr, 1) == UNSPEC_GOTENT
	      || XINT (addr, 1) == UNSPEC_PLT31)))
    {
      if (symref)
	*symref = addr;
      if (addend)
	*addend = tmpaddend;

      return true;
    }

  return false;
}

/* Return true if the address in OP is valid for constraint letter C
   if wrapped in a MEM rtx.  Set LIT_POOL_OK to true if literal
   pool MEMs should be accepted.  Only the Q, R, S, T constraint
   letters are allowed for C.  */

static int
s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
{
  rtx symref;
  struct s390_address addr;
  bool decomposed = false;

  if (!address_operand (op, GET_MODE (op)))
    return 0;

  /* This check makes sure that no symbolic address (except literal
     pool references) are accepted by the R or T constraints.  */
  if (s390_loadrelative_operand_p (op, &symref, NULL)
      && (!lit_pool_ok
	  || !SYMBOL_REF_P (symref)
	  || !CONSTANT_POOL_ADDRESS_P (symref)))
    return 0;

  /* Ensure literal pool references are only accepted if LIT_POOL_OK.  */
  if (!lit_pool_ok)
    {
      if (!s390_decompose_address (op, &addr))
	return 0;
      if (addr.literal_pool)
	return 0;
      decomposed = true;
    }

  /* With reload, we sometimes get intermediate address forms that are
     actually invalid as-is, but we need to accept them in the most
     generic cases below ('R' or 'T'), since reload will in fact fix
     them up.  LRA behaves differently here; we never see such forms,
     but on the other hand, we need to strictly reject every invalid
     address form.  After both reload and LRA invalid address forms
     must be rejected, because nothing will fix them up later.  Perform
     this check right up front.  */
  if (lra_in_progress || reload_completed)
    {
      if (!decomposed && !s390_decompose_address (op, &addr))
	return 0;
      decomposed = true;
    }

  switch (c)
    {
    case 'Q': /* no index short displacement */
      if (!decomposed && !s390_decompose_address (op, &addr))
	return 0;
      if (addr.indx)
	return 0;
      if (!s390_short_displacement (addr.disp))
	return 0;
      break;

    case 'R': /* with index short displacement */
      if (TARGET_LONG_DISPLACEMENT)
	{
	  if (!decomposed && !s390_decompose_address (op, &addr))
	    return 0;
	  if (!s390_short_displacement (addr.disp))
	    return 0;
	}
      /* Any invalid address here will be fixed up by reload,
	 so accept it for the most generic constraint.  */
      break;

    case 'S': /* no index long displacement */
      if (!decomposed && !s390_decompose_address (op, &addr))
	return 0;
      if (addr.indx)
	return 0;
      break;

    case 'T': /* with index long displacement */
      /* Any invalid address here will be fixed up by reload,
	 so accept it for the most generic constraint.  */
      break;

    default:
      return 0;
    }

  return 1;
}

/* Evaluates constraint strings described by the regular expression
   ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
   the constraint given in STR, or 0 else.  */

int
s390_mem_constraint (const char *str, rtx op)
{
  char c = str[0];

  switch (c)
    {
    case 'A':
      /* Check for offsettable variants of memory constraints.  */
      if (!MEM_P (op) || MEM_VOLATILE_P (op))
	return 0;
      if ((reload_completed || reload_in_progress)
	  ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
	return 0;
      return s390_check_qrst_address (str[1], XEXP (op, 0), true);
    case 'B':
      /* Check for non-literal-pool variants of memory constraints.  */
      if (!MEM_P (op))
	return 0;
      return s390_check_qrst_address (str[1], XEXP (op, 0), false);
    case 'Q':
    case 'R':
    case 'S':
    case 'T':
      if (GET_CODE (op) != MEM)
	return 0;
      return s390_check_qrst_address (c, XEXP (op, 0), true);
    case 'Y':
      /* Simply check for the basic form of a shift count.  Reload will
	 take care of making sure we have a proper base register.  */
      if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
	return 0;
      break;
    case 'Z':
      return s390_check_qrst_address (str[1], op, true);
    default:
      return 0;
    }
  return 1;
}

/* Evaluates constraint strings starting with letter O.  Input
   parameter C is the second letter following the "O" in the constraint
   string.  Returns 1 if VALUE meets the respective constraint and 0
   otherwise.  */

int
s390_O_constraint_str (const char c, HOST_WIDE_INT value)
{
  if (!TARGET_EXTIMM)
    return 0;

  switch (c)
    {
    case 's':
      return trunc_int_for_mode (value, SImode) == value;

    case 'p':
      return value == 0
	     || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;

    case 'n':
      return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;

    default:
      gcc_unreachable ();
    }
}

/* Evaluates constraint strings starting with letter N.  Parameter STR
   contains the letters following letter "N" in the constraint string.
   Returns true if VALUE matches the constraint.  */

int
s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
{
  machine_mode mode, part_mode;
  int def;
  int part, part_goal;

  if (str[0] == 'x')
    part_goal = -1;
  else
    part_goal = str[0] - '0';

  switch (str[1])
    {
    case 'Q':
      part_mode = QImode;
      break;
    case 'H':
      part_mode = HImode;
      break;
    case 'S':
      part_mode = SImode;
      break;
    default:
      return 0;
    }

  switch (str[2])
    {
    case 'H':
      mode = HImode;
      break;
    case 'S':
      mode = SImode;
      break;
    case 'D':
      mode = DImode;
      break;
    default:
      return 0;
    }

  switch (str[3])
    {
    case '0':
      def = 0;
      break;
    case 'F':
      def = -1;
      break;
    default:
      return 0;
    }

  if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
    return 0;

  part = s390_single_part (GEN_INT (value), mode, part_mode, def);
  if (part < 0)
    return 0;
  if (part_goal != -1 && part_goal != part)
    return 0;

  return 1;
}

/* Returns true if the input parameter VALUE is a float zero.  */

int
s390_float_const_zero_p (rtx value)
{
  return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
	  && value == CONST0_RTX (GET_MODE (value)));
}

/* Implement TARGET_REGISTER_MOVE_COST.  */

static int
s390_register_move_cost (machine_mode mode,
			 reg_class_t from, reg_class_t to)
{
  /* On s390, copy between fprs and gprs is expensive.  */

  /* It becomes somewhat faster having ldgr/lgdr.  */
  if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
    {
      /* ldgr is single cycle.  */
      if (reg_classes_intersect_p (from, GENERAL_REGS)
	  && reg_classes_intersect_p (to, FP_REGS))
	return 1;
      /* lgdr needs 3 cycles.  */
      if (reg_classes_intersect_p (to, GENERAL_REGS)
	  && reg_classes_intersect_p (from, FP_REGS))
	return 3;
    }

  /* Otherwise copying is done via memory.  */
  if ((reg_classes_intersect_p (from, GENERAL_REGS)
       && reg_classes_intersect_p (to, FP_REGS))
      || (reg_classes_intersect_p (from, FP_REGS)
	  && reg_classes_intersect_p (to, GENERAL_REGS)))
    return 10;

  /* We usually do not want to copy via CC.  */
  if (reg_classes_intersect_p (from, CC_REGS)
      || reg_classes_intersect_p (to, CC_REGS))
    return 5;

  return 1;
}

/* Implement TARGET_MEMORY_MOVE_COST.  */

static int
s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
		       reg_class_t rclass ATTRIBUTE_UNUSED,
		       bool in ATTRIBUTE_UNUSED)
{
  return 2;
}

/* Compute a (partial) cost for rtx X.  Return true if the complete
   cost has been computed, and false if subexpressions should be
   scanned.  In either case, *TOTAL contains the cost result.  The
   initial value of *TOTAL is the default value computed by
   rtx_cost.  It may be left unmodified.  OUTER_CODE contains the
   code of the superexpression of x.  */

static bool
s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
		int opno ATTRIBUTE_UNUSED,
		int *total, bool speed ATTRIBUTE_UNUSED)
{
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST:
    case CONST_INT:
    case LABEL_REF:
    case SYMBOL_REF:
    case CONST_DOUBLE:
    case CONST_WIDE_INT:
    case MEM:
      *total = 0;
      return true;

    case SET:
      {
	/* Without this a conditional move instruction would be
	   accounted as 3 * COSTS_N_INSNS (set, if_then_else,
	   comparison operator).  That's a bit pessimistic.  */

	if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
	  return false;

	rtx cond = XEXP (SET_SRC (x), 0);

	if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
	  return false;

	/* It is going to be a load/store on condition.  Make it
	   slightly more expensive than a normal load.  */
	*total = COSTS_N_INSNS (1) + 1;

	rtx dst = SET_DEST (x);
	rtx then = XEXP (SET_SRC (x), 1);
	rtx els = XEXP (SET_SRC (x), 2);

	/* It is a real IF-THEN-ELSE.  An additional move will be
	   needed to implement that.  */
	if (reload_completed
	    && !rtx_equal_p (dst, then)
	    && !rtx_equal_p (dst, els))
	  *total += COSTS_N_INSNS (1) / 2;

	/* A minor penalty for constants we cannot directly handle.  */
	if ((CONST_INT_P (then) || CONST_INT_P (els))
	    && (!TARGET_Z13 || MEM_P (dst)
		|| (CONST_INT_P (then) && !satisfies_constraint_K (then))
		|| (CONST_INT_P (els) && !satisfies_constraint_K (els))))
	  *total += COSTS_N_INSNS (1) / 2;

	/* A store on condition can only handle register src operands.  */
	if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
	  *total += COSTS_N_INSNS (1) / 2;

	return true;
      }

    case IOR:
      /* Combining two NOT operands can be done with a single z15
	 instruction.  */
      if (TARGET_Z15
	  && (mode == SImode || mode == DImode)
	  && GET_CODE (XEXP (x, 0)) == NOT
	  && GET_CODE (XEXP (x, 1)) == NOT)
	{
	  *total = COSTS_N_INSNS (1);
	  if (!REG_P (XEXP (XEXP (x, 0), 0)))
	    *total += 1;
	  if (!REG_P (XEXP (XEXP (x, 1), 0)))
	    *total += 1;
	  return true;
	}

      /* risbg */
      if (GET_CODE (XEXP (x, 0)) == AND
	  && GET_CODE (XEXP (x, 1)) == ASHIFT
	  && REG_P (XEXP (XEXP (x, 0), 0))
	  && REG_P (XEXP (XEXP (x, 1), 0))
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && CONST_INT_P (XEXP (XEXP (x, 1), 1))
	  && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
	      (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
	{
	  *total = COSTS_N_INSNS (2);
	  return true;
	}

      /* ~AND on a 128 bit mode.  This can be done using a vector
	 instruction.  */
      if (TARGET_VXE
	  && GET_CODE (XEXP (x, 0)) == NOT
	  && GET_CODE (XEXP (x, 1)) == NOT
	  && REG_P (XEXP (XEXP (x, 0), 0))
	  && REG_P (XEXP (XEXP (x, 1), 0))
	  && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
	  && s390_hard_regno_mode_ok (VR0_REGNUM,
				      GET_MODE (XEXP (XEXP (x, 0), 0))))
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}

      *total = COSTS_N_INSNS (1);
      return false;

    case AND:
      /* The same holds for combining two NOT operands with AND.  */
      if (TARGET_Z15
	  && (mode == SImode || mode == DImode)
	  && GET_CODE (XEXP (x, 0)) == NOT
	  && GET_CODE (XEXP (x, 1)) == NOT)
	{
	  *total = COSTS_N_INSNS (1);
	  if (!REG_P (XEXP (XEXP (x, 0), 0)))
	    *total += 1;
	  if (!REG_P (XEXP (XEXP (x, 1), 0)))
	    *total += 1;
	  return true;
	}
      /* fallthrough */
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATE:
    case ROTATERT:
    case XOR:
    case NEG:
    case NOT:
    case PLUS:
    case MINUS:
      *total = COSTS_N_INSNS (1);
      return false;

    case MULT:
      switch (mode)
	{
	case E_SImode:
	  {
	    rtx left = XEXP (x, 0);
	    rtx right = XEXP (x, 1);
	    if (GET_CODE (right) == CONST_INT
		&& CONST_OK_FOR_K (INTVAL (right)))
	      *total = s390_cost->mhi;
	    else if (GET_CODE (left) == SIGN_EXTEND)
	      *total = s390_cost->mh;
	    else
	      *total = s390_cost->ms;	/* msr, ms, msy */
	    break;
	  }
	case E_DImode:
	  {
	    rtx left = XEXP (x, 0);
	    rtx right = XEXP (x, 1);
	    if (TARGET_ZARCH)
	      {
		if (GET_CODE (right) == CONST_INT
		    && CONST_OK_FOR_K (INTVAL (right)))
		  *total = s390_cost->mghi;
		else if (GET_CODE (left) == SIGN_EXTEND)
		  *total = s390_cost->msgf;
		else
		  *total = s390_cost->msg;	/* msgr, msg */
	      }
	    else /* TARGET_31BIT */
	      {
		if (GET_CODE (left) == SIGN_EXTEND
		    && GET_CODE (right) == SIGN_EXTEND)
		  /* mulsidi case: mr, m */
		  *total = s390_cost->m;
		else if (GET_CODE (left) == ZERO_EXTEND
			 && GET_CODE (right) == ZERO_EXTEND)
		  /* umulsidi case: ml, mlr */
		  *total = s390_cost->ml;
		else
		  /* Complex calculation is required.  */
		  *total = COSTS_N_INSNS (40);
	      }
	    break;
	  }
	case E_SFmode:
	case E_DFmode:
	  *total = s390_cost->mult_df;
	  break;
	case E_TFmode:
	  *total = s390_cost->mxbr;
	  break;
	default:
	  return false;
	}
      break;

    case FMA:
      switch (mode)
	{
	case E_DFmode:
	  *total = s390_cost->madbr;
	  break;
	case E_SFmode:
	  *total = s390_cost->maebr;
	  break;
	default:
	  return false;
	}

      /* Negate in the third argument is free: FMSUB.  */
      if (GET_CODE (XEXP (x, 2)) == NEG)
	{
	  *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
		     + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
		     + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
	  return true;
	}
      return false;

    case UDIV:
    case UMOD:
      if (mode == TImode)	       /* 128 bit division */
	*total = s390_cost->dlgr;
      else if (mode == DImode)
	{
	  rtx right = XEXP (x, 1);
	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
	    *total = s390_cost->dlr;
	  else				       /* 64 by 64 bit division */
	    *total = s390_cost->dlgr;
	}
      else if (mode == SImode)	       /* 32 bit division */
	*total = s390_cost->dlr;
      break;

    case DIV:
    case MOD:
      if (mode == DImode)
	{
	  rtx right = XEXP (x, 1);
	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
	    {
	      if (TARGET_ZARCH)
		*total = s390_cost->dsgfr;
	      else
		*total = s390_cost->dr;
	    }
	  else				       /* 64 by 64 bit division */
	    *total = s390_cost->dsgr;
	}
      else if (mode == SImode)	       /* 32 bit division */
	*total = s390_cost->dlr;
      else if (mode == SFmode)
	*total = s390_cost->debr;
      else if (mode == DFmode)
	*total = s390_cost->ddbr;
      else if (mode == TFmode)
	*total = s390_cost->dxbr;
      break;

    case SQRT:
      if (mode == SFmode)
	*total = s390_cost->sqebr;
      else if (mode == DFmode)
	*total = s390_cost->sqdbr;
      else /* TFmode */
	*total = s390_cost->sqxbr;
      break;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      if (outer_code == MULT || outer_code == DIV || outer_code == MOD
	  || outer_code == PLUS || outer_code == MINUS
	  || outer_code == COMPARE)
	*total = 0;
      break;

    case COMPARE:
      *total = COSTS_N_INSNS (1);

      /* nxrk, nxgrk ~(a^b)==0 */
      if (TARGET_Z15
	  && GET_CODE (XEXP (x, 0)) == NOT
	  && XEXP (x, 1) == const0_rtx
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
	  && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
	  && mode == CCZmode)
	{
	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
	    *total += 1;
	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
	    *total += 1;
	  return true;
	}

      /* nnrk, nngrk, nork, nogrk */
      if (TARGET_Z15
	  && (GET_CODE (XEXP (x, 0)) == AND || GET_CODE (XEXP (x, 0)) == IOR)
	  && XEXP (x, 1) == const0_rtx
	  && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == NOT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == NOT
	  && mode == CCZmode)
	{
	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
	    *total += 1;
	  if (!REG_P (XEXP (XEXP (XEXP (x, 0), 1), 0)))
	    *total += 1;
	  return true;
	}

      /* Test under mask.  */
      if (GET_CODE (XEXP (x, 0)) == AND
	  && GET_CODE (XEXP (x, 1)) == CONST_INT
	  && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
	{
	  rtx op0 = XEXP (XEXP (x, 0), 0);
	  rtx op1 = XEXP (XEXP (x, 0), 1);
	  rtx op2 = XEXP (x, 1);

	  if (memory_operand (op0, GET_MODE (op0))
	      && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
	    return true;
	  if (register_operand (op0, GET_MODE (op0))
	      && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
	    return true;
	}
      return false;

    default:
      return false;
    }

  return false;
}

/* Return the cost of an address rtx ADDR.  */

static int
s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  struct s390_address ad;

  if (!s390_decompose_address (addr, &ad))
    return 1000;

  return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
}

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				 tree vectype,
				 int misalign ATTRIBUTE_UNUSED)
{
  switch (type_of_cost)
    {
    case scalar_stmt:
    case scalar_load:
    case scalar_store:
    case vector_stmt:
    case vector_load:
    case vector_store:
    case vector_gather_load:
    case vector_scatter_store:
    case vec_to_scalar:
    case scalar_to_vec:
    case cond_branch_not_taken:
    case vec_perm:
    case vec_promote_demote:
    case unaligned_load:
    case unaligned_store:
      return 1;

    case cond_branch_taken:
      return 3;

    case vec_construct:
      return TYPE_VECTOR_SUBPARTS (vectype) - 1;

    default:
      gcc_unreachable ();
    }
}

/* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
   otherwise return 0.  */

int
tls_symbolic_operand (rtx op)
{
  if (GET_CODE (op) != SYMBOL_REF)
    return 0;
  return SYMBOL_REF_TLS_MODEL (op);
}

/* Split DImode access register reference REG (on 64-bit) into its constituent
   low and high parts, and store them into LO and HI.  Note that gen_lowpart/
   gen_highpart cannot be used as they assume all registers are word-sized,
   while our access registers have only half that size.  */

void
s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
{
  gcc_assert (TARGET_64BIT);
  gcc_assert (ACCESS_REG_P (reg));
  gcc_assert (GET_MODE (reg) == DImode);
  gcc_assert (!(REGNO (reg) & 1));

  *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
  *hi = gen_rtx_REG (SImode, REGNO (reg));
}

/* Return true if OP contains a symbol reference */

bool
symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return true;
	}
      else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
	return true;
    }

  return false;
}

/* Return true if OP contains a reference to a thread-local symbol.  */

bool
tls_symbolic_reference_mentioned_p (rtx op)
{
  const char *fmt;
  int i;

  if (GET_CODE (op) == SYMBOL_REF)
    return tls_symbolic_operand (op);

  fmt = GET_RTX_FORMAT (GET_CODE (op));
  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
	    if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
	      return true;
	}
      else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
	return true;
    }

  return false;
}

/* Return true if OP is a legitimate general operand when
   generating PIC code.  It is given that flag_pic is on
   and that OP satisfies CONSTANT_P.  */

int
legitimate_pic_operand_p (rtx op)
{
  /* Accept all non-symbolic constants.  */
  if (!SYMBOLIC_CONST (op))
    return 1;

  /* Accept addresses that can be expressed relative to (pc).  */
  if (larl_operand (op, VOIDmode))
    return 1;

  /* Reject everything else; must be handled
     via emit_symbolic_move.  */
  return 0;
}

/* Returns true if the constant value OP is a legitimate general operand.
   It is given that OP satisfies CONSTANT_P.  */

static bool
s390_legitimate_constant_p (machine_mode mode, rtx op)
{
  if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
    {
      if (GET_MODE_SIZE (mode) != 16)
	return 0;

      if (!satisfies_constraint_j00 (op)
	  && !satisfies_constraint_jm1 (op)
	  && !satisfies_constraint_jKK (op)
	  && !satisfies_constraint_jxx (op)
	  && !satisfies_constraint_jyy (op))
	return 0;

      return 1;
    }

  /* Accept all non-symbolic constants.  */
  if (!SYMBOLIC_CONST (op))
    return 1;

  /* Accept immediate LARL operands.  */
  if (larl_operand (op, mode))
    return 1;

  /* Thread-local symbols are never legal constants.  This is
     so that emit_call knows that computing such addresses
     might require a function call.  */
  if (TLS_SYMBOLIC_CONST (op))
    return 0;

  /* In the PIC case, symbolic constants must *not* be
     forced into the literal pool.  We accept them here,
     so that they will be handled by emit_symbolic_move.  */
  if (flag_pic)
    return 1;

  /* All remaining non-PIC symbolic constants are
     forced into the literal pool.  */
  return 0;
}

/* Determine if it's legal to put X into the constant pool.  This
   is not possible if X contains the address of a symbol that is
   not constant (TLS) or not known at final link time (PIC).  */

static bool
s390_cannot_force_const_mem (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case CONST_DOUBLE:
    case CONST_WIDE_INT:
    case CONST_VECTOR:
      /* Accept all non-symbolic constants.  */
      return false;

    case NEG:
      /* Accept an unary '-' only on scalar numeric constants.  */
      switch (GET_CODE (XEXP (x, 0)))
	{
	case CONST_INT:
	case CONST_DOUBLE:
	case CONST_WIDE_INT:
	  return false;
	default:
	  return true;
	}

    case LABEL_REF:
      /* Labels are OK iff we are non-PIC.  */
      return flag_pic != 0;

    case SYMBOL_REF:
      /* 'Naked' TLS symbol references are never OK,
	 non-TLS symbols are OK iff we are non-PIC.  */
      if (tls_symbolic_operand (x))
	return true;
      return flag_pic != 0;

    case CONST:
      return s390_cannot_force_const_mem (mode, XEXP (x, 0));

    case PLUS:
    case MINUS:
      return s390_cannot_force_const_mem (mode, XEXP (x, 0))
	     || s390_cannot_force_const_mem (mode, XEXP (x, 1));

    case UNSPEC:
      switch (XINT (x, 1))
	{
	/* Only lt-relative or GOT-relative UNSPECs are OK.  */
	case UNSPEC_LTREL_OFFSET:
	case UNSPEC_GOT:
	case UNSPEC_GOTOFF:
	case UNSPEC_PLTOFF:
	case UNSPEC_TLSGD:
	case UNSPEC_TLSLDM:
	case UNSPEC_NTPOFF:
	case UNSPEC_DTPOFF:
	case UNSPEC_GOTNTPOFF:
	case UNSPEC_INDNTPOFF:
	  return false;

	/* If the literal pool shares the code section, execute
	   template placeholders could be put into the pool as well.  */
	default:
	  return true;
	}

    default:
      gcc_unreachable ();
    }
}

/* Returns true if the constant value OP is a legitimate general
   operand during and after reload.  The difference to
   legitimate_constant_p is that this function will not accept
   a constant that would need to be forced to the literal pool
   before it can be used as operand.
   This function accepts all constants which can be loaded directly
   into general purpose registers.  */

bool
legitimate_reload_constant_p (rtx op)
{
  /* Accept la(y) operands.  */
  if (GET_CODE (op) == CONST_INT
      && DISP_IN_RANGE (INTVAL (op)))
    return true;

  /* Accept l(g)hi/l(g)fi operands.  */
  if (GET_CODE (op) == CONST_INT
      && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
    return true;

  /* Accept lliXX operands.  */
  if (TARGET_ZARCH
      && GET_CODE (op) == CONST_INT
      && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
      && s390_single_part (op, word_mode, HImode, 0) >= 0)
    return true;

  /* Accept llihf/llilf operands.  */
  if (TARGET_EXTIMM
      && GET_CODE (op) == CONST_INT
      && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
      && s390_single_part (op, word_mode, SImode, 0) >= 0)
    return true;

  /* Accept larl operands.  */
  if (larl_operand (op, VOIDmode))
    return true;

  /* Accept floating-point zero operands that fit into a single GPR.  */
  if (GET_CODE (op) == CONST_DOUBLE
      && s390_float_const_zero_p (op)
      && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
    return true;

  /* Accept double-word operands that can be split.  */
  if (GET_CODE (op) == CONST_WIDE_INT
      || (GET_CODE (op) == CONST_INT
	  && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
    {
      machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
      rtx hi = operand_subword (op, 0, 0, dword_mode);
      rtx lo = operand_subword (op, 1, 0, dword_mode);
      return legitimate_reload_constant_p (hi)
	     && legitimate_reload_constant_p (lo);
    }

  /* Everything else cannot be handled without reload.  */
  return false;
}

/* Returns true if the constant value OP is a legitimate fp operand
   during and after reload.
   This function accepts all constants which can be loaded directly
   into an FPR.  */

static bool
legitimate_reload_fp_constant_p (rtx op)
{
  /* Accept floating-point zero operands if the load zero instruction
     can be used.  Prior to z196 the load fp zero instruction caused a
     performance penalty if the result is used as BFP number.  */
  if (TARGET_Z196
      && GET_CODE (op) == CONST_DOUBLE
      && s390_float_const_zero_p (op))
    return true;

  return false;
}

/* Returns true if the constant value OP is a legitimate vector operand
   during and after reload.
   This function accepts all constants which can be loaded directly
   into a vector register.  */

static bool
legitimate_reload_vector_constant_p (rtx op)
{
  if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
      && (satisfies_constraint_j00 (op)
	  || satisfies_constraint_jm1 (op)
	  || satisfies_constraint_jKK (op)
	  || satisfies_constraint_jxx (op)
	  || satisfies_constraint_jyy (op)))
    return true;

  return false;
}

/* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
   return the class of reg to actually use.  */

static reg_class_t
s390_preferred_reload_class (rtx op, reg_class_t rclass)
{
  switch (GET_CODE (op))
    {
      /* Constants we cannot reload into general registers
	 must be forced into the literal pool.  */
      case CONST_VECTOR:
      case CONST_DOUBLE:
      case CONST_INT:
      case CONST_WIDE_INT:
	if (reg_class_subset_p (GENERAL_REGS, rclass)
	    && legitimate_reload_constant_p (op))
	  return GENERAL_REGS;
	else if (reg_class_subset_p (ADDR_REGS, rclass)
		 && legitimate_reload_constant_p (op))
	  return ADDR_REGS;
	else if (reg_class_subset_p (FP_REGS, rclass)
		 && legitimate_reload_fp_constant_p (op))
	  return FP_REGS;
	else if (reg_class_subset_p (VEC_REGS, rclass)
		 && legitimate_reload_vector_constant_p (op))
	  return VEC_REGS;
	return NO_REGS;

      /* If a symbolic constant or a PLUS is reloaded,
	 it is most likely being used as an address, so
	 prefer ADDR_REGS.  If 'class' is not a superset
	 of ADDR_REGS, e.g. FP_REGS, reject this reload.  */
      case CONST:
	/* Symrefs cannot be pushed into the literal pool with -fPIC
	   so we *MUST NOT* return NO_REGS for these cases
	   (s390_cannot_force_const_mem will return true).

	   On the other hand we MUST return NO_REGS for symrefs with
	   invalid addend which might have been pushed to the literal
	   pool (no -fPIC).  Usually we would expect them to be
	   handled via secondary reload but this does not happen if
	   they are used as literal pool slot replacement in reload
	   inheritance (see emit_input_reload_insns).  */
	if (GET_CODE (XEXP (op, 0)) == PLUS
	    && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
	    && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
	  {
	    if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
	      return ADDR_REGS;
	    else
	      return NO_REGS;
	  }
	/* fallthrough */
      case LABEL_REF:
      case SYMBOL_REF:
	if (!legitimate_reload_constant_p (op))
	  return NO_REGS;
	/* fallthrough */
      case PLUS:
	/* load address will be used.  */
	if (reg_class_subset_p (ADDR_REGS, rclass))
	  return ADDR_REGS;
	else
	  return NO_REGS;

      default:
	break;
    }

  return rclass;
}

/* Return true if ADDR is SYMBOL_REF + addend with addend being a
   multiple of ALIGNMENT and the SYMBOL_REF being naturally
   aligned.  */

bool
s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
{
  HOST_WIDE_INT addend;
  rtx symref;

  /* The "required alignment" might be 0 (e.g. for certain structs
     accessed via BLKmode).  Early abort in this case, as well as when
     an alignment > 8 is required.  */
  if (alignment < 2 || alignment > 8)
    return false;

  if (!s390_loadrelative_operand_p (addr, &symref, &addend))
    return false;

  if (addend & (alignment - 1))
    return false;

  if (GET_CODE (symref) == SYMBOL_REF)
    {
      /* s390_encode_section_info is not called for anchors, since they don't
	 have corresponding VAR_DECLs.  Therefore, we cannot rely on
	 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information.  */
      if (SYMBOL_REF_ANCHOR_P (symref))
	{
	  HOST_WIDE_INT block_offset = SYMBOL_REF_BLOCK_OFFSET (symref);
	  unsigned int block_alignment = (SYMBOL_REF_BLOCK (symref)->alignment
					  / BITS_PER_UNIT);

	  gcc_assert (block_offset >= 0);
	  return ((block_offset & (alignment - 1)) == 0
		  && block_alignment >= alignment);
	}

      /* We have load-relative instructions for 2-byte, 4-byte, and
	 8-byte alignment so allow only these.  */
      switch (alignment)
	{
	case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
	case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
	case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
	default: return false;
	}
    }

  if (GET_CODE (symref) == UNSPEC
      && alignment <= UNITS_PER_LONG)
    return true;

  return false;
}

/* ADDR is moved into REG using larl.  If ADDR isn't a valid larl
   operand SCRATCH is used to reload the even part of the address and
   the odd one is added later.  */

void
s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
{
  HOST_WIDE_INT addend;
  rtx symref;

  if (!s390_loadrelative_operand_p (addr, &symref, &addend))
    gcc_unreachable ();

  if (!(addend & 1))
    /* Easy case.  The addend is even so larl will do fine.  */
    emit_move_insn (reg, addr);
  else
    {
      /* We can leave the scratch register untouched if the target
	 register is a valid base register.  */
      if (REGNO (reg) < FIRST_PSEUDO_REGISTER
	  && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
	scratch = reg;

      gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
      gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);

      if (addend != 1)
	emit_move_insn (scratch,
			gen_rtx_CONST (Pmode,
				       gen_rtx_PLUS (Pmode, symref,
						     GEN_INT (addend - 1))));
      else
	emit_move_insn (scratch, symref);

      /* Increment the address using la in order to avoid clobbering cc.  */
      s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
    }
}

/* Generate what is necessary to move between REG and MEM using
   SCRATCH.  The direction is given by TOMEM.  */

void
s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
{
  /* Reload might have pulled a constant out of the literal pool.
     Force it back in.  */
  if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
      || GET_CODE (mem) == CONST_WIDE_INT
      || GET_CODE (mem) == CONST_VECTOR
      || GET_CODE (mem) == CONST)
    mem = force_const_mem (GET_MODE (reg), mem);

  gcc_assert (MEM_P (mem));

  /* For a load from memory we can leave the scratch register
     untouched if the target register is a valid base register.  */
  if (!tomem
      && REGNO (reg) < FIRST_PSEUDO_REGISTER
      && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
      && GET_MODE (reg) == GET_MODE (scratch))
    scratch = reg;

  /* Load address into scratch register.  Since we can't have a
     secondary reload for a secondary reload we have to cover the case
     where larl would need a secondary reload here as well.  */
  s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);

  /* Now we can use a standard load/store to do the move.  */
  if (tomem)
    emit_move_insn (replace_equiv_address (mem, scratch), reg);
  else
    emit_move_insn (reg, replace_equiv_address (mem, scratch));
}
4522 /* Inform reload about cases where moving X with a mode MODE to a register in
4523 RCLASS requires an extra scratch or immediate register. Return the class
4524 needed for the immediate register. */
4527 s390_secondary_reload (bool in_p
, rtx x
, reg_class_t rclass_i
,
4528 machine_mode mode
, secondary_reload_info
*sri
)
4530 enum reg_class rclass
= (enum reg_class
) rclass_i
;
4532 /* Intermediate register needed. */
4533 if (reg_classes_intersect_p (CC_REGS
, rclass
))
4534 return GENERAL_REGS
;
4538 /* The vst/vl vector move instructions allow only for short
4541 && GET_CODE (XEXP (x
, 0)) == PLUS
4542 && GET_CODE (XEXP (XEXP (x
, 0), 1)) == CONST_INT
4543 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x
, 0), 1)))
4544 && reg_class_subset_p (rclass
, VEC_REGS
)
4545 && (!reg_class_subset_p (rclass
, FP_REGS
)
4546 || (GET_MODE_SIZE (mode
) > 8
4547 && s390_class_max_nregs (FP_REGS
, mode
) == 1)))
4550 sri
->icode
= (TARGET_64BIT
?
4551 CODE_FOR_reloaddi_la_in
:
4552 CODE_FOR_reloadsi_la_in
);
4554 sri
->icode
= (TARGET_64BIT
?
4555 CODE_FOR_reloaddi_la_out
:
4556 CODE_FOR_reloadsi_la_out
);
4562 HOST_WIDE_INT offset
;
4565 /* On z10 several optimizer steps may generate larl operands with
4568 && s390_loadrelative_operand_p (x
, &symref
, &offset
)
4570 && !SYMBOL_FLAG_NOTALIGN2_P (symref
)
4571 && (offset
& 1) == 1)
4572 sri
->icode
= ((mode
== DImode
) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4573 : CODE_FOR_reloadsi_larl_odd_addend_z10
);
4575 /* Handle all the (mem (symref)) accesses we cannot use the z10
4576 instructions for. */
4578 && s390_loadrelative_operand_p (XEXP (x
, 0), NULL
, NULL
)
4580 || !reg_class_subset_p (rclass
, GENERAL_REGS
)
4581 || GET_MODE_SIZE (mode
) > UNITS_PER_WORD
4582 || !s390_check_symref_alignment (XEXP (x
, 0),
4583 GET_MODE_SIZE (mode
))))
4585 #define __SECONDARY_RELOAD_CASE(M,m) \
4588 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4589 CODE_FOR_reload##m##di_tomem_z10; \
4591 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4592 CODE_FOR_reload##m##si_tomem_z10; \
4595 switch (GET_MODE (x
))
4597 __SECONDARY_RELOAD_CASE (QI
, qi
);
4598 __SECONDARY_RELOAD_CASE (HI
, hi
);
4599 __SECONDARY_RELOAD_CASE (SI
, si
);
4600 __SECONDARY_RELOAD_CASE (DI
, di
);
4601 __SECONDARY_RELOAD_CASE (TI
, ti
);
4602 __SECONDARY_RELOAD_CASE (SF
, sf
);
4603 __SECONDARY_RELOAD_CASE (DF
, df
);
4604 __SECONDARY_RELOAD_CASE (TF
, tf
);
4605 __SECONDARY_RELOAD_CASE (SD
, sd
);
4606 __SECONDARY_RELOAD_CASE (DD
, dd
);
4607 __SECONDARY_RELOAD_CASE (TD
, td
);
4608 __SECONDARY_RELOAD_CASE (V1QI
, v1qi
);
4609 __SECONDARY_RELOAD_CASE (V2QI
, v2qi
);
4610 __SECONDARY_RELOAD_CASE (V4QI
, v4qi
);
4611 __SECONDARY_RELOAD_CASE (V8QI
, v8qi
);
4612 __SECONDARY_RELOAD_CASE (V16QI
, v16qi
);
4613 __SECONDARY_RELOAD_CASE (V1HI
, v1hi
);
4614 __SECONDARY_RELOAD_CASE (V2HI
, v2hi
);
4615 __SECONDARY_RELOAD_CASE (V4HI
, v4hi
);
4616 __SECONDARY_RELOAD_CASE (V8HI
, v8hi
);
4617 __SECONDARY_RELOAD_CASE (V1SI
, v1si
);
4618 __SECONDARY_RELOAD_CASE (V2SI
, v2si
);
4619 __SECONDARY_RELOAD_CASE (V4SI
, v4si
);
4620 __SECONDARY_RELOAD_CASE (V1DI
, v1di
);
4621 __SECONDARY_RELOAD_CASE (V2DI
, v2di
);
4622 __SECONDARY_RELOAD_CASE (V1TI
, v1ti
);
4623 __SECONDARY_RELOAD_CASE (V1SF
, v1sf
);
4624 __SECONDARY_RELOAD_CASE (V2SF
, v2sf
);
4625 __SECONDARY_RELOAD_CASE (V4SF
, v4sf
);
4626 __SECONDARY_RELOAD_CASE (V1DF
, v1df
);
4627 __SECONDARY_RELOAD_CASE (V2DF
, v2df
);
4628 __SECONDARY_RELOAD_CASE (V1TF
, v1tf
);
4632 #undef __SECONDARY_RELOAD_CASE
  /* We need a scratch register when loading a PLUS expression which
     is not a legitimate operand of the LOAD ADDRESS instruction.  */
  /* LRA can deal with transformation of plus op very well -- so we
     don't need to prompt LRA in this case.  */
  if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
    sri->icode = (TARGET_64BIT ?
		  CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);

  /* Performing a multiword move from or to memory we have to make sure the
     second chunk in memory is addressable without causing a displacement
     overflow.  If that would be the case we calculate the address in
     a scratch register.  */
  if (MEM_P (x)
      && GET_CODE (XEXP (x, 0)) == PLUS
      && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
      && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
			 + GET_MODE_SIZE (mode) - 1))
    {
      /* For GENERAL_REGS a displacement overflow is no problem if occurring
	 in a s_operand address since we may fall back to lm/stm.  So we only
	 have to care about overflows in the b+i+d case.  */
      if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
	   && s390_class_max_nregs (GENERAL_REGS, mode) > 1
	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
	  /* For FP_REGS no lm/stm is available so this check is triggered
	     for displacement overflows in b+i+d and b+d like addresses.  */
	  || (reg_classes_intersect_p (FP_REGS, rclass)
	      && s390_class_max_nregs (FP_REGS, mode) > 1))
	{
	  if (in_p)
	    sri->icode = (TARGET_64BIT ?
			  CODE_FOR_reloaddi_la_in :
			  CODE_FOR_reloadsi_la_in);
	  else
	    sri->icode = (TARGET_64BIT ?
			  CODE_FOR_reloaddi_la_out :
			  CODE_FOR_reloadsi_la_out);
	}
    }

  /* A scratch address register is needed when a symbolic constant is
     copied to r0 when compiling with -fPIC.  In other cases the target
     register might be used as temporary (see legitimize_pic_address).  */
  if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
    sri->icode = (TARGET_64BIT ?
		  CODE_FOR_reloaddi_PIC_addr :
		  CODE_FOR_reloadsi_PIC_addr);

  /* Either scratch or no register needed.  */
  return NO_REGS;
}
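
/* As an illustration of the multiword case above: a TFmode (16-byte)
   access to mem (plus base 4094) touches bytes up to offset 4109, past
   the 12-bit displacement range of 0..4095, so the address is first
   computed into the scratch register via one of the reload*_la_in/_out
   patterns selected there.  */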
/* Implement TARGET_SECONDARY_MEMORY_NEEDED.

   We need secondary memory to move data between GPRs and FPRs.

   - With DFP the ldgr lgdr instructions are available.  Due to the
     different alignment we cannot use them for SFmode.  For 31 bit a
     64 bit value in GPR would be a register pair so here we still
     need to go via memory.

   - With z13 we can do the SF/SImode moves with vlgvf.  Due to the
     overlapping of FPRs and VRs we still disallow TF/TD modes to be
     in full VRs so as before also on z13 we do these moves via
     memory.

     FIXME: Should we try splitting it into two vlgvg's/vlvg's instead?  */

static bool
s390_secondary_memory_needed (machine_mode mode,
			      reg_class_t class1, reg_class_t class2)
{
  return (((reg_classes_intersect_p (class1, VEC_REGS)
	    && reg_classes_intersect_p (class2, GENERAL_REGS))
	   || (reg_classes_intersect_p (class1, GENERAL_REGS)
	       && reg_classes_intersect_p (class2, VEC_REGS)))
	  && (TARGET_TPF || !TARGET_DFP || !TARGET_64BIT
	      || GET_MODE_SIZE (mode) != 8)
	  && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
			     && GET_MODE_SIZE (mode) > 8)));
}
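
/* For instance, on a 64-bit target with DFP a DImode GPR<->FPR move can
   use lgdr/ldgr directly, so no secondary memory is needed when
   GET_MODE_SIZE (mode) == 8; an SFmode value, however, lives in the
   leftmost word of an FPR, which lgdr/ldgr cannot address, so such
   moves still go through memory.  */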
/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.

   get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
   because the movsi and movsf patterns don't handle r/f moves.  */

static machine_mode
s390_secondary_memory_needed_mode (machine_mode mode)
{
  if (GET_MODE_BITSIZE (mode) < 32)
    return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
  return mode;
}
/* Generate code to load SRC, which is PLUS that is not a
   legitimate operand for the LA instruction, into TARGET.
   SCRATCH may be used as scratch register.  */

void
s390_expand_plus_operand (rtx target, rtx src,
			  rtx scratch)
{
  rtx sum1, sum2;
  struct s390_address ad;

  /* src must be a PLUS; get its two operands.  */
  gcc_assert (GET_CODE (src) == PLUS);
  gcc_assert (GET_MODE (src) == Pmode);

  /* Check if any of the two operands is already scheduled
     for replacement by reload.  This can happen e.g. when
     float registers occur in an address.  */
  sum1 = find_replacement (&XEXP (src, 0));
  sum2 = find_replacement (&XEXP (src, 1));
  src = gen_rtx_PLUS (Pmode, sum1, sum2);

  /* If the address is already strictly valid, there's nothing to do.  */
  if (!s390_decompose_address (src, &ad)
      || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
      || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
    {
      /* Otherwise, one of the operands cannot be an address register;
	 we reload its value into the scratch register.  */
      if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
	{
	  emit_move_insn (scratch, sum1);
	  sum1 = scratch;
	}
      if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
	{
	  emit_move_insn (scratch, sum2);
	  sum2 = scratch;
	}

      /* According to the way these invalid addresses are generated
	 in reload.c, it should never happen (at least on s390) that
	 *neither* of the PLUS components, after find_replacements
	 was applied, is an address register.  */
      if (sum1 == scratch && sum2 == scratch)
	gcc_unreachable ();

      src = gen_rtx_PLUS (Pmode, sum1, sum2);
    }

  /* Emit the LOAD ADDRESS pattern.  Note that reload of PLUS
     is only ever performed on addresses, so we can mark the
     sum as legitimate for LA in any case.  */
  s390_load_address (target, src);
}
/* Return true if ADDR is a valid memory address.
   STRICT specifies whether strict register checking applies.  */

static bool
s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
{
  struct s390_address ad;

  if (TARGET_Z10
      && larl_operand (addr, VOIDmode)
      && (mode == VOIDmode
	  || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
    return true;

  if (!s390_decompose_address (addr, &ad))
    return false;

  /* The vector memory instructions only support short displacements.
     Reject invalid displacements early to prevent a lot of lay
     instructions from being generated later which then cannot be merged
     properly.  */
  if (TARGET_VX
      && VECTOR_MODE_P (mode)
      && ad.disp != NULL_RTX
      && CONST_INT_P (ad.disp)
      && !SHORT_DISP_IN_RANGE (INTVAL (ad.disp)))
    return false;

  if (strict)
    {
      if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
	return false;

      if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
	return false;
    }
  else
    {
      if (ad.base
	  && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
	       || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
	return false;

      if (ad.indx
	  && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
	       || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
	return false;
    }
  return true;
}
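
/* Reminder on the address forms checked above: an s390 address is
   base + index + displacement, where the displacement is either a
   12-bit unsigned value (0..4095) or, with TARGET_LONG_DISPLACEMENT,
   a 20-bit signed value.  The vector load/store instructions accept
   only the short 12-bit form, hence the SHORT_DISP_IN_RANGE check.  */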
/* Return true if OP is a valid operand for the LA instruction.
   In 31-bit, we need to prove that the result is used as an
   address, as LA performs only a 31-bit addition.  */

bool
legitimate_la_operand_p (rtx op)
{
  struct s390_address addr;
  if (!s390_decompose_address (op, &addr))
    return false;

  return (TARGET_64BIT || addr.pointer);
}
/* Return true if it is valid *and* preferable to use LA to
   compute the sum of OP1 and OP2.  */

bool
preferred_la_operand_p (rtx op1, rtx op2)
{
  struct s390_address addr;

  if (op2 != const0_rtx)
    op1 = gen_rtx_PLUS (Pmode, op1, op2);

  if (!s390_decompose_address (op1, &addr))
    return false;
  if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
    return false;
  if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
    return false;

  /* Avoid LA instructions with index (and base) register on z196 or
     later; it is preferable to use regular add instructions when
     possible.  Starting with zEC12 the la with index register is
     "uncracked" again but still slower than a regular add.  */
  if (addr.indx && s390_tune >= PROCESSOR_2817_Z196)
    return false;

  if (!TARGET_64BIT && !addr.pointer)
    return false;

  if (addr.pointer)
    return true;

  if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
      || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
    return true;

  return false;
}
/* Emit a forced load-address operation to load SRC into DST.
   This will use the LOAD ADDRESS instruction even in situations
   where legitimate_la_operand_p (SRC) returns false.  */

void
s390_load_address (rtx dst, rtx src)
{
  if (TARGET_64BIT)
    emit_move_insn (dst, src);
  else
    emit_insn (gen_force_la_31 (dst, src));
}
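
/* On 31-bit targets LA performs only a 31-bit addition and clears the
   leftmost bit of the result, which is exactly what is wanted for an
   address; the force_la_31 pattern essentially keeps that addition from
   being simplified into a plain 32-bit add.  */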
/* Return true if it is OK to use SYMBOL_REF in a relative address.  */

bool
s390_rel_address_ok_p (rtx symbol_ref)
{
  tree decl;

  if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
    return true;

  decl = SYMBOL_REF_DECL (symbol_ref);

  if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
    return (s390_pic_data_is_text_relative
	    || (decl
		&& TREE_CODE (decl) == FUNCTION_DECL));

  return false;
}
/* Return a legitimate reference for ORIG (an address) using the
   register REG.  If REG is 0, a new pseudo is generated.

   There are two types of references that must be handled:

   1. Global data references must load the address from the GOT, via
      the PIC reg.  An insn is emitted to do this load, and the reg is
      returned.

   2. Static data references, constant pool addresses, and code labels
      compute the address as an offset from the GOT, whose base is in
      the PIC reg.  Static data objects have SYMBOL_FLAG_LOCAL set to
      differentiate them from global data objects.  The returned
      address is the PIC reg + an unspec constant.

   TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
   reg also appears in the address.  */

rtx
legitimize_pic_address (rtx orig, rtx reg)
{
  rtx addr = orig;
  rtx addend = const0_rtx;
  rtx new_rtx = orig;

  gcc_assert (!TLS_SYMBOLIC_CONST (addr));

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == PLUS)
    {
      addend = XEXP (addr, 1);
      addr = XEXP (addr, 0);
    }

  if ((GET_CODE (addr) == LABEL_REF
       || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
       || (GET_CODE (addr) == UNSPEC &&
	   (XINT (addr, 1) == UNSPEC_GOTENT
	    || XINT (addr, 1) == UNSPEC_PLT31)))
      && GET_CODE (addend) == CONST_INT)
    {
      /* This can be locally addressed.  */

      /* larl_operand requires UNSPECs to be wrapped in a const rtx.  */
      rtx const_addr = (GET_CODE (addr) == UNSPEC ?
			gen_rtx_CONST (Pmode, addr) : addr);

      if (larl_operand (const_addr, VOIDmode)
	  && INTVAL (addend) < HOST_WIDE_INT_1 << 31
	  && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
	{
	  if (INTVAL (addend) & 1)
	    {
	      /* LARL can't handle odd offsets, so emit a pair of LARL
		 and LA.  */
	      rtx temp = reg ? reg : gen_reg_rtx (Pmode);

	      if (!DISP_IN_RANGE (INTVAL (addend)))
		{
		  HOST_WIDE_INT even = INTVAL (addend) - 1;
		  addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
		  addr = gen_rtx_CONST (Pmode, addr);
		  addend = const1_rtx;
		}

	      emit_move_insn (temp, addr);
	      new_rtx = gen_rtx_PLUS (Pmode, temp, addend);

	      if (reg != 0)
		{
		  s390_load_address (reg, new_rtx);
		  new_rtx = reg;
		}
	    }
	  else
	    {
	      /* If the offset is even, we can just use LARL.  This
		 will happen automatically.  */
	    }
	}
      else
	{
	  /* No larl - Access local symbols relative to the GOT.  */

	  rtx temp = reg ? reg : gen_reg_rtx (Pmode);

	  if (reload_in_progress || reload_completed)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);

	  addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
	  if (addend != const0_rtx)
	    addr = gen_rtx_PLUS (Pmode, addr, addend);
	  addr = gen_rtx_CONST (Pmode, addr);
	  addr = force_const_mem (Pmode, addr);
	  emit_move_insn (temp, addr);

	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
	  if (reg != 0)
	    {
	      s390_load_address (reg, new_rtx);
	      new_rtx = reg;
	    }
	}
    }
  else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
    {
      /* A non-local symbol reference without addend.

	 The symbol ref is wrapped into an UNSPEC to make sure the
	 proper operand modifier (@GOT or @GOTENT) will be emitted.
	 This will tell the linker to put the symbol into the GOT.

	 Additionally the code dereferencing the GOT slot is emitted here.

	 An addend to the symref needs to be added afterwards.
	 legitimize_pic_address calls itself recursively to handle
	 that case.  So no need to do it here.  */

      if (reg == 0)
	reg = gen_reg_rtx (Pmode);

      if (TARGET_Z10)
	{
	  /* Use load relative if possible.
	     lgrl <target>, sym@GOTENT  */
	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);

	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
      else if (flag_pic == 1)
	{
	  /* Assume GOT offset is a valid displacement operand (< 4k
	     or < 512k with z990).  This is handled the same way in
	     both 31- and 64-bit code (@GOT).
	     lg <target>, sym@GOT(r12)  */

	  if (reload_in_progress || reload_completed)
	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);

	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
	  new_rtx = gen_const_mem (Pmode, new_rtx);
	  emit_move_insn (reg, new_rtx);
	  new_rtx = reg;
	}
      else
	{
	  /* If the GOT offset might be >= 4k, we determine the position
	     of the GOT entry via a PC-relative LARL (@GOTENT).
	     larl temp, sym@GOTENT
	     lg <target>, 0(temp)  */

	  rtx temp = reg ? reg : gen_reg_rtx (Pmode);

	  gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
		      || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);

	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	  emit_move_insn (temp, new_rtx);
	  new_rtx = gen_const_mem (Pmode, temp);
	  emit_move_insn (reg, new_rtx);

	  new_rtx = reg;
	}
    }
  else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
    {
      gcc_assert (XVECLEN (addr, 0) == 1);
      switch (XINT (addr, 1))
	{
	  /* These address symbols (or PLT slots) relative to the GOT
	     (not GOT slots!).  In general this will exceed the
	     displacement range so these values belong into the literal
	     pool.  */
	case UNSPEC_GOTOFF:
	case UNSPEC_PLTOFF:
	  new_rtx = force_const_mem (Pmode, orig);
	  break;

	  /* For -fPIC the GOT size might exceed the displacement
	     range so make sure the value is in the literal pool.  */
	case UNSPEC_GOT:
	  if (flag_pic == 2)
	    new_rtx = force_const_mem (Pmode, orig);
	  break;

	  /* For @GOTENT larl is used.  This is handled like local
	     symbol refs.  */
	case UNSPEC_GOTENT:
	  gcc_unreachable ();
	  break;

	  /* For @PLT larl is used.  This is handled like local
	     symbol refs.  */
	case UNSPEC_PLT31:
	  gcc_unreachable ();
	  break;

	  /* Everything else cannot happen.  */
	default:
	  gcc_unreachable ();
	}
    }
  else if (addend != const0_rtx)
    {
      /* Otherwise, compute the sum.  */

      rtx base = legitimize_pic_address (addr, reg);
      new_rtx  = legitimize_pic_address (addend,
					 base == reg ? NULL_RTX : reg);
      if (GET_CODE (new_rtx) == CONST_INT)
	new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
      else
	{
	  if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
	    {
	      base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
	      new_rtx = XEXP (new_rtx, 1);
	    }
	  new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
	}

      if (GET_CODE (new_rtx) == CONST)
	new_rtx = XEXP (new_rtx, 0);
      new_rtx = force_operand (new_rtx, 0);
    }

  return new_rtx;
}
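
/* Example for the odd-addend case above: LARL can only encode even
   offsets, so for sym+5 the symbol address is loaded with larl and the
   odd addend added with la:
       larl  %r1,sym
       la    %r2,5(%r1)
   If the addend is also outside the displacement range, its even part
   is folded into the larl operand and only 1 is left for the la.  */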
/* Load the thread pointer into a register.  */

rtx
s390_get_thread_pointer (void)
{
  rtx tp = gen_reg_rtx (Pmode);

  emit_insn (gen_get_thread_pointer (Pmode, tp));

  mark_reg_pointer (tp, BITS_PER_WORD);

  return tp;
}
/* Emit a tls call insn.  The call target is the SYMBOL_REF stored
   in s390_tls_symbol which always refers to __tls_get_offset.
   The returned offset is written to RESULT_REG and an USE rtx is
   generated for TLS_CALL.  */

static GTY(()) rtx s390_tls_symbol;

static void
s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
{
  rtx insn;

  if (!flag_pic)
    emit_insn (s390_load_got ());

  if (!s390_tls_symbol)
    {
      s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
      SYMBOL_REF_FLAGS (s390_tls_symbol) |= SYMBOL_FLAG_FUNCTION;
    }

  insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
			 gen_rtx_REG (Pmode, RETURN_REGNUM));

  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
  RTL_CONST_CALL_P (insn) = 1;
}
/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
   this (thread-local) address.  REG may be used as temporary.  */

static rtx
legitimize_tls_address (rtx addr, rtx reg)
{
  rtx new_rtx, tls_call, temp, base, r2;
  rtx_insn *insn;

  if (GET_CODE (addr) == SYMBOL_REF)
    switch (tls_symbolic_operand (addr))
      {
      case TLS_MODEL_GLOBAL_DYNAMIC:
	start_sequence ();
	r2 = gen_rtx_REG (Pmode, 2);
	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
	new_rtx = gen_rtx_CONST (Pmode, tls_call);
	new_rtx = force_const_mem (Pmode, new_rtx);
	emit_move_insn (r2, new_rtx);
	s390_emit_tls_call_insn (r2, tls_call);
	insn = get_insns ();
	end_sequence ();

	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
	temp = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, temp, r2, new_rtx);

	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
	if (reg != 0)
	  {
	    s390_load_address (reg, new_rtx);
	    new_rtx = reg;
	  }
	break;

      case TLS_MODEL_LOCAL_DYNAMIC:
	start_sequence ();
	r2 = gen_rtx_REG (Pmode, 2);
	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
	new_rtx = gen_rtx_CONST (Pmode, tls_call);
	new_rtx = force_const_mem (Pmode, new_rtx);
	emit_move_insn (r2, new_rtx);
	s390_emit_tls_call_insn (r2, tls_call);
	insn = get_insns ();
	end_sequence ();

	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
	temp = gen_reg_rtx (Pmode);
	emit_libcall_block (insn, temp, r2, new_rtx);

	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
	base = gen_reg_rtx (Pmode);
	s390_load_address (base, new_rtx);

	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	new_rtx = force_const_mem (Pmode, new_rtx);
	temp = gen_reg_rtx (Pmode);
	emit_move_insn (temp, new_rtx);

	new_rtx = gen_rtx_PLUS (Pmode, base, temp);
	if (reg != 0)
	  {
	    s390_load_address (reg, new_rtx);
	    new_rtx = reg;
	  }
	break;

      case TLS_MODEL_INITIAL_EXEC:
	if (flag_pic == 1)
	  {
	    /* Assume GOT offset < 4k.  This is handled the same way
	       in both 31- and 64-bit code.  */

	    if (reload_in_progress || reload_completed)
	      df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);

	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	    new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
	    new_rtx = gen_const_mem (Pmode, new_rtx);
	    temp = gen_reg_rtx (Pmode);
	    emit_move_insn (temp, new_rtx);
	  }
	else
	  {
	    /* If the GOT offset might be >= 4k, we determine the position
	       of the GOT entry via a PC-relative LARL.  */

	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	    temp = gen_reg_rtx (Pmode);
	    emit_move_insn (temp, new_rtx);

	    new_rtx = gen_const_mem (Pmode, temp);
	    temp = gen_reg_rtx (Pmode);
	    emit_move_insn (temp, new_rtx);
	  }

	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
	if (reg != 0)
	  {
	    s390_load_address (reg, new_rtx);
	    new_rtx = reg;
	  }
	break;

      case TLS_MODEL_LOCAL_EXEC:
	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
	new_rtx = force_const_mem (Pmode, new_rtx);
	temp = gen_reg_rtx (Pmode);
	emit_move_insn (temp, new_rtx);

	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
	if (reg != 0)
	  {
	    s390_load_address (reg, new_rtx);
	    new_rtx = reg;
	  }
	break;

      default:
	gcc_unreachable ();
      }

  else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
    {
      switch (XINT (XEXP (addr, 0), 1))
	{
	case UNSPEC_INDNTPOFF:
	  new_rtx = addr;
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
    {
      new_rtx = XEXP (XEXP (addr, 0), 0);
      if (GET_CODE (new_rtx) != SYMBOL_REF)
	new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      new_rtx = legitimize_tls_address (new_rtx, reg);
      new_rtx = plus_constant (Pmode, new_rtx,
			       INTVAL (XEXP (XEXP (addr, 0), 1)));
      new_rtx = force_operand (new_rtx, 0);
    }

  /* (const (neg (unspec (symbol_ref)))) -> (neg (const (unspec (symbol_ref)))) */
  else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == NEG)
    {
      new_rtx = XEXP (XEXP (addr, 0), 0);
      if (GET_CODE (new_rtx) != SYMBOL_REF)
	new_rtx = gen_rtx_CONST (Pmode, new_rtx);

      new_rtx = legitimize_tls_address (new_rtx, reg);
      new_rtx = gen_rtx_NEG (Pmode, new_rtx);
      new_rtx = force_operand (new_rtx, 0);
    }

  else
    gcc_unreachable ();  /* for now ... */

  return new_rtx;
}
/* Emit insns making the address in operands[1] valid for a standard
   move to operands[0].  operands[1] is replaced by an address which
   should be used instead of the former RTX to emit the move
   pattern.  */

void
emit_symbolic_move (rtx *operands)
{
  rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);

  if (GET_CODE (operands[0]) == MEM)
    operands[1] = force_reg (Pmode, operands[1]);
  else if (TLS_SYMBOLIC_CONST (operands[1]))
    operands[1] = legitimize_tls_address (operands[1], temp);
  else if (flag_pic)
    operands[1] = legitimize_pic_address (operands[1], temp);
}
/* Try machine-dependent ways of modifying an illegitimate address X
   to be legitimate.  If we find one, return the new, valid address.

   OLDX is the address as it was before break_out_memory_refs was called.
   In some cases it is useful to look at this to decide what needs to be done.

   MODE is the mode of the operand pointed to by X.

   When -fpic is used, special handling is needed for symbolic references.
   See comments by legitimize_pic_address for details.  */

static rtx
s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			 machine_mode mode ATTRIBUTE_UNUSED)
{
  rtx constant_term = const0_rtx;

  if (TLS_SYMBOLIC_CONST (x))
    {
      x = legitimize_tls_address (x, 0);

      if (s390_legitimate_address_p (mode, x, FALSE))
	return x;
    }
  else if (GET_CODE (x) == PLUS
	   && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
	       || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
    {
      return x;
    }
  else if (flag_pic)
    {
      if (SYMBOLIC_CONST (x)
	  || (GET_CODE (x) == PLUS
	      && (SYMBOLIC_CONST (XEXP (x, 0))
		  || SYMBOLIC_CONST (XEXP (x, 1)))))
	x = legitimize_pic_address (x, 0);

      if (s390_legitimate_address_p (mode, x, FALSE))
	return x;
    }

  x = eliminate_constant_term (x, &constant_term);

  /* Optimize loading of large displacements by splitting them
     into the multiple of 4K and the rest; this allows the
     former to be CSE'd if possible.

     Don't do this if the displacement is added to a register
     pointing into the stack frame, as the offsets will
     change later anyway.  */

  if (GET_CODE (constant_term) == CONST_INT
      && !TARGET_LONG_DISPLACEMENT
      && !DISP_IN_RANGE (INTVAL (constant_term))
      && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
    {
      HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
      HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;

      rtx temp = gen_reg_rtx (Pmode);
      rtx val  = force_operand (GEN_INT (upper), temp);
      if (val != temp)
	emit_move_insn (temp, val);

      x = gen_rtx_PLUS (Pmode, x, temp);
      constant_term = GEN_INT (lower);
    }

  if (GET_CODE (x) == PLUS)
    {
      if (GET_CODE (XEXP (x, 0)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 1), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
	}
      else if (GET_CODE (XEXP (x, 1)) == REG)
	{
	  rtx temp = gen_reg_rtx (Pmode);
	  rtx val  = force_operand (XEXP (x, 0), temp);
	  if (val != temp)
	    emit_move_insn (temp, val);

	  x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
	}
    }

  if (constant_term != const0_rtx)
    x = gen_rtx_PLUS (Pmode, x, constant_term);

  return x;
}
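
/* Worked example for the 4K split above: a constant term of 0x12345
   yields lower = 0x12345 & 0xfff = 0x345 and upper = 0x12345 ^ 0x345
   = 0x12000.  The load of 0x12000 into a register can then be CSE'd
   across nearby accesses while 0x345 still fits the 12-bit
   displacement field.  */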
/* Try a machine-dependent way of reloading an illegitimate address AD
   operand.  If we find one, push the reload and return the new address.

   MODE is the mode of the enclosing MEM.  OPNUM is the operand number
   and TYPE is the reload type of the current reload.  */

rtx
legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
			   int opnum, int type)
{
  if (!optimize || TARGET_LONG_DISPLACEMENT)
    return NULL_RTX;

  if (GET_CODE (ad) == PLUS)
    {
      rtx tem = simplify_binary_operation (PLUS, Pmode,
					   XEXP (ad, 0), XEXP (ad, 1));
      if (tem)
	ad = tem;
    }

  if (GET_CODE (ad) == PLUS
      && GET_CODE (XEXP (ad, 0)) == REG
      && GET_CODE (XEXP (ad, 1)) == CONST_INT
      && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
    {
      HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
      HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
      rtx cst, tem, new_rtx;

      cst = GEN_INT (upper);
      if (!legitimate_reload_constant_p (cst))
	cst = force_const_mem (Pmode, cst);

      tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
      new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));

      push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
		   BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
		   opnum, (enum reload_type) type);
      return new_rtx;
    }

  return NULL_RTX;
}
/* Emit code to move LEN bytes from SRC to DST.  */

void
s390_expand_cpymem (rtx dst, rtx src, rtx len)
{
  /* When tuning for z10 or higher we rely on the Glibc functions to
     do the right thing.  Only for constant lengths below 64k we will
     generate inline code.  */
  if (s390_tune >= PROCESSOR_2097_Z10
      && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
    return;

  /* Expand memcpy for constant length operands without a loop if it
     is shorter that way.

     With a constant length argument a
     memcpy loop (without pfd) is 36 bytes -> 6 * mvc  */
  if (GET_CODE (len) == CONST_INT
      && INTVAL (len) >= 0
      && INTVAL (len) <= 256 * 6
      && (!TARGET_MVCLE || INTVAL (len) <= 256))
    {
      HOST_WIDE_INT o, l;

      for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
	{
	  rtx newdst = adjust_address (dst, BLKmode, o);
	  rtx newsrc = adjust_address (src, BLKmode, o);
	  emit_insn (gen_cpymem_short (newdst, newsrc,
				       GEN_INT (l > 256 ? 255 : l - 1)));
	}
    }

  else if (TARGET_MVCLE)
    {
      emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1)));
    }

  else
    {
      rtx dst_addr, src_addr, count, blocks, temp;
      rtx_code_label *loop_start_label = gen_label_rtx ();
      rtx_code_label *loop_end_label = gen_label_rtx ();
      rtx_code_label *end_label = gen_label_rtx ();
      machine_mode mode;

      mode = GET_MODE (len);
      if (mode == VOIDmode)
	mode = Pmode;

      dst_addr = gen_reg_rtx (Pmode);
      src_addr = gen_reg_rtx (Pmode);
      count = gen_reg_rtx (mode);
      blocks = gen_reg_rtx (mode);

      convert_move (count, len, 1);
      emit_cmp_and_jump_insns (count, const0_rtx,
			       EQ, NULL_RTX, mode, 1, end_label);

      emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
      emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
      dst = change_address (dst, VOIDmode, dst_addr);
      src = change_address (src, VOIDmode, src_addr);

      temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
			   OPTAB_DIRECT);
      if (temp != count)
	emit_move_insn (count, temp);

      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_label (loop_start_label);

      if (TARGET_Z10
	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
	{
	  rtx prefetch;

	  /* Issue a read prefetch for the +3 cache line.  */
	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
				   const0_rtx, const0_rtx);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
	  emit_insn (prefetch);

	  /* Issue a write prefetch for the +3 cache line.  */
	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
				   const1_rtx, const0_rtx);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
	  emit_insn (prefetch);
	}

      emit_insn (gen_cpymem_short (dst, src, GEN_INT (255)));
      s390_load_address (dst_addr,
			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
      s390_load_address (src_addr,
			 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));

      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_jump (loop_start_label);
      emit_label (loop_end_label);

      emit_insn (gen_cpymem_short (dst, src,
				   convert_to_mode (Pmode, count, 1)));
      emit_label (end_label);
    }
}
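
/* The loop above moves 256-byte blocks: with count = len - 1, blocks =
   count >> 8 is the number of full MVCs and the low 8 bits of count
   feed the final EXECUTE-based mvc.  E.g. len = 700 gives count = 699,
   blocks = 2 (two full 256-byte MVCs) and a trailing mvc of
   699 % 256 + 1 = 188 bytes.  */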
/* Emit code to set LEN bytes at DST to VAL.
   Make use of clrmem if VAL is zero.  */

void
s390_expand_setmem (rtx dst, rtx len, rtx val)
{
  if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
    return;

  gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);

  /* Expand setmem/clrmem for a constant length operand without a
     loop if it will be shorter that way.
     clrmem loop (with PFD)    is 30 bytes -> 5 * xc
     clrmem loop (without PFD) is 24 bytes -> 4 * xc
     setmem loop (with PFD)    is 38 bytes -> ~4 * (mvi/stc + mvc)
     setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc)  */
  if (GET_CODE (len) == CONST_INT
      && ((val == const0_rtx
	   && (INTVAL (len) <= 256 * 4
	       || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD (val, len))))
	  || (val != const0_rtx && INTVAL (len) <= 257 * 4))
      && (!TARGET_MVCLE || INTVAL (len) <= 256))
    {
      HOST_WIDE_INT o, l;

      if (val == const0_rtx)
	/* clrmem: emit 256 byte blockwise XCs.  */
	for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
	  {
	    rtx newdst = adjust_address (dst, BLKmode, o);
	    emit_insn (gen_clrmem_short (newdst,
					 GEN_INT (l > 256 ? 255 : l - 1)));
	  }
      else
	/* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
	   setting first byte to val and using a 256 byte mvc with one
	   byte overlap to propagate the byte.  */
	for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
	  {
	    rtx newdst = adjust_address (dst, BLKmode, o);
	    emit_move_insn (adjust_address (dst, QImode, o), val);
	    if (l > 1)
	      {
		rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
		emit_insn (gen_cpymem_short (newdstp1, newdst,
					     GEN_INT (l > 257 ? 255 : l - 2)));
	      }
	  }
    }

  else if (TARGET_MVCLE)
    {
      val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
      if (TARGET_64BIT)
	emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
				       val));
      else
	emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
				       val));
    }

  else
    {
      rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
      rtx_code_label *loop_start_label = gen_label_rtx ();
      rtx_code_label *onebyte_end_label = gen_label_rtx ();
      rtx_code_label *zerobyte_end_label = gen_label_rtx ();
      rtx_code_label *restbyte_end_label = gen_label_rtx ();
      machine_mode mode;

      mode = GET_MODE (len);
      if (mode == VOIDmode)
	mode = Pmode;

      dst_addr = gen_reg_rtx (Pmode);
      count = gen_reg_rtx (mode);
      blocks = gen_reg_rtx (mode);

      convert_move (count, len, 1);
      emit_cmp_and_jump_insns (count, const0_rtx,
			       EQ, NULL_RTX, mode, 1, zerobyte_end_label,
			       profile_probability::very_unlikely ());

      /* We need to make a copy of the target address since memset is
	 supposed to return it unmodified.  We have to make it here
	 already since the new reg is used at onebyte_end_label.  */
      emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
      dst = change_address (dst, VOIDmode, dst_addr);

      if (val != const0_rtx)
	{
	  /* When using the overlapping mvc the original target
	     address is only accessed as single byte entity (even by
	     the mvc reading this value).  */
	  set_mem_size (dst, 1);
	  dstp1 = adjust_address (dst, VOIDmode, 1);
	  emit_cmp_and_jump_insns (count,
				   const1_rtx, EQ, NULL_RTX, mode, 1,
				   onebyte_end_label,
				   profile_probability::very_unlikely ());
	}

      /* There is one unconditional (mvi+mvc)/xc after the loop
	 dealing with the rest of the bytes, subtracting two (mvi+mvc)
	 or one (xc) here leaves this number of bytes to be handled by
	 it.  */
      temp = expand_binop (mode, add_optab, count,
			   val == const0_rtx ? constm1_rtx : GEN_INT (-2),
			   count, 1, OPTAB_DIRECT);
      if (temp != count)
	emit_move_insn (count, temp);

      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, restbyte_end_label);
      emit_jump (loop_start_label);

      if (val != const0_rtx)
	{
	  /* The 1 byte != 0 special case.  Not handled efficiently
	     since we require two jumps for that.  However, this
	     should be very rare.  */
	  emit_label (onebyte_end_label);
	  emit_move_insn (adjust_address (dst, QImode, 0), val);
	  emit_jump (zerobyte_end_label);
	}

      emit_label (loop_start_label);

      if (TARGET_SETMEM_PFD (val, len))
	{
	  /* Issue a write prefetch.  */
	  rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE);
	  rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance),
				       const1_rtx, const0_rtx);
	  emit_insn (prefetch);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
	}

      if (val == const0_rtx)
	emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
      else
	{
	  /* Set the first byte in the block to the value and use an
	     overlapping mvc for the block.  */
	  emit_move_insn (adjust_address (dst, QImode, 0), val);
	  emit_insn (gen_cpymem_short (dstp1, dst, GEN_INT (254)));
	}
      s390_load_address (dst_addr,
			 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));

      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       NE, NULL_RTX, mode, 1, loop_start_label);

      emit_label (restbyte_end_label);

      if (val == const0_rtx)
	emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
      else
	{
	  /* Set the first byte in the block to the value and use an
	     overlapping mvc for the block.  */
	  emit_move_insn (adjust_address (dst, QImode, 0), val);
	  /* execute only uses the lowest 8 bits of count; that's
	     exactly what we need here.  */
	  emit_insn (gen_cpymem_short (dstp1, dst,
				       convert_to_mode (Pmode, count, 1)));
	}

      emit_label (zerobyte_end_label);
    }
}
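
/* The overlapping mvc trick used above relies on MVC copying strictly
   byte by byte from left to right: after the first byte of the block
   is set to VAL, an mvc from dst to dst+1 reads each byte just after
   it has been written, propagating VAL through the whole block.  */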
/* Emit code to compare LEN bytes at OP0 with those at OP1,
   and return the result in TARGET.  */

void
s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
{
  rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
  rtx tmp;

  /* When tuning for z10 or higher we rely on the Glibc functions to
     do the right thing.  Only for constant lengths below 64k we will
     generate inline code.  */
  if (s390_tune >= PROCESSOR_2097_Z10
      && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
    return;

  /* As the result of CMPINT is inverted compared to what we need,
     we have to swap the operands.  */
  tmp = op0; op0 = op1; op1 = tmp;

  if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
    {
      if (INTVAL (len) > 0)
	{
	  emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
	  emit_insn (gen_cmpint (target, ccreg));
	}
      else
	emit_move_insn (target, const0_rtx);
    }
  else if (TARGET_MVCLE)
    {
      emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
      emit_insn (gen_cmpint (target, ccreg));
    }
  else
    {
      rtx addr0, addr1, count, blocks, temp;
      rtx_code_label *loop_start_label = gen_label_rtx ();
      rtx_code_label *loop_end_label = gen_label_rtx ();
      rtx_code_label *end_label = gen_label_rtx ();
      machine_mode mode;

      mode = GET_MODE (len);
      if (mode == VOIDmode)
	mode = Pmode;

      addr0 = gen_reg_rtx (Pmode);
      addr1 = gen_reg_rtx (Pmode);
      count = gen_reg_rtx (mode);
      blocks = gen_reg_rtx (mode);

      convert_move (count, len, 1);
      emit_cmp_and_jump_insns (count, const0_rtx,
			       EQ, NULL_RTX, mode, 1, end_label);

      emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
      emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
      op0 = change_address (op0, VOIDmode, addr0);
      op1 = change_address (op1, VOIDmode, addr1);

      temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
			   OPTAB_DIRECT);
      if (temp != count)
	emit_move_insn (count, temp);

      temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_label (loop_start_label);

      if (TARGET_Z10
	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
	{
	  rtx prefetch;

	  /* Issue a read prefetch for the +2 cache line of operand 1.  */
	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
				   const0_rtx, const0_rtx);
	  emit_insn (prefetch);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;

	  /* Issue a read prefetch for the +2 cache line of operand 2.  */
	  prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
				   const0_rtx, const0_rtx);
	  emit_insn (prefetch);
	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
	}

      emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
      temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
      temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
				   gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
      temp = gen_rtx_SET (pc_rtx, temp);
      emit_jump_insn (temp);

      s390_load_address (addr0,
			 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
      s390_load_address (addr1,
			 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));

      temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
			   OPTAB_DIRECT);
      if (temp != blocks)
	emit_move_insn (blocks, temp);

      emit_cmp_and_jump_insns (blocks, const0_rtx,
			       EQ, NULL_RTX, mode, 1, loop_end_label);

      emit_jump (loop_start_label);
      emit_label (loop_end_label);

      emit_insn (gen_cmpmem_short (op0, op1,
				   convert_to_mode (Pmode, count, 1)));
      emit_label (end_label);

      emit_insn (gen_cmpint (target, ccreg));
    }
}
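
/* CMPINT converts the CCU condition code set by CLC into an integer,
   but with the sense inverted relative to what memcmp must return;
   swapping the two operands before the comparison (see above)
   compensates, so TARGET ends up negative/zero/positive exactly as
   memcmp requires.  */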
/* Emit a conditional jump to LABEL for condition code mask MASK using
   comparison operator COMPARISON.  Return the emitted jump insn.  */

static rtx_insn *
s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
{
  rtx temp;

  gcc_assert (comparison == EQ || comparison == NE);
  gcc_assert (mask > 0 && mask < 15);

  temp = gen_rtx_fmt_ee (comparison, VOIDmode,
			 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
  temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
			       gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
  temp = gen_rtx_SET (pc_rtx, temp);
  return emit_jump_insn (temp);
}
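
/* The MASK operand follows the usual branch-mask encoding: bit value 8
   selects CC0, 4 selects CC1, 2 selects CC2 and 1 selects CC3.  With
   comparison EQ the jump is taken when the current condition code is
   covered by MASK, with NE when it is not.  */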
/* Emit the instructions to implement strlen of STRING and store the
   result in TARGET.  The string has the known ALIGNMENT.  This
   version uses vector instructions and is therefore not appropriate
   for targets prior to z13.  */

void
s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
{
  rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
  rtx str_reg = gen_reg_rtx (V16QImode);
  rtx str_addr_base_reg = gen_reg_rtx (Pmode);
  rtx str_idx_reg = gen_reg_rtx (Pmode);
  rtx result_reg = gen_reg_rtx (V16QImode);
  rtx is_aligned_label = gen_label_rtx ();
  rtx into_loop_label = NULL_RTX;
  rtx loop_start_label = gen_label_rtx ();
  rtx temp;
  rtx len = gen_reg_rtx (QImode);
  rtx cond;
  rtx mem;

  s390_load_address (str_addr_base_reg, XEXP (string, 0));
  emit_move_insn (str_idx_reg, const0_rtx);

  if (INTVAL (alignment) < 16)
    {
      /* Check whether the address happens to be aligned properly so
	 jump directly to the aligned loop.  */
      emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
					    str_addr_base_reg, GEN_INT (15)),
			       const0_rtx, EQ, NULL_RTX,
			       Pmode, 1, is_aligned_label);

      temp = gen_reg_rtx (Pmode);
      temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
			   GEN_INT (15), temp, 1, OPTAB_DIRECT);
      gcc_assert (REG_P (temp));
      highest_index_to_load_reg =
	expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
		      highest_index_to_load_reg, 1, OPTAB_DIRECT);
      gcc_assert (REG_P (highest_index_to_load_reg));
      emit_insn (gen_vllv16qi (str_reg,
		   convert_to_mode (SImode, highest_index_to_load_reg, 1),
		   gen_rtx_MEM (BLKmode, str_addr_base_reg)));

      into_loop_label = gen_label_rtx ();
      s390_emit_jump (into_loop_label, NULL_RTX);
      emit_barrier ();
    }

  emit_label (is_aligned_label);
  LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;

  /* Reaching this point we are only performing 16 bytes aligned
     loads.  */
  emit_move_insn (highest_index_to_load_reg, GEN_INT (15));

  emit_label (loop_start_label);
  LABEL_NUSES (loop_start_label) = 1;

  /* Load 16 bytes of the string into VR.  */
  mem = gen_rtx_MEM (V16QImode,
		     gen_rtx_PLUS (Pmode, str_idx_reg, str_addr_base_reg));
  set_mem_align (mem, 128);
  emit_move_insn (str_reg, mem);
  if (into_loop_label != NULL_RTX)
    {
      emit_label (into_loop_label);
      LABEL_NUSES (into_loop_label) = 1;
    }

  /* Increment string index by 16 bytes.  */
  expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
		str_idx_reg, 1, OPTAB_DIRECT);

  emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));

  add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
		    REG_BR_PROB,
		    profile_probability::very_likely ().to_reg_br_prob_note ());
  emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));

  /* If the string pointer wasn't aligned we have loaded less than 16
     bytes and the remaining bytes got filled with zeros (by vll).
     Now we have to check whether the resulting index lies within the
     bytes actually part of the string.  */

  cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
			    highest_index_to_load_reg);
  s390_load_address (highest_index_to_load_reg,
		     gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
				   const1_rtx));
  if (TARGET_64BIT)
    emit_insn (gen_movdicc (str_idx_reg, cond,
			    highest_index_to_load_reg, str_idx_reg));
  else
    emit_insn (gen_movsicc (str_idx_reg, cond,
			    highest_index_to_load_reg, str_idx_reg));

  add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
			profile_probability::very_unlikely ());

  expand_binop (Pmode, add_optab, str_idx_reg,
		GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
  /* FIXME: len is already zero extended - so avoid the llgcr emitted
     here.  */
  temp = expand_binop (Pmode, add_optab, str_idx_reg,
		       convert_to_mode (Pmode, len, 1),
		       target, 1, OPTAB_DIRECT);
  if (temp != target)
    emit_move_insn (target, temp);
}
/* Expand a movstr pattern using vector instructions: copy the
   null-terminated string at SRC to DST and store the address of the
   terminating zero byte of DST in RESULT.  */

void
s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
{
  rtx temp = gen_reg_rtx (Pmode);
  rtx src_addr = XEXP (src, 0);
  rtx dst_addr = XEXP (dst, 0);
  rtx src_addr_reg = gen_reg_rtx (Pmode);
  rtx dst_addr_reg = gen_reg_rtx (Pmode);
  rtx offset = gen_reg_rtx (Pmode);
  rtx vsrc = gen_reg_rtx (V16QImode);
  rtx vpos = gen_reg_rtx (V16QImode);
  rtx loadlen = gen_reg_rtx (SImode);
  rtx gpos_qi = gen_reg_rtx (QImode);
  rtx gpos = gen_reg_rtx (SImode);
  rtx done_label = gen_label_rtx ();
  rtx loop_label = gen_label_rtx ();
  rtx exit_label = gen_label_rtx ();
  rtx full_label = gen_label_rtx ();

  /* Perform a quick check for string ending on the first up to 16
     bytes and exit early if successful.  */

  emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
  emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
  emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
  emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
  emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
  /* gpos is the byte index if a zero was found and 16 otherwise.
     So if it is lower than the loaded bytes we have a hit.  */
  emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
			   full_label);
  emit_insn (gen_vstlv16qi (vsrc, gpos, dst));

  force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
		      1, OPTAB_DIRECT);
  emit_jump (exit_label);
  emit_barrier ();

  emit_label (full_label);
  LABEL_NUSES (full_label) = 1;

  /* Calculate `offset' so that src + offset points to the last byte
     before 16 byte alignment.  */

  /* temp = src_addr & 0xf  */
  force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
		      1, OPTAB_DIRECT);

  /* offset = 0xf - temp  */
  emit_move_insn (offset, GEN_INT (15));
  force_expand_binop (Pmode, sub_optab, offset, temp, offset,
		      1, OPTAB_DIRECT);

  /* Store `offset' bytes in the destination string.  The quick check
     has loaded at least `offset' bytes into vsrc.  */

  emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));

  /* Advance to the next byte to be loaded.  */
  force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
		      1, OPTAB_DIRECT);

  /* Make sure the addresses are single regs which can be used as a
     base.  */
  emit_move_insn (src_addr_reg, src_addr);
  emit_move_insn (dst_addr_reg, dst_addr);

  /* MAIN LOOP  */

  emit_label (loop_label);
  LABEL_NUSES (loop_label) = 1;

  emit_move_insn (vsrc,
		  gen_rtx_MEM (V16QImode,
			       gen_rtx_PLUS (Pmode, src_addr_reg, offset)));

  emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
				  GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
  add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
		    REG_BR_PROB, profile_probability::very_unlikely ()
				   .to_reg_br_prob_note ());

  emit_move_insn (gen_rtx_MEM (V16QImode,
			       gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
		  vsrc);

  force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
		      offset, 1, OPTAB_DIRECT);

  emit_jump (loop_label);
  emit_barrier ();

  /* We are done.  Add the offset of the zero character to the dst_addr
     pointer to get the result.  */

  emit_label (done_label);
  LABEL_NUSES (done_label) = 1;

  force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
		      1, OPTAB_DIRECT);

  emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
  emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));

  emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));

  force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
		      1, OPTAB_DIRECT);

  emit_label (exit_label);
  LABEL_NUSES (exit_label) = 1;
}
/* Expand conditional increment or decrement using alc/slb instructions.
   Should generate code setting DST to either SRC or SRC + INCREMENT,
   depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
   Returns true if successful, false otherwise.

   That makes it possible to implement some if-constructs without jumps e.g.:
   (borrow = CC0 | CC1 and carry = CC2 | CC3)
   unsigned int a, b, c;
   if (a < b)  c++; -> CCU  b > a  -> CC2;    c += carry;
   if (a < b)  c--; -> CCL3 a - b  -> borrow; c -= borrow;
   if (a <= b) c++; -> CCL3 b - a  -> borrow; c += carry;
   if (a <= b) c--; -> CCU  a <= b -> borrow; c -= borrow;

   Checks for EQ and NE with a nonzero value need an additional xor e.g.:
   if (a == b) c++; -> CCL3 a ^= b; 0 - a  -> borrow;    c += carry;
   if (a == b) c--; -> CCU  a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
   if (a != b) c++; -> CCU  a ^= b; a > 0  -> CC2;       c += carry;
   if (a != b) c--; -> CCL3 a ^= b; 0 - a  -> borrow;    c -= borrow;  */

bool
s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
		   rtx dst, rtx src, rtx increment)
{
  machine_mode cmp_mode;
  machine_mode cc_mode;
  rtx op_res;
  rtx insn;
  rtvec p;
  bool ret;

  if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
      && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
    cmp_mode = SImode;
  else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
	   && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
    cmp_mode = DImode;
  else
    return false;

  /* Try ADD LOGICAL WITH CARRY.  */
  if (increment == const1_rtx)
    {
      /* Determine CC mode to use.  */
      if (cmp_code == EQ || cmp_code == NE)
	{
	  if (cmp_op1 != const0_rtx)
	    {
	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
					     NULL_RTX, 0, OPTAB_WIDEN);
	      cmp_op1 = const0_rtx;
	    }

	  cmp_code = cmp_code == EQ ? LEU : GTU;
	}

      if (cmp_code == LTU || cmp_code == LEU)
	{
	  rtx tem = cmp_op0;
	  cmp_op0 = cmp_op1;
	  cmp_op1 = tem;
	  cmp_code = swap_condition (cmp_code);
	}

      switch (cmp_code)
	{
	  case GTU:
	    cc_mode = CCUmode;
	    break;

	  case GEU:
	    cc_mode = CCL3mode;
	    break;

	  default:
	    return false;
	}

      /* Emit comparison instruction pattern.  */
      if (!register_operand (cmp_op0, cmp_mode))
	cmp_op0 = force_reg (cmp_mode, cmp_op0);

      insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
      /* We use insn_invalid_p here to add clobbers if required.  */
      ret = insn_invalid_p (emit_insn (insn), false);
      gcc_assert (!ret);

      /* Emit ALC instruction pattern.  */
      op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
			       gen_rtx_REG (cc_mode, CC_REGNUM),
			       const0_rtx);

      if (src != const0_rtx)
	{
	  if (!register_operand (src, GET_MODE (dst)))
	    src = force_reg (GET_MODE (dst), src);

	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
	  op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
	}

      p = rtvec_alloc (2);
      RTVEC_ELT (p, 0) =
	gen_rtx_SET (dst, op_res);
      RTVEC_ELT (p, 1) =
	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, p));

      return true;
    }

  /* Try SUBTRACT LOGICAL WITH BORROW.  */
  if (increment == constm1_rtx)
    {
      /* Determine CC mode to use.  */
      if (cmp_code == EQ || cmp_code == NE)
	{
	  if (cmp_op1 != const0_rtx)
	    {
	      cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
					     NULL_RTX, 0, OPTAB_WIDEN);
	      cmp_op1 = const0_rtx;
	    }

	  cmp_code = cmp_code == EQ ? LEU : GTU;
	}

      if (cmp_code == GTU || cmp_code == GEU)
	{
	  rtx tem = cmp_op0;
	  cmp_op0 = cmp_op1;
	  cmp_op1 = tem;
	  cmp_code = swap_condition (cmp_code);
	}

      switch (cmp_code)
	{
	  case LEU:
	    cc_mode = CCUmode;
	    break;

	  case LTU:
	    cc_mode = CCL3mode;
	    break;

	  default:
	    return false;
	}

      /* Emit comparison instruction pattern.  */
      if (!register_operand (cmp_op0, cmp_mode))
	cmp_op0 = force_reg (cmp_mode, cmp_op0);

      insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
			  gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
      /* We use insn_invalid_p here to add clobbers if required.  */
      ret = insn_invalid_p (emit_insn (insn), false);
      gcc_assert (!ret);

      /* Emit SLB instruction pattern.  */
      if (!register_operand (src, GET_MODE (dst)))
	src = force_reg (GET_MODE (dst), src);

      op_res = gen_rtx_MINUS (GET_MODE (dst),
			      gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
			      gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
					      gen_rtx_REG (cc_mode, CC_REGNUM),
					      const0_rtx));
      p = rtvec_alloc (2);
      RTVEC_ELT (p, 0) =
	gen_rtx_SET (dst, op_res);
      RTVEC_ELT (p, 1) =
	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
      emit_insn (gen_rtx_PARALLEL (VOIDmode, p));

      return true;
    }

  return false;
}
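
/* Example for the ALC path: for `if (a < b) c++;' with unsigned ints,
   the comparison is emitted as b > a in CCU mode, which sets CC2 (the
   carry) exactly when a < b; the following add-with-carry then
   computes c = c + 0 + carry, incrementing c only if the condition
   held, with no branch at all.  */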
/* Expand code for the insv template.  Return true if successful.  */

bool
s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
{
  int bitsize = INTVAL (op1);
  int bitpos = INTVAL (op2);
  machine_mode mode = GET_MODE (dest);
  machine_mode smode;
  int smode_bsize, mode_bsize;
  rtx op, clobber;

  if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
    return false;

  /* Just a move.  */
  if (bitpos == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (src))
      && mode == GET_MODE (src))
    {
      emit_move_insn (dest, src);
      return true;
    }

  /* Generate INSERT IMMEDIATE (IILL et al).  */
  /* (set (ze (reg)) (const_int)).  */
  if (TARGET_ZARCH
      && register_operand (dest, word_mode)
      && (bitpos % 16) == 0
      && (bitsize % 16) == 0
      && const_int_operand (src, VOIDmode))
    {
      HOST_WIDE_INT val = INTVAL (src);
      int regpos = bitpos + bitsize;

      while (regpos > bitpos)
	{
	  machine_mode putmode;
	  int putsize;

	  if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
	    putmode = SImode;
	  else
	    putmode = HImode;

	  putsize = GET_MODE_BITSIZE (putmode);
	  regpos -= putsize;
	  emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
						GEN_INT (putsize),
						GEN_INT (regpos)),
			  gen_int_mode (val, putmode));
	  val >>= putsize;
	}
      gcc_assert (regpos == bitpos);
      return true;
    }

  smode = smallest_int_mode_for_size (bitsize);
  smode_bsize = GET_MODE_BITSIZE (smode);
  mode_bsize = GET_MODE_BITSIZE (mode);

  /* Generate STORE CHARACTERS UNDER MASK (STCM et al).  */
  if (bitpos == 0
      && (bitsize % BITS_PER_UNIT) == 0
      && MEM_P (dest)
      && (register_operand (src, word_mode)
	  || const_int_operand (src, VOIDmode)))
    {
      /* Emit standard pattern if possible.  */
      if (smode_bsize == bitsize)
	{
	  emit_move_insn (adjust_address (dest, smode, 0),
			  gen_lowpart (smode, src));
	  return true;
	}

      /* (set (ze (mem)) (const_int)).  */
      else if (const_int_operand (src, VOIDmode))
	{
	  int size = bitsize / BITS_PER_UNIT;
	  rtx src_mem = adjust_address (force_const_mem (word_mode, src),
					BLKmode,
					UNITS_PER_WORD - size);

	  dest = adjust_address (dest, BLKmode, 0);
	  set_mem_size (dest, size);
	  s390_expand_cpymem (dest, src_mem, GEN_INT (size));
	  return true;
	}

      /* (set (ze (mem)) (reg)).  */
      else if (register_operand (src, word_mode))
	{
	  if (bitsize <= 32)
	    emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
						  const0_rtx), src);
	  else
	    {
	      /* Emit st,stcmh sequence.  */
	      int stcmh_width = bitsize - 32;
	      int size = stcmh_width / BITS_PER_UNIT;

	      emit_move_insn (adjust_address (dest, SImode, size),
			      gen_lowpart (SImode, src));
	      set_mem_size (dest, size);
	      emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
						    GEN_INT (stcmh_width),
						    const0_rtx),
			      gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
	    }
	  return true;
	}

      return false;
    }

  /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al).  */
  if ((bitpos % BITS_PER_UNIT) == 0
      && (bitsize % BITS_PER_UNIT) == 0
      && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
      && MEM_P (src)
      && (mode == DImode || mode == SImode)
      && mode != smode
      && register_operand (dest, mode))
    {
      /* Emit a strict_low_part pattern if possible.  */
      if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
	{
	  rtx low_dest = gen_lowpart (smode, dest);
	  rtx low_src = gen_lowpart (smode, src);

	  switch (smode)
	    {
	    case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); return true;
	    case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); return true;
	    case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); return true;
	    default: break;
	    }
	}

      /* ??? There are more powerful versions of ICM that are not
	 completely represented in the md file.  */
    }

  /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al).  */
  if (TARGET_Z10 && (mode == DImode || mode == SImode))
    {
      machine_mode mode_s = GET_MODE (src);

      if (CONSTANT_P (src))
	{
	  /* For constant zero values the representation with AND
	     appears to be folded in more situations than the (set
	     (zero_extract) ...).
	     We only do this when the start and end of the bitfield
	     remain in the same SImode chunk.  That way nihf or nilf
	     can be used.
	     The AND patterns might still generate a risbg for this.  */
	  if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
	    return false;
	  else
	    src = force_reg (mode, src);
	}
      else if (mode_s != mode)
	{
	  gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
	  src = force_reg (mode_s, src);
	  src = gen_lowpart (mode, src);
	}

      op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
      op = gen_rtx_SET (op, src);

      if (!TARGET_ZEC12)
	{
	  clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
	  op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
	}
      emit_insn (op);

      return true;
    }

  return false;
}
/* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
   register that holds VAL of mode MODE shifted by COUNT bits.  */

static inline rtx
s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
{
  val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
			     NULL_RTX, 1, OPTAB_DIRECT);
  return expand_simple_binop (SImode, ASHIFT, val, count,
			      NULL_RTX, 1, OPTAB_DIRECT);
}
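
/* E.g. for a HImode VAL and COUNT of 16 this computes
   (val & 0xffff) << 16, placing the halfword in the upper half of the
   SImode word used by the compare-and-swap expansion.  */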
/* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
   the result in TARGET.  */

void
s390_expand_vec_compare (rtx target, enum rtx_code cond,
			 rtx cmp_op1, rtx cmp_op2)
{
  machine_mode mode = GET_MODE (target);
  bool neg_p = false, swap_p = false;
  rtx tmp;

  if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
    {
      cmp_op2 = force_reg (GET_MODE (cmp_op1), cmp_op2);
      switch (cond)
	{
	  /* NE a != b -> !(a == b) */
	case NE:   cond = EQ; neg_p = true;                break;
	case UNGT:
	  emit_insn (gen_vec_cmpungt (target, cmp_op1, cmp_op2));
	  return;
	case UNGE:
	  emit_insn (gen_vec_cmpunge (target, cmp_op1, cmp_op2));
	  return;
	case LE:   cond = GE;               swap_p = true; break;
	  /* UNLE: (a u<= b) -> (b u>= a).  */
	case UNLE:
	  emit_insn (gen_vec_cmpunge (target, cmp_op2, cmp_op1));
	  return;
	  /* LT: a < b -> b > a */
	case LT:   cond = GT;               swap_p = true; break;
	  /* UNLT: (a u< b) -> (b u> a).  */
	case UNLT:
	  emit_insn (gen_vec_cmpungt (target, cmp_op2, cmp_op1));
	  return;
	case UNEQ:
	  emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
	  return;
	case LTGT:
	  emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
	  return;
	case ORDERED:
	  emit_insn (gen_vec_cmpordered (target, cmp_op1, cmp_op2));
	  return;
	case UNORDERED:
	  emit_insn (gen_vec_cmpunordered (target, cmp_op1, cmp_op2));
	  return;
	default: break;
	}
    }
  else
    {
      /* Turn x < 0 into x >> (bits per element - 1)  */
      if (cond == LT && cmp_op2 == CONST0_RTX (mode))
	{
	  int shift = GET_MODE_BITSIZE (GET_MODE_INNER (mode)) - 1;
	  rtx res = expand_simple_binop (mode, ASHIFTRT, cmp_op1,
					 GEN_INT (shift), target,
					 0, OPTAB_DIRECT);
	  if (res != target)
	    emit_move_insn (target, res);
	  return;
	}
      cmp_op2 = force_reg (GET_MODE (cmp_op1), cmp_op2);

      switch (cond)
	{
	  /* NE: a != b -> !(a == b) */
	case NE:  cond = EQ;  neg_p = true;                break;
	  /* GE: a >= b -> !(b > a) */
	case GE:  cond = GT;  neg_p = true; swap_p = true; break;
	  /* GEU: a >= b -> !(b > a) */
	case GEU: cond = GTU; neg_p = true; swap_p = true; break;
	  /* LE: a <= b -> !(a > b) */
	case LE:  cond = GT;  neg_p = true;                break;
	  /* LEU: a <= b -> !(a > b) */
	case LEU: cond = GTU; neg_p = true;                break;
	  /* LT: a < b -> b > a */
	case LT:  cond = GT;                swap_p = true; break;
	  /* LTU: a < b -> b > a */
	case LTU: cond = GTU;               swap_p = true; break;
	default: break;
	}
    }

  if (swap_p)
    {
      tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
    }

  emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
						  mode,
						  cmp_op1, cmp_op2)));
  if (neg_p)
    emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
}
/* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
   TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
   elements in CMP1 and CMP2 fulfill the comparison.
   This function is only used to emit patterns for the vx builtins and
   therefore only handles comparison codes required by the
   builtins.  */
void
s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
			    rtx cmp1, rtx cmp2, bool all_p)
{
  machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
  rtx tmp_reg = gen_reg_rtx (SImode);
  bool swap_p = false;

  if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
    {
      switch (code)
	{
	case EQ:
	case NE:
	  cc_producer_mode = CCVEQmode;
	  break;
	case GE:
	case LT:
	  code = swap_condition (code);
	  swap_p = true;
	  /* fallthrough */
	case GT:
	case LE:
	  cc_producer_mode = CCVIHmode;
	  break;
	case GEU:
	case LTU:
	  code = swap_condition (code);
	  swap_p = true;
	  /* fallthrough */
	case GTU:
	case LEU:
	  cc_producer_mode = CCVIHUmode;
	  break;
	default:
	  gcc_unreachable ();
	}
      scratch_mode = GET_MODE (cmp1);
      /* These codes represent inverted CC interpretations.  Inverting
	 an ALL CC mode results in an ANY CC mode and the other way
	 around.  Invert the all_p flag here to compensate for
	 that.  */
      if (code == NE || code == LE || code == LEU)
	all_p = !all_p;

      cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
    }
  else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
    {
      bool inv_p = false;

      switch (code)
	{
	case EQ:   cc_producer_mode = CCVEQmode;  break;
	case NE:   cc_producer_mode = CCVEQmode;  inv_p = true; break;
	case GT:   cc_producer_mode = CCVFHmode;  break;
	case GE:   cc_producer_mode = CCVFHEmode; break;
	case UNLE: cc_producer_mode = CCVFHmode;  inv_p = true; break;
	case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
	case LT:   cc_producer_mode = CCVFHmode;  code = GT; swap_p = true; break;
	case LE:   cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
	default: gcc_unreachable ();
	}
      scratch_mode = related_int_vector_mode (GET_MODE (cmp1)).require ();

      if (inv_p)
	all_p = !all_p;

      cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
    }
  else
    gcc_unreachable ();

  if (swap_p)
    {
      rtx tmp = cmp2;
      cmp2 = cmp1;
      cmp1 = tmp;
    }

  emit_insn (gen_rtx_PARALLEL (VOIDmode,
	       gen_rtvec (2, gen_rtx_SET (
			       gen_rtx_REG (cc_producer_mode, CC_REGNUM),
			       gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
			  gen_rtx_CLOBBER (VOIDmode,
					   gen_rtx_SCRATCH (scratch_mode)))));
  emit_move_insn (target, const0_rtx);
  emit_move_insn (tmp_reg, const1_rtx);

  emit_move_insn (target,
		  gen_rtx_IF_THEN_ELSE (SImode,
		    gen_rtx_fmt_ee (code, VOIDmode,
				    gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
				    const0_rtx),
		    tmp_reg,
		    target));
}
6799 /* Invert the comparison CODE applied to a CC mode. This is only safe
6800 if we know whether there result was created by a floating point
6801 compare or not. For the CCV modes this is encoded as part of the
6804 s390_reverse_condition (machine_mode mode
, enum rtx_code code
)
6806 /* Reversal of FP compares takes care -- an ordered compare
6807 becomes an unordered compare and vice versa. */
6808 if (mode
== CCVFALLmode
|| mode
== CCVFANYmode
|| mode
== CCSFPSmode
)
6809 return reverse_condition_maybe_unordered (code
);
6810 else if (mode
== CCVIALLmode
|| mode
== CCVIANYmode
)
6811 return reverse_condition (code
);
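
/* E.g. reversing GT in CCVFALLmode or CCVFANYmode yields UNLE, so the
   unordered case is not lost, whereas reversing GT in CCVIALLmode
   yields plain LE.  */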
/* Generate a vector comparison expression loading either elements of
   THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
   and CMP_OP2.  */

void
s390_expand_vcond (rtx target, rtx then, rtx els,
                   enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
{
  rtx tmp;
  machine_mode result_mode;
  rtx result_target;

  machine_mode target_mode = GET_MODE (target);
  machine_mode cmp_mode = GET_MODE (cmp_op1);
  rtx op = (cond == LT) ? els : then;

  /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
     and x < 0 ? 1 : 0 into (unsigned) x >> 31.  Likewise
     for short and byte (x >> 15 and x >> 7 respectively).  */
  if ((cond == LT || cond == GE)
      && target_mode == cmp_mode
      && cmp_op2 == CONST0_RTX (cmp_mode)
      && op == CONST0_RTX (target_mode)
      && s390_vector_mode_supported_p (target_mode)
      && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
    {
      rtx negop = (cond == LT) ? then : els;

      int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;

      /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
      if (negop == CONST1_RTX (target_mode))
        {
          rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
                                         GEN_INT (shift), target,
                                         1, OPTAB_DIRECT);
          if (res != target)
            emit_move_insn (target, res);
          return;
        }

      /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
      else if (all_ones_operand (negop, target_mode))
        {
          rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
                                         GEN_INT (shift), target,
                                         0, OPTAB_DIRECT);
          if (res != target)
            emit_move_insn (target, res);
          return;
        }
    }

  /* We always use an integral type vector to hold the comparison
     result.  */
  result_mode = related_int_vector_mode (cmp_mode).require ();
  result_target = gen_reg_rtx (result_mode);

  /* We allow vector immediates as comparison operands that
     can be handled by the optimization above but not by the
     following code.  Hence, force them into registers here.  */
  if (!REG_P (cmp_op1))
    cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);

  s390_expand_vec_compare (result_target, cond, cmp_op1, cmp_op2);

  /* If the results are supposed to be either -1 or 0 we are done
     since this is what our compare instructions generate anyway.  */
  if (all_ones_operand (then, GET_MODE (then))
      && const0_operand (els, GET_MODE (els)))
    {
      emit_move_insn (target, gen_rtx_SUBREG (target_mode,
                                              result_target, 0));
      return;
    }

  /* Otherwise we will do a vsel afterwards.  */
  /* This gets triggered e.g.
     with gcc.c-torture/compile/pr53410-1.c */
  if (!REG_P (then))
    then = force_reg (target_mode, then);

  if (!REG_P (els))
    els = force_reg (target_mode, els);

  tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
                        result_target,
                        CONST0_RTX (result_mode));

  /* We compared the result against zero above so we have to swap then
     and els here.  */
  tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);

  gcc_assert (target_mode == GET_MODE (then));
  emit_insn (gen_rtx_SET (target, tmp));
}
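
/* E.g. a V8HImode vcond of the form x < 0 ? -1 : 0 hits the shortcut
   above with shift == 15 and collapses into a single arithmetic right
   shift of each halfword; neither a compare nor a vsel is needed.  */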
/* Emit the RTX necessary to initialize the vector TARGET with values
   in VALS.  */

void
s390_expand_vec_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  bool all_same = true, all_regs = true, all_const_int = true;
  rtx x;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);

      if (!CONST_INT_P (x))
        all_const_int = false;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
        all_same = false;

      if (!REG_P (x))
        all_regs = false;
    }

  /* Use vector gen mask or vector gen byte mask if possible.  */
  if (all_same && all_const_int)
    {
      rtx vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
      if (XVECEXP (vals, 0, 0) == const0_rtx
          || s390_contiguous_bitmask_vector_p (vec, NULL, NULL)
          || s390_bytemask_vector_p (vec, NULL))
        {
          emit_insn (gen_rtx_SET (target, vec));
          return;
        }
    }

  /* Use vector replicate instructions.  vlrep/vrepi/vrep  */
  if (all_same)
    {
      rtx elem = XVECEXP (vals, 0, 0);

      /* vec_splats accepts general_operand as source.  */
      if (!general_operand (elem, GET_MODE (elem)))
        elem = force_reg (inner_mode, elem);

      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
      return;
    }

  if (all_regs
      && REG_P (target)
      && n_elts == 2
      && GET_MODE_SIZE (inner_mode) == 8)
    {
      /* Use vector load pair.  */
      emit_insn (gen_rtx_SET (target,
                              gen_rtx_VEC_CONCAT (mode,
                                                  XVECEXP (vals, 0, 0),
                                                  XVECEXP (vals, 0, 1))));
      return;
    }

  /* Use vector load logical element and zero.  */
  if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
    {
      bool found = true;

      x = XVECEXP (vals, 0, 0);
      if (memory_operand (x, inner_mode))
        {
          for (i = 1; i < n_elts; ++i)
            found = found && XVECEXP (vals, 0, i) == const0_rtx;

          if (found)
            {
              machine_mode half_mode = (inner_mode == SFmode
                                        ? V2SFmode : V2SImode);
              emit_insn (gen_rtx_SET (target,
                          gen_rtx_VEC_CONCAT (mode,
                                              gen_rtx_VEC_CONCAT (half_mode,
                                                                  x,
                                                                  const0_rtx),
                                              gen_rtx_VEC_CONCAT (half_mode,
                                                                  const0_rtx,
                                                                  const0_rtx))));
              return;
            }
        }
    }

  /* We are about to set the vector elements one by one.  Zero out the
     full register first in order to help the data flow framework to
     detect it as full VR set.  */
  emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));

  /* Unfortunately the vec_init expander is not allowed to fail.  So
     we have to implement the fallback ourselves.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx elem = XVECEXP (vals, 0, i);
      if (!general_operand (elem, GET_MODE (elem)))
        elem = force_reg (inner_mode, elem);

      emit_insn (gen_rtx_SET (target,
                              gen_rtx_UNSPEC (mode,
                                              gen_rtvec (3, elem,
                                                         GEN_INT (i), target),
                                              UNSPEC_VEC_SET)));
    }
}

/* Return a parallel of constant integers to be used as permutation
   vector for a vector merge operation in MODE.  If HIGH_P is true the
   left-most elements of the source vectors are merged otherwise the
   right-most elements.  */

rtx
s390_expand_merge_perm_const (machine_mode mode, bool high_p)
{
  int nelts = GET_MODE_NUNITS (mode);
  rtx perm[16];
  int addend = high_p ? 0 : nelts;

  for (int i = 0; i < nelts; i++)
    perm[i] = GEN_INT ((i + addend) / 2 + (i % 2) * nelts);

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelts, perm));
}
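
/* E.g. for V4SImode the permutation built above is {0, 4, 1, 5} if
   HIGH_P and {2, 6, 3, 7} otherwise, i.e. the element selection
   performed by the vector merge high/low (vmrh/vmrl) instructions.  */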
/* Emit RTL to implement a vector merge operation of SRC1 and SRC2
   which creates the result in TARGET.  HIGH_P determines whether a
   merge hi or lo will be generated.  */

void
s390_expand_merge (rtx target, rtx src1, rtx src2, bool high_p)
{
  machine_mode mode = GET_MODE (target);
  opt_machine_mode opt_mode_2x = mode_for_vector (GET_MODE_INNER (mode),
                                                  2 * GET_MODE_NUNITS (mode));
  gcc_assert (opt_mode_2x.exists ());
  machine_mode mode_double_nelts = opt_mode_2x.require ();
  rtx constv = s390_expand_merge_perm_const (mode, high_p);
  src1 = force_reg (GET_MODE (src1), src1);
  src2 = force_reg (GET_MODE (src2), src2);
  rtx x = gen_rtx_VEC_CONCAT (mode_double_nelts, src1, src2);
  x = gen_rtx_VEC_SELECT (mode, x, constv);
  emit_insn (gen_rtx_SET (target, x));
}

/* Emit a vector constant that contains 1s in each element's sign bit position
   and 0s in other positions.  MODE is the desired constant's mode.  */

rtx
s390_build_signbit_mask (machine_mode mode)
{
  if (mode == TFmode && TARGET_VXE)
    {
      wide_int mask_val = wi::set_bit_in_zero (127, 128);
      rtx mask = immed_wide_int_const (mask_val, TImode);
      return gen_lowpart (TFmode, mask);
    }

  /* Generate the integral element mask value.  */
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int inner_bitsize = GET_MODE_BITSIZE (inner_mode);
  wide_int mask_val = wi::set_bit_in_zero (inner_bitsize - 1, inner_bitsize);

  /* Emit the element mask rtx.  Use gen_lowpart in order to cast the integral
     value to the desired mode.  */
  machine_mode int_mode = related_int_vector_mode (mode).require ();
  rtx mask = immed_wide_int_const (mask_val, GET_MODE_INNER (int_mode));
  mask = gen_lowpart (inner_mode, mask);

  /* Emit the vector mask rtx by replicating the element mask rtx.  */
  int nunits = GET_MODE_NUNITS (mode);
  rtvec v = rtvec_alloc (nunits);
  for (int i = 0; i < nunits; i++)
    RTVEC_ELT (v, i) = mask;
  return gen_rtx_CONST_VECTOR (mode, v);
}
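
/* E.g. for V4SFmode this builds {0x80000000, 0x80000000, 0x80000000,
   0x80000000} with each element reinterpreted as an SFmode value; for
   V2DFmode each element is 0x8000000000000000.  */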
/* Structure to hold the initial parameters for a compare_and_swap operation
   in HImode and QImode.  */

struct alignment_context
{
  rtx memsi;      /* SI aligned memory location.  */
  rtx shift;      /* Bit offset with regard to lsb.  */
  rtx modemask;   /* Mask of the HQImode shifted by SHIFT bits.  */
  rtx modemaski;  /* ~modemask */
  bool aligned;   /* True if memory is aligned, false otherwise.  */
};

/* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
   structure AC for transparent simplifying, if the memory alignment is known
   to be at least 32bit.  MEM is the memory location for the actual operation
   and MODE its mode.  */

static void
init_alignment_context (struct alignment_context *ac, rtx mem,
                        machine_mode mode)
{
  ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
  ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));

  if (ac->aligned)
    ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned.  */
  else
    {
      /* Alignment is unknown.  */
      rtx byteoffset, addr, align;

      /* Force the address into a register.  */
      addr = force_reg (Pmode, XEXP (mem, 0));

      /* Align it to SImode.  */
      align = expand_simple_binop (Pmode, AND, addr,
                                   GEN_INT (-GET_MODE_SIZE (SImode)),
                                   NULL_RTX, 1, OPTAB_DIRECT);
      /* Generate MEM.  */
      ac->memsi = gen_rtx_MEM (SImode, align);
      MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
      set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
      set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));

      /* Calculate shiftcount.  */
      byteoffset = expand_simple_binop (Pmode, AND, addr,
                                        GEN_INT (GET_MODE_SIZE (SImode) - 1),
                                        NULL_RTX, 1, OPTAB_DIRECT);
      /* As we already have some offset, evaluate the remaining distance.  */
      ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
                                       NULL_RTX, 1, OPTAB_DIRECT);
    }

  /* Shift is the byte count, but we need the bitcount.  */
  ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
                                   NULL_RTX, 1, OPTAB_DIRECT);

  /* Calculate masks.  */
  ac->modemask = expand_simple_binop (SImode, ASHIFT,
                                      GEN_INT (GET_MODE_MASK (mode)),
                                      ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
  ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
                                      NULL_RTX, 1);
}
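
/* E.g. for an unaligned QImode location at byte offset 3 within its
   word: byteoffset == 3, so shift == (4 - 1 - 3) * 8 == 0 bits and
   modemask == 0xff; for byte offset 0 the shift is 24 and the mask
   0xff000000, matching the big-endian byte order of the target.  */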
/* A subroutine of s390_expand_cs_hqi.  Insert INS into VAL.  If possible,
   use a single insv insn into SEQ2.  Otherwise, put prep insns in SEQ1 and
   perform the merge in SEQ2.  */

static rtx
s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
                    machine_mode mode, rtx val, rtx ins)
{
  rtx tmp;

  if (ac->aligned)
    {
      start_sequence ();
      tmp = copy_to_mode_reg (SImode, val);
      if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
                            const0_rtx, ins))
        {
          *seq1 = NULL;
          *seq2 = get_insns ();
          end_sequence ();
          return tmp;
        }
      end_sequence ();
    }

  /* Failed to use insv.  Generate a two part shift and mask.  */
  start_sequence ();
  tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
  *seq1 = get_insns ();
  end_sequence ();

  start_sequence ();
  tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
  *seq2 = get_insns ();
  end_sequence ();

  return tmp;
}
/* Expand an atomic compare and swap operation for HImode and QImode.  MEM is
   the memory location, CMP the old value to compare MEM with and NEW_RTX the
   value to set if CMP == MEM.  */

static void
s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
                    rtx cmp, rtx new_rtx, bool is_weak)
{
  struct alignment_context ac;
  rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
  rtx res = gen_reg_rtx (SImode);
  rtx_code_label *csloop = NULL, *csend = NULL;

  gcc_assert (MEM_P (mem));

  init_alignment_context (&ac, mem, mode);

  /* Load full word.  Subsequent loads are performed by CS.  */
  val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
                             NULL_RTX, 1, OPTAB_DIRECT);

  /* Prepare insertions of cmp and new_rtx into the loaded value.  When
     possible, we try to use insv to make this happen efficiently.  If
     that fails we'll generate code both inside and outside the loop.  */
  cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
  newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);

  if (seq0)
    emit_insn (seq0);
  if (seq1)
    emit_insn (seq1);

  /* Start CS loop.  */
  if (!is_weak)
    {
      /* Begin assuming success.  */
      emit_move_insn (btarget, const1_rtx);

      csloop = gen_label_rtx ();
      csend = gen_label_rtx ();
      emit_label (csloop);
    }

  /* val = "<mem>00..0<mem>"
   * cmp = "00..0<cmp>00..0"
   * new = "00..0<new>00..0"
   */

  emit_insn (seq2);
  emit_insn (seq3);

  cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
  if (is_weak)
    emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
  else
    {
      rtx tmp;

      /* Jump to end if we're done (likely?).  */
      s390_emit_jump (csend, cc);

      /* Check for changes outside mode, and loop internal if so.
         Arrange the moves so that the compare is adjacent to the
         branch so that we can generate CRJ.  */
      tmp = copy_to_reg (val);
      force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
                          1, OPTAB_DIRECT);
      cc = s390_emit_compare (NE, val, tmp);
      s390_emit_jump (csloop, cc);

      /* Failed.  */
      emit_move_insn (btarget, const0_rtx);
      emit_label (csend);
    }

  /* Return the correct part of the bitfield.  */
  convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
                                              NULL_RTX, 1, OPTAB_DIRECT), 1);
}
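
/* For the non-weak case the expansion above yields, roughly:

       btarget = 1;
     csloop:
       recompute cmpv/newv from the last observed word value
       res = CS (ac.memsi, cmpv, newv);
       if (success) goto csend;
       if ((res & ac.modemaski) != val)
         { val = res & ac.modemaski; goto csloop; }
       btarget = 0;   -- the HQImode field itself differed
     csend:
       vtarget = res >> ac.shift;  */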
/* Variant of s390_expand_cs for SI, DI and TI modes.  */

static void
s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
                     rtx cmp, rtx new_rtx, bool is_weak)
{
  rtx output = vtarget;
  rtx_code_label *skip_cs_label = NULL;
  bool do_const_opt = false;

  if (!register_operand (output, mode))
    output = gen_reg_rtx (mode);

  /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
     with the constant first and skip the compare_and_swap because it's very
     expensive and likely to fail anyway.
     Note 1: This is done only for IS_WEAK.  C11 allows optimizations that may
     cause spurious failures in that case.
     Note 2: It may be useful to do this also for non-constant INPUT.
     Note 3: Currently only targets with "load on condition" are supported
     (z196 and newer).  */

  if (TARGET_Z196
      && (mode == SImode || mode == DImode))
    do_const_opt = (is_weak && CONST_INT_P (cmp));

  if (do_const_opt)
    {
      rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);

      skip_cs_label = gen_label_rtx ();
      emit_move_insn (btarget, const0_rtx);
      if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
        {
          rtvec lt = rtvec_alloc (2);

          /* Load-and-test + conditional jump.  */
          RTVEC_ELT (lt, 0)
            = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
          RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
          emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
        }
      else
        {
          emit_move_insn (output, mem);
          emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
        }
      s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
      add_reg_br_prob_note (get_last_insn (),
                            profile_probability::very_unlikely ());
      /* If the jump is not taken, OUTPUT is the expected value.  */
      cmp = output;
      /* Reload newval to a register manually, *after* the compare and jump
         above.  Otherwise Reload might place it before the jump.  */
    }
  else
    cmp = force_reg (mode, cmp);
  new_rtx = force_reg (mode, new_rtx);
  s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
                              (do_const_opt) ? CCZmode : CCZ1mode);
  if (skip_cs_label != NULL)
    emit_label (skip_cs_label);

  /* We deliberately accept non-register operands in the predicate
     to ensure the write back to the output operand happens *before*
     the store-flags code below.  This makes it easier for combine
     to merge the store-flags code with a potential test-and-branch
     pattern following (immediately!) afterwards.  */
  if (output != vtarget)
    emit_move_insn (vtarget, output);

  if (do_const_opt)
    {
      rtx cc, cond, ite;

      /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
         btarget has already been initialized with 0 above.  */
      cc = gen_rtx_REG (CCZmode, CC_REGNUM);
      cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
      ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
      emit_insn (gen_rtx_SET (btarget, ite));
    }
  else
    {
      rtx cc, cond;

      cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
      cond = gen_rtx_EQ (SImode, cc, const0_rtx);
      emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
    }
}
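
/* With DO_CONST_OPT the code above turns a weak compare-and-swap
   against a constant into, roughly:

       btarget = 0;
       if (mem != cmp) goto skip;   -- load(-and-test) + branch
       CS (output = mem, cmp, new_rtx);
     skip:
       btarget = (cc == 0 ? 1 : btarget);   -- load on condition

   so the expensive CS is bypassed whenever the initial comparison
   already fails.  */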
/* Expand an atomic compare and swap operation.  MEM is the memory location,
   CMP the old value to compare MEM with and NEW_RTX the value to set if
   CMP == MEM.  */

void
s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
                rtx cmp, rtx new_rtx, bool is_weak)
{
  switch (mode)
    {
    case E_TImode:
    case E_DImode:
    case E_SImode:
      s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
      break;
    case E_HImode:
    case E_QImode:
      s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
      break;
    default:
      gcc_unreachable ();
    }
}

/* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
   The memory location MEM is set to INPUT.  OUTPUT is set to the previous
   value from MEM.  */

void
s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
{
  machine_mode mode = GET_MODE (mem);
  rtx_code_label *csloop;

  if (TARGET_Z196
      && (mode == DImode || mode == SImode)
      && CONST_INT_P (input) && INTVAL (input) == 0)
    {
      emit_move_insn (output, const0_rtx);
      if (mode == DImode)
        emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
      else
        emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
      return;
    }

  input = force_reg (mode, input);
  emit_move_insn (output, mem);
  csloop = gen_label_rtx ();
  emit_label (csloop);
  s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
                                                      input, CCZ1mode));
}
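
/* I.e. apart from the TARGET_Z196 fast path for exchanging in a zero,
   the loop keeps retrying OUTPUT = CS (MEM, OUTPUT, INPUT) until the
   compare-and-swap finally succeeds.  */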
/* Expand an atomic operation CODE of mode MODE.  MEM is the memory location
   and VAL the value to play with.  If AFTER is true then store the value MEM
   holds after the operation, if AFTER is false then store the value MEM
   holds before the operation.  If TARGET is zero then discard that value, else
   store it to TARGET.  */

void
s390_expand_atomic (machine_mode mode, enum rtx_code code,
                    rtx target, rtx mem, rtx val, bool after)
{
  struct alignment_context ac;
  rtx cmp;
  rtx new_rtx = gen_reg_rtx (SImode);
  rtx orig = gen_reg_rtx (SImode);
  rtx_code_label *csloop = gen_label_rtx ();

  gcc_assert (!target || register_operand (target, VOIDmode));
  gcc_assert (MEM_P (mem));

  init_alignment_context (&ac, mem, mode);

  /* Shift val to the correct bit positions.
     Preserve "icm", but prevent "ex icm".  */
  if (!(ac.aligned && code == SET && MEM_P (val)))
    val = s390_expand_mask_and_shift (val, mode, ac.shift);

  /* Further preparation insns.  */
  if (code == PLUS || code == MINUS)
    emit_move_insn (orig, val);
  else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
    val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
                               NULL_RTX, 1, OPTAB_DIRECT);

  /* Load full word.  Subsequent loads are performed by CS.  */
  cmp = force_reg (SImode, ac.memsi);

  /* Start CS loop.  */
  emit_label (csloop);
  emit_move_insn (new_rtx, cmp);

  /* Patch new with val at correct position.  */
  switch (code)
    {
    case PLUS:
    case MINUS:
      val = expand_simple_binop (SImode, code, new_rtx, orig,
                                 NULL_RTX, 1, OPTAB_DIRECT);
      val = expand_simple_binop (SImode, AND, val, ac.modemask,
                                 NULL_RTX, 1, OPTAB_DIRECT);
      /* FALLTHRU */
    case SET:
      if (ac.aligned && MEM_P (val))
        store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
                         0, 0, SImode, val, false);
      else
        {
          new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
                                         NULL_RTX, 1, OPTAB_DIRECT);
          new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
                                         NULL_RTX, 1, OPTAB_DIRECT);
        }
      break;
    case AND:
    case IOR:
    case XOR:
      new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
                                     NULL_RTX, 1, OPTAB_DIRECT);
      break;
    case MULT: /* NAND */
      new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
                                     NULL_RTX, 1, OPTAB_DIRECT);
      new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
                                     NULL_RTX, 1, OPTAB_DIRECT);
      break;
    default:
      gcc_unreachable ();
    }

  s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
                                                      ac.memsi, cmp, new_rtx,
                                                      CCZ1mode));

  /* Return the correct part of the bitfield.  */
  if (target)
    convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
                                               after ? new_rtx : cmp, ac.shift,
                                               NULL_RTX, 1, OPTAB_DIRECT), 1);
}
/* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
   We need to emit DTP-relative relocations.  */

static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;

static void
s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
{
  switch (size)
    {
    case 4:
      fputs ("\t.long\t", file);
      break;
    case 8:
      fputs ("\t.quad\t", file);
      break;
    default:
      gcc_unreachable ();
    }
  output_addr_const (file, x);
  fputs ("@DTPOFF", file);
}

/* Return the proper mode for REGNO being represented in the dwarf
   unwind table.  */

static machine_mode
s390_dwarf_frame_reg_mode (int regno)
{
  machine_mode save_mode = default_dwarf_frame_reg_mode (regno);

  /* Make sure not to return DImode for any GPR with -m31 -mzarch.  */
  if (GENERAL_REGNO_P (regno))
    save_mode = Pmode;

  /* The rightmost 64 bits of vector registers are call-clobbered.  */
  if (GET_MODE_SIZE (save_mode) > 8)
    save_mode = DImode;

  return save_mode;
}

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
/* Implement TARGET_MANGLE_TYPE.  */

static const char *
s390_mangle_type (const_tree type)
{
  type = TYPE_MAIN_VARIANT (type);

  if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
      && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
    return NULL;

  if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
  if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
  if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
  if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";

  if (TYPE_MAIN_VARIANT (type) == long_double_type_node
      && TARGET_LONG_DOUBLE_128)
    return "g";

  /* For all other types, use normal C++ mangling.  */
  return NULL;
}
#endif

/* In the name of slightly smaller debug output, and to cater to
   general assembler lossage, recognize various UNSPEC sequences
   and turn them back into a direct symbol reference.  */

static rtx
s390_delegitimize_address (rtx orig_x)
{
  rtx x, y;

  orig_x = delegitimize_mem_from_attrs (orig_x);
  x = orig_x;

  /* Extract the symbol ref from:
     (plus:SI (reg:SI 12 %r12)
              (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
                                   UNSPEC_GOTOFF/PLTOFF)))
     and
     (plus:SI (reg:SI 12 %r12)
              (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
                                            UNSPEC_GOTOFF/PLTOFF)
                                 (const_int 4 [0x4]))))  */
  if (GET_CODE (x) == PLUS
      && REG_P (XEXP (x, 0))
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
      && GET_CODE (XEXP (x, 1)) == CONST)
    {
      HOST_WIDE_INT offset = 0;

      /* The const operand.  */
      y = XEXP (XEXP (x, 1), 0);

      if (GET_CODE (y) == PLUS
          && GET_CODE (XEXP (y, 1)) == CONST_INT)
        {
          offset = INTVAL (XEXP (y, 1));
          y = XEXP (y, 0);
        }

      if (GET_CODE (y) == UNSPEC
          && (XINT (y, 1) == UNSPEC_GOTOFF
              || XINT (y, 1) == UNSPEC_PLTOFF))
        return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
    }

  if (GET_CODE (x) != MEM)
    return orig_x;

  x = XEXP (x, 0);
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == CONST
      && GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    {
      y = XEXP (XEXP (x, 1), 0);
      if (GET_CODE (y) == UNSPEC
          && XINT (y, 1) == UNSPEC_GOT)
        y = XVECEXP (y, 0, 0);
      else
        return orig_x;
    }
  else if (GET_CODE (x) == CONST)
    {
      /* Extract the symbol ref from:
         (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
                                      UNSPEC_PLT/GOTENT)))  */

      y = XEXP (x, 0);
      if (GET_CODE (y) == UNSPEC
          && (XINT (y, 1) == UNSPEC_GOTENT
              || XINT (y, 1) == UNSPEC_PLT31))
        y = XVECEXP (y, 0, 0);
      else
        return orig_x;
    }
  else
    return orig_x;

  if (GET_MODE (orig_x) != Pmode)
    {
      if (GET_MODE (orig_x) == BLKmode)
        return orig_x;
      y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
      if (y == NULL_RTX)
        return orig_x;
    }
  return y;
}

/* Output operand OP to stdio stream FILE.
   OP is an address (register + offset) which is not used to address data;
   instead the rightmost bits are interpreted as the value.  */

static void
print_addrstyle_operand (FILE *file, rtx op)
{
  HOST_WIDE_INT offset;
  rtx base;

  /* Extract base register and offset.  */
  if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
    gcc_unreachable ();

  /* Sanity check.  */
  if (base)
    {
      gcc_assert (GET_CODE (base) == REG);
      gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
      gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
    }

  /* Offsets are constricted to twelve bits.  */
  fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
  if (base)
    fprintf (file, "(%s)", reg_names[REGNO (base)]);
}

/* Print the shift count operand OP to FILE.
   OP is an address-style operand in a form which
   s390_valid_shift_count permits.  Subregs and no-op
   and-masking of the operand are stripped.  */

static void
print_shift_count_operand (FILE *file, rtx op)
{
  /* No checking of the and mask required here.  */
  if (!s390_valid_shift_count (op, 0))
    gcc_unreachable ();

  while (op && GET_CODE (op) == SUBREG)
    op = SUBREG_REG (op);

  if (GET_CODE (op) == AND)
    op = XEXP (op, 0);

  print_addrstyle_operand (file, op);
}

/* Assigns the number of NOP halfwords to be emitted before and after the
   function label to *HW_BEFORE and *HW_AFTER.  Both pointers must not be
   NULL.  If hotpatching is disabled for the function, the values are set
   to zero.  */

static void
s390_function_num_hotpatch_hw (tree decl,
                               int *hw_before,
                               int *hw_after)
{
  tree attr;

  attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));

  /* Handle the arguments of the hotpatch attribute.  The values
     specified via attribute might override the cmdline argument
     values.  */
  if (attr)
    {
      tree args = TREE_VALUE (attr);

      *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
      *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
    }
  else
    {
      /* Use the values specified by the cmdline arguments.  */
      *hw_before = s390_hotpatch_hw_before_label;
      *hw_after = s390_hotpatch_hw_after_label;
    }
}

/* Write the current .machine and .machinemode specification to the assembler
   file.  */

#ifdef HAVE_AS_MACHINE_MACHINEMODE
static void
s390_asm_output_machine_for_arch (FILE *asm_out_file)
{
  fprintf (asm_out_file, "\t.machinemode %s\n",
           (TARGET_ZARCH) ? "zarch" : "esa");
  fprintf (asm_out_file, "\t.machine \"%s",
           processor_table[s390_arch].binutils_name);
  if (S390_USE_ARCHITECTURE_MODIFIERS)
    {
      int cpu_flags;

      cpu_flags = processor_flags_table[(int) s390_arch];
      if (TARGET_HTM && !(cpu_flags & PF_TX))
        fprintf (asm_out_file, "+htm");
      else if (!TARGET_HTM && (cpu_flags & PF_TX))
        fprintf (asm_out_file, "+nohtm");
      if (TARGET_VX && !(cpu_flags & PF_VX))
        fprintf (asm_out_file, "+vx");
      else if (!TARGET_VX && (cpu_flags & PF_VX))
        fprintf (asm_out_file, "+novx");
    }
  fprintf (asm_out_file, "\"\n");
}
/* Write an extra function header before the very start of the function.  */

static void
s390_asm_output_function_prefix (FILE *asm_out_file,
                                 const char *fnname ATTRIBUTE_UNUSED)
{
  if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
    return;
  /* Since only the function specific options are saved but not the indications
     which options are set, it's too much work here to figure out which options
     have actually changed.  Thus, generate .machine and .machinemode whenever a
     function has the target attribute or pragma.  */
  fprintf (asm_out_file, "\t.machinemode push\n");
  fprintf (asm_out_file, "\t.machine push\n");
  s390_asm_output_machine_for_arch (asm_out_file);
}

/* Write an extra function footer after the very end of the function.  */

static void
s390_asm_declare_function_size (FILE *asm_out_file,
                                const char *fnname, tree decl)
{
  if (!flag_inhibit_size_directive)
    ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
  if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
    return;
  fprintf (asm_out_file, "\t.machine pop\n");
  fprintf (asm_out_file, "\t.machinemode pop\n");
}
#endif

/* Write the extra assembler code needed to declare a function properly.  */

void
s390_asm_output_function_label (FILE *out_file, const char *fname,
                                tree decl)
{
  int hw_before, hw_after;

  s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
  if (hw_before > 0)
    {
      unsigned int function_alignment;
      int i;

      /* Add a trampoline code area before the function label and initialize it
         with two-byte nop instructions.  This area can be overwritten with code
         that jumps to a patched version of the function.  */
      asm_fprintf (out_file, "\tnopr\t%%r0"
                   "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
                   hw_before);
      for (i = 1; i < hw_before; i++)
        fputs ("\tnopr\t%r0\n", out_file);

      /* Note:  The function label must be aligned so that (a) the bytes of the
         following nop do not cross a cacheline boundary, and (b) a jump address
         (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
         stored directly before the label without crossing a cacheline
         boundary.  All this is necessary to make sure the trampoline code can
         be changed atomically.
         This alignment is done automatically using the FUNCTION_BOUNDARY, but
         if there are NOPs before the function label, the alignment is placed
         before them.  So it is necessary to duplicate the alignment after the
         NOPs.  */
      function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
      if (! DECL_USER_ALIGN (decl))
        function_alignment
          = MAX (function_alignment,
                 (unsigned int) align_functions.levels[0].get_value ());
      fputs ("\t# alignment for hotpatch\n", out_file);
      ASM_OUTPUT_ALIGN (out_file, align_functions.levels[0].log);
    }

  if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
    {
      asm_fprintf (out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
      asm_fprintf (out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
      asm_fprintf (out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
      asm_fprintf (out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
      asm_fprintf (out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
      asm_fprintf (out_file, "\t# fn:%s wf%d\n", fname,
                   s390_warn_framesize);
      asm_fprintf (out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
      asm_fprintf (out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
      asm_fprintf (out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
      asm_fprintf (out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
      asm_fprintf (out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
      asm_fprintf (out_file, "\t# fn:%s ps%d\n", fname,
                   TARGET_PACKED_STACK);
      asm_fprintf (out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
      asm_fprintf (out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
      asm_fprintf (out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
      asm_fprintf (out_file, "\t# fn:%s wd%d\n", fname,
                   s390_warn_dynamicstack_p);
    }
  ASM_OUTPUT_LABEL (out_file, fname);
  if (hw_after > 0)
    asm_fprintf (out_file,
                 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
                 hw_after);
}
/* Output machine-dependent UNSPECs occurring in address constant X
   in assembler syntax to stdio stream FILE.  Returns true if the
   constant X could be recognized, false otherwise.  */

static bool
s390_output_addr_const_extra (FILE *file, rtx x)
{
  if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
    switch (XINT (x, 1))
      {
      case UNSPEC_GOTENT:
        output_addr_const (file, XVECEXP (x, 0, 0));
        fprintf (file, "@GOTENT");
        return true;
      case UNSPEC_GOT:
        output_addr_const (file, XVECEXP (x, 0, 0));
        fprintf (file, "@GOT");
        return true;
      case UNSPEC_GOTOFF:
        output_addr_const (file, XVECEXP (x, 0, 0));
        fprintf (file, "@GOTOFF");
        return true;
      case UNSPEC_PLT31:
        output_addr_const (file, XVECEXP (x, 0, 0));
        fprintf (file, "@PLT");
        return true;
      case UNSPEC_PLTOFF:
        output_addr_const (file, XVECEXP (x, 0, 0));
        fprintf (file, "@PLTOFF");
        return true;
      case UNSPEC_TLSGD:
        output_addr_const (file, XVECEXP (x, 0, 0));
        fprintf (file, "@TLSGD");
        return true;
      case UNSPEC_TLSLDM:
        assemble_name (file, get_some_local_dynamic_name ());
        fprintf (file, "@TLSLDM");
        return true;
      case UNSPEC_DTPOFF:
        output_addr_const (file, XVECEXP (x, 0, 0));
        fprintf (file, "@DTPOFF");
        return true;
      case UNSPEC_NTPOFF:
        output_addr_const (file, XVECEXP (x, 0, 0));
        fprintf (file, "@NTPOFF");
        return true;
      case UNSPEC_GOTNTPOFF:
        output_addr_const (file, XVECEXP (x, 0, 0));
        fprintf (file, "@GOTNTPOFF");
        return true;
      case UNSPEC_INDNTPOFF:
        output_addr_const (file, XVECEXP (x, 0, 0));
        fprintf (file, "@INDNTPOFF");
        return true;
      }

  if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
    switch (XINT (x, 1))
      {
      case UNSPEC_POOL_OFFSET:
        x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
        output_addr_const (file, x);
        return true;
      }

  return false;
}

/* Output address operand ADDR in assembler syntax to
   stdio stream FILE.  */

static void
print_operand_address (FILE *file, rtx addr)
{
  struct s390_address ad;
  memset (&ad, 0, sizeof (s390_address));

  if (s390_loadrelative_operand_p (addr, NULL, NULL))
    {
      if (!TARGET_Z10)
        {
          output_operand_lossage ("symbolic memory references are "
                                  "only supported on z10 or later");
          return;
        }
      output_addr_const (file, addr);
      return;
    }

  if (!s390_decompose_address (addr, &ad)
      || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
      || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
    output_operand_lossage ("cannot decompose address");

  if (ad.disp)
    output_addr_const (file, ad.disp);
  else
    fprintf (file, "0");

  if (ad.base && ad.indx)
    fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
             reg_names[REGNO (ad.base)]);
  else if (ad.base)
    fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
}
/* Output operand X in assembler syntax to stdio stream FILE.
   CODE specifies the format flag.  The following format flags
   are recognized:

   'A': On z14 or higher: If operand is a mem print the alignment
        hint usable with vl/vst prefixed by a comma.
   'C': print opcode suffix for branch condition.
   'D': print opcode suffix for inverse branch condition.
   'E': print opcode suffix for branch on index instruction.
   'G': print the size of the operand in bytes.
   'J': print tls_load/tls_gdcall/tls_ldcall suffix
   'K': print @PLT suffix for call targets and load address values.
   'M': print the second word of a TImode operand.
   'N': print the second word of a DImode operand.
   'O': print only the displacement of a memory reference or address.
   'R': print only the base register of a memory reference or address.
   'S': print S-type memory reference (base+displacement).
   'Y': print address style operand without index (e.g. shift count or setmem
        operand).

   'b': print integer X as if it's an unsigned byte.
   'c': print integer X as if it's a signed byte.
   'e': "end" contiguous bitmask X in either DImode or vector inner mode.
   'f': "end" contiguous bitmask X in SImode.
   'h': print integer X as if it's a signed halfword.
   'i': print the first nonzero HImode part of X.
   'j': print the first HImode part unequal to -1 of X.
   'k': print the first nonzero SImode part of X.
   'm': print the first SImode part unequal to -1 of X.
   'o': print integer X as if it's an unsigned 32bit word.
   's': "start" of contiguous bitmask X in either DImode or vector inner mode.
   't': CONST_INT: "start" of contiguous bitmask X in SImode.
        CONST_VECTOR: Generate a bitmask for vgbm instruction.
   'x': print integer X as if it's an unsigned halfword.
   'v': print register number as vector register (v1 instead of f1).
   'V': print the second word of a TFmode operand as vector register.  */

static void
print_operand (FILE *file, rtx x, int code)
{
  HOST_WIDE_INT ival;

  switch (code)
    {
    case 'A':
      if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS && MEM_P (x))
        {
          if (MEM_ALIGN (x) >= 128)
            fprintf (file, ",4");
          else if (MEM_ALIGN (x) == 64)
            fprintf (file, ",3");
        }
      return;

    case 'C':
      fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
      return;

    case 'D':
      fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
      return;

    case 'E':
      if (GET_CODE (x) == LE)
        fprintf (file, "l");
      else if (GET_CODE (x) == GT)
        fprintf (file, "h");
      else
        output_operand_lossage ("invalid comparison operator "
                                "for 'E' output modifier");
      return;

    case 'J':
      if (GET_CODE (x) == SYMBOL_REF)
        {
          fprintf (file, "%s", ":tls_load:");
          output_addr_const (file, x);
        }
      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
        {
          fprintf (file, "%s", ":tls_gdcall:");
          output_addr_const (file, XVECEXP (x, 0, 0));
        }
      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
        {
          fprintf (file, "%s", ":tls_ldcall:");
          const char *name = get_some_local_dynamic_name ();
          gcc_assert (name);
          assemble_name (file, name);
        }
      else
        output_operand_lossage ("invalid reference for 'J' output modifier");
      return;

    case 'G':
      fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
      return;

    case 'O':
      {
        struct s390_address ad;
        int ret;

        ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);

        if (!ret
            || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
            || ad.indx)
          {
            output_operand_lossage ("invalid address for 'O' output modifier");
            return;
          }

        if (ad.disp)
          output_addr_const (file, ad.disp);
        else
          fprintf (file, "0");
      }
      return;

    case 'R':
      {
        struct s390_address ad;
        int ret;

        ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);

        if (!ret
            || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
            || ad.indx)
          {
            output_operand_lossage ("invalid address for 'R' output modifier");
            return;
          }

        if (ad.base)
          fprintf (file, "%s", reg_names[REGNO (ad.base)]);
        else
          fprintf (file, "0");
      }
      return;

    case 'S':
      {
        struct s390_address ad;
        int ret;

        if (!MEM_P (x))
          {
            output_operand_lossage ("memory reference expected for "
                                    "'S' output modifier");
            return;
          }
        ret = s390_decompose_address (XEXP (x, 0), &ad);

        if (!ret
            || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
            || ad.indx)
          {
            output_operand_lossage ("invalid address for 'S' output modifier");
            return;
          }

        if (ad.disp)
          output_addr_const (file, ad.disp);
        else
          fprintf (file, "0");

        if (ad.base)
          fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
      }
      return;

    case 'N':
      if (GET_CODE (x) == REG)
        x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
      else if (GET_CODE (x) == MEM)
        x = change_address (x, VOIDmode,
                            plus_constant (Pmode, XEXP (x, 0), 4));
      else
        output_operand_lossage ("register or memory expression expected "
                                "for 'N' output modifier");
      break;

    case 'M':
      if (GET_CODE (x) == REG)
        x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
      else if (GET_CODE (x) == MEM)
        x = change_address (x, VOIDmode,
                            plus_constant (Pmode, XEXP (x, 0), 8));
      else
        output_operand_lossage ("register or memory expression expected "
                                "for 'M' output modifier");
      break;

    case 'Y':
      print_shift_count_operand (file, x);
      return;

    case 'K':
      /* Append @PLT to both local and non-local symbols in order to support
         Linux Kernel livepatching: patches contain individual functions and
         are loaded further than 2G away from vmlinux, and therefore they must
         call even static functions via PLT.  ld will optimize @PLT away for
         normal code, and keep it for patches.

         Do not indiscriminately add @PLT in 31-bit mode due to the %r12
         restriction, use UNSPEC_PLT31 instead.

         @PLT only makes sense for functions, data is taken care of by
         -mno-pic-data-is-text-relative.

         Adding @PLT interferes with handling of weak symbols in non-PIC code,
         since their addresses are loaded with larl, which then always produces
         a non-NULL result, so skip them here as well.  */
      if (TARGET_64BIT
          && GET_CODE (x) == SYMBOL_REF
          && SYMBOL_REF_FUNCTION_P (x)
          && !(SYMBOL_REF_WEAK (x) && !flag_pic))
        fprintf (file, "@PLT");
      return;
    }

  switch (GET_CODE (x))
    {
    case REG:
      /* Print FP regs as fx instead of vx when they are accessed
         through non-vector mode.  */
      if ((code == 'v' || code == 'V')
          || VECTOR_NOFP_REG_P (x)
          || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
          || (VECTOR_REG_P (x)
              && (GET_MODE_SIZE (GET_MODE (x)) /
                  s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
        fprintf (file, "%%v%s", reg_names[REGNO (x) + (code == 'V')] + 2);
      else
        fprintf (file, "%s", reg_names[REGNO (x)]);
      break;

    case MEM:
      output_address (GET_MODE (x), XEXP (x, 0));
      break;

    case CONST:
    case CODE_LABEL:
    case LABEL_REF:
    case SYMBOL_REF:
      output_addr_const (file, x);
      break;

    case CONST_INT:
      ival = INTVAL (x);
      switch (code)
        {
        case 0:
          break;
        case 'b':
          ival &= 0xff;
          break;
        case 'c':
          ival = ((ival & 0xff) ^ 0x80) - 0x80;
          break;
        case 'x':
          ival &= 0xffff;
          break;
        case 'h':
          ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
          break;
        case 'i':
          ival = s390_extract_part (x, HImode, 0);
          break;
        case 'j':
          ival = s390_extract_part (x, HImode, -1);
          break;
        case 'k':
          ival = s390_extract_part (x, SImode, 0);
          break;
        case 'm':
          ival = s390_extract_part (x, SImode, -1);
          break;
        case 'e': case 'f':
        case 's': case 't':
          {
            int start, end;
            int len;
            bool ok;

            len = (code == 's' || code == 'e' ? 64 : 32);
            ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
            gcc_assert (ok);
            if (code == 's' || code == 't')
              ival = start;
            else
              ival = end;
          }
          break;
        default:
          output_operand_lossage ("invalid constant for output modifier '%c'",
                                  code);
        }
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
      break;

    case CONST_WIDE_INT:
      if (code == 'b')
        fprintf (file, HOST_WIDE_INT_PRINT_DEC,
                 CONST_WIDE_INT_ELT (x, 0) & 0xff);
      else if (code == 'x')
        fprintf (file, HOST_WIDE_INT_PRINT_DEC,
                 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
      else if (code == 'h')
        fprintf (file, HOST_WIDE_INT_PRINT_DEC,
                 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
      else
        {
          if (code == 0)
            output_operand_lossage ("invalid constant - try using "
                                    "an output modifier");
          else
            output_operand_lossage ("invalid constant for output modifier "
                                    "'%c'", code);
        }
      break;

    case CONST_VECTOR:
      switch (code)
        {
        case 'h':
          gcc_assert (const_vec_duplicate_p (x));
          fprintf (file, HOST_WIDE_INT_PRINT_DEC,
                   ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
          break;
        case 'e':
        case 's':
          {
            int start, end;
            bool ok;

            ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
            gcc_assert (ok);
            ival = (code == 's') ? start : end;
            fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
          }
          break;
        case 't':
          {
            unsigned mask;
            bool ok = s390_bytemask_vector_p (x, &mask);
            gcc_assert (ok);
            fprintf (file, "%u", mask);
          }
          break;

        default:
          output_operand_lossage ("invalid constant vector for output "
                                  "modifier '%c'", code);
        }
      break;

    default:
      if (code == 0)
        output_operand_lossage ("invalid expression - try using "
                                "an output modifier");
      else
        output_operand_lossage ("invalid expression for output "
                                "modifier '%c'", code);
      break;
    }
}
/* Target hook for assembling integer objects.  We need to define it
   here to work around a bug in some versions of GAS, which couldn't
   handle values smaller than INT_MIN when printed in decimal.  */

static bool
s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  if (size == 8 && aligned_p
      && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
    {
      fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
               INTVAL (x));
      return true;
    }

  return default_assemble_integer (x, size, aligned_p);
}

/* Returns true if register REGNO is used for forming
   a memory address in expression X.  */

static bool
reg_used_in_mem_p (int regno, rtx x)
{
  enum rtx_code code = GET_CODE (x);
  int i, j;
  const char *fmt;

  if (code == MEM)
    {
      if (refers_to_regno_p (regno, XEXP (x, 0)))
        return true;
    }
  else if (code == SET
           && GET_CODE (SET_DEST (x)) == PC)
    {
      if (refers_to_regno_p (regno, SET_SRC (x)))
        return true;
    }

  fmt = GET_RTX_FORMAT (code);
  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e'
          && reg_used_in_mem_p (regno, XEXP (x, i)))
        return true;

      else if (fmt[i] == 'E')
        for (j = 0; j < XVECLEN (x, i); j++)
          if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
            return true;
    }
  return false;
}

/* Returns true if expression DEP_RTX sets an address register
   used by instruction INSN to address memory.  */

static bool
addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
{
  rtx target, pat;

  if (NONJUMP_INSN_P (dep_rtx))
    dep_rtx = PATTERN (dep_rtx);

  if (GET_CODE (dep_rtx) == SET)
    {
      target = SET_DEST (dep_rtx);
      if (GET_CODE (target) == STRICT_LOW_PART)
        target = XEXP (target, 0);
      while (GET_CODE (target) == SUBREG)
        target = SUBREG_REG (target);

      if (GET_CODE (target) == REG)
        {
          int regno = REGNO (target);

          if (s390_safe_attr_type (insn) == TYPE_LA)
            {
              pat = PATTERN (insn);
              if (GET_CODE (pat) == PARALLEL)
                {
                  gcc_assert (XVECLEN (pat, 0) == 2);
                  pat = XVECEXP (pat, 0, 0);
                }
              gcc_assert (GET_CODE (pat) == SET);
              return refers_to_regno_p (regno, SET_SRC (pat));
            }
          else if (get_attr_atype (insn) == ATYPE_AGEN)
            return reg_used_in_mem_p (regno, PATTERN (insn));
        }
    }
  return false;
}

/* Return 1, if dep_insn sets register used in insn in the agen unit.  */

int
s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
{
  rtx dep_rtx = PATTERN (dep_insn);
  int i;

  if (GET_CODE (dep_rtx) == SET
      && addr_generation_dependency_p (dep_rtx, insn))
    return 1;
  else if (GET_CODE (dep_rtx) == PARALLEL)
    {
      for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
        {
          if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
            return 1;
        }
    }
  return 0;
}

/* A C statement (sans semicolon) to update the integer scheduling priority
   INSN_PRIORITY (INSN).  Increase the priority to execute the INSN earlier,
   reduce the priority to execute INSN later.  Do not define this macro if
   you do not need to adjust the scheduling priorities of insns.

   A STD instruction should be scheduled earlier,
   in order to use the bypass.  */

static int
s390_adjust_priority (rtx_insn *insn, int priority)
{
  if (! INSN_P (insn))
    return priority;

  if (s390_tune <= PROCESSOR_2064_Z900)
    return priority;

  switch (s390_safe_attr_type (insn))
    {
    case TYPE_FSTOREDF:
    case TYPE_FSTORESF:
      priority = priority << 3;
      break;
    case TYPE_STORE:
    case TYPE_STM:
      priority = priority << 1;
      break;
    default:
      break;
    }
  return priority;
}
/* The number of instructions that can be issued per cycle.  */

static int
s390_issue_rate (void)
{
  switch (s390_tune)
    {
    case PROCESSOR_2084_Z990:
    case PROCESSOR_2094_Z9_109:
    case PROCESSOR_2094_Z9_EC:
    case PROCESSOR_2817_Z196:
      return 3;
    case PROCESSOR_2097_Z10:
      return 2;
    case PROCESSOR_2064_Z900:
      /* Starting with EC12 we use the sched_reorder hook to take care
         of instruction dispatch constraints.  The algorithm only
         picks the best instruction and assumes only a single
         instruction gets issued per cycle.  */
    case PROCESSOR_2827_ZEC12:
    case PROCESSOR_2964_Z13:
    case PROCESSOR_3906_Z14:
    case PROCESSOR_ARCH14:
    default:
      return 1;
    }
}

static int
s390_first_cycle_multipass_dfa_lookahead (void)
{
  return 4;
}
/* Annotate every literal pool reference in *X by an UNSPEC_LTREF expression.
   Fix up MEMs as required.  Helper for annotate_constant_pool_refs.  */

static void
annotate_constant_pool_refs_1 (rtx *x)
{
  int i, j;
  const char *fmt;

  gcc_assert (GET_CODE (*x) != SYMBOL_REF
              || !CONSTANT_POOL_ADDRESS_P (*x));

  /* Literal pool references can only occur inside a MEM ...  */
  if (GET_CODE (*x) == MEM)
    {
      rtx memref = XEXP (*x, 0);

      if (GET_CODE (memref) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (memref))
        {
          rtx base = cfun->machine->base_reg;
          rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
                                     UNSPEC_LTREF);

          *x = replace_equiv_address (*x, addr);
          return;
        }

      if (GET_CODE (memref) == CONST
          && GET_CODE (XEXP (memref, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
          && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
        {
          HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
          rtx sym = XEXP (XEXP (memref, 0), 0);
          rtx base = cfun->machine->base_reg;
          rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
                                     UNSPEC_LTREF);

          *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
          return;
        }
    }

  /* ... or a load-address type pattern.  */
  if (GET_CODE (*x) == SET)
    {
      rtx addrref = SET_SRC (*x);

      if (GET_CODE (addrref) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (addrref))
        {
          rtx base = cfun->machine->base_reg;
          rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
                                     UNSPEC_LTREF);

          SET_SRC (*x) = addr;
          return;
        }

      if (GET_CODE (addrref) == CONST
          && GET_CODE (XEXP (addrref, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
          && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
          && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
        {
          HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
          rtx sym = XEXP (XEXP (addrref, 0), 0);
          rtx base = cfun->machine->base_reg;
          rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
                                     UNSPEC_LTREF);

          SET_SRC (*x) = plus_constant (Pmode, addr, off);
          return;
        }
    }

  fmt = GET_RTX_FORMAT (GET_CODE (*x));
  for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          annotate_constant_pool_refs_1 (&XEXP (*x, i));
        }
      else if (fmt[i] == 'E')
        {
          for (j = 0; j < XVECLEN (*x, i); j++)
            annotate_constant_pool_refs_1 (&XVECEXP (*x, i, j));
        }
    }
}

/* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
   Fix up MEMs as required.
   Skip insns which support relative addressing, because they do not use a base
   register.  */

static void
annotate_constant_pool_refs (rtx_insn *insn)
{
  if (s390_safe_relative_long_p (insn))
    return;
  annotate_constant_pool_refs_1 (&PATTERN (insn));
}

/* Helper for find_constant_pool_ref.  Recursively scan X.  */

static void
find_constant_pool_ref_1 (rtx x, rtx *ref)
{
  int i, j;
  const char *fmt;

  /* Ignore POOL_ENTRY insns.  */
  if (GET_CODE (x) == UNSPEC_VOLATILE
      && XINT (x, 1) == UNSPECV_POOL_ENTRY)
    return;

  gcc_assert (GET_CODE (x) != SYMBOL_REF
              || !CONSTANT_POOL_ADDRESS_P (x));

  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
    {
      rtx sym = XVECEXP (x, 0, 0);
      gcc_assert (GET_CODE (sym) == SYMBOL_REF
                  && CONSTANT_POOL_ADDRESS_P (sym));

      if (*ref == NULL_RTX)
        *ref = sym;
      else
        gcc_assert (*ref == sym);

      return;
    }

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          find_constant_pool_ref_1 (XEXP (x, i), ref);
        }
      else if (fmt[i] == 'E')
        {
          for (j = 0; j < XVECLEN (x, i); j++)
            find_constant_pool_ref_1 (XVECEXP (x, i, j), ref);
        }
    }
}

/* Find an annotated literal pool symbol referenced in INSN,
   and store it at REF.  Will abort if INSN contains references to
   more than one such pool symbol; multiple references to the same
   symbol are allowed, however.

   The rtx pointed to by REF must be initialized to NULL_RTX
   by the caller before calling this routine.

   Skip insns which support relative addressing, because they do not use a base
   register.  */

static void
find_constant_pool_ref (rtx_insn *insn, rtx *ref)
{
  if (s390_safe_relative_long_p (insn))
    return;
  find_constant_pool_ref_1 (PATTERN (insn), ref);
}

/* Helper for replace_constant_pool_ref.  Replace the references
   within *X.  */

static void
replace_constant_pool_ref_1 (rtx *x, rtx ref, rtx offset)
{
  int i, j;
  const char *fmt;

  gcc_assert (*x != ref);

  if (GET_CODE (*x) == UNSPEC
      && XINT (*x, 1) == UNSPEC_LTREF
      && XVECEXP (*x, 0, 0) == ref)
    {
      *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
      return;
    }

  if (GET_CODE (*x) == PLUS
      && GET_CODE (XEXP (*x, 1)) == CONST_INT
      && GET_CODE (XEXP (*x, 0)) == UNSPEC
      && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
      && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
    {
      rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
      *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
      return;
    }

  fmt = GET_RTX_FORMAT (GET_CODE (*x));
  for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'e')
        {
          replace_constant_pool_ref_1 (&XEXP (*x, i), ref, offset);
        }
      else if (fmt[i] == 'E')
        {
          for (j = 0; j < XVECLEN (*x, i); j++)
            replace_constant_pool_ref_1 (&XVECEXP (*x, i, j), ref, offset);
        }
    }
}

/* Replace every reference to the annotated literal pool
   symbol REF in INSN by its base plus OFFSET.
   Skip insns which support relative addressing, because they do not use a base
   register.  */

static void
replace_constant_pool_ref (rtx_insn *insn, rtx ref, rtx offset)
{
  if (s390_safe_relative_long_p (insn))
    return;
  replace_constant_pool_ref_1 (&PATTERN (insn), ref, offset);
}
8760 /* We keep a list of constants which we have to add to internal
8761 constant tables in the middle of large functions. */
8763 static machine_mode constant_modes
[] =
8765 TFmode
, FPRX2mode
, TImode
, TDmode
,
8766 V16QImode
, V8HImode
, V4SImode
, V2DImode
, V1TImode
,
8767 V4SFmode
, V2DFmode
, V1TFmode
,
8768 DFmode
, DImode
, DDmode
,
8769 V8QImode
, V4HImode
, V2SImode
, V1DImode
, V2SFmode
, V1DFmode
,
8770 SFmode
, SImode
, SDmode
,
8771 V4QImode
, V2HImode
, V1SImode
, V1SFmode
,
8777 #define NR_C_MODES (sizeof (constant_modes) / sizeof (constant_modes[0]))
8781 struct constant
*next
;
8783 rtx_code_label
*label
;
8786 struct constant_pool
8788 struct constant_pool
*next
;
8789 rtx_insn
*first_insn
;
8790 rtx_insn
*pool_insn
;
8792 rtx_insn
*emit_pool_after
;
8794 struct constant
*constants
[NR_C_MODES
];
8795 struct constant
*execute
;
8796 rtx_code_label
*label
;
8800 /* Allocate new constant_pool structure. */
8802 static struct constant_pool
*
8803 s390_alloc_pool (void)
8805 struct constant_pool
*pool
;
8808 pool
= (struct constant_pool
*) xmalloc (sizeof *pool
);
8810 for (i
= 0; i
< NR_C_MODES
; i
++)
8811 pool
->constants
[i
] = NULL
;
8813 pool
->execute
= NULL
;
8814 pool
->label
= gen_label_rtx ();
8815 pool
->first_insn
= NULL
;
8816 pool
->pool_insn
= NULL
;
8817 pool
->insns
= BITMAP_ALLOC (NULL
);
8819 pool
->emit_pool_after
= NULL
;
8824 /* Create new constant pool covering instructions starting at INSN
8825 and chain it to the end of POOL_LIST. */
8827 static struct constant_pool
*
8828 s390_start_pool (struct constant_pool
**pool_list
, rtx_insn
*insn
)
8830 struct constant_pool
*pool
, **prev
;
8832 pool
= s390_alloc_pool ();
8833 pool
->first_insn
= insn
;
8835 for (prev
= pool_list
; *prev
; prev
= &(*prev
)->next
)
8842 /* End range of instructions covered by POOL at INSN and emit
8843 placeholder insn representing the pool. */
8846 s390_end_pool (struct constant_pool
*pool
, rtx_insn
*insn
)
8848 rtx pool_size
= GEN_INT (pool
->size
+ 8 /* alignment slop */);
8851 insn
= get_last_insn ();
8853 pool
->pool_insn
= emit_insn_after (gen_pool (pool_size
), insn
);
8854 INSN_ADDRESSES_NEW (pool
->pool_insn
, -1);
8857 /* Add INSN to the list of insns covered by POOL. */
8860 s390_add_pool_insn (struct constant_pool
*pool
, rtx insn
)
8862 bitmap_set_bit (pool
->insns
, INSN_UID (insn
));
8865 /* Return pool out of POOL_LIST that covers INSN. */
8867 static struct constant_pool
*
8868 s390_find_pool (struct constant_pool
*pool_list
, rtx insn
)
8870 struct constant_pool
*pool
;
8872 for (pool
= pool_list
; pool
; pool
= pool
->next
)
8873 if (bitmap_bit_p (pool
->insns
, INSN_UID (insn
)))
8879 /* Add constant VAL of mode MODE to the constant pool POOL. */
8882 s390_add_constant (struct constant_pool
*pool
, rtx val
, machine_mode mode
)
8887 for (i
= 0; i
< NR_C_MODES
; i
++)
8888 if (constant_modes
[i
] == mode
)
8890 gcc_assert (i
!= NR_C_MODES
);
8892 for (c
= pool
->constants
[i
]; c
!= NULL
; c
= c
->next
)
8893 if (rtx_equal_p (val
, c
->value
))
8898 c
= (struct constant
*) xmalloc (sizeof *c
);
8900 c
->label
= gen_label_rtx ();
8901 c
->next
= pool
->constants
[i
];
8902 pool
->constants
[i
] = c
;
8903 pool
->size
+= GET_MODE_SIZE (mode
);
8907 /* Return an rtx that represents the offset of X from the start of
8911 s390_pool_offset (struct constant_pool
*pool
, rtx x
)
8915 label
= gen_rtx_LABEL_REF (GET_MODE (x
), pool
->label
);
8916 x
= gen_rtx_UNSPEC (GET_MODE (x
), gen_rtvec (2, x
, label
),
8917 UNSPEC_POOL_OFFSET
);
8918 return gen_rtx_CONST (GET_MODE (x
), x
);
/* Find constant VAL of mode MODE in the constant pool POOL.
   Return an RTX describing the distance from the start of
   the pool to the location of the new constant.  */

static rtx
s390_find_constant (struct constant_pool *pool, rtx val,
		    machine_mode mode)
{
  struct constant *c;
  size_t i;

  for (i = 0; i < NR_C_MODES; i++)
    if (constant_modes[i] == mode)
      break;
  gcc_assert (i != NR_C_MODES);

  for (c = pool->constants[i]; c != NULL; c = c->next)
    if (rtx_equal_p (val, c->value))
      break;

  gcc_assert (c);

  return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
}
/* Check whether INSN is an execute.  Return the label_ref to its
   execute target template if so, NULL_RTX otherwise.  */

static rtx
s390_execute_label (rtx insn)
{
  if (INSN_P (insn)
      && GET_CODE (PATTERN (insn)) == PARALLEL
      && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
      && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
	  || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
    {
      if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
	return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
      else
	{
	  gcc_assert (JUMP_P (insn));
	  /* For jump insns as execute target:
	     - There is one operand less in the parallel (the
	       modification register of the execute is always 0).
	     - The execute target label is wrapped into an
	       if_then_else in order to hide it from jump analysis.  */
	  return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
	}
    }

  return NULL_RTX;
}
/* Find execute target for INSN in the constant pool POOL.
   Return an RTX describing the distance from the start of
   the pool to the location of the execute target.  */

static rtx
s390_find_execute (struct constant_pool *pool, rtx insn)
{
  struct constant *c;

  for (c = pool->execute; c != NULL; c = c->next)
    if (INSN_UID (insn) == INSN_UID (c->value))
      break;

  gcc_assert (c);

  return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
}
/* For an execute INSN, extract the execute target template.  */

static rtx
s390_execute_target (rtx insn)
{
  rtx pattern = PATTERN (insn);
  gcc_assert (s390_execute_label (insn));

  if (XVECLEN (pattern, 0) == 2)
    {
      pattern = copy_rtx (XVECEXP (pattern, 0, 1));
    }
  else
    {
      rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
      int i;

      for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
	RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));

      pattern = gen_rtx_PARALLEL (VOIDmode, vec);
    }

  return pattern;
}
/* Indicate that INSN cannot be duplicated.  This is the case for
   execute insns that carry a unique label.  */

static bool
s390_cannot_copy_insn_p (rtx_insn *insn)
{
  rtx label = s390_execute_label (insn);
  return label && label != const0_rtx;
}
/* Dump out the constants in POOL.  If REMOTE_LABEL is true,
   do not emit the pool base label.  */

static void
s390_dump_pool (struct constant_pool *pool, bool remote_label)
{
  struct constant *c;
  rtx_insn *insn = pool->pool_insn;
  size_t i;

  /* Switch to rodata section.  */
  insn = emit_insn_after (gen_pool_section_start (), insn);
  INSN_ADDRESSES_NEW (insn, -1);

  /* Ensure minimum pool alignment.  */
  insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
  INSN_ADDRESSES_NEW (insn, -1);

  /* Emit pool base label.  */
  if (!remote_label)
    {
      insn = emit_label_after (pool->label, insn);
      INSN_ADDRESSES_NEW (insn, -1);
    }

  /* Dump constants in descending alignment requirement order,
     ensuring proper alignment for every constant.  */
  for (i = 0; i < NR_C_MODES; i++)
    for (c = pool->constants[i]; c; c = c->next)
      {
	/* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references.  */
	rtx value = copy_rtx (c->value);
	if (GET_CODE (value) == CONST
	    && GET_CODE (XEXP (value, 0)) == UNSPEC
	    && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
	    && XVECLEN (XEXP (value, 0), 0) == 1)
	  value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));

	insn = emit_label_after (c->label, insn);
	INSN_ADDRESSES_NEW (insn, -1);

	value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
					 gen_rtvec (1, value),
					 UNSPECV_POOL_ENTRY);
	insn = emit_insn_after (value, insn);
	INSN_ADDRESSES_NEW (insn, -1);
      }

  /* Ensure minimum alignment for instructions.  */
  insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
  INSN_ADDRESSES_NEW (insn, -1);

  /* Output in-pool execute template insns.  */
  for (c = pool->execute; c; c = c->next)
    {
      insn = emit_label_after (c->label, insn);
      INSN_ADDRESSES_NEW (insn, -1);

      insn = emit_insn_after (s390_execute_target (c->value), insn);
      INSN_ADDRESSES_NEW (insn, -1);
    }

  /* Switch back to previous section.  */
  insn = emit_insn_after (gen_pool_section_end (), insn);
  INSN_ADDRESSES_NEW (insn, -1);

  insn = emit_barrier_after (insn);
  INSN_ADDRESSES_NEW (insn, -1);

  /* Remove placeholder insn.  */
  remove_insn (pool->pool_insn);
}
/* Free all memory used by POOL.  */

static void
s390_free_pool (struct constant_pool *pool)
{
  struct constant *c, *next;
  size_t i;

  for (i = 0; i < NR_C_MODES; i++)
    for (c = pool->constants[i]; c; c = next)
      {
	next = c->next;
	free (c);
      }

  for (c = pool->execute; c; c = next)
    {
      next = c->next;
      free (c);
    }

  BITMAP_FREE (pool->insns);
  free (pool);
}
/* Collect main literal pool.  Return NULL on overflow.  */

static struct constant_pool *
s390_mainpool_start (void)
{
  struct constant_pool *pool;
  rtx_insn *insn;

  pool = s390_alloc_pool ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn)
	  && GET_CODE (PATTERN (insn)) == SET
	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
	{
	  /* There might be two main_pool instructions if base_reg
	     is call-clobbered; one for shrink-wrapped code and one
	     for the rest.  We want to keep the first.  */
	  if (pool->pool_insn)
	    {
	      insn = PREV_INSN (insn);
	      delete_insn (NEXT_INSN (insn));
	      continue;
	    }
	  pool->pool_insn = insn;
	}

      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	{
	  rtx pool_ref = NULL_RTX;
	  find_constant_pool_ref (insn, &pool_ref);
	  if (pool_ref)
	    {
	      rtx constant = get_pool_constant (pool_ref);
	      machine_mode mode = get_pool_mode (pool_ref);
	      s390_add_constant (pool, constant, mode);
	    }
	}

      /* If hot/cold partitioning is enabled we have to make sure that
	 the literal pool is emitted in the same section where the
	 initialization of the literal pool base pointer takes place.
	 emit_pool_after is only used in the non-overflow case on non
	 Z cpus where we can emit the literal pool at the end of the
	 function body within the text section.  */
      if (NOTE_P (insn)
	  && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
	  && !pool->emit_pool_after)
	pool->emit_pool_after = PREV_INSN (insn);
    }

  gcc_assert (pool->pool_insn || pool->size == 0);

  if (pool->size >= 4096)
    {
      /* We're going to chunkify the pool, so remove the main
	 pool placeholder insn.  */
      remove_insn (pool->pool_insn);

      s390_free_pool (pool);
      pool = NULL;
    }

  /* If the functions ends with the section where the literal pool
     should be emitted set the marker to its end.  */
  if (pool && !pool->emit_pool_after)
    pool->emit_pool_after = get_last_insn ();

  return pool;
}
/* POOL holds the main literal pool as collected by s390_mainpool_start.
   Modify the current function to output the pool constants as well as
   the pool register setup instruction.  */

static void
s390_mainpool_finish (struct constant_pool *pool)
{
  rtx base_reg = cfun->machine->base_reg;
  rtx set;
  rtx_insn *insn;

  /* If the pool is empty, we're done.  */
  if (pool->size == 0)
    {
      /* We don't actually need a base register after all.  */
      cfun->machine->base_reg = NULL_RTX;

      if (pool->pool_insn)
	remove_insn (pool->pool_insn);
      s390_free_pool (pool);
      return;
    }

  /* We need correct insn addresses.  */
  shorten_branches (get_insns ());

  /* Use a LARL to load the pool register.  The pool is
     located in the .rodata section, so we emit it after the function.  */
  set = gen_main_base_64 (base_reg, pool->label);
  insn = emit_insn_after (set, pool->pool_insn);
  INSN_ADDRESSES_NEW (insn, -1);
  remove_insn (pool->pool_insn);

  insn = get_last_insn ();
  pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
  INSN_ADDRESSES_NEW (pool->pool_insn, -1);

  s390_dump_pool (pool, 0);

  /* Replace all literal pool references.  */

  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	{
	  rtx addr, pool_ref = NULL_RTX;
	  find_constant_pool_ref (insn, &pool_ref);
	  if (pool_ref)
	    {
	      if (s390_execute_label (insn))
		addr = s390_find_execute (pool, insn);
	      else
		addr = s390_find_constant (pool, get_pool_constant (pool_ref),
					   get_pool_mode (pool_ref));

	      replace_constant_pool_ref (insn, pool_ref, addr);
	      INSN_CODE (insn) = -1;
	    }
	}
    }

  /* Free the pool.  */
  s390_free_pool (pool);
}
/* Chunkify the literal pool.  */

#define S390_POOL_CHUNK_MIN	0xc00
#define S390_POOL_CHUNK_MAX	0xe00

static struct constant_pool *
s390_chunkify_start (void)
{
  struct constant_pool *curr_pool = NULL, *pool_list = NULL;
  bitmap far_labels;
  rtx_insn *insn;

  /* We need correct insn addresses.  */

  shorten_branches (get_insns ());

  /* Scan all insns and move literals to pool chunks.  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	{
	  rtx pool_ref = NULL_RTX;
	  find_constant_pool_ref (insn, &pool_ref);
	  if (pool_ref)
	    {
	      rtx constant = get_pool_constant (pool_ref);
	      machine_mode mode = get_pool_mode (pool_ref);

	      if (!curr_pool)
		curr_pool = s390_start_pool (&pool_list, insn);

	      s390_add_constant (curr_pool, constant, mode);
	      s390_add_pool_insn (curr_pool, insn);
	    }
	}

      if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
	{
	  if (curr_pool)
	    s390_add_pool_insn (curr_pool, insn);
	}

      if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
	continue;

      if (!curr_pool
	  || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
	  || INSN_ADDRESSES (INSN_UID (insn)) == -1)
	continue;

      if (curr_pool->size < S390_POOL_CHUNK_MAX)
	continue;

      s390_end_pool (curr_pool, NULL);
      curr_pool = NULL;
    }

  if (curr_pool)
    s390_end_pool (curr_pool, NULL);

  /* Find all labels that are branched into
     from an insn belonging to a different chunk.  */

  far_labels = BITMAP_ALLOC (NULL);

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      rtx_jump_table_data *table;

      /* Labels marked with LABEL_PRESERVE_P can be target
	 of non-local jumps, so we have to mark them.
	 The same holds for named labels.

	 Don't do that, however, if it is the label before
	 a jump table.  */

      if (LABEL_P (insn)
	  && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
	{
	  rtx_insn *vec_insn = NEXT_INSN (insn);
	  if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
	    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
	}
      /* Check potential targets in a table jump (casesi_jump).  */
      else if (tablejump_p (insn, NULL, &table))
	{
	  rtx vec_pat = PATTERN (table);
	  int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;

	  for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
	    {
	      rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);

	      if (s390_find_pool (pool_list, label)
		  != s390_find_pool (pool_list, insn))
		bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
	    }
	}
      /* If we have a direct jump (conditional or unconditional),
	 check all potential targets.  */
      else if (JUMP_P (insn))
	{
	  rtx pat = PATTERN (insn);

	  if (GET_CODE (pat) == PARALLEL)
	    pat = XVECEXP (pat, 0, 0);

	  if (GET_CODE (pat) == SET)
	    {
	      rtx label = JUMP_LABEL (insn);
	      if (label && !ANY_RETURN_P (label))
		{
		  if (s390_find_pool (pool_list, label)
		      != s390_find_pool (pool_list, insn))
		    bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
		}
	    }
	}
    }

  /* Insert base register reload insns before every pool.  */

  for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
    {
      rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
					 curr_pool->label);
      rtx_insn *insn = curr_pool->first_insn;
      INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
    }

  /* Insert base register reload insns at every far label.  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (LABEL_P (insn)
	&& bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
      {
	struct constant_pool *pool = s390_find_pool (pool_list, insn);
	if (pool)
	  {
	    rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
					       pool->label);
	    INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
	  }
      }

  BITMAP_FREE (far_labels);

  /* Recompute insn addresses.  */

  init_insn_lengths ();
  shorten_branches (get_insns ());

  return pool_list;
}
/* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
   After we have decided to use this list, finish implementing
   all changes to the current function as required.  */

static void
s390_chunkify_finish (struct constant_pool *pool_list)
{
  struct constant_pool *curr_pool = NULL;
  rtx_insn *insn;

  /* Replace all literal pool references.  */

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      curr_pool = s390_find_pool (pool_list, insn);
      if (!curr_pool)
	continue;

      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
	{
	  rtx addr, pool_ref = NULL_RTX;
	  find_constant_pool_ref (insn, &pool_ref);
	  if (pool_ref)
	    {
	      if (s390_execute_label (insn))
		addr = s390_find_execute (curr_pool, insn);
	      else
		addr = s390_find_constant (curr_pool,
					   get_pool_constant (pool_ref),
					   get_pool_mode (pool_ref));

	      replace_constant_pool_ref (insn, pool_ref, addr);
	      INSN_CODE (insn) = -1;
	    }
	}
    }

  /* Dump out all literal pools.  */

  for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
    s390_dump_pool (curr_pool, 0);

  /* Free pool list.  */

  while (pool_list)
    {
      struct constant_pool *next = pool_list->next;
      s390_free_pool (pool_list);
      pool_list = next;
    }
}
/* Output the constant pool entry EXP in mode MODE with alignment ALIGN.  */

void
s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
{
  switch (GET_MODE_CLASS (mode))
    {
    case MODE_FLOAT:
    case MODE_DECIMAL_FLOAT:
      gcc_assert (GET_CODE (exp) == CONST_DOUBLE);

      assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
		     as_a <scalar_float_mode> (mode), align);
      break;

    case MODE_INT:
      assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
      mark_symbol_refs_as_used (exp);
      break;

    case MODE_VECTOR_INT:
    case MODE_VECTOR_FLOAT:
      {
	int i;
	machine_mode inner_mode;
	gcc_assert (GET_CODE (exp) == CONST_VECTOR);

	inner_mode = GET_MODE_INNER (GET_MODE (exp));
	for (i = 0; i < XVECLEN (exp, 0); i++)
	  s390_output_pool_entry (XVECEXP (exp, 0, i),
				  inner_mode,
				  i == 0
				  ? align
				  : GET_MODE_BITSIZE (inner_mode));
      }
      break;

    default:
      gcc_unreachable ();
    }
}
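/* For illustration: a V4SImode vector constant is emitted by the
   recursion above as four SImode entries; the first element keeps the
   caller-supplied alignment, while the remaining three are aligned to
   the 32-bit element size only.  */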
/* Return true if MEM refers to an integer constant in the literal pool.  If
   VAL is not nullptr, then also fill it with the constant's value.  */

bool
s390_const_int_pool_entry_p (rtx mem, HOST_WIDE_INT *val)
{
  /* Try to match the following:
     - (mem (unspec [(symbol_ref) (reg)] UNSPEC_LTREF)).
     - (mem (symbol_ref)).  */

  if (!MEM_P (mem))
    return false;

  rtx addr = XEXP (mem, 0);
  rtx sym;
  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LTREF)
    sym = XVECEXP (addr, 0, 0);
  else
    sym = addr;

  if (!SYMBOL_REF_P (sym) || !CONSTANT_POOL_ADDRESS_P (sym))
    return false;

  rtx val_rtx = get_pool_constant (sym);
  if (!CONST_INT_P (val_rtx))
    return false;

  if (val != nullptr)
    *val = INTVAL (val_rtx);
  return true;
}
/* Return an RTL expression representing the value of the return address
   for the frame COUNT steps up from the current frame.  FRAME is the
   frame pointer of that frame.  */

rtx
s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
{
  int offset;
  rtx addr;

  /* Without backchain, we fail for all but the current frame.  */

  if (!TARGET_BACKCHAIN && count > 0)
    return NULL_RTX;

  /* For the current frame, we need to make sure the initial
     value of RETURN_REGNUM is actually saved.  */

  if (count == 0)
    return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);

  if (TARGET_PACKED_STACK)
    offset = -2 * UNITS_PER_LONG;
  else
    offset = RETURN_REGNUM * UNITS_PER_LONG;

  addr = plus_constant (Pmode, frame, offset);
  addr = memory_address (Pmode, addr);
  return gen_rtx_MEM (Pmode, addr);
}
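/* Worked example, assuming the usual definitions (RETURN_REGNUM == 14,
   UNITS_PER_LONG == 8 on 64-bit): without -mpacked-stack the return
   address of a parent frame is read from FRAME + 14 * 8 = FRAME + 112,
   i.e. the r14 slot of the standard register save area; with
   -mpacked-stack it sits at FRAME - 16.  */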
/* Return an RTL expression representing the back chain stored in
   the current stack frame.  */

rtx
s390_back_chain_rtx (void)
{
  rtx chain;

  gcc_assert (TARGET_BACKCHAIN);

  if (TARGET_PACKED_STACK)
    chain = plus_constant (Pmode, stack_pointer_rtx,
			   STACK_POINTER_OFFSET - UNITS_PER_LONG);
  else
    chain = stack_pointer_rtx;

  chain = gen_rtx_MEM (Pmode, chain);
  return chain;
}
/* Find first call clobbered register unused in a function.
   This could be used as base register in a leaf function
   or for holding the return address before epilogue.  */

static int
find_unused_clobbered_reg (void)
{
  int i;
  for (i = 0; i < 6; i++)
    if (!df_regs_ever_live_p (i))
      return i;
  return 0;
}
/* Helper function for s390_regs_ever_clobbered.  Sets the fields in DATA for all
   clobbered hard regs in SETREG.  */

static void
s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
{
  char *regs_ever_clobbered = (char *)data;
  unsigned int i, regno;
  machine_mode mode = GET_MODE (setreg);

  if (GET_CODE (setreg) == SUBREG)
    {
      rtx inner = SUBREG_REG (setreg);
      if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
	return;
      regno = subreg_regno (setreg);
    }
  else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
    regno = REGNO (setreg);
  else
    return;

  for (i = regno;
       i < end_hard_regno (mode, regno);
       i++)
    regs_ever_clobbered[i] = 1;
}
/* Walks through all basic blocks of the current function looking
   for clobbered hard regs using s390_reg_clobbered_rtx.  The fields
   of the passed integer array REGS_EVER_CLOBBERED are set to one for
   each of those regs.  */

static void
s390_regs_ever_clobbered (char regs_ever_clobbered[])
{
  basic_block cur_bb;
  rtx_insn *cur_insn;
  unsigned int i;

  memset (regs_ever_clobbered, 0, 32);

  /* For non-leaf functions we have to consider all call clobbered regs to be
     clobbered.  */
  if (!crtl->is_leaf)
    {
      for (i = 0; i < 32; i++)
	regs_ever_clobbered[i] = call_used_regs[i];
    }

  /* Make the "magic" eh_return registers live if necessary.  For regs_ever_live
     this work is done by liveness analysis (mark_regs_live_at_end).
     Special care is needed for functions containing landing pads.  Landing pads
     may use the eh registers, but the code which sets these registers is not
     contained in that function.  Hence s390_regs_ever_clobbered is not able to
     deal with this automatically.  */
  if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
    for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
      if (crtl->calls_eh_return
	  || (cfun->machine->has_landing_pad_p
	      && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
	regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;

  /* For nonlocal gotos all call-saved registers have to be saved.
     This flag is also set for the unwinding code in libgcc.
     See expand_builtin_unwind_init.  For regs_ever_live this is done by
     reload.  */
  if (crtl->saves_all_registers)
    for (i = 0; i < 32; i++)
      if (!call_used_regs[i])
	regs_ever_clobbered[i] = 1;

  FOR_EACH_BB_FN (cur_bb, cfun)
    {
      FOR_BB_INSNS (cur_bb, cur_insn)
	{
	  rtx pat;

	  if (!INSN_P (cur_insn))
	    continue;

	  pat = PATTERN (cur_insn);

	  /* Ignore GPR restore insns.  */
	  if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
	    {
	      if (GET_CODE (pat) == SET
		  && GENERAL_REG_P (SET_DEST (pat)))
		{
		  /* lgdr  */
		  if (GET_MODE (SET_SRC (pat)) == DImode
		      && FP_REG_P (SET_SRC (pat)))
		    continue;

		  /* l / lg  */
		  if (GET_CODE (SET_SRC (pat)) == MEM)
		    continue;
		}

	      /* lm / lmg */
	      if (GET_CODE (pat) == PARALLEL
		  && load_multiple_operation (pat, VOIDmode))
		continue;
	    }

	  note_stores (cur_insn,
		       s390_reg_clobbered_rtx,
		       regs_ever_clobbered);
	}
    }
}
/* Determine the frame area which actually has to be accessed
   in the function epilogue. The values are stored at the
   given pointers AREA_BOTTOM (address of the lowest used stack
   address) and AREA_TOP (address of the first item which does
   not belong to the stack frame).  */

static void
s390_frame_area (int *area_bottom, int *area_top)
{
  int b, t;

  b = INT_MAX;
  t = INT_MIN;

  if (cfun_frame_layout.first_restore_gpr != -1)
    {
      b = (cfun_frame_layout.gprs_offset
	   + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
      t = b + (cfun_frame_layout.last_restore_gpr
	       - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
    }

  if (TARGET_64BIT && cfun_save_high_fprs_p)
    {
      b = MIN (b, cfun_frame_layout.f8_offset);
      t = MAX (t, (cfun_frame_layout.f8_offset
		   + cfun_frame_layout.high_fprs * 8));
    }

  if (!TARGET_64BIT)
    {
      if (cfun_fpr_save_p (FPR4_REGNUM))
	{
	  b = MIN (b, cfun_frame_layout.f4_offset);
	  t = MAX (t, cfun_frame_layout.f4_offset + 8);
	}
      if (cfun_fpr_save_p (FPR6_REGNUM))
	{
	  b = MIN (b, cfun_frame_layout.f4_offset + 8);
	  t = MAX (t, cfun_frame_layout.f4_offset + 16);
	}
    }
  *area_bottom = b;
  *area_top = t;
}
/* Update gpr_save_slots in the frame layout trying to make use of
   FPRs as GPR save slots.
   This is a helper routine of s390_register_info.  */

static void
s390_register_info_gprtofpr ()
{
  int save_reg_slot = FPR0_REGNUM;
  int i, j;

  if (TARGET_TPF || !TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  /* builtin_eh_return needs to be able to modify the return address
     on the stack.  It could also adjust the FPR save slot instead but
     is it worth the trouble?!  */
  if (crtl->calls_eh_return)
    return;

  for (i = 15; i >= 6; i--)
    {
      if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
	continue;

      /* Advance to the next FP register which can be used as a
	 GPR save slot.  */
      while ((!call_used_regs[save_reg_slot]
	      || df_regs_ever_live_p (save_reg_slot)
	      || cfun_fpr_save_p (save_reg_slot))
	     && FP_REGNO_P (save_reg_slot))
	save_reg_slot++;
      if (!FP_REGNO_P (save_reg_slot))
	{
	  /* We only want to use ldgr/lgdr if we can get rid of
	     stm/lm entirely.  So undo the gpr slot allocation in
	     case we ran out of FPR save slots.  */
	  for (j = 6; j <= 15; j++)
	    if (FP_REGNO_P (cfun_gpr_save_slot (j)))
	      cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
	  break;
	}
      cfun_gpr_save_slot (i) = save_reg_slot++;
    }
}
/* Set the bits in fpr_bitmap for FPRs which need to be saved due to
   stdarg.
   This is a helper routine for s390_register_info.  */

static void
s390_register_info_stdarg_fpr ()
{
  int i;
  int min_fpr;
  int max_fpr;

  /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
     f0-f4 for 64 bit.  */
  if (!cfun->stdarg
      || !TARGET_HARD_FLOAT
      || !cfun->va_list_fpr_size
      || crtl->args.info.fprs >= FP_ARG_NUM_REG)
    return;

  min_fpr = crtl->args.info.fprs;
  max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
  if (max_fpr >= FP_ARG_NUM_REG)
    max_fpr = FP_ARG_NUM_REG - 1;

  /* FPR argument regs start at f0.  */
  min_fpr += FPR0_REGNUM;
  max_fpr += FPR0_REGNUM;

  for (i = min_fpr; i <= max_fpr; i++)
    cfun_set_fpr_save (i);
}
/* Reserve the GPR save slots for GPRs which need to be saved due to
   stdarg.
   This is a helper routine for s390_register_info.  */

static void
s390_register_info_stdarg_gpr ()
{
  int i;
  int min_gpr;
  int max_gpr;

  if (!cfun->stdarg
      || !cfun->va_list_gpr_size
      || crtl->args.info.gprs >= GP_ARG_NUM_REG)
    return;

  min_gpr = crtl->args.info.gprs;
  max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
  if (max_gpr >= GP_ARG_NUM_REG)
    max_gpr = GP_ARG_NUM_REG - 1;

  /* GPR argument regs start at r2.  */
  min_gpr += GPR2_REGNUM;
  max_gpr += GPR2_REGNUM;

  /* If r6 was supposed to be saved into an FPR and now needs to go to
     the stack for vararg we have to adjust the restore range to make
     sure that the restore is done from stack as well.  */
  if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
      && min_gpr <= GPR6_REGNUM
      && max_gpr >= GPR6_REGNUM)
    {
      if (cfun_frame_layout.first_restore_gpr == -1
	  || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
	cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
      if (cfun_frame_layout.last_restore_gpr == -1
	  || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
	cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
    }

  if (cfun_frame_layout.first_save_gpr == -1
      || cfun_frame_layout.first_save_gpr > min_gpr)
    cfun_frame_layout.first_save_gpr = min_gpr;

  if (cfun_frame_layout.last_save_gpr == -1
      || cfun_frame_layout.last_save_gpr < max_gpr)
    cfun_frame_layout.last_save_gpr = max_gpr;

  for (i = min_gpr; i <= max_gpr; i++)
    cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
}
/* Calculate the save and restore ranges for stm(g) and lm(g) in the
   prologue and epilogue.  */

static void
s390_register_info_set_ranges ()
{
  int i, j;

  /* Find the first and the last save slot supposed to use the stack
     to set the restore range.
     Vararg regs might be marked as save to stack but only the
     call-saved regs really need restoring (i.e. r6).  This code
     assumes that the vararg regs have not yet been recorded in
     cfun_gpr_save_slot.  */
  for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
  for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
  cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
  cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
  cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
  cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
}
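/* Worked example for the scan above: if only r11 .. r15 have
   SAVE_SLOT_STACK, the first loop stops at i == 11 and the second at
   j == 15, so a single stmg/lmg covering r11-r15 is used.  If no slot
   uses the stack, i reaches 16 and all four fields are set to -1.  */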
/* The GPR and FPR save slots in cfun->machine->frame_layout are set
   for registers which need to be saved in function prologue.
   This function can be used until the insns emitted for save/restore
   of the regs are visible in the RTL stream.  */

static void
s390_register_info ()
{
  int i;
  char clobbered_regs[32];

  gcc_assert (!epilogue_completed);

  if (reload_completed)
    /* After reload we rely on our own routine to determine which
       registers need saving.  */
    s390_regs_ever_clobbered (clobbered_regs);
  else
    /* During reload we use regs_ever_live as a base since reload
       does changes in there which we otherwise would not be aware
       of.  */
    for (i = 0; i < 32; i++)
      clobbered_regs[i] = df_regs_ever_live_p (i);

  for (i = 0; i < 32; i++)
    clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];

  /* Mark the call-saved FPRs which need to be saved.
     This needs to be done before checking the special GPRs since the
     stack pointer usage depends on whether high FPRs have to be saved
     or not.  */
  cfun_frame_layout.fpr_bitmap = 0;
  cfun_frame_layout.high_fprs = 0;
  for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
    if (clobbered_regs[i] && !call_used_regs[i])
      {
	cfun_set_fpr_save (i);
	if (i >= FPR8_REGNUM)
	  cfun_frame_layout.high_fprs++;
      }

  /* Register 12 is used for GOT address, but also as temp in prologue
     for split-stack stdarg functions (unless r14 is available).  */
  clobbered_regs[12]
    |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	|| (flag_split_stack && cfun->stdarg
	    && (crtl->is_leaf || TARGET_TPF_PROFILING
		|| has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));

  clobbered_regs[BASE_REGNUM]
    |= (cfun->machine->base_reg
	&& REGNO (cfun->machine->base_reg) == BASE_REGNUM);

  clobbered_regs[HARD_FRAME_POINTER_REGNUM]
    |= !!frame_pointer_needed;

  /* On pre z900 machines this might take until machine dependent
     reorg to decide.
     save_return_addr_p will only be set on non-zarch machines so
     there is no risk that r14 goes into an FPR instead of a stack
     slot.  */
  clobbered_regs[RETURN_REGNUM]
    |= (!crtl->is_leaf
	|| TARGET_TPF_PROFILING
	|| cfun_frame_layout.save_return_addr_p
	|| crtl->calls_eh_return);

  clobbered_regs[STACK_POINTER_REGNUM]
    |= (!crtl->is_leaf
	|| TARGET_TPF_PROFILING
	|| cfun_save_high_fprs_p
	|| get_frame_size () > 0
	|| (reload_completed && cfun_frame_layout.frame_size > 0)
	|| cfun->calls_alloca);

  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);

  for (i = 6; i < 16; i++)
    if (clobbered_regs[i])
      cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;

  s390_register_info_stdarg_fpr ();
  s390_register_info_gprtofpr ();
  s390_register_info_set_ranges ();
  /* stdarg functions might need to save GPRs 2 to 6.  This might
     override the GPR->FPR save decision made by
     s390_register_info_gprtofpr for r6 since vararg regs must go to
     the stack.  */
  s390_register_info_stdarg_gpr ();
}
/* Return true if REGNO is a global register, but not one
   of the special ones that need to be saved/restored in anyway.  */

static inline bool
global_not_special_regno_p (int regno)
{
  return (global_regs[regno]
	  /* These registers are special and need to be
	     restored in any case.  */
	  && !(regno == STACK_POINTER_REGNUM
	       || regno == RETURN_REGNUM
	       || regno == BASE_REGNUM
	       || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
}
/* This function is called by s390_optimize_prologue in order to get
   rid of unnecessary GPR save/restore instructions.  The register info
   for the GPRs is re-computed and the ranges are re-calculated.  */

static void
s390_optimize_register_info ()
{
  char clobbered_regs[32];
  int i;

  gcc_assert (epilogue_completed);

  s390_regs_ever_clobbered (clobbered_regs);

  /* Global registers do not need to be saved and restored unless it
     is one of our special regs.  (r12, r13, r14, or r15).  */
  for (i = 0; i < 32; i++)
    clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);

  /* There is still special treatment needed for cases invisible to
     s390_regs_ever_clobbered.  */
  clobbered_regs[RETURN_REGNUM]
    |= (TARGET_TPF_PROFILING
	/* When expanding builtin_return_addr in ESA mode we do not
	   know whether r14 will later be needed as scratch reg when
	   doing branch splitting.  So the builtin always accesses the
	   r14 save slot and we need to stick to the save/restore
	   decision for r14 even if it turns out that it didn't get
	   clobbered.  */
	|| cfun_frame_layout.save_return_addr_p
	|| crtl->calls_eh_return);

  memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);

  for (i = 6; i < 16; i++)
    if (!clobbered_regs[i])
      cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;

  s390_register_info_set_ranges ();
  s390_register_info_stdarg_gpr ();
}
/* Fill cfun->machine with info about frame of current function.  */

static void
s390_frame_info (void)
{
  HOST_WIDE_INT lowest_offset;

  cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
  cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;

  /* The va_arg builtin uses a constant distance of 16 *
     UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
     pointer.  So even if we are going to save the stack pointer in an
     FPR we need the stack space in order to keep the offsets
     correct.  */
  if (cfun->stdarg && cfun_save_arg_fprs_p)
    {
      cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;

      if (cfun_frame_layout.first_save_gpr_slot == -1)
	cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
    }

  cfun_frame_layout.frame_size = get_frame_size ();
  if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
    fatal_error (input_location,
		 "total size of local variables exceeds architecture limit");

  if (!TARGET_PACKED_STACK)
    {
      /* Fixed stack layout.  */
      cfun_frame_layout.backchain_offset = 0;
      cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
      cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
      cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
      cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
				       * UNITS_PER_LONG);
    }
  else if (TARGET_BACKCHAIN)
    {
      /* Kernel stack layout - packed stack, backchain, no float  */
      gcc_assert (TARGET_SOFT_FLOAT);
      cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
					    - UNITS_PER_LONG);

      /* The distance between the backchain and the return address
	 save slot must not change.  So we always need a slot for the
	 stack pointer which resides in between.  */
      cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;

      cfun_frame_layout.gprs_offset
	= cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;

      /* FPRs will not be saved.  Nevertheless pick sane values to
	 keep area calculations valid.  */
      cfun_frame_layout.f0_offset =
	cfun_frame_layout.f4_offset =
	cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
    }
  else
    {
      int num_fprs;

      /* Packed stack layout without backchain.  */

      /* With stdarg FPRs need their dedicated slots.  */
      num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
		  : (cfun_fpr_save_p (FPR4_REGNUM) +
		     cfun_fpr_save_p (FPR6_REGNUM)));
      cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;

      num_fprs = (cfun->stdarg ? 2
		  : (cfun_fpr_save_p (FPR0_REGNUM)
		     + cfun_fpr_save_p (FPR2_REGNUM)));
      cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;

      cfun_frame_layout.gprs_offset
	= cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;

      cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
				     - cfun_frame_layout.high_fprs * 8);
    }

  if (cfun_save_high_fprs_p)
    cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;

  if (!crtl->is_leaf)
    cfun_frame_layout.frame_size += crtl->outgoing_args_size;

  /* In the following cases we have to allocate a STACK_POINTER_OFFSET
     sized area at the bottom of the stack.  This is required also for
     leaf functions.  When GCC generates a local stack reference it
     will always add STACK_POINTER_OFFSET to all these references.  */
  if (crtl->is_leaf
      && !TARGET_TPF_PROFILING
      && cfun_frame_layout.frame_size == 0
      && !cfun->calls_alloca)
    return;

  /* Calculate the number of bytes we have used in our own register
     save area.  With the packed stack layout we can re-use the
     remaining bytes for normal stack elements.  */

  if (TARGET_PACKED_STACK)
    lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
			      cfun_frame_layout.f4_offset),
			 cfun_frame_layout.gprs_offset);
  else
    lowest_offset = 0;

  if (TARGET_BACKCHAIN)
    lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);

  cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;

  /* If under 31 bit an odd number of gprs has to be saved we have to
     adjust the frame size to sustain 8 byte alignment of stack
     frames.  */
  cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
				   STACK_BOUNDARY / BITS_PER_UNIT - 1)
				  & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
}
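/* Worked example for the final rounding, assuming STACK_BOUNDARY == 64
   (8-byte stack alignment): a raw frame_size of 100 becomes
   (100 + 7) & ~7 == 104, i.e. the size is rounded up to the next
   multiple of 8 bytes.  */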
/* Generate frame layout.  Fills in register and frame data for the current
   function in cfun->machine.  This routine can be called multiple times;
   it will re-do the complete frame layout every time.  */

static void
s390_init_frame_layout (void)
{
  HOST_WIDE_INT frame_size;
  int base_used;

  /* After LRA the frame layout is supposed to be read-only and should
     not be re-computed.  */
  if (reload_completed)
    return;

  do
    {
      frame_size = cfun_frame_layout.frame_size;

      /* Try to predict whether we'll need the base register.  */
      base_used = crtl->uses_const_pool
		  || (!DISP_IN_RANGE (frame_size)
		      && !CONST_OK_FOR_K (frame_size));

      /* Decide which register to use as literal pool base.  In small
	 leaf functions, try to use an unused call-clobbered register
	 as base register to avoid save/restore overhead.  */
      if (!base_used)
	cfun->machine->base_reg = NULL_RTX;
      else
	{
	  int br = 0;

	  if (crtl->is_leaf)
	    /* Prefer r5 (most likely to be free).  */
	    for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
	      ;
	  cfun->machine->base_reg =
	    gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
	}

      s390_register_info ();
      s390_frame_info ();
    }
  while (frame_size != cfun_frame_layout.frame_size);
}
/* Remove the FPR clobbers from a tbegin insn if it can be proven that
   the TX is nonescaping.  A transaction is considered escaping if
   there is at least one path from tbegin returning CC0 to the
   function exit block without an tend.

   The check so far has some limitations:
   - only single tbegin/tend BBs are supported
   - the first cond jump after tbegin must separate the CC0 path from ~CC0
   - when CC is copied to a GPR and the CC0 check is done with the GPR
     this is not supported
*/

static void
s390_optimize_nonescaping_tx (void)
{
  const unsigned int CC0 = 1 << 3;
  basic_block tbegin_bb = NULL;
  basic_block tend_bb = NULL;
  basic_block bb;
  rtx_insn *insn;
  bool result = true;
  int bb_index;
  rtx_insn *tbegin_insn = NULL;

  if (!cfun->machine->tbegin_p)
    return;

  for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);

      if (!bb)
	continue;

      FOR_BB_INSNS (bb, insn)
	{
	  rtx ite, cc, pat, target;
	  unsigned HOST_WIDE_INT mask;

	  if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
	    continue;

	  pat = PATTERN (insn);

	  if (GET_CODE (pat) == PARALLEL)
	    pat = XVECEXP (pat, 0, 0);

	  if (GET_CODE (pat) != SET
	      || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
	    continue;

	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
	    {
	      rtx_insn *tmp;

	      tbegin_insn = insn;

	      /* Just return if the tbegin doesn't have clobbers.  */
	      if (GET_CODE (PATTERN (insn)) != PARALLEL)
		return;

	      if (tbegin_bb != NULL)
		return;

	      /* Find the next conditional jump.  */
	      for (tmp = NEXT_INSN (insn);
		   tmp != NULL_RTX;
		   tmp = NEXT_INSN (tmp))
		{
		  if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
		    return;
		  if (!JUMP_P (tmp))
		    continue;

		  ite = SET_SRC (PATTERN (tmp));
		  if (GET_CODE (ite) != IF_THEN_ELSE)
		    return;

		  cc = XEXP (XEXP (ite, 0), 0);
		  if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
		      || GET_MODE (cc) != CCRAWmode
		      || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
		    return;

		  if (bb->succs->length () != 2)
		    return;

		  mask = INTVAL (XEXP (XEXP (ite, 0), 1));
		  if (GET_CODE (XEXP (ite, 0)) == NE)
		    mask ^= 0xf;

		  if (mask == CC0)
		    target = XEXP (ite, 1);
		  else if (mask == (CC0 ^ 0xf))
		    target = XEXP (ite, 2);
		  else
		    return;

		  {
		    edge_iterator ei;
		    edge e1, e2;

		    ei = ei_start (bb->succs);
		    e1 = ei_safe_edge (ei);
		    ei_next (&ei);
		    e2 = ei_safe_edge (ei);

		    if (e2->flags & EDGE_FALLTHRU)
		      {
			e2 = e1;
			e1 = ei_safe_edge (ei);
		      }

		    if (!(e1->flags & EDGE_FALLTHRU))
		      return;

		    tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
		  }
		  if (tmp == BB_END (bb))
		    break;
		}
	    }

	  if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
	    {
	      if (tend_bb != NULL)
		return;
	      tend_bb = bb;
	    }
	}
    }

  /* Either we successfully remove the FPR clobbers here or we are not
     able to do anything for this TX.  Both cases don't qualify for
     another look.  */
  cfun->machine->tbegin_p = false;

  if (tbegin_bb == NULL || tend_bb == NULL)
    return;

  calculate_dominance_info (CDI_POST_DOMINATORS);
  result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
  free_dominance_info (CDI_POST_DOMINATORS);

  if (!result)
    return;

  PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
			    gen_rtvec (2,
				       XVECEXP (PATTERN (tbegin_insn), 0, 0),
				       XVECEXP (PATTERN (tbegin_insn), 0, 1)));
  INSN_CODE (tbegin_insn) = -1;
  df_insn_rescan (tbegin_insn);
}
/* Implement TARGET_HARD_REGNO_NREGS.  Because all registers in a class
   have the same size, this is equivalent to CLASS_MAX_NREGS.  */

static unsigned int
s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
}
/* Implement TARGET_HARD_REGNO_MODE_OK.

   Integer modes <= word size fit into any GPR.
   Integer modes > word size fit into successive GPRs, starting with
   an even-numbered register.
   SImode and DImode fit into FPRs as well.

   Floating point modes <= word size fit into any FPR or GPR.
   Floating point modes > word size (i.e. DFmode on 32-bit) fit
   into any FPR, or an even-odd GPR pair.
   TFmode fits only into an even-odd FPR pair.

   Complex floating point modes fit either into two FPRs, or into
   successive GPRs (again starting with an even number).
   TCmode fits only into two successive even-odd FPR pairs.

   Condition code modes fit only into the CC register.  */

static bool
s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
    return false;

  switch (REGNO_REG_CLASS (regno))
    {
    case VEC_REGS:
      return ((GET_MODE_CLASS (mode) == MODE_INT
	       && s390_class_max_nregs (VEC_REGS, mode) == 1)
	      || mode == DFmode
	      || (TARGET_VXE && mode == SFmode)
	      || s390_vector_mode_supported_p (mode));
      break;
    case FP_REGS:
      if (TARGET_VX
	  && ((GET_MODE_CLASS (mode) == MODE_INT
	       && s390_class_max_nregs (FP_REGS, mode) == 1)
	      || mode == DFmode
	      || s390_vector_mode_supported_p (mode)))
	return true;

      if (REGNO_PAIR_OK (regno, mode))
	{
	  if (mode == SImode || mode == DImode)
	    return true;

	  if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
	    return true;
	}
      break;
    case ADDR_REGS:
      if (FRAME_REGNO_P (regno) && mode == Pmode)
	return true;

      /* fallthrough */
    case GENERAL_REGS:
      if (REGNO_PAIR_OK (regno, mode))
	{
	  if (TARGET_ZARCH
	      || (mode != TFmode && mode != TCmode && mode != TDmode))
	    return true;
	}
      break;
    case CC_REGS:
      if (GET_MODE_CLASS (mode) == MODE_CC)
	return true;
      break;
    case ACCESS_REGS:
      if (REGNO_PAIR_OK (regno, mode))
	{
	  if (mode == SImode || mode == Pmode)
	    return true;
	}
      break;
    default:
      return false;
    }

  return false;
}
/* Implement TARGET_MODES_TIEABLE_P.  */

static bool
s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  return ((mode1 == SFmode || mode1 == DFmode)
	  == (mode2 == SFmode || mode2 == DFmode));
}
/* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */

bool
s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
{
  /* Once we've decided upon a register to use as base register, it must
     no longer be used for any other purpose.  */
  if (cfun->machine->base_reg)
    if (REGNO (cfun->machine->base_reg) == old_reg
	|| REGNO (cfun->machine->base_reg) == new_reg)
      return false;

  /* Prevent regrename from using call-saved regs which haven't
     actually been saved.  This is necessary since regrename assumes
     the backend save/restore decisions are based on
     df_regs_ever_live.  Since we have our own routine we have to tell
     regrename manually about it.  */
  if (GENERAL_REGNO_P (new_reg)
      && !call_used_regs[new_reg]
      && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
    return false;

  return true;
}
/* Return nonzero if register REGNO can be used as a scratch register
   in peephole2.  */

static bool
s390_hard_regno_scratch_ok (unsigned int regno)
{
  /* See s390_hard_regno_rename_ok.  */
  if (GENERAL_REGNO_P (regno)
      && !call_used_regs[regno]
      && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
    return false;

  return true;
}
/* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED.  When generating
   code that runs in z/Architecture mode, but conforms to the 31-bit
   ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
   bytes are saved across calls, however.  */

static bool
s390_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
				     machine_mode mode)
{
  /* For r12 we know that the only bits we actually care about are
     preserved across function calls.  Since r12 is a fixed reg all
     accesses to r12 are generated by the backend.

     This workaround is necessary until gcse implements proper
     tracking of partially clobbered registers.  */
  if (!TARGET_64BIT
      && TARGET_ZARCH
      && GET_MODE_SIZE (mode) > 4
      && (!flag_pic || regno != PIC_OFFSET_TABLE_REGNUM)
      && ((regno >= 6 && regno <= 15) || regno == 32))
    return true;

  if (TARGET_VX
      && GET_MODE_SIZE (mode) > 8
      && (((TARGET_64BIT && regno >= 24 && regno <= 31))
	  || (!TARGET_64BIT && (regno == 18 || regno == 19))))
    return true;

  return false;
}
/* Maximum number of registers to represent a value of mode MODE
   in a register of class RCLASS.  */

int
s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
{
  int reg_size;
  bool reg_pair_required_p = false;

  switch (rclass)
    {
    case FP_REGS:
    case VEC_REGS:
      reg_size = TARGET_VX ? 16 : 8;

      /* TF and TD modes would fit into a VR but we put them into a
	 register pair since we do not have 128bit FP instructions on
	 full VRs.  */
      if (TARGET_VX
	  && SCALAR_FLOAT_MODE_P (mode)
	  && GET_MODE_SIZE (mode) >= 16
	  && !(TARGET_VXE && mode == TFmode))
	reg_pair_required_p = true;

      /* Even if complex types would fit into a single FPR/VR we force
	 them into a register pair to deal with the parts more easily.
	 (FIXME: What about complex ints?)  */
      if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
	reg_pair_required_p = true;
      break;
    case ACCESS_REGS:
      reg_size = 4;
      break;
    default:
      reg_size = UNITS_PER_WORD;
      break;
    }

  if (reg_pair_required_p)
    return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);

  return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
}
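/* Worked example: with the vector facility (reg_size == 16), a TFmode
   value that must live in a register pair yields
   2 * ((16 / 2 + 15) / 16) == 2 registers, while a plain DImode value
   in a GPR (reg_size == UNITS_PER_WORD == 8 on 64-bit) needs
   (8 + 7) / 8 == 1 register.  */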
/* Return nonzero if mode M describes a 128-bit float in a floating point
   register pair.  */

static bool
s390_is_fpr128 (machine_mode m)
{
  return m == FPRX2mode || (!TARGET_VXE && m == TFmode);
}

/* Return nonzero if mode M describes a 128-bit float in a vector
   register.  */

static bool
s390_is_vr128 (machine_mode m)
{
  return m == V1TFmode || (TARGET_VXE && m == TFmode);
}
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  */

static bool
s390_can_change_mode_class (machine_mode from_mode,
			    machine_mode to_mode,
			    reg_class_t rclass)
{
  machine_mode small_mode;
  machine_mode big_mode;

  /* 128-bit values have different representations in floating point and
     vector registers.  */
  if (reg_classes_intersect_p (VEC_REGS, rclass)
      && ((s390_is_fpr128 (from_mode) && s390_is_vr128 (to_mode))
	  || (s390_is_vr128 (from_mode) && s390_is_fpr128 (to_mode))))
    return false;

  if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
    return true;

  if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
    {
      small_mode = from_mode;
      big_mode = to_mode;
    }
  else
    {
      small_mode = to_mode;
      big_mode = from_mode;
    }

  /* Values residing in VRs are little-endian style.  All modes are
     placed left-aligned in an VR.  This means that we cannot allow
     switching between modes with differing sizes.  Also if the vector
     facility is available we still place TFmode values in VR register
     pairs, since the only instructions we have operating on TFmodes
     only deal with register pairs.  Therefore we have to allow DFmode
     subregs of TFmodes to enable the TFmode splitters.  */
  if (reg_classes_intersect_p (VEC_REGS, rclass)
      && (GET_MODE_SIZE (small_mode) < 8
	  || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
    return false;

  /* Likewise for access registers, since they have only half the
     word size on 64-bit.  */
  if (reg_classes_intersect_p (ACCESS_REGS, rclass))
    return false;

  return true;
}
/* Return true if we use LRA instead of reload pass.  */

static bool
s390_lra_p (void)
{
  return s390_lra_flag;
}
/* Return true if register FROM can be eliminated via register TO.  */

static bool
s390_can_eliminate (const int from, const int to)
{
  /* We have not marked the base register as fixed.
     Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
     If a function requires the base register, we say here that this
     elimination cannot be performed.  This will cause reload to free
     up the base register (as if it were fixed).  On the other hand,
     if the current function does *not* require the base register, we
     say here the elimination succeeds, which in turn allows reload
     to allocate the base register for any other purpose.  */
  if (from == BASE_REGNUM && to == BASE_REGNUM)
    {
      s390_init_frame_layout ();
      return cfun->machine->base_reg == NULL_RTX;
    }

  /* Everything else must point into the stack frame.  */
  gcc_assert (to == STACK_POINTER_REGNUM
	      || to == HARD_FRAME_POINTER_REGNUM);

  gcc_assert (from == FRAME_POINTER_REGNUM
	      || from == ARG_POINTER_REGNUM
	      || from == RETURN_ADDRESS_POINTER_REGNUM);

  /* Make sure we actually saved the return address.  */
  if (from == RETURN_ADDRESS_POINTER_REGNUM)
    if (!crtl->calls_eh_return
	&& !cfun->stdarg
	&& !cfun_frame_layout.save_return_addr_p)
      return false;

  return true;
}
/* Return offset between register FROM and TO initially after prolog.  */

HOST_WIDE_INT
s390_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset;

  /* ??? Why are we called for non-eliminable pairs?  */
  if (!s390_can_eliminate (from, to))
    return 0;

  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      offset = (get_frame_size ()
		+ STACK_POINTER_OFFSET
		+ crtl->outgoing_args_size);
      break;

    case ARG_POINTER_REGNUM:
      s390_init_frame_layout ();
      offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
      break;

    case RETURN_ADDRESS_POINTER_REGNUM:
      s390_init_frame_layout ();

      if (cfun_frame_layout.first_save_gpr_slot == -1)
	{
	  /* If it turns out that for stdarg nothing went into the reg
	     save area we also do not need the return address
	     pointer.  */
	  if (cfun->stdarg && !cfun_save_arg_fprs_p)
	    return 0;

	  gcc_unreachable ();
	}

      /* In order to make the following work it is not necessary for
	 r14 to have a save slot.  It is sufficient if one other GPR
	 got one.  Since the GPRs are always stored without gaps we
	 are able to calculate where the r14 save slot would
	 reside.  */
      offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
		(RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
		UNITS_PER_LONG);
      break;

    default:
      gcc_unreachable ();
    }

  return offset;
}
/* Emit insn to save fpr REGNUM at offset OFFSET relative
   to register BASE.  Return generated insn.  */

static rtx
save_fpr (rtx base, int offset, int regnum)
{
  rtx addr;
  addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));

  if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
    set_mem_alias_set (addr, get_varargs_alias_set ());
  else
    set_mem_alias_set (addr, get_frame_alias_set ());

  return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
}

/* Emit insn to restore fpr REGNUM from offset OFFSET relative
   to register BASE.  Return generated insn.  */

static rtx
restore_fpr (rtx base, int offset, int regnum)
{
  rtx addr;
  addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
  set_mem_alias_set (addr, get_frame_alias_set ());

  return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
}
/* Generate insn to save registers FIRST to LAST into
   the register save area located at offset OFFSET
   relative to register BASE.  */

static rtx
save_gprs (rtx base, int offset, int first, int last)
{
  rtx addr, insn, note;
  int i;

  addr = plus_constant (Pmode, base, offset);
  addr = gen_rtx_MEM (Pmode, addr);

  set_mem_alias_set (addr, get_frame_alias_set ());

  /* Special-case single register.  */
  if (first == last)
    {
      if (TARGET_64BIT)
	insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
      else
	insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));

      if (!global_not_special_regno_p (first))
	RTX_FRAME_RELATED_P (insn) = 1;
      return insn;
    }

  insn = gen_store_multiple (addr,
			     gen_rtx_REG (Pmode, first),
			     GEN_INT (last - first + 1));

  if (first <= 6 && cfun->stdarg)
    for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
      {
	rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);

	if (first + i <= 6)
	  set_mem_alias_set (mem, get_varargs_alias_set ());
      }

  /* We need to set the FRAME_RELATED flag on all SETs
     inside the store-multiple pattern.

     However, we must not emit DWARF records for registers 2..5
     if they are stored for use by variable arguments ...

     ??? Unfortunately, it is not enough to simply not the
     FRAME_RELATED flags for those SETs, because the first SET
     of the PARALLEL is always treated as if it had the flag
     set, even if it does not.  Therefore we emit a new pattern
     without those registers as REG_FRAME_RELATED_EXPR note.  */

  if (first >= 6 && !global_not_special_regno_p (first))
    {
      rtx pat = PATTERN (insn);

      for (i = 0; i < XVECLEN (pat, 0); i++)
	if (GET_CODE (XVECEXP (pat, 0, i)) == SET
	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
								     0, i)))))
	  RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;

      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else if (last >= 6)
    {
      int start;

      for (start = first >= 6 ? first : 6; start <= last; start++)
	if (!global_not_special_regno_p (start))
	  break;

      if (start > last)
	return insn;

      addr = plus_constant (Pmode, base,
			    offset + (start - first) * UNITS_PER_LONG);

      if (start == last)
	{
	  if (TARGET_64BIT)
	    note = gen_movdi (gen_rtx_MEM (Pmode, addr),
			      gen_rtx_REG (Pmode, start));
	  else
	    note = gen_movsi (gen_rtx_MEM (Pmode, addr),
			      gen_rtx_REG (Pmode, start));
	  note = PATTERN (note);

	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
	  RTX_FRAME_RELATED_P (insn) = 1;

	  return insn;
	}

      note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
				 gen_rtx_REG (Pmode, start),
				 GEN_INT (last - start + 1));
      note = PATTERN (note);

      add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);

      for (i = 0; i < XVECLEN (note, 0); i++)
	if (GET_CODE (XVECEXP (note, 0, i)) == SET
	    && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
								     0, i)))))
	  RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;

      RTX_FRAME_RELATED_P (insn) = 1;
    }

  return insn;
}
/* Generate insn to restore registers FIRST to LAST from
   the register save area located at offset OFFSET
   relative to register BASE.  */

static rtx
restore_gprs (rtx base, int offset, int first, int last)
{
  rtx addr, insn;

  addr = plus_constant (Pmode, base, offset);
  addr = gen_rtx_MEM (Pmode, addr);
  set_mem_alias_set (addr, get_frame_alias_set ());

  /* Special-case single register.  */
  if (first == last)
    {
      if (TARGET_64BIT)
	insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
      else
	insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);

      RTX_FRAME_RELATED_P (insn) = 1;
      return insn;
    }

  insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
			    addr,
			    GEN_INT (last - first + 1));
  RTX_FRAME_RELATED_P (insn) = 1;
  return insn;
}
/* Return insn sequence to load the GOT register.  */

rtx_insn *
s390_load_got (void)
{
  rtx_insn *insns;

  /* We cannot use pic_offset_table_rtx here since we use this
     function also for non-pic if __tls_get_offset is called and in
     that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
     aren't usable.  */
  rtx got_rtx = gen_rtx_REG (Pmode, 12);

  start_sequence ();

  emit_move_insn (got_rtx, s390_got_symbol ());

  insns = get_insns ();
  end_sequence ();
  return insns;
}

/* This ties together stack memory (MEM with an alias set of frame_alias_set)
   and the change to the stack pointer.  */

static void
s390_emit_stack_tie (void)
{
  rtx mem = gen_frame_mem (BLKmode,
			   gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));

  emit_insn (gen_stack_tie (mem));
}
/* Copy GPRS into FPR save slots.  */

static void
s390_save_gprs_to_fprs (void)
{
  int i;

  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  for (i = 6; i < 16; i++)
    {
      if (FP_REGNO_P (cfun_gpr_save_slot (i)))
	{
	  rtx_insn *insn =
	    emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
			    gen_rtx_REG (DImode, i));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  /* This prevents dwarf2cfi from interpreting the set.  Doing
	     so it might emit def_cfa_register infos setting an FPR as
	     new CFA.  */
	  add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
	}
    }
}
/* Restore GPRs from FPR save slots.  */

static void
s390_restore_gprs_from_fprs (void)
{
  int i;

  if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
    return;

  /* Restore the GPRs starting with the stack pointer.  That way the
     stack pointer already has its original value when it comes to
     restoring the hard frame pointer.  So we can set the cfa reg back
     to the stack pointer.  */
  for (i = STACK_POINTER_REGNUM; i >= 6; i--)
    {
      rtx_insn *insn;

      if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
	continue;

      rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));

      if (i == STACK_POINTER_REGNUM)
	insn = emit_insn (gen_stack_restore_from_fpr (fpr));
      else
	insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);

      df_set_regs_ever_live (i, true);
      add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));

      /* If either the stack pointer or the frame pointer get restored
	 set the CFA value to its value at function start.  Doing this
	 for the frame pointer results in .cfi_def_cfa_register 15
	 what is ok since if the stack pointer got modified it has
	 been restored already.  */
      if (i == STACK_POINTER_REGNUM || i == HARD_FRAME_POINTER_REGNUM)
	add_reg_note (insn, REG_CFA_DEF_CFA,
		      plus_constant (Pmode, stack_pointer_rtx,
				     STACK_POINTER_OFFSET));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
/* A pass run immediately before shrink-wrapping and prologue and epilogue
   generation.  */

namespace {

const pass_data pass_data_s390_early_mach =
{
  RTL_PASS, /* type */
  "early_mach", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
};

class pass_s390_early_mach : public rtl_opt_pass
{
public:
  pass_s390_early_mach (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
  {}

  /* opt_pass methods: */
  virtual unsigned int execute (function *);

}; // class pass_s390_early_mach

unsigned int
pass_s390_early_mach::execute (function *fun)
{
  rtx_insn *insn;

  /* Try to get rid of the FPR clobbers.  */
  s390_optimize_nonescaping_tx ();

  /* Re-compute register info.  */
  s390_register_info ();

  /* If we're using a base register, ensure that it is always valid for
     the first non-prologue instruction.  */
  if (fun->machine->base_reg)
    emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));

  /* Annotate all constant pool references to let the scheduler know
     they implicitly use the base register.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      {
	annotate_constant_pool_refs (insn);
	df_insn_rescan (insn);
      }
  return 0;
}

} // anon namespace

rtl_opt_pass *
make_pass_s390_early_mach (gcc::context *ctxt)
{
  return new pass_s390_early_mach (ctxt);
}
/* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
   - push too big immediates to the literal pool and annotate the refs
   - emit frame related notes for stack pointer changes.  */

static rtx
s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
{
  rtx_insn *insn;
  rtx orig_offset = offset;

  gcc_assert (REG_P (target));
  gcc_assert (REG_P (reg));
  gcc_assert (CONST_INT_P (offset));

  if (offset == const0_rtx)                               /* lr/lgr */
    {
      insn = emit_move_insn (target, reg);
    }
  else if (DISP_IN_RANGE (INTVAL (offset)))               /* la */
    {
      insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
                                                   offset));
    }
  else
    {
      if (!satisfies_constraint_K (offset)                /* ahi/aghi */
          && (!TARGET_EXTIMM
              || (!satisfies_constraint_Op (offset)       /* alfi/algfi */
                  && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
        offset = force_const_mem (Pmode, offset);

      if (target != reg)
        {
          insn = emit_move_insn (target, reg);
          RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
        }

      insn = emit_insn (gen_add2_insn (target, offset));

      if (!CONST_INT_P (offset))
        {
          annotate_constant_pool_refs (insn);

          if (frame_related_p)
            add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                          gen_rtx_SET (target,
                                       gen_rtx_PLUS (Pmode, target,
                                                     orig_offset)));
        }
    }

  RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;

  /* If this is a stack adjustment and we are generating a stack clash
     prologue, then add a REG_STACK_CHECK note to signal that this insn
     should be left alone.  */
  if (flag_stack_clash_protection && target == stack_pointer_rtx)
    add_reg_note (insn, REG_STACK_CHECK, const0_rtx);

  return insn;
}
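/* An illustrative sketch, not used by the port: which of the strategies
   above s390_prologue_plus_offset would pick for a constant OFFSET.  The
   returned strings are just the instruction mnemonics from the inline
   comments above; the authoritative definitions of the K/Op/On ranges
   live in the constraint machinery.  */

static inline const char *
s390_sketch_plus_offset_strategy (HOST_WIDE_INT offset)
{
  if (offset == 0)
    return "lr/lgr";          /* plain register copy */
  if (DISP_IN_RANGE (offset))
    return "la";              /* address arithmetic */
  if (satisfies_constraint_K (GEN_INT (offset)))
    return "ahi/aghi";        /* add immediate */
  if (TARGET_EXTIMM
      && (satisfies_constraint_Op (GEN_INT (offset))
          || satisfies_constraint_On (GEN_INT (offset))))
    return "al(g)fi/sl(g)fi"; /* add/subtract logical immediate */
  return "literal pool";      /* force_const_mem + add */
}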
/* Emit a compare instruction with a volatile memory access as stack
   probe.  It does not waste store tags and does not clobber any
   registers apart from the condition code.  */

static void
s390_emit_stack_probe (rtx addr)
{
  rtx mem = gen_rtx_MEM (word_mode, addr);
  MEM_VOLATILE_P (mem) = 1;
  emit_insn (gen_probe_stack (mem));
}

/* Use a runtime loop if we have to emit more probes than this.  */
#define MIN_UNROLL_PROBES 3

/* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
   if necessary.  LAST_PROBE_OFFSET contains the offset of the closest
   probe relative to the stack pointer.

   Note that SIZE is negative.

   The return value is true if TEMP_REG has been clobbered.  */
static bool
allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
                      rtx temp_reg)
{
  bool temp_reg_clobbered_p = false;
  HOST_WIDE_INT probe_interval
    = 1 << param_stack_clash_protection_probe_interval;
  HOST_WIDE_INT guard_size
    = 1 << param_stack_clash_protection_guard_size;

  if (flag_stack_clash_protection)
    {
      if (last_probe_offset + -INTVAL (size) < guard_size)
        dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
      else
        {
          rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
          HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
          HOST_WIDE_INT num_probes = rounded_size / probe_interval;
          HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;

          if (num_probes < MIN_UNROLL_PROBES)
            {
              /* Emit unrolled probe statements.  */

              for (unsigned int i = 0; i < num_probes; i++)
                {
                  s390_prologue_plus_offset (stack_pointer_rtx,
                                             stack_pointer_rtx,
                                             GEN_INT (-probe_interval), true);
                  s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
                                                       stack_pointer_rtx,
                                                       offset));
                }
              if (num_probes > 0)
                last_probe_offset = INTVAL (offset);
              dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
            }
          else
            {
              /* Emit a loop probing the pages.  */

              rtx_code_label *loop_start_label = gen_label_rtx ();

              /* From now on temp_reg will be the CFA register.  */
              s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
                                         GEN_INT (-rounded_size), true);
              emit_label (loop_start_label);

              s390_prologue_plus_offset (stack_pointer_rtx,
                                         stack_pointer_rtx,
                                         GEN_INT (-probe_interval), false);
              s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
                                                   stack_pointer_rtx,
                                                   offset));
              emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
                                       GT, NULL_RTX,
                                       Pmode, 1, loop_start_label);

              /* Without this make_edges ICEes.  */
              JUMP_LABEL (get_last_insn ()) = loop_start_label;
              LABEL_NUSES (loop_start_label) = 1;

              /* That's going to be a NOP since stack pointer and
                 temp_reg are supposed to be the same here.  We just
                 emit it to set the CFA reg back to r15.  */
              s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
                                         const0_rtx, true);
              temp_reg_clobbered_p = true;
              last_probe_offset = INTVAL (offset);
              dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
            }

          /* Handle any residual allocation request.  */
          s390_prologue_plus_offset (stack_pointer_rtx,
                                     stack_pointer_rtx,
                                     GEN_INT (-residual), true);
          last_probe_offset += residual;
          if (last_probe_offset >= probe_interval)
            s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
                                                 stack_pointer_rtx,
                                                 GEN_INT (residual
                                                          - UNITS_PER_LONG)));

          return temp_reg_clobbered_p;
        }
    }

  /* Subtract frame size from stack pointer.  */
  s390_prologue_plus_offset (stack_pointer_rtx,
                             stack_pointer_rtx,
                             size, true);

  return temp_reg_clobbered_p;
}
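/* An illustrative sketch, not used by the port: the probe bookkeeping
   performed by allocate_stack_space above, pulled out as plain
   arithmetic.  With a 4 KiB probe interval, a 9 KiB request (SIZE =
   -9216) yields ROUNDED_SIZE 8192, two probes and a 1 KiB residual;
   two is below MIN_UNROLL_PROBES, so the probes would be unrolled.  */

static inline void
s390_sketch_probe_split (HOST_WIDE_INT size, HOST_WIDE_INT probe_interval,
                         HOST_WIDE_INT *rounded_size,
                         HOST_WIDE_INT *num_probes,
                         HOST_WIDE_INT *residual)
{
  /* SIZE is negative, as in allocate_stack_space.  */
  *rounded_size = -size & -probe_interval;  /* round down to interval */
  *num_probes = *rounded_size / probe_interval;
  *residual = -size - *rounded_size;
}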
/* Expand the prologue into a bunch of separate insns.  */

void
s390_emit_prologue (void)
{
  rtx insn, addr;
  rtx temp_reg;
  int i;
  int offset;
  int next_fpr = 0;

  /* Choose best register to use for temp use within prologue.
     TPF with profiling must avoid the register 14 - the tracing function
     needs the original contents of r14 to be preserved.  */

  if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
      && !crtl->is_leaf
      && !TARGET_TPF_PROFILING)
    temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
  else if (flag_split_stack && cfun->stdarg)
    temp_reg = gen_rtx_REG (Pmode, 12);
  else
    temp_reg = gen_rtx_REG (Pmode, 1);

  /* When probing for stack-clash mitigation, we have to track the distance
     between the stack pointer and closest known reference.

     Most of the time we have to make a worst case assumption.  The
     only exception is when TARGET_BACKCHAIN is active, in which case
     we know *sp (offset 0) was written.  */
  HOST_WIDE_INT probe_interval
    = 1 << param_stack_clash_protection_probe_interval;
  HOST_WIDE_INT last_probe_offset
    = (TARGET_BACKCHAIN
       ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
       : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));

  s390_save_gprs_to_fprs ();

  /* Save call saved gprs.  */
  if (cfun_frame_layout.first_save_gpr != -1)
    {
      insn = save_gprs (stack_pointer_rtx,
                        cfun_frame_layout.gprs_offset +
                        UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
                                          - cfun_frame_layout.first_save_gpr_slot),
                        cfun_frame_layout.first_save_gpr,
                        cfun_frame_layout.last_save_gpr);

      /* This is not 100% correct.  If we have more than one register saved,
         then LAST_PROBE_OFFSET can move even closer to sp.  */
      last_probe_offset
        = (cfun_frame_layout.gprs_offset +
           UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
                             - cfun_frame_layout.first_save_gpr_slot));

      emit_insn (insn);
    }

  /* Dummy insn to mark literal pool slot.  */

  if (cfun->machine->base_reg)
    emit_insn (gen_main_pool (cfun->machine->base_reg));

  offset = cfun_frame_layout.f0_offset;

  /* Save f0 and f2.  */
  for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
    {
      if (cfun_fpr_save_p (i))
        {
          save_fpr (stack_pointer_rtx, offset, i);
          if (offset < last_probe_offset)
            last_probe_offset = offset;
          offset += 8;
        }
      else if (!TARGET_PACKED_STACK || cfun->stdarg)
        offset += 8;
    }

  /* Save f4 and f6.  */
  offset = cfun_frame_layout.f4_offset;
  for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
    {
      if (cfun_fpr_save_p (i))
        {
          insn = save_fpr (stack_pointer_rtx, offset, i);
          if (offset < last_probe_offset)
            last_probe_offset = offset;
          offset += 8;

          /* If f4 and f6 are call clobbered they are saved due to
             stdargs and therefore are not frame related.  */
          if (!call_used_regs[i])
            RTX_FRAME_RELATED_P (insn) = 1;
        }
      else if (!TARGET_PACKED_STACK || call_used_regs[i])
        offset += 8;
    }

  if (TARGET_PACKED_STACK
      && cfun_save_high_fprs_p
      && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
    {
      offset = (cfun_frame_layout.f8_offset
                + (cfun_frame_layout.high_fprs - 1) * 8);

      for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
        if (cfun_fpr_save_p (i))
          {
            insn = save_fpr (stack_pointer_rtx, offset, i);
            if (offset < last_probe_offset)
              last_probe_offset = offset;

            RTX_FRAME_RELATED_P (insn) = 1;
            offset -= 8;
          }
      if (offset >= cfun_frame_layout.f8_offset)
        next_fpr = i;
    }

  if (!TARGET_PACKED_STACK)
    next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;

  if (flag_stack_usage_info)
    current_function_static_stack_size = cfun_frame_layout.frame_size;

  /* Decrement stack pointer.  */

  if (cfun_frame_layout.frame_size > 0)
    {
      rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
      rtx_insn *stack_pointer_backup_loc;
      bool temp_reg_clobbered_p;

      if (s390_stack_size)
        {
          HOST_WIDE_INT stack_guard;

          if (s390_stack_guard)
            stack_guard = s390_stack_guard;
          else
            {
              /* If no value for stack guard is provided the smallest power of 2
                 larger than the current frame size is chosen.  */
              stack_guard = 1;
              while (stack_guard < cfun_frame_layout.frame_size)
                stack_guard <<= 1;
            }

          if (cfun_frame_layout.frame_size >= s390_stack_size)
            {
              warning (0, "frame size of function %qs is %wd"
                       " bytes exceeding user provided stack limit of "
                       "%d bytes.  "
                       "An unconditional trap is added.",
                       current_function_name(), cfun_frame_layout.frame_size,
                       s390_stack_size);
              emit_insn (gen_trap ());
              emit_barrier ();
            }
          else
            {
              /* stack_guard has to be smaller than s390_stack_size.
                 Otherwise we would emit an AND with zero which would
                 not match the test under mask pattern.  */
              if (stack_guard >= s390_stack_size)
                {
                  warning (0, "frame size of function %qs is %wd"
                           " bytes which is more than half the stack size. "
                           "The dynamic check would not be reliable. "
                           "No check emitted for this function.",
                           current_function_name(),
                           cfun_frame_layout.frame_size);
                }
              else
                {
                  HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
                                                    & ~(stack_guard - 1));

                  rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
                                       GEN_INT (stack_check_mask));
                  if (TARGET_64BIT)
                    emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
                                                         t, const0_rtx),
                                             t, const0_rtx, const0_rtx));
                  else
                    emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
                                                         t, const0_rtx),
                                             t, const0_rtx, const0_rtx));
                }
            }
        }

      if (s390_warn_framesize > 0
          && cfun_frame_layout.frame_size >= s390_warn_framesize)
        warning (0, "frame size of %qs is %wd bytes",
                 current_function_name (), cfun_frame_layout.frame_size);

      if (s390_warn_dynamicstack_p && cfun->calls_alloca)
        warning (0, "%qs uses dynamic stack allocation", current_function_name ());

      /* Save the location where we could backup the incoming stack
         pointer.  */
      stack_pointer_backup_loc = get_last_insn ();

      temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
                                                   temp_reg);

      if (TARGET_BACKCHAIN || next_fpr)
        {
          if (temp_reg_clobbered_p)
            {
              /* allocate_stack_space had to make use of temp_reg and
                 we need it to hold a backup of the incoming stack
                 pointer.  Calculate back that value from the current
                 stack pointer.  */
              s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
                                         GEN_INT (cfun_frame_layout.frame_size),
                                         false);
            }
          else
            {
              /* allocate_stack_space didn't actually require
                 temp_reg.  Insert the stack pointer backup insn
                 before the stack pointer decrement code - knowing now
                 that the value will survive.  */
              emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
                               stack_pointer_backup_loc);
            }
        }

      /* Set backchain.  */

      if (TARGET_BACKCHAIN)
        {
          if (cfun_frame_layout.backchain_offset)
            addr = gen_rtx_MEM (Pmode,
                                plus_constant (Pmode, stack_pointer_rtx,
                                               cfun_frame_layout.backchain_offset));
          else
            addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
          set_mem_alias_set (addr, get_frame_alias_set ());
          insn = emit_insn (gen_move_insn (addr, temp_reg));
        }

      /* If we support non-call exceptions (e.g. for Java),
         we need to make sure the backchain pointer is set up
         before any possibly trapping memory access.  */
      if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
        {
          addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
          emit_clobber (addr);
        }
    }
  else if (flag_stack_clash_protection)
    dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);

  /* Save fprs 8 - 15 (64 bit ABI).  */

  if (cfun_save_high_fprs_p && next_fpr)
    {
      /* If the stack might be accessed through a different register
         we have to make sure that the stack pointer decrement is not
         moved below the use of the stack slots.  */
      s390_emit_stack_tie ();

      insn = emit_insn (gen_add2_insn (temp_reg,
                                       GEN_INT (cfun_frame_layout.f8_offset)));

      offset = 0;

      for (i = FPR8_REGNUM; i <= next_fpr; i++)
        if (cfun_fpr_save_p (i))
          {
            rtx addr = plus_constant (Pmode, stack_pointer_rtx,
                                      cfun_frame_layout.frame_size
                                      + cfun_frame_layout.f8_offset
                                      + offset);

            insn = save_fpr (temp_reg, offset, i);
            offset += 8;
            RTX_FRAME_RELATED_P (insn) = 1;
            add_reg_note (insn, REG_FRAME_RELATED_EXPR,
                          gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
                                       gen_rtx_REG (DFmode, i)));
          }
    }

  /* Set frame pointer, if needed.  */

  if (frame_pointer_needed)
    {
      insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Set up got pointer, if needed.  */

  if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
    {
      rtx_insn *insns = s390_load_got ();

      for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
        annotate_constant_pool_refs (insn);

      emit_insn (insns);
    }

#if TARGET_TPF != 0
  if (TARGET_TPF_PROFILING)
    {
      /* Generate a BAS instruction to serve as a function entry
         intercept to facilitate the use of tracing algorithms located
         at the branch target.  */
      emit_insn (gen_prologue_tpf (
                   GEN_INT (s390_tpf_trace_hook_prologue_check),
                   GEN_INT (s390_tpf_trace_hook_prologue_target)));

      /* Emit a blockage here so that all code lies between the
         profiling mechanisms.  */
      emit_insn (gen_blockage ());
    }
#endif
}
/* Expand the epilogue into a bunch of separate insns.  */

void
s390_emit_epilogue (bool sibcall)
{
  rtx frame_pointer, return_reg = NULL_RTX, cfa_restores = NULL_RTX;
  int area_bottom, area_top, offset = 0;
  int next_offset;
  int i;

#if TARGET_TPF != 0
  if (TARGET_TPF_PROFILING)
    {
      /* Generate a BAS instruction to serve as a function entry
         intercept to facilitate the use of tracing algorithms located
         at the branch target.  */

      /* Emit a blockage here so that all code lies between the
         profiling mechanisms.  */
      emit_insn (gen_blockage ());

      emit_insn (gen_epilogue_tpf (
                   GEN_INT (s390_tpf_trace_hook_epilogue_check),
                   GEN_INT (s390_tpf_trace_hook_epilogue_target)));
    }
#endif

  /* Check whether to use frame or stack pointer for restore.  */

  frame_pointer = (frame_pointer_needed
                   ? hard_frame_pointer_rtx : stack_pointer_rtx);

  s390_frame_area (&area_bottom, &area_top);

  /* Check whether we can access the register save area.
     If not, increment the frame pointer as required.  */

  if (area_top <= area_bottom)
    {
      /* Nothing to restore.  */
    }
  else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
           && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
    {
      /* Area is in range.  */
      offset = cfun_frame_layout.frame_size;
    }
  else
    {
      rtx_insn *insn;
      rtx frame_off, cfa;

      offset = area_bottom < 0 ? -area_bottom : 0;
      frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);

      cfa = gen_rtx_SET (frame_pointer,
                         gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
      if (DISP_IN_RANGE (INTVAL (frame_off)))
        {
          rtx set;

          set = gen_rtx_SET (frame_pointer,
                             gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
          insn = emit_insn (set);
        }
      else
        {
          if (!CONST_OK_FOR_K (INTVAL (frame_off)))
            frame_off = force_const_mem (Pmode, frame_off);

          insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
          annotate_constant_pool_refs (insn);
        }
      add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Restore call saved fprs.  */

  if (TARGET_64BIT)
    {
      if (cfun_save_high_fprs_p)
        {
          next_offset = cfun_frame_layout.f8_offset;
          for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
            {
              if (cfun_fpr_save_p (i))
                {
                  restore_fpr (frame_pointer,
                               offset + next_offset, i);
                  cfa_restores
                    = alloc_reg_note (REG_CFA_RESTORE,
                                      gen_rtx_REG (DFmode, i), cfa_restores);
                  next_offset += 8;
                }
            }
        }
    }
  else
    {
      next_offset = cfun_frame_layout.f4_offset;
      /* f4, f6 */
      for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
        {
          if (cfun_fpr_save_p (i))
            {
              restore_fpr (frame_pointer,
                           offset + next_offset, i);
              cfa_restores
                = alloc_reg_note (REG_CFA_RESTORE,
                                  gen_rtx_REG (DFmode, i), cfa_restores);
              next_offset += 8;
            }
          else if (!TARGET_PACKED_STACK)
            next_offset += 8;
        }
    }

  /* Restore call saved gprs.  */

  if (cfun_frame_layout.first_restore_gpr != -1)
    {
      rtx insn, addr;
      int i;

      /* Check for global registers and store them
         to the stack location from where they get restored.  */

      for (i = cfun_frame_layout.first_restore_gpr;
           i <= cfun_frame_layout.last_restore_gpr;
           i++)
        {
          if (global_not_special_regno_p (i))
            {
              addr = plus_constant (Pmode, frame_pointer,
                                    offset + cfun_frame_layout.gprs_offset
                                    + (i - cfun_frame_layout.first_save_gpr_slot)
                                    * UNITS_PER_LONG);
              addr = gen_rtx_MEM (Pmode, addr);
              set_mem_alias_set (addr, get_frame_alias_set ());
              emit_move_insn (addr, gen_rtx_REG (Pmode, i));
            }
          else
            cfa_restores
              = alloc_reg_note (REG_CFA_RESTORE,
                                gen_rtx_REG (Pmode, i), cfa_restores);
        }

      /* Fetch return address from stack before load multiple;
         this helps scheduling.

         Only do this if we already decided that r14 needs to be
         saved to a stack slot.  (And not just because r14 happens to
         be in between two GPRs which need saving.)  Otherwise it
         would be difficult to take that decision back in
         s390_optimize_prologue.

         This optimization is only helpful on in-order machines.  */
      if (! sibcall
          && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
          && s390_tune <= PROCESSOR_2097_Z10)
        {
          int return_regnum = find_unused_clobbered_reg();
          if (!return_regnum
              || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
                  && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
            {
              gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
              return_regnum = 4;
            }
          return_reg = gen_rtx_REG (Pmode, return_regnum);

          addr = plus_constant (Pmode, frame_pointer,
                                offset + cfun_frame_layout.gprs_offset
                                + (RETURN_REGNUM
                                   - cfun_frame_layout.first_save_gpr_slot)
                                * UNITS_PER_LONG);
          addr = gen_rtx_MEM (Pmode, addr);
          set_mem_alias_set (addr, get_frame_alias_set ());
          emit_move_insn (return_reg, addr);

          /* Once we did that optimization we have to make sure
             s390_optimize_prologue does not try to remove the store
             of r14 since we will not be able to find the load issued
             here.  */
          cfun_frame_layout.save_return_addr_p = true;
        }

      insn = restore_gprs (frame_pointer,
                           offset + cfun_frame_layout.gprs_offset
                           + (cfun_frame_layout.first_restore_gpr
                              - cfun_frame_layout.first_save_gpr_slot)
                           * UNITS_PER_LONG,
                           cfun_frame_layout.first_restore_gpr,
                           cfun_frame_layout.last_restore_gpr);
      insn = emit_insn (insn);
      REG_NOTES (insn) = cfa_restores;
      add_reg_note (insn, REG_CFA_DEF_CFA,
                    plus_constant (Pmode, stack_pointer_rtx,
                                   STACK_POINTER_OFFSET));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  s390_restore_gprs_from_fprs ();

  if (! sibcall)
    {
      if (!return_reg && !s390_can_use_return_insn ())
        /* We planned to emit (return), but we are not allowed to.  */
        return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);

      if (return_reg)
        /* Emit (return) and (use).  */
        emit_jump_insn (gen_return_use (return_reg));
      else
        /* The fact that RETURN_REGNUM is used is already reflected by
           EPILOGUE_USES.  Emit plain (return).  */
        emit_jump_insn (gen_return ());
    }
}
/* Implement TARGET_SET_UP_BY_PROLOGUE.  */

static void
s300_set_up_by_prologue (hard_reg_set_container *regs)
{
  if (cfun->machine->base_reg
      && !call_used_regs[REGNO (cfun->machine->base_reg)])
    SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
}

/* -fsplit-stack support.  */

/* A SYMBOL_REF for __morestack.  */
static GTY(()) rtx morestack_ref;

/* When using -fsplit-stack, the allocation routines set a field in
   the TCB to the bottom of the stack plus this much space, measured
   in bytes.  */

#define SPLIT_STACK_AVAILABLE 1024
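/* An illustrative sketch, not used by the port: the shape of the parm
   block that s390_output_split_stack_data below emits into .rodata.
   Each field occupies one target pointer (emitted as .quad on 64 bit,
   .long on 31 bit); HOST_WIDE_INT is only a stand-in here.  */

struct s390_split_stack_parmblock_sketch
{
  HOST_WIDE_INT frame_size;    /* frame size in bytes */
  HOST_WIDE_INT args_size;     /* size of stack arguments in bytes */
  HOST_WIDE_INT label_offset;  /* __morestack return label - parm block */
};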
/* Emit the parm block for __morestack into the .rodata section.  It
   consists of 3 pointer size entries:
   - frame size
   - size of stack arguments
   - offset between parm block and __morestack return label  */

void
s390_output_split_stack_data (rtx parm_block, rtx call_done,
                              rtx frame_size, rtx args_size)
{
  rtx ops[] = { parm_block, call_done };

  switch_to_section (targetm.asm_out.function_rodata_section
                     (current_function_decl, false));

  if (TARGET_64BIT)
    output_asm_insn (".align\t8", NULL);
  else
    output_asm_insn (".align\t4", NULL);

  (*targetm.asm_out.internal_label) (asm_out_file, "L",
                                     CODE_LABEL_NUMBER (parm_block));
  if (TARGET_64BIT)
    {
      output_asm_insn (".quad\t%0", &frame_size);
      output_asm_insn (".quad\t%0", &args_size);
      output_asm_insn (".quad\t%1-%0", ops);
    }
  else
    {
      output_asm_insn (".long\t%0", &frame_size);
      output_asm_insn (".long\t%0", &args_size);
      output_asm_insn (".long\t%1-%0", ops);
    }

  switch_to_section (current_function_section ());
}
/* Emit the -fsplit-stack prologue, which goes before the regular function
   prologue.  */

void
s390_expand_split_stack_prologue (void)
{
  rtx r1, guard, cc = NULL;
  rtx_insn *insn;
  /* Offset from thread pointer to __private_ss.  */
  int psso = TARGET_64BIT ? 0x38 : 0x20;
  /* Pointer size in bytes.  */
  /* Frame size and argument size - the two parameters to __morestack.  */
  HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
  /* Align argument size to 8 bytes - simplifies __morestack code.  */
  HOST_WIDE_INT args_size = crtl->args.size >= 0
                            ? ((crtl->args.size + 7) & ~7)
                            : 0;
  /* Label to be called by __morestack.  */
  rtx_code_label *call_done = NULL;
  rtx_code_label *parm_base = NULL;
  rtx tmp;

  gcc_assert (flag_split_stack && reload_completed);

  r1 = gen_rtx_REG (Pmode, 1);

  /* If no stack frame will be allocated, don't do anything.  */
  if (!frame_size)
    {
      if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
        {
          /* If va_start is used, just use r15.  */
          emit_move_insn (r1,
                          gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                        GEN_INT (STACK_POINTER_OFFSET)));
        }
      return;
    }

  if (morestack_ref == NULL_RTX)
    {
      morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
      SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
                                           | SYMBOL_FLAG_FUNCTION);
    }

  if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
    {
      /* If frame_size will fit in an add instruction, do a stack space
         check, and only call __morestack if there's not enough space.  */

      /* Get thread pointer.  r1 is the only register we can always destroy - r0
         could contain a static chain (and cannot be used to address memory
         anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved.  */
      emit_insn (gen_get_thread_pointer (Pmode, r1));
      /* Aim at __private_ss.  */
      guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));

      /* If less than 1 KiB is used, skip the addition and compare directly
         with __private_ss.  */
      if (frame_size > SPLIT_STACK_AVAILABLE)
        {
          emit_move_insn (r1, guard);
          if (TARGET_64BIT)
            emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
          else
            emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
          guard = r1;
        }

      /* Compare the (maybe adjusted) guard with the stack pointer.  */
      cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
    }

  call_done = gen_label_rtx ();
  parm_base = gen_label_rtx ();
  LABEL_NUSES (parm_base)++;
  LABEL_NUSES (call_done)++;

  /* %r1 = litbase.  */
  insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
  add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
  LABEL_NUSES (parm_base)++;

  /* Now, we need to call __morestack.  It has very special calling
     conventions: it preserves param/return/static chain registers for
     calling main function body, and looks for its own parameters at %r1.  */
  if (cc != NULL)
    tmp = gen_split_stack_cond_call (Pmode,
                                     morestack_ref,
                                     parm_base,
                                     call_done,
                                     GEN_INT (frame_size),
                                     GEN_INT (args_size),
                                     cc);
  else
    tmp = gen_split_stack_call (Pmode,
                                morestack_ref,
                                parm_base,
                                call_done,
                                GEN_INT (frame_size),
                                GEN_INT (args_size));

  insn = emit_jump_insn (tmp);
  JUMP_LABEL (insn) = call_done;
  add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
  add_reg_note (insn, REG_LABEL_OPERAND, call_done);

  if (cc != NULL)
    {
      /* Mark the jump as very unlikely to be taken.  */
      add_reg_br_prob_note (insn,
                            profile_probability::very_unlikely ());

      if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
        {
          /* If va_start is used, and __morestack was not called, just use
             r15.  */
          emit_move_insn (r1,
                          gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                        GEN_INT (STACK_POINTER_OFFSET)));
        }
    }

  /* __morestack will call us here.  */

  emit_label (call_done);
}
/* We may have to tell the dataflow pass that the split stack prologue
   is initializing a register.  */

static void
s390_live_on_entry (bitmap regs)
{
  if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
    {
      gcc_assert (flag_split_stack);
      bitmap_set_bit (regs, 1);
    }
}

/* Return true if the function can use simple_return to return outside
   of a shrink-wrapped region.  At present shrink-wrapping is supported
   in all cases.  */

bool
s390_can_use_simple_return_insn (void)
{
  return true;
}

/* Return true if the epilogue is guaranteed to contain only a return
   instruction and if a direct return can therefore be used instead.
   One of the main advantages of using direct return instructions
   is that we can then use conditional returns.  */

bool
s390_can_use_return_insn (void)
{
  int i;

  if (!reload_completed)
    return false;

  if (TARGET_TPF_PROFILING)
    return false;

  for (i = 0; i < 16; i++)
    if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
      return false;

  /* For 31 bit this is not covered by the frame_size check below
     since f4, f6 are saved in the register save area without needing
     additional stack space.  */
  if (!TARGET_64BIT
      && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
    return false;

  if (cfun->machine->base_reg
      && !call_used_regs[REGNO (cfun->machine->base_reg)])
    return false;

  return cfun_frame_layout.frame_size == 0;
}
/* The VX ABI differs for vararg functions.  Therefore we need the
   prototype of the callee to be available when passing vector type
   values.  */
static const char *
s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
{
  return ((TARGET_VX_ABI
           && typelist == 0
           && VECTOR_TYPE_P (TREE_TYPE (val))
           && (funcdecl == NULL_TREE
               || (TREE_CODE (funcdecl) == FUNCTION_DECL
                   && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
          ? N_("vector argument passed to unprototyped function")
          : NULL);
}

/* Return the size in bytes of a function argument of
   type TYPE and/or mode MODE.  At least one of TYPE or
   MODE must be specified.  */

static int
s390_function_arg_size (machine_mode mode, const_tree type)
{
  if (type)
    return int_size_in_bytes (type);

  /* No type info available for some library calls ...  */
  if (mode != BLKmode)
    return GET_MODE_SIZE (mode);

  /* If we have neither type nor mode, abort.  */
  gcc_unreachable ();
}
/* Return true if a function argument of type TYPE and mode MODE
   is to be passed in a vector register, if available.  */

static bool
s390_function_arg_vector (machine_mode mode, const_tree type)
{
  if (!TARGET_VX_ABI)
    return false;

  if (s390_function_arg_size (mode, type) > 16)
    return false;

  /* No type info available for some library calls ...  */
  if (!type)
    return VECTOR_MODE_P (mode);

  /* The ABI says that record types with a single member are treated
     just like that member would be.  */
  int empty_base_seen = 0;
  const_tree orig_type = type;
  while (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field, single = NULL_TREE;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (DECL_FIELD_ABI_IGNORED (field))
            {
              if (lookup_attribute ("no_unique_address",
                                    DECL_ATTRIBUTES (field)))
                empty_base_seen |= 2;
              else
                empty_base_seen |= 1;
              continue;
            }

          if (single == NULL_TREE)
            single = TREE_TYPE (field);
          else
            return false;
        }

      if (single == NULL_TREE)
        return false;
      else
        {
          /* If the field declaration adds extra bytes due to
             e.g. padding, this is not accepted as a vector type.  */
          if (int_size_in_bytes (single) <= 0
              || int_size_in_bytes (single) != int_size_in_bytes (type))
            return false;
          type = single;
        }
    }

  if (!VECTOR_TYPE_P (type))
    return false;

  if (warn_psabi && empty_base_seen)
    {
      static unsigned last_reported_type_uid;
      unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
      if (uid != last_reported_type_uid)
        {
          const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
          last_reported_type_uid = uid;
          if (empty_base_seen & 1)
            inform (input_location,
                    "parameter passing for argument of type %qT when C++17 "
                    "is enabled changed to match C++14 %{in GCC 10.1%}",
                    orig_type, url);
          else
            inform (input_location,
                    "parameter passing for argument of type %qT with "
                    "%<[[no_unique_address]]%> members changed "
                    "%{in GCC 10.1%}", orig_type, url);
        }
    }
  return true;
}
/* Return true if a function argument of type TYPE and mode MODE
   is to be passed in a floating-point register, if available.  */

static bool
s390_function_arg_float (machine_mode mode, const_tree type)
{
  if (s390_function_arg_size (mode, type) > 8)
    return false;

  /* Soft-float changes the ABI: no floating-point registers are used.  */
  if (TARGET_SOFT_FLOAT)
    return false;

  /* No type info available for some library calls ...  */
  if (!type)
    return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;

  /* The ABI says that record types with a single member are treated
     just like that member would be.  */
  int empty_base_seen = 0;
  const_tree orig_type = type;
  while (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field, single = NULL_TREE;

      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (DECL_FIELD_ABI_IGNORED (field))
            {
              if (lookup_attribute ("no_unique_address",
                                    DECL_ATTRIBUTES (field)))
                empty_base_seen |= 2;
              else
                empty_base_seen |= 1;
              continue;
            }

          if (single == NULL_TREE)
            single = TREE_TYPE (field);
          else
            return false;
        }

      if (single == NULL_TREE)
        return false;

      type = single;
    }

  if (TREE_CODE (type) != REAL_TYPE)
    return false;

  if (warn_psabi && empty_base_seen)
    {
      static unsigned last_reported_type_uid;
      unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
      if (uid != last_reported_type_uid)
        {
          const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
          last_reported_type_uid = uid;
          if (empty_base_seen & 1)
            inform (input_location,
                    "parameter passing for argument of type %qT when C++17 "
                    "is enabled changed to match C++14 %{in GCC 10.1%}",
                    orig_type, url);
          else
            inform (input_location,
                    "parameter passing for argument of type %qT with "
                    "%<[[no_unique_address]]%> members changed "
                    "%{in GCC 10.1%}", orig_type, url);
        }
    }

  return true;
}
/* Return true if a function argument of type TYPE and mode MODE
   is to be passed in an integer register, or a pair of integer
   registers, if available.  */

static bool
s390_function_arg_integer (machine_mode mode, const_tree type)
{
  int size = s390_function_arg_size (mode, type);
  if (size > 8)
    return false;

  /* No type info available for some library calls ...  */
  if (!type)
    return GET_MODE_CLASS (mode) == MODE_INT
           || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));

  /* We accept small integral (and similar) types.  */
  if (INTEGRAL_TYPE_P (type)
      || POINTER_TYPE_P (type)
      || TREE_CODE (type) == NULLPTR_TYPE
      || TREE_CODE (type) == OFFSET_TYPE
      || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
    return true;

  /* We also accept structs of size 1, 2, 4, 8 that are not
     passed in floating-point registers.  */
  if (AGGREGATE_TYPE_P (type)
      && exact_log2 (size) >= 0
      && !s390_function_arg_float (mode, type))
    return true;

  return false;
}

/* Return 1 if a function argument ARG is to be passed by reference.
   The ABI specifies that only structures of size 1, 2, 4, or 8 bytes
   are passed by value, all other structures (and complex numbers) are
   passed by reference.  */

static bool
s390_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
{
  int size = s390_function_arg_size (arg.mode, arg.type);

  if (s390_function_arg_vector (arg.mode, arg.type))
    return false;

  if (size > 8)
    return true;

  if (tree type = arg.type)
    {
      if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
        return true;

      if (TREE_CODE (type) == COMPLEX_TYPE
          || TREE_CODE (type) == VECTOR_TYPE)
        return true;
    }

  return false;
}
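/* An illustrative sketch, not used by the port: the dispatch order the
   argument-passing predicates above share (and which s390_function_arg
   below follows) - vector candidates first, then FP, then GPR, with
   everything else going by reference or onto the stack.  */

static inline int
s390_sketch_classify_arg (machine_mode mode, const_tree type)
{
  if (s390_function_arg_vector (mode, type))
    return 0;  /* VR, if the argument is named and a VR is left.  */
  if (s390_function_arg_float (mode, type))
    return 1;  /* FPR, if one is left.  */
  if (s390_function_arg_integer (mode, type))
    return 2;  /* GPR or GPR pair, if enough are left.  */
  return 3;    /* Neither: by reference or on the stack
                  (see s390_pass_by_reference above).  */
}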
/* Update the data in CUM to advance over argument ARG.  */

static void
s390_function_arg_advance (cumulative_args_t cum_v,
                           const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (s390_function_arg_vector (arg.mode, arg.type))
    {
      /* We are called for unnamed vector stdarg arguments which are
         passed on the stack.  In this case this hook does not have to
         do anything since stack arguments are tracked by common
         code.  */
      if (!arg.named)
        return;
      cum->vrs += 1;
    }
  else if (s390_function_arg_float (arg.mode, arg.type))
    {
      cum->fprs += 1;
    }
  else if (s390_function_arg_integer (arg.mode, arg.type))
    {
      int size = s390_function_arg_size (arg.mode, arg.type);
      cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
    }
  else
    gcc_unreachable ();
}
/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   ARG is a description of the argument.

   On S/390, we use general purpose registers 2 through 6 to
   pass integer, pointer, and certain structure arguments, and
   floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
   to pass floating point arguments.  All remaining arguments
   are pushed to the stack.  */

static rtx
s390_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);

  if (!arg.named)
    s390_check_type_for_vector_abi (arg.type, true, false);

  if (s390_function_arg_vector (arg.mode, arg.type))
    {
      /* Vector arguments being part of the ellipsis are passed on the
         stack.  */
      if (!arg.named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
        return NULL_RTX;

      return gen_rtx_REG (arg.mode, cum->vrs + FIRST_VEC_ARG_REGNO);
    }
  else if (s390_function_arg_float (arg.mode, arg.type))
    {
      if (cum->fprs + 1 > FP_ARG_NUM_REG)
        return NULL_RTX;
      else
        return gen_rtx_REG (arg.mode, cum->fprs + 16);
    }
  else if (s390_function_arg_integer (arg.mode, arg.type))
    {
      int size = s390_function_arg_size (arg.mode, arg.type);
      int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;

      if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
        return NULL_RTX;
      else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
        return gen_rtx_REG (arg.mode, cum->gprs + 2);
      else if (n_gprs == 2)
        {
          rtvec p = rtvec_alloc (2);

          RTVEC_ELT (p, 0)
            = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
                                 const0_rtx);
          RTVEC_ELT (p, 1)
            = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
                                 GEN_INT (4));

          return gen_rtx_PARALLEL (arg.mode, p);
        }
    }

  /* After the real arguments, expand_call calls us once again with an
     end marker.  Whatever we return here is passed as operand 2 to the
     call expanders.

     We don't need this feature ...  */
  else if (arg.end_marker_p ())
    return const0_rtx;

  gcc_unreachable ();
}

/* Implement TARGET_FUNCTION_ARG_PADDING.  Vector arguments are
   left-justified when placed on the stack during parameter passing.  */

static pad_direction
s390_function_arg_padding (machine_mode mode, const_tree type)
{
  if (s390_function_arg_vector (mode, type))
    return PAD_UPWARD;

  return default_function_arg_padding (mode, type);
}
/* Return true if return values of type TYPE should be returned
   in a memory buffer whose address is passed by the caller as
   hidden first argument.  */

static bool
s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
{
  /* We accept small integral (and similar) types.  */
  if (INTEGRAL_TYPE_P (type)
      || POINTER_TYPE_P (type)
      || TREE_CODE (type) == OFFSET_TYPE
      || TREE_CODE (type) == REAL_TYPE)
    return int_size_in_bytes (type) > 8;

  /* vector types which fit into a VR.  */
  if (TARGET_VX_ABI
      && VECTOR_TYPE_P (type)
      && int_size_in_bytes (type) <= 16)
    return false;

  /* Aggregates and similar constructs are always returned
     in memory.  */
  if (AGGREGATE_TYPE_P (type)
      || TREE_CODE (type) == COMPLEX_TYPE
      || VECTOR_TYPE_P (type))
    return true;

  /* ??? We get called on all sorts of random stuff from
     aggregate_value_p.  We can't abort, but it's not clear
     what's safe to return.  Pretend it's a struct I guess.  */
  return true;
}

/* Function arguments and return values are promoted to word size.  */

static machine_mode
s390_promote_function_mode (const_tree type, machine_mode mode,
                            int *punsignedp,
                            const_tree fntype ATTRIBUTE_UNUSED,
                            int for_return ATTRIBUTE_UNUSED)
{
  if (INTEGRAL_MODE_P (mode)
      && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
    {
      if (type != NULL_TREE && POINTER_TYPE_P (type))
        *punsignedp = POINTERS_EXTEND_UNSIGNED;
      return Pmode;
    }

  return mode;
}
/* Define where to return a (scalar) value of type RET_TYPE.
   If RET_TYPE is null, define where to return a (scalar)
   value of mode MODE from a libcall.  */

static rtx
s390_function_and_libcall_value (machine_mode mode,
                                 const_tree ret_type,
                                 const_tree fntype_or_decl,
                                 bool outgoing ATTRIBUTE_UNUSED)
{
  /* For vector return types it is important to use the RET_TYPE
     argument whenever available since the middle-end might have
     changed the mode to a scalar mode.  */
  bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
                            || (!ret_type && VECTOR_MODE_P (mode)));

  /* For normal functions perform the promotion as
     promote_function_mode would do.  */
  if (ret_type)
    {
      int unsignedp = TYPE_UNSIGNED (ret_type);
      mode = promote_function_mode (ret_type, mode, &unsignedp,
                                    fntype_or_decl, 1);
    }

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
              || SCALAR_FLOAT_MODE_P (mode)
              || (TARGET_VX_ABI && vector_ret_type_p));
  gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));

  if (TARGET_VX_ABI && vector_ret_type_p)
    return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
  else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
    return gen_rtx_REG (mode, 16);
  else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
           || UNITS_PER_LONG == UNITS_PER_WORD)
    return gen_rtx_REG (mode, 2);
  else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
    {
      /* This case is triggered when returning a 64 bit value with
         -m31 -mzarch.  Although the value would fit into a single
         register it has to be forced into a 32 bit register pair in
         order to match the ABI.  */
      rtvec p = rtvec_alloc (2);

      RTVEC_ELT (p, 0)
        = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
      RTVEC_ELT (p, 1)
        = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));

      return gen_rtx_PARALLEL (mode, p);
    }

  gcc_unreachable ();
}

/* Define where to return a scalar return value of type RET_TYPE.  */

static rtx
s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
                     bool outgoing)
{
  return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
                                          fn_decl_or_type, outgoing);
}

/* Define where to return a scalar libcall return value of mode
   MODE.  */

static rtx
s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
{
  return s390_function_and_libcall_value (mode, NULL_TREE,
                                          NULL_TREE, true);
}
/* Create and return the va_list datatype.

   On S/390, va_list is an array type equivalent to

      typedef struct __va_list_tag
        {
            long __gpr;
            long __fpr;
            void *__overflow_arg_area;
            void *__reg_save_area;
        } va_list[1];

   where __gpr and __fpr hold the number of general purpose
   or floating point arguments used up to now, respectively,
   __overflow_arg_area points to the stack location of the
   next argument passed on the stack, and __reg_save_area
   always points to the start of the register area in the
   call frame of the current function.  The function prologue
   saves all registers used for argument passing into this
   area if the function uses variable arguments.  */

static tree
s390_build_builtin_va_list (void)
{
  tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;

  record = lang_hooks.types.make_type (RECORD_TYPE);

  type_decl =
    build_decl (BUILTINS_LOCATION,
                TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_gpr = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("__gpr"),
                      long_integer_type_node);
  f_fpr = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("__fpr"),
                      long_integer_type_node);
  f_ovf = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("__overflow_arg_area"),
                      ptr_type_node);
  f_sav = build_decl (BUILTINS_LOCATION,
                      FIELD_DECL, get_identifier ("__reg_save_area"),
                      ptr_type_node);

  va_list_gpr_counter_field = f_gpr;
  va_list_fpr_counter_field = f_fpr;

  DECL_FIELD_CONTEXT (f_gpr) = record;
  DECL_FIELD_CONTEXT (f_fpr) = record;
  DECL_FIELD_CONTEXT (f_ovf) = record;
  DECL_FIELD_CONTEXT (f_sav) = record;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_gpr;
  DECL_CHAIN (f_gpr) = f_fpr;
  DECL_CHAIN (f_fpr) = f_ovf;
  DECL_CHAIN (f_ovf) = f_sav;

  layout_type (record);

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
/* Implement va_start by filling the va_list structure VALIST.
   STDARG_P is always true, and ignored.
   NEXTARG points to the first anonymous stack argument.

   The following global variables are used to initialize
   the va_list structure:

     crtl->args.info:
       holds number of gprs and fprs used for named arguments.
     crtl->args.arg_offset_rtx:
       holds the offset of the first anonymous stack argument
       (relative to the virtual arg pointer).  */

static void
s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT n_gpr, n_fpr;
  int off;
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, t;

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  valist = build_simple_mem_ref (valist);
  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* Count number of gp and fp argument registers used.  */

  n_gpr = crtl->args.info.gprs;
  n_fpr = crtl->args.info.fprs;

  if (cfun->va_list_gpr_size)
    {
      t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
                  build_int_cst (NULL_TREE, n_gpr));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (cfun->va_list_fpr_size)
    {
      t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
                  build_int_cst (NULL_TREE, n_fpr));
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  if (flag_split_stack
      && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
          == NULL)
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
    {
      rtx reg;
      rtx_insn *seq;

      reg = gen_reg_rtx (Pmode);
      cfun->machine->split_stack_varargs_pointer = reg;

      start_sequence ();
      emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
      seq = get_insns ();
      end_sequence ();

      push_topmost_sequence ();
      emit_insn_after (seq, entry_of_function ());
      pop_topmost_sequence ();
    }

  /* Find the overflow area.
     FIXME: This currently is too pessimistic when the vector ABI is
     enabled.  In that case we *always* set up the overflow area
     pointer.  */
  if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
      || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
      || TARGET_VX_ABI)
    {
      if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
        t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
      else
        t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);

      off = INTVAL (crtl->args.arg_offset_rtx);
      off = off < 0 ? 0 : off;
      if (TARGET_DEBUG_ARG)
        fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
                 (int)n_gpr, (int)n_fpr, off);

      t = fold_build_pointer_plus_hwi (t, off);

      t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  /* Find the register save area.  */
  if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
      || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
    {
      t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
      t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);

      t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }
}
/* Implement va_arg by updating the va_list structure
   VALIST as required to retrieve an argument of type
   TYPE, and returning that argument.

   Generates code equivalent to:

   if (integral value) {
     if (size  <= 4 && args.gpr < 5 ||
         size  > 4 && args.gpr < 4 )
       ret = args.reg_save_area[args.gpr+8]
     else
       ret = *args.overflow_arg_area++;
   } else if (vector value) {
       ret = *args.overflow_arg_area;
       args.overflow_arg_area += size / 8;
   } else if (float value) {
     if (args.fgpr < 2)
       ret = args.reg_save_area[args.fpr+64]
     else
       ret = *args.overflow_arg_area++;
   } else if (aggregate value) {
     if (args.gpr < 5)
       ret = *args.reg_save_area[args.gpr]
     else
       ret = **args.overflow_arg_area++;
   } */

static tree
s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
                      gimple_seq *post_p ATTRIBUTE_UNUSED)
{
  tree f_gpr, f_fpr, f_ovf, f_sav;
  tree gpr, fpr, ovf, sav, reg, t, u;
  int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
  tree lab_false, lab_over = NULL_TREE;
  tree addr = create_tmp_var (ptr_type_node, "addr");
  bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
                        a stack slot.  */

  f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_fpr = DECL_CHAIN (f_gpr);
  f_ovf = DECL_CHAIN (f_fpr);
  f_sav = DECL_CHAIN (f_ovf);

  gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
  fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
  sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);

  /* The tree for args* cannot be shared between gpr/fpr and ovf since
     both appear on a lhs.  */
  valist = unshare_expr (valist);
  ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);

  size = int_size_in_bytes (type);

  s390_check_type_for_vector_abi (type, true, false);

  if (pass_va_arg_by_reference (type))
    {
      if (TARGET_DEBUG_ARG)
        {
          fprintf (stderr, "va_arg: aggregate type");
          debug_tree (type);
        }

      /* Aggregates are passed by reference.  */
      indirect_p = 1;
      reg = gpr;
      n_reg = 1;

      /* kernel stack layout on 31 bit: It is assumed here that no padding
         will be added by s390_frame_info because for va_args always an even
         number of gprs has to be saved r15-r2 = 14 regs.  */
      sav_ofs = 2 * UNITS_PER_LONG;
      sav_scale = UNITS_PER_LONG;
      size = UNITS_PER_LONG;
      max_reg = GP_ARG_NUM_REG - n_reg;
      left_align_p = false;
    }
  else if (s390_function_arg_vector (TYPE_MODE (type), type))
    {
      if (TARGET_DEBUG_ARG)
        {
          fprintf (stderr, "va_arg: vector type");
          debug_tree (type);
        }

      indirect_p = 0;
      reg = NULL_TREE;
      n_reg = 0;
      sav_ofs = 0;
      sav_scale = 8;
      max_reg = 0;
      left_align_p = true;
    }
  else if (s390_function_arg_float (TYPE_MODE (type), type))
    {
      if (TARGET_DEBUG_ARG)
        {
          fprintf (stderr, "va_arg: float type");
          debug_tree (type);
        }

      /* FP args go in FP registers, if present.  */
      indirect_p = 0;
      reg = fpr;
      n_reg = 1;
      sav_ofs = 16 * UNITS_PER_LONG;
      sav_scale = 8;
      max_reg = FP_ARG_NUM_REG - n_reg;
      left_align_p = false;
    }
  else
    {
      if (TARGET_DEBUG_ARG)
        {
          fprintf (stderr, "va_arg: other type");
          debug_tree (type);
        }

      /* Otherwise into GP registers.  */
      indirect_p = 0;
      reg = gpr;
      n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;

      /* kernel stack layout on 31 bit: It is assumed here that no padding
         will be added by s390_frame_info because for va_args always an even
         number of gprs has to be saved r15-r2 = 14 regs.  */
      sav_ofs = 2 * UNITS_PER_LONG;

      if (size < UNITS_PER_LONG)
        sav_ofs += UNITS_PER_LONG - size;

      sav_scale = UNITS_PER_LONG;
      max_reg = GP_ARG_NUM_REG - n_reg;
      left_align_p = false;
    }

  /* Pull the value out of the saved registers ...  */

  if (reg != NULL_TREE)
    {
      /*
        if (reg > ((typeof (reg))max_reg))
          goto lab_false;

        addr = sav + sav_ofs + reg * save_scale;

        goto lab_over;

        lab_false:
      */

      lab_false = create_artificial_label (UNKNOWN_LOCATION);
      lab_over = create_artificial_label (UNKNOWN_LOCATION);

      t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
      t = build2 (GT_EXPR, boolean_type_node, reg, t);
      u = build1 (GOTO_EXPR, void_type_node, lab_false);
      t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
      gimplify_and_add (t, pre_p);

      t = fold_build_pointer_plus_hwi (sav, sav_ofs);
      u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
                  fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
      t = fold_build_pointer_plus (t, u);

      gimplify_assign (addr, t, pre_p);

      gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));

      gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
    }

  /* ... Otherwise out of the overflow area.  */

  t = ovf;
  if (size < UNITS_PER_LONG && !left_align_p)
    t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);

  gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);

  gimplify_assign (addr, t, pre_p);

  if (size < UNITS_PER_LONG && left_align_p)
    t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
  else
    t = fold_build_pointer_plus_hwi (t, size);

  gimplify_assign (ovf, t, pre_p);

  if (reg != NULL_TREE)
    gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));

  /* Increment register save count.  */

  if (n_reg > 0)
    {
      u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
                  fold_convert (TREE_TYPE (reg), size_int (n_reg)));
      gimplify_and_add (u, pre_p);
    }

  if (indirect_p)
    {
      t = build_pointer_type_for_mode (build_pointer_type (type),
                                       ptr_mode, true);
      addr = fold_convert (t, addr);
      addr = build_va_arg_indirect_ref (addr);
    }
  else
    {
      t = build_pointer_type_for_mode (type, ptr_mode, true);
      addr = fold_convert (t, addr);
    }

  return build_va_arg_indirect_ref (addr);
}
/* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
   expanders.
   DEST  - Register location where CC will be stored.
   TDB   - Pointer to a 256 byte area where to store the transaction
           diagnostic block.  NULL if TDB is not needed.
   RETRY - Retry count value.  If non-NULL a retry loop for CC2
           is emitted.
   CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
           of the tbegin instruction pattern.  */

void
s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
{
  rtx retry_plus_two = gen_reg_rtx (SImode);
  rtx retry_reg = gen_reg_rtx (SImode);
  rtx_code_label *retry_label = NULL;

  if (retry != NULL_RTX)
    {
      emit_move_insn (retry_reg, retry);
      emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
      emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
      retry_label = gen_label_rtx ();
      emit_label (retry_label);
    }

  if (clobber_fprs_p)
    {
      if (TARGET_VX)
        emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
                                     tdb));
      else
        emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
                                 tdb));
    }
  else
    emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
                                     tdb));

  emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
                                        gen_rtvec (1, gen_rtx_REG (CCRAWmode,
                                                                   CC_REGNUM)),
                                        UNSPEC_CC_TO_INT));
  if (retry != NULL_RTX)
    {
      const int CC0 = 1 << 3;
      const int CC1 = 1 << 2;
      const int CC3 = 1 << 0;
      rtx jump;
      rtx count = gen_reg_rtx (SImode);
      rtx_code_label *leave_label = gen_label_rtx ();

      /* Exit for success and permanent failures.  */
      jump = s390_emit_jump (leave_label,
                             gen_rtx_EQ (VOIDmode,
                               gen_rtx_REG (CCRAWmode, CC_REGNUM),
                               gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
      LABEL_NUSES (leave_label) = 1;

      /* CC2 - transient failure.  Perform retry with ppa.  */
      emit_move_insn (count, retry_plus_two);
      emit_insn (gen_subsi3 (count, count, retry_reg));
      emit_insn (gen_tx_assist (count));
      jump = emit_jump_insn (gen_doloop_si64 (retry_label,
                                              retry_reg,
                                              retry_reg));
      JUMP_LABEL (jump) = retry_label;
      LABEL_NUSES (retry_label) = 1;
      emit_label (leave_label);
    }
}
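/* A usage sketch, not part of the port: roughly what the retry loop
   above implements, expressed with the documented s390 HTM builtins
   (available with -mhtm).  Illustrative only:

     int cc = __builtin_tbegin_retry ((void *) 0, retry_count);

   which behaves approximately like

     int cc, attempt = 0;
     while ((cc = __builtin_tbegin ((void *) 0)) == 2
            && attempt++ < retry_count)
       __builtin_tx_assist (attempt);   // ppa-based transient-abort help

   CC0 (success), CC1 and CC3 leave the loop; only CC2 (transient
   abort) retries.  */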
/* Return the decl for the target specific builtin with the function
   code FCODE.  */

static tree
s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
{
  if (fcode >= S390_BUILTIN_MAX)
    return error_mark_node;

  return s390_builtin_decls[fcode];
}

/* We call mcount before the function prologue.  So a profiled leaf
   function should stay a leaf function.  */

static bool
s390_keep_leaf_when_profiled ()
{
  return true;
}
/* Output assembly code for the trampoline template to
   stdio stream FILE.

   On S/390, we use gpr 1 internally in the trampoline code;
   gpr 0 is used to hold the static chain.  */

static void
s390_asm_trampoline_template (FILE *file)
{
  rtx op[2];
  op[0] = gen_rtx_REG (Pmode, 0);
  op[1] = gen_rtx_REG (Pmode, 1);

  if (TARGET_64BIT)
    {
      output_asm_insn ("basr\t%1,0", op);               /* 2 byte */
      output_asm_insn ("lmg\t%0,%1,14(%1)", op);        /* 6 byte */
      output_asm_insn ("br\t%1", op);                   /* 2 byte */
      ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
    }
  else
    {
      output_asm_insn ("basr\t%1,0", op);               /* 2 byte */
      output_asm_insn ("lm\t%0,%1,6(%1)", op);          /* 4 byte */
      output_asm_insn ("br\t%1", op);                   /* 2 byte */
      ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
    }
}
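/* An illustrative sketch, not used by the port: the 64-bit trampoline
   layout after s390_trampoline_init below patches it (offsets assume
   UNITS_PER_LONG == 8).  basr sets %r1 to the address of the following
   insn (offset 2), so the lmg at 14(%r1) reads from offset 16:

     0:  basr %r1,0
     2:  lmg  %r0,%r1,14(%r1)   loads %r0 <- [16], %r1 <- [24]
     8:  br   %r1
     10: padding up to 2 * UNITS_PER_LONG
     16: static chain   (stored at 2 * UNITS_PER_LONG)
     24: function addr  (stored at 3 * UNITS_PER_LONG)  */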
/* Emit RTL insns to initialize the variable parts of a trampoline.
   FNADDR is an RTX for the address of the function's pure code.
   CXT is an RTX for the static chain value for the function.  */

static void
s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx mem;

  emit_block_move (m_tramp, assemble_trampoline_template (),
                   GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
  emit_move_insn (mem, cxt);
  mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
  emit_move_insn (mem, fnaddr);
}
/* Output HW halfwords of NOPs on behalf of USER (the name of the
   option requesting them).  */

static void
output_asm_nops (const char *user, int hw)
{
  asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw);
  while (hw > 0)
    {
      if (hw >= 3)
        {
          output_asm_insn ("brcl\t0,0", NULL);
          hw -= 3;
        }
      else if (hw >= 2)
        {
          output_asm_insn ("bc\t0,0", NULL);
          hw -= 2;
        }
      else
        {
          output_asm_insn ("bcr\t0,0", NULL);
          hw -= 1;
        }
    }
}
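/* Worked example: for hw == 7 the loop above emits brcl 0,0
   (3 halfwords), brcl 0,0 (3 halfwords) and bcr 0,0 (1 halfword),
   i.e. 7 halfwords == 14 bytes of NOPs in total.  */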
/* Output assembler code to FILE to call a profiler hook.  */

void
s390_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
{
  rtx op[4];

  fprintf (file, "# function profiler \n");

  op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
  op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
  op[3] = GEN_INT (UNITS_PER_LONG);

  op[2] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount");
  SYMBOL_REF_FLAGS (op[2]) |= SYMBOL_FLAG_FUNCTION;
  if (flag_pic && !TARGET_64BIT)
    {
      op[2] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[2]), UNSPEC_PLT31);
      op[2] = gen_rtx_CONST (Pmode, op[2]);
    }

  if (flag_record_mcount)
    fprintf (file, "1:\n");

  if (flag_fentry)
    {
      if (flag_nop_mcount)
        output_asm_nops ("-mnop-mcount", /* brasl */ 3);
      else if (cfun->static_chain_decl)
        warning (OPT_Wcannot_profile, "nested functions cannot be profiled "
                 "with %<-mfentry%> on s390");
      else
        output_asm_insn ("brasl\t0,%2%K2", op);
    }
  else if (TARGET_64BIT)
    {
      if (flag_nop_mcount)
        output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* brasl */ 3 +
                         /* lg */ 3);
      else
        {
          output_asm_insn ("stg\t%0,%1", op);
          if (flag_dwarf2_cfi_asm)
            output_asm_insn (".cfi_rel_offset\t%0,%3", op);
          output_asm_insn ("brasl\t%0,%2%K2", op);
          output_asm_insn ("lg\t%0,%1", op);
          if (flag_dwarf2_cfi_asm)
            output_asm_insn (".cfi_restore\t%0", op);
        }
    }
  else
    {
      if (flag_nop_mcount)
        output_asm_nops ("-mnop-mcount", /* st */ 2 + /* brasl */ 3 +
                         /* l */ 2);
      else
        {
          output_asm_insn ("st\t%0,%1", op);
          if (flag_dwarf2_cfi_asm)
            output_asm_insn (".cfi_rel_offset\t%0,%3", op);
          output_asm_insn ("brasl\t%0,%2%K2", op);
          output_asm_insn ("l\t%0,%1", op);
          if (flag_dwarf2_cfi_asm)
            output_asm_insn (".cfi_restore\t%0", op);
        }
    }

  if (flag_record_mcount)
    {
      fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
      fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
      fprintf (file, "\t.previous\n");
    }
}
/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
   into its SYMBOL_REF_FLAGS.  */

static void
s390_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) == VAR_DECL)
    {
      /* Store the alignment to be able to check if we can use
         a larl/load-relative instruction.  We only handle the cases
         that can go wrong (i.e. no FUNC_DECLs).  */
      if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
        SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
      else if (DECL_ALIGN (decl) % 32)
        SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
      else if (DECL_ALIGN (decl) % 64)
        SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
    }

  /* Literal pool references don't have a decl so they are handled
     differently here.  We rely on the information in the MEM_ALIGN
     entry to decide upon the alignment.  */
  if (MEM_P (rtl)
      && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
      && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
    {
      if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
        SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
      else if (MEM_ALIGN (rtl) % 32)
        SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
      else if (MEM_ALIGN (rtl) % 64)
        SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
    }
}
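/* Worked example: DECL_ALIGN is measured in bits, so a 1-byte aligned
   variable has DECL_ALIGN == 8, fails the % 16 test and gets
   NOTALIGN2 - it cannot be accessed with larl, which requires 2-byte
   alignment.  A 4-byte aligned variable (DECL_ALIGN == 32) passes
   % 16 and % 32 but fails % 64, so only NOTALIGN8 is set, ruling out
   8-byte load-relative instructions while still permitting larl and
   4-byte load-relative accesses.  */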
/* Output thunk to FILE that implements a C++ virtual function call (with
   multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
   by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
   stored at VCALL_OFFSET in the vtable whose address is located at offset 0
   relative to the resulting this pointer.  */

static void
s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                      HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                      tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
  rtx op[10];
  int nonlocal = 0;

  assemble_start_function (thunk, fnname);
  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), file, 1);

  /* Operand 0 is the target function.  */
  op[0] = XEXP (DECL_RTL (function), 0);
  if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
    {
      nonlocal = 1;
      if (!TARGET_64BIT)
        {
          op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]), UNSPEC_GOT);
          op[0] = gen_rtx_CONST (Pmode, op[0]);
        }
    }

  /* Operand 1 is the 'this' pointer.  */
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    op[1] = gen_rtx_REG (Pmode, 3);
  else
    op[1] = gen_rtx_REG (Pmode, 2);

  /* Operand 2 is the delta.  */
  op[2] = GEN_INT (delta);

  /* Operand 3 is the vcall_offset.  */
  op[3] = GEN_INT (vcall_offset);

  /* Operand 4 is the temporary register.  */
  op[4] = gen_rtx_REG (Pmode, 1);

  /* Operands 5 to 8 can be used as labels.  */
  op[5] = NULL_RTX;
  op[6] = NULL_RTX;
  op[7] = NULL_RTX;
  op[8] = NULL_RTX;

  /* Operand 9 can be used for temporary register.  */
  op[9] = NULL_RTX;

  /* Generate code.  */
  if (TARGET_64BIT)
    {
      /* Setup literal pool pointer if required.  */
      if ((!DISP_IN_RANGE (delta)
           && !CONST_OK_FOR_K (delta)
           && !CONST_OK_FOR_Os (delta))
          || (!DISP_IN_RANGE (vcall_offset)
              && !CONST_OK_FOR_K (vcall_offset)
              && !CONST_OK_FOR_Os (vcall_offset)))
        {
          op[5] = gen_label_rtx ();
          output_asm_insn ("larl\t%4,%5", op);
        }

      /* Add DELTA to this pointer.  */
      if (delta)
        {
          if (CONST_OK_FOR_J (delta))
            output_asm_insn ("la\t%1,%2(%1)", op);
          else if (DISP_IN_RANGE (delta))
            output_asm_insn ("lay\t%1,%2(%1)", op);
          else if (CONST_OK_FOR_K (delta))
            output_asm_insn ("aghi\t%1,%2", op);
          else if (CONST_OK_FOR_Os (delta))
            output_asm_insn ("agfi\t%1,%2", op);
          else
            {
              op[6] = gen_label_rtx ();
              output_asm_insn ("agf\t%1,%6-%5(%4)", op);
            }
        }

      /* Perform vcall adjustment.  */
      if (vcall_offset)
        {
          if (DISP_IN_RANGE (vcall_offset))
            {
              output_asm_insn ("lg\t%4,0(%1)", op);
              output_asm_insn ("ag\t%1,%3(%4)", op);
            }
          else if (CONST_OK_FOR_K (vcall_offset))
            {
              output_asm_insn ("lghi\t%4,%3", op);
              output_asm_insn ("ag\t%4,0(%1)", op);
              output_asm_insn ("ag\t%1,0(%4)", op);
            }
          else if (CONST_OK_FOR_Os (vcall_offset))
            {
              output_asm_insn ("lgfi\t%4,%3", op);
              output_asm_insn ("ag\t%4,0(%1)", op);
              output_asm_insn ("ag\t%1,0(%4)", op);
            }
          else
            {
              op[7] = gen_label_rtx ();
              output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
              output_asm_insn ("ag\t%4,0(%1)", op);
              output_asm_insn ("ag\t%1,0(%4)", op);
            }
        }

      /* Jump to target.  */
      output_asm_insn ("jg\t%0%K0", op);

      /* Output literal pool if required.  */
      if (op[5])
        {
          output_asm_insn (".align\t4", op);
          targetm.asm_out.internal_label (file, "L",
                                          CODE_LABEL_NUMBER (op[5]));
        }
      if (op[6])
        {
          targetm.asm_out.internal_label (file, "L",
                                          CODE_LABEL_NUMBER (op[6]));
          output_asm_insn (".long\t%2", op);
        }
      if (op[7])
        {
          targetm.asm_out.internal_label (file, "L",
                                          CODE_LABEL_NUMBER (op[7]));
          output_asm_insn (".long\t%3", op);
        }
    }
  else
    {
      /* Setup base pointer if required.  */
      if (!vcall_offset
          || (!DISP_IN_RANGE (delta)
              && !CONST_OK_FOR_K (delta)
              && !CONST_OK_FOR_Os (delta))
          || (!DISP_IN_RANGE (vcall_offset)
              && !CONST_OK_FOR_K (vcall_offset)
              && !CONST_OK_FOR_Os (vcall_offset)))
        {
          op[5] = gen_label_rtx ();
          output_asm_insn ("basr\t%4,0", op);
          targetm.asm_out.internal_label (file, "L",
                                          CODE_LABEL_NUMBER (op[5]));
        }

      /* Add DELTA to this pointer.  */
      if (delta)
        {
          if (CONST_OK_FOR_J (delta))
            output_asm_insn ("la\t%1,%2(%1)", op);
          else if (DISP_IN_RANGE (delta))
            output_asm_insn ("lay\t%1,%2(%1)", op);
          else if (CONST_OK_FOR_K (delta))
            output_asm_insn ("ahi\t%1,%2", op);
          else if (CONST_OK_FOR_Os (delta))
            output_asm_insn ("afi\t%1,%2", op);
          else
            {
              op[6] = gen_label_rtx ();
              output_asm_insn ("a\t%1,%6-%5(%4)", op);
            }
        }

      /* Perform vcall adjustment.  */
      if (vcall_offset)
        {
          if (CONST_OK_FOR_J (vcall_offset))
            {
              output_asm_insn ("l\t%4,0(%1)", op);
              output_asm_insn ("a\t%1,%3(%4)", op);
            }
          else if (DISP_IN_RANGE (vcall_offset))
            {
              output_asm_insn ("l\t%4,0(%1)", op);
              output_asm_insn ("ay\t%1,%3(%4)", op);
            }
          else if (CONST_OK_FOR_K (vcall_offset))
            {
              output_asm_insn ("lhi\t%4,%3", op);
              output_asm_insn ("a\t%4,0(%1)", op);
              output_asm_insn ("a\t%1,0(%4)", op);
            }
          else if (CONST_OK_FOR_Os (vcall_offset))
            {
              output_asm_insn ("iilf\t%4,%3", op);
              output_asm_insn ("a\t%4,0(%1)", op);
              output_asm_insn ("a\t%1,0(%4)", op);
            }
          else
            {
              op[7] = gen_label_rtx ();
              output_asm_insn ("l\t%4,%7-%5(%4)", op);
              output_asm_insn ("a\t%4,0(%1)", op);
              output_asm_insn ("a\t%1,0(%4)", op);
            }

          /* We had to clobber the base pointer register.
             Re-setup the base pointer (with a different base).  */
          op[5] = gen_label_rtx ();
          output_asm_insn ("basr\t%4,0", op);
          targetm.asm_out.internal_label (file, "L",
                                          CODE_LABEL_NUMBER (op[5]));
        }

      /* Jump to target.  */
      op[8] = gen_label_rtx ();

      if (!flag_pic)
        output_asm_insn ("l\t%4,%8-%5(%4)", op);
      else if (!nonlocal)
        output_asm_insn ("a\t%4,%8-%5(%4)", op);
      /* We cannot call through .plt, since .plt requires %r12 loaded.  */
      else if (flag_pic == 1)
        {
          output_asm_insn ("a\t%4,%8-%5(%4)", op);
          output_asm_insn ("l\t%4,%0(%4)", op);
        }
      else if (flag_pic == 2)
        {
          op[9] = gen_rtx_REG (Pmode, 0);
          output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
          output_asm_insn ("a\t%4,%8-%5(%4)", op);
          output_asm_insn ("ar\t%4,%9", op);
          output_asm_insn ("l\t%4,0(%4)", op);
        }

      output_asm_insn ("br\t%4", op);

      /* Output literal pool.  */
      output_asm_insn (".align\t4", op);

      if (nonlocal && flag_pic == 2)
        output_asm_insn (".long\t%0", op);
      if (nonlocal)
        {
          op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
          SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
        }

      targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
      if (!flag_pic)
        output_asm_insn (".long\t%0", op);
      else
        output_asm_insn (".long\t%0-%5", op);

      if (op[6])
        {
          targetm.asm_out.internal_label (file, "L",
                                          CODE_LABEL_NUMBER (op[6]));
          output_asm_insn (".long\t%2", op);
        }
      if (op[7])
        {
          targetm.asm_out.internal_label (file, "L",
                                          CODE_LABEL_NUMBER (op[7]));
          output_asm_insn (".long\t%3", op);
        }
    }
  final_end_function ();
  assemble_end_function (thunk, fnname);
}
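/* For illustration, on a 64-bit target with a small DELTA and no
   VCALL_OFFSET the thunk above reduces to two instructions:

        la      %r2,<delta>(%r2)        # adjust the this pointer
        jg      <target>                # tail call to the real function

   (assuming the result is not returned in memory, so 'this' is in
   %r2).  The literal pool paths are only used for offsets that fit
   neither an la/lay displacement nor an aghi/agfi immediate.  */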
/* Output either an indirect jump or an indirect call
   (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
   using a branch trampoline disabling branch target prediction.  */

void
s390_indirect_branch_via_thunk (unsigned int regno,
                                unsigned int return_addr_regno,
                                rtx comparison_operator,
                                enum s390_indirect_branch_type type)
{
  enum s390_indirect_branch_option option;

  if (type == s390_indirect_branch_type_return)
    {
      if (s390_return_addr_from_memory ())
        option = s390_opt_function_return_mem;
      else
        option = s390_opt_function_return_reg;
    }
  else if (type == s390_indirect_branch_type_jump)
    option = s390_opt_indirect_branch_jump;
  else if (type == s390_indirect_branch_type_call)
    option = s390_opt_indirect_branch_call;
  else
    gcc_unreachable ();

  if (TARGET_INDIRECT_BRANCH_TABLE)
    {
      char label[32];

      ASM_GENERATE_INTERNAL_LABEL (label,
                                   indirect_branch_table_label[option],
                                   indirect_branch_table_label_no[option]++);
      ASM_OUTPUT_LABEL (asm_out_file, label);
    }

  if (return_addr_regno != INVALID_REGNUM)
    {
      gcc_assert (comparison_operator == NULL_RTX);
      fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
    }
  else
    {
      fputs (" \tjg", asm_out_file);
      if (comparison_operator != NULL_RTX)
        print_operand (asm_out_file, comparison_operator, 'C');

      fputs ("\t", asm_out_file);
    }

  if (TARGET_CPU_Z10)
    fprintf (asm_out_file,
             TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
             regno);
  else
    fprintf (asm_out_file,
             TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
             INDIRECT_BRANCH_THUNK_REGNUM, regno);

  if ((option == s390_opt_indirect_branch_jump
       && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
      || (option == s390_opt_indirect_branch_call
          && cfun->machine->indirect_branch_call == indirect_branch_thunk)
      || (option == s390_opt_function_return_reg
          && cfun->machine->function_return_reg == indirect_branch_thunk)
      || (option == s390_opt_function_return_mem
          && cfun->machine->function_return_mem == indirect_branch_thunk))
    {
      if (TARGET_CPU_Z10)
        indirect_branch_z10thunk_mask |= (1 << regno);
      else
        indirect_branch_prez10thunk_mask |= (1 << regno);
    }
}
/* Output an inline thunk for indirect jumps.  EXECUTE_TARGET can
   either be an address register or a label pointing to the location
   of the jump instruction.  */

void
s390_indirect_branch_via_inline_thunk (rtx execute_target)
{
  if (TARGET_INDIRECT_BRANCH_TABLE)
    {
      char label[32];

      ASM_GENERATE_INTERNAL_LABEL (label,
                                   indirect_branch_table_label[s390_opt_indirect_branch_jump],
                                   indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
      ASM_OUTPUT_LABEL (asm_out_file, label);
    }

  if (!TARGET_ZARCH)
    fputs ("\t.machinemode zarch\n", asm_out_file);

  if (REG_P (execute_target))
    fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
  else
    output_asm_insn ("\texrl\t%%r0,%0", &execute_target);

  if (!TARGET_ZARCH)
    fputs ("\t.machinemode esa\n", asm_out_file);

  fputs ("0:\tj\t0b\n", asm_out_file);
}
/* Return true if MODE is a valid mode for pointers.  */

static bool
s390_valid_pointer_mode (scalar_int_mode mode)
{
  return (mode == SImode || (TARGET_64BIT && mode == DImode));
}
/* Checks whether the given CALL_EXPR would use a caller
   saved register.  This is used to decide whether sibling call
   optimization could be performed on the respective function
   call.  */

static bool
s390_call_saved_register_used (tree call_expr)
{
  CUMULATIVE_ARGS cum_v;
  cumulative_args_t cum;
  tree parameter;
  rtx parm_rtx;
  int reg, i;

  INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
  cum = pack_cumulative_args (&cum_v);

  for (i = 0; i < call_expr_nargs (call_expr); i++)
    {
      parameter = CALL_EXPR_ARG (call_expr, i);
      gcc_assert (parameter);

      /* For an undeclared variable passed as parameter we will get
         an ERROR_MARK node here.  */
      if (TREE_CODE (parameter) == ERROR_MARK)
        return true;

      /* We assume that in the target function all parameters are
         named.  This only has an impact on vector argument register
         usage none of which is call-saved.  */
      function_arg_info arg (TREE_TYPE (parameter), /*named=*/true);
      apply_pass_by_reference_rules (&cum_v, arg);

      parm_rtx = s390_function_arg (cum, arg);

      s390_function_arg_advance (cum, arg);

      if (!parm_rtx)
        continue;

      if (REG_P (parm_rtx))
        {
          for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
            if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx)))
              return true;
        }

      if (GET_CODE (parm_rtx) == PARALLEL)
        {
          int i;

          for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
            {
              rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);

              gcc_assert (REG_P (r));

              for (reg = 0; reg < REG_NREGS (r); reg++)
                if (!call_used_or_fixed_reg_p (reg + REGNO (r)))
                  return true;
            }
        }
    }
  return false;
}
/* Return true if the given call expression can be
   turned into a sibling call.
   DECL holds the declaration of the function to be called whereas
   EXP is the call expression itself.  */

static bool
s390_function_ok_for_sibcall (tree decl, tree exp)
{
  /* The TPF epilogue uses register 1.  */
  if (TARGET_TPF_PROFILING)
    return false;

  /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
     which would have to be restored before the sibcall.  */
  if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* The thunks for indirect branches require r1 if no exrl is
     available.  r1 might not be available when doing a sibling
     call.  */
  if (TARGET_INDIRECT_BRANCH_NOBP_CALL
      && !TARGET_CPU_Z10
      && !decl)
    return false;

  /* Register 6 on s390 is available as an argument register but unfortunately
     "caller saved".  This makes functions needing this register for arguments
     not suitable for sibcalls.  */
  return !s390_call_saved_register_used (exp);
}
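/* Worked example for the r6 restriction above: the s390 ABI passes
   the first five integer arguments in %r2..%r6, so a call like
   foo (a, b, c, d, e) places the fifth argument in %r6.  Since %r6
   is call-saved, s390_call_saved_register_used returns true and no
   sibcall is emitted; with four or fewer register arguments the
   sibcall optimization remains possible.  */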
/* Return the fixed registers used for condition codes.  */

static bool
s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CC_REGNUM;
  *p2 = INVALID_REGNUM;

  return true;
}
/* This function is used by the call expanders of the machine description.
   It emits the call insn itself together with the necessary operations
   to adjust the target address and returns the emitted insn.
   ADDR_LOCATION is the target address rtx
   TLS_CALL the location of the thread-local symbol
   RESULT_REG the register where the result of the call should be stored
   RETADDR_REG the register where the return address should be stored
               If this parameter is NULL_RTX the call is considered
               to be a sibling call.  */

rtx_insn *
s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
                rtx retaddr_reg)
{
  bool plt31_call_p = false;
  rtx_insn *insn;
  rtx vec[4] = { NULL_RTX };
  int elts = 0;
  rtx *call = &vec[0];
  rtx *clobber_ret_reg = &vec[1];
  rtx *use = &vec[2];
  rtx *clobber_thunk_reg = &vec[3];
  int i;

  /* Direct function calls need special treatment.  */
  if (GET_CODE (addr_location) == SYMBOL_REF)
    {
      /* When calling a global routine in PIC mode, we must
         replace the symbol itself with the PLT stub.  */
      if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location) && !TARGET_64BIT)
        {
          if (retaddr_reg != NULL_RTX)
            {
              addr_location = gen_rtx_UNSPEC (Pmode,
                                              gen_rtvec (1, addr_location),
                                              UNSPEC_PLT31);
              addr_location = gen_rtx_CONST (Pmode, addr_location);
              plt31_call_p = true;
            }
          else
            /* For -fpic code the PLT entries might use r12 which is
               call-saved.  Therefore we cannot do a sibcall when
               calling directly using a symbol ref.  When reaching
               this point we decided (in s390_function_ok_for_sibcall)
               to do a sibcall for a function pointer but one of the
               optimizers was able to get rid of the function pointer
               by propagating the symbol ref into the call.  This
               optimization is illegal for S/390 so we turn the direct
               call into a indirect call again.  */
            addr_location = force_reg (Pmode, addr_location);
        }
    }

  /* If it is already an indirect call or the code above moved the
     SYMBOL_REF to somewhere else make sure the address can be found in
     register 1.  */
  if (retaddr_reg == NULL_RTX
      && GET_CODE (addr_location) != SYMBOL_REF
      && !plt31_call_p)
    {
      emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
      addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
    }

  if (TARGET_INDIRECT_BRANCH_NOBP_CALL
      && GET_CODE (addr_location) != SYMBOL_REF
      && !plt31_call_p)
    {
      /* Indirect branch thunks require the target to be a single GPR.  */
      addr_location = force_reg (Pmode, addr_location);

      /* Without exrl the indirect branch thunks need an additional
         register for larl;ex */
      if (!TARGET_CPU_Z10)
        {
          *clobber_thunk_reg = gen_rtx_REG (Pmode,
                                            INDIRECT_BRANCH_THUNK_REGNUM);
          *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
        }
    }

  addr_location = gen_rtx_MEM (QImode, addr_location);
  *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);

  if (result_reg != NULL_RTX)
    *call = gen_rtx_SET (result_reg, *call);

  if (retaddr_reg != NULL_RTX)
    {
      *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);

      if (tls_call != NULL_RTX)
        *use = gen_rtx_USE (VOIDmode, tls_call);
    }

  for (i = 0; i < 4; i++)
    if (vec[i] != NULL_RTX)
      elts++;

  if (elts > 1)
    {
      rtvec v;
      int e = 0;

      v = rtvec_alloc (elts);
      for (i = 0; i < 4; i++)
        if (vec[i] != NULL_RTX)
          {
            RTVEC_ELT (v, e) = vec[i];
            e++;
          }

      *call = gen_rtx_PARALLEL (VOIDmode, v);
    }

  insn = emit_call_insn (*call);

  /* 31-bit PLT stubs and tls calls use the GOT register implicitly.  */
  if (plt31_call_p || tls_call != NULL_RTX)
    {
      /* s390_function_ok_for_sibcall should
         have denied sibcalls in this case.  */
      gcc_assert (retaddr_reg != NULL_RTX);
      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
    }
  return insn;
}
/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.  */

static void
s390_conditional_register_usage (void)
{
  int i;

  if (flag_pic)
    fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
  fixed_regs[BASE_REGNUM] = 0;
  fixed_regs[RETURN_REGNUM] = 0;
  if (TARGET_64BIT)
    {
      for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
        call_used_regs[i] = 0;
    }
  else
    {
      call_used_regs[FPR4_REGNUM] = 0;
      call_used_regs[FPR6_REGNUM] = 0;
    }

  if (TARGET_SOFT_FLOAT)
    {
      for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
        call_used_regs[i] = fixed_regs[i] = 1;
    }

  /* Disable v16 - v31 for non-vector target.  */
  if (!TARGET_VX)
    {
      for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
        fixed_regs[i] = call_used_regs[i] = 1;
    }
}
/* Corresponding function to eh_return expander.  */

static GTY(()) rtx s390_tpf_eh_return_symbol;

void
s390_emit_tpf_eh_return (rtx target)
{
  rtx_insn *insn;
  rtx reg, orig_ra;

  if (!s390_tpf_eh_return_symbol)
    {
      s390_tpf_eh_return_symbol
        = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
      SYMBOL_REF_FLAGS (s390_tpf_eh_return_symbol) |= SYMBOL_FLAG_FUNCTION;
    }

  reg = gen_rtx_REG (Pmode, 2);
  orig_ra = gen_rtx_REG (Pmode, 3);

  emit_move_insn (reg, target);
  emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
  insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
                         gen_rtx_REG (Pmode, RETURN_REGNUM));
  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);

  emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
}
/* Rework the prologue/epilogue to avoid saving/restoring
   registers unnecessarily.  */

static void
s390_optimize_prologue (void)
{
  rtx_insn *insn, *new_insn, *next_insn;

  /* Do a final recompute of the frame-related data.  */
  s390_optimize_register_info ();

  /* If all special registers are in fact used, there's nothing we
     can do, so no point in walking the insn list.  */

  if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
      && cfun_frame_layout.last_save_gpr >= BASE_REGNUM)
    return;

  /* Search for prologue/epilogue insns and replace them.  */
  for (insn = get_insns (); insn; insn = next_insn)
    {
      int first, last, off;
      rtx set, base, offset;
      rtx pat;

      next_insn = NEXT_INSN (insn);

      if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
        continue;

      pat = PATTERN (insn);

      /* Remove ldgr/lgdr instructions used for saving and restore
         GPRs if possible.  */
      if (TARGET_Z10)
        {
          rtx tmp_pat = pat;

          if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
            tmp_pat = XVECEXP (pat, 0, 0);

          if (GET_CODE (tmp_pat) == SET
              && GET_MODE (SET_SRC (tmp_pat)) == DImode
              && REG_P (SET_SRC (tmp_pat))
              && REG_P (SET_DEST (tmp_pat)))
            {
              int src_regno = REGNO (SET_SRC (tmp_pat));
              int dest_regno = REGNO (SET_DEST (tmp_pat));
              int gpr_regno;
              int fpr_regno;

              if (!((GENERAL_REGNO_P (src_regno)
                     && FP_REGNO_P (dest_regno))
                    || (FP_REGNO_P (src_regno)
                        && GENERAL_REGNO_P (dest_regno))))
                continue;

              gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
              fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;

              /* GPR must be call-saved, FPR must be call-clobbered.  */
              if (!call_used_regs[fpr_regno]
                  || call_used_regs[gpr_regno])
                continue;

              /* It must not happen that what we once saved in an FPR now
                 needs a stack slot.  */
              gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);

              if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
                {
                  remove_insn (insn);
                  continue;
                }
            }
        }

      if (GET_CODE (pat) == PARALLEL
          && store_multiple_operation (pat, VOIDmode))
        {
          set = XVECEXP (pat, 0, 0);
          first = REGNO (SET_SRC (set));
          last = first + XVECLEN (pat, 0) - 1;
          offset = const0_rtx;
          base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
          off = INTVAL (offset);

          if (GET_CODE (base) != REG || off < 0)
            continue;
          if (cfun_frame_layout.first_save_gpr != -1
              && (cfun_frame_layout.first_save_gpr < first
                  || cfun_frame_layout.last_save_gpr > last))
            continue;
          if (REGNO (base) != STACK_POINTER_REGNUM
              && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
            continue;
          if (first > BASE_REGNUM || last < BASE_REGNUM)
            continue;

          if (cfun_frame_layout.first_save_gpr != -1)
            {
              rtx s_pat = save_gprs (base,
                                     off + (cfun_frame_layout.first_save_gpr
                                            - first) * UNITS_PER_LONG,
                                     cfun_frame_layout.first_save_gpr,
                                     cfun_frame_layout.last_save_gpr);
              new_insn = emit_insn_before (s_pat, insn);
              INSN_ADDRESSES_NEW (new_insn, -1);
            }

          remove_insn (insn);
          continue;
        }

      if (cfun_frame_layout.first_save_gpr == -1
          && GET_CODE (pat) == SET
          && GENERAL_REG_P (SET_SRC (pat))
          && GET_CODE (SET_DEST (pat)) == MEM)
        {
          set = pat;
          first = REGNO (SET_SRC (set));
          offset = const0_rtx;
          base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
          off = INTVAL (offset);

          if (GET_CODE (base) != REG || off < 0)
            continue;
          if (REGNO (base) != STACK_POINTER_REGNUM
              && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
            continue;

          remove_insn (insn);
          continue;
        }

      if (GET_CODE (pat) == PARALLEL
          && load_multiple_operation (pat, VOIDmode))
        {
          set = XVECEXP (pat, 0, 0);
          first = REGNO (SET_DEST (set));
          last = first + XVECLEN (pat, 0) - 1;
          offset = const0_rtx;
          base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
          off = INTVAL (offset);

          if (GET_CODE (base) != REG || off < 0)
            continue;

          if (cfun_frame_layout.first_restore_gpr != -1
              && (cfun_frame_layout.first_restore_gpr < first
                  || cfun_frame_layout.last_restore_gpr > last))
            continue;
          if (REGNO (base) != STACK_POINTER_REGNUM
              && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
            continue;
          if (first > BASE_REGNUM || last < BASE_REGNUM)
            continue;

          if (cfun_frame_layout.first_restore_gpr != -1)
            {
              rtx rpat = restore_gprs (base,
                                       off + (cfun_frame_layout.first_restore_gpr
                                              - first) * UNITS_PER_LONG,
                                       cfun_frame_layout.first_restore_gpr,
                                       cfun_frame_layout.last_restore_gpr);

              /* Remove REG_CFA_RESTOREs for registers that we no
                 longer need to save.  */
              REG_NOTES (rpat) = REG_NOTES (insn);
              for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
                if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
                    && ((int) REGNO (XEXP (*ptr, 0))
                        < cfun_frame_layout.first_restore_gpr))
                  *ptr = XEXP (*ptr, 1);
                else
                  ptr = &XEXP (*ptr, 1);
              new_insn = emit_insn_before (rpat, insn);
              RTX_FRAME_RELATED_P (new_insn) = 1;
              INSN_ADDRESSES_NEW (new_insn, -1);
            }

          remove_insn (insn);
          continue;
        }

      if (cfun_frame_layout.first_restore_gpr == -1
          && GET_CODE (pat) == SET
          && GENERAL_REG_P (SET_DEST (pat))
          && GET_CODE (SET_SRC (pat)) == MEM)
        {
          set = pat;
          first = REGNO (SET_DEST (set));
          offset = const0_rtx;
          base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
          off = INTVAL (offset);

          if (GET_CODE (base) != REG || off < 0)
            continue;

          if (REGNO (base) != STACK_POINTER_REGNUM
              && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
            continue;

          remove_insn (insn);
          continue;
        }
    }
}
/* On z10 and later the dynamic branch prediction must see the
   backward jump within a certain window.  If not it falls back to
   the static prediction.  This function rearranges the loop backward
   branch in a way which makes the static prediction always correct.
   The function returns true if it added an instruction.  */

static bool
s390_fix_long_loop_prediction (rtx_insn *insn)
{
  rtx set = single_set (insn);
  rtx code_label, label_ref;
  rtx_insn *uncond_jump;
  rtx_insn *cur_insn;
  rtx tmp;
  int distance;

  /* This will exclude branch on count and branch on index patterns
     since these are correctly statically predicted.

     The additional check for a PARALLEL is required here since
     single_set might be != NULL for PARALLELs where the set of the
     iteration variable is dead.  */
  if (GET_CODE (PATTERN (insn)) == PARALLEL
      || !set
      || SET_DEST (set) != pc_rtx
      || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
    return false;

  /* Skip conditional returns.  */
  if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
      && XEXP (SET_SRC (set), 2) == pc_rtx)
    return false;

  label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
               XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));

  gcc_assert (GET_CODE (label_ref) == LABEL_REF);

  code_label = XEXP (label_ref, 0);

  if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
      || INSN_ADDRESSES (INSN_UID (insn)) == -1
      || (INSN_ADDRESSES (INSN_UID (insn))
          - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
    return false;

  for (distance = 0, cur_insn = PREV_INSN (insn);
       distance < PREDICT_DISTANCE - 6;
       distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
    if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
      return false;

  rtx_code_label *new_label = gen_label_rtx ();
  uncond_jump = emit_jump_insn_after (
                  gen_rtx_SET (pc_rtx,
                               gen_rtx_LABEL_REF (VOIDmode, code_label)),
                  insn);
  emit_label_after (new_label, uncond_jump);

  tmp = XEXP (SET_SRC (set), 1);
  XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
  XEXP (SET_SRC (set), 2) = tmp;
  INSN_CODE (insn) = -1;

  XEXP (label_ref, 0) = new_label;
  JUMP_LABEL (insn) = new_label;
  JUMP_LABEL (uncond_jump) = code_label;

  return true;
}
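/* For illustration, the transformation above rewrites a too-distant
   backward branch

        jne     .Lloop_head             # conditional, backward

   into

        je      .Lskip                  # inverted condition, forward
        jg      .Lloop_head             # unconditional, backward
   .Lskip:

   The conditional branch is now a forward branch (statically
   predicted as not taken) and the backward branch is unconditional,
   so the static prediction is correct regardless of the prediction
   window.  */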
/* Returns 1 if INSN reads the value of REG for purposes not related
   to addressing of memory, and 0 otherwise.  */

static int
s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
{
  return reg_referenced_p (reg, PATTERN (insn))
         && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
}
/* Starting from INSN find_cond_jump looks downwards in the insn
   stream for a single jump insn which is the last user of the
   condition code set in INSN.  */

static rtx_insn *
find_cond_jump (rtx_insn *insn)
{
  for (; insn; insn = NEXT_INSN (insn))
    {
      rtx ite, cc;

      if (LABEL_P (insn))
        break;

      if (!JUMP_P (insn))
        {
          if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
            break;
          continue;
        }

      /* This will be triggered by a return.  */
      if (GET_CODE (PATTERN (insn)) != SET)
        break;

      gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
      ite = SET_SRC (PATTERN (insn));

      if (GET_CODE (ite) != IF_THEN_ELSE)
        break;

      cc = XEXP (XEXP (ite, 0), 0);
      if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
        break;

      if (find_reg_note (insn, REG_DEAD, cc))
        return insn;
      break;
    }

  return NULL;
}
/* Swap the condition in COND and the operands in OP0 and OP1 so that
   the semantics does not change.  If NULL_RTX is passed as COND the
   function tries to find the conditional jump starting with INSN.  */

static void
s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
{
  rtx tmp = *op0;

  if (cond == NULL_RTX)
    {
      rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
      rtx set = jump ? single_set (jump) : NULL_RTX;

      if (set == NULL_RTX)
        return;

      cond = XEXP (SET_SRC (set), 0);
    }

  *op0 = *op1;
  *op1 = tmp;
  PUT_CODE (cond, swap_condition (GET_CODE (cond)));
}
/* On z10, instructions of the compare-and-branch family have the
   property to access the register occurring as second operand with
   its bits complemented.  If such a compare is grouped with a second
   instruction that accesses the same register non-complemented, and
   if that register's value is delivered via a bypass, then the
   pipeline recycles, thereby causing significant performance decline.
   This function locates such situations and exchanges the two
   operands of the compare.  The function returns true whenever it
   added an insn.  */

static bool
s390_z10_optimize_cmp (rtx_insn *insn)
{
  rtx_insn *prev_insn, *next_insn;
  bool insn_added_p = false;
  rtx cond, *op0, *op1;

  if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      /* Handle compare and branch and branch on count
         instructions.  */
      rtx pattern = single_set (insn);

      if (!pattern
          || SET_DEST (pattern) != pc_rtx
          || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
        return false;

      cond = XEXP (SET_SRC (pattern), 0);
      op0 = &XEXP (cond, 0);
      op1 = &XEXP (cond, 1);
    }
  else if (GET_CODE (PATTERN (insn)) == SET)
    {
      rtx src, dest;

      /* Handle normal compare instructions.  */
      src = SET_SRC (PATTERN (insn));
      dest = SET_DEST (PATTERN (insn));

      if (!REG_P (dest)
          || !CC_REGNO_P (REGNO (dest))
          || GET_CODE (src) != COMPARE)
        return false;

      /* s390_swap_cmp will try to find the conditional
         jump when passing NULL_RTX as condition.  */
      cond = NULL_RTX;
      op0 = &XEXP (src, 0);
      op1 = &XEXP (src, 1);
    }
  else
    return false;

  if (!REG_P (*op0) || !REG_P (*op1))
    return false;

  if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
    return false;

  /* Swap the COMPARE arguments and its mask if there is a
     conflicting access in the previous insn.  */
  prev_insn = prev_active_insn (insn);
  if (prev_insn != NULL_RTX && INSN_P (prev_insn)
      && reg_referenced_p (*op1, PATTERN (prev_insn)))
    s390_swap_cmp (cond, op0, op1, insn);

  /* Check if there is a conflict with the next insn.  If there
     was no conflict with the previous insn, then swap the
     COMPARE arguments and its mask.  If we already swapped
     the operands, or if swapping them would cause a conflict
     with the previous insn, issue a NOP after the COMPARE in
     order to separate the two instructions.  */
  next_insn = next_active_insn (insn);
  if (next_insn != NULL_RTX && INSN_P (next_insn)
      && s390_non_addr_reg_read_p (*op1, next_insn))
    {
      if (prev_insn != NULL_RTX && INSN_P (prev_insn)
          && s390_non_addr_reg_read_p (*op0, prev_insn))
        {
          if (REGNO (*op1) == 0)
            emit_insn_after (gen_nop_lr1 (), insn);
          else
            emit_insn_after (gen_nop_lr0 (), insn);
          insn_added_p = true;
        }
      else
        s390_swap_cmp (cond, op0, op1, insn);
    }
  return insn_added_p;
}
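/* For illustration, in a z10 compare-and-branch such as

        crj     %r2,%r3,<cond>,<label>

   the second operand (%r3) is accessed with its bits complemented.
   If a neighboring instruction in the same group delivers %r3 via a
   bypass, the pipeline recycles.  The code above either swaps the
   operands (also swapping the condition, e.g. into
   crj %r3,%r2,<swapped-cond>,<label>) or separates the two
   instructions with an lr-style NOP.  */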
/* Number of INSNs to be scanned backward in the last BB of the loop
   and forward in the first BB of the loop.  This usually should be a
   bit more than the number of INSNs which could go into one
   group.  */
#define S390_OSC_SCAN_INSN_NUM 5

/* Scan LOOP for static OSC collisions and return true if an osc_break
   should be issued for this loop.  */
static bool
s390_adjust_loop_scan_osc (struct loop* loop)

{
  HARD_REG_SET modregs, newregs;
  rtx_insn *insn, *store_insn = NULL;
  rtx set;
  struct s390_address addr_store, addr_load;
  subrtx_iterator::array_type array;
  int insn_count;

  CLEAR_HARD_REG_SET (modregs);

  insn_count = 0;
  FOR_BB_INSNS_REVERSE (loop->latch, insn)
    {
      if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
        continue;

      insn_count++;
      if (insn_count > S390_OSC_SCAN_INSN_NUM)
        return false;

      find_all_hard_reg_sets (insn, &newregs, true);
      modregs |= newregs;

      set = single_set (insn);
      if (!set)
        continue;

      if (MEM_P (SET_DEST (set))
          && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
        {
          store_insn = insn;
          break;
        }
    }

  if (store_insn == NULL_RTX)
    return false;

  insn_count = 0;
  FOR_BB_INSNS (loop->header, insn)
    {
      if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
        continue;

      if (insn == store_insn)
        return false;

      insn_count++;
      if (insn_count > S390_OSC_SCAN_INSN_NUM)
        return false;

      find_all_hard_reg_sets (insn, &newregs, true);
      modregs |= newregs;

      set = single_set (insn);
      if (!set)
        continue;

      /* An intermediate store disrupts static OSC checking
         anyway.  */
      if (MEM_P (SET_DEST (set))
          && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
        return false;

      FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
        if (MEM_P (*iter)
            && s390_decompose_address (XEXP (*iter, 0), &addr_load)
            && rtx_equal_p (addr_load.base, addr_store.base)
            && rtx_equal_p (addr_load.indx, addr_store.indx)
            && rtx_equal_p (addr_load.disp, addr_store.disp))
          {
            if ((addr_load.base != NULL_RTX
                 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
                || (addr_load.indx != NULL_RTX
                    && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
              return true;
          }
    }
  return false;
}
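/* Illustrative loop exhibiting the hazard scanned for above:

   .Lheader:
        l       %r1,0(%r9)      # load among the first insns of the header
        ...
        st      %r1,0(%r9)      # store among the last insns of the latch
        j       .Lheader

   The store and the load use the same base/index/displacement, so
   every iteration's load hits the previous iteration's store queue
   entry.  An osc_break is requested when one of the address registers
   is also modified within the scan window, since then the hardware
   cannot disambiguate the two accesses early.  */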
/* Look for adjustments which can be done on simple innermost
   loops.  */
static void
s390_adjust_loops ()
{
  df_analyze ();
  compute_bb_for_insn ();

  /* Find the loops.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  for (auto loop : loops_list (cfun, LI_ONLY_INNERMOST))
    {
      if (dump_file)
        {
          flow_loop_dump (loop, dump_file, NULL, 0);
          fprintf (dump_file, ";;  OSC loop scan Loop: ");
        }
      if (loop->latch == NULL
          || pc_set (BB_END (loop->latch)) == NULL_RTX
          || !s390_adjust_loop_scan_osc (loop))
        {
          if (dump_file)
            {
              if (loop->latch == NULL)
                fprintf (dump_file, " multiple backward jumps\n");
              else
                {
                  fprintf (dump_file, " header insn: %d latch insn: %d ",
                           INSN_UID (BB_HEAD (loop->header)),
                           INSN_UID (BB_END (loop->latch)));
                  if (pc_set (BB_END (loop->latch)) == NULL_RTX)
                    fprintf (dump_file, " loop does not end with jump\n");
                  else
                    fprintf (dump_file, " not instrumented\n");
                }
            }
        }
      else
        {
          rtx_insn *new_insn;

          if (dump_file)
            fprintf (dump_file, " adding OSC break insn: ");
          new_insn = emit_insn_before (gen_osc_break (),
                                       BB_END (loop->latch));
          INSN_ADDRESSES_NEW (new_insn, -1);
        }
    }

  loop_optimizer_finalize ();

  df_finish_pass (false);
}
/* Perform machine-dependent processing.  */

static void
s390_reorg (void)
{
  struct constant_pool *pool;
  rtx_insn *insn;
  int hw_before, hw_after;

  if (s390_tune == PROCESSOR_2964_Z13)
    s390_adjust_loops ();

  /* Make sure all splits have been performed; splits after
     machine_dependent_reorg might confuse insn length counts.  */
  split_all_insns_noflow ();

  /* Install the main literal pool and the associated base
     register load insns.  The literal pool might be > 4096 bytes in
     size, so that some of its elements cannot be directly accessed.

     To fix this, we split the single literal pool into multiple
     pool chunks, reloading the pool base register at various
     points throughout the function to ensure it always points to
     the pool chunk the following code expects.  */

  /* Collect the literal pool.  */
  pool = s390_mainpool_start ();
  if (pool)
    {
      /* Finish up literal pool related changes.  */
      s390_mainpool_finish (pool);
    }
  else
    {
      /* If literal pool overflowed, chunkify it.  */
      pool = s390_chunkify_start ();
      s390_chunkify_finish (pool);
    }

  /* Generate out-of-pool execute target insns.  */
  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    {
      rtx label;
      rtx_insn *target;

      label = s390_execute_label (insn);
      if (!label)
        continue;

      gcc_assert (label != const0_rtx);

      target = emit_label (XEXP (label, 0));
      INSN_ADDRESSES_NEW (target, -1);

      if (JUMP_P (insn))
        {
          target = emit_jump_insn (s390_execute_target (insn));
          /* This is important in order to keep a table jump
             pointing at the jump table label.  Only this makes it
             being recognized as table jump.  */
          JUMP_LABEL (target) = JUMP_LABEL (insn);
        }
      else
        target = emit_insn (s390_execute_target (insn));
      INSN_ADDRESSES_NEW (target, -1);
    }

  /* Try to optimize prologue and epilogue further.  */
  s390_optimize_prologue ();

  /* Walk over the insns and do some >=z10 specific changes.  */
  if (s390_tune >= PROCESSOR_2097_Z10)
    {
      rtx_insn *insn;
      bool insn_added_p = false;

      /* The insn lengths and addresses have to be up to date for the
         following manipulations.  */
      shorten_branches (get_insns ());

      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
        {
          if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
            continue;

          if (JUMP_P (insn))
            insn_added_p |= s390_fix_long_loop_prediction (insn);

          if ((GET_CODE (PATTERN (insn)) == PARALLEL
               || GET_CODE (PATTERN (insn)) == SET)
              && s390_tune == PROCESSOR_2097_Z10)
            insn_added_p |= s390_z10_optimize_cmp (insn);
        }

      /* Adjust branches if we added new instructions.  */
      if (insn_added_p)
        shorten_branches (get_insns ());
    }

  s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
  if (hw_after > 0)
    {
      rtx_insn *insn;

      /* Insert NOPs for hotpatching.  */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
        /* Emit NOPs
            1. inside the area covered by debug information to allow setting
               breakpoints at the NOPs,
            2. before any insn which results in an asm instruction,
            3. before in-function labels to avoid jumping to the NOPs, for
               example as part of a loop,
            4. before any barrier in case the function is completely empty
               (__builtin_unreachable ()) and has neither internal labels nor
               active insns.  */
        if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
          break;
      /* Output a series of NOPs before the first active insn.  */
      while (insn && hw_after > 0)
        {
          if (hw_after >= 3)
            {
              emit_insn_before (gen_nop_6_byte (), insn);
              hw_after -= 3;
            }
          else if (hw_after >= 2)
            {
              emit_insn_before (gen_nop_4_byte (), insn);
              hw_after -= 2;
            }
          else
            {
              emit_insn_before (gen_nop_2_byte (), insn);
              hw_after -= 1;
            }
        }
    }
}
/* Return true if INSN is a fp load insn writing register REGNO.  */
static bool
s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
{
  rtx set;
  enum attr_type flag = s390_safe_attr_type (insn);

  if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
    return false;

  set = single_set (insn);

  if (set == NULL_RTX)
    return false;

  if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
    return false;

  if (REGNO (SET_DEST (set)) != regno)
    return false;

  return true;
}
/* This value describes the distance to be avoided between an
   arithmetic fp instruction and an fp load writing the same register.
   Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
   fine but the exact value has to be avoided.  Otherwise the FP
   pipeline will throw an exception causing a major penalty.  */
#define Z10_EARLYLOAD_DISTANCE 7
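/* Worked example: with the value 7, an arithmetic fp instruction
   followed exactly 7 insns later by an fp load writing the same
   register triggers the penalty, while distances of 6 or 8 insns are
   fine.  The function below therefore demotes such a load within the
   ready list so that the scheduler issues some other insn first.  */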
/* Rearrange the ready list in order to avoid the situation described
   for Z10_EARLYLOAD_DISTANCE.  A problematic load instruction is
   moved to the very end of the ready list.  */
static void
s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
{
  unsigned int regno;
  int nready = *nready_p;
  rtx_insn *tmp;
  int i;
  rtx_insn *insn;
  rtx set;
  enum attr_type flag;
  int distance;

  /* Skip DISTANCE - 1 active insns.  */
  for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
       distance > 0 && insn != NULL_RTX;
       distance--, insn = prev_active_insn (insn))
    if (CALL_P (insn) || JUMP_P (insn))
      return;

  if (insn == NULL_RTX)
    return;

  set = single_set (insn);

  if (set == NULL_RTX || !REG_P (SET_DEST (set))
      || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
    return;

  flag = s390_safe_attr_type (insn);

  if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
    return;

  regno = REGNO (SET_DEST (set));
  i = nready - 1;

  while (!s390_fpload_toreg (ready[i], regno) && i > 0)
    i--;

  if (!i)
    return;

  tmp = ready[i];
  memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
  ready[0] = tmp;
}
/* Returns TRUE if BB is entered via a fallthru edge and all other
   incoming edges are less than likely.  */
static bool
s390_bb_fallthru_entry_likely (basic_block bb)
{
  edge e, fallthru_edge;
  edge_iterator ei;

  if (!bb)
    return false;

  fallthru_edge = find_fallthru_edge (bb->preds);
  if (!fallthru_edge)
    return false;

  FOR_EACH_EDGE (e, ei, bb->preds)
    if (e != fallthru_edge
        && e->probability >= profile_probability::likely ())
      return false;

  return true;
}

struct s390_sched_state
{
  /* Number of insns in the group.  */
  int group_state;
  /* Execution side of the group.  */
  int side;
  /* Group can only hold two insns.  */
  bool group_of_two;
} s390_sched_state;

static struct s390_sched_state sched_state = {0, 1, false};
#define S390_SCHED_ATTR_MASK_CRACKED 0x1
#define S390_SCHED_ATTR_MASK_EXPANDED 0x2
#define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
#define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
#define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10

static unsigned int
s390_get_sched_attrmask (rtx_insn *insn)
{
  unsigned int mask = 0;

  switch (s390_tune)
    {
    case PROCESSOR_2827_ZEC12:
      if (get_attr_zEC12_cracked (insn))
        mask |= S390_SCHED_ATTR_MASK_CRACKED;
      if (get_attr_zEC12_expanded (insn))
        mask |= S390_SCHED_ATTR_MASK_EXPANDED;
      if (get_attr_zEC12_endgroup (insn))
        mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
      if (get_attr_zEC12_groupalone (insn))
        mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
      break;
    case PROCESSOR_2964_Z13:
      if (get_attr_z13_cracked (insn))
        mask |= S390_SCHED_ATTR_MASK_CRACKED;
      if (get_attr_z13_expanded (insn))
        mask |= S390_SCHED_ATTR_MASK_EXPANDED;
      if (get_attr_z13_endgroup (insn))
        mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
      if (get_attr_z13_groupalone (insn))
        mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
      if (get_attr_z13_groupoftwo (insn))
        mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
      break;
    case PROCESSOR_3906_Z14:
      if (get_attr_z14_cracked (insn))
        mask |= S390_SCHED_ATTR_MASK_CRACKED;
      if (get_attr_z14_expanded (insn))
        mask |= S390_SCHED_ATTR_MASK_EXPANDED;
      if (get_attr_z14_endgroup (insn))
        mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
      if (get_attr_z14_groupalone (insn))
        mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
      if (get_attr_z14_groupoftwo (insn))
        mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
      break;
    case PROCESSOR_8561_Z15:
    case PROCESSOR_ARCH14:
      if (get_attr_z15_cracked (insn))
        mask |= S390_SCHED_ATTR_MASK_CRACKED;
      if (get_attr_z15_expanded (insn))
        mask |= S390_SCHED_ATTR_MASK_EXPANDED;
      if (get_attr_z15_endgroup (insn))
        mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
      if (get_attr_z15_groupalone (insn))
        mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
      if (get_attr_z15_groupoftwo (insn))
        mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
      break;
    default:
      gcc_unreachable ();
    }
  return mask;
}
static unsigned int
s390_get_unit_mask (rtx_insn *insn, int *units)
{
  unsigned int mask = 0;

  switch (s390_tune)
    {
    case PROCESSOR_2964_Z13:
      *units = 4;
      if (get_attr_z13_unit_lsu (insn))
        mask |= 1 << 0;
      if (get_attr_z13_unit_fxa (insn))
        mask |= 1 << 1;
      if (get_attr_z13_unit_fxb (insn))
        mask |= 1 << 2;
      if (get_attr_z13_unit_vfu (insn))
        mask |= 1 << 3;
      break;
    case PROCESSOR_3906_Z14:
      *units = 4;
      if (get_attr_z14_unit_lsu (insn))
        mask |= 1 << 0;
      if (get_attr_z14_unit_fxa (insn))
        mask |= 1 << 1;
      if (get_attr_z14_unit_fxb (insn))
        mask |= 1 << 2;
      if (get_attr_z14_unit_vfu (insn))
        mask |= 1 << 3;
      break;
    case PROCESSOR_8561_Z15:
    case PROCESSOR_ARCH14:
      *units = 4;
      if (get_attr_z15_unit_lsu (insn))
        mask |= 1 << 0;
      if (get_attr_z15_unit_fxa (insn))
        mask |= 1 << 1;
      if (get_attr_z15_unit_fxb (insn))
        mask |= 1 << 2;
      if (get_attr_z15_unit_vfu (insn))
        mask |= 1 << 3;
      break;
    default:
      gcc_unreachable ();
    }
  return mask;
}
static bool
s390_is_fpd (rtx_insn *insn)
{
  if (insn == NULL_RTX)
    return false;

  return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
    || get_attr_z15_unit_fpd (insn);
}

static bool
s390_is_fxd (rtx_insn *insn)
{
  if (insn == NULL_RTX)
    return false;

  return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
    || get_attr_z15_unit_fxd (insn);
}

/* Returns TRUE if INSN is a long-running instruction.  */
static bool
s390_is_longrunning (rtx_insn *insn)
{
  if (insn == NULL_RTX)
    return false;

  return s390_is_fxd (insn) || s390_is_fpd (insn);
}
/* Return the scheduling score for INSN.  The higher the score the
   better.  The score is calculated from the OOO scheduling attributes
   of INSN and the scheduling state sched_state.  */
static int
s390_sched_score (rtx_insn *insn)
{
  unsigned int mask = s390_get_sched_attrmask (insn);
  int score = 0;

  switch (sched_state.group_state)
    {
    case 0:
      /* Try to put insns into the first slot which would otherwise
         break a group.  */
      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
        score += 5;
      if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
        score += 10;
      break;
    case 1:
      /* Prefer not cracked insns while trying to put together a
         group.  */
      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
          && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
          && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
        score += 10;
      if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
        score += 5;
      /* If we are in a group of two already, try to schedule another
         group-of-two insn to avoid shortening another group.  */
      if (sched_state.group_of_two
          && (mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
        score += 15;
      break;
    case 2:
      /* Prefer not cracked insns while trying to put together a
         group.  */
      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
          && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
          && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
        score += 10;
      /* Prefer endgroup insns in the last slot.  */
      if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
        score += 10;
      /* Try to avoid group-of-two insns in the last slot as they will
         shorten this group as well as the next one.  */
      if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
        score = MAX (0, score - 15);
      break;
    }

  if (s390_tune >= PROCESSOR_2964_Z13)
    {
      int units, i;
      unsigned unit_mask, m = 1;

      unit_mask = s390_get_unit_mask (insn, &units);
      gcc_assert (units <= MAX_SCHED_UNITS);

      /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
         ago the last insn of this unit type got scheduled.  This is
         supposed to help providing a proper instruction mix to the
         CPU.  */
      for (i = 0; i < units; i++, m <<= 1)
        if (m & unit_mask)
          score += (last_scheduled_unit_distance[i][sched_state.side]
                    * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE);

      int other_side = 1 - sched_state.side;

      /* Try to delay long-running insns when side is busy.  */
      if (s390_is_longrunning (insn))
        {
          if (s390_is_fxd (insn))
            {
              if (fxd_longrunning[sched_state.side]
                  && fxd_longrunning[other_side]
                  <= fxd_longrunning[sched_state.side])
                score = MAX (0, score - 10);

              else if (fxd_longrunning[other_side]
                       >= fxd_longrunning[sched_state.side])
                score += 10;
            }

          if (s390_is_fpd (insn))
            {
              if (fpd_longrunning[sched_state.side]
                  && fpd_longrunning[other_side]
                  <= fpd_longrunning[sched_state.side])
                score = MAX (0, score - 10);

              else if (fpd_longrunning[other_side]
                       >= fpd_longrunning[sched_state.side])
                score += 10;
            }
        }
    }

  return score;
}
/* This function is called via hook TARGET_SCHED_REORDER before
   issuing one insn from list READY which contains *NREADYP entries.
   For target z10 it reorders load instructions to avoid early load
   conflicts in the floating point pipeline  */
static int
s390_sched_reorder (FILE *file, int verbose,
                    rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
{
  if (s390_tune == PROCESSOR_2097_Z10
      && reload_completed
      && *nreadyp > 1)
    s390_z10_prevent_earlyload_conflicts (ready, nreadyp);

  if (s390_tune >= PROCESSOR_2827_ZEC12
      && reload_completed
      && *nreadyp > 1)
    {
      int i;
      int last_index = *nreadyp - 1;
      int max_index = -1;
      int max_score = -1;
      rtx_insn *tmp;

      /* Just move the insn with the highest score to the top (the
         end) of the list.  A full sort is not needed since a conflict
         in the hazard recognition cannot happen.  So the top insn in
         the ready list will always be taken.  */
      for (i = last_index; i >= 0; i--)
        {
          int score;

          if (recog_memoized (ready[i]) < 0)
            continue;

          score = s390_sched_score (ready[i]);
          if (score > max_score)
            {
              max_score = score;
              max_index = i;
            }
        }

      if (max_index != -1)
        {
          if (max_index != last_index)
            {
              tmp = ready[max_index];
              ready[max_index] = ready[last_index];
              ready[last_index] = tmp;

              if (verbose > 5)
                fprintf (file,
                         ";;\t\tBACKEND: move insn %d to the top of list\n",
                         INSN_UID (ready[last_index]));
            }
          else if (verbose > 5)
            fprintf (file,
                     ";;\t\tBACKEND: best insn %d already on top\n",
                     INSN_UID (ready[last_index]));
        }

      if (verbose > 5)
        {
          fprintf (file, "ready list ooo attributes - sched state: %d\n",
                   sched_state.group_state);

          for (i = last_index; i >= 0; i--)
            {
              unsigned int sched_mask;
              rtx_insn *insn = ready[i];

              if (recog_memoized (insn) < 0)
                continue;

              sched_mask = s390_get_sched_attrmask (insn);
              fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
                       INSN_UID (insn),
                       s390_sched_score (insn));
#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
                                           ((M) & sched_mask) ? #ATTR : "");
              PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
              PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
              PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
              PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
#undef PRINT_SCHED_ATTR
              if (s390_tune >= PROCESSOR_2964_Z13)
                {
                  unsigned int unit_mask, m = 1;
                  int units, j;

                  unit_mask = s390_get_unit_mask (insn, &units);
                  fprintf (file, "(units:");
                  for (j = 0; j < units; j++, m <<= 1)
                    if (m & unit_mask)
                      fprintf (file, " u%d", j);
                  fprintf (file, ")");
                }
              fprintf (file, "\n");
            }
        }
    }

  return s390_issue_rate ();
}
/* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
   the scheduler has issued INSN.  It stores the last issued insn into
   last_scheduled_insn in order to make it available for
   s390_sched_reorder.  */
static int
s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
{
  last_scheduled_insn = insn;

  bool ends_group = false;

  if (s390_tune >= PROCESSOR_2827_ZEC12
      && reload_completed
      && recog_memoized (insn) >= 0)
    {
      unsigned int mask = s390_get_sched_attrmask (insn);

      if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
        sched_state.group_of_two = true;

      /* If this is a group-of-two insn, we actually ended the last group
         and this insn is the first one of the new group.  */
      if (sched_state.group_state == 2 && sched_state.group_of_two)
        {
          sched_state.side = sched_state.side ? 0 : 1;
          sched_state.group_state = 0;
        }

      /* Longrunning and side bookkeeping.  */
      for (int i = 0; i < 2; i++)
        {
          fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
          fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
        }

      unsigned latency = insn_default_latency (insn);
      if (s390_is_longrunning (insn))
        {
          if (s390_is_fxd (insn))
            fxd_longrunning[sched_state.side] = latency;
          else
            fpd_longrunning[sched_state.side] = latency;
        }

      if (s390_tune >= PROCESSOR_2964_Z13)
        {
          int units, i;
          unsigned unit_mask, m = 1;

          unit_mask = s390_get_unit_mask (insn, &units);
          gcc_assert (units <= MAX_SCHED_UNITS);

          for (i = 0; i < units; i++, m <<= 1)
            if (m & unit_mask)
              last_scheduled_unit_distance[i][sched_state.side] = 0;
            else if (last_scheduled_unit_distance[i][sched_state.side]
                     < MAX_SCHED_MIX_DISTANCE)
              last_scheduled_unit_distance[i][sched_state.side]++;
        }

      if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
          || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
          || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0
          || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
        {
          sched_state.group_state = 0;
          ends_group = true;
        }
      else
        {
          switch (sched_state.group_state)
            {
            case 0:
              sched_state.group_state++;
              break;
            case 1:
              sched_state.group_state++;
              if (sched_state.group_of_two)
                {
                  sched_state.group_state = 0;
                  ends_group = true;
                }
              break;
            case 2:
              sched_state.group_state++;
              ends_group = true;
              break;
            }
        }

      if (verbose > 5)
        {
          unsigned int sched_mask;

          sched_mask = s390_get_sched_attrmask (insn);

          fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
#define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
          PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
          PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
          PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
          PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
#undef PRINT_SCHED_ATTR

          if (s390_tune >= PROCESSOR_2964_Z13)
            {
              unsigned int unit_mask, m = 1;
              int units, j;

              unit_mask = s390_get_unit_mask (insn, &units);
              fprintf (file, "(units:");
              for (j = 0; j < units; j++, m <<= 1)
                if (m & unit_mask)
                  fprintf (file, " %d", j);
              fprintf (file, ")");
            }
          fprintf (file, " sched state: %d\n", sched_state.group_state);

          if (s390_tune >= PROCESSOR_2964_Z13)
            {
              int units, j;

              s390_get_unit_mask (insn, &units);

              fprintf (file, ";;\t\tBACKEND: units on this side unused for: ");
              for (j = 0; j < units; j++)
                fprintf (file, "%d:%d ", j,
                         last_scheduled_unit_distance[j][sched_state.side]);
              fprintf (file, "\n");
            }
        }

      /* If this insn ended a group, the next will be on the other side.  */
      if (ends_group)
        {
          sched_state.group_state = 0;
          sched_state.side = sched_state.side ? 0 : 1;
          sched_state.group_of_two = false;
        }
    }

  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER)
    return more - 1;
  else
    return more;
}
static void
s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
                 int verbose ATTRIBUTE_UNUSED,
                 int max_ready ATTRIBUTE_UNUSED)
{
  /* If the next basic block is most likely entered via a fallthru edge
     we keep the last sched state.  Otherwise we start a new group.
     The scheduler traverses basic blocks in "instruction stream" ordering
     so if we see a fallthru edge here, sched_state will be of its
     source block.

     current_sched_info->prev_head is the insn before the first insn of the
     block of insns to be scheduled.
     */
  rtx_insn *insn = current_sched_info->prev_head
    ? NEXT_INSN (current_sched_info->prev_head) : NULL;
  basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
  if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
    {
      last_scheduled_insn = NULL;
      memset (last_scheduled_unit_distance, 0,
              MAX_SCHED_UNITS * NUM_SIDES * sizeof (int));
      sched_state.group_state = 0;
      sched_state.group_of_two = false;
    }
}
/* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
   how often the loop LOOP should be unrolled when tuning for CPUs with a
   built-in stride prefetcher.
   The loop is analyzed for memory accesses by walking all rtxs of the
   loop body.  Depending on the loop_depth and the number of memory
   accesses a new number <= nunroll is returned to improve the behavior
   of the hardware prefetch unit.  */
static unsigned
s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
{
  basic_block *bbs;
  rtx_insn *insn;
  unsigned i;
  unsigned mem_count = 0;

  if (s390_tune < PROCESSOR_2097_Z10)
    return nunroll;

  /* Count the number of memory references within the loop body.  */
  bbs = get_loop_body (loop);
  subrtx_iterator::array_type array;
  for (i = 0; i < loop->num_nodes; i++)
    FOR_BB_INSNS (bbs[i], insn)
      if (INSN_P (insn) && INSN_CODE (insn) != -1)
	{
	  rtx set;

	  /* The runtime of small loops with memory block operations
	     will be determined by the memory operation.  Doing
	     unrolling doesn't help here.  Measurements to confirm
	     this were only done on recent CPU levels.  So better do
	     not change anything for older CPUs.  */
	  if (s390_tune >= PROCESSOR_2964_Z13
	      && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
	      && ((set = single_set (insn)) != NULL_RTX)
	      && ((GET_MODE (SET_DEST (set)) == BLKmode
		   && (GET_MODE (SET_SRC (set)) == BLKmode
		       || SET_SRC (set) == const0_rtx))
		  || (GET_CODE (SET_SRC (set)) == COMPARE
		      && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
		      && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
	    return 1;

	  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
	    if (MEM_P (*iter))
	      mem_count += 1;
	}
  free (bbs);

  /* Prevent division by zero; no adjustment of nunroll is needed in
     this case.  */
  if (mem_count == 0)
    return nunroll;

  switch (loop_depth (loop))
    {
    case 1:
      return MIN (nunroll, 28 / mem_count);
    case 2:
      return MIN (nunroll, 22 / mem_count);
    default:
      return MIN (nunroll, 16 / mem_count);
    }
}
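/* Worked example for the switch above: a depth-1 loop with
   mem_count == 4 and nunroll == 8 yields MIN (8, 28 / 4) == 7, while the
   same loop at depth 2 would get MIN (8, 22 / 4) == 5.  The per-depth
   budgets 28/22/16 bound the number of memory streams per unrolled body,
   which (per the function comment) is what the stride prefetcher is
   sensitive to.  */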
/* Restore the current options.  This is a hook function and also called
   internally.  */

static void
s390_function_specific_restore (struct gcc_options *opts,
				struct gcc_options */* opts_set */,
				struct cl_target_option *ptr ATTRIBUTE_UNUSED)
{
  opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
}
static void
s390_default_align (struct gcc_options *opts)
{
  /* Set the default function alignment to 16 in order to get rid of
     some unwanted performance effects.  */
  if (opts->x_flag_align_functions && !opts->x_str_align_functions
      && opts->x_s390_tune >= PROCESSOR_2964_Z13)
    opts->x_str_align_functions = "16";
}
static void
s390_override_options_after_change (void)
{
  s390_default_align (&global_options);
}
static void
s390_option_override_internal (struct gcc_options *opts,
			       struct gcc_options *opts_set)
{
  /* Architecture mode defaults according to ABI.  */
  if (!(opts_set->x_target_flags & MASK_ZARCH))
    {
      if (TARGET_64BIT)
	opts->x_target_flags |= MASK_ZARCH;
      else
	opts->x_target_flags &= ~MASK_ZARCH;
    }

  /* Set the march default in case it hasn't been specified on cmdline.  */
  if (!opts_set->x_s390_arch)
    opts->x_s390_arch = PROCESSOR_2064_Z900;

  opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];

  /* Determine processor to tune for.  */
  if (!opts_set->x_s390_tune)
    opts->x_s390_tune = opts->x_s390_arch;

  opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];

  /* Sanity checks.  */
  if (opts->x_s390_arch == PROCESSOR_NATIVE
      || opts->x_s390_tune == PROCESSOR_NATIVE)
    gcc_unreachable ();
  if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
    error ("64-bit ABI not supported in ESA/390 mode");

  if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
      || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
      || opts->x_s390_function_return == indirect_branch_thunk_inline
      || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
      || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
    error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");

  if (opts->x_s390_indirect_branch != indirect_branch_keep)
    {
      if (!opts_set->x_s390_indirect_branch_call)
	opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;

      if (!opts_set->x_s390_indirect_branch_jump)
	opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
    }

  if (opts->x_s390_function_return != indirect_branch_keep)
    {
      if (!opts_set->x_s390_function_return_reg)
	opts->x_s390_function_return_reg = opts->x_s390_function_return;

      if (!opts_set->x_s390_function_return_mem)
	opts->x_s390_function_return_mem = opts->x_s390_function_return;
    }

  /* Enable hardware transactions if available and not explicitly
     disabled by user.  E.g. with -m31 -march=zEC12 -mzarch.  */
  if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
    {
      if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
	opts->x_target_flags |= MASK_OPT_HTM;
      else
	opts->x_target_flags &= ~MASK_OPT_HTM;
    }

  if (TARGET_OPT_VX_P (opts_set->x_target_flags))
    {
      if (TARGET_OPT_VX_P (opts->x_target_flags))
	{
	  if (!TARGET_CPU_VX_P (opts))
	    error ("hardware vector support not available on %s",
		   processor_table[(int)opts->x_s390_arch].name);
	  if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
	    error ("hardware vector support not available with "
		   "%<-msoft-float%>");
	}
    }
  else
    {
      if (TARGET_CPU_VX_P (opts))
	/* Enable vector support if available and not explicitly disabled
	   by user.  E.g. with -m31 -march=z13 -mzarch.  */
	opts->x_target_flags |= MASK_OPT_VX;
      else
	opts->x_target_flags &= ~MASK_OPT_VX;
    }

  /* Use hardware DFP if available and not explicitly disabled by
     user.  E.g. with -m31 -march=z10 -mzarch.  */
  if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
    {
      if (TARGET_DFP_P (opts))
	opts->x_target_flags |= MASK_HARD_DFP;
      else
	opts->x_target_flags &= ~MASK_HARD_DFP;
    }

  if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
    {
      if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
	{
	  if (!TARGET_CPU_DFP_P (opts))
	    error ("hardware decimal floating point instructions"
		   " not available on %s",
		   processor_table[(int)opts->x_s390_arch].name);
	  if (!TARGET_ZARCH_P (opts->x_target_flags))
	    error ("hardware decimal floating point instructions"
		   " not available in ESA/390 mode");
	}
      else
	opts->x_target_flags &= ~MASK_HARD_DFP;
    }

  if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
      && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
    {
      if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
	  && TARGET_HARD_DFP_P (opts->x_target_flags))
	error ("%<-mhard-dfp%> can%'t be used in conjunction with "
	       "%<-msoft-float%>");

      opts->x_target_flags &= ~MASK_HARD_DFP;
    }

  if (TARGET_BACKCHAIN_P (opts->x_target_flags)
      && TARGET_PACKED_STACK_P (opts->x_target_flags)
      && TARGET_HARD_FLOAT_P (opts->x_target_flags))
    error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
	   "supported in combination");

  if (opts->x_s390_stack_size)
    {
      if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
	error ("stack size must be greater than the stack guard value");
      else if (opts->x_s390_stack_size > 1 << 16)
	error ("stack size must not be greater than 64k");
    }
  else if (opts->x_s390_stack_guard)
    error ("%<-mstack-guard%> implies use of %<-mstack-size%>");

  /* Our implementation of the stack probe requires the probe interval
     to be used as displacement in an address operand.  The maximum
     probe interval currently is 64k.  This would exceed short
     displacements.  Trim that value down to 4k if that happens.  This
     might result in too many probes being generated only on the
     oldest supported machine level z900.  */
  if (!DISP_IN_RANGE ((1 << param_stack_clash_protection_probe_interval)))
    param_stack_clash_protection_probe_interval = 12;
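  /* Example for the trimming above: the largest probe interval, 1 << 16
     (64k), fails the DISP_IN_RANGE short-displacement check, so the
     interval is reduced to 1 << 12 (4k); as the comment notes, this can
     generate more probes than strictly necessary on z900.  */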
#if TARGET_TPF != 0
  if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_check))
    error ("-mtpf-trace-hook-prologue-check requires integer in range 0..4095");

  if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_target))
    error ("-mtpf-trace-hook-prologue-target requires integer in range 0..4095");

  if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_check))
    error ("-mtpf-trace-hook-epilogue-check requires integer in range 0..4095");

  if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_target))
    error ("-mtpf-trace-hook-epilogue-target requires integer in range 0..4095");

  if (s390_tpf_trace_skip)
    {
      opts->x_s390_tpf_trace_hook_prologue_target
	= TPF_TRACE_PROLOGUE_SKIP_TARGET;
      opts->x_s390_tpf_trace_hook_epilogue_target
	= TPF_TRACE_EPILOGUE_SKIP_TARGET;
    }
#endif

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
    opts->x_target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
    {
      SET_OPTION_IF_UNSET (opts, opts_set, param_max_unrolled_insns,
			   100);
      SET_OPTION_IF_UNSET (opts, opts_set, param_max_unroll_times, 32);
      SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peeled_insns,
			   2000);
      SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peel_times,
			   64);
    }

  SET_OPTION_IF_UNSET (opts, opts_set, param_max_pending_list_length,
		       256);
  /* values for loop prefetching */
  SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size, 256);
  SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size, 128);
  /* s390 has more than 2 levels and the size is much larger.  Since
     we are always running virtualized, assume that we only get a small
     part of the caches above l1.  */
  SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size, 1500);
  SET_OPTION_IF_UNSET (opts, opts_set,
		       param_prefetch_min_insn_to_mem_ratio, 2);
  SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, 6);

  /* Use the alternative scheduling-pressure algorithm by default.  */
  SET_OPTION_IF_UNSET (opts, opts_set, param_sched_pressure_algorithm, 2);
  SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);

  /* Set the default alignment.  */
  s390_default_align (opts);

  /* Call target specific restore function to do post-init work.  At the
     moment, this just sets opts->x_s390_cost_pointer.  */
  s390_function_specific_restore (opts, opts_set, NULL);

  /* Check whether -mfentry is supported.  It cannot be used in 31-bit mode,
     because 31-bit PLT stubs assume that %r12 contains GOT address, which is
     not the case when the code runs before the prolog.  */
  if (opts->x_flag_fentry && !TARGET_64BIT)
    error ("%<-mfentry%> is supported only for 64-bit CPUs");
}
static void
s390_option_override (void)
{
  unsigned int i;
  cl_deferred_option *opt;
  vec<cl_deferred_option> *v =
    (vec<cl_deferred_option> *) s390_deferred_options;

  if (v)
    FOR_EACH_VEC_ELT (*v, i, opt)
      {
	switch (opt->opt_index)
	  {
	  case OPT_mhotpatch_:
	    {
	      int val1;
	      int val2;
	      char *s = strtok (ASTRDUP (opt->arg), ",");
	      char *t = strtok (NULL, "\0");

	      if (t != NULL)
		{
		  val1 = integral_argument (s);
		  val2 = integral_argument (t);
		}
	      else
		{
		  val1 = -1;
		  val2 = -1;
		}
	      if (val1 == -1 || val2 == -1)
		{
		  /* argument is not a plain number */
		  error ("arguments to %qs should be non-negative integers",
			 "-mhotpatch=n,m");
		  break;
		}
	      else if (val1 > s390_hotpatch_hw_max
		       || val2 > s390_hotpatch_hw_max)
		{
		  error ("argument to %qs is too large (max. %d)",
			 "-mhotpatch=n,m", s390_hotpatch_hw_max);
		  break;
		}
	      s390_hotpatch_hw_before_label = val1;
	      s390_hotpatch_hw_after_label = val2;
	      break;
	    }
	  default:
	    gcc_unreachable ();
	  }
      }

  /* Set up function hooks.  */
  init_machine_status = s390_init_machine_status;

  s390_option_override_internal (&global_options, &global_options_set);

  /* Save the initial options in case the user does function specific
     options.  */
  target_option_default_node
    = build_target_option_node (&global_options, &global_options_set);
  target_option_current_node = target_option_default_node;

  /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
     requires the arch flags to be evaluated already.  Since prefetching
     is beneficial on s390, we enable it if available.  */
  if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
    flag_prefetch_loop_arrays = 1;

  if (!s390_pic_data_is_text_relative && !flag_pic)
    error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
	   "%<-fpic%>/%<-fPIC%>");

#if TARGET_TPF != 0
  /* Don't emit DWARF3/4 unless specifically selected.  The TPF
     debuggers do not yet support DWARF 3/4.  */
  if (!OPTION_SET_P (dwarf_strict))
    dwarf_strict = 1;
  if (!OPTION_SET_P (dwarf_version))
    dwarf_version = 2;
#endif
}
#if S390_USE_TARGET_ATTRIBUTE
/* Inner function to process the attribute((target(...))), take an argument and
   set the current options from the argument.  If we have a list, recursively
   go over the list.  */

static bool
s390_valid_target_attribute_inner_p (tree args,
				     struct gcc_options *opts,
				     struct gcc_options *new_opts_set,
				     bool force_pragma)
{
  char *next_optstr;
  bool ret = true;

#define S390_ATTRIB(S,O,A)  { S, sizeof (S)-1, O, A, 0 }
#define S390_PRAGMA(S,O,A)  { S, sizeof (S)-1, O, A, 1 }
  static const struct
  {
    const char *string;
    size_t len;
    int opt;
    int has_arg;
    int only_as_pragma;
  } attrs[] = {
    /* enum options */
    S390_ATTRIB ("arch=", OPT_march_, 1),
    S390_ATTRIB ("tune=", OPT_mtune_, 1),
    /* uinteger options */
    S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
    S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
    S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
    S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
    /* flag options */
    S390_ATTRIB ("backchain", OPT_mbackchain, 0),
    S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
    S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
    S390_ATTRIB ("htm", OPT_mhtm, 0),
    S390_ATTRIB ("vx", OPT_mvx, 0),
    S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
    S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
    S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
    S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
    S390_PRAGMA ("zvector", OPT_mzvector, 0),
    /* boolean options */
    S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
  };

  /* If this is a list, recurse to get the options.  */
  if (TREE_CODE (args) == TREE_LIST)
    {
      int num_pragma_values;
      int i;

      /* Note: attribs.c:decl_attributes prepends the values from
	 current_target_pragma to the list of target attributes.  To determine
	 whether we're looking at a value of the attribute or the pragma we
	 assume that the first [list_length (current_target_pragma)] values in
	 the list are the values from the pragma.  */
      num_pragma_values = (!force_pragma && current_target_pragma != NULL)
	? list_length (current_target_pragma) : 0;
      for (i = 0; args; args = TREE_CHAIN (args), i++)
	{
	  bool is_pragma;

	  is_pragma = (force_pragma || i < num_pragma_values);
	  if (TREE_VALUE (args)
	      && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
						       opts, new_opts_set,
						       is_pragma))
	    ret = false;
	}
      return ret;
    }

  else if (TREE_CODE (args) != STRING_CST)
    {
      error ("attribute %<target%> argument not a string");
      return false;
    }

  /* Handle multiple arguments separated by commas.  */
  next_optstr = ASTRDUP (TREE_STRING_POINTER (args));

  while (next_optstr && *next_optstr != '\0')
    {
      char *p = next_optstr;
      char *orig_p = p;
      char *comma = strchr (next_optstr, ',');
      size_t len, opt_len;
      int opt;
      bool opt_set_p;
      char ch;
      unsigned i;
      int mask = 0;
      enum cl_var_type var_type;
      bool found;

      if (comma)
	{
	  *comma = '\0';
	  len = comma - next_optstr;
	  next_optstr = comma + 1;
	}
      else
	{
	  len = strlen (p);
	  next_optstr = NULL;
	}

      /* Recognize no-xxx.  */
      if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
	{
	  opt_set_p = false;
	  p += 3;
	  len -= 3;
	}
      else
	opt_set_p = true;

      /* Find the option.  */
      ch = *p;
      found = false;
      for (i = 0; i < ARRAY_SIZE (attrs); i++)
	{
	  opt_len = attrs[i].len;
	  if (ch == attrs[i].string[0]
	      && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
	      && memcmp (p, attrs[i].string, opt_len) == 0)
	    {
	      opt = attrs[i].opt;
	      if (!opt_set_p && cl_options[opt].cl_reject_negative)
		continue;
	      mask = cl_options[opt].var_value;
	      var_type = cl_options[opt].var_type;
	      found = true;
	      break;
	    }
	}

      /* Process the option.  */
      if (!found)
	{
	  error ("attribute(target(\"%s\")) is unknown", orig_p);
	  return false;
	}
      else if (attrs[i].only_as_pragma && !force_pragma)
	{
	  /* Value is not allowed for the target attribute.  */
	  error ("value %qs is not supported by attribute %<target%>",
		 attrs[i].string);
	  return false;
	}

      else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
	{
	  if (var_type == CLVC_BIT_CLEAR)
	    opt_set_p = !opt_set_p;

	  if (opt_set_p)
	    opts->x_target_flags |= mask;
	  else
	    opts->x_target_flags &= ~mask;
	  new_opts_set->x_target_flags |= mask;
	}

      else if (cl_options[opt].var_type == CLVC_INTEGER)
	{
	  int value;

	  if (cl_options[opt].cl_uinteger)
	    {
	      /* Unsigned integer argument.  Code based on the function
		 decode_cmdline_option () in opts-common.c.  */
	      value = integral_argument (p + opt_len);
	    }
	  else
	    value = (opt_set_p) ? 1 : 0;

	  if (value != -1)
	    {
	      struct cl_decoded_option decoded;

	      /* Value range check; only implemented for numeric and boolean
		 options at the moment.  */
	      generate_option (opt, NULL, value, CL_TARGET, &decoded);
	      s390_handle_option (opts, new_opts_set, &decoded, input_location);
	      set_option (opts, new_opts_set, opt, value,
			  p + opt_len, DK_UNSPECIFIED, input_location,
			  global_dc);
	    }
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else if (cl_options[opt].var_type == CLVC_ENUM)
	{
	  bool arg_ok;
	  int value;

	  arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
	  if (arg_ok)
	    set_option (opts, new_opts_set, opt, value,
			p + opt_len, DK_UNSPECIFIED, input_location,
			global_dc);
	  else
	    {
	      error ("attribute(target(\"%s\")) is unknown", orig_p);
	      ret = false;
	    }
	}

      else
	gcc_unreachable ();
    }
  return ret;
}
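/* Example of an attribute string handled by the parser above
   (hypothetical user code, shown for illustration only):

     void foo (void) __attribute__ ((target ("arch=z13,no-htm")));

   "arch=z13" matches the S390_ATTRIB ("arch=", OPT_march_, 1) entry and
   carries an argument after the '='; "no-htm" is matched via the "no-"
   prefix handling and clears the OPT_mhtm flag.  */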
/* Return a TARGET_OPTION_NODE tree of the target options listed or NULL.  */

static tree
s390_valid_target_attribute_tree (tree args,
				  struct gcc_options *opts,
				  const struct gcc_options *opts_set,
				  bool force_pragma)
{
  tree t = NULL_TREE;
  struct gcc_options new_opts_set;

  memset (&new_opts_set, 0, sizeof (new_opts_set));

  /* Process each of the options on the chain.  */
  if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
					     force_pragma))
    return error_mark_node;

  /* If some option was set (even if it has not changed), rerun
     s390_option_override_internal, and then save the options away.  */
  if (new_opts_set.x_target_flags
      || new_opts_set.x_s390_arch
      || new_opts_set.x_s390_tune
      || new_opts_set.x_s390_stack_guard
      || new_opts_set.x_s390_stack_size
      || new_opts_set.x_s390_branch_cost
      || new_opts_set.x_s390_warn_framesize
      || new_opts_set.x_s390_warn_dynamicstack_p)
    {
      const unsigned char *src = (const unsigned char *)opts_set;
      unsigned char *dest = (unsigned char *)&new_opts_set;
      unsigned int i;

      /* Merge the original option flags into the new ones.  */
      for (i = 0; i < sizeof (*opts_set); i++)
	dest[i] |= src[i];

      /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
      s390_option_override_internal (opts, &new_opts_set);
      /* Save the current options unless we are validating options for
	 #pragma.  */
      t = build_target_option_node (opts, &new_opts_set);
    }
  return t;
}
/* Hook to validate attribute((target("string"))).  */

static bool
s390_valid_target_attribute_p (tree fndecl,
			       tree ARG_UNUSED (name),
			       tree args,
			       int ARG_UNUSED (flags))
{
  struct gcc_options func_options, func_options_set;
  tree new_target, new_optimize;
  bool ret = true;

  /* attribute((target("default"))) does nothing, beyond
     affecting multi-versioning.  */
  if (TREE_VALUE (args)
      && TREE_CODE (TREE_VALUE (args)) == STRING_CST
      && TREE_CHAIN (args) == NULL_TREE
      && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
    return true;

  tree old_optimize
    = build_optimization_node (&global_options, &global_options_set);

  /* Get the optimization options of the current function.  */
  tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);

  if (!func_optimize)
    func_optimize = old_optimize;

  /* Init func_options.  */
  memset (&func_options, 0, sizeof (func_options));
  init_options_struct (&func_options, NULL);
  lang_hooks.init_options_struct (&func_options);
  memset (&func_options_set, 0, sizeof (func_options_set));

  cl_optimization_restore (&func_options, &func_options_set,
			   TREE_OPTIMIZATION (func_optimize));

  /* Initialize func_options to the default before its target options can
     be set.  */
  cl_target_option_restore (&func_options, &func_options_set,
			    TREE_TARGET_OPTION (target_option_default_node));

  new_target = s390_valid_target_attribute_tree (args, &func_options,
						 &global_options_set,
						 (args ==
						  current_target_pragma));
  new_optimize = build_optimization_node (&func_options, &func_options_set);
  if (new_target == error_mark_node)
    ret = false;
  else if (fndecl && new_target)
    {
      DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
      if (old_optimize != new_optimize)
	DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
    }
  return ret;
}
/* Hook to determine if one function can safely inline another.  */

static bool
s390_can_inline_p (tree caller, tree callee)
{
  tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  if (!callee_tree)
    callee_tree = target_option_default_node;
  if (!caller_tree)
    caller_tree = target_option_default_node;
  if (callee_tree == caller_tree)
    return true;

  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
  bool ret = true;

  if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
      != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
    ret = false;

  /* Don't inline functions to be compiled for a more recent arch into a
     function for an older arch.  */
  else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
    ret = false;

  /* Inlining a hard float function into a soft float function is only
     allowed if the hard float function doesn't actually make use of
     floating point.

     We are called from FEs for multi-versioning call optimization, so
     beware of ipa_fn_summaries not available.  */
  else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
	     && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
	    || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
		&& TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
	   && (! ipa_fn_summaries
	       || ipa_fn_summaries->get
	       (cgraph_node::get (callee))->fp_expressions))
    ret = false;

  return ret;
}
#endif
/* Set VAL to correct enum value according to the indirect-branch or
   function-return attribute in ATTR.  */

static inline void
s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
{
  const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
  if (strcmp (str, "keep") == 0)
    *val = indirect_branch_keep;
  else if (strcmp (str, "thunk") == 0)
    *val = indirect_branch_thunk;
  else if (strcmp (str, "thunk-inline") == 0)
    *val = indirect_branch_thunk_inline;
  else if (strcmp (str, "thunk-extern") == 0)
    *val = indirect_branch_thunk_extern;
}
/* Memorize the setting for -mindirect-branch* and -mfunction-return*
   from either the cmdline or the function attributes in
   cfun->machine.  */

static void
s390_indirect_branch_settings (tree fndecl)
{
  tree attr;

  if (!fndecl)
    return;

  /* Initialize with the cmdline options and let the attributes
     override it.  */
  cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
  cfun->machine->indirect_branch_call = s390_indirect_branch_call;

  cfun->machine->function_return_reg = s390_function_return_reg;
  cfun->machine->function_return_mem = s390_function_return_mem;

  if ((attr = lookup_attribute ("indirect_branch",
				DECL_ATTRIBUTES (fndecl))))
    {
      s390_indirect_branch_attrvalue (attr,
				      &cfun->machine->indirect_branch_jump);
      s390_indirect_branch_attrvalue (attr,
				      &cfun->machine->indirect_branch_call);
    }

  if ((attr = lookup_attribute ("indirect_branch_jump",
				DECL_ATTRIBUTES (fndecl))))
    s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);

  if ((attr = lookup_attribute ("indirect_branch_call",
				DECL_ATTRIBUTES (fndecl))))
    s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);

  if ((attr = lookup_attribute ("function_return",
				DECL_ATTRIBUTES (fndecl))))
    {
      s390_indirect_branch_attrvalue (attr,
				      &cfun->machine->function_return_reg);
      s390_indirect_branch_attrvalue (attr,
				      &cfun->machine->function_return_mem);
    }

  if ((attr = lookup_attribute ("function_return_reg",
				DECL_ATTRIBUTES (fndecl))))
    s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);

  if ((attr = lookup_attribute ("function_return_mem",
				DECL_ATTRIBUTES (fndecl))))
    s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
}
#if S390_USE_TARGET_ATTRIBUTE
/* Restore targets globals from NEW_TREE and invalidate the
   s390_previous_fndecl cache.  */

void
s390_activate_target_options (tree new_tree)
{
  cl_target_option_restore (&global_options, &global_options_set,
			    TREE_TARGET_OPTION (new_tree));
  if (TREE_TARGET_GLOBALS (new_tree))
    restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
  else if (new_tree == target_option_default_node)
    restore_target_globals (&default_target_globals);
  else
    TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
  s390_previous_fndecl = NULL_TREE;
}
#endif
/* Establish appropriate back-end context for processing the function
   FNDECL.  The argument might be NULL to indicate processing at top
   level, outside of any function scope.  */
static void
s390_set_current_function (tree fndecl)
{
#if S390_USE_TARGET_ATTRIBUTE
  /* Only change the context if the function changes.  This hook is called
     several times in the course of compiling a function, and we don't want to
     slow things down too much or call target_reinit when it isn't safe.  */
  if (fndecl == s390_previous_fndecl)
    {
      s390_indirect_branch_settings (fndecl);
      return;
    }

  tree old_tree;
  if (s390_previous_fndecl == NULL_TREE)
    old_tree = target_option_current_node;
  else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
    old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
  else
    old_tree = target_option_default_node;

  if (fndecl == NULL_TREE)
    {
      if (old_tree != target_option_current_node)
	s390_activate_target_options (target_option_current_node);
      return;
    }

  tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
  if (new_tree == NULL_TREE)
    new_tree = target_option_default_node;

  if (old_tree != new_tree)
    s390_activate_target_options (new_tree);
  s390_previous_fndecl = fndecl;
#endif
  s390_indirect_branch_settings (fndecl);
}
/* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P.  */

static bool
s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
				     unsigned int align ATTRIBUTE_UNUSED,
				     enum by_pieces_operation op ATTRIBUTE_UNUSED,
				     bool speed_p ATTRIBUTE_UNUSED)
{
  return (size == 1 || size == 2
	  || size == 4 || (TARGET_ZARCH && size == 8));
}
/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
  tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
  tree call_efpc = build_call_expr (efpc, 0);
  tree fenv_var = create_tmp_var_raw (unsigned_type_node);

#define FPC_EXCEPTION_MASK	 HOST_WIDE_INT_UC (0xf8000000)
#define FPC_FLAGS_MASK		 HOST_WIDE_INT_UC (0x00f80000)
#define FPC_DXC_MASK		 HOST_WIDE_INT_UC (0x0000ff00)
#define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
#define FPC_FLAGS_SHIFT		 HOST_WIDE_INT_UC (16)
#define FPC_DXC_SHIFT		 HOST_WIDE_INT_UC (8)
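/* Layout of the FPC word implied by the masks above (MSB-first bit
   numbering, as in the Principles of Operation): bits 0-4 (0xf8000000)
   are the IEEE exception masks, bits 8-12 (0x00f80000) the IEEE flags,
   and bits 16-23 (0x0000ff00) the data-exception code (DXC); the *_SHIFT
   constants give each field's distance from the least significant bit.  */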
  /* Generates the equivalent of feholdexcept (&fenv_var)

     fenv_var = __builtin_s390_efpc ();
     __builtin_s390_sfpc (fenv_var & mask)  */
  tree old_fpc = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, call_efpc,
			 NULL_TREE, NULL_TREE);
  tree new_fpc
    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
	      build_int_cst (unsigned_type_node,
			     ~(FPC_DXC_MASK | FPC_FLAGS_MASK
			       | FPC_EXCEPTION_MASK)));
  tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
  *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)

     __builtin_s390_sfpc (__builtin_s390_efpc () & mask)  */
  new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
		    build_int_cst (unsigned_type_node,
				   ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
  *clear = build_call_expr (sfpc, 1, new_fpc);

  /* Generates the equivalent of feupdateenv (fenv_var)

     old_fpc = __builtin_s390_efpc ();
     __builtin_s390_sfpc (fenv_var);
     __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT);  */
  old_fpc = create_tmp_var_raw (unsigned_type_node);
  tree store_old_fpc = build4 (TARGET_EXPR, void_type_node, old_fpc, call_efpc,
			       NULL_TREE, NULL_TREE);

  set_new_fpc = build_call_expr (sfpc, 1, fenv_var);

  tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
				  build_int_cst (unsigned_type_node,
						 FPC_FLAGS_MASK));
  raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
			     build_int_cst (unsigned_type_node,
					    FPC_FLAGS_SHIFT));
  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  raise_old_except = build_call_expr (atomic_feraiseexcept,
				      1, raise_old_except);

  *update = build2 (COMPOUND_EXPR, void_type_node,
		    build2 (COMPOUND_EXPR, void_type_node,
			    store_old_fpc, set_new_fpc),
		    raise_old_except);

#undef FPC_EXCEPTION_MASK
#undef FPC_FLAGS_MASK
#undef FPC_DXC_MASK
#undef FPC_EXCEPTION_MASK_SHIFT
#undef FPC_FLAGS_SHIFT
#undef FPC_DXC_SHIFT
}
/* Return the vector mode to be used for inner mode MODE when doing
   vectorization.  */
static machine_mode
s390_preferred_simd_mode (scalar_mode mode)
{
  if (TARGET_VXE && mode == E_SFmode)
    return V4SFmode;
  if (TARGET_VX)
    switch (mode)
      {
      case E_DFmode: return V2DFmode;
      case E_DImode: return V2DImode;
      case E_SImode: return V4SImode;
      case E_HImode: return V8HImode;
      case E_QImode: return V16QImode;
      default:;
      }
  return word_mode;
}
/* Our hardware does not require vectors to be strictly aligned.  */
static bool
s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
				  const_tree type ATTRIBUTE_UNUSED,
				  int misalignment ATTRIBUTE_UNUSED,
				  bool is_packed ATTRIBUTE_UNUSED)
{
  if (TARGET_VX)
    return true;

  return default_builtin_support_vector_misalignment (mode, type, misalignment,
						      is_packed);
}
/* The vector ABI requires vector types to be aligned on an 8 byte
   boundary (our stack alignment).  However, we allow the user to
   override this, although doing so definitely breaks the ABI.  */
static HOST_WIDE_INT
s390_vector_alignment (const_tree type)
{
  tree size = TYPE_SIZE (type);

  if (!TARGET_VX_ABI)
    return default_vector_alignment (type);

  if (TYPE_USER_ALIGN (type))
    return TYPE_ALIGN (type);

  if (tree_fits_uhwi_p (size)
      && tree_to_uhwi (size) < BIGGEST_ALIGNMENT)
    return tree_to_uhwi (size);

  return BIGGEST_ALIGNMENT;
}
/* Implement TARGET_CONSTANT_ALIGNMENT.  Alignment on even addresses for
   the LARL instruction.  */

static HOST_WIDE_INT
s390_constant_alignment (const_tree, HOST_WIDE_INT align)
{
  return MAX (align, 16);
}
#ifdef HAVE_AS_MACHINE_MACHINEMODE
/* Implement TARGET_ASM_FILE_START.  */
static void
s390_asm_file_start (void)
{
  default_file_start ();
  s390_asm_output_machine_for_arch (asm_out_file);
}
#endif
/* Implement TARGET_ASM_FILE_END.  */
static void
s390_asm_file_end (void)
{
#ifdef HAVE_AS_GNU_ATTRIBUTE
  varpool_node *vnode;
  cgraph_node *cnode;

  FOR_EACH_VARIABLE (vnode)
    if (TREE_PUBLIC (vnode->decl))
      s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);

  FOR_EACH_FUNCTION (cnode)
    if (TREE_PUBLIC (cnode->decl))
      s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);

  if (s390_vector_abi != 0)
    fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
	     s390_vector_abi);
#endif

  file_end_indicate_exec_stack ();

  if (flag_split_stack)
    file_end_indicate_split_stack ();
}
/* Return true if TYPE is a vector bool type.  */
static inline bool
s390_vector_bool_type_p (const_tree type)
{
  return TYPE_VECTOR_OPAQUE (type);
}
/* Return the diagnostic message string if the binary operation OP is
   not permitted on TYPE1 and TYPE2, NULL otherwise.  */
static const char *
s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
{
  bool bool1_p, bool2_p;
  bool plusminus_p;
  bool muldiv_p;
  bool compare_p;
  machine_mode mode1, mode2;

  if (!TARGET_ZVECTOR)
    return NULL;

  if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
    return NULL;

  bool1_p = s390_vector_bool_type_p (type1);
  bool2_p = s390_vector_bool_type_p (type2);

  /* Mixing signed and unsigned types is forbidden for all
     operators.  */
  if (!bool1_p && !bool2_p
      && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
    return N_("types differ in signedness");

  plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
  muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
	      || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
	      || op == ROUND_DIV_EXPR);
  compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
	       || op == EQ_EXPR || op == NE_EXPR);

  if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
    return N_("binary operator does not support two vector bool operands");

  if (bool1_p != bool2_p && (muldiv_p || compare_p))
    return N_("binary operator does not support vector bool operand");

  mode1 = TYPE_MODE (type1);
  mode2 = TYPE_MODE (type2);

  if (bool1_p != bool2_p && plusminus_p
      && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
	  || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
    return N_("binary operator does not support mixing vector "
	      "bool with floating point vector operands");

  return NULL;
}
#if ENABLE_S390_EXCESS_FLOAT_PRECISION == 1
/* Implement TARGET_C_EXCESS_PRECISION to maintain historic behavior with
   older glibc versions.

   For historical reasons, float_t and double_t had been typedef'ed to
   double on s390, causing operations on float_t to operate in a higher
   precision than is necessary.  However, it is not the case that SFmode
   operations have implicit excess precision, and we generate more optimal
   code if we let the compiler know no implicit extra precision is added.

   With a glibc with that "historic" definition, configure will enable this
   hook to set FLT_EVAL_METHOD to 1 for -fexcess-precision=standard (e.g., as
   implied by -std=cXY).  That means when we are compiling with
   -fexcess-precision=fast, the value we set for FLT_EVAL_METHOD will be out
   of line with the actual precision of float_t.

   Newer versions of glibc will be modified to derive the definition of
   float_t from FLT_EVAL_METHOD on s390x, as on many other architectures.
   There, configure will disable this hook by default, so that we defer to
   the default of FLT_EVAL_METHOD_PROMOTE_TO_FLOAT and a resulting typedef
   of float_t to float.  Note that in that scenario, float_t and
   FLT_EVAL_METHOD will be in line independent of -fexcess-precision.  */

static enum flt_eval_method
s390_excess_precision (enum excess_precision_type type)
{
  switch (type)
    {
    case EXCESS_PRECISION_TYPE_IMPLICIT:
    case EXCESS_PRECISION_TYPE_FAST:
      /* The fastest type to promote to will always be the native type,
	 whether that occurs with implicit excess precision or
	 otherwise.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
    case EXCESS_PRECISION_TYPE_STANDARD:
      /* Otherwise, when we are in a standards compliant mode, to
	 ensure consistency with the implementation in glibc, report that
	 float is evaluated to the range and precision of double.  */
      return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
    case EXCESS_PRECISION_TYPE_FLOAT16:
      error ("%<-fexcess-precision=16%> is not supported on this target");
      break;
    default:
      gcc_unreachable ();
    }
  return FLT_EVAL_METHOD_UNPREDICTABLE;
}
#endif
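/* Summary of the expansion below (derived from the code itself): PAT is
   splatted into a vector register, a first, possibly unaligned chunk is
   loaded with vlbb/lcbb (load / load-count to block boundary), and then
   one aligned 16-byte block is scanned per loop iteration with vfees
   (vector find element equal) until a match is found.  DST receives the
   address of the first element of SRC equal to PAT; as with rawmemchr, a
   matching element must exist, since the scan loop is unbounded.  */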
void
s390_rawmemchr (machine_mode elt_mode, rtx dst, rtx src, rtx pat)
{
  machine_mode vec_mode = mode_for_vector (as_a <scalar_int_mode> (elt_mode),
					   16 / GET_MODE_SIZE (elt_mode)).require ();
  rtx lens = gen_reg_rtx (V16QImode);
  rtx pattern = gen_reg_rtx (vec_mode);
  rtx loop_start = gen_label_rtx ();
  rtx loop_end = gen_label_rtx ();
  rtx addr = gen_reg_rtx (Pmode);
  rtx offset = gen_reg_rtx (Pmode);
  rtx loadlen = gen_reg_rtx (SImode);
  rtx matchlen = gen_reg_rtx (SImode);
  rtx mem;

  pat = GEN_INT (trunc_int_for_mode (INTVAL (pat), elt_mode));
  emit_insn (gen_rtx_SET (pattern, gen_rtx_VEC_DUPLICATE (vec_mode, pat)));

  emit_move_insn (addr, XEXP (src, 0));

  /* Process the first, possibly unaligned chunk: vlbb/lcbb with boundary
     code 6 load up to the next 4K boundary.  */
  emit_insn (gen_vlbb (lens, gen_rtx_MEM (BLKmode, addr), GEN_INT (6)));
  emit_insn (gen_lcbb (loadlen, addr, GEN_INT (6)));
  lens = convert_to_mode (vec_mode, lens, 1);
  emit_insn (gen_vec_vfees (vec_mode, lens, lens, pattern, GEN_INT (0)));
  lens = convert_to_mode (V4SImode, lens, 1);
  emit_insn (gen_vec_extractv4sisi (matchlen, lens, GEN_INT (1)));
  lens = convert_to_mode (vec_mode, lens, 1);
  emit_cmp_and_jump_insns (matchlen, loadlen, LT, NULL_RTX, SImode, 1,
			   loop_end);
  force_expand_binop (Pmode, add_optab, addr, GEN_INT (16), addr, 1,
		      OPTAB_DIRECT);
  force_expand_binop (Pmode, and_optab, addr,
		      GEN_INT (~HOST_WIDE_INT_UC (0xf)), addr, 1,
		      OPTAB_DIRECT);
  // now, addr is 16-byte aligned

  mem = gen_rtx_MEM (vec_mode, addr);
  set_mem_align (mem, 128);
  emit_move_insn (lens, mem);
  emit_insn (gen_vec_vfees (vec_mode, lens, lens, pattern,
			    GEN_INT (VSTRING_FLAG_CS)));
  add_int_reg_note (s390_emit_ccraw_jump (4, EQ, loop_end),
		    REG_BR_PROB,
		    profile_probability::very_unlikely ()
		      .to_reg_br_prob_note ());

  emit_label (loop_start);
  LABEL_NUSES (loop_start) = 1;

  force_expand_binop (Pmode, add_optab, addr, GEN_INT (16), addr, 1,
		      OPTAB_DIRECT);
  mem = gen_rtx_MEM (vec_mode, addr);
  set_mem_align (mem, 128);
  emit_move_insn (lens, mem);
  emit_insn (gen_vec_vfees (vec_mode, lens, lens, pattern,
			    GEN_INT (VSTRING_FLAG_CS)));
  add_int_reg_note (s390_emit_ccraw_jump (4, NE, loop_start),
		    REG_BR_PROB,
		    profile_probability::very_likely ()
		      .to_reg_br_prob_note ());

  emit_label (loop_end);
  LABEL_NUSES (loop_end) = 1;

  if (TARGET_64BIT)
    {
      lens = convert_to_mode (V2DImode, lens, 1);
      emit_insn (gen_vec_extractv2didi (offset, lens, GEN_INT (0)));
    }
  else
    {
      lens = convert_to_mode (V4SImode, lens, 1);
      emit_insn (gen_vec_extractv4sisi (offset, lens, GEN_INT (1)));
    }

  force_expand_binop (Pmode, add_optab, addr, offset, dst, 1, OPTAB_DIRECT);
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */

static unsigned HOST_WIDE_INT
s390_asan_shadow_offset (void)
{
  return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
}
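/* Illustration of the mapping selected above, assuming the standard ASan
   shadow scheme (shadow = (addr >> 3) + offset): on 64-bit, a byte at
   address A is shadowed at (A >> 3) + (1 << 52), while 31-bit code uses
   the smaller offset 0x20000000.  */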
#ifdef HAVE_GAS_HIDDEN
# define USE_HIDDEN_LINKONCE 1
#else
# define USE_HIDDEN_LINKONCE 0
#endif

/* Output an indirect branch trampoline for target register REGNO.  */

static void
s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
{
  tree decl;
  char thunk_label[32];
  int i;

  if (z10_p)
    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
  else
    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
	     INDIRECT_BRANCH_THUNK_REGNUM, regno);

  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
		     get_identifier (thunk_label),
		     build_function_type_list (void_type_node, NULL_TREE));
  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
				   NULL_TREE, void_type_node);
  TREE_PUBLIC (decl) = 1;
  TREE_STATIC (decl) = 1;
  DECL_IGNORED_P (decl) = 1;

  if (USE_HIDDEN_LINKONCE)
    {
      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));

      targetm.asm_out.unique_section (decl, 0);
      switch_to_section (get_named_section (decl, NULL, 0));

      targetm.asm_out.globalize_label (asm_out_file, thunk_label);
      fputs ("\t.hidden\t", asm_out_file);
      assemble_name (asm_out_file, thunk_label);
      putc ('\n', asm_out_file);
      ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
    }
  else
    {
      switch_to_section (text_section);
      ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
    }

  DECL_INITIAL (decl) = make_node (BLOCK);
  current_function_decl = decl;
  allocate_struct_function (decl, false);
  init_function_start (decl);
  cfun->is_thunk = true;
  first_function_block_is_cold = false;
  final_start_function (emit_barrier (), asm_out_file, 1);

  /* This makes CFI at least usable for indirect jumps.

     Stopping in the thunk: backtrace will point to the thunk target
     as if it was interrupted by a signal.  For a call this means that
     the call chain will be: caller->callee->thunk.  */
  if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
    {
      fputs ("\t.cfi_signal_frame\n", asm_out_file);
      fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
      for (i = 0; i < FPR15_REGNUM; i++)
	fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
    }

  if (z10_p)
    {
      /* We generate a thunk for z10 compiled code although z10 is
	 currently not enabled.  Tell the assembler to accept the
	 instruction.  */
      if (!TARGET_CPU_Z10)
	{
	  fputs ("\t.machine push\n", asm_out_file);
	  fputs ("\t.machine z10\n", asm_out_file);
	}
      /* We use exrl even if -mzarch hasn't been specified on the
	 command line so we have to tell the assembler to accept
	 it.  */
      if (!TARGET_ZARCH)
	fputs ("\t.machinemode zarch\n", asm_out_file);

      fputs ("\texrl\t0,1f\n", asm_out_file);

      if (!TARGET_ZARCH)
	fputs ("\t.machinemode esa\n", asm_out_file);

      if (!TARGET_CPU_Z10)
	fputs ("\t.machine pop\n", asm_out_file);
    }
  else
    {
      fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
	       INDIRECT_BRANCH_THUNK_REGNUM);

      fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
	       INDIRECT_BRANCH_THUNK_REGNUM);
    }

  /* 0:    j 0b  */
  fputs ("0:\tj\t0b\n", asm_out_file);

  /* 1:    br <regno>  */
  fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);

  final_end_function ();
  init_insn_lengths ();
  free_after_compilation (cfun);

  current_function_decl = NULL;
}
/* Implement the asm.code_end target hook.  */

static void
s390_code_end (void)
{
  int i;

  for (i = 1; i < 16; i++)
    {
      if (indirect_branch_z10thunk_mask & (1 << i))
	s390_output_indirect_thunk_function (i, true);

      if (indirect_branch_prez10thunk_mask & (1 << i))
	s390_output_indirect_thunk_function (i, false);
    }

  if (TARGET_INDIRECT_BRANCH_TABLE)
    {
      int o;

      for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
	{
	  if (indirect_branch_table_label_no[o] == 0)
	    continue;

	  switch_to_section (get_section (indirect_branch_table_name[o],
					  0,
					  NULL_TREE));
	  for (i = 0; i < indirect_branch_table_label_no[o]; i++)
	    {
	      char label_start[32];

	      ASM_GENERATE_INTERNAL_LABEL (label_start,
					   indirect_branch_table_label[o], i);

	      fputs ("\t.long\t", asm_out_file);
	      assemble_name_raw (asm_out_file, label_start);
	      fputs ("-.\n", asm_out_file);
	    }
	  switch_to_section (current_function_section ());
	}
    }
}
/* Implement the TARGET_CASE_VALUES_THRESHOLD target hook.  */

static unsigned int
s390_case_values_threshold (void)
{
  /* Disabling branch prediction for indirect jumps makes jump tables
     much more expensive.  */
  if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
    return 20;

  return default_case_values_threshold ();
}
/* Evaluate the insns between HEAD and TAIL and install back-end specific
   dependencies.

   Establish an ANTI dependency between r11 and r15 restores from FPRs
   to prevent the instruction scheduler from reordering them since
   this would break CFI.  No further handling in the sched_reorder
   hook is required since the r11 and r15 restore will never appear in
   the same ready list with that change.  */
void
s390_sched_dependencies_evaluation (rtx_insn *head, rtx_insn *tail)
{
  if (!frame_pointer_needed || !epilogue_completed)
    return;

  while (head != tail && DEBUG_INSN_P (head))
    head = NEXT_INSN (head);

  rtx_insn *r15_restore = NULL, *r11_restore = NULL;

  for (rtx_insn *insn = tail; insn != head; insn = PREV_INSN (insn))
    {
      rtx set = single_set (insn);
      if (set == NULL_RTX
	  || !RTX_FRAME_RELATED_P (insn)
	  || !REG_P (SET_DEST (set))
	  || !FP_REG_P (SET_SRC (set)))
	continue;

      if (REGNO (SET_DEST (set)) == HARD_FRAME_POINTER_REGNUM)
	r11_restore = insn;

      if (REGNO (SET_DEST (set)) == STACK_POINTER_REGNUM)
	r15_restore = insn;
    }

  if (r11_restore == NULL || r15_restore == NULL)
    return;
  add_dependence (r11_restore, r15_restore, REG_DEP_ANTI);
}
/* Implement TARGET_SHIFT_TRUNCATION_MASK for integer shifts.  */

static unsigned HOST_WIDE_INT
s390_shift_truncation_mask (machine_mode mode)
{
  return mode == DImode || mode == SImode ? 63 : 0;
}
/* Return TRUE iff CONSTRAINT is an "f" constraint, possibly with additional
   modifiers.  */

static bool
f_constraint_p (const char *constraint)
{
  bool seen_f_p = false;
  bool seen_v_p = false;

  for (size_t i = 0, c_len = strlen (constraint); i < c_len;
       i += CONSTRAINT_LEN (constraint[i], constraint + i))
    {
      if (constraint[i] == 'f')
	seen_f_p = true;
      if (constraint[i] == 'v')
	seen_v_p = true;
    }

  /* Treat "fv" constraints as "v", because LRA will choose the widest
     register class.  */
  return seen_f_p && !seen_v_p;
}
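/* Illustration of the predicate above: f_constraint_p ("=f") and
   f_constraint_p ("+f") return true, whereas f_constraint_p ("fv")
   returns false, matching the comment above about letting LRA pick the
   wider "v" class for mixed constraints.  */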
/* Return TRUE iff X is a hard floating-point (and not a vector) register.  */

static bool
s390_hard_fp_reg_p (rtx x)
{
  if (!(REG_P (x) && HARD_REGISTER_P (x) && REG_ATTRS (x)))
    return false;

  tree decl = REG_EXPR (x);
  if (!(HAS_DECL_ASSEMBLER_NAME_P (decl) && DECL_ASSEMBLER_NAME_SET_P (decl)))
    return false;

  const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));

  return name[0] == '*' && name[1] == 'f';
}
/* Implement TARGET_MD_ASM_ADJUST hook in order to fix up "f"
   constraints when long doubles are stored in vector registers.  */

static rtx_insn *
s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
		    vec<machine_mode> &input_modes,
		    vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
		    HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
{
  if (!TARGET_VXE)
    /* Long doubles are stored in FPR pairs - nothing to do.  */
    return NULL;

  rtx_insn *after_md_seq = NULL, *after_md_end = NULL;

  unsigned ninputs = inputs.length ();
  unsigned noutputs = outputs.length ();
  for (unsigned i = 0; i < noutputs; i++)
    {
      if (GET_MODE (outputs[i]) != TFmode)
	/* Not a long double - nothing to do.  */
	continue;
      const char *constraint = constraints[i];
      bool allows_mem, allows_reg, is_inout;
      bool ok = parse_output_constraint (&constraint, i, ninputs, noutputs,
					 &allows_mem, &allows_reg, &is_inout);
      gcc_assert (ok);
      if (!f_constraint_p (constraint))
	/* Long double with a constraint other than "=f" - nothing to do.  */
	continue;
      gcc_assert (allows_reg);
      gcc_assert (!is_inout);
      /* Copy output value from a FPR pair into a vector register.  */
      rtx fprx2;
      push_to_sequence2 (after_md_seq, after_md_end);
      if (s390_hard_fp_reg_p (outputs[i]))
	{
	  fprx2 = gen_rtx_REG (FPRX2mode, REGNO (outputs[i]));
	  /* The first half is already at the correct location, copy only the
	   * second one.  Use the UNSPEC pattern instead of the SUBREG one,
	   * since s390_can_change_mode_class() rejects
	   * (subreg:DF (reg:TF %fN) 8) and thus subreg validation fails.  */
	  rtx v1 = gen_rtx_REG (V2DFmode, REGNO (outputs[i]));
	  rtx v3 = gen_rtx_REG (V2DFmode, REGNO (outputs[i]) + 1);
	  emit_insn (gen_vec_permiv2df (v1, v1, v3, const0_rtx));
	}
      else
	{
	  fprx2 = gen_reg_rtx (FPRX2mode);
	  emit_insn (gen_fprx2_to_tf (outputs[i], fprx2));
	}
      after_md_seq = get_insns ();
      after_md_end = get_last_insn ();
      end_sequence ();
      outputs[i] = fprx2;
    }

  for (unsigned i = 0; i < ninputs; i++)
    {
      if (GET_MODE (inputs[i]) != TFmode)
	/* Not a long double - nothing to do.  */
	continue;
      const char *constraint = constraints[noutputs + i];
      bool allows_mem, allows_reg;
      bool ok = parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
					constraints.address (), &allows_mem,
					&allows_reg);
      gcc_assert (ok);
      if (!f_constraint_p (constraint))
	/* Long double with a constraint other than "f" (or "=f" for inout
	   operands) - nothing to do.  */
	continue;
      gcc_assert (allows_reg);
      /* Copy input value from a vector register into a FPR pair.  */
      rtx fprx2;
      if (s390_hard_fp_reg_p (inputs[i]))
	{
	  fprx2 = gen_rtx_REG (FPRX2mode, REGNO (inputs[i]));
	  /* Copy only the second half.  */
	  rtx v1 = gen_rtx_REG (V2DFmode, REGNO (inputs[i]) + 1);
	  rtx v2 = gen_rtx_REG (V2DFmode, REGNO (inputs[i]));
	  emit_insn (gen_vec_permiv2df (v1, v2, v1, GEN_INT (3)));
	}
      else
	{
	  fprx2 = gen_reg_rtx (FPRX2mode);
	  emit_insn (gen_tf_to_fprx2 (fprx2, inputs[i]));
	}
      inputs[i] = fprx2;
      input_modes[i] = FPRX2mode;
    }

  return after_md_seq;
}
#define MAX_VECT_LEN	16

struct expand_vec_perm_d
{
  rtx target, op0, op1;
  unsigned char perm[MAX_VECT_LEN];
  machine_mode vmode;
  unsigned char nelt;
  bool testing_p;
};
/* Try to expand the vector permute operation described by D using the
   vector merge instructions vml and vmh.  Return true if vector merge
   could be used.  */
static bool
expand_perm_with_merge (const struct expand_vec_perm_d &d)
{
  bool merge_lo_p = true;
  bool merge_hi_p = true;

  // For V4SI this checks for: { 0, 4, 1, 5 }
  for (int telt = 0; telt < d.nelt; telt++)
    if (d.perm[telt] != telt / 2 + (telt % 2) * d.nelt)
      {
	merge_hi_p = false;
	break;
      }

  if (!merge_hi_p)
    {
      // For V4SI this checks for: { 2, 6, 3, 7 }
      for (int telt = 0; telt < d.nelt; telt++)
	if (d.perm[telt] != (telt + d.nelt) / 2 + (telt % 2) * d.nelt)
	  {
	    merge_lo_p = false;
	    break;
	  }
    }
  else
    merge_lo_p = false;

  if (d.testing_p)
    return merge_lo_p || merge_hi_p;

  if (merge_lo_p || merge_hi_p)
    s390_expand_merge (d.target, d.op0, d.op1, merge_hi_p);

  return merge_lo_p || merge_hi_p;
}
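/* For V2DI/V2DF the checks above accept { 0, 2 } as a merge-high and
   { 1, 3 } as a merge-low pattern (indices >= nelt select elements of the
   second operand), which corresponds to the vmrhg/vmrlg cases mentioned
   in the vpdi comment below.  */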
/* Try to expand the vector permute operation described by D using the
   vector permute doubleword immediate instruction vpdi.  Return true
   if vpdi could be used.

   VPDI allows 4 different immediate values (0, 1, 4, 5).  The 0 and 5
   cases are covered by vmrhg and vmrlg already.  So we only care
   about the 1, 4 cases here.
   1 - First element of src1 and second of src2
   4 - Second element of src1 and first of src2  */
static bool
expand_perm_with_vpdi (const struct expand_vec_perm_d &d)
{
  bool vpdi1_p = false;
  bool vpdi4_p = false;
  rtx op0_reg, op1_reg;

  // Only V2DI and V2DF are supported here.
  if (d.nelt != 2)
    return false;

  if (d.perm[0] == 0 && d.perm[1] == 3)
    vpdi1_p = true;

  if (d.perm[0] == 1 && d.perm[1] == 2)
    vpdi4_p = true;

  if (!vpdi1_p && !vpdi4_p)
    return false;

  if (d.testing_p)
    return true;

  op0_reg = force_reg (GET_MODE (d.op0), d.op0);
  op1_reg = force_reg (GET_MODE (d.op1), d.op1);

  if (vpdi1_p)
    emit_insn (gen_vpdi1 (d.vmode, d.target, op0_reg, op1_reg));

  if (vpdi4_p)
    emit_insn (gen_vpdi4 (d.vmode, d.target, op0_reg, op1_reg));

  return true;
}
/* Try to find the best sequence for the vector permute operation
   described by D.  Return true if the operation could be
   expanded.  */
static bool
vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d)
{
  if (expand_perm_with_merge (d))
    return true;

  if (expand_perm_with_vpdi (d))
    return true;

  return false;
}
/* Return true if we can emit instructions for the constant
   permutation vector in SEL.  If OUTPUT, IN0, IN1 are non-null the
   hook is supposed to emit the required INSNs.  */

static bool
s390_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
			       const vec_perm_indices &sel)
{
  struct expand_vec_perm_d d;
  unsigned int i, nelt;

  if (!s390_vector_mode_supported_p (vmode) || GET_MODE_SIZE (vmode) != 16)
    return false;

  d.target = target;
  d.op0 = op0;
  d.op1 = op1;

  d.vmode = vmode;
  gcc_assert (VECTOR_MODE_P (d.vmode));
  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
  d.testing_p = target == NULL_RTX;

  gcc_assert (target == NULL_RTX || REG_P (target));
  gcc_assert (sel.length () == nelt);

  for (i = 0; i < nelt; i++)
    {
      unsigned char e = sel[i];
      gcc_assert (e < 2 * nelt);
      d.perm[i] = e;
    }

  return vectorize_vec_perm_const_1 (d);
}
17162 /* Initialize GCC target structure. */
17164 #undef TARGET_ASM_ALIGNED_HI_OP
17165 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
17166 #undef TARGET_ASM_ALIGNED_DI_OP
17167 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
17168 #undef TARGET_ASM_INTEGER
17169 #define TARGET_ASM_INTEGER s390_assemble_integer
17171 #undef TARGET_ASM_OPEN_PAREN
17172 #define TARGET_ASM_OPEN_PAREN ""
17174 #undef TARGET_ASM_CLOSE_PAREN
17175 #define TARGET_ASM_CLOSE_PAREN ""
17177 #undef TARGET_OPTION_OVERRIDE
17178 #define TARGET_OPTION_OVERRIDE s390_option_override
17180 #ifdef TARGET_THREAD_SSP_OFFSET
17181 #undef TARGET_STACK_PROTECT_GUARD
17182 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
17185 #undef TARGET_ENCODE_SECTION_INFO
17186 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
17188 #undef TARGET_SCALAR_MODE_SUPPORTED_P
17189 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
17192 #undef TARGET_HAVE_TLS
17193 #define TARGET_HAVE_TLS true
17195 #undef TARGET_CANNOT_FORCE_CONST_MEM
17196 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
17198 #undef TARGET_DELEGITIMIZE_ADDRESS
17199 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
17201 #undef TARGET_LEGITIMIZE_ADDRESS
17202 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
17204 #undef TARGET_RETURN_IN_MEMORY
17205 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
17207 #undef TARGET_INIT_BUILTINS
17208 #define TARGET_INIT_BUILTINS s390_init_builtins
17209 #undef TARGET_EXPAND_BUILTIN
17210 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
17211 #undef TARGET_BUILTIN_DECL
17212 #define TARGET_BUILTIN_DECL s390_builtin_decl
17214 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
17215 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
17217 #undef TARGET_ASM_OUTPUT_MI_THUNK
17218 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
17219 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
17220 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
17222 #if ENABLE_S390_EXCESS_FLOAT_PRECISION == 1
17223 /* This hook is only needed to maintain the historic behavior with glibc
17224 versions that typedef float_t to double. */
17225 #undef TARGET_C_EXCESS_PRECISION
17226 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE s390_issue_rate
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER s390_sched_reorder
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT s390_sched_init

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS s390_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST s390_address_cost
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST s390_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  s390_builtin_vectorization_cost

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG s390_reorg

#undef TARGET_VALID_POINTER_MODE
#define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE s390_pass_by_reference

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG s390_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE s390_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE s390_libcall_value
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true

#undef TARGET_KEEP_LEAF_WHEN_PROFILED
#define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs

#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible

#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null

#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel

#undef TARGET_DWARF_FRAME_REG_MODE
#define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE s390_mangle_type
#endif
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD s390_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p

#undef TARGET_LRA_P
#define TARGET_LRA_P s390_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage

#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT s390_trampoline_init

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
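/* Explanatory note (a reading of the hook's contract, stated as an
   assumption): the value 1 says one low-order bit of a function
   pointer is free to mark descriptors for nested functions, which
   holds because S/390 instructions are at least 2-byte aligned, so
   bit 0 of a genuine code address is always zero.  */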
#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison

#undef TARGET_HARD_REGNO_SCRATCH_OK
#define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P s390_modes_tieable_p

#undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
#define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
  s390_hard_regno_call_part_clobbered

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE s390_attribute_table

#undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
#define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry

#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
  s390_use_by_pieces_infrastructure_p

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT s390_vector_alignment

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
#ifdef HAVE_AS_MACHINE_MACHINEMODE
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START s390_asm_file_start
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END s390_asm_file_end
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
#if S390_USE_TARGET_ATTRIBUTE
#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P s390_can_inline_p
#endif
#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE s390_function_specific_restore

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment

#undef TARGET_ASM_CODE_END
#define TARGET_ASM_CODE_END s390_code_end

#undef TARGET_CASE_VALUES_THRESHOLD
#define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold

#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
  s390_sched_dependencies_evaluation

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK s390_shift_truncation_mask

/* Use only short displacement, since long displacement is not available for
   the floating point instructions.  */
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0xfff
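/* Illustrative sketch of the constraint (the concrete instruction is
   an example, not taken from this file): the short base-plus-
   displacement address format carries a 12-bit unsigned displacement,
   so an anchored FP access such as

     LD  %f0, 4095(%r13)

   still encodes, while any offset above 0xfff would need the 20-bit
   long-displacement format that the FP instructions in question
   lack.  */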
#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST s390_md_asm_adjust

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST s390_vectorize_vec_perm_const
struct gcc_target targetm = TARGET_INITIALIZER;
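/* Note (general GCC fact, not specific to this port): targetm is the
   single global instance of struct gcc_target through which the
   target-independent parts of the compiler call target hooks, so the
   TARGET_* overrides above take effect via TARGET_INITIALIZER.  */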
#include "gt-s390.h"