1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2015 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "rtl.h"
30 #include "df.h"
31 #include "alias.h"
32 #include "fold-const.h"
33 #include "print-tree.h"
34 #include "stringpool.h"
35 #include "stor-layout.h"
36 #include "varasm.h"
37 #include "calls.h"
38 #include "tm_p.h"
39 #include "regs.h"
40 #include "insn-config.h"
41 #include "conditions.h"
42 #include "output.h"
43 #include "insn-attr.h"
44 #include "flags.h"
45 #include "except.h"
46 #include "recog.h"
47 #include "expmed.h"
48 #include "dojump.h"
49 #include "explow.h"
50 #include "emit-rtl.h"
51 #include "stmt.h"
52 #include "expr.h"
53 #include "reload.h"
54 #include "diagnostic-core.h"
55 #include "cfgrtl.h"
56 #include "cfganal.h"
57 #include "lcm.h"
58 #include "cfgbuild.h"
59 #include "cfgcleanup.h"
60 #include "target.h"
61 #include "debug.h"
62 #include "langhooks.h"
63 #include "insn-codes.h"
64 #include "optabs.h"
65 #include "internal-fn.h"
66 #include "gimple-fold.h"
67 #include "tree-eh.h"
68 #include "gimplify.h"
69 #include "params.h"
70 #include "cfgloop.h"
71 #include "opts.h"
72 #include "tree-pass.h"
73 #include "context.h"
74 #include "builtins.h"
75 #include "rtl-iter.h"
76 #include "intl.h"
77 #include "cgraph.h"
78
79 /* This file should be included last. */
80 #include "target-def.h"
81
82 /* Define the specific costs for a given cpu. */
83
84 struct processor_costs
85 {
86 /* multiplication */
87 const int m; /* cost of an M instruction. */
88 const int mghi; /* cost of an MGHI instruction. */
89 const int mh; /* cost of an MH instruction. */
90 const int mhi; /* cost of an MHI instruction. */
91 const int ml; /* cost of an ML instruction. */
92 const int mr; /* cost of an MR instruction. */
93 const int ms; /* cost of an MS instruction. */
94 const int msg; /* cost of an MSG instruction. */
95 const int msgf; /* cost of an MSGF instruction. */
96 const int msgfr; /* cost of an MSGFR instruction. */
97 const int msgr; /* cost of an MSGR instruction. */
98 const int msr; /* cost of an MSR instruction. */
99 const int mult_df; /* cost of multiplication in DFmode. */
100 const int mxbr;
101 /* square root */
102 const int sqxbr; /* cost of square root in TFmode. */
103 const int sqdbr; /* cost of square root in DFmode. */
104 const int sqebr; /* cost of square root in SFmode. */
105 /* multiply and add */
106 const int madbr; /* cost of multiply and add in DFmode. */
107 const int maebr; /* cost of multiply and add in SFmode. */
108 /* division */
109 const int dxbr;
110 const int ddbr;
111 const int debr;
112 const int dlgr;
113 const int dlr;
114 const int dr;
115 const int dsgfr;
116 const int dsgr;
117 };
118
119 const struct processor_costs *s390_cost;
120
121 static const
122 struct processor_costs z900_cost =
123 {
124 COSTS_N_INSNS (5), /* M */
125 COSTS_N_INSNS (10), /* MGHI */
126 COSTS_N_INSNS (5), /* MH */
127 COSTS_N_INSNS (4), /* MHI */
128 COSTS_N_INSNS (5), /* ML */
129 COSTS_N_INSNS (5), /* MR */
130 COSTS_N_INSNS (4), /* MS */
131 COSTS_N_INSNS (15), /* MSG */
132 COSTS_N_INSNS (7), /* MSGF */
133 COSTS_N_INSNS (7), /* MSGFR */
134 COSTS_N_INSNS (10), /* MSGR */
135 COSTS_N_INSNS (4), /* MSR */
136 COSTS_N_INSNS (7), /* multiplication in DFmode */
137 COSTS_N_INSNS (13), /* MXBR */
138 COSTS_N_INSNS (136), /* SQXBR */
139 COSTS_N_INSNS (44), /* SQDBR */
140 COSTS_N_INSNS (35), /* SQEBR */
141 COSTS_N_INSNS (18), /* MADBR */
142 COSTS_N_INSNS (13), /* MAEBR */
143 COSTS_N_INSNS (134), /* DXBR */
144 COSTS_N_INSNS (30), /* DDBR */
145 COSTS_N_INSNS (27), /* DEBR */
146 COSTS_N_INSNS (220), /* DLGR */
147 COSTS_N_INSNS (34), /* DLR */
148 COSTS_N_INSNS (34), /* DR */
149 COSTS_N_INSNS (32), /* DSGFR */
150 COSTS_N_INSNS (32), /* DSGR */
151 };
152
153 static const
154 struct processor_costs z990_cost =
155 {
156 COSTS_N_INSNS (4), /* M */
157 COSTS_N_INSNS (2), /* MGHI */
158 COSTS_N_INSNS (2), /* MH */
159 COSTS_N_INSNS (2), /* MHI */
160 COSTS_N_INSNS (4), /* ML */
161 COSTS_N_INSNS (4), /* MR */
162 COSTS_N_INSNS (5), /* MS */
163 COSTS_N_INSNS (6), /* MSG */
164 COSTS_N_INSNS (4), /* MSGF */
165 COSTS_N_INSNS (4), /* MSGFR */
166 COSTS_N_INSNS (4), /* MSGR */
167 COSTS_N_INSNS (4), /* MSR */
168 COSTS_N_INSNS (1), /* multiplication in DFmode */
169 COSTS_N_INSNS (28), /* MXBR */
170 COSTS_N_INSNS (130), /* SQXBR */
171 COSTS_N_INSNS (66), /* SQDBR */
172 COSTS_N_INSNS (38), /* SQEBR */
173 COSTS_N_INSNS (1), /* MADBR */
174 COSTS_N_INSNS (1), /* MAEBR */
175 COSTS_N_INSNS (60), /* DXBR */
176 COSTS_N_INSNS (40), /* DDBR */
177 COSTS_N_INSNS (26), /* DEBR */
178 COSTS_N_INSNS (176), /* DLGR */
179 COSTS_N_INSNS (31), /* DLR */
180 COSTS_N_INSNS (31), /* DR */
181 COSTS_N_INSNS (31), /* DSGFR */
182 COSTS_N_INSNS (31), /* DSGR */
183 };
184
185 static const
186 struct processor_costs z9_109_cost =
187 {
188 COSTS_N_INSNS (4), /* M */
189 COSTS_N_INSNS (2), /* MGHI */
190 COSTS_N_INSNS (2), /* MH */
191 COSTS_N_INSNS (2), /* MHI */
192 COSTS_N_INSNS (4), /* ML */
193 COSTS_N_INSNS (4), /* MR */
194 COSTS_N_INSNS (5), /* MS */
195 COSTS_N_INSNS (6), /* MSG */
196 COSTS_N_INSNS (4), /* MSGF */
197 COSTS_N_INSNS (4), /* MSGFR */
198 COSTS_N_INSNS (4), /* MSGR */
199 COSTS_N_INSNS (4), /* MSR */
200 COSTS_N_INSNS (1), /* multiplication in DFmode */
201 COSTS_N_INSNS (28), /* MXBR */
202 COSTS_N_INSNS (130), /* SQXBR */
203 COSTS_N_INSNS (66), /* SQDBR */
204 COSTS_N_INSNS (38), /* SQEBR */
205 COSTS_N_INSNS (1), /* MADBR */
206 COSTS_N_INSNS (1), /* MAEBR */
207 COSTS_N_INSNS (60), /* DXBR */
208 COSTS_N_INSNS (40), /* DDBR */
209 COSTS_N_INSNS (26), /* DEBR */
210 COSTS_N_INSNS (30), /* DLGR */
211 COSTS_N_INSNS (23), /* DLR */
212 COSTS_N_INSNS (23), /* DR */
213 COSTS_N_INSNS (24), /* DSGFR */
214 COSTS_N_INSNS (24), /* DSGR */
215 };
216
217 static const
218 struct processor_costs z10_cost =
219 {
220 COSTS_N_INSNS (10), /* M */
221 COSTS_N_INSNS (10), /* MGHI */
222 COSTS_N_INSNS (10), /* MH */
223 COSTS_N_INSNS (10), /* MHI */
224 COSTS_N_INSNS (10), /* ML */
225 COSTS_N_INSNS (10), /* MR */
226 COSTS_N_INSNS (10), /* MS */
227 COSTS_N_INSNS (10), /* MSG */
228 COSTS_N_INSNS (10), /* MSGF */
229 COSTS_N_INSNS (10), /* MSGFR */
230 COSTS_N_INSNS (10), /* MSGR */
231 COSTS_N_INSNS (10), /* MSR */
232 COSTS_N_INSNS (1) , /* multiplication in DFmode */
233 COSTS_N_INSNS (50), /* MXBR */
234 COSTS_N_INSNS (120), /* SQXBR */
235 COSTS_N_INSNS (52), /* SQDBR */
236 COSTS_N_INSNS (38), /* SQEBR */
237 COSTS_N_INSNS (1), /* MADBR */
238 COSTS_N_INSNS (1), /* MAEBR */
239 COSTS_N_INSNS (111), /* DXBR */
240 COSTS_N_INSNS (39), /* DDBR */
241 COSTS_N_INSNS (32), /* DEBR */
242 COSTS_N_INSNS (160), /* DLGR */
243 COSTS_N_INSNS (71), /* DLR */
244 COSTS_N_INSNS (71), /* DR */
245 COSTS_N_INSNS (71), /* DSGFR */
246 COSTS_N_INSNS (71), /* DSGR */
247 };
248
249 static const
250 struct processor_costs z196_cost =
251 {
252 COSTS_N_INSNS (7), /* M */
253 COSTS_N_INSNS (5), /* MGHI */
254 COSTS_N_INSNS (5), /* MH */
255 COSTS_N_INSNS (5), /* MHI */
256 COSTS_N_INSNS (7), /* ML */
257 COSTS_N_INSNS (7), /* MR */
258 COSTS_N_INSNS (6), /* MS */
259 COSTS_N_INSNS (8), /* MSG */
260 COSTS_N_INSNS (6), /* MSGF */
261 COSTS_N_INSNS (6), /* MSGFR */
262 COSTS_N_INSNS (8), /* MSGR */
263 COSTS_N_INSNS (6), /* MSR */
264 COSTS_N_INSNS (1) , /* multiplication in DFmode */
265 COSTS_N_INSNS (40), /* MXBR B+40 */
266 COSTS_N_INSNS (100), /* SQXBR B+100 */
267 COSTS_N_INSNS (42), /* SQDBR B+42 */
268 COSTS_N_INSNS (28), /* SQEBR B+28 */
269 COSTS_N_INSNS (1), /* MADBR B */
270 COSTS_N_INSNS (1), /* MAEBR B */
271 COSTS_N_INSNS (101), /* DXBR B+101 */
272 COSTS_N_INSNS (29), /* DDBR */
273 COSTS_N_INSNS (22), /* DEBR */
274 COSTS_N_INSNS (160), /* DLGR cracked */
275 COSTS_N_INSNS (160), /* DLR cracked */
276 COSTS_N_INSNS (160), /* DR expanded */
277 COSTS_N_INSNS (160), /* DSGFR cracked */
278 COSTS_N_INSNS (160), /* DSGR cracked */
279 };
280
281 static const
282 struct processor_costs zEC12_cost =
283 {
284 COSTS_N_INSNS (7), /* M */
285 COSTS_N_INSNS (5), /* MGHI */
286 COSTS_N_INSNS (5), /* MH */
287 COSTS_N_INSNS (5), /* MHI */
288 COSTS_N_INSNS (7), /* ML */
289 COSTS_N_INSNS (7), /* MR */
290 COSTS_N_INSNS (6), /* MS */
291 COSTS_N_INSNS (8), /* MSG */
292 COSTS_N_INSNS (6), /* MSGF */
293 COSTS_N_INSNS (6), /* MSGFR */
294 COSTS_N_INSNS (8), /* MSGR */
295 COSTS_N_INSNS (6), /* MSR */
296 COSTS_N_INSNS (1) , /* multiplication in DFmode */
297 COSTS_N_INSNS (40), /* MXBR B+40 */
298 COSTS_N_INSNS (100), /* SQXBR B+100 */
299 COSTS_N_INSNS (42), /* SQDBR B+42 */
300 COSTS_N_INSNS (28), /* SQEBR B+28 */
301 COSTS_N_INSNS (1), /* MADBR B */
302 COSTS_N_INSNS (1), /* MAEBR B */
303 COSTS_N_INSNS (131), /* DXBR B+131 */
304 COSTS_N_INSNS (29), /* DDBR */
305 COSTS_N_INSNS (22), /* DEBR */
306 COSTS_N_INSNS (160), /* DLGR cracked */
307 COSTS_N_INSNS (160), /* DLR cracked */
308 COSTS_N_INSNS (160), /* DR expanded */
309 COSTS_N_INSNS (160), /* DSGFR cracked */
310 COSTS_N_INSNS (160), /* DSGR cracked */
311 };
312
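/* Editorial note on the cost tables above (a sketch, not from the
   original sources): COSTS_N_INSNS (n) expresses a cost equivalent to
   n simple instructions in GCC's RTX cost units.  For example, the
   z900 entry COSTS_N_INSNS (220) for DLGR tells the middle end that a
   64-bit unsigned divide is roughly 220 times as expensive as a simple
   instruction, which strongly discourages emitting it when a cheaper
   sequence (e.g. a shift for a power-of-two divisor) is available.  */
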
313 extern int reload_completed;
314
315 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
316 static rtx_insn *last_scheduled_insn;
317
318 /* Structure used to hold the components of a S/390 memory
319 address. A legitimate address on S/390 is of the general
320 form
321 base + index + displacement
322 where any of the components is optional.
323
324 base and index are registers of the class ADDR_REGS,
325 displacement is an unsigned 12-bit immediate constant. */
326
327 struct s390_address
328 {
329 rtx base;
330 rtx indx;
331 rtx disp;
332 bool pointer;
333 bool literal_pool;
334 };
335
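/* Illustrative example (editorial, not part of the original comment):
   the memory operand of an instruction such as

       l  %r1,8(%r2,%r3)         D(X,B) format: disp 8, index %r2, base %r3

   would be described by a struct s390_address with base = (reg %r3),
   indx = (reg %r2) and disp = (const_int 8).  The pointer and
   literal_pool flags record properties derived during address
   analysis.  */
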
336 /* The following structure is embedded in the machine
337 specific part of struct function. */
338
339 struct GTY (()) s390_frame_layout
340 {
341 /* Offset within stack frame. */
342 HOST_WIDE_INT gprs_offset;
343 HOST_WIDE_INT f0_offset;
344 HOST_WIDE_INT f4_offset;
345 HOST_WIDE_INT f8_offset;
346 HOST_WIDE_INT backchain_offset;
347
348   /* Number of the first and last gpr for which slots in the
349      register save area are reserved.  */
350 int first_save_gpr_slot;
351 int last_save_gpr_slot;
352
353 /* Location (FP register number) where GPRs (r0-r15) should
354 be saved to.
355 0 - does not need to be saved at all
356 -1 - stack slot */
357 signed char gpr_save_slots[16];
358
359 /* Number of first and last gpr to be saved, restored. */
360 int first_save_gpr;
361 int first_restore_gpr;
362 int last_save_gpr;
363 int last_restore_gpr;
364
365 /* Bits standing for floating point registers. Set, if the
366 respective register has to be saved. Starting with reg 16 (f0)
367 at the rightmost bit.
368 Bit 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
369 fpr 15 13 11 9 14 12 10 8 7 5 3 1 6 4 2 0
370 reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */
371 unsigned int fpr_bitmap;
372
373 /* Number of floating point registers f8-f15 which must be saved. */
374 int high_fprs;
375
376 /* Set if return address needs to be saved.
377 This flag is set by s390_return_addr_rtx if it could not use
378 the initial value of r14 and therefore depends on r14 saved
379 to the stack. */
380 bool save_return_addr_p;
381
382 /* Size of stack frame. */
383 HOST_WIDE_INT frame_size;
384 };
385
386 /* Define the structure for the machine field in struct function. */
387
388 struct GTY(()) machine_function
389 {
390 struct s390_frame_layout frame_layout;
391
392 /* Literal pool base register. */
393 rtx base_reg;
394
395 /* True if we may need to perform branch splitting. */
396 bool split_branches_pending_p;
397
398 bool has_landing_pad_p;
399
400 /* True if the current function may contain a tbegin clobbering
401 FPRs. */
402 bool tbegin_p;
403 };
404
405 /* A few accessor macros for struct cfun->machine->s390_frame_layout. */
406
407 #define cfun_frame_layout (cfun->machine->frame_layout)
408 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
409 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
410 ? cfun_frame_layout.fpr_bitmap & 0x0f \
411 : cfun_frame_layout.fpr_bitmap & 0x03))
412 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
413 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
414 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
415 (1 << (REGNO - FPR0_REGNUM)))
416 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
417 (1 << (REGNO - FPR0_REGNUM))))
418 #define cfun_gpr_save_slot(REGNO) \
419 cfun->machine->frame_layout.gpr_save_slots[REGNO]
420
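/* Usage sketch for the accessors above (illustrative only):

     cfun_set_fpr_save (FPR0_REGNUM + 8);      (mark f8 for saving)
     if (cfun_fpr_save_p (FPR0_REGNUM + 8))    (query the bitmap)
       ...

   Both macros translate the hard register number into a bit position
   relative to FPR0_REGNUM within frame_layout.fpr_bitmap; per the
   fpr_bitmap table above, bit 8 corresponds to f8.  */
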
421 /* Number of GPRs and FPRs used for argument passing. */
422 #define GP_ARG_NUM_REG 5
423 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
424 #define VEC_ARG_NUM_REG 8
425
426 /* A couple of shortcuts. */
427 #define CONST_OK_FOR_J(x) \
428 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
429 #define CONST_OK_FOR_K(x) \
430 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
431 #define CONST_OK_FOR_Os(x) \
432 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
433 #define CONST_OK_FOR_Op(x) \
434 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
435 #define CONST_OK_FOR_On(x) \
436 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
437
438 #define REGNO_PAIR_OK(REGNO, MODE) \
439 (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
440
441 /* That is the read-ahead distance, in bytes, of the dynamic branch
442    prediction unit on a z10 (or higher) CPU. */
443 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
444
445
446 /* Indicate which ABI has been used for passing vector args.
447 0 - no vector type arguments have been passed where the ABI is relevant
448 1 - the old ABI has been used
449 2 - a vector type argument has been passed either in a vector register
450 or on the stack by value */
451 static int s390_vector_abi = 0;
452
453 /* Set the vector ABI marker if TYPE is subject to the vector ABI
454 switch. The vector ABI affects only vector data types. There are
455 two aspects of the vector ABI relevant here:
456
457 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
458 ABI and natural alignment with the old.
459
460    2. vectors <= 16 bytes are passed in VRs or by value on the stack
461 with the new ABI but by reference on the stack with the old.
462
463 If ARG_P is true TYPE is used for a function argument or return
464 value. The ABI marker then is set for all vector data types. If
465 ARG_P is false only type 1 vectors are being checked. */
466
467 static void
468 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
469 {
470 static hash_set<const_tree> visited_types_hash;
471
472 if (s390_vector_abi)
473 return;
474
475 if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
476 return;
477
478 if (visited_types_hash.contains (type))
479 return;
480
481 visited_types_hash.add (type);
482
483 if (VECTOR_TYPE_P (type))
484 {
485 int type_size = int_size_in_bytes (type);
486
487       /* Outside of arguments only the alignment changes, and this
488          only happens for vector types >= 16 bytes.  */
489 if (!arg_p && type_size < 16)
490 return;
491
492       /* In arguments, vector types > 16 bytes are passed as before (GCC
493 never enforced the bigger alignment for arguments which was
494 required by the old vector ABI). However, it might still be
495 ABI relevant due to the changed alignment if it is a struct
496 member. */
497 if (arg_p && type_size > 16 && !in_struct_p)
498 return;
499
500 s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
501 }
502 else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
503 {
504       /* ARRAY_TYPE: Since neither of the ABIs requires more than
505          natural alignment, there will never be ABI dependent padding
506          in an array type.  That's why we do not set in_struct_p to
507          true here.  */
508 s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
509 }
510 else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
511 {
512 tree arg_chain;
513
514 /* Check the return type. */
515 s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
516
517 for (arg_chain = TYPE_ARG_TYPES (type);
518 arg_chain;
519 arg_chain = TREE_CHAIN (arg_chain))
520 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
521 }
522 else if (RECORD_OR_UNION_TYPE_P (type))
523 {
524 tree field;
525
526 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
527 {
528 if (TREE_CODE (field) != FIELD_DECL)
529 continue;
530
531 s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
532 }
533 }
534 }
535
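/* Illustrative example (editorial sketch): given the user-level
   declarations

     typedef double v2df __attribute__ ((vector_size (16)));
     void foo (v2df x);

   the parameter type is a 16-byte vector, so walking foo's
   FUNCTION_TYPE through s390_check_type_for_vector_abi with
   arg_p == true sets s390_vector_abi to 2 if TARGET_VX_ABI and to 1
   otherwise.  */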
536
537 /* System z builtins. */
538
539 #include "s390-builtins.h"
540
541 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
542 {
543 #undef B_DEF
544 #undef OB_DEF
545 #undef OB_DEF_VAR
546 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
547 #define OB_DEF(...)
548 #define OB_DEF_VAR(...)
549 #include "s390-builtins.def"
550 0
551 };
552
553 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
554 {
555 #undef B_DEF
556 #undef OB_DEF
557 #undef OB_DEF_VAR
558 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
559 #define OB_DEF(...)
560 #define OB_DEF_VAR(...)
561 #include "s390-builtins.def"
562 0
563 };
564
565 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
566 {
567 #undef B_DEF
568 #undef OB_DEF
569 #undef OB_DEF_VAR
570 #define B_DEF(...)
571 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
572 #define OB_DEF_VAR(...)
573 #include "s390-builtins.def"
574 0
575 };
576
577 const unsigned int
578 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
579 {
580 #undef B_DEF
581 #undef OB_DEF
582 #undef OB_DEF_VAR
583 #define B_DEF(...)
584 #define OB_DEF(...)
585 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS,
586 #include "s390-builtins.def"
587 0
588 };
589
590 tree s390_builtin_types[BT_MAX];
591 tree s390_builtin_fn_types[BT_FN_MAX];
592 tree s390_builtin_decls[S390_BUILTIN_MAX +
593 S390_OVERLOADED_BUILTIN_MAX +
594 S390_OVERLOADED_BUILTIN_VAR_MAX];
595
596 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
597 #undef B_DEF
598 #undef OB_DEF
599 #undef OB_DEF_VAR
600 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
601 #define OB_DEF(...)
602 #define OB_DEF_VAR(...)
603
604 #include "s390-builtins.def"
605 CODE_FOR_nothing
606 };
607
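/* How the tables above are built (editorial sketch): every B_DEF line
   in s390-builtins.def has the shape

     B_DEF (NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE)

   and is expanded once per table by redefining the B_DEF macro: it
   contributes BFLAGS to bflags_builtin[], OPFLAGS to opflags_builtin[]
   and CODE_FOR_PATTERN to code_for_builtin[], all in builtin enum
   order.  The OB_DEF and OB_DEF_VAR lines feed the overloaded-builtin
   tables in the same way.  */
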
608 static void
609 s390_init_builtins (void)
610 {
611 /* These definitions are being used in s390-builtins.def. */
612 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
613 NULL, NULL);
614 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
615 tree c_uint64_type_node;
616 unsigned int bflags_mask = (BFLAGS_MASK_INIT);
617
618 bflags_mask |= (TARGET_VX) ? B_VX : 0;
619 bflags_mask |= (TARGET_HTM) ? B_HTM : 0;
620
621   /* The uint64_type_node from tree.c is not compatible with the C99
622 uint64_t data type. What we want is c_uint64_type_node from
623 c-common.c. But since backend code is not supposed to interface
624 with the frontend we recreate it here. */
625 if (TARGET_64BIT)
626 c_uint64_type_node = long_unsigned_type_node;
627 else
628 c_uint64_type_node = long_long_unsigned_type_node;
629
630 #undef DEF_TYPE
631 #define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P) \
632 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
633 s390_builtin_types[INDEX] = (!CONST_P) ? \
634 (NODE) : build_type_variant ((NODE), 1, 0);
635
636 #undef DEF_POINTER_TYPE
637 #define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE) \
638 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
639 s390_builtin_types[INDEX] = \
640 build_pointer_type (s390_builtin_types[INDEX_BASE]);
641
642 #undef DEF_DISTINCT_TYPE
643 #define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE) \
644 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
645 s390_builtin_types[INDEX] = \
646 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
647
648 #undef DEF_VECTOR_TYPE
649 #define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
650 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
651 s390_builtin_types[INDEX] = \
652 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
653
654 #undef DEF_OPAQUE_VECTOR_TYPE
655 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
656 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
657 s390_builtin_types[INDEX] = \
658 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
659
660 #undef DEF_FN_TYPE
661 #define DEF_FN_TYPE(INDEX, BFLAGS, args...) \
662 if ((BFLAGS) == 0 || ((BFLAGS) & bflags_mask)) \
663 s390_builtin_fn_types[INDEX] = \
664 build_function_type_list (args, NULL_TREE);
665 #undef DEF_OV_TYPE
666 #define DEF_OV_TYPE(...)
667 #include "s390-builtin-types.def"
668
669 #undef B_DEF
670 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
671 if (((BFLAGS) & ~bflags_mask) == 0) \
672 s390_builtin_decls[S390_BUILTIN_##NAME] = \
673 add_builtin_function ("__builtin_" #NAME, \
674 s390_builtin_fn_types[FNTYPE], \
675 S390_BUILTIN_##NAME, \
676 BUILT_IN_MD, \
677 NULL, \
678 ATTRS);
679 #undef OB_DEF
680 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
681 if (((BFLAGS) & ~bflags_mask) == 0) \
682 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
683 add_builtin_function ("__builtin_" #NAME, \
684 s390_builtin_fn_types[FNTYPE], \
685 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
686 BUILT_IN_MD, \
687 NULL, \
688 0);
689 #undef OB_DEF_VAR
690 #define OB_DEF_VAR(...)
691 #include "s390-builtins.def"
692
693 }
694
695 /* Return true if ARG is appropriate as argument number ARGNUM of
696 builtin DECL. The operand flags from s390-builtins.def have to
697    be passed as OP_FLAGS. */
698 bool
699 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
700 {
701 if (O_UIMM_P (op_flags))
702 {
703 int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
704 int bitwidth = bitwidths[op_flags - O_U1];
705
706 if (!tree_fits_uhwi_p (arg)
707 || tree_to_uhwi (arg) > ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1)
708 {
709 error("constant argument %d for builtin %qF is out of range (0.."
710 HOST_WIDE_INT_PRINT_UNSIGNED ")",
711 argnum, decl,
712 ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1);
713 return false;
714 }
715 }
716
717 if (O_SIMM_P (op_flags))
718 {
719 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
720 int bitwidth = bitwidths[op_flags - O_S2];
721
722 if (!tree_fits_shwi_p (arg)
723 || tree_to_shwi (arg) < -((HOST_WIDE_INT)1 << (bitwidth - 1))
724 || tree_to_shwi (arg) > (((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1))
725 {
726 error("constant argument %d for builtin %qF is out of range ("
727 HOST_WIDE_INT_PRINT_DEC ".."
728 HOST_WIDE_INT_PRINT_DEC ")",
729 argnum, decl,
730 -(HOST_WIDE_INT)1 << (bitwidth - 1),
731 ((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1);
732 return false;
733 }
734 }
735 return true;
736 }
737
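/* Worked example (illustrative): for an unsigned 12-bit immediate
   operand (bitwidth 12 from the O_UIMM table above) the accepted
   range is 0 .. (1 << 12) - 1 = 0 .. 4095.  Passing e.g. 4096 as that
   builtin argument makes s390_const_operand_ok emit the out-of-range
   error and return false, so the caller substitutes const0_rtx.  */
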
738 /* Expand an expression EXP that calls a built-in function,
739 with result going to TARGET if that's convenient
740 (and in mode MODE if that's convenient).
741 SUBTARGET may be used as the target for computing one of EXP's operands.
742 IGNORE is nonzero if the value is to be ignored. */
743
744 static rtx
745 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
746 machine_mode mode ATTRIBUTE_UNUSED,
747 int ignore ATTRIBUTE_UNUSED)
748 {
749 #define MAX_ARGS 5
750
751 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
752 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
753 enum insn_code icode;
754 rtx op[MAX_ARGS], pat;
755 int arity;
756 bool nonvoid;
757 tree arg;
758 call_expr_arg_iterator iter;
759 unsigned int all_op_flags = opflags_for_builtin (fcode);
760 machine_mode last_vec_mode = VOIDmode;
761
762 if (TARGET_DEBUG_ARG)
763 {
764 fprintf (stderr,
765 "s390_expand_builtin, code = %4d, %s\n",
766 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)));
767 }
768
769 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
770 && fcode < S390_ALL_BUILTIN_MAX)
771 {
772 gcc_unreachable ();
773 }
774 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
775 {
776 icode = code_for_builtin[fcode];
777 /* Set a flag in the machine specific cfun part in order to support
778 saving/restoring of FPRs. */
779 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
780 cfun->machine->tbegin_p = true;
781 }
782 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
783 {
784 error ("Unresolved overloaded builtin");
785 return const0_rtx;
786 }
787 else
788 internal_error ("bad builtin fcode");
789
790 if (icode == 0)
791 internal_error ("bad builtin icode");
792
793 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
794
795 if (nonvoid)
796 {
797 machine_mode tmode = insn_data[icode].operand[0].mode;
798 if (!target
799 || GET_MODE (target) != tmode
800 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
801 target = gen_reg_rtx (tmode);
802
803 /* There are builtins (e.g. vec_promote) with no vector
804 arguments but an element selector. So we have to also look
805 at the vector return type when emitting the modulo
806 operation. */
807 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
808 last_vec_mode = insn_data[icode].operand[0].mode;
809 }
810
811 arity = 0;
812 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
813 {
814 const struct insn_operand_data *insn_op;
815 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
816
817 all_op_flags = all_op_flags >> O_SHIFT;
818
819 if (arg == error_mark_node)
820 return NULL_RTX;
821 if (arity >= MAX_ARGS)
822 return NULL_RTX;
823
824 if (O_IMM_P (op_flags)
825 && TREE_CODE (arg) != INTEGER_CST)
826 {
827 error ("constant value required for builtin %qF argument %d",
828 fndecl, arity + 1);
829 return const0_rtx;
830 }
831
832 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
833 return const0_rtx;
834
835 insn_op = &insn_data[icode].operand[arity + nonvoid];
836 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
837
838 /* Wrap the expanded RTX for pointer types into a MEM expr with
839          the proper mode.  This allows us to use e.g. (match_operand
840          "memory_operand" ...) in the insn patterns instead of (mem
841          (match_operand "address_operand" ...)).  This is helpful for
842          patterns that do not just accept MEMs.  */
843 if (POINTER_TYPE_P (TREE_TYPE (arg))
844 && insn_op->predicate != address_operand)
845 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
846
847       /* Expand the modulo operation required on element selectors.  */
848 if (op_flags == O_ELEM)
849 {
850 gcc_assert (last_vec_mode != VOIDmode);
851 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
852 op[arity],
853 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
854 NULL_RTX, 1, OPTAB_DIRECT);
855 }
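
      /* For instance (illustrative): with last_vec_mode == V4SImode,
         GET_MODE_NUNITS is 4, so the selector is ANDed with 3 and is
         thereby reduced modulo the number of vector elements.  */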
856
857 /* Record the vector mode used for an element selector. This assumes:
858 1. There is no builtin with two different vector modes and an element selector
859 2. The element selector comes after the vector type it is referring to.
860          This is currently true for all the builtins, but FIXME: we
861          should add a check for that.  */
862 if (VECTOR_MODE_P (insn_op->mode))
863 last_vec_mode = insn_op->mode;
864
865 if (insn_op->predicate (op[arity], insn_op->mode))
866 {
867 arity++;
868 continue;
869 }
870
871 if (MEM_P (op[arity])
872 && insn_op->predicate == memory_operand
873 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
874 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
875 {
876 op[arity] = replace_equiv_address (op[arity],
877 copy_to_mode_reg (Pmode,
878 XEXP (op[arity], 0)));
879 }
880 else if (GET_MODE (op[arity]) == insn_op->mode
881 || GET_MODE (op[arity]) == VOIDmode
882 || (insn_op->predicate == address_operand
883 && GET_MODE (op[arity]) == Pmode))
884 {
885 /* An address_operand usually has VOIDmode in the expander
886 so we cannot use this. */
887 machine_mode target_mode =
888 (insn_op->predicate == address_operand
889 ? Pmode : insn_op->mode);
890 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
891 }
892
893 if (!insn_op->predicate (op[arity], insn_op->mode))
894 {
895 error ("Invalid argument %d for builtin %qF", arity + 1, fndecl);
896 return const0_rtx;
897 }
898 arity++;
899 }
900
901 if (last_vec_mode != VOIDmode && !TARGET_VX)
902 {
903 error ("Vector type builtin %qF is not supported without -mvx "
904 "(default with -march=z13).",
905 fndecl);
906 return const0_rtx;
907 }
908
909 switch (arity)
910 {
911 case 0:
912 pat = GEN_FCN (icode) (target);
913 break;
914 case 1:
915 if (nonvoid)
916 pat = GEN_FCN (icode) (target, op[0]);
917 else
918 pat = GEN_FCN (icode) (op[0]);
919 break;
920 case 2:
921 if (nonvoid)
922 pat = GEN_FCN (icode) (target, op[0], op[1]);
923 else
924 pat = GEN_FCN (icode) (op[0], op[1]);
925 break;
926 case 3:
927 if (nonvoid)
928 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
929 else
930 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
931 break;
932 case 4:
933 if (nonvoid)
934 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
935 else
936 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
937 break;
938 case 5:
939 if (nonvoid)
940 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
941 else
942 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
943 break;
944 case 6:
945 if (nonvoid)
946 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
947 else
948 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
949 break;
950 default:
951 gcc_unreachable ();
952 }
953 if (!pat)
954 return NULL_RTX;
955 emit_insn (pat);
956
957 if (nonvoid)
958 return target;
959 else
960 return const0_rtx;
961 }
962
963
964 static const int s390_hotpatch_hw_max = 1000000;
965 static int s390_hotpatch_hw_before_label = 0;
966 static int s390_hotpatch_hw_after_label = 0;
967
968 /* Check whether the hotpatch attribute is applied to a function and, if it has
969 an argument, the argument is valid. */
970
971 static tree
972 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
973 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
974 {
975 tree expr;
976 tree expr2;
977 int err;
978
979 if (TREE_CODE (*node) != FUNCTION_DECL)
980 {
981 warning (OPT_Wattributes, "%qE attribute only applies to functions",
982 name);
983 *no_add_attrs = true;
984 }
985 if (args != NULL && TREE_CHAIN (args) != NULL)
986 {
987 expr = TREE_VALUE (args);
988 expr2 = TREE_VALUE (TREE_CHAIN (args));
989 }
990 if (args == NULL || TREE_CHAIN (args) == NULL)
991 err = 1;
992 else if (TREE_CODE (expr) != INTEGER_CST
993 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
994 || wi::gtu_p (expr, s390_hotpatch_hw_max))
995 err = 1;
996 else if (TREE_CODE (expr2) != INTEGER_CST
997 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
998 || wi::gtu_p (expr2, s390_hotpatch_hw_max))
999 err = 1;
1000 else
1001 err = 0;
1002 if (err)
1003 {
1004 error ("requested %qE attribute is not a comma separated pair of"
1005 " non-negative integer constants or too large (max. %d)", name,
1006 s390_hotpatch_hw_max);
1007 *no_add_attrs = true;
1008 }
1009
1010 return NULL_TREE;
1011 }
1012
1013 /* Expand the s390_vector_bool type attribute. */
1014
1015 static tree
1016 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1017 tree args ATTRIBUTE_UNUSED,
1018 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1019 {
1020 tree type = *node, result = NULL_TREE;
1021 machine_mode mode;
1022
1023 while (POINTER_TYPE_P (type)
1024 || TREE_CODE (type) == FUNCTION_TYPE
1025 || TREE_CODE (type) == METHOD_TYPE
1026 || TREE_CODE (type) == ARRAY_TYPE)
1027 type = TREE_TYPE (type);
1028
1029 mode = TYPE_MODE (type);
1030 switch (mode)
1031 {
1032 case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break;
1033 case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break;
1034 case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break;
1035     case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI]; break;
1036 default: break;
1037 }
1038
1039 *no_add_attrs = true; /* No need to hang on to the attribute. */
1040
1041 if (result)
1042 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1043
1044 return NULL_TREE;
1045 }
1046
1047 static const struct attribute_spec s390_attribute_table[] = {
1048 { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
1049 { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
1050 /* End element. */
1051 { NULL, 0, 0, false, false, false, NULL, false }
1052 };
1053
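/* Usage sketch for the table above (illustrative only):

     void foo (void) __attribute__ ((hotpatch (1, 2)));

   requests 1 halfword of hotpatch space before and 2 halfwords after
   foo's label, both checked against s390_hotpatch_hw_max by the
   handler above.  The s390_vector_bool type attribute is normally
   applied indirectly, via the zvector language extension's bool
   vector types, rather than written by hand.  */
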
1054 /* Return the alignment for LABEL. We default to the -falign-labels
1055 value except for the literal pool base label. */
1056 int
1057 s390_label_align (rtx label)
1058 {
1059 rtx_insn *prev_insn = prev_active_insn (label);
1060 rtx set, src;
1061
1062 if (prev_insn == NULL_RTX)
1063 goto old;
1064
1065 set = single_set (prev_insn);
1066
1067 if (set == NULL_RTX)
1068 goto old;
1069
1070 src = SET_SRC (set);
1071
1072 /* Don't align literal pool base labels. */
1073 if (GET_CODE (src) == UNSPEC
1074 && XINT (src, 1) == UNSPEC_MAIN_BASE)
1075 return 0;
1076
1077 old:
1078 return align_labels_log;
1079 }
1080
1081 static machine_mode
1082 s390_libgcc_cmp_return_mode (void)
1083 {
1084 return TARGET_64BIT ? DImode : SImode;
1085 }
1086
1087 static machine_mode
1088 s390_libgcc_shift_count_mode (void)
1089 {
1090 return TARGET_64BIT ? DImode : SImode;
1091 }
1092
1093 static machine_mode
1094 s390_unwind_word_mode (void)
1095 {
1096 return TARGET_64BIT ? DImode : SImode;
1097 }
1098
1099 /* Return true if the back end supports mode MODE. */
1100 static bool
1101 s390_scalar_mode_supported_p (machine_mode mode)
1102 {
1103   /* In contrast to the default implementation, reject TImode constants on
1104      31-bit TARGET_ZARCH for ABI compliance.  */
1105 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1106 return false;
1107
1108 if (DECIMAL_FLOAT_MODE_P (mode))
1109 return default_decimal_float_supported_p ();
1110
1111 return default_scalar_mode_supported_p (mode);
1112 }
1113
1114 /* Return true if the back end supports vector mode MODE. */
1115 static bool
1116 s390_vector_mode_supported_p (machine_mode mode)
1117 {
1118 machine_mode inner;
1119
1120 if (!VECTOR_MODE_P (mode)
1121 || !TARGET_VX
1122 || GET_MODE_SIZE (mode) > 16)
1123 return false;
1124
1125 inner = GET_MODE_INNER (mode);
1126
1127 switch (inner)
1128 {
1129 case QImode:
1130 case HImode:
1131 case SImode:
1132 case DImode:
1133 case TImode:
1134 case SFmode:
1135 case DFmode:
1136 case TFmode:
1137 return true;
1138 default:
1139 return false;
1140 }
1141 }
1142
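/* For example (illustrative): with -mvx, V16QImode, V4SImode and
   V2DFmode (16 bytes each) pass the checks above, while a 32-byte
   mode such as V4DFmode is rejected by the GET_MODE_SIZE test
   regardless of its element type.  */
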
1143 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1144
1145 void
1146 s390_set_has_landing_pad_p (bool value)
1147 {
1148 cfun->machine->has_landing_pad_p = value;
1149 }
1150
1151 /* If two condition code modes are compatible, return a condition code
1152 mode which is compatible with both. Otherwise, return
1153 VOIDmode. */
1154
1155 static machine_mode
1156 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1157 {
1158 if (m1 == m2)
1159 return m1;
1160
1161 switch (m1)
1162 {
1163 case CCZmode:
1164 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1165 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1166 return m2;
1167 return VOIDmode;
1168
1169 case CCSmode:
1170 case CCUmode:
1171 case CCTmode:
1172 case CCSRmode:
1173 case CCURmode:
1174 case CCZ1mode:
1175 if (m2 == CCZmode)
1176 return m1;
1177
1178 return VOIDmode;
1179
1180 default:
1181 return VOIDmode;
1182 }
1183 return VOIDmode;
1184 }
1185
1186 /* Return true if SET either doesn't set the CC register, or else
1187 the source and destination have matching CC modes and that
1188 CC mode is at least as constrained as REQ_MODE. */
1189
1190 static bool
1191 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1192 {
1193 machine_mode set_mode;
1194
1195 gcc_assert (GET_CODE (set) == SET);
1196
1197 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1198 return 1;
1199
1200 set_mode = GET_MODE (SET_DEST (set));
1201 switch (set_mode)
1202 {
1203 case CCSmode:
1204 case CCSRmode:
1205 case CCUmode:
1206 case CCURmode:
1207 case CCLmode:
1208 case CCL1mode:
1209 case CCL2mode:
1210 case CCL3mode:
1211 case CCT1mode:
1212 case CCT2mode:
1213 case CCT3mode:
1214 case CCVEQmode:
1215 case CCVHmode:
1216 case CCVHUmode:
1217 case CCVFHmode:
1218 case CCVFHEmode:
1219 if (req_mode != set_mode)
1220 return 0;
1221 break;
1222
1223 case CCZmode:
1224 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1225 && req_mode != CCSRmode && req_mode != CCURmode)
1226 return 0;
1227 break;
1228
1229 case CCAPmode:
1230 case CCANmode:
1231 if (req_mode != CCAmode)
1232 return 0;
1233 break;
1234
1235 default:
1236 gcc_unreachable ();
1237 }
1238
1239 return (GET_MODE (SET_SRC (set)) == set_mode);
1240 }
1241
1242 /* Return true if every SET in INSN that sets the CC register
1243 has source and destination with matching CC modes and that
1244 CC mode is at least as constrained as REQ_MODE.
1245 If REQ_MODE is VOIDmode, always return false. */
1246
1247 bool
1248 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1249 {
1250 int i;
1251
1252 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1253 if (req_mode == VOIDmode)
1254 return false;
1255
1256 if (GET_CODE (PATTERN (insn)) == SET)
1257 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1258
1259 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1260 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1261 {
1262 rtx set = XVECEXP (PATTERN (insn), 0, i);
1263 if (GET_CODE (set) == SET)
1264 if (!s390_match_ccmode_set (set, req_mode))
1265 return false;
1266 }
1267
1268 return true;
1269 }
1270
1271 /* If a test-under-mask instruction can be used to implement
1272 (compare (and ... OP1) OP2), return the CC mode required
1273 to do that. Otherwise, return VOIDmode.
1274 MIXED is true if the instruction can distinguish between
1275 CC1 and CC2 for mixed selected bits (TMxx), it is false
1276 if the instruction cannot (TM). */
1277
1278 machine_mode
1279 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1280 {
1281 int bit0, bit1;
1282
1283 /* ??? Fixme: should work on CONST_DOUBLE as well. */
1284 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1285 return VOIDmode;
1286
1287 /* Selected bits all zero: CC0.
1288 e.g.: int a; if ((a & (16 + 128)) == 0) */
1289 if (INTVAL (op2) == 0)
1290 return CCTmode;
1291
1292 /* Selected bits all one: CC3.
1293 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1294 if (INTVAL (op2) == INTVAL (op1))
1295 return CCT3mode;
1296
1297 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1298 int a;
1299 if ((a & (16 + 128)) == 16) -> CCT1
1300 if ((a & (16 + 128)) == 128) -> CCT2 */
1301 if (mixed)
1302 {
1303 bit1 = exact_log2 (INTVAL (op2));
1304 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1305 if (bit0 != -1 && bit1 != -1)
1306 return bit0 > bit1 ? CCT1mode : CCT2mode;
1307 }
1308
1309 return VOIDmode;
1310 }
1311
1312 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1313 OP0 and OP1 of a COMPARE, return the mode to be used for the
1314 comparison. */
1315
1316 machine_mode
1317 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1318 {
1319 if (TARGET_VX
1320 && register_operand (op0, DFmode)
1321 && register_operand (op1, DFmode))
1322 {
1323 /* LT, LE, UNGT, UNGE require swapping OP0 and OP1. Either
1324 s390_emit_compare or s390_canonicalize_comparison will take
1325 care of it. */
1326 switch (code)
1327 {
1328 case EQ:
1329 case NE:
1330 return CCVEQmode;
1331 case GT:
1332 case UNLE:
1333 return CCVFHmode;
1334 case GE:
1335 case UNLT:
1336 return CCVFHEmode;
1337 default:
1338 ;
1339 }
1340 }
1341
1342 switch (code)
1343 {
1344 case EQ:
1345 case NE:
1346 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1347 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1348 return CCAPmode;
1349 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1350 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1351 return CCAPmode;
1352 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1353 || GET_CODE (op1) == NEG)
1354 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1355 return CCLmode;
1356
1357 if (GET_CODE (op0) == AND)
1358 {
1359 /* Check whether we can potentially do it via TM. */
1360 machine_mode ccmode;
1361 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1362 if (ccmode != VOIDmode)
1363 {
1364 /* Relax CCTmode to CCZmode to allow fall-back to AND
1365 if that turns out to be beneficial. */
1366 return ccmode == CCTmode ? CCZmode : ccmode;
1367 }
1368 }
1369
1370 if (register_operand (op0, HImode)
1371 && GET_CODE (op1) == CONST_INT
1372 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1373 return CCT3mode;
1374 if (register_operand (op0, QImode)
1375 && GET_CODE (op1) == CONST_INT
1376 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1377 return CCT3mode;
1378
1379 return CCZmode;
1380
1381 case LE:
1382 case LT:
1383 case GE:
1384 case GT:
1385 /* The only overflow condition of NEG and ABS happens when
1386          INT_MIN is used as parameter, in which case the result stays negative.  So
1387 we have an overflow from a positive value to a negative.
1388 Using CCAP mode the resulting cc can be used for comparisons. */
1389 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1390 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1391 return CCAPmode;
1392
1393 /* If constants are involved in an add instruction it is possible to use
1394 the resulting cc for comparisons with zero. Knowing the sign of the
1395 constant the overflow behavior gets predictable. e.g.:
1396 int a, b; if ((b = a + c) > 0)
1397 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1398 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1399 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1400 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1401 /* Avoid INT32_MIN on 32 bit. */
1402 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1403 {
1404 if (INTVAL (XEXP((op0), 1)) < 0)
1405 return CCANmode;
1406 else
1407 return CCAPmode;
1408 }
1409 /* Fall through. */
1410 case UNORDERED:
1411 case ORDERED:
1412 case UNEQ:
1413 case UNLE:
1414 case UNLT:
1415 case UNGE:
1416 case UNGT:
1417 case LTGT:
1418 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1419 && GET_CODE (op1) != CONST_INT)
1420 return CCSRmode;
1421 return CCSmode;
1422
1423 case LTU:
1424 case GEU:
1425 if (GET_CODE (op0) == PLUS
1426 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1427 return CCL1mode;
1428
1429 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1430 && GET_CODE (op1) != CONST_INT)
1431 return CCURmode;
1432 return CCUmode;
1433
1434 case LEU:
1435 case GTU:
1436 if (GET_CODE (op0) == MINUS
1437 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1438 return CCL2mode;
1439
1440 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1441 && GET_CODE (op1) != CONST_INT)
1442 return CCURmode;
1443 return CCUmode;
1444
1445 default:
1446 gcc_unreachable ();
1447 }
1448 }
1449
1450 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1451 that we can implement more efficiently. */
1452
1453 static void
1454 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1455 bool op0_preserve_value)
1456 {
1457 if (op0_preserve_value)
1458 return;
1459
1460 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1461 if ((*code == EQ || *code == NE)
1462 && *op1 == const0_rtx
1463 && GET_CODE (*op0) == ZERO_EXTRACT
1464 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1465 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1466 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1467 {
1468 rtx inner = XEXP (*op0, 0);
1469 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1470 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1471 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1472
1473 if (len > 0 && len < modesize
1474 && pos >= 0 && pos + len <= modesize
1475 && modesize <= HOST_BITS_PER_WIDE_INT)
1476 {
1477 unsigned HOST_WIDE_INT block;
1478 block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
1479 block <<= modesize - pos - len;
1480
1481 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1482 gen_int_mode (block, GET_MODE (inner)));
1483 }
1484 }
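
  /* Worked example (illustrative): comparing
     (zero_extract:SI x (const_int 8) (const_int 0)) against zero
     gives modesize = 32, len = 8 and pos = 0, hence
     block = 0xff << 24 = 0xff000000; the comparison is rewritten as
     (and:SI x (const_int 0xff000000)) == 0, which the TM patterns
     can pick up.  */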
1485
1486 /* Narrow AND of memory against immediate to enable TM. */
1487 if ((*code == EQ || *code == NE)
1488 && *op1 == const0_rtx
1489 && GET_CODE (*op0) == AND
1490 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1491 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1492 {
1493 rtx inner = XEXP (*op0, 0);
1494 rtx mask = XEXP (*op0, 1);
1495
1496 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1497 if (GET_CODE (inner) == SUBREG
1498 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1499 && (GET_MODE_SIZE (GET_MODE (inner))
1500 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1501 && ((INTVAL (mask)
1502 & GET_MODE_MASK (GET_MODE (inner))
1503 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1504 == 0))
1505 inner = SUBREG_REG (inner);
1506
1507 /* Do not change volatile MEMs. */
1508 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1509 {
1510 int part = s390_single_part (XEXP (*op0, 1),
1511 GET_MODE (inner), QImode, 0);
1512 if (part >= 0)
1513 {
1514 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1515 inner = adjust_address_nv (inner, QImode, part);
1516 *op0 = gen_rtx_AND (QImode, inner, mask);
1517 }
1518 }
1519 }
1520
1521 /* Narrow comparisons against 0xffff to HImode if possible. */
1522 if ((*code == EQ || *code == NE)
1523 && GET_CODE (*op1) == CONST_INT
1524 && INTVAL (*op1) == 0xffff
1525 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1526 && (nonzero_bits (*op0, GET_MODE (*op0))
1527 & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
1528 {
1529 *op0 = gen_lowpart (HImode, *op0);
1530 *op1 = constm1_rtx;
1531 }
1532
1533 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1534 if (GET_CODE (*op0) == UNSPEC
1535 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1536 && XVECLEN (*op0, 0) == 1
1537 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1538 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1539 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1540 && *op1 == const0_rtx)
1541 {
1542 enum rtx_code new_code = UNKNOWN;
1543 switch (*code)
1544 {
1545 case EQ: new_code = EQ; break;
1546 case NE: new_code = NE; break;
1547 case LT: new_code = GTU; break;
1548 case GT: new_code = LTU; break;
1549 case LE: new_code = GEU; break;
1550 case GE: new_code = LEU; break;
1551 default: break;
1552 }
1553
1554 if (new_code != UNKNOWN)
1555 {
1556 *op0 = XVECEXP (*op0, 0, 0);
1557 *code = new_code;
1558 }
1559 }
1560
1561 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1562 if (GET_CODE (*op0) == UNSPEC
1563 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1564 && XVECLEN (*op0, 0) == 1
1565 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1566 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1567 && CONST_INT_P (*op1))
1568 {
1569 enum rtx_code new_code = UNKNOWN;
1570 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1571 {
1572 case CCZmode:
1573 case CCRAWmode:
1574 switch (*code)
1575 {
1576 case EQ: new_code = EQ; break;
1577 case NE: new_code = NE; break;
1578 default: break;
1579 }
1580 break;
1581 default: break;
1582 }
1583
1584 if (new_code != UNKNOWN)
1585 {
1586 /* For CCRAWmode put the required cc mask into the second
1587 operand. */
1588 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1589 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1590 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1591 *op0 = XVECEXP (*op0, 0, 0);
1592 *code = new_code;
1593 }
1594 }
1595
1596 /* Simplify cascaded EQ, NE with const0_rtx. */
1597 if ((*code == NE || *code == EQ)
1598 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1599 && GET_MODE (*op0) == SImode
1600 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1601 && REG_P (XEXP (*op0, 0))
1602 && XEXP (*op0, 1) == const0_rtx
1603 && *op1 == const0_rtx)
1604 {
1605 if ((*code == EQ && GET_CODE (*op0) == NE)
1606 || (*code == NE && GET_CODE (*op0) == EQ))
1607 *code = EQ;
1608 else
1609 *code = NE;
1610 *op0 = XEXP (*op0, 0);
1611 }
1612
1613 /* Prefer register over memory as first operand. */
1614 if (MEM_P (*op0) && REG_P (*op1))
1615 {
1616 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1617 *code = (int)swap_condition ((enum rtx_code)*code);
1618 }
1619
1620 /* Using the scalar variants of vector instructions for 64 bit FP
1621 comparisons might require swapping the operands. */
1622 if (TARGET_VX
1623 && register_operand (*op0, DFmode)
1624 && register_operand (*op1, DFmode)
1625 && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
1626 {
1627 rtx tmp;
1628
1629 switch (*code)
1630 {
1631 case LT: *code = GT; break;
1632 case LE: *code = GE; break;
1633 case UNGT: *code = UNLE; break;
1634 case UNGE: *code = UNLT; break;
1635 default: ;
1636 }
1637 tmp = *op0; *op0 = *op1; *op1 = tmp;
1638 }
1639 }
1640
1641 /* Helper function for s390_emit_compare. If possible emit a 64 bit
1642 FP compare using the single element variant of vector instructions.
1643 Replace CODE with the comparison code to be used in the CC reg
1644 compare and return the condition code register RTX in CC. */
1645
1646 static bool
1647 s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
1648 rtx *cc)
1649 {
1650 machine_mode cmp_mode;
1651 bool swap_p = false;
1652
1653 switch (*code)
1654 {
1655 case EQ: cmp_mode = CCVEQmode; break;
1656 case NE: cmp_mode = CCVEQmode; break;
1657 case GT: cmp_mode = CCVFHmode; break;
1658 case GE: cmp_mode = CCVFHEmode; break;
1659 case UNLE: cmp_mode = CCVFHmode; break;
1660 case UNLT: cmp_mode = CCVFHEmode; break;
1661 case LT: cmp_mode = CCVFHmode; *code = GT; swap_p = true; break;
1662 case LE: cmp_mode = CCVFHEmode; *code = GE; swap_p = true; break;
1663 case UNGE: cmp_mode = CCVFHmode; *code = UNLE; swap_p = true; break;
1664 case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
1665 default: return false;
1666 }
1667
1668 if (swap_p)
1669 {
1670 rtx tmp = cmp2;
1671 cmp2 = cmp1;
1672 cmp1 = tmp;
1673 }
1674 *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
1675 emit_insn (gen_rtx_PARALLEL (VOIDmode,
1676 gen_rtvec (2,
1677 gen_rtx_SET (*cc,
1678 gen_rtx_COMPARE (cmp_mode, cmp1,
1679 cmp2)),
1680 gen_rtx_CLOBBER (VOIDmode,
1681 gen_rtx_SCRATCH (V2DImode)))));
1682 return true;
1683 }
1684
1685
1686 /* Emit a compare instruction suitable to implement the comparison
1687 OP0 CODE OP1. Return the correct condition RTL to be placed in
1688 the IF_THEN_ELSE of the conditional branch testing the result. */
1689
1690 rtx
1691 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1692 {
1693 machine_mode mode = s390_select_ccmode (code, op0, op1);
1694 rtx cc;
1695
1696 if (TARGET_VX
1697 && register_operand (op0, DFmode)
1698 && register_operand (op1, DFmode)
1699 && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
1700 {
1701 /* Work has been done by s390_expand_vec_compare_scalar already. */
1702 }
1703 else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1704 {
1705 /* Do not output a redundant compare instruction if a
1706 compare_and_swap pattern already computed the result and the
1707 machine modes are compatible. */
1708 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1709 == GET_MODE (op0));
1710 cc = op0;
1711 }
1712 else
1713 {
1714 cc = gen_rtx_REG (mode, CC_REGNUM);
1715 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1716 }
1717
1718 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1719 }
1720
1721 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1722 matches CMP.
1723 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1724 conditional branch testing the result. */
1725
1726 static rtx
1727 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1728 rtx cmp, rtx new_rtx)
1729 {
1730 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
1731 return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
1732 const0_rtx);
1733 }
1734
1735 /* Emit a jump instruction to TARGET and return it. If COND is
1736 NULL_RTX, emit an unconditional jump, else a conditional jump under
1737 condition COND. */
1738
1739 rtx_insn *
1740 s390_emit_jump (rtx target, rtx cond)
1741 {
1742 rtx insn;
1743
1744 target = gen_rtx_LABEL_REF (VOIDmode, target);
1745 if (cond)
1746 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1747
1748 insn = gen_rtx_SET (pc_rtx, target);
1749 return emit_jump_insn (insn);
1750 }
1751
1752 /* Return branch condition mask to implement a branch
1753 specified by CODE. Return -1 for invalid comparisons. */
1754
1755 int
1756 s390_branch_condition_mask (rtx code)
1757 {
1758 const int CC0 = 1 << 3;
1759 const int CC1 = 1 << 2;
1760 const int CC2 = 1 << 1;
1761 const int CC3 = 1 << 0;
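
  /* Editorial note (illustrative): these masks mirror the 4-bit mask
     field of the branch-on-condition instructions, with the leftmost
     bit selecting condition code 0.  For instance, GE in CCAPmode
     below yields CC0 | CC2 == 10 (binary 1010), i.e. branch if the
     condition code is 0 or 2.  */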
1762
1763 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1764 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1765 gcc_assert (XEXP (code, 1) == const0_rtx
1766 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1767 && CONST_INT_P (XEXP (code, 1))));
1768
1769
1770 switch (GET_MODE (XEXP (code, 0)))
1771 {
1772 case CCZmode:
1773 case CCZ1mode:
1774 switch (GET_CODE (code))
1775 {
1776 case EQ: return CC0;
1777 case NE: return CC1 | CC2 | CC3;
1778 default: return -1;
1779 }
1780 break;
1781
1782 case CCT1mode:
1783 switch (GET_CODE (code))
1784 {
1785 case EQ: return CC1;
1786 case NE: return CC0 | CC2 | CC3;
1787 default: return -1;
1788 }
1789 break;
1790
1791 case CCT2mode:
1792 switch (GET_CODE (code))
1793 {
1794 case EQ: return CC2;
1795 case NE: return CC0 | CC1 | CC3;
1796 default: return -1;
1797 }
1798 break;
1799
1800 case CCT3mode:
1801 switch (GET_CODE (code))
1802 {
1803 case EQ: return CC3;
1804 case NE: return CC0 | CC1 | CC2;
1805 default: return -1;
1806 }
1807 break;
1808
1809 case CCLmode:
1810 switch (GET_CODE (code))
1811 {
1812 case EQ: return CC0 | CC2;
1813 case NE: return CC1 | CC3;
1814 default: return -1;
1815 }
1816 break;
1817
1818 case CCL1mode:
1819 switch (GET_CODE (code))
1820 {
1821 case LTU: return CC2 | CC3; /* carry */
1822 case GEU: return CC0 | CC1; /* no carry */
1823 default: return -1;
1824 }
1825 break;
1826
1827 case CCL2mode:
1828 switch (GET_CODE (code))
1829 {
1830 case GTU: return CC0 | CC1; /* borrow */
1831 case LEU: return CC2 | CC3; /* no borrow */
1832 default: return -1;
1833 }
1834 break;
1835
1836 case CCL3mode:
1837 switch (GET_CODE (code))
1838 {
1839 case EQ: return CC0 | CC2;
1840 case NE: return CC1 | CC3;
1841 case LTU: return CC1;
1842 case GTU: return CC3;
1843 case LEU: return CC1 | CC2;
1844 case GEU: return CC2 | CC3;
1845 default: return -1;
1846 }
1847
1848 case CCUmode:
1849 switch (GET_CODE (code))
1850 {
1851 case EQ: return CC0;
1852 case NE: return CC1 | CC2 | CC3;
1853 case LTU: return CC1;
1854 case GTU: return CC2;
1855 case LEU: return CC0 | CC1;
1856 case GEU: return CC0 | CC2;
1857 default: return -1;
1858 }
1859 break;
1860
1861 case CCURmode:
1862 switch (GET_CODE (code))
1863 {
1864 case EQ: return CC0;
1865 case NE: return CC2 | CC1 | CC3;
1866 case LTU: return CC2;
1867 case GTU: return CC1;
1868 case LEU: return CC0 | CC2;
1869 case GEU: return CC0 | CC1;
1870 default: return -1;
1871 }
1872 break;
1873
1874 case CCAPmode:
1875 switch (GET_CODE (code))
1876 {
1877 case EQ: return CC0;
1878 case NE: return CC1 | CC2 | CC3;
1879 case LT: return CC1 | CC3;
1880 case GT: return CC2;
1881 case LE: return CC0 | CC1 | CC3;
1882 case GE: return CC0 | CC2;
1883 default: return -1;
1884 }
1885 break;
1886
1887 case CCANmode:
1888 switch (GET_CODE (code))
1889 {
1890 case EQ: return CC0;
1891 case NE: return CC1 | CC2 | CC3;
1892 case LT: return CC1;
1893 case GT: return CC2 | CC3;
1894 case LE: return CC0 | CC1;
1895 case GE: return CC0 | CC2 | CC3;
1896 default: return -1;
1897 }
1898 break;
1899
1900 case CCSmode:
1901 switch (GET_CODE (code))
1902 {
1903 case EQ: return CC0;
1904 case NE: return CC1 | CC2 | CC3;
1905 case LT: return CC1;
1906 case GT: return CC2;
1907 case LE: return CC0 | CC1;
1908 case GE: return CC0 | CC2;
1909 case UNORDERED: return CC3;
1910 case ORDERED: return CC0 | CC1 | CC2;
1911 case UNEQ: return CC0 | CC3;
1912 case UNLT: return CC1 | CC3;
1913 case UNGT: return CC2 | CC3;
1914 case UNLE: return CC0 | CC1 | CC3;
1915 case UNGE: return CC0 | CC2 | CC3;
1916 case LTGT: return CC1 | CC2;
1917 default: return -1;
1918 }
1919 break;
1920
1921 case CCSRmode:
1922 switch (GET_CODE (code))
1923 {
1924 case EQ: return CC0;
1925 case NE: return CC2 | CC1 | CC3;
1926 case LT: return CC2;
1927 case GT: return CC1;
1928 case LE: return CC0 | CC2;
1929 case GE: return CC0 | CC1;
1930 case UNORDERED: return CC3;
1931 case ORDERED: return CC0 | CC2 | CC1;
1932 case UNEQ: return CC0 | CC3;
1933 case UNLT: return CC2 | CC3;
1934 case UNGT: return CC1 | CC3;
1935 case UNLE: return CC0 | CC2 | CC3;
1936 case UNGE: return CC0 | CC1 | CC3;
1937 case LTGT: return CC2 | CC1;
1938 default: return -1;
1939 }
1940 break;
1941
1942 /* Vector comparison modes. */
1943
1944 case CCVEQmode:
1945 switch (GET_CODE (code))
1946 {
1947 case EQ: return CC0;
1948 case NE: return CC3;
1949 default: return -1;
1950 }
1951
1952 case CCVEQANYmode:
1953 switch (GET_CODE (code))
1954 {
1955 case EQ: return CC0 | CC1;
1956 case NE: return CC3 | CC1;
1957 default: return -1;
1958 }
1959
1960 /* Integer vector compare modes. */
1961
1962 case CCVHmode:
1963 switch (GET_CODE (code))
1964 {
1965 case GT: return CC0;
1966 case LE: return CC3;
1967 default: return -1;
1968 }
1969
1970 case CCVHANYmode:
1971 switch (GET_CODE (code))
1972 {
1973 case GT: return CC0 | CC1;
1974 case LE: return CC3 | CC1;
1975 default: return -1;
1976 }
1977
1978 case CCVHUmode:
1979 switch (GET_CODE (code))
1980 {
1981 case GTU: return CC0;
1982 case LEU: return CC3;
1983 default: return -1;
1984 }
1985
1986 case CCVHUANYmode:
1987 switch (GET_CODE (code))
1988 {
1989 case GTU: return CC0 | CC1;
1990 case LEU: return CC3 | CC1;
1991 default: return -1;
1992 }
1993
1994 /* FP vector compare modes. */
1995
1996 case CCVFHmode:
1997 switch (GET_CODE (code))
1998 {
1999 case GT: return CC0;
2000 case UNLE: return CC3;
2001 default: return -1;
2002 }
2003
2004 case CCVFHANYmode:
2005 switch (GET_CODE (code))
2006 {
2007 case GT: return CC0 | CC1;
2008 case UNLE: return CC3 | CC1;
2009 default: return -1;
2010 }
2011
2012 case CCVFHEmode:
2013 switch (GET_CODE (code))
2014 {
2015 case GE: return CC0;
2016 case UNLT: return CC3;
2017 default: return -1;
2018 }
2019
2020 case CCVFHEANYmode:
2021 switch (GET_CODE (code))
2022 {
2023 case GE: return CC0 | CC1;
2024 case UNLT: return CC3 | CC1;
2025 default: return -1;
2026 }
2027
2028
2029 case CCRAWmode:
2030 switch (GET_CODE (code))
2031 {
2032 case EQ:
2033 return INTVAL (XEXP (code, 1));
2034 case NE:
2035 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2036 default:
2037 gcc_unreachable ();
2038 }
2039
2040 default:
2041 return -1;
2042 }
2043 }
2044
2045
2046 /* Return branch condition mask to implement a compare and branch
2047 specified by CODE. Return -1 for invalid comparisons. */
2048
2049 int
2050 s390_compare_and_branch_condition_mask (rtx code)
2051 {
2052 const int CC0 = 1 << 3;
2053 const int CC1 = 1 << 2;
2054 const int CC2 = 1 << 1;
2055
2056 switch (GET_CODE (code))
2057 {
2058 case EQ:
2059 return CC0;
2060 case NE:
2061 return CC1 | CC2;
2062 case LT:
2063 case LTU:
2064 return CC1;
2065 case GT:
2066 case GTU:
2067 return CC2;
2068 case LE:
2069 case LEU:
2070 return CC0 | CC1;
2071 case GE:
2072 case GEU:
2073 return CC0 | CC2;
2074 default:
2075 gcc_unreachable ();
2076 }
2077 return -1;
2078 }
2079
2080 /* If INV is false, return assembler mnemonic string to implement
2081 a branch specified by CODE. If INV is true, return mnemonic
2082 for the corresponding inverted branch. */
2083
2084 static const char *
2085 s390_branch_condition_mnemonic (rtx code, int inv)
2086 {
2087 int mask;
2088
2089 static const char *const mnemonic[16] =
2090 {
2091 NULL, "o", "h", "nle",
2092 "l", "nhe", "lh", "ne",
2093 "e", "nlh", "he", "nl",
2094 "le", "nh", "no", NULL
2095 };
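/* The table is indexed by the four-bit condition mask computed by
   s390_branch_condition_mask / s390_compare_and_branch_condition_mask:
   e.g. mask 8 (CC0 only) yields "e" and mask 7 (CC1 | CC2 | CC3) yields
   "ne".  Masks 0 and 15 (never/always) are excluded by the assertion
   below.  */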
2096
2097 if (GET_CODE (XEXP (code, 0)) == REG
2098 && REGNO (XEXP (code, 0)) == CC_REGNUM
2099 && (XEXP (code, 1) == const0_rtx
2100 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2101 && CONST_INT_P (XEXP (code, 1)))))
2102 mask = s390_branch_condition_mask (code);
2103 else
2104 mask = s390_compare_and_branch_condition_mask (code);
2105
2106 gcc_assert (mask >= 0);
2107
2108 if (inv)
2109 mask ^= 15;
2110
2111 gcc_assert (mask >= 1 && mask <= 14);
2112
2113 return mnemonic[mask];
2114 }
2115
2116 /* Return the part of OP which has a value different from DEF.
2117 The size of the part is determined by MODE.
2118 Use this function only if you already know that OP really
2119 contains such a part. */
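/* For example, OP = 0x00ff0000 with MODE = HImode and DEF = 0 skips the
   zero low halfword and returns 0x00ff.  */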
2120
2121 unsigned HOST_WIDE_INT
2122 s390_extract_part (rtx op, machine_mode mode, int def)
2123 {
2124 unsigned HOST_WIDE_INT value = 0;
2125 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2126 int part_bits = GET_MODE_BITSIZE (mode);
2127 unsigned HOST_WIDE_INT part_mask
2128 = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
2129 int i;
2130
2131 for (i = 0; i < max_parts; i++)
2132 {
2133 if (i == 0)
2134 value = (unsigned HOST_WIDE_INT) INTVAL (op);
2135 else
2136 value >>= part_bits;
2137
2138 if ((value & part_mask) != (def & part_mask))
2139 return value & part_mask;
2140 }
2141
2142 gcc_unreachable ();
2143 }
2144
2145 /* If OP is an integer constant of mode MODE with exactly one
2146 part of mode PART_MODE unequal to DEF, return the number of that
2147 part. Otherwise, return -1. */
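/* The returned part number counts from the most significant part, i.e.
   part 0 is the leftmost PART_MODE chunk of MODE.  */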
2148
2149 int
2150 s390_single_part (rtx op,
2151 machine_mode mode,
2152 machine_mode part_mode,
2153 int def)
2154 {
2155 unsigned HOST_WIDE_INT value = 0;
2156 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2157 unsigned HOST_WIDE_INT part_mask
2158 = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
2159 int i, part = -1;
2160
2161 if (GET_CODE (op) != CONST_INT)
2162 return -1;
2163
2164 for (i = 0; i < n_parts; i++)
2165 {
2166 if (i == 0)
2167 value = (unsigned HOST_WIDE_INT) INTVAL (op);
2168 else
2169 value >>= GET_MODE_BITSIZE (part_mode);
2170
2171 if ((value & part_mask) != (def & part_mask))
2172 {
2173 if (part != -1)
2174 return -1;
2175 else
2176 part = i;
2177 }
2178 }
2179 return part == -1 ? -1 : n_parts - 1 - part;
2180 }
2181
2182 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2183 bits and no other bits are set in IN. POS and LENGTH can be used
2184 to obtain the start position and the length of the bitfield.
2185
2186 POS gives the position of the first bit of the bitfield counting
2187 from the lowest order bit starting with zero. In order to use this
2188 value for S/390 instructions this has to be converted to "bits big
2189 endian" style. */
2190
2191 bool
2192 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
2193 int *pos, int *length)
2194 {
2195 int tmp_pos = 0;
2196 int tmp_length = 0;
2197 int i;
2198 unsigned HOST_WIDE_INT mask = 1ULL;
2199 bool contiguous = false;
2200
2201 for (i = 0; i < size; mask <<= 1, i++)
2202 {
2203 if (contiguous)
2204 {
2205 if (mask & in)
2206 tmp_length++;
2207 else
2208 break;
2209 }
2210 else
2211 {
2212 if (mask & in)
2213 {
2214 contiguous = true;
2215 tmp_length++;
2216 }
2217 else
2218 tmp_pos++;
2219 }
2220 }
2221
2222 if (!tmp_length)
2223 return false;
2224
2225 /* Calculate a mask for all bits beyond the contiguous bits. */
2226 mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
2227
2228 if ((unsigned)size < sizeof (HOST_WIDE_INT) * BITS_PER_UNIT)
2229 mask &= (HOST_WIDE_INT_1U << size) - 1;
2230
2231 if (mask & in)
2232 return false;
2233
2234 if (tmp_length + tmp_pos - 1 > size)
2235 return false;
2236
2237 if (length)
2238 *length = tmp_length;
2239
2240 if (pos)
2241 *pos = tmp_pos;
2242
2243 return true;
2244 }
2245
2246 /* Return true if OP contains the same contiguous bitfield in *all*
2247 its elements. START and END can be used to obtain the start and
2248 end position of the bitfield.
2249
2250 START/END give the position of the first/last bit of the bitfield
2251 counting from the lowest order bit starting with zero. In order to
2252 use these values for S/390 instructions this has to be converted to
2253 "bits big endian" style. */
2254
2255 bool
2256 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2257 {
2258 unsigned HOST_WIDE_INT mask;
2259 int length, size;
2260
2261 if (!VECTOR_MODE_P (GET_MODE (op))
2262 || GET_CODE (op) != CONST_VECTOR
2263 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2264 return false;
2265
2266 if (GET_MODE_NUNITS (GET_MODE (op)) > 1)
2267 {
2268 int i;
2269
2270 for (i = 1; i < GET_MODE_NUNITS (GET_MODE (op)); ++i)
2271 if (!rtx_equal_p (XVECEXP (op, 0, i), XVECEXP (op, 0, 0)))
2272 return false;
2273 }
2274
2275 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2276 mask = UINTVAL (XVECEXP (op, 0, 0));
2277 if (s390_contiguous_bitmask_p (mask, size, start,
2278 end != NULL ? &length : NULL))
2279 {
2280 if (end != NULL)
2281 *end = *start + length - 1;
2282 return true;
2283 }
2284 /* 0xff00000f style immediates can be covered by swapping start and
2285 end indices in vgm. */
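/* For example, a 32-bit element of 0xff00000f has the contiguous inverted
   mask 0x00fffff0, which yields *start = 24 and *end = 3, i.e. a bitfield
   wrapping around from bit 31 to bit 0.  */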
2286 if (s390_contiguous_bitmask_p (~mask, size, start,
2287 end != NULL ? &length : NULL))
2288 {
2289 if (end != NULL)
2290 *end = *start - 1;
2291 if (start != NULL)
2292 *start = *start + length;
2293 return true;
2294 }
2295 return false;
2296 }
2297
2298 /* Return true if OP consists only of byte chunks that are either 0 or
2299 0xff. If MASK is != NULL, a byte mask is generated which is
2300 appropriate for the vector generate byte mask (vgbm) instruction. */
2301
2302 bool
2303 s390_bytemask_vector_p (rtx op, unsigned *mask)
2304 {
2305 int i;
2306 unsigned tmp_mask = 0;
2307 int nunit, unit_size;
2308
2309 if (!VECTOR_MODE_P (GET_MODE (op))
2310 || GET_CODE (op) != CONST_VECTOR
2311 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2312 return false;
2313
2314 nunit = GET_MODE_NUNITS (GET_MODE (op));
2315 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2316
2317 for (i = 0; i < nunit; i++)
2318 {
2319 unsigned HOST_WIDE_INT c;
2320 int j;
2321
2322 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2323 return false;
2324
2325 c = UINTVAL (XVECEXP (op, 0, i));
2326 for (j = 0; j < unit_size; j++)
2327 {
2328 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2329 return false;
2330 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2331 c = c >> BITS_PER_UNIT;
2332 }
2333 }
2334
2335 if (mask != NULL)
2336 *mask = tmp_mask;
2337
2338 return true;
2339 }
2340
2341 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2342 equivalent to a shift followed by the AND. In particular, CONTIG
2343 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2344 for ROTL indicate a rotate to the right. */
2345
2346 bool
2347 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2348 {
2349 int pos, len;
2350 bool ok;
2351
2352 ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
2353 gcc_assert (ok);
2354
2355 return ((rotl >= 0 && rotl <= pos)
2356 || (rotl < 0 && -rotl <= bitsize - len - pos));
2357 }
2358
2359 /* Check whether we can (and want to) split a double-word
2360 move in mode MODE from SRC to DST into two single-word
2361 moves, moving the subword FIRST_SUBWORD first. */
2362
2363 bool
2364 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2365 {
2366 /* Floating point and vector registers cannot be split. */
2367 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2368 return false;
2369
2370 /* We don't need to split if operands are directly accessible. */
2371 if (s_operand (src, mode) || s_operand (dst, mode))
2372 return false;
2373
2374 /* Non-offsettable memory references cannot be split. */
2375 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2376 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2377 return false;
2378
2379 /* Moving the first subword must not clobber a register
2380 needed to move the second subword. */
2381 if (register_operand (dst, mode))
2382 {
2383 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2384 if (reg_overlap_mentioned_p (subreg, src))
2385 return false;
2386 }
2387
2388 return true;
2389 }
2390
2391 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2392 and [MEM2, MEM2 + SIZE] do overlap and false
2393 otherwise. */
2394
2395 bool
2396 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2397 {
2398 rtx addr1, addr2, addr_delta;
2399 HOST_WIDE_INT delta;
2400
2401 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2402 return true;
2403
2404 if (size == 0)
2405 return false;
2406
2407 addr1 = XEXP (mem1, 0);
2408 addr2 = XEXP (mem2, 0);
2409
2410 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2411
2412 /* This overlapping check is used by peepholes merging memory block operations.
2413 Overlapping operations would otherwise be recognized by the S/390 hardware
2414 and would fall back to a slower implementation. Allowing overlapping
2415 operations would lead to slow code but not to wrong code. Therefore we are
2416 somewhat optimistic if we cannot prove that the memory blocks are
2417 overlapping.
2418 That's why we return false here although this may accept operations on
2419 overlapping memory areas. */
2420 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2421 return false;
2422
2423 delta = INTVAL (addr_delta);
2424
2425 if (delta == 0
2426 || (delta > 0 && delta < size)
2427 || (delta < 0 && -delta < size))
2428 return true;
2429
2430 return false;
2431 }
2432
2433 /* Check whether the address of memory reference MEM2 equals exactly
2434 the address of memory reference MEM1 plus DELTA. Return true if
2435 we can prove this to be the case, false otherwise. */
2436
2437 bool
2438 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2439 {
2440 rtx addr1, addr2, addr_delta;
2441
2442 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2443 return false;
2444
2445 addr1 = XEXP (mem1, 0);
2446 addr2 = XEXP (mem2, 0);
2447
2448 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2449 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2450 return false;
2451
2452 return true;
2453 }
2454
2455 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2456
2457 void
2458 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2459 rtx *operands)
2460 {
2461 machine_mode wmode = mode;
2462 rtx dst = operands[0];
2463 rtx src1 = operands[1];
2464 rtx src2 = operands[2];
2465 rtx op, clob, tem;
2466
2467 /* If we cannot handle the operation directly, use a temp register. */
2468 if (!s390_logical_operator_ok_p (operands))
2469 dst = gen_reg_rtx (mode);
2470
2471 /* QImode and HImode patterns make sense only if we have a destination
2472 in memory. Otherwise perform the operation in SImode. */
2473 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2474 wmode = SImode;
2475
2476 /* Widen operands if required. */
2477 if (mode != wmode)
2478 {
2479 if (GET_CODE (dst) == SUBREG
2480 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2481 dst = tem;
2482 else if (REG_P (dst))
2483 dst = gen_rtx_SUBREG (wmode, dst, 0);
2484 else
2485 dst = gen_reg_rtx (wmode);
2486
2487 if (GET_CODE (src1) == SUBREG
2488 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2489 src1 = tem;
2490 else if (GET_MODE (src1) != VOIDmode)
2491 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2492
2493 if (GET_CODE (src2) == SUBREG
2494 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2495 src2 = tem;
2496 else if (GET_MODE (src2) != VOIDmode)
2497 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2498 }
2499
2500 /* Emit the instruction. */
2501 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2502 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2503 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2504
2505 /* Fix up the destination if needed. */
2506 if (dst != operands[0])
2507 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2508 }
2509
2510 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2511
2512 bool
2513 s390_logical_operator_ok_p (rtx *operands)
2514 {
2515 /* If the destination operand is in memory, it needs to coincide
2516 with one of the source operands. After reload, it has to be
2517 the first source operand. */
2518 if (GET_CODE (operands[0]) == MEM)
2519 return rtx_equal_p (operands[0], operands[1])
2520 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2521
2522 return true;
2523 }
2524
2525 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2526 operand IMMOP to switch from SS to SI type instructions. */
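/* For example, an SImode AND of a memory operand with 0xffffff00 only
   changes the least significant byte, so MEMOP is narrowed to a QImode
   reference at byte offset 3 and IMMOP becomes 0x00, allowing a single
   SI-type instruction (e.g. NI for AND) instead of an SS-type one.  */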
2527
2528 void
2529 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2530 {
2531 int def = code == AND ? -1 : 0;
2532 HOST_WIDE_INT mask;
2533 int part;
2534
2535 gcc_assert (GET_CODE (*memop) == MEM);
2536 gcc_assert (!MEM_VOLATILE_P (*memop));
2537
2538 mask = s390_extract_part (*immop, QImode, def);
2539 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2540 gcc_assert (part >= 0);
2541
2542 *memop = adjust_address (*memop, QImode, part);
2543 *immop = gen_int_mode (mask, QImode);
2544 }
2545
2546
2547 /* How to allocate a 'struct machine_function'. */
2548
2549 static struct machine_function *
2550 s390_init_machine_status (void)
2551 {
2552 return ggc_cleared_alloc<machine_function> ();
2553 }
2554
2555 /* Map for smallest class containing reg regno. */
2556
2557 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2558 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2559 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2560 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2561 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2562 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2563 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2564 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2565 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2566 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2567 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2568 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2569 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2570 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2571 VEC_REGS, VEC_REGS /* 52 */
2572 };
2573
2574 /* Return attribute type of insn. */
2575
2576 static enum attr_type
2577 s390_safe_attr_type (rtx_insn *insn)
2578 {
2579 if (recog_memoized (insn) >= 0)
2580 return get_attr_type (insn);
2581 else
2582 return TYPE_NONE;
2583 }
2584
2585 /* Return true if DISP is a valid short displacement. */
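/* A short displacement is an unsigned 12-bit value (0..4095); the long
   displacement facility additionally allows signed 20-bit displacements
   (see DISP_IN_RANGE).  */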
2586
2587 static bool
2588 s390_short_displacement (rtx disp)
2589 {
2590 /* No displacement is OK. */
2591 if (!disp)
2592 return true;
2593
2594 /* Without the long displacement facility we don't need to
2595 distinguish between long and short displacements. */
2596 if (!TARGET_LONG_DISPLACEMENT)
2597 return true;
2598
2599 /* Integer displacement in range. */
2600 if (GET_CODE (disp) == CONST_INT)
2601 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2602
2603 /* GOT offset is not OK, the GOT can be large. */
2604 if (GET_CODE (disp) == CONST
2605 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2606 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2607 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2608 return false;
2609
2610 /* All other symbolic constants are literal pool references,
2611 which are OK as the literal pool must be small. */
2612 if (GET_CODE (disp) == CONST)
2613 return true;
2614
2615 return false;
2616 }
2617
2618 /* Decompose a RTL expression ADDR for a memory address into
2619 its components, returned in OUT.
2620
2621 Returns false if ADDR is not a valid memory address, true
2622 otherwise. If OUT is NULL, don't return the components,
2623 but check for validity only.
2624
2625 Note: Only addresses in canonical form are recognized.
2626 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2627 canonical form so that they will be recognized. */
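/* The canonical form corresponds to the machine's D(X,B) operand format:
   an optional base register, an optional index register, and a
   displacement.  */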
2628
2629 static int
2630 s390_decompose_address (rtx addr, struct s390_address *out)
2631 {
2632 HOST_WIDE_INT offset = 0;
2633 rtx base = NULL_RTX;
2634 rtx indx = NULL_RTX;
2635 rtx disp = NULL_RTX;
2636 rtx orig_disp;
2637 bool pointer = false;
2638 bool base_ptr = false;
2639 bool indx_ptr = false;
2640 bool literal_pool = false;
2641
2642 /* We may need to substitute the literal pool base register into the address
2643 below. However, at this point we do not know which register is going to
2644 be used as base, so we substitute the arg pointer register. This is going
2645 to be treated as holding a pointer below -- it shouldn't be used for any
2646 other purpose. */
2647 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2648
2649 /* Decompose address into base + index + displacement. */
2650
2651 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2652 base = addr;
2653
2654 else if (GET_CODE (addr) == PLUS)
2655 {
2656 rtx op0 = XEXP (addr, 0);
2657 rtx op1 = XEXP (addr, 1);
2658 enum rtx_code code0 = GET_CODE (op0);
2659 enum rtx_code code1 = GET_CODE (op1);
2660
2661 if (code0 == REG || code0 == UNSPEC)
2662 {
2663 if (code1 == REG || code1 == UNSPEC)
2664 {
2665 indx = op0; /* index + base */
2666 base = op1;
2667 }
2668
2669 else
2670 {
2671 base = op0; /* base + displacement */
2672 disp = op1;
2673 }
2674 }
2675
2676 else if (code0 == PLUS)
2677 {
2678 indx = XEXP (op0, 0); /* index + base + disp */
2679 base = XEXP (op0, 1);
2680 disp = op1;
2681 }
2682
2683 else
2684 {
2685 return false;
2686 }
2687 }
2688
2689 else
2690 disp = addr; /* displacement */
2691
2692 /* Extract integer part of displacement. */
2693 orig_disp = disp;
2694 if (disp)
2695 {
2696 if (GET_CODE (disp) == CONST_INT)
2697 {
2698 offset = INTVAL (disp);
2699 disp = NULL_RTX;
2700 }
2701 else if (GET_CODE (disp) == CONST
2702 && GET_CODE (XEXP (disp, 0)) == PLUS
2703 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2704 {
2705 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2706 disp = XEXP (XEXP (disp, 0), 0);
2707 }
2708 }
2709
2710 /* Strip off CONST here to avoid special case tests later. */
2711 if (disp && GET_CODE (disp) == CONST)
2712 disp = XEXP (disp, 0);
2713
2714 /* We can convert literal pool addresses to
2715 displacements by basing them off the base register. */
2716 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2717 {
2718 /* Either base or index must be free to hold the base register. */
2719 if (!base)
2720 base = fake_pool_base, literal_pool = true;
2721 else if (!indx)
2722 indx = fake_pool_base, literal_pool = true;
2723 else
2724 return false;
2725
2726 /* Mark up the displacement. */
2727 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2728 UNSPEC_LTREL_OFFSET);
2729 }
2730
2731 /* Validate base register. */
2732 if (base)
2733 {
2734 if (GET_CODE (base) == UNSPEC)
2735 switch (XINT (base, 1))
2736 {
2737 case UNSPEC_LTREF:
2738 if (!disp)
2739 disp = gen_rtx_UNSPEC (Pmode,
2740 gen_rtvec (1, XVECEXP (base, 0, 0)),
2741 UNSPEC_LTREL_OFFSET);
2742 else
2743 return false;
2744
2745 base = XVECEXP (base, 0, 1);
2746 break;
2747
2748 case UNSPEC_LTREL_BASE:
2749 if (XVECLEN (base, 0) == 1)
2750 base = fake_pool_base, literal_pool = true;
2751 else
2752 base = XVECEXP (base, 0, 1);
2753 break;
2754
2755 default:
2756 return false;
2757 }
2758
2759 if (!REG_P (base)
2760 || (GET_MODE (base) != SImode
2761 && GET_MODE (base) != Pmode))
2762 return false;
2763
2764 if (REGNO (base) == STACK_POINTER_REGNUM
2765 || REGNO (base) == FRAME_POINTER_REGNUM
2766 || ((reload_completed || reload_in_progress)
2767 && frame_pointer_needed
2768 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2769 || REGNO (base) == ARG_POINTER_REGNUM
2770 || (flag_pic
2771 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2772 pointer = base_ptr = true;
2773
2774 if ((reload_completed || reload_in_progress)
2775 && base == cfun->machine->base_reg)
2776 pointer = base_ptr = literal_pool = true;
2777 }
2778
2779 /* Validate index register. */
2780 if (indx)
2781 {
2782 if (GET_CODE (indx) == UNSPEC)
2783 switch (XINT (indx, 1))
2784 {
2785 case UNSPEC_LTREF:
2786 if (!disp)
2787 disp = gen_rtx_UNSPEC (Pmode,
2788 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2789 UNSPEC_LTREL_OFFSET);
2790 else
2791 return false;
2792
2793 indx = XVECEXP (indx, 0, 1);
2794 break;
2795
2796 case UNSPEC_LTREL_BASE:
2797 if (XVECLEN (indx, 0) == 1)
2798 indx = fake_pool_base, literal_pool = true;
2799 else
2800 indx = XVECEXP (indx, 0, 1);
2801 break;
2802
2803 default:
2804 return false;
2805 }
2806
2807 if (!REG_P (indx)
2808 || (GET_MODE (indx) != SImode
2809 && GET_MODE (indx) != Pmode))
2810 return false;
2811
2812 if (REGNO (indx) == STACK_POINTER_REGNUM
2813 || REGNO (indx) == FRAME_POINTER_REGNUM
2814 || ((reload_completed || reload_in_progress)
2815 && frame_pointer_needed
2816 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2817 || REGNO (indx) == ARG_POINTER_REGNUM
2818 || (flag_pic
2819 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2820 pointer = indx_ptr = true;
2821
2822 if ((reload_completed || reload_in_progress)
2823 && indx == cfun->machine->base_reg)
2824 pointer = indx_ptr = literal_pool = true;
2825 }
2826
2827 /* Prefer to use pointer as base, not index. */
2828 if (base && indx && !base_ptr
2829 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2830 {
2831 rtx tmp = base;
2832 base = indx;
2833 indx = tmp;
2834 }
2835
2836 /* Validate displacement. */
2837 if (!disp)
2838 {
2839 /* If virtual registers are involved, the displacement will change later
2840 anyway as the virtual registers get eliminated. This could make a
2841 valid displacement invalid, but it is more likely to make an invalid
2842 displacement valid, because we sometimes access the register save area
2843 via negative offsets to one of those registers.
2844 Thus we don't check the displacement for validity here. If after
2845 elimination the displacement turns out to be invalid after all,
2846 this is fixed up by reload in any case. */
2847 /* LRA always keeps displacements up to date, and we need the
2848 displacement to be valid during all of LRA, not only at the
2849 final elimination. */
2850 if (lra_in_progress
2851 || (base != arg_pointer_rtx
2852 && indx != arg_pointer_rtx
2853 && base != return_address_pointer_rtx
2854 && indx != return_address_pointer_rtx
2855 && base != frame_pointer_rtx
2856 && indx != frame_pointer_rtx
2857 && base != virtual_stack_vars_rtx
2858 && indx != virtual_stack_vars_rtx))
2859 if (!DISP_IN_RANGE (offset))
2860 return false;
2861 }
2862 else
2863 {
2864 /* All the special cases are pointers. */
2865 pointer = true;
2866
2867 /* In the small-PIC case, the linker converts @GOT
2868 and @GOTNTPOFF offsets to possible displacements. */
2869 if (GET_CODE (disp) == UNSPEC
2870 && (XINT (disp, 1) == UNSPEC_GOT
2871 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2872 && flag_pic == 1)
2873 {
2874 ;
2875 }
2876
2877 /* Accept pool label offsets. */
2878 else if (GET_CODE (disp) == UNSPEC
2879 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2880 ;
2881
2882 /* Accept literal pool references. */
2883 else if (GET_CODE (disp) == UNSPEC
2884 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2885 {
2886 /* In case CSE pulled a non literal pool reference out of
2887 the pool we have to reject the address. This is
2888 especially important when loading the GOT pointer on non
2889 zarch CPUs. In this case the literal pool contains an lt
2890 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2891 will most likely exceed the displacement. */
2892 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2893 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2894 return false;
2895
2896 orig_disp = gen_rtx_CONST (Pmode, disp);
2897 if (offset)
2898 {
2899 /* If we have an offset, make sure it does not
2900 exceed the size of the constant pool entry. */
2901 rtx sym = XVECEXP (disp, 0, 0);
2902 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2903 return false;
2904
2905 orig_disp = plus_constant (Pmode, orig_disp, offset);
2906 }
2907 }
2908
2909 else
2910 return false;
2911 }
2912
2913 if (!base && !indx)
2914 pointer = true;
2915
2916 if (out)
2917 {
2918 out->base = base;
2919 out->indx = indx;
2920 out->disp = orig_disp;
2921 out->pointer = pointer;
2922 out->literal_pool = literal_pool;
2923 }
2924
2925 return true;
2926 }
2927
2928 /* Decompose a RTL expression OP for a shift count into its components,
2929 and return the base register in BASE and the offset in OFFSET.
2930
2931 Return true if OP is a valid shift count, false if not. */
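/* For example, (plus (reg 1) (const_int 7)) yields *BASE = (reg 1) and
   *OFFSET = 7, while a plain CONST_INT yields *BASE = NULL_RTX.  */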
2932
2933 bool
2934 s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
2935 {
2936 HOST_WIDE_INT off = 0;
2937
2938 /* We can have an integer constant, an address register,
2939 or a sum of the two. */
2940 if (GET_CODE (op) == CONST_INT)
2941 {
2942 off = INTVAL (op);
2943 op = NULL_RTX;
2944 }
2945 if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
2946 {
2947 off = INTVAL (XEXP (op, 1));
2948 op = XEXP (op, 0);
2949 }
2950 while (op && GET_CODE (op) == SUBREG)
2951 op = SUBREG_REG (op);
2952
2953 if (op && GET_CODE (op) != REG)
2954 return false;
2955
2956 if (offset)
2957 *offset = off;
2958 if (base)
2959 *base = op;
2960
2961 return true;
2962 }
2963
2964
2965 /* Return true if CODE is a valid address without index. */
2966
2967 bool
2968 s390_legitimate_address_without_index_p (rtx op)
2969 {
2970 struct s390_address addr;
2971
2972 if (!s390_decompose_address (XEXP (op, 0), &addr))
2973 return false;
2974 if (addr.indx)
2975 return false;
2976
2977 return true;
2978 }
2979
2980
2981 /* Return TRUE if ADDR is an operand valid for a load/store relative
2982 instruction. Be aware that the alignment of the operand needs to
2983 be checked separately.
2984 Valid addresses are single references or a sum of a reference and a
2985 constant integer. Return these parts in SYMREF and ADDEND. You can
2986 pass NULL in SYMREF and/or ADDEND if you are not interested in these
2987 values. Literal pool references are *not* considered symbol
2988 references. */
2989
2990 static bool
2991 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
2992 {
2993 HOST_WIDE_INT tmpaddend = 0;
2994
2995 if (GET_CODE (addr) == CONST)
2996 addr = XEXP (addr, 0);
2997
2998 if (GET_CODE (addr) == PLUS)
2999 {
3000 if (!CONST_INT_P (XEXP (addr, 1)))
3001 return false;
3002
3003 tmpaddend = INTVAL (XEXP (addr, 1));
3004 addr = XEXP (addr, 0);
3005 }
3006
3007 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3008 || (GET_CODE (addr) == UNSPEC
3009 && (XINT (addr, 1) == UNSPEC_GOTENT
3010 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3011 {
3012 if (symref)
3013 *symref = addr;
3014 if (addend)
3015 *addend = tmpaddend;
3016
3017 return true;
3018 }
3019 return false;
3020 }
3021
3022 /* Return true if the address in OP is valid for constraint letter C
3023 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
3024 pool MEMs should be accepted. Only the Q, R, S, T constraint
3025 letters are allowed for C. */
3026
3027 static int
3028 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3029 {
3030 struct s390_address addr;
3031 bool decomposed = false;
3032
3033 /* This check makes sure that no symbolic address (except literal
3034 pool references) are accepted by the R or T constraints. */
3035 if (s390_loadrelative_operand_p (op, NULL, NULL))
3036 return 0;
3037
3038 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3039 if (!lit_pool_ok)
3040 {
3041 if (!s390_decompose_address (op, &addr))
3042 return 0;
3043 if (addr.literal_pool)
3044 return 0;
3045 decomposed = true;
3046 }
3047
3048 switch (c)
3049 {
3050 case 'Q': /* no index short displacement */
3051 if (!decomposed && !s390_decompose_address (op, &addr))
3052 return 0;
3053 if (addr.indx)
3054 return 0;
3055 if (!s390_short_displacement (addr.disp))
3056 return 0;
3057 break;
3058
3059 case 'R': /* with index short displacement */
3060 if (TARGET_LONG_DISPLACEMENT)
3061 {
3062 if (!decomposed && !s390_decompose_address (op, &addr))
3063 return 0;
3064 if (!s390_short_displacement (addr.disp))
3065 return 0;
3066 }
3067 /* Any invalid address here will be fixed up by reload,
3068 so accept it for the most generic constraint. */
3069 break;
3070
3071 case 'S': /* no index long displacement */
3072 if (!TARGET_LONG_DISPLACEMENT)
3073 return 0;
3074 if (!decomposed && !s390_decompose_address (op, &addr))
3075 return 0;
3076 if (addr.indx)
3077 return 0;
3078 if (s390_short_displacement (addr.disp))
3079 return 0;
3080 break;
3081
3082 case 'T': /* with index long displacement */
3083 if (!TARGET_LONG_DISPLACEMENT)
3084 return 0;
3085 /* Any invalid address here will be fixed up by reload,
3086 so accept it for the most generic constraint. */
3087 if ((decomposed || s390_decompose_address (op, &addr))
3088 && s390_short_displacement (addr.disp))
3089 return 0;
3090 break;
3091 default:
3092 return 0;
3093 }
3094 return 1;
3095 }
3096
3097
3098 /* Evaluates constraint strings described by the regular expression
3099 ([ABZ](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
3100 the constraint given in STR, and 0 otherwise. */
3101
3102 int
3103 s390_mem_constraint (const char *str, rtx op)
3104 {
3105 char c = str[0];
3106
3107 switch (c)
3108 {
3109 case 'A':
3110 /* Check for offsettable variants of memory constraints. */
3111 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3112 return 0;
3113 if ((reload_completed || reload_in_progress)
3114 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3115 return 0;
3116 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3117 case 'B':
3118 /* Check for non-literal-pool variants of memory constraints. */
3119 if (!MEM_P (op))
3120 return 0;
3121 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3122 case 'Q':
3123 case 'R':
3124 case 'S':
3125 case 'T':
3126 if (GET_CODE (op) != MEM)
3127 return 0;
3128 return s390_check_qrst_address (c, XEXP (op, 0), true);
3129 case 'U':
3130 return (s390_check_qrst_address ('Q', op, true)
3131 || s390_check_qrst_address ('R', op, true));
3132 case 'W':
3133 return (s390_check_qrst_address ('S', op, true)
3134 || s390_check_qrst_address ('T', op, true));
3135 case 'Y':
3136 /* Simply check for the basic form of a shift count. Reload will
3137 take care of making sure we have a proper base register. */
3138 if (!s390_decompose_shift_count (op, NULL, NULL))
3139 return 0;
3140 break;
3141 case 'Z':
3142 return s390_check_qrst_address (str[1], op, true);
3143 default:
3144 return 0;
3145 }
3146 return 1;
3147 }
3148
3149
3150 /* Evaluates constraint strings starting with letter O. Input
3151 parameter C is the letter following the "O" in the constraint
3152 string. Returns 1 if VALUE meets the respective constraint and 0
3153 otherwise. */
3154
3155 int
3156 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3157 {
3158 if (!TARGET_EXTIMM)
3159 return 0;
3160
3161 switch (c)
3162 {
3163 case 's':
3164 return trunc_int_for_mode (value, SImode) == value;
3165
3166 case 'p':
3167 return value == 0
3168 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3169
3170 case 'n':
3171 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3172
3173 default:
3174 gcc_unreachable ();
3175 }
3176 }
3177
3178
3179 /* Evaluates constraint strings starting with letter N. Parameter STR
3180 contains the letters following letter "N" in the constraint string.
3181 Returns true if VALUE matches the constraint. */
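/* The letters encode: STR[0] the required part number (or 'x' for any
   part), STR[1] the mode of the part (Q/H/S), STR[2] the mode of the
   whole value (H/S/D), and STR[3] the value ('0' or 'F') all remaining
   parts must have.  */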
3182
3183 int
3184 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3185 {
3186 machine_mode mode, part_mode;
3187 int def;
3188 int part, part_goal;
3189
3190
3191 if (str[0] == 'x')
3192 part_goal = -1;
3193 else
3194 part_goal = str[0] - '0';
3195
3196 switch (str[1])
3197 {
3198 case 'Q':
3199 part_mode = QImode;
3200 break;
3201 case 'H':
3202 part_mode = HImode;
3203 break;
3204 case 'S':
3205 part_mode = SImode;
3206 break;
3207 default:
3208 return 0;
3209 }
3210
3211 switch (str[2])
3212 {
3213 case 'H':
3214 mode = HImode;
3215 break;
3216 case 'S':
3217 mode = SImode;
3218 break;
3219 case 'D':
3220 mode = DImode;
3221 break;
3222 default:
3223 return 0;
3224 }
3225
3226 switch (str[3])
3227 {
3228 case '0':
3229 def = 0;
3230 break;
3231 case 'F':
3232 def = -1;
3233 break;
3234 default:
3235 return 0;
3236 }
3237
3238 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3239 return 0;
3240
3241 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3242 if (part < 0)
3243 return 0;
3244 if (part_goal != -1 && part_goal != part)
3245 return 0;
3246
3247 return 1;
3248 }
3249
3250
3251 /* Returns true if the input parameter VALUE is a float zero. */
3252
3253 int
3254 s390_float_const_zero_p (rtx value)
3255 {
3256 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3257 && value == CONST0_RTX (GET_MODE (value)));
3258 }
3259
3260 /* Implement TARGET_REGISTER_MOVE_COST. */
3261
3262 static int
3263 s390_register_move_cost (machine_mode mode,
3264 reg_class_t from, reg_class_t to)
3265 {
3266 /* On s390, copy between fprs and gprs is expensive. */
3267
3268 /* With ldgr/lgdr the copy becomes somewhat faster. */
3269 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3270 {
3271 /* ldgr is single cycle. */
3272 if (reg_classes_intersect_p (from, GENERAL_REGS)
3273 && reg_classes_intersect_p (to, FP_REGS))
3274 return 1;
3275 /* lgdr needs 3 cycles. */
3276 if (reg_classes_intersect_p (to, GENERAL_REGS)
3277 && reg_classes_intersect_p (from, FP_REGS))
3278 return 3;
3279 }
3280
3281 /* Otherwise copying is done via memory. */
3282 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3283 && reg_classes_intersect_p (to, FP_REGS))
3284 || (reg_classes_intersect_p (from, FP_REGS)
3285 && reg_classes_intersect_p (to, GENERAL_REGS)))
3286 return 10;
3287
3288 return 1;
3289 }
3290
3291 /* Implement TARGET_MEMORY_MOVE_COST. */
3292
3293 static int
3294 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3295 reg_class_t rclass ATTRIBUTE_UNUSED,
3296 bool in ATTRIBUTE_UNUSED)
3297 {
3298 return 2;
3299 }
3300
3301 /* Compute a (partial) cost for rtx X. Return true if the complete
3302 cost has been computed, and false if subexpressions should be
3303 scanned. In either case, *TOTAL contains the cost result.
3304 CODE contains GET_CODE (x), OUTER_CODE contains the code
3305 of the superexpression of x. */
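/* The individual instruction costs below come from the processor_costs
   table selected for the tuning CPU (s390_cost).  */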
3306
3307 static bool
3308 s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3309 int *total, bool speed ATTRIBUTE_UNUSED)
3310 {
3311 switch (code)
3312 {
3313 case CONST:
3314 case CONST_INT:
3315 case LABEL_REF:
3316 case SYMBOL_REF:
3317 case CONST_DOUBLE:
3318 case MEM:
3319 *total = 0;
3320 return true;
3321
3322 case ASHIFT:
3323 case ASHIFTRT:
3324 case LSHIFTRT:
3325 case ROTATE:
3326 case ROTATERT:
3327 case AND:
3328 case IOR:
3329 case XOR:
3330 case NEG:
3331 case NOT:
3332 *total = COSTS_N_INSNS (1);
3333 return false;
3334
3335 case PLUS:
3336 case MINUS:
3337 *total = COSTS_N_INSNS (1);
3338 return false;
3339
3340 case MULT:
3341 switch (GET_MODE (x))
3342 {
3343 case SImode:
3344 {
3345 rtx left = XEXP (x, 0);
3346 rtx right = XEXP (x, 1);
3347 if (GET_CODE (right) == CONST_INT
3348 && CONST_OK_FOR_K (INTVAL (right)))
3349 *total = s390_cost->mhi;
3350 else if (GET_CODE (left) == SIGN_EXTEND)
3351 *total = s390_cost->mh;
3352 else
3353 *total = s390_cost->ms; /* msr, ms, msy */
3354 break;
3355 }
3356 case DImode:
3357 {
3358 rtx left = XEXP (x, 0);
3359 rtx right = XEXP (x, 1);
3360 if (TARGET_ZARCH)
3361 {
3362 if (GET_CODE (right) == CONST_INT
3363 && CONST_OK_FOR_K (INTVAL (right)))
3364 *total = s390_cost->mghi;
3365 else if (GET_CODE (left) == SIGN_EXTEND)
3366 *total = s390_cost->msgf;
3367 else
3368 *total = s390_cost->msg; /* msgr, msg */
3369 }
3370 else /* TARGET_31BIT */
3371 {
3372 if (GET_CODE (left) == SIGN_EXTEND
3373 && GET_CODE (right) == SIGN_EXTEND)
3374 /* mulsidi case: mr, m */
3375 *total = s390_cost->m;
3376 else if (GET_CODE (left) == ZERO_EXTEND
3377 && GET_CODE (right) == ZERO_EXTEND
3378 && TARGET_CPU_ZARCH)
3379 /* umulsidi case: ml, mlr */
3380 *total = s390_cost->ml;
3381 else
3382 /* Complex calculation is required. */
3383 *total = COSTS_N_INSNS (40);
3384 }
3385 break;
3386 }
3387 case SFmode:
3388 case DFmode:
3389 *total = s390_cost->mult_df;
3390 break;
3391 case TFmode:
3392 *total = s390_cost->mxbr;
3393 break;
3394 default:
3395 return false;
3396 }
3397 return false;
3398
3399 case FMA:
3400 switch (GET_MODE (x))
3401 {
3402 case DFmode:
3403 *total = s390_cost->madbr;
3404 break;
3405 case SFmode:
3406 *total = s390_cost->maebr;
3407 break;
3408 default:
3409 return false;
3410 }
3411 /* Negate in the third argument is free: FMSUB. */
3412 if (GET_CODE (XEXP (x, 2)) == NEG)
3413 {
3414 *total += (rtx_cost (XEXP (x, 0), FMA, 0, speed)
3415 + rtx_cost (XEXP (x, 1), FMA, 1, speed)
3416 + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, 2, speed));
3417 return true;
3418 }
3419 return false;
3420
3421 case UDIV:
3422 case UMOD:
3423 if (GET_MODE (x) == TImode) /* 128 bit division */
3424 *total = s390_cost->dlgr;
3425 else if (GET_MODE (x) == DImode)
3426 {
3427 rtx right = XEXP (x, 1);
3428 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3429 *total = s390_cost->dlr;
3430 else /* 64 by 64 bit division */
3431 *total = s390_cost->dlgr;
3432 }
3433 else if (GET_MODE (x) == SImode) /* 32 bit division */
3434 *total = s390_cost->dlr;
3435 return false;
3436
3437 case DIV:
3438 case MOD:
3439 if (GET_MODE (x) == DImode)
3440 {
3441 rtx right = XEXP (x, 1);
3442 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3443 if (TARGET_ZARCH)
3444 *total = s390_cost->dsgfr;
3445 else
3446 *total = s390_cost->dr;
3447 else /* 64 by 64 bit division */
3448 *total = s390_cost->dsgr;
3449 }
3450 else if (GET_MODE (x) == SImode) /* 32 bit division */
3451 *total = s390_cost->dlr;
3452 else if (GET_MODE (x) == SFmode)
3453 {
3454 *total = s390_cost->debr;
3455 }
3456 else if (GET_MODE (x) == DFmode)
3457 {
3458 *total = s390_cost->ddbr;
3459 }
3460 else if (GET_MODE (x) == TFmode)
3461 {
3462 *total = s390_cost->dxbr;
3463 }
3464 return false;
3465
3466 case SQRT:
3467 if (GET_MODE (x) == SFmode)
3468 *total = s390_cost->sqebr;
3469 else if (GET_MODE (x) == DFmode)
3470 *total = s390_cost->sqdbr;
3471 else /* TFmode */
3472 *total = s390_cost->sqxbr;
3473 return false;
3474
3475 case SIGN_EXTEND:
3476 case ZERO_EXTEND:
3477 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3478 || outer_code == PLUS || outer_code == MINUS
3479 || outer_code == COMPARE)
3480 *total = 0;
3481 return false;
3482
3483 case COMPARE:
3484 *total = COSTS_N_INSNS (1);
3485 if (GET_CODE (XEXP (x, 0)) == AND
3486 && GET_CODE (XEXP (x, 1)) == CONST_INT
3487 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3488 {
3489 rtx op0 = XEXP (XEXP (x, 0), 0);
3490 rtx op1 = XEXP (XEXP (x, 0), 1);
3491 rtx op2 = XEXP (x, 1);
3492
3493 if (memory_operand (op0, GET_MODE (op0))
3494 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3495 return true;
3496 if (register_operand (op0, GET_MODE (op0))
3497 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3498 return true;
3499 }
3500 return false;
3501
3502 default:
3503 return false;
3504 }
3505 }
3506
3507 /* Return the cost of an address rtx ADDR. */
3508
3509 static int
3510 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3511 addr_space_t as ATTRIBUTE_UNUSED,
3512 bool speed ATTRIBUTE_UNUSED)
3513 {
3514 struct s390_address ad;
3515 if (!s390_decompose_address (addr, &ad))
3516 return 1000;
3517
3518 return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3519 }
3520
3521 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3522 otherwise return 0. */
3523
3524 int
3525 tls_symbolic_operand (rtx op)
3526 {
3527 if (GET_CODE (op) != SYMBOL_REF)
3528 return 0;
3529 return SYMBOL_REF_TLS_MODEL (op);
3530 }
3531 \f
3532 /* Split DImode access register reference REG (on 64-bit) into its constituent
3533 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3534 gen_highpart cannot be used as they assume all registers are word-sized,
3535 while our access registers have only half that size. */
3536
3537 void
3538 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3539 {
3540 gcc_assert (TARGET_64BIT);
3541 gcc_assert (ACCESS_REG_P (reg));
3542 gcc_assert (GET_MODE (reg) == DImode);
3543 gcc_assert (!(REGNO (reg) & 1));
3544
3545 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3546 *hi = gen_rtx_REG (SImode, REGNO (reg));
3547 }
3548
3549 /* Return true if OP contains a symbol reference. */
3550
3551 bool
3552 symbolic_reference_mentioned_p (rtx op)
3553 {
3554 const char *fmt;
3555 int i;
3556
3557 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3558 return 1;
3559
3560 fmt = GET_RTX_FORMAT (GET_CODE (op));
3561 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3562 {
3563 if (fmt[i] == 'E')
3564 {
3565 int j;
3566
3567 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3568 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3569 return 1;
3570 }
3571
3572 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3573 return 1;
3574 }
3575
3576 return 0;
3577 }
3578
3579 /* Return true if OP contains a reference to a thread-local symbol. */
3580
3581 bool
3582 tls_symbolic_reference_mentioned_p (rtx op)
3583 {
3584 const char *fmt;
3585 int i;
3586
3587 if (GET_CODE (op) == SYMBOL_REF)
3588 return tls_symbolic_operand (op);
3589
3590 fmt = GET_RTX_FORMAT (GET_CODE (op));
3591 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3592 {
3593 if (fmt[i] == 'E')
3594 {
3595 int j;
3596
3597 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3598 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3599 return true;
3600 }
3601
3602 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3603 return true;
3604 }
3605
3606 return false;
3607 }
3608
3609
3610 /* Return true if OP is a legitimate general operand when
3611 generating PIC code. It is given that flag_pic is on
3612 and that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
3613
3614 int
3615 legitimate_pic_operand_p (rtx op)
3616 {
3617 /* Accept all non-symbolic constants. */
3618 if (!SYMBOLIC_CONST (op))
3619 return 1;
3620
3621 /* Reject everything else; must be handled
3622 via emit_symbolic_move. */
3623 return 0;
3624 }
3625
3626 /* Returns true if the constant value OP is a legitimate general operand.
3627 It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
3628
3629 static bool
3630 s390_legitimate_constant_p (machine_mode mode, rtx op)
3631 {
3632 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3633 {
3634 if (GET_MODE_SIZE (mode) != 16)
3635 return 0;
3636
3637 if (!const0_operand (op, mode)
3638 && !s390_contiguous_bitmask_vector_p (op, NULL, NULL)
3639 && !s390_bytemask_vector_p (op, NULL))
3640 return 0;
3641 }
3642
3643 /* Accept all non-symbolic constants. */
3644 if (!SYMBOLIC_CONST (op))
3645 return 1;
3646
3647 /* Accept immediate LARL operands. */
3648 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3649 return 1;
3650
3651 /* Thread-local symbols are never legal constants. This is
3652 so that emit_call knows that computing such addresses
3653 might require a function call. */
3654 if (TLS_SYMBOLIC_CONST (op))
3655 return 0;
3656
3657 /* In the PIC case, symbolic constants must *not* be
3658 forced into the literal pool. We accept them here,
3659 so that they will be handled by emit_symbolic_move. */
3660 if (flag_pic)
3661 return 1;
3662
3663 /* All remaining non-PIC symbolic constants are
3664 forced into the literal pool. */
3665 return 0;
3666 }
3667
3668 /* Determine if it's legal to put X into the constant pool. This
3669 is not possible if X contains the address of a symbol that is
3670 not constant (TLS) or not known at final link time (PIC). */
3671
3672 static bool
3673 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3674 {
3675 switch (GET_CODE (x))
3676 {
3677 case CONST_INT:
3678 case CONST_DOUBLE:
3679 case CONST_VECTOR:
3680 /* Accept all non-symbolic constants. */
3681 return false;
3682
3683 case LABEL_REF:
3684 /* Labels are OK iff we are non-PIC. */
3685 return flag_pic != 0;
3686
3687 case SYMBOL_REF:
3688 /* 'Naked' TLS symbol references are never OK,
3689 non-TLS symbols are OK iff we are non-PIC. */
3690 if (tls_symbolic_operand (x))
3691 return true;
3692 else
3693 return flag_pic != 0;
3694
3695 case CONST:
3696 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3697 case PLUS:
3698 case MINUS:
3699 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3700 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3701
3702 case UNSPEC:
3703 switch (XINT (x, 1))
3704 {
3705 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3706 case UNSPEC_LTREL_OFFSET:
3707 case UNSPEC_GOT:
3708 case UNSPEC_GOTOFF:
3709 case UNSPEC_PLTOFF:
3710 case UNSPEC_TLSGD:
3711 case UNSPEC_TLSLDM:
3712 case UNSPEC_NTPOFF:
3713 case UNSPEC_DTPOFF:
3714 case UNSPEC_GOTNTPOFF:
3715 case UNSPEC_INDNTPOFF:
3716 return false;
3717
3718 /* If the literal pool shares the code section, execute template
3719 placeholders may be put into the pool as well. */
3720 case UNSPEC_INSN:
3721 return TARGET_CPU_ZARCH;
3722
3723 default:
3724 return true;
3725 }
3726 break;
3727
3728 default:
3729 gcc_unreachable ();
3730 }
3731 }
3732
3733 /* Returns true if the constant value OP is a legitimate general
3734 operand during and after reload. The difference to
3735 legitimate_constant_p is that this function will not accept
3736 a constant that would need to be forced to the literal pool
3737 before it can be used as operand.
3738 This function accepts all constants which can be loaded directly
3739 into a GPR. */
3740
3741 bool
3742 legitimate_reload_constant_p (rtx op)
3743 {
3744 /* Accept la(y) operands. */
3745 if (GET_CODE (op) == CONST_INT
3746 && DISP_IN_RANGE (INTVAL (op)))
3747 return true;
3748
3749 /* Accept l(g)hi/l(g)fi operands. */
3750 if (GET_CODE (op) == CONST_INT
3751 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3752 return true;
3753
3754 /* Accept lliXX operands. */
3755 if (TARGET_ZARCH
3756 && GET_CODE (op) == CONST_INT
3757 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3758 && s390_single_part (op, word_mode, HImode, 0) >= 0)
3759 return true;
3760
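/* Accept operands with a single non-zero 32-bit part; with the
   extended-immediate facility these can be loaded with a single
   instruction (e.g. llihf/llilf).  */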
3761 if (TARGET_EXTIMM
3762 && GET_CODE (op) == CONST_INT
3763 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3764 && s390_single_part (op, word_mode, SImode, 0) >= 0)
3765 return true;
3766
3767 /* Accept larl operands. */
3768 if (TARGET_CPU_ZARCH
3769 && larl_operand (op, VOIDmode))
3770 return true;
3771
3772 /* Accept floating-point zero operands that fit into a single GPR. */
3773 if (GET_CODE (op) == CONST_DOUBLE
3774 && s390_float_const_zero_p (op)
3775 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3776 return true;
3777
3778 /* Accept double-word operands that can be split. */
3779 if (GET_CODE (op) == CONST_INT
3780 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))
3781 {
3782 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
3783 rtx hi = operand_subword (op, 0, 0, dword_mode);
3784 rtx lo = operand_subword (op, 1, 0, dword_mode);
3785 return legitimate_reload_constant_p (hi)
3786 && legitimate_reload_constant_p (lo);
3787 }
3788
3789 /* Everything else cannot be handled without reload. */
3790 return false;
3791 }
3792
3793 /* Returns true if the constant value OP is a legitimate fp operand
3794 during and after reload.
3795 This function accepts all constants which can be loaded directly
3796 into an FPR. */
3797
3798 static bool
3799 legitimate_reload_fp_constant_p (rtx op)
3800 {
3801 /* Accept floating-point zero operands if the load zero instruction
3802 can be used. Prior to z196 the load fp zero instruction caused a
3803 performance penalty if the result is used as a BFP number. */
3804 if (TARGET_Z196
3805 && GET_CODE (op) == CONST_DOUBLE
3806 && s390_float_const_zero_p (op))
3807 return true;
3808
3809 return false;
3810 }
3811
3812 /* Returns true if the constant value OP is a legitimate vector operand
3813 during and after reload.
3814 This function accepts all constants which can be loaded directly
3815 into a VR. */
3816
3817 static bool
3818 legitimate_reload_vector_constant_p (rtx op)
3819 {
3820 /* FIXME: Support constant vectors with all the same 16 bit unsigned
3821 operands. These can be loaded with vrepi. */
3822
3823 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
3824 && (const0_operand (op, GET_MODE (op))
3825 || constm1_operand (op, GET_MODE (op))
3826 || s390_contiguous_bitmask_vector_p (op, NULL, NULL)
3827 || s390_bytemask_vector_p (op, NULL)))
3828 return true;
3829
3830 return false;
3831 }
3832
3833 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
3834 return the class of reg to actually use. */
3835
3836 static reg_class_t
3837 s390_preferred_reload_class (rtx op, reg_class_t rclass)
3838 {
3839 switch (GET_CODE (op))
3840 {
3841 /* Constants we cannot reload into general registers
3842 must be forced into the literal pool. */
3843 case CONST_VECTOR:
3844 case CONST_DOUBLE:
3845 case CONST_INT:
3846 if (reg_class_subset_p (GENERAL_REGS, rclass)
3847 && legitimate_reload_constant_p (op))
3848 return GENERAL_REGS;
3849 else if (reg_class_subset_p (ADDR_REGS, rclass)
3850 && legitimate_reload_constant_p (op))
3851 return ADDR_REGS;
3852 else if (reg_class_subset_p (FP_REGS, rclass)
3853 && legitimate_reload_fp_constant_p (op))
3854 return FP_REGS;
3855 else if (reg_class_subset_p (VEC_REGS, rclass)
3856 && legitimate_reload_vector_constant_p (op))
3857 return VEC_REGS;
3858
3859 return NO_REGS;
3860
3861 /* If a symbolic constant or a PLUS is reloaded,
3862 it is most likely being used as an address, so
3863 prefer ADDR_REGS. If RCLASS is not a superset
3864 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
3865 case CONST:
3866 /* Symrefs cannot be pushed into the literal pool with -fPIC
3867 so we *MUST NOT* return NO_REGS for these cases
3868 (s390_cannot_force_const_mem will return true).
3869
3870 On the other hand we MUST return NO_REGS for symrefs with
3871 invalid addend which might have been pushed to the literal
3872 pool (no -fPIC). Usually we would expect them to be
3873 handled via secondary reload but this does not happen if
3874 they are used as literal pool slot replacement in reload
3875 inheritance (see emit_input_reload_insns). */
3876 if (TARGET_CPU_ZARCH
3877 && GET_CODE (XEXP (op, 0)) == PLUS
3878 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
3879 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
3880 {
3881 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
3882 return ADDR_REGS;
3883 else
3884 return NO_REGS;
3885 }
3886 /* fallthrough */
3887 case LABEL_REF:
3888 case SYMBOL_REF:
3889 if (!legitimate_reload_constant_p (op))
3890 return NO_REGS;
3891 /* fallthrough */
3892 case PLUS:
3893 /* load address will be used. */
3894 if (reg_class_subset_p (ADDR_REGS, rclass))
3895 return ADDR_REGS;
3896 else
3897 return NO_REGS;
3898
3899 default:
3900 break;
3901 }
3902
3903 return rclass;
3904 }
3905
3906 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
3907 multiple of ALIGNMENT and the SYMBOL_REF being naturally
3908 aligned. */
3909
3910 bool
3911 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
3912 {
3913 HOST_WIDE_INT addend;
3914 rtx symref;
3915
3916 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3917 return false;
3918
3919 if (addend & (alignment - 1))
3920 return false;
3921
3922 if (GET_CODE (symref) == SYMBOL_REF
3923 && !SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref))
3924 return true;
3925
3926 if (GET_CODE (symref) == UNSPEC
3927 && alignment <= UNITS_PER_LONG)
3928 return true;
3929
3930 return false;
3931 }
3932
3933 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
3934 operand, SCRATCH is used to load the even part of the address;
3935 one is then added to form the final address. */
3936
3937 void
3938 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
3939 {
3940 HOST_WIDE_INT addend;
3941 rtx symref;
3942
3943 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3944 gcc_unreachable ();
3945
3946 if (!(addend & 1))
3947 /* Easy case. The addend is even so larl will do fine. */
3948 emit_move_insn (reg, addr);
3949 else
3950 {
3951 /* We can leave the scratch register untouched if the target
3952 register is a valid base register. */
3953 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
3954 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
3955 scratch = reg;
3956
3957 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
3958 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
3959
3960 if (addend != 1)
3961 emit_move_insn (scratch,
3962 gen_rtx_CONST (Pmode,
3963 gen_rtx_PLUS (Pmode, symref,
3964 GEN_INT (addend - 1))));
3965 else
3966 emit_move_insn (scratch, symref);
3967
3968 /* Increment the address using la in order to avoid clobbering cc. */
3969 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
3970 }
3971 }
3972
3973 /* Generate what is necessary to move between REG and MEM using
3974 SCRATCH. The direction is given by TOMEM. */
3975
3976 void
3977 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
3978 {
3979 /* Reload might have pulled a constant out of the literal pool.
3980 Force it back in. */
3981 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
3982 || GET_CODE (mem) == CONST_VECTOR
3983 || GET_CODE (mem) == CONST)
3984 mem = force_const_mem (GET_MODE (reg), mem);
3985
3986 gcc_assert (MEM_P (mem));
3987
3988 /* For a load from memory we can leave the scratch register
3989 untouched if the target register is a valid base register. */
3990 if (!tomem
3991 && REGNO (reg) < FIRST_PSEUDO_REGISTER
3992 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
3993 && GET_MODE (reg) == GET_MODE (scratch))
3994 scratch = reg;
3995
3996 /* Load address into scratch register. Since we can't have a
3997 secondary reload for a secondary reload we have to cover the case
3998 where larl would need a secondary reload here as well. */
3999 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4000
4001 /* Now we can use a standard load/store to do the move. */
4002 if (tomem)
4003 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4004 else
4005 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4006 }
4007
4008 /* Inform reload about cases where moving X with a mode MODE to a register in
4009 RCLASS requires an extra scratch or immediate register. Return the class
4010 needed for the immediate register. */
4011
4012 static reg_class_t
4013 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4014 machine_mode mode, secondary_reload_info *sri)
4015 {
4016 enum reg_class rclass = (enum reg_class) rclass_i;
4017
4018 /* Intermediate register needed. */
4019 if (reg_classes_intersect_p (CC_REGS, rclass))
4020 return GENERAL_REGS;
4021
4022 if (TARGET_VX)
4023 {
4024 /* The vst/vl vector move instructions allow only for short
4025 displacements. */
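/* Illustrative example: a V16QI access at 4096(%r2) exceeds the 12-bit
   short displacement of vl/vst, so the address is first computed into a
   scratch address register (via the reloaddi/reloadsi_la_in/_la_out
   patterns selected just below) and the vector load/store then uses
   that register directly.  */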
4026 if (MEM_P (x)
4027 && GET_CODE (XEXP (x, 0)) == PLUS
4028 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4029 && !SHORT_DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1)))
4030 && reg_class_subset_p (rclass, VEC_REGS)
4031 && (!reg_class_subset_p (rclass, FP_REGS)
4032 || (GET_MODE_SIZE (mode) > 8
4033 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4034 {
4035 if (in_p)
4036 sri->icode = (TARGET_64BIT ?
4037 CODE_FOR_reloaddi_la_in :
4038 CODE_FOR_reloadsi_la_in);
4039 else
4040 sri->icode = (TARGET_64BIT ?
4041 CODE_FOR_reloaddi_la_out :
4042 CODE_FOR_reloadsi_la_out);
4043 }
4044 }
4045
4046 if (TARGET_Z10)
4047 {
4048 HOST_WIDE_INT offset;
4049 rtx symref;
4050
4051 /* On z10 several optimizer steps may generate larl operands with
4052 an odd addend. */
4053 if (in_p
4054 && s390_loadrelative_operand_p (x, &symref, &offset)
4055 && mode == Pmode
4056 && !SYMBOL_REF_ALIGN1_P (symref)
4057 && (offset & 1) == 1)
4058 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4059 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4060
4061 /* Handle all the (mem (symref)) accesses we cannot use the z10
4062 instructions for. */
4063 if (MEM_P (x)
4064 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4065 && (mode == QImode
4066 || !reg_class_subset_p (rclass, GENERAL_REGS)
4067 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4068 || !s390_check_symref_alignment (XEXP (x, 0),
4069 GET_MODE_SIZE (mode))))
4070 {
4071 #define __SECONDARY_RELOAD_CASE(M,m) \
4072 case M##mode: \
4073 if (TARGET_64BIT) \
4074 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4075 CODE_FOR_reload##m##di_tomem_z10; \
4076 else \
4077 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4078 CODE_FOR_reload##m##si_tomem_z10; \
4079 break;
4080
4081 switch (GET_MODE (x))
4082 {
4083 __SECONDARY_RELOAD_CASE (QI, qi);
4084 __SECONDARY_RELOAD_CASE (HI, hi);
4085 __SECONDARY_RELOAD_CASE (SI, si);
4086 __SECONDARY_RELOAD_CASE (DI, di);
4087 __SECONDARY_RELOAD_CASE (TI, ti);
4088 __SECONDARY_RELOAD_CASE (SF, sf);
4089 __SECONDARY_RELOAD_CASE (DF, df);
4090 __SECONDARY_RELOAD_CASE (TF, tf);
4091 __SECONDARY_RELOAD_CASE (SD, sd);
4092 __SECONDARY_RELOAD_CASE (DD, dd);
4093 __SECONDARY_RELOAD_CASE (TD, td);
4094 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4095 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4096 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4097 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4098 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4099 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4100 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4101 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4102 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4103 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4104 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4105 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4106 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4107 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4108 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4109 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4110 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4111 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4112 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4113 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4114 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4115 default:
4116 gcc_unreachable ();
4117 }
4118 #undef __SECONDARY_RELOAD_CASE
4119 }
4120 }
4121
4122 /* We need a scratch register when loading a PLUS expression which
4123 is not a legitimate operand of the LOAD ADDRESS instruction. */
4124 /* LRA can deal with the transformation of a PLUS operand on its own,
4125 so we don't need to prompt it in this case.  */
4126 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4127 sri->icode = (TARGET_64BIT ?
4128 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4129
4130 /* When performing a multiword move from or to memory, we have to make
4131 sure the second chunk in memory is addressable without causing a
4132 displacement overflow.  If that would be the case, we calculate the
4133 address in a scratch register.  */
4134 if (MEM_P (x)
4135 && GET_CODE (XEXP (x, 0)) == PLUS
4136 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4137 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4138 + GET_MODE_SIZE (mode) - 1))
4139 {
4140 /* For GENERAL_REGS a displacement overflow is no problem if it occurs
4141 in an s_operand address, since we may fall back to lm/stm.  So we only
4142 have to care about overflows in the b+i+d case.  */
4143 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4144 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4145 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4146 /* For FP_REGS no lm/stm is available so this check is triggered
4147 for displacement overflows in b+i+d and b+d like addresses. */
4148 || (reg_classes_intersect_p (FP_REGS, rclass)
4149 && s390_class_max_nregs (FP_REGS, mode) > 1))
4150 {
4151 if (in_p)
4152 sri->icode = (TARGET_64BIT ?
4153 CODE_FOR_reloaddi_la_in :
4154 CODE_FOR_reloadsi_la_in);
4155 else
4156 sri->icode = (TARGET_64BIT ?
4157 CODE_FOR_reloaddi_la_out :
4158 CODE_FOR_reloadsi_la_out);
4159 }
4160 }
4161
4162 /* A scratch address register is needed when a symbolic constant is
4163 copied to r0 while compiling with -fPIC.  In other cases the target
4164 register might be used as a temporary (see legitimize_pic_address).  */
4165 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4166 sri->icode = (TARGET_64BIT ?
4167 CODE_FOR_reloaddi_PIC_addr :
4168 CODE_FOR_reloadsi_PIC_addr);
4169
4170 /* Either scratch or no register needed. */
4171 return NO_REGS;
4172 }
4173
4174 /* Generate code to load SRC, which is PLUS that is not a
4175 legitimate operand for the LA instruction, into TARGET.
4176 SCRATCH may be used as scratch register. */
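/* Sketch of a typical case (illustrative): SRC = (plus (reg %f0) (reg %r1)).
   Since %f0 cannot act as a base or index register, its value is first
   copied into SCRATCH and the LOAD ADDRESS is then emitted for
   (plus SCRATCH (reg %r1)).  */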
4177
4178 void
4179 s390_expand_plus_operand (rtx target, rtx src,
4180 rtx scratch)
4181 {
4182 rtx sum1, sum2;
4183 struct s390_address ad;
4184
4185 /* src must be a PLUS; get its two operands. */
4186 gcc_assert (GET_CODE (src) == PLUS);
4187 gcc_assert (GET_MODE (src) == Pmode);
4188
4189 /* Check if either of the two operands is already scheduled
4190 for replacement by reload.  This can happen e.g. when
4191 float registers occur in an address.  */
4192 sum1 = find_replacement (&XEXP (src, 0));
4193 sum2 = find_replacement (&XEXP (src, 1));
4194 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4195
4196 /* If the address is already strictly valid, there's nothing to do. */
4197 if (!s390_decompose_address (src, &ad)
4198 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4199 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4200 {
4201 /* Otherwise, one of the operands cannot be an address register;
4202 we reload its value into the scratch register. */
4203 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4204 {
4205 emit_move_insn (scratch, sum1);
4206 sum1 = scratch;
4207 }
4208 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4209 {
4210 emit_move_insn (scratch, sum2);
4211 sum2 = scratch;
4212 }
4213
4214 /* According to the way these invalid addresses are generated
4215 in reload.c, it should never happen (at least on s390) that
4216 *neither* of the PLUS components, after find_replacements
4217 was applied, is an address register. */
4218 if (sum1 == scratch && sum2 == scratch)
4219 {
4220 debug_rtx (src);
4221 gcc_unreachable ();
4222 }
4223
4224 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4225 }
4226
4227 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4228 is only ever performed on addresses, so we can mark the
4229 sum as legitimate for LA in any case. */
4230 s390_load_address (target, src);
4231 }
4232
4233
4234 /* Return true if ADDR is a valid memory address.
4235 STRICT specifies whether strict register checking applies. */
4236
4237 static bool
4238 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4239 {
4240 struct s390_address ad;
4241
4242 if (TARGET_Z10
4243 && larl_operand (addr, VOIDmode)
4244 && (mode == VOIDmode
4245 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4246 return true;
4247
4248 if (!s390_decompose_address (addr, &ad))
4249 return false;
4250
4251 if (strict)
4252 {
4253 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4254 return false;
4255
4256 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4257 return false;
4258 }
4259 else
4260 {
4261 if (ad.base
4262 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4263 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4264 return false;
4265
4266 if (ad.indx
4267 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4268 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4269 return false;
4270 }
4271 return true;
4272 }
4273
4274 /* Return true if OP is a valid operand for the LA instruction.
4275 In 31-bit, we need to prove that the result is used as an
4276 address, as LA performs only a 31-bit addition. */
4277
4278 bool
4279 legitimate_la_operand_p (rtx op)
4280 {
4281 struct s390_address addr;
4282 if (!s390_decompose_address (op, &addr))
4283 return false;
4284
4285 return (TARGET_64BIT || addr.pointer);
4286 }
4287
4288 /* Return true if it is valid *and* preferable to use LA to
4289 compute the sum of OP1 and OP2. */
4290
4291 bool
4292 preferred_la_operand_p (rtx op1, rtx op2)
4293 {
4294 struct s390_address addr;
4295
4296 if (op2 != const0_rtx)
4297 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4298
4299 if (!s390_decompose_address (op1, &addr))
4300 return false;
4301 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4302 return false;
4303 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4304 return false;
4305
4306 /* Avoid LA instructions with index register on z196; it is
4307 preferable to use regular add instructions when possible.
4308 Starting with zEC12 the la with index register is "uncracked"
4309 again. */
4310 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4311 return false;
4312
4313 if (!TARGET_64BIT && !addr.pointer)
4314 return false;
4315
4316 if (addr.pointer)
4317 return true;
4318
4319 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4320 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4321 return true;
4322
4323 return false;
4324 }
4325
4326 /* Emit a forced load-address operation to load SRC into DST.
4327 This will use the LOAD ADDRESS instruction even in situations
4328 where legitimate_la_operand_p (SRC) returns false. */
4329
4330 void
4331 s390_load_address (rtx dst, rtx src)
4332 {
4333 if (TARGET_64BIT)
4334 emit_move_insn (dst, src);
4335 else
4336 emit_insn (gen_force_la_31 (dst, src));
4337 }
4338
4339 /* Return a legitimate reference for ORIG (an address) using the
4340 register REG. If REG is 0, a new pseudo is generated.
4341
4342 There are two types of references that must be handled:
4343
4344 1. Global data references must load the address from the GOT, via
4345 the PIC reg. An insn is emitted to do this load, and the reg is
4346 returned.
4347
4348 2. Static data references, constant pool addresses, and code labels
4349 compute the address as an offset from the GOT, whose base is in
4350 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4351 differentiate them from global data objects. The returned
4352 address is the PIC reg + an unspec constant.
4353
4354 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4355 reg also appears in the address. */
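/* Illustrative examples (sketches based on the sequences described below):
   a local symbol on zarch with an even addend is loaded with a single
   larl; a non-local symbol on z10 with -fPIC becomes
   lgrl <target>, sym@GOTENT, i.e. a load-relative access of the GOT slot;
   on older CPUs the GOT slot address is obtained via larl or from the
   literal pool and then dereferenced.  */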
4356
4357 rtx
4358 legitimize_pic_address (rtx orig, rtx reg)
4359 {
4360 rtx addr = orig;
4361 rtx addend = const0_rtx;
4362 rtx new_rtx = orig;
4363
4364 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4365
4366 if (GET_CODE (addr) == CONST)
4367 addr = XEXP (addr, 0);
4368
4369 if (GET_CODE (addr) == PLUS)
4370 {
4371 addend = XEXP (addr, 1);
4372 addr = XEXP (addr, 0);
4373 }
4374
4375 if ((GET_CODE (addr) == LABEL_REF
4376 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
4377 || (GET_CODE (addr) == UNSPEC &&
4378 (XINT (addr, 1) == UNSPEC_GOTENT
4379 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4380 && GET_CODE (addend) == CONST_INT)
4381 {
4382 /* This can be locally addressed. */
4383
4384 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4385 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4386 gen_rtx_CONST (Pmode, addr) : addr);
4387
4388 if (TARGET_CPU_ZARCH
4389 && larl_operand (const_addr, VOIDmode)
4390 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
4391 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
4392 {
4393 if (INTVAL (addend) & 1)
4394 {
4395 /* LARL can't handle odd offsets, so emit a pair of LARL
4396 and LA. */
4397 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4398
4399 if (!DISP_IN_RANGE (INTVAL (addend)))
4400 {
4401 HOST_WIDE_INT even = INTVAL (addend) - 1;
4402 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4403 addr = gen_rtx_CONST (Pmode, addr);
4404 addend = const1_rtx;
4405 }
4406
4407 emit_move_insn (temp, addr);
4408 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4409
4410 if (reg != 0)
4411 {
4412 s390_load_address (reg, new_rtx);
4413 new_rtx = reg;
4414 }
4415 }
4416 else
4417 {
4418 /* If the offset is even, we can just use LARL. This
4419 will happen automatically. */
4420 }
4421 }
4422 else
4423 {
4424 /* No larl - Access local symbols relative to the GOT. */
4425
4426 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4427
4428 if (reload_in_progress || reload_completed)
4429 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4430
4431 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4432 if (addend != const0_rtx)
4433 addr = gen_rtx_PLUS (Pmode, addr, addend);
4434 addr = gen_rtx_CONST (Pmode, addr);
4435 addr = force_const_mem (Pmode, addr);
4436 emit_move_insn (temp, addr);
4437
4438 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4439 if (reg != 0)
4440 {
4441 s390_load_address (reg, new_rtx);
4442 new_rtx = reg;
4443 }
4444 }
4445 }
4446 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4447 {
4448 /* A non-local symbol reference without addend.
4449
4450 The symbol ref is wrapped into an UNSPEC to make sure the
4451 proper operand modifier (@GOT or @GOTENT) will be emitted.
4452 This will tell the linker to put the symbol into the GOT.
4453
4454 Additionally the code dereferencing the GOT slot is emitted here.
4455
4456 An addend to the symref needs to be added afterwards.
4457 legitimize_pic_address calls itself recursively to handle
4458 that case. So no need to do it here. */
4459
4460 if (reg == 0)
4461 reg = gen_reg_rtx (Pmode);
4462
4463 if (TARGET_Z10)
4464 {
4465 /* Use load relative if possible.
4466 lgrl <target>, sym@GOTENT */
4467 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4468 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4469 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4470
4471 emit_move_insn (reg, new_rtx);
4472 new_rtx = reg;
4473 }
4474 else if (flag_pic == 1)
4475 {
4476 /* Assume GOT offset is a valid displacement operand (< 4k
4477 or < 512k with z990). This is handled the same way in
4478 both 31- and 64-bit code (@GOT).
4479 lg <target>, sym@GOT(r12) */
4480
4481 if (reload_in_progress || reload_completed)
4482 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4483
4484 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4485 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4486 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4487 new_rtx = gen_const_mem (Pmode, new_rtx);
4488 emit_move_insn (reg, new_rtx);
4489 new_rtx = reg;
4490 }
4491 else if (TARGET_CPU_ZARCH)
4492 {
4493 /* If the GOT offset might be >= 4k, we determine the position
4494 of the GOT entry via a PC-relative LARL (@GOTENT).
4495 larl temp, sym@GOTENT
4496 lg <target>, 0(temp) */
4497
4498 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4499
4500 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4501 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4502
4503 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4504 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4505 emit_move_insn (temp, new_rtx);
4506
4507 new_rtx = gen_const_mem (Pmode, temp);
4508 emit_move_insn (reg, new_rtx);
4509
4510 new_rtx = reg;
4511 }
4512 else
4513 {
4514 /* If the GOT offset might be >= 4k, we have to load it
4515 from the literal pool (@GOT).
4516
4517 lg temp, lit-litbase(r13)
4518 lg <target>, 0(temp)
4519 lit: .long sym@GOT */
4520
4521 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4522
4523 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4524 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4525
4526 if (reload_in_progress || reload_completed)
4527 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4528
4529 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4530 addr = gen_rtx_CONST (Pmode, addr);
4531 addr = force_const_mem (Pmode, addr);
4532 emit_move_insn (temp, addr);
4533
4534 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4535 new_rtx = gen_const_mem (Pmode, new_rtx);
4536 emit_move_insn (reg, new_rtx);
4537 new_rtx = reg;
4538 }
4539 }
4540 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4541 {
4542 gcc_assert (XVECLEN (addr, 0) == 1);
4543 switch (XINT (addr, 1))
4544 {
4545 /* These refer to symbols (or PLT slots) relative to the GOT
4546 (not to GOT slots!).  In general this will exceed the
4547 displacement range, so these values belong in the literal
4548 pool.  */
4549 case UNSPEC_GOTOFF:
4550 case UNSPEC_PLTOFF:
4551 new_rtx = force_const_mem (Pmode, orig);
4552 break;
4553
4554 /* For -fPIC the GOT size might exceed the displacement
4555 range so make sure the value is in the literal pool. */
4556 case UNSPEC_GOT:
4557 if (flag_pic == 2)
4558 new_rtx = force_const_mem (Pmode, orig);
4559 break;
4560
4561 /* For @GOTENT larl is used. This is handled like local
4562 symbol refs. */
4563 case UNSPEC_GOTENT:
4564 gcc_unreachable ();
4565 break;
4566
4567 /* @PLT is OK as is on 64-bit, but must be converted to
4568 GOT-relative @PLTOFF on 31-bit.  */
4569 case UNSPEC_PLT:
4570 if (!TARGET_CPU_ZARCH)
4571 {
4572 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4573
4574 if (reload_in_progress || reload_completed)
4575 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4576
4577 addr = XVECEXP (addr, 0, 0);
4578 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4579 UNSPEC_PLTOFF);
4580 if (addend != const0_rtx)
4581 addr = gen_rtx_PLUS (Pmode, addr, addend);
4582 addr = gen_rtx_CONST (Pmode, addr);
4583 addr = force_const_mem (Pmode, addr);
4584 emit_move_insn (temp, addr);
4585
4586 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4587 if (reg != 0)
4588 {
4589 s390_load_address (reg, new_rtx);
4590 new_rtx = reg;
4591 }
4592 }
4593 else
4594 /* On 64 bit larl can be used. This case is handled like
4595 local symbol refs. */
4596 gcc_unreachable ();
4597 break;
4598
4599 /* Everything else cannot happen. */
4600 default:
4601 gcc_unreachable ();
4602 }
4603 }
4604 else if (addend != const0_rtx)
4605 {
4606 /* Otherwise, compute the sum. */
4607
4608 rtx base = legitimize_pic_address (addr, reg);
4609 new_rtx = legitimize_pic_address (addend,
4610 base == reg ? NULL_RTX : reg);
4611 if (GET_CODE (new_rtx) == CONST_INT)
4612 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4613 else
4614 {
4615 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4616 {
4617 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4618 new_rtx = XEXP (new_rtx, 1);
4619 }
4620 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4621 }
4622
4623 if (GET_CODE (new_rtx) == CONST)
4624 new_rtx = XEXP (new_rtx, 0);
4625 new_rtx = force_operand (new_rtx, 0);
4626 }
4627
4628 return new_rtx;
4629 }
4630
4631 /* Load the thread pointer into a register. */
4632
4633 rtx
4634 s390_get_thread_pointer (void)
4635 {
4636 rtx tp = gen_reg_rtx (Pmode);
4637
4638 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4639 mark_reg_pointer (tp, BITS_PER_WORD);
4640
4641 return tp;
4642 }
4643
4644 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4645 in s390_tls_symbol which always refers to __tls_get_offset.
4646 The returned offset is written to RESULT_REG and a USE rtx is
4647 generated for TLS_CALL. */
4648
4649 static GTY(()) rtx s390_tls_symbol;
4650
4651 static void
4652 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4653 {
4654 rtx insn;
4655
4656 if (!flag_pic)
4657 emit_insn (s390_load_got ());
4658
4659 if (!s390_tls_symbol)
4660 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4661
4662 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4663 gen_rtx_REG (Pmode, RETURN_REGNUM));
4664
4665 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4666 RTL_CONST_CALL_P (insn) = 1;
4667 }
4668
4669 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4670 this (thread-local) address. REG may be used as temporary. */
4671
4672 static rtx
4673 legitimize_tls_address (rtx addr, rtx reg)
4674 {
4675 rtx new_rtx, tls_call, temp, base, r2, insn;
4676
4677 if (GET_CODE (addr) == SYMBOL_REF)
4678 switch (tls_symbolic_operand (addr))
4679 {
4680 case TLS_MODEL_GLOBAL_DYNAMIC:
4681 start_sequence ();
4682 r2 = gen_rtx_REG (Pmode, 2);
4683 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4684 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4685 new_rtx = force_const_mem (Pmode, new_rtx);
4686 emit_move_insn (r2, new_rtx);
4687 s390_emit_tls_call_insn (r2, tls_call);
4688 insn = get_insns ();
4689 end_sequence ();
4690
4691 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4692 temp = gen_reg_rtx (Pmode);
4693 emit_libcall_block (insn, temp, r2, new_rtx);
4694
4695 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4696 if (reg != 0)
4697 {
4698 s390_load_address (reg, new_rtx);
4699 new_rtx = reg;
4700 }
4701 break;
4702
4703 case TLS_MODEL_LOCAL_DYNAMIC:
4704 start_sequence ();
4705 r2 = gen_rtx_REG (Pmode, 2);
4706 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
4707 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4708 new_rtx = force_const_mem (Pmode, new_rtx);
4709 emit_move_insn (r2, new_rtx);
4710 s390_emit_tls_call_insn (r2, tls_call);
4711 insn = get_insns ();
4712 end_sequence ();
4713
4714 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
4715 temp = gen_reg_rtx (Pmode);
4716 emit_libcall_block (insn, temp, r2, new_rtx);
4717
4718 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4719 base = gen_reg_rtx (Pmode);
4720 s390_load_address (base, new_rtx);
4721
4722 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
4723 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4724 new_rtx = force_const_mem (Pmode, new_rtx);
4725 temp = gen_reg_rtx (Pmode);
4726 emit_move_insn (temp, new_rtx);
4727
4728 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
4729 if (reg != 0)
4730 {
4731 s390_load_address (reg, new_rtx);
4732 new_rtx = reg;
4733 }
4734 break;
4735
4736 case TLS_MODEL_INITIAL_EXEC:
4737 if (flag_pic == 1)
4738 {
4739 /* Assume GOT offset < 4k. This is handled the same way
4740 in both 31- and 64-bit code. */
4741
4742 if (reload_in_progress || reload_completed)
4743 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4744
4745 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4746 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4747 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4748 new_rtx = gen_const_mem (Pmode, new_rtx);
4749 temp = gen_reg_rtx (Pmode);
4750 emit_move_insn (temp, new_rtx);
4751 }
4752 else if (TARGET_CPU_ZARCH)
4753 {
4754 /* If the GOT offset might be >= 4k, we determine the position
4755 of the GOT entry via a PC-relative LARL. */
4756
4757 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4758 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4759 temp = gen_reg_rtx (Pmode);
4760 emit_move_insn (temp, new_rtx);
4761
4762 new_rtx = gen_const_mem (Pmode, temp);
4763 temp = gen_reg_rtx (Pmode);
4764 emit_move_insn (temp, new_rtx);
4765 }
4766 else if (flag_pic)
4767 {
4768 /* If the GOT offset might be >= 4k, we have to load it
4769 from the literal pool. */
4770
4771 if (reload_in_progress || reload_completed)
4772 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4773
4774 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4775 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4776 new_rtx = force_const_mem (Pmode, new_rtx);
4777 temp = gen_reg_rtx (Pmode);
4778 emit_move_insn (temp, new_rtx);
4779
4780 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4781 new_rtx = gen_const_mem (Pmode, new_rtx);
4782
4783 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4784 temp = gen_reg_rtx (Pmode);
4785 emit_insn (gen_rtx_SET (temp, new_rtx));
4786 }
4787 else
4788 {
4789 /* In position-dependent code, load the absolute address of
4790 the GOT entry from the literal pool. */
4791
4792 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4793 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4794 new_rtx = force_const_mem (Pmode, new_rtx);
4795 temp = gen_reg_rtx (Pmode);
4796 emit_move_insn (temp, new_rtx);
4797
4798 new_rtx = temp;
4799 new_rtx = gen_const_mem (Pmode, new_rtx);
4800 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4801 temp = gen_reg_rtx (Pmode);
4802 emit_insn (gen_rtx_SET (temp, new_rtx));
4803 }
4804
4805 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4806 if (reg != 0)
4807 {
4808 s390_load_address (reg, new_rtx);
4809 new_rtx = reg;
4810 }
4811 break;
4812
4813 case TLS_MODEL_LOCAL_EXEC:
4814 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4815 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4816 new_rtx = force_const_mem (Pmode, new_rtx);
4817 temp = gen_reg_rtx (Pmode);
4818 emit_move_insn (temp, new_rtx);
4819
4820 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4821 if (reg != 0)
4822 {
4823 s390_load_address (reg, new_rtx);
4824 new_rtx = reg;
4825 }
4826 break;
4827
4828 default:
4829 gcc_unreachable ();
4830 }
4831
4832 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
4833 {
4834 switch (XINT (XEXP (addr, 0), 1))
4835 {
4836 case UNSPEC_INDNTPOFF:
4837 gcc_assert (TARGET_CPU_ZARCH);
4838 new_rtx = addr;
4839 break;
4840
4841 default:
4842 gcc_unreachable ();
4843 }
4844 }
4845
4846 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
4847 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4848 {
4849 new_rtx = XEXP (XEXP (addr, 0), 0);
4850 if (GET_CODE (new_rtx) != SYMBOL_REF)
4851 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4852
4853 new_rtx = legitimize_tls_address (new_rtx, reg);
4854 new_rtx = plus_constant (Pmode, new_rtx,
4855 INTVAL (XEXP (XEXP (addr, 0), 1)));
4856 new_rtx = force_operand (new_rtx, 0);
4857 }
4858
4859 else
4860 gcc_unreachable (); /* for now ... */
4861
4862 return new_rtx;
4863 }
4864
4865 /* Emit insns making the address in operands[1] valid for a standard
4866 move to operands[0]. operands[1] is replaced by an address which
4867 should be used instead of the former RTX to emit the move
4868 pattern. */
4869
4870 void
4871 emit_symbolic_move (rtx *operands)
4872 {
4873 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
4874
4875 if (GET_CODE (operands[0]) == MEM)
4876 operands[1] = force_reg (Pmode, operands[1]);
4877 else if (TLS_SYMBOLIC_CONST (operands[1]))
4878 operands[1] = legitimize_tls_address (operands[1], temp);
4879 else if (flag_pic)
4880 operands[1] = legitimize_pic_address (operands[1], temp);
4881 }
4882
4883 /* Try machine-dependent ways of modifying an illegitimate address X
4884 to be legitimate. If we find one, return the new, valid address.
4885
4886 OLDX is the address as it was before break_out_memory_refs was called.
4887 In some cases it is useful to look at this to decide what needs to be done.
4888
4889 MODE is the mode of the operand pointed to by X.
4890
4891 When -fpic is used, special handling is needed for symbolic references.
4892 See comments by legitimize_pic_address for details. */
4893
4894 static rtx
4895 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4896 machine_mode mode ATTRIBUTE_UNUSED)
4897 {
4898 rtx constant_term = const0_rtx;
4899
4900 if (TLS_SYMBOLIC_CONST (x))
4901 {
4902 x = legitimize_tls_address (x, 0);
4903
4904 if (s390_legitimate_address_p (mode, x, FALSE))
4905 return x;
4906 }
4907 else if (GET_CODE (x) == PLUS
4908 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
4909 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
4910 {
4911 return x;
4912 }
4913 else if (flag_pic)
4914 {
4915 if (SYMBOLIC_CONST (x)
4916 || (GET_CODE (x) == PLUS
4917 && (SYMBOLIC_CONST (XEXP (x, 0))
4918 || SYMBOLIC_CONST (XEXP (x, 1)))))
4919 x = legitimize_pic_address (x, 0);
4920
4921 if (s390_legitimate_address_p (mode, x, FALSE))
4922 return x;
4923 }
4924
4925 x = eliminate_constant_term (x, &constant_term);
4926
4927 /* Optimize loading of large displacements by splitting them
4928 into the multiple of 4K and the rest; this allows the
4929 former to be CSE'd if possible.
4930
4931 Don't do this if the displacement is added to a register
4932 pointing into the stack frame, as the offsets will
4933 change later anyway. */
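/* Worked example (illustrative): a displacement of 0x12345 is split into
   lower = 0x12345 & 0xfff = 0x345 and upper = 0x12345 ^ 0x345 = 0x12000;
   the 0x12000 part is loaded into a register (and may be CSE'd), leaving
   0x345 as the in-range displacement.  */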
4934
4935 if (GET_CODE (constant_term) == CONST_INT
4936 && !TARGET_LONG_DISPLACEMENT
4937 && !DISP_IN_RANGE (INTVAL (constant_term))
4938 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
4939 {
4940 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
4941 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
4942
4943 rtx temp = gen_reg_rtx (Pmode);
4944 rtx val = force_operand (GEN_INT (upper), temp);
4945 if (val != temp)
4946 emit_move_insn (temp, val);
4947
4948 x = gen_rtx_PLUS (Pmode, x, temp);
4949 constant_term = GEN_INT (lower);
4950 }
4951
4952 if (GET_CODE (x) == PLUS)
4953 {
4954 if (GET_CODE (XEXP (x, 0)) == REG)
4955 {
4956 rtx temp = gen_reg_rtx (Pmode);
4957 rtx val = force_operand (XEXP (x, 1), temp);
4958 if (val != temp)
4959 emit_move_insn (temp, val);
4960
4961 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
4962 }
4963
4964 else if (GET_CODE (XEXP (x, 1)) == REG)
4965 {
4966 rtx temp = gen_reg_rtx (Pmode);
4967 rtx val = force_operand (XEXP (x, 0), temp);
4968 if (val != temp)
4969 emit_move_insn (temp, val);
4970
4971 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
4972 }
4973 }
4974
4975 if (constant_term != const0_rtx)
4976 x = gen_rtx_PLUS (Pmode, x, constant_term);
4977
4978 return x;
4979 }
4980
4981 /* Try a machine-dependent way of reloading an illegitimate address AD
4982 operand. If we find one, push the reload and return the new address.
4983
4984 MODE is the mode of the enclosing MEM. OPNUM is the operand number
4985 and TYPE is the reload type of the current reload. */
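/* Illustrative sketch: for an out-of-range address like (plus %r2 0x12345)
   the address is rewritten as (plus (plus %r2 0x12000) 0x345) and a reload
   of the constant 0x12000 into an address register is pushed, so that the
   remaining displacement 0x345 fits the instruction format.  */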
4986
4987 rtx
4988 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
4989 int opnum, int type)
4990 {
4991 if (!optimize || TARGET_LONG_DISPLACEMENT)
4992 return NULL_RTX;
4993
4994 if (GET_CODE (ad) == PLUS)
4995 {
4996 rtx tem = simplify_binary_operation (PLUS, Pmode,
4997 XEXP (ad, 0), XEXP (ad, 1));
4998 if (tem)
4999 ad = tem;
5000 }
5001
5002 if (GET_CODE (ad) == PLUS
5003 && GET_CODE (XEXP (ad, 0)) == REG
5004 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5005 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5006 {
5007 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5008 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5009 rtx cst, tem, new_rtx;
5010
5011 cst = GEN_INT (upper);
5012 if (!legitimate_reload_constant_p (cst))
5013 cst = force_const_mem (Pmode, cst);
5014
5015 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5016 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5017
5018 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5019 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5020 opnum, (enum reload_type) type);
5021 return new_rtx;
5022 }
5023
5024 return NULL_RTX;
5025 }
5026
5027 /* Emit code to move LEN bytes from SRC to DST.  */
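/* Rough sketch of the emitted code (illustrative): a constant LEN of 200
   results in a single

     mvc 0(200,%rDST),0(%rSRC)

   while larger or variable lengths use the MVCLE path or a loop moving
   256-byte blocks followed by a final MVC for the remainder.  */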
5028
5029 bool
5030 s390_expand_movmem (rtx dst, rtx src, rtx len)
5031 {
5032 /* When tuning for z10 or higher we rely on the Glibc functions to
5033 do the right thing.  Inline code is generated only for constant
5034 lengths below 64k.  */
5035 if (s390_tune >= PROCESSOR_2097_Z10
5036 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5037 return false;
5038
5039 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5040 {
5041 if (INTVAL (len) > 0)
5042 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
5043 }
5044
5045 else if (TARGET_MVCLE)
5046 {
5047 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5048 }
5049
5050 else
5051 {
5052 rtx dst_addr, src_addr, count, blocks, temp;
5053 rtx_code_label *loop_start_label = gen_label_rtx ();
5054 rtx_code_label *loop_end_label = gen_label_rtx ();
5055 rtx_code_label *end_label = gen_label_rtx ();
5056 machine_mode mode;
5057
5058 mode = GET_MODE (len);
5059 if (mode == VOIDmode)
5060 mode = Pmode;
5061
5062 dst_addr = gen_reg_rtx (Pmode);
5063 src_addr = gen_reg_rtx (Pmode);
5064 count = gen_reg_rtx (mode);
5065 blocks = gen_reg_rtx (mode);
5066
5067 convert_move (count, len, 1);
5068 emit_cmp_and_jump_insns (count, const0_rtx,
5069 EQ, NULL_RTX, mode, 1, end_label);
5070
5071 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5072 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5073 dst = change_address (dst, VOIDmode, dst_addr);
5074 src = change_address (src, VOIDmode, src_addr);
5075
5076 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5077 OPTAB_DIRECT);
5078 if (temp != count)
5079 emit_move_insn (count, temp);
5080
5081 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5082 OPTAB_DIRECT);
5083 if (temp != blocks)
5084 emit_move_insn (blocks, temp);
5085
5086 emit_cmp_and_jump_insns (blocks, const0_rtx,
5087 EQ, NULL_RTX, mode, 1, loop_end_label);
5088
5089 emit_label (loop_start_label);
5090
5091 if (TARGET_Z10
5092 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5093 {
5094 rtx prefetch;
5095
5096 /* Issue a read prefetch for the +3 cache line. */
5097 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5098 const0_rtx, const0_rtx);
5099 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5100 emit_insn (prefetch);
5101
5102 /* Issue a write prefetch for the +3 cache line. */
5103 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5104 const1_rtx, const0_rtx);
5105 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5106 emit_insn (prefetch);
5107 }
5108
5109 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5110 s390_load_address (dst_addr,
5111 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5112 s390_load_address (src_addr,
5113 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5114
5115 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5116 OPTAB_DIRECT);
5117 if (temp != blocks)
5118 emit_move_insn (blocks, temp);
5119
5120 emit_cmp_and_jump_insns (blocks, const0_rtx,
5121 EQ, NULL_RTX, mode, 1, loop_end_label);
5122
5123 emit_jump (loop_start_label);
5124 emit_label (loop_end_label);
5125
5126 emit_insn (gen_movmem_short (dst, src,
5127 convert_to_mode (Pmode, count, 1)));
5128 emit_label (end_label);
5129 }
5130 return true;
5131 }
5132
5133 /* Emit code to set LEN bytes at DST to VAL.
5134 Make use of clrmem if VAL is zero. */
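/* Illustrative example: setting 5 bytes to the constant 0x55 stores the
   first byte and then propagates it with an overlapping move, roughly

     mvi 0(%rDST),0x55
     mvc 1(4,%rDST),0(%rDST)

   whereas a zero VAL with a small constant length goes through the
   clrmem pattern instead.  */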
5135
5136 void
5137 s390_expand_setmem (rtx dst, rtx len, rtx val)
5138 {
5139 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
5140 return;
5141
5142 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5143
5144 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
5145 {
5146 if (val == const0_rtx && INTVAL (len) <= 256)
5147 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
5148 else
5149 {
5150 /* Initialize memory by storing the first byte. */
5151 emit_move_insn (adjust_address (dst, QImode, 0), val);
5152
5153 if (INTVAL (len) > 1)
5154 {
5155 /* Initiate 1 byte overlap move.
5156 The first byte of DST is propagated through DSTP1.
5157 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
5158 DST is set to size 1 so the rest of the memory location
5159 does not count as source operand. */
5160 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
5161 set_mem_size (dst, 1);
5162
5163 emit_insn (gen_movmem_short (dstp1, dst,
5164 GEN_INT (INTVAL (len) - 2)));
5165 }
5166 }
5167 }
5168
5169 else if (TARGET_MVCLE)
5170 {
5171 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5172 emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val));
5173 }
5174
5175 else
5176 {
5177 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5178 rtx_code_label *loop_start_label = gen_label_rtx ();
5179 rtx_code_label *loop_end_label = gen_label_rtx ();
5180 rtx_code_label *end_label = gen_label_rtx ();
5181 machine_mode mode;
5182
5183 mode = GET_MODE (len);
5184 if (mode == VOIDmode)
5185 mode = Pmode;
5186
5187 dst_addr = gen_reg_rtx (Pmode);
5188 count = gen_reg_rtx (mode);
5189 blocks = gen_reg_rtx (mode);
5190
5191 convert_move (count, len, 1);
5192 emit_cmp_and_jump_insns (count, const0_rtx,
5193 EQ, NULL_RTX, mode, 1, end_label);
5194
5195 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5196 dst = change_address (dst, VOIDmode, dst_addr);
5197
5198 if (val == const0_rtx)
5199 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5200 OPTAB_DIRECT);
5201 else
5202 {
5203 dstp1 = adjust_address (dst, VOIDmode, 1);
5204 set_mem_size (dst, 1);
5205
5206 /* Initialize memory by storing the first byte. */
5207 emit_move_insn (adjust_address (dst, QImode, 0), val);
5208
5209 /* If count is 1 we are done. */
5210 emit_cmp_and_jump_insns (count, const1_rtx,
5211 EQ, NULL_RTX, mode, 1, end_label);
5212
5213 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
5214 OPTAB_DIRECT);
5215 }
5216 if (temp != count)
5217 emit_move_insn (count, temp);
5218
5219 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5220 OPTAB_DIRECT);
5221 if (temp != blocks)
5222 emit_move_insn (blocks, temp);
5223
5224 emit_cmp_and_jump_insns (blocks, const0_rtx,
5225 EQ, NULL_RTX, mode, 1, loop_end_label);
5226
5227 emit_label (loop_start_label);
5228
5229 if (TARGET_Z10
5230 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5231 {
5232 /* Issue a write prefetch for the +4 cache line. */
5233 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5234 GEN_INT (1024)),
5235 const1_rtx, const0_rtx);
5236 emit_insn (prefetch);
5237 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5238 }
5239
5240 if (val == const0_rtx)
5241 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5242 else
5243 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
5244 s390_load_address (dst_addr,
5245 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5246
5247 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5248 OPTAB_DIRECT);
5249 if (temp != blocks)
5250 emit_move_insn (blocks, temp);
5251
5252 emit_cmp_and_jump_insns (blocks, const0_rtx,
5253 EQ, NULL_RTX, mode, 1, loop_end_label);
5254
5255 emit_jump (loop_start_label);
5256 emit_label (loop_end_label);
5257
5258 if (val == const0_rtx)
5259 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5260 else
5261 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
5262 emit_label (end_label);
5263 }
5264 }
5265
5266 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5267 and return the result in TARGET. */
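/* Sketch (illustrative): a small constant LEN emits a single CLC followed
   by the cmpint pattern, which converts the resulting condition code into
   a signed, memcmp-style integer in TARGET; longer or variable lengths use
   CLCLE or a loop comparing 256-byte blocks.  */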
5268
5269 bool
5270 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5271 {
5272 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5273 rtx tmp;
5274
5275 /* When tuning for z10 or higher we rely on the Glibc functions to
5276 do the right thing.  Inline code is generated only for constant
5277 lengths below 64k.  */
5278 if (s390_tune >= PROCESSOR_2097_Z10
5279 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5280 return false;
5281
5282 /* As the result of CMPINT is inverted compared to what we need,
5283 we have to swap the operands. */
5284 tmp = op0; op0 = op1; op1 = tmp;
5285
5286 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5287 {
5288 if (INTVAL (len) > 0)
5289 {
5290 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5291 emit_insn (gen_cmpint (target, ccreg));
5292 }
5293 else
5294 emit_move_insn (target, const0_rtx);
5295 }
5296 else if (TARGET_MVCLE)
5297 {
5298 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5299 emit_insn (gen_cmpint (target, ccreg));
5300 }
5301 else
5302 {
5303 rtx addr0, addr1, count, blocks, temp;
5304 rtx_code_label *loop_start_label = gen_label_rtx ();
5305 rtx_code_label *loop_end_label = gen_label_rtx ();
5306 rtx_code_label *end_label = gen_label_rtx ();
5307 machine_mode mode;
5308
5309 mode = GET_MODE (len);
5310 if (mode == VOIDmode)
5311 mode = Pmode;
5312
5313 addr0 = gen_reg_rtx (Pmode);
5314 addr1 = gen_reg_rtx (Pmode);
5315 count = gen_reg_rtx (mode);
5316 blocks = gen_reg_rtx (mode);
5317
5318 convert_move (count, len, 1);
5319 emit_cmp_and_jump_insns (count, const0_rtx,
5320 EQ, NULL_RTX, mode, 1, end_label);
5321
5322 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5323 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5324 op0 = change_address (op0, VOIDmode, addr0);
5325 op1 = change_address (op1, VOIDmode, addr1);
5326
5327 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5328 OPTAB_DIRECT);
5329 if (temp != count)
5330 emit_move_insn (count, temp);
5331
5332 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5333 OPTAB_DIRECT);
5334 if (temp != blocks)
5335 emit_move_insn (blocks, temp);
5336
5337 emit_cmp_and_jump_insns (blocks, const0_rtx,
5338 EQ, NULL_RTX, mode, 1, loop_end_label);
5339
5340 emit_label (loop_start_label);
5341
5342 if (TARGET_Z10
5343 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5344 {
5345 rtx prefetch;
5346
5347 /* Issue a read prefetch for the +2 cache line of operand 1. */
5348 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5349 const0_rtx, const0_rtx);
5350 emit_insn (prefetch);
5351 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5352
5353 /* Issue a read prefetch for the +2 cache line of operand 2. */
5354 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5355 const0_rtx, const0_rtx);
5356 emit_insn (prefetch);
5357 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5358 }
5359
5360 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5361 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5362 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5363 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5364 temp = gen_rtx_SET (pc_rtx, temp);
5365 emit_jump_insn (temp);
5366
5367 s390_load_address (addr0,
5368 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5369 s390_load_address (addr1,
5370 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5371
5372 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5373 OPTAB_DIRECT);
5374 if (temp != blocks)
5375 emit_move_insn (blocks, temp);
5376
5377 emit_cmp_and_jump_insns (blocks, const0_rtx,
5378 EQ, NULL_RTX, mode, 1, loop_end_label);
5379
5380 emit_jump (loop_start_label);
5381 emit_label (loop_end_label);
5382
5383 emit_insn (gen_cmpmem_short (op0, op1,
5384 convert_to_mode (Pmode, count, 1)));
5385 emit_label (end_label);
5386
5387 emit_insn (gen_cmpint (target, ccreg));
5388 }
5389 return true;
5390 }
5391
5392 /* Emit a conditional jump to LABEL for condition code mask MASK using
5393 comparison operator COMPARISON.  Return the emitted jump insn.  */
5394
5395 static rtx
5396 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5397 {
5398 rtx temp;
5399
5400 gcc_assert (comparison == EQ || comparison == NE);
5401 gcc_assert (mask > 0 && mask < 15);
5402
5403 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5404 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5405 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5406 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5407 temp = gen_rtx_SET (pc_rtx, temp);
5408 return emit_jump_insn (temp);
5409 }
5410
5411 /* Emit the instructions to implement strlen of STRING and store the
5412 result in TARGET. The string has the known ALIGNMENT. This
5413 version uses vector instructions and is therefore not appropriate
5414 for targets prior to z13. */
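/* Sketch of the approach (illustrative): if the string start is not
   16-byte aligned, the first partial chunk is fetched with a
   vector-load-with-length (vll) so that the access stays within the
   current 16-byte block; the remaining data is then scanned in a loop
   of aligned 16-byte vector loads, each followed by a
   vector-find-element-not-equal with zero search (vfene) that ends the
   loop once the terminating zero byte has been seen.  */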
5415
5416 void
5417 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5418 {
5419 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5420 int very_likely = REG_BR_PROB_BASE - 1;
5421 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5422 rtx str_reg = gen_reg_rtx (V16QImode);
5423 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5424 rtx str_idx_reg = gen_reg_rtx (Pmode);
5425 rtx result_reg = gen_reg_rtx (V16QImode);
5426 rtx is_aligned_label = gen_label_rtx ();
5427 rtx into_loop_label = NULL_RTX;
5428 rtx loop_start_label = gen_label_rtx ();
5429 rtx temp;
5430 rtx len = gen_reg_rtx (QImode);
5431 rtx cond;
5432
5433 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5434 emit_move_insn (str_idx_reg, const0_rtx);
5435
5436 if (INTVAL (alignment) < 16)
5437 {
5438 /* Check whether the address happens to be aligned properly so
5439 jump directly to the aligned loop. */
5440 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5441 str_addr_base_reg, GEN_INT (15)),
5442 const0_rtx, EQ, NULL_RTX,
5443 Pmode, 1, is_aligned_label);
5444
5445 temp = gen_reg_rtx (Pmode);
5446 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5447 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5448 gcc_assert (REG_P (temp));
5449 highest_index_to_load_reg =
5450 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5451 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5452 gcc_assert (REG_P (highest_index_to_load_reg));
5453 emit_insn (gen_vllv16qi (str_reg,
5454 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5455 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5456
5457 into_loop_label = gen_label_rtx ();
5458 s390_emit_jump (into_loop_label, NULL_RTX);
5459 emit_barrier ();
5460 }
5461
5462 emit_label (is_aligned_label);
5463 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5464
5465 /* Reaching this point, we perform only 16-byte aligned
5466 loads.  */
5467 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5468
5469 emit_label (loop_start_label);
5470 LABEL_NUSES (loop_start_label) = 1;
5471
5472 /* Load 16 bytes of the string into VR. */
5473 emit_move_insn (str_reg,
5474 gen_rtx_MEM (V16QImode,
5475 gen_rtx_PLUS (Pmode, str_idx_reg,
5476 str_addr_base_reg)));
5477 if (into_loop_label != NULL_RTX)
5478 {
5479 emit_label (into_loop_label);
5480 LABEL_NUSES (into_loop_label) = 1;
5481 }
5482
5483 /* Increment string index by 16 bytes. */
5484 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5485 str_idx_reg, 1, OPTAB_DIRECT);
5486
5487 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5488 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5489
5490 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5491 REG_BR_PROB, very_likely);
5492 emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
5493
5494 /* If the string pointer wasn't aligned, we have loaded fewer than 16
5495 bytes and the remaining bytes got filled with zeros (by vll).
5496 Now we have to check whether the resulting index lies within the
5497 bytes actually part of the string. */
5498
5499 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5500 highest_index_to_load_reg);
5501 s390_load_address (highest_index_to_load_reg,
5502 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5503 const1_rtx));
5504 if (TARGET_64BIT)
5505 emit_insn (gen_movdicc (str_idx_reg, cond,
5506 highest_index_to_load_reg, str_idx_reg));
5507 else
5508 emit_insn (gen_movsicc (str_idx_reg, cond,
5509 highest_index_to_load_reg, str_idx_reg));
5510
5511 add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
5512 very_unlikely);
5513
5514 expand_binop (Pmode, add_optab, str_idx_reg,
5515 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5516 /* FIXME: len is already zero extended - so avoid the llgcr emitted
5517 here. */
5518 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5519 convert_to_mode (Pmode, len, 1),
5520 target, 1, OPTAB_DIRECT);
5521 if (temp != target)
5522 emit_move_insn (target, temp);
5523 }
5524
5525 /* Expand conditional increment or decrement using alc/slb instructions.
5526 Should generate code setting DST to either SRC or SRC + INCREMENT,
5527 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
5528 Returns true if successful, false otherwise.
5529
5530 That makes it possible to implement some if-constructs without jumps e.g.:
5531 (borrow = CC0 | CC1 and carry = CC2 | CC3)
5532 unsigned int a, b, c;
5533 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
5534 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
5535 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
5536 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
5537
5538 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
5539 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
5540 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
5541 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
5542 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
5543
5544 bool
5545 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
5546 rtx dst, rtx src, rtx increment)
5547 {
5548 machine_mode cmp_mode;
5549 machine_mode cc_mode;
5550 rtx op_res;
5551 rtx insn;
5552 rtvec p;
5553 int ret;
5554
5555 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
5556 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
5557 cmp_mode = SImode;
5558 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
5559 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
5560 cmp_mode = DImode;
5561 else
5562 return false;
5563
5564 /* Try ADD LOGICAL WITH CARRY. */
5565 if (increment == const1_rtx)
5566 {
5567 /* Determine CC mode to use. */
5568 if (cmp_code == EQ || cmp_code == NE)
5569 {
5570 if (cmp_op1 != const0_rtx)
5571 {
5572 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5573 NULL_RTX, 0, OPTAB_WIDEN);
5574 cmp_op1 = const0_rtx;
5575 }
5576
5577 cmp_code = cmp_code == EQ ? LEU : GTU;
5578 }
5579
5580 if (cmp_code == LTU || cmp_code == LEU)
5581 {
5582 rtx tem = cmp_op0;
5583 cmp_op0 = cmp_op1;
5584 cmp_op1 = tem;
5585 cmp_code = swap_condition (cmp_code);
5586 }
5587
5588 switch (cmp_code)
5589 {
5590 case GTU:
5591 cc_mode = CCUmode;
5592 break;
5593
5594 case GEU:
5595 cc_mode = CCL3mode;
5596 break;
5597
5598 default:
5599 return false;
5600 }
5601
5602 /* Emit comparison instruction pattern. */
5603 if (!register_operand (cmp_op0, cmp_mode))
5604 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5605
5606 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5607 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5608 /* We use insn_invalid_p here to add clobbers if required. */
5609 ret = insn_invalid_p (emit_insn (insn), false);
5610 gcc_assert (!ret);
5611
5612 /* Emit ALC instruction pattern. */
5613 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5614 gen_rtx_REG (cc_mode, CC_REGNUM),
5615 const0_rtx);
5616
5617 if (src != const0_rtx)
5618 {
5619 if (!register_operand (src, GET_MODE (dst)))
5620 src = force_reg (GET_MODE (dst), src);
5621
5622 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
5623 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
5624 }
5625
5626 p = rtvec_alloc (2);
5627 RTVEC_ELT (p, 0) =
5628 gen_rtx_SET (dst, op_res);
5629 RTVEC_ELT (p, 1) =
5630 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5631 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5632
5633 return true;
5634 }
5635
5636 /* Try SUBTRACT LOGICAL WITH BORROW. */
5637 if (increment == constm1_rtx)
5638 {
5639 /* Determine CC mode to use. */
5640 if (cmp_code == EQ || cmp_code == NE)
5641 {
5642 if (cmp_op1 != const0_rtx)
5643 {
5644 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5645 NULL_RTX, 0, OPTAB_WIDEN);
5646 cmp_op1 = const0_rtx;
5647 }
5648
5649 cmp_code = cmp_code == EQ ? LEU : GTU;
5650 }
5651
5652 if (cmp_code == GTU || cmp_code == GEU)
5653 {
5654 rtx tem = cmp_op0;
5655 cmp_op0 = cmp_op1;
5656 cmp_op1 = tem;
5657 cmp_code = swap_condition (cmp_code);
5658 }
5659
5660 switch (cmp_code)
5661 {
5662 case LEU:
5663 cc_mode = CCUmode;
5664 break;
5665
5666 case LTU:
5667 cc_mode = CCL3mode;
5668 break;
5669
5670 default:
5671 return false;
5672 }
5673
5674 /* Emit comparison instruction pattern. */
5675 if (!register_operand (cmp_op0, cmp_mode))
5676 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5677
5678 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5679 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5680 /* We use insn_invalid_p here to add clobbers if required. */
5681 ret = insn_invalid_p (emit_insn (insn), false);
5682 gcc_assert (!ret);
5683
5684 /* Emit SLB instruction pattern. */
5685 if (!register_operand (src, GET_MODE (dst)))
5686 src = force_reg (GET_MODE (dst), src);
5687
5688 op_res = gen_rtx_MINUS (GET_MODE (dst),
5689 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
5690 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5691 gen_rtx_REG (cc_mode, CC_REGNUM),
5692 const0_rtx));
5693 p = rtvec_alloc (2);
5694 RTVEC_ELT (p, 0) =
5695 gen_rtx_SET (dst, op_res);
5696 RTVEC_ELT (p, 1) =
5697 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5698 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5699
5700 return true;
5701 }
5702
5703 return false;
5704 }
5705
5706 /* Expand code for the insv template. Return true if successful. */
5707
5708 bool
5709 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
5710 {
5711 int bitsize = INTVAL (op1);
5712 int bitpos = INTVAL (op2);
5713 machine_mode mode = GET_MODE (dest);
5714 machine_mode smode;
5715 int smode_bsize, mode_bsize;
5716 rtx op, clobber;
5717
5718 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
5719 return false;
5720
5721 /* Generate INSERT IMMEDIATE (IILL et al). */
5722 /* (set (ze (reg)) (const_int)). */
5723 if (TARGET_ZARCH
5724 && register_operand (dest, word_mode)
5725 && (bitpos % 16) == 0
5726 && (bitsize % 16) == 0
5727 && const_int_operand (src, VOIDmode))
5728 {
5729 HOST_WIDE_INT val = INTVAL (src);
5730 int regpos = bitpos + bitsize;
5731
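/* Fill the field starting at its low-order end: bit positions here count
   from the most significant bit, so the first iteration uses the largest
   position and the low-order bits of VAL, which is then shifted right for
   the next piece.  */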
5732 while (regpos > bitpos)
5733 {
5734 machine_mode putmode;
5735 int putsize;
5736
5737 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
5738 putmode = SImode;
5739 else
5740 putmode = HImode;
5741
5742 putsize = GET_MODE_BITSIZE (putmode);
5743 regpos -= putsize;
5744 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
5745 GEN_INT (putsize),
5746 GEN_INT (regpos)),
5747 gen_int_mode (val, putmode));
5748 val >>= putsize;
5749 }
5750 gcc_assert (regpos == bitpos);
5751 return true;
5752 }
5753
5754 smode = smallest_mode_for_size (bitsize, MODE_INT);
5755 smode_bsize = GET_MODE_BITSIZE (smode);
5756 mode_bsize = GET_MODE_BITSIZE (mode);
5757
5758 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
5759 if (bitpos == 0
5760 && (bitsize % BITS_PER_UNIT) == 0
5761 && MEM_P (dest)
5762 && (register_operand (src, word_mode)
5763 || const_int_operand (src, VOIDmode)))
5764 {
5765 /* Emit standard pattern if possible. */
5766 if (smode_bsize == bitsize)
5767 {
5768 emit_move_insn (adjust_address (dest, smode, 0),
5769 gen_lowpart (smode, src));
5770 return true;
5771 }
5772
5773 /* (set (ze (mem)) (const_int)). */
5774 else if (const_int_operand (src, VOIDmode))
5775 {
5776 int size = bitsize / BITS_PER_UNIT;
5777 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
5778 BLKmode,
5779 UNITS_PER_WORD - size);
5780
5781 dest = adjust_address (dest, BLKmode, 0);
5782 set_mem_size (dest, size);
5783 s390_expand_movmem (dest, src_mem, GEN_INT (size));
5784 return true;
5785 }
5786
5787 /* (set (ze (mem)) (reg)). */
5788 else if (register_operand (src, word_mode))
5789 {
5790 if (bitsize <= 32)
5791 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
5792 const0_rtx), src);
5793 else
5794 {
5795 /* Emit st,stcmh sequence. */
5796 int stcmh_width = bitsize - 32;
5797 int size = stcmh_width / BITS_PER_UNIT;
5798
5799 emit_move_insn (adjust_address (dest, SImode, size),
5800 gen_lowpart (SImode, src));
5801 set_mem_size (dest, size);
5802 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
5803 GEN_INT (stcmh_width),
5804 const0_rtx),
5805 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
5806 }
5807 return true;
5808 }
5809 }
5810
5811 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
5812 if ((bitpos % BITS_PER_UNIT) == 0
5813 && (bitsize % BITS_PER_UNIT) == 0
5814 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
5815 && MEM_P (src)
5816 && (mode == DImode || mode == SImode)
5817 && register_operand (dest, mode))
5818 {
5819 /* Emit a strict_low_part pattern if possible. */
5820 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
5821 {
5822 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
5823 op = gen_rtx_SET (op, gen_lowpart (smode, src));
5824 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5825 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
5826 return true;
5827 }
5828
5829 /* ??? There are more powerful versions of ICM that are not
5830 completely represented in the md file. */
5831 }
5832
5833 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
5834 if (TARGET_Z10 && (mode == DImode || mode == SImode))
5835 {
5836 machine_mode mode_s = GET_MODE (src);
5837
5838 if (mode_s == VOIDmode)
5839 {
5840 /* Assume const_int etc already in the proper mode. */
5841 src = force_reg (mode, src);
5842 }
5843 else if (mode_s != mode)
5844 {
5845 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
5846 src = force_reg (mode_s, src);
5847 src = gen_lowpart (mode, src);
5848 }
5849
5850 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
5851 op = gen_rtx_SET (op, src);
5852
5853 if (!TARGET_ZEC12)
5854 {
5855 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5856 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
5857 }
5858 emit_insn (op);
5859
5860 return true;
5861 }
5862
5863 return false;
5864 }
5865
5866 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
5867 register that holds VAL of mode MODE shifted by COUNT bits. */
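/* For example, a QImode VAL of 0xab with a COUNT of 16 yields an SImode
   register holding 0x00ab0000: the AND keeps only the low byte and the
   ASHIFT moves it to the requested bit position.  */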
5868
5869 static inline rtx
5870 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
5871 {
5872 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
5873 NULL_RTX, 1, OPTAB_DIRECT);
5874 return expand_simple_binop (SImode, ASHIFT, val, count,
5875 NULL_RTX, 1, OPTAB_DIRECT);
5876 }
5877
5878 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
5879 the result in TARGET. */
5880
5881 void
5882 s390_expand_vec_compare (rtx target, enum rtx_code cond,
5883 rtx cmp_op1, rtx cmp_op2)
5884 {
5885 machine_mode mode = GET_MODE (target);
5886 bool neg_p = false, swap_p = false;
5887 rtx tmp;
5888
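/* The vector compare patterns directly provide only EQ, GT and GTU (and,
   for V2DF, also GE plus a few special unordered cases).  Every other
   condition is rewritten in terms of these, swapping the operands and/or
   negating the result afterwards.  */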
5889 if (GET_MODE (cmp_op1) == V2DFmode)
5890 {
5891 switch (cond)
5892 {
5893 /* NE a != b -> !(a == b) */
5894 case NE: cond = EQ; neg_p = true; break;
5895 /* UNGT a u> b -> !(b >= a) */
5896 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
5897 /* UNGE a u>= b -> !(b > a) */
5898 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
5899 /* LE: a <= b -> b >= a */
5900 case LE: cond = GE; swap_p = true; break;
5901 /* UNLE: a u<= b -> !(a > b) */
5902 case UNLE: cond = GT; neg_p = true; break;
5903 /* LT: a < b -> b > a */
5904 case LT: cond = GT; swap_p = true; break;
5905 /* UNLT: a u< b -> !(a >= b) */
5906 case UNLT: cond = GE; neg_p = true; break;
5907 case UNEQ:
5908 emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
5909 return;
5910 case LTGT:
5911 emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
5912 return;
5913 case ORDERED:
5914 emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
5915 return;
5916 case UNORDERED:
5917 emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
5918 return;
5919 default: break;
5920 }
5921 }
5922 else
5923 {
5924 switch (cond)
5925 {
5926 /* NE: a != b -> !(a == b) */
5927 case NE: cond = EQ; neg_p = true; break;
5928 /* GE: a >= b -> !(b > a) */
5929 case GE: cond = GT; neg_p = true; swap_p = true; break;
5930 /* GEU: a >= b -> !(b > a) */
5931 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
5932 /* LE: a <= b -> !(a > b) */
5933 case LE: cond = GT; neg_p = true; break;
5934 /* LEU: a <= b -> !(a > b) */
5935 case LEU: cond = GTU; neg_p = true; break;
5936 /* LT: a < b -> b > a */
5937 case LT: cond = GT; swap_p = true; break;
5938 /* LTU: a < b -> b > a */
5939 case LTU: cond = GTU; swap_p = true; break;
5940 default: break;
5941 }
5942 }
5943
5944 if (swap_p)
5945 {
5946 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
5947 }
5948
5949 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
5950 mode,
5951 cmp_op1, cmp_op2)));
5952 if (neg_p)
5953 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
5954 }
5955
5956 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
5957 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
5958 elements in CMP1 and CMP2 fulfill the comparison. */
5959 void
5960 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
5961 rtx cmp1, rtx cmp2, bool all_p)
5962 {
5963 enum rtx_code new_code = code;
5964 machine_mode cmp_mode, full_cmp_mode, scratch_mode;
5965 rtx tmp_reg = gen_reg_rtx (SImode);
5966 bool swap_p = false;
5967
5968 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
5969 {
5970 switch (code)
5971 {
5972 case EQ: cmp_mode = CCVEQmode; break;
5973 case NE: cmp_mode = CCVEQmode; break;
5974 case GT: cmp_mode = CCVHmode; break;
5975 case GE: cmp_mode = CCVHmode; new_code = LE; swap_p = true; break;
5976 case LT: cmp_mode = CCVHmode; new_code = GT; swap_p = true; break;
5977 case LE: cmp_mode = CCVHmode; new_code = LE; break;
5978 case GTU: cmp_mode = CCVHUmode; break;
5979 case GEU: cmp_mode = CCVHUmode; new_code = LEU; swap_p = true; break;
5980 case LTU: cmp_mode = CCVHUmode; new_code = GTU; swap_p = true; break;
5981 case LEU: cmp_mode = CCVHUmode; new_code = LEU; break;
5982 default: gcc_unreachable ();
5983 }
5984 scratch_mode = GET_MODE (cmp1);
5985 }
5986 else if (GET_MODE (cmp1) == V2DFmode)
5987 {
5988 switch (code)
5989 {
5990 case EQ: cmp_mode = CCVEQmode; break;
5991 case NE: cmp_mode = CCVEQmode; break;
5992 case GT: cmp_mode = CCVFHmode; break;
5993 case GE: cmp_mode = CCVFHEmode; break;
5994 case UNLE: cmp_mode = CCVFHmode; break;
5995 case UNLT: cmp_mode = CCVFHEmode; break;
5996 case LT: cmp_mode = CCVFHmode; new_code = GT; swap_p = true; break;
5997 case LE: cmp_mode = CCVFHEmode; new_code = GE; swap_p = true; break;
5998 default: gcc_unreachable ();
5999 }
6000 scratch_mode = V2DImode;
6001 }
6002 else
6003 gcc_unreachable ();
6004
6005 if (!all_p)
6006 switch (cmp_mode)
6007 {
6008 case CCVEQmode: full_cmp_mode = CCVEQANYmode; break;
6009 case CCVHmode: full_cmp_mode = CCVHANYmode; break;
6010 case CCVHUmode: full_cmp_mode = CCVHUANYmode; break;
6011 case CCVFHmode: full_cmp_mode = CCVFHANYmode; break;
6012 case CCVFHEmode: full_cmp_mode = CCVFHEANYmode; break;
6013 default: gcc_unreachable ();
6014 }
6015 else
6016 /* The modes without ANY match the ALL modes. */
6017 full_cmp_mode = cmp_mode;
6018
6019 if (swap_p)
6020 {
6021 rtx tmp = cmp2;
6022 cmp2 = cmp1;
6023 cmp1 = tmp;
6024 }
6025
6026 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6027 gen_rtvec (2, gen_rtx_SET (
6028 gen_rtx_REG (cmp_mode, CC_REGNUM),
6029 gen_rtx_COMPARE (cmp_mode, cmp1, cmp2)),
6030 gen_rtx_CLOBBER (VOIDmode,
6031 gen_rtx_SCRATCH (scratch_mode)))));
6032 emit_move_insn (target, const0_rtx);
6033 emit_move_insn (tmp_reg, const1_rtx);
6034
6035 emit_move_insn (target,
6036 gen_rtx_IF_THEN_ELSE (SImode,
6037 gen_rtx_fmt_ee (new_code, VOIDmode,
6038 gen_rtx_REG (full_cmp_mode, CC_REGNUM),
6039 const0_rtx),
6040 target, tmp_reg));
6041 }
6042
6043 /* Generate a vector comparison expression loading either elements of
6044 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6045 and CMP_OP2. */
6046
6047 void
6048 s390_expand_vcond (rtx target, rtx then, rtx els,
6049 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6050 {
6051 rtx tmp;
6052 machine_mode result_mode;
6053 rtx result_target;
6054
6055 /* We always use an integral type vector to hold the comparison
6056 result. */
6057 result_mode = GET_MODE (cmp_op1) == V2DFmode ? V2DImode : GET_MODE (cmp_op1);
6058 result_target = gen_reg_rtx (result_mode);
6059
6060 /* Alternatively this could be done by reload by lowering the cmp*
6061 predicates. But it appears to be better for scheduling etc. to
6062 have it done early. */
6063 if (!REG_P (cmp_op1))
6064 cmp_op1 = force_reg (GET_MODE (target), cmp_op1);
6065
6066 if (!REG_P (cmp_op2))
6067 cmp_op2 = force_reg (GET_MODE (target), cmp_op2);
6068
6069 s390_expand_vec_compare (result_target, cond,
6070 cmp_op1, cmp_op2);
6071
6072 /* If the results are supposed to be either -1 or 0 we are done
6073 since this is what our compare instructions generate anyway. */
6074 if (constm1_operand (then, GET_MODE (then))
6075 && const0_operand (els, GET_MODE (els)))
6076 {
6077 emit_move_insn (target, gen_rtx_SUBREG (GET_MODE (target),
6078 result_target, 0));
6079 return;
6080 }
6081
6082 /* Otherwise we will do a vsel afterwards. */
6083 /* This gets triggered e.g.
6084 with gcc.c-torture/compile/pr53410-1.c */
6085 if (!REG_P (then))
6086 then = force_reg (GET_MODE (target), then);
6087
6088 if (!REG_P (els))
6089 els = force_reg (GET_MODE (target), els);
6090
6091 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6092 result_target,
6093 CONST0_RTX (result_mode));
6094
6095 /* We compared the result against zero above so we have to swap then
6096 and els here. */
6097 tmp = gen_rtx_IF_THEN_ELSE (GET_MODE (target), tmp, els, then);
6098
6099 gcc_assert (GET_MODE (target) == GET_MODE (then));
6100 emit_insn (gen_rtx_SET (target, tmp));
6101 }
6102
6103 /* Emit the RTX necessary to initialize the vector TARGET with values
6104 in VALS. */
6105 void
6106 s390_expand_vec_init (rtx target, rtx vals)
6107 {
6108 machine_mode mode = GET_MODE (target);
6109 machine_mode inner_mode = GET_MODE_INNER (mode);
6110 int n_elts = GET_MODE_NUNITS (mode);
6111 bool all_same = true, all_regs = true, all_const_int = true;
6112 rtx x;
6113 int i;
6114
6115 for (i = 0; i < n_elts; ++i)
6116 {
6117 x = XVECEXP (vals, 0, i);
6118
6119 if (!CONST_INT_P (x))
6120 all_const_int = false;
6121
6122 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6123 all_same = false;
6124
6125 if (!REG_P (x))
6126 all_regs = false;
6127 }
6128
6129 /* Use vector gen mask or vector gen byte mask if possible. */
6130 if (all_same && all_const_int
6131 && (XVECEXP (vals, 0, 0) == const0_rtx
6132 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6133 NULL, NULL)
6134 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6135 {
6136 emit_insn (gen_rtx_SET (target,
6137 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6138 return;
6139 }
6140
6141 if (all_same)
6142 {
6143 emit_insn (gen_rtx_SET (target,
6144 gen_rtx_VEC_DUPLICATE (mode,
6145 XVECEXP (vals, 0, 0))));
6146 return;
6147 }
6148
6149 if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode)
6150 {
6151 /* Use vector load pair. */
6152 emit_insn (gen_rtx_SET (target,
6153 gen_rtx_VEC_CONCAT (mode,
6154 XVECEXP (vals, 0, 0),
6155 XVECEXP (vals, 0, 1))));
6156 return;
6157 }
6158
6159 /* We are about to set the vector elements one by one. Zero out the
6160 full register first in order to help the data flow framework to
6161 detect it as a full VR set. */
6162 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6163
6164 /* Unfortunately the vec_init expander is not allowed to fail. So
6165 we have to implement the fallback ourselves. */
6166 for (i = 0; i < n_elts; i++)
6167 emit_insn (gen_rtx_SET (target,
6168 gen_rtx_UNSPEC (mode,
6169 gen_rtvec (3, XVECEXP (vals, 0, i),
6170 GEN_INT (i), target),
6171 UNSPEC_VEC_SET)));
6172 }
6173
6174 /* Structure to hold the initial parameters for a compare_and_swap operation
6175 in HImode and QImode. */
6176
6177 struct alignment_context
6178 {
6179 rtx memsi; /* SI aligned memory location. */
6180 rtx shift; /* Bit offset with regard to lsb. */
6181 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6182 rtx modemaski; /* ~modemask */
6183 bool aligned; /* True if memory is aligned, false otherwise. */
6184 };
6185
6186 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6187 structure AC for transparent simplification, if the memory alignment is known
6188 to be at least 32 bits. MEM is the memory location for the actual operation
6189 and MODE its mode. */
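/* Example for the unaligned case: a QImode access one byte past an
   SImode-aligned address ends up with SHIFT = (4 - 1 - 1) * 8 = 16 bits
   and MODEMASK = 0xff << 16, selecting the second byte of the big-endian
   word.  */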
6190
6191 static void
6192 init_alignment_context (struct alignment_context *ac, rtx mem,
6193 machine_mode mode)
6194 {
6195 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6196 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6197
6198 if (ac->aligned)
6199 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6200 else
6201 {
6202 /* Alignment is unknown. */
6203 rtx byteoffset, addr, align;
6204
6205 /* Force the address into a register. */
6206 addr = force_reg (Pmode, XEXP (mem, 0));
6207
6208 /* Align it to SImode. */
6209 align = expand_simple_binop (Pmode, AND, addr,
6210 GEN_INT (-GET_MODE_SIZE (SImode)),
6211 NULL_RTX, 1, OPTAB_DIRECT);
6212 /* Generate MEM. */
6213 ac->memsi = gen_rtx_MEM (SImode, align);
6214 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6215 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6216 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6217
6218 /* Calculate shiftcount. */
6219 byteoffset = expand_simple_binop (Pmode, AND, addr,
6220 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6221 NULL_RTX, 1, OPTAB_DIRECT);
6222 /* As we already have some offset, evaluate the remaining distance. */
6223 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6224 NULL_RTX, 1, OPTAB_DIRECT);
6225 }
6226
6227 /* Shift is the byte count, but we need the bitcount. */
6228 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6229 NULL_RTX, 1, OPTAB_DIRECT);
6230
6231 /* Calculate masks. */
6232 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6233 GEN_INT (GET_MODE_MASK (mode)),
6234 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6235 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6236 NULL_RTX, 1);
6237 }
6238
6239 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6240 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6241 perform the merge in SEQ2. */
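/* The preparation in SEQ1 does not depend on VAL and can therefore be
   emitted outside the compare-and-swap retry loop, whereas the merge in
   SEQ2 reads VAL and has to be redone on every iteration.  */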
6242
6243 static rtx
6244 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6245 machine_mode mode, rtx val, rtx ins)
6246 {
6247 rtx tmp;
6248
6249 if (ac->aligned)
6250 {
6251 start_sequence ();
6252 tmp = copy_to_mode_reg (SImode, val);
6253 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6254 const0_rtx, ins))
6255 {
6256 *seq1 = NULL;
6257 *seq2 = get_insns ();
6258 end_sequence ();
6259 return tmp;
6260 }
6261 end_sequence ();
6262 }
6263
6264 /* Failed to use insv. Generate a two part shift and mask. */
6265 start_sequence ();
6266 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6267 *seq1 = get_insns ();
6268 end_sequence ();
6269
6270 start_sequence ();
6271 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6272 *seq2 = get_insns ();
6273 end_sequence ();
6274
6275 return tmp;
6276 }
6277
6278 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6279 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6280 value to set if CMP == MEM. */
6281
6282 void
6283 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6284 rtx cmp, rtx new_rtx, bool is_weak)
6285 {
6286 struct alignment_context ac;
6287 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6288 rtx res = gen_reg_rtx (SImode);
6289 rtx_code_label *csloop = NULL, *csend = NULL;
6290
6291 gcc_assert (MEM_P (mem));
6292
6293 init_alignment_context (&ac, mem, mode);
6294
6295 /* Load full word. Subsequent loads are performed by CS. */
6296 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6297 NULL_RTX, 1, OPTAB_DIRECT);
6298
6299 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6300 possible, we try to use insv to make this happen efficiently. If
6301 that fails we'll generate code both inside and outside the loop. */
6302 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6303 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6304
6305 if (seq0)
6306 emit_insn (seq0);
6307 if (seq1)
6308 emit_insn (seq1);
6309
6310 /* Start CS loop. */
6311 if (!is_weak)
6312 {
6313 /* Begin assuming success. */
6314 emit_move_insn (btarget, const1_rtx);
6315
6316 csloop = gen_label_rtx ();
6317 csend = gen_label_rtx ();
6318 emit_label (csloop);
6319 }
6320
6321 /* val = "<mem>00..0<mem>"
6322 * cmp = "00..0<cmp>00..0"
6323 * new = "00..0<new>00..0"
6324 */
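/* SEQ2 and SEQ3 compute CMPV and NEWV by merging CMP and NEW_RTX into the
   masked word VAL, so the full-word compare-and-swap below operates on
   values that differ from the originally loaded word only within the
   HImode/QImode field.  */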
6325
6326 emit_insn (seq2);
6327 emit_insn (seq3);
6328
6329 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
6330 if (is_weak)
6331 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6332 else
6333 {
6334 rtx tmp;
6335
6336 /* Jump to end if we're done (likely?). */
6337 s390_emit_jump (csend, cc);
6338
6339 /* Check for changes outside the mode, and loop internally if so.
6340 Arrange the moves so that the compare is adjacent to the
6341 branch so that we can generate CRJ. */
6342 tmp = copy_to_reg (val);
6343 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6344 1, OPTAB_DIRECT);
6345 cc = s390_emit_compare (NE, val, tmp);
6346 s390_emit_jump (csloop, cc);
6347
6348 /* Failed. */
6349 emit_move_insn (btarget, const0_rtx);
6350 emit_label (csend);
6351 }
6352
6353 /* Return the correct part of the bitfield. */
6354 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
6355 NULL_RTX, 1, OPTAB_DIRECT), 1);
6356 }
6357
6358 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
6359 and VAL the value to play with. If AFTER is true then store the value
6360 MEM holds after the operation, if AFTER is false then store the value MEM
6361 holds before the operation. If TARGET is zero then discard that value, else
6362 store it to TARGET. */
6363
6364 void
6365 s390_expand_atomic (machine_mode mode, enum rtx_code code,
6366 rtx target, rtx mem, rtx val, bool after)
6367 {
6368 struct alignment_context ac;
6369 rtx cmp;
6370 rtx new_rtx = gen_reg_rtx (SImode);
6371 rtx orig = gen_reg_rtx (SImode);
6372 rtx_code_label *csloop = gen_label_rtx ();
6373
6374 gcc_assert (!target || register_operand (target, VOIDmode));
6375 gcc_assert (MEM_P (mem));
6376
6377 init_alignment_context (&ac, mem, mode);
6378
6379 /* Shift val to the correct bit positions.
6380 Preserve "icm", but prevent "ex icm". */
6381 if (!(ac.aligned && code == SET && MEM_P (val)))
6382 val = s390_expand_mask_and_shift (val, mode, ac.shift);
6383
6384 /* Further preparation insns. */
6385 if (code == PLUS || code == MINUS)
6386 emit_move_insn (orig, val);
6387 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
6388 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
6389 NULL_RTX, 1, OPTAB_DIRECT);
6390
6391 /* Load full word. Subsequent loads are performed by CS. */
6392 cmp = force_reg (SImode, ac.memsi);
6393
6394 /* Start CS loop. */
6395 emit_label (csloop);
6396 emit_move_insn (new_rtx, cmp);
6397
6398 /* Patch new with val at correct position. */
6399 switch (code)
6400 {
6401 case PLUS:
6402 case MINUS:
6403 val = expand_simple_binop (SImode, code, new_rtx, orig,
6404 NULL_RTX, 1, OPTAB_DIRECT);
6405 val = expand_simple_binop (SImode, AND, val, ac.modemask,
6406 NULL_RTX, 1, OPTAB_DIRECT);
6407 /* FALLTHRU */
6408 case SET:
6409 if (ac.aligned && MEM_P (val))
6410 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
6411 0, 0, SImode, val);
6412 else
6413 {
6414 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
6415 NULL_RTX, 1, OPTAB_DIRECT);
6416 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
6417 NULL_RTX, 1, OPTAB_DIRECT);
6418 }
6419 break;
6420 case AND:
6421 case IOR:
6422 case XOR:
6423 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
6424 NULL_RTX, 1, OPTAB_DIRECT);
6425 break;
6426 case MULT: /* NAND */
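/* VAL was widened above to "11..1<val>11..1", so the AND leaves all bits
   outside the field untouched; XORing with MODEMASK then inverts only the
   field bits, giving the NAND of the old field contents with VAL.  */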
6427 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
6428 NULL_RTX, 1, OPTAB_DIRECT);
6429 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
6430 NULL_RTX, 1, OPTAB_DIRECT);
6431 break;
6432 default:
6433 gcc_unreachable ();
6434 }
6435
6436 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
6437 ac.memsi, cmp, new_rtx));
6438
6439 /* Return the correct part of the bitfield. */
6440 if (target)
6441 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
6442 after ? new_rtx : cmp, ac.shift,
6443 NULL_RTX, 1, OPTAB_DIRECT), 1);
6444 }
6445
6446 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6447 We need to emit DTP-relative relocations. */
6448
6449 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6450
6451 static void
6452 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
6453 {
6454 switch (size)
6455 {
6456 case 4:
6457 fputs ("\t.long\t", file);
6458 break;
6459 case 8:
6460 fputs ("\t.quad\t", file);
6461 break;
6462 default:
6463 gcc_unreachable ();
6464 }
6465 output_addr_const (file, x);
6466 fputs ("@DTPOFF", file);
6467 }
6468
6469 /* Return the proper mode for REGNO being represented in the dwarf
6470 unwind table. */
6471 machine_mode
6472 s390_dwarf_frame_reg_mode (int regno)
6473 {
6474 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
6475
6476 /* The rightmost 64 bits of vector registers are call-clobbered. */
6477 if (GET_MODE_SIZE (save_mode) > 8)
6478 save_mode = DImode;
6479
6480 return save_mode;
6481 }
6482
6483 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
6484 /* Implement TARGET_MANGLE_TYPE. */
6485
6486 static const char *
6487 s390_mangle_type (const_tree type)
6488 {
6489 type = TYPE_MAIN_VARIANT (type);
6490
6491 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
6492 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
6493 return NULL;
6494
6495 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
6496 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
6497 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
6498 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
6499
6500 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
6501 && TARGET_LONG_DOUBLE_128)
6502 return "g";
6503
6504 /* For all other types, use normal C++ mangling. */
6505 return NULL;
6506 }
6507 #endif
6508
6509 /* In the name of slightly smaller debug output, and to cater to
6510 general assembler lossage, recognize various UNSPEC sequences
6511 and turn them back into a direct symbol reference. */
6512
6513 static rtx
6514 s390_delegitimize_address (rtx orig_x)
6515 {
6516 rtx x, y;
6517
6518 orig_x = delegitimize_mem_from_attrs (orig_x);
6519 x = orig_x;
6520
6521 /* Extract the symbol ref from:
6522 (plus:SI (reg:SI 12 %r12)
6523 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
6524 UNSPEC_GOTOFF/PLTOFF)))
6525 and
6526 (plus:SI (reg:SI 12 %r12)
6527 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
6528 UNSPEC_GOTOFF/PLTOFF)
6529 (const_int 4 [0x4])))) */
6530 if (GET_CODE (x) == PLUS
6531 && REG_P (XEXP (x, 0))
6532 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
6533 && GET_CODE (XEXP (x, 1)) == CONST)
6534 {
6535 HOST_WIDE_INT offset = 0;
6536
6537 /* The const operand. */
6538 y = XEXP (XEXP (x, 1), 0);
6539
6540 if (GET_CODE (y) == PLUS
6541 && GET_CODE (XEXP (y, 1)) == CONST_INT)
6542 {
6543 offset = INTVAL (XEXP (y, 1));
6544 y = XEXP (y, 0);
6545 }
6546
6547 if (GET_CODE (y) == UNSPEC
6548 && (XINT (y, 1) == UNSPEC_GOTOFF
6549 || XINT (y, 1) == UNSPEC_PLTOFF))
6550 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
6551 }
6552
6553 if (GET_CODE (x) != MEM)
6554 return orig_x;
6555
6556 x = XEXP (x, 0);
6557 if (GET_CODE (x) == PLUS
6558 && GET_CODE (XEXP (x, 1)) == CONST
6559 && GET_CODE (XEXP (x, 0)) == REG
6560 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6561 {
6562 y = XEXP (XEXP (x, 1), 0);
6563 if (GET_CODE (y) == UNSPEC
6564 && XINT (y, 1) == UNSPEC_GOT)
6565 y = XVECEXP (y, 0, 0);
6566 else
6567 return orig_x;
6568 }
6569 else if (GET_CODE (x) == CONST)
6570 {
6571 /* Extract the symbol ref from:
6572 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
6573 UNSPEC_PLT/GOTENT))) */
6574
6575 y = XEXP (x, 0);
6576 if (GET_CODE (y) == UNSPEC
6577 && (XINT (y, 1) == UNSPEC_GOTENT
6578 || XINT (y, 1) == UNSPEC_PLT))
6579 y = XVECEXP (y, 0, 0);
6580 else
6581 return orig_x;
6582 }
6583 else
6584 return orig_x;
6585
6586 if (GET_MODE (orig_x) != Pmode)
6587 {
6588 if (GET_MODE (orig_x) == BLKmode)
6589 return orig_x;
6590 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
6591 if (y == NULL_RTX)
6592 return orig_x;
6593 }
6594 return y;
6595 }
6596
6597 /* Output operand OP to stdio stream FILE.
6598 OP is an address (register + offset) which is not used to address data;
6599 instead the rightmost bits are interpreted as the value. */
6600
6601 static void
6602 print_shift_count_operand (FILE *file, rtx op)
6603 {
6604 HOST_WIDE_INT offset;
6605 rtx base;
6606
6607 /* Extract base register and offset. */
6608 if (!s390_decompose_shift_count (op, &base, &offset))
6609 gcc_unreachable ();
6610
6611 /* Sanity check. */
6612 if (base)
6613 {
6614 gcc_assert (GET_CODE (base) == REG);
6615 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
6616 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
6617 }
6618
6619 /* Offsets are restricted to twelve bits. */
6620 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
6621 if (base)
6622 fprintf (file, "(%s)", reg_names[REGNO (base)]);
6623 }
6624
6625 /* Assigns the number of NOP halfwords to be emitted before and after the
6626 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
6627 If hotpatching is disabled for the function, the values are set to zero.
6628 */
6629
6630 static void
6631 s390_function_num_hotpatch_hw (tree decl,
6632 int *hw_before,
6633 int *hw_after)
6634 {
6635 tree attr;
6636
6637 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
6638
6639 /* Handle the arguments of the hotpatch attribute. The values
6640 specified via attribute might override the cmdline argument
6641 values. */
6642 if (attr)
6643 {
6644 tree args = TREE_VALUE (attr);
6645
6646 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
6647 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
6648 }
6649 else
6650 {
6651 /* Use the values specified by the cmdline arguments. */
6652 *hw_before = s390_hotpatch_hw_before_label;
6653 *hw_after = s390_hotpatch_hw_after_label;
6654 }
6655 }
6656
6657 /* Write the extra assembler code needed to declare a function properly. */
6658
6659 void
6660 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
6661 tree decl)
6662 {
6663 int hw_before, hw_after;
6664
6665 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
6666 if (hw_before > 0)
6667 {
6668 unsigned int function_alignment;
6669 int i;
6670
6671 /* Add a trampoline code area before the function label and initialize it
6672 with two-byte nop instructions. This area can be overwritten with code
6673 that jumps to a patched version of the function. */
6674 asm_fprintf (asm_out_file, "\tnopr\t%%r7"
6675 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
6676 hw_before);
6677 for (i = 1; i < hw_before; i++)
6678 fputs ("\tnopr\t%r7\n", asm_out_file);
6679
6680 /* Note: The function label must be aligned so that (a) the bytes of the
6681 following nop do not cross a cacheline boundary, and (b) a jump address
6682 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
6683 stored directly before the label without crossing a cacheline
6684 boundary. All this is necessary to make sure the trampoline code can
6685 be changed atomically.
6686 This alignment is done automatically using the FUNCTION_BOUNDARY, but
6687 if there are NOPs before the function label, the alignment is placed
6688 before them. So it is necessary to duplicate the alignment after the
6689 NOPs. */
6690 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
6691 if (! DECL_USER_ALIGN (decl))
6692 function_alignment = MAX (function_alignment,
6693 (unsigned int) align_functions);
6694 fputs ("\t# alignment for hotpatch\n", asm_out_file);
6695 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
6696 }
6697
6698 ASM_OUTPUT_LABEL (asm_out_file, fname);
6699 if (hw_after > 0)
6700 asm_fprintf (asm_out_file,
6701 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
6702 hw_after);
6703 }
6704
6705 /* Output machine-dependent UNSPECs occurring in address constant X
6706 in assembler syntax to stdio stream FILE. Returns true if the
6707 constant X could be recognized, false otherwise. */
6708
6709 static bool
6710 s390_output_addr_const_extra (FILE *file, rtx x)
6711 {
6712 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
6713 switch (XINT (x, 1))
6714 {
6715 case UNSPEC_GOTENT:
6716 output_addr_const (file, XVECEXP (x, 0, 0));
6717 fprintf (file, "@GOTENT");
6718 return true;
6719 case UNSPEC_GOT:
6720 output_addr_const (file, XVECEXP (x, 0, 0));
6721 fprintf (file, "@GOT");
6722 return true;
6723 case UNSPEC_GOTOFF:
6724 output_addr_const (file, XVECEXP (x, 0, 0));
6725 fprintf (file, "@GOTOFF");
6726 return true;
6727 case UNSPEC_PLT:
6728 output_addr_const (file, XVECEXP (x, 0, 0));
6729 fprintf (file, "@PLT");
6730 return true;
6731 case UNSPEC_PLTOFF:
6732 output_addr_const (file, XVECEXP (x, 0, 0));
6733 fprintf (file, "@PLTOFF");
6734 return true;
6735 case UNSPEC_TLSGD:
6736 output_addr_const (file, XVECEXP (x, 0, 0));
6737 fprintf (file, "@TLSGD");
6738 return true;
6739 case UNSPEC_TLSLDM:
6740 assemble_name (file, get_some_local_dynamic_name ());
6741 fprintf (file, "@TLSLDM");
6742 return true;
6743 case UNSPEC_DTPOFF:
6744 output_addr_const (file, XVECEXP (x, 0, 0));
6745 fprintf (file, "@DTPOFF");
6746 return true;
6747 case UNSPEC_NTPOFF:
6748 output_addr_const (file, XVECEXP (x, 0, 0));
6749 fprintf (file, "@NTPOFF");
6750 return true;
6751 case UNSPEC_GOTNTPOFF:
6752 output_addr_const (file, XVECEXP (x, 0, 0));
6753 fprintf (file, "@GOTNTPOFF");
6754 return true;
6755 case UNSPEC_INDNTPOFF:
6756 output_addr_const (file, XVECEXP (x, 0, 0));
6757 fprintf (file, "@INDNTPOFF");
6758 return true;
6759 }
6760
6761 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
6762 switch (XINT (x, 1))
6763 {
6764 case UNSPEC_POOL_OFFSET:
6765 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
6766 output_addr_const (file, x);
6767 return true;
6768 }
6769 return false;
6770 }
6771
6772 /* Output address operand ADDR in assembler syntax to
6773 stdio stream FILE. */
6774
6775 void
6776 print_operand_address (FILE *file, rtx addr)
6777 {
6778 struct s390_address ad;
6779
6780 if (s390_loadrelative_operand_p (addr, NULL, NULL))
6781 {
6782 if (!TARGET_Z10)
6783 {
6784 output_operand_lossage ("symbolic memory references are "
6785 "only supported on z10 or later");
6786 return;
6787 }
6788 output_addr_const (file, addr);
6789 return;
6790 }
6791
6792 if (!s390_decompose_address (addr, &ad)
6793 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6794 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
6795 output_operand_lossage ("cannot decompose address");
6796
6797 if (ad.disp)
6798 output_addr_const (file, ad.disp);
6799 else
6800 fprintf (file, "0");
6801
6802 if (ad.base && ad.indx)
6803 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
6804 reg_names[REGNO (ad.base)]);
6805 else if (ad.base)
6806 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
6807 }
6808
6809 /* Output operand X in assembler syntax to stdio stream FILE.
6810 CODE specified the format flag. The following format flags
6811 are recognized:
6812
6813 'C': print opcode suffix for branch condition.
6814 'D': print opcode suffix for inverse branch condition.
6815 'E': print opcode suffix for branch on index instruction.
6816 'G': print the size of the operand in bytes.
6817 'J': print tls_load/tls_gdcall/tls_ldcall suffix
6818 'M': print the second word of a TImode operand.
6819 'N': print the second word of a DImode operand.
6820 'O': print only the displacement of a memory reference or address.
6821 'R': print only the base register of a memory reference or address.
6822 'S': print S-type memory reference (base+displacement).
6823 'Y': print shift count operand.
6824
6825 'b': print integer X as if it's an unsigned byte.
6826 'c': print integer X as if it's a signed byte.
6827 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
6828 'f': "end" contiguous bitmask X in SImode.
6829 'h': print integer X as if it's a signed halfword.
6830 'i': print the first nonzero HImode part of X.
6831 'j': print the first HImode part unequal to -1 of X.
6832 'k': print the first nonzero SImode part of X.
6833 'm': print the first SImode part unequal to -1 of X.
6834 'o': print integer X as if it's an unsigned 32-bit word.
6835 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
6836 't': CONST_INT: "start" of contiguous bitmask X in SImode.
6837 CONST_VECTOR: Generate a bitmask for vgbm instruction.
6838 'x': print integer X as if it's an unsigned halfword.
6839 'v': print register number as vector register (v1 instead of f1).
6840 */
6841
6842 void
6843 print_operand (FILE *file, rtx x, int code)
6844 {
6845 HOST_WIDE_INT ival;
6846
6847 switch (code)
6848 {
6849 case 'C':
6850 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
6851 return;
6852
6853 case 'D':
6854 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
6855 return;
6856
6857 case 'E':
6858 if (GET_CODE (x) == LE)
6859 fprintf (file, "l");
6860 else if (GET_CODE (x) == GT)
6861 fprintf (file, "h");
6862 else
6863 output_operand_lossage ("invalid comparison operator "
6864 "for 'E' output modifier");
6865 return;
6866
6867 case 'J':
6868 if (GET_CODE (x) == SYMBOL_REF)
6869 {
6870 fprintf (file, "%s", ":tls_load:");
6871 output_addr_const (file, x);
6872 }
6873 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
6874 {
6875 fprintf (file, "%s", ":tls_gdcall:");
6876 output_addr_const (file, XVECEXP (x, 0, 0));
6877 }
6878 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
6879 {
6880 fprintf (file, "%s", ":tls_ldcall:");
6881 const char *name = get_some_local_dynamic_name ();
6882 gcc_assert (name);
6883 assemble_name (file, name);
6884 }
6885 else
6886 output_operand_lossage ("invalid reference for 'J' output modifier");
6887 return;
6888
6889 case 'G':
6890 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
6891 return;
6892
6893 case 'O':
6894 {
6895 struct s390_address ad;
6896 int ret;
6897
6898 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
6899
6900 if (!ret
6901 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6902 || ad.indx)
6903 {
6904 output_operand_lossage ("invalid address for 'O' output modifier");
6905 return;
6906 }
6907
6908 if (ad.disp)
6909 output_addr_const (file, ad.disp);
6910 else
6911 fprintf (file, "0");
6912 }
6913 return;
6914
6915 case 'R':
6916 {
6917 struct s390_address ad;
6918 int ret;
6919
6920 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
6921
6922 if (!ret
6923 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6924 || ad.indx)
6925 {
6926 output_operand_lossage ("invalid address for 'R' output modifier");
6927 return;
6928 }
6929
6930 if (ad.base)
6931 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
6932 else
6933 fprintf (file, "0");
6934 }
6935 return;
6936
6937 case 'S':
6938 {
6939 struct s390_address ad;
6940 int ret;
6941
6942 if (!MEM_P (x))
6943 {
6944 output_operand_lossage ("memory reference expected for "
6945 "'S' output modifier");
6946 return;
6947 }
6948 ret = s390_decompose_address (XEXP (x, 0), &ad);
6949
6950 if (!ret
6951 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
6952 || ad.indx)
6953 {
6954 output_operand_lossage ("invalid address for 'S' output modifier");
6955 return;
6956 }
6957
6958 if (ad.disp)
6959 output_addr_const (file, ad.disp);
6960 else
6961 fprintf (file, "0");
6962
6963 if (ad.base)
6964 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
6965 }
6966 return;
6967
6968 case 'N':
6969 if (GET_CODE (x) == REG)
6970 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
6971 else if (GET_CODE (x) == MEM)
6972 x = change_address (x, VOIDmode,
6973 plus_constant (Pmode, XEXP (x, 0), 4));
6974 else
6975 output_operand_lossage ("register or memory expression expected "
6976 "for 'N' output modifier");
6977 break;
6978
6979 case 'M':
6980 if (GET_CODE (x) == REG)
6981 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
6982 else if (GET_CODE (x) == MEM)
6983 x = change_address (x, VOIDmode,
6984 plus_constant (Pmode, XEXP (x, 0), 8));
6985 else
6986 output_operand_lossage ("register or memory expression expected "
6987 "for 'M' output modifier");
6988 break;
6989
6990 case 'Y':
6991 print_shift_count_operand (file, x);
6992 return;
6993 }
6994
6995 switch (GET_CODE (x))
6996 {
6997 case REG:
6998 /* Print FP regs as fx instead of vx when they are accessed
6999 through non-vector mode. */
7000 if (code == 'v'
7001 || VECTOR_NOFP_REG_P (x)
7002 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7003 || (VECTOR_REG_P (x)
7004 && (GET_MODE_SIZE (GET_MODE (x)) /
7005 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7006 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7007 else
7008 fprintf (file, "%s", reg_names[REGNO (x)]);
7009 break;
7010
7011 case MEM:
7012 output_address (XEXP (x, 0));
7013 break;
7014
7015 case CONST:
7016 case CODE_LABEL:
7017 case LABEL_REF:
7018 case SYMBOL_REF:
7019 output_addr_const (file, x);
7020 break;
7021
7022 case CONST_INT:
7023 ival = INTVAL (x);
7024 switch (code)
7025 {
7026 case 0:
7027 break;
7028 case 'b':
7029 ival &= 0xff;
7030 break;
7031 case 'c':
7032 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7033 break;
7034 case 'x':
7035 ival &= 0xffff;
7036 break;
7037 case 'h':
7038 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7039 break;
7040 case 'i':
7041 ival = s390_extract_part (x, HImode, 0);
7042 break;
7043 case 'j':
7044 ival = s390_extract_part (x, HImode, -1);
7045 break;
7046 case 'k':
7047 ival = s390_extract_part (x, SImode, 0);
7048 break;
7049 case 'm':
7050 ival = s390_extract_part (x, SImode, -1);
7051 break;
7052 case 'o':
7053 ival &= 0xffffffff;
7054 break;
7055 case 'e': case 'f':
7056 case 's': case 't':
7057 {
7058 int pos, len;
7059 bool ok;
7060
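/* POS and LEN as computed here count from the least significant bit; the
   conversions below translate them into the bit numbering used in the
   instruction encodings, where bit 0 is the most significant bit of the
   64-bit value.  */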
7061 len = (code == 's' || code == 'e' ? 64 : 32);
7062 ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
7063 gcc_assert (ok);
7064 if (code == 's' || code == 't')
7065 ival = 64 - pos - len;
7066 else
7067 ival = 64 - 1 - pos;
7068 }
7069 break;
7070 default:
7071 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7072 }
7073 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7074 break;
7075
7076 case CONST_DOUBLE:
7077 gcc_assert (GET_MODE (x) == VOIDmode);
7078 if (code == 'b')
7079 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff);
7080 else if (code == 'x')
7081 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff);
7082 else if (code == 'h')
7083 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7084 ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000);
7085 else
7086 {
7087 if (code == 0)
7088 output_operand_lossage ("invalid constant - try using "
7089 "an output modifier");
7090 else
7091 output_operand_lossage ("invalid constant for output modifier '%c'",
7092 code);
7093 }
7094 break;
7095 case CONST_VECTOR:
7096 switch (code)
7097 {
7098 case 'e':
7099 case 's':
7100 {
7101 int start, stop, inner_len;
7102 bool ok;
7103
7104 inner_len = GET_MODE_UNIT_BITSIZE (GET_MODE (x));
7105 ok = s390_contiguous_bitmask_vector_p (x, &start, &stop);
7106 gcc_assert (ok);
7107 if (code == 's' || code == 't')
7108 ival = inner_len - stop - 1;
7109 else
7110 ival = inner_len - start - 1;
7111 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7112 }
7113 break;
7114 case 't':
7115 {
7116 unsigned mask;
7117 bool ok = s390_bytemask_vector_p (x, &mask);
7118 gcc_assert (ok);
7119 fprintf (file, "%u", mask);
7120 }
7121 break;
7122
7123 default:
7124 output_operand_lossage ("invalid constant vector for output "
7125 "modifier '%c'", code);
7126 }
7127 break;
7128
7129 default:
7130 if (code == 0)
7131 output_operand_lossage ("invalid expression - try using "
7132 "an output modifier");
7133 else
7134 output_operand_lossage ("invalid expression for output "
7135 "modifier '%c'", code);
7136 break;
7137 }
7138 }
7139
7140 /* Target hook for assembling integer objects. We need to define it
7141 here to work around a bug in some versions of GAS, which couldn't
7142 handle values smaller than INT_MIN when printed in decimal. */
7143
7144 static bool
7145 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7146 {
7147 if (size == 8 && aligned_p
7148 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7149 {
7150 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
7151 INTVAL (x));
7152 return true;
7153 }
7154 return default_assemble_integer (x, size, aligned_p);
7155 }
7156
7157 /* Returns true if register REGNO is used for forming
7158 a memory address in expression X. */
7159
7160 static bool
7161 reg_used_in_mem_p (int regno, rtx x)
7162 {
7163 enum rtx_code code = GET_CODE (x);
7164 int i, j;
7165 const char *fmt;
7166
7167 if (code == MEM)
7168 {
7169 if (refers_to_regno_p (regno, XEXP (x, 0)))
7170 return true;
7171 }
7172 else if (code == SET
7173 && GET_CODE (SET_DEST (x)) == PC)
7174 {
7175 if (refers_to_regno_p (regno, SET_SRC (x)))
7176 return true;
7177 }
7178
7179 fmt = GET_RTX_FORMAT (code);
7180 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7181 {
7182 if (fmt[i] == 'e'
7183 && reg_used_in_mem_p (regno, XEXP (x, i)))
7184 return true;
7185
7186 else if (fmt[i] == 'E')
7187 for (j = 0; j < XVECLEN (x, i); j++)
7188 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
7189 return true;
7190 }
7191 return false;
7192 }
7193
7194 /* Returns true if expression DEP_RTX sets an address register
7195 used by instruction INSN to address memory. */
7196
7197 static bool
7198 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
7199 {
7200 rtx target, pat;
7201
7202 if (NONJUMP_INSN_P (dep_rtx))
7203 dep_rtx = PATTERN (dep_rtx);
7204
7205 if (GET_CODE (dep_rtx) == SET)
7206 {
7207 target = SET_DEST (dep_rtx);
7208 if (GET_CODE (target) == STRICT_LOW_PART)
7209 target = XEXP (target, 0);
7210 while (GET_CODE (target) == SUBREG)
7211 target = SUBREG_REG (target);
7212
7213 if (GET_CODE (target) == REG)
7214 {
7215 int regno = REGNO (target);
7216
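/* For LA-type instructions the address computation is the source of the
   SET itself, so any use of REGNO there counts; for other agen-type
   instructions REGNO only matters if it is used to form a memory
   address.  */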
7217 if (s390_safe_attr_type (insn) == TYPE_LA)
7218 {
7219 pat = PATTERN (insn);
7220 if (GET_CODE (pat) == PARALLEL)
7221 {
7222 gcc_assert (XVECLEN (pat, 0) == 2);
7223 pat = XVECEXP (pat, 0, 0);
7224 }
7225 gcc_assert (GET_CODE (pat) == SET);
7226 return refers_to_regno_p (regno, SET_SRC (pat));
7227 }
7228 else if (get_attr_atype (insn) == ATYPE_AGEN)
7229 return reg_used_in_mem_p (regno, PATTERN (insn));
7230 }
7231 }
7232 return false;
7233 }
7234
7235 /* Return 1, if dep_insn sets register used in insn in the agen unit. */
7236
7237 int
7238 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
7239 {
7240 rtx dep_rtx = PATTERN (dep_insn);
7241 int i;
7242
7243 if (GET_CODE (dep_rtx) == SET
7244 && addr_generation_dependency_p (dep_rtx, insn))
7245 return 1;
7246 else if (GET_CODE (dep_rtx) == PARALLEL)
7247 {
7248 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
7249 {
7250 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
7251 return 1;
7252 }
7253 }
7254 return 0;
7255 }
7256
7257
7258 /* A C statement (sans semicolon) to update the integer scheduling priority
7259 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
7260 reduce the priority to execute INSN later. Do not define this macro if
7261 you do not need to adjust the scheduling priorities of insns.
7262
7263 A STD instruction should be scheduled earlier,
7264 in order to use the bypass. */
7265 static int
7266 s390_adjust_priority (rtx_insn *insn, int priority)
7267 {
7268 if (! INSN_P (insn))
7269 return priority;
7270
7271 if (s390_tune != PROCESSOR_2084_Z990
7272 && s390_tune != PROCESSOR_2094_Z9_109
7273 && s390_tune != PROCESSOR_2097_Z10
7274 && s390_tune != PROCESSOR_2817_Z196
7275 && s390_tune != PROCESSOR_2827_ZEC12
7276 && s390_tune != PROCESSOR_2964_Z13)
7277 return priority;
7278
7279 switch (s390_safe_attr_type (insn))
7280 {
7281 case TYPE_FSTOREDF:
7282 case TYPE_FSTORESF:
7283 priority = priority << 3;
7284 break;
7285 case TYPE_STORE:
7286 case TYPE_STM:
7287 priority = priority << 1;
7288 break;
7289 default:
7290 break;
7291 }
7292 return priority;
7293 }
7294
7295
7296 /* The number of instructions that can be issued per cycle. */
7297
7298 static int
7299 s390_issue_rate (void)
7300 {
7301 switch (s390_tune)
7302 {
7303 case PROCESSOR_2084_Z990:
7304 case PROCESSOR_2094_Z9_109:
7305 case PROCESSOR_2817_Z196:
7306 return 3;
7307 case PROCESSOR_2097_Z10:
7308 return 2;
7309 /* Starting with EC12 we use the sched_reorder hook to take care
7310 of instruction dispatch constraints. The algorithm only
7311 picks the best instruction and assumes only a single
7312 instruction gets issued per cycle. */
7313 case PROCESSOR_2827_ZEC12:
7314 default:
7315 return 1;
7316 }
7317 }
7318
7319 static int
7320 s390_first_cycle_multipass_dfa_lookahead (void)
7321 {
7322 return 4;
7323 }
7324
7325 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
7326 Fix up MEMs as required. */
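/* The UNSPEC_LTREF wrapper records the pool symbol together with the
   literal pool base register, so that later passes can locate such
   references and rewrite them into base-plus-offset form once the pool
   layout is known (see replace_constant_pool_ref below).  */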
7327
7328 static void
7329 annotate_constant_pool_refs (rtx *x)
7330 {
7331 int i, j;
7332 const char *fmt;
7333
7334 gcc_assert (GET_CODE (*x) != SYMBOL_REF
7335 || !CONSTANT_POOL_ADDRESS_P (*x));
7336
7337 /* Literal pool references can only occur inside a MEM ... */
7338 if (GET_CODE (*x) == MEM)
7339 {
7340 rtx memref = XEXP (*x, 0);
7341
7342 if (GET_CODE (memref) == SYMBOL_REF
7343 && CONSTANT_POOL_ADDRESS_P (memref))
7344 {
7345 rtx base = cfun->machine->base_reg;
7346 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
7347 UNSPEC_LTREF);
7348
7349 *x = replace_equiv_address (*x, addr);
7350 return;
7351 }
7352
7353 if (GET_CODE (memref) == CONST
7354 && GET_CODE (XEXP (memref, 0)) == PLUS
7355 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
7356 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
7357 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
7358 {
7359 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
7360 rtx sym = XEXP (XEXP (memref, 0), 0);
7361 rtx base = cfun->machine->base_reg;
7362 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7363 UNSPEC_LTREF);
7364
7365 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
7366 return;
7367 }
7368 }
7369
7370 /* ... or a load-address type pattern. */
7371 if (GET_CODE (*x) == SET)
7372 {
7373 rtx addrref = SET_SRC (*x);
7374
7375 if (GET_CODE (addrref) == SYMBOL_REF
7376 && CONSTANT_POOL_ADDRESS_P (addrref))
7377 {
7378 rtx base = cfun->machine->base_reg;
7379 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
7380 UNSPEC_LTREF);
7381
7382 SET_SRC (*x) = addr;
7383 return;
7384 }
7385
7386 if (GET_CODE (addrref) == CONST
7387 && GET_CODE (XEXP (addrref, 0)) == PLUS
7388 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
7389 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
7390 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
7391 {
7392 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
7393 rtx sym = XEXP (XEXP (addrref, 0), 0);
7394 rtx base = cfun->machine->base_reg;
7395 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7396 UNSPEC_LTREF);
7397
7398 SET_SRC (*x) = plus_constant (Pmode, addr, off);
7399 return;
7400 }
7401 }
7402
7403 /* Annotate LTREL_BASE as well. */
7404 if (GET_CODE (*x) == UNSPEC
7405 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7406 {
7407 rtx base = cfun->machine->base_reg;
7408 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
7409 UNSPEC_LTREL_BASE);
7410 return;
7411 }
7412
7413 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7414 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7415 {
7416 if (fmt[i] == 'e')
7417 {
7418 annotate_constant_pool_refs (&XEXP (*x, i));
7419 }
7420 else if (fmt[i] == 'E')
7421 {
7422 for (j = 0; j < XVECLEN (*x, i); j++)
7423 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
7424 }
7425 }
7426 }
7427
7428 /* Split all branches that exceed the maximum distance.
7429 Returns true if this created a new literal pool entry. */
7430
7431 static int
7432 s390_split_branches (void)
7433 {
7434 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
7435 int new_literal = 0, ret;
7436 rtx_insn *insn;
7437 rtx pat, target;
7438 rtx *label;
7439
7440 /* We need correct insn addresses. */
7441
7442 shorten_branches (get_insns ());
7443
7444 /* Find all branches that exceed 64KB, and split them. */
7445
7446 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7447 {
7448 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
7449 continue;
7450
7451 pat = PATTERN (insn);
7452 if (GET_CODE (pat) == PARALLEL)
7453 pat = XVECEXP (pat, 0, 0);
7454 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
7455 continue;
7456
7457 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
7458 {
7459 label = &SET_SRC (pat);
7460 }
7461 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
7462 {
7463 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
7464 label = &XEXP (SET_SRC (pat), 1);
7465 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
7466 label = &XEXP (SET_SRC (pat), 2);
7467 else
7468 continue;
7469 }
7470 else
7471 continue;
7472
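/* A branch whose chosen encoding is no longer than 4 bytes still uses the
   short relative form, whose 16-bit halfword offset covers the 64KB range
   checked for above; only the longer forms selected by shorten_branches
   need to be split and routed through the literal pool and the scratch
   register.  */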
7473 if (get_attr_length (insn) <= 4)
7474 continue;
7475
7476 /* We are going to use the return register as a scratch register;
7477 make sure it will be saved/restored by the prologue/epilogue. */
7478 cfun_frame_layout.save_return_addr_p = 1;
7479
7480 if (!flag_pic)
7481 {
7482 new_literal = 1;
7483 rtx mem = force_const_mem (Pmode, *label);
7484 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
7485 insn);
7486 INSN_ADDRESSES_NEW (set_insn, -1);
7487 annotate_constant_pool_refs (&PATTERN (set_insn));
7488
7489 target = temp_reg;
7490 }
7491 else
7492 {
7493 new_literal = 1;
7494 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
7495 UNSPEC_LTREL_OFFSET);
7496 target = gen_rtx_CONST (Pmode, target);
7497 target = force_const_mem (Pmode, target);
7498 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
7499 insn);
7500 INSN_ADDRESSES_NEW (set_insn, -1);
7501 annotate_constant_pool_refs (&PATTERN (set_insn));
7502
7503 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
7504 cfun->machine->base_reg),
7505 UNSPEC_LTREL_BASE);
7506 target = gen_rtx_PLUS (Pmode, temp_reg, target);
7507 }
7508
7509 ret = validate_change (insn, label, target, 0);
7510 gcc_assert (ret);
7511 }
7512
7513 return new_literal;
7514 }
7515
7516
7517 /* Find an annotated literal pool symbol referenced in RTX X,
7518 and store it at REF. Will abort if X contains references to
7519 more than one such pool symbol; multiple references to the same
7520 symbol are allowed, however.
7521
7522 The rtx pointed to by REF must be initialized to NULL_RTX
7523 by the caller before calling this routine. */
7524
7525 static void
7526 find_constant_pool_ref (rtx x, rtx *ref)
7527 {
7528 int i, j;
7529 const char *fmt;
7530
7531 /* Ignore LTREL_BASE references. */
7532 if (GET_CODE (x) == UNSPEC
7533 && XINT (x, 1) == UNSPEC_LTREL_BASE)
7534 return;
7535 /* Likewise POOL_ENTRY insns. */
7536 if (GET_CODE (x) == UNSPEC_VOLATILE
7537 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
7538 return;
7539
7540 gcc_assert (GET_CODE (x) != SYMBOL_REF
7541 || !CONSTANT_POOL_ADDRESS_P (x));
7542
7543 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
7544 {
7545 rtx sym = XVECEXP (x, 0, 0);
7546 gcc_assert (GET_CODE (sym) == SYMBOL_REF
7547 && CONSTANT_POOL_ADDRESS_P (sym));
7548
7549 if (*ref == NULL_RTX)
7550 *ref = sym;
7551 else
7552 gcc_assert (*ref == sym);
7553
7554 return;
7555 }
7556
7557 fmt = GET_RTX_FORMAT (GET_CODE (x));
7558 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7559 {
7560 if (fmt[i] == 'e')
7561 {
7562 find_constant_pool_ref (XEXP (x, i), ref);
7563 }
7564 else if (fmt[i] == 'E')
7565 {
7566 for (j = 0; j < XVECLEN (x, i); j++)
7567 find_constant_pool_ref (XVECEXP (x, i, j), ref);
7568 }
7569 }
7570 }
7571
7572 /* Replace every reference to the annotated literal pool
7573 symbol REF in X by its base plus OFFSET. */
7574
7575 static void
7576 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
7577 {
7578 int i, j;
7579 const char *fmt;
7580
7581 gcc_assert (*x != ref);
7582
7583 if (GET_CODE (*x) == UNSPEC
7584 && XINT (*x, 1) == UNSPEC_LTREF
7585 && XVECEXP (*x, 0, 0) == ref)
7586 {
7587 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
7588 return;
7589 }
7590
7591 if (GET_CODE (*x) == PLUS
7592 && GET_CODE (XEXP (*x, 1)) == CONST_INT
7593 && GET_CODE (XEXP (*x, 0)) == UNSPEC
7594 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
7595 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
7596 {
7597 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
7598 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
7599 return;
7600 }
7601
7602 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7603 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7604 {
7605 if (fmt[i] == 'e')
7606 {
7607 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
7608 }
7609 else if (fmt[i] == 'E')
7610 {
7611 for (j = 0; j < XVECLEN (*x, i); j++)
7612 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
7613 }
7614 }
7615 }
7616
7617 /* Check whether X contains an UNSPEC_LTREL_BASE.
7618 Return its constant pool symbol if found, NULL_RTX otherwise. */
7619
7620 static rtx
7621 find_ltrel_base (rtx x)
7622 {
7623 int i, j;
7624 const char *fmt;
7625
7626 if (GET_CODE (x) == UNSPEC
7627 && XINT (x, 1) == UNSPEC_LTREL_BASE)
7628 return XVECEXP (x, 0, 0);
7629
7630 fmt = GET_RTX_FORMAT (GET_CODE (x));
7631 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7632 {
7633 if (fmt[i] == 'e')
7634 {
7635 rtx fnd = find_ltrel_base (XEXP (x, i));
7636 if (fnd)
7637 return fnd;
7638 }
7639 else if (fmt[i] == 'E')
7640 {
7641 for (j = 0; j < XVECLEN (x, i); j++)
7642 {
7643 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
7644 if (fnd)
7645 return fnd;
7646 }
7647 }
7648 }
7649
7650 return NULL_RTX;
7651 }
7652
7653 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
7654
7655 static void
7656 replace_ltrel_base (rtx *x)
7657 {
7658 int i, j;
7659 const char *fmt;
7660
7661 if (GET_CODE (*x) == UNSPEC
7662 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7663 {
7664 *x = XVECEXP (*x, 0, 1);
7665 return;
7666 }
7667
7668 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7669 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7670 {
7671 if (fmt[i] == 'e')
7672 {
7673 replace_ltrel_base (&XEXP (*x, i));
7674 }
7675 else if (fmt[i] == 'E')
7676 {
7677 for (j = 0; j < XVECLEN (*x, i); j++)
7678 replace_ltrel_base (&XVECEXP (*x, i, j));
7679 }
7680 }
7681 }
7682
7683
7684 /* We keep a list of constants which we have to add to internal
7685 constant tables in the middle of large functions. */
7686
7687 #define NR_C_MODES 31
7688 machine_mode constant_modes[NR_C_MODES] =
7689 {
7690 TFmode, TImode, TDmode,
7691 V16QImode, V8HImode, V4SImode, V2DImode, V4SFmode, V2DFmode, V1TFmode,
7692 DFmode, DImode, DDmode,
7693 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
7694 SFmode, SImode, SDmode,
7695 V4QImode, V2HImode, V1SImode, V1SFmode,
7696 HImode,
7697 V2QImode, V1HImode,
7698 QImode,
7699 V1QImode
7700 };
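/* The modes above are ordered by decreasing size and alignment
   requirement (the 16-byte modes first, the 1-byte modes last); this
   lets s390_dump_pool emit pool entries in descending alignment order
   simply by walking the array front to back.  */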
7701
7702 struct constant
7703 {
7704 struct constant *next;
7705 rtx value;
7706 rtx_code_label *label;
7707 };
7708
7709 struct constant_pool
7710 {
7711 struct constant_pool *next;
7712 rtx_insn *first_insn;
7713 rtx_insn *pool_insn;
7714 bitmap insns;
7715 rtx_insn *emit_pool_after;
7716
7717 struct constant *constants[NR_C_MODES];
7718 struct constant *execute;
7719 rtx_code_label *label;
7720 int size;
7721 };
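/* The insns bitmap records the INSN_UIDs covered by a pool chunk (see
   s390_add_pool_insn).  emit_pool_after, when set, is the last insn
   after which the pool may still be emitted; it is used to keep the
   pool in the section where the pool base pointer is set up when
   hot/cold partitioning is active (see s390_mainpool_start).  */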
7722
7723 /* Allocate new constant_pool structure. */
7724
7725 static struct constant_pool *
7726 s390_alloc_pool (void)
7727 {
7728 struct constant_pool *pool;
7729 int i;
7730
7731 pool = (struct constant_pool *) xmalloc (sizeof *pool);
7732 pool->next = NULL;
7733 for (i = 0; i < NR_C_MODES; i++)
7734 pool->constants[i] = NULL;
7735
7736 pool->execute = NULL;
7737 pool->label = gen_label_rtx ();
7738 pool->first_insn = NULL;
7739 pool->pool_insn = NULL;
7740 pool->insns = BITMAP_ALLOC (NULL);
7741 pool->size = 0;
7742 pool->emit_pool_after = NULL;
7743
7744 return pool;
7745 }
7746
7747 /* Create new constant pool covering instructions starting at INSN
7748 and chain it to the end of POOL_LIST. */
7749
7750 static struct constant_pool *
7751 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
7752 {
7753 struct constant_pool *pool, **prev;
7754
7755 pool = s390_alloc_pool ();
7756 pool->first_insn = insn;
7757
7758 for (prev = pool_list; *prev; prev = &(*prev)->next)
7759 ;
7760 *prev = pool;
7761
7762 return pool;
7763 }
7764
7765 /* End range of instructions covered by POOL at INSN and emit
7766 placeholder insn representing the pool. */
7767
7768 static void
7769 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
7770 {
7771 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
7772
7773 if (!insn)
7774 insn = get_last_insn ();
7775
7776 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
7777 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
7778 }
7779
7780 /* Add INSN to the list of insns covered by POOL. */
7781
7782 static void
7783 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
7784 {
7785 bitmap_set_bit (pool->insns, INSN_UID (insn));
7786 }
7787
7788 /* Return pool out of POOL_LIST that covers INSN. */
7789
7790 static struct constant_pool *
7791 s390_find_pool (struct constant_pool *pool_list, rtx insn)
7792 {
7793 struct constant_pool *pool;
7794
7795 for (pool = pool_list; pool; pool = pool->next)
7796 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
7797 break;
7798
7799 return pool;
7800 }
7801
7802 /* Add constant VAL of mode MODE to the constant pool POOL. */
7803
7804 static void
7805 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
7806 {
7807 struct constant *c;
7808 int i;
7809
7810 for (i = 0; i < NR_C_MODES; i++)
7811 if (constant_modes[i] == mode)
7812 break;
7813 gcc_assert (i != NR_C_MODES);
7814
7815 for (c = pool->constants[i]; c != NULL; c = c->next)
7816 if (rtx_equal_p (val, c->value))
7817 break;
7818
7819 if (c == NULL)
7820 {
7821 c = (struct constant *) xmalloc (sizeof *c);
7822 c->value = val;
7823 c->label = gen_label_rtx ();
7824 c->next = pool->constants[i];
7825 pool->constants[i] = c;
7826 pool->size += GET_MODE_SIZE (mode);
7827 }
7828 }
7829
7830 /* Return an rtx that represents the offset of X from the start of
7831 pool POOL. */
7832
7833 static rtx
7834 s390_pool_offset (struct constant_pool *pool, rtx x)
7835 {
7836 rtx label;
7837
7838 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
7839 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
7840 UNSPEC_POOL_OFFSET);
7841 return gen_rtx_CONST (GET_MODE (x), x);
7842 }
7843
7844 /* Find constant VAL of mode MODE in the constant pool POOL.
7845 Return an RTX describing the distance from the start of
7846 the pool to the location of the new constant. */
7847
7848 static rtx
7849 s390_find_constant (struct constant_pool *pool, rtx val,
7850 machine_mode mode)
7851 {
7852 struct constant *c;
7853 int i;
7854
7855 for (i = 0; i < NR_C_MODES; i++)
7856 if (constant_modes[i] == mode)
7857 break;
7858 gcc_assert (i != NR_C_MODES);
7859
7860 for (c = pool->constants[i]; c != NULL; c = c->next)
7861 if (rtx_equal_p (val, c->value))
7862 break;
7863
7864 gcc_assert (c);
7865
7866 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
7867 }
7868
7869 /* Check whether INSN is an execute. Return the label_ref to its
7870 execute target template if so, NULL_RTX otherwise. */
7871
7872 static rtx
7873 s390_execute_label (rtx insn)
7874 {
7875 if (NONJUMP_INSN_P (insn)
7876 && GET_CODE (PATTERN (insn)) == PARALLEL
7877 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
7878 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
7879 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
7880
7881 return NULL_RTX;
7882 }
7883
7884 /* Add execute target for INSN to the constant pool POOL. */
7885
7886 static void
7887 s390_add_execute (struct constant_pool *pool, rtx insn)
7888 {
7889 struct constant *c;
7890
7891 for (c = pool->execute; c != NULL; c = c->next)
7892 if (INSN_UID (insn) == INSN_UID (c->value))
7893 break;
7894
7895 if (c == NULL)
7896 {
7897 c = (struct constant *) xmalloc (sizeof *c);
7898 c->value = insn;
7899 c->label = gen_label_rtx ();
7900 c->next = pool->execute;
7901 pool->execute = c;
7902 pool->size += 6;
7903 }
7904 }
7905
7906 /* Find execute target for INSN in the constant pool POOL.
7907 Return an RTX describing the distance from the start of
7908 the pool to the location of the execute target. */
7909
7910 static rtx
7911 s390_find_execute (struct constant_pool *pool, rtx insn)
7912 {
7913 struct constant *c;
7914
7915 for (c = pool->execute; c != NULL; c = c->next)
7916 if (INSN_UID (insn) == INSN_UID (c->value))
7917 break;
7918
7919 gcc_assert (c);
7920
7921 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
7922 }
7923
7924 /* For an execute INSN, extract the execute target template. */
7925
7926 static rtx
7927 s390_execute_target (rtx insn)
7928 {
7929 rtx pattern = PATTERN (insn);
7930 gcc_assert (s390_execute_label (insn));
7931
7932 if (XVECLEN (pattern, 0) == 2)
7933 {
7934 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
7935 }
7936 else
7937 {
7938 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
7939 int i;
7940
7941 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
7942 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
7943
7944 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
7945 }
7946
7947 return pattern;
7948 }
7949
7950 /* Indicate that INSN cannot be duplicated. This is the case for
7951 execute insns that carry a unique label. */
7952
7953 static bool
7954 s390_cannot_copy_insn_p (rtx_insn *insn)
7955 {
7956 rtx label = s390_execute_label (insn);
7957 return label && label != const0_rtx;
7958 }
7959
7960 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
7961 do not emit the pool base label. */
7962
7963 static void
7964 s390_dump_pool (struct constant_pool *pool, bool remote_label)
7965 {
7966 struct constant *c;
7967 rtx_insn *insn = pool->pool_insn;
7968 int i;
7969
7970 /* Switch to rodata section. */
7971 if (TARGET_CPU_ZARCH)
7972 {
7973 insn = emit_insn_after (gen_pool_section_start (), insn);
7974 INSN_ADDRESSES_NEW (insn, -1);
7975 }
7976
7977 /* Ensure minimum pool alignment. */
7978 if (TARGET_CPU_ZARCH)
7979 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
7980 else
7981 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
7982 INSN_ADDRESSES_NEW (insn, -1);
7983
7984 /* Emit pool base label. */
7985 if (!remote_label)
7986 {
7987 insn = emit_label_after (pool->label, insn);
7988 INSN_ADDRESSES_NEW (insn, -1);
7989 }
7990
7991 /* Dump constants in descending alignment requirement order,
7992 ensuring proper alignment for every constant. */
7993 for (i = 0; i < NR_C_MODES; i++)
7994 for (c = pool->constants[i]; c; c = c->next)
7995 {
7996 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
7997 rtx value = copy_rtx (c->value);
7998 if (GET_CODE (value) == CONST
7999 && GET_CODE (XEXP (value, 0)) == UNSPEC
8000 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8001 && XVECLEN (XEXP (value, 0), 0) == 1)
8002 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8003
8004 insn = emit_label_after (c->label, insn);
8005 INSN_ADDRESSES_NEW (insn, -1);
8006
8007 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8008 gen_rtvec (1, value),
8009 UNSPECV_POOL_ENTRY);
8010 insn = emit_insn_after (value, insn);
8011 INSN_ADDRESSES_NEW (insn, -1);
8012 }
8013
8014 /* Ensure minimum alignment for instructions. */
8015 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8016 INSN_ADDRESSES_NEW (insn, -1);
8017
8018 /* Output in-pool execute template insns. */
8019 for (c = pool->execute; c; c = c->next)
8020 {
8021 insn = emit_label_after (c->label, insn);
8022 INSN_ADDRESSES_NEW (insn, -1);
8023
8024 insn = emit_insn_after (s390_execute_target (c->value), insn);
8025 INSN_ADDRESSES_NEW (insn, -1);
8026 }
8027
8028 /* Switch back to previous section. */
8029 if (TARGET_CPU_ZARCH)
8030 {
8031 insn = emit_insn_after (gen_pool_section_end (), insn);
8032 INSN_ADDRESSES_NEW (insn, -1);
8033 }
8034
8035 insn = emit_barrier_after (insn);
8036 INSN_ADDRESSES_NEW (insn, -1);
8037
8038 /* Remove placeholder insn. */
8039 remove_insn (pool->pool_insn);
8040 }
8041
8042 /* Free all memory used by POOL. */
8043
8044 static void
8045 s390_free_pool (struct constant_pool *pool)
8046 {
8047 struct constant *c, *next;
8048 int i;
8049
8050 for (i = 0; i < NR_C_MODES; i++)
8051 for (c = pool->constants[i]; c; c = next)
8052 {
8053 next = c->next;
8054 free (c);
8055 }
8056
8057 for (c = pool->execute; c; c = next)
8058 {
8059 next = c->next;
8060 free (c);
8061 }
8062
8063 BITMAP_FREE (pool->insns);
8064 free (pool);
8065 }
8066
8067
8068 /* Collect main literal pool. Return NULL on overflow. */
8069
8070 static struct constant_pool *
8071 s390_mainpool_start (void)
8072 {
8073 struct constant_pool *pool;
8074 rtx_insn *insn;
8075
8076 pool = s390_alloc_pool ();
8077
8078 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8079 {
8080 if (NONJUMP_INSN_P (insn)
8081 && GET_CODE (PATTERN (insn)) == SET
8082 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8083 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8084 {
8085 /* There might be two main_pool instructions if base_reg
8086 is call-clobbered; one for shrink-wrapped code and one
8087 for the rest. We want to keep the first. */
8088 if (pool->pool_insn)
8089 {
8090 insn = PREV_INSN (insn);
8091 delete_insn (NEXT_INSN (insn));
8092 continue;
8093 }
8094 pool->pool_insn = insn;
8095 }
8096
8097 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8098 {
8099 s390_add_execute (pool, insn);
8100 }
8101 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8102 {
8103 rtx pool_ref = NULL_RTX;
8104 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8105 if (pool_ref)
8106 {
8107 rtx constant = get_pool_constant (pool_ref);
8108 machine_mode mode = get_pool_mode (pool_ref);
8109 s390_add_constant (pool, constant, mode);
8110 }
8111 }
8112
8113 /* If hot/cold partitioning is enabled we have to make sure that
8114 the literal pool is emitted in the same section where the
8115 initialization of the literal pool base pointer takes place.
8116 emit_pool_after is only used in the non-overflow case on
8117 non-z CPUs where we can emit the literal pool at the end of the
8118 function body within the text section. */
8119 if (NOTE_P (insn)
8120 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8121 && !pool->emit_pool_after)
8122 pool->emit_pool_after = PREV_INSN (insn);
8123 }
8124
8125 gcc_assert (pool->pool_insn || pool->size == 0);
8126
8127 if (pool->size >= 4096)
8128 {
8129 /* We're going to chunkify the pool, so remove the main
8130 pool placeholder insn. */
8131 remove_insn (pool->pool_insn);
8132
8133 s390_free_pool (pool);
8134 pool = NULL;
8135 }
8136
8137 /* If the function ends with the section where the literal pool
8138 should be emitted, set the marker to its end. */
8139 if (pool && !pool->emit_pool_after)
8140 pool->emit_pool_after = get_last_insn ();
8141
8142 return pool;
8143 }
8144
8145 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8146 Modify the current function to output the pool constants as well as
8147 the pool register setup instruction. */
8148
8149 static void
8150 s390_mainpool_finish (struct constant_pool *pool)
8151 {
8152 rtx base_reg = cfun->machine->base_reg;
8153
8154 /* If the pool is empty, we're done. */
8155 if (pool->size == 0)
8156 {
8157 /* We don't actually need a base register after all. */
8158 cfun->machine->base_reg = NULL_RTX;
8159
8160 if (pool->pool_insn)
8161 remove_insn (pool->pool_insn);
8162 s390_free_pool (pool);
8163 return;
8164 }
8165
8166 /* We need correct insn addresses. */
8167 shorten_branches (get_insns ());
8168
8169 /* On zSeries, we use a LARL to load the pool register. The pool is
8170 located in the .rodata section, so we emit it after the function. */
8171 if (TARGET_CPU_ZARCH)
8172 {
8173 rtx set = gen_main_base_64 (base_reg, pool->label);
8174 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8175 INSN_ADDRESSES_NEW (insn, -1);
8176 remove_insn (pool->pool_insn);
8177
8178 insn = get_last_insn ();
8179 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8180 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8181
8182 s390_dump_pool (pool, 0);
8183 }
8184
8185 /* On S/390, if the total size of the function's code plus literal pool
8186 does not exceed 4096 bytes, we use BASR to set up a function base
8187 pointer, and emit the literal pool at the end of the function. */
8188 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
8189 + pool->size + 8 /* alignment slop */ < 4096)
8190 {
8191 rtx set = gen_main_base_31_small (base_reg, pool->label);
8192 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8193 INSN_ADDRESSES_NEW (insn, -1);
8194 remove_insn (pool->pool_insn);
8195
8196 insn = emit_label_after (pool->label, insn);
8197 INSN_ADDRESSES_NEW (insn, -1);
8198
8199 /* emit_pool_after will be set by s390_mainpool_start to the
8200 last insn of the section where the literal pool should be
8201 emitted. */
8202 insn = pool->emit_pool_after;
8203
8204 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8205 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8206
8207 s390_dump_pool (pool, 1);
8208 }
8209
8210 /* Otherwise, we emit an inline literal pool and use BASR to branch
8211 over it, setting up the pool register at the same time. */
8212 else
8213 {
8214 rtx_code_label *pool_end = gen_label_rtx ();
8215
8216 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
8217 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
8218 JUMP_LABEL (insn) = pool_end;
8219 INSN_ADDRESSES_NEW (insn, -1);
8220 remove_insn (pool->pool_insn);
8221
8222 insn = emit_label_after (pool->label, insn);
8223 INSN_ADDRESSES_NEW (insn, -1);
8224
8225 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8226 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8227
8228 insn = emit_label_after (pool_end, pool->pool_insn);
8229 INSN_ADDRESSES_NEW (insn, -1);
8230
8231 s390_dump_pool (pool, 1);
8232 }
8233
8234
8235 /* Replace all literal pool references. */
8236
8237 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8238 {
8239 if (INSN_P (insn))
8240 replace_ltrel_base (&PATTERN (insn));
8241
8242 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8243 {
8244 rtx addr, pool_ref = NULL_RTX;
8245 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8246 if (pool_ref)
8247 {
8248 if (s390_execute_label (insn))
8249 addr = s390_find_execute (pool, insn);
8250 else
8251 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
8252 get_pool_mode (pool_ref));
8253
8254 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8255 INSN_CODE (insn) = -1;
8256 }
8257 }
8258 }
8259
8260
8261 /* Free the pool. */
8262 s390_free_pool (pool);
8263 }
8264
8265 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8266 We have decided we cannot use this pool, so revert all changes
8267 to the current function that were done by s390_mainpool_start. */
8268 static void
8269 s390_mainpool_cancel (struct constant_pool *pool)
8270 {
8271 /* We didn't actually change the instruction stream, so simply
8272 free the pool memory. */
8273 s390_free_pool (pool);
8274 }
8275
8276
8277 /* Chunkify the literal pool. */
8278
8279 #define S390_POOL_CHUNK_MIN 0xc00
8280 #define S390_POOL_CHUNK_MAX 0xe00
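/* These thresholds (in bytes) keep each pool chunk well below the
   4096-byte range reachable with a 12-bit displacement from the pool
   base register: a chunk is not ended before it reaches
   S390_POOL_CHUNK_MIN and is forced to end once it would exceed
   S390_POOL_CHUNK_MAX.  */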
8281
8282 static struct constant_pool *
8283 s390_chunkify_start (void)
8284 {
8285 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
8286 int extra_size = 0;
8287 bitmap far_labels;
8288 rtx pending_ltrel = NULL_RTX;
8289 rtx_insn *insn;
8290
8291 rtx (*gen_reload_base) (rtx, rtx) =
8292 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
8293
8294
8295 /* We need correct insn addresses. */
8296
8297 shorten_branches (get_insns ());
8298
8299 /* Scan all insns and move literals to pool chunks. */
8300
8301 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8302 {
8303 bool section_switch_p = false;
8304
8305 /* Check for pending LTREL_BASE. */
8306 if (INSN_P (insn))
8307 {
8308 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
8309 if (ltrel_base)
8310 {
8311 gcc_assert (ltrel_base == pending_ltrel);
8312 pending_ltrel = NULL_RTX;
8313 }
8314 }
8315
8316 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8317 {
8318 if (!curr_pool)
8319 curr_pool = s390_start_pool (&pool_list, insn);
8320
8321 s390_add_execute (curr_pool, insn);
8322 s390_add_pool_insn (curr_pool, insn);
8323 }
8324 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8325 {
8326 rtx pool_ref = NULL_RTX;
8327 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8328 if (pool_ref)
8329 {
8330 rtx constant = get_pool_constant (pool_ref);
8331 machine_mode mode = get_pool_mode (pool_ref);
8332
8333 if (!curr_pool)
8334 curr_pool = s390_start_pool (&pool_list, insn);
8335
8336 s390_add_constant (curr_pool, constant, mode);
8337 s390_add_pool_insn (curr_pool, insn);
8338
8339 /* Don't split the pool chunk between a LTREL_OFFSET load
8340 and the corresponding LTREL_BASE. */
8341 if (GET_CODE (constant) == CONST
8342 && GET_CODE (XEXP (constant, 0)) == UNSPEC
8343 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
8344 {
8345 gcc_assert (!pending_ltrel);
8346 pending_ltrel = pool_ref;
8347 }
8348 }
8349 }
8350
8351 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
8352 {
8353 if (curr_pool)
8354 s390_add_pool_insn (curr_pool, insn);
8355 /* An LTREL_BASE must follow within the same basic block. */
8356 gcc_assert (!pending_ltrel);
8357 }
8358
8359 if (NOTE_P (insn))
8360 switch (NOTE_KIND (insn))
8361 {
8362 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
8363 section_switch_p = true;
8364 break;
8365 case NOTE_INSN_VAR_LOCATION:
8366 case NOTE_INSN_CALL_ARG_LOCATION:
8367 continue;
8368 default:
8369 break;
8370 }
8371
8372 if (!curr_pool
8373 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
8374 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
8375 continue;
8376
8377 if (TARGET_CPU_ZARCH)
8378 {
8379 if (curr_pool->size < S390_POOL_CHUNK_MAX)
8380 continue;
8381
8382 s390_end_pool (curr_pool, NULL);
8383 curr_pool = NULL;
8384 }
8385 else
8386 {
8387 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
8388 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
8389 + extra_size;
8390
8391 /* We will later have to insert base register reload insns.
8392 Those will have an effect on code size, which we need to
8393 consider here. This calculation makes rather pessimistic
8394 worst-case assumptions. */
8395 if (LABEL_P (insn))
8396 extra_size += 6;
8397
8398 if (chunk_size < S390_POOL_CHUNK_MIN
8399 && curr_pool->size < S390_POOL_CHUNK_MIN
8400 && !section_switch_p)
8401 continue;
8402
8403 /* Pool chunks can only be inserted after BARRIERs ... */
8404 if (BARRIER_P (insn))
8405 {
8406 s390_end_pool (curr_pool, insn);
8407 curr_pool = NULL;
8408 extra_size = 0;
8409 }
8410
8411 /* ... so if we don't find one in time, create one. */
8412 else if (chunk_size > S390_POOL_CHUNK_MAX
8413 || curr_pool->size > S390_POOL_CHUNK_MAX
8414 || section_switch_p)
8415 {
8416 rtx_insn *label, *jump, *barrier, *next, *prev;
8417
8418 if (!section_switch_p)
8419 {
8420 /* We can insert the barrier only after a 'real' insn. */
8421 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
8422 continue;
8423 if (get_attr_length (insn) == 0)
8424 continue;
8425 /* Don't separate LTREL_BASE from the corresponding
8426 LTREL_OFFSET load. */
8427 if (pending_ltrel)
8428 continue;
8429 next = insn;
8430 do
8431 {
8432 insn = next;
8433 next = NEXT_INSN (insn);
8434 }
8435 while (next
8436 && NOTE_P (next)
8437 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
8438 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
8439 }
8440 else
8441 {
8442 gcc_assert (!pending_ltrel);
8443
8444 /* The old pool has to end before the section switch
8445 note in order to make it part of the current
8446 section. */
8447 insn = PREV_INSN (insn);
8448 }
8449
8450 label = gen_label_rtx ();
8451 prev = insn;
8452 if (prev && NOTE_P (prev))
8453 prev = prev_nonnote_insn (prev);
8454 if (prev)
8455 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
8456 INSN_LOCATION (prev));
8457 else
8458 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
8459 barrier = emit_barrier_after (jump);
8460 insn = emit_label_after (label, barrier);
8461 JUMP_LABEL (jump) = label;
8462 LABEL_NUSES (label) = 1;
8463
8464 INSN_ADDRESSES_NEW (jump, -1);
8465 INSN_ADDRESSES_NEW (barrier, -1);
8466 INSN_ADDRESSES_NEW (insn, -1);
8467
8468 s390_end_pool (curr_pool, barrier);
8469 curr_pool = NULL;
8470 extra_size = 0;
8471 }
8472 }
8473 }
8474
8475 if (curr_pool)
8476 s390_end_pool (curr_pool, NULL);
8477 gcc_assert (!pending_ltrel);
8478
8479 /* Find all labels that are branched into
8480 from an insn belonging to a different chunk. */
8481
8482 far_labels = BITMAP_ALLOC (NULL);
8483
8484 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8485 {
8486 rtx_jump_table_data *table;
8487
8488 /* Labels marked with LABEL_PRESERVE_P can be the target
8489 of non-local jumps, so we have to mark them.
8490 The same holds for named labels.
8491
8492 Don't do that, however, if it is the label before
8493 a jump table. */
8494
8495 if (LABEL_P (insn)
8496 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
8497 {
8498 rtx_insn *vec_insn = NEXT_INSN (insn);
8499 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
8500 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
8501 }
8502 /* Check potential targets in a table jump (casesi_jump). */
8503 else if (tablejump_p (insn, NULL, &table))
8504 {
8505 rtx vec_pat = PATTERN (table);
8506 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
8507
8508 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
8509 {
8510 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
8511
8512 if (s390_find_pool (pool_list, label)
8513 != s390_find_pool (pool_list, insn))
8514 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8515 }
8516 }
8517 /* If we have a direct jump (conditional or unconditional),
8518 check all potential targets. */
8519 else if (JUMP_P (insn))
8520 {
8521 rtx pat = PATTERN (insn);
8522
8523 if (GET_CODE (pat) == PARALLEL)
8524 pat = XVECEXP (pat, 0, 0);
8525
8526 if (GET_CODE (pat) == SET)
8527 {
8528 rtx label = JUMP_LABEL (insn);
8529 if (label && !ANY_RETURN_P (label))
8530 {
8531 if (s390_find_pool (pool_list, label)
8532 != s390_find_pool (pool_list, insn))
8533 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8534 }
8535 }
8536 }
8537 }
8538
8539 /* Insert base register reload insns before every pool. */
8540
8541 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8542 {
8543 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8544 curr_pool->label);
8545 rtx_insn *insn = curr_pool->first_insn;
8546 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
8547 }
8548
8549 /* Insert base register reload insns at every far label. */
8550
8551 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8552 if (LABEL_P (insn)
8553 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
8554 {
8555 struct constant_pool *pool = s390_find_pool (pool_list, insn);
8556 if (pool)
8557 {
8558 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8559 pool->label);
8560 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
8561 }
8562 }
8563
8564
8565 BITMAP_FREE (far_labels);
8566
8567
8568 /* Recompute insn addresses. */
8569
8570 init_insn_lengths ();
8571 shorten_branches (get_insns ());
8572
8573 return pool_list;
8574 }
8575
8576 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
8577 After we have decided to use this list, finish implementing
8578 all changes to the current function as required. */
8579
8580 static void
8581 s390_chunkify_finish (struct constant_pool *pool_list)
8582 {
8583 struct constant_pool *curr_pool = NULL;
8584 rtx_insn *insn;
8585
8586
8587 /* Replace all literal pool references. */
8588
8589 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8590 {
8591 if (INSN_P (insn))
8592 replace_ltrel_base (&PATTERN (insn));
8593
8594 curr_pool = s390_find_pool (pool_list, insn);
8595 if (!curr_pool)
8596 continue;
8597
8598 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8599 {
8600 rtx addr, pool_ref = NULL_RTX;
8601 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8602 if (pool_ref)
8603 {
8604 if (s390_execute_label (insn))
8605 addr = s390_find_execute (curr_pool, insn);
8606 else
8607 addr = s390_find_constant (curr_pool,
8608 get_pool_constant (pool_ref),
8609 get_pool_mode (pool_ref));
8610
8611 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8612 INSN_CODE (insn) = -1;
8613 }
8614 }
8615 }
8616
8617 /* Dump out all literal pools. */
8618
8619 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8620 s390_dump_pool (curr_pool, 0);
8621
8622 /* Free pool list. */
8623
8624 while (pool_list)
8625 {
8626 struct constant_pool *next = pool_list->next;
8627 s390_free_pool (pool_list);
8628 pool_list = next;
8629 }
8630 }
8631
8632 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
8633 We have decided we cannot use this list, so revert all changes
8634 to the current function that were done by s390_chunkify_start. */
8635
8636 static void
8637 s390_chunkify_cancel (struct constant_pool *pool_list)
8638 {
8639 struct constant_pool *curr_pool = NULL;
8640 rtx_insn *insn;
8641
8642 /* Remove all pool placeholder insns. */
8643
8644 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8645 {
8646 /* Did we insert an extra barrier? Remove it. */
8647 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
8648 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
8649 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
8650
8651 if (jump && JUMP_P (jump)
8652 && barrier && BARRIER_P (barrier)
8653 && label && LABEL_P (label)
8654 && GET_CODE (PATTERN (jump)) == SET
8655 && SET_DEST (PATTERN (jump)) == pc_rtx
8656 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
8657 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
8658 {
8659 remove_insn (jump);
8660 remove_insn (barrier);
8661 remove_insn (label);
8662 }
8663
8664 remove_insn (curr_pool->pool_insn);
8665 }
8666
8667 /* Remove all base register reload insns. */
8668
8669 for (insn = get_insns (); insn; )
8670 {
8671 rtx_insn *next_insn = NEXT_INSN (insn);
8672
8673 if (NONJUMP_INSN_P (insn)
8674 && GET_CODE (PATTERN (insn)) == SET
8675 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
8676 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
8677 remove_insn (insn);
8678
8679 insn = next_insn;
8680 }
8681
8682 /* Free pool list. */
8683
8684 while (pool_list)
8685 {
8686 struct constant_pool *next = pool_list->next;
8687 s390_free_pool (pool_list);
8688 pool_list = next;
8689 }
8690 }
8691
8692 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
8693
8694 void
8695 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
8696 {
8697 REAL_VALUE_TYPE r;
8698
8699 switch (GET_MODE_CLASS (mode))
8700 {
8701 case MODE_FLOAT:
8702 case MODE_DECIMAL_FLOAT:
8703 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
8704
8705 REAL_VALUE_FROM_CONST_DOUBLE (r, exp);
8706 assemble_real (r, mode, align);
8707 break;
8708
8709 case MODE_INT:
8710 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
8711 mark_symbol_refs_as_used (exp);
8712 break;
8713
8714 case MODE_VECTOR_INT:
8715 case MODE_VECTOR_FLOAT:
8716 {
8717 int i;
8718 machine_mode inner_mode;
8719 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
8720
8721 inner_mode = GET_MODE_INNER (GET_MODE (exp));
8722 for (i = 0; i < XVECLEN (exp, 0); i++)
8723 s390_output_pool_entry (XVECEXP (exp, 0, i),
8724 inner_mode,
8725 i == 0
8726 ? align
8727 : GET_MODE_BITSIZE (inner_mode));
8728 }
8729 break;
8730
8731 default:
8732 gcc_unreachable ();
8733 }
8734 }
8735
8736
8737 /* Return an RTL expression representing the value of the return address
8738 for the frame COUNT steps up from the current frame. FRAME is the
8739 frame pointer of that frame. */
8740
8741 rtx
8742 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
8743 {
8744 int offset;
8745 rtx addr;
8746
8747 /* Without backchain, we fail for all but the current frame. */
8748
8749 if (!TARGET_BACKCHAIN && count > 0)
8750 return NULL_RTX;
8751
8752 /* For the current frame, we need to make sure the initial
8753 value of RETURN_REGNUM is actually saved. */
8754
8755 if (count == 0)
8756 {
8757 /* On non-z architectures branch splitting could overwrite r14. */
8758 if (TARGET_CPU_ZARCH)
8759 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
8760 else
8761 {
8762 cfun_frame_layout.save_return_addr_p = true;
8763 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
8764 }
8765 }
8766
8767 if (TARGET_PACKED_STACK)
8768 offset = -2 * UNITS_PER_LONG;
8769 else
8770 offset = RETURN_REGNUM * UNITS_PER_LONG;
8771
8772 addr = plus_constant (Pmode, frame, offset);
8773 addr = memory_address (Pmode, addr);
8774 return gen_rtx_MEM (Pmode, addr);
8775 }
8776
8777 /* Return an RTL expression representing the back chain stored in
8778 the current stack frame. */
8779
8780 rtx
8781 s390_back_chain_rtx (void)
8782 {
8783 rtx chain;
8784
8785 gcc_assert (TARGET_BACKCHAIN);
8786
8787 if (TARGET_PACKED_STACK)
8788 chain = plus_constant (Pmode, stack_pointer_rtx,
8789 STACK_POINTER_OFFSET - UNITS_PER_LONG);
8790 else
8791 chain = stack_pointer_rtx;
8792
8793 chain = gen_rtx_MEM (Pmode, chain);
8794 return chain;
8795 }
8796
8797 /* Find the first call-clobbered register unused in a function.
8798 This could be used as a base register in a leaf function
8799 or for holding the return address before the epilogue. */
8800
8801 static int
8802 find_unused_clobbered_reg (void)
8803 {
8804 int i;
8805 for (i = 0; i < 6; i++)
8806 if (!df_regs_ever_live_p (i))
8807 return i;
8808 return 0;
8809 }
8810
8811
8812 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
8813 clobbered hard regs in SETREG. */
8814
8815 static void
8816 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
8817 {
8818 char *regs_ever_clobbered = (char *)data;
8819 unsigned int i, regno;
8820 machine_mode mode = GET_MODE (setreg);
8821
8822 if (GET_CODE (setreg) == SUBREG)
8823 {
8824 rtx inner = SUBREG_REG (setreg);
8825 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
8826 return;
8827 regno = subreg_regno (setreg);
8828 }
8829 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
8830 regno = REGNO (setreg);
8831 else
8832 return;
8833
8834 for (i = regno;
8835 i < regno + HARD_REGNO_NREGS (regno, mode);
8836 i++)
8837 regs_ever_clobbered[i] = 1;
8838 }
8839
8840 /* Walks through all basic blocks of the current function looking
8841 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
8842 of the passed char array REGS_EVER_CLOBBERED are set to one for
8843 each of those regs. */
8844
8845 static void
8846 s390_regs_ever_clobbered (char regs_ever_clobbered[])
8847 {
8848 basic_block cur_bb;
8849 rtx_insn *cur_insn;
8850 unsigned int i;
8851
8852 memset (regs_ever_clobbered, 0, 32);
8853
8854 /* For non-leaf functions we have to consider all call clobbered regs to be
8855 clobbered. */
8856 if (!crtl->is_leaf)
8857 {
8858 for (i = 0; i < 32; i++)
8859 regs_ever_clobbered[i] = call_really_used_regs[i];
8860 }
8861
8862 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
8863 this work is done by liveness analysis (mark_regs_live_at_end).
8864 Special care is needed for functions containing landing pads. Landing pads
8865 may use the eh registers, but the code which sets these registers is not
8866 contained in that function. Hence s390_regs_ever_clobbered is not able to
8867 deal with this automatically. */
8868 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
8869 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
8870 if (crtl->calls_eh_return
8871 || (cfun->machine->has_landing_pad_p
8872 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
8873 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
8874
8875 /* For nonlocal gotos all call-saved registers have to be saved.
8876 This flag is also set for the unwinding code in libgcc.
8877 See expand_builtin_unwind_init. For regs_ever_live this is done by
8878 reload. */
8879 if (crtl->saves_all_registers)
8880 for (i = 0; i < 32; i++)
8881 if (!call_really_used_regs[i])
8882 regs_ever_clobbered[i] = 1;
8883
8884 FOR_EACH_BB_FN (cur_bb, cfun)
8885 {
8886 FOR_BB_INSNS (cur_bb, cur_insn)
8887 {
8888 rtx pat;
8889
8890 if (!INSN_P (cur_insn))
8891 continue;
8892
8893 pat = PATTERN (cur_insn);
8894
8895 /* Ignore GPR restore insns. */
8896 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
8897 {
8898 if (GET_CODE (pat) == SET
8899 && GENERAL_REG_P (SET_DEST (pat)))
8900 {
8901 /* lgdr */
8902 if (GET_MODE (SET_SRC (pat)) == DImode
8903 && FP_REG_P (SET_SRC (pat)))
8904 continue;
8905
8906 /* l / lg */
8907 if (GET_CODE (SET_SRC (pat)) == MEM)
8908 continue;
8909 }
8910
8911 /* lm / lmg */
8912 if (GET_CODE (pat) == PARALLEL
8913 && load_multiple_operation (pat, VOIDmode))
8914 continue;
8915 }
8916
8917 note_stores (pat,
8918 s390_reg_clobbered_rtx,
8919 regs_ever_clobbered);
8920 }
8921 }
8922 }
8923
8924 /* Determine the frame area which actually has to be accessed
8925 in the function epilogue. The values are stored at the
8926 given pointers AREA_BOTTOM (address of the lowest used stack
8927 address) and AREA_TOP (address of the first item which does
8928 not belong to the stack frame). */
8929
8930 static void
8931 s390_frame_area (int *area_bottom, int *area_top)
8932 {
8933 int b, t;
8934
8935 b = INT_MAX;
8936 t = INT_MIN;
8937
8938 if (cfun_frame_layout.first_restore_gpr != -1)
8939 {
8940 b = (cfun_frame_layout.gprs_offset
8941 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
8942 t = b + (cfun_frame_layout.last_restore_gpr
8943 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
8944 }
8945
8946 if (TARGET_64BIT && cfun_save_high_fprs_p)
8947 {
8948 b = MIN (b, cfun_frame_layout.f8_offset);
8949 t = MAX (t, (cfun_frame_layout.f8_offset
8950 + cfun_frame_layout.high_fprs * 8));
8951 }
8952
8953 if (!TARGET_64BIT)
8954 {
8955 if (cfun_fpr_save_p (FPR4_REGNUM))
8956 {
8957 b = MIN (b, cfun_frame_layout.f4_offset);
8958 t = MAX (t, cfun_frame_layout.f4_offset + 8);
8959 }
8960 if (cfun_fpr_save_p (FPR6_REGNUM))
8961 {
8962 b = MIN (b, cfun_frame_layout.f4_offset + 8);
8963 t = MAX (t, cfun_frame_layout.f4_offset + 16);
8964 }
8965 }
8966 *area_bottom = b;
8967 *area_top = t;
8968 }
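/* In the routines below cfun_gpr_save_slot (REGNO) uses the following
   encoding: 0 means the GPR does not need to be saved, -1 means it is
   saved in its stack slot, and an FPR register number means it is
   saved in that FPR (see s390_register_info_gprtofpr).  */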
8969 /* Update gpr_save_slots in the frame layout trying to make use of
8970 FPRs as GPR save slots.
8971 This is a helper routine of s390_register_info. */
8972
8973 static void
8974 s390_register_info_gprtofpr ()
8975 {
8976 int save_reg_slot = FPR0_REGNUM;
8977 int i, j;
8978
8979 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
8980 return;
8981
8982 for (i = 15; i >= 6; i--)
8983 {
8984 if (cfun_gpr_save_slot (i) == 0)
8985 continue;
8986
8987 /* Advance to the next FP register which can be used as a
8988 GPR save slot. */
8989 while ((!call_really_used_regs[save_reg_slot]
8990 || df_regs_ever_live_p (save_reg_slot)
8991 || cfun_fpr_save_p (save_reg_slot))
8992 && FP_REGNO_P (save_reg_slot))
8993 save_reg_slot++;
8994 if (!FP_REGNO_P (save_reg_slot))
8995 {
8996 /* We only want to use ldgr/lgdr if we can get rid of
8997 stm/lm entirely. So undo the gpr slot allocation in
8998 case we ran out of FPR save slots. */
8999 for (j = 6; j <= 15; j++)
9000 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9001 cfun_gpr_save_slot (j) = -1;
9002 break;
9003 }
9004 cfun_gpr_save_slot (i) = save_reg_slot++;
9005 }
9006 }
9007
9008 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9009 stdarg.
9010 This is a helper routine for s390_register_info. */
9011
9012 static void
9013 s390_register_info_stdarg_fpr ()
9014 {
9015 int i;
9016 int min_fpr;
9017 int max_fpr;
9018
9019 /* Save the FP argument regs for stdarg. f0 and f2 for 31 bit;
9020 f0, f2, f4 and f6 for 64 bit. */
9021 if (!cfun->stdarg
9022 || !TARGET_HARD_FLOAT
9023 || !cfun->va_list_fpr_size
9024 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9025 return;
9026
9027 min_fpr = crtl->args.info.fprs;
9028 max_fpr = min_fpr + cfun->va_list_fpr_size;
9029 if (max_fpr > FP_ARG_NUM_REG)
9030 max_fpr = FP_ARG_NUM_REG;
9031
9032 for (i = min_fpr; i < max_fpr; i++)
9033 cfun_set_fpr_save (i + FPR0_REGNUM);
9034 }
9035
9036 /* Reserve the GPR save slots for GPRs which need to be saved due to
9037 stdarg.
9038 This is a helper routine for s390_register_info. */
9039
9040 static void
9041 s390_register_info_stdarg_gpr ()
9042 {
9043 int i;
9044 int min_gpr;
9045 int max_gpr;
9046
9047 if (!cfun->stdarg
9048 || !cfun->va_list_gpr_size
9049 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9050 return;
9051
9052 min_gpr = crtl->args.info.gprs;
9053 max_gpr = min_gpr + cfun->va_list_gpr_size;
9054 if (max_gpr > GP_ARG_NUM_REG)
9055 max_gpr = GP_ARG_NUM_REG;
9056
9057 for (i = min_gpr; i < max_gpr; i++)
9058 cfun_gpr_save_slot (2 + i) = -1;
9059 }
9060
9061 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9062 for registers which need to be saved in function prologue.
9063 This function can be used until the insns emitted for save/restore
9064 of the regs are visible in the RTL stream. */
9065
9066 static void
9067 s390_register_info ()
9068 {
9069 int i, j;
9070 char clobbered_regs[32];
9071
9072 gcc_assert (!epilogue_completed);
9073
9074 if (reload_completed)
9075 /* After reload we rely on our own routine to determine which
9076 registers need saving. */
9077 s390_regs_ever_clobbered (clobbered_regs);
9078 else
9079 /* During reload we use regs_ever_live as a base since reload
9080 makes changes to it which we otherwise would not be aware
9081 of. */
9082 for (i = 0; i < 32; i++)
9083 clobbered_regs[i] = df_regs_ever_live_p (i);
9084
9085 for (i = 0; i < 32; i++)
9086 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9087
9088 /* Mark the call-saved FPRs which need to be saved.
9089 This needs to be done before checking the special GPRs since the
9090 stack pointer usage depends on whether high FPRs have to be saved
9091 or not. */
9092 cfun_frame_layout.fpr_bitmap = 0;
9093 cfun_frame_layout.high_fprs = 0;
9094 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9095 if (clobbered_regs[i] && !call_really_used_regs[i])
9096 {
9097 cfun_set_fpr_save (i);
9098 if (i >= FPR8_REGNUM)
9099 cfun_frame_layout.high_fprs++;
9100 }
9101
9102 if (flag_pic)
9103 clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
9104 |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
9105
9106 clobbered_regs[BASE_REGNUM]
9107 |= (cfun->machine->base_reg
9108 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9109
9110 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9111 |= !!frame_pointer_needed;
9112
9113 /* On pre-z900 machines the decision whether r14 needs to be saved
9114 might not be made until the machine-dependent reorg pass.
9115 save_return_addr_p will only be set on non-zarch machines so
9116 there is no risk that r14 goes into an FPR instead of a stack
9117 slot. */
9118 clobbered_regs[RETURN_REGNUM]
9119 |= (!crtl->is_leaf
9120 || TARGET_TPF_PROFILING
9121 || cfun->machine->split_branches_pending_p
9122 || cfun_frame_layout.save_return_addr_p
9123 || crtl->calls_eh_return);
9124
9125 clobbered_regs[STACK_POINTER_REGNUM]
9126 |= (!crtl->is_leaf
9127 || TARGET_TPF_PROFILING
9128 || cfun_save_high_fprs_p
9129 || get_frame_size () > 0
9130 || (reload_completed && cfun_frame_layout.frame_size > 0)
9131 || cfun->calls_alloca);
9132
9133 memset (cfun_frame_layout.gpr_save_slots, 0, 16);
9134
9135 for (i = 6; i < 16; i++)
9136 if (clobbered_regs[i])
9137 cfun_gpr_save_slot (i) = -1;
9138
9139 s390_register_info_stdarg_fpr ();
9140 s390_register_info_gprtofpr ();
9141
9142 /* First find the range of GPRs to be restored. Vararg regs don't
9143 need to be restored so we do it before assigning slots to the
9144 vararg GPRs. */
9145 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
9146 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
9147 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9148 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9149
9150 /* stdarg functions might need to save GPRs 2 to 6. This might
9151 override the GPR->FPR save decision made above for r6 since
9152 vararg regs must go to the stack. */
9153 s390_register_info_stdarg_gpr ();
9154
9155 /* Now the range of GPRs which need saving. */
9156 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
9157 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
9158 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9159 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9160 }
9161
9162 /* This function is called by s390_optimize_prologue in order to get
9163 rid of unnecessary GPR save/restore instructions. The register info
9164 for the GPRs is re-computed and the ranges are re-calculated. */
9165
9166 static void
9167 s390_optimize_register_info ()
9168 {
9169 char clobbered_regs[32];
9170 int i, j;
9171
9172 gcc_assert (epilogue_completed);
9173 gcc_assert (!cfun->machine->split_branches_pending_p);
9174
9175 s390_regs_ever_clobbered (clobbered_regs);
9176
9177 for (i = 0; i < 32; i++)
9178 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9179
9180 /* There is still special treatment needed for cases invisible to
9181 s390_regs_ever_clobbered. */
9182 clobbered_regs[RETURN_REGNUM]
9183 |= (TARGET_TPF_PROFILING
9184 /* When expanding builtin_return_addr in ESA mode we do not
9185 know whether r14 will later be needed as scratch reg when
9186 doing branch splitting. So the builtin always accesses the
9187 r14 save slot and we need to stick to the save/restore
9188 decision for r14 even if it turns out that it didn't get
9189 clobbered. */
9190 || cfun_frame_layout.save_return_addr_p
9191 || crtl->calls_eh_return);
9192
9193 memset (cfun_frame_layout.gpr_save_slots, 0, 6);
9194
9195 for (i = 6; i < 16; i++)
9196 if (!clobbered_regs[i])
9197 cfun_gpr_save_slot (i) = 0;
9198
9199 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
9200 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
9201 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9202 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9203
9204 s390_register_info_stdarg_gpr ();
9205
9206 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
9207 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
9208 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9209 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9210 }
9211
9212 /* Fill cfun->machine with info about frame of current function. */
9213
9214 static void
9215 s390_frame_info (void)
9216 {
9217 HOST_WIDE_INT lowest_offset;
9218
9219 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9220 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9221
9222 /* The va_arg builtin uses a constant distance of 16 *
9223 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9224 pointer. So even if we are going to save the stack pointer in an
9225 FPR we need the stack space in order to keep the offsets
9226 correct. */
9227 if (cfun->stdarg && cfun_save_arg_fprs_p)
9228 {
9229 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9230
9231 if (cfun_frame_layout.first_save_gpr_slot == -1)
9232 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9233 }
9234
9235 cfun_frame_layout.frame_size = get_frame_size ();
9236 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9237 fatal_error (input_location,
9238 "total size of local variables exceeds architecture limit");
9239
9240 if (!TARGET_PACKED_STACK)
9241 {
9242 /* Fixed stack layout. */
9243 cfun_frame_layout.backchain_offset = 0;
9244 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9245 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9246 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9247 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9248 * UNITS_PER_LONG);
9249 }
9250 else if (TARGET_BACKCHAIN)
9251 {
9252 /* Kernel stack layout - packed stack, backchain, no float */
9253 gcc_assert (TARGET_SOFT_FLOAT);
9254 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9255 - UNITS_PER_LONG);
9256
9257 /* The distance between the backchain and the return address
9258 save slot must not change. So we always need a slot for the
9259 stack pointer which resides in between. */
9260 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9261
9262 cfun_frame_layout.gprs_offset
9263 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9264
9265 /* FPRs will not be saved. Nevertheless pick sane values to
9266 keep area calculations valid. */
9267 cfun_frame_layout.f0_offset =
9268 cfun_frame_layout.f4_offset =
9269 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9270 }
9271 else
9272 {
9273 int num_fprs;
9274
9275 /* Packed stack layout without backchain. */
9276
9277 /* With stdarg FPRs need their dedicated slots. */
9278 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9279 : (cfun_fpr_save_p (FPR4_REGNUM) +
9280 cfun_fpr_save_p (FPR6_REGNUM)));
9281 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9282
9283 num_fprs = (cfun->stdarg ? 2
9284 : (cfun_fpr_save_p (FPR0_REGNUM)
9285 + cfun_fpr_save_p (FPR2_REGNUM)));
9286 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9287
9288 cfun_frame_layout.gprs_offset
9289 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9290
9291 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9292 - cfun_frame_layout.high_fprs * 8);
9293 }
9294
9295 if (cfun_save_high_fprs_p)
9296 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9297
9298 if (!crtl->is_leaf)
9299 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9300
9301 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9302 sized area at the bottom of the stack. This is required also for
9303 leaf functions. When GCC generates a local stack reference it
9304 will always add STACK_POINTER_OFFSET to all these references. */
9305 if (crtl->is_leaf
9306 && !TARGET_TPF_PROFILING
9307 && cfun_frame_layout.frame_size == 0
9308 && !cfun->calls_alloca)
9309 return;
9310
9311 /* Calculate the number of bytes we have used in our own register
9312 save area. With the packed stack layout we can re-use the
9313 remaining bytes for normal stack elements. */
9314
9315 if (TARGET_PACKED_STACK)
9316 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9317 cfun_frame_layout.f4_offset),
9318 cfun_frame_layout.gprs_offset);
9319 else
9320 lowest_offset = 0;
9321
9322 if (TARGET_BACKCHAIN)
9323 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9324
9325 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9326
9327 /* If under 31 bit an odd number of GPRs has to be saved, we have to
9328 adjust the frame size to sustain 8 byte alignment of stack
9329 frames. */
9330 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9331 STACK_BOUNDARY / BITS_PER_UNIT - 1)
9332 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
9333 }
9334
9335 /* Generate frame layout. Fills in register and frame data for the current
9336 function in cfun->machine. This routine can be called multiple times;
9337 it will re-do the complete frame layout every time. */
9338
9339 static void
9340 s390_init_frame_layout (void)
9341 {
9342 HOST_WIDE_INT frame_size;
9343 int base_used;
9344
9345 gcc_assert (!reload_completed);
9346
9347 /* On S/390 machines, we may need to perform branch splitting, which
9348 will require both base and return address register. We have no
9349 choice but to assume we're going to need them until right at the
9350 end of the machine dependent reorg phase. */
9351 if (!TARGET_CPU_ZARCH)
9352 cfun->machine->split_branches_pending_p = true;
9353
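/* The register and frame information depend on each other: whether a
   literal pool base register is needed depends on the frame size (see
   base_used below), while the frame size depends on which registers
   have to be saved.  Iterate until the frame size converges.  */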
9354 do
9355 {
9356 frame_size = cfun_frame_layout.frame_size;
9357
9358 /* Try to predict whether we'll need the base register. */
9359 base_used = cfun->machine->split_branches_pending_p
9360 || crtl->uses_const_pool
9361 || (!DISP_IN_RANGE (frame_size)
9362 && !CONST_OK_FOR_K (frame_size));
9363
9364 /* Decide which register to use as literal pool base. In small
9365 leaf functions, try to use an unused call-clobbered register
9366 as base register to avoid save/restore overhead. */
9367 if (!base_used)
9368 cfun->machine->base_reg = NULL_RTX;
9369 else if (crtl->is_leaf && !df_regs_ever_live_p (5))
9370 cfun->machine->base_reg = gen_rtx_REG (Pmode, 5);
9371 else
9372 cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);
9373
9374 s390_register_info ();
9375 s390_frame_info ();
9376 }
9377 while (frame_size != cfun_frame_layout.frame_size);
9378 }
9379
9380 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
9381 the TX is nonescaping. A transaction is considered escaping if
9382 there is at least one path from tbegin returning CC0 to the
9383 function exit block without a tend.
9384
9385 The check so far has some limitations:
9386 - only single tbegin/tend BBs are supported
9387 - the first cond jump after tbegin must separate the CC0 path from ~CC0
9388 - when CC is copied to a GPR and the CC0 check is done with the GPR
9389 this is not supported
9390 */
9391
9392 static void
9393 s390_optimize_nonescaping_tx (void)
9394 {
9395 const unsigned int CC0 = 1 << 3;
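/* In CCRAWmode the branch condition is a 4-bit mask with one bit per
   condition-code value; 1 << 3 selects CC 0, the value tbegin sets
   when a transaction was started successfully.  */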
9396 basic_block tbegin_bb = NULL;
9397 basic_block tend_bb = NULL;
9398 basic_block bb;
9399 rtx_insn *insn;
9400 bool result = true;
9401 int bb_index;
9402 rtx_insn *tbegin_insn = NULL;
9403
9404 if (!cfun->machine->tbegin_p)
9405 return;
9406
9407 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
9408 {
9409 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
9410
9411 if (!bb)
9412 continue;
9413
9414 FOR_BB_INSNS (bb, insn)
9415 {
9416 rtx ite, cc, pat, target;
9417 unsigned HOST_WIDE_INT mask;
9418
9419 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
9420 continue;
9421
9422 pat = PATTERN (insn);
9423
9424 if (GET_CODE (pat) == PARALLEL)
9425 pat = XVECEXP (pat, 0, 0);
9426
9427 if (GET_CODE (pat) != SET
9428 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
9429 continue;
9430
9431 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
9432 {
9433 rtx_insn *tmp;
9434
9435 tbegin_insn = insn;
9436
9437 /* Just return if the tbegin doesn't have clobbers. */
9438 if (GET_CODE (PATTERN (insn)) != PARALLEL)
9439 return;
9440
9441 if (tbegin_bb != NULL)
9442 return;
9443
9444 /* Find the next conditional jump. */
9445 for (tmp = NEXT_INSN (insn);
9446 tmp != NULL_RTX;
9447 tmp = NEXT_INSN (tmp))
9448 {
9449 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
9450 return;
9451 if (!JUMP_P (tmp))
9452 continue;
9453
9454 ite = SET_SRC (PATTERN (tmp));
9455 if (GET_CODE (ite) != IF_THEN_ELSE)
9456 continue;
9457
9458 cc = XEXP (XEXP (ite, 0), 0);
9459 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
9460 || GET_MODE (cc) != CCRAWmode
9461 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
9462 return;
9463
9464 if (bb->succs->length () != 2)
9465 return;
9466
9467 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
9468 if (GET_CODE (XEXP (ite, 0)) == NE)
9469 mask ^= 0xf;
9470
9471 if (mask == CC0)
9472 target = XEXP (ite, 1);
9473 else if (mask == (CC0 ^ 0xf))
9474 target = XEXP (ite, 2);
9475 else
9476 return;
9477
9478 {
9479 edge_iterator ei;
9480 edge e1, e2;
9481
9482 ei = ei_start (bb->succs);
9483 e1 = ei_safe_edge (ei);
9484 ei_next (&ei);
9485 e2 = ei_safe_edge (ei);
9486
9487 if (e2->flags & EDGE_FALLTHRU)
9488 {
9489 e2 = e1;
9490 e1 = ei_safe_edge (ei);
9491 }
9492
9493 if (!(e1->flags & EDGE_FALLTHRU))
9494 return;
9495
9496 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
9497 }
9498 if (tmp == BB_END (bb))
9499 break;
9500 }
9501 }
9502
9503 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
9504 {
9505 if (tend_bb != NULL)
9506 return;
9507 tend_bb = bb;
9508 }
9509 }
9510 }
9511
9512 /* Either we successfully remove the FPR clobbers here or we are not
9513 able to do anything for this TX. Both cases don't qualify for
9514 another look. */
9515 cfun->machine->tbegin_p = false;
9516
9517 if (tbegin_bb == NULL || tend_bb == NULL)
9518 return;
9519
9520 calculate_dominance_info (CDI_POST_DOMINATORS);
9521 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
9522 free_dominance_info (CDI_POST_DOMINATORS);
9523
9524 if (!result)
9525 return;
9526
9527 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
9528 gen_rtvec (2,
9529 XVECEXP (PATTERN (tbegin_insn), 0, 0),
9530 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
9531 INSN_CODE (tbegin_insn) = -1;
9532 df_insn_rescan (tbegin_insn);
9533
9534 return;
9535 }
9536
9537 /* Return true if it is legal to put a value with MODE into REGNO. */
9538
9539 bool
9540 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
9541 {
9542 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
9543 return false;
9544
9545 switch (REGNO_REG_CLASS (regno))
9546 {
9547 case VEC_REGS:
9548 return ((GET_MODE_CLASS (mode) == MODE_INT
9549 && s390_class_max_nregs (VEC_REGS, mode) == 1)
9550 || mode == DFmode
9551 || s390_vector_mode_supported_p (mode));
9552 break;
9553 case FP_REGS:
9554 if (TARGET_VX
9555 && ((GET_MODE_CLASS (mode) == MODE_INT
9556 && s390_class_max_nregs (FP_REGS, mode) == 1)
9557 || mode == DFmode
9558 || s390_vector_mode_supported_p (mode)))
9559 return true;
9560
9561 if (REGNO_PAIR_OK (regno, mode))
9562 {
9563 if (mode == SImode || mode == DImode)
9564 return true;
9565
9566 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
9567 return true;
9568 }
9569 break;
9570 case ADDR_REGS:
9571 if (FRAME_REGNO_P (regno) && mode == Pmode)
9572 return true;
9573
9574 /* fallthrough */
9575 case GENERAL_REGS:
9576 if (REGNO_PAIR_OK (regno, mode))
9577 {
9578 if (TARGET_ZARCH
9579 || (mode != TFmode && mode != TCmode && mode != TDmode))
9580 return true;
9581 }
9582 break;
9583 case CC_REGS:
9584 if (GET_MODE_CLASS (mode) == MODE_CC)
9585 return true;
9586 break;
9587 case ACCESS_REGS:
9588 if (REGNO_PAIR_OK (regno, mode))
9589 {
9590 if (mode == SImode || mode == Pmode)
9591 return true;
9592 }
9593 break;
9594 default:
9595 return false;
9596 }
9597
9598 return false;
9599 }
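/* A few concrete consequences of the checks above, for illustration:

     - DFmode fits a single FPR, and with the vector facility
       (TARGET_VX) it is also accepted in the vector-only registers
       via the VEC_REGS case.
     - In GPRs, TFmode/TCmode/TDmode additionally require TARGET_ZARCH
       on top of REGNO_PAIR_OK.
     - CC modes are only accepted in CC_REGS, and access registers
       only hold SImode/Pmode values.  */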
9600
9601 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
9602
9603 bool
9604 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
9605 {
9606 /* Once we've decided upon a register to use as base register, it must
9607 no longer be used for any other purpose. */
9608 if (cfun->machine->base_reg)
9609 if (REGNO (cfun->machine->base_reg) == old_reg
9610 || REGNO (cfun->machine->base_reg) == new_reg)
9611 return false;
9612
9613 /* Prevent regrename from using call-saved regs which haven't
9614 actually been saved. This is necessary since regrename assumes
9615 the backend save/restore decisions are based on
9616 df_regs_ever_live. Since we have our own routine we have to tell
9617 regrename manually about it. */
9618 if (GENERAL_REGNO_P (new_reg)
9619 && !call_really_used_regs[new_reg]
9620 && cfun_gpr_save_slot (new_reg) == 0)
9621 return false;
9622
9623 return true;
9624 }
9625
9626 /* Return nonzero if register REGNO can be used as a scratch register
9627 in peephole2. */
9628
9629 static bool
9630 s390_hard_regno_scratch_ok (unsigned int regno)
9631 {
9632 /* See s390_hard_regno_rename_ok. */
9633 if (GENERAL_REGNO_P (regno)
9634 && !call_really_used_regs[regno]
9635 && cfun_gpr_save_slot (regno) == 0)
9636 return false;
9637
9638 return true;
9639 }
9640
9641 /* Maximum number of registers to represent a value of mode MODE
9642 in a register of class RCLASS. */
9643
9644 int
9645 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
9646 {
9647 int reg_size;
9648 bool reg_pair_required_p = false;
9649
9650 switch (rclass)
9651 {
9652 case FP_REGS:
9653 case VEC_REGS:
9654 reg_size = TARGET_VX ? 16 : 8;
9655
9656 /* TF and TD modes would fit into a VR but we put them into a
9657 register pair since we do not have 128bit FP instructions on
9658 full VRs. */
9659 if (TARGET_VX
9660 && SCALAR_FLOAT_MODE_P (mode)
9661 && GET_MODE_SIZE (mode) >= 16)
9662 reg_pair_required_p = true;
9663
9664 /* Even if complex types would fit into a single FPR/VR we force
9665 them into a register pair to deal with the parts more easily.
9666 (FIXME: What about complex ints?) */
9667 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
9668 reg_pair_required_p = true;
9669 break;
9670 case ACCESS_REGS:
9671 reg_size = 4;
9672 break;
9673 default:
9674 reg_size = UNITS_PER_WORD;
9675 break;
9676 }
9677
9678 if (reg_pair_required_p)
9679 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
9680
9681 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
9682 }
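/* Two worked examples of the computation above (illustrative only):

     - TFmode (16 bytes) in FP_REGS with the vector facility:
       reg_size is 16 and reg_pair_required_p is true, so the result
       is 2 * ((16/2 + 16 - 1) / 16) = 2, i.e. an FPR pair.

     - DImode (8 bytes) in GENERAL_REGS on 31 bit (UNITS_PER_WORD == 4):
       (8 + 4 - 1) / 4 = 2, i.e. a GPR pair.  */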
9683
9684 /* Return TRUE if changing mode from FROM to TO should not be allowed
9685 for register class CLASS. */
9686
9687 int
9688 s390_cannot_change_mode_class (machine_mode from_mode,
9689 machine_mode to_mode,
9690 enum reg_class rclass)
9691 {
9692 machine_mode small_mode;
9693 machine_mode big_mode;
9694
9695 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
9696 return 0;
9697
9698 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
9699 {
9700 small_mode = from_mode;
9701 big_mode = to_mode;
9702 }
9703 else
9704 {
9705 small_mode = to_mode;
9706 big_mode = from_mode;
9707 }
9708
9709 /* Values residing in VRs are little-endian style. All modes are
9710 placed left-aligned in a VR. This means that we cannot allow
9711 switching between modes with differing sizes. Also, if the vector
9712 facility is available we still place TFmode values in VR register
9713 pairs, since the only instructions we have operating on TFmode
9714 deal only with register pairs. Therefore we have to allow DFmode
9715 subregs of TFmode to enable the TFmode splitters. */
9716 if (reg_classes_intersect_p (VEC_REGS, rclass)
9717 && (GET_MODE_SIZE (small_mode) < 8
9718 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
9719 return 1;
9720
9721 /* Likewise for access registers, since they have only half the
9722 word size on 64-bit. */
9723 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
9724 return 1;
9725
9726 return 0;
9727 }
9728
9729 /* Return true if we use LRA instead of reload pass. */
9730 static bool
9731 s390_lra_p (void)
9732 {
9733 return s390_lra_flag;
9734 }
9735
9736 /* Return true if register FROM can be eliminated via register TO. */
9737
9738 static bool
9739 s390_can_eliminate (const int from, const int to)
9740 {
9741 /* On zSeries machines, we have not marked the base register as fixed.
9742 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
9743 If a function requires the base register, we say here that this
9744 elimination cannot be performed. This will cause reload to free
9745 up the base register (as if it were fixed). On the other hand,
9746 if the current function does *not* require the base register, we
9747 say here the elimination succeeds, which in turn allows reload
9748 to allocate the base register for any other purpose. */
9749 if (from == BASE_REGNUM && to == BASE_REGNUM)
9750 {
9751 if (TARGET_CPU_ZARCH)
9752 {
9753 s390_init_frame_layout ();
9754 return cfun->machine->base_reg == NULL_RTX;
9755 }
9756
9757 return false;
9758 }
9759
9760 /* Everything else must point into the stack frame. */
9761 gcc_assert (to == STACK_POINTER_REGNUM
9762 || to == HARD_FRAME_POINTER_REGNUM);
9763
9764 gcc_assert (from == FRAME_POINTER_REGNUM
9765 || from == ARG_POINTER_REGNUM
9766 || from == RETURN_ADDRESS_POINTER_REGNUM);
9767
9768 /* Make sure we actually saved the return address. */
9769 if (from == RETURN_ADDRESS_POINTER_REGNUM)
9770 if (!crtl->calls_eh_return
9771 && !cfun->stdarg
9772 && !cfun_frame_layout.save_return_addr_p)
9773 return false;
9774
9775 return true;
9776 }
9777
9778 /* Return offset between register FROM and TO initially after prolog. */
9779
9780 HOST_WIDE_INT
9781 s390_initial_elimination_offset (int from, int to)
9782 {
9783 HOST_WIDE_INT offset;
9784
9785 /* ??? Why are we called for non-eliminable pairs? */
9786 if (!s390_can_eliminate (from, to))
9787 return 0;
9788
9789 switch (from)
9790 {
9791 case FRAME_POINTER_REGNUM:
9792 offset = (get_frame_size()
9793 + STACK_POINTER_OFFSET
9794 + crtl->outgoing_args_size);
9795 break;
9796
9797 case ARG_POINTER_REGNUM:
9798 s390_init_frame_layout ();
9799 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
9800 break;
9801
9802 case RETURN_ADDRESS_POINTER_REGNUM:
9803 s390_init_frame_layout ();
9804
9805 if (cfun_frame_layout.first_save_gpr_slot == -1)
9806 {
9807 /* If it turns out that for stdarg nothing went into the reg
9808 save area we also do not need the return address
9809 pointer. */
9810 if (cfun->stdarg && !cfun_save_arg_fprs_p)
9811 return 0;
9812
9813 gcc_unreachable ();
9814 }
9815
9816 /* In order to make the following work it is not necessary for
9817 r14 to have a save slot. It is sufficient if one other GPR
9818 got one. Since the GPRs are always stored without gaps we
9819 are able to calculate where the r14 save slot would
9820 reside. */
9821 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
9822 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
9823 UNITS_PER_LONG);
9824 break;
9825
9826 case BASE_REGNUM:
9827 offset = 0;
9828 break;
9829
9830 default:
9831 gcc_unreachable ();
9832 }
9833
9834 return offset;
9835 }
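/* Worked example (illustrative; the numbers depend on the frame
   layout): for a 64-bit function with no locals and no outgoing
   arguments, eliminating FRAME_POINTER_REGNUM yields
   0 + STACK_POINTER_OFFSET + 0 = 160 bytes, while BASE_REGNUM always
   eliminates with offset 0.  */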
9836
9837 /* Emit insn to save fpr REGNUM at offset OFFSET relative
9838 to register BASE. Return generated insn. */
9839
9840 static rtx
9841 save_fpr (rtx base, int offset, int regnum)
9842 {
9843 rtx addr;
9844 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
9845
9846 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
9847 set_mem_alias_set (addr, get_varargs_alias_set ());
9848 else
9849 set_mem_alias_set (addr, get_frame_alias_set ());
9850
9851 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
9852 }
9853
9854 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
9855 to register BASE. Return generated insn. */
9856
9857 static rtx
9858 restore_fpr (rtx base, int offset, int regnum)
9859 {
9860 rtx addr;
9861 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
9862 set_mem_alias_set (addr, get_frame_alias_set ());
9863
9864 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
9865 }
9866
9867 /* Return true if REGNO is a global register, but not one
9868 of the special ones that need to be saved/restored anyway. */
9869
9870 static inline bool
9871 global_not_special_regno_p (int regno)
9872 {
9873 return (global_regs[regno]
9874 /* These registers are special and need to be
9875 restored in any case. */
9876 && !(regno == STACK_POINTER_REGNUM
9877 || regno == RETURN_REGNUM
9878 || regno == BASE_REGNUM
9879 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
9880 }
9881
9882 /* Generate insn to save registers FIRST to LAST into
9883 the register save area located at offset OFFSET
9884 relative to register BASE. */
9885
9886 static rtx
9887 save_gprs (rtx base, int offset, int first, int last)
9888 {
9889 rtx addr, insn, note;
9890 int i;
9891
9892 addr = plus_constant (Pmode, base, offset);
9893 addr = gen_rtx_MEM (Pmode, addr);
9894
9895 set_mem_alias_set (addr, get_frame_alias_set ());
9896
9897 /* Special-case single register. */
9898 if (first == last)
9899 {
9900 if (TARGET_64BIT)
9901 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
9902 else
9903 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
9904
9905 if (!global_not_special_regno_p (first))
9906 RTX_FRAME_RELATED_P (insn) = 1;
9907 return insn;
9908 }
9909
9910
9911 insn = gen_store_multiple (addr,
9912 gen_rtx_REG (Pmode, first),
9913 GEN_INT (last - first + 1));
9914
9915 if (first <= 6 && cfun->stdarg)
9916 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9917 {
9918 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
9919
9920 if (first + i <= 6)
9921 set_mem_alias_set (mem, get_varargs_alias_set ());
9922 }
9923
9924 /* We need to set the FRAME_RELATED flag on all SETs
9925 inside the store-multiple pattern.
9926
9927 However, we must not emit DWARF records for registers 2..5
9928 if they are stored for use by variable arguments ...
9929
9930 ??? Unfortunately, it is not enough to simply not set the
9931 FRAME_RELATED flags for those SETs, because the first SET
9932 of the PARALLEL is always treated as if it had the flag
9933 set, even if it does not. Therefore we emit a new pattern
9934 without those registers as REG_FRAME_RELATED_EXPR note. */
9935
9936 if (first >= 6 && !global_not_special_regno_p (first))
9937 {
9938 rtx pat = PATTERN (insn);
9939
9940 for (i = 0; i < XVECLEN (pat, 0); i++)
9941 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
9942 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
9943 0, i)))))
9944 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
9945
9946 RTX_FRAME_RELATED_P (insn) = 1;
9947 }
9948 else if (last >= 6)
9949 {
9950 int start;
9951
9952 for (start = first >= 6 ? first : 6; start <= last; start++)
9953 if (!global_not_special_regno_p (start))
9954 break;
9955
9956 if (start > last)
9957 return insn;
9958
9959 addr = plus_constant (Pmode, base,
9960 offset + (start - first) * UNITS_PER_LONG);
9961
9962 if (start == last)
9963 {
9964 if (TARGET_64BIT)
9965 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
9966 gen_rtx_REG (Pmode, start));
9967 else
9968 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
9969 gen_rtx_REG (Pmode, start));
9970 note = PATTERN (note);
9971
9972 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
9973 RTX_FRAME_RELATED_P (insn) = 1;
9974
9975 return insn;
9976 }
9977
9978 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
9979 gen_rtx_REG (Pmode, start),
9980 GEN_INT (last - start + 1));
9981 note = PATTERN (note);
9982
9983 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
9984
9985 for (i = 0; i < XVECLEN (note, 0); i++)
9986 if (GET_CODE (XVECEXP (note, 0, i)) == SET
9987 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
9988 0, i)))))
9989 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
9990
9991 RTX_FRAME_RELATED_P (insn) = 1;
9992 }
9993
9994 return insn;
9995 }
9996
9997 /* Generate insn to restore registers FIRST to LAST from
9998 the register save area located at offset OFFSET
9999 relative to register BASE. */
10000
10001 static rtx
10002 restore_gprs (rtx base, int offset, int first, int last)
10003 {
10004 rtx addr, insn;
10005
10006 addr = plus_constant (Pmode, base, offset);
10007 addr = gen_rtx_MEM (Pmode, addr);
10008 set_mem_alias_set (addr, get_frame_alias_set ());
10009
10010 /* Special-case single register. */
10011 if (first == last)
10012 {
10013 if (TARGET_64BIT)
10014 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10015 else
10016 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10017
10018 RTX_FRAME_RELATED_P (insn) = 1;
10019 return insn;
10020 }
10021
10022 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10023 addr,
10024 GEN_INT (last - first + 1));
10025 RTX_FRAME_RELATED_P (insn) = 1;
10026 return insn;
10027 }
10028
10029 /* Return insn sequence to load the GOT register. */
10030
10031 static GTY(()) rtx got_symbol;
10032 rtx_insn *
10033 s390_load_got (void)
10034 {
10035 rtx_insn *insns;
10036
10037 /* We cannot use pic_offset_table_rtx here since we use this
10038 function also for non-pic if __tls_get_offset is called and in
10039 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10040 aren't usable. */
10041 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10042
10043 if (!got_symbol)
10044 {
10045 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
10046 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
10047 }
10048
10049 start_sequence ();
10050
10051 if (TARGET_CPU_ZARCH)
10052 {
10053 emit_move_insn (got_rtx, got_symbol);
10054 }
10055 else
10056 {
10057 rtx offset;
10058
10059 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
10060 UNSPEC_LTREL_OFFSET);
10061 offset = gen_rtx_CONST (Pmode, offset);
10062 offset = force_const_mem (Pmode, offset);
10063
10064 emit_move_insn (got_rtx, offset);
10065
10066 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
10067 UNSPEC_LTREL_BASE);
10068 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
10069
10070 emit_move_insn (got_rtx, offset);
10071 }
10072
10073 insns = get_insns ();
10074 end_sequence ();
10075 return insns;
10076 }
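/* For orientation, the two code shapes produced above look roughly
   like this (assembly approximate):

     z/Architecture (TARGET_CPU_ZARCH):
         larl   %r12,_GLOBAL_OFFSET_TABLE_

     ESA mode: the GOT offset is loaded from the literal pool into
     %r12 and the literal-pool base (UNSPEC_LTREL_BASE) is then added
     on top of it.  */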
10077
10078 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10079 and the change to the stack pointer. */
10080
10081 static void
10082 s390_emit_stack_tie (void)
10083 {
10084 rtx mem = gen_frame_mem (BLKmode,
10085 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10086
10087 emit_insn (gen_stack_tie (mem));
10088 }
10089
10090 /* Copy GPRs into FPR save slots. */
10091
10092 static void
10093 s390_save_gprs_to_fprs (void)
10094 {
10095 int i;
10096
10097 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10098 return;
10099
10100 for (i = 6; i < 16; i++)
10101 {
10102 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10103 {
10104 rtx_insn *insn =
10105 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10106 gen_rtx_REG (DImode, i));
10107 RTX_FRAME_RELATED_P (insn) = 1;
10108 }
10109 }
10110 }
10111
10112 /* Restore GPRs from FPR save slots. */
10113
10114 static void
10115 s390_restore_gprs_from_fprs (void)
10116 {
10117 int i;
10118
10119 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10120 return;
10121
10122 for (i = 6; i < 16; i++)
10123 {
10124 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10125 {
10126 rtx_insn *insn =
10127 emit_move_insn (gen_rtx_REG (DImode, i),
10128 gen_rtx_REG (DImode, cfun_gpr_save_slot (i)));
10129 df_set_regs_ever_live (i, true);
10130 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10131 if (i == STACK_POINTER_REGNUM)
10132 add_reg_note (insn, REG_CFA_DEF_CFA,
10133 plus_constant (Pmode, stack_pointer_rtx,
10134 STACK_POINTER_OFFSET));
10135 RTX_FRAME_RELATED_P (insn) = 1;
10136 }
10137 }
10138 }
10139
10140
10141 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10142 generation. */
10143
10144 namespace {
10145
10146 const pass_data pass_data_s390_early_mach =
10147 {
10148 RTL_PASS, /* type */
10149 "early_mach", /* name */
10150 OPTGROUP_NONE, /* optinfo_flags */
10151 TV_MACH_DEP, /* tv_id */
10152 0, /* properties_required */
10153 0, /* properties_provided */
10154 0, /* properties_destroyed */
10155 0, /* todo_flags_start */
10156 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10157 };
10158
10159 class pass_s390_early_mach : public rtl_opt_pass
10160 {
10161 public:
10162 pass_s390_early_mach (gcc::context *ctxt)
10163 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10164 {}
10165
10166 /* opt_pass methods: */
10167 virtual unsigned int execute (function *);
10168
10169 }; // class pass_s390_early_mach
10170
10171 unsigned int
10172 pass_s390_early_mach::execute (function *fun)
10173 {
10174 rtx_insn *insn;
10175
10176 /* Try to get rid of the FPR clobbers. */
10177 s390_optimize_nonescaping_tx ();
10178
10179 /* Re-compute register info. */
10180 s390_register_info ();
10181
10182 /* If we're using a base register, ensure that it is always valid for
10183 the first non-prologue instruction. */
10184 if (fun->machine->base_reg)
10185 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10186
10187 /* Annotate all constant pool references to let the scheduler know
10188 they implicitly use the base register. */
10189 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10190 if (INSN_P (insn))
10191 {
10192 annotate_constant_pool_refs (&PATTERN (insn));
10193 df_insn_rescan (insn);
10194 }
10195 return 0;
10196 }
10197
10198 } // anon namespace
10199
10200 /* Expand the prologue into a bunch of separate insns. */
10201
10202 void
10203 s390_emit_prologue (void)
10204 {
10205 rtx insn, addr;
10206 rtx temp_reg;
10207 int i;
10208 int offset;
10209 int next_fpr = 0;
10210
10211 /* Choose best register to use for temp use within prologue.
10212 See below for why TPF must use register 1. */
10213
10214 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
10215 && !crtl->is_leaf
10216 && !TARGET_TPF_PROFILING)
10217 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10218 else
10219 temp_reg = gen_rtx_REG (Pmode, 1);
10220
10221 s390_save_gprs_to_fprs ();
10222
10223 /* Save call saved gprs. */
10224 if (cfun_frame_layout.first_save_gpr != -1)
10225 {
10226 insn = save_gprs (stack_pointer_rtx,
10227 cfun_frame_layout.gprs_offset +
10228 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
10229 - cfun_frame_layout.first_save_gpr_slot),
10230 cfun_frame_layout.first_save_gpr,
10231 cfun_frame_layout.last_save_gpr);
10232 emit_insn (insn);
10233 }
10234
10235 /* Dummy insn to mark literal pool slot. */
10236
10237 if (cfun->machine->base_reg)
10238 emit_insn (gen_main_pool (cfun->machine->base_reg));
10239
10240 offset = cfun_frame_layout.f0_offset;
10241
10242 /* Save f0 and f2. */
10243 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
10244 {
10245 if (cfun_fpr_save_p (i))
10246 {
10247 save_fpr (stack_pointer_rtx, offset, i);
10248 offset += 8;
10249 }
10250 else if (!TARGET_PACKED_STACK || cfun->stdarg)
10251 offset += 8;
10252 }
10253
10254 /* Save f4 and f6. */
10255 offset = cfun_frame_layout.f4_offset;
10256 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10257 {
10258 if (cfun_fpr_save_p (i))
10259 {
10260 insn = save_fpr (stack_pointer_rtx, offset, i);
10261 offset += 8;
10262
10263 /* If f4 and f6 are call-clobbered they are saved due to
10264 stdarg and therefore are not frame related. */
10265 if (!call_really_used_regs[i])
10266 RTX_FRAME_RELATED_P (insn) = 1;
10267 }
10268 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
10269 offset += 8;
10270 }
10271
10272 if (TARGET_PACKED_STACK
10273 && cfun_save_high_fprs_p
10274 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
10275 {
10276 offset = (cfun_frame_layout.f8_offset
10277 + (cfun_frame_layout.high_fprs - 1) * 8);
10278
10279 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
10280 if (cfun_fpr_save_p (i))
10281 {
10282 insn = save_fpr (stack_pointer_rtx, offset, i);
10283
10284 RTX_FRAME_RELATED_P (insn) = 1;
10285 offset -= 8;
10286 }
10287 if (offset >= cfun_frame_layout.f8_offset)
10288 next_fpr = i;
10289 }
10290
10291 if (!TARGET_PACKED_STACK)
10292 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
10293
10294 if (flag_stack_usage_info)
10295 current_function_static_stack_size = cfun_frame_layout.frame_size;
10296
10297 /* Decrement stack pointer. */
10298
10299 if (cfun_frame_layout.frame_size > 0)
10300 {
10301 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10302 rtx real_frame_off;
10303
10304 if (s390_stack_size)
10305 {
10306 HOST_WIDE_INT stack_guard;
10307
10308 if (s390_stack_guard)
10309 stack_guard = s390_stack_guard;
10310 else
10311 {
10312 /* If no value for stack guard is provided the smallest power of 2
10313 at least as large as the current frame size is chosen. */
10314 stack_guard = 1;
10315 while (stack_guard < cfun_frame_layout.frame_size)
10316 stack_guard <<= 1;
10317 }
10318
10319 if (cfun_frame_layout.frame_size >= s390_stack_size)
10320 {
10321 warning (0, "frame size of function %qs is %wd"
10322 " bytes exceeding user provided stack limit of "
10323 "%d bytes. "
10324 "An unconditional trap is added.",
10325 current_function_name(), cfun_frame_layout.frame_size,
10326 s390_stack_size);
10327 emit_insn (gen_trap ());
10328 }
10329 else
10330 {
10331 /* stack_guard has to be smaller than s390_stack_size.
10332 Otherwise we would emit an AND with zero which would
10333 not match the test under mask pattern. */
10334 if (stack_guard >= s390_stack_size)
10335 {
10336 warning (0, "frame size of function %qs is %wd"
10337 " bytes which is more than half the stack size. "
10338 "The dynamic check would not be reliable. "
10339 "No check emitted for this function.",
10340 current_function_name(),
10341 cfun_frame_layout.frame_size);
10342 }
10343 else
10344 {
10345 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
10346 & ~(stack_guard - 1));
10347
10348 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
10349 GEN_INT (stack_check_mask));
10350 if (TARGET_64BIT)
10351 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
10352 t, const0_rtx),
10353 t, const0_rtx, const0_rtx));
10354 else
10355 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
10356 t, const0_rtx),
10357 t, const0_rtx, const0_rtx));
10358 }
10359 }
10360 }
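/* Worked example of the check emitted above (illustrative values):
   with -mstack-size=65536 and -mstack-guard=4096 the mask is
   (65536 - 1) & ~(4096 - 1) = 0xf000, and the compare-and-trap
   fires when (%r15 & 0xf000) == 0, i.e. once the stack pointer has
   moved into the lowest guard-sized block of the stack area.  */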
10361
10362 if (s390_warn_framesize > 0
10363 && cfun_frame_layout.frame_size >= s390_warn_framesize)
10364 warning (0, "frame size of %qs is %wd bytes",
10365 current_function_name (), cfun_frame_layout.frame_size);
10366
10367 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
10368 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
10369
10370 /* Save incoming stack pointer into temp reg. */
10371 if (TARGET_BACKCHAIN || next_fpr)
10372 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
10373
10374 /* Subtract frame size from stack pointer. */
10375
10376 if (DISP_IN_RANGE (INTVAL (frame_off)))
10377 {
10378 insn = gen_rtx_SET (stack_pointer_rtx,
10379 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10380 frame_off));
10381 insn = emit_insn (insn);
10382 }
10383 else
10384 {
10385 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10386 frame_off = force_const_mem (Pmode, frame_off);
10387
10388 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
10389 annotate_constant_pool_refs (&PATTERN (insn));
10390 }
10391
10392 RTX_FRAME_RELATED_P (insn) = 1;
10393 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10394 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10395 gen_rtx_SET (stack_pointer_rtx,
10396 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10397 real_frame_off)));
10398
10399 /* Set backchain. */
10400
10401 if (TARGET_BACKCHAIN)
10402 {
10403 if (cfun_frame_layout.backchain_offset)
10404 addr = gen_rtx_MEM (Pmode,
10405 plus_constant (Pmode, stack_pointer_rtx,
10406 cfun_frame_layout.backchain_offset));
10407 else
10408 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
10409 set_mem_alias_set (addr, get_frame_alias_set ());
10410 insn = emit_insn (gen_move_insn (addr, temp_reg));
10411 }
10412
10413 /* If we support non-call exceptions (e.g. for Java),
10414 we need to make sure the backchain pointer is set up
10415 before any possibly trapping memory access. */
10416 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
10417 {
10418 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
10419 emit_clobber (addr);
10420 }
10421 }
10422
10423 /* Save fprs 8 - 15 (64 bit ABI). */
10424
10425 if (cfun_save_high_fprs_p && next_fpr)
10426 {
10427 /* If the stack might be accessed through a different register
10428 we have to make sure that the stack pointer decrement is not
10429 moved below the use of the stack slots. */
10430 s390_emit_stack_tie ();
10431
10432 insn = emit_insn (gen_add2_insn (temp_reg,
10433 GEN_INT (cfun_frame_layout.f8_offset)));
10434
10435 offset = 0;
10436
10437 for (i = FPR8_REGNUM; i <= next_fpr; i++)
10438 if (cfun_fpr_save_p (i))
10439 {
10440 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
10441 cfun_frame_layout.frame_size
10442 + cfun_frame_layout.f8_offset
10443 + offset);
10444
10445 insn = save_fpr (temp_reg, offset, i);
10446 offset += 8;
10447 RTX_FRAME_RELATED_P (insn) = 1;
10448 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10449 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
10450 gen_rtx_REG (DFmode, i)));
10451 }
10452 }
10453
10454 /* Set frame pointer, if needed. */
10455
10456 if (frame_pointer_needed)
10457 {
10458 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10459 RTX_FRAME_RELATED_P (insn) = 1;
10460 }
10461
10462 /* Set up got pointer, if needed. */
10463
10464 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
10465 {
10466 rtx_insn *insns = s390_load_got ();
10467
10468 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
10469 annotate_constant_pool_refs (&PATTERN (insn));
10470
10471 emit_insn (insns);
10472 }
10473
10474 if (TARGET_TPF_PROFILING)
10475 {
10476 /* Generate a BAS instruction to serve as a function
10477 entry intercept to facilitate the use of tracing
10478 algorithms located at the branch target. */
10479 emit_insn (gen_prologue_tpf ());
10480
10481 /* Emit a blockage here so that all code
10482 lies between the profiling mechanisms. */
10483 emit_insn (gen_blockage ());
10484 }
10485 }
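/* For orientation, a typical 64-bit prologue generated by the code
   above for a small non-leaf function looks roughly like

       stmg   %r14,%r15,112(%r15)    # save_gprs
       aghi   %r15,-160              # decrement stack pointer
       lgr    %r11,%r15              # set frame pointer, if needed

   the exact register range, offsets and frame size depend on the
   frame layout computed by s390_frame_info.  */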
10486
10487 /* Expand the epilogue into a bunch of separate insns. */
10488
10489 void
10490 s390_emit_epilogue (bool sibcall)
10491 {
10492 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
10493 int area_bottom, area_top, offset = 0;
10494 int next_offset;
10495 rtvec p;
10496 int i;
10497
10498 if (TARGET_TPF_PROFILING)
10499 {
10500
10501 /* Generate a BAS instruction to serve as a function
10502 entry intercept to facilitate the use of tracing
10503 algorithms located at the branch target. */
10504
10505 /* Emit a blockage here so that all code
10506 lies between the profiling mechanisms. */
10507 emit_insn (gen_blockage ());
10508
10509 emit_insn (gen_epilogue_tpf ());
10510 }
10511
10512 /* Check whether to use frame or stack pointer for restore. */
10513
10514 frame_pointer = (frame_pointer_needed
10515 ? hard_frame_pointer_rtx : stack_pointer_rtx);
10516
10517 s390_frame_area (&area_bottom, &area_top);
10518
10519 /* Check whether we can access the register save area.
10520 If not, increment the frame pointer as required. */
10521
10522 if (area_top <= area_bottom)
10523 {
10524 /* Nothing to restore. */
10525 }
10526 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
10527 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
10528 {
10529 /* Area is in range. */
10530 offset = cfun_frame_layout.frame_size;
10531 }
10532 else
10533 {
10534 rtx insn, frame_off, cfa;
10535
10536 offset = area_bottom < 0 ? -area_bottom : 0;
10537 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
10538
10539 cfa = gen_rtx_SET (frame_pointer,
10540 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
10541 if (DISP_IN_RANGE (INTVAL (frame_off)))
10542 {
10543 insn = gen_rtx_SET (frame_pointer,
10544 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
10545 insn = emit_insn (insn);
10546 }
10547 else
10548 {
10549 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10550 frame_off = force_const_mem (Pmode, frame_off);
10551
10552 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
10553 annotate_constant_pool_refs (&PATTERN (insn));
10554 }
10555 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
10556 RTX_FRAME_RELATED_P (insn) = 1;
10557 }
10558
10559 /* Restore call saved fprs. */
10560
10561 if (TARGET_64BIT)
10562 {
10563 if (cfun_save_high_fprs_p)
10564 {
10565 next_offset = cfun_frame_layout.f8_offset;
10566 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
10567 {
10568 if (cfun_fpr_save_p (i))
10569 {
10570 restore_fpr (frame_pointer,
10571 offset + next_offset, i);
10572 cfa_restores
10573 = alloc_reg_note (REG_CFA_RESTORE,
10574 gen_rtx_REG (DFmode, i), cfa_restores);
10575 next_offset += 8;
10576 }
10577 }
10578 }
10579
10580 }
10581 else
10582 {
10583 next_offset = cfun_frame_layout.f4_offset;
10584 /* f4, f6 */
10585 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10586 {
10587 if (cfun_fpr_save_p (i))
10588 {
10589 restore_fpr (frame_pointer,
10590 offset + next_offset, i);
10591 cfa_restores
10592 = alloc_reg_note (REG_CFA_RESTORE,
10593 gen_rtx_REG (DFmode, i), cfa_restores);
10594 next_offset += 8;
10595 }
10596 else if (!TARGET_PACKED_STACK)
10597 next_offset += 8;
10598 }
10599
10600 }
10601
10602 /* Return register. */
10603
10604 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10605
10606 /* Restore call saved gprs. */
10607
10608 if (cfun_frame_layout.first_restore_gpr != -1)
10609 {
10610 rtx insn, addr;
10611 int i;
10612
10613 /* Check for global register and save them
10614 to stack location from where they get restored. */
10615
10616 for (i = cfun_frame_layout.first_restore_gpr;
10617 i <= cfun_frame_layout.last_restore_gpr;
10618 i++)
10619 {
10620 if (global_not_special_regno_p (i))
10621 {
10622 addr = plus_constant (Pmode, frame_pointer,
10623 offset + cfun_frame_layout.gprs_offset
10624 + (i - cfun_frame_layout.first_save_gpr_slot)
10625 * UNITS_PER_LONG);
10626 addr = gen_rtx_MEM (Pmode, addr);
10627 set_mem_alias_set (addr, get_frame_alias_set ());
10628 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
10629 }
10630 else
10631 cfa_restores
10632 = alloc_reg_note (REG_CFA_RESTORE,
10633 gen_rtx_REG (Pmode, i), cfa_restores);
10634 }
10635
10636 if (! sibcall)
10637 {
10638 /* Fetch return address from stack before load multiple;
10639 this helps scheduling.
10640
10641 Only do this if we already decided that r14 needs to be
10642 saved to a stack slot. (And not just because r14 happens to
10643 be in between two GPRs which need saving.) Otherwise it
10644 would be difficult to take that decision back in
10645 s390_optimize_prologue. */
10646 if (cfun_gpr_save_slot (RETURN_REGNUM) == -1)
10647 {
10648 int return_regnum = find_unused_clobbered_reg();
10649 if (!return_regnum)
10650 return_regnum = 4;
10651 return_reg = gen_rtx_REG (Pmode, return_regnum);
10652
10653 addr = plus_constant (Pmode, frame_pointer,
10654 offset + cfun_frame_layout.gprs_offset
10655 + (RETURN_REGNUM
10656 - cfun_frame_layout.first_save_gpr_slot)
10657 * UNITS_PER_LONG);
10658 addr = gen_rtx_MEM (Pmode, addr);
10659 set_mem_alias_set (addr, get_frame_alias_set ());
10660 emit_move_insn (return_reg, addr);
10661
10662 /* Once we did that optimization we have to make sure
10663 s390_optimize_prologue does not try to remove the
10664 store of r14 since we will not be able to find the
10665 load issued here. */
10666 cfun_frame_layout.save_return_addr_p = true;
10667 }
10668 }
10669
10670 insn = restore_gprs (frame_pointer,
10671 offset + cfun_frame_layout.gprs_offset
10672 + (cfun_frame_layout.first_restore_gpr
10673 - cfun_frame_layout.first_save_gpr_slot)
10674 * UNITS_PER_LONG,
10675 cfun_frame_layout.first_restore_gpr,
10676 cfun_frame_layout.last_restore_gpr);
10677 insn = emit_insn (insn);
10678 REG_NOTES (insn) = cfa_restores;
10679 add_reg_note (insn, REG_CFA_DEF_CFA,
10680 plus_constant (Pmode, stack_pointer_rtx,
10681 STACK_POINTER_OFFSET));
10682 RTX_FRAME_RELATED_P (insn) = 1;
10683 }
10684
10685 s390_restore_gprs_from_fprs ();
10686
10687 if (! sibcall)
10688 {
10689
10690 /* Return to caller. */
10691
10692 p = rtvec_alloc (2);
10693
10694 RTVEC_ELT (p, 0) = ret_rtx;
10695 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
10696 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
10697 }
10698 }
10699
10700 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
10701
10702 static void
10703 s300_set_up_by_prologue (hard_reg_set_container *regs)
10704 {
10705 if (cfun->machine->base_reg
10706 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
10707 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
10708 }
10709
10710 /* Return true if the function can use simple_return to return outside
10711 of a shrink-wrapped region. At present shrink-wrapping is supported
10712 in all cases. */
10713
10714 bool
10715 s390_can_use_simple_return_insn (void)
10716 {
10717 return true;
10718 }
10719
10720 /* Return true if the epilogue is guaranteed to contain only a return
10721 instruction and if a direct return can therefore be used instead.
10722 One of the main advantages of using direct return instructions
10723 is that we can then use conditional returns. */
10724
10725 bool
10726 s390_can_use_return_insn (void)
10727 {
10728 int i;
10729
10730 if (!reload_completed)
10731 return false;
10732
10733 if (crtl->profile)
10734 return false;
10735
10736 if (TARGET_TPF_PROFILING)
10737 return false;
10738
10739 for (i = 0; i < 16; i++)
10740 if (cfun_gpr_save_slot (i))
10741 return false;
10742
10743 /* For 31 bit this is not covered by the frame_size check below
10744 since f4, f6 are saved in the register save area without needing
10745 additional stack space. */
10746 if (!TARGET_64BIT
10747 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
10748 return false;
10749
10750 if (cfun->machine->base_reg
10751 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
10752 return false;
10753
10754 return cfun_frame_layout.frame_size == 0;
10755 }
10756
10757 /* The VX ABI differs for vararg functions. Therefore we need the
10758 prototype of the callee to be available when passing vector type
10759 values. */
10760 static const char *
10761 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
10762 {
10763 return ((TARGET_VX_ABI
10764 && typelist == 0
10765 && VECTOR_TYPE_P (TREE_TYPE (val))
10766 && (funcdecl == NULL_TREE
10767 || (TREE_CODE (funcdecl) == FUNCTION_DECL
10768 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
10769 ? N_("Vector argument passed to unprototyped function")
10770 : NULL);
10771 }
10772
10773
10774 /* Return the size in bytes of a function argument of
10775 type TYPE and/or mode MODE. At least one of TYPE or
10776 MODE must be specified. */
10777
10778 static int
10779 s390_function_arg_size (machine_mode mode, const_tree type)
10780 {
10781 if (type)
10782 return int_size_in_bytes (type);
10783
10784 /* No type info available for some library calls ... */
10785 if (mode != BLKmode)
10786 return GET_MODE_SIZE (mode);
10787
10788 /* If we have neither type nor mode, abort. */
10789 gcc_unreachable ();
10790 }
10791
10792 /* Return true if a function argument of type TYPE and mode MODE
10793 is to be passed in a vector register, if available. */
10794
10795 bool
10796 s390_function_arg_vector (machine_mode mode, const_tree type)
10797 {
10798 if (!TARGET_VX_ABI)
10799 return false;
10800
10801 if (s390_function_arg_size (mode, type) > 16)
10802 return false;
10803
10804 /* No type info available for some library calls ... */
10805 if (!type)
10806 return VECTOR_MODE_P (mode);
10807
10808 /* The ABI says that record types with a single member are treated
10809 just like that member would be. */
10810 while (TREE_CODE (type) == RECORD_TYPE)
10811 {
10812 tree field, single = NULL_TREE;
10813
10814 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
10815 {
10816 if (TREE_CODE (field) != FIELD_DECL)
10817 continue;
10818
10819 if (single == NULL_TREE)
10820 single = TREE_TYPE (field);
10821 else
10822 return false;
10823 }
10824
10825 if (single == NULL_TREE)
10826 return false;
10827 else
10828 {
10829 /* If the field declaration adds extra bytes due to
10830 e.g. padding this is not accepted as a vector type. */
10831 if (int_size_in_bytes (single) <= 0
10832 || int_size_in_bytes (single) != int_size_in_bytes (type))
10833 return false;
10834 type = single;
10835 }
10836 }
10837
10838 return VECTOR_TYPE_P (type);
10839 }
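/* Example of the single-member record rule above (illustrative only):

     typedef double v2df __attribute__ ((vector_size (16)));
     struct s { v2df x; };

   Under the VX ABI both a 'v2df' and a 'struct s' argument count as
   vector arguments, while a struct with a second member or with
   padding that changes its size falls back to the normal aggregate
   rules.  */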
10840
10841 /* Return true if a function argument of type TYPE and mode MODE
10842 is to be passed in a floating-point register, if available. */
10843
10844 static bool
10845 s390_function_arg_float (machine_mode mode, const_tree type)
10846 {
10847 if (s390_function_arg_size (mode, type) > 8)
10848 return false;
10849
10850 /* Soft-float changes the ABI: no floating-point registers are used. */
10851 if (TARGET_SOFT_FLOAT)
10852 return false;
10853
10854 /* No type info available for some library calls ... */
10855 if (!type)
10856 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
10857
10858 /* The ABI says that record types with a single member are treated
10859 just like that member would be. */
10860 while (TREE_CODE (type) == RECORD_TYPE)
10861 {
10862 tree field, single = NULL_TREE;
10863
10864 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
10865 {
10866 if (TREE_CODE (field) != FIELD_DECL)
10867 continue;
10868
10869 if (single == NULL_TREE)
10870 single = TREE_TYPE (field);
10871 else
10872 return false;
10873 }
10874
10875 if (single == NULL_TREE)
10876 return false;
10877 else
10878 type = single;
10879 }
10880
10881 return TREE_CODE (type) == REAL_TYPE;
10882 }
10883
10884 /* Return true if a function argument of type TYPE and mode MODE
10885 is to be passed in an integer register, or a pair of integer
10886 registers, if available. */
10887
10888 static bool
10889 s390_function_arg_integer (machine_mode mode, const_tree type)
10890 {
10891 int size = s390_function_arg_size (mode, type);
10892 if (size > 8)
10893 return false;
10894
10895 /* No type info available for some library calls ... */
10896 if (!type)
10897 return GET_MODE_CLASS (mode) == MODE_INT
10898 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
10899
10900 /* We accept small integral (and similar) types. */
10901 if (INTEGRAL_TYPE_P (type)
10902 || POINTER_TYPE_P (type)
10903 || TREE_CODE (type) == NULLPTR_TYPE
10904 || TREE_CODE (type) == OFFSET_TYPE
10905 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
10906 return true;
10907
10908 /* We also accept structs of size 1, 2, 4, 8 that are not
10909 passed in floating-point registers. */
10910 if (AGGREGATE_TYPE_P (type)
10911 && exact_log2 (size) >= 0
10912 && !s390_function_arg_float (mode, type))
10913 return true;
10914
10915 return false;
10916 }
10917
10918 /* Return 1 if a function argument of type TYPE and mode MODE
10919 is to be passed by reference. The ABI specifies that only
10920 structures of size 1, 2, 4, or 8 bytes are passed by value,
10921 all other structures (and complex numbers) are passed by
10922 reference. */
10923
10924 static bool
10925 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
10926 machine_mode mode, const_tree type,
10927 bool named ATTRIBUTE_UNUSED)
10928 {
10929 int size = s390_function_arg_size (mode, type);
10930
10931 if (s390_function_arg_vector (mode, type))
10932 return false;
10933
10934 if (size > 8)
10935 return true;
10936
10937 if (type)
10938 {
10939 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
10940 return true;
10941
10942 if (TREE_CODE (type) == COMPLEX_TYPE
10943 || TREE_CODE (type) == VECTOR_TYPE)
10944 return true;
10945 }
10946
10947 return false;
10948 }
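/* Illustration of the rules above: a struct of size 3 (not a power
   of two) and any struct larger than 8 bytes are passed by
   reference, as are complex values and non-VX-ABI vector types.  A
   'struct { int i; }' (size 4), in contrast, is passed by value and
   ends up in a GPR via s390_function_arg_integer.  */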
10949
10950 /* Update the data in CUM to advance over an argument of mode MODE and
10951 data type TYPE. (TYPE is null for libcalls where that information
10952 may not be available.). The boolean NAMED specifies whether the
10953 argument is a named argument (as opposed to an unnamed argument
10954 matching an ellipsis). */
10955
10956 static void
10957 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
10958 const_tree type, bool named)
10959 {
10960 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10961
10962 if (s390_function_arg_vector (mode, type))
10963 {
10964 /* We are called for unnamed vector stdarg arguments which are
10965 passed on the stack. In this case this hook does not have to
10966 do anything since stack arguments are tracked by common
10967 code. */
10968 if (!named)
10969 return;
10970 cum->vrs += 1;
10971 }
10972 else if (s390_function_arg_float (mode, type))
10973 {
10974 cum->fprs += 1;
10975 }
10976 else if (s390_function_arg_integer (mode, type))
10977 {
10978 int size = s390_function_arg_size (mode, type);
10979 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
10980 }
10981 else
10982 gcc_unreachable ();
10983 }
10984
10985 /* Define where to put the arguments to a function.
10986 Value is zero to push the argument on the stack,
10987 or a hard register in which to store the argument.
10988
10989 MODE is the argument's machine mode.
10990 TYPE is the data type of the argument (as a tree).
10991 This is null for libcalls where that information may
10992 not be available.
10993 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10994 the preceding args and about the function being called.
10995 NAMED is nonzero if this argument is a named parameter
10996 (otherwise it is an extra parameter matching an ellipsis).
10997
10998 On S/390, we use general purpose registers 2 through 6 to
10999 pass integer, pointer, and certain structure arguments, and
11000 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
11001 to pass floating point arguments. All remaining arguments
11002 are pushed to the stack. */
11003
11004 static rtx
11005 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
11006 const_tree type, bool named)
11007 {
11008 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11009
11010 if (!named)
11011 s390_check_type_for_vector_abi (type, true, false);
11012
11013 if (s390_function_arg_vector (mode, type))
11014 {
11015 /* Vector arguments being part of the ellipsis are passed on the
11016 stack. */
11017 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
11018 return NULL_RTX;
11019
11020 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
11021 }
11022 else if (s390_function_arg_float (mode, type))
11023 {
11024 if (cum->fprs + 1 > FP_ARG_NUM_REG)
11025 return NULL_RTX;
11026 else
11027 return gen_rtx_REG (mode, cum->fprs + 16);
11028 }
11029 else if (s390_function_arg_integer (mode, type))
11030 {
11031 int size = s390_function_arg_size (mode, type);
11032 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11033
11034 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
11035 return NULL_RTX;
11036 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
11037 return gen_rtx_REG (mode, cum->gprs + 2);
11038 else if (n_gprs == 2)
11039 {
11040 rtvec p = rtvec_alloc (2);
11041
11042 RTVEC_ELT (p, 0)
11043 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
11044 const0_rtx);
11045 RTVEC_ELT (p, 1)
11046 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
11047 GEN_INT (4));
11048
11049 return gen_rtx_PARALLEL (mode, p);
11050 }
11051 }
11052
11053 /* After the real arguments, expand_call calls us once again
11054 with a void_type_node type. Whatever we return here is
11055 passed as operand 2 to the call expanders.
11056
11057 We don't need this feature ... */
11058 else if (type == void_type_node)
11059 return const0_rtx;
11060
11061 gcc_unreachable ();
11062 }
11063
11064 /* Return true if return values of type TYPE should be returned
11065 in a memory buffer whose address is passed by the caller as
11066 hidden first argument. */
11067
11068 static bool
11069 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
11070 {
11071 /* We accept small integral (and similar) types. */
11072 if (INTEGRAL_TYPE_P (type)
11073 || POINTER_TYPE_P (type)
11074 || TREE_CODE (type) == OFFSET_TYPE
11075 || TREE_CODE (type) == REAL_TYPE)
11076 return int_size_in_bytes (type) > 8;
11077
11078 /* vector types which fit into a VR. */
11079 if (TARGET_VX_ABI
11080 && VECTOR_TYPE_P (type)
11081 && int_size_in_bytes (type) <= 16)
11082 return false;
11083
11084 /* Aggregates and similar constructs are always returned
11085 in memory. */
11086 if (AGGREGATE_TYPE_P (type)
11087 || TREE_CODE (type) == COMPLEX_TYPE
11088 || VECTOR_TYPE_P (type))
11089 return true;
11090
11091 /* ??? We get called on all sorts of random stuff from
11092 aggregate_value_p. We can't abort, but it's not clear
11093 what's safe to return. Pretend it's a struct I guess. */
11094 return true;
11095 }
11096
11097 /* Function arguments and return values are promoted to word size. */
11098
11099 static machine_mode
11100 s390_promote_function_mode (const_tree type, machine_mode mode,
11101 int *punsignedp,
11102 const_tree fntype ATTRIBUTE_UNUSED,
11103 int for_return ATTRIBUTE_UNUSED)
11104 {
11105 if (INTEGRAL_MODE_P (mode)
11106 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
11107 {
11108 if (type != NULL_TREE && POINTER_TYPE_P (type))
11109 *punsignedp = POINTERS_EXTEND_UNSIGNED;
11110 return Pmode;
11111 }
11112
11113 return mode;
11114 }
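/* Example of the promotion above (illustrative): on a 64-bit target
   (UNITS_PER_LONG == 8) a QImode/HImode/SImode argument or return
   value is widened to Pmode (DImode); for pointer types the
   extension is forced to POINTERS_EXTEND_UNSIGNED.  On 31 bit only
   the sub-SImode modes are widened, since SImode already matches
   UNITS_PER_LONG.  */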
11115
11116 /* Define where to return a (scalar) value of type RET_TYPE.
11117 If RET_TYPE is null, define where to return a (scalar)
11118 value of mode MODE from a libcall. */
11119
11120 static rtx
11121 s390_function_and_libcall_value (machine_mode mode,
11122 const_tree ret_type,
11123 const_tree fntype_or_decl,
11124 bool outgoing ATTRIBUTE_UNUSED)
11125 {
11126 /* For vector return types it is important to use the RET_TYPE
11127 argument whenever available since the middle-end might have
11128 changed the mode to a scalar mode. */
11129 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
11130 || (!ret_type && VECTOR_MODE_P (mode)));
11131
11132 /* For normal functions perform the promotion as
11133 promote_function_mode would do. */
11134 if (ret_type)
11135 {
11136 int unsignedp = TYPE_UNSIGNED (ret_type);
11137 mode = promote_function_mode (ret_type, mode, &unsignedp,
11138 fntype_or_decl, 1);
11139 }
11140
11141 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
11142 || SCALAR_FLOAT_MODE_P (mode)
11143 || (TARGET_VX_ABI && vector_ret_type_p));
11144 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
11145
11146 if (TARGET_VX_ABI && vector_ret_type_p)
11147 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
11148 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
11149 return gen_rtx_REG (mode, 16);
11150 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
11151 || UNITS_PER_LONG == UNITS_PER_WORD)
11152 return gen_rtx_REG (mode, 2);
11153 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
11154 {
11155 /* This case is triggered when returning a 64 bit value with
11156 -m31 -mzarch. Although the value would fit into a single
11157 register it has to be forced into a 32 bit register pair in
11158 order to match the ABI. */
11159 rtvec p = rtvec_alloc (2);
11160
11161 RTVEC_ELT (p, 0)
11162 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
11163 RTVEC_ELT (p, 1)
11164 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
11165
11166 return gen_rtx_PARALLEL (mode, p);
11167 }
11168
11169 gcc_unreachable ();
11170 }
11171
11172 /* Define where to return a scalar return value of type RET_TYPE. */
11173
11174 static rtx
11175 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
11176 bool outgoing)
11177 {
11178 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
11179 fn_decl_or_type, outgoing);
11180 }
11181
11182 /* Define where to return a scalar libcall return value of mode
11183 MODE. */
11184
11185 static rtx
11186 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
11187 {
11188 return s390_function_and_libcall_value (mode, NULL_TREE,
11189 NULL_TREE, true);
11190 }
11191
11192
11193 /* Create and return the va_list datatype.
11194
11195 On S/390, va_list is an array type equivalent to
11196
11197 typedef struct __va_list_tag
11198 {
11199 long __gpr;
11200 long __fpr;
11201 void *__overflow_arg_area;
11202 void *__reg_save_area;
11203 } va_list[1];
11204
11205 where __gpr and __fpr hold the number of general purpose
11206 or floating point arguments used up to now, respectively,
11207 __overflow_arg_area points to the stack location of the
11208 next argument passed on the stack, and __reg_save_area
11209 always points to the start of the register area in the
11210 call frame of the current function. The function prologue
11211 saves all registers used for argument passing into this
11212 area if the function uses variable arguments. */
11213
11214 static tree
11215 s390_build_builtin_va_list (void)
11216 {
11217 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
11218
11219 record = lang_hooks.types.make_type (RECORD_TYPE);
11220
11221 type_decl =
11222 build_decl (BUILTINS_LOCATION,
11223 TYPE_DECL, get_identifier ("__va_list_tag"), record);
11224
11225 f_gpr = build_decl (BUILTINS_LOCATION,
11226 FIELD_DECL, get_identifier ("__gpr"),
11227 long_integer_type_node);
11228 f_fpr = build_decl (BUILTINS_LOCATION,
11229 FIELD_DECL, get_identifier ("__fpr"),
11230 long_integer_type_node);
11231 f_ovf = build_decl (BUILTINS_LOCATION,
11232 FIELD_DECL, get_identifier ("__overflow_arg_area"),
11233 ptr_type_node);
11234 f_sav = build_decl (BUILTINS_LOCATION,
11235 FIELD_DECL, get_identifier ("__reg_save_area"),
11236 ptr_type_node);
11237
11238 va_list_gpr_counter_field = f_gpr;
11239 va_list_fpr_counter_field = f_fpr;
11240
11241 DECL_FIELD_CONTEXT (f_gpr) = record;
11242 DECL_FIELD_CONTEXT (f_fpr) = record;
11243 DECL_FIELD_CONTEXT (f_ovf) = record;
11244 DECL_FIELD_CONTEXT (f_sav) = record;
11245
11246 TYPE_STUB_DECL (record) = type_decl;
11247 TYPE_NAME (record) = type_decl;
11248 TYPE_FIELDS (record) = f_gpr;
11249 DECL_CHAIN (f_gpr) = f_fpr;
11250 DECL_CHAIN (f_fpr) = f_ovf;
11251 DECL_CHAIN (f_ovf) = f_sav;
11252
11253 layout_type (record);
11254
11255 /* The correct type is an array type of one element. */
11256 return build_array_type (record, build_index_type (size_zero_node));
11257 }
11258
11259 /* Implement va_start by filling the va_list structure VALIST.
11260 STDARG_P is always true, and ignored.
11261 NEXTARG points to the first anonymous stack argument.
11262
11263 The following global variables are used to initialize
11264 the va_list structure:
11265
11266 crtl->args.info:
11267 holds number of gprs and fprs used for named arguments.
11268 crtl->args.arg_offset_rtx:
11269 holds the offset of the first anonymous stack argument
11270 (relative to the virtual arg pointer). */
11271
11272 static void
11273 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
11274 {
11275 HOST_WIDE_INT n_gpr, n_fpr;
11276 int off;
11277 tree f_gpr, f_fpr, f_ovf, f_sav;
11278 tree gpr, fpr, ovf, sav, t;
11279
11280 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11281 f_fpr = DECL_CHAIN (f_gpr);
11282 f_ovf = DECL_CHAIN (f_fpr);
11283 f_sav = DECL_CHAIN (f_ovf);
11284
11285 valist = build_simple_mem_ref (valist);
11286 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11287 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11288 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
11289 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11290
11291 /* Count number of gp and fp argument registers used. */
11292
11293 n_gpr = crtl->args.info.gprs;
11294 n_fpr = crtl->args.info.fprs;
11295
11296 if (cfun->va_list_gpr_size)
11297 {
11298 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11299 build_int_cst (NULL_TREE, n_gpr));
11300 TREE_SIDE_EFFECTS (t) = 1;
11301 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11302 }
11303
11304 if (cfun->va_list_fpr_size)
11305 {
11306 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11307 build_int_cst (NULL_TREE, n_fpr));
11308 TREE_SIDE_EFFECTS (t) = 1;
11309 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11310 }
11311
11312 /* Find the overflow area.
11313 FIXME: This currently is too pessimistic when the vector ABI is
11314 enabled. In that case we *always* set up the overflow area
11315 pointer. */
11316 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
11317 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
11318 || TARGET_VX_ABI)
11319 {
11320 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11321
11322 off = INTVAL (crtl->args.arg_offset_rtx);
11323 off = off < 0 ? 0 : off;
11324 if (TARGET_DEBUG_ARG)
11325 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
11326 (int)n_gpr, (int)n_fpr, off);
11327
11328 t = fold_build_pointer_plus_hwi (t, off);
11329
11330 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11331 TREE_SIDE_EFFECTS (t) = 1;
11332 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11333 }
11334
11335 /* Find the register save area. */
11336 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
11337 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
11338 {
11339 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
11340 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
11341
11342 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11343 TREE_SIDE_EFFECTS (t) = 1;
11344 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11345 }
11346 }
11347
11348 /* Implement va_arg by updating the va_list structure
11349 VALIST as required to retrieve an argument of type
11350 TYPE, and returning that argument.
11351
11352 Generates code equivalent to:
11353
11354 if (integral value) {
11355 if (size <= 4 && args.gpr < 5 ||
11356 size > 4 && args.gpr < 4 )
11357 ret = args.reg_save_area[args.gpr+8]
11358 else
11359 ret = *args.overflow_arg_area++;
11360 } else if (vector value) {
11361 ret = *args.overflow_arg_area;
11362 args.overflow_arg_area += size / 8;
11363 } else if (float value) {
11364 if (args.fpr < 2)
11365 ret = args.reg_save_area[args.fpr+64]
11366 else
11367 ret = *args.overflow_arg_area++;
11368 } else if (aggregate value) {
11369 if (args.gpr < 5)
11370 ret = *args.reg_save_area[args.gpr]
11371 else
11372 ret = **args.overflow_arg_area++;
11373 } */
11374
11375 static tree
11376 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11377 gimple_seq *post_p ATTRIBUTE_UNUSED)
11378 {
11379 tree f_gpr, f_fpr, f_ovf, f_sav;
11380 tree gpr, fpr, ovf, sav, reg, t, u;
11381 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
11382 tree lab_false, lab_over;
11383 tree addr = create_tmp_var (ptr_type_node, "addr");
11384 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
11385 a stack slot. */
11386
11387 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11388 f_fpr = DECL_CHAIN (f_gpr);
11389 f_ovf = DECL_CHAIN (f_fpr);
11390 f_sav = DECL_CHAIN (f_ovf);
11391
11392 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11393 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11394 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11395
11396 /* The tree for args* cannot be shared between gpr/fpr and ovf since
11397 both appear on a lhs. */
11398 valist = unshare_expr (valist);
11399 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
11400
11401 size = int_size_in_bytes (type);
11402
11403 s390_check_type_for_vector_abi (type, true, false);
11404
11405 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11406 {
11407 if (TARGET_DEBUG_ARG)
11408 {
11409 fprintf (stderr, "va_arg: aggregate type");
11410 debug_tree (type);
11411 }
11412
11413 /* Aggregates are passed by reference. */
11414 indirect_p = 1;
11415 reg = gpr;
11416 n_reg = 1;
11417
11418 /* Kernel stack layout on 31 bit: It is assumed here that no padding
11419 will be added by s390_frame_info because for va_args an even
11420 number of gprs always has to be saved (r15-r2 = 14 regs). */
11421 sav_ofs = 2 * UNITS_PER_LONG;
11422 sav_scale = UNITS_PER_LONG;
11423 size = UNITS_PER_LONG;
11424 max_reg = GP_ARG_NUM_REG - n_reg;
11425 left_align_p = false;
11426 }
11427 else if (s390_function_arg_vector (TYPE_MODE (type), type))
11428 {
11429 if (TARGET_DEBUG_ARG)
11430 {
11431 fprintf (stderr, "va_arg: vector type");
11432 debug_tree (type);
11433 }
11434
11435 indirect_p = 0;
11436 reg = NULL_TREE;
11437 n_reg = 0;
11438 sav_ofs = 0;
11439 sav_scale = 8;
11440 max_reg = 0;
11441 left_align_p = true;
11442 }
11443 else if (s390_function_arg_float (TYPE_MODE (type), type))
11444 {
11445 if (TARGET_DEBUG_ARG)
11446 {
11447 fprintf (stderr, "va_arg: float type");
11448 debug_tree (type);
11449 }
11450
11451 /* FP args go in FP registers, if present. */
11452 indirect_p = 0;
11453 reg = fpr;
11454 n_reg = 1;
11455 sav_ofs = 16 * UNITS_PER_LONG;
11456 sav_scale = 8;
11457 max_reg = FP_ARG_NUM_REG - n_reg;
11458 left_align_p = false;
11459 }
11460 else
11461 {
11462 if (TARGET_DEBUG_ARG)
11463 {
11464 fprintf (stderr, "va_arg: other type");
11465 debug_tree (type);
11466 }
11467
11468 /* Otherwise into GP registers. */
11469 indirect_p = 0;
11470 reg = gpr;
11471 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11472
11473 /* Kernel stack layout on 31 bit: It is assumed here that no padding
11474 will be added by s390_frame_info because for va_args an even
11475 number of gprs always has to be saved (r15-r2 = 14 regs). */
11476 sav_ofs = 2 * UNITS_PER_LONG;
11477
11478 if (size < UNITS_PER_LONG)
11479 sav_ofs += UNITS_PER_LONG - size;
11480
11481 sav_scale = UNITS_PER_LONG;
11482 max_reg = GP_ARG_NUM_REG - n_reg;
11483 left_align_p = false;
11484 }
11485
11486 /* Pull the value out of the saved registers ... */
11487
11488 if (reg != NULL_TREE)
11489 {
11490 /*
11491 if (reg > ((typeof (reg))max_reg))
11492 goto lab_false;
11493
11494 addr = sav + sav_ofs + reg * sav_scale;
11495
11496 goto lab_over;
11497
11498 lab_false:
11499 */
11500
11501 lab_false = create_artificial_label (UNKNOWN_LOCATION);
11502 lab_over = create_artificial_label (UNKNOWN_LOCATION);
11503
11504 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
11505 t = build2 (GT_EXPR, boolean_type_node, reg, t);
11506 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11507 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11508 gimplify_and_add (t, pre_p);
11509
11510 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11511 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
11512 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
11513 t = fold_build_pointer_plus (t, u);
11514
11515 gimplify_assign (addr, t, pre_p);
11516
11517 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11518
11519 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
11520 }
11521
11522 /* ... Otherwise out of the overflow area. */
11523
11524 t = ovf;
11525 if (size < UNITS_PER_LONG && !left_align_p)
11526 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
11527
11528 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11529
11530 gimplify_assign (addr, t, pre_p);
11531
11532 if (size < UNITS_PER_LONG && left_align_p)
11533 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
11534 else
11535 t = fold_build_pointer_plus_hwi (t, size);
11536
11537 gimplify_assign (ovf, t, pre_p);
11538
11539 if (reg != NULL_TREE)
11540 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
11541
11542
11543 /* Increment register save count. */
11544
11545 if (n_reg > 0)
11546 {
11547 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
11548 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
11549 gimplify_and_add (u, pre_p);
11550 }
11551
11552 if (indirect_p)
11553 {
11554 t = build_pointer_type_for_mode (build_pointer_type (type),
11555 ptr_mode, true);
11556 addr = fold_convert (t, addr);
11557 addr = build_va_arg_indirect_ref (addr);
11558 }
11559 else
11560 {
11561 t = build_pointer_type_for_mode (type, ptr_mode, true);
11562 addr = fold_convert (t, addr);
11563 }
11564
11565 return build_va_arg_indirect_ref (addr);
11566 }
11567
11568 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
11569 expanders.
11570 DEST - Register location where CC will be stored.
11571 TDB - Pointer to a 256 byte area where to store the transaction
11572 diagnostic block. NULL if TDB is not needed.
11573 RETRY - Retry count value. If non-NULL a retry loop for CC2
11574 is emitted.
11575 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
11576 of the tbegin instruction pattern. */
11577
11578 void
11579 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
11580 {
11581 rtx retry_plus_two = gen_reg_rtx (SImode);
11582 rtx retry_reg = gen_reg_rtx (SImode);
11583 rtx_code_label *retry_label = NULL;
11584
11585 if (retry != NULL_RTX)
11586 {
11587 emit_move_insn (retry_reg, retry);
11588 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
11589 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
11590 retry_label = gen_label_rtx ();
11591 emit_label (retry_label);
11592 }
11593
11594 if (clobber_fprs_p)
11595 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK), tdb));
11596 else
11597 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
11598 tdb));
11599
11600 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
11601 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
11602 CC_REGNUM)),
11603 UNSPEC_CC_TO_INT));
11604 if (retry != NULL_RTX)
11605 {
11606 const int CC0 = 1 << 3;
11607 const int CC1 = 1 << 2;
11608 const int CC3 = 1 << 0;
11609 rtx jump;
11610 rtx count = gen_reg_rtx (SImode);
11611 rtx_code_label *leave_label = gen_label_rtx ();
11612
11613 /* Exit for success and permanent failures. */
11614 jump = s390_emit_jump (leave_label,
11615 gen_rtx_EQ (VOIDmode,
11616 gen_rtx_REG (CCRAWmode, CC_REGNUM),
11617 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
11618 LABEL_NUSES (leave_label) = 1;
11619
11620 /* CC2 - transient failure. Perform retry with ppa. */
11621 emit_move_insn (count, retry_plus_two);
11622 emit_insn (gen_subsi3 (count, count, retry_reg));
11623 emit_insn (gen_tx_assist (count));
11624 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
11625 retry_reg,
11626 retry_reg));
11627 JUMP_LABEL (jump) = retry_label;
11628 LABEL_NUSES (retry_label) = 1;
11629 emit_label (leave_label);
11630 }
11631 }
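/* Illustrative user-level view of the expanders above (a sketch; assumes
   the -mhtm built-ins, and passes a null TDB since the diagnostic block
   is optional):

     if (__builtin_tbegin ((void *) 0) == 0)
       {
         ... transactional code ...
         __builtin_tend ();
       }
     else
       ... fallback path ...

   __builtin_tbegin_retry ((void *) 0, 5) corresponds to the RETRY != NULL_RTX
   case and additionally emits the CC2 retry loop generated above.  */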
11632
11633
11634 /* Return the decl for the target specific builtin with the function
11635 code FCODE. */
11636
11637 static tree
11638 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
11639 {
11640 if (fcode >= S390_BUILTIN_MAX)
11641 return error_mark_node;
11642
11643 return s390_builtin_decls[fcode];
11644 }
11645
11646 /* We call mcount before the function prologue. So a profiled leaf
11647 function should stay a leaf function. */
11648
11649 static bool
11650 s390_keep_leaf_when_profiled ()
11651 {
11652 return true;
11653 }
11654
11655 /* Output assembly code for the trampoline template to
11656 stdio stream FILE.
11657
11658 On S/390, we use gpr 1 internally in the trampoline code;
11659 gpr 0 is used to hold the static chain. */
11660
11661 static void
11662 s390_asm_trampoline_template (FILE *file)
11663 {
11664 rtx op[2];
11665 op[0] = gen_rtx_REG (Pmode, 0);
11666 op[1] = gen_rtx_REG (Pmode, 1);
11667
11668 if (TARGET_64BIT)
11669 {
11670 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
11671 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
11672 output_asm_insn ("br\t%1", op); /* 2 byte */
11673 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
11674 }
11675 else
11676 {
11677 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
11678 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
11679 output_asm_insn ("br\t%1", op); /* 2 byte */
11680 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
11681 }
11682 }
11683
11684 /* Emit RTL insns to initialize the variable parts of a trampoline.
11685 FNADDR is an RTX for the address of the function's pure code.
11686 CXT is an RTX for the static chain value for the function. */
11687
11688 static void
11689 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
11690 {
11691 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11692 rtx mem;
11693
11694 emit_block_move (m_tramp, assemble_trampoline_template (),
11695 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
11696
11697 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
11698 emit_move_insn (mem, cxt);
11699 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
11700 emit_move_insn (mem, fnaddr);
11701 }
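/* Resulting trampoline layout (a sketch derived from the two functions
   above; offsets in bytes):

     0                    code copied from s390_asm_trampoline_template
     2 * UNITS_PER_LONG   static chain value (CXT), loaded into %r0
     3 * UNITS_PER_LONG   target address (FNADDR), loaded into %r1

   The basr in the template puts the address following it into %r1, from
   which the lmg/lm reaches both slots before the br to the target.  */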
11702
11703 /* Output assembler code to FILE to increment profiler label # LABELNO
11704 for profiling a function entry. */
11705
11706 void
11707 s390_function_profiler (FILE *file, int labelno)
11708 {
11709 rtx op[7];
11710
11711 char label[128];
11712 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
11713
11714 fprintf (file, "# function profiler \n");
11715
11716 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
11717 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
11718 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
11719
11720 op[2] = gen_rtx_REG (Pmode, 1);
11721 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
11722 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
11723
11724 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
11725 if (flag_pic)
11726 {
11727 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
11728 op[4] = gen_rtx_CONST (Pmode, op[4]);
11729 }
11730
11731 if (TARGET_64BIT)
11732 {
11733 output_asm_insn ("stg\t%0,%1", op);
11734 output_asm_insn ("larl\t%2,%3", op);
11735 output_asm_insn ("brasl\t%0,%4", op);
11736 output_asm_insn ("lg\t%0,%1", op);
11737 }
11738 else if (!flag_pic)
11739 {
11740 op[6] = gen_label_rtx ();
11741
11742 output_asm_insn ("st\t%0,%1", op);
11743 output_asm_insn ("bras\t%2,%l6", op);
11744 output_asm_insn (".long\t%4", op);
11745 output_asm_insn (".long\t%3", op);
11746 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
11747 output_asm_insn ("l\t%0,0(%2)", op);
11748 output_asm_insn ("l\t%2,4(%2)", op);
11749 output_asm_insn ("basr\t%0,%0", op);
11750 output_asm_insn ("l\t%0,%1", op);
11751 }
11752 else
11753 {
11754 op[5] = gen_label_rtx ();
11755 op[6] = gen_label_rtx ();
11756
11757 output_asm_insn ("st\t%0,%1", op);
11758 output_asm_insn ("bras\t%2,%l6", op);
11759 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
11760 output_asm_insn (".long\t%4-%l5", op);
11761 output_asm_insn (".long\t%3-%l5", op);
11762 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
11763 output_asm_insn ("lr\t%0,%2", op);
11764 output_asm_insn ("a\t%0,0(%2)", op);
11765 output_asm_insn ("a\t%2,4(%2)", op);
11766 output_asm_insn ("basr\t%0,%0", op);
11767 output_asm_insn ("l\t%0,%1", op);
11768 }
11769 }
11770
11771 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
11772 into its SYMBOL_REF_FLAGS. */
11773
11774 static void
11775 s390_encode_section_info (tree decl, rtx rtl, int first)
11776 {
11777 default_encode_section_info (decl, rtl, first);
11778
11779 if (TREE_CODE (decl) == VAR_DECL)
11780 {
11781 /* If a variable has a forced alignment to < 2 bytes, mark it
11782 with SYMBOL_FLAG_ALIGN1 to prevent it from being used as a LARL
11783 operand. */
11784 if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
11785 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
11786 if (!DECL_SIZE (decl)
11787 || !DECL_ALIGN (decl)
11788 || !tree_fits_shwi_p (DECL_SIZE (decl))
11789 || (DECL_ALIGN (decl) <= 64
11790 && DECL_ALIGN (decl) != tree_to_shwi (DECL_SIZE (decl))))
11791 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
11792 }
11793
11794 /* Literal pool references don't have a decl so they are handled
11795 differently here. We rely on the information in the MEM_ALIGN
11796 entry to decide upon natural alignment. */
11797 if (MEM_P (rtl)
11798 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
11799 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
11800 && (MEM_ALIGN (rtl) == 0
11801 || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
11802 || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
11803 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
11804 }
11805
11806 /* Output thunk to FILE that implements a C++ virtual function call (with
11807 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
11808 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
11809 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
11810 relative to the resulting this pointer. */
11811
11812 static void
11813 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
11814 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11815 tree function)
11816 {
11817 rtx op[10];
11818 int nonlocal = 0;
11819
11820 /* Make sure unwind info is emitted for the thunk if needed. */
11821 final_start_function (emit_barrier (), file, 1);
11822
11823 /* Operand 0 is the target function. */
11824 op[0] = XEXP (DECL_RTL (function), 0);
11825 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
11826 {
11827 nonlocal = 1;
11828 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
11829 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
11830 op[0] = gen_rtx_CONST (Pmode, op[0]);
11831 }
11832
11833 /* Operand 1 is the 'this' pointer. */
11834 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11835 op[1] = gen_rtx_REG (Pmode, 3);
11836 else
11837 op[1] = gen_rtx_REG (Pmode, 2);
11838
11839 /* Operand 2 is the delta. */
11840 op[2] = GEN_INT (delta);
11841
11842 /* Operand 3 is the vcall_offset. */
11843 op[3] = GEN_INT (vcall_offset);
11844
11845 /* Operand 4 is the temporary register. */
11846 op[4] = gen_rtx_REG (Pmode, 1);
11847
11848 /* Operands 5 to 8 can be used as labels. */
11849 op[5] = NULL_RTX;
11850 op[6] = NULL_RTX;
11851 op[7] = NULL_RTX;
11852 op[8] = NULL_RTX;
11853
11854 /* Operand 9 can be used for temporary register. */
11855 op[9] = NULL_RTX;
11856
11857 /* Generate code. */
11858 if (TARGET_64BIT)
11859 {
11860 /* Setup literal pool pointer if required. */
11861 if ((!DISP_IN_RANGE (delta)
11862 && !CONST_OK_FOR_K (delta)
11863 && !CONST_OK_FOR_Os (delta))
11864 || (!DISP_IN_RANGE (vcall_offset)
11865 && !CONST_OK_FOR_K (vcall_offset)
11866 && !CONST_OK_FOR_Os (vcall_offset)))
11867 {
11868 op[5] = gen_label_rtx ();
11869 output_asm_insn ("larl\t%4,%5", op);
11870 }
11871
11872 /* Add DELTA to this pointer. */
11873 if (delta)
11874 {
11875 if (CONST_OK_FOR_J (delta))
11876 output_asm_insn ("la\t%1,%2(%1)", op);
11877 else if (DISP_IN_RANGE (delta))
11878 output_asm_insn ("lay\t%1,%2(%1)", op);
11879 else if (CONST_OK_FOR_K (delta))
11880 output_asm_insn ("aghi\t%1,%2", op);
11881 else if (CONST_OK_FOR_Os (delta))
11882 output_asm_insn ("agfi\t%1,%2", op);
11883 else
11884 {
11885 op[6] = gen_label_rtx ();
11886 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
11887 }
11888 }
11889
11890 /* Perform vcall adjustment. */
11891 if (vcall_offset)
11892 {
11893 if (DISP_IN_RANGE (vcall_offset))
11894 {
11895 output_asm_insn ("lg\t%4,0(%1)", op);
11896 output_asm_insn ("ag\t%1,%3(%4)", op);
11897 }
11898 else if (CONST_OK_FOR_K (vcall_offset))
11899 {
11900 output_asm_insn ("lghi\t%4,%3", op);
11901 output_asm_insn ("ag\t%4,0(%1)", op);
11902 output_asm_insn ("ag\t%1,0(%4)", op);
11903 }
11904 else if (CONST_OK_FOR_Os (vcall_offset))
11905 {
11906 output_asm_insn ("lgfi\t%4,%3", op);
11907 output_asm_insn ("ag\t%4,0(%1)", op);
11908 output_asm_insn ("ag\t%1,0(%4)", op);
11909 }
11910 else
11911 {
11912 op[7] = gen_label_rtx ();
11913 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
11914 output_asm_insn ("ag\t%4,0(%1)", op);
11915 output_asm_insn ("ag\t%1,0(%4)", op);
11916 }
11917 }
11918
11919 /* Jump to target. */
11920 output_asm_insn ("jg\t%0", op);
11921
11922 /* Output literal pool if required. */
11923 if (op[5])
11924 {
11925 output_asm_insn (".align\t4", op);
11926 targetm.asm_out.internal_label (file, "L",
11927 CODE_LABEL_NUMBER (op[5]));
11928 }
11929 if (op[6])
11930 {
11931 targetm.asm_out.internal_label (file, "L",
11932 CODE_LABEL_NUMBER (op[6]));
11933 output_asm_insn (".long\t%2", op);
11934 }
11935 if (op[7])
11936 {
11937 targetm.asm_out.internal_label (file, "L",
11938 CODE_LABEL_NUMBER (op[7]));
11939 output_asm_insn (".long\t%3", op);
11940 }
11941 }
11942 else
11943 {
11944 /* Setup base pointer if required. */
11945 if (!vcall_offset
11946 || (!DISP_IN_RANGE (delta)
11947 && !CONST_OK_FOR_K (delta)
11948 && !CONST_OK_FOR_Os (delta))
11949 || (!DISP_IN_RANGE (delta)
11950 && !CONST_OK_FOR_K (vcall_offset)
11951 && !CONST_OK_FOR_Os (vcall_offset)))
11952 {
11953 op[5] = gen_label_rtx ();
11954 output_asm_insn ("basr\t%4,0", op);
11955 targetm.asm_out.internal_label (file, "L",
11956 CODE_LABEL_NUMBER (op[5]));
11957 }
11958
11959 /* Add DELTA to this pointer. */
11960 if (delta)
11961 {
11962 if (CONST_OK_FOR_J (delta))
11963 output_asm_insn ("la\t%1,%2(%1)", op);
11964 else if (DISP_IN_RANGE (delta))
11965 output_asm_insn ("lay\t%1,%2(%1)", op);
11966 else if (CONST_OK_FOR_K (delta))
11967 output_asm_insn ("ahi\t%1,%2", op);
11968 else if (CONST_OK_FOR_Os (delta))
11969 output_asm_insn ("afi\t%1,%2", op);
11970 else
11971 {
11972 op[6] = gen_label_rtx ();
11973 output_asm_insn ("a\t%1,%6-%5(%4)", op);
11974 }
11975 }
11976
11977 /* Perform vcall adjustment. */
11978 if (vcall_offset)
11979 {
11980 if (CONST_OK_FOR_J (vcall_offset))
11981 {
11982 output_asm_insn ("l\t%4,0(%1)", op);
11983 output_asm_insn ("a\t%1,%3(%4)", op);
11984 }
11985 else if (DISP_IN_RANGE (vcall_offset))
11986 {
11987 output_asm_insn ("l\t%4,0(%1)", op);
11988 output_asm_insn ("ay\t%1,%3(%4)", op);
11989 }
11990 else if (CONST_OK_FOR_K (vcall_offset))
11991 {
11992 output_asm_insn ("lhi\t%4,%3", op);
11993 output_asm_insn ("a\t%4,0(%1)", op);
11994 output_asm_insn ("a\t%1,0(%4)", op);
11995 }
11996 else if (CONST_OK_FOR_Os (vcall_offset))
11997 {
11998 output_asm_insn ("iilf\t%4,%3", op);
11999 output_asm_insn ("a\t%4,0(%1)", op);
12000 output_asm_insn ("a\t%1,0(%4)", op);
12001 }
12002 else
12003 {
12004 op[7] = gen_label_rtx ();
12005 output_asm_insn ("l\t%4,%7-%5(%4)", op);
12006 output_asm_insn ("a\t%4,0(%1)", op);
12007 output_asm_insn ("a\t%1,0(%4)", op);
12008 }
12009
12010 /* We had to clobber the base pointer register.
12011 Re-setup the base pointer (with a different base). */
12012 op[5] = gen_label_rtx ();
12013 output_asm_insn ("basr\t%4,0", op);
12014 targetm.asm_out.internal_label (file, "L",
12015 CODE_LABEL_NUMBER (op[5]));
12016 }
12017
12018 /* Jump to target. */
12019 op[8] = gen_label_rtx ();
12020
12021 if (!flag_pic)
12022 output_asm_insn ("l\t%4,%8-%5(%4)", op);
12023 else if (!nonlocal)
12024 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12025 /* We cannot call through .plt, since .plt requires %r12 loaded. */
12026 else if (flag_pic == 1)
12027 {
12028 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12029 output_asm_insn ("l\t%4,%0(%4)", op);
12030 }
12031 else if (flag_pic == 2)
12032 {
12033 op[9] = gen_rtx_REG (Pmode, 0);
12034 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
12035 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12036 output_asm_insn ("ar\t%4,%9", op);
12037 output_asm_insn ("l\t%4,0(%4)", op);
12038 }
12039
12040 output_asm_insn ("br\t%4", op);
12041
12042 /* Output literal pool. */
12043 output_asm_insn (".align\t4", op);
12044
12045 if (nonlocal && flag_pic == 2)
12046 output_asm_insn (".long\t%0", op);
12047 if (nonlocal)
12048 {
12049 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
12050 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
12051 }
12052
12053 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
12054 if (!flag_pic)
12055 output_asm_insn (".long\t%0", op);
12056 else
12057 output_asm_insn (".long\t%0-%5", op);
12058
12059 if (op[6])
12060 {
12061 targetm.asm_out.internal_label (file, "L",
12062 CODE_LABEL_NUMBER (op[6]));
12063 output_asm_insn (".long\t%2", op);
12064 }
12065 if (op[7])
12066 {
12067 targetm.asm_out.internal_label (file, "L",
12068 CODE_LABEL_NUMBER (op[7]));
12069 output_asm_insn (".long\t%3", op);
12070 }
12071 }
12072 final_end_function ();
12073 }
12074
12075 static bool
12076 s390_valid_pointer_mode (machine_mode mode)
12077 {
12078 return (mode == SImode || (TARGET_64BIT && mode == DImode));
12079 }
12080
12081 /* Checks whether the given CALL_EXPR would use a call-saved
12082 register. This is used to decide whether sibling call
12083 optimization could be performed on the respective function
12084 call. */
12085
12086 static bool
12087 s390_call_saved_register_used (tree call_expr)
12088 {
12089 CUMULATIVE_ARGS cum_v;
12090 cumulative_args_t cum;
12091 tree parameter;
12092 machine_mode mode;
12093 tree type;
12094 rtx parm_rtx;
12095 int reg, i;
12096
12097 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
12098 cum = pack_cumulative_args (&cum_v);
12099
12100 for (i = 0; i < call_expr_nargs (call_expr); i++)
12101 {
12102 parameter = CALL_EXPR_ARG (call_expr, i);
12103 gcc_assert (parameter);
12104
12105 /* For an undeclared variable passed as parameter we will get
12106 an ERROR_MARK node here. */
12107 if (TREE_CODE (parameter) == ERROR_MARK)
12108 return true;
12109
12110 type = TREE_TYPE (parameter);
12111 gcc_assert (type);
12112
12113 mode = TYPE_MODE (type);
12114 gcc_assert (mode);
12115
12116 /* We assume that in the target function all parameters are
12117 named. This only has an impact on vector argument register
12118 usage, none of which is call-saved.
12119 if (pass_by_reference (&cum_v, mode, type, true))
12120 {
12121 mode = Pmode;
12122 type = build_pointer_type (type);
12123 }
12124
12125 parm_rtx = s390_function_arg (cum, mode, type, true);
12126
12127 s390_function_arg_advance (cum, mode, type, true);
12128
12129 if (!parm_rtx)
12130 continue;
12131
12132 if (REG_P (parm_rtx))
12133 {
12134 for (reg = 0;
12135 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
12136 reg++)
12137 if (!call_used_regs[reg + REGNO (parm_rtx)])
12138 return true;
12139 }
12140
12141 if (GET_CODE (parm_rtx) == PARALLEL)
12142 {
12143 int i;
12144
12145 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
12146 {
12147 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
12148
12149 gcc_assert (REG_P (r));
12150
12151 for (reg = 0;
12152 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
12153 reg++)
12154 if (!call_used_regs[reg + REGNO (r)])
12155 return true;
12156 }
12157 }
12158
12159 }
12160 return false;
12161 }
12162
12163 /* Return true if the given call expression can be
12164 turned into a sibling call.
12165 DECL holds the declaration of the function to be called whereas
12166 EXP is the call expression itself. */
12167
12168 static bool
12169 s390_function_ok_for_sibcall (tree decl, tree exp)
12170 {
12171 /* The TPF epilogue uses register 1. */
12172 if (TARGET_TPF_PROFILING)
12173 return false;
12174
12175 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
12176 which would have to be restored before the sibcall. */
12177 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
12178 return false;
12179
12180 /* Register 6 on s390 is available as an argument register but unfortunately
12181 "caller saved". This makes functions needing this register for arguments
12182 not suitable for sibcalls. */
12183 return !s390_call_saved_register_used (exp);
12184 }
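/* Illustrative example of the last restriction (a sketch): the first five
   integer arguments are passed in %r2..%r6, and %r6 is call-saved.  A tail
   call such as

     return callee (a, b, c, d, e);

   needs %r6 for the fifth argument, so s390_call_saved_register_used
   returns true and the call is not turned into a sibcall.  */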
12185
12186 /* Return the fixed registers used for condition codes. */
12187
12188 static bool
12189 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
12190 {
12191 *p1 = CC_REGNUM;
12192 *p2 = INVALID_REGNUM;
12193
12194 return true;
12195 }
12196
12197 /* This function is used by the call expanders of the machine description.
12198 It emits the call insn itself together with the necessary operations
12199 to adjust the target address and returns the emitted insn.
12200 ADDR_LOCATION is the target address rtx
12201 TLS_CALL the location of the thread-local symbol
12202 RESULT_REG the register where the result of the call should be stored
12203 RETADDR_REG the register where the return address should be stored
12204 If this parameter is NULL_RTX the call is considered
12205 to be a sibling call. */
12206
12207 rtx_insn *
12208 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
12209 rtx retaddr_reg)
12210 {
12211 bool plt_call = false;
12212 rtx_insn *insn;
12213 rtx call;
12214 rtx clobber;
12215 rtvec vec;
12216
12217 /* Direct function calls need special treatment. */
12218 if (GET_CODE (addr_location) == SYMBOL_REF)
12219 {
12220 /* When calling a global routine in PIC mode, we must
12221 replace the symbol itself with the PLT stub. */
12222 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
12223 {
12224 if (retaddr_reg != NULL_RTX)
12225 {
12226 addr_location = gen_rtx_UNSPEC (Pmode,
12227 gen_rtvec (1, addr_location),
12228 UNSPEC_PLT);
12229 addr_location = gen_rtx_CONST (Pmode, addr_location);
12230 plt_call = true;
12231 }
12232 else
12233 /* For -fpic code the PLT entries might use r12 which is
12234 call-saved. Therefore we cannot do a sibcall when
12235 calling directly using a symbol ref. When reaching
12236 this point we decided (in s390_function_ok_for_sibcall)
12237 to do a sibcall for a function pointer but one of the
12238 optimizers was able to get rid of the function pointer
12239 by propagating the symbol ref into the call. This
12240 optimization is illegal for S/390 so we turn the direct
12241 call into an indirect call again. */
12242 addr_location = force_reg (Pmode, addr_location);
12243 }
12244
12245 /* Unless we can use the bras(l) insn, force the
12246 routine address into a register. */
12247 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
12248 {
12249 if (flag_pic)
12250 addr_location = legitimize_pic_address (addr_location, 0);
12251 else
12252 addr_location = force_reg (Pmode, addr_location);
12253 }
12254 }
12255
12256 /* If it is already an indirect call or the code above moved the
12257 SYMBOL_REF to somewhere else, make sure the address can be found in
12258 register 1. */
12259 if (retaddr_reg == NULL_RTX
12260 && GET_CODE (addr_location) != SYMBOL_REF
12261 && !plt_call)
12262 {
12263 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
12264 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
12265 }
12266
12267 addr_location = gen_rtx_MEM (QImode, addr_location);
12268 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
12269
12270 if (result_reg != NULL_RTX)
12271 call = gen_rtx_SET (result_reg, call);
12272
12273 if (retaddr_reg != NULL_RTX)
12274 {
12275 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
12276
12277 if (tls_call != NULL_RTX)
12278 vec = gen_rtvec (3, call, clobber,
12279 gen_rtx_USE (VOIDmode, tls_call));
12280 else
12281 vec = gen_rtvec (2, call, clobber);
12282
12283 call = gen_rtx_PARALLEL (VOIDmode, vec);
12284 }
12285
12286 insn = emit_call_insn (call);
12287
12288 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
12289 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
12290 {
12291 /* s390_function_ok_for_sibcall should
12292 have denied sibcalls in this case. */
12293 gcc_assert (retaddr_reg != NULL_RTX);
12294 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
12295 }
12296 return insn;
12297 }
12298
12299 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
12300
12301 static void
12302 s390_conditional_register_usage (void)
12303 {
12304 int i;
12305
12306 if (flag_pic)
12307 {
12308 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12309 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12310 }
12311 if (TARGET_CPU_ZARCH)
12312 {
12313 fixed_regs[BASE_REGNUM] = 0;
12314 call_used_regs[BASE_REGNUM] = 0;
12315 fixed_regs[RETURN_REGNUM] = 0;
12316 call_used_regs[RETURN_REGNUM] = 0;
12317 }
12318 if (TARGET_64BIT)
12319 {
12320 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
12321 call_used_regs[i] = call_really_used_regs[i] = 0;
12322 }
12323 else
12324 {
12325 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
12326 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
12327 }
12328
12329 if (TARGET_SOFT_FLOAT)
12330 {
12331 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
12332 call_used_regs[i] = fixed_regs[i] = 1;
12333 }
12334
12335 /* Disable v16 - v31 for non-vector target. */
12336 if (!TARGET_VX)
12337 {
12338 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
12339 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
12340 }
12341 }
12342
12343 /* Corresponding function to eh_return expander. */
12344
12345 static GTY(()) rtx s390_tpf_eh_return_symbol;
12346 void
12347 s390_emit_tpf_eh_return (rtx target)
12348 {
12349 rtx_insn *insn;
12350 rtx reg, orig_ra;
12351
12352 if (!s390_tpf_eh_return_symbol)
12353 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
12354
12355 reg = gen_rtx_REG (Pmode, 2);
12356 orig_ra = gen_rtx_REG (Pmode, 3);
12357
12358 emit_move_insn (reg, target);
12359 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
12360 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
12361 gen_rtx_REG (Pmode, RETURN_REGNUM));
12362 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
12363 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
12364
12365 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
12366 }
12367
12368 /* Rework the prologue/epilogue to avoid saving/restoring
12369 registers unnecessarily. */
12370
12371 static void
12372 s390_optimize_prologue (void)
12373 {
12374 rtx_insn *insn, *new_insn, *next_insn;
12375
12376 /* Do a final recompute of the frame-related data. */
12377 s390_optimize_register_info ();
12378
12379 /* If all special registers are in fact used, there's nothing we
12380 can do, so no point in walking the insn list. */
12381
12382 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
12383 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
12384 && (TARGET_CPU_ZARCH
12385 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
12386 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
12387 return;
12388
12389 /* Search for prologue/epilogue insns and replace them. */
12390
12391 for (insn = get_insns (); insn; insn = next_insn)
12392 {
12393 int first, last, off;
12394 rtx set, base, offset;
12395 rtx pat;
12396
12397 next_insn = NEXT_INSN (insn);
12398
12399 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
12400 continue;
12401
12402 pat = PATTERN (insn);
12403
12404 /* Remove ldgr/lgdr instructions used for saving and restoring
12405 GPRs if possible. */
12406 if (TARGET_Z10
12407 && GET_CODE (pat) == SET
12408 && GET_MODE (SET_SRC (pat)) == DImode
12409 && REG_P (SET_SRC (pat))
12410 && REG_P (SET_DEST (pat)))
12411 {
12412 int src_regno = REGNO (SET_SRC (pat));
12413 int dest_regno = REGNO (SET_DEST (pat));
12414 int gpr_regno;
12415 int fpr_regno;
12416
12417 if (!((GENERAL_REGNO_P (src_regno) && FP_REGNO_P (dest_regno))
12418 || (FP_REGNO_P (src_regno) && GENERAL_REGNO_P (dest_regno))))
12419 continue;
12420
12421 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
12422 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
12423
12424 /* GPR must be call-saved, FPR must be call-clobbered. */
12425 if (!call_really_used_regs[fpr_regno]
12426 || call_really_used_regs[gpr_regno])
12427 continue;
12428
12429 /* It must not happen that what we once saved in an FPR now
12430 needs a stack slot. */
12431 gcc_assert (cfun_gpr_save_slot (gpr_regno) != -1);
12432
12433 if (cfun_gpr_save_slot (gpr_regno) == 0)
12434 {
12435 remove_insn (insn);
12436 continue;
12437 }
12438 }
12439
12440 if (GET_CODE (pat) == PARALLEL
12441 && store_multiple_operation (pat, VOIDmode))
12442 {
12443 set = XVECEXP (pat, 0, 0);
12444 first = REGNO (SET_SRC (set));
12445 last = first + XVECLEN (pat, 0) - 1;
12446 offset = const0_rtx;
12447 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
12448 off = INTVAL (offset);
12449
12450 if (GET_CODE (base) != REG || off < 0)
12451 continue;
12452 if (cfun_frame_layout.first_save_gpr != -1
12453 && (cfun_frame_layout.first_save_gpr < first
12454 || cfun_frame_layout.last_save_gpr > last))
12455 continue;
12456 if (REGNO (base) != STACK_POINTER_REGNUM
12457 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12458 continue;
12459 if (first > BASE_REGNUM || last < BASE_REGNUM)
12460 continue;
12461
12462 if (cfun_frame_layout.first_save_gpr != -1)
12463 {
12464 rtx s_pat = save_gprs (base,
12465 off + (cfun_frame_layout.first_save_gpr
12466 - first) * UNITS_PER_LONG,
12467 cfun_frame_layout.first_save_gpr,
12468 cfun_frame_layout.last_save_gpr);
12469 new_insn = emit_insn_before (s_pat, insn);
12470 INSN_ADDRESSES_NEW (new_insn, -1);
12471 }
12472
12473 remove_insn (insn);
12474 continue;
12475 }
12476
12477 if (cfun_frame_layout.first_save_gpr == -1
12478 && GET_CODE (pat) == SET
12479 && GENERAL_REG_P (SET_SRC (pat))
12480 && GET_CODE (SET_DEST (pat)) == MEM)
12481 {
12482 set = pat;
12483 first = REGNO (SET_SRC (set));
12484 offset = const0_rtx;
12485 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
12486 off = INTVAL (offset);
12487
12488 if (GET_CODE (base) != REG || off < 0)
12489 continue;
12490 if (REGNO (base) != STACK_POINTER_REGNUM
12491 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12492 continue;
12493
12494 remove_insn (insn);
12495 continue;
12496 }
12497
12498 if (GET_CODE (pat) == PARALLEL
12499 && load_multiple_operation (pat, VOIDmode))
12500 {
12501 set = XVECEXP (pat, 0, 0);
12502 first = REGNO (SET_DEST (set));
12503 last = first + XVECLEN (pat, 0) - 1;
12504 offset = const0_rtx;
12505 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
12506 off = INTVAL (offset);
12507
12508 if (GET_CODE (base) != REG || off < 0)
12509 continue;
12510
12511 if (cfun_frame_layout.first_restore_gpr != -1
12512 && (cfun_frame_layout.first_restore_gpr < first
12513 || cfun_frame_layout.last_restore_gpr > last))
12514 continue;
12515 if (REGNO (base) != STACK_POINTER_REGNUM
12516 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12517 continue;
12518 if (first > BASE_REGNUM || last < BASE_REGNUM)
12519 continue;
12520
12521 if (cfun_frame_layout.first_restore_gpr != -1)
12522 {
12523 rtx rpat = restore_gprs (base,
12524 off + (cfun_frame_layout.first_restore_gpr
12525 - first) * UNITS_PER_LONG,
12526 cfun_frame_layout.first_restore_gpr,
12527 cfun_frame_layout.last_restore_gpr);
12528
12529 /* Remove REG_CFA_RESTOREs for registers that we no
12530 longer need to save. */
12531 REG_NOTES (rpat) = REG_NOTES (insn);
12532 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
12533 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
12534 && ((int) REGNO (XEXP (*ptr, 0))
12535 < cfun_frame_layout.first_restore_gpr))
12536 *ptr = XEXP (*ptr, 1);
12537 else
12538 ptr = &XEXP (*ptr, 1);
12539 new_insn = emit_insn_before (rpat, insn);
12540 RTX_FRAME_RELATED_P (new_insn) = 1;
12541 INSN_ADDRESSES_NEW (new_insn, -1);
12542 }
12543
12544 remove_insn (insn);
12545 continue;
12546 }
12547
12548 if (cfun_frame_layout.first_restore_gpr == -1
12549 && GET_CODE (pat) == SET
12550 && GENERAL_REG_P (SET_DEST (pat))
12551 && GET_CODE (SET_SRC (pat)) == MEM)
12552 {
12553 set = pat;
12554 first = REGNO (SET_DEST (set));
12555 offset = const0_rtx;
12556 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
12557 off = INTVAL (offset);
12558
12559 if (GET_CODE (base) != REG || off < 0)
12560 continue;
12561
12562 if (REGNO (base) != STACK_POINTER_REGNUM
12563 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12564 continue;
12565
12566 remove_insn (insn);
12567 continue;
12568 }
12569 }
12570 }
12571
12572 /* On z10 and later the dynamic branch prediction must see the
12573 backward jump within a certain window. If not, it falls back to
12574 the static prediction. This function rearranges the loop backward
12575 branch in a way which makes the static prediction always correct.
12576 The function returns true if it added an instruction. */
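/* Sketch of the transformation performed below (illustrative; the
   mnemonics only indicate the kind of branch):

     start:                          start:
       ...                             ...
       jcond  start       becomes      j!cond  skip
                                       j       start
                                     skip:

   The backward branch becomes unconditional and is therefore always
   predicted correctly by the static predictor.  */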
12577 static bool
12578 s390_fix_long_loop_prediction (rtx_insn *insn)
12579 {
12580 rtx set = single_set (insn);
12581 rtx code_label, label_ref, new_label;
12582 rtx_insn *uncond_jump;
12583 rtx_insn *cur_insn;
12584 rtx tmp;
12585 int distance;
12586
12587 /* This will exclude branch on count and branch on index patterns
12588 since these are correctly statically predicted. */
12589 if (!set
12590 || SET_DEST (set) != pc_rtx
12591 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
12592 return false;
12593
12594 /* Skip conditional returns. */
12595 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
12596 && XEXP (SET_SRC (set), 2) == pc_rtx)
12597 return false;
12598
12599 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
12600 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
12601
12602 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
12603
12604 code_label = XEXP (label_ref, 0);
12605
12606 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
12607 || INSN_ADDRESSES (INSN_UID (insn)) == -1
12608 || (INSN_ADDRESSES (INSN_UID (insn))
12609 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
12610 return false;
12611
12612 for (distance = 0, cur_insn = PREV_INSN (insn);
12613 distance < PREDICT_DISTANCE - 6;
12614 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
12615 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
12616 return false;
12617
12618 new_label = gen_label_rtx ();
12619 uncond_jump = emit_jump_insn_after (
12620 gen_rtx_SET (pc_rtx,
12621 gen_rtx_LABEL_REF (VOIDmode, code_label)),
12622 insn);
12623 emit_label_after (new_label, uncond_jump);
12624
12625 tmp = XEXP (SET_SRC (set), 1);
12626 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
12627 XEXP (SET_SRC (set), 2) = tmp;
12628 INSN_CODE (insn) = -1;
12629
12630 XEXP (label_ref, 0) = new_label;
12631 JUMP_LABEL (insn) = new_label;
12632 JUMP_LABEL (uncond_jump) = code_label;
12633
12634 return true;
12635 }
12636
12637 /* Returns 1 if INSN reads the value of REG for purposes not related
12638 to addressing of memory, and 0 otherwise. */
12639 static int
12640 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
12641 {
12642 return reg_referenced_p (reg, PATTERN (insn))
12643 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
12644 }
12645
12646 /* Starting from INSN find_cond_jump looks downwards in the insn
12647 stream for a single jump insn which is the last user of the
12648 condition code set in INSN. */
12649 static rtx_insn *
12650 find_cond_jump (rtx_insn *insn)
12651 {
12652 for (; insn; insn = NEXT_INSN (insn))
12653 {
12654 rtx ite, cc;
12655
12656 if (LABEL_P (insn))
12657 break;
12658
12659 if (!JUMP_P (insn))
12660 {
12661 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
12662 break;
12663 continue;
12664 }
12665
12666 /* This will be triggered by a return. */
12667 if (GET_CODE (PATTERN (insn)) != SET)
12668 break;
12669
12670 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
12671 ite = SET_SRC (PATTERN (insn));
12672
12673 if (GET_CODE (ite) != IF_THEN_ELSE)
12674 break;
12675
12676 cc = XEXP (XEXP (ite, 0), 0);
12677 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
12678 break;
12679
12680 if (find_reg_note (insn, REG_DEAD, cc))
12681 return insn;
12682 break;
12683 }
12684
12685 return NULL;
12686 }
12687
12688 /* Swap the condition in COND and the operands in OP0 and OP1 so that
12689 the semantics does not change. If NULL_RTX is passed as COND the
12690 function tries to find the conditional jump starting with INSN. */
12691 static void
12692 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
12693 {
12694 rtx tmp = *op0;
12695
12696 if (cond == NULL_RTX)
12697 {
12698 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
12699 rtx set = jump ? single_set (jump) : NULL_RTX;
12700
12701 if (set == NULL_RTX)
12702 return;
12703
12704 cond = XEXP (SET_SRC (set), 0);
12705 }
12706
12707 *op0 = *op1;
12708 *op1 = tmp;
12709 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
12710 }
12711
12712 /* On z10, instructions of the compare-and-branch family have the
12713 property of accessing the register occurring as the second operand with
12714 its bits complemented. If such a compare is grouped with a second
12715 instruction that accesses the same register non-complemented, and
12716 if that register's value is delivered via a bypass, then the
12717 pipeline recycles, thereby causing significant performance decline.
12718 This function locates such situations and exchanges the two
12719 operands of the compare. The function returns true whenever it
12720 added an insn. */
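/* Sketch of a problematic pairing handled below (illustrative only; the
   bypass condition mentioned above is omitted):

     cgrj  %r1,%r2,...     second operand %r2 is accessed complemented
     agr   %r3,%r2         grouped insn reads %r2 non-complemented

   The compare operands (and the condition) are swapped, or a nop is
   inserted after the compare, to break up this pairing.  */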
12721 static bool
12722 s390_z10_optimize_cmp (rtx_insn *insn)
12723 {
12724 rtx_insn *prev_insn, *next_insn;
12725 bool insn_added_p = false;
12726 rtx cond, *op0, *op1;
12727
12728 if (GET_CODE (PATTERN (insn)) == PARALLEL)
12729 {
12730 /* Handle compare and branch and branch on count
12731 instructions. */
12732 rtx pattern = single_set (insn);
12733
12734 if (!pattern
12735 || SET_DEST (pattern) != pc_rtx
12736 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
12737 return false;
12738
12739 cond = XEXP (SET_SRC (pattern), 0);
12740 op0 = &XEXP (cond, 0);
12741 op1 = &XEXP (cond, 1);
12742 }
12743 else if (GET_CODE (PATTERN (insn)) == SET)
12744 {
12745 rtx src, dest;
12746
12747 /* Handle normal compare instructions. */
12748 src = SET_SRC (PATTERN (insn));
12749 dest = SET_DEST (PATTERN (insn));
12750
12751 if (!REG_P (dest)
12752 || !CC_REGNO_P (REGNO (dest))
12753 || GET_CODE (src) != COMPARE)
12754 return false;
12755
12756 /* s390_swap_cmp will try to find the conditional
12757 jump when passing NULL_RTX as condition. */
12758 cond = NULL_RTX;
12759 op0 = &XEXP (src, 0);
12760 op1 = &XEXP (src, 1);
12761 }
12762 else
12763 return false;
12764
12765 if (!REG_P (*op0) || !REG_P (*op1))
12766 return false;
12767
12768 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
12769 return false;
12770
12771 /* Swap the COMPARE arguments and its mask if there is a
12772 conflicting access in the previous insn. */
12773 prev_insn = prev_active_insn (insn);
12774 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
12775 && reg_referenced_p (*op1, PATTERN (prev_insn)))
12776 s390_swap_cmp (cond, op0, op1, insn);
12777
12778 /* Check if there is a conflict with the next insn. If there
12779 was no conflict with the previous insn, then swap the
12780 COMPARE arguments and its mask. If we already swapped
12781 the operands, or if swapping them would cause a conflict
12782 with the previous insn, issue a NOP after the COMPARE in
12783 order to separate the two instructions. */
12784 next_insn = next_active_insn (insn);
12785 if (next_insn != NULL_RTX && INSN_P (next_insn)
12786 && s390_non_addr_reg_read_p (*op1, next_insn))
12787 {
12788 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
12789 && s390_non_addr_reg_read_p (*op0, prev_insn))
12790 {
12791 if (REGNO (*op1) == 0)
12792 emit_insn_after (gen_nop1 (), insn);
12793 else
12794 emit_insn_after (gen_nop (), insn);
12795 insn_added_p = true;
12796 }
12797 else
12798 s390_swap_cmp (cond, op0, op1, insn);
12799 }
12800 return insn_added_p;
12801 }
12802
12803 /* Perform machine-dependent processing. */
12804
12805 static void
12806 s390_reorg (void)
12807 {
12808 bool pool_overflow = false;
12809 int hw_before, hw_after;
12810
12811 /* Make sure all splits have been performed; splits after
12812 machine_dependent_reorg might confuse insn length counts. */
12813 split_all_insns_noflow ();
12814
12815 /* Install the main literal pool and the associated base
12816 register load insns.
12817
12818 In addition, there are two problematic situations we need
12819 to correct:
12820
12821 - the literal pool might be > 4096 bytes in size, so that
12822 some of its elements cannot be directly accessed
12823
12824 - a branch target might be > 64K away from the branch, so that
12825 it is not possible to use a PC-relative instruction.
12826
12827 To fix those, we split the single literal pool into multiple
12828 pool chunks, reloading the pool base register at various
12829 points throughout the function to ensure it always points to
12830 the pool chunk the following code expects, and / or replace
12831 PC-relative branches by absolute branches.
12832
12833 However, the two problems are interdependent: splitting the
12834 literal pool can move a branch further away from its target,
12835 causing the 64K limit to overflow, and on the other hand,
12836 replacing a PC-relative branch by an absolute branch means
12837 we need to put the branch target address into the literal
12838 pool, possibly causing it to overflow.
12839
12840 So, we loop trying to fix up both problems until we manage
12841 to satisfy both conditions at the same time. Note that the
12842 loop is guaranteed to terminate as every pass of the loop
12843 strictly decreases the total number of PC-relative branches
12844 in the function. (This is not completely true as there
12845 might be branch-over-pool insns introduced by chunkify_start.
12846 Those never need to be split however.) */
12847
12848 for (;;)
12849 {
12850 struct constant_pool *pool = NULL;
12851
12852 /* Collect the literal pool. */
12853 if (!pool_overflow)
12854 {
12855 pool = s390_mainpool_start ();
12856 if (!pool)
12857 pool_overflow = true;
12858 }
12859
12860 /* If literal pool overflowed, start to chunkify it. */
12861 if (pool_overflow)
12862 pool = s390_chunkify_start ();
12863
12864 /* Split out-of-range branches. If this has created new
12865 literal pool entries, cancel current chunk list and
12866 recompute it. zSeries machines have large branch
12867 instructions, so we never need to split a branch. */
12868 if (!TARGET_CPU_ZARCH && s390_split_branches ())
12869 {
12870 if (pool_overflow)
12871 s390_chunkify_cancel (pool);
12872 else
12873 s390_mainpool_cancel (pool);
12874
12875 continue;
12876 }
12877
12878 /* If we made it up to here, both conditions are satisfied.
12879 Finish up literal pool related changes. */
12880 if (pool_overflow)
12881 s390_chunkify_finish (pool);
12882 else
12883 s390_mainpool_finish (pool);
12884
12885 /* We're done splitting branches. */
12886 cfun->machine->split_branches_pending_p = false;
12887 break;
12888 }
12889
12890 /* Generate out-of-pool execute target insns. */
12891 if (TARGET_CPU_ZARCH)
12892 {
12893 rtx_insn *insn, *target;
12894 rtx label;
12895
12896 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12897 {
12898 label = s390_execute_label (insn);
12899 if (!label)
12900 continue;
12901
12902 gcc_assert (label != const0_rtx);
12903
12904 target = emit_label (XEXP (label, 0));
12905 INSN_ADDRESSES_NEW (target, -1);
12906
12907 target = emit_insn (s390_execute_target (insn));
12908 INSN_ADDRESSES_NEW (target, -1);
12909 }
12910 }
12911
12912 /* Try to optimize prologue and epilogue further. */
12913 s390_optimize_prologue ();
12914
12915 /* Walk over the insns and do some >=z10 specific changes. */
12916 if (s390_tune == PROCESSOR_2097_Z10
12917 || s390_tune == PROCESSOR_2817_Z196
12918 || s390_tune == PROCESSOR_2827_ZEC12
12919 || s390_tune == PROCESSOR_2964_Z13)
12920 {
12921 rtx_insn *insn;
12922 bool insn_added_p = false;
12923
12924 /* The insn lengths and addresses have to be up to date for the
12925 following manipulations. */
12926 shorten_branches (get_insns ());
12927
12928 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12929 {
12930 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
12931 continue;
12932
12933 if (JUMP_P (insn))
12934 insn_added_p |= s390_fix_long_loop_prediction (insn);
12935
12936 if ((GET_CODE (PATTERN (insn)) == PARALLEL
12937 || GET_CODE (PATTERN (insn)) == SET)
12938 && s390_tune == PROCESSOR_2097_Z10)
12939 insn_added_p |= s390_z10_optimize_cmp (insn);
12940 }
12941
12942 /* Adjust branches if we added new instructions. */
12943 if (insn_added_p)
12944 shorten_branches (get_insns ());
12945 }
12946
12947 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
12948 if (hw_after > 0)
12949 {
12950 rtx_insn *insn;
12951
12952 /* Insert NOPs for hotpatching. */
12953 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12954 /* Emit NOPs
12955 1. inside the area covered by debug information to allow setting
12956 breakpoints at the NOPs,
12957 2. before any insn which results in an asm instruction,
12958 3. before in-function labels to avoid jumping to the NOPs, for
12959 example as part of a loop,
12960 4. before any barrier in case the function is completely empty
12961 (__builtin_unreachable ()) and has neither internal labels nor
12962 active insns.
12963 */
12964 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
12965 break;
12966 /* Output a series of NOPs before the first active insn. */
12967 while (insn && hw_after > 0)
12968 {
12969 if (hw_after >= 3 && TARGET_CPU_ZARCH)
12970 {
12971 emit_insn_before (gen_nop_6_byte (), insn);
12972 hw_after -= 3;
12973 }
12974 else if (hw_after >= 2)
12975 {
12976 emit_insn_before (gen_nop_4_byte (), insn);
12977 hw_after -= 2;
12978 }
12979 else
12980 {
12981 emit_insn_before (gen_nop_2_byte (), insn);
12982 hw_after -= 1;
12983 }
12984 }
12985 }
12986 }
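/* Illustrative trigger for the hotpatch NOPs inserted at the end of
   s390_reorg above (a sketch; see the -mhotpatch documentation):

     __attribute__ ((hotpatch (1, 2)))
     void f (void) { }

   requests one halfword of NOPs before and two halfwords of NOPs after
   the function label; -mhotpatch=pre,post requests the same globally.  */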
12987
12988 /* Return true if INSN is a fp load insn writing register REGNO. */
12989 static inline bool
12990 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
12991 {
12992 rtx set;
12993 enum attr_type flag = s390_safe_attr_type (insn);
12994
12995 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
12996 return false;
12997
12998 set = single_set (insn);
12999
13000 if (set == NULL_RTX)
13001 return false;
13002
13003 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
13004 return false;
13005
13006 if (REGNO (SET_DEST (set)) != regno)
13007 return false;
13008
13009 return true;
13010 }
13011
13012 /* This value describes the distance to be avoided between an
13013 arithmetic fp instruction and an fp load writing the same register.
13014 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
13015 fine but the exact value has to be avoided. Otherwise the FP
13016 pipeline will throw an exception causing a major penalty. */
13017 #define Z10_EARLYLOAD_DISTANCE 7
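/* Sketch of the situation being avoided (illustrative only):

     adbr  %f0,%f2       arithmetic fp instruction writing %f0
     ...                  unrelated insns
     ld    %f0,0(%r2)    fp load writing %f0 at exactly the problematic
                          distance -> major penalty

   s390_z10_prevent_earlyload_conflicts below moves such a load within
   the ready list so that it is issued at a different distance.  */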
13018
13019 /* Rearrange the ready list in order to avoid the situation described
13020 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
13021 moved to the very end of the ready list. */
13022 static void
13023 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
13024 {
13025 unsigned int regno;
13026 int nready = *nready_p;
13027 rtx_insn *tmp;
13028 int i;
13029 rtx_insn *insn;
13030 rtx set;
13031 enum attr_type flag;
13032 int distance;
13033
13034 /* Skip DISTANCE - 1 active insns. */
13035 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
13036 distance > 0 && insn != NULL_RTX;
13037 distance--, insn = prev_active_insn (insn))
13038 if (CALL_P (insn) || JUMP_P (insn))
13039 return;
13040
13041 if (insn == NULL_RTX)
13042 return;
13043
13044 set = single_set (insn);
13045
13046 if (set == NULL_RTX || !REG_P (SET_DEST (set))
13047 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
13048 return;
13049
13050 flag = s390_safe_attr_type (insn);
13051
13052 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
13053 return;
13054
13055 regno = REGNO (SET_DEST (set));
13056 i = nready - 1;
13057
13058 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
13059 i--;
13060
13061 if (!i)
13062 return;
13063
13064 tmp = ready[i];
13065 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
13066 ready[0] = tmp;
13067 }
13068
13069
13070 /* The s390_sched_state variable tracks the state of the current or
13071 the last instruction group.
13072
13073 0,1,2 number of instructions scheduled in the current group
13074 3 the last group is complete - normal insns
13075 4 the last group was a cracked/expanded insn */
13076
13077 static int s390_sched_state;
13078
13079 #define S390_OOO_SCHED_STATE_NORMAL 3
13080 #define S390_OOO_SCHED_STATE_CRACKED 4
13081
13082 #define S390_OOO_SCHED_ATTR_MASK_CRACKED 0x1
13083 #define S390_OOO_SCHED_ATTR_MASK_EXPANDED 0x2
13084 #define S390_OOO_SCHED_ATTR_MASK_ENDGROUP 0x4
13085 #define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8
13086
13087 static unsigned int
13088 s390_get_sched_attrmask (rtx_insn *insn)
13089 {
13090 unsigned int mask = 0;
13091
13092 if (get_attr_ooo_cracked (insn))
13093 mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED;
13094 if (get_attr_ooo_expanded (insn))
13095 mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED;
13096 if (get_attr_ooo_endgroup (insn))
13097 mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP;
13098 if (get_attr_ooo_groupalone (insn))
13099 mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE;
13100 return mask;
13101 }
13102
13103 /* Return the scheduling score for INSN. The higher the score the
13104 better. The score is calculated from the OOO scheduling attributes
13105 of INSN and the scheduling state s390_sched_state. */
13106 static int
13107 s390_sched_score (rtx_insn *insn)
13108 {
13109 unsigned int mask = s390_get_sched_attrmask (insn);
13110 int score = 0;
13111
13112 switch (s390_sched_state)
13113 {
13114 case 0:
13115 /* Try to put insns into the first slot which would otherwise
13116 break a group. */
13117 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
13118 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
13119 score += 5;
13120 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
13121 score += 10;
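/* Falls through to the slot 1 scoring below.  */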
13122 case 1:
13123 /* Prefer not cracked insns while trying to put together a
13124 group. */
13125 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
13126 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
13127 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
13128 score += 10;
13129 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0)
13130 score += 5;
13131 break;
13132 case 2:
13133 /* Prefer not cracked insns while trying to put together a
13134 group. */
13135 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
13136 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
13137 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
13138 score += 10;
13139 /* Prefer endgroup insns in the last slot. */
13140 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0)
13141 score += 10;
13142 break;
13143 case S390_OOO_SCHED_STATE_NORMAL:
13144 /* Prefer not cracked insns if the last was not cracked. */
13145 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
13146 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0)
13147 score += 5;
13148 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
13149 score += 10;
13150 break;
13151 case S390_OOO_SCHED_STATE_CRACKED:
13152 /* Try to keep cracked insns together to prevent them from
13153 interrupting groups. */
13154 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
13155 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
13156 score += 5;
13157 break;
13158 }
13159 return score;
13160 }
13161
13162 /* This function is called via hook TARGET_SCHED_REORDER before
13163 issuing one insn from list READY which contains *NREADYP entries.
13164 For target z10 it reorders load instructions to avoid early load
13165 conflicts in the floating point pipeline. */
13166 static int
13167 s390_sched_reorder (FILE *file, int verbose,
13168 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
13169 {
13170 if (s390_tune == PROCESSOR_2097_Z10)
13171 if (reload_completed && *nreadyp > 1)
13172 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
13173
13174 if ((s390_tune == PROCESSOR_2827_ZEC12
13175 || s390_tune == PROCESSOR_2964_Z13)
13176 && reload_completed
13177 && *nreadyp > 1)
13178 {
13179 int i;
13180 int last_index = *nreadyp - 1;
13181 int max_index = -1;
13182 int max_score = -1;
13183 rtx_insn *tmp;
13184
13185 /* Just move the insn with the highest score to the top (the
13186 end) of the list. A full sort is not needed since a conflict
13187 in the hazard recognition cannot happen. So the top insn in
13188 the ready list will always be taken. */
13189 for (i = last_index; i >= 0; i--)
13190 {
13191 int score;
13192
13193 if (recog_memoized (ready[i]) < 0)
13194 continue;
13195
13196 score = s390_sched_score (ready[i]);
13197 if (score > max_score)
13198 {
13199 max_score = score;
13200 max_index = i;
13201 }
13202 }
13203
13204 if (max_index != -1)
13205 {
13206 if (max_index != last_index)
13207 {
13208 tmp = ready[max_index];
13209 ready[max_index] = ready[last_index];
13210 ready[last_index] = tmp;
13211
13212 if (verbose > 5)
13213 fprintf (file,
13214 "move insn %d to the top of list\n",
13215 INSN_UID (ready[last_index]));
13216 }
13217 else if (verbose > 5)
13218 fprintf (file,
13219 "best insn %d already on top\n",
13220 INSN_UID (ready[last_index]));
13221 }
13222
13223 if (verbose > 5)
13224 {
13225 fprintf (file, "ready list ooo attributes - sched state: %d\n",
13226 s390_sched_state);
13227
13228 for (i = last_index; i >= 0; i--)
13229 {
13230 if (recog_memoized (ready[i]) < 0)
13231 continue;
13232 fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]),
13233 s390_sched_score (ready[i]));
13234 #define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR);
13235 PRINT_OOO_ATTR (ooo_cracked);
13236 PRINT_OOO_ATTR (ooo_expanded);
13237 PRINT_OOO_ATTR (ooo_endgroup);
13238 PRINT_OOO_ATTR (ooo_groupalone);
13239 #undef PRINT_OOO_ATTR
13240 fprintf (file, "\n");
13241 }
13242 }
13243 }
13244
13245 return s390_issue_rate ();
13246 }
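/* Sketch of the dump produced above with a high -fsched-verbose level
   (insn UIDs and scores are made up for illustration only):

     move insn 42 to the top of list
     ready list ooo attributes - sched state: 2
     insn 42 score: 20: !ooo_cracked !ooo_expanded ooo_endgroup !ooo_groupalone
     insn 17 score: 10: !ooo_cracked !ooo_expanded !ooo_endgroup !ooo_groupalone  */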
13247
13248
13249 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
13250 the scheduler has issued INSN. It stores the last issued insn into
13251 last_scheduled_insn in order to make it available for
13252 s390_sched_reorder. */
13253 static int
13254 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
13255 {
13256 last_scheduled_insn = insn;
13257
13258 if ((s390_tune == PROCESSOR_2827_ZEC12
13259 || s390_tune == PROCESSOR_2964_Z13)
13260 && reload_completed
13261 && recog_memoized (insn) >= 0)
13262 {
13263 unsigned int mask = s390_get_sched_attrmask (insn);
13264
13265 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
13266 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
13267 s390_sched_state = S390_OOO_SCHED_STATE_CRACKED;
13268 else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0
13269 || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
13270 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
13271 else
13272 {
13273 /* Only normal insns are left (mask == 0). */
13274 switch (s390_sched_state)
13275 {
13276 case 0:
13277 case 1:
13278 case 2:
13279 case S390_OOO_SCHED_STATE_NORMAL:
13280 if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL)
13281 s390_sched_state = 1;
13282 else
13283 s390_sched_state++;
13284
13285 break;
13286 case S390_OOO_SCHED_STATE_CRACKED:
13287 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
13288 break;
13289 }
13290 }
13291 if (verbose > 5)
13292 {
13293 fprintf (file, "insn %d: ", INSN_UID (insn));
13294 #define PRINT_OOO_ATTR(ATTR) \
13295 fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : "");
13296 PRINT_OOO_ATTR (ooo_cracked);
13297 PRINT_OOO_ATTR (ooo_expanded);
13298 PRINT_OOO_ATTR (ooo_endgroup);
13299 PRINT_OOO_ATTR (ooo_groupalone);
13300 #undef PRINT_OOO_ATTR
13301 fprintf (file, "\n");
13302 fprintf (file, "sched state: %d\n", s390_sched_state);
13303 }
13304 }
13305
13306 if (GET_CODE (PATTERN (insn)) != USE
13307 && GET_CODE (PATTERN (insn)) != CLOBBER)
13308 return more - 1;
13309 else
13310 return more;
13311 }
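/* Illustrative state sequence (assuming, as defined elsewhere in this
   file, that S390_OOO_SCHED_STATE_NORMAL is the state following 0, 1
   and 2): issuing three "normal" insns in a row advances
   s390_sched_state 0 -> 1 -> 2 -> S390_OOO_SCHED_STATE_NORMAL.  A
   cracked or expanded insn switches to S390_OOO_SCHED_STATE_CRACKED,
   and the next normal insn issued from there returns the state to
   S390_OOO_SCHED_STATE_NORMAL.  */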
13312
13313 static void
13314 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
13315 int verbose ATTRIBUTE_UNUSED,
13316 int max_ready ATTRIBUTE_UNUSED)
13317 {
13318 last_scheduled_insn = NULL;
13319 s390_sched_state = 0;
13320 }
13321
13322 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
13323 the number of times struct loop *loop should be unrolled when tuning for
13324 CPUs with a built-in stride prefetcher.
13325 The loop body is analyzed for memory accesses by walking the RTL of each
13326 insn and counting the MEM references.  Depending on the loop depth and the
13327 number of memory accesses, a new unroll count <= nunroll is returned to
13328 improve the behaviour of the hardware prefetch unit. */
13329 static unsigned
13330 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
13331 {
13332 basic_block *bbs;
13333 rtx_insn *insn;
13334 unsigned i;
13335 unsigned mem_count = 0;
13336
13337 if (s390_tune != PROCESSOR_2097_Z10
13338 && s390_tune != PROCESSOR_2817_Z196
13339 && s390_tune != PROCESSOR_2827_ZEC12
13340 && s390_tune != PROCESSOR_2964_Z13)
13341 return nunroll;
13342
13343 /* Count the number of memory references within the loop body. */
13344 bbs = get_loop_body (loop);
13345 subrtx_iterator::array_type array;
13346 for (i = 0; i < loop->num_nodes; i++)
13347 FOR_BB_INSNS (bbs[i], insn)
13348 if (INSN_P (insn) && INSN_CODE (insn) != -1)
13349 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13350 if (MEM_P (*iter))
13351 mem_count += 1;
13352 free (bbs);
13353
13354 /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
13355 if (mem_count == 0)
13356 return nunroll;
13357
13358 switch (loop_depth (loop))
13359 {
13360 case 1:
13361 return MIN (nunroll, 28 / mem_count);
13362 case 2:
13363 return MIN (nunroll, 22 / mem_count);
13364 default:
13365 return MIN (nunroll, 16 / mem_count);
13366 }
13367 }
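/* Worked example (illustrative): on a z13 with nunroll == 8, a
   depth-1 loop whose body contains 7 memory references is unrolled
   MIN (8, 28 / 7) = 4 times; with only 2 memory references the full
   MIN (8, 28 / 2) = 8 unrollings are kept, and a loop without any
   memory references keeps the original nunroll.  */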
13368
13369 static void
13370 s390_option_override (void)
13371 {
13372 unsigned int i;
13373 cl_deferred_option *opt;
13374 vec<cl_deferred_option> *v =
13375 (vec<cl_deferred_option> *) s390_deferred_options;
13376
13377 if (v)
13378 FOR_EACH_VEC_ELT (*v, i, opt)
13379 {
13380 switch (opt->opt_index)
13381 {
13382 case OPT_mhotpatch_:
13383 {
13384 int val1;
13385 int val2;
13386 char s[256];
13387 char *t;
13388
13389 strncpy (s, opt->arg, 256);
13390 s[255] = 0;
13391 t = strchr (s, ',');
13392 if (t != NULL)
13393 {
13394 *t = 0;
13395 t++;
13396 val1 = integral_argument (s);
13397 val2 = integral_argument (t);
13398 }
13399 else
13400 {
13401 val1 = -1;
13402 val2 = -1;
13403 }
13404 if (val1 == -1 || val2 == -1)
13405 {
13406 /* Argument is not a plain number.  */
13407 error ("arguments to %qs should be non-negative integers",
13408 "-mhotpatch=n,m");
13409 break;
13410 }
13411 else if (val1 > s390_hotpatch_hw_max
13412 || val2 > s390_hotpatch_hw_max)
13413 {
13414 error ("argument to %qs is too large (max. %d)",
13415 "-mhotpatch=n,m", s390_hotpatch_hw_max);
13416 break;
13417 }
13418 s390_hotpatch_hw_before_label = val1;
13419 s390_hotpatch_hw_after_label = val2;
13420 break;
13421 }
13422 default:
13423 gcc_unreachable ();
13424 }
13425 }
13426
13427 /* Set up function hooks. */
13428 init_machine_status = s390_init_machine_status;
13429
13430 /* Architecture mode defaults according to ABI. */
13431 if (!(target_flags_explicit & MASK_ZARCH))
13432 {
13433 if (TARGET_64BIT)
13434 target_flags |= MASK_ZARCH;
13435 else
13436 target_flags &= ~MASK_ZARCH;
13437 }
13438
13439 /* Set the march default in case it hasn't been specified on
13440 cmdline. */
13441 if (s390_arch == PROCESSOR_max)
13442 {
13443 s390_arch_string = TARGET_ZARCH ? "z900" : "g5";
13444 s390_arch = TARGET_ZARCH ? PROCESSOR_2064_Z900 : PROCESSOR_9672_G5;
13445 s390_arch_flags = processor_flags_table[(int)s390_arch];
13446 }
13447
13448 /* Determine processor to tune for. */
13449 if (s390_tune == PROCESSOR_max)
13450 {
13451 s390_tune = s390_arch;
13452 s390_tune_flags = s390_arch_flags;
13453 }
13454
13455 /* Sanity checks. */
13456 if (s390_arch == PROCESSOR_NATIVE || s390_tune == PROCESSOR_NATIVE)
13457 gcc_unreachable ();
13458 if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
13459 error ("z/Architecture mode not supported on %s", s390_arch_string);
13460 if (TARGET_64BIT && !TARGET_ZARCH)
13461 error ("64-bit ABI not supported in ESA/390 mode");
13462
13463 /* Use hardware DFP if available and not explicitly disabled by the
13464 user.  E.g. with -m31 -march=z10 -mzarch.  */
13465 if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
13466 target_flags |= MASK_HARD_DFP;
13467
13468 /* Enable hardware transactions if available and not explicitly
13469 disabled by the user.  E.g. with -m31 -march=zEC12 -mzarch.  */
13470 if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH)
13471 target_flags |= MASK_OPT_HTM;
13472
13473 if (target_flags_explicit & MASK_OPT_VX)
13474 {
13475 if (TARGET_OPT_VX)
13476 {
13477 if (!TARGET_CPU_VX)
13478 error ("hardware vector support not available on %s",
13479 s390_arch_string);
13480 if (TARGET_SOFT_FLOAT)
13481 error ("hardware vector support not available with -msoft-float");
13482 }
13483 }
13484 else if (TARGET_CPU_VX)
13485 /* Enable vector support if available and not explicitly disabled
13486 by the user.  E.g. with -m31 -march=z13 -mzarch.  */
13487 target_flags |= MASK_OPT_VX;
13488
13489 if (TARGET_HARD_DFP && !TARGET_DFP)
13490 {
13491 if (target_flags_explicit & MASK_HARD_DFP)
13492 {
13493 if (!TARGET_CPU_DFP)
13494 error ("hardware decimal floating point instructions"
13495 " not available on %s", s390_arch_string);
13496 if (!TARGET_ZARCH)
13497 error ("hardware decimal floating point instructions"
13498 " not available in ESA/390 mode");
13499 }
13500 else
13501 target_flags &= ~MASK_HARD_DFP;
13502 }
13503
13504 if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT)
13505 {
13506 if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP)
13507 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
13508
13509 target_flags &= ~MASK_HARD_DFP;
13510 }
13511
13512 /* Set processor cost function. */
13513 switch (s390_tune)
13514 {
13515 case PROCESSOR_2084_Z990:
13516 s390_cost = &z990_cost;
13517 break;
13518 case PROCESSOR_2094_Z9_109:
13519 s390_cost = &z9_109_cost;
13520 break;
13521 case PROCESSOR_2097_Z10:
13522 s390_cost = &z10_cost;
13523 break;
13524 case PROCESSOR_2817_Z196:
13525 s390_cost = &z196_cost;
13526 break;
13527 case PROCESSOR_2827_ZEC12:
13528 case PROCESSOR_2964_Z13:
13529 s390_cost = &zEC12_cost;
13530 break;
13531 default:
13532 s390_cost = &z900_cost;
13533 }
13534
13535 if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
13536 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
13537 "in combination");
13538
13539 if (s390_stack_size)
13540 {
13541 if (s390_stack_guard >= s390_stack_size)
13542 error ("stack size must be greater than the stack guard value");
13543 else if (s390_stack_size > 1 << 16)
13544 error ("stack size must not be greater than 64k");
13545 }
13546 else if (s390_stack_guard)
13547 error ("-mstack-guard implies use of -mstack-size");
13548
13549 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
13550 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
13551 target_flags |= MASK_LONG_DOUBLE_128;
13552 #endif
13553
13554 if (s390_tune == PROCESSOR_2097_Z10
13555 || s390_tune == PROCESSOR_2817_Z196
13556 || s390_tune == PROCESSOR_2827_ZEC12
13557 || s390_tune == PROCESSOR_2964_Z13)
13558 {
13559 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
13560 global_options.x_param_values,
13561 global_options_set.x_param_values);
13562 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
13563 global_options.x_param_values,
13564 global_options_set.x_param_values);
13565 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
13566 global_options.x_param_values,
13567 global_options_set.x_param_values);
13568 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
13569 global_options.x_param_values,
13570 global_options_set.x_param_values);
13571 }
13572
13573 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
13574 global_options.x_param_values,
13575 global_options_set.x_param_values);
13576 /* Values for loop prefetching.  */
13577 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
13578 global_options.x_param_values,
13579 global_options_set.x_param_values);
13580 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
13581 global_options.x_param_values,
13582 global_options_set.x_param_values);
13583 /* s390 has more than 2 cache levels and the sizes are much larger.
13584 Since we are always running virtualized, assume that we only get a
13585 small part of the caches above L1. */
13586 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
13587 global_options.x_param_values,
13588 global_options_set.x_param_values);
13589 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
13590 global_options.x_param_values,
13591 global_options_set.x_param_values);
13592 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
13593 global_options.x_param_values,
13594 global_options_set.x_param_values);
13595
13596 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
13597 requires the arch flags to be evaluated already. Since prefetching
13598 is beneficial on s390, we enable it if available. */
13599 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
13600 flag_prefetch_loop_arrays = 1;
13601
13602 /* Use the alternative scheduling-pressure algorithm by default. */
13603 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
13604 global_options.x_param_values,
13605 global_options_set.x_param_values);
13606
13607 if (TARGET_TPF)
13608 {
13609 /* Don't emit DWARF3/4 unless specifically selected. The TPF
13610 debuggers do not yet support DWARF 3/4. */
13611 if (!global_options_set.x_dwarf_strict)
13612 dwarf_strict = 1;
13613 if (!global_options_set.x_dwarf_version)
13614 dwarf_version = 2;
13615 }
13616
13617 /* Register a target-specific optimization-and-lowering pass
13618 to run immediately before prologue and epilogue generation.
13619
13620 Registering the pass must be done at start up. It's
13621 convenient to do it here. */
13622 opt_pass *new_pass = new pass_s390_early_mach (g);
13623 struct register_pass_info insert_pass_s390_early_mach =
13624 {
13625 new_pass, /* pass */
13626 "pro_and_epilogue", /* reference_pass_name */
13627 1, /* ref_pass_instance_number */
13628 PASS_POS_INSERT_BEFORE /* po_op */
13629 };
13630 register_pass (&insert_pass_s390_early_mach);
13631 }
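/* A few illustrative command line combinations and how the checks
   above treat them (examples only, restating the logic of
   s390_option_override):

     -mhotpatch=1,2           sets s390_hotpatch_hw_before_label to 1
                              and s390_hotpatch_hw_after_label to 2.
     -m31 -march=z10 -mzarch  enables MASK_HARD_DFP by default since the
                              CPU provides DFP and z/Architecture mode is on.
     -m64 -march=g5           is rejected because g5 does not support
                              z/Architecture mode.
     -mstack-guard=64         without -mstack-size is rejected with
                              "-mstack-guard implies use of -mstack-size".  */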
13632
13633 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
13634
13635 static bool
13636 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
13637 unsigned int align ATTRIBUTE_UNUSED,
13638 enum by_pieces_operation op ATTRIBUTE_UNUSED,
13639 bool speed_p ATTRIBUTE_UNUSED)
13640 {
13641 return (size == 1 || size == 2
13642 || size == 4 || (TARGET_ZARCH && size == 8));
13643 }
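/* Consequence (illustrative): a 1, 2 or 4 byte block copy or store is
   expanded inline through the by-pieces machinery, an 8 byte block
   only when compiling for z/Architecture, while other sizes fall back
   to the target's block move/clear patterns or a library call.  */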
13644
13645 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13646
13647 static void
13648 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13649 {
13650 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
13651 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
13652 tree call_efpc = build_call_expr (efpc, 0);
13653 tree fenv_var = create_tmp_var (unsigned_type_node);
13654
13655 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
13656 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
13657 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
13658 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
13659 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
13660 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
13661
13662 /* Generates the equivalent of feholdexcept (&fenv_var)
13663
13664 fenv_var = __builtin_s390_efpc ();
13665 __builtin_s390_sfpc (fenv_var & mask) */
13666 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
13667 tree new_fpc =
13668 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13669 build_int_cst (unsigned_type_node,
13670 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
13671 FPC_EXCEPTION_MASK)));
13672 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
13673 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
13674
13675 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
13676
13677 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
13678 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
13679 build_int_cst (unsigned_type_node,
13680 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
13681 *clear = build_call_expr (sfpc, 1, new_fpc);
13682
13683 /* Generates the equivalent of feupdateenv (fenv_var)
13684
13685 old_fpc = __builtin_s390_efpc ();
13686 __builtin_s390_sfpc (fenv_var);
13687 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
13688
13689 old_fpc = create_tmp_var (unsigned_type_node);
13690 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
13691 old_fpc, call_efpc);
13692
13693 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
13694
13695 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
13696 build_int_cst (unsigned_type_node,
13697 FPC_FLAGS_MASK));
13698 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
13699 build_int_cst (unsigned_type_node,
13700 FPC_FLAGS_SHIFT));
13701 tree atomic_feraiseexcept
13702 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13703 raise_old_except = build_call_expr (atomic_feraiseexcept,
13704 1, raise_old_except);
13705
13706 *update = build2 (COMPOUND_EXPR, void_type_node,
13707 build2 (COMPOUND_EXPR, void_type_node,
13708 store_old_fpc, set_new_fpc),
13709 raise_old_except);
13710
13711 #undef FPC_EXCEPTION_MASK
13712 #undef FPC_FLAGS_MASK
13713 #undef FPC_DXC_MASK
13714 #undef FPC_EXCEPTION_MASK_SHIFT
13715 #undef FPC_FLAGS_SHIFT
13716 #undef FPC_DXC_SHIFT
13717 }
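/* Illustrative use (hypothetical user code, not part of this file):
   for a C11 atomic compound assignment on a floating point object,
   e.g.

     _Atomic double d;
     d += 1.0;

   the middle end wraps the generated compare-and-swap loop with the
   *hold, *clear and *update sequences built above, so that FP
   exceptions raised by discarded loop iterations are suppressed and
   only the exceptions of the final, successful iteration are raised
   via __atomic_feraiseexcept.  */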
13718
13719 /* Return the vector mode to be used for inner mode MODE when doing
13720 vectorization. */
13721 static machine_mode
13722 s390_preferred_simd_mode (machine_mode mode)
13723 {
13724 if (TARGET_VX)
13725 switch (mode)
13726 {
13727 case DFmode:
13728 return V2DFmode;
13729 case DImode:
13730 return V2DImode;
13731 case SImode:
13732 return V4SImode;
13733 case HImode:
13734 return V8HImode;
13735 case QImode:
13736 return V16QImode;
13737 default:;
13738 }
13739 return word_mode;
13740 }
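/* Example (illustrative): when vectorizing a loop over doubles with
   -march=z13 -mvx, the vectorizer is offered V2DFmode, i.e. two
   doubles per 16 byte vector register; 32-bit integers get V4SImode,
   and without TARGET_VX the hook simply returns word_mode.  */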
13741
13742 /* Our hardware does not require vectors to be strictly aligned. */
13743 static bool
13744 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
13745 const_tree type ATTRIBUTE_UNUSED,
13746 int misalignment ATTRIBUTE_UNUSED,
13747 bool is_packed ATTRIBUTE_UNUSED)
13748 {
13749 if (TARGET_VX)
13750 return true;
13751
13752 return default_builtin_support_vector_misalignment (mode, type, misalignment,
13753 is_packed);
13754 }
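/* Effect (illustrative): with TARGET_VX the vectorizer may emit
   unaligned vector loads and stores directly instead of peeling or
   versioning the loop to establish alignment; without vector support
   the generic default decision is used.  */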
13755
13756 /* The vector ABI requires vector types to be aligned on an 8 byte
13757 boundary (our stack alignment).  However, we allow this to be
13758 overridden by the user, even though this definitely breaks the ABI. */
13759 static HOST_WIDE_INT
13760 s390_vector_alignment (const_tree type)
13761 {
13762 if (!TARGET_VX_ABI)
13763 return default_vector_alignment (type);
13764
13765 if (TYPE_USER_ALIGN (type))
13766 return TYPE_ALIGN (type);
13767
13768 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
13769 }
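/* Example (illustrative): under the vector ABI a 16 byte "vector
   double" has TYPE_SIZE == 128 bits, so its alignment is capped at
   MIN (64, 128) = 64 bits (8 bytes), matching the stack alignment;
   an explicit user alignment attribute still takes precedence via
   TYPE_USER_ALIGN.  */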
13770
13771 /* Implement TARGET_ASM_FILE_END. */
13772 static void
13773 s390_asm_file_end (void)
13774 {
13775 #ifdef HAVE_AS_GNU_ATTRIBUTE
13776 varpool_node *vnode;
13777 cgraph_node *cnode;
13778
13779 FOR_EACH_VARIABLE (vnode)
13780 if (TREE_PUBLIC (vnode->decl))
13781 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
13782
13783 FOR_EACH_FUNCTION (cnode)
13784 if (TREE_PUBLIC (cnode->decl))
13785 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
13786
13787
13788 if (s390_vector_abi != 0)
13789 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
13790 s390_vector_abi);
13791 #endif
13792 file_end_indicate_exec_stack ();
13793 }
13794
13795 /* Return true if TYPE is a vector bool type. */
13796 static inline bool
13797 s390_vector_bool_type_p (const_tree type)
13798 {
13799 return TYPE_VECTOR_OPAQUE (type);
13800 }
13801
13802 /* Return the diagnostic message string if the binary operation OP is
13803 not permitted on TYPE1 and TYPE2, NULL otherwise. */
13804 static const char*
13805 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
13806 {
13807 bool bool1_p, bool2_p;
13808 bool plusminus_p;
13809 bool muldiv_p;
13810 bool compare_p;
13811 machine_mode mode1, mode2;
13812
13813 if (!TARGET_ZVECTOR)
13814 return NULL;
13815
13816 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
13817 return NULL;
13818
13819 bool1_p = s390_vector_bool_type_p (type1);
13820 bool2_p = s390_vector_bool_type_p (type2);
13821
13822 /* Mixing signed and unsigned types is forbidden for all
13823 operators. */
13824 if (!bool1_p && !bool2_p
13825 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
13826 return N_("types differ in signess");
13827
13828 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
13829 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
13830 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
13831 || op == ROUND_DIV_EXPR);
13832 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
13833 || op == EQ_EXPR || op == NE_EXPR);
13834
13835 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
13836 return N_("binary operator does not support two vector bool operands");
13837
13838 if (bool1_p != bool2_p && (muldiv_p || compare_p))
13839 return N_("binary operator does not support vector bool operand");
13840
13841 mode1 = TYPE_MODE (type1);
13842 mode2 = TYPE_MODE (type2);
13843
13844 if (bool1_p != bool2_p && plusminus_p
13845 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
13846 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
13847 return N_("binary operator does not support mixing vector "
13848 "bool with floating point vector operands");
13849
13850 return NULL;
13851 }
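/* Illustrative examples using the zvector language extension
   (hypothetical user code, not part of this file):

     vector bool int a, b;       a + b   -> "binary operator does not
                                            support two vector bool operands"
     vector bool int a;
     vector float b;             a == b  -> "binary operator does not
                                            support vector bool operand"
     vector signed int a;
     vector unsigned int b;      a + b   -> "types differ in signedness"  */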
13852
13853 /* Initialize GCC target structure. */
13854
13855 #undef TARGET_ASM_ALIGNED_HI_OP
13856 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
13857 #undef TARGET_ASM_ALIGNED_DI_OP
13858 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
13859 #undef TARGET_ASM_INTEGER
13860 #define TARGET_ASM_INTEGER s390_assemble_integer
13861
13862 #undef TARGET_ASM_OPEN_PAREN
13863 #define TARGET_ASM_OPEN_PAREN ""
13864
13865 #undef TARGET_ASM_CLOSE_PAREN
13866 #define TARGET_ASM_CLOSE_PAREN ""
13867
13868 #undef TARGET_OPTION_OVERRIDE
13869 #define TARGET_OPTION_OVERRIDE s390_option_override
13870
13871 #undef TARGET_ENCODE_SECTION_INFO
13872 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
13873
13874 #undef TARGET_SCALAR_MODE_SUPPORTED_P
13875 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
13876
13877 #ifdef HAVE_AS_TLS
13878 #undef TARGET_HAVE_TLS
13879 #define TARGET_HAVE_TLS true
13880 #endif
13881 #undef TARGET_CANNOT_FORCE_CONST_MEM
13882 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
13883
13884 #undef TARGET_DELEGITIMIZE_ADDRESS
13885 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
13886
13887 #undef TARGET_LEGITIMIZE_ADDRESS
13888 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
13889
13890 #undef TARGET_RETURN_IN_MEMORY
13891 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
13892
13893 #undef TARGET_INIT_BUILTINS
13894 #define TARGET_INIT_BUILTINS s390_init_builtins
13895 #undef TARGET_EXPAND_BUILTIN
13896 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
13897 #undef TARGET_BUILTIN_DECL
13898 #define TARGET_BUILTIN_DECL s390_builtin_decl
13899
13900 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
13901 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
13902
13903 #undef TARGET_ASM_OUTPUT_MI_THUNK
13904 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
13905 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
13906 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
13907
13908 #undef TARGET_SCHED_ADJUST_PRIORITY
13909 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
13910 #undef TARGET_SCHED_ISSUE_RATE
13911 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
13912 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
13913 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
13914
13915 #undef TARGET_SCHED_VARIABLE_ISSUE
13916 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
13917 #undef TARGET_SCHED_REORDER
13918 #define TARGET_SCHED_REORDER s390_sched_reorder
13919 #undef TARGET_SCHED_INIT
13920 #define TARGET_SCHED_INIT s390_sched_init
13921
13922 #undef TARGET_CANNOT_COPY_INSN_P
13923 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
13924 #undef TARGET_RTX_COSTS
13925 #define TARGET_RTX_COSTS s390_rtx_costs
13926 #undef TARGET_ADDRESS_COST
13927 #define TARGET_ADDRESS_COST s390_address_cost
13928 #undef TARGET_REGISTER_MOVE_COST
13929 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
13930 #undef TARGET_MEMORY_MOVE_COST
13931 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
13932
13933 #undef TARGET_MACHINE_DEPENDENT_REORG
13934 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
13935
13936 #undef TARGET_VALID_POINTER_MODE
13937 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
13938
13939 #undef TARGET_BUILD_BUILTIN_VA_LIST
13940 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
13941 #undef TARGET_EXPAND_BUILTIN_VA_START
13942 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
13943 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
13944 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
13945
13946 #undef TARGET_PROMOTE_FUNCTION_MODE
13947 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
13948 #undef TARGET_PASS_BY_REFERENCE
13949 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
13950
13951 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
13952 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
13953 #undef TARGET_FUNCTION_ARG
13954 #define TARGET_FUNCTION_ARG s390_function_arg
13955 #undef TARGET_FUNCTION_ARG_ADVANCE
13956 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
13957 #undef TARGET_FUNCTION_VALUE
13958 #define TARGET_FUNCTION_VALUE s390_function_value
13959 #undef TARGET_LIBCALL_VALUE
13960 #define TARGET_LIBCALL_VALUE s390_libcall_value
13961 #undef TARGET_STRICT_ARGUMENT_NAMING
13962 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
13963
13964 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
13965 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
13966
13967 #undef TARGET_FIXED_CONDITION_CODE_REGS
13968 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
13969
13970 #undef TARGET_CC_MODES_COMPATIBLE
13971 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
13972
13973 #undef TARGET_INVALID_WITHIN_DOLOOP
13974 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
13975
13976 #ifdef HAVE_AS_TLS
13977 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
13978 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
13979 #endif
13980
13981 #undef TARGET_DWARF_FRAME_REG_MODE
13982 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
13983
13984 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
13985 #undef TARGET_MANGLE_TYPE
13986 #define TARGET_MANGLE_TYPE s390_mangle_type
13987 #endif
13988
13992 #undef TARGET_VECTOR_MODE_SUPPORTED_P
13993 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
13994
13995 #undef TARGET_PREFERRED_RELOAD_CLASS
13996 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
13997
13998 #undef TARGET_SECONDARY_RELOAD
13999 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
14000
14001 #undef TARGET_LIBGCC_CMP_RETURN_MODE
14002 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
14003
14004 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
14005 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
14006
14007 #undef TARGET_LEGITIMATE_ADDRESS_P
14008 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
14009
14010 #undef TARGET_LEGITIMATE_CONSTANT_P
14011 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
14012
14013 #undef TARGET_LRA_P
14014 #define TARGET_LRA_P s390_lra_p
14015
14016 #undef TARGET_CAN_ELIMINATE
14017 #define TARGET_CAN_ELIMINATE s390_can_eliminate
14018
14019 #undef TARGET_CONDITIONAL_REGISTER_USAGE
14020 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
14021
14022 #undef TARGET_LOOP_UNROLL_ADJUST
14023 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
14024
14025 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
14026 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
14027 #undef TARGET_TRAMPOLINE_INIT
14028 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
14029
14030 #undef TARGET_UNWIND_WORD_MODE
14031 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
14032
14033 #undef TARGET_CANONICALIZE_COMPARISON
14034 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
14035
14036 #undef TARGET_HARD_REGNO_SCRATCH_OK
14037 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
14038
14039 #undef TARGET_ATTRIBUTE_TABLE
14040 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
14041
14042 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
14043 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
14044
14045 #undef TARGET_SET_UP_BY_PROLOGUE
14046 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
14047
14048 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
14049 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
14050 s390_use_by_pieces_infrastructure_p
14051
14052 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
14053 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
14054
14055 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
14056 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
14057
14058 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
14059 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
14060
14061 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
14062 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
14063
14064 #undef TARGET_VECTOR_ALIGNMENT
14065 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
14066
14067 #undef TARGET_INVALID_BINARY_OP
14068 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
14069
14070 #undef TARGET_ASM_FILE_END
14071 #define TARGET_ASM_FILE_END s390_asm_file_end
14072
14073 struct gcc_target targetm = TARGET_INITIALIZER;
14074
14075 #include "gt-s390.h"