1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2016 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "target-globals.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "cfgloop.h"
34 #include "df.h"
35 #include "tm_p.h"
36 #include "stringpool.h"
37 #include "expmed.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "emit-rtl.h"
41 #include "recog.h"
42 #include "cgraph.h"
43 #include "diagnostic-core.h"
44 #include "diagnostic.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "print-tree.h"
48 #include "stor-layout.h"
49 #include "varasm.h"
50 #include "calls.h"
51 #include "conditions.h"
52 #include "output.h"
53 #include "insn-attr.h"
54 #include "flags.h"
55 #include "except.h"
56 #include "dojump.h"
57 #include "explow.h"
58 #include "stmt.h"
59 #include "expr.h"
60 #include "reload.h"
61 #include "cfgrtl.h"
62 #include "cfganal.h"
63 #include "lcm.h"
64 #include "cfgbuild.h"
65 #include "cfgcleanup.h"
66 #include "debug.h"
67 #include "langhooks.h"
68 #include "internal-fn.h"
69 #include "gimple-fold.h"
70 #include "tree-eh.h"
71 #include "gimplify.h"
72 #include "params.h"
73 #include "opts.h"
74 #include "tree-pass.h"
75 #include "context.h"
76 #include "builtins.h"
77 #include "rtl-iter.h"
78 #include "intl.h"
79 #include "tm-constrs.h"
80
81 /* This file should be included last. */
82 #include "target-def.h"
83
84 /* Remember the last target of s390_set_current_function. */
85 static GTY(()) tree s390_previous_fndecl;
86
87 /* Define the specific costs for a given cpu. */
88
89 struct processor_costs
90 {
91 /* multiplication */
92 const int m; /* cost of an M instruction. */
93 const int mghi; /* cost of an MGHI instruction. */
94 const int mh; /* cost of an MH instruction. */
95 const int mhi; /* cost of an MHI instruction. */
96 const int ml; /* cost of an ML instruction. */
97 const int mr; /* cost of an MR instruction. */
98 const int ms; /* cost of an MS instruction. */
99 const int msg; /* cost of an MSG instruction. */
100 const int msgf; /* cost of an MSGF instruction. */
101 const int msgfr; /* cost of an MSGFR instruction. */
102 const int msgr; /* cost of an MSGR instruction. */
103 const int msr; /* cost of an MSR instruction. */
104 const int mult_df; /* cost of multiplication in DFmode. */
105 const int mxbr;
106 /* square root */
107 const int sqxbr; /* cost of square root in TFmode. */
108 const int sqdbr; /* cost of square root in DFmode. */
109 const int sqebr; /* cost of square root in SFmode. */
110 /* multiply and add */
111 const int madbr; /* cost of multiply and add in DFmode. */
112 const int maebr; /* cost of multiply and add in SFmode. */
113 /* division */
114 const int dxbr;
115 const int ddbr;
116 const int debr;
117 const int dlgr;
118 const int dlr;
119 const int dr;
120 const int dsgfr;
121 const int dsgr;
122 };
123
124 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
125
126 static const
127 struct processor_costs z900_cost =
128 {
129 COSTS_N_INSNS (5), /* M */
130 COSTS_N_INSNS (10), /* MGHI */
131 COSTS_N_INSNS (5), /* MH */
132 COSTS_N_INSNS (4), /* MHI */
133 COSTS_N_INSNS (5), /* ML */
134 COSTS_N_INSNS (5), /* MR */
135 COSTS_N_INSNS (4), /* MS */
136 COSTS_N_INSNS (15), /* MSG */
137 COSTS_N_INSNS (7), /* MSGF */
138 COSTS_N_INSNS (7), /* MSGFR */
139 COSTS_N_INSNS (10), /* MSGR */
140 COSTS_N_INSNS (4), /* MSR */
141 COSTS_N_INSNS (7), /* multiplication in DFmode */
142 COSTS_N_INSNS (13), /* MXBR */
143 COSTS_N_INSNS (136), /* SQXBR */
144 COSTS_N_INSNS (44), /* SQDBR */
145 COSTS_N_INSNS (35), /* SQEBR */
146 COSTS_N_INSNS (18), /* MADBR */
147 COSTS_N_INSNS (13), /* MAEBR */
148 COSTS_N_INSNS (134), /* DXBR */
149 COSTS_N_INSNS (30), /* DDBR */
150 COSTS_N_INSNS (27), /* DEBR */
151 COSTS_N_INSNS (220), /* DLGR */
152 COSTS_N_INSNS (34), /* DLR */
153 COSTS_N_INSNS (34), /* DR */
154 COSTS_N_INSNS (32), /* DSGFR */
155 COSTS_N_INSNS (32), /* DSGR */
156 };
157
158 static const
159 struct processor_costs z990_cost =
160 {
161 COSTS_N_INSNS (4), /* M */
162 COSTS_N_INSNS (2), /* MGHI */
163 COSTS_N_INSNS (2), /* MH */
164 COSTS_N_INSNS (2), /* MHI */
165 COSTS_N_INSNS (4), /* ML */
166 COSTS_N_INSNS (4), /* MR */
167 COSTS_N_INSNS (5), /* MS */
168 COSTS_N_INSNS (6), /* MSG */
169 COSTS_N_INSNS (4), /* MSGF */
170 COSTS_N_INSNS (4), /* MSGFR */
171 COSTS_N_INSNS (4), /* MSGR */
172 COSTS_N_INSNS (4), /* MSR */
173 COSTS_N_INSNS (1), /* multiplication in DFmode */
174 COSTS_N_INSNS (28), /* MXBR */
175 COSTS_N_INSNS (130), /* SQXBR */
176 COSTS_N_INSNS (66), /* SQDBR */
177 COSTS_N_INSNS (38), /* SQEBR */
178 COSTS_N_INSNS (1), /* MADBR */
179 COSTS_N_INSNS (1), /* MAEBR */
180 COSTS_N_INSNS (60), /* DXBR */
181 COSTS_N_INSNS (40), /* DDBR */
182 COSTS_N_INSNS (26), /* DEBR */
183 COSTS_N_INSNS (176), /* DLGR */
184 COSTS_N_INSNS (31), /* DLR */
185 COSTS_N_INSNS (31), /* DR */
186 COSTS_N_INSNS (31), /* DSGFR */
187 COSTS_N_INSNS (31), /* DSGR */
188 };
189
190 static const
191 struct processor_costs z9_109_cost =
192 {
193 COSTS_N_INSNS (4), /* M */
194 COSTS_N_INSNS (2), /* MGHI */
195 COSTS_N_INSNS (2), /* MH */
196 COSTS_N_INSNS (2), /* MHI */
197 COSTS_N_INSNS (4), /* ML */
198 COSTS_N_INSNS (4), /* MR */
199 COSTS_N_INSNS (5), /* MS */
200 COSTS_N_INSNS (6), /* MSG */
201 COSTS_N_INSNS (4), /* MSGF */
202 COSTS_N_INSNS (4), /* MSGFR */
203 COSTS_N_INSNS (4), /* MSGR */
204 COSTS_N_INSNS (4), /* MSR */
205 COSTS_N_INSNS (1), /* multiplication in DFmode */
206 COSTS_N_INSNS (28), /* MXBR */
207 COSTS_N_INSNS (130), /* SQXBR */
208 COSTS_N_INSNS (66), /* SQDBR */
209 COSTS_N_INSNS (38), /* SQEBR */
210 COSTS_N_INSNS (1), /* MADBR */
211 COSTS_N_INSNS (1), /* MAEBR */
212 COSTS_N_INSNS (60), /* DXBR */
213 COSTS_N_INSNS (40), /* DDBR */
214 COSTS_N_INSNS (26), /* DEBR */
215 COSTS_N_INSNS (30), /* DLGR */
216 COSTS_N_INSNS (23), /* DLR */
217 COSTS_N_INSNS (23), /* DR */
218 COSTS_N_INSNS (24), /* DSGFR */
219 COSTS_N_INSNS (24), /* DSGR */
220 };
221
222 static const
223 struct processor_costs z10_cost =
224 {
225 COSTS_N_INSNS (10), /* M */
226 COSTS_N_INSNS (10), /* MGHI */
227 COSTS_N_INSNS (10), /* MH */
228 COSTS_N_INSNS (10), /* MHI */
229 COSTS_N_INSNS (10), /* ML */
230 COSTS_N_INSNS (10), /* MR */
231 COSTS_N_INSNS (10), /* MS */
232 COSTS_N_INSNS (10), /* MSG */
233 COSTS_N_INSNS (10), /* MSGF */
234 COSTS_N_INSNS (10), /* MSGFR */
235 COSTS_N_INSNS (10), /* MSGR */
236 COSTS_N_INSNS (10), /* MSR */
237 COSTS_N_INSNS (1) , /* multiplication in DFmode */
238 COSTS_N_INSNS (50), /* MXBR */
239 COSTS_N_INSNS (120), /* SQXBR */
240 COSTS_N_INSNS (52), /* SQDBR */
241 COSTS_N_INSNS (38), /* SQEBR */
242 COSTS_N_INSNS (1), /* MADBR */
243 COSTS_N_INSNS (1), /* MAEBR */
244 COSTS_N_INSNS (111), /* DXBR */
245 COSTS_N_INSNS (39), /* DDBR */
246 COSTS_N_INSNS (32), /* DEBR */
247 COSTS_N_INSNS (160), /* DLGR */
248 COSTS_N_INSNS (71), /* DLR */
249 COSTS_N_INSNS (71), /* DR */
250 COSTS_N_INSNS (71), /* DSGFR */
251 COSTS_N_INSNS (71), /* DSGR */
252 };
253
254 static const
255 struct processor_costs z196_cost =
256 {
257 COSTS_N_INSNS (7), /* M */
258 COSTS_N_INSNS (5), /* MGHI */
259 COSTS_N_INSNS (5), /* MH */
260 COSTS_N_INSNS (5), /* MHI */
261 COSTS_N_INSNS (7), /* ML */
262 COSTS_N_INSNS (7), /* MR */
263 COSTS_N_INSNS (6), /* MS */
264 COSTS_N_INSNS (8), /* MSG */
265 COSTS_N_INSNS (6), /* MSGF */
266 COSTS_N_INSNS (6), /* MSGFR */
267 COSTS_N_INSNS (8), /* MSGR */
268 COSTS_N_INSNS (6), /* MSR */
269 COSTS_N_INSNS (1) , /* multiplication in DFmode */
270 COSTS_N_INSNS (40), /* MXBR B+40 */
271 COSTS_N_INSNS (100), /* SQXBR B+100 */
272 COSTS_N_INSNS (42), /* SQDBR B+42 */
273 COSTS_N_INSNS (28), /* SQEBR B+28 */
274 COSTS_N_INSNS (1), /* MADBR B */
275 COSTS_N_INSNS (1), /* MAEBR B */
276 COSTS_N_INSNS (101), /* DXBR B+101 */
277 COSTS_N_INSNS (29), /* DDBR */
278 COSTS_N_INSNS (22), /* DEBR */
279 COSTS_N_INSNS (160), /* DLGR cracked */
280 COSTS_N_INSNS (160), /* DLR cracked */
281 COSTS_N_INSNS (160), /* DR expanded */
282 COSTS_N_INSNS (160), /* DSGFR cracked */
283 COSTS_N_INSNS (160), /* DSGR cracked */
284 };
285
286 static const
287 struct processor_costs zEC12_cost =
288 {
289 COSTS_N_INSNS (7), /* M */
290 COSTS_N_INSNS (5), /* MGHI */
291 COSTS_N_INSNS (5), /* MH */
292 COSTS_N_INSNS (5), /* MHI */
293 COSTS_N_INSNS (7), /* ML */
294 COSTS_N_INSNS (7), /* MR */
295 COSTS_N_INSNS (6), /* MS */
296 COSTS_N_INSNS (8), /* MSG */
297 COSTS_N_INSNS (6), /* MSGF */
298 COSTS_N_INSNS (6), /* MSGFR */
299 COSTS_N_INSNS (8), /* MSGR */
300 COSTS_N_INSNS (6), /* MSR */
301 COSTS_N_INSNS (1) , /* multiplication in DFmode */
302 COSTS_N_INSNS (40), /* MXBR B+40 */
303 COSTS_N_INSNS (100), /* SQXBR B+100 */
304 COSTS_N_INSNS (42), /* SQDBR B+42 */
305 COSTS_N_INSNS (28), /* SQEBR B+28 */
306 COSTS_N_INSNS (1), /* MADBR B */
307 COSTS_N_INSNS (1), /* MAEBR B */
308 COSTS_N_INSNS (131), /* DXBR B+131 */
309 COSTS_N_INSNS (29), /* DDBR */
310 COSTS_N_INSNS (22), /* DEBR */
311 COSTS_N_INSNS (160), /* DLGR cracked */
312 COSTS_N_INSNS (160), /* DLR cracked */
313 COSTS_N_INSNS (160), /* DR expanded */
314 COSTS_N_INSNS (160), /* DSGFR cracked */
315 COSTS_N_INSNS (160), /* DSGR cracked */
316 };
317
318 static struct
319 {
320 const char *const name;
321 const enum processor_type processor;
322 const struct processor_costs *cost;
323 }
324 const processor_table[] =
325 {
326 { "g5", PROCESSOR_9672_G5, &z900_cost },
327 { "g6", PROCESSOR_9672_G6, &z900_cost },
328 { "z900", PROCESSOR_2064_Z900, &z900_cost },
329 { "z990", PROCESSOR_2084_Z990, &z990_cost },
330 { "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost },
331 { "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost },
332 { "z10", PROCESSOR_2097_Z10, &z10_cost },
333 { "z196", PROCESSOR_2817_Z196, &z196_cost },
334 { "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost },
335 { "z13", PROCESSOR_2964_Z13, &zEC12_cost },
336 { "native", PROCESSOR_NATIVE, NULL }
337 };
338
339 extern int reload_completed;
340
341 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
342 static rtx_insn *last_scheduled_insn;
343
344 /* Structure used to hold the components of a S/390 memory
345 address. A legitimate address on S/390 is of the general
346 form
347 base + index + displacement
348 where any of the components is optional.
349
350 base and index are registers of the class ADDR_REGS,
351 displacement is an unsigned 12-bit immediate constant. */
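   /* For example, the RX-format memory operand 8(%r2,%r3) has
      displacement 8, index %r2 and base %r3.  */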
352
353 struct s390_address
354 {
355 rtx base;
356 rtx indx;
357 rtx disp;
358 bool pointer;
359 bool literal_pool;
360 };
361
362 /* The following structure is embedded in the machine
363 specific part of struct function. */
364
365 struct GTY (()) s390_frame_layout
366 {
367 /* Offset within stack frame. */
368 HOST_WIDE_INT gprs_offset;
369 HOST_WIDE_INT f0_offset;
370 HOST_WIDE_INT f4_offset;
371 HOST_WIDE_INT f8_offset;
372 HOST_WIDE_INT backchain_offset;
373
374 /* Numbers of the first and last GPRs for which slots in the
375 register save area are reserved. */
376 int first_save_gpr_slot;
377 int last_save_gpr_slot;
378
379 /* Location (FP register number) where GPRs (r0-r15) should
380 be saved to.
381 0 - does not need to be saved at all
382 -1 - stack slot */
383 #define SAVE_SLOT_NONE 0
384 #define SAVE_SLOT_STACK -1
385 signed char gpr_save_slots[16];
386
387 /* Number of first and last gpr to be saved, restored. */
388 int first_save_gpr;
389 int first_restore_gpr;
390 int last_save_gpr;
391 int last_restore_gpr;
392
393 /* Bits standing for floating point registers. Set, if the
394 respective register has to be saved. Starting with reg 16 (f0)
395 at the rightmost bit.
396 Bit 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
397 fpr 15 13 11 9 14 12 10 8 7 5 3 1 6 4 2 0
398 reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */
399 unsigned int fpr_bitmap;
400
401 /* Number of floating point registers f8-f15 which must be saved. */
402 int high_fprs;
403
404 /* Set if return address needs to be saved.
405 This flag is set by s390_return_addr_rtx if it could not use
406 the initial value of r14 and therefore depends on r14 saved
407 to the stack. */
408 bool save_return_addr_p;
409
410 /* Size of stack frame. */
411 HOST_WIDE_INT frame_size;
412 };
413
414 /* Define the structure for the machine field in struct function. */
415
416 struct GTY(()) machine_function
417 {
418 struct s390_frame_layout frame_layout;
419
420 /* Literal pool base register. */
421 rtx base_reg;
422
423 /* True if we may need to perform branch splitting. */
424 bool split_branches_pending_p;
425
426 bool has_landing_pad_p;
427
428 /* True if the current function may contain a tbegin clobbering
429 FPRs. */
430 bool tbegin_p;
431
432 /* For -fsplit-stack support: A stack local which holds a pointer to
433 the stack arguments for a function with a variable number of
434 arguments. This is set at the start of the function and is used
435 to initialize the overflow_arg_area field of the va_list
436 structure. */
437 rtx split_stack_varargs_pointer;
438 };
439
440 /* A few accessor macros for struct cfun->machine->s390_frame_layout. */
441
442 #define cfun_frame_layout (cfun->machine->frame_layout)
443 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
444 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
445 ? cfun_frame_layout.fpr_bitmap & 0x0f \
446 : cfun_frame_layout.fpr_bitmap & 0x03))
447 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
448 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
449 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
450 (1 << (REGNO - FPR0_REGNUM)))
451 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
452 (1 << (REGNO - FPR0_REGNUM))))
453 #define cfun_gpr_save_slot(REGNO) \
454 cfun->machine->frame_layout.gpr_save_slots[REGNO]
455
456 /* Number of GPRs and FPRs used for argument passing. */
457 #define GP_ARG_NUM_REG 5
458 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
459 #define VEC_ARG_NUM_REG 8
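/* The GPR argument registers are r2-r6; the FPR argument registers are
   f0, f2, f4 and f6 (f0 and f2 only on 31 bit); vector arguments are
   passed in v24-v31.  */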
460
461 /* A couple of shortcuts. */
462 #define CONST_OK_FOR_J(x) \
463 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
464 #define CONST_OK_FOR_K(x) \
465 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
466 #define CONST_OK_FOR_Os(x) \
467 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
468 #define CONST_OK_FOR_Op(x) \
469 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
470 #define CONST_OK_FOR_On(x) \
471 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
472
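/* A (REGNO, MODE) combination is accepted only if MODE fits into a
   single register or REGNO is even, i.e. a multi-register value starts
   an even/odd register pair.  */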
473 #define REGNO_PAIR_OK(REGNO, MODE) \
474 (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
475
476 /* That's the read ahead of the dynamic branch prediction unit in
477 bytes on a z10 (or higher) CPU. */
478 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
479
480
481 /* Indicate which ABI has been used for passing vector args.
482 0 - no vector type arguments have been passed where the ABI is relevant
483 1 - the old ABI has been used
484 2 - a vector type argument has been passed either in a vector register
485 or on the stack by value */
486 static int s390_vector_abi = 0;
487
488 /* Set the vector ABI marker if TYPE is subject to the vector ABI
489 switch. The vector ABI affects only vector data types. There are
490 two aspects of the vector ABI relevant here:
491
492 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
493 ABI and natural alignment with the old.
494
495 2. vectors <= 16 bytes are passed in VRs or by value on the stack
496 with the new ABI but by reference on the stack with the old.
497
498 If ARG_P is true TYPE is used for a function argument or return
499 value. The ABI marker then is set for all vector data types. If
500 ARG_P is false only type 1 vectors are being checked. */
501
502 static void
503 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
504 {
505 static hash_set<const_tree> visited_types_hash;
506
507 if (s390_vector_abi)
508 return;
509
510 if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
511 return;
512
513 if (visited_types_hash.contains (type))
514 return;
515
516 visited_types_hash.add (type);
517
518 if (VECTOR_TYPE_P (type))
519 {
520 int type_size = int_size_in_bytes (type);
521
522 /* Outside arguments only the alignment is changing and this
523 only happens for vector types >= 16 bytes. */
524 if (!arg_p && type_size < 16)
525 return;
526
527 /* In arguments, vector types > 16 bytes are passed as before (GCC
528 never enforced the bigger alignment for arguments which was
529 required by the old vector ABI). However, it might still be
530 ABI relevant due to the changed alignment if it is a struct
531 member. */
532 if (arg_p && type_size > 16 && !in_struct_p)
533 return;
534
535 s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
536 }
537 else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
538 {
539 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
540 natural alignment there will never be ABI dependent padding
541 in an array type. That's why we do not set in_struct_p to
542 true here. */
543 s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
544 }
545 else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
546 {
547 tree arg_chain;
548
549 /* Check the return type. */
550 s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
551
552 for (arg_chain = TYPE_ARG_TYPES (type);
553 arg_chain;
554 arg_chain = TREE_CHAIN (arg_chain))
555 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
556 }
557 else if (RECORD_OR_UNION_TYPE_P (type))
558 {
559 tree field;
560
561 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
562 {
563 if (TREE_CODE (field) != FIELD_DECL)
564 continue;
565
566 s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
567 }
568 }
569 }
570
571
572 /* System z builtins. */
573
574 #include "s390-builtins.h"
575
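/* The tables below are built by including s390-builtins.def multiple
   times, each time with the B_DEF/OB_DEF/OB_DEF_VAR macros redefined to
   expand to the field of interest (X-macro style).  */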
576 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
577 {
578 #undef B_DEF
579 #undef OB_DEF
580 #undef OB_DEF_VAR
581 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
582 #define OB_DEF(...)
583 #define OB_DEF_VAR(...)
584 #include "s390-builtins.def"
585 0
586 };
587
588 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
589 {
590 #undef B_DEF
591 #undef OB_DEF
592 #undef OB_DEF_VAR
593 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
594 #define OB_DEF(...)
595 #define OB_DEF_VAR(...)
596 #include "s390-builtins.def"
597 0
598 };
599
600 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
601 {
602 #undef B_DEF
603 #undef OB_DEF
604 #undef OB_DEF_VAR
605 #define B_DEF(...)
606 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
607 #define OB_DEF_VAR(...)
608 #include "s390-builtins.def"
609 0
610 };
611
612 const unsigned int
613 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
614 {
615 #undef B_DEF
616 #undef OB_DEF
617 #undef OB_DEF_VAR
618 #define B_DEF(...)
619 #define OB_DEF(...)
620 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS,
621 #include "s390-builtins.def"
622 0
623 };
624
625 tree s390_builtin_types[BT_MAX];
626 tree s390_builtin_fn_types[BT_FN_MAX];
627 tree s390_builtin_decls[S390_BUILTIN_MAX +
628 S390_OVERLOADED_BUILTIN_MAX +
629 S390_OVERLOADED_BUILTIN_VAR_MAX];
630
631 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
632 #undef B_DEF
633 #undef OB_DEF
634 #undef OB_DEF_VAR
635 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
636 #define OB_DEF(...)
637 #define OB_DEF_VAR(...)
638
639 #include "s390-builtins.def"
640 CODE_FOR_nothing
641 };
642
643 static void
644 s390_init_builtins (void)
645 {
646 /* These definitions are being used in s390-builtins.def. */
647 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
648 NULL, NULL);
649 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
650 tree c_uint64_type_node;
651
652 /* The uint64_type_node from tree.c is not compatible with the C99
653 uint64_t data type. What we want is c_uint64_type_node from
654 c-common.c. But since backend code is not supposed to interface
655 with the frontend we recreate it here. */
656 if (TARGET_64BIT)
657 c_uint64_type_node = long_unsigned_type_node;
658 else
659 c_uint64_type_node = long_long_unsigned_type_node;
660
661 #undef DEF_TYPE
662 #define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P) \
663 if (s390_builtin_types[INDEX] == NULL) \
664 s390_builtin_types[INDEX] = (!CONST_P) ? \
665 (NODE) : build_type_variant ((NODE), 1, 0);
666
667 #undef DEF_POINTER_TYPE
668 #define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE) \
669 if (s390_builtin_types[INDEX] == NULL) \
670 s390_builtin_types[INDEX] = \
671 build_pointer_type (s390_builtin_types[INDEX_BASE]);
672
673 #undef DEF_DISTINCT_TYPE
674 #define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE) \
675 if (s390_builtin_types[INDEX] == NULL) \
676 s390_builtin_types[INDEX] = \
677 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
678
679 #undef DEF_VECTOR_TYPE
680 #define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
681 if (s390_builtin_types[INDEX] == NULL) \
682 s390_builtin_types[INDEX] = \
683 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
684
685 #undef DEF_OPAQUE_VECTOR_TYPE
686 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
687 if (s390_builtin_types[INDEX] == NULL) \
688 s390_builtin_types[INDEX] = \
689 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
690
691 #undef DEF_FN_TYPE
692 #define DEF_FN_TYPE(INDEX, BFLAGS, args...) \
693 if (s390_builtin_fn_types[INDEX] == NULL) \
694 s390_builtin_fn_types[INDEX] = \
695 build_function_type_list (args, NULL_TREE);
696 #undef DEF_OV_TYPE
697 #define DEF_OV_TYPE(...)
698 #include "s390-builtin-types.def"
699
700 #undef B_DEF
701 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
702 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
703 s390_builtin_decls[S390_BUILTIN_##NAME] = \
704 add_builtin_function ("__builtin_" #NAME, \
705 s390_builtin_fn_types[FNTYPE], \
706 S390_BUILTIN_##NAME, \
707 BUILT_IN_MD, \
708 NULL, \
709 ATTRS);
710 #undef OB_DEF
711 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
712 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
713 == NULL) \
714 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
715 add_builtin_function ("__builtin_" #NAME, \
716 s390_builtin_fn_types[FNTYPE], \
717 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
718 BUILT_IN_MD, \
719 NULL, \
720 0);
721 #undef OB_DEF_VAR
722 #define OB_DEF_VAR(...)
723 #include "s390-builtins.def"
724
725 }
726
727 /* Return true if ARG is appropriate as argument number ARGNUM of
728 builtin DECL. The operand flags from s390-builtins.def have to
729 be passed as OP_FLAGS. */
730 bool
731 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
732 {
733 if (O_UIMM_P (op_flags))
734 {
735 int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
736 int bitwidth = bitwidths[op_flags - O_U1];
737
738 if (!tree_fits_uhwi_p (arg)
739 || tree_to_uhwi (arg) > ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1)
740 {
741 error("constant argument %d for builtin %qF is out of range (0.."
742 HOST_WIDE_INT_PRINT_UNSIGNED ")",
743 argnum, decl,
744 ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1);
745 return false;
746 }
747 }
748
749 if (O_SIMM_P (op_flags))
750 {
751 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
752 int bitwidth = bitwidths[op_flags - O_S2];
753
754 if (!tree_fits_shwi_p (arg)
755 || tree_to_shwi (arg) < -((HOST_WIDE_INT)1 << (bitwidth - 1))
756 || tree_to_shwi (arg) > (((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1))
757 {
758 error("constant argument %d for builtin %qF is out of range ("
759 HOST_WIDE_INT_PRINT_DEC ".."
760 HOST_WIDE_INT_PRINT_DEC ")",
761 argnum, decl,
762 -((HOST_WIDE_INT)1 << (bitwidth - 1)),
763 ((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1);
764 return false;
765 }
766 }
767 return true;
768 }
769
770 /* Expand an expression EXP that calls a built-in function,
771 with result going to TARGET if that's convenient
772 (and in mode MODE if that's convenient).
773 SUBTARGET may be used as the target for computing one of EXP's operands.
774 IGNORE is nonzero if the value is to be ignored. */
775
776 static rtx
777 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
778 machine_mode mode ATTRIBUTE_UNUSED,
779 int ignore ATTRIBUTE_UNUSED)
780 {
781 #define MAX_ARGS 5
782
783 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
784 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
785 enum insn_code icode;
786 rtx op[MAX_ARGS], pat;
787 int arity;
788 bool nonvoid;
789 tree arg;
790 call_expr_arg_iterator iter;
791 unsigned int all_op_flags = opflags_for_builtin (fcode);
792 machine_mode last_vec_mode = VOIDmode;
793
794 if (TARGET_DEBUG_ARG)
795 {
796 fprintf (stderr,
797 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
798 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
799 bflags_for_builtin (fcode));
800 }
801
802 if (S390_USE_TARGET_ATTRIBUTE)
803 {
804 unsigned int bflags;
805
806 bflags = bflags_for_builtin (fcode);
807 if ((bflags & B_HTM) && !TARGET_HTM)
808 {
809 error ("Builtin %qF is not supported without -mhtm "
810 "(default with -march=zEC12 and higher).", fndecl);
811 return const0_rtx;
812 }
813 if ((bflags & B_VX) && !TARGET_VX)
814 {
815 error ("Builtin %qF is not supported without -mvx "
816 "(default with -march=z13 and higher).", fndecl);
817 return const0_rtx;
818 }
819 }
820 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
821 && fcode < S390_ALL_BUILTIN_MAX)
822 {
823 gcc_unreachable ();
824 }
825 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
826 {
827 icode = code_for_builtin[fcode];
828 /* Set a flag in the machine specific cfun part in order to support
829 saving/restoring of FPRs. */
830 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
831 cfun->machine->tbegin_p = true;
832 }
833 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
834 {
835 error ("Unresolved overloaded builtin");
836 return const0_rtx;
837 }
838 else
839 internal_error ("bad builtin fcode");
840
841 if (icode == 0)
842 internal_error ("bad builtin icode");
843
844 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
845
846 if (nonvoid)
847 {
848 machine_mode tmode = insn_data[icode].operand[0].mode;
849 if (!target
850 || GET_MODE (target) != tmode
851 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
852 target = gen_reg_rtx (tmode);
853
854 /* There are builtins (e.g. vec_promote) with no vector
855 arguments but an element selector. So we have to also look
856 at the vector return type when emitting the modulo
857 operation. */
858 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
859 last_vec_mode = insn_data[icode].operand[0].mode;
860 }
861
862 arity = 0;
863 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
864 {
865 const struct insn_operand_data *insn_op;
866 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
867
868 all_op_flags = all_op_flags >> O_SHIFT;
869
870 if (arg == error_mark_node)
871 return NULL_RTX;
872 if (arity >= MAX_ARGS)
873 return NULL_RTX;
874
875 if (O_IMM_P (op_flags)
876 && TREE_CODE (arg) != INTEGER_CST)
877 {
878 error ("constant value required for builtin %qF argument %d",
879 fndecl, arity + 1);
880 return const0_rtx;
881 }
882
883 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
884 return const0_rtx;
885
886 insn_op = &insn_data[icode].operand[arity + nonvoid];
887 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
888
889 /* expand_expr truncates constants to the target mode only if it
890 is "convenient". However, our checks below rely on this
891 being done. */
892 if (CONST_INT_P (op[arity])
893 && SCALAR_INT_MODE_P (insn_op->mode)
894 && GET_MODE (op[arity]) != insn_op->mode)
895 op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
896 insn_op->mode));
897
898 /* Wrap the expanded RTX for pointer types into a MEM expr with
899 the proper mode. This allows us to use e.g. (match_operand
900 "memory_operand"..) in the insn patterns instead of (mem
901 (match_operand "address_operand)). This is helpful for
902 patterns not just accepting MEMs. */
903 if (POINTER_TYPE_P (TREE_TYPE (arg))
904 && insn_op->predicate != address_operand)
905 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
906
907 /* Expand the modulo operation required on element selectors. */
908 if (op_flags == O_ELEM)
909 {
910 gcc_assert (last_vec_mode != VOIDmode);
911 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
912 op[arity],
913 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
914 NULL_RTX, 1, OPTAB_DIRECT);
915 }
916
917 /* Record the vector mode used for an element selector. This assumes:
918 1. There is no builtin with two different vector modes and an element selector
919 2. The element selector comes after the vector type it is referring to.
920 This is currently true for all the builtins but FIXME we
921 should explicitly check for that. */
922 if (VECTOR_MODE_P (insn_op->mode))
923 last_vec_mode = insn_op->mode;
924
925 if (insn_op->predicate (op[arity], insn_op->mode))
926 {
927 arity++;
928 continue;
929 }
930
931 if (MEM_P (op[arity])
932 && insn_op->predicate == memory_operand
933 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
934 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
935 {
936 op[arity] = replace_equiv_address (op[arity],
937 copy_to_mode_reg (Pmode,
938 XEXP (op[arity], 0)));
939 }
940 else if (GET_MODE (op[arity]) == insn_op->mode
941 || GET_MODE (op[arity]) == VOIDmode
942 || (insn_op->predicate == address_operand
943 && GET_MODE (op[arity]) == Pmode))
944 {
945 /* An address_operand usually has VOIDmode in the expander
946 so we cannot use this. */
947 machine_mode target_mode =
948 (insn_op->predicate == address_operand
949 ? Pmode : insn_op->mode);
950 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
951 }
952
953 if (!insn_op->predicate (op[arity], insn_op->mode))
954 {
955 error ("Invalid argument %d for builtin %qF", arity + 1, fndecl);
956 return const0_rtx;
957 }
958 arity++;
959 }
960
961 switch (arity)
962 {
963 case 0:
964 pat = GEN_FCN (icode) (target);
965 break;
966 case 1:
967 if (nonvoid)
968 pat = GEN_FCN (icode) (target, op[0]);
969 else
970 pat = GEN_FCN (icode) (op[0]);
971 break;
972 case 2:
973 if (nonvoid)
974 pat = GEN_FCN (icode) (target, op[0], op[1]);
975 else
976 pat = GEN_FCN (icode) (op[0], op[1]);
977 break;
978 case 3:
979 if (nonvoid)
980 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
981 else
982 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
983 break;
984 case 4:
985 if (nonvoid)
986 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
987 else
988 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
989 break;
990 case 5:
991 if (nonvoid)
992 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
993 else
994 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
995 break;
996 case 6:
997 if (nonvoid)
998 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
999 else
1000 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1001 break;
1002 default:
1003 gcc_unreachable ();
1004 }
1005 if (!pat)
1006 return NULL_RTX;
1007 emit_insn (pat);
1008
1009 if (nonvoid)
1010 return target;
1011 else
1012 return const0_rtx;
1013 }
1014
1015
1016 static const int s390_hotpatch_hw_max = 1000000;
1017 static int s390_hotpatch_hw_before_label = 0;
1018 static int s390_hotpatch_hw_after_label = 0;
1019
1020 /* Check whether the hotpatch attribute is applied to a function and, if it
1021 has arguments, whether the arguments are valid. */
1022
1023 static tree
1024 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1025 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1026 {
1027 tree expr;
1028 tree expr2;
1029 int err;
1030
1031 if (TREE_CODE (*node) != FUNCTION_DECL)
1032 {
1033 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1034 name);
1035 *no_add_attrs = true;
1036 }
1037 if (args != NULL && TREE_CHAIN (args) != NULL)
1038 {
1039 expr = TREE_VALUE (args);
1040 expr2 = TREE_VALUE (TREE_CHAIN (args));
1041 }
1042 if (args == NULL || TREE_CHAIN (args) == NULL)
1043 err = 1;
1044 else if (TREE_CODE (expr) != INTEGER_CST
1045 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1046 || wi::gtu_p (expr, s390_hotpatch_hw_max))
1047 err = 1;
1048 else if (TREE_CODE (expr2) != INTEGER_CST
1049 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1050 || wi::gtu_p (expr2, s390_hotpatch_hw_max))
1051 err = 1;
1052 else
1053 err = 0;
1054 if (err)
1055 {
1056 error ("requested %qE attribute is not a comma separated pair of"
1057 " non-negative integer constants or too large (max. %d)", name,
1058 s390_hotpatch_hw_max);
1059 *no_add_attrs = true;
1060 }
1061
1062 return NULL_TREE;
1063 }
1064
1065 /* Expand the s390_vector_bool type attribute. */
1066
1067 static tree
1068 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1069 tree args ATTRIBUTE_UNUSED,
1070 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1071 {
1072 tree type = *node, result = NULL_TREE;
1073 machine_mode mode;
1074
1075 while (POINTER_TYPE_P (type)
1076 || TREE_CODE (type) == FUNCTION_TYPE
1077 || TREE_CODE (type) == METHOD_TYPE
1078 || TREE_CODE (type) == ARRAY_TYPE)
1079 type = TREE_TYPE (type);
1080
1081 mode = TYPE_MODE (type);
1082 switch (mode)
1083 {
1084 case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break;
1085 case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break;
1086 case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break;
1087 case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI]; break;
1088 default: break;
1089 }
1090
1091 *no_add_attrs = true; /* No need to hang on to the attribute. */
1092
1093 if (result)
1094 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1095
1096 return NULL_TREE;
1097 }
1098
1099 static const struct attribute_spec s390_attribute_table[] = {
1100 { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
1101 { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
1102 /* End element. */
1103 { NULL, 0, 0, false, false, false, NULL, false }
1104 };
1105
1106 /* Return the alignment for LABEL. We default to the -falign-labels
1107 value except for the literal pool base label. */
1108 int
1109 s390_label_align (rtx label)
1110 {
1111 rtx_insn *prev_insn = prev_active_insn (label);
1112 rtx set, src;
1113
1114 if (prev_insn == NULL_RTX)
1115 goto old;
1116
1117 set = single_set (prev_insn);
1118
1119 if (set == NULL_RTX)
1120 goto old;
1121
1122 src = SET_SRC (set);
1123
1124 /* Don't align literal pool base labels. */
1125 if (GET_CODE (src) == UNSPEC
1126 && XINT (src, 1) == UNSPEC_MAIN_BASE)
1127 return 0;
1128
1129 old:
1130 return align_labels_log;
1131 }
1132
1133 static machine_mode
1134 s390_libgcc_cmp_return_mode (void)
1135 {
1136 return TARGET_64BIT ? DImode : SImode;
1137 }
1138
1139 static machine_mode
1140 s390_libgcc_shift_count_mode (void)
1141 {
1142 return TARGET_64BIT ? DImode : SImode;
1143 }
1144
1145 static machine_mode
1146 s390_unwind_word_mode (void)
1147 {
1148 return TARGET_64BIT ? DImode : SImode;
1149 }
1150
1151 /* Return true if the back end supports mode MODE. */
1152 static bool
1153 s390_scalar_mode_supported_p (machine_mode mode)
1154 {
1155 /* In contrast to the default implementation, reject TImode constants on 31-bit
1156 TARGET_ZARCH for ABI compliance. */
1157 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1158 return false;
1159
1160 if (DECIMAL_FLOAT_MODE_P (mode))
1161 return default_decimal_float_supported_p ();
1162
1163 return default_scalar_mode_supported_p (mode);
1164 }
1165
1166 /* Return true if the back end supports vector mode MODE. */
1167 static bool
1168 s390_vector_mode_supported_p (machine_mode mode)
1169 {
1170 machine_mode inner;
1171
1172 if (!VECTOR_MODE_P (mode)
1173 || !TARGET_VX
1174 || GET_MODE_SIZE (mode) > 16)
1175 return false;
1176
1177 inner = GET_MODE_INNER (mode);
1178
1179 switch (inner)
1180 {
1181 case QImode:
1182 case HImode:
1183 case SImode:
1184 case DImode:
1185 case TImode:
1186 case SFmode:
1187 case DFmode:
1188 case TFmode:
1189 return true;
1190 default:
1191 return false;
1192 }
1193 }
1194
1195 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1196
1197 void
1198 s390_set_has_landing_pad_p (bool value)
1199 {
1200 cfun->machine->has_landing_pad_p = value;
1201 }
1202
1203 /* If two condition code modes are compatible, return a condition code
1204 mode which is compatible with both. Otherwise, return
1205 VOIDmode. */
1206
1207 static machine_mode
1208 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1209 {
1210 if (m1 == m2)
1211 return m1;
1212
1213 switch (m1)
1214 {
1215 case CCZmode:
1216 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1217 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1218 return m2;
1219 return VOIDmode;
1220
1221 case CCSmode:
1222 case CCUmode:
1223 case CCTmode:
1224 case CCSRmode:
1225 case CCURmode:
1226 case CCZ1mode:
1227 if (m2 == CCZmode)
1228 return m1;
1229
1230 return VOIDmode;
1231
1232 default:
1233 return VOIDmode;
1234 }
1235 return VOIDmode;
1236 }
1237
1238 /* Return true if SET either doesn't set the CC register, or else
1239 the source and destination have matching CC modes and that
1240 CC mode is at least as constrained as REQ_MODE. */
1241
1242 static bool
1243 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1244 {
1245 machine_mode set_mode;
1246
1247 gcc_assert (GET_CODE (set) == SET);
1248
1249 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1250 return 1;
1251
1252 set_mode = GET_MODE (SET_DEST (set));
1253 switch (set_mode)
1254 {
1255 case CCSmode:
1256 case CCSRmode:
1257 case CCUmode:
1258 case CCURmode:
1259 case CCLmode:
1260 case CCL1mode:
1261 case CCL2mode:
1262 case CCL3mode:
1263 case CCT1mode:
1264 case CCT2mode:
1265 case CCT3mode:
1266 case CCVEQmode:
1267 case CCVHmode:
1268 case CCVHUmode:
1269 case CCVFHmode:
1270 case CCVFHEmode:
1271 if (req_mode != set_mode)
1272 return 0;
1273 break;
1274
1275 case CCZmode:
1276 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1277 && req_mode != CCSRmode && req_mode != CCURmode)
1278 return 0;
1279 break;
1280
1281 case CCAPmode:
1282 case CCANmode:
1283 if (req_mode != CCAmode)
1284 return 0;
1285 break;
1286
1287 default:
1288 gcc_unreachable ();
1289 }
1290
1291 return (GET_MODE (SET_SRC (set)) == set_mode);
1292 }
1293
1294 /* Return true if every SET in INSN that sets the CC register
1295 has source and destination with matching CC modes and that
1296 CC mode is at least as constrained as REQ_MODE.
1297 If REQ_MODE is VOIDmode, always return false. */
1298
1299 bool
1300 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1301 {
1302 int i;
1303
1304 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1305 if (req_mode == VOIDmode)
1306 return false;
1307
1308 if (GET_CODE (PATTERN (insn)) == SET)
1309 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1310
1311 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1312 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1313 {
1314 rtx set = XVECEXP (PATTERN (insn), 0, i);
1315 if (GET_CODE (set) == SET)
1316 if (!s390_match_ccmode_set (set, req_mode))
1317 return false;
1318 }
1319
1320 return true;
1321 }
1322
1323 /* If a test-under-mask instruction can be used to implement
1324 (compare (and ... OP1) OP2), return the CC mode required
1325 to do that. Otherwise, return VOIDmode.
1326 MIXED is true if the instruction can distinguish between
1327 CC1 and CC2 for mixed selected bits (TMxx), it is false
1328 if the instruction cannot (TM). */
1329
1330 machine_mode
1331 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1332 {
1333 int bit0, bit1;
1334
1335 /* ??? Fixme: should work on CONST_WIDE_INT as well. */
1336 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1337 return VOIDmode;
1338
1339 /* Selected bits all zero: CC0.
1340 e.g.: int a; if ((a & (16 + 128)) == 0) */
1341 if (INTVAL (op2) == 0)
1342 return CCTmode;
1343
1344 /* Selected bits all one: CC3.
1345 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1346 if (INTVAL (op2) == INTVAL (op1))
1347 return CCT3mode;
1348
1349 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1350 int a;
1351 if ((a & (16 + 128)) == 16) -> CCT1
1352 if ((a & (16 + 128)) == 128) -> CCT2 */
1353 if (mixed)
1354 {
1355 bit1 = exact_log2 (INTVAL (op2));
1356 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1357 if (bit0 != -1 && bit1 != -1)
1358 return bit0 > bit1 ? CCT1mode : CCT2mode;
1359 }
1360
1361 return VOIDmode;
1362 }
1363
1364 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1365 OP0 and OP1 of a COMPARE, return the mode to be used for the
1366 comparison. */
1367
1368 machine_mode
1369 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1370 {
1371 if (TARGET_VX
1372 && register_operand (op0, DFmode)
1373 && register_operand (op1, DFmode))
1374 {
1375 /* LT, LE, UNGT, UNGE require swapping OP0 and OP1. Either
1376 s390_emit_compare or s390_canonicalize_comparison will take
1377 care of it. */
1378 switch (code)
1379 {
1380 case EQ:
1381 case NE:
1382 return CCVEQmode;
1383 case GT:
1384 case UNLE:
1385 return CCVFHmode;
1386 case GE:
1387 case UNLT:
1388 return CCVFHEmode;
1389 default:
1390 ;
1391 }
1392 }
1393
1394 switch (code)
1395 {
1396 case EQ:
1397 case NE:
1398 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1399 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1400 return CCAPmode;
1401 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1402 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1403 return CCAPmode;
1404 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1405 || GET_CODE (op1) == NEG)
1406 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1407 return CCLmode;
1408
1409 if (GET_CODE (op0) == AND)
1410 {
1411 /* Check whether we can potentially do it via TM. */
1412 machine_mode ccmode;
1413 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1414 if (ccmode != VOIDmode)
1415 {
1416 /* Relax CCTmode to CCZmode to allow fall-back to AND
1417 if that turns out to be beneficial. */
1418 return ccmode == CCTmode ? CCZmode : ccmode;
1419 }
1420 }
1421
1422 if (register_operand (op0, HImode)
1423 && GET_CODE (op1) == CONST_INT
1424 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1425 return CCT3mode;
1426 if (register_operand (op0, QImode)
1427 && GET_CODE (op1) == CONST_INT
1428 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1429 return CCT3mode;
1430
1431 return CCZmode;
1432
1433 case LE:
1434 case LT:
1435 case GE:
1436 case GT:
1437 /* The only overflow condition of NEG and ABS happens when
1438 INT_MIN is used as parameter; the result stays negative. So
1439 we have an overflow from a positive value to a negative.
1440 Using CCAP mode the resulting cc can be used for comparisons. */
1441 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1442 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1443 return CCAPmode;
1444
1445 /* If constants are involved in an add instruction it is possible to use
1446 the resulting cc for comparisons with zero. Knowing the sign of the
1447 constant the overflow behavior gets predictable. e.g.:
1448 int a, b; if ((b = a + c) > 0)
1449 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1450 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1451 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1452 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1453 /* Avoid INT32_MIN on 32 bit. */
1454 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1455 {
1456 if (INTVAL (XEXP((op0), 1)) < 0)
1457 return CCANmode;
1458 else
1459 return CCAPmode;
1460 }
1461 /* Fall through. */
1462 case UNORDERED:
1463 case ORDERED:
1464 case UNEQ:
1465 case UNLE:
1466 case UNLT:
1467 case UNGE:
1468 case UNGT:
1469 case LTGT:
1470 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1471 && GET_CODE (op1) != CONST_INT)
1472 return CCSRmode;
1473 return CCSmode;
1474
1475 case LTU:
1476 case GEU:
1477 if (GET_CODE (op0) == PLUS
1478 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1479 return CCL1mode;
1480
1481 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1482 && GET_CODE (op1) != CONST_INT)
1483 return CCURmode;
1484 return CCUmode;
1485
1486 case LEU:
1487 case GTU:
1488 if (GET_CODE (op0) == MINUS
1489 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1490 return CCL2mode;
1491
1492 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1493 && GET_CODE (op1) != CONST_INT)
1494 return CCURmode;
1495 return CCUmode;
1496
1497 default:
1498 gcc_unreachable ();
1499 }
1500 }
1501
1502 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1503 that we can implement more efficiently. */
1504
1505 static void
1506 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1507 bool op0_preserve_value)
1508 {
1509 if (op0_preserve_value)
1510 return;
1511
1512 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1513 if ((*code == EQ || *code == NE)
1514 && *op1 == const0_rtx
1515 && GET_CODE (*op0) == ZERO_EXTRACT
1516 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1517 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1518 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1519 {
1520 rtx inner = XEXP (*op0, 0);
1521 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1522 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1523 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1524
1525 if (len > 0 && len < modesize
1526 && pos >= 0 && pos + len <= modesize
1527 && modesize <= HOST_BITS_PER_WIDE_INT)
1528 {
1529 unsigned HOST_WIDE_INT block;
1530 block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
1531 block <<= modesize - pos - len;
1532
1533 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1534 gen_int_mode (block, GET_MODE (inner)));
1535 }
1536 }
1537
1538 /* Narrow AND of memory against immediate to enable TM. */
1539 if ((*code == EQ || *code == NE)
1540 && *op1 == const0_rtx
1541 && GET_CODE (*op0) == AND
1542 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1543 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1544 {
1545 rtx inner = XEXP (*op0, 0);
1546 rtx mask = XEXP (*op0, 1);
1547
1548 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1549 if (GET_CODE (inner) == SUBREG
1550 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1551 && (GET_MODE_SIZE (GET_MODE (inner))
1552 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1553 && ((INTVAL (mask)
1554 & GET_MODE_MASK (GET_MODE (inner))
1555 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1556 == 0))
1557 inner = SUBREG_REG (inner);
1558
1559 /* Do not change volatile MEMs. */
1560 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1561 {
1562 int part = s390_single_part (XEXP (*op0, 1),
1563 GET_MODE (inner), QImode, 0);
1564 if (part >= 0)
1565 {
1566 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1567 inner = adjust_address_nv (inner, QImode, part);
1568 *op0 = gen_rtx_AND (QImode, inner, mask);
1569 }
1570 }
1571 }
1572
1573 /* Narrow comparisons against 0xffff to HImode if possible. */
1574 if ((*code == EQ || *code == NE)
1575 && GET_CODE (*op1) == CONST_INT
1576 && INTVAL (*op1) == 0xffff
1577 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1578 && (nonzero_bits (*op0, GET_MODE (*op0))
1579 & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
1580 {
1581 *op0 = gen_lowpart (HImode, *op0);
1582 *op1 = constm1_rtx;
1583 }
1584
1585 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1586 if (GET_CODE (*op0) == UNSPEC
1587 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1588 && XVECLEN (*op0, 0) == 1
1589 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1590 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1591 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1592 && *op1 == const0_rtx)
1593 {
1594 enum rtx_code new_code = UNKNOWN;
1595 switch (*code)
1596 {
1597 case EQ: new_code = EQ; break;
1598 case NE: new_code = NE; break;
1599 case LT: new_code = GTU; break;
1600 case GT: new_code = LTU; break;
1601 case LE: new_code = GEU; break;
1602 case GE: new_code = LEU; break;
1603 default: break;
1604 }
1605
1606 if (new_code != UNKNOWN)
1607 {
1608 *op0 = XVECEXP (*op0, 0, 0);
1609 *code = new_code;
1610 }
1611 }
1612
1613 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1614 if (GET_CODE (*op0) == UNSPEC
1615 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1616 && XVECLEN (*op0, 0) == 1
1617 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1618 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1619 && CONST_INT_P (*op1))
1620 {
1621 enum rtx_code new_code = UNKNOWN;
1622 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1623 {
1624 case CCZmode:
1625 case CCRAWmode:
1626 switch (*code)
1627 {
1628 case EQ: new_code = EQ; break;
1629 case NE: new_code = NE; break;
1630 default: break;
1631 }
1632 break;
1633 default: break;
1634 }
1635
1636 if (new_code != UNKNOWN)
1637 {
1638 /* For CCRAWmode put the required cc mask into the second
1639 operand. */
1640 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1641 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1642 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1643 *op0 = XVECEXP (*op0, 0, 0);
1644 *code = new_code;
1645 }
1646 }
1647
1648 /* Simplify cascaded EQ, NE with const0_rtx. */
1649 if ((*code == NE || *code == EQ)
1650 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1651 && GET_MODE (*op0) == SImode
1652 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1653 && REG_P (XEXP (*op0, 0))
1654 && XEXP (*op0, 1) == const0_rtx
1655 && *op1 == const0_rtx)
1656 {
1657 if ((*code == EQ && GET_CODE (*op0) == NE)
1658 || (*code == NE && GET_CODE (*op0) == EQ))
1659 *code = EQ;
1660 else
1661 *code = NE;
1662 *op0 = XEXP (*op0, 0);
1663 }
1664
1665 /* Prefer register over memory as first operand. */
1666 if (MEM_P (*op0) && REG_P (*op1))
1667 {
1668 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1669 *code = (int)swap_condition ((enum rtx_code)*code);
1670 }
1671
1672 /* Using the scalar variants of vector instructions for 64 bit FP
1673 comparisons might require swapping the operands. */
1674 if (TARGET_VX
1675 && register_operand (*op0, DFmode)
1676 && register_operand (*op1, DFmode)
1677 && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
1678 {
1679 rtx tmp;
1680
1681 switch (*code)
1682 {
1683 case LT: *code = GT; break;
1684 case LE: *code = GE; break;
1685 case UNGT: *code = UNLE; break;
1686 case UNGE: *code = UNLT; break;
1687 default: ;
1688 }
1689 tmp = *op0; *op0 = *op1; *op1 = tmp;
1690 }
1691 }
1692
1693 /* Helper function for s390_emit_compare. If possible emit a 64 bit
1694 FP compare using the single element variant of vector instructions.
1695 Replace CODE with the comparison code to be used in the CC reg
1696 compare and return the condition code register RTX in CC. */
1697
1698 static bool
1699 s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
1700 rtx *cc)
1701 {
1702 machine_mode cmp_mode;
1703 bool swap_p = false;
1704
1705 switch (*code)
1706 {
1707 case EQ: cmp_mode = CCVEQmode; break;
1708 case NE: cmp_mode = CCVEQmode; break;
1709 case GT: cmp_mode = CCVFHmode; break;
1710 case GE: cmp_mode = CCVFHEmode; break;
1711 case UNLE: cmp_mode = CCVFHmode; break;
1712 case UNLT: cmp_mode = CCVFHEmode; break;
1713 case LT: cmp_mode = CCVFHmode; *code = GT; swap_p = true; break;
1714 case LE: cmp_mode = CCVFHEmode; *code = GE; swap_p = true; break;
1715 case UNGE: cmp_mode = CCVFHmode; *code = UNLE; swap_p = true; break;
1716 case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
1717 default: return false;
1718 }
1719
1720 if (swap_p)
1721 {
1722 rtx tmp = cmp2;
1723 cmp2 = cmp1;
1724 cmp1 = tmp;
1725 }
1726 *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
1727 emit_insn (gen_rtx_PARALLEL (VOIDmode,
1728 gen_rtvec (2,
1729 gen_rtx_SET (*cc,
1730 gen_rtx_COMPARE (cmp_mode, cmp1,
1731 cmp2)),
1732 gen_rtx_CLOBBER (VOIDmode,
1733 gen_rtx_SCRATCH (V2DImode)))));
1734 return true;
1735 }
1736
1737
1738 /* Emit a compare instruction suitable to implement the comparison
1739 OP0 CODE OP1. Return the correct condition RTL to be placed in
1740 the IF_THEN_ELSE of the conditional branch testing the result. */
1741
1742 rtx
1743 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1744 {
1745 machine_mode mode = s390_select_ccmode (code, op0, op1);
1746 rtx cc;
1747
1748 if (TARGET_VX
1749 && register_operand (op0, DFmode)
1750 && register_operand (op1, DFmode)
1751 && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
1752 {
1753 /* Work has been done by s390_expand_vec_compare_scalar already. */
1754 }
1755 else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1756 {
1757 /* Do not output a redundant compare instruction if a
1758 compare_and_swap pattern already computed the result and the
1759 machine modes are compatible. */
1760 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1761 == GET_MODE (op0));
1762 cc = op0;
1763 }
1764 else
1765 {
1766 cc = gen_rtx_REG (mode, CC_REGNUM);
1767 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1768 }
1769
1770 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1771 }
1772
1773 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1774 matches CMP.
1775 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1776 conditional branch testing the result. */
1777
1778 static rtx
1779 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1780 rtx cmp, rtx new_rtx)
1781 {
1782 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
1783 return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
1784 const0_rtx);
1785 }
1786
1787 /* Emit a jump instruction to TARGET and return it. If COND is
1788 NULL_RTX, emit an unconditional jump, else a conditional jump under
1789 condition COND. */
1790
1791 rtx_insn *
1792 s390_emit_jump (rtx target, rtx cond)
1793 {
1794 rtx insn;
1795
1796 target = gen_rtx_LABEL_REF (VOIDmode, target);
1797 if (cond)
1798 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1799
1800 insn = gen_rtx_SET (pc_rtx, target);
1801 return emit_jump_insn (insn);
1802 }
1803
1804 /* Return branch condition mask to implement a branch
1805 specified by CODE. Return -1 for invalid comparisons. */
1806
1807 int
1808 s390_branch_condition_mask (rtx code)
1809 {
1810 const int CC0 = 1 << 3;
1811 const int CC1 = 1 << 2;
1812 const int CC2 = 1 << 1;
1813 const int CC3 = 1 << 0;
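  /* This ordering matches the mask field (M1) of the branch-on-condition
     instructions: mask bit 8 selects CC0, 4 selects CC1, 2 selects CC2
     and 1 selects CC3.  */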
1814
1815 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1816 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1817 gcc_assert (XEXP (code, 1) == const0_rtx
1818 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1819 && CONST_INT_P (XEXP (code, 1))));
1820
1821
1822 switch (GET_MODE (XEXP (code, 0)))
1823 {
1824 case CCZmode:
1825 case CCZ1mode:
1826 switch (GET_CODE (code))
1827 {
1828 case EQ: return CC0;
1829 case NE: return CC1 | CC2 | CC3;
1830 default: return -1;
1831 }
1832 break;
1833
1834 case CCT1mode:
1835 switch (GET_CODE (code))
1836 {
1837 case EQ: return CC1;
1838 case NE: return CC0 | CC2 | CC3;
1839 default: return -1;
1840 }
1841 break;
1842
1843 case CCT2mode:
1844 switch (GET_CODE (code))
1845 {
1846 case EQ: return CC2;
1847 case NE: return CC0 | CC1 | CC3;
1848 default: return -1;
1849 }
1850 break;
1851
1852 case CCT3mode:
1853 switch (GET_CODE (code))
1854 {
1855 case EQ: return CC3;
1856 case NE: return CC0 | CC1 | CC2;
1857 default: return -1;
1858 }
1859 break;
1860
1861 case CCLmode:
1862 switch (GET_CODE (code))
1863 {
1864 case EQ: return CC0 | CC2;
1865 case NE: return CC1 | CC3;
1866 default: return -1;
1867 }
1868 break;
1869
1870 case CCL1mode:
1871 switch (GET_CODE (code))
1872 {
1873 case LTU: return CC2 | CC3; /* carry */
1874 case GEU: return CC0 | CC1; /* no carry */
1875 default: return -1;
1876 }
1877 break;
1878
1879 case CCL2mode:
1880 switch (GET_CODE (code))
1881 {
1882 case GTU: return CC0 | CC1; /* borrow */
1883 case LEU: return CC2 | CC3; /* no borrow */
1884 default: return -1;
1885 }
1886 break;
1887
1888 case CCL3mode:
1889 switch (GET_CODE (code))
1890 {
1891 case EQ: return CC0 | CC2;
1892 case NE: return CC1 | CC3;
1893 case LTU: return CC1;
1894 case GTU: return CC3;
1895 case LEU: return CC1 | CC2;
1896 case GEU: return CC2 | CC3;
1897 default: return -1;
1898 }
1899
1900 case CCUmode:
1901 switch (GET_CODE (code))
1902 {
1903 case EQ: return CC0;
1904 case NE: return CC1 | CC2 | CC3;
1905 case LTU: return CC1;
1906 case GTU: return CC2;
1907 case LEU: return CC0 | CC1;
1908 case GEU: return CC0 | CC2;
1909 default: return -1;
1910 }
1911 break;
1912
1913 case CCURmode:
1914 switch (GET_CODE (code))
1915 {
1916 case EQ: return CC0;
1917 case NE: return CC2 | CC1 | CC3;
1918 case LTU: return CC2;
1919 case GTU: return CC1;
1920 case LEU: return CC0 | CC2;
1921 case GEU: return CC0 | CC1;
1922 default: return -1;
1923 }
1924 break;
1925
1926 case CCAPmode:
1927 switch (GET_CODE (code))
1928 {
1929 case EQ: return CC0;
1930 case NE: return CC1 | CC2 | CC3;
1931 case LT: return CC1 | CC3;
1932 case GT: return CC2;
1933 case LE: return CC0 | CC1 | CC3;
1934 case GE: return CC0 | CC2;
1935 default: return -1;
1936 }
1937 break;
1938
1939 case CCANmode:
1940 switch (GET_CODE (code))
1941 {
1942 case EQ: return CC0;
1943 case NE: return CC1 | CC2 | CC3;
1944 case LT: return CC1;
1945 case GT: return CC2 | CC3;
1946 case LE: return CC0 | CC1;
1947 case GE: return CC0 | CC2 | CC3;
1948 default: return -1;
1949 }
1950 break;
1951
1952 case CCSmode:
1953 switch (GET_CODE (code))
1954 {
1955 case EQ: return CC0;
1956 case NE: return CC1 | CC2 | CC3;
1957 case LT: return CC1;
1958 case GT: return CC2;
1959 case LE: return CC0 | CC1;
1960 case GE: return CC0 | CC2;
1961 case UNORDERED: return CC3;
1962 case ORDERED: return CC0 | CC1 | CC2;
1963 case UNEQ: return CC0 | CC3;
1964 case UNLT: return CC1 | CC3;
1965 case UNGT: return CC2 | CC3;
1966 case UNLE: return CC0 | CC1 | CC3;
1967 case UNGE: return CC0 | CC2 | CC3;
1968 case LTGT: return CC1 | CC2;
1969 default: return -1;
1970 }
1971 break;
1972
1973 case CCSRmode:
1974 switch (GET_CODE (code))
1975 {
1976 case EQ: return CC0;
1977 case NE: return CC2 | CC1 | CC3;
1978 case LT: return CC2;
1979 case GT: return CC1;
1980 case LE: return CC0 | CC2;
1981 case GE: return CC0 | CC1;
1982 case UNORDERED: return CC3;
1983 case ORDERED: return CC0 | CC2 | CC1;
1984 case UNEQ: return CC0 | CC3;
1985 case UNLT: return CC2 | CC3;
1986 case UNGT: return CC1 | CC3;
1987 case UNLE: return CC0 | CC2 | CC3;
1988 case UNGE: return CC0 | CC1 | CC3;
1989 case LTGT: return CC2 | CC1;
1990 default: return -1;
1991 }
1992 break;
1993
1994 /* Vector comparison modes. */
1995
1996 case CCVEQmode:
1997 switch (GET_CODE (code))
1998 {
1999 case EQ: return CC0;
2000 case NE: return CC3;
2001 default: return -1;
2002 }
2003
2004 case CCVEQANYmode:
2005 switch (GET_CODE (code))
2006 {
2007 case EQ: return CC0 | CC1;
2008 case NE: return CC3 | CC1;
2009 default: return -1;
2010 }
2011
2012 /* Integer vector compare modes. */
2013
2014 case CCVHmode:
2015 switch (GET_CODE (code))
2016 {
2017 case GT: return CC0;
2018 case LE: return CC3;
2019 default: return -1;
2020 }
2021
2022 case CCVHANYmode:
2023 switch (GET_CODE (code))
2024 {
2025 case GT: return CC0 | CC1;
2026 case LE: return CC3 | CC1;
2027 default: return -1;
2028 }
2029
2030 case CCVHUmode:
2031 switch (GET_CODE (code))
2032 {
2033 case GTU: return CC0;
2034 case LEU: return CC3;
2035 default: return -1;
2036 }
2037
2038 case CCVHUANYmode:
2039 switch (GET_CODE (code))
2040 {
2041 case GTU: return CC0 | CC1;
2042 case LEU: return CC3 | CC1;
2043 default: return -1;
2044 }
2045
2046 /* FP vector compare modes. */
2047
2048 case CCVFHmode:
2049 switch (GET_CODE (code))
2050 {
2051 case GT: return CC0;
2052 case UNLE: return CC3;
2053 default: return -1;
2054 }
2055
2056 case CCVFHANYmode:
2057 switch (GET_CODE (code))
2058 {
2059 case GT: return CC0 | CC1;
2060 case UNLE: return CC3 | CC1;
2061 default: return -1;
2062 }
2063
2064 case CCVFHEmode:
2065 switch (GET_CODE (code))
2066 {
2067 case GE: return CC0;
2068 case UNLT: return CC3;
2069 default: return -1;
2070 }
2071
2072 case CCVFHEANYmode:
2073 switch (GET_CODE (code))
2074 {
2075 case GE: return CC0 | CC1;
2076 case UNLT: return CC3 | CC1;
2077 default: return -1;
2078 }
2079
2080
2081 case CCRAWmode:
2082 switch (GET_CODE (code))
2083 {
2084 case EQ:
2085 return INTVAL (XEXP (code, 1));
2086 case NE:
2087 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2088 default:
2089 gcc_unreachable ();
2090 }
2091
2092 default:
2093 return -1;
2094 }
2095 }
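
/* Example (illustrative, not part of the original source): the four
   mask bits correspond to condition codes 0..3 with CC0 in the most
   significant position, matching the M1 field of the branch-on-condition
   instructions.  For a signed comparison (CCSmode), GE yields
   CC0 | CC2 == binary 1010 == 10, i.e. "branch if the compare set
   condition code 0 (equal) or 2 (high)".  */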
2096
2097
2098 /* Return branch condition mask to implement a compare and branch
2099 specified by CODE. Return -1 for invalid comparisons. */
2100
2101 int
2102 s390_compare_and_branch_condition_mask (rtx code)
2103 {
2104 const int CC0 = 1 << 3;
2105 const int CC1 = 1 << 2;
2106 const int CC2 = 1 << 1;
2107
2108 switch (GET_CODE (code))
2109 {
2110 case EQ:
2111 return CC0;
2112 case NE:
2113 return CC1 | CC2;
2114 case LT:
2115 case LTU:
2116 return CC1;
2117 case GT:
2118 case GTU:
2119 return CC2;
2120 case LE:
2121 case LEU:
2122 return CC0 | CC1;
2123 case GE:
2124 case GEU:
2125 return CC0 | CC2;
2126 default:
2127 gcc_unreachable ();
2128 }
2129 return -1;
2130 }
2131
2132 /* If INV is false, return assembler mnemonic string to implement
2133 a branch specified by CODE. If INV is true, return mnemonic
2134 for the corresponding inverted branch. */
2135
2136 static const char *
2137 s390_branch_condition_mnemonic (rtx code, int inv)
2138 {
2139 int mask;
2140
2141 static const char *const mnemonic[16] =
2142 {
2143 NULL, "o", "h", "nle",
2144 "l", "nhe", "lh", "ne",
2145 "e", "nlh", "he", "nl",
2146 "le", "nh", "no", NULL
2147 };
2148
2149 if (GET_CODE (XEXP (code, 0)) == REG
2150 && REGNO (XEXP (code, 0)) == CC_REGNUM
2151 && (XEXP (code, 1) == const0_rtx
2152 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2153 && CONST_INT_P (XEXP (code, 1)))))
2154 mask = s390_branch_condition_mask (code);
2155 else
2156 mask = s390_compare_and_branch_condition_mask (code);
2157
2158 gcc_assert (mask >= 0);
2159
2160 if (inv)
2161 mask ^= 15;
2162
2163 gcc_assert (mask >= 1 && mask <= 14);
2164
2165 return mnemonic[mask];
2166 }
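
/* Example (illustrative, not part of the original source): for
   (gt (reg:CCS CC_REGNUM) (const_int 0)) the mask computed above is
   CC2 == 2 and the mnemonic returned is "h"; with INV set the mask
   becomes 2 ^ 15 == 13 and "nh" is returned instead.  */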
2167
2168 /* Return the part of op which has a value different from def.
2169 The size of the part is determined by mode.
2170 Use this function only if you already know that op really
2171 contains such a part. */
2172
2173 unsigned HOST_WIDE_INT
2174 s390_extract_part (rtx op, machine_mode mode, int def)
2175 {
2176 unsigned HOST_WIDE_INT value = 0;
2177 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2178 int part_bits = GET_MODE_BITSIZE (mode);
2179 unsigned HOST_WIDE_INT part_mask
2180 = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
2181 int i;
2182
2183 for (i = 0; i < max_parts; i++)
2184 {
2185 if (i == 0)
2186 value = (unsigned HOST_WIDE_INT) INTVAL (op);
2187 else
2188 value >>= part_bits;
2189
2190 if ((value & part_mask) != (def & part_mask))
2191 return value & part_mask;
2192 }
2193
2194 gcc_unreachable ();
2195 }
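
/* Example (illustrative, not part of the original source):
   s390_extract_part (GEN_INT (0x12340000), HImode, 0) scans the
   16-bit parts starting from the least significant end and returns
   0x1234, the first (and here only) part that differs from DEF.  */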
2196
2197 /* If OP is an integer constant of mode MODE with exactly one
2198 part of mode PART_MODE unequal to DEF, return the number of that
2199 part. Otherwise, return -1. */
2200
2201 int
2202 s390_single_part (rtx op,
2203 machine_mode mode,
2204 machine_mode part_mode,
2205 int def)
2206 {
2207 unsigned HOST_WIDE_INT value = 0;
2208 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2209 unsigned HOST_WIDE_INT part_mask
2210 = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
2211 int i, part = -1;
2212
2213 if (GET_CODE (op) != CONST_INT)
2214 return -1;
2215
2216 for (i = 0; i < n_parts; i++)
2217 {
2218 if (i == 0)
2219 value = (unsigned HOST_WIDE_INT) INTVAL (op);
2220 else
2221 value >>= GET_MODE_BITSIZE (part_mode);
2222
2223 if ((value & part_mask) != (def & part_mask))
2224 {
2225 if (part != -1)
2226 return -1;
2227 else
2228 part = i;
2229 }
2230 }
2231 return part == -1 ? -1 : n_parts - 1 - part;
2232 }
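
/* Example (illustrative, not part of the original source):
   s390_single_part (GEN_INT (0xff00), DImode, HImode, 0) returns 3 --
   only the lowest 16-bit part differs from DEF, and parts are
   numbered from the most significant end.  For 0xff00ff00 it returns
   -1 because two parts differ.  */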
2233
2234 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2235 bits and no other bits are set in IN. POS and LENGTH can be used
2236 to obtain the start position and the length of the bitfield.
2237
2238 POS gives the position of the first bit of the bitfield counting
2239 from the lowest order bit starting with zero. In order to use this
2240 value for S/390 instructions this has to be converted to "bits big
2241 endian" style. */
2242
2243 bool
2244 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
2245 int *pos, int *length)
2246 {
2247 int tmp_pos = 0;
2248 int tmp_length = 0;
2249 int i;
2250 unsigned HOST_WIDE_INT mask = 1ULL;
2251 bool contiguous = false;
2252
2253 for (i = 0; i < size; mask <<= 1, i++)
2254 {
2255 if (contiguous)
2256 {
2257 if (mask & in)
2258 tmp_length++;
2259 else
2260 break;
2261 }
2262 else
2263 {
2264 if (mask & in)
2265 {
2266 contiguous = true;
2267 tmp_length++;
2268 }
2269 else
2270 tmp_pos++;
2271 }
2272 }
2273
2274 if (!tmp_length)
2275 return false;
2276
2277 /* Calculate a mask for all bits beyond the contiguous bits. */
2278 mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
2279
2280 if ((unsigned)size < sizeof (HOST_WIDE_INT) * BITS_PER_UNIT)
2281 mask &= (HOST_WIDE_INT_1U << size) - 1;
2282
2283 if (mask & in)
2284 return false;
2285
2286 if (tmp_length + tmp_pos - 1 > size)
2287 return false;
2288
2289 if (length)
2290 *length = tmp_length;
2291
2292 if (pos)
2293 *pos = tmp_pos;
2294
2295 return true;
2296 }
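
/* Example (illustrative, not part of the original source):
   s390_contiguous_bitmask_p (0x00ff0000, 32, &pos, &len) returns true
   with pos == 16 and len == 8, since bits 16..23 form the only run of
   one bits; 0x00ff00ff is rejected because its set bits form two
   separate runs.  */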
2297
2298 /* Return true if OP contains the same contiguous bitfield in *all*
2299 its elements. START and END can be used to obtain the start and
2300 end position of the bitfield.
2301
2302 START/END give the position of the first/last bit of the bitfield
2303 counting from the lowest order bit starting with zero. In order to
2304 use these values for S/390 instructions this has to be converted to
2305 "bits big endian" style. */
2306
2307 bool
2308 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2309 {
2310 unsigned HOST_WIDE_INT mask;
2311 int length, size;
2312 rtx elt;
2313
2314 if (!const_vec_duplicate_p (op, &elt)
2315 || !CONST_INT_P (elt))
2316 return false;
2317
2318 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2319
2320 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2321 if (size > 64)
2322 return false;
2323
2324 mask = UINTVAL (elt);
2325 if (s390_contiguous_bitmask_p (mask, size, start,
2326 end != NULL ? &length : NULL))
2327 {
2328 if (end != NULL)
2329 *end = *start + length - 1;
2330 return true;
2331 }
2332 /* 0xff00000f style immediates can be covered by swapping start and
2333 end indices in vgm. */
2334 if (s390_contiguous_bitmask_p (~mask, size, start,
2335 end != NULL ? &length : NULL))
2336 {
2337 if (end != NULL)
2338 *end = *start - 1;
2339 if (start != NULL)
2340 *start = *start + length;
2341 return true;
2342 }
2343 return false;
2344 }
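
/* Example (illustrative, not part of the original source): for a V4SI
   constant whose elements all equal 0xff00000f the mask itself is not
   contiguous, but its complement is, so the second branch above
   reports START == 24 and END == 3, describing the wrap-around mask
   that vgm can generate.  */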
2345
2346 /* Return true if OP consists only of byte chunks being either 0 or
2347 0xff. If MASK is non-NULL, a byte mask is generated which is
2348 appropriate for the vector generate byte mask instruction. */
2349
2350 bool
2351 s390_bytemask_vector_p (rtx op, unsigned *mask)
2352 {
2353 int i;
2354 unsigned tmp_mask = 0;
2355 int nunit, unit_size;
2356
2357 if (!VECTOR_MODE_P (GET_MODE (op))
2358 || GET_CODE (op) != CONST_VECTOR
2359 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2360 return false;
2361
2362 nunit = GET_MODE_NUNITS (GET_MODE (op));
2363 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2364
2365 for (i = 0; i < nunit; i++)
2366 {
2367 unsigned HOST_WIDE_INT c;
2368 int j;
2369
2370 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2371 return false;
2372
2373 c = UINTVAL (XVECEXP (op, 0, i));
2374 for (j = 0; j < unit_size; j++)
2375 {
2376 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2377 return false;
2378 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2379 c = c >> BITS_PER_UNIT;
2380 }
2381 }
2382
2383 if (mask != NULL)
2384 *mask = tmp_mask;
2385
2386 return true;
2387 }
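
/* Example (illustrative, not part of the original source): a V4SI
   constant vector whose elements are all 0xffffffff yields the mask
   0xffff, while { 0x000000ff, 0, 0, 0 } yields 0x1000 -- element 0
   occupies the most significant nibble of the mask and its lowest
   byte the lowest bit within that nibble.  */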
2388
2389 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2390 equivalent to a shift followed by the AND. In particular, CONTIG
2391 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2392 for ROTL indicate a rotate to the right. */
2393
2394 bool
2395 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2396 {
2397 int pos, len;
2398 bool ok;
2399
2400 ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
2401 gcc_assert (ok);
2402
2403 return ((rotl >= 0 && rotl <= pos)
2404 || (rotl < 0 && -rotl <= bitsize - len - pos));
2405 }
2406
2407 /* Check whether we can (and want to) split a double-word
2408 move in mode MODE from SRC to DST into two single-word
2409 moves, moving the subword FIRST_SUBWORD first. */
2410
2411 bool
2412 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2413 {
2414 /* Floating point and vector registers cannot be split. */
2415 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2416 return false;
2417
2418 /* We don't need to split if operands are directly accessible. */
2419 if (s_operand (src, mode) || s_operand (dst, mode))
2420 return false;
2421
2422 /* Non-offsettable memory references cannot be split. */
2423 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2424 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2425 return false;
2426
2427 /* Moving the first subword must not clobber a register
2428 needed to move the second subword. */
2429 if (register_operand (dst, mode))
2430 {
2431 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2432 if (reg_overlap_mentioned_p (subreg, src))
2433 return false;
2434 }
2435
2436 return true;
2437 }
2438
2439 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2440 and [MEM2, MEM2 + SIZE] do overlap, and false
2441 otherwise. */
2442
2443 bool
2444 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2445 {
2446 rtx addr1, addr2, addr_delta;
2447 HOST_WIDE_INT delta;
2448
2449 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2450 return true;
2451
2452 if (size == 0)
2453 return false;
2454
2455 addr1 = XEXP (mem1, 0);
2456 addr2 = XEXP (mem2, 0);
2457
2458 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2459
2460 /* This overlapping check is used by peepholes merging memory block operations.
2461 Overlapping operations would otherwise be recognized by the S/390 hardware
2462 and would fall back to a slower implementation. Allowing overlapping
2463 operations would lead to slow code but not to wrong code. Therefore we are
2464 somewhat optimistic if we cannot prove that the memory blocks are
2465 overlapping.
2466 That's why we return false here although this may accept operations on
2467 overlapping memory areas. */
2468 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2469 return false;
2470
2471 delta = INTVAL (addr_delta);
2472
2473 if (delta == 0
2474 || (delta > 0 && delta < size)
2475 || (delta < 0 && -delta < size))
2476 return true;
2477
2478 return false;
2479 }
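
/* Example (illustrative, not part of the original source): with MEM1
   at (reg) and MEM2 at (plus (reg) (const_int 8)) and SIZE == 16 the
   delta of 8 is smaller than SIZE, so overlap is proven and true is
   returned; if the two addresses use unrelated registers the delta is
   not a compile-time constant and false is returned, as explained in
   the comment above.  */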
2480
2481 /* Check whether the address of memory reference MEM2 equals exactly
2482 the address of memory reference MEM1 plus DELTA. Return true if
2483 we can prove this to be the case, false otherwise. */
2484
2485 bool
2486 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2487 {
2488 rtx addr1, addr2, addr_delta;
2489
2490 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2491 return false;
2492
2493 addr1 = XEXP (mem1, 0);
2494 addr2 = XEXP (mem2, 0);
2495
2496 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2497 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2498 return false;
2499
2500 return true;
2501 }
2502
2503 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2504
2505 void
2506 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2507 rtx *operands)
2508 {
2509 machine_mode wmode = mode;
2510 rtx dst = operands[0];
2511 rtx src1 = operands[1];
2512 rtx src2 = operands[2];
2513 rtx op, clob, tem;
2514
2515 /* If we cannot handle the operation directly, use a temp register. */
2516 if (!s390_logical_operator_ok_p (operands))
2517 dst = gen_reg_rtx (mode);
2518
2519 /* QImode and HImode patterns make sense only if we have a destination
2520 in memory. Otherwise perform the operation in SImode. */
2521 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2522 wmode = SImode;
2523
2524 /* Widen operands if required. */
2525 if (mode != wmode)
2526 {
2527 if (GET_CODE (dst) == SUBREG
2528 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2529 dst = tem;
2530 else if (REG_P (dst))
2531 dst = gen_rtx_SUBREG (wmode, dst, 0);
2532 else
2533 dst = gen_reg_rtx (wmode);
2534
2535 if (GET_CODE (src1) == SUBREG
2536 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2537 src1 = tem;
2538 else if (GET_MODE (src1) != VOIDmode)
2539 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2540
2541 if (GET_CODE (src2) == SUBREG
2542 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2543 src2 = tem;
2544 else if (GET_MODE (src2) != VOIDmode)
2545 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2546 }
2547
2548 /* Emit the instruction. */
2549 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2550 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2551 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2552
2553 /* Fix up the destination if needed. */
2554 if (dst != operands[0])
2555 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2556 }
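
/* Example (illustrative, not part of the original source): an AND of
   two QImode registers is widened by the code above and emitted as an
   SImode AND on subregs together with the CC clobber; the QImode
   result is then copied back into operands[0] where needed by the
   final move.  */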
2557
2558 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2559
2560 bool
2561 s390_logical_operator_ok_p (rtx *operands)
2562 {
2563 /* If the destination operand is in memory, it needs to coincide
2564 with one of the source operands. After reload, it has to be
2565 the first source operand. */
2566 if (GET_CODE (operands[0]) == MEM)
2567 return rtx_equal_p (operands[0], operands[1])
2568 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2569
2570 return true;
2571 }
2572
2573 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2574 operand IMMOP to switch from SS to SI type instructions. */
2575
2576 void
2577 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2578 {
2579 int def = code == AND ? -1 : 0;
2580 HOST_WIDE_INT mask;
2581 int part;
2582
2583 gcc_assert (GET_CODE (*memop) == MEM);
2584 gcc_assert (!MEM_VOLATILE_P (*memop));
2585
2586 mask = s390_extract_part (*immop, QImode, def);
2587 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2588 gcc_assert (part >= 0);
2589
2590 *memop = adjust_address (*memop, QImode, part);
2591 *immop = gen_int_mode (mask, QImode);
2592 }
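
/* Example (illustrative, not part of the original source): an SImode
   AND of a memory operand with the immediate 0xffffff00 has exactly
   one byte differing from all ones, so it is narrowed to a QImode AND
   of the byte at offset 3 (the least significant byte on this
   big-endian target) with 0x00, which an SI-type and-immediate
   instruction can implement.  */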
2593
2594
2595 /* How to allocate a 'struct machine_function'. */
2596
2597 static struct machine_function *
2598 s390_init_machine_status (void)
2599 {
2600 return ggc_cleared_alloc<machine_function> ();
2601 }
2602
2603 /* Map for smallest class containing reg regno. */
2604
2605 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2606 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2607 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2608 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2609 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2610 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2611 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2612 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2613 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2614 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2615 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2616 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2617 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2618 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2619 VEC_REGS, VEC_REGS /* 52 */
2620 };
2621
2622 /* Return attribute type of insn. */
2623
2624 static enum attr_type
2625 s390_safe_attr_type (rtx_insn *insn)
2626 {
2627 if (recog_memoized (insn) >= 0)
2628 return get_attr_type (insn);
2629 else
2630 return TYPE_NONE;
2631 }
2632
2633 /* Return true if DISP is a valid short displacement. */
2634
2635 static bool
2636 s390_short_displacement (rtx disp)
2637 {
2638 /* No displacement is OK. */
2639 if (!disp)
2640 return true;
2641
2642 /* Without the long displacement facility we don't need to
2643 distinguish between long and short displacements. */
2644 if (!TARGET_LONG_DISPLACEMENT)
2645 return true;
2646
2647 /* Integer displacement in range. */
2648 if (GET_CODE (disp) == CONST_INT)
2649 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2650
2651 /* GOT offset is not OK, the GOT can be large. */
2652 if (GET_CODE (disp) == CONST
2653 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2654 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2655 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2656 return false;
2657
2658 /* All other symbolic constants are literal pool references,
2659 which are OK as the literal pool must be small. */
2660 if (GET_CODE (disp) == CONST)
2661 return true;
2662
2663 return false;
2664 }
2665
2666 /* Decompose a RTL expression ADDR for a memory address into
2667 its components, returned in OUT.
2668
2669 Returns false if ADDR is not a valid memory address, true
2670 otherwise. If OUT is NULL, don't return the components,
2671 but check for validity only.
2672
2673 Note: Only addresses in canonical form are recognized.
2674 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2675 canonical form so that they will be recognized. */
2676
2677 static int
2678 s390_decompose_address (rtx addr, struct s390_address *out)
2679 {
2680 HOST_WIDE_INT offset = 0;
2681 rtx base = NULL_RTX;
2682 rtx indx = NULL_RTX;
2683 rtx disp = NULL_RTX;
2684 rtx orig_disp;
2685 bool pointer = false;
2686 bool base_ptr = false;
2687 bool indx_ptr = false;
2688 bool literal_pool = false;
2689
2690 /* We may need to substitute the literal pool base register into the address
2691 below. However, at this point we do not know which register is going to
2692 be used as base, so we substitute the arg pointer register. This is going
2693 to be treated as holding a pointer below -- it shouldn't be used for any
2694 other purpose. */
2695 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2696
2697 /* Decompose address into base + index + displacement. */
2698
2699 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2700 base = addr;
2701
2702 else if (GET_CODE (addr) == PLUS)
2703 {
2704 rtx op0 = XEXP (addr, 0);
2705 rtx op1 = XEXP (addr, 1);
2706 enum rtx_code code0 = GET_CODE (op0);
2707 enum rtx_code code1 = GET_CODE (op1);
2708
2709 if (code0 == REG || code0 == UNSPEC)
2710 {
2711 if (code1 == REG || code1 == UNSPEC)
2712 {
2713 indx = op0; /* index + base */
2714 base = op1;
2715 }
2716
2717 else
2718 {
2719 base = op0; /* base + displacement */
2720 disp = op1;
2721 }
2722 }
2723
2724 else if (code0 == PLUS)
2725 {
2726 indx = XEXP (op0, 0); /* index + base + disp */
2727 base = XEXP (op0, 1);
2728 disp = op1;
2729 }
2730
2731 else
2732 {
2733 return false;
2734 }
2735 }
2736
2737 else
2738 disp = addr; /* displacement */
2739
2740 /* Extract integer part of displacement. */
2741 orig_disp = disp;
2742 if (disp)
2743 {
2744 if (GET_CODE (disp) == CONST_INT)
2745 {
2746 offset = INTVAL (disp);
2747 disp = NULL_RTX;
2748 }
2749 else if (GET_CODE (disp) == CONST
2750 && GET_CODE (XEXP (disp, 0)) == PLUS
2751 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2752 {
2753 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2754 disp = XEXP (XEXP (disp, 0), 0);
2755 }
2756 }
2757
2758 /* Strip off CONST here to avoid special case tests later. */
2759 if (disp && GET_CODE (disp) == CONST)
2760 disp = XEXP (disp, 0);
2761
2762 /* We can convert literal pool addresses to
2763 displacements by basing them off the base register. */
2764 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2765 {
2766 /* Either base or index must be free to hold the base register. */
2767 if (!base)
2768 base = fake_pool_base, literal_pool = true;
2769 else if (!indx)
2770 indx = fake_pool_base, literal_pool = true;
2771 else
2772 return false;
2773
2774 /* Mark up the displacement. */
2775 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2776 UNSPEC_LTREL_OFFSET);
2777 }
2778
2779 /* Validate base register. */
2780 if (base)
2781 {
2782 if (GET_CODE (base) == UNSPEC)
2783 switch (XINT (base, 1))
2784 {
2785 case UNSPEC_LTREF:
2786 if (!disp)
2787 disp = gen_rtx_UNSPEC (Pmode,
2788 gen_rtvec (1, XVECEXP (base, 0, 0)),
2789 UNSPEC_LTREL_OFFSET);
2790 else
2791 return false;
2792
2793 base = XVECEXP (base, 0, 1);
2794 break;
2795
2796 case UNSPEC_LTREL_BASE:
2797 if (XVECLEN (base, 0) == 1)
2798 base = fake_pool_base, literal_pool = true;
2799 else
2800 base = XVECEXP (base, 0, 1);
2801 break;
2802
2803 default:
2804 return false;
2805 }
2806
2807 if (!REG_P (base)
2808 || (GET_MODE (base) != SImode
2809 && GET_MODE (base) != Pmode))
2810 return false;
2811
2812 if (REGNO (base) == STACK_POINTER_REGNUM
2813 || REGNO (base) == FRAME_POINTER_REGNUM
2814 || ((reload_completed || reload_in_progress)
2815 && frame_pointer_needed
2816 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2817 || REGNO (base) == ARG_POINTER_REGNUM
2818 || (flag_pic
2819 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2820 pointer = base_ptr = true;
2821
2822 if ((reload_completed || reload_in_progress)
2823 && base == cfun->machine->base_reg)
2824 pointer = base_ptr = literal_pool = true;
2825 }
2826
2827 /* Validate index register. */
2828 if (indx)
2829 {
2830 if (GET_CODE (indx) == UNSPEC)
2831 switch (XINT (indx, 1))
2832 {
2833 case UNSPEC_LTREF:
2834 if (!disp)
2835 disp = gen_rtx_UNSPEC (Pmode,
2836 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2837 UNSPEC_LTREL_OFFSET);
2838 else
2839 return false;
2840
2841 indx = XVECEXP (indx, 0, 1);
2842 break;
2843
2844 case UNSPEC_LTREL_BASE:
2845 if (XVECLEN (indx, 0) == 1)
2846 indx = fake_pool_base, literal_pool = true;
2847 else
2848 indx = XVECEXP (indx, 0, 1);
2849 break;
2850
2851 default:
2852 return false;
2853 }
2854
2855 if (!REG_P (indx)
2856 || (GET_MODE (indx) != SImode
2857 && GET_MODE (indx) != Pmode))
2858 return false;
2859
2860 if (REGNO (indx) == STACK_POINTER_REGNUM
2861 || REGNO (indx) == FRAME_POINTER_REGNUM
2862 || ((reload_completed || reload_in_progress)
2863 && frame_pointer_needed
2864 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2865 || REGNO (indx) == ARG_POINTER_REGNUM
2866 || (flag_pic
2867 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2868 pointer = indx_ptr = true;
2869
2870 if ((reload_completed || reload_in_progress)
2871 && indx == cfun->machine->base_reg)
2872 pointer = indx_ptr = literal_pool = true;
2873 }
2874
2875 /* Prefer to use pointer as base, not index. */
2876 if (base && indx && !base_ptr
2877 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2878 {
2879 rtx tmp = base;
2880 base = indx;
2881 indx = tmp;
2882 }
2883
2884 /* Validate displacement. */
2885 if (!disp)
2886 {
2887 /* If virtual registers are involved, the displacement will change later
2888 anyway as the virtual registers get eliminated. This could make a
2889 valid displacement invalid, but it is more likely to make an invalid
2890 displacement valid, because we sometimes access the register save area
2891 via negative offsets to one of those registers.
2892 Thus we don't check the displacement for validity here. If after
2893 elimination the displacement turns out to be invalid after all,
2894 this is fixed up by reload in any case. */
2895 /* LRA always keeps displacements up to date, and we need to
2896 know that the displacement is valid during all of LRA, not only
2897 at the final elimination. */
2898 if (lra_in_progress
2899 || (base != arg_pointer_rtx
2900 && indx != arg_pointer_rtx
2901 && base != return_address_pointer_rtx
2902 && indx != return_address_pointer_rtx
2903 && base != frame_pointer_rtx
2904 && indx != frame_pointer_rtx
2905 && base != virtual_stack_vars_rtx
2906 && indx != virtual_stack_vars_rtx))
2907 if (!DISP_IN_RANGE (offset))
2908 return false;
2909 }
2910 else
2911 {
2912 /* All the special cases are pointers. */
2913 pointer = true;
2914
2915 /* In the small-PIC case, the linker converts @GOT
2916 and @GOTNTPOFF offsets to possible displacements. */
2917 if (GET_CODE (disp) == UNSPEC
2918 && (XINT (disp, 1) == UNSPEC_GOT
2919 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2920 && flag_pic == 1)
2921 {
2922 ;
2923 }
2924
2925 /* Accept pool label offsets. */
2926 else if (GET_CODE (disp) == UNSPEC
2927 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2928 ;
2929
2930 /* Accept literal pool references. */
2931 else if (GET_CODE (disp) == UNSPEC
2932 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2933 {
2934 /* In case CSE pulled a non-literal-pool reference out of
2935 the pool we have to reject the address. This is
2936 especially important when loading the GOT pointer on non
2937 zarch CPUs. In this case the literal pool contains an lt
2938 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2939 will most likely exceed the displacement range. */
2940 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2941 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2942 return false;
2943
2944 orig_disp = gen_rtx_CONST (Pmode, disp);
2945 if (offset)
2946 {
2947 /* If we have an offset, make sure it does not
2948 exceed the size of the constant pool entry. */
2949 rtx sym = XVECEXP (disp, 0, 0);
2950 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2951 return false;
2952
2953 orig_disp = plus_constant (Pmode, orig_disp, offset);
2954 }
2955 }
2956
2957 else
2958 return false;
2959 }
2960
2961 if (!base && !indx)
2962 pointer = true;
2963
2964 if (out)
2965 {
2966 out->base = base;
2967 out->indx = indx;
2968 out->disp = orig_disp;
2969 out->pointer = pointer;
2970 out->literal_pool = literal_pool;
2971 }
2972
2973 return true;
2974 }
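
/* Example (illustrative, not part of the original source): the address
   (plus (plus (reg %r2) (reg %r3)) (const_int 4000)) is decomposed
   into index %r2, base %r3 and displacement 4000; base and index may
   subsequently be swapped by the pointer preference above.  */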
2975
2976 /* Decompose a RTL expression OP for a shift count into its components,
2977 and return the base register in BASE and the offset in OFFSET.
2978
2979 Return true if OP is a valid shift count, false if not. */
2980
2981 bool
2982 s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
2983 {
2984 HOST_WIDE_INT off = 0;
2985
2986 /* We can have an integer constant, an address register,
2987 or a sum of the two. */
2988 if (GET_CODE (op) == CONST_INT)
2989 {
2990 off = INTVAL (op);
2991 op = NULL_RTX;
2992 }
2993 if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
2994 {
2995 off = INTVAL (XEXP (op, 1));
2996 op = XEXP (op, 0);
2997 }
2998 while (op && GET_CODE (op) == SUBREG)
2999 op = SUBREG_REG (op);
3000
3001 if (op && GET_CODE (op) != REG)
3002 return false;
3003
3004 if (offset)
3005 *offset = off;
3006 if (base)
3007 *base = op;
3008
3009 return true;
3010 }
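
/* Example (illustrative, not part of the original source):
   (plus (reg %r1) (const_int 7)) yields BASE == %r1 and OFFSET == 7,
   while a plain (const_int 63) yields BASE == NULL_RTX and
   OFFSET == 63.  */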
3011
3012
3013 /* Return true if OP is a valid address without index. */
3014
3015 bool
3016 s390_legitimate_address_without_index_p (rtx op)
3017 {
3018 struct s390_address addr;
3019
3020 if (!s390_decompose_address (XEXP (op, 0), &addr))
3021 return false;
3022 if (addr.indx)
3023 return false;
3024
3025 return true;
3026 }
3027
3028
3029 /* Return TRUE if ADDR is an operand valid for a load/store relative
3030 instruction. Be aware that the alignment of the operand needs to
3031 be checked separately.
3032 Valid addresses are single references or a sum of a reference and a
3033 constant integer. Return these parts in SYMREF and ADDEND. You can
3034 pass NULL in REF and/or ADDEND if you are not interested in these
3035 values. Literal pool references are *not* considered symbol
3036 references. */
3037
3038 static bool
3039 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3040 {
3041 HOST_WIDE_INT tmpaddend = 0;
3042
3043 if (GET_CODE (addr) == CONST)
3044 addr = XEXP (addr, 0);
3045
3046 if (GET_CODE (addr) == PLUS)
3047 {
3048 if (!CONST_INT_P (XEXP (addr, 1)))
3049 return false;
3050
3051 tmpaddend = INTVAL (XEXP (addr, 1));
3052 addr = XEXP (addr, 0);
3053 }
3054
3055 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3056 || (GET_CODE (addr) == UNSPEC
3057 && (XINT (addr, 1) == UNSPEC_GOTENT
3058 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3059 {
3060 if (symref)
3061 *symref = addr;
3062 if (addend)
3063 *addend = tmpaddend;
3064
3065 return true;
3066 }
3067 return false;
3068 }
3069
3070 /* Return true if the address in OP is valid for constraint letter C
3071 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
3072 pool MEMs should be accepted. Only the Q, R, S, T constraint
3073 letters are allowed for C. */
3074
3075 static int
3076 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3077 {
3078 struct s390_address addr;
3079 bool decomposed = false;
3080
3081 /* This check makes sure that no symbolic addresses (except literal
3082 pool references) are accepted by the R or T constraints. */
3083 if (s390_loadrelative_operand_p (op, NULL, NULL))
3084 return 0;
3085
3086 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3087 if (!lit_pool_ok)
3088 {
3089 if (!s390_decompose_address (op, &addr))
3090 return 0;
3091 if (addr.literal_pool)
3092 return 0;
3093 decomposed = true;
3094 }
3095
3096 switch (c)
3097 {
3098 case 'Q': /* no index short displacement */
3099 if (!decomposed && !s390_decompose_address (op, &addr))
3100 return 0;
3101 if (addr.indx)
3102 return 0;
3103 if (!s390_short_displacement (addr.disp))
3104 return 0;
3105 break;
3106
3107 case 'R': /* with index short displacement */
3108 if (TARGET_LONG_DISPLACEMENT)
3109 {
3110 if (!decomposed && !s390_decompose_address (op, &addr))
3111 return 0;
3112 if (!s390_short_displacement (addr.disp))
3113 return 0;
3114 }
3115 /* Any invalid address here will be fixed up by reload,
3116 so accept it for the most generic constraint. */
3117 break;
3118
3119 case 'S': /* no index long displacement */
3120 if (!TARGET_LONG_DISPLACEMENT)
3121 return 0;
3122 if (!decomposed && !s390_decompose_address (op, &addr))
3123 return 0;
3124 if (addr.indx)
3125 return 0;
3126 if (s390_short_displacement (addr.disp))
3127 return 0;
3128 break;
3129
3130 case 'T': /* with index long displacement */
3131 if (!TARGET_LONG_DISPLACEMENT)
3132 return 0;
3133 /* Any invalid address here will be fixed up by reload,
3134 so accept it for the most generic constraint. */
3135 if ((decomposed || s390_decompose_address (op, &addr))
3136 && s390_short_displacement (addr.disp))
3137 return 0;
3138 break;
3139 default:
3140 return 0;
3141 }
3142 return 1;
3143 }
3144
3145
3146 /* Evaluates constraint strings described by the regular expression
3147 ([A|B|Z](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
3148 the constraint given in STR, or 0 else. */
3149
3150 int
3151 s390_mem_constraint (const char *str, rtx op)
3152 {
3153 char c = str[0];
3154
3155 switch (c)
3156 {
3157 case 'A':
3158 /* Check for offsettable variants of memory constraints. */
3159 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3160 return 0;
3161 if ((reload_completed || reload_in_progress)
3162 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3163 return 0;
3164 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3165 case 'B':
3166 /* Check for non-literal-pool variants of memory constraints. */
3167 if (!MEM_P (op))
3168 return 0;
3169 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3170 case 'Q':
3171 case 'R':
3172 case 'S':
3173 case 'T':
3174 if (GET_CODE (op) != MEM)
3175 return 0;
3176 return s390_check_qrst_address (c, XEXP (op, 0), true);
3177 case 'U':
3178 return (s390_check_qrst_address ('Q', op, true)
3179 || s390_check_qrst_address ('R', op, true));
3180 case 'W':
3181 return (s390_check_qrst_address ('S', op, true)
3182 || s390_check_qrst_address ('T', op, true));
3183 case 'Y':
3184 /* Simply check for the basic form of a shift count. Reload will
3185 take care of making sure we have a proper base register. */
3186 if (!s390_decompose_shift_count (op, NULL, NULL))
3187 return 0;
3188 break;
3189 case 'Z':
3190 return s390_check_qrst_address (str[1], op, true);
3191 default:
3192 return 0;
3193 }
3194 return 1;
3195 }
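
/* Example (illustrative, not part of the original source):
   (mem (plus (reg) (const_int 100))) satisfies constraint "Q" (no
   index, short displacement).  On targets with the long-displacement
   facility a displacement of 5000 needs the "S"/"T" letters instead,
   and an address that also contains an index register is matched by
   "R" or "T" rather than "Q" or "S".  */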
3196
3197
3198 /* Evaluates constraint strings starting with letter O. Input
3199 parameter C is the second letter following the "O" in the constraint
3200 string. Returns 1 if VALUE meets the respective constraint and 0
3201 otherwise. */
3202
3203 int
3204 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3205 {
3206 if (!TARGET_EXTIMM)
3207 return 0;
3208
3209 switch (c)
3210 {
3211 case 's':
3212 return trunc_int_for_mode (value, SImode) == value;
3213
3214 case 'p':
3215 return value == 0
3216 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3217
3218 case 'n':
3219 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3220
3221 default:
3222 gcc_unreachable ();
3223 }
3224 }
3225
3226
3227 /* Evaluates constraint strings starting with letter N. Parameter STR
3228 contains the letters following letter "N" in the constraint string.
3229 Returns true if VALUE matches the constraint. */
3230
3231 int
3232 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3233 {
3234 machine_mode mode, part_mode;
3235 int def;
3236 int part, part_goal;
3237
3238
3239 if (str[0] == 'x')
3240 part_goal = -1;
3241 else
3242 part_goal = str[0] - '0';
3243
3244 switch (str[1])
3245 {
3246 case 'Q':
3247 part_mode = QImode;
3248 break;
3249 case 'H':
3250 part_mode = HImode;
3251 break;
3252 case 'S':
3253 part_mode = SImode;
3254 break;
3255 default:
3256 return 0;
3257 }
3258
3259 switch (str[2])
3260 {
3261 case 'H':
3262 mode = HImode;
3263 break;
3264 case 'S':
3265 mode = SImode;
3266 break;
3267 case 'D':
3268 mode = DImode;
3269 break;
3270 default:
3271 return 0;
3272 }
3273
3274 switch (str[3])
3275 {
3276 case '0':
3277 def = 0;
3278 break;
3279 case 'F':
3280 def = -1;
3281 break;
3282 default:
3283 return 0;
3284 }
3285
3286 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3287 return 0;
3288
3289 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3290 if (part < 0)
3291 return 0;
3292 if (part_goal != -1 && part_goal != part)
3293 return 0;
3294
3295 return 1;
3296 }
3297
3298
3299 /* Returns true if the input parameter VALUE is a float zero. */
3300
3301 int
3302 s390_float_const_zero_p (rtx value)
3303 {
3304 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3305 && value == CONST0_RTX (GET_MODE (value)));
3306 }
3307
3308 /* Implement TARGET_REGISTER_MOVE_COST. */
3309
3310 static int
3311 s390_register_move_cost (machine_mode mode,
3312 reg_class_t from, reg_class_t to)
3313 {
3314 /* On s390, copy between fprs and gprs is expensive. */
3315
3316 /* It becomes somewhat faster having ldgr/lgdr. */
3317 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3318 {
3319 /* ldgr is single cycle. */
3320 if (reg_classes_intersect_p (from, GENERAL_REGS)
3321 && reg_classes_intersect_p (to, FP_REGS))
3322 return 1;
3323 /* lgdr needs 3 cycles. */
3324 if (reg_classes_intersect_p (to, GENERAL_REGS)
3325 && reg_classes_intersect_p (from, FP_REGS))
3326 return 3;
3327 }
3328
3329 /* Otherwise copying is done via memory. */
3330 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3331 && reg_classes_intersect_p (to, FP_REGS))
3332 || (reg_classes_intersect_p (from, FP_REGS)
3333 && reg_classes_intersect_p (to, GENERAL_REGS)))
3334 return 10;
3335
3336 return 1;
3337 }
3338
3339 /* Implement TARGET_MEMORY_MOVE_COST. */
3340
3341 static int
3342 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3343 reg_class_t rclass ATTRIBUTE_UNUSED,
3344 bool in ATTRIBUTE_UNUSED)
3345 {
3346 return 2;
3347 }
3348
3349 /* Compute a (partial) cost for rtx X. Return true if the complete
3350 cost has been computed, and false if subexpressions should be
3351 scanned. In either case, *TOTAL contains the cost result.
3352 OUTER_CODE contains the code of the superexpression of x. */
3353
3354 static bool
3355 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3356 int opno ATTRIBUTE_UNUSED,
3357 int *total, bool speed ATTRIBUTE_UNUSED)
3358 {
3359 int code = GET_CODE (x);
3360 switch (code)
3361 {
3362 case CONST:
3363 case CONST_INT:
3364 case LABEL_REF:
3365 case SYMBOL_REF:
3366 case CONST_DOUBLE:
3367 case CONST_WIDE_INT:
3368 case MEM:
3369 *total = 0;
3370 return true;
3371
3372 case IOR:
3373 /* risbg */
3374 if (GET_CODE (XEXP (x, 0)) == AND
3375 && GET_CODE (XEXP (x, 1)) == ASHIFT
3376 && REG_P (XEXP (XEXP (x, 0), 0))
3377 && REG_P (XEXP (XEXP (x, 1), 0))
3378 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3379 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3380 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3381 (1UL << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3382 {
3383 *total = COSTS_N_INSNS (2);
3384 return true;
3385 }
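/* Not matched as risbg: fall through and cost the IOR like the other
   logical operations below.  */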
3386 case ASHIFT:
3387 case ASHIFTRT:
3388 case LSHIFTRT:
3389 case ROTATE:
3390 case ROTATERT:
3391 case AND:
3392 case XOR:
3393 case NEG:
3394 case NOT:
3395 *total = COSTS_N_INSNS (1);
3396 return false;
3397
3398 case PLUS:
3399 case MINUS:
3400 *total = COSTS_N_INSNS (1);
3401 return false;
3402
3403 case MULT:
3404 switch (mode)
3405 {
3406 case SImode:
3407 {
3408 rtx left = XEXP (x, 0);
3409 rtx right = XEXP (x, 1);
3410 if (GET_CODE (right) == CONST_INT
3411 && CONST_OK_FOR_K (INTVAL (right)))
3412 *total = s390_cost->mhi;
3413 else if (GET_CODE (left) == SIGN_EXTEND)
3414 *total = s390_cost->mh;
3415 else
3416 *total = s390_cost->ms; /* msr, ms, msy */
3417 break;
3418 }
3419 case DImode:
3420 {
3421 rtx left = XEXP (x, 0);
3422 rtx right = XEXP (x, 1);
3423 if (TARGET_ZARCH)
3424 {
3425 if (GET_CODE (right) == CONST_INT
3426 && CONST_OK_FOR_K (INTVAL (right)))
3427 *total = s390_cost->mghi;
3428 else if (GET_CODE (left) == SIGN_EXTEND)
3429 *total = s390_cost->msgf;
3430 else
3431 *total = s390_cost->msg; /* msgr, msg */
3432 }
3433 else /* TARGET_31BIT */
3434 {
3435 if (GET_CODE (left) == SIGN_EXTEND
3436 && GET_CODE (right) == SIGN_EXTEND)
3437 /* mulsidi case: mr, m */
3438 *total = s390_cost->m;
3439 else if (GET_CODE (left) == ZERO_EXTEND
3440 && GET_CODE (right) == ZERO_EXTEND
3441 && TARGET_CPU_ZARCH)
3442 /* umulsidi case: ml, mlr */
3443 *total = s390_cost->ml;
3444 else
3445 /* Complex calculation is required. */
3446 *total = COSTS_N_INSNS (40);
3447 }
3448 break;
3449 }
3450 case SFmode:
3451 case DFmode:
3452 *total = s390_cost->mult_df;
3453 break;
3454 case TFmode:
3455 *total = s390_cost->mxbr;
3456 break;
3457 default:
3458 return false;
3459 }
3460 return false;
3461
3462 case FMA:
3463 switch (mode)
3464 {
3465 case DFmode:
3466 *total = s390_cost->madbr;
3467 break;
3468 case SFmode:
3469 *total = s390_cost->maebr;
3470 break;
3471 default:
3472 return false;
3473 }
3474 /* Negate in the third argument is free: FMSUB. */
3475 if (GET_CODE (XEXP (x, 2)) == NEG)
3476 {
3477 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3478 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3479 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3480 return true;
3481 }
3482 return false;
3483
3484 case UDIV:
3485 case UMOD:
3486 if (mode == TImode) /* 128 bit division */
3487 *total = s390_cost->dlgr;
3488 else if (mode == DImode)
3489 {
3490 rtx right = XEXP (x, 1);
3491 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3492 *total = s390_cost->dlr;
3493 else /* 64 by 64 bit division */
3494 *total = s390_cost->dlgr;
3495 }
3496 else if (mode == SImode) /* 32 bit division */
3497 *total = s390_cost->dlr;
3498 return false;
3499
3500 case DIV:
3501 case MOD:
3502 if (mode == DImode)
3503 {
3504 rtx right = XEXP (x, 1);
3505 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3506 if (TARGET_ZARCH)
3507 *total = s390_cost->dsgfr;
3508 else
3509 *total = s390_cost->dr;
3510 else /* 64 by 64 bit division */
3511 *total = s390_cost->dsgr;
3512 }
3513 else if (mode == SImode) /* 32 bit division */
3514 *total = s390_cost->dlr;
3515 else if (mode == SFmode)
3516 {
3517 *total = s390_cost->debr;
3518 }
3519 else if (mode == DFmode)
3520 {
3521 *total = s390_cost->ddbr;
3522 }
3523 else if (mode == TFmode)
3524 {
3525 *total = s390_cost->dxbr;
3526 }
3527 return false;
3528
3529 case SQRT:
3530 if (mode == SFmode)
3531 *total = s390_cost->sqebr;
3532 else if (mode == DFmode)
3533 *total = s390_cost->sqdbr;
3534 else /* TFmode */
3535 *total = s390_cost->sqxbr;
3536 return false;
3537
3538 case SIGN_EXTEND:
3539 case ZERO_EXTEND:
3540 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3541 || outer_code == PLUS || outer_code == MINUS
3542 || outer_code == COMPARE)
3543 *total = 0;
3544 return false;
3545
3546 case COMPARE:
3547 *total = COSTS_N_INSNS (1);
3548 if (GET_CODE (XEXP (x, 0)) == AND
3549 && GET_CODE (XEXP (x, 1)) == CONST_INT
3550 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3551 {
3552 rtx op0 = XEXP (XEXP (x, 0), 0);
3553 rtx op1 = XEXP (XEXP (x, 0), 1);
3554 rtx op2 = XEXP (x, 1);
3555
3556 if (memory_operand (op0, GET_MODE (op0))
3557 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3558 return true;
3559 if (register_operand (op0, GET_MODE (op0))
3560 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3561 return true;
3562 }
3563 return false;
3564
3565 default:
3566 return false;
3567 }
3568 }
3569
3570 /* Return the cost of an address rtx ADDR. */
3571
3572 static int
3573 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3574 addr_space_t as ATTRIBUTE_UNUSED,
3575 bool speed ATTRIBUTE_UNUSED)
3576 {
3577 struct s390_address ad;
3578 if (!s390_decompose_address (addr, &ad))
3579 return 1000;
3580
3581 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3582 }
3583
3584 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3585 otherwise return 0. */
3586
3587 int
3588 tls_symbolic_operand (rtx op)
3589 {
3590 if (GET_CODE (op) != SYMBOL_REF)
3591 return 0;
3592 return SYMBOL_REF_TLS_MODEL (op);
3593 }
3594 \f
3595 /* Split DImode access register reference REG (on 64-bit) into its constituent
3596 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3597 gen_highpart cannot be used as they assume all registers are word-sized,
3598 while our access registers have only half that size. */
3599
3600 void
3601 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3602 {
3603 gcc_assert (TARGET_64BIT);
3604 gcc_assert (ACCESS_REG_P (reg));
3605 gcc_assert (GET_MODE (reg) == DImode);
3606 gcc_assert (!(REGNO (reg) & 1));
3607
3608 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3609 *hi = gen_rtx_REG (SImode, REGNO (reg));
3610 }
3611
3612 /* Return true if OP contains a symbol reference. */
3613
3614 bool
3615 symbolic_reference_mentioned_p (rtx op)
3616 {
3617 const char *fmt;
3618 int i;
3619
3620 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3621 return 1;
3622
3623 fmt = GET_RTX_FORMAT (GET_CODE (op));
3624 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3625 {
3626 if (fmt[i] == 'E')
3627 {
3628 int j;
3629
3630 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3631 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3632 return 1;
3633 }
3634
3635 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3636 return 1;
3637 }
3638
3639 return 0;
3640 }
3641
3642 /* Return true if OP contains a reference to a thread-local symbol. */
3643
3644 bool
3645 tls_symbolic_reference_mentioned_p (rtx op)
3646 {
3647 const char *fmt;
3648 int i;
3649
3650 if (GET_CODE (op) == SYMBOL_REF)
3651 return tls_symbolic_operand (op);
3652
3653 fmt = GET_RTX_FORMAT (GET_CODE (op));
3654 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3655 {
3656 if (fmt[i] == 'E')
3657 {
3658 int j;
3659
3660 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3661 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3662 return true;
3663 }
3664
3665 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3666 return true;
3667 }
3668
3669 return false;
3670 }
3671
3672
3673 /* Return true if OP is a legitimate general operand when
3674 generating PIC code. It is given that flag_pic is on
3675 and that OP satisfies CONSTANT_P. */
3676
3677 int
3678 legitimate_pic_operand_p (rtx op)
3679 {
3680 /* Accept all non-symbolic constants. */
3681 if (!SYMBOLIC_CONST (op))
3682 return 1;
3683
3684 /* Reject everything else; must be handled
3685 via emit_symbolic_move. */
3686 return 0;
3687 }
3688
3689 /* Returns true if the constant value OP is a legitimate general operand.
3690 It is given that OP satisfies CONSTANT_P. */
3691
3692 static bool
3693 s390_legitimate_constant_p (machine_mode mode, rtx op)
3694 {
3695 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3696 {
3697 if (GET_MODE_SIZE (mode) != 16)
3698 return 0;
3699
3700 if (!satisfies_constraint_j00 (op)
3701 && !satisfies_constraint_jm1 (op)
3702 && !satisfies_constraint_jKK (op)
3703 && !satisfies_constraint_jxx (op)
3704 && !satisfies_constraint_jyy (op))
3705 return 0;
3706 }
3707
3708 /* Accept all non-symbolic constants. */
3709 if (!SYMBOLIC_CONST (op))
3710 return 1;
3711
3712 /* Accept immediate LARL operands. */
3713 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3714 return 1;
3715
3716 /* Thread-local symbols are never legal constants. This is
3717 so that emit_call knows that computing such addresses
3718 might require a function call. */
3719 if (TLS_SYMBOLIC_CONST (op))
3720 return 0;
3721
3722 /* In the PIC case, symbolic constants must *not* be
3723 forced into the literal pool. We accept them here,
3724 so that they will be handled by emit_symbolic_move. */
3725 if (flag_pic)
3726 return 1;
3727
3728 /* All remaining non-PIC symbolic constants are
3729 forced into the literal pool. */
3730 return 0;
3731 }
3732
3733 /* Determine if it's legal to put X into the constant pool. This
3734 is not possible if X contains the address of a symbol that is
3735 not constant (TLS) or not known at final link time (PIC). */
3736
3737 static bool
3738 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3739 {
3740 switch (GET_CODE (x))
3741 {
3742 case CONST_INT:
3743 case CONST_DOUBLE:
3744 case CONST_WIDE_INT:
3745 case CONST_VECTOR:
3746 /* Accept all non-symbolic constants. */
3747 return false;
3748
3749 case LABEL_REF:
3750 /* Labels are OK iff we are non-PIC. */
3751 return flag_pic != 0;
3752
3753 case SYMBOL_REF:
3754 /* 'Naked' TLS symbol references are never OK,
3755 non-TLS symbols are OK iff we are non-PIC. */
3756 if (tls_symbolic_operand (x))
3757 return true;
3758 else
3759 return flag_pic != 0;
3760
3761 case CONST:
3762 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3763 case PLUS:
3764 case MINUS:
3765 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3766 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3767
3768 case UNSPEC:
3769 switch (XINT (x, 1))
3770 {
3771 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3772 case UNSPEC_LTREL_OFFSET:
3773 case UNSPEC_GOT:
3774 case UNSPEC_GOTOFF:
3775 case UNSPEC_PLTOFF:
3776 case UNSPEC_TLSGD:
3777 case UNSPEC_TLSLDM:
3778 case UNSPEC_NTPOFF:
3779 case UNSPEC_DTPOFF:
3780 case UNSPEC_GOTNTPOFF:
3781 case UNSPEC_INDNTPOFF:
3782 return false;
3783
3784 /* If the literal pool shares the code section, execute
3785 template placeholders may be put into the pool as well. */
3786 case UNSPEC_INSN:
3787 return TARGET_CPU_ZARCH;
3788
3789 default:
3790 return true;
3791 }
3792 break;
3793
3794 default:
3795 gcc_unreachable ();
3796 }
3797 }
3798
3799 /* Returns true if the constant value OP is a legitimate general
3800 operand during and after reload. The difference to
3801 legitimate_constant_p is that this function will not accept
3802 a constant that would need to be forced to the literal pool
3803 before it can be used as operand.
3804 This function accepts all constants which can be loaded directly
3805 into a GPR. */
3806
3807 bool
3808 legitimate_reload_constant_p (rtx op)
3809 {
3810 /* Accept la(y) operands. */
3811 if (GET_CODE (op) == CONST_INT
3812 && DISP_IN_RANGE (INTVAL (op)))
3813 return true;
3814
3815 /* Accept l(g)hi/l(g)fi operands. */
3816 if (GET_CODE (op) == CONST_INT
3817 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3818 return true;
3819
3820 /* Accept lliXX operands. */
3821 if (TARGET_ZARCH
3822 && GET_CODE (op) == CONST_INT
3823 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3824 && s390_single_part (op, word_mode, HImode, 0) >= 0)
3825 return true;
3826
3827 if (TARGET_EXTIMM
3828 && GET_CODE (op) == CONST_INT
3829 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3830 && s390_single_part (op, word_mode, SImode, 0) >= 0)
3831 return true;
3832
3833 /* Accept larl operands. */
3834 if (TARGET_CPU_ZARCH
3835 && larl_operand (op, VOIDmode))
3836 return true;
3837
3838 /* Accept floating-point zero operands that fit into a single GPR. */
3839 if (GET_CODE (op) == CONST_DOUBLE
3840 && s390_float_const_zero_p (op)
3841 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3842 return true;
3843
3844 /* Accept double-word operands that can be split. */
3845 if (GET_CODE (op) == CONST_WIDE_INT
3846 || (GET_CODE (op) == CONST_INT
3847 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
3848 {
3849 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
3850 rtx hi = operand_subword (op, 0, 0, dword_mode);
3851 rtx lo = operand_subword (op, 1, 0, dword_mode);
3852 return legitimate_reload_constant_p (hi)
3853 && legitimate_reload_constant_p (lo);
3854 }
3855
3856 /* Everything else cannot be handled without reload. */
3857 return false;
3858 }
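
/* Example (illustrative, not part of the original source): on
   TARGET_ZARCH the constant 0x0000002a00000000 is accepted through
   the lliXX check above, since exactly one 16-bit part is nonzero and
   can be loaded with a single load-logical-immediate instruction.  */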
3859
3860 /* Returns true if the constant value OP is a legitimate fp operand
3861 during and after reload.
3862 This function accepts all constants which can be loaded directly
3863 into an FPR. */
3864
3865 static bool
3866 legitimate_reload_fp_constant_p (rtx op)
3867 {
3868 /* Accept floating-point zero operands if the load zero instruction
3869 can be used. Prior to z196 the load fp zero instruction caused a
3870 performance penalty if the result is used as BFP number. */
3871 if (TARGET_Z196
3872 && GET_CODE (op) == CONST_DOUBLE
3873 && s390_float_const_zero_p (op))
3874 return true;
3875
3876 return false;
3877 }
3878
3879 /* Returns true if the constant value OP is a legitimate vector operand
3880 during and after reload.
3881 This function accepts all constants which can be loaded directly
3882 into a VR. */
3883
3884 static bool
3885 legitimate_reload_vector_constant_p (rtx op)
3886 {
3887 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
3888 && (satisfies_constraint_j00 (op)
3889 || satisfies_constraint_jm1 (op)
3890 || satisfies_constraint_jKK (op)
3891 || satisfies_constraint_jxx (op)
3892 || satisfies_constraint_jyy (op)))
3893 return true;
3894
3895 return false;
3896 }
3897
3898 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
3899 return the class of reg to actually use. */
3900
3901 static reg_class_t
3902 s390_preferred_reload_class (rtx op, reg_class_t rclass)
3903 {
3904 switch (GET_CODE (op))
3905 {
3906 /* Constants we cannot reload into general registers
3907 must be forced into the literal pool. */
3908 case CONST_VECTOR:
3909 case CONST_DOUBLE:
3910 case CONST_INT:
3911 case CONST_WIDE_INT:
3912 if (reg_class_subset_p (GENERAL_REGS, rclass)
3913 && legitimate_reload_constant_p (op))
3914 return GENERAL_REGS;
3915 else if (reg_class_subset_p (ADDR_REGS, rclass)
3916 && legitimate_reload_constant_p (op))
3917 return ADDR_REGS;
3918 else if (reg_class_subset_p (FP_REGS, rclass)
3919 && legitimate_reload_fp_constant_p (op))
3920 return FP_REGS;
3921 else if (reg_class_subset_p (VEC_REGS, rclass)
3922 && legitimate_reload_vector_constant_p (op))
3923 return VEC_REGS;
3924
3925 return NO_REGS;
3926
3927 /* If a symbolic constant or a PLUS is reloaded,
3928 it is most likely being used as an address, so
3929 prefer ADDR_REGS. If RCLASS is not a superset
3930 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
3931 case CONST:
3932 /* Symrefs cannot be pushed into the literal pool with -fPIC
3933 so we *MUST NOT* return NO_REGS for these cases
3934 (s390_cannot_force_const_mem will return true).
3935
3936 On the other hand we MUST return NO_REGS for symrefs with
3937 invalid addend which might have been pushed to the literal
3938 pool (no -fPIC). Usually we would expect them to be
3939 handled via secondary reload but this does not happen if
3940 they are used as literal pool slot replacement in reload
3941 inheritance (see emit_input_reload_insns). */
3942 if (TARGET_CPU_ZARCH
3943 && GET_CODE (XEXP (op, 0)) == PLUS
3944 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
3945 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
3946 {
3947 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
3948 return ADDR_REGS;
3949 else
3950 return NO_REGS;
3951 }
3952 /* fallthrough */
3953 case LABEL_REF:
3954 case SYMBOL_REF:
3955 if (!legitimate_reload_constant_p (op))
3956 return NO_REGS;
3957 /* fallthrough */
3958 case PLUS:
3959 /* load address will be used. */
3960 if (reg_class_subset_p (ADDR_REGS, rclass))
3961 return ADDR_REGS;
3962 else
3963 return NO_REGS;
3964
3965 default:
3966 break;
3967 }
3968
3969 return rclass;
3970 }
3971
3972 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
3973 multiple of ALIGNMENT and the SYMBOL_REF being naturally
3974 aligned. */
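/* Illustrative example (not part of the original source): with
   ALIGNMENT == 4 an address of the form (symbol_ref "x") + 8 passes the
   addend check because 8 & (4 - 1) == 0 and is accepted unless "x"
   carries SYMBOL_FLAG_NOTALIGN4; an address (symbol_ref "x") + 6 is
   always rejected since 6 & 3 != 0.  */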
3975
3976 bool
3977 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
3978 {
3979 HOST_WIDE_INT addend;
3980 rtx symref;
3981
3982 /* The "required alignment" might be 0 (e.g. for certain structs
3983 accessed via BLKmode). Early abort in this case, as well as when
3984 an alignment > 8 is required. */
3985 if (alignment < 2 || alignment > 8)
3986 return false;
3987
3988 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3989 return false;
3990
3991 if (addend & (alignment - 1))
3992 return false;
3993
3994 if (GET_CODE (symref) == SYMBOL_REF)
3995 {
3996 /* We have load-relative instructions for 2-byte, 4-byte, and
3997 8-byte alignment so allow only these. */
3998 switch (alignment)
3999 {
4000 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4001 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4002 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4003 default: return false;
4004 }
4005 }
4006
4007 if (GET_CODE (symref) == UNSPEC
4008 && alignment <= UNITS_PER_LONG)
4009 return true;
4010
4011 return false;
4012 }
4013
4014 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4015 operand, SCRATCH is used to load the even part of the address, and
4016 one is added to it afterwards. */
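/* Illustrative sketch (not part of the original source): for an odd
   addend such as sym+5 the code below emits roughly

       larl  scratch, sym+4
       la    reg, 1(scratch)

   i.e. the even part of the address is loaded PC-relatively and the
   final increment is done with la so the condition code stays intact.  */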
4017
4018 void
4019 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4020 {
4021 HOST_WIDE_INT addend;
4022 rtx symref;
4023
4024 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4025 gcc_unreachable ();
4026
4027 if (!(addend & 1))
4028 /* Easy case. The addend is even so larl will do fine. */
4029 emit_move_insn (reg, addr);
4030 else
4031 {
4032 /* We can leave the scratch register untouched if the target
4033 register is a valid base register. */
4034 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4035 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4036 scratch = reg;
4037
4038 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4039 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4040
4041 if (addend != 1)
4042 emit_move_insn (scratch,
4043 gen_rtx_CONST (Pmode,
4044 gen_rtx_PLUS (Pmode, symref,
4045 GEN_INT (addend - 1))));
4046 else
4047 emit_move_insn (scratch, symref);
4048
4049 /* Increment the address using la in order to avoid clobbering cc. */
4050 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4051 }
4052 }
4053
4054 /* Generate what is necessary to move between REG and MEM using
4055 SCRATCH. The direction is given by TOMEM. */
4056
4057 void
4058 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4059 {
4060 /* Reload might have pulled a constant out of the literal pool.
4061 Force it back in. */
4062 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4063 || GET_CODE (mem) == CONST_WIDE_INT
4064 || GET_CODE (mem) == CONST_VECTOR
4065 || GET_CODE (mem) == CONST)
4066 mem = force_const_mem (GET_MODE (reg), mem);
4067
4068 gcc_assert (MEM_P (mem));
4069
4070 /* For a load from memory we can leave the scratch register
4071 untouched if the target register is a valid base register. */
4072 if (!tomem
4073 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4074 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4075 && GET_MODE (reg) == GET_MODE (scratch))
4076 scratch = reg;
4077
4078 /* Load address into scratch register. Since we can't have a
4079 secondary reload for a secondary reload we have to cover the case
4080 where larl would need a secondary reload here as well. */
4081 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4082
4083 /* Now we can use a standard load/store to do the move. */
4084 if (tomem)
4085 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4086 else
4087 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4088 }
4089
4090 /* Inform reload about cases where moving X with a mode MODE to a register in
4091 RCLASS requires an extra scratch or immediate register. Return the class
4092 needed for the immediate register. */
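/* Illustrative example (not part of the original source): on z10,
   loading a DFmode value located at a larl-addressable symbol directly
   into an FPR is not possible, since the PC-relative load instructions
   only target general-purpose registers.  The z10 code below therefore
   selects a reload pattern that provides an address register as
   scratch, turning the access into roughly
   "larl scratch, sym; ld fpr, 0(scratch)".  */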
4093
4094 static reg_class_t
4095 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4096 machine_mode mode, secondary_reload_info *sri)
4097 {
4098 enum reg_class rclass = (enum reg_class) rclass_i;
4099
4100 /* Intermediate register needed. */
4101 if (reg_classes_intersect_p (CC_REGS, rclass))
4102 return GENERAL_REGS;
4103
4104 if (TARGET_VX)
4105 {
4106 /* The vst/vl vector move instructions allow only for short
4107 displacements. */
4108 if (MEM_P (x)
4109 && GET_CODE (XEXP (x, 0)) == PLUS
4110 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4111 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4112 && reg_class_subset_p (rclass, VEC_REGS)
4113 && (!reg_class_subset_p (rclass, FP_REGS)
4114 || (GET_MODE_SIZE (mode) > 8
4115 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4116 {
4117 if (in_p)
4118 sri->icode = (TARGET_64BIT ?
4119 CODE_FOR_reloaddi_la_in :
4120 CODE_FOR_reloadsi_la_in);
4121 else
4122 sri->icode = (TARGET_64BIT ?
4123 CODE_FOR_reloaddi_la_out :
4124 CODE_FOR_reloadsi_la_out);
4125 }
4126 }
4127
4128 if (TARGET_Z10)
4129 {
4130 HOST_WIDE_INT offset;
4131 rtx symref;
4132
4133 /* On z10 several optimizer steps may generate larl operands with
4134 an odd addend. */
4135 if (in_p
4136 && s390_loadrelative_operand_p (x, &symref, &offset)
4137 && mode == Pmode
4138 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4139 && (offset & 1) == 1)
4140 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4141 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4142
4143 /* Handle all the (mem (symref)) accesses we cannot use the z10
4144 instructions for. */
4145 if (MEM_P (x)
4146 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4147 && (mode == QImode
4148 || !reg_class_subset_p (rclass, GENERAL_REGS)
4149 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4150 || !s390_check_symref_alignment (XEXP (x, 0),
4151 GET_MODE_SIZE (mode))))
4152 {
4153 #define __SECONDARY_RELOAD_CASE(M,m) \
4154 case M##mode: \
4155 if (TARGET_64BIT) \
4156 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4157 CODE_FOR_reload##m##di_tomem_z10; \
4158 else \
4159 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4160 CODE_FOR_reload##m##si_tomem_z10; \
4161 break;
4162
4163 switch (GET_MODE (x))
4164 {
4165 __SECONDARY_RELOAD_CASE (QI, qi);
4166 __SECONDARY_RELOAD_CASE (HI, hi);
4167 __SECONDARY_RELOAD_CASE (SI, si);
4168 __SECONDARY_RELOAD_CASE (DI, di);
4169 __SECONDARY_RELOAD_CASE (TI, ti);
4170 __SECONDARY_RELOAD_CASE (SF, sf);
4171 __SECONDARY_RELOAD_CASE (DF, df);
4172 __SECONDARY_RELOAD_CASE (TF, tf);
4173 __SECONDARY_RELOAD_CASE (SD, sd);
4174 __SECONDARY_RELOAD_CASE (DD, dd);
4175 __SECONDARY_RELOAD_CASE (TD, td);
4176 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4177 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4178 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4179 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4180 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4181 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4182 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4183 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4184 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4185 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4186 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4187 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4188 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4189 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4190 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4191 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4192 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4193 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4194 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4195 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4196 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4197 default:
4198 gcc_unreachable ();
4199 }
4200 #undef __SECONDARY_RELOAD_CASE
4201 }
4202 }
4203
4204 /* We need a scratch register when loading a PLUS expression which
4205 is not a legitimate operand of the LOAD ADDRESS instruction. */
4206 /* LRA can deal with the transformation of a PLUS operand very well -- so
4207 we don't need to prompt LRA in this case. */
4208 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4209 sri->icode = (TARGET_64BIT ?
4210 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4211
4212 /* Performing a multiword move from or to memory we have to make sure the
4213 second chunk in memory is addressable without causing a displacement
4214 overflow. If that would be the case we calculate the address in
4215 a scratch register. */
4216 if (MEM_P (x)
4217 && GET_CODE (XEXP (x, 0)) == PLUS
4218 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4219 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4220 + GET_MODE_SIZE (mode) - 1))
4221 {
4222 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4223 in an s_operand address since we may fall back to lm/stm. So we only
4224 have to care about overflows in the b+i+d case. */
4225 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4226 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4227 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4228 /* For FP_REGS no lm/stm is available so this check is triggered
4229 for displacement overflows in b+i+d and b+d like addresses. */
4230 || (reg_classes_intersect_p (FP_REGS, rclass)
4231 && s390_class_max_nregs (FP_REGS, mode) > 1))
4232 {
4233 if (in_p)
4234 sri->icode = (TARGET_64BIT ?
4235 CODE_FOR_reloaddi_la_in :
4236 CODE_FOR_reloadsi_la_in);
4237 else
4238 sri->icode = (TARGET_64BIT ?
4239 CODE_FOR_reloaddi_la_out :
4240 CODE_FOR_reloadsi_la_out);
4241 }
4242 }
4243
4244 /* A scratch address register is needed when a symbolic constant is
4245 copied to r0 compiling with -fPIC. In other cases the target
4246 register might be used as temporary (see legitimize_pic_address). */
4247 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4248 sri->icode = (TARGET_64BIT ?
4249 CODE_FOR_reloaddi_PIC_addr :
4250 CODE_FOR_reloadsi_PIC_addr);
4251
4252 /* Either scratch or no register needed. */
4253 return NO_REGS;
4254 }
4255
4256 /* Generate code to load SRC, which is PLUS that is not a
4257 legitimate operand for the LA instruction, into TARGET.
4258 SCRATCH may be used as scratch register. */
4259
4260 void
4261 s390_expand_plus_operand (rtx target, rtx src,
4262 rtx scratch)
4263 {
4264 rtx sum1, sum2;
4265 struct s390_address ad;
4266
4267 /* src must be a PLUS; get its two operands. */
4268 gcc_assert (GET_CODE (src) == PLUS);
4269 gcc_assert (GET_MODE (src) == Pmode);
4270
4271 /* Check if any of the two operands is already scheduled
4272 for replacement by reload. This can happen e.g. when
4273 float registers occur in an address. */
4274 sum1 = find_replacement (&XEXP (src, 0));
4275 sum2 = find_replacement (&XEXP (src, 1));
4276 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4277
4278 /* If the address is already strictly valid, there's nothing to do. */
4279 if (!s390_decompose_address (src, &ad)
4280 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4281 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4282 {
4283 /* Otherwise, one of the operands cannot be an address register;
4284 we reload its value into the scratch register. */
4285 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4286 {
4287 emit_move_insn (scratch, sum1);
4288 sum1 = scratch;
4289 }
4290 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4291 {
4292 emit_move_insn (scratch, sum2);
4293 sum2 = scratch;
4294 }
4295
4296 /* According to the way these invalid addresses are generated
4297 in reload.c, it should never happen (at least on s390) that
4298 *neither* of the PLUS components, after find_replacements
4299 was applied, is an address register. */
4300 if (sum1 == scratch && sum2 == scratch)
4301 {
4302 debug_rtx (src);
4303 gcc_unreachable ();
4304 }
4305
4306 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4307 }
4308
4309 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4310 is only ever performed on addresses, so we can mark the
4311 sum as legitimate for LA in any case. */
4312 s390_load_address (target, src);
4313 }
4314
4315
4316 /* Return true if ADDR is a valid memory address.
4317 STRICT specifies whether strict register checking applies. */
4318
4319 static bool
4320 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4321 {
4322 struct s390_address ad;
4323
4324 if (TARGET_Z10
4325 && larl_operand (addr, VOIDmode)
4326 && (mode == VOIDmode
4327 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4328 return true;
4329
4330 if (!s390_decompose_address (addr, &ad))
4331 return false;
4332
4333 if (strict)
4334 {
4335 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4336 return false;
4337
4338 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4339 return false;
4340 }
4341 else
4342 {
4343 if (ad.base
4344 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4345 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4346 return false;
4347
4348 if (ad.indx
4349 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4350 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4351 return false;
4352 }
4353 return true;
4354 }
4355
4356 /* Return true if OP is a valid operand for the LA instruction.
4357 In 31-bit, we need to prove that the result is used as an
4358 address, as LA performs only a 31-bit addition. */
4359
4360 bool
4361 legitimate_la_operand_p (rtx op)
4362 {
4363 struct s390_address addr;
4364 if (!s390_decompose_address (op, &addr))
4365 return false;
4366
4367 return (TARGET_64BIT || addr.pointer);
4368 }
4369
4370 /* Return true if it is valid *and* preferable to use LA to
4371 compute the sum of OP1 and OP2. */
4372
4373 bool
4374 preferred_la_operand_p (rtx op1, rtx op2)
4375 {
4376 struct s390_address addr;
4377
4378 if (op2 != const0_rtx)
4379 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4380
4381 if (!s390_decompose_address (op1, &addr))
4382 return false;
4383 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4384 return false;
4385 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4386 return false;
4387
4388 /* Avoid LA instructions with index register on z196; it is
4389 preferable to use regular add instructions when possible.
4390 Starting with zEC12 the la with index register is "uncracked"
4391 again. */
4392 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4393 return false;
4394
4395 if (!TARGET_64BIT && !addr.pointer)
4396 return false;
4397
4398 if (addr.pointer)
4399 return true;
4400
4401 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4402 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4403 return true;
4404
4405 return false;
4406 }
4407
4408 /* Emit a forced load-address operation to load SRC into DST.
4409 This will use the LOAD ADDRESS instruction even in situations
4410 where legitimate_la_operand_p (SRC) returns false. */
4411
4412 void
4413 s390_load_address (rtx dst, rtx src)
4414 {
4415 if (TARGET_64BIT)
4416 emit_move_insn (dst, src);
4417 else
4418 emit_insn (gen_force_la_31 (dst, src));
4419 }
4420
4421 /* Return a legitimate reference for ORIG (an address) using the
4422 register REG. If REG is 0, a new pseudo is generated.
4423
4424 There are two types of references that must be handled:
4425
4426 1. Global data references must load the address from the GOT, via
4427 the PIC reg. An insn is emitted to do this load, and the reg is
4428 returned.
4429
4430 2. Static data references, constant pool addresses, and code labels
4431 compute the address as an offset from the GOT, whose base is in
4432 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4433 differentiate them from global data objects. The returned
4434 address is the PIC reg + an unspec constant.
4435
4436 TARGET_LEGITIMATE_ADDRESS_P rejects symbolic references unless the PIC
4437 reg also appears in the address. */
4438
4439 rtx
4440 legitimize_pic_address (rtx orig, rtx reg)
4441 {
4442 rtx addr = orig;
4443 rtx addend = const0_rtx;
4444 rtx new_rtx = orig;
4445
4446 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4447
4448 if (GET_CODE (addr) == CONST)
4449 addr = XEXP (addr, 0);
4450
4451 if (GET_CODE (addr) == PLUS)
4452 {
4453 addend = XEXP (addr, 1);
4454 addr = XEXP (addr, 0);
4455 }
4456
4457 if ((GET_CODE (addr) == LABEL_REF
4458 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
4459 || (GET_CODE (addr) == UNSPEC &&
4460 (XINT (addr, 1) == UNSPEC_GOTENT
4461 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4462 && GET_CODE (addend) == CONST_INT)
4463 {
4464 /* This can be locally addressed. */
4465
4466 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4467 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4468 gen_rtx_CONST (Pmode, addr) : addr);
4469
4470 if (TARGET_CPU_ZARCH
4471 && larl_operand (const_addr, VOIDmode)
4472 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
4473 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
4474 {
4475 if (INTVAL (addend) & 1)
4476 {
4477 /* LARL can't handle odd offsets, so emit a pair of LARL
4478 and LA. */
4479 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4480
4481 if (!DISP_IN_RANGE (INTVAL (addend)))
4482 {
4483 HOST_WIDE_INT even = INTVAL (addend) - 1;
4484 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4485 addr = gen_rtx_CONST (Pmode, addr);
4486 addend = const1_rtx;
4487 }
4488
4489 emit_move_insn (temp, addr);
4490 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4491
4492 if (reg != 0)
4493 {
4494 s390_load_address (reg, new_rtx);
4495 new_rtx = reg;
4496 }
4497 }
4498 else
4499 {
4500 /* If the offset is even, we can just use LARL. This
4501 will happen automatically. */
4502 }
4503 }
4504 else
4505 {
4506 /* No larl - Access local symbols relative to the GOT. */
4507
4508 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4509
4510 if (reload_in_progress || reload_completed)
4511 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4512
4513 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4514 if (addend != const0_rtx)
4515 addr = gen_rtx_PLUS (Pmode, addr, addend);
4516 addr = gen_rtx_CONST (Pmode, addr);
4517 addr = force_const_mem (Pmode, addr);
4518 emit_move_insn (temp, addr);
4519
4520 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4521 if (reg != 0)
4522 {
4523 s390_load_address (reg, new_rtx);
4524 new_rtx = reg;
4525 }
4526 }
4527 }
4528 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4529 {
4530 /* A non-local symbol reference without addend.
4531
4532 The symbol ref is wrapped into an UNSPEC to make sure the
4533 proper operand modifier (@GOT or @GOTENT) will be emitted.
4534 This will tell the linker to put the symbol into the GOT.
4535
4536 Additionally the code dereferencing the GOT slot is emitted here.
4537
4538 An addend to the symref needs to be added afterwards.
4539 legitimize_pic_address calls itself recursively to handle
4540 that case. So no need to do it here. */
4541
4542 if (reg == 0)
4543 reg = gen_reg_rtx (Pmode);
4544
4545 if (TARGET_Z10)
4546 {
4547 /* Use load relative if possible.
4548 lgrl <target>, sym@GOTENT */
4549 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4550 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4551 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4552
4553 emit_move_insn (reg, new_rtx);
4554 new_rtx = reg;
4555 }
4556 else if (flag_pic == 1)
4557 {
4558 /* Assume GOT offset is a valid displacement operand (< 4k
4559 or < 512k with z990). This is handled the same way in
4560 both 31- and 64-bit code (@GOT).
4561 lg <target>, sym@GOT(r12) */
4562
4563 if (reload_in_progress || reload_completed)
4564 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4565
4566 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4567 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4568 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4569 new_rtx = gen_const_mem (Pmode, new_rtx);
4570 emit_move_insn (reg, new_rtx);
4571 new_rtx = reg;
4572 }
4573 else if (TARGET_CPU_ZARCH)
4574 {
4575 /* If the GOT offset might be >= 4k, we determine the position
4576 of the GOT entry via a PC-relative LARL (@GOTENT).
4577 larl temp, sym@GOTENT
4578 lg <target>, 0(temp) */
4579
4580 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4581
4582 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4583 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4584
4585 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4586 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4587 emit_move_insn (temp, new_rtx);
4588
4589 new_rtx = gen_const_mem (Pmode, temp);
4590 emit_move_insn (reg, new_rtx);
4591
4592 new_rtx = reg;
4593 }
4594 else
4595 {
4596 /* If the GOT offset might be >= 4k, we have to load it
4597 from the literal pool (@GOT).
4598
4599 lg temp, lit-litbase(r13)
4600 lg <target>, 0(temp)
4601 lit: .long sym@GOT */
4602
4603 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4604
4605 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4606 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4607
4608 if (reload_in_progress || reload_completed)
4609 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4610
4611 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4612 addr = gen_rtx_CONST (Pmode, addr);
4613 addr = force_const_mem (Pmode, addr);
4614 emit_move_insn (temp, addr);
4615
4616 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4617 new_rtx = gen_const_mem (Pmode, new_rtx);
4618 emit_move_insn (reg, new_rtx);
4619 new_rtx = reg;
4620 }
4621 }
4622 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4623 {
4624 gcc_assert (XVECLEN (addr, 0) == 1);
4625 switch (XINT (addr, 1))
4626 {
4627 /* These address symbols (or PLT slots) relative to the GOT
4628 (not GOT slots!). In general this will exceed the
4629 displacement range so these values belong in the literal
4630 pool. */
4631 case UNSPEC_GOTOFF:
4632 case UNSPEC_PLTOFF:
4633 new_rtx = force_const_mem (Pmode, orig);
4634 break;
4635
4636 /* For -fPIC the GOT size might exceed the displacement
4637 range so make sure the value is in the literal pool. */
4638 case UNSPEC_GOT:
4639 if (flag_pic == 2)
4640 new_rtx = force_const_mem (Pmode, orig);
4641 break;
4642
4643 /* For @GOTENT larl is used. This is handled like local
4644 symbol refs. */
4645 case UNSPEC_GOTENT:
4646 gcc_unreachable ();
4647 break;
4648
4649 /* @PLT is OK as is on 64-bit, must be converted to
4650 GOT-relative @PLTOFF on 31-bit. */
4651 case UNSPEC_PLT:
4652 if (!TARGET_CPU_ZARCH)
4653 {
4654 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4655
4656 if (reload_in_progress || reload_completed)
4657 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4658
4659 addr = XVECEXP (addr, 0, 0);
4660 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4661 UNSPEC_PLTOFF);
4662 if (addend != const0_rtx)
4663 addr = gen_rtx_PLUS (Pmode, addr, addend);
4664 addr = gen_rtx_CONST (Pmode, addr);
4665 addr = force_const_mem (Pmode, addr);
4666 emit_move_insn (temp, addr);
4667
4668 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4669 if (reg != 0)
4670 {
4671 s390_load_address (reg, new_rtx);
4672 new_rtx = reg;
4673 }
4674 }
4675 else
4676 /* On 64 bit larl can be used. This case is handled like
4677 local symbol refs. */
4678 gcc_unreachable ();
4679 break;
4680
4681 /* Everything else cannot happen. */
4682 default:
4683 gcc_unreachable ();
4684 }
4685 }
4686 else if (addend != const0_rtx)
4687 {
4688 /* Otherwise, compute the sum. */
4689
4690 rtx base = legitimize_pic_address (addr, reg);
4691 new_rtx = legitimize_pic_address (addend,
4692 base == reg ? NULL_RTX : reg);
4693 if (GET_CODE (new_rtx) == CONST_INT)
4694 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4695 else
4696 {
4697 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4698 {
4699 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4700 new_rtx = XEXP (new_rtx, 1);
4701 }
4702 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4703 }
4704
4705 if (GET_CODE (new_rtx) == CONST)
4706 new_rtx = XEXP (new_rtx, 0);
4707 new_rtx = force_operand (new_rtx, 0);
4708 }
4709
4710 return new_rtx;
4711 }
4712
4713 /* Load the thread pointer into a register. */
4714
4715 rtx
4716 s390_get_thread_pointer (void)
4717 {
4718 rtx tp = gen_reg_rtx (Pmode);
4719
4720 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4721 mark_reg_pointer (tp, BITS_PER_WORD);
4722
4723 return tp;
4724 }
4725
4726 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4727 in s390_tls_symbol which always refers to __tls_get_offset.
4728 The returned offset is written to RESULT_REG and an USE rtx is
4729 generated for TLS_CALL. */
4730
4731 static GTY(()) rtx s390_tls_symbol;
4732
4733 static void
4734 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4735 {
4736 rtx insn;
4737
4738 if (!flag_pic)
4739 emit_insn (s390_load_got ());
4740
4741 if (!s390_tls_symbol)
4742 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4743
4744 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4745 gen_rtx_REG (Pmode, RETURN_REGNUM));
4746
4747 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4748 RTL_CONST_CALL_P (insn) = 1;
4749 }
4750
4751 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4752 this (thread-local) address. REG may be used as temporary. */
4753
4754 static rtx
4755 legitimize_tls_address (rtx addr, rtx reg)
4756 {
4757 rtx new_rtx, tls_call, temp, base, r2, insn;
4758
4759 if (GET_CODE (addr) == SYMBOL_REF)
4760 switch (tls_symbolic_operand (addr))
4761 {
4762 case TLS_MODEL_GLOBAL_DYNAMIC:
4763 start_sequence ();
4764 r2 = gen_rtx_REG (Pmode, 2);
4765 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4766 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4767 new_rtx = force_const_mem (Pmode, new_rtx);
4768 emit_move_insn (r2, new_rtx);
4769 s390_emit_tls_call_insn (r2, tls_call);
4770 insn = get_insns ();
4771 end_sequence ();
4772
4773 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4774 temp = gen_reg_rtx (Pmode);
4775 emit_libcall_block (insn, temp, r2, new_rtx);
4776
4777 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4778 if (reg != 0)
4779 {
4780 s390_load_address (reg, new_rtx);
4781 new_rtx = reg;
4782 }
4783 break;
4784
4785 case TLS_MODEL_LOCAL_DYNAMIC:
4786 start_sequence ();
4787 r2 = gen_rtx_REG (Pmode, 2);
4788 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
4789 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4790 new_rtx = force_const_mem (Pmode, new_rtx);
4791 emit_move_insn (r2, new_rtx);
4792 s390_emit_tls_call_insn (r2, tls_call);
4793 insn = get_insns ();
4794 end_sequence ();
4795
4796 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
4797 temp = gen_reg_rtx (Pmode);
4798 emit_libcall_block (insn, temp, r2, new_rtx);
4799
4800 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4801 base = gen_reg_rtx (Pmode);
4802 s390_load_address (base, new_rtx);
4803
4804 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
4805 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4806 new_rtx = force_const_mem (Pmode, new_rtx);
4807 temp = gen_reg_rtx (Pmode);
4808 emit_move_insn (temp, new_rtx);
4809
4810 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
4811 if (reg != 0)
4812 {
4813 s390_load_address (reg, new_rtx);
4814 new_rtx = reg;
4815 }
4816 break;
4817
4818 case TLS_MODEL_INITIAL_EXEC:
4819 if (flag_pic == 1)
4820 {
4821 /* Assume GOT offset < 4k. This is handled the same way
4822 in both 31- and 64-bit code. */
4823
4824 if (reload_in_progress || reload_completed)
4825 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4826
4827 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4828 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4829 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4830 new_rtx = gen_const_mem (Pmode, new_rtx);
4831 temp = gen_reg_rtx (Pmode);
4832 emit_move_insn (temp, new_rtx);
4833 }
4834 else if (TARGET_CPU_ZARCH)
4835 {
4836 /* If the GOT offset might be >= 4k, we determine the position
4837 of the GOT entry via a PC-relative LARL. */
4838
4839 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4840 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4841 temp = gen_reg_rtx (Pmode);
4842 emit_move_insn (temp, new_rtx);
4843
4844 new_rtx = gen_const_mem (Pmode, temp);
4845 temp = gen_reg_rtx (Pmode);
4846 emit_move_insn (temp, new_rtx);
4847 }
4848 else if (flag_pic)
4849 {
4850 /* If the GOT offset might be >= 4k, we have to load it
4851 from the literal pool. */
4852
4853 if (reload_in_progress || reload_completed)
4854 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4855
4856 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4857 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4858 new_rtx = force_const_mem (Pmode, new_rtx);
4859 temp = gen_reg_rtx (Pmode);
4860 emit_move_insn (temp, new_rtx);
4861
4862 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4863 new_rtx = gen_const_mem (Pmode, new_rtx);
4864
4865 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4866 temp = gen_reg_rtx (Pmode);
4867 emit_insn (gen_rtx_SET (temp, new_rtx));
4868 }
4869 else
4870 {
4871 /* In position-dependent code, load the absolute address of
4872 the GOT entry from the literal pool. */
4873
4874 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4875 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4876 new_rtx = force_const_mem (Pmode, new_rtx);
4877 temp = gen_reg_rtx (Pmode);
4878 emit_move_insn (temp, new_rtx);
4879
4880 new_rtx = temp;
4881 new_rtx = gen_const_mem (Pmode, new_rtx);
4882 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4883 temp = gen_reg_rtx (Pmode);
4884 emit_insn (gen_rtx_SET (temp, new_rtx));
4885 }
4886
4887 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4888 if (reg != 0)
4889 {
4890 s390_load_address (reg, new_rtx);
4891 new_rtx = reg;
4892 }
4893 break;
4894
4895 case TLS_MODEL_LOCAL_EXEC:
4896 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4897 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4898 new_rtx = force_const_mem (Pmode, new_rtx);
4899 temp = gen_reg_rtx (Pmode);
4900 emit_move_insn (temp, new_rtx);
4901
4902 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4903 if (reg != 0)
4904 {
4905 s390_load_address (reg, new_rtx);
4906 new_rtx = reg;
4907 }
4908 break;
4909
4910 default:
4911 gcc_unreachable ();
4912 }
4913
4914 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
4915 {
4916 switch (XINT (XEXP (addr, 0), 1))
4917 {
4918 case UNSPEC_INDNTPOFF:
4919 gcc_assert (TARGET_CPU_ZARCH);
4920 new_rtx = addr;
4921 break;
4922
4923 default:
4924 gcc_unreachable ();
4925 }
4926 }
4927
4928 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
4929 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4930 {
4931 new_rtx = XEXP (XEXP (addr, 0), 0);
4932 if (GET_CODE (new_rtx) != SYMBOL_REF)
4933 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4934
4935 new_rtx = legitimize_tls_address (new_rtx, reg);
4936 new_rtx = plus_constant (Pmode, new_rtx,
4937 INTVAL (XEXP (XEXP (addr, 0), 1)));
4938 new_rtx = force_operand (new_rtx, 0);
4939 }
4940
4941 else
4942 gcc_unreachable (); /* for now ... */
4943
4944 return new_rtx;
4945 }
4946
4947 /* Emit insns making the address in operands[1] valid for a standard
4948 move to operands[0]. operands[1] is replaced by an address which
4949 should be used instead of the former RTX to emit the move
4950 pattern. */
4951
4952 void
4953 emit_symbolic_move (rtx *operands)
4954 {
4955 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
4956
4957 if (GET_CODE (operands[0]) == MEM)
4958 operands[1] = force_reg (Pmode, operands[1]);
4959 else if (TLS_SYMBOLIC_CONST (operands[1]))
4960 operands[1] = legitimize_tls_address (operands[1], temp);
4961 else if (flag_pic)
4962 operands[1] = legitimize_pic_address (operands[1], temp);
4963 }
4964
4965 /* Try machine-dependent ways of modifying an illegitimate address X
4966 to be legitimate. If we find one, return the new, valid address.
4967
4968 OLDX is the address as it was before break_out_memory_refs was called.
4969 In some cases it is useful to look at this to decide what needs to be done.
4970
4971 MODE is the mode of the operand pointed to by X.
4972
4973 When -fpic is used, special handling is needed for symbolic references.
4974 See comments by legitimize_pic_address for details. */
4975
4976 static rtx
4977 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4978 machine_mode mode ATTRIBUTE_UNUSED)
4979 {
4980 rtx constant_term = const0_rtx;
4981
4982 if (TLS_SYMBOLIC_CONST (x))
4983 {
4984 x = legitimize_tls_address (x, 0);
4985
4986 if (s390_legitimate_address_p (mode, x, FALSE))
4987 return x;
4988 }
4989 else if (GET_CODE (x) == PLUS
4990 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
4991 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
4992 {
4993 return x;
4994 }
4995 else if (flag_pic)
4996 {
4997 if (SYMBOLIC_CONST (x)
4998 || (GET_CODE (x) == PLUS
4999 && (SYMBOLIC_CONST (XEXP (x, 0))
5000 || SYMBOLIC_CONST (XEXP (x, 1)))))
5001 x = legitimize_pic_address (x, 0);
5002
5003 if (s390_legitimate_address_p (mode, x, FALSE))
5004 return x;
5005 }
5006
5007 x = eliminate_constant_term (x, &constant_term);
5008
5009 /* Optimize loading of large displacements by splitting them
5010 into the multiple of 4K and the rest; this allows the
5011 former to be CSE'd if possible.
5012
5013 Don't do this if the displacement is added to a register
5014 pointing into the stack frame, as the offsets will
5015 change later anyway. */
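/* Worked example (illustrative, not part of the original source):
   a displacement of 0x12345 gives lower = 0x12345 & 0xfff = 0x345 and
   upper = 0x12345 ^ 0x345 = 0x12000; the 0x12000 part is loaded into a
   pseudo (and can be CSE'd across several accesses), while 0x345 still
   fits the 12-bit displacement field.  */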
5016
5017 if (GET_CODE (constant_term) == CONST_INT
5018 && !TARGET_LONG_DISPLACEMENT
5019 && !DISP_IN_RANGE (INTVAL (constant_term))
5020 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5021 {
5022 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5023 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5024
5025 rtx temp = gen_reg_rtx (Pmode);
5026 rtx val = force_operand (GEN_INT (upper), temp);
5027 if (val != temp)
5028 emit_move_insn (temp, val);
5029
5030 x = gen_rtx_PLUS (Pmode, x, temp);
5031 constant_term = GEN_INT (lower);
5032 }
5033
5034 if (GET_CODE (x) == PLUS)
5035 {
5036 if (GET_CODE (XEXP (x, 0)) == REG)
5037 {
5038 rtx temp = gen_reg_rtx (Pmode);
5039 rtx val = force_operand (XEXP (x, 1), temp);
5040 if (val != temp)
5041 emit_move_insn (temp, val);
5042
5043 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5044 }
5045
5046 else if (GET_CODE (XEXP (x, 1)) == REG)
5047 {
5048 rtx temp = gen_reg_rtx (Pmode);
5049 rtx val = force_operand (XEXP (x, 0), temp);
5050 if (val != temp)
5051 emit_move_insn (temp, val);
5052
5053 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5054 }
5055 }
5056
5057 if (constant_term != const0_rtx)
5058 x = gen_rtx_PLUS (Pmode, x, constant_term);
5059
5060 return x;
5061 }
5062
5063 /* Try a machine-dependent way of reloading an illegitimate address AD
5064 operand. If we find one, push the reload and return the new address.
5065
5066 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5067 and TYPE is the reload type of the current reload. */
5068
5069 rtx
5070 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5071 int opnum, int type)
5072 {
5073 if (!optimize || TARGET_LONG_DISPLACEMENT)
5074 return NULL_RTX;
5075
5076 if (GET_CODE (ad) == PLUS)
5077 {
5078 rtx tem = simplify_binary_operation (PLUS, Pmode,
5079 XEXP (ad, 0), XEXP (ad, 1));
5080 if (tem)
5081 ad = tem;
5082 }
5083
5084 if (GET_CODE (ad) == PLUS
5085 && GET_CODE (XEXP (ad, 0)) == REG
5086 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5087 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5088 {
5089 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5090 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5091 rtx cst, tem, new_rtx;
5092
5093 cst = GEN_INT (upper);
5094 if (!legitimate_reload_constant_p (cst))
5095 cst = force_const_mem (Pmode, cst);
5096
5097 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5098 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5099
5100 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5101 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5102 opnum, (enum reload_type) type);
5103 return new_rtx;
5104 }
5105
5106 return NULL_RTX;
5107 }
5108
5109 /* Emit code to move LEN bytes from SRC to DST. */
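/* Illustrative walk-through of the loop variant below (not part of the
   original source): for a constant length of 600 bytes we get
   count = 599 and blocks = count >> 8 = 2, so the loop issues two
   256-byte MVCs; the final movmem_short then covers the remaining
   600 - 512 = 88 bytes (the executed MVC takes its length from the low
   8 bits of count, i.e. 87).  */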
5110
5111 bool
5112 s390_expand_movmem (rtx dst, rtx src, rtx len)
5113 {
5114 /* When tuning for z10 or higher we rely on the Glibc functions to
5115 do the right thing. Only for constant lengths of at most 64k do we
5116 generate inline code. */
5117 if (s390_tune >= PROCESSOR_2097_Z10
5118 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5119 return false;
5120
5121 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5122 {
5123 if (INTVAL (len) > 0)
5124 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
5125 }
5126
5127 else if (TARGET_MVCLE)
5128 {
5129 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5130 }
5131
5132 else
5133 {
5134 rtx dst_addr, src_addr, count, blocks, temp;
5135 rtx_code_label *loop_start_label = gen_label_rtx ();
5136 rtx_code_label *loop_end_label = gen_label_rtx ();
5137 rtx_code_label *end_label = gen_label_rtx ();
5138 machine_mode mode;
5139
5140 mode = GET_MODE (len);
5141 if (mode == VOIDmode)
5142 mode = Pmode;
5143
5144 dst_addr = gen_reg_rtx (Pmode);
5145 src_addr = gen_reg_rtx (Pmode);
5146 count = gen_reg_rtx (mode);
5147 blocks = gen_reg_rtx (mode);
5148
5149 convert_move (count, len, 1);
5150 emit_cmp_and_jump_insns (count, const0_rtx,
5151 EQ, NULL_RTX, mode, 1, end_label);
5152
5153 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5154 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5155 dst = change_address (dst, VOIDmode, dst_addr);
5156 src = change_address (src, VOIDmode, src_addr);
5157
5158 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5159 OPTAB_DIRECT);
5160 if (temp != count)
5161 emit_move_insn (count, temp);
5162
5163 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5164 OPTAB_DIRECT);
5165 if (temp != blocks)
5166 emit_move_insn (blocks, temp);
5167
5168 emit_cmp_and_jump_insns (blocks, const0_rtx,
5169 EQ, NULL_RTX, mode, 1, loop_end_label);
5170
5171 emit_label (loop_start_label);
5172
5173 if (TARGET_Z10
5174 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5175 {
5176 rtx prefetch;
5177
5178 /* Issue a read prefetch for the +3 cache line. */
5179 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5180 const0_rtx, const0_rtx);
5181 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5182 emit_insn (prefetch);
5183
5184 /* Issue a write prefetch for the +3 cache line. */
5185 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5186 const1_rtx, const0_rtx);
5187 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5188 emit_insn (prefetch);
5189 }
5190
5191 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5192 s390_load_address (dst_addr,
5193 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5194 s390_load_address (src_addr,
5195 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5196
5197 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5198 OPTAB_DIRECT);
5199 if (temp != blocks)
5200 emit_move_insn (blocks, temp);
5201
5202 emit_cmp_and_jump_insns (blocks, const0_rtx,
5203 EQ, NULL_RTX, mode, 1, loop_end_label);
5204
5205 emit_jump (loop_start_label);
5206 emit_label (loop_end_label);
5207
5208 emit_insn (gen_movmem_short (dst, src,
5209 convert_to_mode (Pmode, count, 1)));
5210 emit_label (end_label);
5211 }
5212 return true;
5213 }
5214
5215 /* Emit code to set LEN bytes at DST to VAL.
5216 Make use of clrmem if VAL is zero. */
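/* Illustrative example (not part of the original source): to set 8
   bytes at DST to the value 0x55, the code below first stores the
   single byte DST[0] = 0x55 and then emits an overlapping MVC that
   copies 7 bytes from DST to DST+1.  Since MVC moves its operand byte
   by byte from left to right, the stored byte is propagated through
   the whole destination.  */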
5217
5218 void
5219 s390_expand_setmem (rtx dst, rtx len, rtx val)
5220 {
5221 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
5222 return;
5223
5224 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5225
5226 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
5227 {
5228 if (val == const0_rtx && INTVAL (len) <= 256)
5229 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
5230 else
5231 {
5232 /* Initialize memory by storing the first byte. */
5233 emit_move_insn (adjust_address (dst, QImode, 0), val);
5234
5235 if (INTVAL (len) > 1)
5236 {
5237 /* Initiate a 1-byte overlapping move.
5238 The first byte of DST is propagated through DSTP1.
5239 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
5240 DST is set to size 1 so the rest of the memory location
5241 does not count as source operand. */
5242 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
5243 set_mem_size (dst, 1);
5244
5245 emit_insn (gen_movmem_short (dstp1, dst,
5246 GEN_INT (INTVAL (len) - 2)));
5247 }
5248 }
5249 }
5250
5251 else if (TARGET_MVCLE)
5252 {
5253 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5254 if (TARGET_64BIT)
5255 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5256 val));
5257 else
5258 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5259 val));
5260 }
5261
5262 else
5263 {
5264 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5265 rtx_code_label *loop_start_label = gen_label_rtx ();
5266 rtx_code_label *loop_end_label = gen_label_rtx ();
5267 rtx_code_label *end_label = gen_label_rtx ();
5268 machine_mode mode;
5269
5270 mode = GET_MODE (len);
5271 if (mode == VOIDmode)
5272 mode = Pmode;
5273
5274 dst_addr = gen_reg_rtx (Pmode);
5275 count = gen_reg_rtx (mode);
5276 blocks = gen_reg_rtx (mode);
5277
5278 convert_move (count, len, 1);
5279 emit_cmp_and_jump_insns (count, const0_rtx,
5280 EQ, NULL_RTX, mode, 1, end_label);
5281
5282 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5283 dst = change_address (dst, VOIDmode, dst_addr);
5284
5285 if (val == const0_rtx)
5286 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5287 OPTAB_DIRECT);
5288 else
5289 {
5290 dstp1 = adjust_address (dst, VOIDmode, 1);
5291 set_mem_size (dst, 1);
5292
5293 /* Initialize memory by storing the first byte. */
5294 emit_move_insn (adjust_address (dst, QImode, 0), val);
5295
5296 /* If count is 1 we are done. */
5297 emit_cmp_and_jump_insns (count, const1_rtx,
5298 EQ, NULL_RTX, mode, 1, end_label);
5299
5300 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
5301 OPTAB_DIRECT);
5302 }
5303 if (temp != count)
5304 emit_move_insn (count, temp);
5305
5306 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5307 OPTAB_DIRECT);
5308 if (temp != blocks)
5309 emit_move_insn (blocks, temp);
5310
5311 emit_cmp_and_jump_insns (blocks, const0_rtx,
5312 EQ, NULL_RTX, mode, 1, loop_end_label);
5313
5314 emit_label (loop_start_label);
5315
5316 if (TARGET_Z10
5317 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5318 {
5319 /* Issue a write prefetch for the +4 cache line. */
5320 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5321 GEN_INT (1024)),
5322 const1_rtx, const0_rtx);
5323 emit_insn (prefetch);
5324 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5325 }
5326
5327 if (val == const0_rtx)
5328 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5329 else
5330 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
5331 s390_load_address (dst_addr,
5332 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5333
5334 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5335 OPTAB_DIRECT);
5336 if (temp != blocks)
5337 emit_move_insn (blocks, temp);
5338
5339 emit_cmp_and_jump_insns (blocks, const0_rtx,
5340 EQ, NULL_RTX, mode, 1, loop_end_label);
5341
5342 emit_jump (loop_start_label);
5343 emit_label (loop_end_label);
5344
5345 if (val == const0_rtx)
5346 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5347 else
5348 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
5349 emit_label (end_label);
5350 }
5351 }
5352
5353 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5354 and return the result in TARGET. */
5355
5356 bool
5357 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5358 {
5359 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5360 rtx tmp;
5361
5362 /* When tuning for z10 or higher we rely on the Glibc functions to
5363 do the right thing. Only for constant lengths of at most 64k do we
5364 generate inline code. */
5365 if (s390_tune >= PROCESSOR_2097_Z10
5366 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5367 return false;
5368
5369 /* As the result of CMPINT is inverted compared to what we need,
5370 we have to swap the operands. */
5371 tmp = op0; op0 = op1; op1 = tmp;
5372
5373 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5374 {
5375 if (INTVAL (len) > 0)
5376 {
5377 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5378 emit_insn (gen_cmpint (target, ccreg));
5379 }
5380 else
5381 emit_move_insn (target, const0_rtx);
5382 }
5383 else if (TARGET_MVCLE)
5384 {
5385 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5386 emit_insn (gen_cmpint (target, ccreg));
5387 }
5388 else
5389 {
5390 rtx addr0, addr1, count, blocks, temp;
5391 rtx_code_label *loop_start_label = gen_label_rtx ();
5392 rtx_code_label *loop_end_label = gen_label_rtx ();
5393 rtx_code_label *end_label = gen_label_rtx ();
5394 machine_mode mode;
5395
5396 mode = GET_MODE (len);
5397 if (mode == VOIDmode)
5398 mode = Pmode;
5399
5400 addr0 = gen_reg_rtx (Pmode);
5401 addr1 = gen_reg_rtx (Pmode);
5402 count = gen_reg_rtx (mode);
5403 blocks = gen_reg_rtx (mode);
5404
5405 convert_move (count, len, 1);
5406 emit_cmp_and_jump_insns (count, const0_rtx,
5407 EQ, NULL_RTX, mode, 1, end_label);
5408
5409 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5410 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5411 op0 = change_address (op0, VOIDmode, addr0);
5412 op1 = change_address (op1, VOIDmode, addr1);
5413
5414 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5415 OPTAB_DIRECT);
5416 if (temp != count)
5417 emit_move_insn (count, temp);
5418
5419 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5420 OPTAB_DIRECT);
5421 if (temp != blocks)
5422 emit_move_insn (blocks, temp);
5423
5424 emit_cmp_and_jump_insns (blocks, const0_rtx,
5425 EQ, NULL_RTX, mode, 1, loop_end_label);
5426
5427 emit_label (loop_start_label);
5428
5429 if (TARGET_Z10
5430 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5431 {
5432 rtx prefetch;
5433
5434 /* Issue a read prefetch for the +2 cache line of operand 1. */
5435 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5436 const0_rtx, const0_rtx);
5437 emit_insn (prefetch);
5438 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5439
5440 /* Issue a read prefetch for the +2 cache line of operand 2. */
5441 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5442 const0_rtx, const0_rtx);
5443 emit_insn (prefetch);
5444 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5445 }
5446
5447 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5448 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5449 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5450 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5451 temp = gen_rtx_SET (pc_rtx, temp);
5452 emit_jump_insn (temp);
5453
5454 s390_load_address (addr0,
5455 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5456 s390_load_address (addr1,
5457 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5458
5459 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5460 OPTAB_DIRECT);
5461 if (temp != blocks)
5462 emit_move_insn (blocks, temp);
5463
5464 emit_cmp_and_jump_insns (blocks, const0_rtx,
5465 EQ, NULL_RTX, mode, 1, loop_end_label);
5466
5467 emit_jump (loop_start_label);
5468 emit_label (loop_end_label);
5469
5470 emit_insn (gen_cmpmem_short (op0, op1,
5471 convert_to_mode (Pmode, count, 1)));
5472 emit_label (end_label);
5473
5474 emit_insn (gen_cmpint (target, ccreg));
5475 }
5476 return true;
5477 }
5478
5479 /* Emit a conditional jump to LABEL for condition code mask MASK using
5480 comparison operator COMPARISON. Return the emitted jump insn. */
5481
5482 static rtx
5483 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5484 {
5485 rtx temp;
5486
5487 gcc_assert (comparison == EQ || comparison == NE);
5488 gcc_assert (mask > 0 && mask < 15);
5489
5490 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5491 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5492 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5493 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5494 temp = gen_rtx_SET (pc_rtx, temp);
5495 return emit_jump_insn (temp);
5496 }
5497
5498 /* Emit the instructions to implement strlen of STRING and store the
5499 result in TARGET. The string has the known ALIGNMENT. This
5500 version uses vector instructions and is therefore not appropriate
5501 for targets prior to z13. */
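/* Rough C-level sketch of the expansion below (illustrative only, not
   part of the original source):

       idx = 0;
       if (the address is not 16-byte aligned)
	 load the bytes up to the next 16-byte boundary with vll;
       for (;;)
	 {
	   str = 16 aligned bytes at addr + idx;      /+ vl +/
	   idx += 16;
	   len = index of first zero byte in str;     /+ vfenez +/
	   if (a zero byte was found)
	     break;
	 }
       result = idx - 16 + len;

   plus a fix-up for the partially loaded first chunk, where vll may
   have zero-filled bytes that are not actually part of the string.  */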
5502
5503 void
5504 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5505 {
5506 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5507 int very_likely = REG_BR_PROB_BASE - 1;
5508 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5509 rtx str_reg = gen_reg_rtx (V16QImode);
5510 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5511 rtx str_idx_reg = gen_reg_rtx (Pmode);
5512 rtx result_reg = gen_reg_rtx (V16QImode);
5513 rtx is_aligned_label = gen_label_rtx ();
5514 rtx into_loop_label = NULL_RTX;
5515 rtx loop_start_label = gen_label_rtx ();
5516 rtx temp;
5517 rtx len = gen_reg_rtx (QImode);
5518 rtx cond;
5519
5520 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5521 emit_move_insn (str_idx_reg, const0_rtx);
5522
5523 if (INTVAL (alignment) < 16)
5524 {
5525 /* Check whether the address happens to be aligned properly, so we
5526 can jump directly to the aligned loop. */
5527 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5528 str_addr_base_reg, GEN_INT (15)),
5529 const0_rtx, EQ, NULL_RTX,
5530 Pmode, 1, is_aligned_label);
5531
5532 temp = gen_reg_rtx (Pmode);
5533 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5534 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5535 gcc_assert (REG_P (temp));
5536 highest_index_to_load_reg =
5537 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5538 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5539 gcc_assert (REG_P (highest_index_to_load_reg));
5540 emit_insn (gen_vllv16qi (str_reg,
5541 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5542 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5543
5544 into_loop_label = gen_label_rtx ();
5545 s390_emit_jump (into_loop_label, NULL_RTX);
5546 emit_barrier ();
5547 }
5548
5549 emit_label (is_aligned_label);
5550 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5551
5552 /* Reaching this point we only perform 16-byte aligned
5553 loads. */
5554 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5555
5556 emit_label (loop_start_label);
5557 LABEL_NUSES (loop_start_label) = 1;
5558
5559 /* Load 16 bytes of the string into VR. */
5560 emit_move_insn (str_reg,
5561 gen_rtx_MEM (V16QImode,
5562 gen_rtx_PLUS (Pmode, str_idx_reg,
5563 str_addr_base_reg)));
5564 if (into_loop_label != NULL_RTX)
5565 {
5566 emit_label (into_loop_label);
5567 LABEL_NUSES (into_loop_label) = 1;
5568 }
5569
5570 /* Increment string index by 16 bytes. */
5571 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5572 str_idx_reg, 1, OPTAB_DIRECT);
5573
5574 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5575 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5576
5577 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5578 REG_BR_PROB, very_likely);
5579 emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
5580
5581 /* If the string pointer wasn't aligned we have loaded less than 16
5582 bytes and the remaining bytes got filled with zeros (by vll).
5583 Now we have to check whether the resulting index lies within the
5584 bytes actually part of the string. */
5585
5586 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5587 highest_index_to_load_reg);
5588 s390_load_address (highest_index_to_load_reg,
5589 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5590 const1_rtx));
5591 if (TARGET_64BIT)
5592 emit_insn (gen_movdicc (str_idx_reg, cond,
5593 highest_index_to_load_reg, str_idx_reg));
5594 else
5595 emit_insn (gen_movsicc (str_idx_reg, cond,
5596 highest_index_to_load_reg, str_idx_reg));
5597
5598 add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
5599 very_unlikely);
5600
5601 expand_binop (Pmode, add_optab, str_idx_reg,
5602 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5603 /* FIXME: len is already zero extended - so avoid the llgcr emitted
5604 here. */
5605 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5606 convert_to_mode (Pmode, len, 1),
5607 target, 1, OPTAB_DIRECT);
5608 if (temp != target)
5609 emit_move_insn (target, temp);
5610 }
5611
5612 /* Expand conditional increment or decrement using alc/slb instructions.
5613 Should generate code setting DST to either SRC or SRC + INCREMENT,
5614 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
5615 Returns true if successful, false otherwise.
5616
5617 That makes it possible to implement some if-constructs without jumps e.g.:
5618 (borrow = CC0 | CC1 and carry = CC2 | CC3)
5619 unsigned int a, b, c;
5620 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
5621 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
5622 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
5623 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
5624
5625 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
5626 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
5627 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
5628 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
5629 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
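/* Illustrative example (not part of the original source): for
   "if (a < b) c++;" with unsigned SImode operands the code below swaps
   the comparison to b > a in CCUmode (so that the CC encodes the
   carry) and then emits a single insn of the form

       (parallel
	 [(set (reg c)
	       (plus (plus (gtu (reg CC) (const_int 0)) (reg c))
		     (const_int 0)))
	  (clobber (reg CC))])

   which is intended to be matched by the ADD LOGICAL WITH CARRY (alc)
   patterns in s390.md.  */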
5630
5631 bool
5632 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
5633 rtx dst, rtx src, rtx increment)
5634 {
5635 machine_mode cmp_mode;
5636 machine_mode cc_mode;
5637 rtx op_res;
5638 rtx insn;
5639 rtvec p;
5640 int ret;
5641
5642 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
5643 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
5644 cmp_mode = SImode;
5645 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
5646 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
5647 cmp_mode = DImode;
5648 else
5649 return false;
5650
5651 /* Try ADD LOGICAL WITH CARRY. */
5652 if (increment == const1_rtx)
5653 {
5654 /* Determine CC mode to use. */
5655 if (cmp_code == EQ || cmp_code == NE)
5656 {
5657 if (cmp_op1 != const0_rtx)
5658 {
5659 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5660 NULL_RTX, 0, OPTAB_WIDEN);
5661 cmp_op1 = const0_rtx;
5662 }
5663
5664 cmp_code = cmp_code == EQ ? LEU : GTU;
5665 }
5666
5667 if (cmp_code == LTU || cmp_code == LEU)
5668 {
5669 rtx tem = cmp_op0;
5670 cmp_op0 = cmp_op1;
5671 cmp_op1 = tem;
5672 cmp_code = swap_condition (cmp_code);
5673 }
5674
5675 switch (cmp_code)
5676 {
5677 case GTU:
5678 cc_mode = CCUmode;
5679 break;
5680
5681 case GEU:
5682 cc_mode = CCL3mode;
5683 break;
5684
5685 default:
5686 return false;
5687 }
5688
5689 /* Emit comparison instruction pattern. */
5690 if (!register_operand (cmp_op0, cmp_mode))
5691 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5692
5693 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5694 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5695 /* We use insn_invalid_p here to add clobbers if required. */
5696 ret = insn_invalid_p (emit_insn (insn), false);
5697 gcc_assert (!ret);
5698
5699 /* Emit ALC instruction pattern. */
5700 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5701 gen_rtx_REG (cc_mode, CC_REGNUM),
5702 const0_rtx);
5703
5704 if (src != const0_rtx)
5705 {
5706 if (!register_operand (src, GET_MODE (dst)))
5707 src = force_reg (GET_MODE (dst), src);
5708
5709 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
5710 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
5711 }
5712
5713 p = rtvec_alloc (2);
5714 RTVEC_ELT (p, 0) =
5715 gen_rtx_SET (dst, op_res);
5716 RTVEC_ELT (p, 1) =
5717 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5718 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5719
5720 return true;
5721 }
5722
5723 /* Try SUBTRACT LOGICAL WITH BORROW. */
5724 if (increment == constm1_rtx)
5725 {
5726 /* Determine CC mode to use. */
5727 if (cmp_code == EQ || cmp_code == NE)
5728 {
5729 if (cmp_op1 != const0_rtx)
5730 {
5731 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5732 NULL_RTX, 0, OPTAB_WIDEN);
5733 cmp_op1 = const0_rtx;
5734 }
5735
5736 cmp_code = cmp_code == EQ ? LEU : GTU;
5737 }
5738
5739 if (cmp_code == GTU || cmp_code == GEU)
5740 {
5741 rtx tem = cmp_op0;
5742 cmp_op0 = cmp_op1;
5743 cmp_op1 = tem;
5744 cmp_code = swap_condition (cmp_code);
5745 }
5746
5747 switch (cmp_code)
5748 {
5749 case LEU:
5750 cc_mode = CCUmode;
5751 break;
5752
5753 case LTU:
5754 cc_mode = CCL3mode;
5755 break;
5756
5757 default:
5758 return false;
5759 }
5760
5761 /* Emit comparison instruction pattern. */
5762 if (!register_operand (cmp_op0, cmp_mode))
5763 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5764
5765 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5766 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5767 /* We use insn_invalid_p here to add clobbers if required. */
5768 ret = insn_invalid_p (emit_insn (insn), false);
5769 gcc_assert (!ret);
5770
5771 /* Emit SLB instruction pattern. */
5772 if (!register_operand (src, GET_MODE (dst)))
5773 src = force_reg (GET_MODE (dst), src);
5774
5775 op_res = gen_rtx_MINUS (GET_MODE (dst),
5776 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
5777 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5778 gen_rtx_REG (cc_mode, CC_REGNUM),
5779 const0_rtx));
5780 p = rtvec_alloc (2);
5781 RTVEC_ELT (p, 0) =
5782 gen_rtx_SET (dst, op_res);
5783 RTVEC_ELT (p, 1) =
5784 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5785 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5786
5787 return true;
5788 }
5789
5790 return false;
5791 }
5792
5793 /* Expand code for the insv template. Return true if successful. */
5794
5795 bool
5796 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
5797 {
5798 int bitsize = INTVAL (op1);
5799 int bitpos = INTVAL (op2);
5800 machine_mode mode = GET_MODE (dest);
5801 machine_mode smode;
5802 int smode_bsize, mode_bsize;
5803 rtx op, clobber;
5804
5805 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
5806 return false;
5807
5808 /* Generate INSERT IMMEDIATE (IILL et al). */
5809 /* (set (ze (reg)) (const_int)). */
5810 if (TARGET_ZARCH
5811 && register_operand (dest, word_mode)
5812 && (bitpos % 16) == 0
5813 && (bitsize % 16) == 0
5814 && const_int_operand (src, VOIDmode))
5815 {
5816 HOST_WIDE_INT val = INTVAL (src);
5817 int regpos = bitpos + bitsize;
5818
5819 while (regpos > bitpos)
5820 {
5821 machine_mode putmode;
5822 int putsize;
5823
5824 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
5825 putmode = SImode;
5826 else
5827 putmode = HImode;
5828
5829 putsize = GET_MODE_BITSIZE (putmode);
5830 regpos -= putsize;
5831 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
5832 GEN_INT (putsize),
5833 GEN_INT (regpos)),
5834 gen_int_mode (val, putmode));
5835 val >>= putsize;
5836 }
5837 gcc_assert (regpos == bitpos);
5838 return true;
5839 }
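  /* A rough illustration of the TARGET_ZARCH block above (values chosen
     for the example only): with BITPOS == 0, BITSIZE == 64 and
     SRC == 0x1122334455667788 on a TARGET_EXTIMM machine, the loop emits
     two 32-bit immediate inserts instead of four 16-bit ones, first for
     the low-order 32 bits of SRC (0x55667788) and then for the high-order
     32 bits (0x11223344), e.g. an insert-immediate pair such as
     iilf/iihf.  */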
5840
5841 smode = smallest_mode_for_size (bitsize, MODE_INT);
5842 smode_bsize = GET_MODE_BITSIZE (smode);
5843 mode_bsize = GET_MODE_BITSIZE (mode);
5844
5845 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
5846 if (bitpos == 0
5847 && (bitsize % BITS_PER_UNIT) == 0
5848 && MEM_P (dest)
5849 && (register_operand (src, word_mode)
5850 || const_int_operand (src, VOIDmode)))
5851 {
5852 /* Emit standard pattern if possible. */
5853 if (smode_bsize == bitsize)
5854 {
5855 emit_move_insn (adjust_address (dest, smode, 0),
5856 gen_lowpart (smode, src));
5857 return true;
5858 }
5859
5860 /* (set (ze (mem)) (const_int)). */
5861 else if (const_int_operand (src, VOIDmode))
5862 {
5863 int size = bitsize / BITS_PER_UNIT;
5864 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
5865 BLKmode,
5866 UNITS_PER_WORD - size);
5867
5868 dest = adjust_address (dest, BLKmode, 0);
5869 set_mem_size (dest, size);
5870 s390_expand_movmem (dest, src_mem, GEN_INT (size));
5871 return true;
5872 }
5873
5874 /* (set (ze (mem)) (reg)). */
5875 else if (register_operand (src, word_mode))
5876 {
5877 if (bitsize <= 32)
5878 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
5879 const0_rtx), src);
5880 else
5881 {
5882 /* Emit st,stcmh sequence. */
5883 int stcmh_width = bitsize - 32;
5884 int size = stcmh_width / BITS_PER_UNIT;
5885
5886 emit_move_insn (adjust_address (dest, SImode, size),
5887 gen_lowpart (SImode, src));
5888 set_mem_size (dest, size);
5889 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
5890 GEN_INT (stcmh_width),
5891 const0_rtx),
5892 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
5893 }
5894 return true;
5895 }
5896 }
5897
5898 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
5899 if ((bitpos % BITS_PER_UNIT) == 0
5900 && (bitsize % BITS_PER_UNIT) == 0
5901 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
5902 && MEM_P (src)
5903 && (mode == DImode || mode == SImode)
5904 && register_operand (dest, mode))
5905 {
5906 /* Emit a strict_low_part pattern if possible. */
5907 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
5908 {
5909 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
5910 op = gen_rtx_SET (op, gen_lowpart (smode, src));
5911 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5912 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
5913 return true;
5914 }
5915
5916 /* ??? There are more powerful versions of ICM that are not
5917 completely represented in the md file. */
5918 }
5919
5920 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
5921 if (TARGET_Z10 && (mode == DImode || mode == SImode))
5922 {
5923 machine_mode mode_s = GET_MODE (src);
5924
5925 if (mode_s == VOIDmode)
5926 {
5927 /* For constant zero values the representation with AND
5928 appears to be folded in more situations than the (set
5929 (zero_extract) ...).
5930 We only do this when the start and end of the bitfield
5931 remain in the same SImode chunk. That way nihf or nilf
5932 can be used.
5933 The AND patterns might still generate a risbg for this. */
5934 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
5935 return false;
5936 else
5937 src = force_reg (mode, src);
5938 }
5939 else if (mode_s != mode)
5940 {
5941 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
5942 src = force_reg (mode_s, src);
5943 src = gen_lowpart (mode, src);
5944 }
5945
5946 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
5947 op = gen_rtx_SET (op, src);
5948
5949 if (!TARGET_ZEC12)
5950 {
5951 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5952 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
5953 }
5954 emit_insn (op);
5955
5956 return true;
5957 }
5958
5959 return false;
5960 }
5961
5962 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
5963 register that holds VAL of mode MODE shifted by COUNT bits. */
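/* For example (illustrative values only): for MODE == QImode, VAL == 0x1234
   and COUNT == 16 the AND keeps 0x34 and the shift yields a register
   holding 0x00340000.  */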
5964
5965 static inline rtx
5966 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
5967 {
5968 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
5969 NULL_RTX, 1, OPTAB_DIRECT);
5970 return expand_simple_binop (SImode, ASHIFT, val, count,
5971 NULL_RTX, 1, OPTAB_DIRECT);
5972 }
5973
5974 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
5975 the result in TARGET. */
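/* For example (readable from the integer switch below): GE is not handled
   directly by the comparison patterns, so a >= b is rewritten as GT with
   swapped operands and a negated result, i.e. !(b > a).  */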
5976
5977 void
5978 s390_expand_vec_compare (rtx target, enum rtx_code cond,
5979 rtx cmp_op1, rtx cmp_op2)
5980 {
5981 machine_mode mode = GET_MODE (target);
5982 bool neg_p = false, swap_p = false;
5983 rtx tmp;
5984
5985 if (GET_MODE (cmp_op1) == V2DFmode)
5986 {
5987 switch (cond)
5988 {
5989 /* NE a != b -> !(a == b) */
5990 case NE: cond = EQ; neg_p = true; break;
5991 /* UNGT a u> b -> !(b >= a) */
5992 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
5993 /* UNGE a u>= b -> !(b > a) */
5994 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
5995 /* LE: a <= b -> b >= a */
5996 case LE: cond = GE; swap_p = true; break;
5997 /* UNLE: a u<= b -> !(a > b) */
5998 case UNLE: cond = GT; neg_p = true; break;
5999 /* LT: a < b -> b > a */
6000 case LT: cond = GT; swap_p = true; break;
6001 /* UNLT: a u< b -> !(a >= b) */
6002 case UNLT: cond = GE; neg_p = true; break;
6003 case UNEQ:
6004 emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
6005 return;
6006 case LTGT:
6007 emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
6008 return;
6009 case ORDERED:
6010 emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
6011 return;
6012 case UNORDERED:
6013 emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
6014 return;
6015 default: break;
6016 }
6017 }
6018 else
6019 {
6020 switch (cond)
6021 {
6022 /* NE: a != b -> !(a == b) */
6023 case NE: cond = EQ; neg_p = true; break;
6024 /* GE: a >= b -> !(b > a) */
6025 case GE: cond = GT; neg_p = true; swap_p = true; break;
6026 /* GEU: a >= b -> !(b > a) */
6027 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6028 /* LE: a <= b -> !(a > b) */
6029 case LE: cond = GT; neg_p = true; break;
6030 /* LEU: a <= b -> !(a > b) */
6031 case LEU: cond = GTU; neg_p = true; break;
6032 /* LT: a < b -> b > a */
6033 case LT: cond = GT; swap_p = true; break;
6034 /* LTU: a < b -> b > a */
6035 case LTU: cond = GTU; swap_p = true; break;
6036 default: break;
6037 }
6038 }
6039
6040 if (swap_p)
6041 {
6042 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6043 }
6044
6045 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6046 mode,
6047 cmp_op1, cmp_op2)));
6048 if (neg_p)
6049 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6050 }
6051
6052 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6053 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6054 elements in CMP1 and CMP2 fulfill the comparison. */
6055 void
6056 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6057 rtx cmp1, rtx cmp2, bool all_p)
6058 {
6059 enum rtx_code new_code = code;
6060 machine_mode cmp_mode, full_cmp_mode, scratch_mode;
6061 rtx tmp_reg = gen_reg_rtx (SImode);
6062 bool swap_p = false;
6063
6064 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6065 {
6066 switch (code)
6067 {
6068 case EQ: cmp_mode = CCVEQmode; break;
6069 case NE: cmp_mode = CCVEQmode; break;
6070 case GT: cmp_mode = CCVHmode; break;
6071 case GE: cmp_mode = CCVHmode; new_code = LE; swap_p = true; break;
6072 case LT: cmp_mode = CCVHmode; new_code = GT; swap_p = true; break;
6073 case LE: cmp_mode = CCVHmode; new_code = LE; break;
6074 case GTU: cmp_mode = CCVHUmode; break;
6075 case GEU: cmp_mode = CCVHUmode; new_code = LEU; swap_p = true; break;
6076 case LTU: cmp_mode = CCVHUmode; new_code = GTU; swap_p = true; break;
6077 case LEU: cmp_mode = CCVHUmode; new_code = LEU; break;
6078 default: gcc_unreachable ();
6079 }
6080 scratch_mode = GET_MODE (cmp1);
6081 }
6082 else if (GET_MODE (cmp1) == V2DFmode)
6083 {
6084 switch (code)
6085 {
6086 case EQ: cmp_mode = CCVEQmode; break;
6087 case NE: cmp_mode = CCVEQmode; break;
6088 case GT: cmp_mode = CCVFHmode; break;
6089 case GE: cmp_mode = CCVFHEmode; break;
6090 case UNLE: cmp_mode = CCVFHmode; break;
6091 case UNLT: cmp_mode = CCVFHEmode; break;
6092 case LT: cmp_mode = CCVFHmode; new_code = GT; swap_p = true; break;
6093 case LE: cmp_mode = CCVFHEmode; new_code = GE; swap_p = true; break;
6094 default: gcc_unreachable ();
6095 }
6096 scratch_mode = V2DImode;
6097 }
6098 else
6099 gcc_unreachable ();
6100
6101 if (!all_p)
6102 switch (cmp_mode)
6103 {
6104 case CCVEQmode: full_cmp_mode = CCVEQANYmode; break;
6105 case CCVHmode: full_cmp_mode = CCVHANYmode; break;
6106 case CCVHUmode: full_cmp_mode = CCVHUANYmode; break;
6107 case CCVFHmode: full_cmp_mode = CCVFHANYmode; break;
6108 case CCVFHEmode: full_cmp_mode = CCVFHEANYmode; break;
6109 default: gcc_unreachable ();
6110 }
6111 else
6112 /* The modes without ANY match the ALL modes. */
6113 full_cmp_mode = cmp_mode;
6114
6115 if (swap_p)
6116 {
6117 rtx tmp = cmp2;
6118 cmp2 = cmp1;
6119 cmp1 = tmp;
6120 }
6121
6122 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6123 gen_rtvec (2, gen_rtx_SET (
6124 gen_rtx_REG (cmp_mode, CC_REGNUM),
6125 gen_rtx_COMPARE (cmp_mode, cmp1, cmp2)),
6126 gen_rtx_CLOBBER (VOIDmode,
6127 gen_rtx_SCRATCH (scratch_mode)))));
6128 emit_move_insn (target, const0_rtx);
6129 emit_move_insn (tmp_reg, const1_rtx);
6130
6131 emit_move_insn (target,
6132 gen_rtx_IF_THEN_ELSE (SImode,
6133 gen_rtx_fmt_ee (new_code, VOIDmode,
6134 gen_rtx_REG (full_cmp_mode, CC_REGNUM),
6135 const0_rtx),
6136 target, tmp_reg));
6137 }
6138
6139 /* Generate a vector comparison expression loading either elements of
6140 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6141 and CMP_OP2. */
6142
6143 void
6144 s390_expand_vcond (rtx target, rtx then, rtx els,
6145 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6146 {
6147 rtx tmp;
6148 machine_mode result_mode;
6149 rtx result_target;
6150
6151 machine_mode target_mode = GET_MODE (target);
6152 machine_mode cmp_mode = GET_MODE (cmp_op1);
6153 rtx op = (cond == LT) ? els : then;
6154
6155 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6156 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6157 for short and byte (x >> 15 and x >> 7 respectively). */
6158 if ((cond == LT || cond == GE)
6159 && target_mode == cmp_mode
6160 && cmp_op2 == CONST0_RTX (cmp_mode)
6161 && op == CONST0_RTX (target_mode)
6162 && s390_vector_mode_supported_p (target_mode)
6163 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6164 {
6165 rtx negop = (cond == LT) ? then : els;
6166
6167 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6168
6169 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6170 if (negop == CONST1_RTX (target_mode))
6171 {
6172 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6173 GEN_INT (shift), target,
6174 1, OPTAB_DIRECT);
6175 if (res != target)
6176 emit_move_insn (target, res);
6177 return;
6178 }
6179
6180 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6181 else if (all_ones_operand (negop, target_mode))
6182 {
6183 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6184 GEN_INT (shift), target,
6185 0, OPTAB_DIRECT);
6186 if (res != target)
6187 emit_move_insn (target, res);
6188 return;
6189 }
6190 }
6191
6192 /* We always use an integral type vector to hold the comparison
6193 result. */
6194 result_mode = cmp_mode == V2DFmode ? V2DImode : cmp_mode;
6195 result_target = gen_reg_rtx (result_mode);
6196
6197 /* We allow vector immediates as comparison operands that
6198 can be handled by the optimization above but not by the
6199 following code. Hence, force them into registers here. */
6200 if (!REG_P (cmp_op1))
6201 cmp_op1 = force_reg (target_mode, cmp_op1);
6202
6203 if (!REG_P (cmp_op2))
6204 cmp_op2 = force_reg (target_mode, cmp_op2);
6205
6206 s390_expand_vec_compare (result_target, cond,
6207 cmp_op1, cmp_op2);
6208
6209 /* If the results are supposed to be either -1 or 0 we are done
6210 since this is what our compare instructions generate anyway. */
6211 if (all_ones_operand (then, GET_MODE (then))
6212 && const0_operand (els, GET_MODE (els)))
6213 {
6214 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6215 result_target, 0));
6216 return;
6217 }
6218
6219 /* Otherwise we will do a vsel afterwards. */
6220 /* This gets triggered e.g.
6221 with gcc.c-torture/compile/pr53410-1.c */
6222 if (!REG_P (then))
6223 then = force_reg (target_mode, then);
6224
6225 if (!REG_P (els))
6226 els = force_reg (target_mode, els);
6227
6228 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6229 result_target,
6230 CONST0_RTX (result_mode));
6231
6232 /* We compared the result against zero above so we have to swap then
6233 and els here. */
6234 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6235
6236 gcc_assert (target_mode == GET_MODE (then));
6237 emit_insn (gen_rtx_SET (target, tmp));
6238 }
6239
6240 /* Emit the RTX necessary to initialize the vector TARGET with values
6241 in VALS. */
6242 void
6243 s390_expand_vec_init (rtx target, rtx vals)
6244 {
6245 machine_mode mode = GET_MODE (target);
6246 machine_mode inner_mode = GET_MODE_INNER (mode);
6247 int n_elts = GET_MODE_NUNITS (mode);
6248 bool all_same = true, all_regs = true, all_const_int = true;
6249 rtx x;
6250 int i;
6251
6252 for (i = 0; i < n_elts; ++i)
6253 {
6254 x = XVECEXP (vals, 0, i);
6255
6256 if (!CONST_INT_P (x))
6257 all_const_int = false;
6258
6259 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6260 all_same = false;
6261
6262 if (!REG_P (x))
6263 all_regs = false;
6264 }
6265
6266 /* Use vector gen mask or vector gen byte mask if possible. */
6267 if (all_same && all_const_int
6268 && (XVECEXP (vals, 0, 0) == const0_rtx
6269 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6270 NULL, NULL)
6271 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6272 {
6273 emit_insn (gen_rtx_SET (target,
6274 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6275 return;
6276 }
6277
6278 if (all_same)
6279 {
6280 emit_insn (gen_rtx_SET (target,
6281 gen_rtx_VEC_DUPLICATE (mode,
6282 XVECEXP (vals, 0, 0))));
6283 return;
6284 }
6285
6286 if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode)
6287 {
6288 /* Use vector load pair. */
6289 emit_insn (gen_rtx_SET (target,
6290 gen_rtx_VEC_CONCAT (mode,
6291 XVECEXP (vals, 0, 0),
6292 XVECEXP (vals, 0, 1))));
6293 return;
6294 }
6295
6296 /* We are about to set the vector elements one by one. Zero out the
6297 full register first in order to help the data flow framework to
6298 detect it as a full VR set. */
6299 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6300
6301 /* Unfortunately the vec_init expander is not allowed to fail. So
6302 we have to implement the fallback ourselves. */
6303 for (i = 0; i < n_elts; i++)
6304 emit_insn (gen_rtx_SET (target,
6305 gen_rtx_UNSPEC (mode,
6306 gen_rtvec (3, XVECEXP (vals, 0, i),
6307 GEN_INT (i), target),
6308 UNSPEC_VEC_SET)));
6309 }
6310
6311 /* Structure to hold the initial parameters for a compare_and_swap operation
6312 in HImode and QImode. */
6313
6314 struct alignment_context
6315 {
6316 rtx memsi; /* SI aligned memory location. */
6317 rtx shift; /* Bit offset with regard to lsb. */
6318 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6319 rtx modemaski; /* ~modemask */
6320 bool aligned; /* True if memory is aligned, false otherwise. */
6321 };
6322
6323 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6324 structure AC for transparent simplification if the memory alignment is known
6325 to be at least 32 bits. MEM is the memory location for the actual operation
6326 and MODE its mode. */
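/* A worked example of the unaligned path below (illustrative address):
   for a QImode MEM whose address ADDR has ADDR & 3 == 1, MEMSI refers to
   the enclosing word at ADDR & -4, SHIFT becomes (3 - 1) * 8 == 16 bits,
   MODEMASK becomes 0xff << 16 and MODEMASKI its complement; on this
   big-endian target the byte of interest thus sits 16 bits above the LSB
   of the loaded SImode word.  */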
6327
6328 static void
6329 init_alignment_context (struct alignment_context *ac, rtx mem,
6330 machine_mode mode)
6331 {
6332 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6333 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6334
6335 if (ac->aligned)
6336 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6337 else
6338 {
6339 /* Alignment is unknown. */
6340 rtx byteoffset, addr, align;
6341
6342 /* Force the address into a register. */
6343 addr = force_reg (Pmode, XEXP (mem, 0));
6344
6345 /* Align it to SImode. */
6346 align = expand_simple_binop (Pmode, AND, addr,
6347 GEN_INT (-GET_MODE_SIZE (SImode)),
6348 NULL_RTX, 1, OPTAB_DIRECT);
6349 /* Generate MEM. */
6350 ac->memsi = gen_rtx_MEM (SImode, align);
6351 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6352 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6353 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6354
6355 /* Calculate shiftcount. */
6356 byteoffset = expand_simple_binop (Pmode, AND, addr,
6357 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6358 NULL_RTX, 1, OPTAB_DIRECT);
6359 /* As we already have some offset, evaluate the remaining distance. */
6360 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6361 NULL_RTX, 1, OPTAB_DIRECT);
6362 }
6363
6364 /* Shift is the byte count, but we need the bitcount. */
6365 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6366 NULL_RTX, 1, OPTAB_DIRECT);
6367
6368 /* Calculate masks. */
6369 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6370 GEN_INT (GET_MODE_MASK (mode)),
6371 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6372 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6373 NULL_RTX, 1);
6374 }
6375
6376 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6377 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6378 perform the merge in SEQ2. */
6379
6380 static rtx
6381 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6382 machine_mode mode, rtx val, rtx ins)
6383 {
6384 rtx tmp;
6385
6386 if (ac->aligned)
6387 {
6388 start_sequence ();
6389 tmp = copy_to_mode_reg (SImode, val);
6390 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6391 const0_rtx, ins))
6392 {
6393 *seq1 = NULL;
6394 *seq2 = get_insns ();
6395 end_sequence ();
6396 return tmp;
6397 }
6398 end_sequence ();
6399 }
6400
6401 /* Failed to use insv. Generate a two part shift and mask. */
6402 start_sequence ();
6403 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6404 *seq1 = get_insns ();
6405 end_sequence ();
6406
6407 start_sequence ();
6408 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6409 *seq2 = get_insns ();
6410 end_sequence ();
6411
6412 return tmp;
6413 }
6414
6415 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6416 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6417 value to set if CMP == MEM. */
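/* Illustrative use (an assumption for the example, not spelled out in this
   file): a __atomic_compare_exchange on a 1- or 2-byte object is typically
   routed through this expander.  The narrow value is embedded into the
   surrounding aligned SImode word and a COMPARE AND SWAP loop on that word
   is emitted, with BTARGET receiving the success flag and VTARGET the old
   field value.  */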
6418
6419 void
6420 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6421 rtx cmp, rtx new_rtx, bool is_weak)
6422 {
6423 struct alignment_context ac;
6424 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6425 rtx res = gen_reg_rtx (SImode);
6426 rtx_code_label *csloop = NULL, *csend = NULL;
6427
6428 gcc_assert (MEM_P (mem));
6429
6430 init_alignment_context (&ac, mem, mode);
6431
6432 /* Load full word. Subsequent loads are performed by CS. */
6433 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6434 NULL_RTX, 1, OPTAB_DIRECT);
6435
6436 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6437 possible, we try to use insv to make this happen efficiently. If
6438 that fails we'll generate code both inside and outside the loop. */
6439 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6440 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6441
6442 if (seq0)
6443 emit_insn (seq0);
6444 if (seq1)
6445 emit_insn (seq1);
6446
6447 /* Start CS loop. */
6448 if (!is_weak)
6449 {
6450 /* Begin assuming success. */
6451 emit_move_insn (btarget, const1_rtx);
6452
6453 csloop = gen_label_rtx ();
6454 csend = gen_label_rtx ();
6455 emit_label (csloop);
6456 }
6457
6458 /* val = "<mem>00..0<mem>"
6459 * cmp = "00..0<cmp>00..0"
6460 * new = "00..0<new>00..0"
6461 */
6462
6463 emit_insn (seq2);
6464 emit_insn (seq3);
6465
6466 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
6467 if (is_weak)
6468 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6469 else
6470 {
6471 rtx tmp;
6472
6473 /* Jump to end if we're done (likely?). */
6474 s390_emit_jump (csend, cc);
6475
6476 /* Check for changes outside mode, and loop internally if so.
6477 Arrange the moves so that the compare is adjacent to the
6478 branch so that we can generate CRJ. */
6479 tmp = copy_to_reg (val);
6480 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6481 1, OPTAB_DIRECT);
6482 cc = s390_emit_compare (NE, val, tmp);
6483 s390_emit_jump (csloop, cc);
6484
6485 /* Failed. */
6486 emit_move_insn (btarget, const0_rtx);
6487 emit_label (csend);
6488 }
6489
6490 /* Return the correct part of the bitfield. */
6491 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
6492 NULL_RTX, 1, OPTAB_DIRECT), 1);
6493 }
6494
6495 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
6496 and VAL the value to play with. If AFTER is true then store the value
6497 MEM holds after the operation, if AFTER is false then store the value MEM
6498 holds before the operation. If TARGET is zero then discard that value, else
6499 store it to TARGET. */
6500
6501 void
6502 s390_expand_atomic (machine_mode mode, enum rtx_code code,
6503 rtx target, rtx mem, rtx val, bool after)
6504 {
6505 struct alignment_context ac;
6506 rtx cmp;
6507 rtx new_rtx = gen_reg_rtx (SImode);
6508 rtx orig = gen_reg_rtx (SImode);
6509 rtx_code_label *csloop = gen_label_rtx ();
6510
6511 gcc_assert (!target || register_operand (target, VOIDmode));
6512 gcc_assert (MEM_P (mem));
6513
6514 init_alignment_context (&ac, mem, mode);
6515
6516 /* Shift val to the correct bit positions.
6517 Preserve "icm", but prevent "ex icm". */
6518 if (!(ac.aligned && code == SET && MEM_P (val)))
6519 val = s390_expand_mask_and_shift (val, mode, ac.shift);
6520
6521 /* Further preparation insns. */
6522 if (code == PLUS || code == MINUS)
6523 emit_move_insn (orig, val);
6524 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
6525 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
6526 NULL_RTX, 1, OPTAB_DIRECT);
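    /* Illustrative note: VAL has already been masked and shifted here, so
       the bits outside the field are zero; XORing with MODEMASKI turns them
       all into ones ("11..1<val>11..1").  The later AND on the full SImode
       word therefore leaves the neighbouring bytes unchanged; the NAND
       case (encoded as MULT) reuses the same value.  */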
6527
6528 /* Load full word. Subsequent loads are performed by CS. */
6529 cmp = force_reg (SImode, ac.memsi);
6530
6531 /* Start CS loop. */
6532 emit_label (csloop);
6533 emit_move_insn (new_rtx, cmp);
6534
6535 /* Patch new with val at correct position. */
6536 switch (code)
6537 {
6538 case PLUS:
6539 case MINUS:
6540 val = expand_simple_binop (SImode, code, new_rtx, orig,
6541 NULL_RTX, 1, OPTAB_DIRECT);
6542 val = expand_simple_binop (SImode, AND, val, ac.modemask,
6543 NULL_RTX, 1, OPTAB_DIRECT);
6544 /* FALLTHRU */
6545 case SET:
6546 if (ac.aligned && MEM_P (val))
6547 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
6548 0, 0, SImode, val, false);
6549 else
6550 {
6551 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
6552 NULL_RTX, 1, OPTAB_DIRECT);
6553 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
6554 NULL_RTX, 1, OPTAB_DIRECT);
6555 }
6556 break;
6557 case AND:
6558 case IOR:
6559 case XOR:
6560 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
6561 NULL_RTX, 1, OPTAB_DIRECT);
6562 break;
6563 case MULT: /* NAND */
6564 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
6565 NULL_RTX, 1, OPTAB_DIRECT);
6566 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
6567 NULL_RTX, 1, OPTAB_DIRECT);
6568 break;
6569 default:
6570 gcc_unreachable ();
6571 }
6572
6573 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
6574 ac.memsi, cmp, new_rtx));
6575
6576 /* Return the correct part of the bitfield. */
6577 if (target)
6578 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
6579 after ? new_rtx : cmp, ac.shift,
6580 NULL_RTX, 1, OPTAB_DIRECT), 1);
6581 }
6582
6583 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6584 We need to emit DTP-relative relocations. */
6585
6586 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6587
6588 static void
6589 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
6590 {
6591 switch (size)
6592 {
6593 case 4:
6594 fputs ("\t.long\t", file);
6595 break;
6596 case 8:
6597 fputs ("\t.quad\t", file);
6598 break;
6599 default:
6600 gcc_unreachable ();
6601 }
6602 output_addr_const (file, x);
6603 fputs ("@DTPOFF", file);
6604 }
6605
6606 /* Return the proper mode for REGNO being represented in the dwarf
6607 unwind table. */
6608 machine_mode
6609 s390_dwarf_frame_reg_mode (int regno)
6610 {
6611 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
6612
6613 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
6614 if (GENERAL_REGNO_P (regno))
6615 save_mode = Pmode;
6616
6617 /* The rightmost 64 bits of vector registers are call-clobbered. */
6618 if (GET_MODE_SIZE (save_mode) > 8)
6619 save_mode = DImode;
6620
6621 return save_mode;
6622 }
6623
6624 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
6625 /* Implement TARGET_MANGLE_TYPE. */
6626
6627 static const char *
6628 s390_mangle_type (const_tree type)
6629 {
6630 type = TYPE_MAIN_VARIANT (type);
6631
6632 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
6633 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
6634 return NULL;
6635
6636 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
6637 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
6638 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
6639 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
6640
6641 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
6642 && TARGET_LONG_DOUBLE_128)
6643 return "g";
6644
6645 /* For all other types, use normal C++ mangling. */
6646 return NULL;
6647 }
6648 #endif
6649
6650 /* In the name of slightly smaller debug output, and to cater to
6651 general assembler lossage, recognize various UNSPEC sequences
6652 and turn them back into a direct symbol reference. */
6653
6654 static rtx
6655 s390_delegitimize_address (rtx orig_x)
6656 {
6657 rtx x, y;
6658
6659 orig_x = delegitimize_mem_from_attrs (orig_x);
6660 x = orig_x;
6661
6662 /* Extract the symbol ref from:
6663 (plus:SI (reg:SI 12 %r12)
6664 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
6665 UNSPEC_GOTOFF/PLTOFF)))
6666 and
6667 (plus:SI (reg:SI 12 %r12)
6668 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
6669 UNSPEC_GOTOFF/PLTOFF)
6670 (const_int 4 [0x4])))) */
6671 if (GET_CODE (x) == PLUS
6672 && REG_P (XEXP (x, 0))
6673 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
6674 && GET_CODE (XEXP (x, 1)) == CONST)
6675 {
6676 HOST_WIDE_INT offset = 0;
6677
6678 /* The const operand. */
6679 y = XEXP (XEXP (x, 1), 0);
6680
6681 if (GET_CODE (y) == PLUS
6682 && GET_CODE (XEXP (y, 1)) == CONST_INT)
6683 {
6684 offset = INTVAL (XEXP (y, 1));
6685 y = XEXP (y, 0);
6686 }
6687
6688 if (GET_CODE (y) == UNSPEC
6689 && (XINT (y, 1) == UNSPEC_GOTOFF
6690 || XINT (y, 1) == UNSPEC_PLTOFF))
6691 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
6692 }
6693
6694 if (GET_CODE (x) != MEM)
6695 return orig_x;
6696
6697 x = XEXP (x, 0);
6698 if (GET_CODE (x) == PLUS
6699 && GET_CODE (XEXP (x, 1)) == CONST
6700 && GET_CODE (XEXP (x, 0)) == REG
6701 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6702 {
6703 y = XEXP (XEXP (x, 1), 0);
6704 if (GET_CODE (y) == UNSPEC
6705 && XINT (y, 1) == UNSPEC_GOT)
6706 y = XVECEXP (y, 0, 0);
6707 else
6708 return orig_x;
6709 }
6710 else if (GET_CODE (x) == CONST)
6711 {
6712 /* Extract the symbol ref from:
6713 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
6714 UNSPEC_PLT/GOTENT))) */
6715
6716 y = XEXP (x, 0);
6717 if (GET_CODE (y) == UNSPEC
6718 && (XINT (y, 1) == UNSPEC_GOTENT
6719 || XINT (y, 1) == UNSPEC_PLT))
6720 y = XVECEXP (y, 0, 0);
6721 else
6722 return orig_x;
6723 }
6724 else
6725 return orig_x;
6726
6727 if (GET_MODE (orig_x) != Pmode)
6728 {
6729 if (GET_MODE (orig_x) == BLKmode)
6730 return orig_x;
6731 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
6732 if (y == NULL_RTX)
6733 return orig_x;
6734 }
6735 return y;
6736 }
6737
6738 /* Output operand OP to stdio stream FILE.
6739 OP is an address (register + offset) which is not used to address data;
6740 instead the rightmost bits are interpreted as the value. */
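/* For example (illustrative operand): an OP of the form
   (plus (reg %r2) (const_int 7)) is printed as "7(%r2)"; the offset is
   truncated to its low twelve bits before printing.  */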
6741
6742 static void
6743 print_shift_count_operand (FILE *file, rtx op)
6744 {
6745 HOST_WIDE_INT offset;
6746 rtx base;
6747
6748 /* Extract base register and offset. */
6749 if (!s390_decompose_shift_count (op, &base, &offset))
6750 gcc_unreachable ();
6751
6752 /* Sanity check. */
6753 if (base)
6754 {
6755 gcc_assert (GET_CODE (base) == REG);
6756 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
6757 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
6758 }
6759
6760 /* Offsets are restricted to twelve bits. */
6761 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
6762 if (base)
6763 fprintf (file, "(%s)", reg_names[REGNO (base)]);
6764 }
6765
6766 /* Assigns the number of NOP halfwords to be emitted before and after the
6767 function label to *HW_BEFORE and *HW_AFTER. Neither pointer may be NULL.
6768 If hotpatching is disabled for the function, the values are set to zero.
6769 */
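/* For instance, a function declared with __attribute__ ((hotpatch (1, 2)))
   yields *HW_BEFORE == 1 and *HW_AFTER == 2, overriding the values given
   on the command line.  */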
6770
6771 static void
6772 s390_function_num_hotpatch_hw (tree decl,
6773 int *hw_before,
6774 int *hw_after)
6775 {
6776 tree attr;
6777
6778 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
6779
6780 /* Handle the arguments of the hotpatch attribute. The values
6781 specified via attribute might override the cmdline argument
6782 values. */
6783 if (attr)
6784 {
6785 tree args = TREE_VALUE (attr);
6786
6787 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
6788 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
6789 }
6790 else
6791 {
6792 /* Use the values specified by the cmdline arguments. */
6793 *hw_before = s390_hotpatch_hw_before_label;
6794 *hw_after = s390_hotpatch_hw_after_label;
6795 }
6796 }
6797
6798 /* Write the current .machine and .machinemode specification to the assembler
6799 file. */
6800
6801 #ifdef HAVE_AS_MACHINE_MACHINEMODE
6802 static void
6803 s390_asm_output_machine_for_arch (FILE *asm_out_file)
6804 {
6805 fprintf (asm_out_file, "\t.machinemode %s\n",
6806 (TARGET_ZARCH) ? "zarch" : "esa");
6807 fprintf (asm_out_file, "\t.machine \"%s", processor_table[s390_arch].name);
6808 if (S390_USE_ARCHITECTURE_MODIFIERS)
6809 {
6810 int cpu_flags;
6811
6812 cpu_flags = processor_flags_table[(int) s390_arch];
6813 if (TARGET_HTM && !(cpu_flags & PF_TX))
6814 fprintf (asm_out_file, "+htm");
6815 else if (!TARGET_HTM && (cpu_flags & PF_TX))
6816 fprintf (asm_out_file, "+nohtm");
6817 if (TARGET_VX && !(cpu_flags & PF_VX))
6818 fprintf (asm_out_file, "+vx");
6819 else if (!TARGET_VX && (cpu_flags & PF_VX))
6820 fprintf (asm_out_file, "+novx");
6821 }
6822 fprintf (asm_out_file, "\"\n");
6823 }
6824
6825 /* Write an extra function header before the very start of the function. */
6826
6827 void
6828 s390_asm_output_function_prefix (FILE *asm_out_file,
6829 const char *fnname ATTRIBUTE_UNUSED)
6830 {
6831 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
6832 return;
6833 /* Since only the function-specific options are saved, but not an indication of
6834 which options were explicitly set, it's too much work here to figure out which options
6835 have actually changed. Thus, generate .machine and .machinemode whenever a
6836 function has the target attribute or pragma. */
6837 fprintf (asm_out_file, "\t.machinemode push\n");
6838 fprintf (asm_out_file, "\t.machine push\n");
6839 s390_asm_output_machine_for_arch (asm_out_file);
6840 }
6841
6842 /* Write an extra function footer after the very end of the function. */
6843
6844 void
6845 s390_asm_declare_function_size (FILE *asm_out_file,
6846 const char *fnname, tree decl)
6847 {
6848 if (!flag_inhibit_size_directive)
6849 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
6850 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
6851 return;
6852 fprintf (asm_out_file, "\t.machine pop\n");
6853 fprintf (asm_out_file, "\t.machinemode pop\n");
6854 }
6855 #endif
6856
6857 /* Write the extra assembler code needed to declare a function properly. */
6858
6859 void
6860 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
6861 tree decl)
6862 {
6863 int hw_before, hw_after;
6864
6865 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
6866 if (hw_before > 0)
6867 {
6868 unsigned int function_alignment;
6869 int i;
6870
6871 /* Add a trampoline code area before the function label and initialize it
6872 with two-byte nop instructions. This area can be overwritten with code
6873 that jumps to a patched version of the function. */
6874 asm_fprintf (asm_out_file, "\tnopr\t%%r7"
6875 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
6876 hw_before);
6877 for (i = 1; i < hw_before; i++)
6878 fputs ("\tnopr\t%r7\n", asm_out_file);
6879
6880 /* Note: The function label must be aligned so that (a) the bytes of the
6881 following nop do not cross a cacheline boundary, and (b) a jump address
6882 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
6883 stored directly before the label without crossing a cacheline
6884 boundary. All this is necessary to make sure the trampoline code can
6885 be changed atomically.
6886 This alignment is done automatically using the FUNCTION_BOUNDARY, but
6887 if there are NOPs before the function label, the alignment is placed
6888 before them. So it is necessary to duplicate the alignment after the
6889 NOPs. */
6890 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
6891 if (! DECL_USER_ALIGN (decl))
6892 function_alignment = MAX (function_alignment,
6893 (unsigned int) align_functions);
6894 fputs ("\t# alignment for hotpatch\n", asm_out_file);
6895 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
6896 }
6897
6898 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
6899 {
6900 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
6901 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
6902 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
6903 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
6904 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
6905 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
6906 s390_warn_framesize);
6907 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
6908 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
6909 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
6910 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
6911 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
6912 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
6913 TARGET_PACKED_STACK);
6914 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
6915 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
6916 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
6917 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
6918 s390_warn_dynamicstack_p);
6919 }
6920 ASM_OUTPUT_LABEL (asm_out_file, fname);
6921 if (hw_after > 0)
6922 asm_fprintf (asm_out_file,
6923 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
6924 hw_after);
6925 }
6926
6927 /* Output machine-dependent UNSPECs occurring in address constant X
6928 in assembler syntax to stdio stream FILE. Returns true if the
6929 constant X could be recognized, false otherwise. */
6930
6931 static bool
6932 s390_output_addr_const_extra (FILE *file, rtx x)
6933 {
6934 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
6935 switch (XINT (x, 1))
6936 {
6937 case UNSPEC_GOTENT:
6938 output_addr_const (file, XVECEXP (x, 0, 0));
6939 fprintf (file, "@GOTENT");
6940 return true;
6941 case UNSPEC_GOT:
6942 output_addr_const (file, XVECEXP (x, 0, 0));
6943 fprintf (file, "@GOT");
6944 return true;
6945 case UNSPEC_GOTOFF:
6946 output_addr_const (file, XVECEXP (x, 0, 0));
6947 fprintf (file, "@GOTOFF");
6948 return true;
6949 case UNSPEC_PLT:
6950 output_addr_const (file, XVECEXP (x, 0, 0));
6951 fprintf (file, "@PLT");
6952 return true;
6953 case UNSPEC_PLTOFF:
6954 output_addr_const (file, XVECEXP (x, 0, 0));
6955 fprintf (file, "@PLTOFF");
6956 return true;
6957 case UNSPEC_TLSGD:
6958 output_addr_const (file, XVECEXP (x, 0, 0));
6959 fprintf (file, "@TLSGD");
6960 return true;
6961 case UNSPEC_TLSLDM:
6962 assemble_name (file, get_some_local_dynamic_name ());
6963 fprintf (file, "@TLSLDM");
6964 return true;
6965 case UNSPEC_DTPOFF:
6966 output_addr_const (file, XVECEXP (x, 0, 0));
6967 fprintf (file, "@DTPOFF");
6968 return true;
6969 case UNSPEC_NTPOFF:
6970 output_addr_const (file, XVECEXP (x, 0, 0));
6971 fprintf (file, "@NTPOFF");
6972 return true;
6973 case UNSPEC_GOTNTPOFF:
6974 output_addr_const (file, XVECEXP (x, 0, 0));
6975 fprintf (file, "@GOTNTPOFF");
6976 return true;
6977 case UNSPEC_INDNTPOFF:
6978 output_addr_const (file, XVECEXP (x, 0, 0));
6979 fprintf (file, "@INDNTPOFF");
6980 return true;
6981 }
6982
6983 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
6984 switch (XINT (x, 1))
6985 {
6986 case UNSPEC_POOL_OFFSET:
6987 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
6988 output_addr_const (file, x);
6989 return true;
6990 }
6991 return false;
6992 }
6993
6994 /* Output address operand ADDR in assembler syntax to
6995 stdio stream FILE. */
6996
6997 void
6998 print_operand_address (FILE *file, rtx addr)
6999 {
7000 struct s390_address ad;
7001
7002 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7003 {
7004 if (!TARGET_Z10)
7005 {
7006 output_operand_lossage ("symbolic memory references are "
7007 "only supported on z10 or later");
7008 return;
7009 }
7010 output_addr_const (file, addr);
7011 return;
7012 }
7013
7014 if (!s390_decompose_address (addr, &ad)
7015 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7016 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7017 output_operand_lossage ("cannot decompose address");
7018
7019 if (ad.disp)
7020 output_addr_const (file, ad.disp);
7021 else
7022 fprintf (file, "0");
7023
7024 if (ad.base && ad.indx)
7025 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7026 reg_names[REGNO (ad.base)]);
7027 else if (ad.base)
7028 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7029 }
7030
7031 /* Output operand X in assembler syntax to stdio stream FILE.
7032 CODE specified the format flag. The following format flags
7033 are recognized:
7034
7035 'C': print opcode suffix for branch condition.
7036 'D': print opcode suffix for inverse branch condition.
7037 'E': print opcode suffix for branch on index instruction.
7038 'G': print the size of the operand in bytes.
7039 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7040 'M': print the second word of a TImode operand.
7041 'N': print the second word of a DImode operand.
7042 'O': print only the displacement of a memory reference or address.
7043 'R': print only the base register of a memory reference or address.
7044 'S': print S-type memory reference (base+displacement).
7045 'Y': print shift count operand.
7046
7047 'b': print integer X as if it's an unsigned byte.
7048 'c': print integer X as if it's a signed byte.
7049 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7050 'f': "end" contiguous bitmask X in SImode.
7051 'h': print integer X as if it's a signed halfword.
7052 'i': print the first nonzero HImode part of X.
7053 'j': print the first HImode part unequal to -1 of X.
7054 'k': print the first nonzero SImode part of X.
7055 'm': print the first SImode part unequal to -1 of X.
7056 'o': print integer X as if it's an unsigned 32-bit word.
7057 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7058 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7059 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7060 'x': print integer X as if it's an unsigned halfword.
7061 'v': print register number as vector register (v1 instead of f1).
7062 */
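/* A few illustrative cases (operand value chosen for the example only):
   with X == (const_int 0x12345), "%x0" prints 9029 (the low halfword
   0x2345), "%b0" prints 69 (the low byte 0x45), and "%h0" also prints
   9029 since 0x2345 is already a non-negative halfword.  */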
7063
7064 void
7065 print_operand (FILE *file, rtx x, int code)
7066 {
7067 HOST_WIDE_INT ival;
7068
7069 switch (code)
7070 {
7071 case 'C':
7072 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7073 return;
7074
7075 case 'D':
7076 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7077 return;
7078
7079 case 'E':
7080 if (GET_CODE (x) == LE)
7081 fprintf (file, "l");
7082 else if (GET_CODE (x) == GT)
7083 fprintf (file, "h");
7084 else
7085 output_operand_lossage ("invalid comparison operator "
7086 "for 'E' output modifier");
7087 return;
7088
7089 case 'J':
7090 if (GET_CODE (x) == SYMBOL_REF)
7091 {
7092 fprintf (file, "%s", ":tls_load:");
7093 output_addr_const (file, x);
7094 }
7095 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7096 {
7097 fprintf (file, "%s", ":tls_gdcall:");
7098 output_addr_const (file, XVECEXP (x, 0, 0));
7099 }
7100 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7101 {
7102 fprintf (file, "%s", ":tls_ldcall:");
7103 const char *name = get_some_local_dynamic_name ();
7104 gcc_assert (name);
7105 assemble_name (file, name);
7106 }
7107 else
7108 output_operand_lossage ("invalid reference for 'J' output modifier");
7109 return;
7110
7111 case 'G':
7112 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7113 return;
7114
7115 case 'O':
7116 {
7117 struct s390_address ad;
7118 int ret;
7119
7120 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7121
7122 if (!ret
7123 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7124 || ad.indx)
7125 {
7126 output_operand_lossage ("invalid address for 'O' output modifier");
7127 return;
7128 }
7129
7130 if (ad.disp)
7131 output_addr_const (file, ad.disp);
7132 else
7133 fprintf (file, "0");
7134 }
7135 return;
7136
7137 case 'R':
7138 {
7139 struct s390_address ad;
7140 int ret;
7141
7142 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7143
7144 if (!ret
7145 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7146 || ad.indx)
7147 {
7148 output_operand_lossage ("invalid address for 'R' output modifier");
7149 return;
7150 }
7151
7152 if (ad.base)
7153 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7154 else
7155 fprintf (file, "0");
7156 }
7157 return;
7158
7159 case 'S':
7160 {
7161 struct s390_address ad;
7162 int ret;
7163
7164 if (!MEM_P (x))
7165 {
7166 output_operand_lossage ("memory reference expected for "
7167 "'S' output modifier");
7168 return;
7169 }
7170 ret = s390_decompose_address (XEXP (x, 0), &ad);
7171
7172 if (!ret
7173 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7174 || ad.indx)
7175 {
7176 output_operand_lossage ("invalid address for 'S' output modifier");
7177 return;
7178 }
7179
7180 if (ad.disp)
7181 output_addr_const (file, ad.disp);
7182 else
7183 fprintf (file, "0");
7184
7185 if (ad.base)
7186 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7187 }
7188 return;
7189
7190 case 'N':
7191 if (GET_CODE (x) == REG)
7192 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7193 else if (GET_CODE (x) == MEM)
7194 x = change_address (x, VOIDmode,
7195 plus_constant (Pmode, XEXP (x, 0), 4));
7196 else
7197 output_operand_lossage ("register or memory expression expected "
7198 "for 'N' output modifier");
7199 break;
7200
7201 case 'M':
7202 if (GET_CODE (x) == REG)
7203 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7204 else if (GET_CODE (x) == MEM)
7205 x = change_address (x, VOIDmode,
7206 plus_constant (Pmode, XEXP (x, 0), 8));
7207 else
7208 output_operand_lossage ("register or memory expression expected "
7209 "for 'M' output modifier");
7210 break;
7211
7212 case 'Y':
7213 print_shift_count_operand (file, x);
7214 return;
7215 }
7216
7217 switch (GET_CODE (x))
7218 {
7219 case REG:
7220 /* Print FP regs as fx instead of vx when they are accessed
7221 through non-vector mode. */
7222 if (code == 'v'
7223 || VECTOR_NOFP_REG_P (x)
7224 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7225 || (VECTOR_REG_P (x)
7226 && (GET_MODE_SIZE (GET_MODE (x)) /
7227 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7228 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7229 else
7230 fprintf (file, "%s", reg_names[REGNO (x)]);
7231 break;
7232
7233 case MEM:
7234 output_address (GET_MODE (x), XEXP (x, 0));
7235 break;
7236
7237 case CONST:
7238 case CODE_LABEL:
7239 case LABEL_REF:
7240 case SYMBOL_REF:
7241 output_addr_const (file, x);
7242 break;
7243
7244 case CONST_INT:
7245 ival = INTVAL (x);
7246 switch (code)
7247 {
7248 case 0:
7249 break;
7250 case 'b':
7251 ival &= 0xff;
7252 break;
7253 case 'c':
7254 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7255 break;
7256 case 'x':
7257 ival &= 0xffff;
7258 break;
7259 case 'h':
7260 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7261 break;
7262 case 'i':
7263 ival = s390_extract_part (x, HImode, 0);
7264 break;
7265 case 'j':
7266 ival = s390_extract_part (x, HImode, -1);
7267 break;
7268 case 'k':
7269 ival = s390_extract_part (x, SImode, 0);
7270 break;
7271 case 'm':
7272 ival = s390_extract_part (x, SImode, -1);
7273 break;
7274 case 'o':
7275 ival &= 0xffffffff;
7276 break;
7277 case 'e': case 'f':
7278 case 's': case 't':
7279 {
7280 int pos, len;
7281 bool ok;
7282
7283 len = (code == 's' || code == 'e' ? 64 : 32);
7284 ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
7285 gcc_assert (ok);
7286 if (code == 's' || code == 't')
7287 ival = 64 - pos - len;
7288 else
7289 ival = 64 - 1 - pos;
7290 }
7291 break;
7292 default:
7293 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7294 }
7295 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7296 break;
7297
7298 case CONST_WIDE_INT:
7299 if (code == 'b')
7300 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7301 CONST_WIDE_INT_ELT (x, 0) & 0xff);
7302 else if (code == 'x')
7303 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7304 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
7305 else if (code == 'h')
7306 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7307 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
7308 else
7309 {
7310 if (code == 0)
7311 output_operand_lossage ("invalid constant - try using "
7312 "an output modifier");
7313 else
7314 output_operand_lossage ("invalid constant for output modifier '%c'",
7315 code);
7316 }
7317 break;
7318 case CONST_VECTOR:
7319 switch (code)
7320 {
7321 case 'h':
7322 gcc_assert (const_vec_duplicate_p (x));
7323 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7324 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7325 break;
7326 case 'e':
7327 case 's':
7328 {
7329 int start, stop, inner_len;
7330 bool ok;
7331
7332 inner_len = GET_MODE_UNIT_BITSIZE (GET_MODE (x));
7333 ok = s390_contiguous_bitmask_vector_p (x, &start, &stop);
7334 gcc_assert (ok);
7335 if (code == 's' || code == 't')
7336 ival = inner_len - stop - 1;
7337 else
7338 ival = inner_len - start - 1;
7339 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7340 }
7341 break;
7342 case 't':
7343 {
7344 unsigned mask;
7345 bool ok = s390_bytemask_vector_p (x, &mask);
7346 gcc_assert (ok);
7347 fprintf (file, "%u", mask);
7348 }
7349 break;
7350
7351 default:
7352 output_operand_lossage ("invalid constant vector for output "
7353 "modifier '%c'", code);
7354 }
7355 break;
7356
7357 default:
7358 if (code == 0)
7359 output_operand_lossage ("invalid expression - try using "
7360 "an output modifier");
7361 else
7362 output_operand_lossage ("invalid expression for output "
7363 "modifier '%c'", code);
7364 break;
7365 }
7366 }
7367
7368 /* Target hook for assembling integer objects. We need to define it
7369 here to work around a bug in some versions of GAS, which couldn't
7370 handle values smaller than INT_MIN when printed in decimal. */
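/* Illustrative effect: an aligned 8-byte integer with value -0x80000001
   (just below INT_MIN) is emitted as ".quad 0xffffffff7fffffff" rather
   than in its decimal spelling.  */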
7371
7372 static bool
7373 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7374 {
7375 if (size == 8 && aligned_p
7376 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7377 {
7378 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
7379 INTVAL (x));
7380 return true;
7381 }
7382 return default_assemble_integer (x, size, aligned_p);
7383 }
7384
7385 /* Returns true if register REGNO is used for forming
7386 a memory address in expression X. */
7387
7388 static bool
7389 reg_used_in_mem_p (int regno, rtx x)
7390 {
7391 enum rtx_code code = GET_CODE (x);
7392 int i, j;
7393 const char *fmt;
7394
7395 if (code == MEM)
7396 {
7397 if (refers_to_regno_p (regno, XEXP (x, 0)))
7398 return true;
7399 }
7400 else if (code == SET
7401 && GET_CODE (SET_DEST (x)) == PC)
7402 {
7403 if (refers_to_regno_p (regno, SET_SRC (x)))
7404 return true;
7405 }
7406
7407 fmt = GET_RTX_FORMAT (code);
7408 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7409 {
7410 if (fmt[i] == 'e'
7411 && reg_used_in_mem_p (regno, XEXP (x, i)))
7412 return true;
7413
7414 else if (fmt[i] == 'E')
7415 for (j = 0; j < XVECLEN (x, i); j++)
7416 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
7417 return true;
7418 }
7419 return false;
7420 }
7421
7422 /* Returns true if expression DEP_RTX sets an address register
7423 used by instruction INSN to address memory. */
7424
7425 static bool
7426 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
7427 {
7428 rtx target, pat;
7429
7430 if (NONJUMP_INSN_P (dep_rtx))
7431 dep_rtx = PATTERN (dep_rtx);
7432
7433 if (GET_CODE (dep_rtx) == SET)
7434 {
7435 target = SET_DEST (dep_rtx);
7436 if (GET_CODE (target) == STRICT_LOW_PART)
7437 target = XEXP (target, 0);
7438 while (GET_CODE (target) == SUBREG)
7439 target = SUBREG_REG (target);
7440
7441 if (GET_CODE (target) == REG)
7442 {
7443 int regno = REGNO (target);
7444
7445 if (s390_safe_attr_type (insn) == TYPE_LA)
7446 {
7447 pat = PATTERN (insn);
7448 if (GET_CODE (pat) == PARALLEL)
7449 {
7450 gcc_assert (XVECLEN (pat, 0) == 2);
7451 pat = XVECEXP (pat, 0, 0);
7452 }
7453 gcc_assert (GET_CODE (pat) == SET);
7454 return refers_to_regno_p (regno, SET_SRC (pat));
7455 }
7456 else if (get_attr_atype (insn) == ATYPE_AGEN)
7457 return reg_used_in_mem_p (regno, PATTERN (insn));
7458 }
7459 }
7460 return false;
7461 }
7462
7463 /* Return 1 if DEP_INSN sets a register that INSN uses in the address generation (agen) unit. */
7464
7465 int
7466 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
7467 {
7468 rtx dep_rtx = PATTERN (dep_insn);
7469 int i;
7470
7471 if (GET_CODE (dep_rtx) == SET
7472 && addr_generation_dependency_p (dep_rtx, insn))
7473 return 1;
7474 else if (GET_CODE (dep_rtx) == PARALLEL)
7475 {
7476 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
7477 {
7478 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
7479 return 1;
7480 }
7481 }
7482 return 0;
7483 }
7484
7485
7486 /* A C statement (sans semicolon) to update the integer scheduling priority
7487 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
7488 reduce the priority to execute INSN later. Do not define this macro if
7489 you do not need to adjust the scheduling priorities of insns.
7490
7491 A STD instruction should be scheduled earlier,
7492 in order to use the bypass. */
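/* Illustrative scaling (see the switch below): for tunings newer than
   z900, a floating-point store has its priority multiplied by 8 and an
   ordinary store or STM by 2; all other insns keep their priority.  */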
7493 static int
7494 s390_adjust_priority (rtx_insn *insn, int priority)
7495 {
7496 if (! INSN_P (insn))
7497 return priority;
7498
7499 if (s390_tune <= PROCESSOR_2064_Z900)
7500 return priority;
7501
7502 switch (s390_safe_attr_type (insn))
7503 {
7504 case TYPE_FSTOREDF:
7505 case TYPE_FSTORESF:
7506 priority = priority << 3;
7507 break;
7508 case TYPE_STORE:
7509 case TYPE_STM:
7510 priority = priority << 1;
7511 break;
7512 default:
7513 break;
7514 }
7515 return priority;
7516 }
7517
7518
7519 /* The number of instructions that can be issued per cycle. */
7520
7521 static int
7522 s390_issue_rate (void)
7523 {
7524 switch (s390_tune)
7525 {
7526 case PROCESSOR_2084_Z990:
7527 case PROCESSOR_2094_Z9_109:
7528 case PROCESSOR_2094_Z9_EC:
7529 case PROCESSOR_2817_Z196:
7530 return 3;
7531 case PROCESSOR_2097_Z10:
7532 return 2;
7533 case PROCESSOR_9672_G5:
7534 case PROCESSOR_9672_G6:
7535 case PROCESSOR_2064_Z900:
7536 /* Starting with EC12 we use the sched_reorder hook to take care
7537 of instruction dispatch constraints. The algorithm only
7538 picks the best instruction and assumes only a single
7539 instruction gets issued per cycle. */
7540 case PROCESSOR_2827_ZEC12:
7541 case PROCESSOR_2964_Z13:
7542 default:
7543 return 1;
7544 }
7545 }
7546
7547 static int
7548 s390_first_cycle_multipass_dfa_lookahead (void)
7549 {
7550 return 4;
7551 }
7552
7553 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
7554 Fix up MEMs as required. */
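/* Illustrative transformation (readable from the MEM case below): a
   literal pool reference such as (mem (symbol_ref .LC0)) is rewritten to
   (mem (unspec [(symbol_ref .LC0) (base_reg)] UNSPEC_LTREF)), making the
   use of the literal pool base register explicit.  */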
7555
7556 static void
7557 annotate_constant_pool_refs (rtx *x)
7558 {
7559 int i, j;
7560 const char *fmt;
7561
7562 gcc_assert (GET_CODE (*x) != SYMBOL_REF
7563 || !CONSTANT_POOL_ADDRESS_P (*x));
7564
7565 /* Literal pool references can only occur inside a MEM ... */
7566 if (GET_CODE (*x) == MEM)
7567 {
7568 rtx memref = XEXP (*x, 0);
7569
7570 if (GET_CODE (memref) == SYMBOL_REF
7571 && CONSTANT_POOL_ADDRESS_P (memref))
7572 {
7573 rtx base = cfun->machine->base_reg;
7574 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
7575 UNSPEC_LTREF);
7576
7577 *x = replace_equiv_address (*x, addr);
7578 return;
7579 }
7580
7581 if (GET_CODE (memref) == CONST
7582 && GET_CODE (XEXP (memref, 0)) == PLUS
7583 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
7584 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
7585 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
7586 {
7587 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
7588 rtx sym = XEXP (XEXP (memref, 0), 0);
7589 rtx base = cfun->machine->base_reg;
7590 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7591 UNSPEC_LTREF);
7592
7593 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
7594 return;
7595 }
7596 }
7597
7598 /* ... or a load-address type pattern. */
7599 if (GET_CODE (*x) == SET)
7600 {
7601 rtx addrref = SET_SRC (*x);
7602
7603 if (GET_CODE (addrref) == SYMBOL_REF
7604 && CONSTANT_POOL_ADDRESS_P (addrref))
7605 {
7606 rtx base = cfun->machine->base_reg;
7607 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
7608 UNSPEC_LTREF);
7609
7610 SET_SRC (*x) = addr;
7611 return;
7612 }
7613
7614 if (GET_CODE (addrref) == CONST
7615 && GET_CODE (XEXP (addrref, 0)) == PLUS
7616 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
7617 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
7618 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
7619 {
7620 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
7621 rtx sym = XEXP (XEXP (addrref, 0), 0);
7622 rtx base = cfun->machine->base_reg;
7623 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7624 UNSPEC_LTREF);
7625
7626 SET_SRC (*x) = plus_constant (Pmode, addr, off);
7627 return;
7628 }
7629 }
7630
7631 /* Annotate LTREL_BASE as well. */
7632 if (GET_CODE (*x) == UNSPEC
7633 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7634 {
7635 rtx base = cfun->machine->base_reg;
7636 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
7637 UNSPEC_LTREL_BASE);
7638 return;
7639 }
7640
7641 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7642 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7643 {
7644 if (fmt[i] == 'e')
7645 {
7646 annotate_constant_pool_refs (&XEXP (*x, i));
7647 }
7648 else if (fmt[i] == 'E')
7649 {
7650 for (j = 0; j < XVECLEN (*x, i); j++)
7651 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
7652 }
7653 }
7654 }
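
/* Schematic illustration (shapes simplified, not dumped from a real
   build): a literal pool load such as

       (set (reg:DI 3) (mem:DI (symbol_ref ("*.LC0"))))

   is rewritten by annotate_constant_pool_refs into

       (set (reg:DI 3)
            (mem:DI (unspec [(symbol_ref ("*.LC0"))
                             (reg %r13)] UNSPEC_LTREF)))

   with the second unspec operand being cfun->machine->base_reg.  */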
7655
7656 /* Split all branches that exceed the maximum distance.
7657 Returns true if this created a new literal pool entry. */
7658
7659 static int
7660 s390_split_branches (void)
7661 {
7662 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
7663 int new_literal = 0, ret;
7664 rtx_insn *insn;
7665 rtx pat, target;
7666 rtx *label;
7667
7668 /* We need correct insn addresses. */
7669
7670 shorten_branches (get_insns ());
7671
7672 /* Find all branches that exceed 64KB, and split them. */
7673
7674 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7675 {
7676 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
7677 continue;
7678
7679 pat = PATTERN (insn);
7680 if (GET_CODE (pat) == PARALLEL)
7681 pat = XVECEXP (pat, 0, 0);
7682 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
7683 continue;
7684
7685 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
7686 {
7687 label = &SET_SRC (pat);
7688 }
7689 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
7690 {
7691 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
7692 label = &XEXP (SET_SRC (pat), 1);
7693 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
7694 label = &XEXP (SET_SRC (pat), 2);
7695 else
7696 continue;
7697 }
7698 else
7699 continue;
7700
7701 if (get_attr_length (insn) <= 4)
7702 continue;
7703
7704 /* We are going to use the return register as a scratch register;
7705 make sure it will be saved/restored by the prologue/epilogue. */
7706 cfun_frame_layout.save_return_addr_p = 1;
7707
7708 if (!flag_pic)
7709 {
7710 new_literal = 1;
7711 rtx mem = force_const_mem (Pmode, *label);
7712 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
7713 insn);
7714 INSN_ADDRESSES_NEW (set_insn, -1);
7715 annotate_constant_pool_refs (&PATTERN (set_insn));
7716
7717 target = temp_reg;
7718 }
7719 else
7720 {
7721 new_literal = 1;
7722 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
7723 UNSPEC_LTREL_OFFSET);
7724 target = gen_rtx_CONST (Pmode, target);
7725 target = force_const_mem (Pmode, target);
7726 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
7727 insn);
7728 INSN_ADDRESSES_NEW (set_insn, -1);
7729 annotate_constant_pool_refs (&PATTERN (set_insn));
7730
7731 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
7732 cfun->machine->base_reg),
7733 UNSPEC_LTREL_BASE);
7734 target = gen_rtx_PLUS (Pmode, temp_reg, target);
7735 }
7736
7737 ret = validate_change (insn, label, target, 0);
7738 gcc_assert (ret);
7739 }
7740
7741 return new_literal;
7742 }
7743
7744
7745 /* Find an annotated literal pool symbol referenced in RTX X,
7746 and store it at REF. Will abort if X contains references to
7747 more than one such pool symbol; multiple references to the same
7748 symbol are allowed, however.
7749
7750 The rtx pointed to by REF must be initialized to NULL_RTX
7751 by the caller before calling this routine. */
7752
7753 static void
7754 find_constant_pool_ref (rtx x, rtx *ref)
7755 {
7756 int i, j;
7757 const char *fmt;
7758
7759 /* Ignore LTREL_BASE references. */
7760 if (GET_CODE (x) == UNSPEC
7761 && XINT (x, 1) == UNSPEC_LTREL_BASE)
7762 return;
7763 /* Likewise POOL_ENTRY insns. */
7764 if (GET_CODE (x) == UNSPEC_VOLATILE
7765 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
7766 return;
7767
7768 gcc_assert (GET_CODE (x) != SYMBOL_REF
7769 || !CONSTANT_POOL_ADDRESS_P (x));
7770
7771 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
7772 {
7773 rtx sym = XVECEXP (x, 0, 0);
7774 gcc_assert (GET_CODE (sym) == SYMBOL_REF
7775 && CONSTANT_POOL_ADDRESS_P (sym));
7776
7777 if (*ref == NULL_RTX)
7778 *ref = sym;
7779 else
7780 gcc_assert (*ref == sym);
7781
7782 return;
7783 }
7784
7785 fmt = GET_RTX_FORMAT (GET_CODE (x));
7786 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7787 {
7788 if (fmt[i] == 'e')
7789 {
7790 find_constant_pool_ref (XEXP (x, i), ref);
7791 }
7792 else if (fmt[i] == 'E')
7793 {
7794 for (j = 0; j < XVECLEN (x, i); j++)
7795 find_constant_pool_ref (XVECEXP (x, i, j), ref);
7796 }
7797 }
7798 }
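
/* Typical calling sequence (sketch; mirrors the uses further below in
   this file):

       rtx pool_ref = NULL_RTX;
       find_constant_pool_ref (PATTERN (insn), &pool_ref);
       if (pool_ref)
         {
           rtx constant = get_pool_constant (pool_ref);
           machine_mode mode = get_pool_mode (pool_ref);
           ...
         }
 */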
7799
7800 /* Replace every reference to the annotated literal pool
7801 symbol REF in X by its base plus OFFSET. */
7802
7803 static void
7804 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
7805 {
7806 int i, j;
7807 const char *fmt;
7808
7809 gcc_assert (*x != ref);
7810
7811 if (GET_CODE (*x) == UNSPEC
7812 && XINT (*x, 1) == UNSPEC_LTREF
7813 && XVECEXP (*x, 0, 0) == ref)
7814 {
7815 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
7816 return;
7817 }
7818
7819 if (GET_CODE (*x) == PLUS
7820 && GET_CODE (XEXP (*x, 1)) == CONST_INT
7821 && GET_CODE (XEXP (*x, 0)) == UNSPEC
7822 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
7823 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
7824 {
7825 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
7826 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
7827 return;
7828 }
7829
7830 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7831 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7832 {
7833 if (fmt[i] == 'e')
7834 {
7835 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
7836 }
7837 else if (fmt[i] == 'E')
7838 {
7839 for (j = 0; j < XVECLEN (*x, i); j++)
7840 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
7841 }
7842 }
7843 }
7844
7845 /* Check whether X contains an UNSPEC_LTREL_BASE.
7846 Return its constant pool symbol if found, NULL_RTX otherwise. */
7847
7848 static rtx
7849 find_ltrel_base (rtx x)
7850 {
7851 int i, j;
7852 const char *fmt;
7853
7854 if (GET_CODE (x) == UNSPEC
7855 && XINT (x, 1) == UNSPEC_LTREL_BASE)
7856 return XVECEXP (x, 0, 0);
7857
7858 fmt = GET_RTX_FORMAT (GET_CODE (x));
7859 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7860 {
7861 if (fmt[i] == 'e')
7862 {
7863 rtx fnd = find_ltrel_base (XEXP (x, i));
7864 if (fnd)
7865 return fnd;
7866 }
7867 else if (fmt[i] == 'E')
7868 {
7869 for (j = 0; j < XVECLEN (x, i); j++)
7870 {
7871 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
7872 if (fnd)
7873 return fnd;
7874 }
7875 }
7876 }
7877
7878 return NULL_RTX;
7879 }
7880
7881 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
7882
7883 static void
7884 replace_ltrel_base (rtx *x)
7885 {
7886 int i, j;
7887 const char *fmt;
7888
7889 if (GET_CODE (*x) == UNSPEC
7890 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7891 {
7892 *x = XVECEXP (*x, 0, 1);
7893 return;
7894 }
7895
7896 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7897 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7898 {
7899 if (fmt[i] == 'e')
7900 {
7901 replace_ltrel_base (&XEXP (*x, i));
7902 }
7903 else if (fmt[i] == 'E')
7904 {
7905 for (j = 0; j < XVECLEN (*x, i); j++)
7906 replace_ltrel_base (&XVECEXP (*x, i, j));
7907 }
7908 }
7909 }
7910
7911
7912 /* We keep a list of constants which we have to add to internal
7913 constant tables in the middle of large functions. */
7914
7915 #define NR_C_MODES 32
7916 machine_mode constant_modes[NR_C_MODES] =
7917 {
7918 TFmode, TImode, TDmode,
7919 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
7920 V4SFmode, V2DFmode, V1TFmode,
7921 DFmode, DImode, DDmode,
7922 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
7923 SFmode, SImode, SDmode,
7924 V4QImode, V2HImode, V1SImode, V1SFmode,
7925 HImode,
7926 V2QImode, V1HImode,
7927 QImode,
7928 V1QImode
7929 };
7930
7931 struct constant
7932 {
7933 struct constant *next;
7934 rtx value;
7935 rtx_code_label *label;
7936 };
7937
7938 struct constant_pool
7939 {
7940 struct constant_pool *next;
7941 rtx_insn *first_insn;
7942 rtx_insn *pool_insn;
7943 bitmap insns;
7944 rtx_insn *emit_pool_after;
7945
7946 struct constant *constants[NR_C_MODES];
7947 struct constant *execute;
7948 rtx_code_label *label;
7949 int size;
7950 };
7951
7952 /* Allocate new constant_pool structure. */
7953
7954 static struct constant_pool *
7955 s390_alloc_pool (void)
7956 {
7957 struct constant_pool *pool;
7958 int i;
7959
7960 pool = (struct constant_pool *) xmalloc (sizeof *pool);
7961 pool->next = NULL;
7962 for (i = 0; i < NR_C_MODES; i++)
7963 pool->constants[i] = NULL;
7964
7965 pool->execute = NULL;
7966 pool->label = gen_label_rtx ();
7967 pool->first_insn = NULL;
7968 pool->pool_insn = NULL;
7969 pool->insns = BITMAP_ALLOC (NULL);
7970 pool->size = 0;
7971 pool->emit_pool_after = NULL;
7972
7973 return pool;
7974 }
7975
7976 /* Create new constant pool covering instructions starting at INSN
7977 and chain it to the end of POOL_LIST. */
7978
7979 static struct constant_pool *
7980 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
7981 {
7982 struct constant_pool *pool, **prev;
7983
7984 pool = s390_alloc_pool ();
7985 pool->first_insn = insn;
7986
7987 for (prev = pool_list; *prev; prev = &(*prev)->next)
7988 ;
7989 *prev = pool;
7990
7991 return pool;
7992 }
7993
7994 /* End range of instructions covered by POOL at INSN and emit
7995 placeholder insn representing the pool. */
7996
7997 static void
7998 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
7999 {
8000 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8001
8002 if (!insn)
8003 insn = get_last_insn ();
8004
8005 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8006 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8007 }
8008
8009 /* Add INSN to the list of insns covered by POOL. */
8010
8011 static void
8012 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8013 {
8014 bitmap_set_bit (pool->insns, INSN_UID (insn));
8015 }
8016
8017 /* Return pool out of POOL_LIST that covers INSN. */
8018
8019 static struct constant_pool *
8020 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8021 {
8022 struct constant_pool *pool;
8023
8024 for (pool = pool_list; pool; pool = pool->next)
8025 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8026 break;
8027
8028 return pool;
8029 }
8030
8031 /* Add constant VAL of mode MODE to the constant pool POOL. */
8032
8033 static void
8034 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8035 {
8036 struct constant *c;
8037 int i;
8038
8039 for (i = 0; i < NR_C_MODES; i++)
8040 if (constant_modes[i] == mode)
8041 break;
8042 gcc_assert (i != NR_C_MODES);
8043
8044 for (c = pool->constants[i]; c != NULL; c = c->next)
8045 if (rtx_equal_p (val, c->value))
8046 break;
8047
8048 if (c == NULL)
8049 {
8050 c = (struct constant *) xmalloc (sizeof *c);
8051 c->value = val;
8052 c->label = gen_label_rtx ();
8053 c->next = pool->constants[i];
8054 pool->constants[i] = c;
8055 pool->size += GET_MODE_SIZE (mode);
8056 }
8057 }
8058
8059 /* Return an rtx that represents the offset of X from the start of
8060 pool POOL. */
8061
8062 static rtx
8063 s390_pool_offset (struct constant_pool *pool, rtx x)
8064 {
8065 rtx label;
8066
8067 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8068 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8069 UNSPEC_POOL_OFFSET);
8070 return gen_rtx_CONST (GET_MODE (x), x);
8071 }
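
/* The result has the schematic shape (illustrative):

       (const (unspec [X (label_ref POOL->label)] UNSPEC_POOL_OFFSET))

   i.e. the distance of X from the pool base label, resolved when the
   pool is actually output.  */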
8072
8073 /* Find constant VAL of mode MODE in the constant pool POOL.
8074 Return an RTX describing the distance from the start of
8075 the pool to the location of the new constant. */
8076
8077 static rtx
8078 s390_find_constant (struct constant_pool *pool, rtx val,
8079 machine_mode mode)
8080 {
8081 struct constant *c;
8082 int i;
8083
8084 for (i = 0; i < NR_C_MODES; i++)
8085 if (constant_modes[i] == mode)
8086 break;
8087 gcc_assert (i != NR_C_MODES);
8088
8089 for (c = pool->constants[i]; c != NULL; c = c->next)
8090 if (rtx_equal_p (val, c->value))
8091 break;
8092
8093 gcc_assert (c);
8094
8095 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8096 }
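
/* Sketch of the add/find round trip (VAL stands for some pooled
   constant; illustrative only):

       s390_add_constant (pool, val, DImode);
       ...
       rtx off = s390_find_constant (pool, val, DImode);

   OFF is a pool-relative UNSPEC_POOL_OFFSET expression which later
   replaces the original literal pool reference via
   replace_constant_pool_ref.  */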
8097
8098 /* Check whether INSN is an execute. Return the label_ref to its
8099 execute target template if so, NULL_RTX otherwise. */
8100
8101 static rtx
8102 s390_execute_label (rtx insn)
8103 {
8104 if (NONJUMP_INSN_P (insn)
8105 && GET_CODE (PATTERN (insn)) == PARALLEL
8106 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8107 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8108 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8109
8110 return NULL_RTX;
8111 }
8112
8113 /* Add execute target for INSN to the constant pool POOL. */
8114
8115 static void
8116 s390_add_execute (struct constant_pool *pool, rtx insn)
8117 {
8118 struct constant *c;
8119
8120 for (c = pool->execute; c != NULL; c = c->next)
8121 if (INSN_UID (insn) == INSN_UID (c->value))
8122 break;
8123
8124 if (c == NULL)
8125 {
8126 c = (struct constant *) xmalloc (sizeof *c);
8127 c->value = insn;
8128 c->label = gen_label_rtx ();
8129 c->next = pool->execute;
8130 pool->execute = c;
8131 pool->size += 6;
8132 }
8133 }
8134
8135 /* Find execute target for INSN in the constant pool POOL.
8136 Return an RTX describing the distance from the start of
8137 the pool to the location of the execute target. */
8138
8139 static rtx
8140 s390_find_execute (struct constant_pool *pool, rtx insn)
8141 {
8142 struct constant *c;
8143
8144 for (c = pool->execute; c != NULL; c = c->next)
8145 if (INSN_UID (insn) == INSN_UID (c->value))
8146 break;
8147
8148 gcc_assert (c);
8149
8150 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8151 }
8152
8153 /* For an execute INSN, extract the execute target template. */
8154
8155 static rtx
8156 s390_execute_target (rtx insn)
8157 {
8158 rtx pattern = PATTERN (insn);
8159 gcc_assert (s390_execute_label (insn));
8160
8161 if (XVECLEN (pattern, 0) == 2)
8162 {
8163 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8164 }
8165 else
8166 {
8167 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8168 int i;
8169
8170 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8171 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8172
8173 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8174 }
8175
8176 return pattern;
8177 }
8178
8179 /* Indicate that INSN cannot be duplicated. This is the case for
8180 execute insns that carry a unique label. */
8181
8182 static bool
8183 s390_cannot_copy_insn_p (rtx_insn *insn)
8184 {
8185 rtx label = s390_execute_label (insn);
8186 return label && label != const0_rtx;
8187 }
8188
8189 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8190 do not emit the pool base label. */
8191
8192 static void
8193 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8194 {
8195 struct constant *c;
8196 rtx_insn *insn = pool->pool_insn;
8197 int i;
8198
8199 /* Switch to rodata section. */
8200 if (TARGET_CPU_ZARCH)
8201 {
8202 insn = emit_insn_after (gen_pool_section_start (), insn);
8203 INSN_ADDRESSES_NEW (insn, -1);
8204 }
8205
8206 /* Ensure minimum pool alignment. */
8207 if (TARGET_CPU_ZARCH)
8208 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8209 else
8210 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8211 INSN_ADDRESSES_NEW (insn, -1);
8212
8213 /* Emit pool base label. */
8214 if (!remote_label)
8215 {
8216 insn = emit_label_after (pool->label, insn);
8217 INSN_ADDRESSES_NEW (insn, -1);
8218 }
8219
8220 /* Dump constants in descending alignment requirement order,
8221 ensuring proper alignment for every constant. */
8222 for (i = 0; i < NR_C_MODES; i++)
8223 for (c = pool->constants[i]; c; c = c->next)
8224 {
8225 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8226 rtx value = copy_rtx (c->value);
8227 if (GET_CODE (value) == CONST
8228 && GET_CODE (XEXP (value, 0)) == UNSPEC
8229 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8230 && XVECLEN (XEXP (value, 0), 0) == 1)
8231 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8232
8233 insn = emit_label_after (c->label, insn);
8234 INSN_ADDRESSES_NEW (insn, -1);
8235
8236 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8237 gen_rtvec (1, value),
8238 UNSPECV_POOL_ENTRY);
8239 insn = emit_insn_after (value, insn);
8240 INSN_ADDRESSES_NEW (insn, -1);
8241 }
8242
8243 /* Ensure minimum alignment for instructions. */
8244 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8245 INSN_ADDRESSES_NEW (insn, -1);
8246
8247 /* Output in-pool execute template insns. */
8248 for (c = pool->execute; c; c = c->next)
8249 {
8250 insn = emit_label_after (c->label, insn);
8251 INSN_ADDRESSES_NEW (insn, -1);
8252
8253 insn = emit_insn_after (s390_execute_target (c->value), insn);
8254 INSN_ADDRESSES_NEW (insn, -1);
8255 }
8256
8257 /* Switch back to previous section. */
8258 if (TARGET_CPU_ZARCH)
8259 {
8260 insn = emit_insn_after (gen_pool_section_end (), insn);
8261 INSN_ADDRESSES_NEW (insn, -1);
8262 }
8263
8264 insn = emit_barrier_after (insn);
8265 INSN_ADDRESSES_NEW (insn, -1);
8266
8267 /* Remove placeholder insn. */
8268 remove_insn (pool->pool_insn);
8269 }
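
/* The sequence emitted above therefore looks roughly like this for the
   zarch case with REMOTE_LABEL false (illustrative):

       pool_section_start
       pool_align 8
     POOL->label:
     C->label:    UNSPECV_POOL_ENTRY (constant, one per pool entry)
       ...
       pool_align 2
       <execute target templates, each preceded by its label>
       pool_section_end
       barrier  */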
8270
8271 /* Free all memory used by POOL. */
8272
8273 static void
8274 s390_free_pool (struct constant_pool *pool)
8275 {
8276 struct constant *c, *next;
8277 int i;
8278
8279 for (i = 0; i < NR_C_MODES; i++)
8280 for (c = pool->constants[i]; c; c = next)
8281 {
8282 next = c->next;
8283 free (c);
8284 }
8285
8286 for (c = pool->execute; c; c = next)
8287 {
8288 next = c->next;
8289 free (c);
8290 }
8291
8292 BITMAP_FREE (pool->insns);
8293 free (pool);
8294 }
8295
8296
8297 /* Collect main literal pool. Return NULL on overflow. */
8298
8299 static struct constant_pool *
8300 s390_mainpool_start (void)
8301 {
8302 struct constant_pool *pool;
8303 rtx_insn *insn;
8304
8305 pool = s390_alloc_pool ();
8306
8307 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8308 {
8309 if (NONJUMP_INSN_P (insn)
8310 && GET_CODE (PATTERN (insn)) == SET
8311 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8312 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8313 {
8314 /* There might be two main_pool instructions if base_reg
8315 is call-clobbered; one for shrink-wrapped code and one
8316 for the rest. We want to keep the first. */
8317 if (pool->pool_insn)
8318 {
8319 insn = PREV_INSN (insn);
8320 delete_insn (NEXT_INSN (insn));
8321 continue;
8322 }
8323 pool->pool_insn = insn;
8324 }
8325
8326 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8327 {
8328 s390_add_execute (pool, insn);
8329 }
8330 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8331 {
8332 rtx pool_ref = NULL_RTX;
8333 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8334 if (pool_ref)
8335 {
8336 rtx constant = get_pool_constant (pool_ref);
8337 machine_mode mode = get_pool_mode (pool_ref);
8338 s390_add_constant (pool, constant, mode);
8339 }
8340 }
8341
8342 /* If hot/cold partitioning is enabled we have to make sure that
8343 the literal pool is emitted in the same section where the
8344 initialization of the literal pool base pointer takes place.
8345 emit_pool_after is only used in the non-overflow case on
8346 non-Z CPUs where we can emit the literal pool at the end of the
8347 function body within the text section. */
8348 if (NOTE_P (insn)
8349 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8350 && !pool->emit_pool_after)
8351 pool->emit_pool_after = PREV_INSN (insn);
8352 }
8353
8354 gcc_assert (pool->pool_insn || pool->size == 0);
8355
8356 if (pool->size >= 4096)
8357 {
8358 /* We're going to chunkify the pool, so remove the main
8359 pool placeholder insn. */
8360 remove_insn (pool->pool_insn);
8361
8362 s390_free_pool (pool);
8363 pool = NULL;
8364 }
8365
8366 /* If the function ends with the section where the literal pool
8367 should be emitted, set the marker to its end. */
8368 if (pool && !pool->emit_pool_after)
8369 pool->emit_pool_after = get_last_insn ();
8370
8371 return pool;
8372 }
8373
8374 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8375 Modify the current function to output the pool constants as well as
8376 the pool register setup instruction. */
8377
8378 static void
8379 s390_mainpool_finish (struct constant_pool *pool)
8380 {
8381 rtx base_reg = cfun->machine->base_reg;
8382
8383 /* If the pool is empty, we're done. */
8384 if (pool->size == 0)
8385 {
8386 /* We don't actually need a base register after all. */
8387 cfun->machine->base_reg = NULL_RTX;
8388
8389 if (pool->pool_insn)
8390 remove_insn (pool->pool_insn);
8391 s390_free_pool (pool);
8392 return;
8393 }
8394
8395 /* We need correct insn addresses. */
8396 shorten_branches (get_insns ());
8397
8398 /* On zSeries, we use a LARL to load the pool register. The pool is
8399 located in the .rodata section, so we emit it after the function. */
8400 if (TARGET_CPU_ZARCH)
8401 {
8402 rtx set = gen_main_base_64 (base_reg, pool->label);
8403 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8404 INSN_ADDRESSES_NEW (insn, -1);
8405 remove_insn (pool->pool_insn);
8406
8407 insn = get_last_insn ();
8408 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8409 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8410
8411 s390_dump_pool (pool, 0);
8412 }
8413
8414 /* On S/390, if the total size of the function's code plus literal pool
8415 does not exceed 4096 bytes, we use BASR to set up a function base
8416 pointer, and emit the literal pool at the end of the function. */
8417 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
8418 + pool->size + 8 /* alignment slop */ < 4096)
8419 {
8420 rtx set = gen_main_base_31_small (base_reg, pool->label);
8421 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8422 INSN_ADDRESSES_NEW (insn, -1);
8423 remove_insn (pool->pool_insn);
8424
8425 insn = emit_label_after (pool->label, insn);
8426 INSN_ADDRESSES_NEW (insn, -1);
8427
8428 /* emit_pool_after will be set by s390_mainpool_start to the
8429 last insn of the section where the literal pool should be
8430 emitted. */
8431 insn = pool->emit_pool_after;
8432
8433 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8434 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8435
8436 s390_dump_pool (pool, 1);
8437 }
8438
8439 /* Otherwise, we emit an inline literal pool and use BASR to branch
8440 over it, setting up the pool register at the same time. */
8441 else
8442 {
8443 rtx_code_label *pool_end = gen_label_rtx ();
8444
8445 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
8446 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
8447 JUMP_LABEL (insn) = pool_end;
8448 INSN_ADDRESSES_NEW (insn, -1);
8449 remove_insn (pool->pool_insn);
8450
8451 insn = emit_label_after (pool->label, insn);
8452 INSN_ADDRESSES_NEW (insn, -1);
8453
8454 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8455 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8456
8457 insn = emit_label_after (pool_end, pool->pool_insn);
8458 INSN_ADDRESSES_NEW (insn, -1);
8459
8460 s390_dump_pool (pool, 1);
8461 }
8462
8463
8464 /* Replace all literal pool references. */
8465
8466 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8467 {
8468 if (INSN_P (insn))
8469 replace_ltrel_base (&PATTERN (insn));
8470
8471 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8472 {
8473 rtx addr, pool_ref = NULL_RTX;
8474 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8475 if (pool_ref)
8476 {
8477 if (s390_execute_label (insn))
8478 addr = s390_find_execute (pool, insn);
8479 else
8480 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
8481 get_pool_mode (pool_ref));
8482
8483 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8484 INSN_CODE (insn) = -1;
8485 }
8486 }
8487 }
8488
8489
8490 /* Free the pool. */
8491 s390_free_pool (pool);
8492 }
8493
8494 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8495 We have decided we cannot use this pool, so revert all changes
8496 to the current function that were done by s390_mainpool_start. */
8497 static void
8498 s390_mainpool_cancel (struct constant_pool *pool)
8499 {
8500 /* We didn't actually change the instruction stream, so simply
8501 free the pool memory. */
8502 s390_free_pool (pool);
8503 }
8504
8505
8506 /* Chunkify the literal pool. */
8507
8508 #define S390_POOL_CHUNK_MIN 0xc00
8509 #define S390_POOL_CHUNK_MAX 0xe00
8510
8511 static struct constant_pool *
8512 s390_chunkify_start (void)
8513 {
8514 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
8515 int extra_size = 0;
8516 bitmap far_labels;
8517 rtx pending_ltrel = NULL_RTX;
8518 rtx_insn *insn;
8519
8520 rtx (*gen_reload_base) (rtx, rtx) =
8521 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
8522
8523
8524 /* We need correct insn addresses. */
8525
8526 shorten_branches (get_insns ());
8527
8528 /* Scan all insns and move literals to pool chunks. */
8529
8530 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8531 {
8532 bool section_switch_p = false;
8533
8534 /* Check for pending LTREL_BASE. */
8535 if (INSN_P (insn))
8536 {
8537 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
8538 if (ltrel_base)
8539 {
8540 gcc_assert (ltrel_base == pending_ltrel);
8541 pending_ltrel = NULL_RTX;
8542 }
8543 }
8544
8545 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8546 {
8547 if (!curr_pool)
8548 curr_pool = s390_start_pool (&pool_list, insn);
8549
8550 s390_add_execute (curr_pool, insn);
8551 s390_add_pool_insn (curr_pool, insn);
8552 }
8553 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8554 {
8555 rtx pool_ref = NULL_RTX;
8556 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8557 if (pool_ref)
8558 {
8559 rtx constant = get_pool_constant (pool_ref);
8560 machine_mode mode = get_pool_mode (pool_ref);
8561
8562 if (!curr_pool)
8563 curr_pool = s390_start_pool (&pool_list, insn);
8564
8565 s390_add_constant (curr_pool, constant, mode);
8566 s390_add_pool_insn (curr_pool, insn);
8567
8568 /* Don't split the pool chunk between an LTREL_OFFSET load
8569 and the corresponding LTREL_BASE. */
8570 if (GET_CODE (constant) == CONST
8571 && GET_CODE (XEXP (constant, 0)) == UNSPEC
8572 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
8573 {
8574 gcc_assert (!pending_ltrel);
8575 pending_ltrel = pool_ref;
8576 }
8577 }
8578 }
8579
8580 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
8581 {
8582 if (curr_pool)
8583 s390_add_pool_insn (curr_pool, insn);
8584 /* An LTREL_BASE must follow within the same basic block. */
8585 gcc_assert (!pending_ltrel);
8586 }
8587
8588 if (NOTE_P (insn))
8589 switch (NOTE_KIND (insn))
8590 {
8591 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
8592 section_switch_p = true;
8593 break;
8594 case NOTE_INSN_VAR_LOCATION:
8595 case NOTE_INSN_CALL_ARG_LOCATION:
8596 continue;
8597 default:
8598 break;
8599 }
8600
8601 if (!curr_pool
8602 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
8603 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
8604 continue;
8605
8606 if (TARGET_CPU_ZARCH)
8607 {
8608 if (curr_pool->size < S390_POOL_CHUNK_MAX)
8609 continue;
8610
8611 s390_end_pool (curr_pool, NULL);
8612 curr_pool = NULL;
8613 }
8614 else
8615 {
8616 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
8617 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
8618 + extra_size;
8619
8620 /* We will later have to insert base register reload insns.
8621 Those will have an effect on code size, which we need to
8622 consider here. This calculation makes rather pessimistic
8623 worst-case assumptions. */
8624 if (LABEL_P (insn))
8625 extra_size += 6;
8626
8627 if (chunk_size < S390_POOL_CHUNK_MIN
8628 && curr_pool->size < S390_POOL_CHUNK_MIN
8629 && !section_switch_p)
8630 continue;
8631
8632 /* Pool chunks can only be inserted after BARRIERs ... */
8633 if (BARRIER_P (insn))
8634 {
8635 s390_end_pool (curr_pool, insn);
8636 curr_pool = NULL;
8637 extra_size = 0;
8638 }
8639
8640 /* ... so if we don't find one in time, create one. */
8641 else if (chunk_size > S390_POOL_CHUNK_MAX
8642 || curr_pool->size > S390_POOL_CHUNK_MAX
8643 || section_switch_p)
8644 {
8645 rtx_insn *label, *jump, *barrier, *next, *prev;
8646
8647 if (!section_switch_p)
8648 {
8649 /* We can insert the barrier only after a 'real' insn. */
8650 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
8651 continue;
8652 if (get_attr_length (insn) == 0)
8653 continue;
8654 /* Don't separate LTREL_BASE from the corresponding
8655 LTREL_OFFSET load. */
8656 if (pending_ltrel)
8657 continue;
8658 next = insn;
8659 do
8660 {
8661 insn = next;
8662 next = NEXT_INSN (insn);
8663 }
8664 while (next
8665 && NOTE_P (next)
8666 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
8667 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
8668 }
8669 else
8670 {
8671 gcc_assert (!pending_ltrel);
8672
8673 /* The old pool has to end before the section switch
8674 note in order to make it part of the current
8675 section. */
8676 insn = PREV_INSN (insn);
8677 }
8678
8679 label = gen_label_rtx ();
8680 prev = insn;
8681 if (prev && NOTE_P (prev))
8682 prev = prev_nonnote_insn (prev);
8683 if (prev)
8684 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
8685 INSN_LOCATION (prev));
8686 else
8687 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
8688 barrier = emit_barrier_after (jump);
8689 insn = emit_label_after (label, barrier);
8690 JUMP_LABEL (jump) = label;
8691 LABEL_NUSES (label) = 1;
8692
8693 INSN_ADDRESSES_NEW (jump, -1);
8694 INSN_ADDRESSES_NEW (barrier, -1);
8695 INSN_ADDRESSES_NEW (insn, -1);
8696
8697 s390_end_pool (curr_pool, barrier);
8698 curr_pool = NULL;
8699 extra_size = 0;
8700 }
8701 }
8702 }
8703
8704 if (curr_pool)
8705 s390_end_pool (curr_pool, NULL);
8706 gcc_assert (!pending_ltrel);
8707
8708 /* Find all labels that are branched into
8709 from an insn belonging to a different chunk. */
8710
8711 far_labels = BITMAP_ALLOC (NULL);
8712
8713 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8714 {
8715 rtx_jump_table_data *table;
8716
8717 /* Labels marked with LABEL_PRESERVE_P can be the target
8718 of non-local jumps, so we have to mark them.
8719 The same holds for named labels.
8720
8721 Don't do that, however, if it is the label before
8722 a jump table. */
8723
8724 if (LABEL_P (insn)
8725 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
8726 {
8727 rtx_insn *vec_insn = NEXT_INSN (insn);
8728 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
8729 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
8730 }
8731 /* Check potential targets in a table jump (casesi_jump). */
8732 else if (tablejump_p (insn, NULL, &table))
8733 {
8734 rtx vec_pat = PATTERN (table);
8735 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
8736
8737 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
8738 {
8739 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
8740
8741 if (s390_find_pool (pool_list, label)
8742 != s390_find_pool (pool_list, insn))
8743 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8744 }
8745 }
8746 /* If we have a direct jump (conditional or unconditional),
8747 check all potential targets. */
8748 else if (JUMP_P (insn))
8749 {
8750 rtx pat = PATTERN (insn);
8751
8752 if (GET_CODE (pat) == PARALLEL)
8753 pat = XVECEXP (pat, 0, 0);
8754
8755 if (GET_CODE (pat) == SET)
8756 {
8757 rtx label = JUMP_LABEL (insn);
8758 if (label && !ANY_RETURN_P (label))
8759 {
8760 if (s390_find_pool (pool_list, label)
8761 != s390_find_pool (pool_list, insn))
8762 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8763 }
8764 }
8765 }
8766 }
8767
8768 /* Insert base register reload insns before every pool. */
8769
8770 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8771 {
8772 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8773 curr_pool->label);
8774 rtx_insn *insn = curr_pool->first_insn;
8775 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
8776 }
8777
8778 /* Insert base register reload insns at every far label. */
8779
8780 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8781 if (LABEL_P (insn)
8782 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
8783 {
8784 struct constant_pool *pool = s390_find_pool (pool_list, insn);
8785 if (pool)
8786 {
8787 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8788 pool->label);
8789 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
8790 }
8791 }
8792
8793
8794 BITMAP_FREE (far_labels);
8795
8796
8797 /* Recompute insn addresses. */
8798
8799 init_insn_lengths ();
8800 shorten_branches (get_insns ());
8801
8802 return pool_list;
8803 }
8804
8805 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
8806 After we have decided to use this list, finish implementing
8807 all changes to the current function as required. */
8808
8809 static void
8810 s390_chunkify_finish (struct constant_pool *pool_list)
8811 {
8812 struct constant_pool *curr_pool = NULL;
8813 rtx_insn *insn;
8814
8815
8816 /* Replace all literal pool references. */
8817
8818 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8819 {
8820 if (INSN_P (insn))
8821 replace_ltrel_base (&PATTERN (insn));
8822
8823 curr_pool = s390_find_pool (pool_list, insn);
8824 if (!curr_pool)
8825 continue;
8826
8827 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8828 {
8829 rtx addr, pool_ref = NULL_RTX;
8830 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8831 if (pool_ref)
8832 {
8833 if (s390_execute_label (insn))
8834 addr = s390_find_execute (curr_pool, insn);
8835 else
8836 addr = s390_find_constant (curr_pool,
8837 get_pool_constant (pool_ref),
8838 get_pool_mode (pool_ref));
8839
8840 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8841 INSN_CODE (insn) = -1;
8842 }
8843 }
8844 }
8845
8846 /* Dump out all literal pools. */
8847
8848 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8849 s390_dump_pool (curr_pool, 0);
8850
8851 /* Free pool list. */
8852
8853 while (pool_list)
8854 {
8855 struct constant_pool *next = pool_list->next;
8856 s390_free_pool (pool_list);
8857 pool_list = next;
8858 }
8859 }
8860
8861 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
8862 We have decided we cannot use this list, so revert all changes
8863 to the current function that were done by s390_chunkify_start. */
8864
8865 static void
8866 s390_chunkify_cancel (struct constant_pool *pool_list)
8867 {
8868 struct constant_pool *curr_pool = NULL;
8869 rtx_insn *insn;
8870
8871 /* Remove all pool placeholder insns. */
8872
8873 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8874 {
8875 /* Did we insert an extra barrier? Remove it. */
8876 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
8877 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
8878 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
8879
8880 if (jump && JUMP_P (jump)
8881 && barrier && BARRIER_P (barrier)
8882 && label && LABEL_P (label)
8883 && GET_CODE (PATTERN (jump)) == SET
8884 && SET_DEST (PATTERN (jump)) == pc_rtx
8885 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
8886 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
8887 {
8888 remove_insn (jump);
8889 remove_insn (barrier);
8890 remove_insn (label);
8891 }
8892
8893 remove_insn (curr_pool->pool_insn);
8894 }
8895
8896 /* Remove all base register reload insns. */
8897
8898 for (insn = get_insns (); insn; )
8899 {
8900 rtx_insn *next_insn = NEXT_INSN (insn);
8901
8902 if (NONJUMP_INSN_P (insn)
8903 && GET_CODE (PATTERN (insn)) == SET
8904 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
8905 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
8906 remove_insn (insn);
8907
8908 insn = next_insn;
8909 }
8910
8911 /* Free pool list. */
8912
8913 while (pool_list)
8914 {
8915 struct constant_pool *next = pool_list->next;
8916 s390_free_pool (pool_list);
8917 pool_list = next;
8918 }
8919 }
8920
8921 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
8922
8923 void
8924 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
8925 {
8926 switch (GET_MODE_CLASS (mode))
8927 {
8928 case MODE_FLOAT:
8929 case MODE_DECIMAL_FLOAT:
8930 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
8931
8932 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp), mode, align);
8933 break;
8934
8935 case MODE_INT:
8936 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
8937 mark_symbol_refs_as_used (exp);
8938 break;
8939
8940 case MODE_VECTOR_INT:
8941 case MODE_VECTOR_FLOAT:
8942 {
8943 int i;
8944 machine_mode inner_mode;
8945 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
8946
8947 inner_mode = GET_MODE_INNER (GET_MODE (exp));
8948 for (i = 0; i < XVECLEN (exp, 0); i++)
8949 s390_output_pool_entry (XVECEXP (exp, 0, i),
8950 inner_mode,
8951 i == 0
8952 ? align
8953 : GET_MODE_BITSIZE (inner_mode));
8954 }
8955 break;
8956
8957 default:
8958 gcc_unreachable ();
8959 }
8960 }
8961
8962
8963 /* Return an RTL expression representing the value of the return address
8964 for the frame COUNT steps up from the current frame. FRAME is the
8965 frame pointer of that frame. */
8966
8967 rtx
8968 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
8969 {
8970 int offset;
8971 rtx addr;
8972
8973 /* Without backchain, we fail for all but the current frame. */
8974
8975 if (!TARGET_BACKCHAIN && count > 0)
8976 return NULL_RTX;
8977
8978 /* For the current frame, we need to make sure the initial
8979 value of RETURN_REGNUM is actually saved. */
8980
8981 if (count == 0)
8982 {
8983 /* On non-z architectures branch splitting could overwrite r14. */
8984 if (TARGET_CPU_ZARCH)
8985 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
8986 else
8987 {
8988 cfun_frame_layout.save_return_addr_p = true;
8989 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
8990 }
8991 }
8992
8993 if (TARGET_PACKED_STACK)
8994 offset = -2 * UNITS_PER_LONG;
8995 else
8996 offset = RETURN_REGNUM * UNITS_PER_LONG;
8997
8998 addr = plus_constant (Pmode, frame, offset);
8999 addr = memory_address (Pmode, addr);
9000 return gen_rtx_MEM (Pmode, addr);
9001 }
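
/* Source-level illustration: in

       void *caller_pc (void) { return __builtin_return_address (0); }

   COUNT is 0 and the value is the incoming value of RETURN_REGNUM
   (r14), whereas __builtin_return_address (1) only works with
   -mbackchain and is loaded from the caller's frame at the offset
   computed above.  */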
9002
9003 /* Return an RTL expression representing the back chain stored in
9004 the current stack frame. */
9005
9006 rtx
9007 s390_back_chain_rtx (void)
9008 {
9009 rtx chain;
9010
9011 gcc_assert (TARGET_BACKCHAIN);
9012
9013 if (TARGET_PACKED_STACK)
9014 chain = plus_constant (Pmode, stack_pointer_rtx,
9015 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9016 else
9017 chain = stack_pointer_rtx;
9018
9019 chain = gen_rtx_MEM (Pmode, chain);
9020 return chain;
9021 }
9022
9023 /* Find first call clobbered register unused in a function.
9024 This could be used as a base register in a leaf function
9025 or for holding the return address before the epilogue. */
9026
9027 static int
9028 find_unused_clobbered_reg (void)
9029 {
9030 int i;
9031 for (i = 0; i < 6; i++)
9032 if (!df_regs_ever_live_p (i))
9033 return i;
9034 return 0;
9035 }
9036
9037
9038 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9039 clobbered hard regs in SETREG. */
9040
9041 static void
9042 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9043 {
9044 char *regs_ever_clobbered = (char *)data;
9045 unsigned int i, regno;
9046 machine_mode mode = GET_MODE (setreg);
9047
9048 if (GET_CODE (setreg) == SUBREG)
9049 {
9050 rtx inner = SUBREG_REG (setreg);
9051 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9052 return;
9053 regno = subreg_regno (setreg);
9054 }
9055 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9056 regno = REGNO (setreg);
9057 else
9058 return;
9059
9060 for (i = regno;
9061 i < regno + HARD_REGNO_NREGS (regno, mode);
9062 i++)
9063 regs_ever_clobbered[i] = 1;
9064 }
9065
9066 /* Walks through all basic blocks of the current function looking
9067 for clobbered hard regs using s390_reg_clobbered_rtx. The entries
9068 of the passed char array REGS_EVER_CLOBBERED are set to one for
9069 each of those regs. */
9070
9071 static void
9072 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9073 {
9074 basic_block cur_bb;
9075 rtx_insn *cur_insn;
9076 unsigned int i;
9077
9078 memset (regs_ever_clobbered, 0, 32);
9079
9080 /* For non-leaf functions we have to consider all call clobbered regs to be
9081 clobbered. */
9082 if (!crtl->is_leaf)
9083 {
9084 for (i = 0; i < 32; i++)
9085 regs_ever_clobbered[i] = call_really_used_regs[i];
9086 }
9087
9088 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9089 this work is done by liveness analysis (mark_regs_live_at_end).
9090 Special care is needed for functions containing landing pads. Landing pads
9091 may use the eh registers, but the code which sets these registers is not
9092 contained in that function. Hence s390_regs_ever_clobbered is not able to
9093 deal with this automatically. */
9094 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9095 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9096 if (crtl->calls_eh_return
9097 || (cfun->machine->has_landing_pad_p
9098 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9099 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9100
9101 /* For nonlocal gotos all call-saved registers have to be saved.
9102 This flag is also set for the unwinding code in libgcc.
9103 See expand_builtin_unwind_init. For regs_ever_live this is done by
9104 reload. */
9105 if (crtl->saves_all_registers)
9106 for (i = 0; i < 32; i++)
9107 if (!call_really_used_regs[i])
9108 regs_ever_clobbered[i] = 1;
9109
9110 FOR_EACH_BB_FN (cur_bb, cfun)
9111 {
9112 FOR_BB_INSNS (cur_bb, cur_insn)
9113 {
9114 rtx pat;
9115
9116 if (!INSN_P (cur_insn))
9117 continue;
9118
9119 pat = PATTERN (cur_insn);
9120
9121 /* Ignore GPR restore insns. */
9122 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9123 {
9124 if (GET_CODE (pat) == SET
9125 && GENERAL_REG_P (SET_DEST (pat)))
9126 {
9127 /* lgdr */
9128 if (GET_MODE (SET_SRC (pat)) == DImode
9129 && FP_REG_P (SET_SRC (pat)))
9130 continue;
9131
9132 /* l / lg */
9133 if (GET_CODE (SET_SRC (pat)) == MEM)
9134 continue;
9135 }
9136
9137 /* lm / lmg */
9138 if (GET_CODE (pat) == PARALLEL
9139 && load_multiple_operation (pat, VOIDmode))
9140 continue;
9141 }
9142
9143 note_stores (pat,
9144 s390_reg_clobbered_rtx,
9145 regs_ever_clobbered);
9146 }
9147 }
9148 }
9149
9150 /* Determine the frame area which actually has to be accessed
9151 in the function epilogue. The values are stored at the
9152 given pointers AREA_BOTTOM (address of the lowest used stack
9153 address) and AREA_TOP (address of the first item which does
9154 not belong to the stack frame). */
9155
9156 static void
9157 s390_frame_area (int *area_bottom, int *area_top)
9158 {
9159 int b, t;
9160
9161 b = INT_MAX;
9162 t = INT_MIN;
9163
9164 if (cfun_frame_layout.first_restore_gpr != -1)
9165 {
9166 b = (cfun_frame_layout.gprs_offset
9167 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9168 t = b + (cfun_frame_layout.last_restore_gpr
9169 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9170 }
9171
9172 if (TARGET_64BIT && cfun_save_high_fprs_p)
9173 {
9174 b = MIN (b, cfun_frame_layout.f8_offset);
9175 t = MAX (t, (cfun_frame_layout.f8_offset
9176 + cfun_frame_layout.high_fprs * 8));
9177 }
9178
9179 if (!TARGET_64BIT)
9180 {
9181 if (cfun_fpr_save_p (FPR4_REGNUM))
9182 {
9183 b = MIN (b, cfun_frame_layout.f4_offset);
9184 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9185 }
9186 if (cfun_fpr_save_p (FPR6_REGNUM))
9187 {
9188 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9189 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9190 }
9191 }
9192 *area_bottom = b;
9193 *area_top = t;
9194 }
9195 /* Update gpr_save_slots in the frame layout trying to make use of
9196 FPRs as GPR save slots.
9197 This is a helper routine of s390_register_info. */
9198
9199 static void
9200 s390_register_info_gprtofpr ()
9201 {
9202 int save_reg_slot = FPR0_REGNUM;
9203 int i, j;
9204
9205 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9206 return;
9207
9208 for (i = 15; i >= 6; i--)
9209 {
9210 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9211 continue;
9212
9213 /* Advance to the next FP register which can be used as a
9214 GPR save slot. */
9215 while ((!call_really_used_regs[save_reg_slot]
9216 || df_regs_ever_live_p (save_reg_slot)
9217 || cfun_fpr_save_p (save_reg_slot))
9218 && FP_REGNO_P (save_reg_slot))
9219 save_reg_slot++;
9220 if (!FP_REGNO_P (save_reg_slot))
9221 {
9222 /* We only want to use ldgr/lgdr if we can get rid of
9223 stm/lm entirely. So undo the gpr slot allocation in
9224 case we ran out of FPR save slots. */
9225 for (j = 6; j <= 15; j++)
9226 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9227 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9228 break;
9229 }
9230 cfun_gpr_save_slot (i) = save_reg_slot++;
9231 }
9232 }
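
/* Illustration: when this optimization applies, a leaf function may
   save r6 in the prologue with "ldgr %f0,%r6" and restore it in the
   epilogue with "lgdr %r6,%f0" instead of going through stm/lm and the
   stack (z10 or later with hard float, as checked above; the FPR
   chosen depends on which ones are free).  */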
9233
9234 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9235 stdarg.
9236 This is a helper routine for s390_register_info. */
9237
9238 static void
9239 s390_register_info_stdarg_fpr ()
9240 {
9241 int i;
9242 int min_fpr;
9243 int max_fpr;
9244
9245 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9246 f0-f4 for 64 bit. */
9247 if (!cfun->stdarg
9248 || !TARGET_HARD_FLOAT
9249 || !cfun->va_list_fpr_size
9250 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9251 return;
9252
9253 min_fpr = crtl->args.info.fprs;
9254 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9255 if (max_fpr >= FP_ARG_NUM_REG)
9256 max_fpr = FP_ARG_NUM_REG - 1;
9257
9258 /* FPR argument regs start at f0. */
9259 min_fpr += FPR0_REGNUM;
9260 max_fpr += FPR0_REGNUM;
9261
9262 for (i = min_fpr; i <= max_fpr; i++)
9263 cfun_set_fpr_save (i);
9264 }
9265
9266 /* Reserve the GPR save slots for GPRs which need to be saved due to
9267 stdarg.
9268 This is a helper routine for s390_register_info. */
9269
9270 static void
9271 s390_register_info_stdarg_gpr ()
9272 {
9273 int i;
9274 int min_gpr;
9275 int max_gpr;
9276
9277 if (!cfun->stdarg
9278 || !cfun->va_list_gpr_size
9279 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9280 return;
9281
9282 min_gpr = crtl->args.info.gprs;
9283 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9284 if (max_gpr >= GP_ARG_NUM_REG)
9285 max_gpr = GP_ARG_NUM_REG - 1;
9286
9287 /* GPR argument regs start at r2. */
9288 min_gpr += GPR2_REGNUM;
9289 max_gpr += GPR2_REGNUM;
9290
9291 /* If r6 was supposed to be saved into an FPR and now needs to go to
9292 the stack for vararg we have to adjust the restore range to make
9293 sure that the restore is done from stack as well. */
9294 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9295 && min_gpr <= GPR6_REGNUM
9296 && max_gpr >= GPR6_REGNUM)
9297 {
9298 if (cfun_frame_layout.first_restore_gpr == -1
9299 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9300 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9301 if (cfun_frame_layout.last_restore_gpr == -1
9302 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9303 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9304 }
9305
9306 if (cfun_frame_layout.first_save_gpr == -1
9307 || cfun_frame_layout.first_save_gpr > min_gpr)
9308 cfun_frame_layout.first_save_gpr = min_gpr;
9309
9310 if (cfun_frame_layout.last_save_gpr == -1
9311 || cfun_frame_layout.last_save_gpr < max_gpr)
9312 cfun_frame_layout.last_save_gpr = max_gpr;
9313
9314 for (i = min_gpr; i <= max_gpr; i++)
9315 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9316 }
9317
9318 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9319 prologue and epilogue. */
9320
9321 static void
9322 s390_register_info_set_ranges ()
9323 {
9324 int i, j;
9325
9326 /* Find the first and the last save slot supposed to use the stack
9327 to set the restore range.
9328 Vararg regs might be marked as saved to the stack but only the
9329 call-saved regs really need restoring (i.e. r6). This code
9330 assumes that the vararg regs have not yet been recorded in
9331 cfun_gpr_save_slot. */
9332 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9333 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9334 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9335 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9336 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9337 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9338 }
9339
9340 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9341 for registers which need to be saved in function prologue.
9342 This function can be used until the insns emitted for save/restore
9343 of the regs are visible in the RTL stream. */
9344
9345 static void
9346 s390_register_info ()
9347 {
9348 int i;
9349 char clobbered_regs[32];
9350
9351 gcc_assert (!epilogue_completed);
9352
9353 if (reload_completed)
9354 /* After reload we rely on our own routine to determine which
9355 registers need saving. */
9356 s390_regs_ever_clobbered (clobbered_regs);
9357 else
9358 /* During reload we use regs_ever_live as a base since reload
9359 makes changes there which we would otherwise not be aware
9360 of. */
9361 for (i = 0; i < 32; i++)
9362 clobbered_regs[i] = df_regs_ever_live_p (i);
9363
9364 for (i = 0; i < 32; i++)
9365 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9366
9367 /* Mark the call-saved FPRs which need to be saved.
9368 This needs to be done before checking the special GPRs since the
9369 stack pointer usage depends on whether high FPRs have to be saved
9370 or not. */
9371 cfun_frame_layout.fpr_bitmap = 0;
9372 cfun_frame_layout.high_fprs = 0;
9373 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9374 if (clobbered_regs[i] && !call_really_used_regs[i])
9375 {
9376 cfun_set_fpr_save (i);
9377 if (i >= FPR8_REGNUM)
9378 cfun_frame_layout.high_fprs++;
9379 }
9380
9381 /* Register 12 is used for the GOT address, but also as a temp in the
9382 prologue for split-stack stdarg functions (unless r14 is available). */
9383 clobbered_regs[12]
9384 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9385 || (flag_split_stack && cfun->stdarg
9386 && (crtl->is_leaf || TARGET_TPF_PROFILING
9387 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9388
9389 clobbered_regs[BASE_REGNUM]
9390 |= (cfun->machine->base_reg
9391 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9392
9393 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9394 |= !!frame_pointer_needed;
9395
9396 /* On pre-z900 machines this may not be decided until
9397 machine-dependent reorg.
9398 save_return_addr_p will only be set on non-zarch machines so
9399 there is no risk that r14 goes into an FPR instead of a stack
9400 slot. */
9401 clobbered_regs[RETURN_REGNUM]
9402 |= (!crtl->is_leaf
9403 || TARGET_TPF_PROFILING
9404 || cfun->machine->split_branches_pending_p
9405 || cfun_frame_layout.save_return_addr_p
9406 || crtl->calls_eh_return);
9407
9408 clobbered_regs[STACK_POINTER_REGNUM]
9409 |= (!crtl->is_leaf
9410 || TARGET_TPF_PROFILING
9411 || cfun_save_high_fprs_p
9412 || get_frame_size () > 0
9413 || (reload_completed && cfun_frame_layout.frame_size > 0)
9414 || cfun->calls_alloca);
9415
9416 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9417
9418 for (i = 6; i < 16; i++)
9419 if (clobbered_regs[i])
9420 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9421
9422 s390_register_info_stdarg_fpr ();
9423 s390_register_info_gprtofpr ();
9424 s390_register_info_set_ranges ();
9425 /* stdarg functions might need to save GPRs 2 to 6. This might
9426 override the GPR->FPR save decision made by
9427 s390_register_info_gprtofpr for r6 since vararg regs must go to
9428 the stack. */
9429 s390_register_info_stdarg_gpr ();
9430 }
9431
9432 /* This function is called by s390_optimize_prologue in order to get
9433 rid of unnecessary GPR save/restore instructions. The register info
9434 for the GPRs is re-computed and the ranges are re-calculated. */
9435
9436 static void
9437 s390_optimize_register_info ()
9438 {
9439 char clobbered_regs[32];
9440 int i;
9441
9442 gcc_assert (epilogue_completed);
9443 gcc_assert (!cfun->machine->split_branches_pending_p);
9444
9445 s390_regs_ever_clobbered (clobbered_regs);
9446
9447 for (i = 0; i < 32; i++)
9448 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9449
9450 /* There is still special treatment needed for cases invisible to
9451 s390_regs_ever_clobbered. */
9452 clobbered_regs[RETURN_REGNUM]
9453 |= (TARGET_TPF_PROFILING
9454 /* When expanding builtin_return_addr in ESA mode we do not
9455 know whether r14 will later be needed as scratch reg when
9456 doing branch splitting. So the builtin always accesses the
9457 r14 save slot and we need to stick to the save/restore
9458 decision for r14 even if it turns out that it didn't get
9459 clobbered. */
9460 || cfun_frame_layout.save_return_addr_p
9461 || crtl->calls_eh_return);
9462
9463 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9464
9465 for (i = 6; i < 16; i++)
9466 if (!clobbered_regs[i])
9467 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9468
9469 s390_register_info_set_ranges ();
9470 s390_register_info_stdarg_gpr ();
9471 }
9472
9473 /* Fill cfun->machine with info about frame of current function. */
9474
9475 static void
9476 s390_frame_info (void)
9477 {
9478 HOST_WIDE_INT lowest_offset;
9479
9480 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9481 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9482
9483 /* The va_arg builtin uses a constant distance of 16 *
9484 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9485 pointer. So even if we are going to save the stack pointer in an
9486 FPR we need the stack space in order to keep the offsets
9487 correct. */
9488 if (cfun->stdarg && cfun_save_arg_fprs_p)
9489 {
9490 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9491
9492 if (cfun_frame_layout.first_save_gpr_slot == -1)
9493 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9494 }
9495
9496 cfun_frame_layout.frame_size = get_frame_size ();
9497 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9498 fatal_error (input_location,
9499 "total size of local variables exceeds architecture limit");
9500
9501 if (!TARGET_PACKED_STACK)
9502 {
9503 /* Fixed stack layout. */
9504 cfun_frame_layout.backchain_offset = 0;
9505 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9506 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9507 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9508 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9509 * UNITS_PER_LONG);
9510 }
9511 else if (TARGET_BACKCHAIN)
9512 {
9513 /* Kernel stack layout - packed stack, backchain, no float */
9514 gcc_assert (TARGET_SOFT_FLOAT);
9515 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9516 - UNITS_PER_LONG);
9517
9518 /* The distance between the backchain and the return address
9519 save slot must not change. So we always need a slot for the
9520 stack pointer which resides in between. */
9521 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9522
9523 cfun_frame_layout.gprs_offset
9524 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9525
9526 /* FPRs will not be saved. Nevertheless pick sane values to
9527 keep area calculations valid. */
9528 cfun_frame_layout.f0_offset =
9529 cfun_frame_layout.f4_offset =
9530 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9531 }
9532 else
9533 {
9534 int num_fprs;
9535
9536 /* Packed stack layout without backchain. */
9537
9538 /* With stdarg FPRs need their dedicated slots. */
9539 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9540 : (cfun_fpr_save_p (FPR4_REGNUM) +
9541 cfun_fpr_save_p (FPR6_REGNUM)));
9542 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9543
9544 num_fprs = (cfun->stdarg ? 2
9545 : (cfun_fpr_save_p (FPR0_REGNUM)
9546 + cfun_fpr_save_p (FPR2_REGNUM)));
9547 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9548
9549 cfun_frame_layout.gprs_offset
9550 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9551
9552 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9553 - cfun_frame_layout.high_fprs * 8);
9554 }
9555
9556 if (cfun_save_high_fprs_p)
9557 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9558
9559 if (!crtl->is_leaf)
9560 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9561
9562 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9563 sized area at the bottom of the stack. This is required also for
9564 leaf functions. When GCC generates a local stack reference it
9565 will always add STACK_POINTER_OFFSET to all these references. */
9566 if (crtl->is_leaf
9567 && !TARGET_TPF_PROFILING
9568 && cfun_frame_layout.frame_size == 0
9569 && !cfun->calls_alloca)
9570 return;
9571
9572 /* Calculate the number of bytes we have used in our own register
9573 save area. With the packed stack layout we can re-use the
9574 remaining bytes for normal stack elements. */
9575
9576 if (TARGET_PACKED_STACK)
9577 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9578 cfun_frame_layout.f4_offset),
9579 cfun_frame_layout.gprs_offset);
9580 else
9581 lowest_offset = 0;
9582
9583 if (TARGET_BACKCHAIN)
9584 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9585
9586 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9587
9588 /* On 31 bit, if an odd number of GPRs has to be saved, we have to
9589 adjust the frame size to maintain the 8-byte alignment of stack
9590 frames. */
9591 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9592 STACK_BOUNDARY / BITS_PER_UNIT - 1)
9593 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
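/* A worked example of the rounding above, assuming the usual
   STACK_BOUNDARY of 64 bits (8 bytes): a raw frame_size of 100 would
   become (100 + 7) & ~7 == 104, keeping stack frames 8-byte
   aligned.  */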
9594 }
9595
9596 /* Generate frame layout. Fills in register and frame data for the current
9597 function in cfun->machine. This routine can be called multiple times;
9598 it will re-do the complete frame layout every time. */
9599
9600 static void
9601 s390_init_frame_layout (void)
9602 {
9603 HOST_WIDE_INT frame_size;
9604 int base_used;
9605
9606 /* After LRA the frame layout is supposed to be read-only and should
9607 not be re-computed. */
9608 if (reload_completed)
9609 return;
9610
9611 /* On S/390 machines, we may need to perform branch splitting, which
9612 will require both base and return address register. We have no
9613 choice but to assume we're going to need them until right at the
9614 end of the machine dependent reorg phase. */
9615 if (!TARGET_CPU_ZARCH)
9616 cfun->machine->split_branches_pending_p = true;
9617
9618 do
9619 {
9620 frame_size = cfun_frame_layout.frame_size;
9621
9622 /* Try to predict whether we'll need the base register. */
9623 base_used = cfun->machine->split_branches_pending_p
9624 || crtl->uses_const_pool
9625 || (!DISP_IN_RANGE (frame_size)
9626 && !CONST_OK_FOR_K (frame_size));
9627
9628 /* Decide which register to use as literal pool base. In small
9629 leaf functions, try to use an unused call-clobbered register
9630 as base register to avoid save/restore overhead. */
9631 if (!base_used)
9632 cfun->machine->base_reg = NULL_RTX;
9633 else
9634 {
9635 int br = 0;
9636
9637 if (crtl->is_leaf)
9638 /* Prefer r5 (most likely to be free). */
9639 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
9640 ;
9641 cfun->machine->base_reg =
9642 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
9643 }
9644
9645 s390_register_info ();
9646 s390_frame_info ();
9647 }
9648 while (frame_size != cfun_frame_layout.frame_size);
9649 }
9650
9651 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
9652 the TX is nonescaping. A transaction is considered escaping if
9653 there is at least one path from tbegin returning CC0 to the
9654 function exit block without a tend.
9655
9656 The check so far has some limitations:
9657 - only single tbegin/tend BBs are supported
9658 - the first cond jump after tbegin must separate the CC0 path from ~CC0
9659 - when CC is copied to a GPR and the CC0 check is done with the GPR
9660 this is not supported
9661 */
9662
9663 static void
9664 s390_optimize_nonescaping_tx (void)
9665 {
9666 const unsigned int CC0 = 1 << 3;
9667 basic_block tbegin_bb = NULL;
9668 basic_block tend_bb = NULL;
9669 basic_block bb;
9670 rtx_insn *insn;
9671 bool result = true;
9672 int bb_index;
9673 rtx_insn *tbegin_insn = NULL;
9674
9675 if (!cfun->machine->tbegin_p)
9676 return;
9677
9678 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
9679 {
9680 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
9681
9682 if (!bb)
9683 continue;
9684
9685 FOR_BB_INSNS (bb, insn)
9686 {
9687 rtx ite, cc, pat, target;
9688 unsigned HOST_WIDE_INT mask;
9689
9690 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
9691 continue;
9692
9693 pat = PATTERN (insn);
9694
9695 if (GET_CODE (pat) == PARALLEL)
9696 pat = XVECEXP (pat, 0, 0);
9697
9698 if (GET_CODE (pat) != SET
9699 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
9700 continue;
9701
9702 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
9703 {
9704 rtx_insn *tmp;
9705
9706 tbegin_insn = insn;
9707
9708 /* Just return if the tbegin doesn't have clobbers. */
9709 if (GET_CODE (PATTERN (insn)) != PARALLEL)
9710 return;
9711
9712 if (tbegin_bb != NULL)
9713 return;
9714
9715 /* Find the next conditional jump. */
9716 for (tmp = NEXT_INSN (insn);
9717 tmp != NULL_RTX;
9718 tmp = NEXT_INSN (tmp))
9719 {
9720 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
9721 return;
9722 if (!JUMP_P (tmp))
9723 continue;
9724
9725 ite = SET_SRC (PATTERN (tmp));
9726 if (GET_CODE (ite) != IF_THEN_ELSE)
9727 continue;
9728
9729 cc = XEXP (XEXP (ite, 0), 0);
9730 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
9731 || GET_MODE (cc) != CCRAWmode
9732 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
9733 return;
9734
9735 if (bb->succs->length () != 2)
9736 return;
9737
9738 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
9739 if (GET_CODE (XEXP (ite, 0)) == NE)
9740 mask ^= 0xf;
9741
9742 if (mask == CC0)
9743 target = XEXP (ite, 1);
9744 else if (mask == (CC0 ^ 0xf))
9745 target = XEXP (ite, 2);
9746 else
9747 return;
9748
9749 {
9750 edge_iterator ei;
9751 edge e1, e2;
9752
9753 ei = ei_start (bb->succs);
9754 e1 = ei_safe_edge (ei);
9755 ei_next (&ei);
9756 e2 = ei_safe_edge (ei);
9757
9758 if (e2->flags & EDGE_FALLTHRU)
9759 {
9760 e2 = e1;
9761 e1 = ei_safe_edge (ei);
9762 }
9763
9764 if (!(e1->flags & EDGE_FALLTHRU))
9765 return;
9766
9767 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
9768 }
9769 if (tmp == BB_END (bb))
9770 break;
9771 }
9772 }
9773
9774 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
9775 {
9776 if (tend_bb != NULL)
9777 return;
9778 tend_bb = bb;
9779 }
9780 }
9781 }
9782
9783 /* Either we successfully remove the FPR clobbers here or we are not
9784 able to do anything for this TX. Both cases don't qualify for
9785 another look. */
9786 cfun->machine->tbegin_p = false;
9787
9788 if (tbegin_bb == NULL || tend_bb == NULL)
9789 return;
9790
9791 calculate_dominance_info (CDI_POST_DOMINATORS);
9792 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
9793 free_dominance_info (CDI_POST_DOMINATORS);
9794
9795 if (!result)
9796 return;
9797
9798 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
9799 gen_rtvec (2,
9800 XVECEXP (PATTERN (tbegin_insn), 0, 0),
9801 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
9802 INSN_CODE (tbegin_insn) = -1;
9803 df_insn_rescan (tbegin_insn);
9804
9805 return;
9806 }
9807
9808 /* Return true if it is legal to put a value with MODE into REGNO. */
9809
9810 bool
9811 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
9812 {
9813 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
9814 return false;
9815
9816 switch (REGNO_REG_CLASS (regno))
9817 {
9818 case VEC_REGS:
9819 return ((GET_MODE_CLASS (mode) == MODE_INT
9820 && s390_class_max_nregs (VEC_REGS, mode) == 1)
9821 || mode == DFmode
9822 || s390_vector_mode_supported_p (mode));
9823 break;
9824 case FP_REGS:
9825 if (TARGET_VX
9826 && ((GET_MODE_CLASS (mode) == MODE_INT
9827 && s390_class_max_nregs (FP_REGS, mode) == 1)
9828 || mode == DFmode
9829 || s390_vector_mode_supported_p (mode)))
9830 return true;
9831
9832 if (REGNO_PAIR_OK (regno, mode))
9833 {
9834 if (mode == SImode || mode == DImode)
9835 return true;
9836
9837 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
9838 return true;
9839 }
9840 break;
9841 case ADDR_REGS:
9842 if (FRAME_REGNO_P (regno) && mode == Pmode)
9843 return true;
9844
9845 /* fallthrough */
9846 case GENERAL_REGS:
9847 if (REGNO_PAIR_OK (regno, mode))
9848 {
9849 if (TARGET_ZARCH
9850 || (mode != TFmode && mode != TCmode && mode != TDmode))
9851 return true;
9852 }
9853 break;
9854 case CC_REGS:
9855 if (GET_MODE_CLASS (mode) == MODE_CC)
9856 return true;
9857 break;
9858 case ACCESS_REGS:
9859 if (REGNO_PAIR_OK (regno, mode))
9860 {
9861 if (mode == SImode || mode == Pmode)
9862 return true;
9863 }
9864 break;
9865 default:
9866 return false;
9867 }
9868
9869 return false;
9870 }
9871
9872 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
9873
9874 bool
9875 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
9876 {
9877 /* Once we've decided upon a register to use as base register, it must
9878 no longer be used for any other purpose. */
9879 if (cfun->machine->base_reg)
9880 if (REGNO (cfun->machine->base_reg) == old_reg
9881 || REGNO (cfun->machine->base_reg) == new_reg)
9882 return false;
9883
9884 /* Prevent regrename from using call-saved regs which haven't
9885 actually been saved. This is necessary since regrename assumes
9886 the backend save/restore decisions are based on
9887 df_regs_ever_live. Since we have our own routine we have to tell
9888 regrename manually about it. */
9889 if (GENERAL_REGNO_P (new_reg)
9890 && !call_really_used_regs[new_reg]
9891 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
9892 return false;
9893
9894 return true;
9895 }
9896
9897 /* Return nonzero if register REGNO can be used as a scratch register
9898 in peephole2. */
9899
9900 static bool
9901 s390_hard_regno_scratch_ok (unsigned int regno)
9902 {
9903 /* See s390_hard_regno_rename_ok. */
9904 if (GENERAL_REGNO_P (regno)
9905 && !call_really_used_regs[regno]
9906 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
9907 return false;
9908
9909 return true;
9910 }
9911
9912 /* Maximum number of registers to represent a value of mode MODE
9913 in a register of class RCLASS. */
9914
9915 int
9916 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
9917 {
9918 int reg_size;
9919 bool reg_pair_required_p = false;
9920
9921 switch (rclass)
9922 {
9923 case FP_REGS:
9924 case VEC_REGS:
9925 reg_size = TARGET_VX ? 16 : 8;
9926
9927 /* TF and TD modes would fit into a VR but we put them into a
9928 register pair since we do not have 128bit FP instructions on
9929 full VRs. */
9930 if (TARGET_VX
9931 && SCALAR_FLOAT_MODE_P (mode)
9932 && GET_MODE_SIZE (mode) >= 16)
9933 reg_pair_required_p = true;
9934
9935 /* Even if complex types would fit into a single FPR/VR we force
9936 them into a register pair to deal with the parts more easily.
9937 (FIXME: What about complex ints?) */
9938 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
9939 reg_pair_required_p = true;
9940 break;
9941 case ACCESS_REGS:
9942 reg_size = 4;
9943 break;
9944 default:
9945 reg_size = UNITS_PER_WORD;
9946 break;
9947 }
9948
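/* Illustrative examples, assuming TARGET_VX (reg_size == 16):
   TFmode is a 16-byte scalar float, so reg_pair_required_p is set
   and we return 2 * ((16 / 2 + 15) / 16) == 2 registers, whereas
   V2DImode (also 16 bytes) yields (16 + 15) / 16 == 1 single
   vector register.  */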
9949 if (reg_pair_required_p)
9950 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
9951
9952 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
9953 }
9954
9955 /* Return TRUE if changing mode from FROM to TO should not be allowed
9956 for register class CLASS. */
9957
9958 int
9959 s390_cannot_change_mode_class (machine_mode from_mode,
9960 machine_mode to_mode,
9961 enum reg_class rclass)
9962 {
9963 machine_mode small_mode;
9964 machine_mode big_mode;
9965
9966 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
9967 return 0;
9968
9969 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
9970 {
9971 small_mode = from_mode;
9972 big_mode = to_mode;
9973 }
9974 else
9975 {
9976 small_mode = to_mode;
9977 big_mode = from_mode;
9978 }
9979
9980 /* Values residing in VRs are little-endian style. All modes are
9981 placed left-aligned in a VR. This means that we cannot allow
9982 switching between modes with differing sizes. Also if the vector
9983 facility is available we still place TFmode values in VR register
9984 pairs, since the only instructions we have operating on TFmodes
9985 only deal with register pairs. Therefore we have to allow DFmode
9986 subregs of TFmodes to enable the TFmode splitters. */
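/* As a concrete illustration, assuming TARGET_VX: a DFmode (8 byte)
   subreg of a TFmode value is allowed because TFmode occupies a
   register pair anyway, while an SImode (4 byte) subreg of a
   V4SImode value is rejected since the smaller mode is below the
   8-byte limit checked here.  */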
9987 if (reg_classes_intersect_p (VEC_REGS, rclass)
9988 && (GET_MODE_SIZE (small_mode) < 8
9989 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
9990 return 1;
9991
9992 /* Likewise for access registers, since they have only half the
9993 word size on 64-bit. */
9994 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
9995 return 1;
9996
9997 return 0;
9998 }
9999
10000 /* Return true if we use LRA instead of reload pass. */
10001 static bool
10002 s390_lra_p (void)
10003 {
10004 return s390_lra_flag;
10005 }
10006
10007 /* Return true if register FROM can be eliminated via register TO. */
10008
10009 static bool
10010 s390_can_eliminate (const int from, const int to)
10011 {
10012 /* On zSeries machines, we have not marked the base register as fixed.
10013 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10014 If a function requires the base register, we say here that this
10015 elimination cannot be performed. This will cause reload to free
10016 up the base register (as if it were fixed). On the other hand,
10017 if the current function does *not* require the base register, we
10018 say here the elimination succeeds, which in turn allows reload
10019 to allocate the base register for any other purpose. */
10020 if (from == BASE_REGNUM && to == BASE_REGNUM)
10021 {
10022 if (TARGET_CPU_ZARCH)
10023 {
10024 s390_init_frame_layout ();
10025 return cfun->machine->base_reg == NULL_RTX;
10026 }
10027
10028 return false;
10029 }
10030
10031 /* Everything else must point into the stack frame. */
10032 gcc_assert (to == STACK_POINTER_REGNUM
10033 || to == HARD_FRAME_POINTER_REGNUM);
10034
10035 gcc_assert (from == FRAME_POINTER_REGNUM
10036 || from == ARG_POINTER_REGNUM
10037 || from == RETURN_ADDRESS_POINTER_REGNUM);
10038
10039 /* Make sure we actually saved the return address. */
10040 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10041 if (!crtl->calls_eh_return
10042 && !cfun->stdarg
10043 && !cfun_frame_layout.save_return_addr_p)
10044 return false;
10045
10046 return true;
10047 }
10048
10049 /* Return the offset between registers FROM and TO just after the prologue. */
10050
10051 HOST_WIDE_INT
10052 s390_initial_elimination_offset (int from, int to)
10053 {
10054 HOST_WIDE_INT offset;
10055
10056 /* ??? Why are we called for non-eliminable pairs? */
10057 if (!s390_can_eliminate (from, to))
10058 return 0;
10059
10060 switch (from)
10061 {
10062 case FRAME_POINTER_REGNUM:
10063 offset = (get_frame_size()
10064 + STACK_POINTER_OFFSET
10065 + crtl->outgoing_args_size);
10066 break;
10067
10068 case ARG_POINTER_REGNUM:
10069 s390_init_frame_layout ();
10070 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10071 break;
10072
10073 case RETURN_ADDRESS_POINTER_REGNUM:
10074 s390_init_frame_layout ();
10075
10076 if (cfun_frame_layout.first_save_gpr_slot == -1)
10077 {
10078 /* If it turns out that for stdarg nothing went into the reg
10079 save area we also do not need the return address
10080 pointer. */
10081 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10082 return 0;
10083
10084 gcc_unreachable ();
10085 }
10086
10087 /* In order to make the following work it is not necessary for
10088 r14 to have a save slot. It is sufficient if one other GPR
10089 got one. Since the GPRs are always stored without gaps we
10090 are able to calculate where the r14 save slot would
10091 reside. */
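/* A hypothetical 64-bit example: with frame_size == 160,
   gprs_offset == 48 and r6 as first_save_gpr_slot, the r14 slot
   would be found at 160 + 48 + (14 - 6) * 8 == 272 bytes above the
   stack pointer once the prologue has run.  */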
10092 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10093 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10094 UNITS_PER_LONG);
10095 break;
10096
10097 case BASE_REGNUM:
10098 offset = 0;
10099 break;
10100
10101 default:
10102 gcc_unreachable ();
10103 }
10104
10105 return offset;
10106 }
10107
10108 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10109 to register BASE. Return generated insn. */
10110
10111 static rtx
10112 save_fpr (rtx base, int offset, int regnum)
10113 {
10114 rtx addr;
10115 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10116
10117 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10118 set_mem_alias_set (addr, get_varargs_alias_set ());
10119 else
10120 set_mem_alias_set (addr, get_frame_alias_set ());
10121
10122 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10123 }
10124
10125 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10126 to register BASE. Return generated insn. */
10127
10128 static rtx
10129 restore_fpr (rtx base, int offset, int regnum)
10130 {
10131 rtx addr;
10132 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10133 set_mem_alias_set (addr, get_frame_alias_set ());
10134
10135 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10136 }
10137
10138 /* Return true if REGNO is a global register, but not one
10139 of the special ones that need to be saved/restored anyway. */
10140
10141 static inline bool
10142 global_not_special_regno_p (int regno)
10143 {
10144 return (global_regs[regno]
10145 /* These registers are special and need to be
10146 restored in any case. */
10147 && !(regno == STACK_POINTER_REGNUM
10148 || regno == RETURN_REGNUM
10149 || regno == BASE_REGNUM
10150 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10151 }
10152
10153 /* Generate insn to save registers FIRST to LAST into
10154 the register save area located at offset OFFSET
10155 relative to register BASE. */
10156
10157 static rtx
10158 save_gprs (rtx base, int offset, int first, int last)
10159 {
10160 rtx addr, insn, note;
10161 int i;
10162
10163 addr = plus_constant (Pmode, base, offset);
10164 addr = gen_rtx_MEM (Pmode, addr);
10165
10166 set_mem_alias_set (addr, get_frame_alias_set ());
10167
10168 /* Special-case single register. */
10169 if (first == last)
10170 {
10171 if (TARGET_64BIT)
10172 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10173 else
10174 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10175
10176 if (!global_not_special_regno_p (first))
10177 RTX_FRAME_RELATED_P (insn) = 1;
10178 return insn;
10179 }
10180
10181
10182 insn = gen_store_multiple (addr,
10183 gen_rtx_REG (Pmode, first),
10184 GEN_INT (last - first + 1));
10185
10186 if (first <= 6 && cfun->stdarg)
10187 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10188 {
10189 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10190
10191 if (first + i <= 6)
10192 set_mem_alias_set (mem, get_varargs_alias_set ());
10193 }
10194
10195 /* We need to set the FRAME_RELATED flag on all SETs
10196 inside the store-multiple pattern.
10197
10198 However, we must not emit DWARF records for registers 2..5
10199 if they are stored for use by variable arguments ...
10200
10201 ??? Unfortunately, it is not enough to simply not set the
10202 FRAME_RELATED flags for those SETs, because the first SET
10203 of the PARALLEL is always treated as if it had the flag
10204 set, even if it does not. Therefore we emit a new pattern
10205 without those registers as a REG_FRAME_RELATED_EXPR note. */
10206
10207 if (first >= 6 && !global_not_special_regno_p (first))
10208 {
10209 rtx pat = PATTERN (insn);
10210
10211 for (i = 0; i < XVECLEN (pat, 0); i++)
10212 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10213 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10214 0, i)))))
10215 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10216
10217 RTX_FRAME_RELATED_P (insn) = 1;
10218 }
10219 else if (last >= 6)
10220 {
10221 int start;
10222
10223 for (start = first >= 6 ? first : 6; start <= last; start++)
10224 if (!global_not_special_regno_p (start))
10225 break;
10226
10227 if (start > last)
10228 return insn;
10229
10230 addr = plus_constant (Pmode, base,
10231 offset + (start - first) * UNITS_PER_LONG);
10232
10233 if (start == last)
10234 {
10235 if (TARGET_64BIT)
10236 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10237 gen_rtx_REG (Pmode, start));
10238 else
10239 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10240 gen_rtx_REG (Pmode, start));
10241 note = PATTERN (note);
10242
10243 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10244 RTX_FRAME_RELATED_P (insn) = 1;
10245
10246 return insn;
10247 }
10248
10249 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10250 gen_rtx_REG (Pmode, start),
10251 GEN_INT (last - start + 1));
10252 note = PATTERN (note);
10253
10254 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10255
10256 for (i = 0; i < XVECLEN (note, 0); i++)
10257 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10258 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10259 0, i)))))
10260 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10261
10262 RTX_FRAME_RELATED_P (insn) = 1;
10263 }
10264
10265 return insn;
10266 }
10267
10268 /* Generate insn to restore registers FIRST to LAST from
10269 the register save area located at offset OFFSET
10270 relative to register BASE. */
10271
10272 static rtx
10273 restore_gprs (rtx base, int offset, int first, int last)
10274 {
10275 rtx addr, insn;
10276
10277 addr = plus_constant (Pmode, base, offset);
10278 addr = gen_rtx_MEM (Pmode, addr);
10279 set_mem_alias_set (addr, get_frame_alias_set ());
10280
10281 /* Special-case single register. */
10282 if (first == last)
10283 {
10284 if (TARGET_64BIT)
10285 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10286 else
10287 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10288
10289 RTX_FRAME_RELATED_P (insn) = 1;
10290 return insn;
10291 }
10292
10293 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10294 addr,
10295 GEN_INT (last - first + 1));
10296 RTX_FRAME_RELATED_P (insn) = 1;
10297 return insn;
10298 }
10299
10300 /* Return insn sequence to load the GOT register. */
10301
10302 static GTY(()) rtx got_symbol;
10303 rtx_insn *
10304 s390_load_got (void)
10305 {
10306 rtx_insn *insns;
10307
10308 /* We cannot use pic_offset_table_rtx here since we use this
10309 function also for non-pic if __tls_get_offset is called and in
10310 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10311 aren't usable. */
10312 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10313
10314 if (!got_symbol)
10315 {
10316 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
10317 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
10318 }
10319
10320 start_sequence ();
10321
10322 if (TARGET_CPU_ZARCH)
10323 {
10324 emit_move_insn (got_rtx, got_symbol);
10325 }
10326 else
10327 {
10328 rtx offset;
10329
10330 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
10331 UNSPEC_LTREL_OFFSET);
10332 offset = gen_rtx_CONST (Pmode, offset);
10333 offset = force_const_mem (Pmode, offset);
10334
10335 emit_move_insn (got_rtx, offset);
10336
10337 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
10338 UNSPEC_LTREL_BASE);
10339 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
10340
10341 emit_move_insn (got_rtx, offset);
10342 }
10343
10344 insns = get_insns ();
10345 end_sequence ();
10346 return insns;
10347 }
10348
10349 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10350 and the change to the stack pointer. */
10351
10352 static void
10353 s390_emit_stack_tie (void)
10354 {
10355 rtx mem = gen_frame_mem (BLKmode,
10356 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10357
10358 emit_insn (gen_stack_tie (mem));
10359 }
10360
10361 /* Copy GPRs into FPR save slots. */
10362
10363 static void
10364 s390_save_gprs_to_fprs (void)
10365 {
10366 int i;
10367
10368 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10369 return;
10370
10371 for (i = 6; i < 16; i++)
10372 {
10373 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10374 {
10375 rtx_insn *insn =
10376 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10377 gen_rtx_REG (DImode, i));
10378 RTX_FRAME_RELATED_P (insn) = 1;
10379 /* This prevents dwarf2cfi from interpreting the set. Otherwise
10380 it might emit def_cfa_register notes setting an FPR as the
10381 new CFA. */
10382 add_reg_note (insn, REG_CFA_REGISTER, PATTERN (insn));
10383 }
10384 }
10385 }
10386
10387 /* Restore GPRs from FPR save slots. */
10388
10389 static void
10390 s390_restore_gprs_from_fprs (void)
10391 {
10392 int i;
10393
10394 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10395 return;
10396
10397 for (i = 6; i < 16; i++)
10398 {
10399 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10400 {
10401 rtx_insn *insn =
10402 emit_move_insn (gen_rtx_REG (DImode, i),
10403 gen_rtx_REG (DImode, cfun_gpr_save_slot (i)));
10404 df_set_regs_ever_live (i, true);
10405 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10406 if (i == STACK_POINTER_REGNUM)
10407 add_reg_note (insn, REG_CFA_DEF_CFA,
10408 plus_constant (Pmode, stack_pointer_rtx,
10409 STACK_POINTER_OFFSET));
10410 RTX_FRAME_RELATED_P (insn) = 1;
10411 }
10412 }
10413 }
10414
10415
10416 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10417 generation. */
10418
10419 namespace {
10420
10421 const pass_data pass_data_s390_early_mach =
10422 {
10423 RTL_PASS, /* type */
10424 "early_mach", /* name */
10425 OPTGROUP_NONE, /* optinfo_flags */
10426 TV_MACH_DEP, /* tv_id */
10427 0, /* properties_required */
10428 0, /* properties_provided */
10429 0, /* properties_destroyed */
10430 0, /* todo_flags_start */
10431 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10432 };
10433
10434 class pass_s390_early_mach : public rtl_opt_pass
10435 {
10436 public:
10437 pass_s390_early_mach (gcc::context *ctxt)
10438 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10439 {}
10440
10441 /* opt_pass methods: */
10442 virtual unsigned int execute (function *);
10443
10444 }; // class pass_s390_early_mach
10445
10446 unsigned int
10447 pass_s390_early_mach::execute (function *fun)
10448 {
10449 rtx_insn *insn;
10450
10451 /* Try to get rid of the FPR clobbers. */
10452 s390_optimize_nonescaping_tx ();
10453
10454 /* Re-compute register info. */
10455 s390_register_info ();
10456
10457 /* If we're using a base register, ensure that it is always valid for
10458 the first non-prologue instruction. */
10459 if (fun->machine->base_reg)
10460 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10461
10462 /* Annotate all constant pool references to let the scheduler know
10463 they implicitly use the base register. */
10464 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10465 if (INSN_P (insn))
10466 {
10467 annotate_constant_pool_refs (&PATTERN (insn));
10468 df_insn_rescan (insn);
10469 }
10470 return 0;
10471 }
10472
10473 } // anon namespace
10474
10475 /* Expand the prologue into a bunch of separate insns. */
10476
10477 void
10478 s390_emit_prologue (void)
10479 {
10480 rtx insn, addr;
10481 rtx temp_reg;
10482 int i;
10483 int offset;
10484 int next_fpr = 0;
10485
10486 /* Choose the best register for temporary use within the prologue.
10487 TPF with profiling must avoid register 14 - the tracing function
10488 needs the original contents of r14 to be preserved. */
10489
10490 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
10491 && !crtl->is_leaf
10492 && !TARGET_TPF_PROFILING)
10493 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10494 else if (flag_split_stack && cfun->stdarg)
10495 temp_reg = gen_rtx_REG (Pmode, 12);
10496 else
10497 temp_reg = gen_rtx_REG (Pmode, 1);
10498
10499 s390_save_gprs_to_fprs ();
10500
10501 /* Save call saved gprs. */
10502 if (cfun_frame_layout.first_save_gpr != -1)
10503 {
10504 insn = save_gprs (stack_pointer_rtx,
10505 cfun_frame_layout.gprs_offset +
10506 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
10507 - cfun_frame_layout.first_save_gpr_slot),
10508 cfun_frame_layout.first_save_gpr,
10509 cfun_frame_layout.last_save_gpr);
10510 emit_insn (insn);
10511 }
10512
10513 /* Dummy insn to mark literal pool slot. */
10514
10515 if (cfun->machine->base_reg)
10516 emit_insn (gen_main_pool (cfun->machine->base_reg));
10517
10518 offset = cfun_frame_layout.f0_offset;
10519
10520 /* Save f0 and f2. */
10521 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
10522 {
10523 if (cfun_fpr_save_p (i))
10524 {
10525 save_fpr (stack_pointer_rtx, offset, i);
10526 offset += 8;
10527 }
10528 else if (!TARGET_PACKED_STACK || cfun->stdarg)
10529 offset += 8;
10530 }
10531
10532 /* Save f4 and f6. */
10533 offset = cfun_frame_layout.f4_offset;
10534 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10535 {
10536 if (cfun_fpr_save_p (i))
10537 {
10538 insn = save_fpr (stack_pointer_rtx, offset, i);
10539 offset += 8;
10540
10541 /* If f4 and f6 are call clobbered they are saved due to
10542 stdargs and therefore are not frame related. */
10543 if (!call_really_used_regs[i])
10544 RTX_FRAME_RELATED_P (insn) = 1;
10545 }
10546 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
10547 offset += 8;
10548 }
10549
10550 if (TARGET_PACKED_STACK
10551 && cfun_save_high_fprs_p
10552 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
10553 {
10554 offset = (cfun_frame_layout.f8_offset
10555 + (cfun_frame_layout.high_fprs - 1) * 8);
10556
10557 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
10558 if (cfun_fpr_save_p (i))
10559 {
10560 insn = save_fpr (stack_pointer_rtx, offset, i);
10561
10562 RTX_FRAME_RELATED_P (insn) = 1;
10563 offset -= 8;
10564 }
10565 if (offset >= cfun_frame_layout.f8_offset)
10566 next_fpr = i;
10567 }
10568
10569 if (!TARGET_PACKED_STACK)
10570 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
10571
10572 if (flag_stack_usage_info)
10573 current_function_static_stack_size = cfun_frame_layout.frame_size;
10574
10575 /* Decrement stack pointer. */
10576
10577 if (cfun_frame_layout.frame_size > 0)
10578 {
10579 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10580 rtx real_frame_off;
10581
10582 if (s390_stack_size)
10583 {
10584 HOST_WIDE_INT stack_guard;
10585
10586 if (s390_stack_guard)
10587 stack_guard = s390_stack_guard;
10588 else
10589 {
10590 /* If no value for the stack guard is provided, the smallest
10591 power of 2 greater than or equal to the current frame size is chosen. */
10592 stack_guard = 1;
10593 while (stack_guard < cfun_frame_layout.frame_size)
10594 stack_guard <<= 1;
10595 }
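/* For example, a frame size of 5000 bytes would yield a
   stack_guard of 8192 here - the smallest power of 2 that is not
   below the frame size.  */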
10596
10597 if (cfun_frame_layout.frame_size >= s390_stack_size)
10598 {
10599 warning (0, "frame size of function %qs is %wd"
10600 " bytes exceeding user provided stack limit of "
10601 "%d bytes. "
10602 "An unconditional trap is added.",
10603 current_function_name(), cfun_frame_layout.frame_size,
10604 s390_stack_size);
10605 emit_insn (gen_trap ());
10606 emit_barrier ();
10607 }
10608 else
10609 {
10610 /* stack_guard has to be smaller than s390_stack_size.
10611 Otherwise we would emit an AND with zero which would
10612 not match the test under mask pattern. */
10613 if (stack_guard >= s390_stack_size)
10614 {
10615 warning (0, "frame size of function %qs is %wd"
10616 " bytes which is more than half the stack size. "
10617 "The dynamic check would not be reliable. "
10618 "No check emitted for this function.",
10619 current_function_name(),
10620 cfun_frame_layout.frame_size);
10621 }
10622 else
10623 {
10624 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
10625 & ~(stack_guard - 1));
10626
10627 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
10628 GEN_INT (stack_check_mask));
10629 if (TARGET_64BIT)
10630 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
10631 t, const0_rtx),
10632 t, const0_rtx, const0_rtx));
10633 else
10634 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
10635 t, const0_rtx),
10636 t, const0_rtx, const0_rtx));
10637 }
10638 }
10639 }
10640
10641 if (s390_warn_framesize > 0
10642 && cfun_frame_layout.frame_size >= s390_warn_framesize)
10643 warning (0, "frame size of %qs is %wd bytes",
10644 current_function_name (), cfun_frame_layout.frame_size);
10645
10646 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
10647 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
10648
10649 /* Save incoming stack pointer into temp reg. */
10650 if (TARGET_BACKCHAIN || next_fpr)
10651 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
10652
10653 /* Subtract frame size from stack pointer. */
10654
10655 if (DISP_IN_RANGE (INTVAL (frame_off)))
10656 {
10657 insn = gen_rtx_SET (stack_pointer_rtx,
10658 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10659 frame_off));
10660 insn = emit_insn (insn);
10661 }
10662 else
10663 {
10664 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10665 frame_off = force_const_mem (Pmode, frame_off);
10666
10667 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
10668 annotate_constant_pool_refs (&PATTERN (insn));
10669 }
10670
10671 RTX_FRAME_RELATED_P (insn) = 1;
10672 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10673 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10674 gen_rtx_SET (stack_pointer_rtx,
10675 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10676 real_frame_off)));
10677
10678 /* Set backchain. */
10679
10680 if (TARGET_BACKCHAIN)
10681 {
10682 if (cfun_frame_layout.backchain_offset)
10683 addr = gen_rtx_MEM (Pmode,
10684 plus_constant (Pmode, stack_pointer_rtx,
10685 cfun_frame_layout.backchain_offset));
10686 else
10687 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
10688 set_mem_alias_set (addr, get_frame_alias_set ());
10689 insn = emit_insn (gen_move_insn (addr, temp_reg));
10690 }
10691
10692 /* If we support non-call exceptions (e.g. for Java),
10693 we need to make sure the backchain pointer is set up
10694 before any possibly trapping memory access. */
10695 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
10696 {
10697 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
10698 emit_clobber (addr);
10699 }
10700 }
10701
10702 /* Save fprs 8 - 15 (64 bit ABI). */
10703
10704 if (cfun_save_high_fprs_p && next_fpr)
10705 {
10706 /* If the stack might be accessed through a different register
10707 we have to make sure that the stack pointer decrement is not
10708 moved below the use of the stack slots. */
10709 s390_emit_stack_tie ();
10710
10711 insn = emit_insn (gen_add2_insn (temp_reg,
10712 GEN_INT (cfun_frame_layout.f8_offset)));
10713
10714 offset = 0;
10715
10716 for (i = FPR8_REGNUM; i <= next_fpr; i++)
10717 if (cfun_fpr_save_p (i))
10718 {
10719 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
10720 cfun_frame_layout.frame_size
10721 + cfun_frame_layout.f8_offset
10722 + offset);
10723
10724 insn = save_fpr (temp_reg, offset, i);
10725 offset += 8;
10726 RTX_FRAME_RELATED_P (insn) = 1;
10727 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10728 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
10729 gen_rtx_REG (DFmode, i)));
10730 }
10731 }
10732
10733 /* Set frame pointer, if needed. */
10734
10735 if (frame_pointer_needed)
10736 {
10737 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10738 RTX_FRAME_RELATED_P (insn) = 1;
10739 }
10740
10741 /* Set up got pointer, if needed. */
10742
10743 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
10744 {
10745 rtx_insn *insns = s390_load_got ();
10746
10747 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
10748 annotate_constant_pool_refs (&PATTERN (insn));
10749
10750 emit_insn (insns);
10751 }
10752
10753 if (TARGET_TPF_PROFILING)
10754 {
10755 /* Generate a BAS instruction to serve as a function
10756 entry intercept to facilitate the use of tracing
10757 algorithms located at the branch target. */
10758 emit_insn (gen_prologue_tpf ());
10759
10760 /* Emit a blockage here so that all code
10761 lies between the profiling mechanisms. */
10762 emit_insn (gen_blockage ());
10763 }
10764 }
10765
10766 /* Expand the epilogue into a bunch of separate insns. */
10767
10768 void
10769 s390_emit_epilogue (bool sibcall)
10770 {
10771 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
10772 int area_bottom, area_top, offset = 0;
10773 int next_offset;
10774 rtvec p;
10775 int i;
10776
10777 if (TARGET_TPF_PROFILING)
10778 {
10779
10780 /* Generate a BAS instruction to serve as a function
10781 entry intercept to facilitate the use of tracing
10782 algorithms located at the branch target. */
10783
10784 /* Emit a blockage here so that all code
10785 lies between the profiling mechanisms. */
10786 emit_insn (gen_blockage ());
10787
10788 emit_insn (gen_epilogue_tpf ());
10789 }
10790
10791 /* Check whether to use frame or stack pointer for restore. */
10792
10793 frame_pointer = (frame_pointer_needed
10794 ? hard_frame_pointer_rtx : stack_pointer_rtx);
10795
10796 s390_frame_area (&area_bottom, &area_top);
10797
10798 /* Check whether we can access the register save area.
10799 If not, increment the frame pointer as required. */
10800
10801 if (area_top <= area_bottom)
10802 {
10803 /* Nothing to restore. */
10804 }
10805 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
10806 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
10807 {
10808 /* Area is in range. */
10809 offset = cfun_frame_layout.frame_size;
10810 }
10811 else
10812 {
10813 rtx insn, frame_off, cfa;
10814
10815 offset = area_bottom < 0 ? -area_bottom : 0;
10816 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
10817
10818 cfa = gen_rtx_SET (frame_pointer,
10819 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
10820 if (DISP_IN_RANGE (INTVAL (frame_off)))
10821 {
10822 insn = gen_rtx_SET (frame_pointer,
10823 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
10824 insn = emit_insn (insn);
10825 }
10826 else
10827 {
10828 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10829 frame_off = force_const_mem (Pmode, frame_off);
10830
10831 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
10832 annotate_constant_pool_refs (&PATTERN (insn));
10833 }
10834 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
10835 RTX_FRAME_RELATED_P (insn) = 1;
10836 }
10837
10838 /* Restore call saved fprs. */
10839
10840 if (TARGET_64BIT)
10841 {
10842 if (cfun_save_high_fprs_p)
10843 {
10844 next_offset = cfun_frame_layout.f8_offset;
10845 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
10846 {
10847 if (cfun_fpr_save_p (i))
10848 {
10849 restore_fpr (frame_pointer,
10850 offset + next_offset, i);
10851 cfa_restores
10852 = alloc_reg_note (REG_CFA_RESTORE,
10853 gen_rtx_REG (DFmode, i), cfa_restores);
10854 next_offset += 8;
10855 }
10856 }
10857 }
10858
10859 }
10860 else
10861 {
10862 next_offset = cfun_frame_layout.f4_offset;
10863 /* f4, f6 */
10864 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10865 {
10866 if (cfun_fpr_save_p (i))
10867 {
10868 restore_fpr (frame_pointer,
10869 offset + next_offset, i);
10870 cfa_restores
10871 = alloc_reg_note (REG_CFA_RESTORE,
10872 gen_rtx_REG (DFmode, i), cfa_restores);
10873 next_offset += 8;
10874 }
10875 else if (!TARGET_PACKED_STACK)
10876 next_offset += 8;
10877 }
10878
10879 }
10880
10881 /* Return register. */
10882
10883 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10884
10885 /* Restore call saved gprs. */
10886
10887 if (cfun_frame_layout.first_restore_gpr != -1)
10888 {
10889 rtx insn, addr;
10890 int i;
10891
10892 /* Check for global registers and save them
10893 to the stack location from where they get restored. */
10894
10895 for (i = cfun_frame_layout.first_restore_gpr;
10896 i <= cfun_frame_layout.last_restore_gpr;
10897 i++)
10898 {
10899 if (global_not_special_regno_p (i))
10900 {
10901 addr = plus_constant (Pmode, frame_pointer,
10902 offset + cfun_frame_layout.gprs_offset
10903 + (i - cfun_frame_layout.first_save_gpr_slot)
10904 * UNITS_PER_LONG);
10905 addr = gen_rtx_MEM (Pmode, addr);
10906 set_mem_alias_set (addr, get_frame_alias_set ());
10907 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
10908 }
10909 else
10910 cfa_restores
10911 = alloc_reg_note (REG_CFA_RESTORE,
10912 gen_rtx_REG (Pmode, i), cfa_restores);
10913 }
10914
10915 if (! sibcall)
10916 {
10917 /* Fetch return address from stack before load multiple,
10918 this helps scheduling.
10919
10920 Only do this if we already decided that r14 needs to be
10921 saved to a stack slot. (And not just because r14 happens to
10922 be in between two GPRs which need saving.) Otherwise it
10923 would be difficult to take that decision back in
10924 s390_optimize_prologue. */
10925 if (cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK)
10926 {
10927 int return_regnum = find_unused_clobbered_reg();
10928 if (!return_regnum)
10929 return_regnum = 4;
10930 return_reg = gen_rtx_REG (Pmode, return_regnum);
10931
10932 addr = plus_constant (Pmode, frame_pointer,
10933 offset + cfun_frame_layout.gprs_offset
10934 + (RETURN_REGNUM
10935 - cfun_frame_layout.first_save_gpr_slot)
10936 * UNITS_PER_LONG);
10937 addr = gen_rtx_MEM (Pmode, addr);
10938 set_mem_alias_set (addr, get_frame_alias_set ());
10939 emit_move_insn (return_reg, addr);
10940
10941 /* Once we did that optimization we have to make sure
10942 s390_optimize_prologue does not try to remove the
10943 store of r14 since we will not be able to find the
10944 load issued here. */
10945 cfun_frame_layout.save_return_addr_p = true;
10946 }
10947 }
10948
10949 insn = restore_gprs (frame_pointer,
10950 offset + cfun_frame_layout.gprs_offset
10951 + (cfun_frame_layout.first_restore_gpr
10952 - cfun_frame_layout.first_save_gpr_slot)
10953 * UNITS_PER_LONG,
10954 cfun_frame_layout.first_restore_gpr,
10955 cfun_frame_layout.last_restore_gpr);
10956 insn = emit_insn (insn);
10957 REG_NOTES (insn) = cfa_restores;
10958 add_reg_note (insn, REG_CFA_DEF_CFA,
10959 plus_constant (Pmode, stack_pointer_rtx,
10960 STACK_POINTER_OFFSET));
10961 RTX_FRAME_RELATED_P (insn) = 1;
10962 }
10963
10964 s390_restore_gprs_from_fprs ();
10965
10966 if (! sibcall)
10967 {
10968
10969 /* Return to caller. */
10970
10971 p = rtvec_alloc (2);
10972
10973 RTVEC_ELT (p, 0) = ret_rtx;
10974 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
10975 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
10976 }
10977 }
10978
10979 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
10980
10981 static void
10982 s300_set_up_by_prologue (hard_reg_set_container *regs)
10983 {
10984 if (cfun->machine->base_reg
10985 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
10986 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
10987 }
10988
10989 /* -fsplit-stack support. */
10990
10991 /* A SYMBOL_REF for __morestack. */
10992 static GTY(()) rtx morestack_ref;
10993
10994 /* When using -fsplit-stack, the allocation routines set a field in
10995 the TCB to the bottom of the stack plus this much space, measured
10996 in bytes. */
10997
10998 #define SPLIT_STACK_AVAILABLE 1024
10999
11000 /* Emit -fsplit-stack prologue, which goes before the regular function
11001 prologue. */
11002
11003 void
11004 s390_expand_split_stack_prologue (void)
11005 {
11006 rtx r1, guard, cc = NULL;
11007 rtx_insn *insn;
11008 /* Offset from thread pointer to __private_ss. */
11009 int psso = TARGET_64BIT ? 0x38 : 0x20;
11010 /* Pointer size in bytes. */
11011 /* Frame size and argument size - the two parameters to __morestack. */
11012 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11013 /* Align argument size to 8 bytes - simplifies __morestack code. */
11014 HOST_WIDE_INT args_size = crtl->args.size >= 0
11015 ? ((crtl->args.size + 7) & ~7)
11016 : 0;
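/* E.g. an incoming argument area of 13 bytes is rounded up to 16
   here, so __morestack only ever sees 8-byte multiples.  */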
11017 /* Label to be called by __morestack. */
11018 rtx_code_label *call_done = NULL;
11019 rtx_code_label *parm_base = NULL;
11020 rtx tmp;
11021
11022 gcc_assert (flag_split_stack && reload_completed);
11023 if (!TARGET_CPU_ZARCH)
11024 {
11025 sorry ("CPUs older than z900 are not supported for -fsplit-stack");
11026 return;
11027 }
11028
11029 r1 = gen_rtx_REG (Pmode, 1);
11030
11031 /* If no stack frame will be allocated, don't do anything. */
11032 if (!frame_size)
11033 {
11034 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11035 {
11036 /* If va_start is used, just use r15. */
11037 emit_move_insn (r1,
11038 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11039 GEN_INT (STACK_POINTER_OFFSET)));
11040
11041 }
11042 return;
11043 }
11044
11045 if (morestack_ref == NULL_RTX)
11046 {
11047 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11048 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11049 | SYMBOL_FLAG_FUNCTION);
11050 }
11051
11052 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11053 {
11054 /* If frame_size will fit in an add instruction, do a stack space
11055 check, and only call __morestack if there's not enough space. */
11056
11057 /* Get thread pointer. r1 is the only register we can always destroy - r0
11058 could contain a static chain (and cannot be used to address memory
11059 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11060 emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
11061 /* Aim at __private_ss. */
11062 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11063
11064 /* If less than 1 KiB is used, skip the addition and compare directly
11065 with __private_ss. */
11066 if (frame_size > SPLIT_STACK_AVAILABLE)
11067 {
11068 emit_move_insn (r1, guard);
11069 if (TARGET_64BIT)
11070 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11071 else
11072 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11073 guard = r1;
11074 }
11075
11076 /* Compare the (maybe adjusted) guard with the stack pointer. */
11077 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11078 }
11079
11080 call_done = gen_label_rtx ();
11081 parm_base = gen_label_rtx ();
11082
11083 /* Emit the parameter block. */
11084 tmp = gen_split_stack_data (parm_base, call_done,
11085 GEN_INT (frame_size),
11086 GEN_INT (args_size));
11087 insn = emit_insn (tmp);
11088 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11089 LABEL_NUSES (call_done)++;
11090 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11091 LABEL_NUSES (parm_base)++;
11092
11093 /* %r1 = litbase. */
11094 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11095 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11096 LABEL_NUSES (parm_base)++;
11097
11098 /* Now, we need to call __morestack. It has very special calling
11099 conventions: it preserves param/return/static chain registers for
11100 calling main function body, and looks for its own parameters at %r1. */
11101
11102 if (cc != NULL)
11103 {
11104 tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
11105
11106 insn = emit_jump_insn (tmp);
11107 JUMP_LABEL (insn) = call_done;
11108 LABEL_NUSES (call_done)++;
11109
11110 /* Mark the jump as very unlikely to be taken. */
11111 add_int_reg_note (insn, REG_BR_PROB, REG_BR_PROB_BASE / 100);
11112
11113 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11114 {
11115 /* If va_start is used, and __morestack was not called, just use
11116 r15. */
11117 emit_move_insn (r1,
11118 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11119 GEN_INT (STACK_POINTER_OFFSET)));
11120 }
11121 }
11122 else
11123 {
11124 tmp = gen_split_stack_call (morestack_ref, call_done);
11125 insn = emit_jump_insn (tmp);
11126 JUMP_LABEL (insn) = call_done;
11127 LABEL_NUSES (call_done)++;
11128 emit_barrier ();
11129 }
11130
11131 /* __morestack will call us here. */
11132
11133 emit_label (call_done);
11134 }
11135
11136 /* We may have to tell the dataflow pass that the split stack prologue
11137 is initializing a register. */
11138
11139 static void
11140 s390_live_on_entry (bitmap regs)
11141 {
11142 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11143 {
11144 gcc_assert (flag_split_stack);
11145 bitmap_set_bit (regs, 1);
11146 }
11147 }
11148
11149 /* Return true if the function can use simple_return to return outside
11150 of a shrink-wrapped region. At present shrink-wrapping is supported
11151 in all cases. */
11152
11153 bool
11154 s390_can_use_simple_return_insn (void)
11155 {
11156 return true;
11157 }
11158
11159 /* Return true if the epilogue is guaranteed to contain only a return
11160 instruction and if a direct return can therefore be used instead.
11161 One of the main advantages of using direct return instructions
11162 is that we can then use conditional returns. */
11163
11164 bool
11165 s390_can_use_return_insn (void)
11166 {
11167 int i;
11168
11169 if (!reload_completed)
11170 return false;
11171
11172 if (crtl->profile)
11173 return false;
11174
11175 if (TARGET_TPF_PROFILING)
11176 return false;
11177
11178 for (i = 0; i < 16; i++)
11179 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11180 return false;
11181
11182 /* For 31 bit this is not covered by the frame_size check below
11183 since f4, f6 are saved in the register save area without needing
11184 additional stack space. */
11185 if (!TARGET_64BIT
11186 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11187 return false;
11188
11189 if (cfun->machine->base_reg
11190 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11191 return false;
11192
11193 return cfun_frame_layout.frame_size == 0;
11194 }
11195
11196 /* The VX ABI differs for vararg functions. Therefore we need the
11197 prototype of the callee to be available when passing vector type
11198 values. */
11199 static const char *
11200 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11201 {
11202 return ((TARGET_VX_ABI
11203 && typelist == 0
11204 && VECTOR_TYPE_P (TREE_TYPE (val))
11205 && (funcdecl == NULL_TREE
11206 || (TREE_CODE (funcdecl) == FUNCTION_DECL
11207 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11208 ? N_("Vector argument passed to unprototyped function")
11209 : NULL);
11210 }
11211
11212
11213 /* Return the size in bytes of a function argument of
11214 type TYPE and/or mode MODE. At least one of TYPE or
11215 MODE must be specified. */
11216
11217 static int
11218 s390_function_arg_size (machine_mode mode, const_tree type)
11219 {
11220 if (type)
11221 return int_size_in_bytes (type);
11222
11223 /* No type info available for some library calls ... */
11224 if (mode != BLKmode)
11225 return GET_MODE_SIZE (mode);
11226
11227 /* If we have neither type nor mode, abort */
11228 gcc_unreachable ();
11229 }
11230
11231 /* Return true if a function argument of type TYPE and mode MODE
11232 is to be passed in a vector register, if available. */
11233
11234 bool
11235 s390_function_arg_vector (machine_mode mode, const_tree type)
11236 {
11237 if (!TARGET_VX_ABI)
11238 return false;
11239
11240 if (s390_function_arg_size (mode, type) > 16)
11241 return false;
11242
11243 /* No type info available for some library calls ... */
11244 if (!type)
11245 return VECTOR_MODE_P (mode);
11246
11247 /* The ABI says that record types with a single member are treated
11248 just like that member would be. */
11249 while (TREE_CODE (type) == RECORD_TYPE)
11250 {
11251 tree field, single = NULL_TREE;
11252
11253 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11254 {
11255 if (TREE_CODE (field) != FIELD_DECL)
11256 continue;
11257
11258 if (single == NULL_TREE)
11259 single = TREE_TYPE (field);
11260 else
11261 return false;
11262 }
11263
11264 if (single == NULL_TREE)
11265 return false;
11266 else
11267 {
11268 /* If the field declaration adds extra bytes due to
11269 e.g. padding, this is not accepted as a vector type. */
11270 if (int_size_in_bytes (single) <= 0
11271 || int_size_in_bytes (single) != int_size_in_bytes (type))
11272 return false;
11273 type = single;
11274 }
11275 }
11276
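/* At this point TYPE has been peeled down to the single underlying
   member, so e.g. a struct wrapping one 16-byte vector member is
   passed exactly like that bare vector type would be.  */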
11277 return VECTOR_TYPE_P (type);
11278 }
11279
11280 /* Return true if a function argument of type TYPE and mode MODE
11281 is to be passed in a floating-point register, if available. */
11282
11283 static bool
11284 s390_function_arg_float (machine_mode mode, const_tree type)
11285 {
11286 if (s390_function_arg_size (mode, type) > 8)
11287 return false;
11288
11289 /* Soft-float changes the ABI: no floating-point registers are used. */
11290 if (TARGET_SOFT_FLOAT)
11291 return false;
11292
11293 /* No type info available for some library calls ... */
11294 if (!type)
11295 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11296
11297 /* The ABI says that record types with a single member are treated
11298 just like that member would be. */
11299 while (TREE_CODE (type) == RECORD_TYPE)
11300 {
11301 tree field, single = NULL_TREE;
11302
11303 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11304 {
11305 if (TREE_CODE (field) != FIELD_DECL)
11306 continue;
11307
11308 if (single == NULL_TREE)
11309 single = TREE_TYPE (field);
11310 else
11311 return false;
11312 }
11313
11314 if (single == NULL_TREE)
11315 return false;
11316 else
11317 type = single;
11318 }
11319
11320 return TREE_CODE (type) == REAL_TYPE;
11321 }
11322
11323 /* Return true if a function argument of type TYPE and mode MODE
11324 is to be passed in an integer register, or a pair of integer
11325 registers, if available. */
11326
11327 static bool
11328 s390_function_arg_integer (machine_mode mode, const_tree type)
11329 {
11330 int size = s390_function_arg_size (mode, type);
11331 if (size > 8)
11332 return false;
11333
11334 /* No type info available for some library calls ... */
11335 if (!type)
11336 return GET_MODE_CLASS (mode) == MODE_INT
11337 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
11338
11339 /* We accept small integral (and similar) types. */
11340 if (INTEGRAL_TYPE_P (type)
11341 || POINTER_TYPE_P (type)
11342 || TREE_CODE (type) == NULLPTR_TYPE
11343 || TREE_CODE (type) == OFFSET_TYPE
11344 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
11345 return true;
11346
11347 /* We also accept structs of size 1, 2, 4, 8 that are not
11348 passed in floating-point registers. */
11349 if (AGGREGATE_TYPE_P (type)
11350 && exact_log2 (size) >= 0
11351 && !s390_function_arg_float (mode, type))
11352 return true;
11353
11354 return false;
11355 }
11356
11357 /* Return 1 if a function argument of type TYPE and mode MODE
11358 is to be passed by reference. The ABI specifies that only
11359 structures of size 1, 2, 4, or 8 bytes are passed by value,
11360 all other structures (and complex numbers) are passed by
11361 reference. */
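/* For instance, an 8-byte struct of two ints is passed by value,
   while a 12-byte struct or any complex number is passed by
   reference.  */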
11362
11363 static bool
11364 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
11365 machine_mode mode, const_tree type,
11366 bool named ATTRIBUTE_UNUSED)
11367 {
11368 int size = s390_function_arg_size (mode, type);
11369
11370 if (s390_function_arg_vector (mode, type))
11371 return false;
11372
11373 if (size > 8)
11374 return true;
11375
11376 if (type)
11377 {
11378 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
11379 return true;
11380
11381 if (TREE_CODE (type) == COMPLEX_TYPE
11382 || TREE_CODE (type) == VECTOR_TYPE)
11383 return true;
11384 }
11385
11386 return false;
11387 }
11388
11389 /* Update the data in CUM to advance over an argument of mode MODE and
11390 data type TYPE. (TYPE is null for libcalls where that information
11391 may not be available.) The boolean NAMED specifies whether the
11392 argument is a named argument (as opposed to an unnamed argument
11393 matching an ellipsis). */
11394
11395 static void
11396 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
11397 const_tree type, bool named)
11398 {
11399 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11400
11401 if (s390_function_arg_vector (mode, type))
11402 {
11403 /* We are called for unnamed vector stdarg arguments which are
11404 passed on the stack. In this case this hook does not have to
11405 do anything since stack arguments are tracked by common
11406 code. */
11407 if (!named)
11408 return;
11409 cum->vrs += 1;
11410 }
11411 else if (s390_function_arg_float (mode, type))
11412 {
11413 cum->fprs += 1;
11414 }
11415 else if (s390_function_arg_integer (mode, type))
11416 {
11417 int size = s390_function_arg_size (mode, type);
11418 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
11419 }
11420 else
11421 gcc_unreachable ();
11422 }
11423
11424 /* Define where to put the arguments to a function.
11425 Value is zero to push the argument on the stack,
11426 or a hard register in which to store the argument.
11427
11428 MODE is the argument's machine mode.
11429 TYPE is the data type of the argument (as a tree).
11430 This is null for libcalls where that information may
11431 not be available.
11432 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11433 the preceding args and about the function being called.
11434 NAMED is nonzero if this argument is a named parameter
11435 (otherwise it is an extra parameter matching an ellipsis).
11436
11437 On S/390, we use general purpose registers 2 through 6 to
11438 pass integer, pointer, and certain structure arguments, and
11439 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
11440 to pass floating point arguments. All remaining arguments
11441 are pushed to the stack. */
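/* For illustration, with hardware floating point a call such as
   'void f (int a, double d, void *p)' passes A in %r2, D in %f0 and
   P in %r3; arguments that do not fit into the remaining argument
   registers are passed on the stack as described above.  */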
11442
11443 static rtx
11444 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
11445 const_tree type, bool named)
11446 {
11447 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11448
11449 if (!named)
11450 s390_check_type_for_vector_abi (type, true, false);
11451
11452 if (s390_function_arg_vector (mode, type))
11453 {
11454 /* Vector arguments that are part of the ellipsis are passed on the
11455 stack. */
11456 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
11457 return NULL_RTX;
11458
11459 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
11460 }
11461 else if (s390_function_arg_float (mode, type))
11462 {
11463 if (cum->fprs + 1 > FP_ARG_NUM_REG)
11464 return NULL_RTX;
11465 else
11466 return gen_rtx_REG (mode, cum->fprs + 16);
11467 }
11468 else if (s390_function_arg_integer (mode, type))
11469 {
11470 int size = s390_function_arg_size (mode, type);
11471 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11472
11473 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
11474 return NULL_RTX;
11475 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
11476 return gen_rtx_REG (mode, cum->gprs + 2);
11477 else if (n_gprs == 2)
11478 {
11479 rtvec p = rtvec_alloc (2);
11480
11481 RTVEC_ELT (p, 0)
11482 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
11483 const0_rtx);
11484 RTVEC_ELT (p, 1)
11485 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
11486 GEN_INT (4));
11487
11488 return gen_rtx_PARALLEL (mode, p);
11489 }
11490 }
11491
11492 /* After the real arguments, expand_call calls us once again
11493 with a void_type_node type. Whatever we return here is
11494 passed as operand 2 to the call expanders.
11495
11496 We don't need this feature ... */
11497 else if (type == void_type_node)
11498 return const0_rtx;
11499
11500 gcc_unreachable ();
11501 }
11502
11503 /* Return true if return values of type TYPE should be returned
11504 in a memory buffer whose address is passed by the caller as
11505 hidden first argument. */
11506
11507 static bool
11508 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
11509 {
11510 /* We accept small integral (and similar) types. */
11511 if (INTEGRAL_TYPE_P (type)
11512 || POINTER_TYPE_P (type)
11513 || TREE_CODE (type) == OFFSET_TYPE
11514 || TREE_CODE (type) == REAL_TYPE)
11515 return int_size_in_bytes (type) > 8;
11516
11517 /* Vector types which fit into a VR are not returned in memory. */
11518 if (TARGET_VX_ABI
11519 && VECTOR_TYPE_P (type)
11520 && int_size_in_bytes (type) <= 16)
11521 return false;
11522
11523 /* Aggregates and similar constructs are always returned
11524 in memory. */
11525 if (AGGREGATE_TYPE_P (type)
11526 || TREE_CODE (type) == COMPLEX_TYPE
11527 || VECTOR_TYPE_P (type))
11528 return true;
11529
11530 /* ??? We get called on all sorts of random stuff from
11531 aggregate_value_p. We can't abort, but it's not clear
11532 what's safe to return. Pretend it's a struct I guess. */
11533 return true;
11534 }
11535
11536 /* Function arguments and return values are promoted to word size. */
11537
11538 static machine_mode
11539 s390_promote_function_mode (const_tree type, machine_mode mode,
11540 int *punsignedp,
11541 const_tree fntype ATTRIBUTE_UNUSED,
11542 int for_return ATTRIBUTE_UNUSED)
11543 {
11544 if (INTEGRAL_MODE_P (mode)
11545 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
11546 {
11547 if (type != NULL_TREE && POINTER_TYPE_P (type))
11548 *punsignedp = POINTERS_EXTEND_UNSIGNED;
11549 return Pmode;
11550 }
11551
11552 return mode;
11553 }
11554
11555 /* Define where to return a (scalar) value of type RET_TYPE.
11556 If RET_TYPE is null, define where to return a (scalar)
11557 value of mode MODE from a libcall. */
11558
11559 static rtx
11560 s390_function_and_libcall_value (machine_mode mode,
11561 const_tree ret_type,
11562 const_tree fntype_or_decl,
11563 bool outgoing ATTRIBUTE_UNUSED)
11564 {
11565 /* For vector return types it is important to use the RET_TYPE
11566 argument whenever available since the middle-end might have
11567 changed the mode to a scalar mode. */
11568 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
11569 || (!ret_type && VECTOR_MODE_P (mode)));
11570
11571 /* For normal functions perform the promotion as
11572 promote_function_mode would do. */
11573 if (ret_type)
11574 {
11575 int unsignedp = TYPE_UNSIGNED (ret_type);
11576 mode = promote_function_mode (ret_type, mode, &unsignedp,
11577 fntype_or_decl, 1);
11578 }
11579
11580 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
11581 || SCALAR_FLOAT_MODE_P (mode)
11582 || (TARGET_VX_ABI && vector_ret_type_p));
11583 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
11584
11585 if (TARGET_VX_ABI && vector_ret_type_p)
11586 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
11587 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
11588 return gen_rtx_REG (mode, 16);
11589 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
11590 || UNITS_PER_LONG == UNITS_PER_WORD)
11591 return gen_rtx_REG (mode, 2);
11592 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
11593 {
11594 /* This case is triggered when returning a 64 bit value with
11595 -m31 -mzarch. Although the value would fit into a single
11596 register it has to be forced into a 32 bit register pair in
11597 order to match the ABI. */
11598 rtvec p = rtvec_alloc (2);
11599
11600 RTVEC_ELT (p, 0)
11601 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
11602 RTVEC_ELT (p, 1)
11603 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
11604
11605 return gen_rtx_PARALLEL (mode, p);
11606 }
11607
11608 gcc_unreachable ();
11609 }
11610
11611 /* Define where to return a scalar return value of type RET_TYPE. */
11612
11613 static rtx
11614 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
11615 bool outgoing)
11616 {
11617 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
11618 fn_decl_or_type, outgoing);
11619 }
11620
11621 /* Define where to return a scalar libcall return value of mode
11622 MODE. */
11623
11624 static rtx
11625 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
11626 {
11627 return s390_function_and_libcall_value (mode, NULL_TREE,
11628 NULL_TREE, true);
11629 }
11630
11631
11632 /* Create and return the va_list datatype.
11633
11634 On S/390, va_list is an array type equivalent to
11635
11636 typedef struct __va_list_tag
11637 {
11638 long __gpr;
11639 long __fpr;
11640 void *__overflow_arg_area;
11641 void *__reg_save_area;
11642 } va_list[1];
11643
11644 where __gpr and __fpr hold the number of general purpose
11645 or floating point arguments used up to now, respectively,
11646 __overflow_arg_area points to the stack location of the
11647 next argument passed on the stack, and __reg_save_area
11648 always points to the start of the register area in the
11649 call frame of the current function. The function prologue
11650 saves all registers used for argument passing into this
11651 area if the function uses variable arguments. */
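/* Note that within __reg_save_area the GPR arguments %r2-%r6 are found
   at offsets 2-6 * UNITS_PER_LONG and the FPR arguments start at offset
   16 * UNITS_PER_LONG; s390_gimplify_va_arg below relies on exactly this
   layout when computing the address of a register-passed argument.  */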
11652
11653 static tree
11654 s390_build_builtin_va_list (void)
11655 {
11656 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
11657
11658 record = lang_hooks.types.make_type (RECORD_TYPE);
11659
11660 type_decl =
11661 build_decl (BUILTINS_LOCATION,
11662 TYPE_DECL, get_identifier ("__va_list_tag"), record);
11663
11664 f_gpr = build_decl (BUILTINS_LOCATION,
11665 FIELD_DECL, get_identifier ("__gpr"),
11666 long_integer_type_node);
11667 f_fpr = build_decl (BUILTINS_LOCATION,
11668 FIELD_DECL, get_identifier ("__fpr"),
11669 long_integer_type_node);
11670 f_ovf = build_decl (BUILTINS_LOCATION,
11671 FIELD_DECL, get_identifier ("__overflow_arg_area"),
11672 ptr_type_node);
11673 f_sav = build_decl (BUILTINS_LOCATION,
11674 FIELD_DECL, get_identifier ("__reg_save_area"),
11675 ptr_type_node);
11676
11677 va_list_gpr_counter_field = f_gpr;
11678 va_list_fpr_counter_field = f_fpr;
11679
11680 DECL_FIELD_CONTEXT (f_gpr) = record;
11681 DECL_FIELD_CONTEXT (f_fpr) = record;
11682 DECL_FIELD_CONTEXT (f_ovf) = record;
11683 DECL_FIELD_CONTEXT (f_sav) = record;
11684
11685 TYPE_STUB_DECL (record) = type_decl;
11686 TYPE_NAME (record) = type_decl;
11687 TYPE_FIELDS (record) = f_gpr;
11688 DECL_CHAIN (f_gpr) = f_fpr;
11689 DECL_CHAIN (f_fpr) = f_ovf;
11690 DECL_CHAIN (f_ovf) = f_sav;
11691
11692 layout_type (record);
11693
11694 /* The correct type is an array type of one element. */
11695 return build_array_type (record, build_index_type (size_zero_node));
11696 }
11697
11698 /* Implement va_start by filling the va_list structure VALIST.
11699 STDARG_P is always true, and ignored.
11700 NEXTARG points to the first anonymous stack argument.
11701
11702 The following global variables are used to initialize
11703 the va_list structure:
11704
11705 crtl->args.info:
11706 holds number of gprs and fprs used for named arguments.
11707 crtl->args.arg_offset_rtx:
11708 holds the offset of the first anonymous stack argument
11709 (relative to the virtual arg pointer). */
11710
11711 static void
11712 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
11713 {
11714 HOST_WIDE_INT n_gpr, n_fpr;
11715 int off;
11716 tree f_gpr, f_fpr, f_ovf, f_sav;
11717 tree gpr, fpr, ovf, sav, t;
11718
11719 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11720 f_fpr = DECL_CHAIN (f_gpr);
11721 f_ovf = DECL_CHAIN (f_fpr);
11722 f_sav = DECL_CHAIN (f_ovf);
11723
11724 valist = build_simple_mem_ref (valist);
11725 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11726 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11727 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
11728 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11729
11730 /* Count number of gp and fp argument registers used. */
11731
11732 n_gpr = crtl->args.info.gprs;
11733 n_fpr = crtl->args.info.fprs;
11734
11735 if (cfun->va_list_gpr_size)
11736 {
11737 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11738 build_int_cst (NULL_TREE, n_gpr));
11739 TREE_SIDE_EFFECTS (t) = 1;
11740 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11741 }
11742
11743 if (cfun->va_list_fpr_size)
11744 {
11745 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11746 build_int_cst (NULL_TREE, n_fpr));
11747 TREE_SIDE_EFFECTS (t) = 1;
11748 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11749 }
11750
11751 if (flag_split_stack
11752 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
11753 == NULL)
11754 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
11755 {
11756 rtx reg;
11757 rtx_insn *seq;
11758
11759 reg = gen_reg_rtx (Pmode);
11760 cfun->machine->split_stack_varargs_pointer = reg;
11761
11762 start_sequence ();
11763 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
11764 seq = get_insns ();
11765 end_sequence ();
11766
11767 push_topmost_sequence ();
11768 emit_insn_after (seq, entry_of_function ());
11769 pop_topmost_sequence ();
11770 }
11771
11772 /* Find the overflow area.
11773 FIXME: This currently is too pessimistic when the vector ABI is
11774 enabled. In that case we *always* set up the overflow area
11775 pointer. */
11776 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
11777 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
11778 || TARGET_VX_ABI)
11779 {
11780 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
11781 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11782 else
11783 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
11784
11785 off = INTVAL (crtl->args.arg_offset_rtx);
11786 off = off < 0 ? 0 : off;
11787 if (TARGET_DEBUG_ARG)
11788 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
11789 (int)n_gpr, (int)n_fpr, off);
11790
11791 t = fold_build_pointer_plus_hwi (t, off);
11792
11793 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11794 TREE_SIDE_EFFECTS (t) = 1;
11795 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11796 }
11797
11798 /* Find the register save area. */
11799 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
11800 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
11801 {
11802 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
11803 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
11804
11805 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11806 TREE_SIDE_EFFECTS (t) = 1;
11807 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11808 }
11809 }
11810
11811 /* Implement va_arg by updating the va_list structure
11812 VALIST as required to retrieve an argument of type
11813 TYPE, and returning that argument.
11814
11815 Generates code equivalent to:
11816
11817 if (integral value) {
11818 if ((size <= 4 && args.gpr < 5) ||
11819 (size > 4 && args.gpr < 4))
11820 ret = args.reg_save_area[args.gpr+8]
11821 else
11822 ret = *args.overflow_arg_area++;
11823 } else if (vector value) {
11824 ret = *args.overflow_arg_area;
11825 args.overflow_arg_area += size / 8;
11826 } else if (float value) {
11827 if (args.fpr < 2)
11828 ret = args.reg_save_area[args.fpr+64]
11829 else
11830 ret = *args.overflow_arg_area++;
11831 } else if (aggregate value) {
11832 if (args.gpr < 5)
11833 ret = *args.reg_save_area[args.gpr]
11834 else
11835 ret = **args.overflow_arg_area++;
11836 } */
11837
11838 static tree
11839 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11840 gimple_seq *post_p ATTRIBUTE_UNUSED)
11841 {
11842 tree f_gpr, f_fpr, f_ovf, f_sav;
11843 tree gpr, fpr, ovf, sav, reg, t, u;
11844 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
11845 tree lab_false, lab_over;
11846 tree addr = create_tmp_var (ptr_type_node, "addr");
11847 bool left_align_p; /* Whether a value smaller than UNITS_PER_LONG is
11848 left-aligned within its stack slot. */
11849
11850 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11851 f_fpr = DECL_CHAIN (f_gpr);
11852 f_ovf = DECL_CHAIN (f_fpr);
11853 f_sav = DECL_CHAIN (f_ovf);
11854
11855 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11856 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11857 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11858
11859 /* The tree for args* cannot be shared between gpr/fpr and ovf since
11860 both appear on a lhs. */
11861 valist = unshare_expr (valist);
11862 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
11863
11864 size = int_size_in_bytes (type);
11865
11866 s390_check_type_for_vector_abi (type, true, false);
11867
11868 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11869 {
11870 if (TARGET_DEBUG_ARG)
11871 {
11872 fprintf (stderr, "va_arg: aggregate type");
11873 debug_tree (type);
11874 }
11875
11876 /* Aggregates are passed by reference. */
11877 indirect_p = 1;
11878 reg = gpr;
11879 n_reg = 1;
11880
11881 /* Kernel stack layout on 31 bit: it is assumed here that no padding
11882 will be added by s390_frame_info because for va_args an even
11883 number of GPRs always has to be saved (r15-r2 = 14 regs). */
11884 sav_ofs = 2 * UNITS_PER_LONG;
11885 sav_scale = UNITS_PER_LONG;
11886 size = UNITS_PER_LONG;
11887 max_reg = GP_ARG_NUM_REG - n_reg;
11888 left_align_p = false;
11889 }
11890 else if (s390_function_arg_vector (TYPE_MODE (type), type))
11891 {
11892 if (TARGET_DEBUG_ARG)
11893 {
11894 fprintf (stderr, "va_arg: vector type");
11895 debug_tree (type);
11896 }
11897
11898 indirect_p = 0;
11899 reg = NULL_TREE;
11900 n_reg = 0;
11901 sav_ofs = 0;
11902 sav_scale = 8;
11903 max_reg = 0;
11904 left_align_p = true;
11905 }
11906 else if (s390_function_arg_float (TYPE_MODE (type), type))
11907 {
11908 if (TARGET_DEBUG_ARG)
11909 {
11910 fprintf (stderr, "va_arg: float type");
11911 debug_tree (type);
11912 }
11913
11914 /* FP args go in FP registers, if present. */
11915 indirect_p = 0;
11916 reg = fpr;
11917 n_reg = 1;
11918 sav_ofs = 16 * UNITS_PER_LONG;
11919 sav_scale = 8;
11920 max_reg = FP_ARG_NUM_REG - n_reg;
11921 left_align_p = false;
11922 }
11923 else
11924 {
11925 if (TARGET_DEBUG_ARG)
11926 {
11927 fprintf (stderr, "va_arg: other type");
11928 debug_tree (type);
11929 }
11930
11931 /* Otherwise into GP registers. */
11932 indirect_p = 0;
11933 reg = gpr;
11934 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11935
11936 /* Kernel stack layout on 31 bit: it is assumed here that no padding
11937 will be added by s390_frame_info because for va_args an even
11938 number of GPRs always has to be saved (r15-r2 = 14 regs). */
11939 sav_ofs = 2 * UNITS_PER_LONG;
11940
11941 if (size < UNITS_PER_LONG)
11942 sav_ofs += UNITS_PER_LONG - size;
11943
11944 sav_scale = UNITS_PER_LONG;
11945 max_reg = GP_ARG_NUM_REG - n_reg;
11946 left_align_p = false;
11947 }
11948
11949 /* Pull the value out of the saved registers ... */
11950
11951 if (reg != NULL_TREE)
11952 {
11953 /*
11954 if (reg > ((typeof (reg))max_reg))
11955 goto lab_false;
11956
11957 addr = sav + sav_ofs + reg * sav_scale;
11958
11959 goto lab_over;
11960
11961 lab_false:
11962 */
11963
11964 lab_false = create_artificial_label (UNKNOWN_LOCATION);
11965 lab_over = create_artificial_label (UNKNOWN_LOCATION);
11966
11967 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
11968 t = build2 (GT_EXPR, boolean_type_node, reg, t);
11969 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11970 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11971 gimplify_and_add (t, pre_p);
11972
11973 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11974 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
11975 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
11976 t = fold_build_pointer_plus (t, u);
11977
11978 gimplify_assign (addr, t, pre_p);
11979
11980 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11981
11982 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
11983 }
11984
11985 /* ... Otherwise out of the overflow area. */
11986
11987 t = ovf;
11988 if (size < UNITS_PER_LONG && !left_align_p)
11989 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
11990
11991 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11992
11993 gimplify_assign (addr, t, pre_p);
11994
11995 if (size < UNITS_PER_LONG && left_align_p)
11996 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
11997 else
11998 t = fold_build_pointer_plus_hwi (t, size);
11999
12000 gimplify_assign (ovf, t, pre_p);
12001
12002 if (reg != NULL_TREE)
12003 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12004
12005
12006 /* Increment register save count. */
12007
12008 if (n_reg > 0)
12009 {
12010 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12011 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12012 gimplify_and_add (u, pre_p);
12013 }
12014
12015 if (indirect_p)
12016 {
12017 t = build_pointer_type_for_mode (build_pointer_type (type),
12018 ptr_mode, true);
12019 addr = fold_convert (t, addr);
12020 addr = build_va_arg_indirect_ref (addr);
12021 }
12022 else
12023 {
12024 t = build_pointer_type_for_mode (type, ptr_mode, true);
12025 addr = fold_convert (t, addr);
12026 }
12027
12028 return build_va_arg_indirect_ref (addr);
12029 }
12030
12031 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12032 expanders.
12033 DEST - Register location where CC will be stored.
12034 TDB - Pointer to a 256 byte area where to store the transaction
12035 diagnostic block. NULL if TDB is not needed.
12036 RETRY - Retry count value. If non-NULL a retry loop for CC2
12037 is emitted.
12038 CLOBBER_FPRS_P - If true, clobbers for all FPRs are emitted as part
12039 of the tbegin instruction pattern. */
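/* With a non-NULL RETRY the emitted sequence is roughly:

     retry_plus_two = RETRY + 2;
     retry_reg      = RETRY + 1;
   retry_label:
     tbegin                                   (with or without FPR clobbers)
     DEST = CC;
     if (CC is 0, 1 or 3) goto leave_label;   (success or permanent failure)
     tx_assist (retry_plus_two - retry_reg);  (transient failure, CC2 -> PPA)
     if (--retry_reg != 0) goto retry_label;
   leave_label:  */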
12040
12041 void
12042 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12043 {
12044 rtx retry_plus_two = gen_reg_rtx (SImode);
12045 rtx retry_reg = gen_reg_rtx (SImode);
12046 rtx_code_label *retry_label = NULL;
12047
12048 if (retry != NULL_RTX)
12049 {
12050 emit_move_insn (retry_reg, retry);
12051 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12052 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12053 retry_label = gen_label_rtx ();
12054 emit_label (retry_label);
12055 }
12056
12057 if (clobber_fprs_p)
12058 {
12059 if (TARGET_VX)
12060 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12061 tdb));
12062 else
12063 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12064 tdb));
12065 }
12066 else
12067 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12068 tdb));
12069
12070 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12071 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12072 CC_REGNUM)),
12073 UNSPEC_CC_TO_INT));
12074 if (retry != NULL_RTX)
12075 {
12076 const int CC0 = 1 << 3;
12077 const int CC1 = 1 << 2;
12078 const int CC3 = 1 << 0;
12079 rtx jump;
12080 rtx count = gen_reg_rtx (SImode);
12081 rtx_code_label *leave_label = gen_label_rtx ();
12082
12083 /* Exit for success and permanent failures. */
12084 jump = s390_emit_jump (leave_label,
12085 gen_rtx_EQ (VOIDmode,
12086 gen_rtx_REG (CCRAWmode, CC_REGNUM),
12087 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12088 LABEL_NUSES (leave_label) = 1;
12089
12090 /* CC2 - transient failure. Perform retry with ppa. */
12091 emit_move_insn (count, retry_plus_two);
12092 emit_insn (gen_subsi3 (count, count, retry_reg));
12093 emit_insn (gen_tx_assist (count));
12094 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12095 retry_reg,
12096 retry_reg));
12097 JUMP_LABEL (jump) = retry_label;
12098 LABEL_NUSES (retry_label) = 1;
12099 emit_label (leave_label);
12100 }
12101 }
12102
12103
12104 /* Return the decl for the target specific builtin with the function
12105 code FCODE. */
12106
12107 static tree
12108 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12109 {
12110 if (fcode >= S390_BUILTIN_MAX)
12111 return error_mark_node;
12112
12113 return s390_builtin_decls[fcode];
12114 }
12115
12116 /* We call mcount before the function prologue. So a profiled leaf
12117 function should stay a leaf function. */
12118
12119 static bool
12120 s390_keep_leaf_when_profiled ()
12121 {
12122 return true;
12123 }
12124
12125 /* Output assembly code for the trampoline template to
12126 stdio stream FILE.
12127
12128 On S/390, we use gpr 1 internally in the trampoline code;
12129 gpr 0 is used to hold the static chain. */
12130
12131 static void
12132 s390_asm_trampoline_template (FILE *file)
12133 {
12134 rtx op[2];
12135 op[0] = gen_rtx_REG (Pmode, 0);
12136 op[1] = gen_rtx_REG (Pmode, 1);
12137
12138 if (TARGET_64BIT)
12139 {
12140 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12141 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
12142 output_asm_insn ("br\t%1", op); /* 2 byte */
12143 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12144 }
12145 else
12146 {
12147 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12148 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
12149 output_asm_insn ("br\t%1", op); /* 2 byte */
12150 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12151 }
12152 }
12153
12154 /* Emit RTL insns to initialize the variable parts of a trampoline.
12155 FNADDR is an RTX for the address of the function's pure code.
12156 CXT is an RTX for the static chain value for the function. */
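/* In the 64-bit case the initialized trampoline therefore looks like:

      0:  basr %r1,0             (load address of the next insn into %r1)
      2:  lmg  %r0,%r1,14(%r1)   (%r0 <- static chain, %r1 <- target)
      8:  br   %r1
     16:  <static chain>         (written at 2 * UNITS_PER_LONG)
     24:  <function address>     (written at 3 * UNITS_PER_LONG)  */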
12157
12158 static void
12159 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12160 {
12161 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12162 rtx mem;
12163
12164 emit_block_move (m_tramp, assemble_trampoline_template (),
12165 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12166
12167 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12168 emit_move_insn (mem, cxt);
12169 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12170 emit_move_insn (mem, fnaddr);
12171 }
12172
12173 /* Output assembler code to FILE to increment profiler label # LABELNO
12174 for profiling a function entry. */
12175
12176 void
12177 s390_function_profiler (FILE *file, int labelno)
12178 {
12179 rtx op[7];
12180
12181 char label[128];
12182 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12183
12184 fprintf (file, "# function profiler \n");
12185
12186 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12187 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12188 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12189
12190 op[2] = gen_rtx_REG (Pmode, 1);
12191 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12192 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12193
12194 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
12195 if (flag_pic)
12196 {
12197 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12198 op[4] = gen_rtx_CONST (Pmode, op[4]);
12199 }
12200
12201 if (TARGET_64BIT)
12202 {
12203 output_asm_insn ("stg\t%0,%1", op);
12204 output_asm_insn ("larl\t%2,%3", op);
12205 output_asm_insn ("brasl\t%0,%4", op);
12206 output_asm_insn ("lg\t%0,%1", op);
12207 }
12208 else if (!flag_pic)
12209 {
12210 op[6] = gen_label_rtx ();
12211
12212 output_asm_insn ("st\t%0,%1", op);
12213 output_asm_insn ("bras\t%2,%l6", op);
12214 output_asm_insn (".long\t%4", op);
12215 output_asm_insn (".long\t%3", op);
12216 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12217 output_asm_insn ("l\t%0,0(%2)", op);
12218 output_asm_insn ("l\t%2,4(%2)", op);
12219 output_asm_insn ("basr\t%0,%0", op);
12220 output_asm_insn ("l\t%0,%1", op);
12221 }
12222 else
12223 {
12224 op[5] = gen_label_rtx ();
12225 op[6] = gen_label_rtx ();
12226
12227 output_asm_insn ("st\t%0,%1", op);
12228 output_asm_insn ("bras\t%2,%l6", op);
12229 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
12230 output_asm_insn (".long\t%4-%l5", op);
12231 output_asm_insn (".long\t%3-%l5", op);
12232 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12233 output_asm_insn ("lr\t%0,%2", op);
12234 output_asm_insn ("a\t%0,0(%2)", op);
12235 output_asm_insn ("a\t%2,4(%2)", op);
12236 output_asm_insn ("basr\t%0,%0", op);
12237 output_asm_insn ("l\t%0,%1", op);
12238 }
12239 }
12240
12241 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
12242 into its SYMBOL_REF_FLAGS. */
12243
12244 static void
12245 s390_encode_section_info (tree decl, rtx rtl, int first)
12246 {
12247 default_encode_section_info (decl, rtl, first);
12248
12249 if (TREE_CODE (decl) == VAR_DECL)
12250 {
12251 /* Store the alignment to be able to check if we can use
12252 a larl/load-relative instruction. We only handle the cases
12253 that can go wrong (i.e. no FUNC_DECLs). If a symref does
12254 not have any flag we assume it to be correctly aligned. */
12255
12256 if (DECL_ALIGN (decl) % 64)
12257 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12258
12259 if (DECL_ALIGN (decl) % 32)
12260 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12261
12262 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
12263 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12264 }
12265
12266 /* Literal pool references don't have a decl so they are handled
12267 differently here. We rely on the information in the MEM_ALIGN
12268 entry to decide upon the alignment. */
12269 if (MEM_P (rtl)
12270 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
12271 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
12272 && MEM_ALIGN (rtl) != 0
12273 && GET_MODE_BITSIZE (GET_MODE (rtl)) != 0)
12274 {
12275 if (MEM_ALIGN (rtl) % 64)
12276 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12277
12278 if (MEM_ALIGN (rtl) % 32)
12279 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12280
12281 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
12282 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12283 }
12284 }
12285
12286 /* Output thunk to FILE that implements a C++ virtual function call (with
12287 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
12288 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
12289 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
12290 relative to the resulting this pointer. */
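/* In the simplest 64-bit case (small DELTA, zero VCALL_OFFSET, this
   pointer in %r2) the emitted thunk reduces to

       la  %r2,DELTA(%r2)
       jg  <function>

   and with a small non-zero VCALL_OFFSET additionally

       lg  %r1,0(%r2)
       ag  %r2,VCALL_OFFSET(%r1)

   is emitted before the jump.  */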
12291
12292 static void
12293 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
12294 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12295 tree function)
12296 {
12297 rtx op[10];
12298 int nonlocal = 0;
12299
12300 /* Make sure unwind info is emitted for the thunk if needed. */
12301 final_start_function (emit_barrier (), file, 1);
12302
12303 /* Operand 0 is the target function. */
12304 op[0] = XEXP (DECL_RTL (function), 0);
12305 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
12306 {
12307 nonlocal = 1;
12308 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
12309 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
12310 op[0] = gen_rtx_CONST (Pmode, op[0]);
12311 }
12312
12313 /* Operand 1 is the 'this' pointer. */
12314 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12315 op[1] = gen_rtx_REG (Pmode, 3);
12316 else
12317 op[1] = gen_rtx_REG (Pmode, 2);
12318
12319 /* Operand 2 is the delta. */
12320 op[2] = GEN_INT (delta);
12321
12322 /* Operand 3 is the vcall_offset. */
12323 op[3] = GEN_INT (vcall_offset);
12324
12325 /* Operand 4 is the temporary register. */
12326 op[4] = gen_rtx_REG (Pmode, 1);
12327
12328 /* Operands 5 to 8 can be used as labels. */
12329 op[5] = NULL_RTX;
12330 op[6] = NULL_RTX;
12331 op[7] = NULL_RTX;
12332 op[8] = NULL_RTX;
12333
12334 /* Operand 9 can be used for temporary register. */
12335 op[9] = NULL_RTX;
12336
12337 /* Generate code. */
12338 if (TARGET_64BIT)
12339 {
12340 /* Setup literal pool pointer if required. */
12341 if ((!DISP_IN_RANGE (delta)
12342 && !CONST_OK_FOR_K (delta)
12343 && !CONST_OK_FOR_Os (delta))
12344 || (!DISP_IN_RANGE (vcall_offset)
12345 && !CONST_OK_FOR_K (vcall_offset)
12346 && !CONST_OK_FOR_Os (vcall_offset)))
12347 {
12348 op[5] = gen_label_rtx ();
12349 output_asm_insn ("larl\t%4,%5", op);
12350 }
12351
12352 /* Add DELTA to this pointer. */
12353 if (delta)
12354 {
12355 if (CONST_OK_FOR_J (delta))
12356 output_asm_insn ("la\t%1,%2(%1)", op);
12357 else if (DISP_IN_RANGE (delta))
12358 output_asm_insn ("lay\t%1,%2(%1)", op);
12359 else if (CONST_OK_FOR_K (delta))
12360 output_asm_insn ("aghi\t%1,%2", op);
12361 else if (CONST_OK_FOR_Os (delta))
12362 output_asm_insn ("agfi\t%1,%2", op);
12363 else
12364 {
12365 op[6] = gen_label_rtx ();
12366 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
12367 }
12368 }
12369
12370 /* Perform vcall adjustment. */
12371 if (vcall_offset)
12372 {
12373 if (DISP_IN_RANGE (vcall_offset))
12374 {
12375 output_asm_insn ("lg\t%4,0(%1)", op);
12376 output_asm_insn ("ag\t%1,%3(%4)", op);
12377 }
12378 else if (CONST_OK_FOR_K (vcall_offset))
12379 {
12380 output_asm_insn ("lghi\t%4,%3", op);
12381 output_asm_insn ("ag\t%4,0(%1)", op);
12382 output_asm_insn ("ag\t%1,0(%4)", op);
12383 }
12384 else if (CONST_OK_FOR_Os (vcall_offset))
12385 {
12386 output_asm_insn ("lgfi\t%4,%3", op);
12387 output_asm_insn ("ag\t%4,0(%1)", op);
12388 output_asm_insn ("ag\t%1,0(%4)", op);
12389 }
12390 else
12391 {
12392 op[7] = gen_label_rtx ();
12393 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
12394 output_asm_insn ("ag\t%4,0(%1)", op);
12395 output_asm_insn ("ag\t%1,0(%4)", op);
12396 }
12397 }
12398
12399 /* Jump to target. */
12400 output_asm_insn ("jg\t%0", op);
12401
12402 /* Output literal pool if required. */
12403 if (op[5])
12404 {
12405 output_asm_insn (".align\t4", op);
12406 targetm.asm_out.internal_label (file, "L",
12407 CODE_LABEL_NUMBER (op[5]));
12408 }
12409 if (op[6])
12410 {
12411 targetm.asm_out.internal_label (file, "L",
12412 CODE_LABEL_NUMBER (op[6]));
12413 output_asm_insn (".long\t%2", op);
12414 }
12415 if (op[7])
12416 {
12417 targetm.asm_out.internal_label (file, "L",
12418 CODE_LABEL_NUMBER (op[7]));
12419 output_asm_insn (".long\t%3", op);
12420 }
12421 }
12422 else
12423 {
12424 /* Setup base pointer if required. */
12425 if (!vcall_offset
12426 || (!DISP_IN_RANGE (delta)
12427 && !CONST_OK_FOR_K (delta)
12428 && !CONST_OK_FOR_Os (delta))
12429 || (!DISP_IN_RANGE (delta)
12430 && !CONST_OK_FOR_K (vcall_offset)
12431 && !CONST_OK_FOR_Os (vcall_offset)))
12432 {
12433 op[5] = gen_label_rtx ();
12434 output_asm_insn ("basr\t%4,0", op);
12435 targetm.asm_out.internal_label (file, "L",
12436 CODE_LABEL_NUMBER (op[5]));
12437 }
12438
12439 /* Add DELTA to this pointer. */
12440 if (delta)
12441 {
12442 if (CONST_OK_FOR_J (delta))
12443 output_asm_insn ("la\t%1,%2(%1)", op);
12444 else if (DISP_IN_RANGE (delta))
12445 output_asm_insn ("lay\t%1,%2(%1)", op);
12446 else if (CONST_OK_FOR_K (delta))
12447 output_asm_insn ("ahi\t%1,%2", op);
12448 else if (CONST_OK_FOR_Os (delta))
12449 output_asm_insn ("afi\t%1,%2", op);
12450 else
12451 {
12452 op[6] = gen_label_rtx ();
12453 output_asm_insn ("a\t%1,%6-%5(%4)", op);
12454 }
12455 }
12456
12457 /* Perform vcall adjustment. */
12458 if (vcall_offset)
12459 {
12460 if (CONST_OK_FOR_J (vcall_offset))
12461 {
12462 output_asm_insn ("l\t%4,0(%1)", op);
12463 output_asm_insn ("a\t%1,%3(%4)", op);
12464 }
12465 else if (DISP_IN_RANGE (vcall_offset))
12466 {
12467 output_asm_insn ("l\t%4,0(%1)", op);
12468 output_asm_insn ("ay\t%1,%3(%4)", op);
12469 }
12470 else if (CONST_OK_FOR_K (vcall_offset))
12471 {
12472 output_asm_insn ("lhi\t%4,%3", op);
12473 output_asm_insn ("a\t%4,0(%1)", op);
12474 output_asm_insn ("a\t%1,0(%4)", op);
12475 }
12476 else if (CONST_OK_FOR_Os (vcall_offset))
12477 {
12478 output_asm_insn ("iilf\t%4,%3", op);
12479 output_asm_insn ("a\t%4,0(%1)", op);
12480 output_asm_insn ("a\t%1,0(%4)", op);
12481 }
12482 else
12483 {
12484 op[7] = gen_label_rtx ();
12485 output_asm_insn ("l\t%4,%7-%5(%4)", op);
12486 output_asm_insn ("a\t%4,0(%1)", op);
12487 output_asm_insn ("a\t%1,0(%4)", op);
12488 }
12489
12490 /* We had to clobber the base pointer register.
12491 Re-setup the base pointer (with a different base). */
12492 op[5] = gen_label_rtx ();
12493 output_asm_insn ("basr\t%4,0", op);
12494 targetm.asm_out.internal_label (file, "L",
12495 CODE_LABEL_NUMBER (op[5]));
12496 }
12497
12498 /* Jump to target. */
12499 op[8] = gen_label_rtx ();
12500
12501 if (!flag_pic)
12502 output_asm_insn ("l\t%4,%8-%5(%4)", op);
12503 else if (!nonlocal)
12504 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12505 /* We cannot call through .plt, since .plt requires %r12 loaded. */
12506 else if (flag_pic == 1)
12507 {
12508 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12509 output_asm_insn ("l\t%4,%0(%4)", op);
12510 }
12511 else if (flag_pic == 2)
12512 {
12513 op[9] = gen_rtx_REG (Pmode, 0);
12514 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
12515 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12516 output_asm_insn ("ar\t%4,%9", op);
12517 output_asm_insn ("l\t%4,0(%4)", op);
12518 }
12519
12520 output_asm_insn ("br\t%4", op);
12521
12522 /* Output literal pool. */
12523 output_asm_insn (".align\t4", op);
12524
12525 if (nonlocal && flag_pic == 2)
12526 output_asm_insn (".long\t%0", op);
12527 if (nonlocal)
12528 {
12529 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
12530 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
12531 }
12532
12533 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
12534 if (!flag_pic)
12535 output_asm_insn (".long\t%0", op);
12536 else
12537 output_asm_insn (".long\t%0-%5", op);
12538
12539 if (op[6])
12540 {
12541 targetm.asm_out.internal_label (file, "L",
12542 CODE_LABEL_NUMBER (op[6]));
12543 output_asm_insn (".long\t%2", op);
12544 }
12545 if (op[7])
12546 {
12547 targetm.asm_out.internal_label (file, "L",
12548 CODE_LABEL_NUMBER (op[7]));
12549 output_asm_insn (".long\t%3", op);
12550 }
12551 }
12552 final_end_function ();
12553 }
12554
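/* Return true if MODE is a valid mode for a pointer: SImode always,
   DImode only on 64-bit targets. */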
12555 static bool
12556 s390_valid_pointer_mode (machine_mode mode)
12557 {
12558 return (mode == SImode || (TARGET_64BIT && mode == DImode));
12559 }
12560
12561 /* Checks whether the given CALL_EXPR would use a call-saved
12562 register. This is used to decide whether sibling call
12563 optimization could be performed on the respective function
12564 call. */
12565
12566 static bool
12567 s390_call_saved_register_used (tree call_expr)
12568 {
12569 CUMULATIVE_ARGS cum_v;
12570 cumulative_args_t cum;
12571 tree parameter;
12572 machine_mode mode;
12573 tree type;
12574 rtx parm_rtx;
12575 int reg, i;
12576
12577 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
12578 cum = pack_cumulative_args (&cum_v);
12579
12580 for (i = 0; i < call_expr_nargs (call_expr); i++)
12581 {
12582 parameter = CALL_EXPR_ARG (call_expr, i);
12583 gcc_assert (parameter);
12584
12585 /* For an undeclared variable passed as parameter we will get
12586 an ERROR_MARK node here. */
12587 if (TREE_CODE (parameter) == ERROR_MARK)
12588 return true;
12589
12590 type = TREE_TYPE (parameter);
12591 gcc_assert (type);
12592
12593 mode = TYPE_MODE (type);
12594 gcc_assert (mode);
12595
12596 /* We assume that in the target function all parameters are
12597 named. This only has an impact on vector argument register
12598 usage, none of which is call-saved. */
12599 if (pass_by_reference (&cum_v, mode, type, true))
12600 {
12601 mode = Pmode;
12602 type = build_pointer_type (type);
12603 }
12604
12605 parm_rtx = s390_function_arg (cum, mode, type, true);
12606
12607 s390_function_arg_advance (cum, mode, type, true);
12608
12609 if (!parm_rtx)
12610 continue;
12611
12612 if (REG_P (parm_rtx))
12613 {
12614 for (reg = 0;
12615 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
12616 reg++)
12617 if (!call_used_regs[reg + REGNO (parm_rtx)])
12618 return true;
12619 }
12620
12621 if (GET_CODE (parm_rtx) == PARALLEL)
12622 {
12623 int i;
12624
12625 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
12626 {
12627 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
12628
12629 gcc_assert (REG_P (r));
12630
12631 for (reg = 0;
12632 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
12633 reg++)
12634 if (!call_used_regs[reg + REGNO (r)])
12635 return true;
12636 }
12637 }
12638
12639 }
12640 return false;
12641 }
12642
12643 /* Return true if the given call expression can be
12644 turned into a sibling call.
12645 DECL holds the declaration of the function to be called whereas
12646 EXP is the call expression itself. */
12647
12648 static bool
12649 s390_function_ok_for_sibcall (tree decl, tree exp)
12650 {
12651 /* The TPF epilogue uses register 1. */
12652 if (TARGET_TPF_PROFILING)
12653 return false;
12654
12655 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
12656 which would have to be restored before the sibcall. */
12657 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
12658 return false;
12659
12660 /* Register 6 on s390 is available as an argument register but is
12661 unfortunately call-saved. This makes functions needing this register for
12662 arguments not suitable for sibcalls. */
12663 return !s390_call_saved_register_used (exp);
12664 }
12665
12666 /* Return the fixed registers used for condition codes. */
12667
12668 static bool
12669 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
12670 {
12671 *p1 = CC_REGNUM;
12672 *p2 = INVALID_REGNUM;
12673
12674 return true;
12675 }
12676
12677 /* This function is used by the call expanders of the machine description.
12678 It emits the call insn itself together with the necessary operations
12679 to adjust the target address and returns the emitted insn.
12680 ADDR_LOCATION is the target address rtx
12681 TLS_CALL the location of the thread-local symbol
12682 RESULT_REG the register where the result of the call should be stored
12683 RETADDR_REG the register where the return address should be stored
12684 If this parameter is NULL_RTX the call is considered
12685 to be a sibling call. */
12686
12687 rtx_insn *
12688 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
12689 rtx retaddr_reg)
12690 {
12691 bool plt_call = false;
12692 rtx_insn *insn;
12693 rtx call;
12694 rtx clobber;
12695 rtvec vec;
12696
12697 /* Direct function calls need special treatment. */
12698 if (GET_CODE (addr_location) == SYMBOL_REF)
12699 {
12700 /* When calling a global routine in PIC mode, we must
12701 replace the symbol itself with the PLT stub. */
12702 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
12703 {
12704 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
12705 {
12706 addr_location = gen_rtx_UNSPEC (Pmode,
12707 gen_rtvec (1, addr_location),
12708 UNSPEC_PLT);
12709 addr_location = gen_rtx_CONST (Pmode, addr_location);
12710 plt_call = true;
12711 }
12712 else
12713 /* For -fpic code the PLT entries might use r12 which is
12714 call-saved. Therefore we cannot do a sibcall when
12715 calling directly using a symbol ref. When reaching
12716 this point we decided (in s390_function_ok_for_sibcall)
12717 to do a sibcall for a function pointer but one of the
12718 optimizers was able to get rid of the function pointer
12719 by propagating the symbol ref into the call. This
12720 optimization is illegal for S/390 so we turn the direct
12721 call into an indirect call again. */
12722 addr_location = force_reg (Pmode, addr_location);
12723 }
12724
12725 /* Unless we can use the bras(l) insn, force the
12726 routine address into a register. */
12727 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
12728 {
12729 if (flag_pic)
12730 addr_location = legitimize_pic_address (addr_location, 0);
12731 else
12732 addr_location = force_reg (Pmode, addr_location);
12733 }
12734 }
12735
12736 /* If it is already an indirect call or the code above moved the
12737 SYMBOL_REF to somewhere else, make sure the address can be found in
12738 register 1. */
12739 if (retaddr_reg == NULL_RTX
12740 && GET_CODE (addr_location) != SYMBOL_REF
12741 && !plt_call)
12742 {
12743 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
12744 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
12745 }
12746
12747 addr_location = gen_rtx_MEM (QImode, addr_location);
12748 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
12749
12750 if (result_reg != NULL_RTX)
12751 call = gen_rtx_SET (result_reg, call);
12752
12753 if (retaddr_reg != NULL_RTX)
12754 {
12755 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
12756
12757 if (tls_call != NULL_RTX)
12758 vec = gen_rtvec (3, call, clobber,
12759 gen_rtx_USE (VOIDmode, tls_call));
12760 else
12761 vec = gen_rtvec (2, call, clobber);
12762
12763 call = gen_rtx_PARALLEL (VOIDmode, vec);
12764 }
12765
12766 insn = emit_call_insn (call);
12767
12768 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
12769 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
12770 {
12771 /* s390_function_ok_for_sibcall should
12772 have denied sibcalls in this case. */
12773 gcc_assert (retaddr_reg != NULL_RTX);
12774 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
12775 }
12776 return insn;
12777 }
12778
12779 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
12780
12781 static void
12782 s390_conditional_register_usage (void)
12783 {
12784 int i;
12785
12786 if (flag_pic)
12787 {
12788 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12789 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12790 }
12791 if (TARGET_CPU_ZARCH)
12792 {
12793 fixed_regs[BASE_REGNUM] = 0;
12794 call_used_regs[BASE_REGNUM] = 0;
12795 fixed_regs[RETURN_REGNUM] = 0;
12796 call_used_regs[RETURN_REGNUM] = 0;
12797 }
12798 if (TARGET_64BIT)
12799 {
12800 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
12801 call_used_regs[i] = call_really_used_regs[i] = 0;
12802 }
12803 else
12804 {
12805 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
12806 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
12807 }
12808
12809 if (TARGET_SOFT_FLOAT)
12810 {
12811 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
12812 call_used_regs[i] = fixed_regs[i] = 1;
12813 }
12814
12815 /* Disable v16 - v31 for non-vector target. */
12816 if (!TARGET_VX)
12817 {
12818 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
12819 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
12820 }
12821 }
12822
12823 /* Function corresponding to the eh_return expander. */
12824
12825 static GTY(()) rtx s390_tpf_eh_return_symbol;
12826 void
12827 s390_emit_tpf_eh_return (rtx target)
12828 {
12829 rtx_insn *insn;
12830 rtx reg, orig_ra;
12831
12832 if (!s390_tpf_eh_return_symbol)
12833 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
12834
12835 reg = gen_rtx_REG (Pmode, 2);
12836 orig_ra = gen_rtx_REG (Pmode, 3);
12837
12838 emit_move_insn (reg, target);
12839 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
12840 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
12841 gen_rtx_REG (Pmode, RETURN_REGNUM));
12842 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
12843 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
12844
12845 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
12846 }
12847
12848 /* Rework the prologue/epilogue to avoid saving/restoring
12849 registers unnecessarily. */
12850
12851 static void
12852 s390_optimize_prologue (void)
12853 {
12854 rtx_insn *insn, *new_insn, *next_insn;
12855
12856 /* Do a final recompute of the frame-related data. */
12857 s390_optimize_register_info ();
12858
12859 /* If all special registers are in fact used, there's nothing we
12860 can do, so no point in walking the insn list. */
12861
12862 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
12863 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
12864 && (TARGET_CPU_ZARCH
12865 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
12866 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
12867 return;
12868
12869 /* Search for prologue/epilogue insns and replace them. */
12870
12871 for (insn = get_insns (); insn; insn = next_insn)
12872 {
12873 int first, last, off;
12874 rtx set, base, offset;
12875 rtx pat;
12876
12877 next_insn = NEXT_INSN (insn);
12878
12879 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
12880 continue;
12881
12882 pat = PATTERN (insn);
12883
12884 /* Remove ldgr/lgdr instructions used for saving and restoring
12885 GPRs if possible. */
12886 if (TARGET_Z10
12887 && GET_CODE (pat) == SET
12888 && GET_MODE (SET_SRC (pat)) == DImode
12889 && REG_P (SET_SRC (pat))
12890 && REG_P (SET_DEST (pat)))
12891 {
12892 int src_regno = REGNO (SET_SRC (pat));
12893 int dest_regno = REGNO (SET_DEST (pat));
12894 int gpr_regno;
12895 int fpr_regno;
12896
12897 if (!((GENERAL_REGNO_P (src_regno) && FP_REGNO_P (dest_regno))
12898 || (FP_REGNO_P (src_regno) && GENERAL_REGNO_P (dest_regno))))
12899 continue;
12900
12901 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
12902 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
12903
12904 /* GPR must be call-saved, FPR must be call-clobbered. */
12905 if (!call_really_used_regs[fpr_regno]
12906 || call_really_used_regs[gpr_regno])
12907 continue;
12908
12909 /* It must not happen that what we once saved in an FPR now
12910 needs a stack slot. */
12911 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
12912
12913 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
12914 {
12915 remove_insn (insn);
12916 continue;
12917 }
12918 }
12919
12920 if (GET_CODE (pat) == PARALLEL
12921 && store_multiple_operation (pat, VOIDmode))
12922 {
12923 set = XVECEXP (pat, 0, 0);
12924 first = REGNO (SET_SRC (set));
12925 last = first + XVECLEN (pat, 0) - 1;
12926 offset = const0_rtx;
12927 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
12928 off = INTVAL (offset);
12929
12930 if (GET_CODE (base) != REG || off < 0)
12931 continue;
12932 if (cfun_frame_layout.first_save_gpr != -1
12933 && (cfun_frame_layout.first_save_gpr < first
12934 || cfun_frame_layout.last_save_gpr > last))
12935 continue;
12936 if (REGNO (base) != STACK_POINTER_REGNUM
12937 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12938 continue;
12939 if (first > BASE_REGNUM || last < BASE_REGNUM)
12940 continue;
12941
12942 if (cfun_frame_layout.first_save_gpr != -1)
12943 {
12944 rtx s_pat = save_gprs (base,
12945 off + (cfun_frame_layout.first_save_gpr
12946 - first) * UNITS_PER_LONG,
12947 cfun_frame_layout.first_save_gpr,
12948 cfun_frame_layout.last_save_gpr);
12949 new_insn = emit_insn_before (s_pat, insn);
12950 INSN_ADDRESSES_NEW (new_insn, -1);
12951 }
12952
12953 remove_insn (insn);
12954 continue;
12955 }
12956
12957 if (cfun_frame_layout.first_save_gpr == -1
12958 && GET_CODE (pat) == SET
12959 && GENERAL_REG_P (SET_SRC (pat))
12960 && GET_CODE (SET_DEST (pat)) == MEM)
12961 {
12962 set = pat;
12963 first = REGNO (SET_SRC (set));
12964 offset = const0_rtx;
12965 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
12966 off = INTVAL (offset);
12967
12968 if (GET_CODE (base) != REG || off < 0)
12969 continue;
12970 if (REGNO (base) != STACK_POINTER_REGNUM
12971 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12972 continue;
12973
12974 remove_insn (insn);
12975 continue;
12976 }
12977
12978 if (GET_CODE (pat) == PARALLEL
12979 && load_multiple_operation (pat, VOIDmode))
12980 {
12981 set = XVECEXP (pat, 0, 0);
12982 first = REGNO (SET_DEST (set));
12983 last = first + XVECLEN (pat, 0) - 1;
12984 offset = const0_rtx;
12985 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
12986 off = INTVAL (offset);
12987
12988 if (GET_CODE (base) != REG || off < 0)
12989 continue;
12990
12991 if (cfun_frame_layout.first_restore_gpr != -1
12992 && (cfun_frame_layout.first_restore_gpr < first
12993 || cfun_frame_layout.last_restore_gpr > last))
12994 continue;
12995 if (REGNO (base) != STACK_POINTER_REGNUM
12996 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
12997 continue;
12998 if (first > BASE_REGNUM || last < BASE_REGNUM)
12999 continue;
13000
13001 if (cfun_frame_layout.first_restore_gpr != -1)
13002 {
13003 rtx rpat = restore_gprs (base,
13004 off + (cfun_frame_layout.first_restore_gpr
13005 - first) * UNITS_PER_LONG,
13006 cfun_frame_layout.first_restore_gpr,
13007 cfun_frame_layout.last_restore_gpr);
13008
13009 /* Remove REG_CFA_RESTOREs for registers that we no
13010 longer need to save. */
13011 REG_NOTES (rpat) = REG_NOTES (insn);
13012 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13013 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13014 && ((int) REGNO (XEXP (*ptr, 0))
13015 < cfun_frame_layout.first_restore_gpr))
13016 *ptr = XEXP (*ptr, 1);
13017 else
13018 ptr = &XEXP (*ptr, 1);
13019 new_insn = emit_insn_before (rpat, insn);
13020 RTX_FRAME_RELATED_P (new_insn) = 1;
13021 INSN_ADDRESSES_NEW (new_insn, -1);
13022 }
13023
13024 remove_insn (insn);
13025 continue;
13026 }
13027
13028 if (cfun_frame_layout.first_restore_gpr == -1
13029 && GET_CODE (pat) == SET
13030 && GENERAL_REG_P (SET_DEST (pat))
13031 && GET_CODE (SET_SRC (pat)) == MEM)
13032 {
13033 set = pat;
13034 first = REGNO (SET_DEST (set));
13035 offset = const0_rtx;
13036 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13037 off = INTVAL (offset);
13038
13039 if (GET_CODE (base) != REG || off < 0)
13040 continue;
13041
13042 if (REGNO (base) != STACK_POINTER_REGNUM
13043 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13044 continue;
13045
13046 remove_insn (insn);
13047 continue;
13048 }
13049 }
13050 }
13051
13052 /* On z10 and later the dynamic branch prediction must see the
13053 backward jump within a certain window. If not, it falls back to
13054 the static prediction. This function rearranges the loop backward
13055 branch in a way which makes the static prediction always correct.
13056 The function returns true if it added an instruction. */
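/* The rewrite performed here turns

       ...
       jCC   code_label      (backward branch beyond the prediction window)
     fallthru:

   into

       ...
       j!CC  new_label       (short forward branch)
       j     code_label      (unconditional backward branch, statically
                              predicted as taken)
     new_label:
     fallthru:  */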
13057 static bool
13058 s390_fix_long_loop_prediction (rtx_insn *insn)
13059 {
13060 rtx set = single_set (insn);
13061 rtx code_label, label_ref, new_label;
13062 rtx_insn *uncond_jump;
13063 rtx_insn *cur_insn;
13064 rtx tmp;
13065 int distance;
13066
13067 /* This will exclude branch on count and branch on index patterns
13068 since these are correctly statically predicted. */
13069 if (!set
13070 || SET_DEST (set) != pc_rtx
13071 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13072 return false;
13073
13074 /* Skip conditional returns. */
13075 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13076 && XEXP (SET_SRC (set), 2) == pc_rtx)
13077 return false;
13078
13079 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13080 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13081
13082 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13083
13084 code_label = XEXP (label_ref, 0);
13085
13086 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13087 || INSN_ADDRESSES (INSN_UID (insn)) == -1
13088 || (INSN_ADDRESSES (INSN_UID (insn))
13089 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13090 return false;
13091
13092 for (distance = 0, cur_insn = PREV_INSN (insn);
13093 distance < PREDICT_DISTANCE - 6;
13094 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13095 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13096 return false;
13097
13098 new_label = gen_label_rtx ();
13099 uncond_jump = emit_jump_insn_after (
13100 gen_rtx_SET (pc_rtx,
13101 gen_rtx_LABEL_REF (VOIDmode, code_label)),
13102 insn);
13103 emit_label_after (new_label, uncond_jump);
13104
13105 tmp = XEXP (SET_SRC (set), 1);
13106 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13107 XEXP (SET_SRC (set), 2) = tmp;
13108 INSN_CODE (insn) = -1;
13109
13110 XEXP (label_ref, 0) = new_label;
13111 JUMP_LABEL (insn) = new_label;
13112 JUMP_LABEL (uncond_jump) = code_label;
13113
13114 return true;
13115 }
13116
13117 /* Returns 1 if INSN reads the value of REG for purposes not related
13118 to addressing of memory, and 0 otherwise. */
13119 static int
13120 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
13121 {
13122 return reg_referenced_p (reg, PATTERN (insn))
13123 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
13124 }
13125
13126 /* Starting from INSN, find_cond_jump looks downwards in the insn
13127 stream for a single jump insn which is the last user of the
13128 condition code set in INSN. */
13129 static rtx_insn *
13130 find_cond_jump (rtx_insn *insn)
13131 {
13132 for (; insn; insn = NEXT_INSN (insn))
13133 {
13134 rtx ite, cc;
13135
13136 if (LABEL_P (insn))
13137 break;
13138
13139 if (!JUMP_P (insn))
13140 {
13141 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
13142 break;
13143 continue;
13144 }
13145
13146 /* This will be triggered by a return. */
13147 if (GET_CODE (PATTERN (insn)) != SET)
13148 break;
13149
13150 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
13151 ite = SET_SRC (PATTERN (insn));
13152
13153 if (GET_CODE (ite) != IF_THEN_ELSE)
13154 break;
13155
13156 cc = XEXP (XEXP (ite, 0), 0);
13157 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
13158 break;
13159
13160 if (find_reg_note (insn, REG_DEAD, cc))
13161 return insn;
13162 break;
13163 }
13164
13165 return NULL;
13166 }
13167
13168 /* Swap the condition in COND and the operands in OP0 and OP1 so that
13169 the semantics does not change. If NULL_RTX is passed as COND the
13170 function tries to find the conditional jump starting with INSN. */
13171 static void
13172 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
13173 {
13174 rtx tmp = *op0;
13175
13176 if (cond == NULL_RTX)
13177 {
13178 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
13179 rtx set = jump ? single_set (jump) : NULL_RTX;
13180
13181 if (set == NULL_RTX)
13182 return;
13183
13184 cond = XEXP (SET_SRC (set), 0);
13185 }
13186
13187 *op0 = *op1;
13188 *op1 = tmp;
13189 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
13190 }
13191
13192 /* On z10, instructions of the compare-and-branch family have the
13193 property of accessing the register occurring as second operand with
13194 its bits complemented. If such a compare is grouped with a second
13195 instruction that accesses the same register non-complemented, and
13196 if that register's value is delivered via a bypass, then the
13197 pipeline recycles, thereby causing significant performance decline.
13198 This function locates such situations and exchanges the two
13199 operands of the compare. The function returns true whenever it
13200 added an insn. */
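/* For example (illustrative register only): if a compare-and-branch reads
   %r3 as its second (complemented) operand and a neighbouring insn also
   reads %r3 non-complemented, the compare operands are swapped (with the
   condition adjusted via s390_swap_cmp); if swapping is not possible, a
   NOP is emitted after the compare to separate the two instructions.  */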
13201 static bool
13202 s390_z10_optimize_cmp (rtx_insn *insn)
13203 {
13204 rtx_insn *prev_insn, *next_insn;
13205 bool insn_added_p = false;
13206 rtx cond, *op0, *op1;
13207
13208 if (GET_CODE (PATTERN (insn)) == PARALLEL)
13209 {
13210 /* Handle compare and branch and branch on count
13211 instructions. */
13212 rtx pattern = single_set (insn);
13213
13214 if (!pattern
13215 || SET_DEST (pattern) != pc_rtx
13216 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
13217 return false;
13218
13219 cond = XEXP (SET_SRC (pattern), 0);
13220 op0 = &XEXP (cond, 0);
13221 op1 = &XEXP (cond, 1);
13222 }
13223 else if (GET_CODE (PATTERN (insn)) == SET)
13224 {
13225 rtx src, dest;
13226
13227 /* Handle normal compare instructions. */
13228 src = SET_SRC (PATTERN (insn));
13229 dest = SET_DEST (PATTERN (insn));
13230
13231 if (!REG_P (dest)
13232 || !CC_REGNO_P (REGNO (dest))
13233 || GET_CODE (src) != COMPARE)
13234 return false;
13235
13236 /* s390_swap_cmp will try to find the conditional
13237 jump when passing NULL_RTX as condition. */
13238 cond = NULL_RTX;
13239 op0 = &XEXP (src, 0);
13240 op1 = &XEXP (src, 1);
13241 }
13242 else
13243 return false;
13244
13245 if (!REG_P (*op0) || !REG_P (*op1))
13246 return false;
13247
13248 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
13249 return false;
13250
13251 /* Swap the COMPARE arguments and its mask if there is a
13252 conflicting access in the previous insn. */
13253 prev_insn = prev_active_insn (insn);
13254 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13255 && reg_referenced_p (*op1, PATTERN (prev_insn)))
13256 s390_swap_cmp (cond, op0, op1, insn);
13257
13258 /* Check if there is a conflict with the next insn. If there
13259 was no conflict with the previous insn, then swap the
13260 COMPARE arguments and its mask. If we already swapped
13261 the operands, or if swapping them would cause a conflict
13262 with the previous insn, issue a NOP after the COMPARE in
13263 order to separate the two instructions. */
13264 next_insn = next_active_insn (insn);
13265 if (next_insn != NULL_RTX && INSN_P (next_insn)
13266 && s390_non_addr_reg_read_p (*op1, next_insn))
13267 {
13268 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13269 && s390_non_addr_reg_read_p (*op0, prev_insn))
13270 {
13271 if (REGNO (*op1) == 0)
13272 emit_insn_after (gen_nop1 (), insn);
13273 else
13274 emit_insn_after (gen_nop (), insn);
13275 insn_added_p = true;
13276 }
13277 else
13278 s390_swap_cmp (cond, op0, op1, insn);
13279 }
13280 return insn_added_p;
13281 }
13282
13283 /* Perform machine-dependent processing. */
13284
13285 static void
13286 s390_reorg (void)
13287 {
13288 bool pool_overflow = false;
13289 int hw_before, hw_after;
13290
13291 /* Make sure all splits have been performed; splits after
13292 machine_dependent_reorg might confuse insn length counts. */
13293 split_all_insns_noflow ();
13294
13295 /* Install the main literal pool and the associated base
13296 register load insns.
13297
13298 In addition, there are two problematic situations we need
13299 to correct:
13300
13301 - the literal pool might be > 4096 bytes in size, so that
13302 some of its elements cannot be directly accessed
13303
13304 - a branch target might be > 64K away from the branch, so that
13305 it is not possible to use a PC-relative instruction.
13306
13307 To fix those, we split the single literal pool into multiple
13308 pool chunks, reloading the pool base register at various
13309 points throughout the function to ensure it always points to
13310 the pool chunk the following code expects, and / or replace
13311 PC-relative branches by absolute branches.
13312
13313 However, the two problems are interdependent: splitting the
13314 literal pool can move a branch further away from its target,
13315 causing the 64K limit to overflow, and on the other hand,
13316 replacing a PC-relative branch by an absolute branch means
13317 we need to put the branch target address into the literal
13318 pool, possibly causing it to overflow.
13319
13320 So, we loop trying to fix up both problems until we manage
13321 to satisfy both conditions at the same time. Note that the
13322 loop is guaranteed to terminate as every pass of the loop
13323 strictly decreases the total number of PC-relative branches
13324 in the function. (This is not completely true as there
13325 might be branch-over-pool insns introduced by chunkify_start.
13326 Those never need to be split however.) */
13327
13328 for (;;)
13329 {
13330 struct constant_pool *pool = NULL;
13331
13332 /* Collect the literal pool. */
13333 if (!pool_overflow)
13334 {
13335 pool = s390_mainpool_start ();
13336 if (!pool)
13337 pool_overflow = true;
13338 }
13339
13340 /* If literal pool overflowed, start to chunkify it. */
13341 if (pool_overflow)
13342 pool = s390_chunkify_start ();
13343
13344 /* Split out-of-range branches. If this has created new
13345 literal pool entries, cancel current chunk list and
13346 recompute it. zSeries machines have large branch
13347 instructions, so we never need to split a branch. */
13348 if (!TARGET_CPU_ZARCH && s390_split_branches ())
13349 {
13350 if (pool_overflow)
13351 s390_chunkify_cancel (pool);
13352 else
13353 s390_mainpool_cancel (pool);
13354
13355 continue;
13356 }
13357
13358 /* If we made it up to here, both conditions are satisfied.
13359 Finish up literal pool related changes. */
13360 if (pool_overflow)
13361 s390_chunkify_finish (pool);
13362 else
13363 s390_mainpool_finish (pool);
13364
13365 /* We're done splitting branches. */
13366 cfun->machine->split_branches_pending_p = false;
13367 break;
13368 }
13369
13370 /* Generate out-of-pool execute target insns. */
13371 if (TARGET_CPU_ZARCH)
13372 {
13373 rtx_insn *insn, *target;
13374 rtx label;
13375
13376 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13377 {
13378 label = s390_execute_label (insn);
13379 if (!label)
13380 continue;
13381
13382 gcc_assert (label != const0_rtx);
13383
13384 target = emit_label (XEXP (label, 0));
13385 INSN_ADDRESSES_NEW (target, -1);
13386
13387 target = emit_insn (s390_execute_target (insn));
13388 INSN_ADDRESSES_NEW (target, -1);
13389 }
13390 }
13391
13392 /* Try to optimize prologue and epilogue further. */
13393 s390_optimize_prologue ();
13394
13395 /* Walk over the insns and do some >=z10 specific changes. */
13396 if (s390_tune >= PROCESSOR_2097_Z10)
13397 {
13398 rtx_insn *insn;
13399 bool insn_added_p = false;
13400
13401 /* The insn lengths and addresses have to be up to date for the
13402 following manipulations. */
13403 shorten_branches (get_insns ());
13404
13405 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13406 {
13407 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13408 continue;
13409
13410 if (JUMP_P (insn))
13411 insn_added_p |= s390_fix_long_loop_prediction (insn);
13412
13413 if ((GET_CODE (PATTERN (insn)) == PARALLEL
13414 || GET_CODE (PATTERN (insn)) == SET)
13415 && s390_tune == PROCESSOR_2097_Z10)
13416 insn_added_p |= s390_z10_optimize_cmp (insn);
13417 }
13418
13419 /* Adjust branches if we added new instructions. */
13420 if (insn_added_p)
13421 shorten_branches (get_insns ());
13422 }
13423
13424 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
13425 if (hw_after > 0)
13426 {
13427 rtx_insn *insn;
13428
13429 /* Insert NOPs for hotpatching. */
13430 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13431 /* Emit NOPs
13432 1. inside the area covered by debug information to allow setting
13433 breakpoints at the NOPs,
13434 2. before any insn which results in an asm instruction,
13435 3. before in-function labels to avoid jumping to the NOPs, for
13436 example as part of a loop,
13437 4. before any barrier in case the function is completely empty
13438 (__builtin_unreachable ()) and has neither internal labels nor
13439 active insns.
13440 */
13441 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
13442 break;
13443 /* Output a series of NOPs before the first active insn. */
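      /* hw_after counts halfwords; e.g. a request for 5 halfwords on a
	 zarch target is satisfied by one 6-byte NOP (3 halfwords)
	 followed by one 4-byte NOP (2 halfwords).  */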
13444 while (insn && hw_after > 0)
13445 {
13446 if (hw_after >= 3 && TARGET_CPU_ZARCH)
13447 {
13448 emit_insn_before (gen_nop_6_byte (), insn);
13449 hw_after -= 3;
13450 }
13451 else if (hw_after >= 2)
13452 {
13453 emit_insn_before (gen_nop_4_byte (), insn);
13454 hw_after -= 2;
13455 }
13456 else
13457 {
13458 emit_insn_before (gen_nop_2_byte (), insn);
13459 hw_after -= 1;
13460 }
13461 }
13462 }
13463 }
13464
13465 /* Return true if INSN is a fp load insn writing register REGNO. */
13466 static inline bool
13467 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
13468 {
13469 rtx set;
13470 enum attr_type flag = s390_safe_attr_type (insn);
13471
13472 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
13473 return false;
13474
13475 set = single_set (insn);
13476
13477 if (set == NULL_RTX)
13478 return false;
13479
13480 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
13481 return false;
13482
13483 if (REGNO (SET_DEST (set)) != regno)
13484 return false;
13485
13486 return true;
13487 }
13488
13489 /* This value describes the distance to be avoided between an
13490 arithmetic fp instruction and an fp load writing the same register.
13491 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
13492 fine, but the exact value has to be avoided. Otherwise the FP
13493 pipeline will throw an exception causing a major penalty. */
13494 #define Z10_EARLYLOAD_DISTANCE 7
13495
13496 /* Rearrange the ready list in order to avoid the situation described
13497 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
13498 moved to the very end of the ready list. */
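/* For example (illustrative register only): if the active insn issued
   Z10_EARLYLOAD_DISTANCE - 1 insns before the last scheduled one was a
   non-load FP instruction setting %f4, a ready FP load of %f4 is moved
   to ready[0] (the very end of the list) so it is not issued next.  */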
13499 static void
13500 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
13501 {
13502 unsigned int regno;
13503 int nready = *nready_p;
13504 rtx_insn *tmp;
13505 int i;
13506 rtx_insn *insn;
13507 rtx set;
13508 enum attr_type flag;
13509 int distance;
13510
13511 /* Skip DISTANCE - 1 active insns. */
13512 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
13513 distance > 0 && insn != NULL_RTX;
13514 distance--, insn = prev_active_insn (insn))
13515 if (CALL_P (insn) || JUMP_P (insn))
13516 return;
13517
13518 if (insn == NULL_RTX)
13519 return;
13520
13521 set = single_set (insn);
13522
13523 if (set == NULL_RTX || !REG_P (SET_DEST (set))
13524 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
13525 return;
13526
13527 flag = s390_safe_attr_type (insn);
13528
13529 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
13530 return;
13531
13532 regno = REGNO (SET_DEST (set));
13533 i = nready - 1;
13534
13535 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
13536 i--;
13537
13538 if (!i)
13539 return;
13540
13541 tmp = ready[i];
13542 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
13543 ready[0] = tmp;
13544 }
13545
13546
13547 /* The s390_sched_state variable tracks the state of the current or
13548 the last instruction group.
13549
13550 0,1,2 number of instructions scheduled in the current group
13551 3 the last group is complete - normal insns
13552 4 the last group was a cracked/expanded insn */
13553
13554 static int s390_sched_state;
13555
13556 #define S390_OOO_SCHED_STATE_NORMAL 3
13557 #define S390_OOO_SCHED_STATE_CRACKED 4
13558
13559 #define S390_OOO_SCHED_ATTR_MASK_CRACKED 0x1
13560 #define S390_OOO_SCHED_ATTR_MASK_EXPANDED 0x2
13561 #define S390_OOO_SCHED_ATTR_MASK_ENDGROUP 0x4
13562 #define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8
13563
13564 static unsigned int
13565 s390_get_sched_attrmask (rtx_insn *insn)
13566 {
13567 unsigned int mask = 0;
13568
13569 if (get_attr_ooo_cracked (insn))
13570 mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED;
13571 if (get_attr_ooo_expanded (insn))
13572 mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED;
13573 if (get_attr_ooo_endgroup (insn))
13574 mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP;
13575 if (get_attr_ooo_groupalone (insn))
13576 mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE;
13577 return mask;
13578 }
13579
13580 /* Return the scheduling score for INSN. The higher the score the
13581 better. The score is calculated from the OOO scheduling attributes
13582 of INSN and the scheduling state s390_sched_state. */
13583 static int
13584 s390_sched_score (rtx_insn *insn)
13585 {
13586 unsigned int mask = s390_get_sched_attrmask (insn);
13587 int score = 0;
13588
13589 switch (s390_sched_state)
13590 {
13591 case 0:
13592 /* Try to put insns into the first slot which would otherwise
13593 break a group. */
13594 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
13595 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
13596 score += 5;
13597 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
13598 score += 10;
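      /* fall through */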
13599 case 1:
13600 /* Prefer not cracked insns while trying to put together a
13601 group. */
13602 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
13603 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
13604 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
13605 score += 10;
13606 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0)
13607 score += 5;
13608 break;
13609 case 2:
13610 /* Prefer not cracked insns while trying to put together a
13611 group. */
13612 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
13613 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
13614 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
13615 score += 10;
13616 /* Prefer endgroup insns in the last slot. */
13617 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0)
13618 score += 10;
13619 break;
13620 case S390_OOO_SCHED_STATE_NORMAL:
13621 /* Prefer not cracked insns if the last was not cracked. */
13622 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
13623 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0)
13624 score += 5;
13625 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
13626 score += 10;
13627 break;
13628 case S390_OOO_SCHED_STATE_CRACKED:
13629 /* Try to keep cracked insns together to prevent them from
13630 interrupting groups. */
13631 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
13632 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
13633 score += 5;
13634 break;
13635 }
13636 return score;
13637 }
13638
13639 /* This function is called via hook TARGET_SCHED_REORDER before
13640 issuing one insn from list READY which contains *NREADYP entries.
13641 For target z10 it reorders load instructions to avoid early load
13642 conflicts in the floating point pipeline. */
13643 static int
13644 s390_sched_reorder (FILE *file, int verbose,
13645 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
13646 {
13647 if (s390_tune == PROCESSOR_2097_Z10
13648 && reload_completed
13649 && *nreadyp > 1)
13650 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
13651
13652 if (s390_tune >= PROCESSOR_2827_ZEC12
13653 && reload_completed
13654 && *nreadyp > 1)
13655 {
13656 int i;
13657 int last_index = *nreadyp - 1;
13658 int max_index = -1;
13659 int max_score = -1;
13660 rtx_insn *tmp;
13661
13662 /* Just move the insn with the highest score to the top (the
13663 end) of the list. A full sort is not needed since a conflict
13664 in the hazard recognition cannot happen. So the top insn in
13665 the ready list will always be taken. */
13666 for (i = last_index; i >= 0; i--)
13667 {
13668 int score;
13669
13670 if (recog_memoized (ready[i]) < 0)
13671 continue;
13672
13673 score = s390_sched_score (ready[i]);
13674 if (score > max_score)
13675 {
13676 max_score = score;
13677 max_index = i;
13678 }
13679 }
13680
13681 if (max_index != -1)
13682 {
13683 if (max_index != last_index)
13684 {
13685 tmp = ready[max_index];
13686 ready[max_index] = ready[last_index];
13687 ready[last_index] = tmp;
13688
13689 if (verbose > 5)
13690 fprintf (file,
13691 "move insn %d to the top of list\n",
13692 INSN_UID (ready[last_index]));
13693 }
13694 else if (verbose > 5)
13695 fprintf (file,
13696 "best insn %d already on top\n",
13697 INSN_UID (ready[last_index]));
13698 }
13699
13700 if (verbose > 5)
13701 {
13702 fprintf (file, "ready list ooo attributes - sched state: %d\n",
13703 s390_sched_state);
13704
13705 for (i = last_index; i >= 0; i--)
13706 {
13707 if (recog_memoized (ready[i]) < 0)
13708 continue;
13709 fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]),
13710 s390_sched_score (ready[i]));
13711 #define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR);
13712 PRINT_OOO_ATTR (ooo_cracked);
13713 PRINT_OOO_ATTR (ooo_expanded);
13714 PRINT_OOO_ATTR (ooo_endgroup);
13715 PRINT_OOO_ATTR (ooo_groupalone);
13716 #undef PRINT_OOO_ATTR
13717 fprintf (file, "\n");
13718 }
13719 }
13720 }
13721
13722 return s390_issue_rate ();
13723 }
13724
13725
13726 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
13727 the scheduler has issued INSN. It stores the last issued insn into
13728 last_scheduled_insn in order to make it available for
13729 s390_sched_reorder. */
13730 static int
13731 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
13732 {
13733 last_scheduled_insn = insn;
13734
13735 if (s390_tune >= PROCESSOR_2827_ZEC12
13736 && reload_completed
13737 && recog_memoized (insn) >= 0)
13738 {
13739 unsigned int mask = s390_get_sched_attrmask (insn);
13740
13741 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
13742 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
13743 s390_sched_state = S390_OOO_SCHED_STATE_CRACKED;
13744 else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0
13745 || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
13746 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
13747 else
13748 {
13749 /* Only normal insns are left (mask == 0). */
13750 switch (s390_sched_state)
13751 {
13752 case 0:
13753 case 1:
13754 case 2:
13755 case S390_OOO_SCHED_STATE_NORMAL:
13756 if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL)
13757 s390_sched_state = 1;
13758 else
13759 s390_sched_state++;
13760
13761 break;
13762 case S390_OOO_SCHED_STATE_CRACKED:
13763 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
13764 break;
13765 }
13766 }
13767 if (verbose > 5)
13768 {
13769 fprintf (file, "insn %d: ", INSN_UID (insn));
13770 #define PRINT_OOO_ATTR(ATTR) \
13771 fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : "");
13772 PRINT_OOO_ATTR (ooo_cracked);
13773 PRINT_OOO_ATTR (ooo_expanded);
13774 PRINT_OOO_ATTR (ooo_endgroup);
13775 PRINT_OOO_ATTR (ooo_groupalone);
13776 #undef PRINT_OOO_ATTR
13777 fprintf (file, "\n");
13778 fprintf (file, "sched state: %d\n", s390_sched_state);
13779 }
13780 }
13781
13782 if (GET_CODE (PATTERN (insn)) != USE
13783 && GET_CODE (PATTERN (insn)) != CLOBBER)
13784 return more - 1;
13785 else
13786 return more;
13787 }
13788
13789 static void
13790 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
13791 int verbose ATTRIBUTE_UNUSED,
13792 int max_ready ATTRIBUTE_UNUSED)
13793 {
13794 last_scheduled_insn = NULL;
13795 s390_sched_state = 0;
13796 }
13797
13798 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
13799 the new number of times struct loop *LOOP should be unrolled when tuning
13800 for cpus with a built-in stride prefetcher.
13801 The loop body is analyzed for memory accesses by walking all insns of
13802 the loop. Depending on the loop_depth and the number of memory accesses
13803 a new number <= nunroll is returned to improve the behavior of the
13804 hardware prefetch unit. */
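/* For example, a loop of depth 1 containing 7 memory references is
   unrolled at most MIN (nunroll, 28 / 7) = 4 times.  */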
13805 static unsigned
13806 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
13807 {
13808 basic_block *bbs;
13809 rtx_insn *insn;
13810 unsigned i;
13811 unsigned mem_count = 0;
13812
13813 if (s390_tune < PROCESSOR_2097_Z10)
13814 return nunroll;
13815
13816 /* Count the number of memory references within the loop body. */
13817 bbs = get_loop_body (loop);
13818 subrtx_iterator::array_type array;
13819 for (i = 0; i < loop->num_nodes; i++)
13820 FOR_BB_INSNS (bbs[i], insn)
13821 if (INSN_P (insn) && INSN_CODE (insn) != -1)
13822 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
13823 if (MEM_P (*iter))
13824 mem_count += 1;
13825 free (bbs);
13826
13827 /* Prevent division by zero; nunroll does not need to be adjusted in this case. */
13828 if (mem_count == 0)
13829 return nunroll;
13830
13831 switch (loop_depth (loop))
13832 {
13833 case 1:
13834 return MIN (nunroll, 28 / mem_count);
13835 case 2:
13836 return MIN (nunroll, 22 / mem_count);
13837 default:
13838 return MIN (nunroll, 16 / mem_count);
13839 }
13840 }
13841
13842 /* Restore the current options. This is a hook function and also called
13843 internally. */
13844
13845 static void
13846 s390_function_specific_restore (struct gcc_options *opts,
13847 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
13848 {
13849 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
13850 }
13851
13852 static void
13853 s390_option_override_internal (bool main_args_p,
13854 struct gcc_options *opts,
13855 const struct gcc_options *opts_set)
13856 {
13857 const char *prefix;
13858 const char *suffix;
13859
13860 /* Set up prefix/suffix so the error messages refer to either the command
13861 line argument, or the attribute(target). */
13862 if (main_args_p)
13863 {
13864 prefix = "-m";
13865 suffix = "";
13866 }
13867 else
13868 {
13869 prefix = "option(\"";
13870 suffix = "\")";
13871 }
13872
13873
13874 /* Architecture mode defaults according to ABI. */
13875 if (!(opts_set->x_target_flags & MASK_ZARCH))
13876 {
13877 if (TARGET_64BIT)
13878 opts->x_target_flags |= MASK_ZARCH;
13879 else
13880 opts->x_target_flags &= ~MASK_ZARCH;
13881 }
13882
13883 /* Set the march default in case it hasn't been specified on cmdline. */
13884 if (!opts_set->x_s390_arch)
13885 opts->x_s390_arch = PROCESSOR_2064_Z900;
13886 else if (opts->x_s390_arch == PROCESSOR_9672_G5
13887 || opts->x_s390_arch == PROCESSOR_9672_G6)
13888 warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed "
13889 "in future releases; use at least %sarch=z900%s",
13890 prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6",
13891 suffix, prefix, suffix);
13892
13893 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
13894
13895 /* Determine processor to tune for. */
13896 if (!opts_set->x_s390_tune)
13897 opts->x_s390_tune = opts->x_s390_arch;
13898 else if (opts->x_s390_tune == PROCESSOR_9672_G5
13899 || opts->x_s390_tune == PROCESSOR_9672_G6)
13900 warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed "
13901 "in future releases; use at least %stune=z900%s",
13902 prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6",
13903 suffix, prefix, suffix);
13904
13905 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
13906
13907 /* Sanity checks. */
13908 if (opts->x_s390_arch == PROCESSOR_NATIVE
13909 || opts->x_s390_tune == PROCESSOR_NATIVE)
13910 gcc_unreachable ();
13911 if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts))
13912 error ("z/Architecture mode not supported on %s",
13913 processor_table[(int)opts->x_s390_arch].name);
13914 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
13915 error ("64-bit ABI not supported in ESA/390 mode");
13916
13917 /* Enable hardware transactions if available and not explicitly
13918 disabled by the user. E.g. with -m31 -march=zEC12 -mzarch. */
13919 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
13920 {
13921 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
13922 opts->x_target_flags |= MASK_OPT_HTM;
13923 else
13924 opts->x_target_flags &= ~MASK_OPT_HTM;
13925 }
13926
13927 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
13928 {
13929 if (TARGET_OPT_VX_P (opts->x_target_flags))
13930 {
13931 if (!TARGET_CPU_VX_P (opts))
13932 error ("hardware vector support not available on %s",
13933 processor_table[(int)opts->x_s390_arch].name);
13934 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
13935 error ("hardware vector support not available with -msoft-float");
13936 }
13937 }
13938 else
13939 {
13940 if (TARGET_CPU_VX_P (opts))
13941 /* Enable vector support if available and not explicitly disabled
13942 by the user. E.g. with -m31 -march=z13 -mzarch. */
13943 opts->x_target_flags |= MASK_OPT_VX;
13944 else
13945 opts->x_target_flags &= ~MASK_OPT_VX;
13946 }
13947
13948 /* Use hardware DFP if available and not explicitly disabled by the
13949 user. E.g. with -m31 -march=z10 -mzarch. */
13950 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
13951 {
13952 if (TARGET_DFP_P (opts))
13953 opts->x_target_flags |= MASK_HARD_DFP;
13954 else
13955 opts->x_target_flags &= ~MASK_HARD_DFP;
13956 }
13957
13958 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
13959 {
13960 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
13961 {
13962 if (!TARGET_CPU_DFP_P (opts))
13963 error ("hardware decimal floating point instructions"
13964 " not available on %s",
13965 processor_table[(int)opts->x_s390_arch].name);
13966 if (!TARGET_ZARCH_P (opts->x_target_flags))
13967 error ("hardware decimal floating point instructions"
13968 " not available in ESA/390 mode");
13969 }
13970 else
13971 opts->x_target_flags &= ~MASK_HARD_DFP;
13972 }
13973
13974 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
13975 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
13976 {
13977 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
13978 && TARGET_HARD_DFP_P (opts->x_target_flags))
13979 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
13980
13981 opts->x_target_flags &= ~MASK_HARD_DFP;
13982 }
13983
13984 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
13985 && TARGET_PACKED_STACK_P (opts->x_target_flags)
13986 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
13987 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
13988 "in combination");
13989
13990 if (opts->x_s390_stack_size)
13991 {
13992 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
13993 error ("stack size must be greater than the stack guard value");
13994 else if (opts->x_s390_stack_size > 1 << 16)
13995 error ("stack size must not be greater than 64k");
13996 }
13997 else if (opts->x_s390_stack_guard)
13998 error ("-mstack-guard implies use of -mstack-size");
13999
14000 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
14001 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
14002 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
14003 #endif
14004
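  /* Adjust the default unrolling and peeling parameters when tuning
     for z10 or newer.  */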
14005 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
14006 {
14007 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
14008 opts->x_param_values,
14009 opts_set->x_param_values);
14010 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
14011 opts->x_param_values,
14012 opts_set->x_param_values);
14013 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
14014 opts->x_param_values,
14015 opts_set->x_param_values);
14016 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
14017 opts->x_param_values,
14018 opts_set->x_param_values);
14019 }
14020
14021 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
14022 opts->x_param_values,
14023 opts_set->x_param_values);
14024 /* Values for loop prefetching. */
14025 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
14026 opts->x_param_values,
14027 opts_set->x_param_values);
14028 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
14029 opts->x_param_values,
14030 opts_set->x_param_values);
14031 /* s390 has more than 2 cache levels and their sizes are much larger. Since
14032 we are always running virtualized, assume that we only get a small
14033 part of the caches above L1. */
14034 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
14035 opts->x_param_values,
14036 opts_set->x_param_values);
14037 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
14038 opts->x_param_values,
14039 opts_set->x_param_values);
14040 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
14041 opts->x_param_values,
14042 opts_set->x_param_values);
14043
14044 /* Use the alternative scheduling-pressure algorithm by default. */
14045 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
14046 opts->x_param_values,
14047 opts_set->x_param_values);
14048
14049 /* Call target specific restore function to do post-init work. At the moment,
14050 this just sets opts->x_s390_cost_pointer. */
14051 s390_function_specific_restore (opts, NULL);
14052 }
14053
14054 static void
14055 s390_option_override (void)
14056 {
14057 unsigned int i;
14058 cl_deferred_option *opt;
14059 vec<cl_deferred_option> *v =
14060 (vec<cl_deferred_option> *) s390_deferred_options;
14061
14062 if (v)
14063 FOR_EACH_VEC_ELT (*v, i, opt)
14064 {
14065 switch (opt->opt_index)
14066 {
14067 case OPT_mhotpatch_:
14068 {
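	  /* Parse the "n,m" argument of -mhotpatch=n,m: n halfwords of
	     hotpatch space are reserved before the function label and
	     m halfwords after it.  */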
14069 int val1;
14070 int val2;
14071 char s[256];
14072 char *t;
14073
14074 strncpy (s, opt->arg, 256);
14075 s[255] = 0;
14076 t = strchr (s, ',');
14077 if (t != NULL)
14078 {
14079 *t = 0;
14080 t++;
14081 val1 = integral_argument (s);
14082 val2 = integral_argument (t);
14083 }
14084 else
14085 {
14086 val1 = -1;
14087 val2 = -1;
14088 }
14089 if (val1 == -1 || val2 == -1)
14090 {
14091 /* Argument is not a plain number. */
14092 error ("arguments to %qs should be non-negative integers",
14093 "-mhotpatch=n,m");
14094 break;
14095 }
14096 else if (val1 > s390_hotpatch_hw_max
14097 || val2 > s390_hotpatch_hw_max)
14098 {
14099 error ("argument to %qs is too large (max. %d)",
14100 "-mhotpatch=n,m", s390_hotpatch_hw_max);
14101 break;
14102 }
14103 s390_hotpatch_hw_before_label = val1;
14104 s390_hotpatch_hw_after_label = val2;
14105 break;
14106 }
14107 default:
14108 gcc_unreachable ();
14109 }
14110 }
14111
14112 /* Set up function hooks. */
14113 init_machine_status = s390_init_machine_status;
14114
14115 s390_option_override_internal (true, &global_options, &global_options_set);
14116
14117 /* Save the initial options in case the user does function specific
14118 options. */
14119 target_option_default_node = build_target_option_node (&global_options);
14120 target_option_current_node = target_option_default_node;
14121
14122 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
14123 requires the arch flags to be evaluated already. Since prefetching
14124 is beneficial on s390, we enable it if available. */
14125 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
14126 flag_prefetch_loop_arrays = 1;
14127
14128 if (TARGET_TPF)
14129 {
14130 /* Don't emit DWARF3/4 unless specifically selected. The TPF
14131 debuggers do not yet support DWARF 3/4. */
14132 if (!global_options_set.x_dwarf_strict)
14133 dwarf_strict = 1;
14134 if (!global_options_set.x_dwarf_version)
14135 dwarf_version = 2;
14136 }
14137
14138 /* Register a target-specific optimization-and-lowering pass
14139 to run immediately before prologue and epilogue generation.
14140
14141 Registering the pass must be done at start up. It's
14142 convenient to do it here. */
14143 opt_pass *new_pass = new pass_s390_early_mach (g);
14144 struct register_pass_info insert_pass_s390_early_mach =
14145 {
14146 new_pass, /* pass */
14147 "pro_and_epilogue", /* reference_pass_name */
14148 1, /* ref_pass_instance_number */
14149 PASS_POS_INSERT_BEFORE /* po_op */
14150 };
14151 register_pass (&insert_pass_s390_early_mach);
14152 }
14153
14154 #if S390_USE_TARGET_ATTRIBUTE
14155 /* Inner function to process the attribute((target(...))). It takes an
14156 argument and sets the current options from the argument. If we have a
14157 list, recursively go over the list. */
14158
14159 static bool
14160 s390_valid_target_attribute_inner_p (tree args,
14161 struct gcc_options *opts,
14162 struct gcc_options *new_opts_set,
14163 bool force_pragma)
14164 {
14165 char *next_optstr;
14166 bool ret = true;
14167
14168 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
14169 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
14170 static const struct
14171 {
14172 const char *string;
14173 size_t len;
14174 int opt;
14175 int has_arg;
14176 int only_as_pragma;
14177 } attrs[] = {
14178 /* enum options */
14179 S390_ATTRIB ("arch=", OPT_march_, 1),
14180 S390_ATTRIB ("tune=", OPT_mtune_, 1),
14181 /* uinteger options */
14182 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
14183 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
14184 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
14185 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
14186 /* flag options */
14187 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
14188 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
14189 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
14190 S390_ATTRIB ("htm", OPT_mhtm, 0),
14191 S390_ATTRIB ("vx", OPT_mvx, 0),
14192 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
14193 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
14194 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
14195 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
14196 S390_PRAGMA ("zvector", OPT_mzvector, 0),
14197 /* boolean options */
14198 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
14199 };
14200 #undef S390_ATTRIB
14201 #undef S390_PRAGMA
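  /* Example of an accepted argument string:
       __attribute__ ((target ("arch=z13,no-htm,stack-size=8192")))
     Each comma separated entry is matched against the table above; a
     "no-" prefix negates a flag option.  */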
14202
14203 /* If this is a list, recurse to get the options. */
14204 if (TREE_CODE (args) == TREE_LIST)
14205 {
14206 bool ret = true;
14207 int num_pragma_values;
14208 int i;
14209
14210 /* Note: attribs.c:decl_attributes prepends the values from
14211 current_target_pragma to the list of target attributes. To determine
14212 whether we're looking at a value of the attribute or the pragma we
14213 assume that the first [list_length (current_target_pragma)] values in
14214 the list are the values from the pragma. */
14215 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
14216 ? list_length (current_target_pragma) : 0;
14217 for (i = 0; args; args = TREE_CHAIN (args), i++)
14218 {
14219 bool is_pragma;
14220
14221 is_pragma = (force_pragma || i < num_pragma_values);
14222 if (TREE_VALUE (args)
14223 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
14224 opts, new_opts_set,
14225 is_pragma))
14226 {
14227 ret = false;
14228 }
14229 }
14230 return ret;
14231 }
14232
14233 else if (TREE_CODE (args) != STRING_CST)
14234 {
14235 error ("attribute %<target%> argument not a string");
14236 return false;
14237 }
14238
14239 /* Handle multiple arguments separated by commas. */
14240 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
14241
14242 while (next_optstr && *next_optstr != '\0')
14243 {
14244 char *p = next_optstr;
14245 char *orig_p = p;
14246 char *comma = strchr (next_optstr, ',');
14247 size_t len, opt_len;
14248 int opt;
14249 bool opt_set_p;
14250 char ch;
14251 unsigned i;
14252 int mask = 0;
14253 enum cl_var_type var_type;
14254 bool found;
14255
14256 if (comma)
14257 {
14258 *comma = '\0';
14259 len = comma - next_optstr;
14260 next_optstr = comma + 1;
14261 }
14262 else
14263 {
14264 len = strlen (p);
14265 next_optstr = NULL;
14266 }
14267
14268 /* Recognize no-xxx. */
14269 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
14270 {
14271 opt_set_p = false;
14272 p += 3;
14273 len -= 3;
14274 }
14275 else
14276 opt_set_p = true;
14277
14278 /* Find the option. */
14279 ch = *p;
14280 found = false;
14281 for (i = 0; i < ARRAY_SIZE (attrs); i++)
14282 {
14283 opt_len = attrs[i].len;
14284 if (ch == attrs[i].string[0]
14285 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
14286 && memcmp (p, attrs[i].string, opt_len) == 0)
14287 {
14288 opt = attrs[i].opt;
14289 if (!opt_set_p && cl_options[opt].cl_reject_negative)
14290 continue;
14291 mask = cl_options[opt].var_value;
14292 var_type = cl_options[opt].var_type;
14293 found = true;
14294 break;
14295 }
14296 }
14297
14298 /* Process the option. */
14299 if (!found)
14300 {
14301 error ("attribute(target(\"%s\")) is unknown", orig_p);
14302 return false;
14303 }
14304 else if (attrs[i].only_as_pragma && !force_pragma)
14305 {
14306 /* Value is not allowed for the target attribute. */
14307 error ("value %qs is not supported by attribute %<target%>",
14308 attrs[i].string);
14309 return false;
14310 }
14311
14312 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
14313 {
14314 if (var_type == CLVC_BIT_CLEAR)
14315 opt_set_p = !opt_set_p;
14316
14317 if (opt_set_p)
14318 opts->x_target_flags |= mask;
14319 else
14320 opts->x_target_flags &= ~mask;
14321 new_opts_set->x_target_flags |= mask;
14322 }
14323
14324 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
14325 {
14326 int value;
14327
14328 if (cl_options[opt].cl_uinteger)
14329 {
14330 /* Unsigned integer argument. Code based on the function
14331 decode_cmdline_option () in opts-common.c. */
14332 value = integral_argument (p + opt_len);
14333 }
14334 else
14335 value = (opt_set_p) ? 1 : 0;
14336
14337 if (value != -1)
14338 {
14339 struct cl_decoded_option decoded;
14340
14341 /* Value range check; only implemented for numeric and boolean
14342 options at the moment. */
14343 generate_option (opt, NULL, value, CL_TARGET, &decoded);
14344 s390_handle_option (opts, new_opts_set, &decoded, input_location);
14345 set_option (opts, new_opts_set, opt, value,
14346 p + opt_len, DK_UNSPECIFIED, input_location,
14347 global_dc);
14348 }
14349 else
14350 {
14351 error ("attribute(target(\"%s\")) is unknown", orig_p);
14352 ret = false;
14353 }
14354 }
14355
14356 else if (cl_options[opt].var_type == CLVC_ENUM)
14357 {
14358 bool arg_ok;
14359 int value;
14360
14361 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
14362 if (arg_ok)
14363 set_option (opts, new_opts_set, opt, value,
14364 p + opt_len, DK_UNSPECIFIED, input_location,
14365 global_dc);
14366 else
14367 {
14368 error ("attribute(target(\"%s\")) is unknown", orig_p);
14369 ret = false;
14370 }
14371 }
14372
14373 else
14374 gcc_unreachable ();
14375 }
14376 return ret;
14377 }
14378
14379 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
14380
14381 tree
14382 s390_valid_target_attribute_tree (tree args,
14383 struct gcc_options *opts,
14384 const struct gcc_options *opts_set,
14385 bool force_pragma)
14386 {
14387 tree t = NULL_TREE;
14388 struct gcc_options new_opts_set;
14389
14390 memset (&new_opts_set, 0, sizeof (new_opts_set));
14391
14392 /* Process each of the options on the chain. */
14393 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
14394 force_pragma))
14395 return error_mark_node;
14396
14397 /* If some option was set (even if it has not changed), rerun
14398 s390_option_override_internal, and then save the options away. */
14399 if (new_opts_set.x_target_flags
14400 || new_opts_set.x_s390_arch
14401 || new_opts_set.x_s390_tune
14402 || new_opts_set.x_s390_stack_guard
14403 || new_opts_set.x_s390_stack_size
14404 || new_opts_set.x_s390_branch_cost
14405 || new_opts_set.x_s390_warn_framesize
14406 || new_opts_set.x_s390_warn_dynamicstack_p)
14407 {
14408 const unsigned char *src = (const unsigned char *)opts_set;
14409 unsigned char *dest = (unsigned char *)&new_opts_set;
14410 unsigned int i;
14411
14412 /* Merge the original option flags into the new ones. */
14413 for (i = 0; i < sizeof (*opts_set); i++)
14414 dest[i] |= src[i];
14415
14416 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
14417 s390_option_override_internal (false, opts, &new_opts_set);
14418 /* Save the current options unless we are validating options for
14419 #pragma. */
14420 t = build_target_option_node (opts);
14421 }
14422 return t;
14423 }
14424
14425 /* Hook to validate attribute((target("string"))). */
14426
14427 static bool
14428 s390_valid_target_attribute_p (tree fndecl,
14429 tree ARG_UNUSED (name),
14430 tree args,
14431 int ARG_UNUSED (flags))
14432 {
14433 struct gcc_options func_options;
14434 tree new_target, new_optimize;
14435 bool ret = true;
14436
14437 /* attribute((target("default"))) does nothing, beyond
14438 affecting multi-versioning. */
14439 if (TREE_VALUE (args)
14440 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
14441 && TREE_CHAIN (args) == NULL_TREE
14442 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
14443 return true;
14444
14445 tree old_optimize = build_optimization_node (&global_options);
14446
14447 /* Get the optimization options of the current function. */
14448 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
14449
14450 if (!func_optimize)
14451 func_optimize = old_optimize;
14452
14453 /* Init func_options. */
14454 memset (&func_options, 0, sizeof (func_options));
14455 init_options_struct (&func_options, NULL);
14456 lang_hooks.init_options_struct (&func_options);
14457
14458 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
14459
14460 /* Initialize func_options to the default before its target options can
14461 be set. */
14462 cl_target_option_restore (&func_options,
14463 TREE_TARGET_OPTION (target_option_default_node));
14464
14465 new_target = s390_valid_target_attribute_tree (args, &func_options,
14466 &global_options_set,
14467 (args ==
14468 current_target_pragma));
14469 new_optimize = build_optimization_node (&func_options);
14470 if (new_target == error_mark_node)
14471 ret = false;
14472 else if (fndecl && new_target)
14473 {
14474 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
14475 if (old_optimize != new_optimize)
14476 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
14477 }
14478 return ret;
14479 }
14480
14481 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
14482 cache. */
14483
14484 void
14485 s390_activate_target_options (tree new_tree)
14486 {
14487 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
14488 if (TREE_TARGET_GLOBALS (new_tree))
14489 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
14490 else if (new_tree == target_option_default_node)
14491 restore_target_globals (&default_target_globals);
14492 else
14493 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
14494 s390_previous_fndecl = NULL_TREE;
14495 }
14496
14497 /* Establish appropriate back-end context for processing the function
14498 FNDECL. The argument might be NULL to indicate processing at top
14499 level, outside of any function scope. */
14500 static void
14501 s390_set_current_function (tree fndecl)
14502 {
14503 /* Only change the context if the function changes. This hook is called
14504 several times in the course of compiling a function, and we don't want to
14505 slow things down too much or call target_reinit when it isn't safe. */
14506 if (fndecl == s390_previous_fndecl)
14507 return;
14508
14509 tree old_tree;
14510 if (s390_previous_fndecl == NULL_TREE)
14511 old_tree = target_option_current_node;
14512 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
14513 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
14514 else
14515 old_tree = target_option_default_node;
14516
14517 if (fndecl == NULL_TREE)
14518 {
14519 if (old_tree != target_option_current_node)
14520 s390_activate_target_options (target_option_current_node);
14521 return;
14522 }
14523
14524 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
14525 if (new_tree == NULL_TREE)
14526 new_tree = target_option_default_node;
14527
14528 if (old_tree != new_tree)
14529 s390_activate_target_options (new_tree);
14530 s390_previous_fndecl = fndecl;
14531 }
14532 #endif
14533
14534 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
14535
14536 static bool
14537 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
14538 unsigned int align ATTRIBUTE_UNUSED,
14539 enum by_pieces_operation op ATTRIBUTE_UNUSED,
14540 bool speed_p ATTRIBUTE_UNUSED)
14541 {
14542 return (size == 1 || size == 2
14543 || size == 4 || (TARGET_ZARCH && size == 8));
14544 }
14545
14546 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
14547
14548 static void
14549 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
14550 {
14551 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
14552 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
14553 tree call_efpc = build_call_expr (efpc, 0);
14554 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
14555
14556 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
14557 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
14558 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
14559 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
14560 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
14561 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
14562
14563 /* Generates the equivalent of feholdexcept (&fenv_var)
14564
14565 fenv_var = __builtin_s390_efpc ();
14566 __builtin_s390_sfpc (fenv_var & mask) */
14567 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
14568 tree new_fpc =
14569 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
14570 build_int_cst (unsigned_type_node,
14571 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
14572 FPC_EXCEPTION_MASK)));
14573 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
14574 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
14575
14576 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
14577
14578 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
14579 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
14580 build_int_cst (unsigned_type_node,
14581 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
14582 *clear = build_call_expr (sfpc, 1, new_fpc);
14583
14584 /* Generates the equivalent of feupdateenv (fenv_var)
14585
14586 old_fpc = __builtin_s390_efpc ();
14587 __builtin_s390_sfpc (fenv_var);
14588 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
14589
14590 old_fpc = create_tmp_var_raw (unsigned_type_node);
14591 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
14592 old_fpc, call_efpc);
14593
14594 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
14595
14596 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
14597 build_int_cst (unsigned_type_node,
14598 FPC_FLAGS_MASK));
14599 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
14600 build_int_cst (unsigned_type_node,
14601 FPC_FLAGS_SHIFT));
14602 tree atomic_feraiseexcept
14603 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
14604 raise_old_except = build_call_expr (atomic_feraiseexcept,
14605 1, raise_old_except);
14606
14607 *update = build2 (COMPOUND_EXPR, void_type_node,
14608 build2 (COMPOUND_EXPR, void_type_node,
14609 store_old_fpc, set_new_fpc),
14610 raise_old_except);
14611
14612 #undef FPC_EXCEPTION_MASK
14613 #undef FPC_FLAGS_MASK
14614 #undef FPC_DXC_MASK
14615 #undef FPC_EXCEPTION_MASK_SHIFT
14616 #undef FPC_FLAGS_SHIFT
14617 #undef FPC_DXC_SHIFT
14618 }
14619
14620 /* Return the vector mode to be used for inner mode MODE when doing
14621 vectorization. */
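/* E.g. with vector support enabled, SImode elements are vectorized using
   V4SImode, i.e. four 32-bit lanes in one 16-byte vector register.  */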
14622 static machine_mode
14623 s390_preferred_simd_mode (machine_mode mode)
14624 {
14625 if (TARGET_VX)
14626 switch (mode)
14627 {
14628 case DFmode:
14629 return V2DFmode;
14630 case DImode:
14631 return V2DImode;
14632 case SImode:
14633 return V4SImode;
14634 case HImode:
14635 return V8HImode;
14636 case QImode:
14637 return V16QImode;
14638 default:;
14639 }
14640 return word_mode;
14641 }
14642
14643 /* Our hardware does not require vectors to be strictly aligned. */
14644 static bool
14645 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
14646 const_tree type ATTRIBUTE_UNUSED,
14647 int misalignment ATTRIBUTE_UNUSED,
14648 bool is_packed ATTRIBUTE_UNUSED)
14649 {
14650 if (TARGET_VX)
14651 return true;
14652
14653 return default_builtin_support_vector_misalignment (mode, type, misalignment,
14654 is_packed);
14655 }
14656
14657 /* The vector ABI requires vector types to be aligned on an 8 byte
14658 boundary (our stack alignment). However, we allow this to be
14659 overridden by the user, although doing so definitely breaks the ABI. */
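/* For example, under the vector ABI a 16-byte vector type without
   user-specified alignment gets MIN (64, 128) = 64 bits, i.e. the
   8 byte stack alignment.  */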
14660 static HOST_WIDE_INT
14661 s390_vector_alignment (const_tree type)
14662 {
14663 if (!TARGET_VX_ABI)
14664 return default_vector_alignment (type);
14665
14666 if (TYPE_USER_ALIGN (type))
14667 return TYPE_ALIGN (type);
14668
14669 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
14670 }
14671
14672 #ifdef HAVE_AS_MACHINE_MACHINEMODE
14673 /* Implement TARGET_ASM_FILE_START. */
14674 static void
14675 s390_asm_file_start (void)
14676 {
14677 s390_asm_output_machine_for_arch (asm_out_file);
14678 }
14679 #endif
14680
14681 /* Implement TARGET_ASM_FILE_END. */
14682 static void
14683 s390_asm_file_end (void)
14684 {
14685 #ifdef HAVE_AS_GNU_ATTRIBUTE
14686 varpool_node *vnode;
14687 cgraph_node *cnode;
14688
14689 FOR_EACH_VARIABLE (vnode)
14690 if (TREE_PUBLIC (vnode->decl))
14691 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
14692
14693 FOR_EACH_FUNCTION (cnode)
14694 if (TREE_PUBLIC (cnode->decl))
14695 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
14696
14697
14698 if (s390_vector_abi != 0)
14699 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
14700 s390_vector_abi);
14701 #endif
14702 file_end_indicate_exec_stack ();
14703
14704 if (flag_split_stack)
14705 file_end_indicate_split_stack ();
14706 }
14707
14708 /* Return true if TYPE is a vector bool type. */
14709 static inline bool
14710 s390_vector_bool_type_p (const_tree type)
14711 {
14712 return TYPE_VECTOR_OPAQUE (type);
14713 }
14714
14715 /* Return the diagnostic message string if the binary operation OP is
14716 not permitted on TYPE1 and TYPE2, NULL otherwise. */
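/* For example, adding a vector bool operand to a vector float operand is
   rejected, as is comparing a vector bool against a non-bool vector.  */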
14717 static const char*
14718 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
14719 {
14720 bool bool1_p, bool2_p;
14721 bool plusminus_p;
14722 bool muldiv_p;
14723 bool compare_p;
14724 machine_mode mode1, mode2;
14725
14726 if (!TARGET_ZVECTOR)
14727 return NULL;
14728
14729 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
14730 return NULL;
14731
14732 bool1_p = s390_vector_bool_type_p (type1);
14733 bool2_p = s390_vector_bool_type_p (type2);
14734
14735 /* Mixing signed and unsigned types is forbidden for all
14736 operators. */
14737 if (!bool1_p && !bool2_p
14738 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
14739 return N_("types differ in signedness");
14740
14741 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
14742 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
14743 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
14744 || op == ROUND_DIV_EXPR);
14745 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
14746 || op == EQ_EXPR || op == NE_EXPR);
14747
14748 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
14749 return N_("binary operator does not support two vector bool operands");
14750
14751 if (bool1_p != bool2_p && (muldiv_p || compare_p))
14752 return N_("binary operator does not support vector bool operand");
14753
14754 mode1 = TYPE_MODE (type1);
14755 mode2 = TYPE_MODE (type2);
14756
14757 if (bool1_p != bool2_p && plusminus_p
14758 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
14759 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
14760 return N_("binary operator does not support mixing vector "
14761 "bool with floating point vector operands");
14762
14763 return NULL;
14764 }
14765
14766 /* Initialize GCC target structure. */
14767
14768 #undef TARGET_ASM_ALIGNED_HI_OP
14769 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
14770 #undef TARGET_ASM_ALIGNED_DI_OP
14771 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
14772 #undef TARGET_ASM_INTEGER
14773 #define TARGET_ASM_INTEGER s390_assemble_integer
14774
14775 #undef TARGET_ASM_OPEN_PAREN
14776 #define TARGET_ASM_OPEN_PAREN ""
14777
14778 #undef TARGET_ASM_CLOSE_PAREN
14779 #define TARGET_ASM_CLOSE_PAREN ""
14780
14781 #undef TARGET_OPTION_OVERRIDE
14782 #define TARGET_OPTION_OVERRIDE s390_option_override
14783
14784 #undef TARGET_ENCODE_SECTION_INFO
14785 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
14786
14787 #undef TARGET_SCALAR_MODE_SUPPORTED_P
14788 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
14789
14790 #ifdef HAVE_AS_TLS
14791 #undef TARGET_HAVE_TLS
14792 #define TARGET_HAVE_TLS true
14793 #endif
14794 #undef TARGET_CANNOT_FORCE_CONST_MEM
14795 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
14796
14797 #undef TARGET_DELEGITIMIZE_ADDRESS
14798 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
14799
14800 #undef TARGET_LEGITIMIZE_ADDRESS
14801 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
14802
14803 #undef TARGET_RETURN_IN_MEMORY
14804 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
14805
14806 #undef TARGET_INIT_BUILTINS
14807 #define TARGET_INIT_BUILTINS s390_init_builtins
14808 #undef TARGET_EXPAND_BUILTIN
14809 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
14810 #undef TARGET_BUILTIN_DECL
14811 #define TARGET_BUILTIN_DECL s390_builtin_decl
14812
14813 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
14814 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
14815
14816 #undef TARGET_ASM_OUTPUT_MI_THUNK
14817 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
14818 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
14819 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
14820
14821 #undef TARGET_SCHED_ADJUST_PRIORITY
14822 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
14823 #undef TARGET_SCHED_ISSUE_RATE
14824 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
14825 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
14826 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
14827
14828 #undef TARGET_SCHED_VARIABLE_ISSUE
14829 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
14830 #undef TARGET_SCHED_REORDER
14831 #define TARGET_SCHED_REORDER s390_sched_reorder
14832 #undef TARGET_SCHED_INIT
14833 #define TARGET_SCHED_INIT s390_sched_init
14834
14835 #undef TARGET_CANNOT_COPY_INSN_P
14836 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
14837 #undef TARGET_RTX_COSTS
14838 #define TARGET_RTX_COSTS s390_rtx_costs
14839 #undef TARGET_ADDRESS_COST
14840 #define TARGET_ADDRESS_COST s390_address_cost
14841 #undef TARGET_REGISTER_MOVE_COST
14842 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
14843 #undef TARGET_MEMORY_MOVE_COST
14844 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
14845
14846 #undef TARGET_MACHINE_DEPENDENT_REORG
14847 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
14848
14849 #undef TARGET_VALID_POINTER_MODE
14850 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
14851
14852 #undef TARGET_BUILD_BUILTIN_VA_LIST
14853 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
14854 #undef TARGET_EXPAND_BUILTIN_VA_START
14855 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
14856 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
14857 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
14858
14859 #undef TARGET_PROMOTE_FUNCTION_MODE
14860 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
14861 #undef TARGET_PASS_BY_REFERENCE
14862 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
14863
14864 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
14865 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
14866 #undef TARGET_FUNCTION_ARG
14867 #define TARGET_FUNCTION_ARG s390_function_arg
14868 #undef TARGET_FUNCTION_ARG_ADVANCE
14869 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
14870 #undef TARGET_FUNCTION_VALUE
14871 #define TARGET_FUNCTION_VALUE s390_function_value
14872 #undef TARGET_LIBCALL_VALUE
14873 #define TARGET_LIBCALL_VALUE s390_libcall_value
14874 #undef TARGET_STRICT_ARGUMENT_NAMING
14875 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
14876
14877 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
14878 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
14879
14880 #undef TARGET_FIXED_CONDITION_CODE_REGS
14881 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
14882
14883 #undef TARGET_CC_MODES_COMPATIBLE
14884 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
14885
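/* Any insn, including calls, may appear in a low-overhead loop body.  */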
14886 #undef TARGET_INVALID_WITHIN_DOLOOP
14887 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
14888
14889 #ifdef HAVE_AS_TLS
14890 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
14891 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
14892 #endif
14893
14894 #undef TARGET_DWARF_FRAME_REG_MODE
14895 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
14896
14897 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
14898 #undef TARGET_MANGLE_TYPE
14899 #define TARGET_MANGLE_TYPE s390_mangle_type
14900 #endif
14901
14902 #undef TARGET_SCALAR_MODE_SUPPORTED_P
14903 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
14904
14905 #undef TARGET_VECTOR_MODE_SUPPORTED_P
14906 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
14907
14908 #undef TARGET_PREFERRED_RELOAD_CLASS
14909 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
14910
14911 #undef TARGET_SECONDARY_RELOAD
14912 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
14913
14914 #undef TARGET_LIBGCC_CMP_RETURN_MODE
14915 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
14916
14917 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
14918 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
14919
14920 #undef TARGET_LEGITIMATE_ADDRESS_P
14921 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
14922
14923 #undef TARGET_LEGITIMATE_CONSTANT_P
14924 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
14925
14926 #undef TARGET_LRA_P
14927 #define TARGET_LRA_P s390_lra_p
14928
14929 #undef TARGET_CAN_ELIMINATE
14930 #define TARGET_CAN_ELIMINATE s390_can_eliminate
14931
14932 #undef TARGET_CONDITIONAL_REGISTER_USAGE
14933 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
14934
14935 #undef TARGET_LOOP_UNROLL_ADJUST
14936 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
14937
14938 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
14939 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
14940 #undef TARGET_TRAMPOLINE_INIT
14941 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
14942
14943 #undef TARGET_UNWIND_WORD_MODE
14944 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
14945
14946 #undef TARGET_CANONICALIZE_COMPARISON
14947 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
14948
14949 #undef TARGET_HARD_REGNO_SCRATCH_OK
14950 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
14951
14952 #undef TARGET_ATTRIBUTE_TABLE
14953 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
14954
14955 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
14956 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
14957
14958 #undef TARGET_SET_UP_BY_PROLOGUE
14959 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
14960
14961 #undef TARGET_EXTRA_LIVE_ON_ENTRY
14962 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
14963
14964 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
14965 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
14966 s390_use_by_pieces_infrastructure_p
14967
14968 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
14969 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
14970
14971 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
14972 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
14973
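/* Vectorizer support hooks.  */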
14974 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
14975 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
14976
14977 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
14978 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
14979
14980 #undef TARGET_VECTOR_ALIGNMENT
14981 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
14982
14983 #undef TARGET_INVALID_BINARY_OP
14984 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
14985
14986 #ifdef HAVE_AS_MACHINE_MACHINEMODE
14987 #undef TARGET_ASM_FILE_START
14988 #define TARGET_ASM_FILE_START s390_asm_file_start
14989 #endif
14990
14991 #undef TARGET_ASM_FILE_END
14992 #define TARGET_ASM_FILE_END s390_asm_file_end
14993
14994 #if S390_USE_TARGET_ATTRIBUTE
14995 #undef TARGET_SET_CURRENT_FUNCTION
14996 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
14997
14998 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
14999 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
15000 #endif
15001
15002 #undef TARGET_OPTION_RESTORE
15003 #define TARGET_OPTION_RESTORE s390_function_specific_restore
15004
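/* Build the target hook vector from the definitions above.  */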
15005 struct gcc_target targetm = TARGET_INITIALIZER;
15006
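/* Garbage-collector roots generated by gengtype for this file.  */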
15007 #include "gt-s390.h"