]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/config/s390/s390.c
Move MEMMODEL_* from coretypes.h to memmodel.h
[thirdparty/gcc.git] / gcc / config / s390 / s390.c
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2016 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "target-globals.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "cfgloop.h"
34 #include "df.h"
35 #include "memmodel.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "expmed.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "diagnostic.h"
46 #include "alias.h"
47 #include "fold-const.h"
48 #include "print-tree.h"
49 #include "stor-layout.h"
50 #include "varasm.h"
51 #include "calls.h"
52 #include "conditions.h"
53 #include "output.h"
54 #include "insn-attr.h"
55 #include "flags.h"
56 #include "except.h"
57 #include "dojump.h"
58 #include "explow.h"
59 #include "stmt.h"
60 #include "expr.h"
61 #include "reload.h"
62 #include "cfgrtl.h"
63 #include "cfganal.h"
64 #include "lcm.h"
65 #include "cfgbuild.h"
66 #include "cfgcleanup.h"
67 #include "debug.h"
68 #include "langhooks.h"
69 #include "internal-fn.h"
70 #include "gimple-fold.h"
71 #include "tree-eh.h"
72 #include "gimplify.h"
73 #include "params.h"
74 #include "opts.h"
75 #include "tree-pass.h"
76 #include "context.h"
77 #include "builtins.h"
78 #include "rtl-iter.h"
79 #include "intl.h"
80 #include "tm-constrs.h"
81
82 /* This file should be included last. */
83 #include "target-def.h"
84
85 /* Remember the last target of s390_set_current_function. */
86 static GTY(()) tree s390_previous_fndecl;
87
88 /* Define the specific costs for a given cpu. */
89
90 struct processor_costs
91 {
92 /* multiplication */
93 const int m; /* cost of an M instruction. */
94 const int mghi; /* cost of an MGHI instruction. */
95 const int mh; /* cost of an MH instruction. */
96 const int mhi; /* cost of an MHI instruction. */
97 const int ml; /* cost of an ML instruction. */
98 const int mr; /* cost of an MR instruction. */
99 const int ms; /* cost of an MS instruction. */
100 const int msg; /* cost of an MSG instruction. */
101 const int msgf; /* cost of an MSGF instruction. */
102 const int msgfr; /* cost of an MSGFR instruction. */
103 const int msgr; /* cost of an MSGR instruction. */
104 const int msr; /* cost of an MSR instruction. */
105 const int mult_df; /* cost of multiplication in DFmode. */
106 const int mxbr;
107 /* square root */
108 const int sqxbr; /* cost of square root in TFmode. */
109 const int sqdbr; /* cost of square root in DFmode. */
110 const int sqebr; /* cost of square root in SFmode. */
111 /* multiply and add */
112 const int madbr; /* cost of multiply and add in DFmode. */
113 const int maebr; /* cost of multiply and add in SFmode. */
114 /* division */
115 const int dxbr;
116 const int ddbr;
117 const int debr;
118 const int dlgr;
119 const int dlr;
120 const int dr;
121 const int dsgfr;
122 const int dsgr;
123 };
124
125 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
126
127 static const
128 struct processor_costs z900_cost =
129 {
130 COSTS_N_INSNS (5), /* M */
131 COSTS_N_INSNS (10), /* MGHI */
132 COSTS_N_INSNS (5), /* MH */
133 COSTS_N_INSNS (4), /* MHI */
134 COSTS_N_INSNS (5), /* ML */
135 COSTS_N_INSNS (5), /* MR */
136 COSTS_N_INSNS (4), /* MS */
137 COSTS_N_INSNS (15), /* MSG */
138 COSTS_N_INSNS (7), /* MSGF */
139 COSTS_N_INSNS (7), /* MSGFR */
140 COSTS_N_INSNS (10), /* MSGR */
141 COSTS_N_INSNS (4), /* MSR */
142 COSTS_N_INSNS (7), /* multiplication in DFmode */
143 COSTS_N_INSNS (13), /* MXBR */
144 COSTS_N_INSNS (136), /* SQXBR */
145 COSTS_N_INSNS (44), /* SQDBR */
146 COSTS_N_INSNS (35), /* SQEBR */
147 COSTS_N_INSNS (18), /* MADBR */
148 COSTS_N_INSNS (13), /* MAEBR */
149 COSTS_N_INSNS (134), /* DXBR */
150 COSTS_N_INSNS (30), /* DDBR */
151 COSTS_N_INSNS (27), /* DEBR */
152 COSTS_N_INSNS (220), /* DLGR */
153 COSTS_N_INSNS (34), /* DLR */
154 COSTS_N_INSNS (34), /* DR */
155 COSTS_N_INSNS (32), /* DSGFR */
156 COSTS_N_INSNS (32), /* DSGR */
157 };
158
159 static const
160 struct processor_costs z990_cost =
161 {
162 COSTS_N_INSNS (4), /* M */
163 COSTS_N_INSNS (2), /* MGHI */
164 COSTS_N_INSNS (2), /* MH */
165 COSTS_N_INSNS (2), /* MHI */
166 COSTS_N_INSNS (4), /* ML */
167 COSTS_N_INSNS (4), /* MR */
168 COSTS_N_INSNS (5), /* MS */
169 COSTS_N_INSNS (6), /* MSG */
170 COSTS_N_INSNS (4), /* MSGF */
171 COSTS_N_INSNS (4), /* MSGFR */
172 COSTS_N_INSNS (4), /* MSGR */
173 COSTS_N_INSNS (4), /* MSR */
174 COSTS_N_INSNS (1), /* multiplication in DFmode */
175 COSTS_N_INSNS (28), /* MXBR */
176 COSTS_N_INSNS (130), /* SQXBR */
177 COSTS_N_INSNS (66), /* SQDBR */
178 COSTS_N_INSNS (38), /* SQEBR */
179 COSTS_N_INSNS (1), /* MADBR */
180 COSTS_N_INSNS (1), /* MAEBR */
181 COSTS_N_INSNS (60), /* DXBR */
182 COSTS_N_INSNS (40), /* DDBR */
183 COSTS_N_INSNS (26), /* DEBR */
184 COSTS_N_INSNS (176), /* DLGR */
185 COSTS_N_INSNS (31), /* DLR */
186 COSTS_N_INSNS (31), /* DR */
187 COSTS_N_INSNS (31), /* DSGFR */
188 COSTS_N_INSNS (31), /* DSGR */
189 };
190
191 static const
192 struct processor_costs z9_109_cost =
193 {
194 COSTS_N_INSNS (4), /* M */
195 COSTS_N_INSNS (2), /* MGHI */
196 COSTS_N_INSNS (2), /* MH */
197 COSTS_N_INSNS (2), /* MHI */
198 COSTS_N_INSNS (4), /* ML */
199 COSTS_N_INSNS (4), /* MR */
200 COSTS_N_INSNS (5), /* MS */
201 COSTS_N_INSNS (6), /* MSG */
202 COSTS_N_INSNS (4), /* MSGF */
203 COSTS_N_INSNS (4), /* MSGFR */
204 COSTS_N_INSNS (4), /* MSGR */
205 COSTS_N_INSNS (4), /* MSR */
206 COSTS_N_INSNS (1), /* multiplication in DFmode */
207 COSTS_N_INSNS (28), /* MXBR */
208 COSTS_N_INSNS (130), /* SQXBR */
209 COSTS_N_INSNS (66), /* SQDBR */
210 COSTS_N_INSNS (38), /* SQEBR */
211 COSTS_N_INSNS (1), /* MADBR */
212 COSTS_N_INSNS (1), /* MAEBR */
213 COSTS_N_INSNS (60), /* DXBR */
214 COSTS_N_INSNS (40), /* DDBR */
215 COSTS_N_INSNS (26), /* DEBR */
216 COSTS_N_INSNS (30), /* DLGR */
217 COSTS_N_INSNS (23), /* DLR */
218 COSTS_N_INSNS (23), /* DR */
219 COSTS_N_INSNS (24), /* DSGFR */
220 COSTS_N_INSNS (24), /* DSGR */
221 };
222
223 static const
224 struct processor_costs z10_cost =
225 {
226 COSTS_N_INSNS (10), /* M */
227 COSTS_N_INSNS (10), /* MGHI */
228 COSTS_N_INSNS (10), /* MH */
229 COSTS_N_INSNS (10), /* MHI */
230 COSTS_N_INSNS (10), /* ML */
231 COSTS_N_INSNS (10), /* MR */
232 COSTS_N_INSNS (10), /* MS */
233 COSTS_N_INSNS (10), /* MSG */
234 COSTS_N_INSNS (10), /* MSGF */
235 COSTS_N_INSNS (10), /* MSGFR */
236 COSTS_N_INSNS (10), /* MSGR */
237 COSTS_N_INSNS (10), /* MSR */
238 COSTS_N_INSNS (1) , /* multiplication in DFmode */
239 COSTS_N_INSNS (50), /* MXBR */
240 COSTS_N_INSNS (120), /* SQXBR */
241 COSTS_N_INSNS (52), /* SQDBR */
242 COSTS_N_INSNS (38), /* SQEBR */
243 COSTS_N_INSNS (1), /* MADBR */
244 COSTS_N_INSNS (1), /* MAEBR */
245 COSTS_N_INSNS (111), /* DXBR */
246 COSTS_N_INSNS (39), /* DDBR */
247 COSTS_N_INSNS (32), /* DEBR */
248 COSTS_N_INSNS (160), /* DLGR */
249 COSTS_N_INSNS (71), /* DLR */
250 COSTS_N_INSNS (71), /* DR */
251 COSTS_N_INSNS (71), /* DSGFR */
252 COSTS_N_INSNS (71), /* DSGR */
253 };
254
255 static const
256 struct processor_costs z196_cost =
257 {
258 COSTS_N_INSNS (7), /* M */
259 COSTS_N_INSNS (5), /* MGHI */
260 COSTS_N_INSNS (5), /* MH */
261 COSTS_N_INSNS (5), /* MHI */
262 COSTS_N_INSNS (7), /* ML */
263 COSTS_N_INSNS (7), /* MR */
264 COSTS_N_INSNS (6), /* MS */
265 COSTS_N_INSNS (8), /* MSG */
266 COSTS_N_INSNS (6), /* MSGF */
267 COSTS_N_INSNS (6), /* MSGFR */
268 COSTS_N_INSNS (8), /* MSGR */
269 COSTS_N_INSNS (6), /* MSR */
270 COSTS_N_INSNS (1) , /* multiplication in DFmode */
271 COSTS_N_INSNS (40), /* MXBR B+40 */
272 COSTS_N_INSNS (100), /* SQXBR B+100 */
273 COSTS_N_INSNS (42), /* SQDBR B+42 */
274 COSTS_N_INSNS (28), /* SQEBR B+28 */
275 COSTS_N_INSNS (1), /* MADBR B */
276 COSTS_N_INSNS (1), /* MAEBR B */
277 COSTS_N_INSNS (101), /* DXBR B+101 */
278 COSTS_N_INSNS (29), /* DDBR */
279 COSTS_N_INSNS (22), /* DEBR */
280 COSTS_N_INSNS (160), /* DLGR cracked */
281 COSTS_N_INSNS (160), /* DLR cracked */
282 COSTS_N_INSNS (160), /* DR expanded */
283 COSTS_N_INSNS (160), /* DSGFR cracked */
284 COSTS_N_INSNS (160), /* DSGR cracked */
285 };
286
287 static const
288 struct processor_costs zEC12_cost =
289 {
290 COSTS_N_INSNS (7), /* M */
291 COSTS_N_INSNS (5), /* MGHI */
292 COSTS_N_INSNS (5), /* MH */
293 COSTS_N_INSNS (5), /* MHI */
294 COSTS_N_INSNS (7), /* ML */
295 COSTS_N_INSNS (7), /* MR */
296 COSTS_N_INSNS (6), /* MS */
297 COSTS_N_INSNS (8), /* MSG */
298 COSTS_N_INSNS (6), /* MSGF */
299 COSTS_N_INSNS (6), /* MSGFR */
300 COSTS_N_INSNS (8), /* MSGR */
301 COSTS_N_INSNS (6), /* MSR */
302 COSTS_N_INSNS (1) , /* multiplication in DFmode */
303 COSTS_N_INSNS (40), /* MXBR B+40 */
304 COSTS_N_INSNS (100), /* SQXBR B+100 */
305 COSTS_N_INSNS (42), /* SQDBR B+42 */
306 COSTS_N_INSNS (28), /* SQEBR B+28 */
307 COSTS_N_INSNS (1), /* MADBR B */
308 COSTS_N_INSNS (1), /* MAEBR B */
309 COSTS_N_INSNS (131), /* DXBR B+131 */
310 COSTS_N_INSNS (29), /* DDBR */
311 COSTS_N_INSNS (22), /* DEBR */
312 COSTS_N_INSNS (160), /* DLGR cracked */
313 COSTS_N_INSNS (160), /* DLR cracked */
314 COSTS_N_INSNS (160), /* DR expanded */
315 COSTS_N_INSNS (160), /* DSGFR cracked */
316 COSTS_N_INSNS (160), /* DSGR cracked */
317 };
318
319 static struct
320 {
321 const char *const name;
322 const enum processor_type processor;
323 const struct processor_costs *cost;
324 }
325 const processor_table[] =
326 {
327 { "g5", PROCESSOR_9672_G5, &z900_cost },
328 { "g6", PROCESSOR_9672_G6, &z900_cost },
329 { "z900", PROCESSOR_2064_Z900, &z900_cost },
330 { "z990", PROCESSOR_2084_Z990, &z990_cost },
331 { "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost },
332 { "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost },
333 { "z10", PROCESSOR_2097_Z10, &z10_cost },
334 { "z196", PROCESSOR_2817_Z196, &z196_cost },
335 { "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost },
336 { "z13", PROCESSOR_2964_Z13, &zEC12_cost },
337 { "native", PROCESSOR_NATIVE, NULL }
338 };
339
340 extern int reload_completed;
341
342 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
343 static rtx_insn *last_scheduled_insn;
344 #define MAX_SCHED_UNITS 3
345 static int last_scheduled_unit_distance[MAX_SCHED_UNITS];
346
347 /* The maximum score added for an instruction whose unit hasn't been
348 in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to
349 give instruction mix scheduling more priority over instruction
350 grouping. */
351 #define MAX_SCHED_MIX_SCORE 8
352
353 /* The maximum distance up to which individual scores will be
354 calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE.
355 Increase this with the OOO windows size of the machine. */
356 #define MAX_SCHED_MIX_DISTANCE 100
357
358 /* Structure used to hold the components of a S/390 memory
359 address. A legitimate address on S/390 is of the general
360 form
361 base + index + displacement
362 where any of the components is optional.
363
364 base and index are registers of the class ADDR_REGS,
365 displacement is an unsigned 12-bit immediate constant. */
366
367 struct s390_address
368 {
369 rtx base;
370 rtx indx;
371 rtx disp;
372 bool pointer;
373 bool literal_pool;
374 };
375
376 /* The following structure is embedded in the machine
377 specific part of struct function. */
378
379 struct GTY (()) s390_frame_layout
380 {
381 /* Offset within stack frame. */
382 HOST_WIDE_INT gprs_offset;
383 HOST_WIDE_INT f0_offset;
384 HOST_WIDE_INT f4_offset;
385 HOST_WIDE_INT f8_offset;
386 HOST_WIDE_INT backchain_offset;
387
388 /* Number of first and last gpr where slots in the register
389 save area are reserved for. */
390 int first_save_gpr_slot;
391 int last_save_gpr_slot;
392
393 /* Location (FP register number) where GPRs (r0-r15) should
394 be saved to.
395 0 - does not need to be saved at all
396 -1 - stack slot */
397 #define SAVE_SLOT_NONE 0
398 #define SAVE_SLOT_STACK -1
399 signed char gpr_save_slots[16];
400
401 /* Number of first and last gpr to be saved, restored. */
402 int first_save_gpr;
403 int first_restore_gpr;
404 int last_save_gpr;
405 int last_restore_gpr;
406
407 /* Bits standing for floating point registers. Set, if the
408 respective register has to be saved. Starting with reg 16 (f0)
409 at the rightmost bit.
410 Bit 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
411 fpr 15 13 11 9 14 12 10 8 7 5 3 1 6 4 2 0
412 reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */
413 unsigned int fpr_bitmap;
414
415 /* Number of floating point registers f8-f15 which must be saved. */
416 int high_fprs;
417
418 /* Set if return address needs to be saved.
419 This flag is set by s390_return_addr_rtx if it could not use
420 the initial value of r14 and therefore depends on r14 saved
421 to the stack. */
422 bool save_return_addr_p;
423
424 /* Size of stack frame. */
425 HOST_WIDE_INT frame_size;
426 };
427
428 /* Define the structure for the machine field in struct function. */
429
430 struct GTY(()) machine_function
431 {
432 struct s390_frame_layout frame_layout;
433
434 /* Literal pool base register. */
435 rtx base_reg;
436
437 /* True if we may need to perform branch splitting. */
438 bool split_branches_pending_p;
439
440 bool has_landing_pad_p;
441
442 /* True if the current function may contain a tbegin clobbering
443 FPRs. */
444 bool tbegin_p;
445
446 /* For -fsplit-stack support: A stack local which holds a pointer to
447 the stack arguments for a function with a variable number of
448 arguments. This is set at the start of the function and is used
449 to initialize the overflow_arg_area field of the va_list
450 structure. */
451 rtx split_stack_varargs_pointer;
452 };
453
454 /* Few accessor macros for struct cfun->machine->s390_frame_layout. */
455
456 #define cfun_frame_layout (cfun->machine->frame_layout)
457 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
458 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
459 ? cfun_frame_layout.fpr_bitmap & 0x0f \
460 : cfun_frame_layout.fpr_bitmap & 0x03))
461 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
462 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
463 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
464 (1 << (REGNO - FPR0_REGNUM)))
465 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
466 (1 << (REGNO - FPR0_REGNUM))))
467 #define cfun_gpr_save_slot(REGNO) \
468 cfun->machine->frame_layout.gpr_save_slots[REGNO]
469
470 /* Number of GPRs and FPRs used for argument passing. */
471 #define GP_ARG_NUM_REG 5
472 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
473 #define VEC_ARG_NUM_REG 8
474
475 /* A couple of shortcuts. */
476 #define CONST_OK_FOR_J(x) \
477 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
478 #define CONST_OK_FOR_K(x) \
479 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
480 #define CONST_OK_FOR_Os(x) \
481 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
482 #define CONST_OK_FOR_Op(x) \
483 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
484 #define CONST_OK_FOR_On(x) \
485 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
486
487 #define REGNO_PAIR_OK(REGNO, MODE) \
488 (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
489
490 /* That's the read ahead of the dynamic branch prediction unit in
491 bytes on a z10 (or higher) CPU. */
492 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
493
494
495 /* Indicate which ABI has been used for passing vector args.
496 0 - no vector type arguments have been passed where the ABI is relevant
497 1 - the old ABI has been used
498 2 - a vector type argument has been passed either in a vector register
499 or on the stack by value */
500 static int s390_vector_abi = 0;
501
502 /* Set the vector ABI marker if TYPE is subject to the vector ABI
503 switch. The vector ABI affects only vector data types. There are
504 two aspects of the vector ABI relevant here:
505
506 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
507 ABI and natural alignment with the old.
508
509 2. vector <= 16 bytes are passed in VRs or by value on the stack
510 with the new ABI but by reference on the stack with the old.
511
512 If ARG_P is true TYPE is used for a function argument or return
513 value. The ABI marker then is set for all vector data types. If
514 ARG_P is false only type 1 vectors are being checked. */
515
516 static void
517 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
518 {
519 static hash_set<const_tree> visited_types_hash;
520
521 if (s390_vector_abi)
522 return;
523
524 if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
525 return;
526
527 if (visited_types_hash.contains (type))
528 return;
529
530 visited_types_hash.add (type);
531
532 if (VECTOR_TYPE_P (type))
533 {
534 int type_size = int_size_in_bytes (type);
535
536 /* Outside arguments only the alignment is changing and this
537 only happens for vector types >= 16 bytes. */
538 if (!arg_p && type_size < 16)
539 return;
540
541 /* In arguments vector types > 16 are passed as before (GCC
542 never enforced the bigger alignment for arguments which was
543 required by the old vector ABI). However, it might still be
544 ABI relevant due to the changed alignment if it is a struct
545 member. */
546 if (arg_p && type_size > 16 && !in_struct_p)
547 return;
548
549 s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
550 }
551 else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
552 {
553 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
554 natural alignment there will never be ABI dependent padding
555 in an array type. That's why we do not set in_struct_p to
556 true here. */
557 s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
558 }
559 else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
560 {
561 tree arg_chain;
562
563 /* Check the return type. */
564 s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
565
566 for (arg_chain = TYPE_ARG_TYPES (type);
567 arg_chain;
568 arg_chain = TREE_CHAIN (arg_chain))
569 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
570 }
571 else if (RECORD_OR_UNION_TYPE_P (type))
572 {
573 tree field;
574
575 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
576 {
577 if (TREE_CODE (field) != FIELD_DECL)
578 continue;
579
580 s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
581 }
582 }
583 }
584
585
586 /* System z builtins. */
587
588 #include "s390-builtins.h"
589
590 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
591 {
592 #undef B_DEF
593 #undef OB_DEF
594 #undef OB_DEF_VAR
595 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
596 #define OB_DEF(...)
597 #define OB_DEF_VAR(...)
598 #include "s390-builtins.def"
599 0
600 };
601
602 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
603 {
604 #undef B_DEF
605 #undef OB_DEF
606 #undef OB_DEF_VAR
607 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
608 #define OB_DEF(...)
609 #define OB_DEF_VAR(...)
610 #include "s390-builtins.def"
611 0
612 };
613
614 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
615 {
616 #undef B_DEF
617 #undef OB_DEF
618 #undef OB_DEF_VAR
619 #define B_DEF(...)
620 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
621 #define OB_DEF_VAR(...)
622 #include "s390-builtins.def"
623 0
624 };
625
626 const unsigned int
627 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
628 {
629 #undef B_DEF
630 #undef OB_DEF
631 #undef OB_DEF_VAR
632 #define B_DEF(...)
633 #define OB_DEF(...)
634 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, FNTYPE) FLAGS,
635 #include "s390-builtins.def"
636 0
637 };
638
639 tree s390_builtin_types[BT_MAX];
640 tree s390_builtin_fn_types[BT_FN_MAX];
641 tree s390_builtin_decls[S390_BUILTIN_MAX +
642 S390_OVERLOADED_BUILTIN_MAX +
643 S390_OVERLOADED_BUILTIN_VAR_MAX];
644
645 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
646 #undef B_DEF
647 #undef OB_DEF
648 #undef OB_DEF_VAR
649 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
650 #define OB_DEF(...)
651 #define OB_DEF_VAR(...)
652
653 #include "s390-builtins.def"
654 CODE_FOR_nothing
655 };
656
657 static void
658 s390_init_builtins (void)
659 {
660 /* These definitions are being used in s390-builtins.def. */
661 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
662 NULL, NULL);
663 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
664 tree c_uint64_type_node;
665
666 /* The uint64_type_node from tree.c is not compatible to the C99
667 uint64_t data type. What we want is c_uint64_type_node from
668 c-common.c. But since backend code is not supposed to interface
669 with the frontend we recreate it here. */
670 if (TARGET_64BIT)
671 c_uint64_type_node = long_unsigned_type_node;
672 else
673 c_uint64_type_node = long_long_unsigned_type_node;
674
675 #undef DEF_TYPE
676 #define DEF_TYPE(INDEX, BFLAGS, NODE, CONST_P) \
677 if (s390_builtin_types[INDEX] == NULL) \
678 s390_builtin_types[INDEX] = (!CONST_P) ? \
679 (NODE) : build_type_variant ((NODE), 1, 0);
680
681 #undef DEF_POINTER_TYPE
682 #define DEF_POINTER_TYPE(INDEX, BFLAGS, INDEX_BASE) \
683 if (s390_builtin_types[INDEX] == NULL) \
684 s390_builtin_types[INDEX] = \
685 build_pointer_type (s390_builtin_types[INDEX_BASE]);
686
687 #undef DEF_DISTINCT_TYPE
688 #define DEF_DISTINCT_TYPE(INDEX, BFLAGS, INDEX_BASE) \
689 if (s390_builtin_types[INDEX] == NULL) \
690 s390_builtin_types[INDEX] = \
691 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
692
693 #undef DEF_VECTOR_TYPE
694 #define DEF_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
695 if (s390_builtin_types[INDEX] == NULL) \
696 s390_builtin_types[INDEX] = \
697 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
698
699 #undef DEF_OPAQUE_VECTOR_TYPE
700 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, BFLAGS, INDEX_BASE, ELEMENTS) \
701 if (s390_builtin_types[INDEX] == NULL) \
702 s390_builtin_types[INDEX] = \
703 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
704
705 #undef DEF_FN_TYPE
706 #define DEF_FN_TYPE(INDEX, BFLAGS, args...) \
707 if (s390_builtin_fn_types[INDEX] == NULL) \
708 s390_builtin_fn_types[INDEX] = \
709 build_function_type_list (args, NULL_TREE);
710 #undef DEF_OV_TYPE
711 #define DEF_OV_TYPE(...)
712 #include "s390-builtin-types.def"
713
714 #undef B_DEF
715 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
716 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
717 s390_builtin_decls[S390_BUILTIN_##NAME] = \
718 add_builtin_function ("__builtin_" #NAME, \
719 s390_builtin_fn_types[FNTYPE], \
720 S390_BUILTIN_##NAME, \
721 BUILT_IN_MD, \
722 NULL, \
723 ATTRS);
724 #undef OB_DEF
725 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
726 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
727 == NULL) \
728 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
729 add_builtin_function ("__builtin_" #NAME, \
730 s390_builtin_fn_types[FNTYPE], \
731 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
732 BUILT_IN_MD, \
733 NULL, \
734 0);
735 #undef OB_DEF_VAR
736 #define OB_DEF_VAR(...)
737 #include "s390-builtins.def"
738
739 }
740
741 /* Return true if ARG is appropriate as argument number ARGNUM of
742 builtin DECL. The operand flags from s390-builtins.def have to
743 passed as OP_FLAGS. */
744 bool
745 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
746 {
747 if (O_UIMM_P (op_flags))
748 {
749 int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
750 int bitwidth = bitwidths[op_flags - O_U1];
751
752 if (!tree_fits_uhwi_p (arg)
753 || tree_to_uhwi (arg) > ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1)
754 {
755 error("constant argument %d for builtin %qF is out of range (0.."
756 HOST_WIDE_INT_PRINT_UNSIGNED ")",
757 argnum, decl,
758 ((unsigned HOST_WIDE_INT)1 << bitwidth) - 1);
759 return false;
760 }
761 }
762
763 if (O_SIMM_P (op_flags))
764 {
765 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
766 int bitwidth = bitwidths[op_flags - O_S2];
767
768 if (!tree_fits_shwi_p (arg)
769 || tree_to_shwi (arg) < -((HOST_WIDE_INT)1 << (bitwidth - 1))
770 || tree_to_shwi (arg) > (((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1))
771 {
772 error("constant argument %d for builtin %qF is out of range ("
773 HOST_WIDE_INT_PRINT_DEC ".."
774 HOST_WIDE_INT_PRINT_DEC ")",
775 argnum, decl,
776 -((HOST_WIDE_INT)1 << (bitwidth - 1)),
777 ((HOST_WIDE_INT)1 << (bitwidth - 1)) - 1);
778 return false;
779 }
780 }
781 return true;
782 }
783
784 /* Expand an expression EXP that calls a built-in function,
785 with result going to TARGET if that's convenient
786 (and in mode MODE if that's convenient).
787 SUBTARGET may be used as the target for computing one of EXP's operands.
788 IGNORE is nonzero if the value is to be ignored. */
789
790 static rtx
791 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
792 machine_mode mode ATTRIBUTE_UNUSED,
793 int ignore ATTRIBUTE_UNUSED)
794 {
795 #define MAX_ARGS 6
796
797 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
798 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
799 enum insn_code icode;
800 rtx op[MAX_ARGS], pat;
801 int arity;
802 bool nonvoid;
803 tree arg;
804 call_expr_arg_iterator iter;
805 unsigned int all_op_flags = opflags_for_builtin (fcode);
806 machine_mode last_vec_mode = VOIDmode;
807
808 if (TARGET_DEBUG_ARG)
809 {
810 fprintf (stderr,
811 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
812 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
813 bflags_for_builtin (fcode));
814 }
815
816 if (S390_USE_TARGET_ATTRIBUTE)
817 {
818 unsigned int bflags;
819
820 bflags = bflags_for_builtin (fcode);
821 if ((bflags & B_HTM) && !TARGET_HTM)
822 {
823 error ("Builtin %qF is not supported without -mhtm "
824 "(default with -march=zEC12 and higher).", fndecl);
825 return const0_rtx;
826 }
827 if ((bflags & B_VX) && !TARGET_VX)
828 {
829 error ("Builtin %qF is not supported without -mvx "
830 "(default with -march=z13 and higher).", fndecl);
831 return const0_rtx;
832 }
833 }
834 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
835 && fcode < S390_ALL_BUILTIN_MAX)
836 {
837 gcc_unreachable ();
838 }
839 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
840 {
841 icode = code_for_builtin[fcode];
842 /* Set a flag in the machine specific cfun part in order to support
843 saving/restoring of FPRs. */
844 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
845 cfun->machine->tbegin_p = true;
846 }
847 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
848 {
849 error ("Unresolved overloaded builtin");
850 return const0_rtx;
851 }
852 else
853 internal_error ("bad builtin fcode");
854
855 if (icode == 0)
856 internal_error ("bad builtin icode");
857
858 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
859
860 if (nonvoid)
861 {
862 machine_mode tmode = insn_data[icode].operand[0].mode;
863 if (!target
864 || GET_MODE (target) != tmode
865 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
866 target = gen_reg_rtx (tmode);
867
868 /* There are builtins (e.g. vec_promote) with no vector
869 arguments but an element selector. So we have to also look
870 at the vector return type when emitting the modulo
871 operation. */
872 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
873 last_vec_mode = insn_data[icode].operand[0].mode;
874 }
875
876 arity = 0;
877 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
878 {
879 rtx tmp_rtx;
880 const struct insn_operand_data *insn_op;
881 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
882
883 all_op_flags = all_op_flags >> O_SHIFT;
884
885 if (arg == error_mark_node)
886 return NULL_RTX;
887 if (arity >= MAX_ARGS)
888 return NULL_RTX;
889
890 if (O_IMM_P (op_flags)
891 && TREE_CODE (arg) != INTEGER_CST)
892 {
893 error ("constant value required for builtin %qF argument %d",
894 fndecl, arity + 1);
895 return const0_rtx;
896 }
897
898 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
899 return const0_rtx;
900
901 insn_op = &insn_data[icode].operand[arity + nonvoid];
902 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
903
904 /* expand_expr truncates constants to the target mode only if it
905 is "convenient". However, our checks below rely on this
906 being done. */
907 if (CONST_INT_P (op[arity])
908 && SCALAR_INT_MODE_P (insn_op->mode)
909 && GET_MODE (op[arity]) != insn_op->mode)
910 op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
911 insn_op->mode));
912
913 /* Wrap the expanded RTX for pointer types into a MEM expr with
914 the proper mode. This allows us to use e.g. (match_operand
915 "memory_operand"..) in the insn patterns instead of (mem
916 (match_operand "address_operand)). This is helpful for
917 patterns not just accepting MEMs. */
918 if (POINTER_TYPE_P (TREE_TYPE (arg))
919 && insn_op->predicate != address_operand)
920 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
921
922 /* Expand the module operation required on element selectors. */
923 if (op_flags == O_ELEM)
924 {
925 gcc_assert (last_vec_mode != VOIDmode);
926 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
927 op[arity],
928 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
929 NULL_RTX, 1, OPTAB_DIRECT);
930 }
931
932 /* Record the vector mode used for an element selector. This assumes:
933 1. There is no builtin with two different vector modes and an element selector
934 2. The element selector comes after the vector type it is referring to.
935 This currently the true for all the builtins but FIXME we
936 should better check for that. */
937 if (VECTOR_MODE_P (insn_op->mode))
938 last_vec_mode = insn_op->mode;
939
940 if (insn_op->predicate (op[arity], insn_op->mode))
941 {
942 arity++;
943 continue;
944 }
945
946 if (MEM_P (op[arity])
947 && insn_op->predicate == memory_operand
948 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
949 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
950 {
951 op[arity] = replace_equiv_address (op[arity],
952 copy_to_mode_reg (Pmode,
953 XEXP (op[arity], 0)));
954 }
955 /* Some of the builtins require different modes/types than the
956 pattern in order to implement a specific API. Instead of
957 adding many expanders which do the mode change we do it here.
958 E.g. s390_vec_add_u128 required to have vector unsigned char
959 arguments is mapped to addti3. */
960 else if (insn_op->mode != VOIDmode
961 && GET_MODE (op[arity]) != VOIDmode
962 && GET_MODE (op[arity]) != insn_op->mode
963 && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
964 GET_MODE (op[arity]), 0))
965 != NULL_RTX))
966 {
967 op[arity] = tmp_rtx;
968 }
969 else if (GET_MODE (op[arity]) == insn_op->mode
970 || GET_MODE (op[arity]) == VOIDmode
971 || (insn_op->predicate == address_operand
972 && GET_MODE (op[arity]) == Pmode))
973 {
974 /* An address_operand usually has VOIDmode in the expander
975 so we cannot use this. */
976 machine_mode target_mode =
977 (insn_op->predicate == address_operand
978 ? Pmode : insn_op->mode);
979 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
980 }
981
982 if (!insn_op->predicate (op[arity], insn_op->mode))
983 {
984 error ("Invalid argument %d for builtin %qF", arity + 1, fndecl);
985 return const0_rtx;
986 }
987 arity++;
988 }
989
990 switch (arity)
991 {
992 case 0:
993 pat = GEN_FCN (icode) (target);
994 break;
995 case 1:
996 if (nonvoid)
997 pat = GEN_FCN (icode) (target, op[0]);
998 else
999 pat = GEN_FCN (icode) (op[0]);
1000 break;
1001 case 2:
1002 if (nonvoid)
1003 pat = GEN_FCN (icode) (target, op[0], op[1]);
1004 else
1005 pat = GEN_FCN (icode) (op[0], op[1]);
1006 break;
1007 case 3:
1008 if (nonvoid)
1009 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
1010 else
1011 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1012 break;
1013 case 4:
1014 if (nonvoid)
1015 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1016 else
1017 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1018 break;
1019 case 5:
1020 if (nonvoid)
1021 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1022 else
1023 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1024 break;
1025 case 6:
1026 if (nonvoid)
1027 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1028 else
1029 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1030 break;
1031 default:
1032 gcc_unreachable ();
1033 }
1034 if (!pat)
1035 return NULL_RTX;
1036 emit_insn (pat);
1037
1038 if (nonvoid)
1039 return target;
1040 else
1041 return const0_rtx;
1042 }
1043
1044
1045 static const int s390_hotpatch_hw_max = 1000000;
1046 static int s390_hotpatch_hw_before_label = 0;
1047 static int s390_hotpatch_hw_after_label = 0;
1048
1049 /* Check whether the hotpatch attribute is applied to a function and, if it has
1050 an argument, the argument is valid. */
1051
1052 static tree
1053 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1054 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1055 {
1056 tree expr;
1057 tree expr2;
1058 int err;
1059
1060 if (TREE_CODE (*node) != FUNCTION_DECL)
1061 {
1062 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1063 name);
1064 *no_add_attrs = true;
1065 }
1066 if (args != NULL && TREE_CHAIN (args) != NULL)
1067 {
1068 expr = TREE_VALUE (args);
1069 expr2 = TREE_VALUE (TREE_CHAIN (args));
1070 }
1071 if (args == NULL || TREE_CHAIN (args) == NULL)
1072 err = 1;
1073 else if (TREE_CODE (expr) != INTEGER_CST
1074 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1075 || wi::gtu_p (expr, s390_hotpatch_hw_max))
1076 err = 1;
1077 else if (TREE_CODE (expr2) != INTEGER_CST
1078 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1079 || wi::gtu_p (expr2, s390_hotpatch_hw_max))
1080 err = 1;
1081 else
1082 err = 0;
1083 if (err)
1084 {
1085 error ("requested %qE attribute is not a comma separated pair of"
1086 " non-negative integer constants or too large (max. %d)", name,
1087 s390_hotpatch_hw_max);
1088 *no_add_attrs = true;
1089 }
1090
1091 return NULL_TREE;
1092 }
1093
1094 /* Expand the s390_vector_bool type attribute. */
1095
1096 static tree
1097 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1098 tree args ATTRIBUTE_UNUSED,
1099 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1100 {
1101 tree type = *node, result = NULL_TREE;
1102 machine_mode mode;
1103
1104 while (POINTER_TYPE_P (type)
1105 || TREE_CODE (type) == FUNCTION_TYPE
1106 || TREE_CODE (type) == METHOD_TYPE
1107 || TREE_CODE (type) == ARRAY_TYPE)
1108 type = TREE_TYPE (type);
1109
1110 mode = TYPE_MODE (type);
1111 switch (mode)
1112 {
1113 case DImode: case V2DImode: result = s390_builtin_types[BT_BV2DI]; break;
1114 case SImode: case V4SImode: result = s390_builtin_types[BT_BV4SI]; break;
1115 case HImode: case V8HImode: result = s390_builtin_types[BT_BV8HI]; break;
1116 case QImode: case V16QImode: result = s390_builtin_types[BT_BV16QI];
1117 default: break;
1118 }
1119
1120 *no_add_attrs = true; /* No need to hang on to the attribute. */
1121
1122 if (result)
1123 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1124
1125 return NULL_TREE;
1126 }
1127
1128 static const struct attribute_spec s390_attribute_table[] = {
1129 { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
1130 { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
1131 /* End element. */
1132 { NULL, 0, 0, false, false, false, NULL, false }
1133 };
1134
1135 /* Return the alignment for LABEL. We default to the -falign-labels
1136 value except for the literal pool base label. */
1137 int
1138 s390_label_align (rtx_insn *label)
1139 {
1140 rtx_insn *prev_insn = prev_active_insn (label);
1141 rtx set, src;
1142
1143 if (prev_insn == NULL_RTX)
1144 goto old;
1145
1146 set = single_set (prev_insn);
1147
1148 if (set == NULL_RTX)
1149 goto old;
1150
1151 src = SET_SRC (set);
1152
1153 /* Don't align literal pool base labels. */
1154 if (GET_CODE (src) == UNSPEC
1155 && XINT (src, 1) == UNSPEC_MAIN_BASE)
1156 return 0;
1157
1158 old:
1159 return align_labels_log;
1160 }
1161
1162 static machine_mode
1163 s390_libgcc_cmp_return_mode (void)
1164 {
1165 return TARGET_64BIT ? DImode : SImode;
1166 }
1167
1168 static machine_mode
1169 s390_libgcc_shift_count_mode (void)
1170 {
1171 return TARGET_64BIT ? DImode : SImode;
1172 }
1173
1174 static machine_mode
1175 s390_unwind_word_mode (void)
1176 {
1177 return TARGET_64BIT ? DImode : SImode;
1178 }
1179
1180 /* Return true if the back end supports mode MODE. */
1181 static bool
1182 s390_scalar_mode_supported_p (machine_mode mode)
1183 {
1184 /* In contrast to the default implementation reject TImode constants on 31bit
1185 TARGET_ZARCH for ABI compliance. */
1186 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1187 return false;
1188
1189 if (DECIMAL_FLOAT_MODE_P (mode))
1190 return default_decimal_float_supported_p ();
1191
1192 return default_scalar_mode_supported_p (mode);
1193 }
1194
1195 /* Return true if the back end supports vector mode MODE. */
1196 static bool
1197 s390_vector_mode_supported_p (machine_mode mode)
1198 {
1199 machine_mode inner;
1200
1201 if (!VECTOR_MODE_P (mode)
1202 || !TARGET_VX
1203 || GET_MODE_SIZE (mode) > 16)
1204 return false;
1205
1206 inner = GET_MODE_INNER (mode);
1207
1208 switch (inner)
1209 {
1210 case QImode:
1211 case HImode:
1212 case SImode:
1213 case DImode:
1214 case TImode:
1215 case SFmode:
1216 case DFmode:
1217 case TFmode:
1218 return true;
1219 default:
1220 return false;
1221 }
1222 }
1223
1224 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1225
1226 void
1227 s390_set_has_landing_pad_p (bool value)
1228 {
1229 cfun->machine->has_landing_pad_p = value;
1230 }
1231
1232 /* If two condition code modes are compatible, return a condition code
1233 mode which is compatible with both. Otherwise, return
1234 VOIDmode. */
1235
1236 static machine_mode
1237 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1238 {
1239 if (m1 == m2)
1240 return m1;
1241
1242 switch (m1)
1243 {
1244 case CCZmode:
1245 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1246 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1247 return m2;
1248 return VOIDmode;
1249
1250 case CCSmode:
1251 case CCUmode:
1252 case CCTmode:
1253 case CCSRmode:
1254 case CCURmode:
1255 case CCZ1mode:
1256 if (m2 == CCZmode)
1257 return m1;
1258
1259 return VOIDmode;
1260
1261 default:
1262 return VOIDmode;
1263 }
1264 return VOIDmode;
1265 }
1266
1267 /* Return true if SET either doesn't set the CC register, or else
1268 the source and destination have matching CC modes and that
1269 CC mode is at least as constrained as REQ_MODE. */
1270
1271 static bool
1272 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1273 {
1274 machine_mode set_mode;
1275
1276 gcc_assert (GET_CODE (set) == SET);
1277
1278 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1279 return 1;
1280
1281 set_mode = GET_MODE (SET_DEST (set));
1282 switch (set_mode)
1283 {
1284 case CCSmode:
1285 case CCSRmode:
1286 case CCUmode:
1287 case CCURmode:
1288 case CCLmode:
1289 case CCL1mode:
1290 case CCL2mode:
1291 case CCL3mode:
1292 case CCT1mode:
1293 case CCT2mode:
1294 case CCT3mode:
1295 case CCVEQmode:
1296 case CCVHmode:
1297 case CCVHUmode:
1298 case CCVFHmode:
1299 case CCVFHEmode:
1300 if (req_mode != set_mode)
1301 return 0;
1302 break;
1303
1304 case CCZmode:
1305 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1306 && req_mode != CCSRmode && req_mode != CCURmode)
1307 return 0;
1308 break;
1309
1310 case CCAPmode:
1311 case CCANmode:
1312 if (req_mode != CCAmode)
1313 return 0;
1314 break;
1315
1316 default:
1317 gcc_unreachable ();
1318 }
1319
1320 return (GET_MODE (SET_SRC (set)) == set_mode);
1321 }
1322
1323 /* Return true if every SET in INSN that sets the CC register
1324 has source and destination with matching CC modes and that
1325 CC mode is at least as constrained as REQ_MODE.
1326 If REQ_MODE is VOIDmode, always return false. */
1327
1328 bool
1329 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1330 {
1331 int i;
1332
1333 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1334 if (req_mode == VOIDmode)
1335 return false;
1336
1337 if (GET_CODE (PATTERN (insn)) == SET)
1338 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1339
1340 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1341 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1342 {
1343 rtx set = XVECEXP (PATTERN (insn), 0, i);
1344 if (GET_CODE (set) == SET)
1345 if (!s390_match_ccmode_set (set, req_mode))
1346 return false;
1347 }
1348
1349 return true;
1350 }
1351
1352 /* If a test-under-mask instruction can be used to implement
1353 (compare (and ... OP1) OP2), return the CC mode required
1354 to do that. Otherwise, return VOIDmode.
1355 MIXED is true if the instruction can distinguish between
1356 CC1 and CC2 for mixed selected bits (TMxx), it is false
1357 if the instruction cannot (TM). */
1358
1359 machine_mode
1360 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1361 {
1362 int bit0, bit1;
1363
1364 /* ??? Fixme: should work on CONST_WIDE_INT as well. */
1365 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1366 return VOIDmode;
1367
1368 /* Selected bits all zero: CC0.
1369 e.g.: int a; if ((a & (16 + 128)) == 0) */
1370 if (INTVAL (op2) == 0)
1371 return CCTmode;
1372
1373 /* Selected bits all one: CC3.
1374 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1375 if (INTVAL (op2) == INTVAL (op1))
1376 return CCT3mode;
1377
1378 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1379 int a;
1380 if ((a & (16 + 128)) == 16) -> CCT1
1381 if ((a & (16 + 128)) == 128) -> CCT2 */
1382 if (mixed)
1383 {
1384 bit1 = exact_log2 (INTVAL (op2));
1385 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1386 if (bit0 != -1 && bit1 != -1)
1387 return bit0 > bit1 ? CCT1mode : CCT2mode;
1388 }
1389
1390 return VOIDmode;
1391 }
1392
1393 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1394 OP0 and OP1 of a COMPARE, return the mode to be used for the
1395 comparison. */
1396
1397 machine_mode
1398 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1399 {
1400 if (TARGET_VX
1401 && register_operand (op0, DFmode)
1402 && register_operand (op1, DFmode))
1403 {
1404 /* LT, LE, UNGT, UNGE require swapping OP0 and OP1. Either
1405 s390_emit_compare or s390_canonicalize_comparison will take
1406 care of it. */
1407 switch (code)
1408 {
1409 case EQ:
1410 case NE:
1411 return CCVEQmode;
1412 case GT:
1413 case UNLE:
1414 return CCVFHmode;
1415 case GE:
1416 case UNLT:
1417 return CCVFHEmode;
1418 default:
1419 ;
1420 }
1421 }
1422
1423 switch (code)
1424 {
1425 case EQ:
1426 case NE:
1427 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1428 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1429 return CCAPmode;
1430 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1431 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1432 return CCAPmode;
1433 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1434 || GET_CODE (op1) == NEG)
1435 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1436 return CCLmode;
1437
1438 if (GET_CODE (op0) == AND)
1439 {
1440 /* Check whether we can potentially do it via TM. */
1441 machine_mode ccmode;
1442 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1443 if (ccmode != VOIDmode)
1444 {
1445 /* Relax CCTmode to CCZmode to allow fall-back to AND
1446 if that turns out to be beneficial. */
1447 return ccmode == CCTmode ? CCZmode : ccmode;
1448 }
1449 }
1450
1451 if (register_operand (op0, HImode)
1452 && GET_CODE (op1) == CONST_INT
1453 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1454 return CCT3mode;
1455 if (register_operand (op0, QImode)
1456 && GET_CODE (op1) == CONST_INT
1457 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1458 return CCT3mode;
1459
1460 return CCZmode;
1461
1462 case LE:
1463 case LT:
1464 case GE:
1465 case GT:
1466 /* The only overflow condition of NEG and ABS happens when
1467 -INT_MAX is used as parameter, which stays negative. So
1468 we have an overflow from a positive value to a negative.
1469 Using CCAP mode the resulting cc can be used for comparisons. */
1470 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1471 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1472 return CCAPmode;
1473
1474 /* If constants are involved in an add instruction it is possible to use
1475 the resulting cc for comparisons with zero. Knowing the sign of the
1476 constant the overflow behavior gets predictable. e.g.:
1477 int a, b; if ((b = a + c) > 0)
1478 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1479 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1480 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1481 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1482 /* Avoid INT32_MIN on 32 bit. */
1483 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1484 {
1485 if (INTVAL (XEXP((op0), 1)) < 0)
1486 return CCANmode;
1487 else
1488 return CCAPmode;
1489 }
1490 /* Fall through. */
1491 case UNORDERED:
1492 case ORDERED:
1493 case UNEQ:
1494 case UNLE:
1495 case UNLT:
1496 case UNGE:
1497 case UNGT:
1498 case LTGT:
1499 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1500 && GET_CODE (op1) != CONST_INT)
1501 return CCSRmode;
1502 return CCSmode;
1503
1504 case LTU:
1505 case GEU:
1506 if (GET_CODE (op0) == PLUS
1507 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1508 return CCL1mode;
1509
1510 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1511 && GET_CODE (op1) != CONST_INT)
1512 return CCURmode;
1513 return CCUmode;
1514
1515 case LEU:
1516 case GTU:
1517 if (GET_CODE (op0) == MINUS
1518 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1519 return CCL2mode;
1520
1521 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1522 && GET_CODE (op1) != CONST_INT)
1523 return CCURmode;
1524 return CCUmode;
1525
1526 default:
1527 gcc_unreachable ();
1528 }
1529 }
1530
1531 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1532 that we can implement more efficiently. */
1533
1534 static void
1535 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1536 bool op0_preserve_value)
1537 {
1538 if (op0_preserve_value)
1539 return;
1540
1541 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1542 if ((*code == EQ || *code == NE)
1543 && *op1 == const0_rtx
1544 && GET_CODE (*op0) == ZERO_EXTRACT
1545 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1546 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1547 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1548 {
1549 rtx inner = XEXP (*op0, 0);
1550 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1551 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1552 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1553
1554 if (len > 0 && len < modesize
1555 && pos >= 0 && pos + len <= modesize
1556 && modesize <= HOST_BITS_PER_WIDE_INT)
1557 {
1558 unsigned HOST_WIDE_INT block;
1559 block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
1560 block <<= modesize - pos - len;
1561
1562 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1563 gen_int_mode (block, GET_MODE (inner)));
1564 }
1565 }
1566
1567 /* Narrow AND of memory against immediate to enable TM. */
1568 if ((*code == EQ || *code == NE)
1569 && *op1 == const0_rtx
1570 && GET_CODE (*op0) == AND
1571 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1572 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1573 {
1574 rtx inner = XEXP (*op0, 0);
1575 rtx mask = XEXP (*op0, 1);
1576
1577 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1578 if (GET_CODE (inner) == SUBREG
1579 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1580 && (GET_MODE_SIZE (GET_MODE (inner))
1581 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1582 && ((INTVAL (mask)
1583 & GET_MODE_MASK (GET_MODE (inner))
1584 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1585 == 0))
1586 inner = SUBREG_REG (inner);
1587
1588 /* Do not change volatile MEMs. */
1589 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1590 {
1591 int part = s390_single_part (XEXP (*op0, 1),
1592 GET_MODE (inner), QImode, 0);
1593 if (part >= 0)
1594 {
1595 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1596 inner = adjust_address_nv (inner, QImode, part);
1597 *op0 = gen_rtx_AND (QImode, inner, mask);
1598 }
1599 }
1600 }
1601
1602 /* Narrow comparisons against 0xffff to HImode if possible. */
1603 if ((*code == EQ || *code == NE)
1604 && GET_CODE (*op1) == CONST_INT
1605 && INTVAL (*op1) == 0xffff
1606 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1607 && (nonzero_bits (*op0, GET_MODE (*op0))
1608 & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
1609 {
1610 *op0 = gen_lowpart (HImode, *op0);
1611 *op1 = constm1_rtx;
1612 }
1613
1614 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1615 if (GET_CODE (*op0) == UNSPEC
1616 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1617 && XVECLEN (*op0, 0) == 1
1618 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1619 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1620 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1621 && *op1 == const0_rtx)
1622 {
1623 enum rtx_code new_code = UNKNOWN;
1624 switch (*code)
1625 {
1626 case EQ: new_code = EQ; break;
1627 case NE: new_code = NE; break;
1628 case LT: new_code = GTU; break;
1629 case GT: new_code = LTU; break;
1630 case LE: new_code = GEU; break;
1631 case GE: new_code = LEU; break;
1632 default: break;
1633 }
1634
1635 if (new_code != UNKNOWN)
1636 {
1637 *op0 = XVECEXP (*op0, 0, 0);
1638 *code = new_code;
1639 }
1640 }
1641
1642 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1643 if (GET_CODE (*op0) == UNSPEC
1644 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1645 && XVECLEN (*op0, 0) == 1
1646 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1647 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1648 && CONST_INT_P (*op1))
1649 {
1650 enum rtx_code new_code = UNKNOWN;
1651 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1652 {
1653 case CCZmode:
1654 case CCRAWmode:
1655 switch (*code)
1656 {
1657 case EQ: new_code = EQ; break;
1658 case NE: new_code = NE; break;
1659 default: break;
1660 }
1661 break;
1662 default: break;
1663 }
1664
1665 if (new_code != UNKNOWN)
1666 {
1667 /* For CCRAWmode put the required cc mask into the second
1668 operand. */
1669 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1670 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1671 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1672 *op0 = XVECEXP (*op0, 0, 0);
1673 *code = new_code;
1674 }
1675 }
1676
1677 /* Simplify cascaded EQ, NE with const0_rtx. */
1678 if ((*code == NE || *code == EQ)
1679 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1680 && GET_MODE (*op0) == SImode
1681 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1682 && REG_P (XEXP (*op0, 0))
1683 && XEXP (*op0, 1) == const0_rtx
1684 && *op1 == const0_rtx)
1685 {
1686 if ((*code == EQ && GET_CODE (*op0) == NE)
1687 || (*code == NE && GET_CODE (*op0) == EQ))
1688 *code = EQ;
1689 else
1690 *code = NE;
1691 *op0 = XEXP (*op0, 0);
1692 }
1693
1694 /* Prefer register over memory as first operand. */
1695 if (MEM_P (*op0) && REG_P (*op1))
1696 {
1697 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1698 *code = (int)swap_condition ((enum rtx_code)*code);
1699 }
1700
1701 /* Using the scalar variants of vector instructions for 64 bit FP
1702 comparisons might require swapping the operands. */
1703 if (TARGET_VX
1704 && register_operand (*op0, DFmode)
1705 && register_operand (*op1, DFmode)
1706 && (*code == LT || *code == LE || *code == UNGT || *code == UNGE))
1707 {
1708 rtx tmp;
1709
1710 switch (*code)
1711 {
1712 case LT: *code = GT; break;
1713 case LE: *code = GE; break;
1714 case UNGT: *code = UNLE; break;
1715 case UNGE: *code = UNLT; break;
1716 default: ;
1717 }
1718 tmp = *op0; *op0 = *op1; *op1 = tmp;
1719 }
1720 }
1721
1722 /* Helper function for s390_emit_compare. If possible emit a 64 bit
1723 FP compare using the single element variant of vector instructions.
1724 Replace CODE with the comparison code to be used in the CC reg
1725 compare and return the condition code register RTX in CC. */
1726
1727 static bool
1728 s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
1729 rtx *cc)
1730 {
1731 machine_mode cmp_mode;
1732 bool swap_p = false;
1733
1734 switch (*code)
1735 {
1736 case EQ: cmp_mode = CCVEQmode; break;
1737 case NE: cmp_mode = CCVEQmode; break;
1738 case GT: cmp_mode = CCVFHmode; break;
1739 case GE: cmp_mode = CCVFHEmode; break;
1740 case UNLE: cmp_mode = CCVFHmode; break;
1741 case UNLT: cmp_mode = CCVFHEmode; break;
1742 case LT: cmp_mode = CCVFHmode; *code = GT; swap_p = true; break;
1743 case LE: cmp_mode = CCVFHEmode; *code = GE; swap_p = true; break;
1744 case UNGE: cmp_mode = CCVFHmode; *code = UNLE; swap_p = true; break;
1745 case UNGT: cmp_mode = CCVFHEmode; *code = UNLT; swap_p = true; break;
1746 default: return false;
1747 }
1748
1749 if (swap_p)
1750 {
1751 rtx tmp = cmp2;
1752 cmp2 = cmp1;
1753 cmp1 = tmp;
1754 }
1755 *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
1756 emit_insn (gen_rtx_PARALLEL (VOIDmode,
1757 gen_rtvec (2,
1758 gen_rtx_SET (*cc,
1759 gen_rtx_COMPARE (cmp_mode, cmp1,
1760 cmp2)),
1761 gen_rtx_CLOBBER (VOIDmode,
1762 gen_rtx_SCRATCH (V2DImode)))));
1763 return true;
1764 }
1765
1766
1767 /* Emit a compare instruction suitable to implement the comparison
1768 OP0 CODE OP1. Return the correct condition RTL to be placed in
1769 the IF_THEN_ELSE of the conditional branch testing the result. */
1770
1771 rtx
1772 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1773 {
1774 machine_mode mode = s390_select_ccmode (code, op0, op1);
1775 rtx cc;
1776
1777 if (TARGET_VX
1778 && register_operand (op0, DFmode)
1779 && register_operand (op1, DFmode)
1780 && s390_expand_vec_compare_scalar (&code, op0, op1, &cc))
1781 {
1782 /* Work has been done by s390_expand_vec_compare_scalar already. */
1783 }
1784 else if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1785 {
1786 /* Do not output a redundant compare instruction if a
1787 compare_and_swap pattern already computed the result and the
1788 machine modes are compatible. */
1789 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1790 == GET_MODE (op0));
1791 cc = op0;
1792 }
1793 else
1794 {
1795 cc = gen_rtx_REG (mode, CC_REGNUM);
1796 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1797 }
1798
1799 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1800 }
1801
1802 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1803 matches CMP.
1804 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1805 conditional branch testing the result. */
1806
1807 static rtx
1808 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1809 rtx cmp, rtx new_rtx)
1810 {
1811 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
1812 return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
1813 const0_rtx);
1814 }
1815
1816 /* Emit a jump instruction to TARGET and return it. If COND is
1817 NULL_RTX, emit an unconditional jump, else a conditional jump under
1818 condition COND. */
1819
1820 rtx_insn *
1821 s390_emit_jump (rtx target, rtx cond)
1822 {
1823 rtx insn;
1824
1825 target = gen_rtx_LABEL_REF (VOIDmode, target);
1826 if (cond)
1827 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1828
1829 insn = gen_rtx_SET (pc_rtx, target);
1830 return emit_jump_insn (insn);
1831 }
1832
1833 /* Return branch condition mask to implement a branch
1834 specified by CODE. Return -1 for invalid comparisons. */
1835
1836 int
1837 s390_branch_condition_mask (rtx code)
1838 {
1839 const int CC0 = 1 << 3;
1840 const int CC1 = 1 << 2;
1841 const int CC2 = 1 << 1;
1842 const int CC3 = 1 << 0;
1843
1844 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1845 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1846 gcc_assert (XEXP (code, 1) == const0_rtx
1847 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1848 && CONST_INT_P (XEXP (code, 1))));
1849
1850
1851 switch (GET_MODE (XEXP (code, 0)))
1852 {
1853 case CCZmode:
1854 case CCZ1mode:
1855 switch (GET_CODE (code))
1856 {
1857 case EQ: return CC0;
1858 case NE: return CC1 | CC2 | CC3;
1859 default: return -1;
1860 }
1861 break;
1862
1863 case CCT1mode:
1864 switch (GET_CODE (code))
1865 {
1866 case EQ: return CC1;
1867 case NE: return CC0 | CC2 | CC3;
1868 default: return -1;
1869 }
1870 break;
1871
1872 case CCT2mode:
1873 switch (GET_CODE (code))
1874 {
1875 case EQ: return CC2;
1876 case NE: return CC0 | CC1 | CC3;
1877 default: return -1;
1878 }
1879 break;
1880
1881 case CCT3mode:
1882 switch (GET_CODE (code))
1883 {
1884 case EQ: return CC3;
1885 case NE: return CC0 | CC1 | CC2;
1886 default: return -1;
1887 }
1888 break;
1889
1890 case CCLmode:
1891 switch (GET_CODE (code))
1892 {
1893 case EQ: return CC0 | CC2;
1894 case NE: return CC1 | CC3;
1895 default: return -1;
1896 }
1897 break;
1898
1899 case CCL1mode:
1900 switch (GET_CODE (code))
1901 {
1902 case LTU: return CC2 | CC3; /* carry */
1903 case GEU: return CC0 | CC1; /* no carry */
1904 default: return -1;
1905 }
1906 break;
1907
1908 case CCL2mode:
1909 switch (GET_CODE (code))
1910 {
1911 case GTU: return CC0 | CC1; /* borrow */
1912 case LEU: return CC2 | CC3; /* no borrow */
1913 default: return -1;
1914 }
1915 break;
1916
1917 case CCL3mode:
1918 switch (GET_CODE (code))
1919 {
1920 case EQ: return CC0 | CC2;
1921 case NE: return CC1 | CC3;
1922 case LTU: return CC1;
1923 case GTU: return CC3;
1924 case LEU: return CC1 | CC2;
1925 case GEU: return CC2 | CC3;
1926 default: return -1;
1927 }
1928
1929 case CCUmode:
1930 switch (GET_CODE (code))
1931 {
1932 case EQ: return CC0;
1933 case NE: return CC1 | CC2 | CC3;
1934 case LTU: return CC1;
1935 case GTU: return CC2;
1936 case LEU: return CC0 | CC1;
1937 case GEU: return CC0 | CC2;
1938 default: return -1;
1939 }
1940 break;
1941
1942 case CCURmode:
1943 switch (GET_CODE (code))
1944 {
1945 case EQ: return CC0;
1946 case NE: return CC2 | CC1 | CC3;
1947 case LTU: return CC2;
1948 case GTU: return CC1;
1949 case LEU: return CC0 | CC2;
1950 case GEU: return CC0 | CC1;
1951 default: return -1;
1952 }
1953 break;
1954
1955 case CCAPmode:
1956 switch (GET_CODE (code))
1957 {
1958 case EQ: return CC0;
1959 case NE: return CC1 | CC2 | CC3;
1960 case LT: return CC1 | CC3;
1961 case GT: return CC2;
1962 case LE: return CC0 | CC1 | CC3;
1963 case GE: return CC0 | CC2;
1964 default: return -1;
1965 }
1966 break;
1967
1968 case CCANmode:
1969 switch (GET_CODE (code))
1970 {
1971 case EQ: return CC0;
1972 case NE: return CC1 | CC2 | CC3;
1973 case LT: return CC1;
1974 case GT: return CC2 | CC3;
1975 case LE: return CC0 | CC1;
1976 case GE: return CC0 | CC2 | CC3;
1977 default: return -1;
1978 }
1979 break;
1980
1981 case CCSmode:
1982 switch (GET_CODE (code))
1983 {
1984 case EQ: return CC0;
1985 case NE: return CC1 | CC2 | CC3;
1986 case LT: return CC1;
1987 case GT: return CC2;
1988 case LE: return CC0 | CC1;
1989 case GE: return CC0 | CC2;
1990 case UNORDERED: return CC3;
1991 case ORDERED: return CC0 | CC1 | CC2;
1992 case UNEQ: return CC0 | CC3;
1993 case UNLT: return CC1 | CC3;
1994 case UNGT: return CC2 | CC3;
1995 case UNLE: return CC0 | CC1 | CC3;
1996 case UNGE: return CC0 | CC2 | CC3;
1997 case LTGT: return CC1 | CC2;
1998 default: return -1;
1999 }
2000 break;
2001
2002 case CCSRmode:
2003 switch (GET_CODE (code))
2004 {
2005 case EQ: return CC0;
2006 case NE: return CC2 | CC1 | CC3;
2007 case LT: return CC2;
2008 case GT: return CC1;
2009 case LE: return CC0 | CC2;
2010 case GE: return CC0 | CC1;
2011 case UNORDERED: return CC3;
2012 case ORDERED: return CC0 | CC2 | CC1;
2013 case UNEQ: return CC0 | CC3;
2014 case UNLT: return CC2 | CC3;
2015 case UNGT: return CC1 | CC3;
2016 case UNLE: return CC0 | CC2 | CC3;
2017 case UNGE: return CC0 | CC1 | CC3;
2018 case LTGT: return CC2 | CC1;
2019 default: return -1;
2020 }
2021 break;
2022
2023 /* Vector comparison modes. */
2024
2025 case CCVEQmode:
2026 switch (GET_CODE (code))
2027 {
2028 case EQ: return CC0;
2029 case NE: return CC3;
2030 default: return -1;
2031 }
2032
2033 case CCVEQANYmode:
2034 switch (GET_CODE (code))
2035 {
2036 case EQ: return CC0 | CC1;
2037 case NE: return CC3 | CC1;
2038 default: return -1;
2039 }
2040
2041 /* Integer vector compare modes. */
2042
2043 case CCVHmode:
2044 switch (GET_CODE (code))
2045 {
2046 case GT: return CC0;
2047 case LE: return CC3;
2048 default: return -1;
2049 }
2050
2051 case CCVHANYmode:
2052 switch (GET_CODE (code))
2053 {
2054 case GT: return CC0 | CC1;
2055 case LE: return CC3 | CC1;
2056 default: return -1;
2057 }
2058
2059 case CCVHUmode:
2060 switch (GET_CODE (code))
2061 {
2062 case GTU: return CC0;
2063 case LEU: return CC3;
2064 default: return -1;
2065 }
2066
2067 case CCVHUANYmode:
2068 switch (GET_CODE (code))
2069 {
2070 case GTU: return CC0 | CC1;
2071 case LEU: return CC3 | CC1;
2072 default: return -1;
2073 }
2074
2075 /* FP vector compare modes. */
2076
2077 case CCVFHmode:
2078 switch (GET_CODE (code))
2079 {
2080 case GT: return CC0;
2081 case UNLE: return CC3;
2082 default: return -1;
2083 }
2084
2085 case CCVFHANYmode:
2086 switch (GET_CODE (code))
2087 {
2088 case GT: return CC0 | CC1;
2089 case UNLE: return CC3 | CC1;
2090 default: return -1;
2091 }
2092
2093 case CCVFHEmode:
2094 switch (GET_CODE (code))
2095 {
2096 case GE: return CC0;
2097 case UNLT: return CC3;
2098 default: return -1;
2099 }
2100
2101 case CCVFHEANYmode:
2102 switch (GET_CODE (code))
2103 {
2104 case GE: return CC0 | CC1;
2105 case UNLT: return CC3 | CC1;
2106 default: return -1;
2107 }
2108
2109
2110 case CCRAWmode:
2111 switch (GET_CODE (code))
2112 {
2113 case EQ:
2114 return INTVAL (XEXP (code, 1));
2115 case NE:
2116 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2117 default:
2118 gcc_unreachable ();
2119 }
2120
2121 default:
2122 return -1;
2123 }
2124 }
2125
2126
2127 /* Return branch condition mask to implement a compare and branch
2128 specified by CODE. Return -1 for invalid comparisons. */
2129
2130 int
2131 s390_compare_and_branch_condition_mask (rtx code)
2132 {
2133 const int CC0 = 1 << 3;
2134 const int CC1 = 1 << 2;
2135 const int CC2 = 1 << 1;
2136
2137 switch (GET_CODE (code))
2138 {
2139 case EQ:
2140 return CC0;
2141 case NE:
2142 return CC1 | CC2;
2143 case LT:
2144 case LTU:
2145 return CC1;
2146 case GT:
2147 case GTU:
2148 return CC2;
2149 case LE:
2150 case LEU:
2151 return CC0 | CC1;
2152 case GE:
2153 case GEU:
2154 return CC0 | CC2;
2155 default:
2156 gcc_unreachable ();
2157 }
2158 return -1;
2159 }
2160
2161 /* If INV is false, return assembler mnemonic string to implement
2162 a branch specified by CODE. If INV is true, return mnemonic
2163 for the corresponding inverted branch. */
2164
2165 static const char *
2166 s390_branch_condition_mnemonic (rtx code, int inv)
2167 {
2168 int mask;
2169
2170 static const char *const mnemonic[16] =
2171 {
2172 NULL, "o", "h", "nle",
2173 "l", "nhe", "lh", "ne",
2174 "e", "nlh", "he", "nl",
2175 "le", "nh", "no", NULL
2176 };
2177
2178 if (GET_CODE (XEXP (code, 0)) == REG
2179 && REGNO (XEXP (code, 0)) == CC_REGNUM
2180 && (XEXP (code, 1) == const0_rtx
2181 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2182 && CONST_INT_P (XEXP (code, 1)))))
2183 mask = s390_branch_condition_mask (code);
2184 else
2185 mask = s390_compare_and_branch_condition_mask (code);
2186
2187 gcc_assert (mask >= 0);
2188
2189 if (inv)
2190 mask ^= 15;
2191
2192 gcc_assert (mask >= 1 && mask <= 14);
2193
2194 return mnemonic[mask];
2195 }
2196
2197 /* Return the part of op which has a value different from def.
2198 The size of the part is determined by mode.
2199 Use this function only if you already know that op really
2200 contains such a part. */
2201
2202 unsigned HOST_WIDE_INT
2203 s390_extract_part (rtx op, machine_mode mode, int def)
2204 {
2205 unsigned HOST_WIDE_INT value = 0;
2206 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2207 int part_bits = GET_MODE_BITSIZE (mode);
2208 unsigned HOST_WIDE_INT part_mask
2209 = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
2210 int i;
2211
2212 for (i = 0; i < max_parts; i++)
2213 {
2214 if (i == 0)
2215 value = (unsigned HOST_WIDE_INT) INTVAL (op);
2216 else
2217 value >>= part_bits;
2218
2219 if ((value & part_mask) != (def & part_mask))
2220 return value & part_mask;
2221 }
2222
2223 gcc_unreachable ();
2224 }
2225
2226 /* If OP is an integer constant of mode MODE with exactly one
2227 part of mode PART_MODE unequal to DEF, return the number of that
2228 part. Otherwise, return -1. */
2229
2230 int
2231 s390_single_part (rtx op,
2232 machine_mode mode,
2233 machine_mode part_mode,
2234 int def)
2235 {
2236 unsigned HOST_WIDE_INT value = 0;
2237 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2238 unsigned HOST_WIDE_INT part_mask
2239 = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
2240 int i, part = -1;
2241
2242 if (GET_CODE (op) != CONST_INT)
2243 return -1;
2244
2245 for (i = 0; i < n_parts; i++)
2246 {
2247 if (i == 0)
2248 value = (unsigned HOST_WIDE_INT) INTVAL (op);
2249 else
2250 value >>= GET_MODE_BITSIZE (part_mode);
2251
2252 if ((value & part_mask) != (def & part_mask))
2253 {
2254 if (part != -1)
2255 return -1;
2256 else
2257 part = i;
2258 }
2259 }
2260 return part == -1 ? -1 : n_parts - 1 - part;
2261 }
2262
2263 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2264 bits and no other bits are set in (the lower SIZE bits of) IN.
2265
2266 PSTART and PEND can be used to obtain the start and end
2267 position (inclusive) of the bitfield relative to 64
2268 bits. *PSTART / *PEND gives the position of the first/last bit
2269 of the bitfield counting from the highest order bit starting
2270 with zero. */
2271
2272 bool
2273 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2274 int *pstart, int *pend)
2275 {
2276 int start;
2277 int end = -1;
2278 int lowbit = sizeof (HOST_WIDE_INT) * BITS_PER_UNIT - 1;
2279 int highbit = sizeof (HOST_WIDE_INT) * BITS_PER_UNIT - size;
2280 unsigned HOST_WIDE_INT bitmask = 1ULL;
2281
2282 gcc_assert (!!pstart == !!pend);
2283 for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2284 if (end == -1)
2285 {
2286 /* Look for the rightmost bit of a contiguous range of ones. */
2287 if (bitmask & in)
2288 /* Found it. */
2289 end = start;
2290 }
2291 else
2292 {
2293 /* Look for the firt zero bit after the range of ones. */
2294 if (! (bitmask & in))
2295 /* Found it. */
2296 break;
2297 }
2298 /* We're one past the last one-bit. */
2299 start++;
2300
2301 if (end == -1)
2302 /* No one bits found. */
2303 return false;
2304
2305 if (start > highbit)
2306 {
2307 unsigned HOST_WIDE_INT mask;
2308
2309 /* Calculate a mask for all bits beyond the contiguous bits. */
2310 mask = ((~(0ULL) >> highbit) & (~(0ULL) << (lowbit - start + 1)));
2311 if (mask & in)
2312 /* There are more bits set beyond the first range of one bits. */
2313 return false;
2314 }
2315
2316 if (pstart)
2317 {
2318 *pstart = start;
2319 *pend = end;
2320 }
2321
2322 return true;
2323 }
2324
2325 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2326 if ~IN contains a contiguous bitfield. In that case, *END is <
2327 *START.
2328
2329 If WRAP_P is true, a bitmask that wraps around is also tested.
2330 When a wraparoud occurs *START is greater than *END (in
2331 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2332 part of the range. If WRAP_P is false, no wraparound is
2333 tested. */
2334
2335 bool
2336 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2337 int size, int *start, int *end)
2338 {
2339 int bs = sizeof (HOST_WIDE_INT) * BITS_PER_UNIT;
2340 bool b;
2341
2342 gcc_assert (!!start == !!end);
2343 if ((in & ((~(0ULL)) >> (bs - size))) == 0)
2344 /* This cannot be expressed as a contiguous bitmask. Exit early because
2345 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2346 a valid bitmask. */
2347 return false;
2348 b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2349 if (b)
2350 return true;
2351 if (! wrap_p)
2352 return false;
2353 b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2354 if (b && start)
2355 {
2356 int s = *start;
2357 int e = *end;
2358
2359 gcc_assert (s >= 1);
2360 *start = ((e + 1) & (bs - 1));
2361 *end = ((s - 1 + bs) & (bs - 1));
2362 }
2363
2364 return b;
2365 }
2366
2367 /* Return true if OP contains the same contiguous bitfield in *all*
2368 its elements. START and END can be used to obtain the start and
2369 end position of the bitfield.
2370
2371 START/STOP give the position of the first/last bit of the bitfield
2372 counting from the lowest order bit starting with zero. In order to
2373 use these values for S/390 instructions this has to be converted to
2374 "bits big endian" style. */
2375
2376 bool
2377 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2378 {
2379 unsigned HOST_WIDE_INT mask;
2380 int size;
2381 rtx elt;
2382 bool b;
2383
2384 gcc_assert (!!start == !!end);
2385 if (!const_vec_duplicate_p (op, &elt)
2386 || !CONST_INT_P (elt))
2387 return false;
2388
2389 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2390
2391 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2392 if (size > 64)
2393 return false;
2394
2395 mask = UINTVAL (elt);
2396
2397 b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2398 if (b)
2399 {
2400 if (start)
2401 {
2402 int bs = sizeof (HOST_WIDE_INT) * BITS_PER_UNIT;
2403
2404 *start -= (bs - size);
2405 *end -= (bs - size);
2406 }
2407 return true;
2408 }
2409 else
2410 return false;
2411 }
2412
2413 /* Return true if C consists only of byte chunks being either 0 or
2414 0xff. If MASK is !=NULL a byte mask is generated which is
2415 appropriate for the vector generate byte mask instruction. */
2416
2417 bool
2418 s390_bytemask_vector_p (rtx op, unsigned *mask)
2419 {
2420 int i;
2421 unsigned tmp_mask = 0;
2422 int nunit, unit_size;
2423
2424 if (!VECTOR_MODE_P (GET_MODE (op))
2425 || GET_CODE (op) != CONST_VECTOR
2426 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2427 return false;
2428
2429 nunit = GET_MODE_NUNITS (GET_MODE (op));
2430 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2431
2432 for (i = 0; i < nunit; i++)
2433 {
2434 unsigned HOST_WIDE_INT c;
2435 int j;
2436
2437 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2438 return false;
2439
2440 c = UINTVAL (XVECEXP (op, 0, i));
2441 for (j = 0; j < unit_size; j++)
2442 {
2443 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2444 return false;
2445 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2446 c = c >> BITS_PER_UNIT;
2447 }
2448 }
2449
2450 if (mask != NULL)
2451 *mask = tmp_mask;
2452
2453 return true;
2454 }
2455
2456 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2457 equivalent to a shift followed by the AND. In particular, CONTIG
2458 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2459 for ROTL indicate a rotate to the right. */
2460
2461 bool
2462 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2463 {
2464 int start, end;
2465 bool ok;
2466
2467 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2468 gcc_assert (ok);
2469
2470 if (rotl >= 0)
2471 return (64 - end >= rotl);
2472 else
2473 {
2474 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2475 DIMode. */
2476 rotl = -rotl + (64 - bitsize);
2477 return (start >= rotl);
2478 }
2479 }
2480
2481 /* Check whether we can (and want to) split a double-word
2482 move in mode MODE from SRC to DST into two single-word
2483 moves, moving the subword FIRST_SUBWORD first. */
2484
2485 bool
2486 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2487 {
2488 /* Floating point and vector registers cannot be split. */
2489 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2490 return false;
2491
2492 /* We don't need to split if operands are directly accessible. */
2493 if (s_operand (src, mode) || s_operand (dst, mode))
2494 return false;
2495
2496 /* Non-offsettable memory references cannot be split. */
2497 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2498 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2499 return false;
2500
2501 /* Moving the first subword must not clobber a register
2502 needed to move the second subword. */
2503 if (register_operand (dst, mode))
2504 {
2505 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2506 if (reg_overlap_mentioned_p (subreg, src))
2507 return false;
2508 }
2509
2510 return true;
2511 }
2512
2513 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2514 and [MEM2, MEM2 + SIZE] do overlap and false
2515 otherwise. */
2516
2517 bool
2518 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2519 {
2520 rtx addr1, addr2, addr_delta;
2521 HOST_WIDE_INT delta;
2522
2523 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2524 return true;
2525
2526 if (size == 0)
2527 return false;
2528
2529 addr1 = XEXP (mem1, 0);
2530 addr2 = XEXP (mem2, 0);
2531
2532 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2533
2534 /* This overlapping check is used by peepholes merging memory block operations.
2535 Overlapping operations would otherwise be recognized by the S/390 hardware
2536 and would fall back to a slower implementation. Allowing overlapping
2537 operations would lead to slow code but not to wrong code. Therefore we are
2538 somewhat optimistic if we cannot prove that the memory blocks are
2539 overlapping.
2540 That's why we return false here although this may accept operations on
2541 overlapping memory areas. */
2542 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2543 return false;
2544
2545 delta = INTVAL (addr_delta);
2546
2547 if (delta == 0
2548 || (delta > 0 && delta < size)
2549 || (delta < 0 && -delta < size))
2550 return true;
2551
2552 return false;
2553 }
2554
2555 /* Check whether the address of memory reference MEM2 equals exactly
2556 the address of memory reference MEM1 plus DELTA. Return true if
2557 we can prove this to be the case, false otherwise. */
2558
2559 bool
2560 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2561 {
2562 rtx addr1, addr2, addr_delta;
2563
2564 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2565 return false;
2566
2567 addr1 = XEXP (mem1, 0);
2568 addr2 = XEXP (mem2, 0);
2569
2570 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2571 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2572 return false;
2573
2574 return true;
2575 }
2576
2577 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2578
2579 void
2580 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2581 rtx *operands)
2582 {
2583 machine_mode wmode = mode;
2584 rtx dst = operands[0];
2585 rtx src1 = operands[1];
2586 rtx src2 = operands[2];
2587 rtx op, clob, tem;
2588
2589 /* If we cannot handle the operation directly, use a temp register. */
2590 if (!s390_logical_operator_ok_p (operands))
2591 dst = gen_reg_rtx (mode);
2592
2593 /* QImode and HImode patterns make sense only if we have a destination
2594 in memory. Otherwise perform the operation in SImode. */
2595 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2596 wmode = SImode;
2597
2598 /* Widen operands if required. */
2599 if (mode != wmode)
2600 {
2601 if (GET_CODE (dst) == SUBREG
2602 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2603 dst = tem;
2604 else if (REG_P (dst))
2605 dst = gen_rtx_SUBREG (wmode, dst, 0);
2606 else
2607 dst = gen_reg_rtx (wmode);
2608
2609 if (GET_CODE (src1) == SUBREG
2610 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2611 src1 = tem;
2612 else if (GET_MODE (src1) != VOIDmode)
2613 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2614
2615 if (GET_CODE (src2) == SUBREG
2616 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2617 src2 = tem;
2618 else if (GET_MODE (src2) != VOIDmode)
2619 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2620 }
2621
2622 /* Emit the instruction. */
2623 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2624 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2625 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2626
2627 /* Fix up the destination if needed. */
2628 if (dst != operands[0])
2629 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2630 }
2631
2632 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2633
2634 bool
2635 s390_logical_operator_ok_p (rtx *operands)
2636 {
2637 /* If the destination operand is in memory, it needs to coincide
2638 with one of the source operands. After reload, it has to be
2639 the first source operand. */
2640 if (GET_CODE (operands[0]) == MEM)
2641 return rtx_equal_p (operands[0], operands[1])
2642 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2643
2644 return true;
2645 }
2646
2647 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2648 operand IMMOP to switch from SS to SI type instructions. */
2649
2650 void
2651 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2652 {
2653 int def = code == AND ? -1 : 0;
2654 HOST_WIDE_INT mask;
2655 int part;
2656
2657 gcc_assert (GET_CODE (*memop) == MEM);
2658 gcc_assert (!MEM_VOLATILE_P (*memop));
2659
2660 mask = s390_extract_part (*immop, QImode, def);
2661 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2662 gcc_assert (part >= 0);
2663
2664 *memop = adjust_address (*memop, QImode, part);
2665 *immop = gen_int_mode (mask, QImode);
2666 }
2667
2668
2669 /* How to allocate a 'struct machine_function'. */
2670
2671 static struct machine_function *
2672 s390_init_machine_status (void)
2673 {
2674 return ggc_cleared_alloc<machine_function> ();
2675 }
2676
2677 /* Map for smallest class containing reg regno. */
2678
2679 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2680 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2681 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2682 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2683 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2684 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2685 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2686 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2687 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2688 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2689 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2690 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2691 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2692 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2693 VEC_REGS, VEC_REGS /* 52 */
2694 };
2695
2696 /* Return attribute type of insn. */
2697
2698 static enum attr_type
2699 s390_safe_attr_type (rtx_insn *insn)
2700 {
2701 if (recog_memoized (insn) >= 0)
2702 return get_attr_type (insn);
2703 else
2704 return TYPE_NONE;
2705 }
2706
2707 /* Return true if DISP is a valid short displacement. */
2708
2709 static bool
2710 s390_short_displacement (rtx disp)
2711 {
2712 /* No displacement is OK. */
2713 if (!disp)
2714 return true;
2715
2716 /* Without the long displacement facility we don't need to
2717 distingiush between long and short displacement. */
2718 if (!TARGET_LONG_DISPLACEMENT)
2719 return true;
2720
2721 /* Integer displacement in range. */
2722 if (GET_CODE (disp) == CONST_INT)
2723 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2724
2725 /* GOT offset is not OK, the GOT can be large. */
2726 if (GET_CODE (disp) == CONST
2727 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2728 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2729 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2730 return false;
2731
2732 /* All other symbolic constants are literal pool references,
2733 which are OK as the literal pool must be small. */
2734 if (GET_CODE (disp) == CONST)
2735 return true;
2736
2737 return false;
2738 }
2739
2740 /* Decompose a RTL expression ADDR for a memory address into
2741 its components, returned in OUT.
2742
2743 Returns false if ADDR is not a valid memory address, true
2744 otherwise. If OUT is NULL, don't return the components,
2745 but check for validity only.
2746
2747 Note: Only addresses in canonical form are recognized.
2748 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2749 canonical form so that they will be recognized. */
2750
2751 static int
2752 s390_decompose_address (rtx addr, struct s390_address *out)
2753 {
2754 HOST_WIDE_INT offset = 0;
2755 rtx base = NULL_RTX;
2756 rtx indx = NULL_RTX;
2757 rtx disp = NULL_RTX;
2758 rtx orig_disp;
2759 bool pointer = false;
2760 bool base_ptr = false;
2761 bool indx_ptr = false;
2762 bool literal_pool = false;
2763
2764 /* We may need to substitute the literal pool base register into the address
2765 below. However, at this point we do not know which register is going to
2766 be used as base, so we substitute the arg pointer register. This is going
2767 to be treated as holding a pointer below -- it shouldn't be used for any
2768 other purpose. */
2769 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2770
2771 /* Decompose address into base + index + displacement. */
2772
2773 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2774 base = addr;
2775
2776 else if (GET_CODE (addr) == PLUS)
2777 {
2778 rtx op0 = XEXP (addr, 0);
2779 rtx op1 = XEXP (addr, 1);
2780 enum rtx_code code0 = GET_CODE (op0);
2781 enum rtx_code code1 = GET_CODE (op1);
2782
2783 if (code0 == REG || code0 == UNSPEC)
2784 {
2785 if (code1 == REG || code1 == UNSPEC)
2786 {
2787 indx = op0; /* index + base */
2788 base = op1;
2789 }
2790
2791 else
2792 {
2793 base = op0; /* base + displacement */
2794 disp = op1;
2795 }
2796 }
2797
2798 else if (code0 == PLUS)
2799 {
2800 indx = XEXP (op0, 0); /* index + base + disp */
2801 base = XEXP (op0, 1);
2802 disp = op1;
2803 }
2804
2805 else
2806 {
2807 return false;
2808 }
2809 }
2810
2811 else
2812 disp = addr; /* displacement */
2813
2814 /* Extract integer part of displacement. */
2815 orig_disp = disp;
2816 if (disp)
2817 {
2818 if (GET_CODE (disp) == CONST_INT)
2819 {
2820 offset = INTVAL (disp);
2821 disp = NULL_RTX;
2822 }
2823 else if (GET_CODE (disp) == CONST
2824 && GET_CODE (XEXP (disp, 0)) == PLUS
2825 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2826 {
2827 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2828 disp = XEXP (XEXP (disp, 0), 0);
2829 }
2830 }
2831
2832 /* Strip off CONST here to avoid special case tests later. */
2833 if (disp && GET_CODE (disp) == CONST)
2834 disp = XEXP (disp, 0);
2835
2836 /* We can convert literal pool addresses to
2837 displacements by basing them off the base register. */
2838 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2839 {
2840 /* Either base or index must be free to hold the base register. */
2841 if (!base)
2842 base = fake_pool_base, literal_pool = true;
2843 else if (!indx)
2844 indx = fake_pool_base, literal_pool = true;
2845 else
2846 return false;
2847
2848 /* Mark up the displacement. */
2849 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2850 UNSPEC_LTREL_OFFSET);
2851 }
2852
2853 /* Validate base register. */
2854 if (base)
2855 {
2856 if (GET_CODE (base) == UNSPEC)
2857 switch (XINT (base, 1))
2858 {
2859 case UNSPEC_LTREF:
2860 if (!disp)
2861 disp = gen_rtx_UNSPEC (Pmode,
2862 gen_rtvec (1, XVECEXP (base, 0, 0)),
2863 UNSPEC_LTREL_OFFSET);
2864 else
2865 return false;
2866
2867 base = XVECEXP (base, 0, 1);
2868 break;
2869
2870 case UNSPEC_LTREL_BASE:
2871 if (XVECLEN (base, 0) == 1)
2872 base = fake_pool_base, literal_pool = true;
2873 else
2874 base = XVECEXP (base, 0, 1);
2875 break;
2876
2877 default:
2878 return false;
2879 }
2880
2881 if (!REG_P (base) || GET_MODE (base) != Pmode)
2882 return false;
2883
2884 if (REGNO (base) == STACK_POINTER_REGNUM
2885 || REGNO (base) == FRAME_POINTER_REGNUM
2886 || ((reload_completed || reload_in_progress)
2887 && frame_pointer_needed
2888 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2889 || REGNO (base) == ARG_POINTER_REGNUM
2890 || (flag_pic
2891 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2892 pointer = base_ptr = true;
2893
2894 if ((reload_completed || reload_in_progress)
2895 && base == cfun->machine->base_reg)
2896 pointer = base_ptr = literal_pool = true;
2897 }
2898
2899 /* Validate index register. */
2900 if (indx)
2901 {
2902 if (GET_CODE (indx) == UNSPEC)
2903 switch (XINT (indx, 1))
2904 {
2905 case UNSPEC_LTREF:
2906 if (!disp)
2907 disp = gen_rtx_UNSPEC (Pmode,
2908 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2909 UNSPEC_LTREL_OFFSET);
2910 else
2911 return false;
2912
2913 indx = XVECEXP (indx, 0, 1);
2914 break;
2915
2916 case UNSPEC_LTREL_BASE:
2917 if (XVECLEN (indx, 0) == 1)
2918 indx = fake_pool_base, literal_pool = true;
2919 else
2920 indx = XVECEXP (indx, 0, 1);
2921 break;
2922
2923 default:
2924 return false;
2925 }
2926
2927 if (!REG_P (indx) || GET_MODE (indx) != Pmode)
2928 return false;
2929
2930 if (REGNO (indx) == STACK_POINTER_REGNUM
2931 || REGNO (indx) == FRAME_POINTER_REGNUM
2932 || ((reload_completed || reload_in_progress)
2933 && frame_pointer_needed
2934 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2935 || REGNO (indx) == ARG_POINTER_REGNUM
2936 || (flag_pic
2937 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2938 pointer = indx_ptr = true;
2939
2940 if ((reload_completed || reload_in_progress)
2941 && indx == cfun->machine->base_reg)
2942 pointer = indx_ptr = literal_pool = true;
2943 }
2944
2945 /* Prefer to use pointer as base, not index. */
2946 if (base && indx && !base_ptr
2947 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2948 {
2949 rtx tmp = base;
2950 base = indx;
2951 indx = tmp;
2952 }
2953
2954 /* Validate displacement. */
2955 if (!disp)
2956 {
2957 /* If virtual registers are involved, the displacement will change later
2958 anyway as the virtual registers get eliminated. This could make a
2959 valid displacement invalid, but it is more likely to make an invalid
2960 displacement valid, because we sometimes access the register save area
2961 via negative offsets to one of those registers.
2962 Thus we don't check the displacement for validity here. If after
2963 elimination the displacement turns out to be invalid after all,
2964 this is fixed up by reload in any case. */
2965 /* LRA maintains always displacements up to date and we need to
2966 know the displacement is right during all LRA not only at the
2967 final elimination. */
2968 if (lra_in_progress
2969 || (base != arg_pointer_rtx
2970 && indx != arg_pointer_rtx
2971 && base != return_address_pointer_rtx
2972 && indx != return_address_pointer_rtx
2973 && base != frame_pointer_rtx
2974 && indx != frame_pointer_rtx
2975 && base != virtual_stack_vars_rtx
2976 && indx != virtual_stack_vars_rtx))
2977 if (!DISP_IN_RANGE (offset))
2978 return false;
2979 }
2980 else
2981 {
2982 /* All the special cases are pointers. */
2983 pointer = true;
2984
2985 /* In the small-PIC case, the linker converts @GOT
2986 and @GOTNTPOFF offsets to possible displacements. */
2987 if (GET_CODE (disp) == UNSPEC
2988 && (XINT (disp, 1) == UNSPEC_GOT
2989 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2990 && flag_pic == 1)
2991 {
2992 ;
2993 }
2994
2995 /* Accept pool label offsets. */
2996 else if (GET_CODE (disp) == UNSPEC
2997 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2998 ;
2999
3000 /* Accept literal pool references. */
3001 else if (GET_CODE (disp) == UNSPEC
3002 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3003 {
3004 /* In case CSE pulled a non literal pool reference out of
3005 the pool we have to reject the address. This is
3006 especially important when loading the GOT pointer on non
3007 zarch CPUs. In this case the literal pool contains an lt
3008 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3009 will most likely exceed the displacement. */
3010 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3011 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3012 return false;
3013
3014 orig_disp = gen_rtx_CONST (Pmode, disp);
3015 if (offset)
3016 {
3017 /* If we have an offset, make sure it does not
3018 exceed the size of the constant pool entry. */
3019 rtx sym = XVECEXP (disp, 0, 0);
3020 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3021 return false;
3022
3023 orig_disp = plus_constant (Pmode, orig_disp, offset);
3024 }
3025 }
3026
3027 else
3028 return false;
3029 }
3030
3031 if (!base && !indx)
3032 pointer = true;
3033
3034 if (out)
3035 {
3036 out->base = base;
3037 out->indx = indx;
3038 out->disp = orig_disp;
3039 out->pointer = pointer;
3040 out->literal_pool = literal_pool;
3041 }
3042
3043 return true;
3044 }
3045
3046 /* Decompose a RTL expression OP for an address style operand into its
3047 components, and return the base register in BASE and the offset in
3048 OFFSET. While OP looks like an address it is never supposed to be
3049 used as such.
3050
3051 Return true if OP is a valid address operand, false if not. */
3052
3053 bool
3054 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3055 HOST_WIDE_INT *offset)
3056 {
3057 rtx off = NULL_RTX;
3058
3059 /* We can have an integer constant, an address register,
3060 or a sum of the two. */
3061 if (CONST_SCALAR_INT_P (op))
3062 {
3063 off = op;
3064 op = NULL_RTX;
3065 }
3066 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3067 {
3068 off = XEXP (op, 1);
3069 op = XEXP (op, 0);
3070 }
3071 while (op && GET_CODE (op) == SUBREG)
3072 op = SUBREG_REG (op);
3073
3074 if (op && GET_CODE (op) != REG)
3075 return false;
3076
3077 if (offset)
3078 {
3079 if (off == NULL_RTX)
3080 *offset = 0;
3081 else if (CONST_INT_P (off))
3082 *offset = INTVAL (off);
3083 else if (CONST_WIDE_INT_P (off))
3084 /* The offset will anyway be cut down to 12 bits so take just
3085 the lowest order chunk of the wide int. */
3086 *offset = CONST_WIDE_INT_ELT (off, 0);
3087 else
3088 gcc_unreachable ();
3089 }
3090 if (base)
3091 *base = op;
3092
3093 return true;
3094 }
3095
3096
3097 /* Return true if CODE is a valid address without index. */
3098
3099 bool
3100 s390_legitimate_address_without_index_p (rtx op)
3101 {
3102 struct s390_address addr;
3103
3104 if (!s390_decompose_address (XEXP (op, 0), &addr))
3105 return false;
3106 if (addr.indx)
3107 return false;
3108
3109 return true;
3110 }
3111
3112
3113 /* Return TRUE if ADDR is an operand valid for a load/store relative
3114 instruction. Be aware that the alignment of the operand needs to
3115 be checked separately.
3116 Valid addresses are single references or a sum of a reference and a
3117 constant integer. Return these parts in SYMREF and ADDEND. You can
3118 pass NULL in REF and/or ADDEND if you are not interested in these
3119 values. Literal pool references are *not* considered symbol
3120 references. */
3121
3122 static bool
3123 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3124 {
3125 HOST_WIDE_INT tmpaddend = 0;
3126
3127 if (GET_CODE (addr) == CONST)
3128 addr = XEXP (addr, 0);
3129
3130 if (GET_CODE (addr) == PLUS)
3131 {
3132 if (!CONST_INT_P (XEXP (addr, 1)))
3133 return false;
3134
3135 tmpaddend = INTVAL (XEXP (addr, 1));
3136 addr = XEXP (addr, 0);
3137 }
3138
3139 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3140 || (GET_CODE (addr) == UNSPEC
3141 && (XINT (addr, 1) == UNSPEC_GOTENT
3142 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3143 {
3144 if (symref)
3145 *symref = addr;
3146 if (addend)
3147 *addend = tmpaddend;
3148
3149 return true;
3150 }
3151 return false;
3152 }
3153
3154 /* Return true if the address in OP is valid for constraint letter C
3155 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if it literal
3156 pool MEMs should be accepted. Only the Q, R, S, T constraint
3157 letters are allowed for C. */
3158
3159 static int
3160 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3161 {
3162 struct s390_address addr;
3163 bool decomposed = false;
3164
3165 /* This check makes sure that no symbolic address (except literal
3166 pool references) are accepted by the R or T constraints. */
3167 if (s390_loadrelative_operand_p (op, NULL, NULL))
3168 return 0;
3169
3170 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3171 if (!lit_pool_ok)
3172 {
3173 if (!s390_decompose_address (op, &addr))
3174 return 0;
3175 if (addr.literal_pool)
3176 return 0;
3177 decomposed = true;
3178 }
3179
3180 /* With reload, we sometimes get intermediate address forms that are
3181 actually invalid as-is, but we need to accept them in the most
3182 generic cases below ('R' or 'T'), since reload will in fact fix
3183 them up. LRA behaves differently here; we never see such forms,
3184 but on the other hand, we need to strictly reject every invalid
3185 address form. Perform this check right up front. */
3186 if (lra_in_progress)
3187 {
3188 if (!decomposed && !s390_decompose_address (op, &addr))
3189 return 0;
3190 decomposed = true;
3191 }
3192
3193 switch (c)
3194 {
3195 case 'Q': /* no index short displacement */
3196 if (!decomposed && !s390_decompose_address (op, &addr))
3197 return 0;
3198 if (addr.indx)
3199 return 0;
3200 if (!s390_short_displacement (addr.disp))
3201 return 0;
3202 break;
3203
3204 case 'R': /* with index short displacement */
3205 if (TARGET_LONG_DISPLACEMENT)
3206 {
3207 if (!decomposed && !s390_decompose_address (op, &addr))
3208 return 0;
3209 if (!s390_short_displacement (addr.disp))
3210 return 0;
3211 }
3212 /* Any invalid address here will be fixed up by reload,
3213 so accept it for the most generic constraint. */
3214 break;
3215
3216 case 'S': /* no index long displacement */
3217 if (!decomposed && !s390_decompose_address (op, &addr))
3218 return 0;
3219 if (addr.indx)
3220 return 0;
3221 break;
3222
3223 case 'T': /* with index long displacement */
3224 /* Any invalid address here will be fixed up by reload,
3225 so accept it for the most generic constraint. */
3226 break;
3227
3228 default:
3229 return 0;
3230 }
3231 return 1;
3232 }
3233
3234
3235 /* Evaluates constraint strings described by the regular expression
3236 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3237 the constraint given in STR, or 0 else. */
3238
3239 int
3240 s390_mem_constraint (const char *str, rtx op)
3241 {
3242 char c = str[0];
3243
3244 switch (c)
3245 {
3246 case 'A':
3247 /* Check for offsettable variants of memory constraints. */
3248 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3249 return 0;
3250 if ((reload_completed || reload_in_progress)
3251 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3252 return 0;
3253 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3254 case 'B':
3255 /* Check for non-literal-pool variants of memory constraints. */
3256 if (!MEM_P (op))
3257 return 0;
3258 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3259 case 'Q':
3260 case 'R':
3261 case 'S':
3262 case 'T':
3263 if (GET_CODE (op) != MEM)
3264 return 0;
3265 return s390_check_qrst_address (c, XEXP (op, 0), true);
3266 case 'Y':
3267 /* Simply check for the basic form of a shift count. Reload will
3268 take care of making sure we have a proper base register. */
3269 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3270 return 0;
3271 break;
3272 case 'Z':
3273 return s390_check_qrst_address (str[1], op, true);
3274 default:
3275 return 0;
3276 }
3277 return 1;
3278 }
3279
3280
3281 /* Evaluates constraint strings starting with letter O. Input
3282 parameter C is the second letter following the "O" in the constraint
3283 string. Returns 1 if VALUE meets the respective constraint and 0
3284 otherwise. */
3285
3286 int
3287 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3288 {
3289 if (!TARGET_EXTIMM)
3290 return 0;
3291
3292 switch (c)
3293 {
3294 case 's':
3295 return trunc_int_for_mode (value, SImode) == value;
3296
3297 case 'p':
3298 return value == 0
3299 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3300
3301 case 'n':
3302 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3303
3304 default:
3305 gcc_unreachable ();
3306 }
3307 }
3308
3309
3310 /* Evaluates constraint strings starting with letter N. Parameter STR
3311 contains the letters following letter "N" in the constraint string.
3312 Returns true if VALUE matches the constraint. */
3313
3314 int
3315 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3316 {
3317 machine_mode mode, part_mode;
3318 int def;
3319 int part, part_goal;
3320
3321
3322 if (str[0] == 'x')
3323 part_goal = -1;
3324 else
3325 part_goal = str[0] - '0';
3326
3327 switch (str[1])
3328 {
3329 case 'Q':
3330 part_mode = QImode;
3331 break;
3332 case 'H':
3333 part_mode = HImode;
3334 break;
3335 case 'S':
3336 part_mode = SImode;
3337 break;
3338 default:
3339 return 0;
3340 }
3341
3342 switch (str[2])
3343 {
3344 case 'H':
3345 mode = HImode;
3346 break;
3347 case 'S':
3348 mode = SImode;
3349 break;
3350 case 'D':
3351 mode = DImode;
3352 break;
3353 default:
3354 return 0;
3355 }
3356
3357 switch (str[3])
3358 {
3359 case '0':
3360 def = 0;
3361 break;
3362 case 'F':
3363 def = -1;
3364 break;
3365 default:
3366 return 0;
3367 }
3368
3369 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3370 return 0;
3371
3372 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3373 if (part < 0)
3374 return 0;
3375 if (part_goal != -1 && part_goal != part)
3376 return 0;
3377
3378 return 1;
3379 }
3380
3381
3382 /* Returns true if the input parameter VALUE is a float zero. */
3383
3384 int
3385 s390_float_const_zero_p (rtx value)
3386 {
3387 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3388 && value == CONST0_RTX (GET_MODE (value)));
3389 }
3390
3391 /* Implement TARGET_REGISTER_MOVE_COST. */
3392
3393 static int
3394 s390_register_move_cost (machine_mode mode,
3395 reg_class_t from, reg_class_t to)
3396 {
3397 /* On s390, copy between fprs and gprs is expensive. */
3398
3399 /* It becomes somewhat faster having ldgr/lgdr. */
3400 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3401 {
3402 /* ldgr is single cycle. */
3403 if (reg_classes_intersect_p (from, GENERAL_REGS)
3404 && reg_classes_intersect_p (to, FP_REGS))
3405 return 1;
3406 /* lgdr needs 3 cycles. */
3407 if (reg_classes_intersect_p (to, GENERAL_REGS)
3408 && reg_classes_intersect_p (from, FP_REGS))
3409 return 3;
3410 }
3411
3412 /* Otherwise copying is done via memory. */
3413 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3414 && reg_classes_intersect_p (to, FP_REGS))
3415 || (reg_classes_intersect_p (from, FP_REGS)
3416 && reg_classes_intersect_p (to, GENERAL_REGS)))
3417 return 10;
3418
3419 return 1;
3420 }
3421
3422 /* Implement TARGET_MEMORY_MOVE_COST. */
3423
3424 static int
3425 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3426 reg_class_t rclass ATTRIBUTE_UNUSED,
3427 bool in ATTRIBUTE_UNUSED)
3428 {
3429 return 2;
3430 }
3431
3432 /* Compute a (partial) cost for rtx X. Return true if the complete
3433 cost has been computed, and false if subexpressions should be
3434 scanned. In either case, *TOTAL contains the cost result. The
3435 initial value of *TOTAL is the default value computed by
3436 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3437 code of the superexpression of x. */
3438
3439 static bool
3440 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3441 int opno ATTRIBUTE_UNUSED,
3442 int *total, bool speed ATTRIBUTE_UNUSED)
3443 {
3444 int code = GET_CODE (x);
3445 switch (code)
3446 {
3447 case CONST:
3448 case CONST_INT:
3449 case LABEL_REF:
3450 case SYMBOL_REF:
3451 case CONST_DOUBLE:
3452 case CONST_WIDE_INT:
3453 case MEM:
3454 *total = 0;
3455 return true;
3456
3457 case IOR:
3458 /* risbg */
3459 if (GET_CODE (XEXP (x, 0)) == AND
3460 && GET_CODE (XEXP (x, 1)) == ASHIFT
3461 && REG_P (XEXP (XEXP (x, 0), 0))
3462 && REG_P (XEXP (XEXP (x, 1), 0))
3463 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3464 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3465 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3466 (1UL << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3467 {
3468 *total = COSTS_N_INSNS (2);
3469 return true;
3470 }
3471 /* fallthrough */
3472 case ASHIFT:
3473 case ASHIFTRT:
3474 case LSHIFTRT:
3475 case ROTATE:
3476 case ROTATERT:
3477 case AND:
3478 case XOR:
3479 case NEG:
3480 case NOT:
3481 *total = COSTS_N_INSNS (1);
3482 return false;
3483
3484 case PLUS:
3485 case MINUS:
3486 *total = COSTS_N_INSNS (1);
3487 return false;
3488
3489 case MULT:
3490 switch (mode)
3491 {
3492 case SImode:
3493 {
3494 rtx left = XEXP (x, 0);
3495 rtx right = XEXP (x, 1);
3496 if (GET_CODE (right) == CONST_INT
3497 && CONST_OK_FOR_K (INTVAL (right)))
3498 *total = s390_cost->mhi;
3499 else if (GET_CODE (left) == SIGN_EXTEND)
3500 *total = s390_cost->mh;
3501 else
3502 *total = s390_cost->ms; /* msr, ms, msy */
3503 break;
3504 }
3505 case DImode:
3506 {
3507 rtx left = XEXP (x, 0);
3508 rtx right = XEXP (x, 1);
3509 if (TARGET_ZARCH)
3510 {
3511 if (GET_CODE (right) == CONST_INT
3512 && CONST_OK_FOR_K (INTVAL (right)))
3513 *total = s390_cost->mghi;
3514 else if (GET_CODE (left) == SIGN_EXTEND)
3515 *total = s390_cost->msgf;
3516 else
3517 *total = s390_cost->msg; /* msgr, msg */
3518 }
3519 else /* TARGET_31BIT */
3520 {
3521 if (GET_CODE (left) == SIGN_EXTEND
3522 && GET_CODE (right) == SIGN_EXTEND)
3523 /* mulsidi case: mr, m */
3524 *total = s390_cost->m;
3525 else if (GET_CODE (left) == ZERO_EXTEND
3526 && GET_CODE (right) == ZERO_EXTEND
3527 && TARGET_CPU_ZARCH)
3528 /* umulsidi case: ml, mlr */
3529 *total = s390_cost->ml;
3530 else
3531 /* Complex calculation is required. */
3532 *total = COSTS_N_INSNS (40);
3533 }
3534 break;
3535 }
3536 case SFmode:
3537 case DFmode:
3538 *total = s390_cost->mult_df;
3539 break;
3540 case TFmode:
3541 *total = s390_cost->mxbr;
3542 break;
3543 default:
3544 return false;
3545 }
3546 return false;
3547
3548 case FMA:
3549 switch (mode)
3550 {
3551 case DFmode:
3552 *total = s390_cost->madbr;
3553 break;
3554 case SFmode:
3555 *total = s390_cost->maebr;
3556 break;
3557 default:
3558 return false;
3559 }
3560 /* Negate in the third argument is free: FMSUB. */
3561 if (GET_CODE (XEXP (x, 2)) == NEG)
3562 {
3563 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3564 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3565 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3566 return true;
3567 }
3568 return false;
3569
3570 case UDIV:
3571 case UMOD:
3572 if (mode == TImode) /* 128 bit division */
3573 *total = s390_cost->dlgr;
3574 else if (mode == DImode)
3575 {
3576 rtx right = XEXP (x, 1);
3577 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3578 *total = s390_cost->dlr;
3579 else /* 64 by 64 bit division */
3580 *total = s390_cost->dlgr;
3581 }
3582 else if (mode == SImode) /* 32 bit division */
3583 *total = s390_cost->dlr;
3584 return false;
3585
3586 case DIV:
3587 case MOD:
3588 if (mode == DImode)
3589 {
3590 rtx right = XEXP (x, 1);
3591 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3592 if (TARGET_ZARCH)
3593 *total = s390_cost->dsgfr;
3594 else
3595 *total = s390_cost->dr;
3596 else /* 64 by 64 bit division */
3597 *total = s390_cost->dsgr;
3598 }
3599 else if (mode == SImode) /* 32 bit division */
3600 *total = s390_cost->dlr;
3601 else if (mode == SFmode)
3602 {
3603 *total = s390_cost->debr;
3604 }
3605 else if (mode == DFmode)
3606 {
3607 *total = s390_cost->ddbr;
3608 }
3609 else if (mode == TFmode)
3610 {
3611 *total = s390_cost->dxbr;
3612 }
3613 return false;
3614
3615 case SQRT:
3616 if (mode == SFmode)
3617 *total = s390_cost->sqebr;
3618 else if (mode == DFmode)
3619 *total = s390_cost->sqdbr;
3620 else /* TFmode */
3621 *total = s390_cost->sqxbr;
3622 return false;
3623
3624 case SIGN_EXTEND:
3625 case ZERO_EXTEND:
3626 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3627 || outer_code == PLUS || outer_code == MINUS
3628 || outer_code == COMPARE)
3629 *total = 0;
3630 return false;
3631
3632 case COMPARE:
3633 *total = COSTS_N_INSNS (1);
3634 if (GET_CODE (XEXP (x, 0)) == AND
3635 && GET_CODE (XEXP (x, 1)) == CONST_INT
3636 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3637 {
3638 rtx op0 = XEXP (XEXP (x, 0), 0);
3639 rtx op1 = XEXP (XEXP (x, 0), 1);
3640 rtx op2 = XEXP (x, 1);
3641
3642 if (memory_operand (op0, GET_MODE (op0))
3643 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3644 return true;
3645 if (register_operand (op0, GET_MODE (op0))
3646 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3647 return true;
3648 }
3649 return false;
3650
3651 default:
3652 return false;
3653 }
3654 }
3655
3656 /* Return the cost of an address rtx ADDR. */
3657
3658 static int
3659 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3660 addr_space_t as ATTRIBUTE_UNUSED,
3661 bool speed ATTRIBUTE_UNUSED)
3662 {
3663 struct s390_address ad;
3664 if (!s390_decompose_address (addr, &ad))
3665 return 1000;
3666
3667 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3668 }
3669
3670 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3671 otherwise return 0. */
3672
3673 int
3674 tls_symbolic_operand (rtx op)
3675 {
3676 if (GET_CODE (op) != SYMBOL_REF)
3677 return 0;
3678 return SYMBOL_REF_TLS_MODEL (op);
3679 }
3680 \f
3681 /* Split DImode access register reference REG (on 64-bit) into its constituent
3682 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3683 gen_highpart cannot be used as they assume all registers are word-sized,
3684 while our access registers have only half that size. */
3685
3686 void
3687 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3688 {
3689 gcc_assert (TARGET_64BIT);
3690 gcc_assert (ACCESS_REG_P (reg));
3691 gcc_assert (GET_MODE (reg) == DImode);
3692 gcc_assert (!(REGNO (reg) & 1));
3693
3694 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3695 *hi = gen_rtx_REG (SImode, REGNO (reg));
3696 }
3697
3698 /* Return true if OP contains a symbol reference */
3699
3700 bool
3701 symbolic_reference_mentioned_p (rtx op)
3702 {
3703 const char *fmt;
3704 int i;
3705
3706 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3707 return 1;
3708
3709 fmt = GET_RTX_FORMAT (GET_CODE (op));
3710 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3711 {
3712 if (fmt[i] == 'E')
3713 {
3714 int j;
3715
3716 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3717 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3718 return 1;
3719 }
3720
3721 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3722 return 1;
3723 }
3724
3725 return 0;
3726 }
3727
3728 /* Return true if OP contains a reference to a thread-local symbol. */
3729
3730 bool
3731 tls_symbolic_reference_mentioned_p (rtx op)
3732 {
3733 const char *fmt;
3734 int i;
3735
3736 if (GET_CODE (op) == SYMBOL_REF)
3737 return tls_symbolic_operand (op);
3738
3739 fmt = GET_RTX_FORMAT (GET_CODE (op));
3740 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3741 {
3742 if (fmt[i] == 'E')
3743 {
3744 int j;
3745
3746 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3747 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3748 return true;
3749 }
3750
3751 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3752 return true;
3753 }
3754
3755 return false;
3756 }
3757
3758
3759 /* Return true if OP is a legitimate general operand when
3760 generating PIC code. It is given that flag_pic is on
3761 and that OP satisfies CONSTANT_P. */
3762
3763 int
3764 legitimate_pic_operand_p (rtx op)
3765 {
3766 /* Accept all non-symbolic constants. */
3767 if (!SYMBOLIC_CONST (op))
3768 return 1;
3769
3770 /* Reject everything else; must be handled
3771 via emit_symbolic_move. */
3772 return 0;
3773 }
3774
3775 /* Returns true if the constant value OP is a legitimate general operand.
3776 It is given that OP satisfies CONSTANT_P. */
3777
3778 static bool
3779 s390_legitimate_constant_p (machine_mode mode, rtx op)
3780 {
3781 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3782 {
3783 if (GET_MODE_SIZE (mode) != 16)
3784 return 0;
3785
3786 if (!satisfies_constraint_j00 (op)
3787 && !satisfies_constraint_jm1 (op)
3788 && !satisfies_constraint_jKK (op)
3789 && !satisfies_constraint_jxx (op)
3790 && !satisfies_constraint_jyy (op))
3791 return 0;
3792 }
3793
3794 /* Accept all non-symbolic constants. */
3795 if (!SYMBOLIC_CONST (op))
3796 return 1;
3797
3798 /* Accept immediate LARL operands. */
3799 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3800 return 1;
3801
3802 /* Thread-local symbols are never legal constants. This is
3803 so that emit_call knows that computing such addresses
3804 might require a function call. */
3805 if (TLS_SYMBOLIC_CONST (op))
3806 return 0;
3807
3808 /* In the PIC case, symbolic constants must *not* be
3809 forced into the literal pool. We accept them here,
3810 so that they will be handled by emit_symbolic_move. */
3811 if (flag_pic)
3812 return 1;
3813
3814 /* All remaining non-PIC symbolic constants are
3815 forced into the literal pool. */
3816 return 0;
3817 }
3818
3819 /* Determine if it's legal to put X into the constant pool. This
3820 is not possible if X contains the address of a symbol that is
3821 not constant (TLS) or not known at final link time (PIC). */
3822
3823 static bool
3824 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3825 {
3826 switch (GET_CODE (x))
3827 {
3828 case CONST_INT:
3829 case CONST_DOUBLE:
3830 case CONST_WIDE_INT:
3831 case CONST_VECTOR:
3832 /* Accept all non-symbolic constants. */
3833 return false;
3834
3835 case LABEL_REF:
3836 /* Labels are OK iff we are non-PIC. */
3837 return flag_pic != 0;
3838
3839 case SYMBOL_REF:
3840 /* 'Naked' TLS symbol references are never OK,
3841 non-TLS symbols are OK iff we are non-PIC. */
3842 if (tls_symbolic_operand (x))
3843 return true;
3844 else
3845 return flag_pic != 0;
3846
3847 case CONST:
3848 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3849 case PLUS:
3850 case MINUS:
3851 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3852 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3853
3854 case UNSPEC:
3855 switch (XINT (x, 1))
3856 {
3857 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3858 case UNSPEC_LTREL_OFFSET:
3859 case UNSPEC_GOT:
3860 case UNSPEC_GOTOFF:
3861 case UNSPEC_PLTOFF:
3862 case UNSPEC_TLSGD:
3863 case UNSPEC_TLSLDM:
3864 case UNSPEC_NTPOFF:
3865 case UNSPEC_DTPOFF:
3866 case UNSPEC_GOTNTPOFF:
3867 case UNSPEC_INDNTPOFF:
3868 return false;
3869
3870 /* If the literal pool shares the code section, be put
3871 execute template placeholders into the pool as well. */
3872 case UNSPEC_INSN:
3873 return TARGET_CPU_ZARCH;
3874
3875 default:
3876 return true;
3877 }
3878 break;
3879
3880 default:
3881 gcc_unreachable ();
3882 }
3883 }
3884
3885 /* Returns true if the constant value OP is a legitimate general
3886 operand during and after reload. The difference to
3887 legitimate_constant_p is that this function will not accept
3888 a constant that would need to be forced to the literal pool
3889 before it can be used as operand.
3890 This function accepts all constants which can be loaded directly
3891 into a GPR. */
3892
3893 bool
3894 legitimate_reload_constant_p (rtx op)
3895 {
3896 /* Accept la(y) operands. */
3897 if (GET_CODE (op) == CONST_INT
3898 && DISP_IN_RANGE (INTVAL (op)))
3899 return true;
3900
3901 /* Accept l(g)hi/l(g)fi operands. */
3902 if (GET_CODE (op) == CONST_INT
3903 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3904 return true;
3905
3906 /* Accept lliXX operands. */
3907 if (TARGET_ZARCH
3908 && GET_CODE (op) == CONST_INT
3909 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3910 && s390_single_part (op, word_mode, HImode, 0) >= 0)
3911 return true;
3912
3913 if (TARGET_EXTIMM
3914 && GET_CODE (op) == CONST_INT
3915 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3916 && s390_single_part (op, word_mode, SImode, 0) >= 0)
3917 return true;
3918
3919 /* Accept larl operands. */
3920 if (TARGET_CPU_ZARCH
3921 && larl_operand (op, VOIDmode))
3922 return true;
3923
3924 /* Accept floating-point zero operands that fit into a single GPR. */
3925 if (GET_CODE (op) == CONST_DOUBLE
3926 && s390_float_const_zero_p (op)
3927 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3928 return true;
3929
3930 /* Accept double-word operands that can be split. */
3931 if (GET_CODE (op) == CONST_WIDE_INT
3932 || (GET_CODE (op) == CONST_INT
3933 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
3934 {
3935 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
3936 rtx hi = operand_subword (op, 0, 0, dword_mode);
3937 rtx lo = operand_subword (op, 1, 0, dword_mode);
3938 return legitimate_reload_constant_p (hi)
3939 && legitimate_reload_constant_p (lo);
3940 }
3941
3942 /* Everything else cannot be handled without reload. */
3943 return false;
3944 }
3945
3946 /* Returns true if the constant value OP is a legitimate fp operand
3947 during and after reload.
3948 This function accepts all constants which can be loaded directly
3949 into an FPR. */
3950
3951 static bool
3952 legitimate_reload_fp_constant_p (rtx op)
3953 {
3954 /* Accept floating-point zero operands if the load zero instruction
3955 can be used. Prior to z196 the load fp zero instruction caused a
3956 performance penalty if the result is used as BFP number. */
3957 if (TARGET_Z196
3958 && GET_CODE (op) == CONST_DOUBLE
3959 && s390_float_const_zero_p (op))
3960 return true;
3961
3962 return false;
3963 }
3964
3965 /* Returns true if the constant value OP is a legitimate vector operand
3966 during and after reload.
3967 This function accepts all constants which can be loaded directly
3968 into an VR. */
3969
3970 static bool
3971 legitimate_reload_vector_constant_p (rtx op)
3972 {
3973 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
3974 && (satisfies_constraint_j00 (op)
3975 || satisfies_constraint_jm1 (op)
3976 || satisfies_constraint_jKK (op)
3977 || satisfies_constraint_jxx (op)
3978 || satisfies_constraint_jyy (op)))
3979 return true;
3980
3981 return false;
3982 }
3983
3984 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
3985 return the class of reg to actually use. */
3986
3987 static reg_class_t
3988 s390_preferred_reload_class (rtx op, reg_class_t rclass)
3989 {
3990 switch (GET_CODE (op))
3991 {
3992 /* Constants we cannot reload into general registers
3993 must be forced into the literal pool. */
3994 case CONST_VECTOR:
3995 case CONST_DOUBLE:
3996 case CONST_INT:
3997 case CONST_WIDE_INT:
3998 if (reg_class_subset_p (GENERAL_REGS, rclass)
3999 && legitimate_reload_constant_p (op))
4000 return GENERAL_REGS;
4001 else if (reg_class_subset_p (ADDR_REGS, rclass)
4002 && legitimate_reload_constant_p (op))
4003 return ADDR_REGS;
4004 else if (reg_class_subset_p (FP_REGS, rclass)
4005 && legitimate_reload_fp_constant_p (op))
4006 return FP_REGS;
4007 else if (reg_class_subset_p (VEC_REGS, rclass)
4008 && legitimate_reload_vector_constant_p (op))
4009 return VEC_REGS;
4010
4011 return NO_REGS;
4012
4013 /* If a symbolic constant or a PLUS is reloaded,
4014 it is most likely being used as an address, so
4015 prefer ADDR_REGS. If 'class' is not a superset
4016 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4017 case CONST:
4018 /* Symrefs cannot be pushed into the literal pool with -fPIC
4019 so we *MUST NOT* return NO_REGS for these cases
4020 (s390_cannot_force_const_mem will return true).
4021
4022 On the other hand we MUST return NO_REGS for symrefs with
4023 invalid addend which might have been pushed to the literal
4024 pool (no -fPIC). Usually we would expect them to be
4025 handled via secondary reload but this does not happen if
4026 they are used as literal pool slot replacement in reload
4027 inheritance (see emit_input_reload_insns). */
4028 if (TARGET_CPU_ZARCH
4029 && GET_CODE (XEXP (op, 0)) == PLUS
4030 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4031 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4032 {
4033 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4034 return ADDR_REGS;
4035 else
4036 return NO_REGS;
4037 }
4038 /* fallthrough */
4039 case LABEL_REF:
4040 case SYMBOL_REF:
4041 if (!legitimate_reload_constant_p (op))
4042 return NO_REGS;
4043 /* fallthrough */
4044 case PLUS:
4045 /* load address will be used. */
4046 if (reg_class_subset_p (ADDR_REGS, rclass))
4047 return ADDR_REGS;
4048 else
4049 return NO_REGS;
4050
4051 default:
4052 break;
4053 }
4054
4055 return rclass;
4056 }
4057
4058 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4059 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4060 aligned. */
4061
4062 bool
4063 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4064 {
4065 HOST_WIDE_INT addend;
4066 rtx symref;
4067
4068 /* The "required alignment" might be 0 (e.g. for certain structs
4069 accessed via BLKmode). Early abort in this case, as well as when
4070 an alignment > 8 is required. */
4071 if (alignment < 2 || alignment > 8)
4072 return false;
4073
4074 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4075 return false;
4076
4077 if (addend & (alignment - 1))
4078 return false;
4079
4080 if (GET_CODE (symref) == SYMBOL_REF)
4081 {
4082 /* We have load-relative instructions for 2-byte, 4-byte, and
4083 8-byte alignment so allow only these. */
4084 switch (alignment)
4085 {
4086 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4087 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4088 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4089 default: return false;
4090 }
4091 }
4092
4093 if (GET_CODE (symref) == UNSPEC
4094 && alignment <= UNITS_PER_LONG)
4095 return true;
4096
4097 return false;
4098 }
4099
4100 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4101 operand SCRATCH is used to reload the even part of the address and
4102 adding one. */
4103
4104 void
4105 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4106 {
4107 HOST_WIDE_INT addend;
4108 rtx symref;
4109
4110 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4111 gcc_unreachable ();
4112
4113 if (!(addend & 1))
4114 /* Easy case. The addend is even so larl will do fine. */
4115 emit_move_insn (reg, addr);
4116 else
4117 {
4118 /* We can leave the scratch register untouched if the target
4119 register is a valid base register. */
4120 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4121 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4122 scratch = reg;
4123
4124 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4125 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4126
4127 if (addend != 1)
4128 emit_move_insn (scratch,
4129 gen_rtx_CONST (Pmode,
4130 gen_rtx_PLUS (Pmode, symref,
4131 GEN_INT (addend - 1))));
4132 else
4133 emit_move_insn (scratch, symref);
4134
4135 /* Increment the address using la in order to avoid clobbering cc. */
4136 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4137 }
4138 }
4139
4140 /* Generate what is necessary to move between REG and MEM using
4141 SCRATCH. The direction is given by TOMEM. */
4142
4143 void
4144 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4145 {
4146 /* Reload might have pulled a constant out of the literal pool.
4147 Force it back in. */
4148 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4149 || GET_CODE (mem) == CONST_WIDE_INT
4150 || GET_CODE (mem) == CONST_VECTOR
4151 || GET_CODE (mem) == CONST)
4152 mem = force_const_mem (GET_MODE (reg), mem);
4153
4154 gcc_assert (MEM_P (mem));
4155
4156 /* For a load from memory we can leave the scratch register
4157 untouched if the target register is a valid base register. */
4158 if (!tomem
4159 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4160 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4161 && GET_MODE (reg) == GET_MODE (scratch))
4162 scratch = reg;
4163
4164 /* Load address into scratch register. Since we can't have a
4165 secondary reload for a secondary reload we have to cover the case
4166 where larl would need a secondary reload here as well. */
4167 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4168
4169 /* Now we can use a standard load/store to do the move. */
4170 if (tomem)
4171 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4172 else
4173 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4174 }
4175
4176 /* Inform reload about cases where moving X with a mode MODE to a register in
4177 RCLASS requires an extra scratch or immediate register. Return the class
4178 needed for the immediate register. */
4179
4180 static reg_class_t
4181 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4182 machine_mode mode, secondary_reload_info *sri)
4183 {
4184 enum reg_class rclass = (enum reg_class) rclass_i;
4185
4186 /* Intermediate register needed. */
4187 if (reg_classes_intersect_p (CC_REGS, rclass))
4188 return GENERAL_REGS;
4189
4190 if (TARGET_VX)
4191 {
4192 /* The vst/vl vector move instructions allow only for short
4193 displacements. */
4194 if (MEM_P (x)
4195 && GET_CODE (XEXP (x, 0)) == PLUS
4196 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4197 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4198 && reg_class_subset_p (rclass, VEC_REGS)
4199 && (!reg_class_subset_p (rclass, FP_REGS)
4200 || (GET_MODE_SIZE (mode) > 8
4201 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4202 {
4203 if (in_p)
4204 sri->icode = (TARGET_64BIT ?
4205 CODE_FOR_reloaddi_la_in :
4206 CODE_FOR_reloadsi_la_in);
4207 else
4208 sri->icode = (TARGET_64BIT ?
4209 CODE_FOR_reloaddi_la_out :
4210 CODE_FOR_reloadsi_la_out);
4211 }
4212 }
4213
4214 if (TARGET_Z10)
4215 {
4216 HOST_WIDE_INT offset;
4217 rtx symref;
4218
4219 /* On z10 several optimizer steps may generate larl operands with
4220 an odd addend. */
4221 if (in_p
4222 && s390_loadrelative_operand_p (x, &symref, &offset)
4223 && mode == Pmode
4224 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4225 && (offset & 1) == 1)
4226 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4227 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4228
4229 /* Handle all the (mem (symref)) accesses we cannot use the z10
4230 instructions for. */
4231 if (MEM_P (x)
4232 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4233 && (mode == QImode
4234 || !reg_class_subset_p (rclass, GENERAL_REGS)
4235 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4236 || !s390_check_symref_alignment (XEXP (x, 0),
4237 GET_MODE_SIZE (mode))))
4238 {
4239 #define __SECONDARY_RELOAD_CASE(M,m) \
4240 case M##mode: \
4241 if (TARGET_64BIT) \
4242 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4243 CODE_FOR_reload##m##di_tomem_z10; \
4244 else \
4245 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4246 CODE_FOR_reload##m##si_tomem_z10; \
4247 break;
4248
4249 switch (GET_MODE (x))
4250 {
4251 __SECONDARY_RELOAD_CASE (QI, qi);
4252 __SECONDARY_RELOAD_CASE (HI, hi);
4253 __SECONDARY_RELOAD_CASE (SI, si);
4254 __SECONDARY_RELOAD_CASE (DI, di);
4255 __SECONDARY_RELOAD_CASE (TI, ti);
4256 __SECONDARY_RELOAD_CASE (SF, sf);
4257 __SECONDARY_RELOAD_CASE (DF, df);
4258 __SECONDARY_RELOAD_CASE (TF, tf);
4259 __SECONDARY_RELOAD_CASE (SD, sd);
4260 __SECONDARY_RELOAD_CASE (DD, dd);
4261 __SECONDARY_RELOAD_CASE (TD, td);
4262 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4263 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4264 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4265 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4266 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4267 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4268 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4269 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4270 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4271 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4272 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4273 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4274 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4275 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4276 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4277 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4278 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4279 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4280 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4281 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4282 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4283 default:
4284 gcc_unreachable ();
4285 }
4286 #undef __SECONDARY_RELOAD_CASE
4287 }
4288 }
4289
4290 /* We need a scratch register when loading a PLUS expression which
4291 is not a legitimate operand of the LOAD ADDRESS instruction. */
4292 /* LRA can deal with transformation of plus op very well -- so we
4293 don't need to prompt LRA in this case. */
4294 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4295 sri->icode = (TARGET_64BIT ?
4296 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4297
4298 /* Performing a multiword move from or to memory we have to make sure the
4299 second chunk in memory is addressable without causing a displacement
4300 overflow. If that would be the case we calculate the address in
4301 a scratch register. */
4302 if (MEM_P (x)
4303 && GET_CODE (XEXP (x, 0)) == PLUS
4304 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4305 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4306 + GET_MODE_SIZE (mode) - 1))
4307 {
4308 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4309 in a s_operand address since we may fallback to lm/stm. So we only
4310 have to care about overflows in the b+i+d case. */
4311 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4312 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4313 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4314 /* For FP_REGS no lm/stm is available so this check is triggered
4315 for displacement overflows in b+i+d and b+d like addresses. */
4316 || (reg_classes_intersect_p (FP_REGS, rclass)
4317 && s390_class_max_nregs (FP_REGS, mode) > 1))
4318 {
4319 if (in_p)
4320 sri->icode = (TARGET_64BIT ?
4321 CODE_FOR_reloaddi_la_in :
4322 CODE_FOR_reloadsi_la_in);
4323 else
4324 sri->icode = (TARGET_64BIT ?
4325 CODE_FOR_reloaddi_la_out :
4326 CODE_FOR_reloadsi_la_out);
4327 }
4328 }
4329
4330 /* A scratch address register is needed when a symbolic constant is
4331 copied to r0 compiling with -fPIC. In other cases the target
4332 register might be used as temporary (see legitimize_pic_address). */
4333 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4334 sri->icode = (TARGET_64BIT ?
4335 CODE_FOR_reloaddi_PIC_addr :
4336 CODE_FOR_reloadsi_PIC_addr);
4337
4338 /* Either scratch or no register needed. */
4339 return NO_REGS;
4340 }
4341
4342 /* Generate code to load SRC, which is PLUS that is not a
4343 legitimate operand for the LA instruction, into TARGET.
4344 SCRATCH may be used as scratch register. */
4345
4346 void
4347 s390_expand_plus_operand (rtx target, rtx src,
4348 rtx scratch)
4349 {
4350 rtx sum1, sum2;
4351 struct s390_address ad;
4352
4353 /* src must be a PLUS; get its two operands. */
4354 gcc_assert (GET_CODE (src) == PLUS);
4355 gcc_assert (GET_MODE (src) == Pmode);
4356
4357 /* Check if any of the two operands is already scheduled
4358 for replacement by reload. This can happen e.g. when
4359 float registers occur in an address. */
4360 sum1 = find_replacement (&XEXP (src, 0));
4361 sum2 = find_replacement (&XEXP (src, 1));
4362 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4363
4364 /* If the address is already strictly valid, there's nothing to do. */
4365 if (!s390_decompose_address (src, &ad)
4366 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4367 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4368 {
4369 /* Otherwise, one of the operands cannot be an address register;
4370 we reload its value into the scratch register. */
4371 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4372 {
4373 emit_move_insn (scratch, sum1);
4374 sum1 = scratch;
4375 }
4376 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4377 {
4378 emit_move_insn (scratch, sum2);
4379 sum2 = scratch;
4380 }
4381
4382 /* According to the way these invalid addresses are generated
4383 in reload.c, it should never happen (at least on s390) that
4384 *neither* of the PLUS components, after find_replacements
4385 was applied, is an address register. */
4386 if (sum1 == scratch && sum2 == scratch)
4387 {
4388 debug_rtx (src);
4389 gcc_unreachable ();
4390 }
4391
4392 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4393 }
4394
4395 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4396 is only ever performed on addresses, so we can mark the
4397 sum as legitimate for LA in any case. */
4398 s390_load_address (target, src);
4399 }
4400
4401
4402 /* Return true if ADDR is a valid memory address.
4403 STRICT specifies whether strict register checking applies. */
4404
4405 static bool
4406 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4407 {
4408 struct s390_address ad;
4409
4410 if (TARGET_Z10
4411 && larl_operand (addr, VOIDmode)
4412 && (mode == VOIDmode
4413 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4414 return true;
4415
4416 if (!s390_decompose_address (addr, &ad))
4417 return false;
4418
4419 if (strict)
4420 {
4421 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4422 return false;
4423
4424 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4425 return false;
4426 }
4427 else
4428 {
4429 if (ad.base
4430 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4431 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4432 return false;
4433
4434 if (ad.indx
4435 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4436 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4437 return false;
4438 }
4439 return true;
4440 }
4441
4442 /* Return true if OP is a valid operand for the LA instruction.
4443 In 31-bit, we need to prove that the result is used as an
4444 address, as LA performs only a 31-bit addition. */
4445
4446 bool
4447 legitimate_la_operand_p (rtx op)
4448 {
4449 struct s390_address addr;
4450 if (!s390_decompose_address (op, &addr))
4451 return false;
4452
4453 return (TARGET_64BIT || addr.pointer);
4454 }
4455
4456 /* Return true if it is valid *and* preferable to use LA to
4457 compute the sum of OP1 and OP2. */
4458
4459 bool
4460 preferred_la_operand_p (rtx op1, rtx op2)
4461 {
4462 struct s390_address addr;
4463
4464 if (op2 != const0_rtx)
4465 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4466
4467 if (!s390_decompose_address (op1, &addr))
4468 return false;
4469 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4470 return false;
4471 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4472 return false;
4473
4474 /* Avoid LA instructions with index register on z196; it is
4475 preferable to use regular add instructions when possible.
4476 Starting with zEC12 the la with index register is "uncracked"
4477 again. */
4478 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4479 return false;
4480
4481 if (!TARGET_64BIT && !addr.pointer)
4482 return false;
4483
4484 if (addr.pointer)
4485 return true;
4486
4487 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4488 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4489 return true;
4490
4491 return false;
4492 }
4493
4494 /* Emit a forced load-address operation to load SRC into DST.
4495 This will use the LOAD ADDRESS instruction even in situations
4496 where legitimate_la_operand_p (SRC) returns false. */
4497
4498 void
4499 s390_load_address (rtx dst, rtx src)
4500 {
4501 if (TARGET_64BIT)
4502 emit_move_insn (dst, src);
4503 else
4504 emit_insn (gen_force_la_31 (dst, src));
4505 }
4506
4507 /* Return a legitimate reference for ORIG (an address) using the
4508 register REG. If REG is 0, a new pseudo is generated.
4509
4510 There are two types of references that must be handled:
4511
4512 1. Global data references must load the address from the GOT, via
4513 the PIC reg. An insn is emitted to do this load, and the reg is
4514 returned.
4515
4516 2. Static data references, constant pool addresses, and code labels
4517 compute the address as an offset from the GOT, whose base is in
4518 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4519 differentiate them from global data objects. The returned
4520 address is the PIC reg + an unspec constant.
4521
4522 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4523 reg also appears in the address. */
4524
4525 rtx
4526 legitimize_pic_address (rtx orig, rtx reg)
4527 {
4528 rtx addr = orig;
4529 rtx addend = const0_rtx;
4530 rtx new_rtx = orig;
4531
4532 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4533
4534 if (GET_CODE (addr) == CONST)
4535 addr = XEXP (addr, 0);
4536
4537 if (GET_CODE (addr) == PLUS)
4538 {
4539 addend = XEXP (addr, 1);
4540 addr = XEXP (addr, 0);
4541 }
4542
4543 if ((GET_CODE (addr) == LABEL_REF
4544 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
4545 || (GET_CODE (addr) == UNSPEC &&
4546 (XINT (addr, 1) == UNSPEC_GOTENT
4547 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4548 && GET_CODE (addend) == CONST_INT)
4549 {
4550 /* This can be locally addressed. */
4551
4552 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4553 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4554 gen_rtx_CONST (Pmode, addr) : addr);
4555
4556 if (TARGET_CPU_ZARCH
4557 && larl_operand (const_addr, VOIDmode)
4558 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
4559 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
4560 {
4561 if (INTVAL (addend) & 1)
4562 {
4563 /* LARL can't handle odd offsets, so emit a pair of LARL
4564 and LA. */
4565 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4566
4567 if (!DISP_IN_RANGE (INTVAL (addend)))
4568 {
4569 HOST_WIDE_INT even = INTVAL (addend) - 1;
4570 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4571 addr = gen_rtx_CONST (Pmode, addr);
4572 addend = const1_rtx;
4573 }
4574
4575 emit_move_insn (temp, addr);
4576 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4577
4578 if (reg != 0)
4579 {
4580 s390_load_address (reg, new_rtx);
4581 new_rtx = reg;
4582 }
4583 }
4584 else
4585 {
4586 /* If the offset is even, we can just use LARL. This
4587 will happen automatically. */
4588 }
4589 }
4590 else
4591 {
4592 /* No larl - Access local symbols relative to the GOT. */
4593
4594 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4595
4596 if (reload_in_progress || reload_completed)
4597 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4598
4599 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4600 if (addend != const0_rtx)
4601 addr = gen_rtx_PLUS (Pmode, addr, addend);
4602 addr = gen_rtx_CONST (Pmode, addr);
4603 addr = force_const_mem (Pmode, addr);
4604 emit_move_insn (temp, addr);
4605
4606 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4607 if (reg != 0)
4608 {
4609 s390_load_address (reg, new_rtx);
4610 new_rtx = reg;
4611 }
4612 }
4613 }
4614 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4615 {
4616 /* A non-local symbol reference without addend.
4617
4618 The symbol ref is wrapped into an UNSPEC to make sure the
4619 proper operand modifier (@GOT or @GOTENT) will be emitted.
4620 This will tell the linker to put the symbol into the GOT.
4621
4622 Additionally the code dereferencing the GOT slot is emitted here.
4623
4624 An addend to the symref needs to be added afterwards.
4625 legitimize_pic_address calls itself recursively to handle
4626 that case. So no need to do it here. */
4627
4628 if (reg == 0)
4629 reg = gen_reg_rtx (Pmode);
4630
4631 if (TARGET_Z10)
4632 {
4633 /* Use load relative if possible.
4634 lgrl <target>, sym@GOTENT */
4635 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4636 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4637 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4638
4639 emit_move_insn (reg, new_rtx);
4640 new_rtx = reg;
4641 }
4642 else if (flag_pic == 1)
4643 {
4644 /* Assume GOT offset is a valid displacement operand (< 4k
4645 or < 512k with z990). This is handled the same way in
4646 both 31- and 64-bit code (@GOT).
4647 lg <target>, sym@GOT(r12) */
4648
4649 if (reload_in_progress || reload_completed)
4650 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4651
4652 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4653 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4654 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4655 new_rtx = gen_const_mem (Pmode, new_rtx);
4656 emit_move_insn (reg, new_rtx);
4657 new_rtx = reg;
4658 }
4659 else if (TARGET_CPU_ZARCH)
4660 {
4661 /* If the GOT offset might be >= 4k, we determine the position
4662 of the GOT entry via a PC-relative LARL (@GOTENT).
4663 larl temp, sym@GOTENT
4664 lg <target>, 0(temp) */
4665
4666 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4667
4668 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4669 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4670
4671 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4672 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4673 emit_move_insn (temp, new_rtx);
4674
4675 new_rtx = gen_const_mem (Pmode, temp);
4676 emit_move_insn (reg, new_rtx);
4677
4678 new_rtx = reg;
4679 }
4680 else
4681 {
4682 /* If the GOT offset might be >= 4k, we have to load it
4683 from the literal pool (@GOT).
4684
4685 lg temp, lit-litbase(r13)
4686 lg <target>, 0(temp)
4687 lit: .long sym@GOT */
4688
4689 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4690
4691 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4692 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4693
4694 if (reload_in_progress || reload_completed)
4695 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4696
4697 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4698 addr = gen_rtx_CONST (Pmode, addr);
4699 addr = force_const_mem (Pmode, addr);
4700 emit_move_insn (temp, addr);
4701
4702 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4703 new_rtx = gen_const_mem (Pmode, new_rtx);
4704 emit_move_insn (reg, new_rtx);
4705 new_rtx = reg;
4706 }
4707 }
4708 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4709 {
4710 gcc_assert (XVECLEN (addr, 0) == 1);
4711 switch (XINT (addr, 1))
4712 {
4713 /* These address symbols (or PLT slots) relative to the GOT
4714 (not GOT slots!). In general this will exceed the
4715 displacement range so these value belong into the literal
4716 pool. */
4717 case UNSPEC_GOTOFF:
4718 case UNSPEC_PLTOFF:
4719 new_rtx = force_const_mem (Pmode, orig);
4720 break;
4721
4722 /* For -fPIC the GOT size might exceed the displacement
4723 range so make sure the value is in the literal pool. */
4724 case UNSPEC_GOT:
4725 if (flag_pic == 2)
4726 new_rtx = force_const_mem (Pmode, orig);
4727 break;
4728
4729 /* For @GOTENT larl is used. This is handled like local
4730 symbol refs. */
4731 case UNSPEC_GOTENT:
4732 gcc_unreachable ();
4733 break;
4734
4735 /* @PLT is OK as is on 64-bit, must be converted to
4736 GOT-relative @PLTOFF on 31-bit. */
4737 case UNSPEC_PLT:
4738 if (!TARGET_CPU_ZARCH)
4739 {
4740 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4741
4742 if (reload_in_progress || reload_completed)
4743 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4744
4745 addr = XVECEXP (addr, 0, 0);
4746 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4747 UNSPEC_PLTOFF);
4748 if (addend != const0_rtx)
4749 addr = gen_rtx_PLUS (Pmode, addr, addend);
4750 addr = gen_rtx_CONST (Pmode, addr);
4751 addr = force_const_mem (Pmode, addr);
4752 emit_move_insn (temp, addr);
4753
4754 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4755 if (reg != 0)
4756 {
4757 s390_load_address (reg, new_rtx);
4758 new_rtx = reg;
4759 }
4760 }
4761 else
4762 /* On 64 bit larl can be used. This case is handled like
4763 local symbol refs. */
4764 gcc_unreachable ();
4765 break;
4766
4767 /* Everything else cannot happen. */
4768 default:
4769 gcc_unreachable ();
4770 }
4771 }
4772 else if (addend != const0_rtx)
4773 {
4774 /* Otherwise, compute the sum. */
4775
4776 rtx base = legitimize_pic_address (addr, reg);
4777 new_rtx = legitimize_pic_address (addend,
4778 base == reg ? NULL_RTX : reg);
4779 if (GET_CODE (new_rtx) == CONST_INT)
4780 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4781 else
4782 {
4783 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4784 {
4785 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4786 new_rtx = XEXP (new_rtx, 1);
4787 }
4788 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4789 }
4790
4791 if (GET_CODE (new_rtx) == CONST)
4792 new_rtx = XEXP (new_rtx, 0);
4793 new_rtx = force_operand (new_rtx, 0);
4794 }
4795
4796 return new_rtx;
4797 }
4798
4799 /* Load the thread pointer into a register. */
4800
4801 rtx
4802 s390_get_thread_pointer (void)
4803 {
4804 rtx tp = gen_reg_rtx (Pmode);
4805
4806 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4807 mark_reg_pointer (tp, BITS_PER_WORD);
4808
4809 return tp;
4810 }
4811
4812 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
4813 in s390_tls_symbol which always refers to __tls_get_offset.
4814 The returned offset is written to RESULT_REG and an USE rtx is
4815 generated for TLS_CALL. */
4816
4817 static GTY(()) rtx s390_tls_symbol;
4818
4819 static void
4820 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4821 {
4822 rtx insn;
4823
4824 if (!flag_pic)
4825 emit_insn (s390_load_got ());
4826
4827 if (!s390_tls_symbol)
4828 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4829
4830 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4831 gen_rtx_REG (Pmode, RETURN_REGNUM));
4832
4833 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4834 RTL_CONST_CALL_P (insn) = 1;
4835 }
4836
4837 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4838 this (thread-local) address. REG may be used as temporary. */
4839
4840 static rtx
4841 legitimize_tls_address (rtx addr, rtx reg)
4842 {
4843 rtx new_rtx, tls_call, temp, base, r2, insn;
4844
4845 if (GET_CODE (addr) == SYMBOL_REF)
4846 switch (tls_symbolic_operand (addr))
4847 {
4848 case TLS_MODEL_GLOBAL_DYNAMIC:
4849 start_sequence ();
4850 r2 = gen_rtx_REG (Pmode, 2);
4851 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4852 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4853 new_rtx = force_const_mem (Pmode, new_rtx);
4854 emit_move_insn (r2, new_rtx);
4855 s390_emit_tls_call_insn (r2, tls_call);
4856 insn = get_insns ();
4857 end_sequence ();
4858
4859 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4860 temp = gen_reg_rtx (Pmode);
4861 emit_libcall_block (insn, temp, r2, new_rtx);
4862
4863 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4864 if (reg != 0)
4865 {
4866 s390_load_address (reg, new_rtx);
4867 new_rtx = reg;
4868 }
4869 break;
4870
4871 case TLS_MODEL_LOCAL_DYNAMIC:
4872 start_sequence ();
4873 r2 = gen_rtx_REG (Pmode, 2);
4874 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
4875 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4876 new_rtx = force_const_mem (Pmode, new_rtx);
4877 emit_move_insn (r2, new_rtx);
4878 s390_emit_tls_call_insn (r2, tls_call);
4879 insn = get_insns ();
4880 end_sequence ();
4881
4882 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
4883 temp = gen_reg_rtx (Pmode);
4884 emit_libcall_block (insn, temp, r2, new_rtx);
4885
4886 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4887 base = gen_reg_rtx (Pmode);
4888 s390_load_address (base, new_rtx);
4889
4890 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
4891 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4892 new_rtx = force_const_mem (Pmode, new_rtx);
4893 temp = gen_reg_rtx (Pmode);
4894 emit_move_insn (temp, new_rtx);
4895
4896 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
4897 if (reg != 0)
4898 {
4899 s390_load_address (reg, new_rtx);
4900 new_rtx = reg;
4901 }
4902 break;
4903
4904 case TLS_MODEL_INITIAL_EXEC:
4905 if (flag_pic == 1)
4906 {
4907 /* Assume GOT offset < 4k. This is handled the same way
4908 in both 31- and 64-bit code. */
4909
4910 if (reload_in_progress || reload_completed)
4911 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4912
4913 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4914 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4915 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4916 new_rtx = gen_const_mem (Pmode, new_rtx);
4917 temp = gen_reg_rtx (Pmode);
4918 emit_move_insn (temp, new_rtx);
4919 }
4920 else if (TARGET_CPU_ZARCH)
4921 {
4922 /* If the GOT offset might be >= 4k, we determine the position
4923 of the GOT entry via a PC-relative LARL. */
4924
4925 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4926 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4927 temp = gen_reg_rtx (Pmode);
4928 emit_move_insn (temp, new_rtx);
4929
4930 new_rtx = gen_const_mem (Pmode, temp);
4931 temp = gen_reg_rtx (Pmode);
4932 emit_move_insn (temp, new_rtx);
4933 }
4934 else if (flag_pic)
4935 {
4936 /* If the GOT offset might be >= 4k, we have to load it
4937 from the literal pool. */
4938
4939 if (reload_in_progress || reload_completed)
4940 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4941
4942 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
4943 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4944 new_rtx = force_const_mem (Pmode, new_rtx);
4945 temp = gen_reg_rtx (Pmode);
4946 emit_move_insn (temp, new_rtx);
4947
4948 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4949 new_rtx = gen_const_mem (Pmode, new_rtx);
4950
4951 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4952 temp = gen_reg_rtx (Pmode);
4953 emit_insn (gen_rtx_SET (temp, new_rtx));
4954 }
4955 else
4956 {
4957 /* In position-dependent code, load the absolute address of
4958 the GOT entry from the literal pool. */
4959
4960 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
4961 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4962 new_rtx = force_const_mem (Pmode, new_rtx);
4963 temp = gen_reg_rtx (Pmode);
4964 emit_move_insn (temp, new_rtx);
4965
4966 new_rtx = temp;
4967 new_rtx = gen_const_mem (Pmode, new_rtx);
4968 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
4969 temp = gen_reg_rtx (Pmode);
4970 emit_insn (gen_rtx_SET (temp, new_rtx));
4971 }
4972
4973 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4974 if (reg != 0)
4975 {
4976 s390_load_address (reg, new_rtx);
4977 new_rtx = reg;
4978 }
4979 break;
4980
4981 case TLS_MODEL_LOCAL_EXEC:
4982 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4983 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4984 new_rtx = force_const_mem (Pmode, new_rtx);
4985 temp = gen_reg_rtx (Pmode);
4986 emit_move_insn (temp, new_rtx);
4987
4988 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4989 if (reg != 0)
4990 {
4991 s390_load_address (reg, new_rtx);
4992 new_rtx = reg;
4993 }
4994 break;
4995
4996 default:
4997 gcc_unreachable ();
4998 }
4999
5000 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5001 {
5002 switch (XINT (XEXP (addr, 0), 1))
5003 {
5004 case UNSPEC_INDNTPOFF:
5005 gcc_assert (TARGET_CPU_ZARCH);
5006 new_rtx = addr;
5007 break;
5008
5009 default:
5010 gcc_unreachable ();
5011 }
5012 }
5013
5014 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5015 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5016 {
5017 new_rtx = XEXP (XEXP (addr, 0), 0);
5018 if (GET_CODE (new_rtx) != SYMBOL_REF)
5019 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5020
5021 new_rtx = legitimize_tls_address (new_rtx, reg);
5022 new_rtx = plus_constant (Pmode, new_rtx,
5023 INTVAL (XEXP (XEXP (addr, 0), 1)));
5024 new_rtx = force_operand (new_rtx, 0);
5025 }
5026
5027 else
5028 gcc_unreachable (); /* for now ... */
5029
5030 return new_rtx;
5031 }
5032
5033 /* Emit insns making the address in operands[1] valid for a standard
5034 move to operands[0]. operands[1] is replaced by an address which
5035 should be used instead of the former RTX to emit the move
5036 pattern. */
5037
5038 void
5039 emit_symbolic_move (rtx *operands)
5040 {
5041 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5042
5043 if (GET_CODE (operands[0]) == MEM)
5044 operands[1] = force_reg (Pmode, operands[1]);
5045 else if (TLS_SYMBOLIC_CONST (operands[1]))
5046 operands[1] = legitimize_tls_address (operands[1], temp);
5047 else if (flag_pic)
5048 operands[1] = legitimize_pic_address (operands[1], temp);
5049 }
5050
5051 /* Try machine-dependent ways of modifying an illegitimate address X
5052 to be legitimate. If we find one, return the new, valid address.
5053
5054 OLDX is the address as it was before break_out_memory_refs was called.
5055 In some cases it is useful to look at this to decide what needs to be done.
5056
5057 MODE is the mode of the operand pointed to by X.
5058
5059 When -fpic is used, special handling is needed for symbolic references.
5060 See comments by legitimize_pic_address for details. */
5061
5062 static rtx
5063 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5064 machine_mode mode ATTRIBUTE_UNUSED)
5065 {
5066 rtx constant_term = const0_rtx;
5067
5068 if (TLS_SYMBOLIC_CONST (x))
5069 {
5070 x = legitimize_tls_address (x, 0);
5071
5072 if (s390_legitimate_address_p (mode, x, FALSE))
5073 return x;
5074 }
5075 else if (GET_CODE (x) == PLUS
5076 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5077 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5078 {
5079 return x;
5080 }
5081 else if (flag_pic)
5082 {
5083 if (SYMBOLIC_CONST (x)
5084 || (GET_CODE (x) == PLUS
5085 && (SYMBOLIC_CONST (XEXP (x, 0))
5086 || SYMBOLIC_CONST (XEXP (x, 1)))))
5087 x = legitimize_pic_address (x, 0);
5088
5089 if (s390_legitimate_address_p (mode, x, FALSE))
5090 return x;
5091 }
5092
5093 x = eliminate_constant_term (x, &constant_term);
5094
5095 /* Optimize loading of large displacements by splitting them
5096 into the multiple of 4K and the rest; this allows the
5097 former to be CSE'd if possible.
5098
5099 Don't do this if the displacement is added to a register
5100 pointing into the stack frame, as the offsets will
5101 change later anyway. */
5102
5103 if (GET_CODE (constant_term) == CONST_INT
5104 && !TARGET_LONG_DISPLACEMENT
5105 && !DISP_IN_RANGE (INTVAL (constant_term))
5106 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5107 {
5108 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5109 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5110
5111 rtx temp = gen_reg_rtx (Pmode);
5112 rtx val = force_operand (GEN_INT (upper), temp);
5113 if (val != temp)
5114 emit_move_insn (temp, val);
5115
5116 x = gen_rtx_PLUS (Pmode, x, temp);
5117 constant_term = GEN_INT (lower);
5118 }
5119
5120 if (GET_CODE (x) == PLUS)
5121 {
5122 if (GET_CODE (XEXP (x, 0)) == REG)
5123 {
5124 rtx temp = gen_reg_rtx (Pmode);
5125 rtx val = force_operand (XEXP (x, 1), temp);
5126 if (val != temp)
5127 emit_move_insn (temp, val);
5128
5129 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5130 }
5131
5132 else if (GET_CODE (XEXP (x, 1)) == REG)
5133 {
5134 rtx temp = gen_reg_rtx (Pmode);
5135 rtx val = force_operand (XEXP (x, 0), temp);
5136 if (val != temp)
5137 emit_move_insn (temp, val);
5138
5139 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5140 }
5141 }
5142
5143 if (constant_term != const0_rtx)
5144 x = gen_rtx_PLUS (Pmode, x, constant_term);
5145
5146 return x;
5147 }
5148
5149 /* Try a machine-dependent way of reloading an illegitimate address AD
5150 operand. If we find one, push the reload and return the new address.
5151
5152 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5153 and TYPE is the reload type of the current reload. */
5154
5155 rtx
5156 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5157 int opnum, int type)
5158 {
5159 if (!optimize || TARGET_LONG_DISPLACEMENT)
5160 return NULL_RTX;
5161
5162 if (GET_CODE (ad) == PLUS)
5163 {
5164 rtx tem = simplify_binary_operation (PLUS, Pmode,
5165 XEXP (ad, 0), XEXP (ad, 1));
5166 if (tem)
5167 ad = tem;
5168 }
5169
5170 if (GET_CODE (ad) == PLUS
5171 && GET_CODE (XEXP (ad, 0)) == REG
5172 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5173 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5174 {
5175 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5176 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5177 rtx cst, tem, new_rtx;
5178
5179 cst = GEN_INT (upper);
5180 if (!legitimate_reload_constant_p (cst))
5181 cst = force_const_mem (Pmode, cst);
5182
5183 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5184 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5185
5186 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5187 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5188 opnum, (enum reload_type) type);
5189 return new_rtx;
5190 }
5191
5192 return NULL_RTX;
5193 }
5194
5195 /* Emit code to move LEN bytes from DST to SRC. */
5196
5197 bool
5198 s390_expand_movmem (rtx dst, rtx src, rtx len)
5199 {
5200 /* When tuning for z10 or higher we rely on the Glibc functions to
5201 do the right thing. Only for constant lengths below 64k we will
5202 generate inline code. */
5203 if (s390_tune >= PROCESSOR_2097_Z10
5204 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5205 return false;
5206
5207 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5208 {
5209 if (INTVAL (len) > 0)
5210 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
5211 }
5212
5213 else if (TARGET_MVCLE)
5214 {
5215 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5216 }
5217
5218 else
5219 {
5220 rtx dst_addr, src_addr, count, blocks, temp;
5221 rtx_code_label *loop_start_label = gen_label_rtx ();
5222 rtx_code_label *loop_end_label = gen_label_rtx ();
5223 rtx_code_label *end_label = gen_label_rtx ();
5224 machine_mode mode;
5225
5226 mode = GET_MODE (len);
5227 if (mode == VOIDmode)
5228 mode = Pmode;
5229
5230 dst_addr = gen_reg_rtx (Pmode);
5231 src_addr = gen_reg_rtx (Pmode);
5232 count = gen_reg_rtx (mode);
5233 blocks = gen_reg_rtx (mode);
5234
5235 convert_move (count, len, 1);
5236 emit_cmp_and_jump_insns (count, const0_rtx,
5237 EQ, NULL_RTX, mode, 1, end_label);
5238
5239 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5240 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5241 dst = change_address (dst, VOIDmode, dst_addr);
5242 src = change_address (src, VOIDmode, src_addr);
5243
5244 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5245 OPTAB_DIRECT);
5246 if (temp != count)
5247 emit_move_insn (count, temp);
5248
5249 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5250 OPTAB_DIRECT);
5251 if (temp != blocks)
5252 emit_move_insn (blocks, temp);
5253
5254 emit_cmp_and_jump_insns (blocks, const0_rtx,
5255 EQ, NULL_RTX, mode, 1, loop_end_label);
5256
5257 emit_label (loop_start_label);
5258
5259 if (TARGET_Z10
5260 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5261 {
5262 rtx prefetch;
5263
5264 /* Issue a read prefetch for the +3 cache line. */
5265 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5266 const0_rtx, const0_rtx);
5267 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5268 emit_insn (prefetch);
5269
5270 /* Issue a write prefetch for the +3 cache line. */
5271 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5272 const1_rtx, const0_rtx);
5273 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5274 emit_insn (prefetch);
5275 }
5276
5277 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5278 s390_load_address (dst_addr,
5279 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5280 s390_load_address (src_addr,
5281 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5282
5283 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5284 OPTAB_DIRECT);
5285 if (temp != blocks)
5286 emit_move_insn (blocks, temp);
5287
5288 emit_cmp_and_jump_insns (blocks, const0_rtx,
5289 EQ, NULL_RTX, mode, 1, loop_end_label);
5290
5291 emit_jump (loop_start_label);
5292 emit_label (loop_end_label);
5293
5294 emit_insn (gen_movmem_short (dst, src,
5295 convert_to_mode (Pmode, count, 1)));
5296 emit_label (end_label);
5297 }
5298 return true;
5299 }
5300
5301 /* Emit code to set LEN bytes at DST to VAL.
5302 Make use of clrmem if VAL is zero. */
5303
5304 void
5305 s390_expand_setmem (rtx dst, rtx len, rtx val)
5306 {
5307 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
5308 return;
5309
5310 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5311
5312 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
5313 {
5314 if (val == const0_rtx && INTVAL (len) <= 256)
5315 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
5316 else
5317 {
5318 /* Initialize memory by storing the first byte. */
5319 emit_move_insn (adjust_address (dst, QImode, 0), val);
5320
5321 if (INTVAL (len) > 1)
5322 {
5323 /* Initiate 1 byte overlap move.
5324 The first byte of DST is propagated through DSTP1.
5325 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
5326 DST is set to size 1 so the rest of the memory location
5327 does not count as source operand. */
5328 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
5329 set_mem_size (dst, 1);
5330
5331 emit_insn (gen_movmem_short (dstp1, dst,
5332 GEN_INT (INTVAL (len) - 2)));
5333 }
5334 }
5335 }
5336
5337 else if (TARGET_MVCLE)
5338 {
5339 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5340 if (TARGET_64BIT)
5341 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5342 val));
5343 else
5344 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5345 val));
5346 }
5347
5348 else
5349 {
5350 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5351 rtx_code_label *loop_start_label = gen_label_rtx ();
5352 rtx_code_label *loop_end_label = gen_label_rtx ();
5353 rtx_code_label *end_label = gen_label_rtx ();
5354 machine_mode mode;
5355
5356 mode = GET_MODE (len);
5357 if (mode == VOIDmode)
5358 mode = Pmode;
5359
5360 dst_addr = gen_reg_rtx (Pmode);
5361 count = gen_reg_rtx (mode);
5362 blocks = gen_reg_rtx (mode);
5363
5364 convert_move (count, len, 1);
5365 emit_cmp_and_jump_insns (count, const0_rtx,
5366 EQ, NULL_RTX, mode, 1, end_label);
5367
5368 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5369 dst = change_address (dst, VOIDmode, dst_addr);
5370
5371 if (val == const0_rtx)
5372 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5373 OPTAB_DIRECT);
5374 else
5375 {
5376 dstp1 = adjust_address (dst, VOIDmode, 1);
5377 set_mem_size (dst, 1);
5378
5379 /* Initialize memory by storing the first byte. */
5380 emit_move_insn (adjust_address (dst, QImode, 0), val);
5381
5382 /* If count is 1 we are done. */
5383 emit_cmp_and_jump_insns (count, const1_rtx,
5384 EQ, NULL_RTX, mode, 1, end_label);
5385
5386 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
5387 OPTAB_DIRECT);
5388 }
5389 if (temp != count)
5390 emit_move_insn (count, temp);
5391
5392 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5393 OPTAB_DIRECT);
5394 if (temp != blocks)
5395 emit_move_insn (blocks, temp);
5396
5397 emit_cmp_and_jump_insns (blocks, const0_rtx,
5398 EQ, NULL_RTX, mode, 1, loop_end_label);
5399
5400 emit_label (loop_start_label);
5401
5402 if (TARGET_Z10
5403 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5404 {
5405 /* Issue a write prefetch for the +4 cache line. */
5406 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5407 GEN_INT (1024)),
5408 const1_rtx, const0_rtx);
5409 emit_insn (prefetch);
5410 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5411 }
5412
5413 if (val == const0_rtx)
5414 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5415 else
5416 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
5417 s390_load_address (dst_addr,
5418 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5419
5420 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5421 OPTAB_DIRECT);
5422 if (temp != blocks)
5423 emit_move_insn (blocks, temp);
5424
5425 emit_cmp_and_jump_insns (blocks, const0_rtx,
5426 EQ, NULL_RTX, mode, 1, loop_end_label);
5427
5428 emit_jump (loop_start_label);
5429 emit_label (loop_end_label);
5430
5431 if (val == const0_rtx)
5432 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5433 else
5434 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
5435 emit_label (end_label);
5436 }
5437 }
5438
5439 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5440 and return the result in TARGET. */
5441
5442 bool
5443 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5444 {
5445 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5446 rtx tmp;
5447
5448 /* When tuning for z10 or higher we rely on the Glibc functions to
5449 do the right thing. Only for constant lengths below 64k we will
5450 generate inline code. */
5451 if (s390_tune >= PROCESSOR_2097_Z10
5452 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5453 return false;
5454
5455 /* As the result of CMPINT is inverted compared to what we need,
5456 we have to swap the operands. */
5457 tmp = op0; op0 = op1; op1 = tmp;
5458
5459 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5460 {
5461 if (INTVAL (len) > 0)
5462 {
5463 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5464 emit_insn (gen_cmpint (target, ccreg));
5465 }
5466 else
5467 emit_move_insn (target, const0_rtx);
5468 }
5469 else if (TARGET_MVCLE)
5470 {
5471 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5472 emit_insn (gen_cmpint (target, ccreg));
5473 }
5474 else
5475 {
5476 rtx addr0, addr1, count, blocks, temp;
5477 rtx_code_label *loop_start_label = gen_label_rtx ();
5478 rtx_code_label *loop_end_label = gen_label_rtx ();
5479 rtx_code_label *end_label = gen_label_rtx ();
5480 machine_mode mode;
5481
5482 mode = GET_MODE (len);
5483 if (mode == VOIDmode)
5484 mode = Pmode;
5485
5486 addr0 = gen_reg_rtx (Pmode);
5487 addr1 = gen_reg_rtx (Pmode);
5488 count = gen_reg_rtx (mode);
5489 blocks = gen_reg_rtx (mode);
5490
5491 convert_move (count, len, 1);
5492 emit_cmp_and_jump_insns (count, const0_rtx,
5493 EQ, NULL_RTX, mode, 1, end_label);
5494
5495 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5496 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5497 op0 = change_address (op0, VOIDmode, addr0);
5498 op1 = change_address (op1, VOIDmode, addr1);
5499
5500 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5501 OPTAB_DIRECT);
5502 if (temp != count)
5503 emit_move_insn (count, temp);
5504
5505 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5506 OPTAB_DIRECT);
5507 if (temp != blocks)
5508 emit_move_insn (blocks, temp);
5509
5510 emit_cmp_and_jump_insns (blocks, const0_rtx,
5511 EQ, NULL_RTX, mode, 1, loop_end_label);
5512
5513 emit_label (loop_start_label);
5514
5515 if (TARGET_Z10
5516 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5517 {
5518 rtx prefetch;
5519
5520 /* Issue a read prefetch for the +2 cache line of operand 1. */
5521 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5522 const0_rtx, const0_rtx);
5523 emit_insn (prefetch);
5524 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5525
5526 /* Issue a read prefetch for the +2 cache line of operand 2. */
5527 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5528 const0_rtx, const0_rtx);
5529 emit_insn (prefetch);
5530 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5531 }
5532
5533 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5534 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5535 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5536 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5537 temp = gen_rtx_SET (pc_rtx, temp);
5538 emit_jump_insn (temp);
5539
5540 s390_load_address (addr0,
5541 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5542 s390_load_address (addr1,
5543 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5544
5545 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5546 OPTAB_DIRECT);
5547 if (temp != blocks)
5548 emit_move_insn (blocks, temp);
5549
5550 emit_cmp_and_jump_insns (blocks, const0_rtx,
5551 EQ, NULL_RTX, mode, 1, loop_end_label);
5552
5553 emit_jump (loop_start_label);
5554 emit_label (loop_end_label);
5555
5556 emit_insn (gen_cmpmem_short (op0, op1,
5557 convert_to_mode (Pmode, count, 1)));
5558 emit_label (end_label);
5559
5560 emit_insn (gen_cmpint (target, ccreg));
5561 }
5562 return true;
5563 }
5564
5565 /* Emit a conditional jump to LABEL for condition code mask MASK using
5566 comparsion operator COMPARISON. Return the emitted jump insn. */
5567
5568 static rtx
5569 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5570 {
5571 rtx temp;
5572
5573 gcc_assert (comparison == EQ || comparison == NE);
5574 gcc_assert (mask > 0 && mask < 15);
5575
5576 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5577 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5578 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5579 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5580 temp = gen_rtx_SET (pc_rtx, temp);
5581 return emit_jump_insn (temp);
5582 }
5583
5584 /* Emit the instructions to implement strlen of STRING and store the
5585 result in TARGET. The string has the known ALIGNMENT. This
5586 version uses vector instructions and is therefore not appropriate
5587 for targets prior to z13. */
5588
5589 void
5590 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5591 {
5592 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5593 int very_likely = REG_BR_PROB_BASE - 1;
5594 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5595 rtx str_reg = gen_reg_rtx (V16QImode);
5596 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5597 rtx str_idx_reg = gen_reg_rtx (Pmode);
5598 rtx result_reg = gen_reg_rtx (V16QImode);
5599 rtx is_aligned_label = gen_label_rtx ();
5600 rtx into_loop_label = NULL_RTX;
5601 rtx loop_start_label = gen_label_rtx ();
5602 rtx temp;
5603 rtx len = gen_reg_rtx (QImode);
5604 rtx cond;
5605
5606 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5607 emit_move_insn (str_idx_reg, const0_rtx);
5608
5609 if (INTVAL (alignment) < 16)
5610 {
5611 /* Check whether the address happens to be aligned properly so
5612 jump directly to the aligned loop. */
5613 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5614 str_addr_base_reg, GEN_INT (15)),
5615 const0_rtx, EQ, NULL_RTX,
5616 Pmode, 1, is_aligned_label);
5617
5618 temp = gen_reg_rtx (Pmode);
5619 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5620 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5621 gcc_assert (REG_P (temp));
5622 highest_index_to_load_reg =
5623 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5624 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5625 gcc_assert (REG_P (highest_index_to_load_reg));
5626 emit_insn (gen_vllv16qi (str_reg,
5627 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5628 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5629
5630 into_loop_label = gen_label_rtx ();
5631 s390_emit_jump (into_loop_label, NULL_RTX);
5632 emit_barrier ();
5633 }
5634
5635 emit_label (is_aligned_label);
5636 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5637
5638 /* Reaching this point we are only performing 16 bytes aligned
5639 loads. */
5640 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5641
5642 emit_label (loop_start_label);
5643 LABEL_NUSES (loop_start_label) = 1;
5644
5645 /* Load 16 bytes of the string into VR. */
5646 emit_move_insn (str_reg,
5647 gen_rtx_MEM (V16QImode,
5648 gen_rtx_PLUS (Pmode, str_idx_reg,
5649 str_addr_base_reg)));
5650 if (into_loop_label != NULL_RTX)
5651 {
5652 emit_label (into_loop_label);
5653 LABEL_NUSES (into_loop_label) = 1;
5654 }
5655
5656 /* Increment string index by 16 bytes. */
5657 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5658 str_idx_reg, 1, OPTAB_DIRECT);
5659
5660 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5661 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5662
5663 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5664 REG_BR_PROB, very_likely);
5665 emit_insn (gen_vec_extractv16qi (len, result_reg, GEN_INT (7)));
5666
5667 /* If the string pointer wasn't aligned we have loaded less then 16
5668 bytes and the remaining bytes got filled with zeros (by vll).
5669 Now we have to check whether the resulting index lies within the
5670 bytes actually part of the string. */
5671
5672 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5673 highest_index_to_load_reg);
5674 s390_load_address (highest_index_to_load_reg,
5675 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5676 const1_rtx));
5677 if (TARGET_64BIT)
5678 emit_insn (gen_movdicc (str_idx_reg, cond,
5679 highest_index_to_load_reg, str_idx_reg));
5680 else
5681 emit_insn (gen_movsicc (str_idx_reg, cond,
5682 highest_index_to_load_reg, str_idx_reg));
5683
5684 add_int_reg_note (s390_emit_jump (is_aligned_label, cond), REG_BR_PROB,
5685 very_unlikely);
5686
5687 expand_binop (Pmode, add_optab, str_idx_reg,
5688 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5689 /* FIXME: len is already zero extended - so avoid the llgcr emitted
5690 here. */
5691 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5692 convert_to_mode (Pmode, len, 1),
5693 target, 1, OPTAB_DIRECT);
5694 if (temp != target)
5695 emit_move_insn (target, temp);
5696 }
5697
5698 void
5699 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5700 {
5701 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
5702 rtx temp = gen_reg_rtx (Pmode);
5703 rtx src_addr = XEXP (src, 0);
5704 rtx dst_addr = XEXP (dst, 0);
5705 rtx src_addr_reg = gen_reg_rtx (Pmode);
5706 rtx dst_addr_reg = gen_reg_rtx (Pmode);
5707 rtx offset = gen_reg_rtx (Pmode);
5708 rtx vsrc = gen_reg_rtx (V16QImode);
5709 rtx vpos = gen_reg_rtx (V16QImode);
5710 rtx loadlen = gen_reg_rtx (SImode);
5711 rtx gpos_qi = gen_reg_rtx(QImode);
5712 rtx gpos = gen_reg_rtx (SImode);
5713 rtx done_label = gen_label_rtx ();
5714 rtx loop_label = gen_label_rtx ();
5715 rtx exit_label = gen_label_rtx ();
5716 rtx full_label = gen_label_rtx ();
5717
5718 /* Perform a quick check for string ending on the first up to 16
5719 bytes and exit early if successful. */
5720
5721 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5722 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5723 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5724 emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
5725 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5726 /* gpos is the byte index if a zero was found and 16 otherwise.
5727 So if it is lower than the loaded bytes we have a hit. */
5728 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5729 full_label);
5730 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5731
5732 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5733 1, OPTAB_DIRECT);
5734 emit_jump (exit_label);
5735 emit_barrier ();
5736
5737 emit_label (full_label);
5738 LABEL_NUSES (full_label) = 1;
5739
5740 /* Calculate `offset' so that src + offset points to the last byte
5741 before 16 byte alignment. */
5742
5743 /* temp = src_addr & 0xf */
5744 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
5745 1, OPTAB_DIRECT);
5746
5747 /* offset = 0xf - temp */
5748 emit_move_insn (offset, GEN_INT (15));
5749 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
5750 1, OPTAB_DIRECT);
5751
5752 /* Store `offset' bytes in the dstination string. The quick check
5753 has loaded at least `offset' bytes into vsrc. */
5754
5755 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
5756
5757 /* Advance to the next byte to be loaded. */
5758 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
5759 1, OPTAB_DIRECT);
5760
5761 /* Make sure the addresses are single regs which can be used as a
5762 base. */
5763 emit_move_insn (src_addr_reg, src_addr);
5764 emit_move_insn (dst_addr_reg, dst_addr);
5765
5766 /* MAIN LOOP */
5767
5768 emit_label (loop_label);
5769 LABEL_NUSES (loop_label) = 1;
5770
5771 emit_move_insn (vsrc,
5772 gen_rtx_MEM (V16QImode,
5773 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
5774
5775 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
5776 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5777 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
5778 REG_BR_PROB, very_unlikely);
5779
5780 emit_move_insn (gen_rtx_MEM (V16QImode,
5781 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
5782 vsrc);
5783 /* offset += 16 */
5784 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
5785 offset, 1, OPTAB_DIRECT);
5786
5787 emit_jump (loop_label);
5788 emit_barrier ();
5789
5790 /* REGULAR EXIT */
5791
5792 /* We are done. Add the offset of the zero character to the dst_addr
5793 pointer to get the result. */
5794
5795 emit_label (done_label);
5796 LABEL_NUSES (done_label) = 1;
5797
5798 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
5799 1, OPTAB_DIRECT);
5800
5801 emit_insn (gen_vec_extractv16qi (gpos_qi, vpos, GEN_INT (7)));
5802 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5803
5804 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
5805
5806 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
5807 1, OPTAB_DIRECT);
5808
5809 /* EARLY EXIT */
5810
5811 emit_label (exit_label);
5812 LABEL_NUSES (exit_label) = 1;
5813 }
5814
5815
5816 /* Expand conditional increment or decrement using alc/slb instructions.
5817 Should generate code setting DST to either SRC or SRC + INCREMENT,
5818 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
5819 Returns true if successful, false otherwise.
5820
5821 That makes it possible to implement some if-constructs without jumps e.g.:
5822 (borrow = CC0 | CC1 and carry = CC2 | CC3)
5823 unsigned int a, b, c;
5824 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
5825 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
5826 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
5827 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
5828
5829 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
5830 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
5831 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
5832 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
5833 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
5834
5835 bool
5836 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
5837 rtx dst, rtx src, rtx increment)
5838 {
5839 machine_mode cmp_mode;
5840 machine_mode cc_mode;
5841 rtx op_res;
5842 rtx insn;
5843 rtvec p;
5844 int ret;
5845
5846 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
5847 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
5848 cmp_mode = SImode;
5849 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
5850 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
5851 cmp_mode = DImode;
5852 else
5853 return false;
5854
5855 /* Try ADD LOGICAL WITH CARRY. */
5856 if (increment == const1_rtx)
5857 {
5858 /* Determine CC mode to use. */
5859 if (cmp_code == EQ || cmp_code == NE)
5860 {
5861 if (cmp_op1 != const0_rtx)
5862 {
5863 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5864 NULL_RTX, 0, OPTAB_WIDEN);
5865 cmp_op1 = const0_rtx;
5866 }
5867
5868 cmp_code = cmp_code == EQ ? LEU : GTU;
5869 }
5870
5871 if (cmp_code == LTU || cmp_code == LEU)
5872 {
5873 rtx tem = cmp_op0;
5874 cmp_op0 = cmp_op1;
5875 cmp_op1 = tem;
5876 cmp_code = swap_condition (cmp_code);
5877 }
5878
5879 switch (cmp_code)
5880 {
5881 case GTU:
5882 cc_mode = CCUmode;
5883 break;
5884
5885 case GEU:
5886 cc_mode = CCL3mode;
5887 break;
5888
5889 default:
5890 return false;
5891 }
5892
5893 /* Emit comparison instruction pattern. */
5894 if (!register_operand (cmp_op0, cmp_mode))
5895 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5896
5897 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5898 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5899 /* We use insn_invalid_p here to add clobbers if required. */
5900 ret = insn_invalid_p (emit_insn (insn), false);
5901 gcc_assert (!ret);
5902
5903 /* Emit ALC instruction pattern. */
5904 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5905 gen_rtx_REG (cc_mode, CC_REGNUM),
5906 const0_rtx);
5907
5908 if (src != const0_rtx)
5909 {
5910 if (!register_operand (src, GET_MODE (dst)))
5911 src = force_reg (GET_MODE (dst), src);
5912
5913 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
5914 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
5915 }
5916
5917 p = rtvec_alloc (2);
5918 RTVEC_ELT (p, 0) =
5919 gen_rtx_SET (dst, op_res);
5920 RTVEC_ELT (p, 1) =
5921 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5922 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5923
5924 return true;
5925 }
5926
5927 /* Try SUBTRACT LOGICAL WITH BORROW. */
5928 if (increment == constm1_rtx)
5929 {
5930 /* Determine CC mode to use. */
5931 if (cmp_code == EQ || cmp_code == NE)
5932 {
5933 if (cmp_op1 != const0_rtx)
5934 {
5935 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
5936 NULL_RTX, 0, OPTAB_WIDEN);
5937 cmp_op1 = const0_rtx;
5938 }
5939
5940 cmp_code = cmp_code == EQ ? LEU : GTU;
5941 }
5942
5943 if (cmp_code == GTU || cmp_code == GEU)
5944 {
5945 rtx tem = cmp_op0;
5946 cmp_op0 = cmp_op1;
5947 cmp_op1 = tem;
5948 cmp_code = swap_condition (cmp_code);
5949 }
5950
5951 switch (cmp_code)
5952 {
5953 case LEU:
5954 cc_mode = CCUmode;
5955 break;
5956
5957 case LTU:
5958 cc_mode = CCL3mode;
5959 break;
5960
5961 default:
5962 return false;
5963 }
5964
5965 /* Emit comparison instruction pattern. */
5966 if (!register_operand (cmp_op0, cmp_mode))
5967 cmp_op0 = force_reg (cmp_mode, cmp_op0);
5968
5969 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
5970 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
5971 /* We use insn_invalid_p here to add clobbers if required. */
5972 ret = insn_invalid_p (emit_insn (insn), false);
5973 gcc_assert (!ret);
5974
5975 /* Emit SLB instruction pattern. */
5976 if (!register_operand (src, GET_MODE (dst)))
5977 src = force_reg (GET_MODE (dst), src);
5978
5979 op_res = gen_rtx_MINUS (GET_MODE (dst),
5980 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
5981 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
5982 gen_rtx_REG (cc_mode, CC_REGNUM),
5983 const0_rtx));
5984 p = rtvec_alloc (2);
5985 RTVEC_ELT (p, 0) =
5986 gen_rtx_SET (dst, op_res);
5987 RTVEC_ELT (p, 1) =
5988 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
5989 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
5990
5991 return true;
5992 }
5993
5994 return false;
5995 }
5996
5997 /* Expand code for the insv template. Return true if successful. */
5998
5999 bool
6000 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6001 {
6002 int bitsize = INTVAL (op1);
6003 int bitpos = INTVAL (op2);
6004 machine_mode mode = GET_MODE (dest);
6005 machine_mode smode;
6006 int smode_bsize, mode_bsize;
6007 rtx op, clobber;
6008
6009 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6010 return false;
6011
6012 /* Generate INSERT IMMEDIATE (IILL et al). */
6013 /* (set (ze (reg)) (const_int)). */
6014 if (TARGET_ZARCH
6015 && register_operand (dest, word_mode)
6016 && (bitpos % 16) == 0
6017 && (bitsize % 16) == 0
6018 && const_int_operand (src, VOIDmode))
6019 {
6020 HOST_WIDE_INT val = INTVAL (src);
6021 int regpos = bitpos + bitsize;
6022
6023 while (regpos > bitpos)
6024 {
6025 machine_mode putmode;
6026 int putsize;
6027
6028 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6029 putmode = SImode;
6030 else
6031 putmode = HImode;
6032
6033 putsize = GET_MODE_BITSIZE (putmode);
6034 regpos -= putsize;
6035 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6036 GEN_INT (putsize),
6037 GEN_INT (regpos)),
6038 gen_int_mode (val, putmode));
6039 val >>= putsize;
6040 }
6041 gcc_assert (regpos == bitpos);
6042 return true;
6043 }
6044
6045 smode = smallest_mode_for_size (bitsize, MODE_INT);
6046 smode_bsize = GET_MODE_BITSIZE (smode);
6047 mode_bsize = GET_MODE_BITSIZE (mode);
6048
6049 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6050 if (bitpos == 0
6051 && (bitsize % BITS_PER_UNIT) == 0
6052 && MEM_P (dest)
6053 && (register_operand (src, word_mode)
6054 || const_int_operand (src, VOIDmode)))
6055 {
6056 /* Emit standard pattern if possible. */
6057 if (smode_bsize == bitsize)
6058 {
6059 emit_move_insn (adjust_address (dest, smode, 0),
6060 gen_lowpart (smode, src));
6061 return true;
6062 }
6063
6064 /* (set (ze (mem)) (const_int)). */
6065 else if (const_int_operand (src, VOIDmode))
6066 {
6067 int size = bitsize / BITS_PER_UNIT;
6068 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6069 BLKmode,
6070 UNITS_PER_WORD - size);
6071
6072 dest = adjust_address (dest, BLKmode, 0);
6073 set_mem_size (dest, size);
6074 s390_expand_movmem (dest, src_mem, GEN_INT (size));
6075 return true;
6076 }
6077
6078 /* (set (ze (mem)) (reg)). */
6079 else if (register_operand (src, word_mode))
6080 {
6081 if (bitsize <= 32)
6082 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6083 const0_rtx), src);
6084 else
6085 {
6086 /* Emit st,stcmh sequence. */
6087 int stcmh_width = bitsize - 32;
6088 int size = stcmh_width / BITS_PER_UNIT;
6089
6090 emit_move_insn (adjust_address (dest, SImode, size),
6091 gen_lowpart (SImode, src));
6092 set_mem_size (dest, size);
6093 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6094 GEN_INT (stcmh_width),
6095 const0_rtx),
6096 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6097 }
6098 return true;
6099 }
6100 }
6101
6102 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6103 if ((bitpos % BITS_PER_UNIT) == 0
6104 && (bitsize % BITS_PER_UNIT) == 0
6105 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6106 && MEM_P (src)
6107 && (mode == DImode || mode == SImode)
6108 && register_operand (dest, mode))
6109 {
6110 /* Emit a strict_low_part pattern if possible. */
6111 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6112 {
6113 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6114 op = gen_rtx_SET (op, gen_lowpart (smode, src));
6115 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6116 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6117 return true;
6118 }
6119
6120 /* ??? There are more powerful versions of ICM that are not
6121 completely represented in the md file. */
6122 }
6123
6124 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6125 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6126 {
6127 machine_mode mode_s = GET_MODE (src);
6128
6129 if (CONSTANT_P (src))
6130 {
6131 /* For constant zero values the representation with AND
6132 appears to be folded in more situations than the (set
6133 (zero_extract) ...).
6134 We only do this when the start and end of the bitfield
6135 remain in the same SImode chunk. That way nihf or nilf
6136 can be used.
6137 The AND patterns might still generate a risbg for this. */
6138 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6139 return false;
6140 else
6141 src = force_reg (mode, src);
6142 }
6143 else if (mode_s != mode)
6144 {
6145 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6146 src = force_reg (mode_s, src);
6147 src = gen_lowpart (mode, src);
6148 }
6149
6150 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
6151 op = gen_rtx_SET (op, src);
6152
6153 if (!TARGET_ZEC12)
6154 {
6155 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6156 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6157 }
6158 emit_insn (op);
6159
6160 return true;
6161 }
6162
6163 return false;
6164 }
6165
6166 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6167 register that holds VAL of mode MODE shifted by COUNT bits. */
6168
6169 static inline rtx
6170 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6171 {
6172 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6173 NULL_RTX, 1, OPTAB_DIRECT);
6174 return expand_simple_binop (SImode, ASHIFT, val, count,
6175 NULL_RTX, 1, OPTAB_DIRECT);
6176 }
6177
6178 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6179 the result in TARGET. */
6180
6181 void
6182 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6183 rtx cmp_op1, rtx cmp_op2)
6184 {
6185 machine_mode mode = GET_MODE (target);
6186 bool neg_p = false, swap_p = false;
6187 rtx tmp;
6188
6189 if (GET_MODE (cmp_op1) == V2DFmode)
6190 {
6191 switch (cond)
6192 {
6193 /* NE a != b -> !(a == b) */
6194 case NE: cond = EQ; neg_p = true; break;
6195 /* UNGT a u> b -> !(b >= a) */
6196 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6197 /* UNGE a u>= b -> !(b > a) */
6198 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6199 /* LE: a <= b -> b >= a */
6200 case LE: cond = GE; swap_p = true; break;
6201 /* UNLE: a u<= b -> !(a > b) */
6202 case UNLE: cond = GT; neg_p = true; break;
6203 /* LT: a < b -> b > a */
6204 case LT: cond = GT; swap_p = true; break;
6205 /* UNLT: a u< b -> !(a >= b) */
6206 case UNLT: cond = GE; neg_p = true; break;
6207 case UNEQ:
6208 emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
6209 return;
6210 case LTGT:
6211 emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
6212 return;
6213 case ORDERED:
6214 emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
6215 return;
6216 case UNORDERED:
6217 emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
6218 return;
6219 default: break;
6220 }
6221 }
6222 else
6223 {
6224 switch (cond)
6225 {
6226 /* NE: a != b -> !(a == b) */
6227 case NE: cond = EQ; neg_p = true; break;
6228 /* GE: a >= b -> !(b > a) */
6229 case GE: cond = GT; neg_p = true; swap_p = true; break;
6230 /* GEU: a >= b -> !(b > a) */
6231 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6232 /* LE: a <= b -> !(a > b) */
6233 case LE: cond = GT; neg_p = true; break;
6234 /* LEU: a <= b -> !(a > b) */
6235 case LEU: cond = GTU; neg_p = true; break;
6236 /* LT: a < b -> b > a */
6237 case LT: cond = GT; swap_p = true; break;
6238 /* LTU: a < b -> b > a */
6239 case LTU: cond = GTU; swap_p = true; break;
6240 default: break;
6241 }
6242 }
6243
6244 if (swap_p)
6245 {
6246 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6247 }
6248
6249 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6250 mode,
6251 cmp_op1, cmp_op2)));
6252 if (neg_p)
6253 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6254 }
6255
6256 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6257 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6258 elements in CMP1 and CMP2 fulfill the comparison. */
6259 void
6260 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6261 rtx cmp1, rtx cmp2, bool all_p)
6262 {
6263 enum rtx_code new_code = code;
6264 machine_mode cmp_mode, full_cmp_mode, scratch_mode;
6265 rtx tmp_reg = gen_reg_rtx (SImode);
6266 bool swap_p = false;
6267
6268 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6269 {
6270 switch (code)
6271 {
6272 case EQ: cmp_mode = CCVEQmode; break;
6273 case NE: cmp_mode = CCVEQmode; break;
6274 case GT: cmp_mode = CCVHmode; break;
6275 case GE: cmp_mode = CCVHmode; new_code = LE; swap_p = true; break;
6276 case LT: cmp_mode = CCVHmode; new_code = GT; swap_p = true; break;
6277 case LE: cmp_mode = CCVHmode; new_code = LE; break;
6278 case GTU: cmp_mode = CCVHUmode; break;
6279 case GEU: cmp_mode = CCVHUmode; new_code = LEU; swap_p = true; break;
6280 case LTU: cmp_mode = CCVHUmode; new_code = GTU; swap_p = true; break;
6281 case LEU: cmp_mode = CCVHUmode; new_code = LEU; break;
6282 default: gcc_unreachable ();
6283 }
6284 scratch_mode = GET_MODE (cmp1);
6285 }
6286 else if (GET_MODE (cmp1) == V2DFmode)
6287 {
6288 switch (code)
6289 {
6290 case EQ: cmp_mode = CCVEQmode; break;
6291 case NE: cmp_mode = CCVEQmode; break;
6292 case GT: cmp_mode = CCVFHmode; break;
6293 case GE: cmp_mode = CCVFHEmode; break;
6294 case UNLE: cmp_mode = CCVFHmode; break;
6295 case UNLT: cmp_mode = CCVFHEmode; break;
6296 case LT: cmp_mode = CCVFHmode; new_code = GT; swap_p = true; break;
6297 case LE: cmp_mode = CCVFHEmode; new_code = GE; swap_p = true; break;
6298 default: gcc_unreachable ();
6299 }
6300 scratch_mode = V2DImode;
6301 }
6302 else
6303 gcc_unreachable ();
6304
6305 if (!all_p)
6306 switch (cmp_mode)
6307 {
6308 case CCVEQmode: full_cmp_mode = CCVEQANYmode; break;
6309 case CCVHmode: full_cmp_mode = CCVHANYmode; break;
6310 case CCVHUmode: full_cmp_mode = CCVHUANYmode; break;
6311 case CCVFHmode: full_cmp_mode = CCVFHANYmode; break;
6312 case CCVFHEmode: full_cmp_mode = CCVFHEANYmode; break;
6313 default: gcc_unreachable ();
6314 }
6315 else
6316 /* The modes without ANY match the ALL modes. */
6317 full_cmp_mode = cmp_mode;
6318
6319 if (swap_p)
6320 {
6321 rtx tmp = cmp2;
6322 cmp2 = cmp1;
6323 cmp1 = tmp;
6324 }
6325
6326 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6327 gen_rtvec (2, gen_rtx_SET (
6328 gen_rtx_REG (cmp_mode, CC_REGNUM),
6329 gen_rtx_COMPARE (cmp_mode, cmp1, cmp2)),
6330 gen_rtx_CLOBBER (VOIDmode,
6331 gen_rtx_SCRATCH (scratch_mode)))));
6332 emit_move_insn (target, const0_rtx);
6333 emit_move_insn (tmp_reg, const1_rtx);
6334
6335 emit_move_insn (target,
6336 gen_rtx_IF_THEN_ELSE (SImode,
6337 gen_rtx_fmt_ee (new_code, VOIDmode,
6338 gen_rtx_REG (full_cmp_mode, CC_REGNUM),
6339 const0_rtx),
6340 target, tmp_reg));
6341 }
6342
6343 /* Generate a vector comparison expression loading either elements of
6344 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6345 and CMP_OP2. */
6346
6347 void
6348 s390_expand_vcond (rtx target, rtx then, rtx els,
6349 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6350 {
6351 rtx tmp;
6352 machine_mode result_mode;
6353 rtx result_target;
6354
6355 machine_mode target_mode = GET_MODE (target);
6356 machine_mode cmp_mode = GET_MODE (cmp_op1);
6357 rtx op = (cond == LT) ? els : then;
6358
6359 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6360 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6361 for short and byte (x >> 15 and x >> 7 respectively). */
6362 if ((cond == LT || cond == GE)
6363 && target_mode == cmp_mode
6364 && cmp_op2 == CONST0_RTX (cmp_mode)
6365 && op == CONST0_RTX (target_mode)
6366 && s390_vector_mode_supported_p (target_mode)
6367 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6368 {
6369 rtx negop = (cond == LT) ? then : els;
6370
6371 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6372
6373 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6374 if (negop == CONST1_RTX (target_mode))
6375 {
6376 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6377 GEN_INT (shift), target,
6378 1, OPTAB_DIRECT);
6379 if (res != target)
6380 emit_move_insn (target, res);
6381 return;
6382 }
6383
6384 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6385 else if (all_ones_operand (negop, target_mode))
6386 {
6387 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6388 GEN_INT (shift), target,
6389 0, OPTAB_DIRECT);
6390 if (res != target)
6391 emit_move_insn (target, res);
6392 return;
6393 }
6394 }
6395
6396 /* We always use an integral type vector to hold the comparison
6397 result. */
6398 result_mode = cmp_mode == V2DFmode ? V2DImode : cmp_mode;
6399 result_target = gen_reg_rtx (result_mode);
6400
6401 /* We allow vector immediates as comparison operands that
6402 can be handled by the optimization above but not by the
6403 following code. Hence, force them into registers here. */
6404 if (!REG_P (cmp_op1))
6405 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6406
6407 if (!REG_P (cmp_op2))
6408 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6409
6410 s390_expand_vec_compare (result_target, cond,
6411 cmp_op1, cmp_op2);
6412
6413 /* If the results are supposed to be either -1 or 0 we are done
6414 since this is what our compare instructions generate anyway. */
6415 if (all_ones_operand (then, GET_MODE (then))
6416 && const0_operand (els, GET_MODE (els)))
6417 {
6418 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6419 result_target, 0));
6420 return;
6421 }
6422
6423 /* Otherwise we will do a vsel afterwards. */
6424 /* This gets triggered e.g.
6425 with gcc.c-torture/compile/pr53410-1.c */
6426 if (!REG_P (then))
6427 then = force_reg (target_mode, then);
6428
6429 if (!REG_P (els))
6430 els = force_reg (target_mode, els);
6431
6432 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6433 result_target,
6434 CONST0_RTX (result_mode));
6435
6436 /* We compared the result against zero above so we have to swap then
6437 and els here. */
6438 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6439
6440 gcc_assert (target_mode == GET_MODE (then));
6441 emit_insn (gen_rtx_SET (target, tmp));
6442 }
6443
6444 /* Emit the RTX necessary to initialize the vector TARGET with values
6445 in VALS. */
6446 void
6447 s390_expand_vec_init (rtx target, rtx vals)
6448 {
6449 machine_mode mode = GET_MODE (target);
6450 machine_mode inner_mode = GET_MODE_INNER (mode);
6451 int n_elts = GET_MODE_NUNITS (mode);
6452 bool all_same = true, all_regs = true, all_const_int = true;
6453 rtx x;
6454 int i;
6455
6456 for (i = 0; i < n_elts; ++i)
6457 {
6458 x = XVECEXP (vals, 0, i);
6459
6460 if (!CONST_INT_P (x))
6461 all_const_int = false;
6462
6463 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6464 all_same = false;
6465
6466 if (!REG_P (x))
6467 all_regs = false;
6468 }
6469
6470 /* Use vector gen mask or vector gen byte mask if possible. */
6471 if (all_same && all_const_int
6472 && (XVECEXP (vals, 0, 0) == const0_rtx
6473 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6474 NULL, NULL)
6475 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6476 {
6477 emit_insn (gen_rtx_SET (target,
6478 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6479 return;
6480 }
6481
6482 if (all_same)
6483 {
6484 emit_insn (gen_rtx_SET (target,
6485 gen_rtx_VEC_DUPLICATE (mode,
6486 XVECEXP (vals, 0, 0))));
6487 return;
6488 }
6489
6490 if (all_regs && REG_P (target) && n_elts == 2 && inner_mode == DImode)
6491 {
6492 /* Use vector load pair. */
6493 emit_insn (gen_rtx_SET (target,
6494 gen_rtx_VEC_CONCAT (mode,
6495 XVECEXP (vals, 0, 0),
6496 XVECEXP (vals, 0, 1))));
6497 return;
6498 }
6499
6500 /* We are about to set the vector elements one by one. Zero out the
6501 full register first in order to help the data flow framework to
6502 detect it as full VR set. */
6503 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6504
6505 /* Unfortunately the vec_init expander is not allowed to fail. So
6506 we have to implement the fallback ourselves. */
6507 for (i = 0; i < n_elts; i++)
6508 {
6509 rtx elem = XVECEXP (vals, 0, i);
6510 if (!general_operand (elem, GET_MODE (elem)))
6511 elem = force_reg (inner_mode, elem);
6512
6513 emit_insn (gen_rtx_SET (target,
6514 gen_rtx_UNSPEC (mode,
6515 gen_rtvec (3, elem,
6516 GEN_INT (i), target),
6517 UNSPEC_VEC_SET)));
6518 }
6519 }
6520
6521 /* Structure to hold the initial parameters for a compare_and_swap operation
6522 in HImode and QImode. */
6523
6524 struct alignment_context
6525 {
6526 rtx memsi; /* SI aligned memory location. */
6527 rtx shift; /* Bit offset with regard to lsb. */
6528 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6529 rtx modemaski; /* ~modemask */
6530 bool aligned; /* True if memory is aligned, false else. */
6531 };
6532
6533 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6534 structure AC for transparent simplifying, if the memory alignment is known
6535 to be at least 32bit. MEM is the memory location for the actual operation
6536 and MODE its mode. */
6537
6538 static void
6539 init_alignment_context (struct alignment_context *ac, rtx mem,
6540 machine_mode mode)
6541 {
6542 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6543 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6544
6545 if (ac->aligned)
6546 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6547 else
6548 {
6549 /* Alignment is unknown. */
6550 rtx byteoffset, addr, align;
6551
6552 /* Force the address into a register. */
6553 addr = force_reg (Pmode, XEXP (mem, 0));
6554
6555 /* Align it to SImode. */
6556 align = expand_simple_binop (Pmode, AND, addr,
6557 GEN_INT (-GET_MODE_SIZE (SImode)),
6558 NULL_RTX, 1, OPTAB_DIRECT);
6559 /* Generate MEM. */
6560 ac->memsi = gen_rtx_MEM (SImode, align);
6561 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6562 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6563 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6564
6565 /* Calculate shiftcount. */
6566 byteoffset = expand_simple_binop (Pmode, AND, addr,
6567 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6568 NULL_RTX, 1, OPTAB_DIRECT);
6569 /* As we already have some offset, evaluate the remaining distance. */
6570 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6571 NULL_RTX, 1, OPTAB_DIRECT);
6572 }
6573
6574 /* Shift is the byte count, but we need the bitcount. */
6575 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6576 NULL_RTX, 1, OPTAB_DIRECT);
6577
6578 /* Calculate masks. */
6579 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6580 GEN_INT (GET_MODE_MASK (mode)),
6581 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6582 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6583 NULL_RTX, 1);
6584 }
6585
6586 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6587 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6588 perform the merge in SEQ2. */
6589
6590 static rtx
6591 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6592 machine_mode mode, rtx val, rtx ins)
6593 {
6594 rtx tmp;
6595
6596 if (ac->aligned)
6597 {
6598 start_sequence ();
6599 tmp = copy_to_mode_reg (SImode, val);
6600 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6601 const0_rtx, ins))
6602 {
6603 *seq1 = NULL;
6604 *seq2 = get_insns ();
6605 end_sequence ();
6606 return tmp;
6607 }
6608 end_sequence ();
6609 }
6610
6611 /* Failed to use insv. Generate a two part shift and mask. */
6612 start_sequence ();
6613 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6614 *seq1 = get_insns ();
6615 end_sequence ();
6616
6617 start_sequence ();
6618 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6619 *seq2 = get_insns ();
6620 end_sequence ();
6621
6622 return tmp;
6623 }
6624
6625 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6626 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6627 value to set if CMP == MEM. */
6628
6629 void
6630 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6631 rtx cmp, rtx new_rtx, bool is_weak)
6632 {
6633 struct alignment_context ac;
6634 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6635 rtx res = gen_reg_rtx (SImode);
6636 rtx_code_label *csloop = NULL, *csend = NULL;
6637
6638 gcc_assert (MEM_P (mem));
6639
6640 init_alignment_context (&ac, mem, mode);
6641
6642 /* Load full word. Subsequent loads are performed by CS. */
6643 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6644 NULL_RTX, 1, OPTAB_DIRECT);
6645
6646 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6647 possible, we try to use insv to make this happen efficiently. If
6648 that fails we'll generate code both inside and outside the loop. */
6649 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6650 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6651
6652 if (seq0)
6653 emit_insn (seq0);
6654 if (seq1)
6655 emit_insn (seq1);
6656
6657 /* Start CS loop. */
6658 if (!is_weak)
6659 {
6660 /* Begin assuming success. */
6661 emit_move_insn (btarget, const1_rtx);
6662
6663 csloop = gen_label_rtx ();
6664 csend = gen_label_rtx ();
6665 emit_label (csloop);
6666 }
6667
6668 /* val = "<mem>00..0<mem>"
6669 * cmp = "00..0<cmp>00..0"
6670 * new = "00..0<new>00..0"
6671 */
6672
6673 emit_insn (seq2);
6674 emit_insn (seq3);
6675
6676 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
6677 if (is_weak)
6678 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6679 else
6680 {
6681 rtx tmp;
6682
6683 /* Jump to end if we're done (likely?). */
6684 s390_emit_jump (csend, cc);
6685
6686 /* Check for changes outside mode, and loop internal if so.
6687 Arrange the moves so that the compare is adjacent to the
6688 branch so that we can generate CRJ. */
6689 tmp = copy_to_reg (val);
6690 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6691 1, OPTAB_DIRECT);
6692 cc = s390_emit_compare (NE, val, tmp);
6693 s390_emit_jump (csloop, cc);
6694
6695 /* Failed. */
6696 emit_move_insn (btarget, const0_rtx);
6697 emit_label (csend);
6698 }
6699
6700 /* Return the correct part of the bitfield. */
6701 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
6702 NULL_RTX, 1, OPTAB_DIRECT), 1);
6703 }
6704
6705 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
6706 and VAL the value to play with. If AFTER is true then store the value
6707 MEM holds after the operation, if AFTER is false then store the value MEM
6708 holds before the operation. If TARGET is zero then discard that value, else
6709 store it to TARGET. */
6710
6711 void
6712 s390_expand_atomic (machine_mode mode, enum rtx_code code,
6713 rtx target, rtx mem, rtx val, bool after)
6714 {
6715 struct alignment_context ac;
6716 rtx cmp;
6717 rtx new_rtx = gen_reg_rtx (SImode);
6718 rtx orig = gen_reg_rtx (SImode);
6719 rtx_code_label *csloop = gen_label_rtx ();
6720
6721 gcc_assert (!target || register_operand (target, VOIDmode));
6722 gcc_assert (MEM_P (mem));
6723
6724 init_alignment_context (&ac, mem, mode);
6725
6726 /* Shift val to the correct bit positions.
6727 Preserve "icm", but prevent "ex icm". */
6728 if (!(ac.aligned && code == SET && MEM_P (val)))
6729 val = s390_expand_mask_and_shift (val, mode, ac.shift);
6730
6731 /* Further preparation insns. */
6732 if (code == PLUS || code == MINUS)
6733 emit_move_insn (orig, val);
6734 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
6735 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
6736 NULL_RTX, 1, OPTAB_DIRECT);
6737
6738 /* Load full word. Subsequent loads are performed by CS. */
6739 cmp = force_reg (SImode, ac.memsi);
6740
6741 /* Start CS loop. */
6742 emit_label (csloop);
6743 emit_move_insn (new_rtx, cmp);
6744
6745 /* Patch new with val at correct position. */
6746 switch (code)
6747 {
6748 case PLUS:
6749 case MINUS:
6750 val = expand_simple_binop (SImode, code, new_rtx, orig,
6751 NULL_RTX, 1, OPTAB_DIRECT);
6752 val = expand_simple_binop (SImode, AND, val, ac.modemask,
6753 NULL_RTX, 1, OPTAB_DIRECT);
6754 /* FALLTHRU */
6755 case SET:
6756 if (ac.aligned && MEM_P (val))
6757 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
6758 0, 0, SImode, val, false);
6759 else
6760 {
6761 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
6762 NULL_RTX, 1, OPTAB_DIRECT);
6763 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
6764 NULL_RTX, 1, OPTAB_DIRECT);
6765 }
6766 break;
6767 case AND:
6768 case IOR:
6769 case XOR:
6770 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
6771 NULL_RTX, 1, OPTAB_DIRECT);
6772 break;
6773 case MULT: /* NAND */
6774 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
6775 NULL_RTX, 1, OPTAB_DIRECT);
6776 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
6777 NULL_RTX, 1, OPTAB_DIRECT);
6778 break;
6779 default:
6780 gcc_unreachable ();
6781 }
6782
6783 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
6784 ac.memsi, cmp, new_rtx));
6785
6786 /* Return the correct part of the bitfield. */
6787 if (target)
6788 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
6789 after ? new_rtx : cmp, ac.shift,
6790 NULL_RTX, 1, OPTAB_DIRECT), 1);
6791 }
6792
6793 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6794 We need to emit DTP-relative relocations. */
6795
6796 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6797
6798 static void
6799 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
6800 {
6801 switch (size)
6802 {
6803 case 4:
6804 fputs ("\t.long\t", file);
6805 break;
6806 case 8:
6807 fputs ("\t.quad\t", file);
6808 break;
6809 default:
6810 gcc_unreachable ();
6811 }
6812 output_addr_const (file, x);
6813 fputs ("@DTPOFF", file);
6814 }
6815
6816 /* Return the proper mode for REGNO being represented in the dwarf
6817 unwind table. */
6818 machine_mode
6819 s390_dwarf_frame_reg_mode (int regno)
6820 {
6821 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
6822
6823 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
6824 if (GENERAL_REGNO_P (regno))
6825 save_mode = Pmode;
6826
6827 /* The rightmost 64 bits of vector registers are call-clobbered. */
6828 if (GET_MODE_SIZE (save_mode) > 8)
6829 save_mode = DImode;
6830
6831 return save_mode;
6832 }
6833
6834 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
6835 /* Implement TARGET_MANGLE_TYPE. */
6836
6837 static const char *
6838 s390_mangle_type (const_tree type)
6839 {
6840 type = TYPE_MAIN_VARIANT (type);
6841
6842 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
6843 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
6844 return NULL;
6845
6846 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
6847 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
6848 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
6849 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
6850
6851 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
6852 && TARGET_LONG_DOUBLE_128)
6853 return "g";
6854
6855 /* For all other types, use normal C++ mangling. */
6856 return NULL;
6857 }
6858 #endif
6859
6860 /* In the name of slightly smaller debug output, and to cater to
6861 general assembler lossage, recognize various UNSPEC sequences
6862 and turn them back into a direct symbol reference. */
6863
6864 static rtx
6865 s390_delegitimize_address (rtx orig_x)
6866 {
6867 rtx x, y;
6868
6869 orig_x = delegitimize_mem_from_attrs (orig_x);
6870 x = orig_x;
6871
6872 /* Extract the symbol ref from:
6873 (plus:SI (reg:SI 12 %r12)
6874 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
6875 UNSPEC_GOTOFF/PLTOFF)))
6876 and
6877 (plus:SI (reg:SI 12 %r12)
6878 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
6879 UNSPEC_GOTOFF/PLTOFF)
6880 (const_int 4 [0x4])))) */
6881 if (GET_CODE (x) == PLUS
6882 && REG_P (XEXP (x, 0))
6883 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
6884 && GET_CODE (XEXP (x, 1)) == CONST)
6885 {
6886 HOST_WIDE_INT offset = 0;
6887
6888 /* The const operand. */
6889 y = XEXP (XEXP (x, 1), 0);
6890
6891 if (GET_CODE (y) == PLUS
6892 && GET_CODE (XEXP (y, 1)) == CONST_INT)
6893 {
6894 offset = INTVAL (XEXP (y, 1));
6895 y = XEXP (y, 0);
6896 }
6897
6898 if (GET_CODE (y) == UNSPEC
6899 && (XINT (y, 1) == UNSPEC_GOTOFF
6900 || XINT (y, 1) == UNSPEC_PLTOFF))
6901 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
6902 }
6903
6904 if (GET_CODE (x) != MEM)
6905 return orig_x;
6906
6907 x = XEXP (x, 0);
6908 if (GET_CODE (x) == PLUS
6909 && GET_CODE (XEXP (x, 1)) == CONST
6910 && GET_CODE (XEXP (x, 0)) == REG
6911 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6912 {
6913 y = XEXP (XEXP (x, 1), 0);
6914 if (GET_CODE (y) == UNSPEC
6915 && XINT (y, 1) == UNSPEC_GOT)
6916 y = XVECEXP (y, 0, 0);
6917 else
6918 return orig_x;
6919 }
6920 else if (GET_CODE (x) == CONST)
6921 {
6922 /* Extract the symbol ref from:
6923 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
6924 UNSPEC_PLT/GOTENT))) */
6925
6926 y = XEXP (x, 0);
6927 if (GET_CODE (y) == UNSPEC
6928 && (XINT (y, 1) == UNSPEC_GOTENT
6929 || XINT (y, 1) == UNSPEC_PLT))
6930 y = XVECEXP (y, 0, 0);
6931 else
6932 return orig_x;
6933 }
6934 else
6935 return orig_x;
6936
6937 if (GET_MODE (orig_x) != Pmode)
6938 {
6939 if (GET_MODE (orig_x) == BLKmode)
6940 return orig_x;
6941 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
6942 if (y == NULL_RTX)
6943 return orig_x;
6944 }
6945 return y;
6946 }
6947
6948 /* Output operand OP to stdio stream FILE.
6949 OP is an address (register + offset) which is not used to address data;
6950 instead the rightmost bits are interpreted as the value. */
6951
6952 static void
6953 print_addrstyle_operand (FILE *file, rtx op)
6954 {
6955 HOST_WIDE_INT offset;
6956 rtx base;
6957
6958 /* Extract base register and offset. */
6959 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
6960 gcc_unreachable ();
6961
6962 /* Sanity check. */
6963 if (base)
6964 {
6965 gcc_assert (GET_CODE (base) == REG);
6966 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
6967 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
6968 }
6969
6970 /* Offsets are constricted to twelve bits. */
6971 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
6972 if (base)
6973 fprintf (file, "(%s)", reg_names[REGNO (base)]);
6974 }
6975
6976 /* Assigns the number of NOP halfwords to be emitted before and after the
6977 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
6978 If hotpatching is disabled for the function, the values are set to zero.
6979 */
6980
6981 static void
6982 s390_function_num_hotpatch_hw (tree decl,
6983 int *hw_before,
6984 int *hw_after)
6985 {
6986 tree attr;
6987
6988 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
6989
6990 /* Handle the arguments of the hotpatch attribute. The values
6991 specified via attribute might override the cmdline argument
6992 values. */
6993 if (attr)
6994 {
6995 tree args = TREE_VALUE (attr);
6996
6997 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
6998 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
6999 }
7000 else
7001 {
7002 /* Use the values specified by the cmdline arguments. */
7003 *hw_before = s390_hotpatch_hw_before_label;
7004 *hw_after = s390_hotpatch_hw_after_label;
7005 }
7006 }
7007
7008 /* Write the current .machine and .machinemode specification to the assembler
7009 file. */
7010
7011 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7012 static void
7013 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7014 {
7015 fprintf (asm_out_file, "\t.machinemode %s\n",
7016 (TARGET_ZARCH) ? "zarch" : "esa");
7017 fprintf (asm_out_file, "\t.machine \"%s", processor_table[s390_arch].name);
7018 if (S390_USE_ARCHITECTURE_MODIFIERS)
7019 {
7020 int cpu_flags;
7021
7022 cpu_flags = processor_flags_table[(int) s390_arch];
7023 if (TARGET_HTM && !(cpu_flags & PF_TX))
7024 fprintf (asm_out_file, "+htm");
7025 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7026 fprintf (asm_out_file, "+nohtm");
7027 if (TARGET_VX && !(cpu_flags & PF_VX))
7028 fprintf (asm_out_file, "+vx");
7029 else if (!TARGET_VX && (cpu_flags & PF_VX))
7030 fprintf (asm_out_file, "+novx");
7031 }
7032 fprintf (asm_out_file, "\"\n");
7033 }
7034
7035 /* Write an extra function header before the very start of the function. */
7036
7037 void
7038 s390_asm_output_function_prefix (FILE *asm_out_file,
7039 const char *fnname ATTRIBUTE_UNUSED)
7040 {
7041 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7042 return;
7043 /* Since only the function specific options are saved but not the indications
7044 which options are set, it's too much work here to figure out which options
7045 have actually changed. Thus, generate .machine and .machinemode whenever a
7046 function has the target attribute or pragma. */
7047 fprintf (asm_out_file, "\t.machinemode push\n");
7048 fprintf (asm_out_file, "\t.machine push\n");
7049 s390_asm_output_machine_for_arch (asm_out_file);
7050 }
7051
7052 /* Write an extra function footer after the very end of the function. */
7053
7054 void
7055 s390_asm_declare_function_size (FILE *asm_out_file,
7056 const char *fnname, tree decl)
7057 {
7058 if (!flag_inhibit_size_directive)
7059 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7060 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7061 return;
7062 fprintf (asm_out_file, "\t.machine pop\n");
7063 fprintf (asm_out_file, "\t.machinemode pop\n");
7064 }
7065 #endif
7066
7067 /* Write the extra assembler code needed to declare a function properly. */
7068
7069 void
7070 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7071 tree decl)
7072 {
7073 int hw_before, hw_after;
7074
7075 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7076 if (hw_before > 0)
7077 {
7078 unsigned int function_alignment;
7079 int i;
7080
7081 /* Add a trampoline code area before the function label and initialize it
7082 with two-byte nop instructions. This area can be overwritten with code
7083 that jumps to a patched version of the function. */
7084 asm_fprintf (asm_out_file, "\tnopr\t%%r7"
7085 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7086 hw_before);
7087 for (i = 1; i < hw_before; i++)
7088 fputs ("\tnopr\t%r7\n", asm_out_file);
7089
7090 /* Note: The function label must be aligned so that (a) the bytes of the
7091 following nop do not cross a cacheline boundary, and (b) a jump address
7092 (eight bytes for 64 bit targets, 4 bytes for 32 bit targets) can be
7093 stored directly before the label without crossing a cacheline
7094 boundary. All this is necessary to make sure the trampoline code can
7095 be changed atomically.
7096 This alignment is done automatically using the FOUNCTION_BOUNDARY, but
7097 if there are NOPs before the function label, the alignment is placed
7098 before them. So it is necessary to duplicate the alignment after the
7099 NOPs. */
7100 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7101 if (! DECL_USER_ALIGN (decl))
7102 function_alignment = MAX (function_alignment,
7103 (unsigned int) align_functions);
7104 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7105 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
7106 }
7107
7108 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7109 {
7110 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7111 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7112 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7113 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7114 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7115 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7116 s390_warn_framesize);
7117 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7118 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7119 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7120 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7121 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7122 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7123 TARGET_PACKED_STACK);
7124 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7125 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7126 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7127 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7128 s390_warn_dynamicstack_p);
7129 }
7130 ASM_OUTPUT_LABEL (asm_out_file, fname);
7131 if (hw_after > 0)
7132 asm_fprintf (asm_out_file,
7133 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7134 hw_after);
7135 }
7136
7137 /* Output machine-dependent UNSPECs occurring in address constant X
7138 in assembler syntax to stdio stream FILE. Returns true if the
7139 constant X could be recognized, false otherwise. */
7140
7141 static bool
7142 s390_output_addr_const_extra (FILE *file, rtx x)
7143 {
7144 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7145 switch (XINT (x, 1))
7146 {
7147 case UNSPEC_GOTENT:
7148 output_addr_const (file, XVECEXP (x, 0, 0));
7149 fprintf (file, "@GOTENT");
7150 return true;
7151 case UNSPEC_GOT:
7152 output_addr_const (file, XVECEXP (x, 0, 0));
7153 fprintf (file, "@GOT");
7154 return true;
7155 case UNSPEC_GOTOFF:
7156 output_addr_const (file, XVECEXP (x, 0, 0));
7157 fprintf (file, "@GOTOFF");
7158 return true;
7159 case UNSPEC_PLT:
7160 output_addr_const (file, XVECEXP (x, 0, 0));
7161 fprintf (file, "@PLT");
7162 return true;
7163 case UNSPEC_PLTOFF:
7164 output_addr_const (file, XVECEXP (x, 0, 0));
7165 fprintf (file, "@PLTOFF");
7166 return true;
7167 case UNSPEC_TLSGD:
7168 output_addr_const (file, XVECEXP (x, 0, 0));
7169 fprintf (file, "@TLSGD");
7170 return true;
7171 case UNSPEC_TLSLDM:
7172 assemble_name (file, get_some_local_dynamic_name ());
7173 fprintf (file, "@TLSLDM");
7174 return true;
7175 case UNSPEC_DTPOFF:
7176 output_addr_const (file, XVECEXP (x, 0, 0));
7177 fprintf (file, "@DTPOFF");
7178 return true;
7179 case UNSPEC_NTPOFF:
7180 output_addr_const (file, XVECEXP (x, 0, 0));
7181 fprintf (file, "@NTPOFF");
7182 return true;
7183 case UNSPEC_GOTNTPOFF:
7184 output_addr_const (file, XVECEXP (x, 0, 0));
7185 fprintf (file, "@GOTNTPOFF");
7186 return true;
7187 case UNSPEC_INDNTPOFF:
7188 output_addr_const (file, XVECEXP (x, 0, 0));
7189 fprintf (file, "@INDNTPOFF");
7190 return true;
7191 }
7192
7193 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7194 switch (XINT (x, 1))
7195 {
7196 case UNSPEC_POOL_OFFSET:
7197 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7198 output_addr_const (file, x);
7199 return true;
7200 }
7201 return false;
7202 }
7203
7204 /* Output address operand ADDR in assembler syntax to
7205 stdio stream FILE. */
7206
7207 void
7208 print_operand_address (FILE *file, rtx addr)
7209 {
7210 struct s390_address ad;
7211
7212 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7213 {
7214 if (!TARGET_Z10)
7215 {
7216 output_operand_lossage ("symbolic memory references are "
7217 "only supported on z10 or later");
7218 return;
7219 }
7220 output_addr_const (file, addr);
7221 return;
7222 }
7223
7224 if (!s390_decompose_address (addr, &ad)
7225 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7226 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7227 output_operand_lossage ("cannot decompose address");
7228
7229 if (ad.disp)
7230 output_addr_const (file, ad.disp);
7231 else
7232 fprintf (file, "0");
7233
7234 if (ad.base && ad.indx)
7235 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7236 reg_names[REGNO (ad.base)]);
7237 else if (ad.base)
7238 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7239 }
7240
7241 /* Output operand X in assembler syntax to stdio stream FILE.
7242 CODE specified the format flag. The following format flags
7243 are recognized:
7244
7245 'C': print opcode suffix for branch condition.
7246 'D': print opcode suffix for inverse branch condition.
7247 'E': print opcode suffix for branch on index instruction.
7248 'G': print the size of the operand in bytes.
7249 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7250 'M': print the second word of a TImode operand.
7251 'N': print the second word of a DImode operand.
7252 'O': print only the displacement of a memory reference or address.
7253 'R': print only the base register of a memory reference or address.
7254 'S': print S-type memory reference (base+displacement).
7255 'Y': print address style operand without index (e.g. shift count or setmem
7256 operand).
7257
7258 'b': print integer X as if it's an unsigned byte.
7259 'c': print integer X as if it's an signed byte.
7260 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
7261 'f': "end" contiguous bitmask X in SImode.
7262 'h': print integer X as if it's a signed halfword.
7263 'i': print the first nonzero HImode part of X.
7264 'j': print the first HImode part unequal to -1 of X.
7265 'k': print the first nonzero SImode part of X.
7266 'm': print the first SImode part unequal to -1 of X.
7267 'o': print integer X as if it's an unsigned 32bit word.
7268 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7269 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7270 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7271 'x': print integer X as if it's an unsigned halfword.
7272 'v': print register number as vector register (v1 instead of f1).
7273 */
7274
7275 void
7276 print_operand (FILE *file, rtx x, int code)
7277 {
7278 HOST_WIDE_INT ival;
7279
7280 switch (code)
7281 {
7282 case 'C':
7283 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7284 return;
7285
7286 case 'D':
7287 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7288 return;
7289
7290 case 'E':
7291 if (GET_CODE (x) == LE)
7292 fprintf (file, "l");
7293 else if (GET_CODE (x) == GT)
7294 fprintf (file, "h");
7295 else
7296 output_operand_lossage ("invalid comparison operator "
7297 "for 'E' output modifier");
7298 return;
7299
7300 case 'J':
7301 if (GET_CODE (x) == SYMBOL_REF)
7302 {
7303 fprintf (file, "%s", ":tls_load:");
7304 output_addr_const (file, x);
7305 }
7306 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7307 {
7308 fprintf (file, "%s", ":tls_gdcall:");
7309 output_addr_const (file, XVECEXP (x, 0, 0));
7310 }
7311 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7312 {
7313 fprintf (file, "%s", ":tls_ldcall:");
7314 const char *name = get_some_local_dynamic_name ();
7315 gcc_assert (name);
7316 assemble_name (file, name);
7317 }
7318 else
7319 output_operand_lossage ("invalid reference for 'J' output modifier");
7320 return;
7321
7322 case 'G':
7323 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7324 return;
7325
7326 case 'O':
7327 {
7328 struct s390_address ad;
7329 int ret;
7330
7331 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7332
7333 if (!ret
7334 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7335 || ad.indx)
7336 {
7337 output_operand_lossage ("invalid address for 'O' output modifier");
7338 return;
7339 }
7340
7341 if (ad.disp)
7342 output_addr_const (file, ad.disp);
7343 else
7344 fprintf (file, "0");
7345 }
7346 return;
7347
7348 case 'R':
7349 {
7350 struct s390_address ad;
7351 int ret;
7352
7353 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7354
7355 if (!ret
7356 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7357 || ad.indx)
7358 {
7359 output_operand_lossage ("invalid address for 'R' output modifier");
7360 return;
7361 }
7362
7363 if (ad.base)
7364 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7365 else
7366 fprintf (file, "0");
7367 }
7368 return;
7369
7370 case 'S':
7371 {
7372 struct s390_address ad;
7373 int ret;
7374
7375 if (!MEM_P (x))
7376 {
7377 output_operand_lossage ("memory reference expected for "
7378 "'S' output modifier");
7379 return;
7380 }
7381 ret = s390_decompose_address (XEXP (x, 0), &ad);
7382
7383 if (!ret
7384 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7385 || ad.indx)
7386 {
7387 output_operand_lossage ("invalid address for 'S' output modifier");
7388 return;
7389 }
7390
7391 if (ad.disp)
7392 output_addr_const (file, ad.disp);
7393 else
7394 fprintf (file, "0");
7395
7396 if (ad.base)
7397 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7398 }
7399 return;
7400
7401 case 'N':
7402 if (GET_CODE (x) == REG)
7403 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7404 else if (GET_CODE (x) == MEM)
7405 x = change_address (x, VOIDmode,
7406 plus_constant (Pmode, XEXP (x, 0), 4));
7407 else
7408 output_operand_lossage ("register or memory expression expected "
7409 "for 'N' output modifier");
7410 break;
7411
7412 case 'M':
7413 if (GET_CODE (x) == REG)
7414 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7415 else if (GET_CODE (x) == MEM)
7416 x = change_address (x, VOIDmode,
7417 plus_constant (Pmode, XEXP (x, 0), 8));
7418 else
7419 output_operand_lossage ("register or memory expression expected "
7420 "for 'M' output modifier");
7421 break;
7422
7423 case 'Y':
7424 print_addrstyle_operand (file, x);
7425 return;
7426 }
7427
7428 switch (GET_CODE (x))
7429 {
7430 case REG:
7431 /* Print FP regs as fx instead of vx when they are accessed
7432 through non-vector mode. */
7433 if (code == 'v'
7434 || VECTOR_NOFP_REG_P (x)
7435 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7436 || (VECTOR_REG_P (x)
7437 && (GET_MODE_SIZE (GET_MODE (x)) /
7438 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7439 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7440 else
7441 fprintf (file, "%s", reg_names[REGNO (x)]);
7442 break;
7443
7444 case MEM:
7445 output_address (GET_MODE (x), XEXP (x, 0));
7446 break;
7447
7448 case CONST:
7449 case CODE_LABEL:
7450 case LABEL_REF:
7451 case SYMBOL_REF:
7452 output_addr_const (file, x);
7453 break;
7454
7455 case CONST_INT:
7456 ival = INTVAL (x);
7457 switch (code)
7458 {
7459 case 0:
7460 break;
7461 case 'b':
7462 ival &= 0xff;
7463 break;
7464 case 'c':
7465 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7466 break;
7467 case 'x':
7468 ival &= 0xffff;
7469 break;
7470 case 'h':
7471 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7472 break;
7473 case 'i':
7474 ival = s390_extract_part (x, HImode, 0);
7475 break;
7476 case 'j':
7477 ival = s390_extract_part (x, HImode, -1);
7478 break;
7479 case 'k':
7480 ival = s390_extract_part (x, SImode, 0);
7481 break;
7482 case 'm':
7483 ival = s390_extract_part (x, SImode, -1);
7484 break;
7485 case 'o':
7486 ival &= 0xffffffff;
7487 break;
7488 case 'e': case 'f':
7489 case 's': case 't':
7490 {
7491 int start, end;
7492 int len;
7493 bool ok;
7494
7495 len = (code == 's' || code == 'e' ? 64 : 32);
7496 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
7497 gcc_assert (ok);
7498 if (code == 's' || code == 't')
7499 ival = start;
7500 else
7501 ival = end;
7502 }
7503 break;
7504 default:
7505 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7506 }
7507 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7508 break;
7509
7510 case CONST_WIDE_INT:
7511 if (code == 'b')
7512 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7513 CONST_WIDE_INT_ELT (x, 0) & 0xff);
7514 else if (code == 'x')
7515 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7516 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
7517 else if (code == 'h')
7518 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7519 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
7520 else
7521 {
7522 if (code == 0)
7523 output_operand_lossage ("invalid constant - try using "
7524 "an output modifier");
7525 else
7526 output_operand_lossage ("invalid constant for output modifier '%c'",
7527 code);
7528 }
7529 break;
7530 case CONST_VECTOR:
7531 switch (code)
7532 {
7533 case 'h':
7534 gcc_assert (const_vec_duplicate_p (x));
7535 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7536 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7537 break;
7538 case 'e':
7539 case 's':
7540 {
7541 int start, end;
7542 bool ok;
7543
7544 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
7545 gcc_assert (ok);
7546 ival = (code == 's') ? start : end;
7547 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7548 }
7549 break;
7550 case 't':
7551 {
7552 unsigned mask;
7553 bool ok = s390_bytemask_vector_p (x, &mask);
7554 gcc_assert (ok);
7555 fprintf (file, "%u", mask);
7556 }
7557 break;
7558
7559 default:
7560 output_operand_lossage ("invalid constant vector for output "
7561 "modifier '%c'", code);
7562 }
7563 break;
7564
7565 default:
7566 if (code == 0)
7567 output_operand_lossage ("invalid expression - try using "
7568 "an output modifier");
7569 else
7570 output_operand_lossage ("invalid expression for output "
7571 "modifier '%c'", code);
7572 break;
7573 }
7574 }
7575
7576 /* Target hook for assembling integer objects. We need to define it
7577 here to work a round a bug in some versions of GAS, which couldn't
7578 handle values smaller than INT_MIN when printed in decimal. */
7579
7580 static bool
7581 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7582 {
7583 if (size == 8 && aligned_p
7584 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7585 {
7586 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
7587 INTVAL (x));
7588 return true;
7589 }
7590 return default_assemble_integer (x, size, aligned_p);
7591 }
7592
7593 /* Returns true if register REGNO is used for forming
7594 a memory address in expression X. */
7595
7596 static bool
7597 reg_used_in_mem_p (int regno, rtx x)
7598 {
7599 enum rtx_code code = GET_CODE (x);
7600 int i, j;
7601 const char *fmt;
7602
7603 if (code == MEM)
7604 {
7605 if (refers_to_regno_p (regno, XEXP (x, 0)))
7606 return true;
7607 }
7608 else if (code == SET
7609 && GET_CODE (SET_DEST (x)) == PC)
7610 {
7611 if (refers_to_regno_p (regno, SET_SRC (x)))
7612 return true;
7613 }
7614
7615 fmt = GET_RTX_FORMAT (code);
7616 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7617 {
7618 if (fmt[i] == 'e'
7619 && reg_used_in_mem_p (regno, XEXP (x, i)))
7620 return true;
7621
7622 else if (fmt[i] == 'E')
7623 for (j = 0; j < XVECLEN (x, i); j++)
7624 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
7625 return true;
7626 }
7627 return false;
7628 }
7629
7630 /* Returns true if expression DEP_RTX sets an address register
7631 used by instruction INSN to address memory. */
7632
7633 static bool
7634 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
7635 {
7636 rtx target, pat;
7637
7638 if (NONJUMP_INSN_P (dep_rtx))
7639 dep_rtx = PATTERN (dep_rtx);
7640
7641 if (GET_CODE (dep_rtx) == SET)
7642 {
7643 target = SET_DEST (dep_rtx);
7644 if (GET_CODE (target) == STRICT_LOW_PART)
7645 target = XEXP (target, 0);
7646 while (GET_CODE (target) == SUBREG)
7647 target = SUBREG_REG (target);
7648
7649 if (GET_CODE (target) == REG)
7650 {
7651 int regno = REGNO (target);
7652
7653 if (s390_safe_attr_type (insn) == TYPE_LA)
7654 {
7655 pat = PATTERN (insn);
7656 if (GET_CODE (pat) == PARALLEL)
7657 {
7658 gcc_assert (XVECLEN (pat, 0) == 2);
7659 pat = XVECEXP (pat, 0, 0);
7660 }
7661 gcc_assert (GET_CODE (pat) == SET);
7662 return refers_to_regno_p (regno, SET_SRC (pat));
7663 }
7664 else if (get_attr_atype (insn) == ATYPE_AGEN)
7665 return reg_used_in_mem_p (regno, PATTERN (insn));
7666 }
7667 }
7668 return false;
7669 }
7670
7671 /* Return 1, if dep_insn sets register used in insn in the agen unit. */
7672
7673 int
7674 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
7675 {
7676 rtx dep_rtx = PATTERN (dep_insn);
7677 int i;
7678
7679 if (GET_CODE (dep_rtx) == SET
7680 && addr_generation_dependency_p (dep_rtx, insn))
7681 return 1;
7682 else if (GET_CODE (dep_rtx) == PARALLEL)
7683 {
7684 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
7685 {
7686 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
7687 return 1;
7688 }
7689 }
7690 return 0;
7691 }
7692
7693
7694 /* A C statement (sans semicolon) to update the integer scheduling priority
7695 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
7696 reduce the priority to execute INSN later. Do not define this macro if
7697 you do not need to adjust the scheduling priorities of insns.
7698
7699 A STD instruction should be scheduled earlier,
7700 in order to use the bypass. */
7701 static int
7702 s390_adjust_priority (rtx_insn *insn, int priority)
7703 {
7704 if (! INSN_P (insn))
7705 return priority;
7706
7707 if (s390_tune <= PROCESSOR_2064_Z900)
7708 return priority;
7709
7710 switch (s390_safe_attr_type (insn))
7711 {
7712 case TYPE_FSTOREDF:
7713 case TYPE_FSTORESF:
7714 priority = priority << 3;
7715 break;
7716 case TYPE_STORE:
7717 case TYPE_STM:
7718 priority = priority << 1;
7719 break;
7720 default:
7721 break;
7722 }
7723 return priority;
7724 }
7725
7726
7727 /* The number of instructions that can be issued per cycle. */
7728
7729 static int
7730 s390_issue_rate (void)
7731 {
7732 switch (s390_tune)
7733 {
7734 case PROCESSOR_2084_Z990:
7735 case PROCESSOR_2094_Z9_109:
7736 case PROCESSOR_2094_Z9_EC:
7737 case PROCESSOR_2817_Z196:
7738 return 3;
7739 case PROCESSOR_2097_Z10:
7740 return 2;
7741 case PROCESSOR_9672_G5:
7742 case PROCESSOR_9672_G6:
7743 case PROCESSOR_2064_Z900:
7744 /* Starting with EC12 we use the sched_reorder hook to take care
7745 of instruction dispatch constraints. The algorithm only
7746 picks the best instruction and assumes only a single
7747 instruction gets issued per cycle. */
7748 case PROCESSOR_2827_ZEC12:
7749 case PROCESSOR_2964_Z13:
7750 default:
7751 return 1;
7752 }
7753 }
7754
7755 static int
7756 s390_first_cycle_multipass_dfa_lookahead (void)
7757 {
7758 return 4;
7759 }
7760
7761 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
7762 Fix up MEMs as required. */
7763
7764 static void
7765 annotate_constant_pool_refs (rtx *x)
7766 {
7767 int i, j;
7768 const char *fmt;
7769
7770 gcc_assert (GET_CODE (*x) != SYMBOL_REF
7771 || !CONSTANT_POOL_ADDRESS_P (*x));
7772
7773 /* Literal pool references can only occur inside a MEM ... */
7774 if (GET_CODE (*x) == MEM)
7775 {
7776 rtx memref = XEXP (*x, 0);
7777
7778 if (GET_CODE (memref) == SYMBOL_REF
7779 && CONSTANT_POOL_ADDRESS_P (memref))
7780 {
7781 rtx base = cfun->machine->base_reg;
7782 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
7783 UNSPEC_LTREF);
7784
7785 *x = replace_equiv_address (*x, addr);
7786 return;
7787 }
7788
7789 if (GET_CODE (memref) == CONST
7790 && GET_CODE (XEXP (memref, 0)) == PLUS
7791 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
7792 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
7793 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
7794 {
7795 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
7796 rtx sym = XEXP (XEXP (memref, 0), 0);
7797 rtx base = cfun->machine->base_reg;
7798 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7799 UNSPEC_LTREF);
7800
7801 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
7802 return;
7803 }
7804 }
7805
7806 /* ... or a load-address type pattern. */
7807 if (GET_CODE (*x) == SET)
7808 {
7809 rtx addrref = SET_SRC (*x);
7810
7811 if (GET_CODE (addrref) == SYMBOL_REF
7812 && CONSTANT_POOL_ADDRESS_P (addrref))
7813 {
7814 rtx base = cfun->machine->base_reg;
7815 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
7816 UNSPEC_LTREF);
7817
7818 SET_SRC (*x) = addr;
7819 return;
7820 }
7821
7822 if (GET_CODE (addrref) == CONST
7823 && GET_CODE (XEXP (addrref, 0)) == PLUS
7824 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
7825 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
7826 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
7827 {
7828 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
7829 rtx sym = XEXP (XEXP (addrref, 0), 0);
7830 rtx base = cfun->machine->base_reg;
7831 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
7832 UNSPEC_LTREF);
7833
7834 SET_SRC (*x) = plus_constant (Pmode, addr, off);
7835 return;
7836 }
7837 }
7838
7839 /* Annotate LTREL_BASE as well. */
7840 if (GET_CODE (*x) == UNSPEC
7841 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
7842 {
7843 rtx base = cfun->machine->base_reg;
7844 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
7845 UNSPEC_LTREL_BASE);
7846 return;
7847 }
7848
7849 fmt = GET_RTX_FORMAT (GET_CODE (*x));
7850 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
7851 {
7852 if (fmt[i] == 'e')
7853 {
7854 annotate_constant_pool_refs (&XEXP (*x, i));
7855 }
7856 else if (fmt[i] == 'E')
7857 {
7858 for (j = 0; j < XVECLEN (*x, i); j++)
7859 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
7860 }
7861 }
7862 }
7863
7864 /* Split all branches that exceed the maximum distance.
7865 Returns true if this created a new literal pool entry. */
7866
7867 static int
7868 s390_split_branches (void)
7869 {
7870 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
7871 int new_literal = 0, ret;
7872 rtx_insn *insn;
7873 rtx pat, target;
7874 rtx *label;
7875
7876 /* We need correct insn addresses. */
7877
7878 shorten_branches (get_insns ());
7879
7880 /* Find all branches that exceed 64KB, and split them. */
7881
7882 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7883 {
7884 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
7885 continue;
7886
7887 pat = PATTERN (insn);
7888 if (GET_CODE (pat) == PARALLEL)
7889 pat = XVECEXP (pat, 0, 0);
7890 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
7891 continue;
7892
7893 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
7894 {
7895 label = &SET_SRC (pat);
7896 }
7897 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
7898 {
7899 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
7900 label = &XEXP (SET_SRC (pat), 1);
7901 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
7902 label = &XEXP (SET_SRC (pat), 2);
7903 else
7904 continue;
7905 }
7906 else
7907 continue;
7908
7909 if (get_attr_length (insn) <= 4)
7910 continue;
7911
7912 /* We are going to use the return register as scratch register,
7913 make sure it will be saved/restored by the prologue/epilogue. */
7914 cfun_frame_layout.save_return_addr_p = 1;
7915
7916 if (!flag_pic)
7917 {
7918 new_literal = 1;
7919 rtx mem = force_const_mem (Pmode, *label);
7920 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
7921 insn);
7922 INSN_ADDRESSES_NEW (set_insn, -1);
7923 annotate_constant_pool_refs (&PATTERN (set_insn));
7924
7925 target = temp_reg;
7926 }
7927 else
7928 {
7929 new_literal = 1;
7930 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
7931 UNSPEC_LTREL_OFFSET);
7932 target = gen_rtx_CONST (Pmode, target);
7933 target = force_const_mem (Pmode, target);
7934 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
7935 insn);
7936 INSN_ADDRESSES_NEW (set_insn, -1);
7937 annotate_constant_pool_refs (&PATTERN (set_insn));
7938
7939 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
7940 cfun->machine->base_reg),
7941 UNSPEC_LTREL_BASE);
7942 target = gen_rtx_PLUS (Pmode, temp_reg, target);
7943 }
7944
7945 ret = validate_change (insn, label, target, 0);
7946 gcc_assert (ret);
7947 }
7948
7949 return new_literal;
7950 }
7951
7952
7953 /* Find an annotated literal pool symbol referenced in RTX X,
7954 and store it at REF. Will abort if X contains references to
7955 more than one such pool symbol; multiple references to the same
7956 symbol are allowed, however.
7957
7958 The rtx pointed to by REF must be initialized to NULL_RTX
7959 by the caller before calling this routine. */
7960
7961 static void
7962 find_constant_pool_ref (rtx x, rtx *ref)
7963 {
7964 int i, j;
7965 const char *fmt;
7966
7967 /* Ignore LTREL_BASE references. */
7968 if (GET_CODE (x) == UNSPEC
7969 && XINT (x, 1) == UNSPEC_LTREL_BASE)
7970 return;
7971 /* Likewise POOL_ENTRY insns. */
7972 if (GET_CODE (x) == UNSPEC_VOLATILE
7973 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
7974 return;
7975
7976 gcc_assert (GET_CODE (x) != SYMBOL_REF
7977 || !CONSTANT_POOL_ADDRESS_P (x));
7978
7979 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
7980 {
7981 rtx sym = XVECEXP (x, 0, 0);
7982 gcc_assert (GET_CODE (sym) == SYMBOL_REF
7983 && CONSTANT_POOL_ADDRESS_P (sym));
7984
7985 if (*ref == NULL_RTX)
7986 *ref = sym;
7987 else
7988 gcc_assert (*ref == sym);
7989
7990 return;
7991 }
7992
7993 fmt = GET_RTX_FORMAT (GET_CODE (x));
7994 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
7995 {
7996 if (fmt[i] == 'e')
7997 {
7998 find_constant_pool_ref (XEXP (x, i), ref);
7999 }
8000 else if (fmt[i] == 'E')
8001 {
8002 for (j = 0; j < XVECLEN (x, i); j++)
8003 find_constant_pool_ref (XVECEXP (x, i, j), ref);
8004 }
8005 }
8006 }
8007
8008 /* Replace every reference to the annotated literal pool
8009 symbol REF in X by its base plus OFFSET. */
8010
8011 static void
8012 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
8013 {
8014 int i, j;
8015 const char *fmt;
8016
8017 gcc_assert (*x != ref);
8018
8019 if (GET_CODE (*x) == UNSPEC
8020 && XINT (*x, 1) == UNSPEC_LTREF
8021 && XVECEXP (*x, 0, 0) == ref)
8022 {
8023 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8024 return;
8025 }
8026
8027 if (GET_CODE (*x) == PLUS
8028 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8029 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8030 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8031 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8032 {
8033 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8034 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8035 return;
8036 }
8037
8038 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8039 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8040 {
8041 if (fmt[i] == 'e')
8042 {
8043 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
8044 }
8045 else if (fmt[i] == 'E')
8046 {
8047 for (j = 0; j < XVECLEN (*x, i); j++)
8048 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
8049 }
8050 }
8051 }
8052
8053 /* Check whether X contains an UNSPEC_LTREL_BASE.
8054 Return its constant pool symbol if found, NULL_RTX otherwise. */
8055
8056 static rtx
8057 find_ltrel_base (rtx x)
8058 {
8059 int i, j;
8060 const char *fmt;
8061
8062 if (GET_CODE (x) == UNSPEC
8063 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8064 return XVECEXP (x, 0, 0);
8065
8066 fmt = GET_RTX_FORMAT (GET_CODE (x));
8067 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8068 {
8069 if (fmt[i] == 'e')
8070 {
8071 rtx fnd = find_ltrel_base (XEXP (x, i));
8072 if (fnd)
8073 return fnd;
8074 }
8075 else if (fmt[i] == 'E')
8076 {
8077 for (j = 0; j < XVECLEN (x, i); j++)
8078 {
8079 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
8080 if (fnd)
8081 return fnd;
8082 }
8083 }
8084 }
8085
8086 return NULL_RTX;
8087 }
8088
8089 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
8090
8091 static void
8092 replace_ltrel_base (rtx *x)
8093 {
8094 int i, j;
8095 const char *fmt;
8096
8097 if (GET_CODE (*x) == UNSPEC
8098 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8099 {
8100 *x = XVECEXP (*x, 0, 1);
8101 return;
8102 }
8103
8104 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8105 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8106 {
8107 if (fmt[i] == 'e')
8108 {
8109 replace_ltrel_base (&XEXP (*x, i));
8110 }
8111 else if (fmt[i] == 'E')
8112 {
8113 for (j = 0; j < XVECLEN (*x, i); j++)
8114 replace_ltrel_base (&XVECEXP (*x, i, j));
8115 }
8116 }
8117 }
8118
8119
8120 /* We keep a list of constants which we have to add to internal
8121 constant tables in the middle of large functions. */
8122
8123 #define NR_C_MODES 32
8124 machine_mode constant_modes[NR_C_MODES] =
8125 {
8126 TFmode, TImode, TDmode,
8127 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8128 V4SFmode, V2DFmode, V1TFmode,
8129 DFmode, DImode, DDmode,
8130 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8131 SFmode, SImode, SDmode,
8132 V4QImode, V2HImode, V1SImode, V1SFmode,
8133 HImode,
8134 V2QImode, V1HImode,
8135 QImode,
8136 V1QImode
8137 };
8138
8139 struct constant
8140 {
8141 struct constant *next;
8142 rtx value;
8143 rtx_code_label *label;
8144 };
8145
8146 struct constant_pool
8147 {
8148 struct constant_pool *next;
8149 rtx_insn *first_insn;
8150 rtx_insn *pool_insn;
8151 bitmap insns;
8152 rtx_insn *emit_pool_after;
8153
8154 struct constant *constants[NR_C_MODES];
8155 struct constant *execute;
8156 rtx_code_label *label;
8157 int size;
8158 };
8159
8160 /* Allocate new constant_pool structure. */
8161
8162 static struct constant_pool *
8163 s390_alloc_pool (void)
8164 {
8165 struct constant_pool *pool;
8166 int i;
8167
8168 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8169 pool->next = NULL;
8170 for (i = 0; i < NR_C_MODES; i++)
8171 pool->constants[i] = NULL;
8172
8173 pool->execute = NULL;
8174 pool->label = gen_label_rtx ();
8175 pool->first_insn = NULL;
8176 pool->pool_insn = NULL;
8177 pool->insns = BITMAP_ALLOC (NULL);
8178 pool->size = 0;
8179 pool->emit_pool_after = NULL;
8180
8181 return pool;
8182 }
8183
8184 /* Create new constant pool covering instructions starting at INSN
8185 and chain it to the end of POOL_LIST. */
8186
8187 static struct constant_pool *
8188 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8189 {
8190 struct constant_pool *pool, **prev;
8191
8192 pool = s390_alloc_pool ();
8193 pool->first_insn = insn;
8194
8195 for (prev = pool_list; *prev; prev = &(*prev)->next)
8196 ;
8197 *prev = pool;
8198
8199 return pool;
8200 }
8201
8202 /* End range of instructions covered by POOL at INSN and emit
8203 placeholder insn representing the pool. */
8204
8205 static void
8206 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8207 {
8208 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8209
8210 if (!insn)
8211 insn = get_last_insn ();
8212
8213 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8214 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8215 }
8216
8217 /* Add INSN to the list of insns covered by POOL. */
8218
8219 static void
8220 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8221 {
8222 bitmap_set_bit (pool->insns, INSN_UID (insn));
8223 }
8224
8225 /* Return pool out of POOL_LIST that covers INSN. */
8226
8227 static struct constant_pool *
8228 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8229 {
8230 struct constant_pool *pool;
8231
8232 for (pool = pool_list; pool; pool = pool->next)
8233 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8234 break;
8235
8236 return pool;
8237 }
8238
8239 /* Add constant VAL of mode MODE to the constant pool POOL. */
8240
8241 static void
8242 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8243 {
8244 struct constant *c;
8245 int i;
8246
8247 for (i = 0; i < NR_C_MODES; i++)
8248 if (constant_modes[i] == mode)
8249 break;
8250 gcc_assert (i != NR_C_MODES);
8251
8252 for (c = pool->constants[i]; c != NULL; c = c->next)
8253 if (rtx_equal_p (val, c->value))
8254 break;
8255
8256 if (c == NULL)
8257 {
8258 c = (struct constant *) xmalloc (sizeof *c);
8259 c->value = val;
8260 c->label = gen_label_rtx ();
8261 c->next = pool->constants[i];
8262 pool->constants[i] = c;
8263 pool->size += GET_MODE_SIZE (mode);
8264 }
8265 }
8266
8267 /* Return an rtx that represents the offset of X from the start of
8268 pool POOL. */
8269
8270 static rtx
8271 s390_pool_offset (struct constant_pool *pool, rtx x)
8272 {
8273 rtx label;
8274
8275 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8276 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8277 UNSPEC_POOL_OFFSET);
8278 return gen_rtx_CONST (GET_MODE (x), x);
8279 }
8280
8281 /* Find constant VAL of mode MODE in the constant pool POOL.
8282 Return an RTX describing the distance from the start of
8283 the pool to the location of the new constant. */
8284
8285 static rtx
8286 s390_find_constant (struct constant_pool *pool, rtx val,
8287 machine_mode mode)
8288 {
8289 struct constant *c;
8290 int i;
8291
8292 for (i = 0; i < NR_C_MODES; i++)
8293 if (constant_modes[i] == mode)
8294 break;
8295 gcc_assert (i != NR_C_MODES);
8296
8297 for (c = pool->constants[i]; c != NULL; c = c->next)
8298 if (rtx_equal_p (val, c->value))
8299 break;
8300
8301 gcc_assert (c);
8302
8303 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8304 }
8305
8306 /* Check whether INSN is an execute. Return the label_ref to its
8307 execute target template if so, NULL_RTX otherwise. */
8308
8309 static rtx
8310 s390_execute_label (rtx insn)
8311 {
8312 if (NONJUMP_INSN_P (insn)
8313 && GET_CODE (PATTERN (insn)) == PARALLEL
8314 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8315 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8316 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8317
8318 return NULL_RTX;
8319 }
8320
8321 /* Add execute target for INSN to the constant pool POOL. */
8322
8323 static void
8324 s390_add_execute (struct constant_pool *pool, rtx insn)
8325 {
8326 struct constant *c;
8327
8328 for (c = pool->execute; c != NULL; c = c->next)
8329 if (INSN_UID (insn) == INSN_UID (c->value))
8330 break;
8331
8332 if (c == NULL)
8333 {
8334 c = (struct constant *) xmalloc (sizeof *c);
8335 c->value = insn;
8336 c->label = gen_label_rtx ();
8337 c->next = pool->execute;
8338 pool->execute = c;
8339 pool->size += 6;
8340 }
8341 }
8342
8343 /* Find execute target for INSN in the constant pool POOL.
8344 Return an RTX describing the distance from the start of
8345 the pool to the location of the execute target. */
8346
8347 static rtx
8348 s390_find_execute (struct constant_pool *pool, rtx insn)
8349 {
8350 struct constant *c;
8351
8352 for (c = pool->execute; c != NULL; c = c->next)
8353 if (INSN_UID (insn) == INSN_UID (c->value))
8354 break;
8355
8356 gcc_assert (c);
8357
8358 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8359 }
8360
8361 /* For an execute INSN, extract the execute target template. */
8362
8363 static rtx
8364 s390_execute_target (rtx insn)
8365 {
8366 rtx pattern = PATTERN (insn);
8367 gcc_assert (s390_execute_label (insn));
8368
8369 if (XVECLEN (pattern, 0) == 2)
8370 {
8371 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8372 }
8373 else
8374 {
8375 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8376 int i;
8377
8378 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8379 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8380
8381 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8382 }
8383
8384 return pattern;
8385 }
8386
8387 /* Indicate that INSN cannot be duplicated. This is the case for
8388 execute insns that carry a unique label. */
8389
8390 static bool
8391 s390_cannot_copy_insn_p (rtx_insn *insn)
8392 {
8393 rtx label = s390_execute_label (insn);
8394 return label && label != const0_rtx;
8395 }
8396
8397 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8398 do not emit the pool base label. */
8399
8400 static void
8401 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8402 {
8403 struct constant *c;
8404 rtx_insn *insn = pool->pool_insn;
8405 int i;
8406
8407 /* Switch to rodata section. */
8408 if (TARGET_CPU_ZARCH)
8409 {
8410 insn = emit_insn_after (gen_pool_section_start (), insn);
8411 INSN_ADDRESSES_NEW (insn, -1);
8412 }
8413
8414 /* Ensure minimum pool alignment. */
8415 if (TARGET_CPU_ZARCH)
8416 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8417 else
8418 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8419 INSN_ADDRESSES_NEW (insn, -1);
8420
8421 /* Emit pool base label. */
8422 if (!remote_label)
8423 {
8424 insn = emit_label_after (pool->label, insn);
8425 INSN_ADDRESSES_NEW (insn, -1);
8426 }
8427
8428 /* Dump constants in descending alignment requirement order,
8429 ensuring proper alignment for every constant. */
8430 for (i = 0; i < NR_C_MODES; i++)
8431 for (c = pool->constants[i]; c; c = c->next)
8432 {
8433 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8434 rtx value = copy_rtx (c->value);
8435 if (GET_CODE (value) == CONST
8436 && GET_CODE (XEXP (value, 0)) == UNSPEC
8437 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8438 && XVECLEN (XEXP (value, 0), 0) == 1)
8439 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8440
8441 insn = emit_label_after (c->label, insn);
8442 INSN_ADDRESSES_NEW (insn, -1);
8443
8444 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8445 gen_rtvec (1, value),
8446 UNSPECV_POOL_ENTRY);
8447 insn = emit_insn_after (value, insn);
8448 INSN_ADDRESSES_NEW (insn, -1);
8449 }
8450
8451 /* Ensure minimum alignment for instructions. */
8452 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8453 INSN_ADDRESSES_NEW (insn, -1);
8454
8455 /* Output in-pool execute template insns. */
8456 for (c = pool->execute; c; c = c->next)
8457 {
8458 insn = emit_label_after (c->label, insn);
8459 INSN_ADDRESSES_NEW (insn, -1);
8460
8461 insn = emit_insn_after (s390_execute_target (c->value), insn);
8462 INSN_ADDRESSES_NEW (insn, -1);
8463 }
8464
8465 /* Switch back to previous section. */
8466 if (TARGET_CPU_ZARCH)
8467 {
8468 insn = emit_insn_after (gen_pool_section_end (), insn);
8469 INSN_ADDRESSES_NEW (insn, -1);
8470 }
8471
8472 insn = emit_barrier_after (insn);
8473 INSN_ADDRESSES_NEW (insn, -1);
8474
8475 /* Remove placeholder insn. */
8476 remove_insn (pool->pool_insn);
8477 }
8478
8479 /* Free all memory used by POOL. */
8480
8481 static void
8482 s390_free_pool (struct constant_pool *pool)
8483 {
8484 struct constant *c, *next;
8485 int i;
8486
8487 for (i = 0; i < NR_C_MODES; i++)
8488 for (c = pool->constants[i]; c; c = next)
8489 {
8490 next = c->next;
8491 free (c);
8492 }
8493
8494 for (c = pool->execute; c; c = next)
8495 {
8496 next = c->next;
8497 free (c);
8498 }
8499
8500 BITMAP_FREE (pool->insns);
8501 free (pool);
8502 }
8503
8504
8505 /* Collect main literal pool. Return NULL on overflow. */
8506
8507 static struct constant_pool *
8508 s390_mainpool_start (void)
8509 {
8510 struct constant_pool *pool;
8511 rtx_insn *insn;
8512
8513 pool = s390_alloc_pool ();
8514
8515 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8516 {
8517 if (NONJUMP_INSN_P (insn)
8518 && GET_CODE (PATTERN (insn)) == SET
8519 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8520 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8521 {
8522 /* There might be two main_pool instructions if base_reg
8523 is call-clobbered; one for shrink-wrapped code and one
8524 for the rest. We want to keep the first. */
8525 if (pool->pool_insn)
8526 {
8527 insn = PREV_INSN (insn);
8528 delete_insn (NEXT_INSN (insn));
8529 continue;
8530 }
8531 pool->pool_insn = insn;
8532 }
8533
8534 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8535 {
8536 s390_add_execute (pool, insn);
8537 }
8538 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8539 {
8540 rtx pool_ref = NULL_RTX;
8541 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8542 if (pool_ref)
8543 {
8544 rtx constant = get_pool_constant (pool_ref);
8545 machine_mode mode = get_pool_mode (pool_ref);
8546 s390_add_constant (pool, constant, mode);
8547 }
8548 }
8549
8550 /* If hot/cold partitioning is enabled we have to make sure that
8551 the literal pool is emitted in the same section where the
8552 initialization of the literal pool base pointer takes place.
8553 emit_pool_after is only used in the non-overflow case on non
8554 Z cpus where we can emit the literal pool at the end of the
8555 function body within the text section. */
8556 if (NOTE_P (insn)
8557 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8558 && !pool->emit_pool_after)
8559 pool->emit_pool_after = PREV_INSN (insn);
8560 }
8561
8562 gcc_assert (pool->pool_insn || pool->size == 0);
8563
8564 if (pool->size >= 4096)
8565 {
8566 /* We're going to chunkify the pool, so remove the main
8567 pool placeholder insn. */
8568 remove_insn (pool->pool_insn);
8569
8570 s390_free_pool (pool);
8571 pool = NULL;
8572 }
8573
8574 /* If the functions ends with the section where the literal pool
8575 should be emitted set the marker to its end. */
8576 if (pool && !pool->emit_pool_after)
8577 pool->emit_pool_after = get_last_insn ();
8578
8579 return pool;
8580 }
8581
8582 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8583 Modify the current function to output the pool constants as well as
8584 the pool register setup instruction. */
8585
8586 static void
8587 s390_mainpool_finish (struct constant_pool *pool)
8588 {
8589 rtx base_reg = cfun->machine->base_reg;
8590
8591 /* If the pool is empty, we're done. */
8592 if (pool->size == 0)
8593 {
8594 /* We don't actually need a base register after all. */
8595 cfun->machine->base_reg = NULL_RTX;
8596
8597 if (pool->pool_insn)
8598 remove_insn (pool->pool_insn);
8599 s390_free_pool (pool);
8600 return;
8601 }
8602
8603 /* We need correct insn addresses. */
8604 shorten_branches (get_insns ());
8605
8606 /* On zSeries, we use a LARL to load the pool register. The pool is
8607 located in the .rodata section, so we emit it after the function. */
8608 if (TARGET_CPU_ZARCH)
8609 {
8610 rtx set = gen_main_base_64 (base_reg, pool->label);
8611 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8612 INSN_ADDRESSES_NEW (insn, -1);
8613 remove_insn (pool->pool_insn);
8614
8615 insn = get_last_insn ();
8616 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8617 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8618
8619 s390_dump_pool (pool, 0);
8620 }
8621
8622 /* On S/390, if the total size of the function's code plus literal pool
8623 does not exceed 4096 bytes, we use BASR to set up a function base
8624 pointer, and emit the literal pool at the end of the function. */
8625 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
8626 + pool->size + 8 /* alignment slop */ < 4096)
8627 {
8628 rtx set = gen_main_base_31_small (base_reg, pool->label);
8629 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8630 INSN_ADDRESSES_NEW (insn, -1);
8631 remove_insn (pool->pool_insn);
8632
8633 insn = emit_label_after (pool->label, insn);
8634 INSN_ADDRESSES_NEW (insn, -1);
8635
8636 /* emit_pool_after will be set by s390_mainpool_start to the
8637 last insn of the section where the literal pool should be
8638 emitted. */
8639 insn = pool->emit_pool_after;
8640
8641 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8642 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8643
8644 s390_dump_pool (pool, 1);
8645 }
8646
8647 /* Otherwise, we emit an inline literal pool and use BASR to branch
8648 over it, setting up the pool register at the same time. */
8649 else
8650 {
8651 rtx_code_label *pool_end = gen_label_rtx ();
8652
8653 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
8654 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
8655 JUMP_LABEL (insn) = pool_end;
8656 INSN_ADDRESSES_NEW (insn, -1);
8657 remove_insn (pool->pool_insn);
8658
8659 insn = emit_label_after (pool->label, insn);
8660 INSN_ADDRESSES_NEW (insn, -1);
8661
8662 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8663 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8664
8665 insn = emit_label_after (pool_end, pool->pool_insn);
8666 INSN_ADDRESSES_NEW (insn, -1);
8667
8668 s390_dump_pool (pool, 1);
8669 }
8670
8671
8672 /* Replace all literal pool references. */
8673
8674 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
8675 {
8676 if (INSN_P (insn))
8677 replace_ltrel_base (&PATTERN (insn));
8678
8679 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8680 {
8681 rtx addr, pool_ref = NULL_RTX;
8682 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8683 if (pool_ref)
8684 {
8685 if (s390_execute_label (insn))
8686 addr = s390_find_execute (pool, insn);
8687 else
8688 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
8689 get_pool_mode (pool_ref));
8690
8691 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
8692 INSN_CODE (insn) = -1;
8693 }
8694 }
8695 }
8696
8697
8698 /* Free the pool. */
8699 s390_free_pool (pool);
8700 }
8701
8702 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8703 We have decided we cannot use this pool, so revert all changes
8704 to the current function that were done by s390_mainpool_start. */
8705 static void
8706 s390_mainpool_cancel (struct constant_pool *pool)
8707 {
8708 /* We didn't actually change the instruction stream, so simply
8709 free the pool memory. */
8710 s390_free_pool (pool);
8711 }
8712
8713
8714 /* Chunkify the literal pool. */
8715
8716 #define S390_POOL_CHUNK_MIN 0xc00
8717 #define S390_POOL_CHUNK_MAX 0xe00
8718
8719 static struct constant_pool *
8720 s390_chunkify_start (void)
8721 {
8722 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
8723 int extra_size = 0;
8724 bitmap far_labels;
8725 rtx pending_ltrel = NULL_RTX;
8726 rtx_insn *insn;
8727
8728 rtx (*gen_reload_base) (rtx, rtx) =
8729 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
8730
8731
8732 /* We need correct insn addresses. */
8733
8734 shorten_branches (get_insns ());
8735
8736 /* Scan all insns and move literals to pool chunks. */
8737
8738 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8739 {
8740 bool section_switch_p = false;
8741
8742 /* Check for pending LTREL_BASE. */
8743 if (INSN_P (insn))
8744 {
8745 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
8746 if (ltrel_base)
8747 {
8748 gcc_assert (ltrel_base == pending_ltrel);
8749 pending_ltrel = NULL_RTX;
8750 }
8751 }
8752
8753 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8754 {
8755 if (!curr_pool)
8756 curr_pool = s390_start_pool (&pool_list, insn);
8757
8758 s390_add_execute (curr_pool, insn);
8759 s390_add_pool_insn (curr_pool, insn);
8760 }
8761 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8762 {
8763 rtx pool_ref = NULL_RTX;
8764 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8765 if (pool_ref)
8766 {
8767 rtx constant = get_pool_constant (pool_ref);
8768 machine_mode mode = get_pool_mode (pool_ref);
8769
8770 if (!curr_pool)
8771 curr_pool = s390_start_pool (&pool_list, insn);
8772
8773 s390_add_constant (curr_pool, constant, mode);
8774 s390_add_pool_insn (curr_pool, insn);
8775
8776 /* Don't split the pool chunk between a LTREL_OFFSET load
8777 and the corresponding LTREL_BASE. */
8778 if (GET_CODE (constant) == CONST
8779 && GET_CODE (XEXP (constant, 0)) == UNSPEC
8780 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
8781 {
8782 gcc_assert (!pending_ltrel);
8783 pending_ltrel = pool_ref;
8784 }
8785 }
8786 }
8787
8788 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
8789 {
8790 if (curr_pool)
8791 s390_add_pool_insn (curr_pool, insn);
8792 /* An LTREL_BASE must follow within the same basic block. */
8793 gcc_assert (!pending_ltrel);
8794 }
8795
8796 if (NOTE_P (insn))
8797 switch (NOTE_KIND (insn))
8798 {
8799 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
8800 section_switch_p = true;
8801 break;
8802 case NOTE_INSN_VAR_LOCATION:
8803 case NOTE_INSN_CALL_ARG_LOCATION:
8804 continue;
8805 default:
8806 break;
8807 }
8808
8809 if (!curr_pool
8810 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
8811 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
8812 continue;
8813
8814 if (TARGET_CPU_ZARCH)
8815 {
8816 if (curr_pool->size < S390_POOL_CHUNK_MAX)
8817 continue;
8818
8819 s390_end_pool (curr_pool, NULL);
8820 curr_pool = NULL;
8821 }
8822 else
8823 {
8824 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
8825 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
8826 + extra_size;
8827
8828 /* We will later have to insert base register reload insns.
8829 Those will have an effect on code size, which we need to
8830 consider here. This calculation makes rather pessimistic
8831 worst-case assumptions. */
8832 if (LABEL_P (insn))
8833 extra_size += 6;
8834
8835 if (chunk_size < S390_POOL_CHUNK_MIN
8836 && curr_pool->size < S390_POOL_CHUNK_MIN
8837 && !section_switch_p)
8838 continue;
8839
8840 /* Pool chunks can only be inserted after BARRIERs ... */
8841 if (BARRIER_P (insn))
8842 {
8843 s390_end_pool (curr_pool, insn);
8844 curr_pool = NULL;
8845 extra_size = 0;
8846 }
8847
8848 /* ... so if we don't find one in time, create one. */
8849 else if (chunk_size > S390_POOL_CHUNK_MAX
8850 || curr_pool->size > S390_POOL_CHUNK_MAX
8851 || section_switch_p)
8852 {
8853 rtx_insn *label, *jump, *barrier, *next, *prev;
8854
8855 if (!section_switch_p)
8856 {
8857 /* We can insert the barrier only after a 'real' insn. */
8858 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
8859 continue;
8860 if (get_attr_length (insn) == 0)
8861 continue;
8862 /* Don't separate LTREL_BASE from the corresponding
8863 LTREL_OFFSET load. */
8864 if (pending_ltrel)
8865 continue;
8866 next = insn;
8867 do
8868 {
8869 insn = next;
8870 next = NEXT_INSN (insn);
8871 }
8872 while (next
8873 && NOTE_P (next)
8874 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
8875 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
8876 }
8877 else
8878 {
8879 gcc_assert (!pending_ltrel);
8880
8881 /* The old pool has to end before the section switch
8882 note in order to make it part of the current
8883 section. */
8884 insn = PREV_INSN (insn);
8885 }
8886
8887 label = gen_label_rtx ();
8888 prev = insn;
8889 if (prev && NOTE_P (prev))
8890 prev = prev_nonnote_insn (prev);
8891 if (prev)
8892 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
8893 INSN_LOCATION (prev));
8894 else
8895 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
8896 barrier = emit_barrier_after (jump);
8897 insn = emit_label_after (label, barrier);
8898 JUMP_LABEL (jump) = label;
8899 LABEL_NUSES (label) = 1;
8900
8901 INSN_ADDRESSES_NEW (jump, -1);
8902 INSN_ADDRESSES_NEW (barrier, -1);
8903 INSN_ADDRESSES_NEW (insn, -1);
8904
8905 s390_end_pool (curr_pool, barrier);
8906 curr_pool = NULL;
8907 extra_size = 0;
8908 }
8909 }
8910 }
8911
8912 if (curr_pool)
8913 s390_end_pool (curr_pool, NULL);
8914 gcc_assert (!pending_ltrel);
8915
8916 /* Find all labels that are branched into
8917 from an insn belonging to a different chunk. */
8918
8919 far_labels = BITMAP_ALLOC (NULL);
8920
8921 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8922 {
8923 rtx_jump_table_data *table;
8924
8925 /* Labels marked with LABEL_PRESERVE_P can be target
8926 of non-local jumps, so we have to mark them.
8927 The same holds for named labels.
8928
8929 Don't do that, however, if it is the label before
8930 a jump table. */
8931
8932 if (LABEL_P (insn)
8933 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
8934 {
8935 rtx_insn *vec_insn = NEXT_INSN (insn);
8936 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
8937 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
8938 }
8939 /* Check potential targets in a table jump (casesi_jump). */
8940 else if (tablejump_p (insn, NULL, &table))
8941 {
8942 rtx vec_pat = PATTERN (table);
8943 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
8944
8945 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
8946 {
8947 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
8948
8949 if (s390_find_pool (pool_list, label)
8950 != s390_find_pool (pool_list, insn))
8951 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8952 }
8953 }
8954 /* If we have a direct jump (conditional or unconditional),
8955 check all potential targets. */
8956 else if (JUMP_P (insn))
8957 {
8958 rtx pat = PATTERN (insn);
8959
8960 if (GET_CODE (pat) == PARALLEL)
8961 pat = XVECEXP (pat, 0, 0);
8962
8963 if (GET_CODE (pat) == SET)
8964 {
8965 rtx label = JUMP_LABEL (insn);
8966 if (label && !ANY_RETURN_P (label))
8967 {
8968 if (s390_find_pool (pool_list, label)
8969 != s390_find_pool (pool_list, insn))
8970 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
8971 }
8972 }
8973 }
8974 }
8975
8976 /* Insert base register reload insns before every pool. */
8977
8978 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
8979 {
8980 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8981 curr_pool->label);
8982 rtx_insn *insn = curr_pool->first_insn;
8983 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
8984 }
8985
8986 /* Insert base register reload insns at every far label. */
8987
8988 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8989 if (LABEL_P (insn)
8990 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
8991 {
8992 struct constant_pool *pool = s390_find_pool (pool_list, insn);
8993 if (pool)
8994 {
8995 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
8996 pool->label);
8997 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
8998 }
8999 }
9000
9001
9002 BITMAP_FREE (far_labels);
9003
9004
9005 /* Recompute insn addresses. */
9006
9007 init_insn_lengths ();
9008 shorten_branches (get_insns ());
9009
9010 return pool_list;
9011 }
9012
9013 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9014 After we have decided to use this list, finish implementing
9015 all changes to the current function as required. */
9016
9017 static void
9018 s390_chunkify_finish (struct constant_pool *pool_list)
9019 {
9020 struct constant_pool *curr_pool = NULL;
9021 rtx_insn *insn;
9022
9023
9024 /* Replace all literal pool references. */
9025
9026 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9027 {
9028 if (INSN_P (insn))
9029 replace_ltrel_base (&PATTERN (insn));
9030
9031 curr_pool = s390_find_pool (pool_list, insn);
9032 if (!curr_pool)
9033 continue;
9034
9035 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9036 {
9037 rtx addr, pool_ref = NULL_RTX;
9038 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9039 if (pool_ref)
9040 {
9041 if (s390_execute_label (insn))
9042 addr = s390_find_execute (curr_pool, insn);
9043 else
9044 addr = s390_find_constant (curr_pool,
9045 get_pool_constant (pool_ref),
9046 get_pool_mode (pool_ref));
9047
9048 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9049 INSN_CODE (insn) = -1;
9050 }
9051 }
9052 }
9053
9054 /* Dump out all literal pools. */
9055
9056 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9057 s390_dump_pool (curr_pool, 0);
9058
9059 /* Free pool list. */
9060
9061 while (pool_list)
9062 {
9063 struct constant_pool *next = pool_list->next;
9064 s390_free_pool (pool_list);
9065 pool_list = next;
9066 }
9067 }
9068
9069 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9070 We have decided we cannot use this list, so revert all changes
9071 to the current function that were done by s390_chunkify_start. */
9072
9073 static void
9074 s390_chunkify_cancel (struct constant_pool *pool_list)
9075 {
9076 struct constant_pool *curr_pool = NULL;
9077 rtx_insn *insn;
9078
9079 /* Remove all pool placeholder insns. */
9080
9081 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9082 {
9083 /* Did we insert an extra barrier? Remove it. */
9084 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
9085 rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
9086 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
9087
9088 if (jump && JUMP_P (jump)
9089 && barrier && BARRIER_P (barrier)
9090 && label && LABEL_P (label)
9091 && GET_CODE (PATTERN (jump)) == SET
9092 && SET_DEST (PATTERN (jump)) == pc_rtx
9093 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
9094 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
9095 {
9096 remove_insn (jump);
9097 remove_insn (barrier);
9098 remove_insn (label);
9099 }
9100
9101 remove_insn (curr_pool->pool_insn);
9102 }
9103
9104 /* Remove all base register reload insns. */
9105
9106 for (insn = get_insns (); insn; )
9107 {
9108 rtx_insn *next_insn = NEXT_INSN (insn);
9109
9110 if (NONJUMP_INSN_P (insn)
9111 && GET_CODE (PATTERN (insn)) == SET
9112 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
9113 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
9114 remove_insn (insn);
9115
9116 insn = next_insn;
9117 }
9118
9119 /* Free pool list. */
9120
9121 while (pool_list)
9122 {
9123 struct constant_pool *next = pool_list->next;
9124 s390_free_pool (pool_list);
9125 pool_list = next;
9126 }
9127 }
9128
9129 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9130
9131 void
9132 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9133 {
9134 switch (GET_MODE_CLASS (mode))
9135 {
9136 case MODE_FLOAT:
9137 case MODE_DECIMAL_FLOAT:
9138 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9139
9140 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp), mode, align);
9141 break;
9142
9143 case MODE_INT:
9144 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9145 mark_symbol_refs_as_used (exp);
9146 break;
9147
9148 case MODE_VECTOR_INT:
9149 case MODE_VECTOR_FLOAT:
9150 {
9151 int i;
9152 machine_mode inner_mode;
9153 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9154
9155 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9156 for (i = 0; i < XVECLEN (exp, 0); i++)
9157 s390_output_pool_entry (XVECEXP (exp, 0, i),
9158 inner_mode,
9159 i == 0
9160 ? align
9161 : GET_MODE_BITSIZE (inner_mode));
9162 }
9163 break;
9164
9165 default:
9166 gcc_unreachable ();
9167 }
9168 }
9169
9170
9171 /* Return an RTL expression representing the value of the return address
9172 for the frame COUNT steps up from the current frame. FRAME is the
9173 frame pointer of that frame. */
9174
9175 rtx
9176 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9177 {
9178 int offset;
9179 rtx addr;
9180
9181 /* Without backchain, we fail for all but the current frame. */
9182
9183 if (!TARGET_BACKCHAIN && count > 0)
9184 return NULL_RTX;
9185
9186 /* For the current frame, we need to make sure the initial
9187 value of RETURN_REGNUM is actually saved. */
9188
9189 if (count == 0)
9190 {
9191 /* On non-z architectures branch splitting could overwrite r14. */
9192 if (TARGET_CPU_ZARCH)
9193 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9194 else
9195 {
9196 cfun_frame_layout.save_return_addr_p = true;
9197 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
9198 }
9199 }
9200
9201 if (TARGET_PACKED_STACK)
9202 offset = -2 * UNITS_PER_LONG;
9203 else
9204 offset = RETURN_REGNUM * UNITS_PER_LONG;
9205
9206 addr = plus_constant (Pmode, frame, offset);
9207 addr = memory_address (Pmode, addr);
9208 return gen_rtx_MEM (Pmode, addr);
9209 }
9210
9211 /* Return an RTL expression representing the back chain stored in
9212 the current stack frame. */
9213
9214 rtx
9215 s390_back_chain_rtx (void)
9216 {
9217 rtx chain;
9218
9219 gcc_assert (TARGET_BACKCHAIN);
9220
9221 if (TARGET_PACKED_STACK)
9222 chain = plus_constant (Pmode, stack_pointer_rtx,
9223 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9224 else
9225 chain = stack_pointer_rtx;
9226
9227 chain = gen_rtx_MEM (Pmode, chain);
9228 return chain;
9229 }
9230
9231 /* Find first call clobbered register unused in a function.
9232 This could be used as base register in a leaf function
9233 or for holding the return address before epilogue. */
9234
9235 static int
9236 find_unused_clobbered_reg (void)
9237 {
9238 int i;
9239 for (i = 0; i < 6; i++)
9240 if (!df_regs_ever_live_p (i))
9241 return i;
9242 return 0;
9243 }
9244
9245
9246 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9247 clobbered hard regs in SETREG. */
9248
9249 static void
9250 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9251 {
9252 char *regs_ever_clobbered = (char *)data;
9253 unsigned int i, regno;
9254 machine_mode mode = GET_MODE (setreg);
9255
9256 if (GET_CODE (setreg) == SUBREG)
9257 {
9258 rtx inner = SUBREG_REG (setreg);
9259 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9260 return;
9261 regno = subreg_regno (setreg);
9262 }
9263 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9264 regno = REGNO (setreg);
9265 else
9266 return;
9267
9268 for (i = regno;
9269 i < regno + HARD_REGNO_NREGS (regno, mode);
9270 i++)
9271 regs_ever_clobbered[i] = 1;
9272 }
9273
9274 /* Walks through all basic blocks of the current function looking
9275 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
9276 of the passed integer array REGS_EVER_CLOBBERED are set to one for
9277 each of those regs. */
9278
9279 static void
9280 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9281 {
9282 basic_block cur_bb;
9283 rtx_insn *cur_insn;
9284 unsigned int i;
9285
9286 memset (regs_ever_clobbered, 0, 32);
9287
9288 /* For non-leaf functions we have to consider all call clobbered regs to be
9289 clobbered. */
9290 if (!crtl->is_leaf)
9291 {
9292 for (i = 0; i < 32; i++)
9293 regs_ever_clobbered[i] = call_really_used_regs[i];
9294 }
9295
9296 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9297 this work is done by liveness analysis (mark_regs_live_at_end).
9298 Special care is needed for functions containing landing pads. Landing pads
9299 may use the eh registers, but the code which sets these registers is not
9300 contained in that function. Hence s390_regs_ever_clobbered is not able to
9301 deal with this automatically. */
9302 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9303 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9304 if (crtl->calls_eh_return
9305 || (cfun->machine->has_landing_pad_p
9306 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9307 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9308
9309 /* For nonlocal gotos all call-saved registers have to be saved.
9310 This flag is also set for the unwinding code in libgcc.
9311 See expand_builtin_unwind_init. For regs_ever_live this is done by
9312 reload. */
9313 if (crtl->saves_all_registers)
9314 for (i = 0; i < 32; i++)
9315 if (!call_really_used_regs[i])
9316 regs_ever_clobbered[i] = 1;
9317
9318 FOR_EACH_BB_FN (cur_bb, cfun)
9319 {
9320 FOR_BB_INSNS (cur_bb, cur_insn)
9321 {
9322 rtx pat;
9323
9324 if (!INSN_P (cur_insn))
9325 continue;
9326
9327 pat = PATTERN (cur_insn);
9328
9329 /* Ignore GPR restore insns. */
9330 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9331 {
9332 if (GET_CODE (pat) == SET
9333 && GENERAL_REG_P (SET_DEST (pat)))
9334 {
9335 /* lgdr */
9336 if (GET_MODE (SET_SRC (pat)) == DImode
9337 && FP_REG_P (SET_SRC (pat)))
9338 continue;
9339
9340 /* l / lg */
9341 if (GET_CODE (SET_SRC (pat)) == MEM)
9342 continue;
9343 }
9344
9345 /* lm / lmg */
9346 if (GET_CODE (pat) == PARALLEL
9347 && load_multiple_operation (pat, VOIDmode))
9348 continue;
9349 }
9350
9351 note_stores (pat,
9352 s390_reg_clobbered_rtx,
9353 regs_ever_clobbered);
9354 }
9355 }
9356 }
9357
9358 /* Determine the frame area which actually has to be accessed
9359 in the function epilogue. The values are stored at the
9360 given pointers AREA_BOTTOM (address of the lowest used stack
9361 address) and AREA_TOP (address of the first item which does
9362 not belong to the stack frame). */
9363
9364 static void
9365 s390_frame_area (int *area_bottom, int *area_top)
9366 {
9367 int b, t;
9368
9369 b = INT_MAX;
9370 t = INT_MIN;
9371
9372 if (cfun_frame_layout.first_restore_gpr != -1)
9373 {
9374 b = (cfun_frame_layout.gprs_offset
9375 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9376 t = b + (cfun_frame_layout.last_restore_gpr
9377 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9378 }
9379
9380 if (TARGET_64BIT && cfun_save_high_fprs_p)
9381 {
9382 b = MIN (b, cfun_frame_layout.f8_offset);
9383 t = MAX (t, (cfun_frame_layout.f8_offset
9384 + cfun_frame_layout.high_fprs * 8));
9385 }
9386
9387 if (!TARGET_64BIT)
9388 {
9389 if (cfun_fpr_save_p (FPR4_REGNUM))
9390 {
9391 b = MIN (b, cfun_frame_layout.f4_offset);
9392 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9393 }
9394 if (cfun_fpr_save_p (FPR6_REGNUM))
9395 {
9396 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9397 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9398 }
9399 }
9400 *area_bottom = b;
9401 *area_top = t;
9402 }
9403 /* Update gpr_save_slots in the frame layout trying to make use of
9404 FPRs as GPR save slots.
9405 This is a helper routine of s390_register_info. */
9406
9407 static void
9408 s390_register_info_gprtofpr ()
9409 {
9410 int save_reg_slot = FPR0_REGNUM;
9411 int i, j;
9412
9413 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9414 return;
9415
9416 for (i = 15; i >= 6; i--)
9417 {
9418 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9419 continue;
9420
9421 /* Advance to the next FP register which can be used as a
9422 GPR save slot. */
9423 while ((!call_really_used_regs[save_reg_slot]
9424 || df_regs_ever_live_p (save_reg_slot)
9425 || cfun_fpr_save_p (save_reg_slot))
9426 && FP_REGNO_P (save_reg_slot))
9427 save_reg_slot++;
9428 if (!FP_REGNO_P (save_reg_slot))
9429 {
9430 /* We only want to use ldgr/lgdr if we can get rid of
9431 stm/lm entirely. So undo the gpr slot allocation in
9432 case we ran out of FPR save slots. */
9433 for (j = 6; j <= 15; j++)
9434 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9435 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9436 break;
9437 }
9438 cfun_gpr_save_slot (i) = save_reg_slot++;
9439 }
9440 }
9441
9442 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9443 stdarg.
9444 This is a helper routine for s390_register_info. */
9445
9446 static void
9447 s390_register_info_stdarg_fpr ()
9448 {
9449 int i;
9450 int min_fpr;
9451 int max_fpr;
9452
9453 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9454 f0-f4 for 64 bit. */
9455 if (!cfun->stdarg
9456 || !TARGET_HARD_FLOAT
9457 || !cfun->va_list_fpr_size
9458 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9459 return;
9460
9461 min_fpr = crtl->args.info.fprs;
9462 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9463 if (max_fpr >= FP_ARG_NUM_REG)
9464 max_fpr = FP_ARG_NUM_REG - 1;
9465
9466 /* FPR argument regs start at f0. */
9467 min_fpr += FPR0_REGNUM;
9468 max_fpr += FPR0_REGNUM;
9469
9470 for (i = min_fpr; i <= max_fpr; i++)
9471 cfun_set_fpr_save (i);
9472 }
9473
9474 /* Reserve the GPR save slots for GPRs which need to be saved due to
9475 stdarg.
9476 This is a helper routine for s390_register_info. */
9477
9478 static void
9479 s390_register_info_stdarg_gpr ()
9480 {
9481 int i;
9482 int min_gpr;
9483 int max_gpr;
9484
9485 if (!cfun->stdarg
9486 || !cfun->va_list_gpr_size
9487 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9488 return;
9489
9490 min_gpr = crtl->args.info.gprs;
9491 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9492 if (max_gpr >= GP_ARG_NUM_REG)
9493 max_gpr = GP_ARG_NUM_REG - 1;
9494
9495 /* GPR argument regs start at r2. */
9496 min_gpr += GPR2_REGNUM;
9497 max_gpr += GPR2_REGNUM;
9498
9499 /* If r6 was supposed to be saved into an FPR and now needs to go to
9500 the stack for vararg we have to adjust the restore range to make
9501 sure that the restore is done from stack as well. */
9502 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9503 && min_gpr <= GPR6_REGNUM
9504 && max_gpr >= GPR6_REGNUM)
9505 {
9506 if (cfun_frame_layout.first_restore_gpr == -1
9507 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9508 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9509 if (cfun_frame_layout.last_restore_gpr == -1
9510 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9511 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9512 }
9513
9514 if (cfun_frame_layout.first_save_gpr == -1
9515 || cfun_frame_layout.first_save_gpr > min_gpr)
9516 cfun_frame_layout.first_save_gpr = min_gpr;
9517
9518 if (cfun_frame_layout.last_save_gpr == -1
9519 || cfun_frame_layout.last_save_gpr < max_gpr)
9520 cfun_frame_layout.last_save_gpr = max_gpr;
9521
9522 for (i = min_gpr; i <= max_gpr; i++)
9523 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9524 }
9525
9526 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9527 prologue and epilogue. */
9528
9529 static void
9530 s390_register_info_set_ranges ()
9531 {
9532 int i, j;
9533
9534 /* Find the first and the last save slot supposed to use the stack
9535 to set the restore range.
9536 Vararg regs might be marked as save to stack but only the
9537 call-saved regs really need restoring (i.e. r6). This code
9538 assumes that the vararg regs have not yet been recorded in
9539 cfun_gpr_save_slot. */
9540 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9541 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9542 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9543 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9544 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9545 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9546 }
9547
9548 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9549 for registers which need to be saved in function prologue.
9550 This function can be used until the insns emitted for save/restore
9551 of the regs are visible in the RTL stream. */
9552
9553 static void
9554 s390_register_info ()
9555 {
9556 int i;
9557 char clobbered_regs[32];
9558
9559 gcc_assert (!epilogue_completed);
9560
9561 if (reload_completed)
9562 /* After reload we rely on our own routine to determine which
9563 registers need saving. */
9564 s390_regs_ever_clobbered (clobbered_regs);
9565 else
9566 /* During reload we use regs_ever_live as a base since reload
9567 does changes in there which we otherwise would not be aware
9568 of. */
9569 for (i = 0; i < 32; i++)
9570 clobbered_regs[i] = df_regs_ever_live_p (i);
9571
9572 for (i = 0; i < 32; i++)
9573 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9574
9575 /* Mark the call-saved FPRs which need to be saved.
9576 This needs to be done before checking the special GPRs since the
9577 stack pointer usage depends on whether high FPRs have to be saved
9578 or not. */
9579 cfun_frame_layout.fpr_bitmap = 0;
9580 cfun_frame_layout.high_fprs = 0;
9581 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9582 if (clobbered_regs[i] && !call_really_used_regs[i])
9583 {
9584 cfun_set_fpr_save (i);
9585 if (i >= FPR8_REGNUM)
9586 cfun_frame_layout.high_fprs++;
9587 }
9588
9589 /* Register 12 is used for GOT address, but also as temp in prologue
9590 for split-stack stdarg functions (unless r14 is available). */
9591 clobbered_regs[12]
9592 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9593 || (flag_split_stack && cfun->stdarg
9594 && (crtl->is_leaf || TARGET_TPF_PROFILING
9595 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9596
9597 clobbered_regs[BASE_REGNUM]
9598 |= (cfun->machine->base_reg
9599 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9600
9601 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9602 |= !!frame_pointer_needed;
9603
9604 /* On pre z900 machines this might take until machine dependent
9605 reorg to decide.
9606 save_return_addr_p will only be set on non-zarch machines so
9607 there is no risk that r14 goes into an FPR instead of a stack
9608 slot. */
9609 clobbered_regs[RETURN_REGNUM]
9610 |= (!crtl->is_leaf
9611 || TARGET_TPF_PROFILING
9612 || cfun->machine->split_branches_pending_p
9613 || cfun_frame_layout.save_return_addr_p
9614 || crtl->calls_eh_return);
9615
9616 clobbered_regs[STACK_POINTER_REGNUM]
9617 |= (!crtl->is_leaf
9618 || TARGET_TPF_PROFILING
9619 || cfun_save_high_fprs_p
9620 || get_frame_size () > 0
9621 || (reload_completed && cfun_frame_layout.frame_size > 0)
9622 || cfun->calls_alloca);
9623
9624 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
9625
9626 for (i = 6; i < 16; i++)
9627 if (clobbered_regs[i])
9628 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9629
9630 s390_register_info_stdarg_fpr ();
9631 s390_register_info_gprtofpr ();
9632 s390_register_info_set_ranges ();
9633 /* stdarg functions might need to save GPRs 2 to 6. This might
9634 override the GPR->FPR save decision made by
9635 s390_register_info_gprtofpr for r6 since vararg regs must go to
9636 the stack. */
9637 s390_register_info_stdarg_gpr ();
9638 }
9639
9640 /* This function is called by s390_optimize_prologue in order to get
9641 rid of unnecessary GPR save/restore instructions. The register info
9642 for the GPRs is re-computed and the ranges are re-calculated. */
9643
9644 static void
9645 s390_optimize_register_info ()
9646 {
9647 char clobbered_regs[32];
9648 int i;
9649
9650 gcc_assert (epilogue_completed);
9651 gcc_assert (!cfun->machine->split_branches_pending_p);
9652
9653 s390_regs_ever_clobbered (clobbered_regs);
9654
9655 for (i = 0; i < 32; i++)
9656 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9657
9658 /* There is still special treatment needed for cases invisible to
9659 s390_regs_ever_clobbered. */
9660 clobbered_regs[RETURN_REGNUM]
9661 |= (TARGET_TPF_PROFILING
9662 /* When expanding builtin_return_addr in ESA mode we do not
9663 know whether r14 will later be needed as scratch reg when
9664 doing branch splitting. So the builtin always accesses the
9665 r14 save slot and we need to stick to the save/restore
9666 decision for r14 even if it turns out that it didn't get
9667 clobbered. */
9668 || cfun_frame_layout.save_return_addr_p
9669 || crtl->calls_eh_return);
9670
9671 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
9672
9673 for (i = 6; i < 16; i++)
9674 if (!clobbered_regs[i])
9675 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
9676
9677 s390_register_info_set_ranges ();
9678 s390_register_info_stdarg_gpr ();
9679 }
9680
9681 /* Fill cfun->machine with info about frame of current function. */
9682
9683 static void
9684 s390_frame_info (void)
9685 {
9686 HOST_WIDE_INT lowest_offset;
9687
9688 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
9689 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
9690
9691 /* The va_arg builtin uses a constant distance of 16 *
9692 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
9693 pointer. So even if we are going to save the stack pointer in an
9694 FPR we need the stack space in order to keep the offsets
9695 correct. */
9696 if (cfun->stdarg && cfun_save_arg_fprs_p)
9697 {
9698 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9699
9700 if (cfun_frame_layout.first_save_gpr_slot == -1)
9701 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
9702 }
9703
9704 cfun_frame_layout.frame_size = get_frame_size ();
9705 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
9706 fatal_error (input_location,
9707 "total size of local variables exceeds architecture limit");
9708
9709 if (!TARGET_PACKED_STACK)
9710 {
9711 /* Fixed stack layout. */
9712 cfun_frame_layout.backchain_offset = 0;
9713 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
9714 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
9715 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
9716 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
9717 * UNITS_PER_LONG);
9718 }
9719 else if (TARGET_BACKCHAIN)
9720 {
9721 /* Kernel stack layout - packed stack, backchain, no float */
9722 gcc_assert (TARGET_SOFT_FLOAT);
9723 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
9724 - UNITS_PER_LONG);
9725
9726 /* The distance between the backchain and the return address
9727 save slot must not change. So we always need a slot for the
9728 stack pointer which resides in between. */
9729 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
9730
9731 cfun_frame_layout.gprs_offset
9732 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
9733
9734 /* FPRs will not be saved. Nevertheless pick sane values to
9735 keep area calculations valid. */
9736 cfun_frame_layout.f0_offset =
9737 cfun_frame_layout.f4_offset =
9738 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
9739 }
9740 else
9741 {
9742 int num_fprs;
9743
9744 /* Packed stack layout without backchain. */
9745
9746 /* With stdarg FPRs need their dedicated slots. */
9747 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
9748 : (cfun_fpr_save_p (FPR4_REGNUM) +
9749 cfun_fpr_save_p (FPR6_REGNUM)));
9750 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
9751
9752 num_fprs = (cfun->stdarg ? 2
9753 : (cfun_fpr_save_p (FPR0_REGNUM)
9754 + cfun_fpr_save_p (FPR2_REGNUM)));
9755 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
9756
9757 cfun_frame_layout.gprs_offset
9758 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
9759
9760 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
9761 - cfun_frame_layout.high_fprs * 8);
9762 }
9763
9764 if (cfun_save_high_fprs_p)
9765 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
9766
9767 if (!crtl->is_leaf)
9768 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
9769
9770 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
9771 sized area at the bottom of the stack. This is required also for
9772 leaf functions. When GCC generates a local stack reference it
9773 will always add STACK_POINTER_OFFSET to all these references. */
9774 if (crtl->is_leaf
9775 && !TARGET_TPF_PROFILING
9776 && cfun_frame_layout.frame_size == 0
9777 && !cfun->calls_alloca)
9778 return;
9779
9780 /* Calculate the number of bytes we have used in our own register
9781 save area. With the packed stack layout we can re-use the
9782 remaining bytes for normal stack elements. */
9783
9784 if (TARGET_PACKED_STACK)
9785 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
9786 cfun_frame_layout.f4_offset),
9787 cfun_frame_layout.gprs_offset);
9788 else
9789 lowest_offset = 0;
9790
9791 if (TARGET_BACKCHAIN)
9792 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
9793
9794 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
9795
9796 /* If under 31 bit an odd number of gprs has to be saved we have to
9797 adjust the frame size to sustain 8 byte alignment of stack
9798 frames. */
9799 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
9800 STACK_BOUNDARY / BITS_PER_UNIT - 1)
9801 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
9802 }
9803
9804 /* Generate frame layout. Fills in register and frame data for the current
9805 function in cfun->machine. This routine can be called multiple times;
9806 it will re-do the complete frame layout every time. */
9807
9808 static void
9809 s390_init_frame_layout (void)
9810 {
9811 HOST_WIDE_INT frame_size;
9812 int base_used;
9813
9814 /* After LRA the frame layout is supposed to be read-only and should
9815 not be re-computed. */
9816 if (reload_completed)
9817 return;
9818
9819 /* On S/390 machines, we may need to perform branch splitting, which
9820 will require both base and return address register. We have no
9821 choice but to assume we're going to need them until right at the
9822 end of the machine dependent reorg phase. */
9823 if (!TARGET_CPU_ZARCH)
9824 cfun->machine->split_branches_pending_p = true;
9825
9826 do
9827 {
9828 frame_size = cfun_frame_layout.frame_size;
9829
9830 /* Try to predict whether we'll need the base register. */
9831 base_used = cfun->machine->split_branches_pending_p
9832 || crtl->uses_const_pool
9833 || (!DISP_IN_RANGE (frame_size)
9834 && !CONST_OK_FOR_K (frame_size));
9835
9836 /* Decide which register to use as literal pool base. In small
9837 leaf functions, try to use an unused call-clobbered register
9838 as base register to avoid save/restore overhead. */
9839 if (!base_used)
9840 cfun->machine->base_reg = NULL_RTX;
9841 else
9842 {
9843 int br = 0;
9844
9845 if (crtl->is_leaf)
9846 /* Prefer r5 (most likely to be free). */
9847 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
9848 ;
9849 cfun->machine->base_reg =
9850 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
9851 }
9852
9853 s390_register_info ();
9854 s390_frame_info ();
9855 }
9856 while (frame_size != cfun_frame_layout.frame_size);
9857 }
9858
9859 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
9860 the TX is nonescaping. A transaction is considered escaping if
9861 there is at least one path from tbegin returning CC0 to the
9862 function exit block without an tend.
9863
9864 The check so far has some limitations:
9865 - only single tbegin/tend BBs are supported
9866 - the first cond jump after tbegin must separate the CC0 path from ~CC0
9867 - when CC is copied to a GPR and the CC0 check is done with the GPR
9868 this is not supported
9869 */
9870
9871 static void
9872 s390_optimize_nonescaping_tx (void)
9873 {
9874 const unsigned int CC0 = 1 << 3;
9875 basic_block tbegin_bb = NULL;
9876 basic_block tend_bb = NULL;
9877 basic_block bb;
9878 rtx_insn *insn;
9879 bool result = true;
9880 int bb_index;
9881 rtx_insn *tbegin_insn = NULL;
9882
9883 if (!cfun->machine->tbegin_p)
9884 return;
9885
9886 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
9887 {
9888 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
9889
9890 if (!bb)
9891 continue;
9892
9893 FOR_BB_INSNS (bb, insn)
9894 {
9895 rtx ite, cc, pat, target;
9896 unsigned HOST_WIDE_INT mask;
9897
9898 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
9899 continue;
9900
9901 pat = PATTERN (insn);
9902
9903 if (GET_CODE (pat) == PARALLEL)
9904 pat = XVECEXP (pat, 0, 0);
9905
9906 if (GET_CODE (pat) != SET
9907 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
9908 continue;
9909
9910 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
9911 {
9912 rtx_insn *tmp;
9913
9914 tbegin_insn = insn;
9915
9916 /* Just return if the tbegin doesn't have clobbers. */
9917 if (GET_CODE (PATTERN (insn)) != PARALLEL)
9918 return;
9919
9920 if (tbegin_bb != NULL)
9921 return;
9922
9923 /* Find the next conditional jump. */
9924 for (tmp = NEXT_INSN (insn);
9925 tmp != NULL_RTX;
9926 tmp = NEXT_INSN (tmp))
9927 {
9928 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
9929 return;
9930 if (!JUMP_P (tmp))
9931 continue;
9932
9933 ite = SET_SRC (PATTERN (tmp));
9934 if (GET_CODE (ite) != IF_THEN_ELSE)
9935 continue;
9936
9937 cc = XEXP (XEXP (ite, 0), 0);
9938 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
9939 || GET_MODE (cc) != CCRAWmode
9940 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
9941 return;
9942
9943 if (bb->succs->length () != 2)
9944 return;
9945
9946 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
9947 if (GET_CODE (XEXP (ite, 0)) == NE)
9948 mask ^= 0xf;
9949
9950 if (mask == CC0)
9951 target = XEXP (ite, 1);
9952 else if (mask == (CC0 ^ 0xf))
9953 target = XEXP (ite, 2);
9954 else
9955 return;
9956
9957 {
9958 edge_iterator ei;
9959 edge e1, e2;
9960
9961 ei = ei_start (bb->succs);
9962 e1 = ei_safe_edge (ei);
9963 ei_next (&ei);
9964 e2 = ei_safe_edge (ei);
9965
9966 if (e2->flags & EDGE_FALLTHRU)
9967 {
9968 e2 = e1;
9969 e1 = ei_safe_edge (ei);
9970 }
9971
9972 if (!(e1->flags & EDGE_FALLTHRU))
9973 return;
9974
9975 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
9976 }
9977 if (tmp == BB_END (bb))
9978 break;
9979 }
9980 }
9981
9982 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
9983 {
9984 if (tend_bb != NULL)
9985 return;
9986 tend_bb = bb;
9987 }
9988 }
9989 }
9990
9991 /* Either we successfully remove the FPR clobbers here or we are not
9992 able to do anything for this TX. Both cases don't qualify for
9993 another look. */
9994 cfun->machine->tbegin_p = false;
9995
9996 if (tbegin_bb == NULL || tend_bb == NULL)
9997 return;
9998
9999 calculate_dominance_info (CDI_POST_DOMINATORS);
10000 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10001 free_dominance_info (CDI_POST_DOMINATORS);
10002
10003 if (!result)
10004 return;
10005
10006 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10007 gen_rtvec (2,
10008 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10009 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10010 INSN_CODE (tbegin_insn) = -1;
10011 df_insn_rescan (tbegin_insn);
10012
10013 return;
10014 }
10015
10016 /* Return true if it is legal to put a value with MODE into REGNO. */
10017
10018 bool
10019 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10020 {
10021 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10022 return false;
10023
10024 switch (REGNO_REG_CLASS (regno))
10025 {
10026 case VEC_REGS:
10027 return ((GET_MODE_CLASS (mode) == MODE_INT
10028 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10029 || mode == DFmode
10030 || s390_vector_mode_supported_p (mode));
10031 break;
10032 case FP_REGS:
10033 if (TARGET_VX
10034 && ((GET_MODE_CLASS (mode) == MODE_INT
10035 && s390_class_max_nregs (FP_REGS, mode) == 1)
10036 || mode == DFmode
10037 || s390_vector_mode_supported_p (mode)))
10038 return true;
10039
10040 if (REGNO_PAIR_OK (regno, mode))
10041 {
10042 if (mode == SImode || mode == DImode)
10043 return true;
10044
10045 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10046 return true;
10047 }
10048 break;
10049 case ADDR_REGS:
10050 if (FRAME_REGNO_P (regno) && mode == Pmode)
10051 return true;
10052
10053 /* fallthrough */
10054 case GENERAL_REGS:
10055 if (REGNO_PAIR_OK (regno, mode))
10056 {
10057 if (TARGET_ZARCH
10058 || (mode != TFmode && mode != TCmode && mode != TDmode))
10059 return true;
10060 }
10061 break;
10062 case CC_REGS:
10063 if (GET_MODE_CLASS (mode) == MODE_CC)
10064 return true;
10065 break;
10066 case ACCESS_REGS:
10067 if (REGNO_PAIR_OK (regno, mode))
10068 {
10069 if (mode == SImode || mode == Pmode)
10070 return true;
10071 }
10072 break;
10073 default:
10074 return false;
10075 }
10076
10077 return false;
10078 }
10079
10080 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10081
10082 bool
10083 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10084 {
10085 /* Once we've decided upon a register to use as base register, it must
10086 no longer be used for any other purpose. */
10087 if (cfun->machine->base_reg)
10088 if (REGNO (cfun->machine->base_reg) == old_reg
10089 || REGNO (cfun->machine->base_reg) == new_reg)
10090 return false;
10091
10092 /* Prevent regrename from using call-saved regs which haven't
10093 actually been saved. This is necessary since regrename assumes
10094 the backend save/restore decisions are based on
10095 df_regs_ever_live. Since we have our own routine we have to tell
10096 regrename manually about it. */
10097 if (GENERAL_REGNO_P (new_reg)
10098 && !call_really_used_regs[new_reg]
10099 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10100 return false;
10101
10102 return true;
10103 }
10104
10105 /* Return nonzero if register REGNO can be used as a scratch register
10106 in peephole2. */
10107
10108 static bool
10109 s390_hard_regno_scratch_ok (unsigned int regno)
10110 {
10111 /* See s390_hard_regno_rename_ok. */
10112 if (GENERAL_REGNO_P (regno)
10113 && !call_really_used_regs[regno]
10114 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10115 return false;
10116
10117 return true;
10118 }
10119
10120 /* Maximum number of registers to represent a value of mode MODE
10121 in a register of class RCLASS. */
10122
10123 int
10124 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10125 {
10126 int reg_size;
10127 bool reg_pair_required_p = false;
10128
10129 switch (rclass)
10130 {
10131 case FP_REGS:
10132 case VEC_REGS:
10133 reg_size = TARGET_VX ? 16 : 8;
10134
10135 /* TF and TD modes would fit into a VR but we put them into a
10136 register pair since we do not have 128bit FP instructions on
10137 full VRs. */
10138 if (TARGET_VX
10139 && SCALAR_FLOAT_MODE_P (mode)
10140 && GET_MODE_SIZE (mode) >= 16)
10141 reg_pair_required_p = true;
10142
10143 /* Even if complex types would fit into a single FPR/VR we force
10144 them into a register pair to deal with the parts more easily.
10145 (FIXME: What about complex ints?) */
10146 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10147 reg_pair_required_p = true;
10148 break;
10149 case ACCESS_REGS:
10150 reg_size = 4;
10151 break;
10152 default:
10153 reg_size = UNITS_PER_WORD;
10154 break;
10155 }
10156
10157 if (reg_pair_required_p)
10158 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10159
10160 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10161 }
10162
10163 /* Return TRUE if changing mode from FROM to TO should not be allowed
10164 for register class CLASS. */
10165
10166 int
10167 s390_cannot_change_mode_class (machine_mode from_mode,
10168 machine_mode to_mode,
10169 enum reg_class rclass)
10170 {
10171 machine_mode small_mode;
10172 machine_mode big_mode;
10173
10174 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10175 return 0;
10176
10177 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10178 {
10179 small_mode = from_mode;
10180 big_mode = to_mode;
10181 }
10182 else
10183 {
10184 small_mode = to_mode;
10185 big_mode = from_mode;
10186 }
10187
10188 /* Values residing in VRs are little-endian style. All modes are
10189 placed left-aligned in an VR. This means that we cannot allow
10190 switching between modes with differing sizes. Also if the vector
10191 facility is available we still place TFmode values in VR register
10192 pairs, since the only instructions we have operating on TFmodes
10193 only deal with register pairs. Therefore we have to allow DFmode
10194 subregs of TFmodes to enable the TFmode splitters. */
10195 if (reg_classes_intersect_p (VEC_REGS, rclass)
10196 && (GET_MODE_SIZE (small_mode) < 8
10197 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10198 return 1;
10199
10200 /* Likewise for access registers, since they have only half the
10201 word size on 64-bit. */
10202 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10203 return 1;
10204
10205 return 0;
10206 }
10207
10208 /* Return true if we use LRA instead of reload pass. */
10209 static bool
10210 s390_lra_p (void)
10211 {
10212 return s390_lra_flag;
10213 }
10214
10215 /* Return true if register FROM can be eliminated via register TO. */
10216
10217 static bool
10218 s390_can_eliminate (const int from, const int to)
10219 {
10220 /* On zSeries machines, we have not marked the base register as fixed.
10221 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10222 If a function requires the base register, we say here that this
10223 elimination cannot be performed. This will cause reload to free
10224 up the base register (as if it were fixed). On the other hand,
10225 if the current function does *not* require the base register, we
10226 say here the elimination succeeds, which in turn allows reload
10227 to allocate the base register for any other purpose. */
10228 if (from == BASE_REGNUM && to == BASE_REGNUM)
10229 {
10230 if (TARGET_CPU_ZARCH)
10231 {
10232 s390_init_frame_layout ();
10233 return cfun->machine->base_reg == NULL_RTX;
10234 }
10235
10236 return false;
10237 }
10238
10239 /* Everything else must point into the stack frame. */
10240 gcc_assert (to == STACK_POINTER_REGNUM
10241 || to == HARD_FRAME_POINTER_REGNUM);
10242
10243 gcc_assert (from == FRAME_POINTER_REGNUM
10244 || from == ARG_POINTER_REGNUM
10245 || from == RETURN_ADDRESS_POINTER_REGNUM);
10246
10247 /* Make sure we actually saved the return address. */
10248 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10249 if (!crtl->calls_eh_return
10250 && !cfun->stdarg
10251 && !cfun_frame_layout.save_return_addr_p)
10252 return false;
10253
10254 return true;
10255 }
10256
10257 /* Return offset between register FROM and TO initially after prolog. */
10258
10259 HOST_WIDE_INT
10260 s390_initial_elimination_offset (int from, int to)
10261 {
10262 HOST_WIDE_INT offset;
10263
10264 /* ??? Why are we called for non-eliminable pairs? */
10265 if (!s390_can_eliminate (from, to))
10266 return 0;
10267
10268 switch (from)
10269 {
10270 case FRAME_POINTER_REGNUM:
10271 offset = (get_frame_size()
10272 + STACK_POINTER_OFFSET
10273 + crtl->outgoing_args_size);
10274 break;
10275
10276 case ARG_POINTER_REGNUM:
10277 s390_init_frame_layout ();
10278 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10279 break;
10280
10281 case RETURN_ADDRESS_POINTER_REGNUM:
10282 s390_init_frame_layout ();
10283
10284 if (cfun_frame_layout.first_save_gpr_slot == -1)
10285 {
10286 /* If it turns out that for stdarg nothing went into the reg
10287 save area we also do not need the return address
10288 pointer. */
10289 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10290 return 0;
10291
10292 gcc_unreachable ();
10293 }
10294
10295 /* In order to make the following work it is not necessary for
10296 r14 to have a save slot. It is sufficient if one other GPR
10297 got one. Since the GPRs are always stored without gaps we
10298 are able to calculate where the r14 save slot would
10299 reside. */
10300 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10301 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10302 UNITS_PER_LONG);
10303 break;
10304
10305 case BASE_REGNUM:
10306 offset = 0;
10307 break;
10308
10309 default:
10310 gcc_unreachable ();
10311 }
10312
10313 return offset;
10314 }
10315
10316 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10317 to register BASE. Return generated insn. */
10318
10319 static rtx
10320 save_fpr (rtx base, int offset, int regnum)
10321 {
10322 rtx addr;
10323 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10324
10325 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10326 set_mem_alias_set (addr, get_varargs_alias_set ());
10327 else
10328 set_mem_alias_set (addr, get_frame_alias_set ());
10329
10330 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10331 }
10332
10333 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10334 to register BASE. Return generated insn. */
10335
10336 static rtx
10337 restore_fpr (rtx base, int offset, int regnum)
10338 {
10339 rtx addr;
10340 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10341 set_mem_alias_set (addr, get_frame_alias_set ());
10342
10343 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10344 }
10345
10346 /* Return true if REGNO is a global register, but not one
10347 of the special ones that need to be saved/restored in anyway. */
10348
10349 static inline bool
10350 global_not_special_regno_p (int regno)
10351 {
10352 return (global_regs[regno]
10353 /* These registers are special and need to be
10354 restored in any case. */
10355 && !(regno == STACK_POINTER_REGNUM
10356 || regno == RETURN_REGNUM
10357 || regno == BASE_REGNUM
10358 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10359 }
10360
10361 /* Generate insn to save registers FIRST to LAST into
10362 the register save area located at offset OFFSET
10363 relative to register BASE. */
10364
10365 static rtx
10366 save_gprs (rtx base, int offset, int first, int last)
10367 {
10368 rtx addr, insn, note;
10369 int i;
10370
10371 addr = plus_constant (Pmode, base, offset);
10372 addr = gen_rtx_MEM (Pmode, addr);
10373
10374 set_mem_alias_set (addr, get_frame_alias_set ());
10375
10376 /* Special-case single register. */
10377 if (first == last)
10378 {
10379 if (TARGET_64BIT)
10380 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10381 else
10382 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10383
10384 if (!global_not_special_regno_p (first))
10385 RTX_FRAME_RELATED_P (insn) = 1;
10386 return insn;
10387 }
10388
10389
10390 insn = gen_store_multiple (addr,
10391 gen_rtx_REG (Pmode, first),
10392 GEN_INT (last - first + 1));
10393
10394 if (first <= 6 && cfun->stdarg)
10395 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10396 {
10397 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10398
10399 if (first + i <= 6)
10400 set_mem_alias_set (mem, get_varargs_alias_set ());
10401 }
10402
10403 /* We need to set the FRAME_RELATED flag on all SETs
10404 inside the store-multiple pattern.
10405
10406 However, we must not emit DWARF records for registers 2..5
10407 if they are stored for use by variable arguments ...
10408
10409 ??? Unfortunately, it is not enough to simply not the
10410 FRAME_RELATED flags for those SETs, because the first SET
10411 of the PARALLEL is always treated as if it had the flag
10412 set, even if it does not. Therefore we emit a new pattern
10413 without those registers as REG_FRAME_RELATED_EXPR note. */
10414
10415 if (first >= 6 && !global_not_special_regno_p (first))
10416 {
10417 rtx pat = PATTERN (insn);
10418
10419 for (i = 0; i < XVECLEN (pat, 0); i++)
10420 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10421 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10422 0, i)))))
10423 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10424
10425 RTX_FRAME_RELATED_P (insn) = 1;
10426 }
10427 else if (last >= 6)
10428 {
10429 int start;
10430
10431 for (start = first >= 6 ? first : 6; start <= last; start++)
10432 if (!global_not_special_regno_p (start))
10433 break;
10434
10435 if (start > last)
10436 return insn;
10437
10438 addr = plus_constant (Pmode, base,
10439 offset + (start - first) * UNITS_PER_LONG);
10440
10441 if (start == last)
10442 {
10443 if (TARGET_64BIT)
10444 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10445 gen_rtx_REG (Pmode, start));
10446 else
10447 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10448 gen_rtx_REG (Pmode, start));
10449 note = PATTERN (note);
10450
10451 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10452 RTX_FRAME_RELATED_P (insn) = 1;
10453
10454 return insn;
10455 }
10456
10457 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10458 gen_rtx_REG (Pmode, start),
10459 GEN_INT (last - start + 1));
10460 note = PATTERN (note);
10461
10462 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10463
10464 for (i = 0; i < XVECLEN (note, 0); i++)
10465 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10466 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10467 0, i)))))
10468 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10469
10470 RTX_FRAME_RELATED_P (insn) = 1;
10471 }
10472
10473 return insn;
10474 }
10475
10476 /* Generate insn to restore registers FIRST to LAST from
10477 the register save area located at offset OFFSET
10478 relative to register BASE. */
10479
10480 static rtx
10481 restore_gprs (rtx base, int offset, int first, int last)
10482 {
10483 rtx addr, insn;
10484
10485 addr = plus_constant (Pmode, base, offset);
10486 addr = gen_rtx_MEM (Pmode, addr);
10487 set_mem_alias_set (addr, get_frame_alias_set ());
10488
10489 /* Special-case single register. */
10490 if (first == last)
10491 {
10492 if (TARGET_64BIT)
10493 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10494 else
10495 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10496
10497 RTX_FRAME_RELATED_P (insn) = 1;
10498 return insn;
10499 }
10500
10501 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10502 addr,
10503 GEN_INT (last - first + 1));
10504 RTX_FRAME_RELATED_P (insn) = 1;
10505 return insn;
10506 }
10507
10508 /* Return insn sequence to load the GOT register. */
10509
10510 static GTY(()) rtx got_symbol;
10511 rtx_insn *
10512 s390_load_got (void)
10513 {
10514 rtx_insn *insns;
10515
10516 /* We cannot use pic_offset_table_rtx here since we use this
10517 function also for non-pic if __tls_get_offset is called and in
10518 that case PIC_OFFSET_TABLE_REGNUM as well as pic_offset_table_rtx
10519 aren't usable. */
10520 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10521
10522 if (!got_symbol)
10523 {
10524 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
10525 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
10526 }
10527
10528 start_sequence ();
10529
10530 if (TARGET_CPU_ZARCH)
10531 {
10532 emit_move_insn (got_rtx, got_symbol);
10533 }
10534 else
10535 {
10536 rtx offset;
10537
10538 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
10539 UNSPEC_LTREL_OFFSET);
10540 offset = gen_rtx_CONST (Pmode, offset);
10541 offset = force_const_mem (Pmode, offset);
10542
10543 emit_move_insn (got_rtx, offset);
10544
10545 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
10546 UNSPEC_LTREL_BASE);
10547 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
10548
10549 emit_move_insn (got_rtx, offset);
10550 }
10551
10552 insns = get_insns ();
10553 end_sequence ();
10554 return insns;
10555 }
10556
10557 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10558 and the change to the stack pointer. */
10559
10560 static void
10561 s390_emit_stack_tie (void)
10562 {
10563 rtx mem = gen_frame_mem (BLKmode,
10564 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10565
10566 emit_insn (gen_stack_tie (mem));
10567 }
10568
10569 /* Copy GPRS into FPR save slots. */
10570
10571 static void
10572 s390_save_gprs_to_fprs (void)
10573 {
10574 int i;
10575
10576 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10577 return;
10578
10579 for (i = 6; i < 16; i++)
10580 {
10581 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10582 {
10583 rtx_insn *insn =
10584 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10585 gen_rtx_REG (DImode, i));
10586 RTX_FRAME_RELATED_P (insn) = 1;
10587 /* This prevents dwarf2cfi from interpreting the set. Doing
10588 so it might emit def_cfa_register infos setting an FPR as
10589 new CFA. */
10590 add_reg_note (insn, REG_CFA_REGISTER, PATTERN (insn));
10591 }
10592 }
10593 }
10594
10595 /* Restore GPRs from FPR save slots. */
10596
10597 static void
10598 s390_restore_gprs_from_fprs (void)
10599 {
10600 int i;
10601
10602 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10603 return;
10604
10605 for (i = 6; i < 16; i++)
10606 {
10607 rtx_insn *insn;
10608
10609 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
10610 continue;
10611
10612 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
10613
10614 if (i == STACK_POINTER_REGNUM)
10615 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
10616 else
10617 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
10618
10619 df_set_regs_ever_live (i, true);
10620 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
10621 if (i == STACK_POINTER_REGNUM)
10622 add_reg_note (insn, REG_CFA_DEF_CFA,
10623 plus_constant (Pmode, stack_pointer_rtx,
10624 STACK_POINTER_OFFSET));
10625 RTX_FRAME_RELATED_P (insn) = 1;
10626 }
10627 }
10628
10629
10630 /* A pass run immediately before shrink-wrapping and prologue and epilogue
10631 generation. */
10632
10633 namespace {
10634
10635 const pass_data pass_data_s390_early_mach =
10636 {
10637 RTL_PASS, /* type */
10638 "early_mach", /* name */
10639 OPTGROUP_NONE, /* optinfo_flags */
10640 TV_MACH_DEP, /* tv_id */
10641 0, /* properties_required */
10642 0, /* properties_provided */
10643 0, /* properties_destroyed */
10644 0, /* todo_flags_start */
10645 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
10646 };
10647
10648 class pass_s390_early_mach : public rtl_opt_pass
10649 {
10650 public:
10651 pass_s390_early_mach (gcc::context *ctxt)
10652 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
10653 {}
10654
10655 /* opt_pass methods: */
10656 virtual unsigned int execute (function *);
10657
10658 }; // class pass_s390_early_mach
10659
10660 unsigned int
10661 pass_s390_early_mach::execute (function *fun)
10662 {
10663 rtx_insn *insn;
10664
10665 /* Try to get rid of the FPR clobbers. */
10666 s390_optimize_nonescaping_tx ();
10667
10668 /* Re-compute register info. */
10669 s390_register_info ();
10670
10671 /* If we're using a base register, ensure that it is always valid for
10672 the first non-prologue instruction. */
10673 if (fun->machine->base_reg)
10674 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
10675
10676 /* Annotate all constant pool references to let the scheduler know
10677 they implicitly use the base register. */
10678 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
10679 if (INSN_P (insn))
10680 {
10681 annotate_constant_pool_refs (&PATTERN (insn));
10682 df_insn_rescan (insn);
10683 }
10684 return 0;
10685 }
10686
10687 } // anon namespace
10688
10689 /* Expand the prologue into a bunch of separate insns. */
10690
10691 void
10692 s390_emit_prologue (void)
10693 {
10694 rtx insn, addr;
10695 rtx temp_reg;
10696 int i;
10697 int offset;
10698 int next_fpr = 0;
10699
10700 /* Choose best register to use for temp use within prologue.
10701 TPF with profiling must avoid the register 14 - the tracing function
10702 needs the original contents of r14 to be preserved. */
10703
10704 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
10705 && !crtl->is_leaf
10706 && !TARGET_TPF_PROFILING)
10707 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
10708 else if (flag_split_stack && cfun->stdarg)
10709 temp_reg = gen_rtx_REG (Pmode, 12);
10710 else
10711 temp_reg = gen_rtx_REG (Pmode, 1);
10712
10713 s390_save_gprs_to_fprs ();
10714
10715 /* Save call saved gprs. */
10716 if (cfun_frame_layout.first_save_gpr != -1)
10717 {
10718 insn = save_gprs (stack_pointer_rtx,
10719 cfun_frame_layout.gprs_offset +
10720 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
10721 - cfun_frame_layout.first_save_gpr_slot),
10722 cfun_frame_layout.first_save_gpr,
10723 cfun_frame_layout.last_save_gpr);
10724 emit_insn (insn);
10725 }
10726
10727 /* Dummy insn to mark literal pool slot. */
10728
10729 if (cfun->machine->base_reg)
10730 emit_insn (gen_main_pool (cfun->machine->base_reg));
10731
10732 offset = cfun_frame_layout.f0_offset;
10733
10734 /* Save f0 and f2. */
10735 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
10736 {
10737 if (cfun_fpr_save_p (i))
10738 {
10739 save_fpr (stack_pointer_rtx, offset, i);
10740 offset += 8;
10741 }
10742 else if (!TARGET_PACKED_STACK || cfun->stdarg)
10743 offset += 8;
10744 }
10745
10746 /* Save f4 and f6. */
10747 offset = cfun_frame_layout.f4_offset;
10748 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
10749 {
10750 if (cfun_fpr_save_p (i))
10751 {
10752 insn = save_fpr (stack_pointer_rtx, offset, i);
10753 offset += 8;
10754
10755 /* If f4 and f6 are call clobbered they are saved due to
10756 stdargs and therefore are not frame related. */
10757 if (!call_really_used_regs[i])
10758 RTX_FRAME_RELATED_P (insn) = 1;
10759 }
10760 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
10761 offset += 8;
10762 }
10763
10764 if (TARGET_PACKED_STACK
10765 && cfun_save_high_fprs_p
10766 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
10767 {
10768 offset = (cfun_frame_layout.f8_offset
10769 + (cfun_frame_layout.high_fprs - 1) * 8);
10770
10771 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
10772 if (cfun_fpr_save_p (i))
10773 {
10774 insn = save_fpr (stack_pointer_rtx, offset, i);
10775
10776 RTX_FRAME_RELATED_P (insn) = 1;
10777 offset -= 8;
10778 }
10779 if (offset >= cfun_frame_layout.f8_offset)
10780 next_fpr = i;
10781 }
10782
10783 if (!TARGET_PACKED_STACK)
10784 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
10785
10786 if (flag_stack_usage_info)
10787 current_function_static_stack_size = cfun_frame_layout.frame_size;
10788
10789 /* Decrement stack pointer. */
10790
10791 if (cfun_frame_layout.frame_size > 0)
10792 {
10793 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10794 rtx real_frame_off;
10795
10796 if (s390_stack_size)
10797 {
10798 HOST_WIDE_INT stack_guard;
10799
10800 if (s390_stack_guard)
10801 stack_guard = s390_stack_guard;
10802 else
10803 {
10804 /* If no value for stack guard is provided the smallest power of 2
10805 larger than the current frame size is chosen. */
10806 stack_guard = 1;
10807 while (stack_guard < cfun_frame_layout.frame_size)
10808 stack_guard <<= 1;
10809 }
10810
10811 if (cfun_frame_layout.frame_size >= s390_stack_size)
10812 {
10813 warning (0, "frame size of function %qs is %wd"
10814 " bytes exceeding user provided stack limit of "
10815 "%d bytes. "
10816 "An unconditional trap is added.",
10817 current_function_name(), cfun_frame_layout.frame_size,
10818 s390_stack_size);
10819 emit_insn (gen_trap ());
10820 emit_barrier ();
10821 }
10822 else
10823 {
10824 /* stack_guard has to be smaller than s390_stack_size.
10825 Otherwise we would emit an AND with zero which would
10826 not match the test under mask pattern. */
10827 if (stack_guard >= s390_stack_size)
10828 {
10829 warning (0, "frame size of function %qs is %wd"
10830 " bytes which is more than half the stack size. "
10831 "The dynamic check would not be reliable. "
10832 "No check emitted for this function.",
10833 current_function_name(),
10834 cfun_frame_layout.frame_size);
10835 }
10836 else
10837 {
10838 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
10839 & ~(stack_guard - 1));
10840
10841 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
10842 GEN_INT (stack_check_mask));
10843 if (TARGET_64BIT)
10844 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
10845 t, const0_rtx),
10846 t, const0_rtx, const0_rtx));
10847 else
10848 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
10849 t, const0_rtx),
10850 t, const0_rtx, const0_rtx));
10851 }
10852 }
10853 }
10854
10855 if (s390_warn_framesize > 0
10856 && cfun_frame_layout.frame_size >= s390_warn_framesize)
10857 warning (0, "frame size of %qs is %wd bytes",
10858 current_function_name (), cfun_frame_layout.frame_size);
10859
10860 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
10861 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
10862
10863 /* Save incoming stack pointer into temp reg. */
10864 if (TARGET_BACKCHAIN || next_fpr)
10865 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
10866
10867 /* Subtract frame size from stack pointer. */
10868
10869 if (DISP_IN_RANGE (INTVAL (frame_off)))
10870 {
10871 insn = gen_rtx_SET (stack_pointer_rtx,
10872 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10873 frame_off));
10874 insn = emit_insn (insn);
10875 }
10876 else
10877 {
10878 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
10879 frame_off = force_const_mem (Pmode, frame_off);
10880
10881 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
10882 annotate_constant_pool_refs (&PATTERN (insn));
10883 }
10884
10885 RTX_FRAME_RELATED_P (insn) = 1;
10886 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
10887 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10888 gen_rtx_SET (stack_pointer_rtx,
10889 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
10890 real_frame_off)));
10891
10892 /* Set backchain. */
10893
10894 if (TARGET_BACKCHAIN)
10895 {
10896 if (cfun_frame_layout.backchain_offset)
10897 addr = gen_rtx_MEM (Pmode,
10898 plus_constant (Pmode, stack_pointer_rtx,
10899 cfun_frame_layout.backchain_offset));
10900 else
10901 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
10902 set_mem_alias_set (addr, get_frame_alias_set ());
10903 insn = emit_insn (gen_move_insn (addr, temp_reg));
10904 }
10905
10906 /* If we support non-call exceptions (e.g. for Java),
10907 we need to make sure the backchain pointer is set up
10908 before any possibly trapping memory access. */
10909 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
10910 {
10911 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
10912 emit_clobber (addr);
10913 }
10914 }
10915
10916 /* Save fprs 8 - 15 (64 bit ABI). */
10917
10918 if (cfun_save_high_fprs_p && next_fpr)
10919 {
10920 /* If the stack might be accessed through a different register
10921 we have to make sure that the stack pointer decrement is not
10922 moved below the use of the stack slots. */
10923 s390_emit_stack_tie ();
10924
10925 insn = emit_insn (gen_add2_insn (temp_reg,
10926 GEN_INT (cfun_frame_layout.f8_offset)));
10927
10928 offset = 0;
10929
10930 for (i = FPR8_REGNUM; i <= next_fpr; i++)
10931 if (cfun_fpr_save_p (i))
10932 {
10933 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
10934 cfun_frame_layout.frame_size
10935 + cfun_frame_layout.f8_offset
10936 + offset);
10937
10938 insn = save_fpr (temp_reg, offset, i);
10939 offset += 8;
10940 RTX_FRAME_RELATED_P (insn) = 1;
10941 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
10942 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
10943 gen_rtx_REG (DFmode, i)));
10944 }
10945 }
10946
10947 /* Set frame pointer, if needed. */
10948
10949 if (frame_pointer_needed)
10950 {
10951 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
10952 RTX_FRAME_RELATED_P (insn) = 1;
10953 }
10954
10955 /* Set up got pointer, if needed. */
10956
10957 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
10958 {
10959 rtx_insn *insns = s390_load_got ();
10960
10961 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
10962 annotate_constant_pool_refs (&PATTERN (insn));
10963
10964 emit_insn (insns);
10965 }
10966
10967 if (TARGET_TPF_PROFILING)
10968 {
10969 /* Generate a BAS instruction to serve as a function
10970 entry intercept to facilitate the use of tracing
10971 algorithms located at the branch target. */
10972 emit_insn (gen_prologue_tpf ());
10973
10974 /* Emit a blockage here so that all code
10975 lies between the profiling mechanisms. */
10976 emit_insn (gen_blockage ());
10977 }
10978 }
10979
10980 /* Expand the epilogue into a bunch of separate insns. */
10981
10982 void
10983 s390_emit_epilogue (bool sibcall)
10984 {
10985 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
10986 int area_bottom, area_top, offset = 0;
10987 int next_offset;
10988 rtvec p;
10989 int i;
10990
10991 if (TARGET_TPF_PROFILING)
10992 {
10993
10994 /* Generate a BAS instruction to serve as a function
10995 entry intercept to facilitate the use of tracing
10996 algorithms located at the branch target. */
10997
10998 /* Emit a blockage here so that all code
10999 lies between the profiling mechanisms. */
11000 emit_insn (gen_blockage ());
11001
11002 emit_insn (gen_epilogue_tpf ());
11003 }
11004
11005 /* Check whether to use frame or stack pointer for restore. */
11006
11007 frame_pointer = (frame_pointer_needed
11008 ? hard_frame_pointer_rtx : stack_pointer_rtx);
11009
11010 s390_frame_area (&area_bottom, &area_top);
11011
11012 /* Check whether we can access the register save area.
11013 If not, increment the frame pointer as required. */
11014
11015 if (area_top <= area_bottom)
11016 {
11017 /* Nothing to restore. */
11018 }
11019 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11020 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11021 {
11022 /* Area is in range. */
11023 offset = cfun_frame_layout.frame_size;
11024 }
11025 else
11026 {
11027 rtx insn, frame_off, cfa;
11028
11029 offset = area_bottom < 0 ? -area_bottom : 0;
11030 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11031
11032 cfa = gen_rtx_SET (frame_pointer,
11033 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11034 if (DISP_IN_RANGE (INTVAL (frame_off)))
11035 {
11036 insn = gen_rtx_SET (frame_pointer,
11037 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11038 insn = emit_insn (insn);
11039 }
11040 else
11041 {
11042 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11043 frame_off = force_const_mem (Pmode, frame_off);
11044
11045 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11046 annotate_constant_pool_refs (&PATTERN (insn));
11047 }
11048 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11049 RTX_FRAME_RELATED_P (insn) = 1;
11050 }
11051
11052 /* Restore call saved fprs. */
11053
11054 if (TARGET_64BIT)
11055 {
11056 if (cfun_save_high_fprs_p)
11057 {
11058 next_offset = cfun_frame_layout.f8_offset;
11059 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11060 {
11061 if (cfun_fpr_save_p (i))
11062 {
11063 restore_fpr (frame_pointer,
11064 offset + next_offset, i);
11065 cfa_restores
11066 = alloc_reg_note (REG_CFA_RESTORE,
11067 gen_rtx_REG (DFmode, i), cfa_restores);
11068 next_offset += 8;
11069 }
11070 }
11071 }
11072
11073 }
11074 else
11075 {
11076 next_offset = cfun_frame_layout.f4_offset;
11077 /* f4, f6 */
11078 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11079 {
11080 if (cfun_fpr_save_p (i))
11081 {
11082 restore_fpr (frame_pointer,
11083 offset + next_offset, i);
11084 cfa_restores
11085 = alloc_reg_note (REG_CFA_RESTORE,
11086 gen_rtx_REG (DFmode, i), cfa_restores);
11087 next_offset += 8;
11088 }
11089 else if (!TARGET_PACKED_STACK)
11090 next_offset += 8;
11091 }
11092
11093 }
11094
11095 /* Return register. */
11096
11097 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11098
11099 /* Restore call saved gprs. */
11100
11101 if (cfun_frame_layout.first_restore_gpr != -1)
11102 {
11103 rtx insn, addr;
11104 int i;
11105
11106 /* Check for global register and save them
11107 to stack location from where they get restored. */
11108
11109 for (i = cfun_frame_layout.first_restore_gpr;
11110 i <= cfun_frame_layout.last_restore_gpr;
11111 i++)
11112 {
11113 if (global_not_special_regno_p (i))
11114 {
11115 addr = plus_constant (Pmode, frame_pointer,
11116 offset + cfun_frame_layout.gprs_offset
11117 + (i - cfun_frame_layout.first_save_gpr_slot)
11118 * UNITS_PER_LONG);
11119 addr = gen_rtx_MEM (Pmode, addr);
11120 set_mem_alias_set (addr, get_frame_alias_set ());
11121 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11122 }
11123 else
11124 cfa_restores
11125 = alloc_reg_note (REG_CFA_RESTORE,
11126 gen_rtx_REG (Pmode, i), cfa_restores);
11127 }
11128
11129 if (! sibcall)
11130 {
11131 /* Fetch return address from stack before load multiple,
11132 this will do good for scheduling.
11133
11134 Only do this if we already decided that r14 needs to be
11135 saved to a stack slot. (And not just because r14 happens to
11136 be in between two GPRs which need saving.) Otherwise it
11137 would be difficult to take that decision back in
11138 s390_optimize_prologue. */
11139 if (cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK)
11140 {
11141 int return_regnum = find_unused_clobbered_reg();
11142 if (!return_regnum)
11143 return_regnum = 4;
11144 return_reg = gen_rtx_REG (Pmode, return_regnum);
11145
11146 addr = plus_constant (Pmode, frame_pointer,
11147 offset + cfun_frame_layout.gprs_offset
11148 + (RETURN_REGNUM
11149 - cfun_frame_layout.first_save_gpr_slot)
11150 * UNITS_PER_LONG);
11151 addr = gen_rtx_MEM (Pmode, addr);
11152 set_mem_alias_set (addr, get_frame_alias_set ());
11153 emit_move_insn (return_reg, addr);
11154
11155 /* Once we did that optimization we have to make sure
11156 s390_optimize_prologue does not try to remove the
11157 store of r14 since we will not be able to find the
11158 load issued here. */
11159 cfun_frame_layout.save_return_addr_p = true;
11160 }
11161 }
11162
11163 insn = restore_gprs (frame_pointer,
11164 offset + cfun_frame_layout.gprs_offset
11165 + (cfun_frame_layout.first_restore_gpr
11166 - cfun_frame_layout.first_save_gpr_slot)
11167 * UNITS_PER_LONG,
11168 cfun_frame_layout.first_restore_gpr,
11169 cfun_frame_layout.last_restore_gpr);
11170 insn = emit_insn (insn);
11171 REG_NOTES (insn) = cfa_restores;
11172 add_reg_note (insn, REG_CFA_DEF_CFA,
11173 plus_constant (Pmode, stack_pointer_rtx,
11174 STACK_POINTER_OFFSET));
11175 RTX_FRAME_RELATED_P (insn) = 1;
11176 }
11177
11178 s390_restore_gprs_from_fprs ();
11179
11180 if (! sibcall)
11181 {
11182
11183 /* Return to caller. */
11184
11185 p = rtvec_alloc (2);
11186
11187 RTVEC_ELT (p, 0) = ret_rtx;
11188 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
11189 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
11190 }
11191 }
11192
11193 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11194
11195 static void
11196 s300_set_up_by_prologue (hard_reg_set_container *regs)
11197 {
11198 if (cfun->machine->base_reg
11199 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11200 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11201 }
11202
11203 /* -fsplit-stack support. */
11204
11205 /* A SYMBOL_REF for __morestack. */
11206 static GTY(()) rtx morestack_ref;
11207
11208 /* When using -fsplit-stack, the allocation routines set a field in
11209 the TCB to the bottom of the stack plus this much space, measured
11210 in bytes. */
11211
11212 #define SPLIT_STACK_AVAILABLE 1024
11213
11214 /* Emit -fsplit-stack prologue, which goes before the regular function
11215 prologue. */
11216
11217 void
11218 s390_expand_split_stack_prologue (void)
11219 {
11220 rtx r1, guard, cc = NULL;
11221 rtx_insn *insn;
11222 /* Offset from thread pointer to __private_ss. */
11223 int psso = TARGET_64BIT ? 0x38 : 0x20;
11224 /* Pointer size in bytes. */
11225 /* Frame size and argument size - the two parameters to __morestack. */
11226 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11227 /* Align argument size to 8 bytes - simplifies __morestack code. */
11228 HOST_WIDE_INT args_size = crtl->args.size >= 0
11229 ? ((crtl->args.size + 7) & ~7)
11230 : 0;
11231 /* Label to be called by __morestack. */
11232 rtx_code_label *call_done = NULL;
11233 rtx_code_label *parm_base = NULL;
11234 rtx tmp;
11235
11236 gcc_assert (flag_split_stack && reload_completed);
11237 if (!TARGET_CPU_ZARCH)
11238 {
11239 sorry ("CPUs older than z900 are not supported for -fsplit-stack");
11240 return;
11241 }
11242
11243 r1 = gen_rtx_REG (Pmode, 1);
11244
11245 /* If no stack frame will be allocated, don't do anything. */
11246 if (!frame_size)
11247 {
11248 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11249 {
11250 /* If va_start is used, just use r15. */
11251 emit_move_insn (r1,
11252 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11253 GEN_INT (STACK_POINTER_OFFSET)));
11254
11255 }
11256 return;
11257 }
11258
11259 if (morestack_ref == NULL_RTX)
11260 {
11261 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11262 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11263 | SYMBOL_FLAG_FUNCTION);
11264 }
11265
11266 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11267 {
11268 /* If frame_size will fit in an add instruction, do a stack space
11269 check, and only call __morestack if there's not enough space. */
11270
11271 /* Get thread pointer. r1 is the only register we can always destroy - r0
11272 could contain a static chain (and cannot be used to address memory
11273 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11274 emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
11275 /* Aim at __private_ss. */
11276 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11277
11278 /* If less that 1kiB used, skip addition and compare directly with
11279 __private_ss. */
11280 if (frame_size > SPLIT_STACK_AVAILABLE)
11281 {
11282 emit_move_insn (r1, guard);
11283 if (TARGET_64BIT)
11284 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11285 else
11286 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11287 guard = r1;
11288 }
11289
11290 /* Compare the (maybe adjusted) guard with the stack pointer. */
11291 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11292 }
11293
11294 call_done = gen_label_rtx ();
11295 parm_base = gen_label_rtx ();
11296
11297 /* Emit the parameter block. */
11298 tmp = gen_split_stack_data (parm_base, call_done,
11299 GEN_INT (frame_size),
11300 GEN_INT (args_size));
11301 insn = emit_insn (tmp);
11302 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11303 LABEL_NUSES (call_done)++;
11304 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11305 LABEL_NUSES (parm_base)++;
11306
11307 /* %r1 = litbase. */
11308 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11309 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11310 LABEL_NUSES (parm_base)++;
11311
11312 /* Now, we need to call __morestack. It has very special calling
11313 conventions: it preserves param/return/static chain registers for
11314 calling main function body, and looks for its own parameters at %r1. */
11315
11316 if (cc != NULL)
11317 {
11318 tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
11319
11320 insn = emit_jump_insn (tmp);
11321 JUMP_LABEL (insn) = call_done;
11322 LABEL_NUSES (call_done)++;
11323
11324 /* Mark the jump as very unlikely to be taken. */
11325 add_int_reg_note (insn, REG_BR_PROB, REG_BR_PROB_BASE / 100);
11326
11327 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11328 {
11329 /* If va_start is used, and __morestack was not called, just use
11330 r15. */
11331 emit_move_insn (r1,
11332 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11333 GEN_INT (STACK_POINTER_OFFSET)));
11334 }
11335 }
11336 else
11337 {
11338 tmp = gen_split_stack_call (morestack_ref, call_done);
11339 insn = emit_jump_insn (tmp);
11340 JUMP_LABEL (insn) = call_done;
11341 LABEL_NUSES (call_done)++;
11342 emit_barrier ();
11343 }
11344
11345 /* __morestack will call us here. */
11346
11347 emit_label (call_done);
11348 }
11349
11350 /* We may have to tell the dataflow pass that the split stack prologue
11351 is initializing a register. */
11352
11353 static void
11354 s390_live_on_entry (bitmap regs)
11355 {
11356 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11357 {
11358 gcc_assert (flag_split_stack);
11359 bitmap_set_bit (regs, 1);
11360 }
11361 }
11362
11363 /* Return true if the function can use simple_return to return outside
11364 of a shrink-wrapped region. At present shrink-wrapping is supported
11365 in all cases. */
11366
11367 bool
11368 s390_can_use_simple_return_insn (void)
11369 {
11370 return true;
11371 }
11372
11373 /* Return true if the epilogue is guaranteed to contain only a return
11374 instruction and if a direct return can therefore be used instead.
11375 One of the main advantages of using direct return instructions
11376 is that we can then use conditional returns. */
11377
11378 bool
11379 s390_can_use_return_insn (void)
11380 {
11381 int i;
11382
11383 if (!reload_completed)
11384 return false;
11385
11386 if (crtl->profile)
11387 return false;
11388
11389 if (TARGET_TPF_PROFILING)
11390 return false;
11391
11392 for (i = 0; i < 16; i++)
11393 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11394 return false;
11395
11396 /* For 31 bit this is not covered by the frame_size check below
11397 since f4, f6 are saved in the register save area without needing
11398 additional stack space. */
11399 if (!TARGET_64BIT
11400 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11401 return false;
11402
11403 if (cfun->machine->base_reg
11404 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11405 return false;
11406
11407 return cfun_frame_layout.frame_size == 0;
11408 }
11409
11410 /* The VX ABI differs for vararg functions. Therefore we need the
11411 prototype of the callee to be available when passing vector type
11412 values. */
11413 static const char *
11414 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11415 {
11416 return ((TARGET_VX_ABI
11417 && typelist == 0
11418 && VECTOR_TYPE_P (TREE_TYPE (val))
11419 && (funcdecl == NULL_TREE
11420 || (TREE_CODE (funcdecl) == FUNCTION_DECL
11421 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11422 ? N_("Vector argument passed to unprototyped function")
11423 : NULL);
11424 }
11425
11426
11427 /* Return the size in bytes of a function argument of
11428 type TYPE and/or mode MODE. At least one of TYPE or
11429 MODE must be specified. */
11430
11431 static int
11432 s390_function_arg_size (machine_mode mode, const_tree type)
11433 {
11434 if (type)
11435 return int_size_in_bytes (type);
11436
11437 /* No type info available for some library calls ... */
11438 if (mode != BLKmode)
11439 return GET_MODE_SIZE (mode);
11440
11441 /* If we have neither type nor mode, abort */
11442 gcc_unreachable ();
11443 }
11444
11445 /* Return true if a function argument of type TYPE and mode MODE
11446 is to be passed in a vector register, if available. */
11447
11448 bool
11449 s390_function_arg_vector (machine_mode mode, const_tree type)
11450 {
11451 if (!TARGET_VX_ABI)
11452 return false;
11453
11454 if (s390_function_arg_size (mode, type) > 16)
11455 return false;
11456
11457 /* No type info available for some library calls ... */
11458 if (!type)
11459 return VECTOR_MODE_P (mode);
11460
11461 /* The ABI says that record types with a single member are treated
11462 just like that member would be. */
11463 while (TREE_CODE (type) == RECORD_TYPE)
11464 {
11465 tree field, single = NULL_TREE;
11466
11467 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11468 {
11469 if (TREE_CODE (field) != FIELD_DECL)
11470 continue;
11471
11472 if (single == NULL_TREE)
11473 single = TREE_TYPE (field);
11474 else
11475 return false;
11476 }
11477
11478 if (single == NULL_TREE)
11479 return false;
11480 else
11481 {
11482 /* If the field declaration adds extra byte due to
11483 e.g. padding this is not accepted as vector type. */
11484 if (int_size_in_bytes (single) <= 0
11485 || int_size_in_bytes (single) != int_size_in_bytes (type))
11486 return false;
11487 type = single;
11488 }
11489 }
11490
11491 return VECTOR_TYPE_P (type);
11492 }
11493
11494 /* Return true if a function argument of type TYPE and mode MODE
11495 is to be passed in a floating-point register, if available. */
11496
11497 static bool
11498 s390_function_arg_float (machine_mode mode, const_tree type)
11499 {
11500 if (s390_function_arg_size (mode, type) > 8)
11501 return false;
11502
11503 /* Soft-float changes the ABI: no floating-point registers are used. */
11504 if (TARGET_SOFT_FLOAT)
11505 return false;
11506
11507 /* No type info available for some library calls ... */
11508 if (!type)
11509 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11510
11511 /* The ABI says that record types with a single member are treated
11512 just like that member would be. */
11513 while (TREE_CODE (type) == RECORD_TYPE)
11514 {
11515 tree field, single = NULL_TREE;
11516
11517 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11518 {
11519 if (TREE_CODE (field) != FIELD_DECL)
11520 continue;
11521
11522 if (single == NULL_TREE)
11523 single = TREE_TYPE (field);
11524 else
11525 return false;
11526 }
11527
11528 if (single == NULL_TREE)
11529 return false;
11530 else
11531 type = single;
11532 }
11533
11534 return TREE_CODE (type) == REAL_TYPE;
11535 }
11536
11537 /* Return true if a function argument of type TYPE and mode MODE
11538 is to be passed in an integer register, or a pair of integer
11539 registers, if available. */
11540
11541 static bool
11542 s390_function_arg_integer (machine_mode mode, const_tree type)
11543 {
11544 int size = s390_function_arg_size (mode, type);
11545 if (size > 8)
11546 return false;
11547
11548 /* No type info available for some library calls ... */
11549 if (!type)
11550 return GET_MODE_CLASS (mode) == MODE_INT
11551 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
11552
11553 /* We accept small integral (and similar) types. */
11554 if (INTEGRAL_TYPE_P (type)
11555 || POINTER_TYPE_P (type)
11556 || TREE_CODE (type) == NULLPTR_TYPE
11557 || TREE_CODE (type) == OFFSET_TYPE
11558 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
11559 return true;
11560
11561 /* We also accept structs of size 1, 2, 4, 8 that are not
11562 passed in floating-point registers. */
11563 if (AGGREGATE_TYPE_P (type)
11564 && exact_log2 (size) >= 0
11565 && !s390_function_arg_float (mode, type))
11566 return true;
11567
11568 return false;
11569 }
11570
11571 /* Return 1 if a function argument of type TYPE and mode MODE
11572 is to be passed by reference. The ABI specifies that only
11573 structures of size 1, 2, 4, or 8 bytes are passed by value,
11574 all other structures (and complex numbers) are passed by
11575 reference. */
11576
11577 static bool
11578 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
11579 machine_mode mode, const_tree type,
11580 bool named ATTRIBUTE_UNUSED)
11581 {
11582 int size = s390_function_arg_size (mode, type);
11583
11584 if (s390_function_arg_vector (mode, type))
11585 return false;
11586
11587 if (size > 8)
11588 return true;
11589
11590 if (type)
11591 {
11592 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
11593 return true;
11594
11595 if (TREE_CODE (type) == COMPLEX_TYPE
11596 || TREE_CODE (type) == VECTOR_TYPE)
11597 return true;
11598 }
11599
11600 return false;
11601 }
11602
11603 /* Update the data in CUM to advance over an argument of mode MODE and
11604 data type TYPE. (TYPE is null for libcalls where that information
11605 may not be available.). The boolean NAMED specifies whether the
11606 argument is a named argument (as opposed to an unnamed argument
11607 matching an ellipsis). */
11608
11609 static void
11610 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
11611 const_tree type, bool named)
11612 {
11613 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11614
11615 if (s390_function_arg_vector (mode, type))
11616 {
11617 /* We are called for unnamed vector stdarg arguments which are
11618 passed on the stack. In this case this hook does not have to
11619 do anything since stack arguments are tracked by common
11620 code. */
11621 if (!named)
11622 return;
11623 cum->vrs += 1;
11624 }
11625 else if (s390_function_arg_float (mode, type))
11626 {
11627 cum->fprs += 1;
11628 }
11629 else if (s390_function_arg_integer (mode, type))
11630 {
11631 int size = s390_function_arg_size (mode, type);
11632 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
11633 }
11634 else
11635 gcc_unreachable ();
11636 }
11637
11638 /* Define where to put the arguments to a function.
11639 Value is zero to push the argument on the stack,
11640 or a hard register in which to store the argument.
11641
11642 MODE is the argument's machine mode.
11643 TYPE is the data type of the argument (as a tree).
11644 This is null for libcalls where that information may
11645 not be available.
11646 CUM is a variable of type CUMULATIVE_ARGS which gives info about
11647 the preceding args and about the function being called.
11648 NAMED is nonzero if this argument is a named parameter
11649 (otherwise it is an extra parameter matching an ellipsis).
11650
11651 On S/390, we use general purpose registers 2 through 6 to
11652 pass integer, pointer, and certain structure arguments, and
11653 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
11654 to pass floating point arguments. All remaining arguments
11655 are pushed to the stack. */
11656
11657 static rtx
11658 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
11659 const_tree type, bool named)
11660 {
11661 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
11662
11663 if (!named)
11664 s390_check_type_for_vector_abi (type, true, false);
11665
11666 if (s390_function_arg_vector (mode, type))
11667 {
11668 /* Vector arguments being part of the ellipsis are passed on the
11669 stack. */
11670 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
11671 return NULL_RTX;
11672
11673 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
11674 }
11675 else if (s390_function_arg_float (mode, type))
11676 {
11677 if (cum->fprs + 1 > FP_ARG_NUM_REG)
11678 return NULL_RTX;
11679 else
11680 return gen_rtx_REG (mode, cum->fprs + 16);
11681 }
11682 else if (s390_function_arg_integer (mode, type))
11683 {
11684 int size = s390_function_arg_size (mode, type);
11685 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
11686
11687 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
11688 return NULL_RTX;
11689 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
11690 return gen_rtx_REG (mode, cum->gprs + 2);
11691 else if (n_gprs == 2)
11692 {
11693 rtvec p = rtvec_alloc (2);
11694
11695 RTVEC_ELT (p, 0)
11696 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
11697 const0_rtx);
11698 RTVEC_ELT (p, 1)
11699 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
11700 GEN_INT (4));
11701
11702 return gen_rtx_PARALLEL (mode, p);
11703 }
11704 }
11705
11706 /* After the real arguments, expand_call calls us once again
11707 with a void_type_node type. Whatever we return here is
11708 passed as operand 2 to the call expanders.
11709
11710 We don't need this feature ... */
11711 else if (type == void_type_node)
11712 return const0_rtx;
11713
11714 gcc_unreachable ();
11715 }
11716
11717 /* Return true if return values of type TYPE should be returned
11718 in a memory buffer whose address is passed by the caller as
11719 hidden first argument. */
11720
11721 static bool
11722 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
11723 {
11724 /* We accept small integral (and similar) types. */
11725 if (INTEGRAL_TYPE_P (type)
11726 || POINTER_TYPE_P (type)
11727 || TREE_CODE (type) == OFFSET_TYPE
11728 || TREE_CODE (type) == REAL_TYPE)
11729 return int_size_in_bytes (type) > 8;
11730
11731 /* vector types which fit into a VR. */
11732 if (TARGET_VX_ABI
11733 && VECTOR_TYPE_P (type)
11734 && int_size_in_bytes (type) <= 16)
11735 return false;
11736
11737 /* Aggregates and similar constructs are always returned
11738 in memory. */
11739 if (AGGREGATE_TYPE_P (type)
11740 || TREE_CODE (type) == COMPLEX_TYPE
11741 || VECTOR_TYPE_P (type))
11742 return true;
11743
11744 /* ??? We get called on all sorts of random stuff from
11745 aggregate_value_p. We can't abort, but it's not clear
11746 what's safe to return. Pretend it's a struct I guess. */
11747 return true;
11748 }
11749
11750 /* Function arguments and return values are promoted to word size. */
11751
11752 static machine_mode
11753 s390_promote_function_mode (const_tree type, machine_mode mode,
11754 int *punsignedp,
11755 const_tree fntype ATTRIBUTE_UNUSED,
11756 int for_return ATTRIBUTE_UNUSED)
11757 {
11758 if (INTEGRAL_MODE_P (mode)
11759 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
11760 {
11761 if (type != NULL_TREE && POINTER_TYPE_P (type))
11762 *punsignedp = POINTERS_EXTEND_UNSIGNED;
11763 return Pmode;
11764 }
11765
11766 return mode;
11767 }
11768
11769 /* Define where to return a (scalar) value of type RET_TYPE.
11770 If RET_TYPE is null, define where to return a (scalar)
11771 value of mode MODE from a libcall. */
11772
11773 static rtx
11774 s390_function_and_libcall_value (machine_mode mode,
11775 const_tree ret_type,
11776 const_tree fntype_or_decl,
11777 bool outgoing ATTRIBUTE_UNUSED)
11778 {
11779 /* For vector return types it is important to use the RET_TYPE
11780 argument whenever available since the middle-end might have
11781 changed the mode to a scalar mode. */
11782 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
11783 || (!ret_type && VECTOR_MODE_P (mode)));
11784
11785 /* For normal functions perform the promotion as
11786 promote_function_mode would do. */
11787 if (ret_type)
11788 {
11789 int unsignedp = TYPE_UNSIGNED (ret_type);
11790 mode = promote_function_mode (ret_type, mode, &unsignedp,
11791 fntype_or_decl, 1);
11792 }
11793
11794 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
11795 || SCALAR_FLOAT_MODE_P (mode)
11796 || (TARGET_VX_ABI && vector_ret_type_p));
11797 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
11798
11799 if (TARGET_VX_ABI && vector_ret_type_p)
11800 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
11801 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
11802 return gen_rtx_REG (mode, 16);
11803 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
11804 || UNITS_PER_LONG == UNITS_PER_WORD)
11805 return gen_rtx_REG (mode, 2);
11806 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
11807 {
11808 /* This case is triggered when returning a 64 bit value with
11809 -m31 -mzarch. Although the value would fit into a single
11810 register it has to be forced into a 32 bit register pair in
11811 order to match the ABI. */
11812 rtvec p = rtvec_alloc (2);
11813
11814 RTVEC_ELT (p, 0)
11815 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
11816 RTVEC_ELT (p, 1)
11817 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
11818
11819 return gen_rtx_PARALLEL (mode, p);
11820 }
11821
11822 gcc_unreachable ();
11823 }
11824
11825 /* Define where to return a scalar return value of type RET_TYPE. */
11826
11827 static rtx
11828 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
11829 bool outgoing)
11830 {
11831 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
11832 fn_decl_or_type, outgoing);
11833 }
11834
11835 /* Define where to return a scalar libcall return value of mode
11836 MODE. */
11837
11838 static rtx
11839 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
11840 {
11841 return s390_function_and_libcall_value (mode, NULL_TREE,
11842 NULL_TREE, true);
11843 }
11844
11845
11846 /* Create and return the va_list datatype.
11847
11848 On S/390, va_list is an array type equivalent to
11849
11850 typedef struct __va_list_tag
11851 {
11852 long __gpr;
11853 long __fpr;
11854 void *__overflow_arg_area;
11855 void *__reg_save_area;
11856 } va_list[1];
11857
11858 where __gpr and __fpr hold the number of general purpose
11859 or floating point arguments used up to now, respectively,
11860 __overflow_arg_area points to the stack location of the
11861 next argument passed on the stack, and __reg_save_area
11862 always points to the start of the register area in the
11863 call frame of the current function. The function prologue
11864 saves all registers used for argument passing into this
11865 area if the function uses variable arguments. */
11866
11867 static tree
11868 s390_build_builtin_va_list (void)
11869 {
11870 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
11871
11872 record = lang_hooks.types.make_type (RECORD_TYPE);
11873
11874 type_decl =
11875 build_decl (BUILTINS_LOCATION,
11876 TYPE_DECL, get_identifier ("__va_list_tag"), record);
11877
11878 f_gpr = build_decl (BUILTINS_LOCATION,
11879 FIELD_DECL, get_identifier ("__gpr"),
11880 long_integer_type_node);
11881 f_fpr = build_decl (BUILTINS_LOCATION,
11882 FIELD_DECL, get_identifier ("__fpr"),
11883 long_integer_type_node);
11884 f_ovf = build_decl (BUILTINS_LOCATION,
11885 FIELD_DECL, get_identifier ("__overflow_arg_area"),
11886 ptr_type_node);
11887 f_sav = build_decl (BUILTINS_LOCATION,
11888 FIELD_DECL, get_identifier ("__reg_save_area"),
11889 ptr_type_node);
11890
11891 va_list_gpr_counter_field = f_gpr;
11892 va_list_fpr_counter_field = f_fpr;
11893
11894 DECL_FIELD_CONTEXT (f_gpr) = record;
11895 DECL_FIELD_CONTEXT (f_fpr) = record;
11896 DECL_FIELD_CONTEXT (f_ovf) = record;
11897 DECL_FIELD_CONTEXT (f_sav) = record;
11898
11899 TYPE_STUB_DECL (record) = type_decl;
11900 TYPE_NAME (record) = type_decl;
11901 TYPE_FIELDS (record) = f_gpr;
11902 DECL_CHAIN (f_gpr) = f_fpr;
11903 DECL_CHAIN (f_fpr) = f_ovf;
11904 DECL_CHAIN (f_ovf) = f_sav;
11905
11906 layout_type (record);
11907
11908 /* The correct type is an array type of one element. */
11909 return build_array_type (record, build_index_type (size_zero_node));
11910 }
11911
11912 /* Implement va_start by filling the va_list structure VALIST.
11913 STDARG_P is always true, and ignored.
11914 NEXTARG points to the first anonymous stack argument.
11915
11916 The following global variables are used to initialize
11917 the va_list structure:
11918
11919 crtl->args.info:
11920 holds number of gprs and fprs used for named arguments.
11921 crtl->args.arg_offset_rtx:
11922 holds the offset of the first anonymous stack argument
11923 (relative to the virtual arg pointer). */
11924
11925 static void
11926 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
11927 {
11928 HOST_WIDE_INT n_gpr, n_fpr;
11929 int off;
11930 tree f_gpr, f_fpr, f_ovf, f_sav;
11931 tree gpr, fpr, ovf, sav, t;
11932
11933 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11934 f_fpr = DECL_CHAIN (f_gpr);
11935 f_ovf = DECL_CHAIN (f_fpr);
11936 f_sav = DECL_CHAIN (f_ovf);
11937
11938 valist = build_simple_mem_ref (valist);
11939 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11940 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
11941 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
11942 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
11943
11944 /* Count number of gp and fp argument registers used. */
11945
11946 n_gpr = crtl->args.info.gprs;
11947 n_fpr = crtl->args.info.fprs;
11948
11949 if (cfun->va_list_gpr_size)
11950 {
11951 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11952 build_int_cst (NULL_TREE, n_gpr));
11953 TREE_SIDE_EFFECTS (t) = 1;
11954 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11955 }
11956
11957 if (cfun->va_list_fpr_size)
11958 {
11959 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11960 build_int_cst (NULL_TREE, n_fpr));
11961 TREE_SIDE_EFFECTS (t) = 1;
11962 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11963 }
11964
11965 if (flag_split_stack
11966 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
11967 == NULL)
11968 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
11969 {
11970 rtx reg;
11971 rtx_insn *seq;
11972
11973 reg = gen_reg_rtx (Pmode);
11974 cfun->machine->split_stack_varargs_pointer = reg;
11975
11976 start_sequence ();
11977 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
11978 seq = get_insns ();
11979 end_sequence ();
11980
11981 push_topmost_sequence ();
11982 emit_insn_after (seq, entry_of_function ());
11983 pop_topmost_sequence ();
11984 }
11985
11986 /* Find the overflow area.
11987 FIXME: This currently is too pessimistic when the vector ABI is
11988 enabled. In that case we *always* set up the overflow area
11989 pointer. */
11990 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
11991 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
11992 || TARGET_VX_ABI)
11993 {
11994 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
11995 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11996 else
11997 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
11998
11999 off = INTVAL (crtl->args.arg_offset_rtx);
12000 off = off < 0 ? 0 : off;
12001 if (TARGET_DEBUG_ARG)
12002 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12003 (int)n_gpr, (int)n_fpr, off);
12004
12005 t = fold_build_pointer_plus_hwi (t, off);
12006
12007 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12008 TREE_SIDE_EFFECTS (t) = 1;
12009 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12010 }
12011
12012 /* Find the register save area. */
12013 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12014 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12015 {
12016 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12017 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12018
12019 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12020 TREE_SIDE_EFFECTS (t) = 1;
12021 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12022 }
12023 }
12024
12025 /* Implement va_arg by updating the va_list structure
12026 VALIST as required to retrieve an argument of type
12027 TYPE, and returning that argument.
12028
12029 Generates code equivalent to:
12030
12031 if (integral value) {
12032 if (size <= 4 && args.gpr < 5 ||
12033 size > 4 && args.gpr < 4 )
12034 ret = args.reg_save_area[args.gpr+8]
12035 else
12036 ret = *args.overflow_arg_area++;
12037 } else if (vector value) {
12038 ret = *args.overflow_arg_area;
12039 args.overflow_arg_area += size / 8;
12040 } else if (float value) {
12041 if (args.fgpr < 2)
12042 ret = args.reg_save_area[args.fpr+64]
12043 else
12044 ret = *args.overflow_arg_area++;
12045 } else if (aggregate value) {
12046 if (args.gpr < 5)
12047 ret = *args.reg_save_area[args.gpr]
12048 else
12049 ret = **args.overflow_arg_area++;
12050 } */
12051
12052 static tree
12053 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12054 gimple_seq *post_p ATTRIBUTE_UNUSED)
12055 {
12056 tree f_gpr, f_fpr, f_ovf, f_sav;
12057 tree gpr, fpr, ovf, sav, reg, t, u;
12058 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12059 tree lab_false, lab_over;
12060 tree addr = create_tmp_var (ptr_type_node, "addr");
12061 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12062 a stack slot. */
12063
12064 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12065 f_fpr = DECL_CHAIN (f_gpr);
12066 f_ovf = DECL_CHAIN (f_fpr);
12067 f_sav = DECL_CHAIN (f_ovf);
12068
12069 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12070 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12071 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12072
12073 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12074 both appear on a lhs. */
12075 valist = unshare_expr (valist);
12076 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12077
12078 size = int_size_in_bytes (type);
12079
12080 s390_check_type_for_vector_abi (type, true, false);
12081
12082 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12083 {
12084 if (TARGET_DEBUG_ARG)
12085 {
12086 fprintf (stderr, "va_arg: aggregate type");
12087 debug_tree (type);
12088 }
12089
12090 /* Aggregates are passed by reference. */
12091 indirect_p = 1;
12092 reg = gpr;
12093 n_reg = 1;
12094
12095 /* kernel stack layout on 31 bit: It is assumed here that no padding
12096 will be added by s390_frame_info because for va_args always an even
12097 number of gprs has to be saved r15-r2 = 14 regs. */
12098 sav_ofs = 2 * UNITS_PER_LONG;
12099 sav_scale = UNITS_PER_LONG;
12100 size = UNITS_PER_LONG;
12101 max_reg = GP_ARG_NUM_REG - n_reg;
12102 left_align_p = false;
12103 }
12104 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12105 {
12106 if (TARGET_DEBUG_ARG)
12107 {
12108 fprintf (stderr, "va_arg: vector type");
12109 debug_tree (type);
12110 }
12111
12112 indirect_p = 0;
12113 reg = NULL_TREE;
12114 n_reg = 0;
12115 sav_ofs = 0;
12116 sav_scale = 8;
12117 max_reg = 0;
12118 left_align_p = true;
12119 }
12120 else if (s390_function_arg_float (TYPE_MODE (type), type))
12121 {
12122 if (TARGET_DEBUG_ARG)
12123 {
12124 fprintf (stderr, "va_arg: float type");
12125 debug_tree (type);
12126 }
12127
12128 /* FP args go in FP registers, if present. */
12129 indirect_p = 0;
12130 reg = fpr;
12131 n_reg = 1;
12132 sav_ofs = 16 * UNITS_PER_LONG;
12133 sav_scale = 8;
12134 max_reg = FP_ARG_NUM_REG - n_reg;
12135 left_align_p = false;
12136 }
12137 else
12138 {
12139 if (TARGET_DEBUG_ARG)
12140 {
12141 fprintf (stderr, "va_arg: other type");
12142 debug_tree (type);
12143 }
12144
12145 /* Otherwise into GP registers. */
12146 indirect_p = 0;
12147 reg = gpr;
12148 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12149
12150 /* kernel stack layout on 31 bit: It is assumed here that no padding
12151 will be added by s390_frame_info because for va_args always an even
12152 number of gprs has to be saved r15-r2 = 14 regs. */
12153 sav_ofs = 2 * UNITS_PER_LONG;
12154
12155 if (size < UNITS_PER_LONG)
12156 sav_ofs += UNITS_PER_LONG - size;
12157
12158 sav_scale = UNITS_PER_LONG;
12159 max_reg = GP_ARG_NUM_REG - n_reg;
12160 left_align_p = false;
12161 }
12162
12163 /* Pull the value out of the saved registers ... */
12164
12165 if (reg != NULL_TREE)
12166 {
12167 /*
12168 if (reg > ((typeof (reg))max_reg))
12169 goto lab_false;
12170
12171 addr = sav + sav_ofs + reg * save_scale;
12172
12173 goto lab_over;
12174
12175 lab_false:
12176 */
12177
12178 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12179 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12180
12181 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12182 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12183 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12184 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12185 gimplify_and_add (t, pre_p);
12186
12187 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12188 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12189 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12190 t = fold_build_pointer_plus (t, u);
12191
12192 gimplify_assign (addr, t, pre_p);
12193
12194 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12195
12196 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12197 }
12198
12199 /* ... Otherwise out of the overflow area. */
12200
12201 t = ovf;
12202 if (size < UNITS_PER_LONG && !left_align_p)
12203 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12204
12205 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12206
12207 gimplify_assign (addr, t, pre_p);
12208
12209 if (size < UNITS_PER_LONG && left_align_p)
12210 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12211 else
12212 t = fold_build_pointer_plus_hwi (t, size);
12213
12214 gimplify_assign (ovf, t, pre_p);
12215
12216 if (reg != NULL_TREE)
12217 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12218
12219
12220 /* Increment register save count. */
12221
12222 if (n_reg > 0)
12223 {
12224 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12225 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12226 gimplify_and_add (u, pre_p);
12227 }
12228
12229 if (indirect_p)
12230 {
12231 t = build_pointer_type_for_mode (build_pointer_type (type),
12232 ptr_mode, true);
12233 addr = fold_convert (t, addr);
12234 addr = build_va_arg_indirect_ref (addr);
12235 }
12236 else
12237 {
12238 t = build_pointer_type_for_mode (type, ptr_mode, true);
12239 addr = fold_convert (t, addr);
12240 }
12241
12242 return build_va_arg_indirect_ref (addr);
12243 }
12244
12245 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12246 expanders.
12247 DEST - Register location where CC will be stored.
12248 TDB - Pointer to a 256 byte area where to store the transaction.
12249 diagnostic block. NULL if TDB is not needed.
12250 RETRY - Retry count value. If non-NULL a retry loop for CC2
12251 is emitted
12252 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12253 of the tbegin instruction pattern. */
12254
12255 void
12256 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12257 {
12258 rtx retry_plus_two = gen_reg_rtx (SImode);
12259 rtx retry_reg = gen_reg_rtx (SImode);
12260 rtx_code_label *retry_label = NULL;
12261
12262 if (retry != NULL_RTX)
12263 {
12264 emit_move_insn (retry_reg, retry);
12265 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12266 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12267 retry_label = gen_label_rtx ();
12268 emit_label (retry_label);
12269 }
12270
12271 if (clobber_fprs_p)
12272 {
12273 if (TARGET_VX)
12274 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12275 tdb));
12276 else
12277 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12278 tdb));
12279 }
12280 else
12281 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12282 tdb));
12283
12284 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12285 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12286 CC_REGNUM)),
12287 UNSPEC_CC_TO_INT));
12288 if (retry != NULL_RTX)
12289 {
12290 const int CC0 = 1 << 3;
12291 const int CC1 = 1 << 2;
12292 const int CC3 = 1 << 0;
12293 rtx jump;
12294 rtx count = gen_reg_rtx (SImode);
12295 rtx_code_label *leave_label = gen_label_rtx ();
12296
12297 /* Exit for success and permanent failures. */
12298 jump = s390_emit_jump (leave_label,
12299 gen_rtx_EQ (VOIDmode,
12300 gen_rtx_REG (CCRAWmode, CC_REGNUM),
12301 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12302 LABEL_NUSES (leave_label) = 1;
12303
12304 /* CC2 - transient failure. Perform retry with ppa. */
12305 emit_move_insn (count, retry_plus_two);
12306 emit_insn (gen_subsi3 (count, count, retry_reg));
12307 emit_insn (gen_tx_assist (count));
12308 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12309 retry_reg,
12310 retry_reg));
12311 JUMP_LABEL (jump) = retry_label;
12312 LABEL_NUSES (retry_label) = 1;
12313 emit_label (leave_label);
12314 }
12315 }
12316
12317
12318 /* Return the decl for the target specific builtin with the function
12319 code FCODE. */
12320
12321 static tree
12322 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12323 {
12324 if (fcode >= S390_BUILTIN_MAX)
12325 return error_mark_node;
12326
12327 return s390_builtin_decls[fcode];
12328 }
12329
12330 /* We call mcount before the function prologue. So a profiled leaf
12331 function should stay a leaf function. */
12332
12333 static bool
12334 s390_keep_leaf_when_profiled ()
12335 {
12336 return true;
12337 }
12338
12339 /* Output assembly code for the trampoline template to
12340 stdio stream FILE.
12341
12342 On S/390, we use gpr 1 internally in the trampoline code;
12343 gpr 0 is used to hold the static chain. */
12344
12345 static void
12346 s390_asm_trampoline_template (FILE *file)
12347 {
12348 rtx op[2];
12349 op[0] = gen_rtx_REG (Pmode, 0);
12350 op[1] = gen_rtx_REG (Pmode, 1);
12351
12352 if (TARGET_64BIT)
12353 {
12354 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12355 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
12356 output_asm_insn ("br\t%1", op); /* 2 byte */
12357 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12358 }
12359 else
12360 {
12361 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12362 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
12363 output_asm_insn ("br\t%1", op); /* 2 byte */
12364 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12365 }
12366 }
12367
12368 /* Emit RTL insns to initialize the variable parts of a trampoline.
12369 FNADDR is an RTX for the address of the function's pure code.
12370 CXT is an RTX for the static chain value for the function. */
12371
12372 static void
12373 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12374 {
12375 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12376 rtx mem;
12377
12378 emit_block_move (m_tramp, assemble_trampoline_template (),
12379 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12380
12381 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12382 emit_move_insn (mem, cxt);
12383 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12384 emit_move_insn (mem, fnaddr);
12385 }
12386
12387 /* Output assembler code to FILE to increment profiler label # LABELNO
12388 for profiling a function entry. */
12389
12390 void
12391 s390_function_profiler (FILE *file, int labelno)
12392 {
12393 rtx op[7];
12394
12395 char label[128];
12396 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12397
12398 fprintf (file, "# function profiler \n");
12399
12400 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12401 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12402 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12403
12404 op[2] = gen_rtx_REG (Pmode, 1);
12405 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12406 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12407
12408 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
12409 if (flag_pic)
12410 {
12411 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12412 op[4] = gen_rtx_CONST (Pmode, op[4]);
12413 }
12414
12415 if (TARGET_64BIT)
12416 {
12417 output_asm_insn ("stg\t%0,%1", op);
12418 output_asm_insn ("larl\t%2,%3", op);
12419 output_asm_insn ("brasl\t%0,%4", op);
12420 output_asm_insn ("lg\t%0,%1", op);
12421 }
12422 else if (TARGET_CPU_ZARCH)
12423 {
12424 output_asm_insn ("st\t%0,%1", op);
12425 output_asm_insn ("larl\t%2,%3", op);
12426 output_asm_insn ("brasl\t%0,%4", op);
12427 output_asm_insn ("l\t%0,%1", op);
12428 }
12429 else if (!flag_pic)
12430 {
12431 op[6] = gen_label_rtx ();
12432
12433 output_asm_insn ("st\t%0,%1", op);
12434 output_asm_insn ("bras\t%2,%l6", op);
12435 output_asm_insn (".long\t%4", op);
12436 output_asm_insn (".long\t%3", op);
12437 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12438 output_asm_insn ("l\t%0,0(%2)", op);
12439 output_asm_insn ("l\t%2,4(%2)", op);
12440 output_asm_insn ("basr\t%0,%0", op);
12441 output_asm_insn ("l\t%0,%1", op);
12442 }
12443 else
12444 {
12445 op[5] = gen_label_rtx ();
12446 op[6] = gen_label_rtx ();
12447
12448 output_asm_insn ("st\t%0,%1", op);
12449 output_asm_insn ("bras\t%2,%l6", op);
12450 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
12451 output_asm_insn (".long\t%4-%l5", op);
12452 output_asm_insn (".long\t%3-%l5", op);
12453 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12454 output_asm_insn ("lr\t%0,%2", op);
12455 output_asm_insn ("a\t%0,0(%2)", op);
12456 output_asm_insn ("a\t%2,4(%2)", op);
12457 output_asm_insn ("basr\t%0,%0", op);
12458 output_asm_insn ("l\t%0,%1", op);
12459 }
12460 }
12461
12462 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
12463 into its SYMBOL_REF_FLAGS. */
12464
12465 static void
12466 s390_encode_section_info (tree decl, rtx rtl, int first)
12467 {
12468 default_encode_section_info (decl, rtl, first);
12469
12470 if (TREE_CODE (decl) == VAR_DECL)
12471 {
12472 /* Store the alignment to be able to check if we can use
12473 a larl/load-relative instruction. We only handle the cases
12474 that can go wrong (i.e. no FUNC_DECLs). */
12475 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
12476 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12477 else if (DECL_ALIGN (decl) % 32)
12478 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12479 else if (DECL_ALIGN (decl) % 64)
12480 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12481 }
12482
12483 /* Literal pool references don't have a decl so they are handled
12484 differently here. We rely on the information in the MEM_ALIGN
12485 entry to decide upon the alignment. */
12486 if (MEM_P (rtl)
12487 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
12488 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
12489 {
12490 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
12491 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12492 else if (MEM_ALIGN (rtl) % 32)
12493 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12494 else if (MEM_ALIGN (rtl) % 64)
12495 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12496 }
12497 }
12498
12499 /* Output thunk to FILE that implements a C++ virtual function call (with
12500 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
12501 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
12502 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
12503 relative to the resulting this pointer. */
12504
12505 static void
12506 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
12507 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12508 tree function)
12509 {
12510 rtx op[10];
12511 int nonlocal = 0;
12512
12513 /* Make sure unwind info is emitted for the thunk if needed. */
12514 final_start_function (emit_barrier (), file, 1);
12515
12516 /* Operand 0 is the target function. */
12517 op[0] = XEXP (DECL_RTL (function), 0);
12518 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
12519 {
12520 nonlocal = 1;
12521 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
12522 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
12523 op[0] = gen_rtx_CONST (Pmode, op[0]);
12524 }
12525
12526 /* Operand 1 is the 'this' pointer. */
12527 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12528 op[1] = gen_rtx_REG (Pmode, 3);
12529 else
12530 op[1] = gen_rtx_REG (Pmode, 2);
12531
12532 /* Operand 2 is the delta. */
12533 op[2] = GEN_INT (delta);
12534
12535 /* Operand 3 is the vcall_offset. */
12536 op[3] = GEN_INT (vcall_offset);
12537
12538 /* Operand 4 is the temporary register. */
12539 op[4] = gen_rtx_REG (Pmode, 1);
12540
12541 /* Operands 5 to 8 can be used as labels. */
12542 op[5] = NULL_RTX;
12543 op[6] = NULL_RTX;
12544 op[7] = NULL_RTX;
12545 op[8] = NULL_RTX;
12546
12547 /* Operand 9 can be used for temporary register. */
12548 op[9] = NULL_RTX;
12549
12550 /* Generate code. */
12551 if (TARGET_64BIT)
12552 {
12553 /* Setup literal pool pointer if required. */
12554 if ((!DISP_IN_RANGE (delta)
12555 && !CONST_OK_FOR_K (delta)
12556 && !CONST_OK_FOR_Os (delta))
12557 || (!DISP_IN_RANGE (vcall_offset)
12558 && !CONST_OK_FOR_K (vcall_offset)
12559 && !CONST_OK_FOR_Os (vcall_offset)))
12560 {
12561 op[5] = gen_label_rtx ();
12562 output_asm_insn ("larl\t%4,%5", op);
12563 }
12564
12565 /* Add DELTA to this pointer. */
12566 if (delta)
12567 {
12568 if (CONST_OK_FOR_J (delta))
12569 output_asm_insn ("la\t%1,%2(%1)", op);
12570 else if (DISP_IN_RANGE (delta))
12571 output_asm_insn ("lay\t%1,%2(%1)", op);
12572 else if (CONST_OK_FOR_K (delta))
12573 output_asm_insn ("aghi\t%1,%2", op);
12574 else if (CONST_OK_FOR_Os (delta))
12575 output_asm_insn ("agfi\t%1,%2", op);
12576 else
12577 {
12578 op[6] = gen_label_rtx ();
12579 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
12580 }
12581 }
12582
12583 /* Perform vcall adjustment. */
12584 if (vcall_offset)
12585 {
12586 if (DISP_IN_RANGE (vcall_offset))
12587 {
12588 output_asm_insn ("lg\t%4,0(%1)", op);
12589 output_asm_insn ("ag\t%1,%3(%4)", op);
12590 }
12591 else if (CONST_OK_FOR_K (vcall_offset))
12592 {
12593 output_asm_insn ("lghi\t%4,%3", op);
12594 output_asm_insn ("ag\t%4,0(%1)", op);
12595 output_asm_insn ("ag\t%1,0(%4)", op);
12596 }
12597 else if (CONST_OK_FOR_Os (vcall_offset))
12598 {
12599 output_asm_insn ("lgfi\t%4,%3", op);
12600 output_asm_insn ("ag\t%4,0(%1)", op);
12601 output_asm_insn ("ag\t%1,0(%4)", op);
12602 }
12603 else
12604 {
12605 op[7] = gen_label_rtx ();
12606 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
12607 output_asm_insn ("ag\t%4,0(%1)", op);
12608 output_asm_insn ("ag\t%1,0(%4)", op);
12609 }
12610 }
12611
12612 /* Jump to target. */
12613 output_asm_insn ("jg\t%0", op);
12614
12615 /* Output literal pool if required. */
12616 if (op[5])
12617 {
12618 output_asm_insn (".align\t4", op);
12619 targetm.asm_out.internal_label (file, "L",
12620 CODE_LABEL_NUMBER (op[5]));
12621 }
12622 if (op[6])
12623 {
12624 targetm.asm_out.internal_label (file, "L",
12625 CODE_LABEL_NUMBER (op[6]));
12626 output_asm_insn (".long\t%2", op);
12627 }
12628 if (op[7])
12629 {
12630 targetm.asm_out.internal_label (file, "L",
12631 CODE_LABEL_NUMBER (op[7]));
12632 output_asm_insn (".long\t%3", op);
12633 }
12634 }
12635 else
12636 {
12637 /* Setup base pointer if required. */
12638 if (!vcall_offset
12639 || (!DISP_IN_RANGE (delta)
12640 && !CONST_OK_FOR_K (delta)
12641 && !CONST_OK_FOR_Os (delta))
12642 || (!DISP_IN_RANGE (delta)
12643 && !CONST_OK_FOR_K (vcall_offset)
12644 && !CONST_OK_FOR_Os (vcall_offset)))
12645 {
12646 op[5] = gen_label_rtx ();
12647 output_asm_insn ("basr\t%4,0", op);
12648 targetm.asm_out.internal_label (file, "L",
12649 CODE_LABEL_NUMBER (op[5]));
12650 }
12651
12652 /* Add DELTA to this pointer. */
12653 if (delta)
12654 {
12655 if (CONST_OK_FOR_J (delta))
12656 output_asm_insn ("la\t%1,%2(%1)", op);
12657 else if (DISP_IN_RANGE (delta))
12658 output_asm_insn ("lay\t%1,%2(%1)", op);
12659 else if (CONST_OK_FOR_K (delta))
12660 output_asm_insn ("ahi\t%1,%2", op);
12661 else if (CONST_OK_FOR_Os (delta))
12662 output_asm_insn ("afi\t%1,%2", op);
12663 else
12664 {
12665 op[6] = gen_label_rtx ();
12666 output_asm_insn ("a\t%1,%6-%5(%4)", op);
12667 }
12668 }
12669
12670 /* Perform vcall adjustment. */
12671 if (vcall_offset)
12672 {
12673 if (CONST_OK_FOR_J (vcall_offset))
12674 {
12675 output_asm_insn ("l\t%4,0(%1)", op);
12676 output_asm_insn ("a\t%1,%3(%4)", op);
12677 }
12678 else if (DISP_IN_RANGE (vcall_offset))
12679 {
12680 output_asm_insn ("l\t%4,0(%1)", op);
12681 output_asm_insn ("ay\t%1,%3(%4)", op);
12682 }
12683 else if (CONST_OK_FOR_K (vcall_offset))
12684 {
12685 output_asm_insn ("lhi\t%4,%3", op);
12686 output_asm_insn ("a\t%4,0(%1)", op);
12687 output_asm_insn ("a\t%1,0(%4)", op);
12688 }
12689 else if (CONST_OK_FOR_Os (vcall_offset))
12690 {
12691 output_asm_insn ("iilf\t%4,%3", op);
12692 output_asm_insn ("a\t%4,0(%1)", op);
12693 output_asm_insn ("a\t%1,0(%4)", op);
12694 }
12695 else
12696 {
12697 op[7] = gen_label_rtx ();
12698 output_asm_insn ("l\t%4,%7-%5(%4)", op);
12699 output_asm_insn ("a\t%4,0(%1)", op);
12700 output_asm_insn ("a\t%1,0(%4)", op);
12701 }
12702
12703 /* We had to clobber the base pointer register.
12704 Re-setup the base pointer (with a different base). */
12705 op[5] = gen_label_rtx ();
12706 output_asm_insn ("basr\t%4,0", op);
12707 targetm.asm_out.internal_label (file, "L",
12708 CODE_LABEL_NUMBER (op[5]));
12709 }
12710
12711 /* Jump to target. */
12712 op[8] = gen_label_rtx ();
12713
12714 if (!flag_pic)
12715 output_asm_insn ("l\t%4,%8-%5(%4)", op);
12716 else if (!nonlocal)
12717 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12718 /* We cannot call through .plt, since .plt requires %r12 loaded. */
12719 else if (flag_pic == 1)
12720 {
12721 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12722 output_asm_insn ("l\t%4,%0(%4)", op);
12723 }
12724 else if (flag_pic == 2)
12725 {
12726 op[9] = gen_rtx_REG (Pmode, 0);
12727 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
12728 output_asm_insn ("a\t%4,%8-%5(%4)", op);
12729 output_asm_insn ("ar\t%4,%9", op);
12730 output_asm_insn ("l\t%4,0(%4)", op);
12731 }
12732
12733 output_asm_insn ("br\t%4", op);
12734
12735 /* Output literal pool. */
12736 output_asm_insn (".align\t4", op);
12737
12738 if (nonlocal && flag_pic == 2)
12739 output_asm_insn (".long\t%0", op);
12740 if (nonlocal)
12741 {
12742 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
12743 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
12744 }
12745
12746 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
12747 if (!flag_pic)
12748 output_asm_insn (".long\t%0", op);
12749 else
12750 output_asm_insn (".long\t%0-%5", op);
12751
12752 if (op[6])
12753 {
12754 targetm.asm_out.internal_label (file, "L",
12755 CODE_LABEL_NUMBER (op[6]));
12756 output_asm_insn (".long\t%2", op);
12757 }
12758 if (op[7])
12759 {
12760 targetm.asm_out.internal_label (file, "L",
12761 CODE_LABEL_NUMBER (op[7]));
12762 output_asm_insn (".long\t%3", op);
12763 }
12764 }
12765 final_end_function ();
12766 }
12767
12768 static bool
12769 s390_valid_pointer_mode (machine_mode mode)
12770 {
12771 return (mode == SImode || (TARGET_64BIT && mode == DImode));
12772 }
12773
12774 /* Checks whether the given CALL_EXPR would use a caller
12775 saved register. This is used to decide whether sibling call
12776 optimization could be performed on the respective function
12777 call. */
12778
12779 static bool
12780 s390_call_saved_register_used (tree call_expr)
12781 {
12782 CUMULATIVE_ARGS cum_v;
12783 cumulative_args_t cum;
12784 tree parameter;
12785 machine_mode mode;
12786 tree type;
12787 rtx parm_rtx;
12788 int reg, i;
12789
12790 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
12791 cum = pack_cumulative_args (&cum_v);
12792
12793 for (i = 0; i < call_expr_nargs (call_expr); i++)
12794 {
12795 parameter = CALL_EXPR_ARG (call_expr, i);
12796 gcc_assert (parameter);
12797
12798 /* For an undeclared variable passed as parameter we will get
12799 an ERROR_MARK node here. */
12800 if (TREE_CODE (parameter) == ERROR_MARK)
12801 return true;
12802
12803 type = TREE_TYPE (parameter);
12804 gcc_assert (type);
12805
12806 mode = TYPE_MODE (type);
12807 gcc_assert (mode);
12808
12809 /* We assume that in the target function all parameters are
12810 named. This only has an impact on vector argument register
12811 usage none of which is call-saved. */
12812 if (pass_by_reference (&cum_v, mode, type, true))
12813 {
12814 mode = Pmode;
12815 type = build_pointer_type (type);
12816 }
12817
12818 parm_rtx = s390_function_arg (cum, mode, type, true);
12819
12820 s390_function_arg_advance (cum, mode, type, true);
12821
12822 if (!parm_rtx)
12823 continue;
12824
12825 if (REG_P (parm_rtx))
12826 {
12827 for (reg = 0;
12828 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
12829 reg++)
12830 if (!call_used_regs[reg + REGNO (parm_rtx)])
12831 return true;
12832 }
12833
12834 if (GET_CODE (parm_rtx) == PARALLEL)
12835 {
12836 int i;
12837
12838 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
12839 {
12840 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
12841
12842 gcc_assert (REG_P (r));
12843
12844 for (reg = 0;
12845 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
12846 reg++)
12847 if (!call_used_regs[reg + REGNO (r)])
12848 return true;
12849 }
12850 }
12851
12852 }
12853 return false;
12854 }
12855
12856 /* Return true if the given call expression can be
12857 turned into a sibling call.
12858 DECL holds the declaration of the function to be called whereas
12859 EXP is the call expression itself. */
12860
12861 static bool
12862 s390_function_ok_for_sibcall (tree decl, tree exp)
12863 {
12864 /* The TPF epilogue uses register 1. */
12865 if (TARGET_TPF_PROFILING)
12866 return false;
12867
12868 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
12869 which would have to be restored before the sibcall. */
12870 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
12871 return false;
12872
12873 /* Register 6 on s390 is available as an argument register but unfortunately
12874 "caller saved". This makes functions needing this register for arguments
12875 not suitable for sibcalls. */
12876 return !s390_call_saved_register_used (exp);
12877 }
12878
12879 /* Return the fixed registers used for condition codes. */
12880
12881 static bool
12882 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
12883 {
12884 *p1 = CC_REGNUM;
12885 *p2 = INVALID_REGNUM;
12886
12887 return true;
12888 }
12889
12890 /* This function is used by the call expanders of the machine description.
12891 It emits the call insn itself together with the necessary operations
12892 to adjust the target address and returns the emitted insn.
12893 ADDR_LOCATION is the target address rtx
12894 TLS_CALL the location of the thread-local symbol
12895 RESULT_REG the register where the result of the call should be stored
12896 RETADDR_REG the register where the return address should be stored
12897 If this parameter is NULL_RTX the call is considered
12898 to be a sibling call. */
12899
12900 rtx_insn *
12901 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
12902 rtx retaddr_reg)
12903 {
12904 bool plt_call = false;
12905 rtx_insn *insn;
12906 rtx call;
12907 rtx clobber;
12908 rtvec vec;
12909
12910 /* Direct function calls need special treatment. */
12911 if (GET_CODE (addr_location) == SYMBOL_REF)
12912 {
12913 /* When calling a global routine in PIC mode, we must
12914 replace the symbol itself with the PLT stub. */
12915 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
12916 {
12917 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
12918 {
12919 addr_location = gen_rtx_UNSPEC (Pmode,
12920 gen_rtvec (1, addr_location),
12921 UNSPEC_PLT);
12922 addr_location = gen_rtx_CONST (Pmode, addr_location);
12923 plt_call = true;
12924 }
12925 else
12926 /* For -fpic code the PLT entries might use r12 which is
12927 call-saved. Therefore we cannot do a sibcall when
12928 calling directly using a symbol ref. When reaching
12929 this point we decided (in s390_function_ok_for_sibcall)
12930 to do a sibcall for a function pointer but one of the
12931 optimizers was able to get rid of the function pointer
12932 by propagating the symbol ref into the call. This
12933 optimization is illegal for S/390 so we turn the direct
12934 call into a indirect call again. */
12935 addr_location = force_reg (Pmode, addr_location);
12936 }
12937
12938 /* Unless we can use the bras(l) insn, force the
12939 routine address into a register. */
12940 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
12941 {
12942 if (flag_pic)
12943 addr_location = legitimize_pic_address (addr_location, 0);
12944 else
12945 addr_location = force_reg (Pmode, addr_location);
12946 }
12947 }
12948
12949 /* If it is already an indirect call or the code above moved the
12950 SYMBOL_REF to somewhere else make sure the address can be found in
12951 register 1. */
12952 if (retaddr_reg == NULL_RTX
12953 && GET_CODE (addr_location) != SYMBOL_REF
12954 && !plt_call)
12955 {
12956 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
12957 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
12958 }
12959
12960 addr_location = gen_rtx_MEM (QImode, addr_location);
12961 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
12962
12963 if (result_reg != NULL_RTX)
12964 call = gen_rtx_SET (result_reg, call);
12965
12966 if (retaddr_reg != NULL_RTX)
12967 {
12968 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
12969
12970 if (tls_call != NULL_RTX)
12971 vec = gen_rtvec (3, call, clobber,
12972 gen_rtx_USE (VOIDmode, tls_call));
12973 else
12974 vec = gen_rtvec (2, call, clobber);
12975
12976 call = gen_rtx_PARALLEL (VOIDmode, vec);
12977 }
12978
12979 insn = emit_call_insn (call);
12980
12981 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
12982 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
12983 {
12984 /* s390_function_ok_for_sibcall should
12985 have denied sibcalls in this case. */
12986 gcc_assert (retaddr_reg != NULL_RTX);
12987 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
12988 }
12989 return insn;
12990 }
12991
12992 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
12993
12994 static void
12995 s390_conditional_register_usage (void)
12996 {
12997 int i;
12998
12999 if (flag_pic)
13000 {
13001 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13002 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13003 }
13004 if (TARGET_CPU_ZARCH)
13005 {
13006 fixed_regs[BASE_REGNUM] = 0;
13007 call_used_regs[BASE_REGNUM] = 0;
13008 fixed_regs[RETURN_REGNUM] = 0;
13009 call_used_regs[RETURN_REGNUM] = 0;
13010 }
13011 if (TARGET_64BIT)
13012 {
13013 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13014 call_used_regs[i] = call_really_used_regs[i] = 0;
13015 }
13016 else
13017 {
13018 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
13019 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
13020 }
13021
13022 if (TARGET_SOFT_FLOAT)
13023 {
13024 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13025 call_used_regs[i] = fixed_regs[i] = 1;
13026 }
13027
13028 /* Disable v16 - v31 for non-vector target. */
13029 if (!TARGET_VX)
13030 {
13031 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13032 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
13033 }
13034 }
13035
13036 /* Corresponding function to eh_return expander. */
13037
13038 static GTY(()) rtx s390_tpf_eh_return_symbol;
13039 void
13040 s390_emit_tpf_eh_return (rtx target)
13041 {
13042 rtx_insn *insn;
13043 rtx reg, orig_ra;
13044
13045 if (!s390_tpf_eh_return_symbol)
13046 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13047
13048 reg = gen_rtx_REG (Pmode, 2);
13049 orig_ra = gen_rtx_REG (Pmode, 3);
13050
13051 emit_move_insn (reg, target);
13052 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13053 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13054 gen_rtx_REG (Pmode, RETURN_REGNUM));
13055 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13056 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13057
13058 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13059 }
13060
13061 /* Rework the prologue/epilogue to avoid saving/restoring
13062 registers unnecessarily. */
13063
13064 static void
13065 s390_optimize_prologue (void)
13066 {
13067 rtx_insn *insn, *new_insn, *next_insn;
13068
13069 /* Do a final recompute of the frame-related data. */
13070 s390_optimize_register_info ();
13071
13072 /* If all special registers are in fact used, there's nothing we
13073 can do, so no point in walking the insn list. */
13074
13075 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13076 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
13077 && (TARGET_CPU_ZARCH
13078 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
13079 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
13080 return;
13081
13082 /* Search for prologue/epilogue insns and replace them. */
13083
13084 for (insn = get_insns (); insn; insn = next_insn)
13085 {
13086 int first, last, off;
13087 rtx set, base, offset;
13088 rtx pat;
13089
13090 next_insn = NEXT_INSN (insn);
13091
13092 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13093 continue;
13094
13095 pat = PATTERN (insn);
13096
13097 /* Remove ldgr/lgdr instructions used for saving and restore
13098 GPRs if possible. */
13099 if (TARGET_Z10)
13100 {
13101 rtx tmp_pat = pat;
13102
13103 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13104 tmp_pat = XVECEXP (pat, 0, 0);
13105
13106 if (GET_CODE (tmp_pat) == SET
13107 && GET_MODE (SET_SRC (tmp_pat)) == DImode
13108 && REG_P (SET_SRC (tmp_pat))
13109 && REG_P (SET_DEST (tmp_pat)))
13110 {
13111 int src_regno = REGNO (SET_SRC (tmp_pat));
13112 int dest_regno = REGNO (SET_DEST (tmp_pat));
13113 int gpr_regno;
13114 int fpr_regno;
13115
13116 if (!((GENERAL_REGNO_P (src_regno)
13117 && FP_REGNO_P (dest_regno))
13118 || (FP_REGNO_P (src_regno)
13119 && GENERAL_REGNO_P (dest_regno))))
13120 continue;
13121
13122 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13123 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13124
13125 /* GPR must be call-saved, FPR must be call-clobbered. */
13126 if (!call_really_used_regs[fpr_regno]
13127 || call_really_used_regs[gpr_regno])
13128 continue;
13129
13130 /* It must not happen that what we once saved in an FPR now
13131 needs a stack slot. */
13132 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13133
13134 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13135 {
13136 remove_insn (insn);
13137 continue;
13138 }
13139 }
13140 }
13141
13142 if (GET_CODE (pat) == PARALLEL
13143 && store_multiple_operation (pat, VOIDmode))
13144 {
13145 set = XVECEXP (pat, 0, 0);
13146 first = REGNO (SET_SRC (set));
13147 last = first + XVECLEN (pat, 0) - 1;
13148 offset = const0_rtx;
13149 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13150 off = INTVAL (offset);
13151
13152 if (GET_CODE (base) != REG || off < 0)
13153 continue;
13154 if (cfun_frame_layout.first_save_gpr != -1
13155 && (cfun_frame_layout.first_save_gpr < first
13156 || cfun_frame_layout.last_save_gpr > last))
13157 continue;
13158 if (REGNO (base) != STACK_POINTER_REGNUM
13159 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13160 continue;
13161 if (first > BASE_REGNUM || last < BASE_REGNUM)
13162 continue;
13163
13164 if (cfun_frame_layout.first_save_gpr != -1)
13165 {
13166 rtx s_pat = save_gprs (base,
13167 off + (cfun_frame_layout.first_save_gpr
13168 - first) * UNITS_PER_LONG,
13169 cfun_frame_layout.first_save_gpr,
13170 cfun_frame_layout.last_save_gpr);
13171 new_insn = emit_insn_before (s_pat, insn);
13172 INSN_ADDRESSES_NEW (new_insn, -1);
13173 }
13174
13175 remove_insn (insn);
13176 continue;
13177 }
13178
13179 if (cfun_frame_layout.first_save_gpr == -1
13180 && GET_CODE (pat) == SET
13181 && GENERAL_REG_P (SET_SRC (pat))
13182 && GET_CODE (SET_DEST (pat)) == MEM)
13183 {
13184 set = pat;
13185 first = REGNO (SET_SRC (set));
13186 offset = const0_rtx;
13187 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13188 off = INTVAL (offset);
13189
13190 if (GET_CODE (base) != REG || off < 0)
13191 continue;
13192 if (REGNO (base) != STACK_POINTER_REGNUM
13193 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13194 continue;
13195
13196 remove_insn (insn);
13197 continue;
13198 }
13199
13200 if (GET_CODE (pat) == PARALLEL
13201 && load_multiple_operation (pat, VOIDmode))
13202 {
13203 set = XVECEXP (pat, 0, 0);
13204 first = REGNO (SET_DEST (set));
13205 last = first + XVECLEN (pat, 0) - 1;
13206 offset = const0_rtx;
13207 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13208 off = INTVAL (offset);
13209
13210 if (GET_CODE (base) != REG || off < 0)
13211 continue;
13212
13213 if (cfun_frame_layout.first_restore_gpr != -1
13214 && (cfun_frame_layout.first_restore_gpr < first
13215 || cfun_frame_layout.last_restore_gpr > last))
13216 continue;
13217 if (REGNO (base) != STACK_POINTER_REGNUM
13218 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13219 continue;
13220 if (first > BASE_REGNUM || last < BASE_REGNUM)
13221 continue;
13222
13223 if (cfun_frame_layout.first_restore_gpr != -1)
13224 {
13225 rtx rpat = restore_gprs (base,
13226 off + (cfun_frame_layout.first_restore_gpr
13227 - first) * UNITS_PER_LONG,
13228 cfun_frame_layout.first_restore_gpr,
13229 cfun_frame_layout.last_restore_gpr);
13230
13231 /* Remove REG_CFA_RESTOREs for registers that we no
13232 longer need to save. */
13233 REG_NOTES (rpat) = REG_NOTES (insn);
13234 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13235 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13236 && ((int) REGNO (XEXP (*ptr, 0))
13237 < cfun_frame_layout.first_restore_gpr))
13238 *ptr = XEXP (*ptr, 1);
13239 else
13240 ptr = &XEXP (*ptr, 1);
13241 new_insn = emit_insn_before (rpat, insn);
13242 RTX_FRAME_RELATED_P (new_insn) = 1;
13243 INSN_ADDRESSES_NEW (new_insn, -1);
13244 }
13245
13246 remove_insn (insn);
13247 continue;
13248 }
13249
13250 if (cfun_frame_layout.first_restore_gpr == -1
13251 && GET_CODE (pat) == SET
13252 && GENERAL_REG_P (SET_DEST (pat))
13253 && GET_CODE (SET_SRC (pat)) == MEM)
13254 {
13255 set = pat;
13256 first = REGNO (SET_DEST (set));
13257 offset = const0_rtx;
13258 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13259 off = INTVAL (offset);
13260
13261 if (GET_CODE (base) != REG || off < 0)
13262 continue;
13263
13264 if (REGNO (base) != STACK_POINTER_REGNUM
13265 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13266 continue;
13267
13268 remove_insn (insn);
13269 continue;
13270 }
13271 }
13272 }
13273
13274 /* On z10 and later the dynamic branch prediction must see the
13275 backward jump within a certain windows. If not it falls back to
13276 the static prediction. This function rearranges the loop backward
13277 branch in a way which makes the static prediction always correct.
13278 The function returns true if it added an instruction. */
13279 static bool
13280 s390_fix_long_loop_prediction (rtx_insn *insn)
13281 {
13282 rtx set = single_set (insn);
13283 rtx code_label, label_ref, new_label;
13284 rtx_insn *uncond_jump;
13285 rtx_insn *cur_insn;
13286 rtx tmp;
13287 int distance;
13288
13289 /* This will exclude branch on count and branch on index patterns
13290 since these are correctly statically predicted. */
13291 if (!set
13292 || SET_DEST (set) != pc_rtx
13293 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13294 return false;
13295
13296 /* Skip conditional returns. */
13297 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13298 && XEXP (SET_SRC (set), 2) == pc_rtx)
13299 return false;
13300
13301 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13302 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13303
13304 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13305
13306 code_label = XEXP (label_ref, 0);
13307
13308 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13309 || INSN_ADDRESSES (INSN_UID (insn)) == -1
13310 || (INSN_ADDRESSES (INSN_UID (insn))
13311 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13312 return false;
13313
13314 for (distance = 0, cur_insn = PREV_INSN (insn);
13315 distance < PREDICT_DISTANCE - 6;
13316 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13317 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13318 return false;
13319
13320 new_label = gen_label_rtx ();
13321 uncond_jump = emit_jump_insn_after (
13322 gen_rtx_SET (pc_rtx,
13323 gen_rtx_LABEL_REF (VOIDmode, code_label)),
13324 insn);
13325 emit_label_after (new_label, uncond_jump);
13326
13327 tmp = XEXP (SET_SRC (set), 1);
13328 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13329 XEXP (SET_SRC (set), 2) = tmp;
13330 INSN_CODE (insn) = -1;
13331
13332 XEXP (label_ref, 0) = new_label;
13333 JUMP_LABEL (insn) = new_label;
13334 JUMP_LABEL (uncond_jump) = code_label;
13335
13336 return true;
13337 }
13338
13339 /* Returns 1 if INSN reads the value of REG for purposes not related
13340 to addressing of memory, and 0 otherwise. */
13341 static int
13342 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
13343 {
13344 return reg_referenced_p (reg, PATTERN (insn))
13345 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
13346 }
13347
13348 /* Starting from INSN find_cond_jump looks downwards in the insn
13349 stream for a single jump insn which is the last user of the
13350 condition code set in INSN. */
13351 static rtx_insn *
13352 find_cond_jump (rtx_insn *insn)
13353 {
13354 for (; insn; insn = NEXT_INSN (insn))
13355 {
13356 rtx ite, cc;
13357
13358 if (LABEL_P (insn))
13359 break;
13360
13361 if (!JUMP_P (insn))
13362 {
13363 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
13364 break;
13365 continue;
13366 }
13367
13368 /* This will be triggered by a return. */
13369 if (GET_CODE (PATTERN (insn)) != SET)
13370 break;
13371
13372 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
13373 ite = SET_SRC (PATTERN (insn));
13374
13375 if (GET_CODE (ite) != IF_THEN_ELSE)
13376 break;
13377
13378 cc = XEXP (XEXP (ite, 0), 0);
13379 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
13380 break;
13381
13382 if (find_reg_note (insn, REG_DEAD, cc))
13383 return insn;
13384 break;
13385 }
13386
13387 return NULL;
13388 }
13389
13390 /* Swap the condition in COND and the operands in OP0 and OP1 so that
13391 the semantics does not change. If NULL_RTX is passed as COND the
13392 function tries to find the conditional jump starting with INSN. */
13393 static void
13394 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
13395 {
13396 rtx tmp = *op0;
13397
13398 if (cond == NULL_RTX)
13399 {
13400 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
13401 rtx set = jump ? single_set (jump) : NULL_RTX;
13402
13403 if (set == NULL_RTX)
13404 return;
13405
13406 cond = XEXP (SET_SRC (set), 0);
13407 }
13408
13409 *op0 = *op1;
13410 *op1 = tmp;
13411 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
13412 }
13413
13414 /* On z10, instructions of the compare-and-branch family have the
13415 property to access the register occurring as second operand with
13416 its bits complemented. If such a compare is grouped with a second
13417 instruction that accesses the same register non-complemented, and
13418 if that register's value is delivered via a bypass, then the
13419 pipeline recycles, thereby causing significant performance decline.
13420 This function locates such situations and exchanges the two
13421 operands of the compare. The function return true whenever it
13422 added an insn. */
13423 static bool
13424 s390_z10_optimize_cmp (rtx_insn *insn)
13425 {
13426 rtx_insn *prev_insn, *next_insn;
13427 bool insn_added_p = false;
13428 rtx cond, *op0, *op1;
13429
13430 if (GET_CODE (PATTERN (insn)) == PARALLEL)
13431 {
13432 /* Handle compare and branch and branch on count
13433 instructions. */
13434 rtx pattern = single_set (insn);
13435
13436 if (!pattern
13437 || SET_DEST (pattern) != pc_rtx
13438 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
13439 return false;
13440
13441 cond = XEXP (SET_SRC (pattern), 0);
13442 op0 = &XEXP (cond, 0);
13443 op1 = &XEXP (cond, 1);
13444 }
13445 else if (GET_CODE (PATTERN (insn)) == SET)
13446 {
13447 rtx src, dest;
13448
13449 /* Handle normal compare instructions. */
13450 src = SET_SRC (PATTERN (insn));
13451 dest = SET_DEST (PATTERN (insn));
13452
13453 if (!REG_P (dest)
13454 || !CC_REGNO_P (REGNO (dest))
13455 || GET_CODE (src) != COMPARE)
13456 return false;
13457
13458 /* s390_swap_cmp will try to find the conditional
13459 jump when passing NULL_RTX as condition. */
13460 cond = NULL_RTX;
13461 op0 = &XEXP (src, 0);
13462 op1 = &XEXP (src, 1);
13463 }
13464 else
13465 return false;
13466
13467 if (!REG_P (*op0) || !REG_P (*op1))
13468 return false;
13469
13470 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
13471 return false;
13472
13473 /* Swap the COMPARE arguments and its mask if there is a
13474 conflicting access in the previous insn. */
13475 prev_insn = prev_active_insn (insn);
13476 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13477 && reg_referenced_p (*op1, PATTERN (prev_insn)))
13478 s390_swap_cmp (cond, op0, op1, insn);
13479
13480 /* Check if there is a conflict with the next insn. If there
13481 was no conflict with the previous insn, then swap the
13482 COMPARE arguments and its mask. If we already swapped
13483 the operands, or if swapping them would cause a conflict
13484 with the previous insn, issue a NOP after the COMPARE in
13485 order to separate the two instuctions. */
13486 next_insn = next_active_insn (insn);
13487 if (next_insn != NULL_RTX && INSN_P (next_insn)
13488 && s390_non_addr_reg_read_p (*op1, next_insn))
13489 {
13490 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13491 && s390_non_addr_reg_read_p (*op0, prev_insn))
13492 {
13493 if (REGNO (*op1) == 0)
13494 emit_insn_after (gen_nop1 (), insn);
13495 else
13496 emit_insn_after (gen_nop (), insn);
13497 insn_added_p = true;
13498 }
13499 else
13500 s390_swap_cmp (cond, op0, op1, insn);
13501 }
13502 return insn_added_p;
13503 }
13504
13505 /* Perform machine-dependent processing. */
13506
13507 static void
13508 s390_reorg (void)
13509 {
13510 bool pool_overflow = false;
13511 int hw_before, hw_after;
13512
13513 /* Make sure all splits have been performed; splits after
13514 machine_dependent_reorg might confuse insn length counts. */
13515 split_all_insns_noflow ();
13516
13517 /* Install the main literal pool and the associated base
13518 register load insns.
13519
13520 In addition, there are two problematic situations we need
13521 to correct:
13522
13523 - the literal pool might be > 4096 bytes in size, so that
13524 some of its elements cannot be directly accessed
13525
13526 - a branch target might be > 64K away from the branch, so that
13527 it is not possible to use a PC-relative instruction.
13528
13529 To fix those, we split the single literal pool into multiple
13530 pool chunks, reloading the pool base register at various
13531 points throughout the function to ensure it always points to
13532 the pool chunk the following code expects, and / or replace
13533 PC-relative branches by absolute branches.
13534
13535 However, the two problems are interdependent: splitting the
13536 literal pool can move a branch further away from its target,
13537 causing the 64K limit to overflow, and on the other hand,
13538 replacing a PC-relative branch by an absolute branch means
13539 we need to put the branch target address into the literal
13540 pool, possibly causing it to overflow.
13541
13542 So, we loop trying to fix up both problems until we manage
13543 to satisfy both conditions at the same time. Note that the
13544 loop is guaranteed to terminate as every pass of the loop
13545 strictly decreases the total number of PC-relative branches
13546 in the function. (This is not completely true as there
13547 might be branch-over-pool insns introduced by chunkify_start.
13548 Those never need to be split however.) */
13549
13550 for (;;)
13551 {
13552 struct constant_pool *pool = NULL;
13553
13554 /* Collect the literal pool. */
13555 if (!pool_overflow)
13556 {
13557 pool = s390_mainpool_start ();
13558 if (!pool)
13559 pool_overflow = true;
13560 }
13561
13562 /* If literal pool overflowed, start to chunkify it. */
13563 if (pool_overflow)
13564 pool = s390_chunkify_start ();
13565
13566 /* Split out-of-range branches. If this has created new
13567 literal pool entries, cancel current chunk list and
13568 recompute it. zSeries machines have large branch
13569 instructions, so we never need to split a branch. */
13570 if (!TARGET_CPU_ZARCH && s390_split_branches ())
13571 {
13572 if (pool_overflow)
13573 s390_chunkify_cancel (pool);
13574 else
13575 s390_mainpool_cancel (pool);
13576
13577 continue;
13578 }
13579
13580 /* If we made it up to here, both conditions are satisfied.
13581 Finish up literal pool related changes. */
13582 if (pool_overflow)
13583 s390_chunkify_finish (pool);
13584 else
13585 s390_mainpool_finish (pool);
13586
13587 /* We're done splitting branches. */
13588 cfun->machine->split_branches_pending_p = false;
13589 break;
13590 }
13591
13592 /* Generate out-of-pool execute target insns. */
13593 if (TARGET_CPU_ZARCH)
13594 {
13595 rtx_insn *insn, *target;
13596 rtx label;
13597
13598 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13599 {
13600 label = s390_execute_label (insn);
13601 if (!label)
13602 continue;
13603
13604 gcc_assert (label != const0_rtx);
13605
13606 target = emit_label (XEXP (label, 0));
13607 INSN_ADDRESSES_NEW (target, -1);
13608
13609 target = emit_insn (s390_execute_target (insn));
13610 INSN_ADDRESSES_NEW (target, -1);
13611 }
13612 }
13613
13614 /* Try to optimize prologue and epilogue further. */
13615 s390_optimize_prologue ();
13616
13617 /* Walk over the insns and do some >=z10 specific changes. */
13618 if (s390_tune >= PROCESSOR_2097_Z10)
13619 {
13620 rtx_insn *insn;
13621 bool insn_added_p = false;
13622
13623 /* The insn lengths and addresses have to be up to date for the
13624 following manipulations. */
13625 shorten_branches (get_insns ());
13626
13627 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13628 {
13629 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13630 continue;
13631
13632 if (JUMP_P (insn))
13633 insn_added_p |= s390_fix_long_loop_prediction (insn);
13634
13635 if ((GET_CODE (PATTERN (insn)) == PARALLEL
13636 || GET_CODE (PATTERN (insn)) == SET)
13637 && s390_tune == PROCESSOR_2097_Z10)
13638 insn_added_p |= s390_z10_optimize_cmp (insn);
13639 }
13640
13641 /* Adjust branches if we added new instructions. */
13642 if (insn_added_p)
13643 shorten_branches (get_insns ());
13644 }
13645
13646 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
13647 if (hw_after > 0)
13648 {
13649 rtx_insn *insn;
13650
13651 /* Insert NOPs for hotpatching. */
13652 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
13653 /* Emit NOPs
13654 1. inside the area covered by debug information to allow setting
13655 breakpoints at the NOPs,
13656 2. before any insn which results in an asm instruction,
13657 3. before in-function labels to avoid jumping to the NOPs, for
13658 example as part of a loop,
13659 4. before any barrier in case the function is completely empty
13660 (__builtin_unreachable ()) and has neither internal labels nor
13661 active insns.
13662 */
13663 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
13664 break;
13665 /* Output a series of NOPs before the first active insn. */
13666 while (insn && hw_after > 0)
13667 {
13668 if (hw_after >= 3 && TARGET_CPU_ZARCH)
13669 {
13670 emit_insn_before (gen_nop_6_byte (), insn);
13671 hw_after -= 3;
13672 }
13673 else if (hw_after >= 2)
13674 {
13675 emit_insn_before (gen_nop_4_byte (), insn);
13676 hw_after -= 2;
13677 }
13678 else
13679 {
13680 emit_insn_before (gen_nop_2_byte (), insn);
13681 hw_after -= 1;
13682 }
13683 }
13684 }
13685 }
13686
13687 /* Return true if INSN is a fp load insn writing register REGNO. */
13688 static inline bool
13689 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
13690 {
13691 rtx set;
13692 enum attr_type flag = s390_safe_attr_type (insn);
13693
13694 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
13695 return false;
13696
13697 set = single_set (insn);
13698
13699 if (set == NULL_RTX)
13700 return false;
13701
13702 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
13703 return false;
13704
13705 if (REGNO (SET_DEST (set)) != regno)
13706 return false;
13707
13708 return true;
13709 }
13710
13711 /* This value describes the distance to be avoided between an
13712 aritmetic fp instruction and an fp load writing the same register.
13713 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
13714 fine but the exact value has to be avoided. Otherwise the FP
13715 pipeline will throw an exception causing a major penalty. */
13716 #define Z10_EARLYLOAD_DISTANCE 7
13717
13718 /* Rearrange the ready list in order to avoid the situation described
13719 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
13720 moved to the very end of the ready list. */
13721 static void
13722 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
13723 {
13724 unsigned int regno;
13725 int nready = *nready_p;
13726 rtx_insn *tmp;
13727 int i;
13728 rtx_insn *insn;
13729 rtx set;
13730 enum attr_type flag;
13731 int distance;
13732
13733 /* Skip DISTANCE - 1 active insns. */
13734 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
13735 distance > 0 && insn != NULL_RTX;
13736 distance--, insn = prev_active_insn (insn))
13737 if (CALL_P (insn) || JUMP_P (insn))
13738 return;
13739
13740 if (insn == NULL_RTX)
13741 return;
13742
13743 set = single_set (insn);
13744
13745 if (set == NULL_RTX || !REG_P (SET_DEST (set))
13746 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
13747 return;
13748
13749 flag = s390_safe_attr_type (insn);
13750
13751 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
13752 return;
13753
13754 regno = REGNO (SET_DEST (set));
13755 i = nready - 1;
13756
13757 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
13758 i--;
13759
13760 if (!i)
13761 return;
13762
13763 tmp = ready[i];
13764 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
13765 ready[0] = tmp;
13766 }
13767
13768
13769 /* The s390_sched_state variable tracks the state of the current or
13770 the last instruction group.
13771
13772 0,1,2 number of instructions scheduled in the current group
13773 3 the last group is complete - normal insns
13774 4 the last group was a cracked/expanded insn */
13775
13776 static int s390_sched_state;
13777
13778 #define S390_SCHED_STATE_NORMAL 3
13779 #define S390_SCHED_STATE_CRACKED 4
13780
13781 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
13782 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
13783 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
13784 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
13785
13786 static unsigned int
13787 s390_get_sched_attrmask (rtx_insn *insn)
13788 {
13789 unsigned int mask = 0;
13790
13791 switch (s390_tune)
13792 {
13793 case PROCESSOR_2827_ZEC12:
13794 if (get_attr_zEC12_cracked (insn))
13795 mask |= S390_SCHED_ATTR_MASK_CRACKED;
13796 if (get_attr_zEC12_expanded (insn))
13797 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
13798 if (get_attr_zEC12_endgroup (insn))
13799 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
13800 if (get_attr_zEC12_groupalone (insn))
13801 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
13802 break;
13803 case PROCESSOR_2964_Z13:
13804 if (get_attr_z13_cracked (insn))
13805 mask |= S390_SCHED_ATTR_MASK_CRACKED;
13806 if (get_attr_z13_expanded (insn))
13807 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
13808 if (get_attr_z13_endgroup (insn))
13809 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
13810 if (get_attr_z13_groupalone (insn))
13811 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
13812 break;
13813 default:
13814 gcc_unreachable ();
13815 }
13816 return mask;
13817 }
13818
13819 static unsigned int
13820 s390_get_unit_mask (rtx_insn *insn, int *units)
13821 {
13822 unsigned int mask = 0;
13823
13824 switch (s390_tune)
13825 {
13826 case PROCESSOR_2964_Z13:
13827 *units = 3;
13828 if (get_attr_z13_unit_lsu (insn))
13829 mask |= 1 << 0;
13830 if (get_attr_z13_unit_fxu (insn))
13831 mask |= 1 << 1;
13832 if (get_attr_z13_unit_vfu (insn))
13833 mask |= 1 << 2;
13834 break;
13835 default:
13836 gcc_unreachable ();
13837 }
13838 return mask;
13839 }
13840
13841 /* Return the scheduling score for INSN. The higher the score the
13842 better. The score is calculated from the OOO scheduling attributes
13843 of INSN and the scheduling state s390_sched_state. */
13844 static int
13845 s390_sched_score (rtx_insn *insn)
13846 {
13847 unsigned int mask = s390_get_sched_attrmask (insn);
13848 int score = 0;
13849
13850 switch (s390_sched_state)
13851 {
13852 case 0:
13853 /* Try to put insns into the first slot which would otherwise
13854 break a group. */
13855 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
13856 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
13857 score += 5;
13858 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
13859 score += 10;
13860 /* fallthrough */
13861 case 1:
13862 /* Prefer not cracked insns while trying to put together a
13863 group. */
13864 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
13865 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
13866 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
13867 score += 10;
13868 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
13869 score += 5;
13870 break;
13871 case 2:
13872 /* Prefer not cracked insns while trying to put together a
13873 group. */
13874 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
13875 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
13876 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
13877 score += 10;
13878 /* Prefer endgroup insns in the last slot. */
13879 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
13880 score += 10;
13881 break;
13882 case S390_SCHED_STATE_NORMAL:
13883 /* Prefer not cracked insns if the last was not cracked. */
13884 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
13885 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
13886 score += 5;
13887 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
13888 score += 10;
13889 break;
13890 case S390_SCHED_STATE_CRACKED:
13891 /* Try to keep cracked insns together to prevent them from
13892 interrupting groups. */
13893 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
13894 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
13895 score += 5;
13896 break;
13897 }
13898
13899 if (s390_tune == PROCESSOR_2964_Z13)
13900 {
13901 int units, i;
13902 unsigned unit_mask, m = 1;
13903
13904 unit_mask = s390_get_unit_mask (insn, &units);
13905 gcc_assert (units <= MAX_SCHED_UNITS);
13906
13907 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
13908 ago the last insn of this unit type got scheduled. This is
13909 supposed to help providing a proper instruction mix to the
13910 CPU. */
13911 for (i = 0; i < units; i++, m <<= 1)
13912 if (m & unit_mask)
13913 score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
13914 MAX_SCHED_MIX_DISTANCE);
13915 }
13916 return score;
13917 }
13918
13919 /* This function is called via hook TARGET_SCHED_REORDER before
13920 issuing one insn from list READY which contains *NREADYP entries.
13921 For target z10 it reorders load instructions to avoid early load
13922 conflicts in the floating point pipeline */
13923 static int
13924 s390_sched_reorder (FILE *file, int verbose,
13925 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
13926 {
13927 if (s390_tune == PROCESSOR_2097_Z10
13928 && reload_completed
13929 && *nreadyp > 1)
13930 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
13931
13932 if (s390_tune >= PROCESSOR_2827_ZEC12
13933 && reload_completed
13934 && *nreadyp > 1)
13935 {
13936 int i;
13937 int last_index = *nreadyp - 1;
13938 int max_index = -1;
13939 int max_score = -1;
13940 rtx_insn *tmp;
13941
13942 /* Just move the insn with the highest score to the top (the
13943 end) of the list. A full sort is not needed since a conflict
13944 in the hazard recognition cannot happen. So the top insn in
13945 the ready list will always be taken. */
13946 for (i = last_index; i >= 0; i--)
13947 {
13948 int score;
13949
13950 if (recog_memoized (ready[i]) < 0)
13951 continue;
13952
13953 score = s390_sched_score (ready[i]);
13954 if (score > max_score)
13955 {
13956 max_score = score;
13957 max_index = i;
13958 }
13959 }
13960
13961 if (max_index != -1)
13962 {
13963 if (max_index != last_index)
13964 {
13965 tmp = ready[max_index];
13966 ready[max_index] = ready[last_index];
13967 ready[last_index] = tmp;
13968
13969 if (verbose > 5)
13970 fprintf (file,
13971 ";;\t\tBACKEND: move insn %d to the top of list\n",
13972 INSN_UID (ready[last_index]));
13973 }
13974 else if (verbose > 5)
13975 fprintf (file,
13976 ";;\t\tBACKEND: best insn %d already on top\n",
13977 INSN_UID (ready[last_index]));
13978 }
13979
13980 if (verbose > 5)
13981 {
13982 fprintf (file, "ready list ooo attributes - sched state: %d\n",
13983 s390_sched_state);
13984
13985 for (i = last_index; i >= 0; i--)
13986 {
13987 unsigned int sched_mask;
13988 rtx_insn *insn = ready[i];
13989
13990 if (recog_memoized (insn) < 0)
13991 continue;
13992
13993 sched_mask = s390_get_sched_attrmask (insn);
13994 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
13995 INSN_UID (insn),
13996 s390_sched_score (insn));
13997 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
13998 ((M) & sched_mask) ? #ATTR : "");
13999 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14000 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14001 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14002 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14003 #undef PRINT_SCHED_ATTR
14004 if (s390_tune == PROCESSOR_2964_Z13)
14005 {
14006 unsigned int unit_mask, m = 1;
14007 int units, j;
14008
14009 unit_mask = s390_get_unit_mask (insn, &units);
14010 fprintf (file, "(units:");
14011 for (j = 0; j < units; j++, m <<= 1)
14012 if (m & unit_mask)
14013 fprintf (file, " u%d", j);
14014 fprintf (file, ")");
14015 }
14016 fprintf (file, "\n");
14017 }
14018 }
14019 }
14020
14021 return s390_issue_rate ();
14022 }
14023
14024
14025 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14026 the scheduler has issued INSN. It stores the last issued insn into
14027 last_scheduled_insn in order to make it available for
14028 s390_sched_reorder. */
14029 static int
14030 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14031 {
14032 last_scheduled_insn = insn;
14033
14034 if (s390_tune >= PROCESSOR_2827_ZEC12
14035 && reload_completed
14036 && recog_memoized (insn) >= 0)
14037 {
14038 unsigned int mask = s390_get_sched_attrmask (insn);
14039
14040 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14041 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14042 s390_sched_state = S390_SCHED_STATE_CRACKED;
14043 else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
14044 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14045 s390_sched_state = S390_SCHED_STATE_NORMAL;
14046 else
14047 {
14048 /* Only normal insns are left (mask == 0). */
14049 switch (s390_sched_state)
14050 {
14051 case 0:
14052 case 1:
14053 case 2:
14054 case S390_SCHED_STATE_NORMAL:
14055 if (s390_sched_state == S390_SCHED_STATE_NORMAL)
14056 s390_sched_state = 1;
14057 else
14058 s390_sched_state++;
14059
14060 break;
14061 case S390_SCHED_STATE_CRACKED:
14062 s390_sched_state = S390_SCHED_STATE_NORMAL;
14063 break;
14064 }
14065 }
14066
14067 if (s390_tune == PROCESSOR_2964_Z13)
14068 {
14069 int units, i;
14070 unsigned unit_mask, m = 1;
14071
14072 unit_mask = s390_get_unit_mask (insn, &units);
14073 gcc_assert (units <= MAX_SCHED_UNITS);
14074
14075 for (i = 0; i < units; i++, m <<= 1)
14076 if (m & unit_mask)
14077 last_scheduled_unit_distance[i] = 0;
14078 else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
14079 last_scheduled_unit_distance[i]++;
14080 }
14081
14082 if (verbose > 5)
14083 {
14084 unsigned int sched_mask;
14085
14086 sched_mask = s390_get_sched_attrmask (insn);
14087
14088 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
14089 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
14090 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14091 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14092 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14093 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14094 #undef PRINT_SCHED_ATTR
14095
14096 if (s390_tune == PROCESSOR_2964_Z13)
14097 {
14098 unsigned int unit_mask, m = 1;
14099 int units, j;
14100
14101 unit_mask = s390_get_unit_mask (insn, &units);
14102 fprintf (file, "(units:");
14103 for (j = 0; j < units; j++, m <<= 1)
14104 if (m & unit_mask)
14105 fprintf (file, " %d", j);
14106 fprintf (file, ")");
14107 }
14108 fprintf (file, " sched state: %d\n", s390_sched_state);
14109
14110 if (s390_tune == PROCESSOR_2964_Z13)
14111 {
14112 int units, j;
14113
14114 s390_get_unit_mask (insn, &units);
14115
14116 fprintf (file, ";;\t\tBACKEND: units unused for: ");
14117 for (j = 0; j < units; j++)
14118 fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
14119 fprintf (file, "\n");
14120 }
14121 }
14122 }
14123
14124 if (GET_CODE (PATTERN (insn)) != USE
14125 && GET_CODE (PATTERN (insn)) != CLOBBER)
14126 return more - 1;
14127 else
14128 return more;
14129 }
14130
14131 static void
14132 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
14133 int verbose ATTRIBUTE_UNUSED,
14134 int max_ready ATTRIBUTE_UNUSED)
14135 {
14136 last_scheduled_insn = NULL;
14137 memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
14138 s390_sched_state = 0;
14139 }
14140
14141 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
14142 a new number struct loop *loop should be unrolled if tuned for cpus with
14143 a built-in stride prefetcher.
14144 The loop is analyzed for memory accesses by calling check_dpu for
14145 each rtx of the loop. Depending on the loop_depth and the amount of
14146 memory accesses a new number <=nunroll is returned to improve the
14147 behavior of the hardware prefetch unit. */
14148 static unsigned
14149 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
14150 {
14151 basic_block *bbs;
14152 rtx_insn *insn;
14153 unsigned i;
14154 unsigned mem_count = 0;
14155
14156 if (s390_tune < PROCESSOR_2097_Z10)
14157 return nunroll;
14158
14159 /* Count the number of memory references within the loop body. */
14160 bbs = get_loop_body (loop);
14161 subrtx_iterator::array_type array;
14162 for (i = 0; i < loop->num_nodes; i++)
14163 FOR_BB_INSNS (bbs[i], insn)
14164 if (INSN_P (insn) && INSN_CODE (insn) != -1)
14165 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
14166 if (MEM_P (*iter))
14167 mem_count += 1;
14168 free (bbs);
14169
14170 /* Prevent division by zero, and we do not need to adjust nunroll in this case. */
14171 if (mem_count == 0)
14172 return nunroll;
14173
14174 switch (loop_depth(loop))
14175 {
14176 case 1:
14177 return MIN (nunroll, 28 / mem_count);
14178 case 2:
14179 return MIN (nunroll, 22 / mem_count);
14180 default:
14181 return MIN (nunroll, 16 / mem_count);
14182 }
14183 }
14184
14185 /* Restore the current options. This is a hook function and also called
14186 internally. */
14187
14188 static void
14189 s390_function_specific_restore (struct gcc_options *opts,
14190 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
14191 {
14192 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
14193 }
14194
14195 static void
14196 s390_option_override_internal (bool main_args_p,
14197 struct gcc_options *opts,
14198 const struct gcc_options *opts_set)
14199 {
14200 const char *prefix;
14201 const char *suffix;
14202
14203 /* Set up prefix/suffix so the error messages refer to either the command
14204 line argument, or the attribute(target). */
14205 if (main_args_p)
14206 {
14207 prefix = "-m";
14208 suffix = "";
14209 }
14210 else
14211 {
14212 prefix = "option(\"";
14213 suffix = "\")";
14214 }
14215
14216
14217 /* Architecture mode defaults according to ABI. */
14218 if (!(opts_set->x_target_flags & MASK_ZARCH))
14219 {
14220 if (TARGET_64BIT)
14221 opts->x_target_flags |= MASK_ZARCH;
14222 else
14223 opts->x_target_flags &= ~MASK_ZARCH;
14224 }
14225
14226 /* Set the march default in case it hasn't been specified on cmdline. */
14227 if (!opts_set->x_s390_arch)
14228 opts->x_s390_arch = PROCESSOR_2064_Z900;
14229 else if (opts->x_s390_arch == PROCESSOR_9672_G5
14230 || opts->x_s390_arch == PROCESSOR_9672_G6)
14231 warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed "
14232 "in future releases; use at least %sarch=z900%s",
14233 prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6",
14234 suffix, prefix, suffix);
14235
14236 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
14237
14238 /* Determine processor to tune for. */
14239 if (!opts_set->x_s390_tune)
14240 opts->x_s390_tune = opts->x_s390_arch;
14241 else if (opts->x_s390_tune == PROCESSOR_9672_G5
14242 || opts->x_s390_tune == PROCESSOR_9672_G6)
14243 warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed "
14244 "in future releases; use at least %stune=z900%s",
14245 prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6",
14246 suffix, prefix, suffix);
14247
14248 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
14249
14250 /* Sanity checks. */
14251 if (opts->x_s390_arch == PROCESSOR_NATIVE
14252 || opts->x_s390_tune == PROCESSOR_NATIVE)
14253 gcc_unreachable ();
14254 if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts))
14255 error ("z/Architecture mode not supported on %s",
14256 processor_table[(int)opts->x_s390_arch].name);
14257 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
14258 error ("64-bit ABI not supported in ESA/390 mode");
14259
14260 /* Enable hardware transactions if available and not explicitly
14261 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
14262 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
14263 {
14264 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
14265 opts->x_target_flags |= MASK_OPT_HTM;
14266 else
14267 opts->x_target_flags &= ~MASK_OPT_HTM;
14268 }
14269
14270 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
14271 {
14272 if (TARGET_OPT_VX_P (opts->x_target_flags))
14273 {
14274 if (!TARGET_CPU_VX_P (opts))
14275 error ("hardware vector support not available on %s",
14276 processor_table[(int)opts->x_s390_arch].name);
14277 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14278 error ("hardware vector support not available with -msoft-float");
14279 }
14280 }
14281 else
14282 {
14283 if (TARGET_CPU_VX_P (opts))
14284 /* Enable vector support if available and not explicitly disabled
14285 by user. E.g. with -m31 -march=z13 -mzarch */
14286 opts->x_target_flags |= MASK_OPT_VX;
14287 else
14288 opts->x_target_flags &= ~MASK_OPT_VX;
14289 }
14290
14291 /* Use hardware DFP if available and not explicitly disabled by
14292 user. E.g. with -m31 -march=z10 -mzarch */
14293 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
14294 {
14295 if (TARGET_DFP_P (opts))
14296 opts->x_target_flags |= MASK_HARD_DFP;
14297 else
14298 opts->x_target_flags &= ~MASK_HARD_DFP;
14299 }
14300
14301 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
14302 {
14303 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
14304 {
14305 if (!TARGET_CPU_DFP_P (opts))
14306 error ("hardware decimal floating point instructions"
14307 " not available on %s",
14308 processor_table[(int)opts->x_s390_arch].name);
14309 if (!TARGET_ZARCH_P (opts->x_target_flags))
14310 error ("hardware decimal floating point instructions"
14311 " not available in ESA/390 mode");
14312 }
14313 else
14314 opts->x_target_flags &= ~MASK_HARD_DFP;
14315 }
14316
14317 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
14318 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14319 {
14320 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
14321 && TARGET_HARD_DFP_P (opts->x_target_flags))
14322 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
14323
14324 opts->x_target_flags &= ~MASK_HARD_DFP;
14325 }
14326
14327 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
14328 && TARGET_PACKED_STACK_P (opts->x_target_flags)
14329 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
14330 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
14331 "in combination");
14332
14333 if (opts->x_s390_stack_size)
14334 {
14335 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
14336 error ("stack size must be greater than the stack guard value");
14337 else if (opts->x_s390_stack_size > 1 << 16)
14338 error ("stack size must not be greater than 64k");
14339 }
14340 else if (opts->x_s390_stack_guard)
14341 error ("-mstack-guard implies use of -mstack-size");
14342
14343 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
14344 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
14345 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
14346 #endif
14347
14348 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
14349 {
14350 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
14351 opts->x_param_values,
14352 opts_set->x_param_values);
14353 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
14354 opts->x_param_values,
14355 opts_set->x_param_values);
14356 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
14357 opts->x_param_values,
14358 opts_set->x_param_values);
14359 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
14360 opts->x_param_values,
14361 opts_set->x_param_values);
14362 }
14363
14364 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
14365 opts->x_param_values,
14366 opts_set->x_param_values);
14367 /* values for loop prefetching */
14368 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
14369 opts->x_param_values,
14370 opts_set->x_param_values);
14371 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
14372 opts->x_param_values,
14373 opts_set->x_param_values);
14374 /* s390 has more than 2 levels and the size is much larger. Since
14375 we are always running virtualized assume that we only get a small
14376 part of the caches above l1. */
14377 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
14378 opts->x_param_values,
14379 opts_set->x_param_values);
14380 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
14381 opts->x_param_values,
14382 opts_set->x_param_values);
14383 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
14384 opts->x_param_values,
14385 opts_set->x_param_values);
14386
14387 /* Use the alternative scheduling-pressure algorithm by default. */
14388 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
14389 opts->x_param_values,
14390 opts_set->x_param_values);
14391
14392 /* Call target specific restore function to do post-init work. At the moment,
14393 this just sets opts->x_s390_cost_pointer. */
14394 s390_function_specific_restore (opts, NULL);
14395 }
14396
14397 static void
14398 s390_option_override (void)
14399 {
14400 unsigned int i;
14401 cl_deferred_option *opt;
14402 vec<cl_deferred_option> *v =
14403 (vec<cl_deferred_option> *) s390_deferred_options;
14404
14405 if (v)
14406 FOR_EACH_VEC_ELT (*v, i, opt)
14407 {
14408 switch (opt->opt_index)
14409 {
14410 case OPT_mhotpatch_:
14411 {
14412 int val1;
14413 int val2;
14414 char s[256];
14415 char *t;
14416
14417 strncpy (s, opt->arg, 256);
14418 s[255] = 0;
14419 t = strchr (s, ',');
14420 if (t != NULL)
14421 {
14422 *t = 0;
14423 t++;
14424 val1 = integral_argument (s);
14425 val2 = integral_argument (t);
14426 }
14427 else
14428 {
14429 val1 = -1;
14430 val2 = -1;
14431 }
14432 if (val1 == -1 || val2 == -1)
14433 {
14434 /* argument is not a plain number */
14435 error ("arguments to %qs should be non-negative integers",
14436 "-mhotpatch=n,m");
14437 break;
14438 }
14439 else if (val1 > s390_hotpatch_hw_max
14440 || val2 > s390_hotpatch_hw_max)
14441 {
14442 error ("argument to %qs is too large (max. %d)",
14443 "-mhotpatch=n,m", s390_hotpatch_hw_max);
14444 break;
14445 }
14446 s390_hotpatch_hw_before_label = val1;
14447 s390_hotpatch_hw_after_label = val2;
14448 break;
14449 }
14450 default:
14451 gcc_unreachable ();
14452 }
14453 }
14454
14455 /* Set up function hooks. */
14456 init_machine_status = s390_init_machine_status;
14457
14458 s390_option_override_internal (true, &global_options, &global_options_set);
14459
14460 /* Save the initial options in case the user does function specific
14461 options. */
14462 target_option_default_node = build_target_option_node (&global_options);
14463 target_option_current_node = target_option_default_node;
14464
14465 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
14466 requires the arch flags to be evaluated already. Since prefetching
14467 is beneficial on s390, we enable it if available. */
14468 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
14469 flag_prefetch_loop_arrays = 1;
14470
14471 if (TARGET_TPF)
14472 {
14473 /* Don't emit DWARF3/4 unless specifically selected. The TPF
14474 debuggers do not yet support DWARF 3/4. */
14475 if (!global_options_set.x_dwarf_strict)
14476 dwarf_strict = 1;
14477 if (!global_options_set.x_dwarf_version)
14478 dwarf_version = 2;
14479 }
14480
14481 /* Register a target-specific optimization-and-lowering pass
14482 to run immediately before prologue and epilogue generation.
14483
14484 Registering the pass must be done at start up. It's
14485 convenient to do it here. */
14486 opt_pass *new_pass = new pass_s390_early_mach (g);
14487 struct register_pass_info insert_pass_s390_early_mach =
14488 {
14489 new_pass, /* pass */
14490 "pro_and_epilogue", /* reference_pass_name */
14491 1, /* ref_pass_instance_number */
14492 PASS_POS_INSERT_BEFORE /* po_op */
14493 };
14494 register_pass (&insert_pass_s390_early_mach);
14495 }
14496
14497 #if S390_USE_TARGET_ATTRIBUTE
14498 /* Inner function to process the attribute((target(...))), take an argument and
14499 set the current options from the argument. If we have a list, recursively go
14500 over the list. */
14501
14502 static bool
14503 s390_valid_target_attribute_inner_p (tree args,
14504 struct gcc_options *opts,
14505 struct gcc_options *new_opts_set,
14506 bool force_pragma)
14507 {
14508 char *next_optstr;
14509 bool ret = true;
14510
14511 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
14512 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
14513 static const struct
14514 {
14515 const char *string;
14516 size_t len;
14517 int opt;
14518 int has_arg;
14519 int only_as_pragma;
14520 } attrs[] = {
14521 /* enum options */
14522 S390_ATTRIB ("arch=", OPT_march_, 1),
14523 S390_ATTRIB ("tune=", OPT_mtune_, 1),
14524 /* uinteger options */
14525 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
14526 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
14527 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
14528 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
14529 /* flag options */
14530 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
14531 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
14532 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
14533 S390_ATTRIB ("htm", OPT_mhtm, 0),
14534 S390_ATTRIB ("vx", OPT_mvx, 0),
14535 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
14536 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
14537 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
14538 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
14539 S390_PRAGMA ("zvector", OPT_mzvector, 0),
14540 /* boolean options */
14541 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
14542 };
14543 #undef S390_ATTRIB
14544 #undef S390_PRAGMA
14545
14546 /* If this is a list, recurse to get the options. */
14547 if (TREE_CODE (args) == TREE_LIST)
14548 {
14549 bool ret = true;
14550 int num_pragma_values;
14551 int i;
14552
14553 /* Note: attribs.c:decl_attributes prepends the values from
14554 current_target_pragma to the list of target attributes. To determine
14555 whether we're looking at a value of the attribute or the pragma we
14556 assume that the first [list_length (current_target_pragma)] values in
14557 the list are the values from the pragma. */
14558 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
14559 ? list_length (current_target_pragma) : 0;
14560 for (i = 0; args; args = TREE_CHAIN (args), i++)
14561 {
14562 bool is_pragma;
14563
14564 is_pragma = (force_pragma || i < num_pragma_values);
14565 if (TREE_VALUE (args)
14566 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
14567 opts, new_opts_set,
14568 is_pragma))
14569 {
14570 ret = false;
14571 }
14572 }
14573 return ret;
14574 }
14575
14576 else if (TREE_CODE (args) != STRING_CST)
14577 {
14578 error ("attribute %<target%> argument not a string");
14579 return false;
14580 }
14581
14582 /* Handle multiple arguments separated by commas. */
14583 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
14584
14585 while (next_optstr && *next_optstr != '\0')
14586 {
14587 char *p = next_optstr;
14588 char *orig_p = p;
14589 char *comma = strchr (next_optstr, ',');
14590 size_t len, opt_len;
14591 int opt;
14592 bool opt_set_p;
14593 char ch;
14594 unsigned i;
14595 int mask = 0;
14596 enum cl_var_type var_type;
14597 bool found;
14598
14599 if (comma)
14600 {
14601 *comma = '\0';
14602 len = comma - next_optstr;
14603 next_optstr = comma + 1;
14604 }
14605 else
14606 {
14607 len = strlen (p);
14608 next_optstr = NULL;
14609 }
14610
14611 /* Recognize no-xxx. */
14612 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
14613 {
14614 opt_set_p = false;
14615 p += 3;
14616 len -= 3;
14617 }
14618 else
14619 opt_set_p = true;
14620
14621 /* Find the option. */
14622 ch = *p;
14623 found = false;
14624 for (i = 0; i < ARRAY_SIZE (attrs); i++)
14625 {
14626 opt_len = attrs[i].len;
14627 if (ch == attrs[i].string[0]
14628 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
14629 && memcmp (p, attrs[i].string, opt_len) == 0)
14630 {
14631 opt = attrs[i].opt;
14632 if (!opt_set_p && cl_options[opt].cl_reject_negative)
14633 continue;
14634 mask = cl_options[opt].var_value;
14635 var_type = cl_options[opt].var_type;
14636 found = true;
14637 break;
14638 }
14639 }
14640
14641 /* Process the option. */
14642 if (!found)
14643 {
14644 error ("attribute(target(\"%s\")) is unknown", orig_p);
14645 return false;
14646 }
14647 else if (attrs[i].only_as_pragma && !force_pragma)
14648 {
14649 /* Value is not allowed for the target attribute. */
14650 error ("Value %qs is not supported by attribute %<target%>",
14651 attrs[i].string);
14652 return false;
14653 }
14654
14655 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
14656 {
14657 if (var_type == CLVC_BIT_CLEAR)
14658 opt_set_p = !opt_set_p;
14659
14660 if (opt_set_p)
14661 opts->x_target_flags |= mask;
14662 else
14663 opts->x_target_flags &= ~mask;
14664 new_opts_set->x_target_flags |= mask;
14665 }
14666
14667 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
14668 {
14669 int value;
14670
14671 if (cl_options[opt].cl_uinteger)
14672 {
14673 /* Unsigned integer argument. Code based on the function
14674 decode_cmdline_option () in opts-common.c. */
14675 value = integral_argument (p + opt_len);
14676 }
14677 else
14678 value = (opt_set_p) ? 1 : 0;
14679
14680 if (value != -1)
14681 {
14682 struct cl_decoded_option decoded;
14683
14684 /* Value range check; only implemented for numeric and boolean
14685 options at the moment. */
14686 generate_option (opt, NULL, value, CL_TARGET, &decoded);
14687 s390_handle_option (opts, new_opts_set, &decoded, input_location);
14688 set_option (opts, new_opts_set, opt, value,
14689 p + opt_len, DK_UNSPECIFIED, input_location,
14690 global_dc);
14691 }
14692 else
14693 {
14694 error ("attribute(target(\"%s\")) is unknown", orig_p);
14695 ret = false;
14696 }
14697 }
14698
14699 else if (cl_options[opt].var_type == CLVC_ENUM)
14700 {
14701 bool arg_ok;
14702 int value;
14703
14704 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
14705 if (arg_ok)
14706 set_option (opts, new_opts_set, opt, value,
14707 p + opt_len, DK_UNSPECIFIED, input_location,
14708 global_dc);
14709 else
14710 {
14711 error ("attribute(target(\"%s\")) is unknown", orig_p);
14712 ret = false;
14713 }
14714 }
14715
14716 else
14717 gcc_unreachable ();
14718 }
14719 return ret;
14720 }
14721
14722 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
14723
14724 tree
14725 s390_valid_target_attribute_tree (tree args,
14726 struct gcc_options *opts,
14727 const struct gcc_options *opts_set,
14728 bool force_pragma)
14729 {
14730 tree t = NULL_TREE;
14731 struct gcc_options new_opts_set;
14732
14733 memset (&new_opts_set, 0, sizeof (new_opts_set));
14734
14735 /* Process each of the options on the chain. */
14736 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
14737 force_pragma))
14738 return error_mark_node;
14739
14740 /* If some option was set (even if it has not changed), rerun
14741 s390_option_override_internal, and then save the options away. */
14742 if (new_opts_set.x_target_flags
14743 || new_opts_set.x_s390_arch
14744 || new_opts_set.x_s390_tune
14745 || new_opts_set.x_s390_stack_guard
14746 || new_opts_set.x_s390_stack_size
14747 || new_opts_set.x_s390_branch_cost
14748 || new_opts_set.x_s390_warn_framesize
14749 || new_opts_set.x_s390_warn_dynamicstack_p)
14750 {
14751 const unsigned char *src = (const unsigned char *)opts_set;
14752 unsigned char *dest = (unsigned char *)&new_opts_set;
14753 unsigned int i;
14754
14755 /* Merge the original option flags into the new ones. */
14756 for (i = 0; i < sizeof(*opts_set); i++)
14757 dest[i] |= src[i];
14758
14759 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
14760 s390_option_override_internal (false, opts, &new_opts_set);
14761 /* Save the current options unless we are validating options for
14762 #pragma. */
14763 t = build_target_option_node (opts);
14764 }
14765 return t;
14766 }
14767
14768 /* Hook to validate attribute((target("string"))). */
14769
14770 static bool
14771 s390_valid_target_attribute_p (tree fndecl,
14772 tree ARG_UNUSED (name),
14773 tree args,
14774 int ARG_UNUSED (flags))
14775 {
14776 struct gcc_options func_options;
14777 tree new_target, new_optimize;
14778 bool ret = true;
14779
14780 /* attribute((target("default"))) does nothing, beyond
14781 affecting multi-versioning. */
14782 if (TREE_VALUE (args)
14783 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
14784 && TREE_CHAIN (args) == NULL_TREE
14785 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
14786 return true;
14787
14788 tree old_optimize = build_optimization_node (&global_options);
14789
14790 /* Get the optimization options of the current function. */
14791 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
14792
14793 if (!func_optimize)
14794 func_optimize = old_optimize;
14795
14796 /* Init func_options. */
14797 memset (&func_options, 0, sizeof (func_options));
14798 init_options_struct (&func_options, NULL);
14799 lang_hooks.init_options_struct (&func_options);
14800
14801 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
14802
14803 /* Initialize func_options to the default before its target options can
14804 be set. */
14805 cl_target_option_restore (&func_options,
14806 TREE_TARGET_OPTION (target_option_default_node));
14807
14808 new_target = s390_valid_target_attribute_tree (args, &func_options,
14809 &global_options_set,
14810 (args ==
14811 current_target_pragma));
14812 new_optimize = build_optimization_node (&func_options);
14813 if (new_target == error_mark_node)
14814 ret = false;
14815 else if (fndecl && new_target)
14816 {
14817 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
14818 if (old_optimize != new_optimize)
14819 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
14820 }
14821 return ret;
14822 }
14823
14824 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
14825 cache. */
14826
14827 void
14828 s390_activate_target_options (tree new_tree)
14829 {
14830 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
14831 if (TREE_TARGET_GLOBALS (new_tree))
14832 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
14833 else if (new_tree == target_option_default_node)
14834 restore_target_globals (&default_target_globals);
14835 else
14836 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
14837 s390_previous_fndecl = NULL_TREE;
14838 }
14839
14840 /* Establish appropriate back-end context for processing the function
14841 FNDECL. The argument might be NULL to indicate processing at top
14842 level, outside of any function scope. */
14843 static void
14844 s390_set_current_function (tree fndecl)
14845 {
14846 /* Only change the context if the function changes. This hook is called
14847 several times in the course of compiling a function, and we don't want to
14848 slow things down too much or call target_reinit when it isn't safe. */
14849 if (fndecl == s390_previous_fndecl)
14850 return;
14851
14852 tree old_tree;
14853 if (s390_previous_fndecl == NULL_TREE)
14854 old_tree = target_option_current_node;
14855 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
14856 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
14857 else
14858 old_tree = target_option_default_node;
14859
14860 if (fndecl == NULL_TREE)
14861 {
14862 if (old_tree != target_option_current_node)
14863 s390_activate_target_options (target_option_current_node);
14864 return;
14865 }
14866
14867 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
14868 if (new_tree == NULL_TREE)
14869 new_tree = target_option_default_node;
14870
14871 if (old_tree != new_tree)
14872 s390_activate_target_options (new_tree);
14873 s390_previous_fndecl = fndecl;
14874 }
14875 #endif
14876
14877 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
14878
14879 static bool
14880 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
14881 unsigned int align ATTRIBUTE_UNUSED,
14882 enum by_pieces_operation op ATTRIBUTE_UNUSED,
14883 bool speed_p ATTRIBUTE_UNUSED)
14884 {
14885 return (size == 1 || size == 2
14886 || size == 4 || (TARGET_ZARCH && size == 8));
14887 }
14888
14889 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
14890
14891 static void
14892 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
14893 {
14894 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
14895 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
14896 tree call_efpc = build_call_expr (efpc, 0);
14897 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
14898
14899 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
14900 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
14901 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
14902 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
14903 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
14904 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
14905
14906 /* Generates the equivalent of feholdexcept (&fenv_var)
14907
14908 fenv_var = __builtin_s390_efpc ();
14909 __builtin_s390_sfpc (fenv_var & mask) */
14910 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
14911 tree new_fpc =
14912 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
14913 build_int_cst (unsigned_type_node,
14914 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
14915 FPC_EXCEPTION_MASK)));
14916 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
14917 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
14918
14919 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
14920
14921 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
14922 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
14923 build_int_cst (unsigned_type_node,
14924 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
14925 *clear = build_call_expr (sfpc, 1, new_fpc);
14926
14927 /* Generates the equivalent of feupdateenv (fenv_var)
14928
14929 old_fpc = __builtin_s390_efpc ();
14930 __builtin_s390_sfpc (fenv_var);
14931 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
14932
14933 old_fpc = create_tmp_var_raw (unsigned_type_node);
14934 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
14935 old_fpc, call_efpc);
14936
14937 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
14938
14939 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
14940 build_int_cst (unsigned_type_node,
14941 FPC_FLAGS_MASK));
14942 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
14943 build_int_cst (unsigned_type_node,
14944 FPC_FLAGS_SHIFT));
14945 tree atomic_feraiseexcept
14946 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
14947 raise_old_except = build_call_expr (atomic_feraiseexcept,
14948 1, raise_old_except);
14949
14950 *update = build2 (COMPOUND_EXPR, void_type_node,
14951 build2 (COMPOUND_EXPR, void_type_node,
14952 store_old_fpc, set_new_fpc),
14953 raise_old_except);
14954
14955 #undef FPC_EXCEPTION_MASK
14956 #undef FPC_FLAGS_MASK
14957 #undef FPC_DXC_MASK
14958 #undef FPC_EXCEPTION_MASK_SHIFT
14959 #undef FPC_FLAGS_SHIFT
14960 #undef FPC_DXC_SHIFT
14961 }
14962
14963 /* Return the vector mode to be used for inner mode MODE when doing
14964 vectorization. */
14965 static machine_mode
14966 s390_preferred_simd_mode (machine_mode mode)
14967 {
14968 if (TARGET_VX)
14969 switch (mode)
14970 {
14971 case DFmode:
14972 return V2DFmode;
14973 case DImode:
14974 return V2DImode;
14975 case SImode:
14976 return V4SImode;
14977 case HImode:
14978 return V8HImode;
14979 case QImode:
14980 return V16QImode;
14981 default:;
14982 }
14983 return word_mode;
14984 }
14985
14986 /* Our hardware does not require vectors to be strictly aligned. */
14987 static bool
14988 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
14989 const_tree type ATTRIBUTE_UNUSED,
14990 int misalignment ATTRIBUTE_UNUSED,
14991 bool is_packed ATTRIBUTE_UNUSED)
14992 {
14993 if (TARGET_VX)
14994 return true;
14995
14996 return default_builtin_support_vector_misalignment (mode, type, misalignment,
14997 is_packed);
14998 }
14999
15000 /* The vector ABI requires vector types to be aligned on an 8 byte
15001 boundary (our stack alignment). However, we allow this to be
15002 overriden by the user, while this definitely breaks the ABI. */
15003 static HOST_WIDE_INT
15004 s390_vector_alignment (const_tree type)
15005 {
15006 if (!TARGET_VX_ABI)
15007 return default_vector_alignment (type);
15008
15009 if (TYPE_USER_ALIGN (type))
15010 return TYPE_ALIGN (type);
15011
15012 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
15013 }
15014
15015 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15016 /* Implement TARGET_ASM_FILE_START. */
15017 static void
15018 s390_asm_file_start (void)
15019 {
15020 default_file_start ();
15021 s390_asm_output_machine_for_arch (asm_out_file);
15022 }
15023 #endif
15024
15025 /* Implement TARGET_ASM_FILE_END. */
15026 static void
15027 s390_asm_file_end (void)
15028 {
15029 #ifdef HAVE_AS_GNU_ATTRIBUTE
15030 varpool_node *vnode;
15031 cgraph_node *cnode;
15032
15033 FOR_EACH_VARIABLE (vnode)
15034 if (TREE_PUBLIC (vnode->decl))
15035 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
15036
15037 FOR_EACH_FUNCTION (cnode)
15038 if (TREE_PUBLIC (cnode->decl))
15039 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
15040
15041
15042 if (s390_vector_abi != 0)
15043 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
15044 s390_vector_abi);
15045 #endif
15046 file_end_indicate_exec_stack ();
15047
15048 if (flag_split_stack)
15049 file_end_indicate_split_stack ();
15050 }
15051
15052 /* Return true if TYPE is a vector bool type. */
15053 static inline bool
15054 s390_vector_bool_type_p (const_tree type)
15055 {
15056 return TYPE_VECTOR_OPAQUE (type);
15057 }
15058
15059 /* Return the diagnostic message string if the binary operation OP is
15060 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15061 static const char*
15062 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
15063 {
15064 bool bool1_p, bool2_p;
15065 bool plusminus_p;
15066 bool muldiv_p;
15067 bool compare_p;
15068 machine_mode mode1, mode2;
15069
15070 if (!TARGET_ZVECTOR)
15071 return NULL;
15072
15073 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
15074 return NULL;
15075
15076 bool1_p = s390_vector_bool_type_p (type1);
15077 bool2_p = s390_vector_bool_type_p (type2);
15078
15079 /* Mixing signed and unsigned types is forbidden for all
15080 operators. */
15081 if (!bool1_p && !bool2_p
15082 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
15083 return N_("types differ in signess");
15084
15085 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
15086 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
15087 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
15088 || op == ROUND_DIV_EXPR);
15089 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
15090 || op == EQ_EXPR || op == NE_EXPR);
15091
15092 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
15093 return N_("binary operator does not support two vector bool operands");
15094
15095 if (bool1_p != bool2_p && (muldiv_p || compare_p))
15096 return N_("binary operator does not support vector bool operand");
15097
15098 mode1 = TYPE_MODE (type1);
15099 mode2 = TYPE_MODE (type2);
15100
15101 if (bool1_p != bool2_p && plusminus_p
15102 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
15103 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
15104 return N_("binary operator does not support mixing vector "
15105 "bool with floating point vector operands");
15106
15107 return NULL;
15108 }
15109
15110 /* Initialize GCC target structure. */
15111
15112 #undef TARGET_ASM_ALIGNED_HI_OP
15113 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
15114 #undef TARGET_ASM_ALIGNED_DI_OP
15115 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
15116 #undef TARGET_ASM_INTEGER
15117 #define TARGET_ASM_INTEGER s390_assemble_integer
15118
15119 #undef TARGET_ASM_OPEN_PAREN
15120 #define TARGET_ASM_OPEN_PAREN ""
15121
15122 #undef TARGET_ASM_CLOSE_PAREN
15123 #define TARGET_ASM_CLOSE_PAREN ""
15124
15125 #undef TARGET_OPTION_OVERRIDE
15126 #define TARGET_OPTION_OVERRIDE s390_option_override
15127
15128 #ifdef TARGET_THREAD_SSP_OFFSET
15129 #undef TARGET_STACK_PROTECT_GUARD
15130 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
15131 #endif
15132
15133 #undef TARGET_ENCODE_SECTION_INFO
15134 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
15135
15136 #undef TARGET_SCALAR_MODE_SUPPORTED_P
15137 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
15138
15139 #ifdef HAVE_AS_TLS
15140 #undef TARGET_HAVE_TLS
15141 #define TARGET_HAVE_TLS true
15142 #endif
15143 #undef TARGET_CANNOT_FORCE_CONST_MEM
15144 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
15145
15146 #undef TARGET_DELEGITIMIZE_ADDRESS
15147 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
15148
15149 #undef TARGET_LEGITIMIZE_ADDRESS
15150 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
15151
15152 #undef TARGET_RETURN_IN_MEMORY
15153 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
15154
15155 #undef TARGET_INIT_BUILTINS
15156 #define TARGET_INIT_BUILTINS s390_init_builtins
15157 #undef TARGET_EXPAND_BUILTIN
15158 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
15159 #undef TARGET_BUILTIN_DECL
15160 #define TARGET_BUILTIN_DECL s390_builtin_decl
15161
15162 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
15163 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
15164
15165 #undef TARGET_ASM_OUTPUT_MI_THUNK
15166 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
15167 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
15168 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
15169
15170 #undef TARGET_SCHED_ADJUST_PRIORITY
15171 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
15172 #undef TARGET_SCHED_ISSUE_RATE
15173 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
15174 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
15175 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
15176
15177 #undef TARGET_SCHED_VARIABLE_ISSUE
15178 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
15179 #undef TARGET_SCHED_REORDER
15180 #define TARGET_SCHED_REORDER s390_sched_reorder
15181 #undef TARGET_SCHED_INIT
15182 #define TARGET_SCHED_INIT s390_sched_init
15183
15184 #undef TARGET_CANNOT_COPY_INSN_P
15185 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
15186 #undef TARGET_RTX_COSTS
15187 #define TARGET_RTX_COSTS s390_rtx_costs
15188 #undef TARGET_ADDRESS_COST
15189 #define TARGET_ADDRESS_COST s390_address_cost
15190 #undef TARGET_REGISTER_MOVE_COST
15191 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
15192 #undef TARGET_MEMORY_MOVE_COST
15193 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
15194
15195 #undef TARGET_MACHINE_DEPENDENT_REORG
15196 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
15197
15198 #undef TARGET_VALID_POINTER_MODE
15199 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
15200
15201 #undef TARGET_BUILD_BUILTIN_VA_LIST
15202 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
15203 #undef TARGET_EXPAND_BUILTIN_VA_START
15204 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
15205 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
15206 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
15207
15208 #undef TARGET_PROMOTE_FUNCTION_MODE
15209 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
15210 #undef TARGET_PASS_BY_REFERENCE
15211 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
15212
15213 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
15214 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
15215 #undef TARGET_FUNCTION_ARG
15216 #define TARGET_FUNCTION_ARG s390_function_arg
15217 #undef TARGET_FUNCTION_ARG_ADVANCE
15218 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
15219 #undef TARGET_FUNCTION_VALUE
15220 #define TARGET_FUNCTION_VALUE s390_function_value
15221 #undef TARGET_LIBCALL_VALUE
15222 #define TARGET_LIBCALL_VALUE s390_libcall_value
15223 #undef TARGET_STRICT_ARGUMENT_NAMING
15224 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
15225
15226 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
15227 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
15228
15229 #undef TARGET_FIXED_CONDITION_CODE_REGS
15230 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
15231
15232 #undef TARGET_CC_MODES_COMPATIBLE
15233 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
15234
15235 #undef TARGET_INVALID_WITHIN_DOLOOP
15236 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
15237
15238 #ifdef HAVE_AS_TLS
15239 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
15240 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
15241 #endif
15242
15243 #undef TARGET_DWARF_FRAME_REG_MODE
15244 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
15245
15246 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
15247 #undef TARGET_MANGLE_TYPE
15248 #define TARGET_MANGLE_TYPE s390_mangle_type
15249 #endif
15250
15251 #undef TARGET_SCALAR_MODE_SUPPORTED_P
15252 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
15253
15254 #undef TARGET_VECTOR_MODE_SUPPORTED_P
15255 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
15256
15257 #undef TARGET_PREFERRED_RELOAD_CLASS
15258 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
15259
15260 #undef TARGET_SECONDARY_RELOAD
15261 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
15262
15263 #undef TARGET_LIBGCC_CMP_RETURN_MODE
15264 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
15265
15266 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
15267 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
15268
15269 #undef TARGET_LEGITIMATE_ADDRESS_P
15270 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
15271
15272 #undef TARGET_LEGITIMATE_CONSTANT_P
15273 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
15274
15275 #undef TARGET_LRA_P
15276 #define TARGET_LRA_P s390_lra_p
15277
15278 #undef TARGET_CAN_ELIMINATE
15279 #define TARGET_CAN_ELIMINATE s390_can_eliminate
15280
15281 #undef TARGET_CONDITIONAL_REGISTER_USAGE
15282 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
15283
15284 #undef TARGET_LOOP_UNROLL_ADJUST
15285 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
15286
15287 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
15288 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
15289 #undef TARGET_TRAMPOLINE_INIT
15290 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
15291
15292 #undef TARGET_UNWIND_WORD_MODE
15293 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
15294
15295 #undef TARGET_CANONICALIZE_COMPARISON
15296 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
15297
15298 #undef TARGET_HARD_REGNO_SCRATCH_OK
15299 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
15300
15301 #undef TARGET_ATTRIBUTE_TABLE
15302 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
15303
15304 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
15305 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
15306
15307 #undef TARGET_SET_UP_BY_PROLOGUE
15308 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
15309
15310 #undef TARGET_EXTRA_LIVE_ON_ENTRY
15311 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
15312
15313 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
15314 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
15315 s390_use_by_pieces_infrastructure_p
15316
15317 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
15318 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
15319
15320 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
15321 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
15322
15323 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
15324 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
15325
15326 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
15327 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
15328
15329 #undef TARGET_VECTOR_ALIGNMENT
15330 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
15331
15332 #undef TARGET_INVALID_BINARY_OP
15333 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
15334
15335 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15336 #undef TARGET_ASM_FILE_START
15337 #define TARGET_ASM_FILE_START s390_asm_file_start
15338 #endif
15339
15340 #undef TARGET_ASM_FILE_END
15341 #define TARGET_ASM_FILE_END s390_asm_file_end
15342
15343 #if S390_USE_TARGET_ATTRIBUTE
15344 #undef TARGET_SET_CURRENT_FUNCTION
15345 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
15346
15347 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
15348 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
15349 #endif
15350
15351 #undef TARGET_OPTION_RESTORE
15352 #define TARGET_OPTION_RESTORE s390_function_specific_restore
15353
15354 struct gcc_target targetm = TARGET_INITIALIZER;
15355
15356 #include "gt-s390.h"