gcc/config/s390/s390.c
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2013 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "regs.h"
31 #include "hard-reg-set.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "reload.h"
42 #include "diagnostic-core.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "debug.h"
48 #include "langhooks.h"
49 #include "optabs.h"
50 #include "gimple.h"
51 #include "gimplify.h"
52 #include "df.h"
53 #include "params.h"
54 #include "cfgloop.h"
55 #include "opts.h"
56
57 /* Define the specific costs for a given cpu. */
58
59 struct processor_costs
60 {
61 /* multiplication */
62 const int m; /* cost of an M instruction. */
63 const int mghi; /* cost of an MGHI instruction. */
64 const int mh; /* cost of an MH instruction. */
65 const int mhi; /* cost of an MHI instruction. */
66 const int ml; /* cost of an ML instruction. */
67 const int mr; /* cost of an MR instruction. */
68 const int ms; /* cost of an MS instruction. */
69 const int msg; /* cost of an MSG instruction. */
70 const int msgf; /* cost of an MSGF instruction. */
71 const int msgfr; /* cost of an MSGFR instruction. */
72 const int msgr; /* cost of an MSGR instruction. */
73 const int msr; /* cost of an MSR instruction. */
74 const int mult_df; /* cost of multiplication in DFmode. */
75 const int mxbr;
76 /* square root */
77 const int sqxbr; /* cost of square root in TFmode. */
78 const int sqdbr; /* cost of square root in DFmode. */
79 const int sqebr; /* cost of square root in SFmode. */
80 /* multiply and add */
81 const int madbr; /* cost of multiply and add in DFmode. */
82 const int maebr; /* cost of multiply and add in SFmode. */
83 /* division */
84 const int dxbr;
85 const int ddbr;
86 const int debr;
87 const int dlgr;
88 const int dlr;
89 const int dr;
90 const int dsgfr;
91 const int dsgr;
92 };
93
94 const struct processor_costs *s390_cost;
95
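/* Editorial illustration, not part of the original source: the table
   selected into s390_cost by s390_option_override (further down in
   this file) is consulted when rtx costs are computed.  A
   hypothetical helper for an integer multiply could look like

       static int
       example_mult_cost (enum machine_mode mode)
       {
         return mode == DImode ? s390_cost->msgr : s390_cost->msr;
       }

   Costs are given in COSTS_N_INSNS units, so z900_cost below charges
   ten instruction equivalents for a 64-bit MSGR while z990_cost
   charges four.  */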
96 static const
97 struct processor_costs z900_cost =
98 {
99 COSTS_N_INSNS (5), /* M */
100 COSTS_N_INSNS (10), /* MGHI */
101 COSTS_N_INSNS (5), /* MH */
102 COSTS_N_INSNS (4), /* MHI */
103 COSTS_N_INSNS (5), /* ML */
104 COSTS_N_INSNS (5), /* MR */
105 COSTS_N_INSNS (4), /* MS */
106 COSTS_N_INSNS (15), /* MSG */
107 COSTS_N_INSNS (7), /* MSGF */
108 COSTS_N_INSNS (7), /* MSGFR */
109 COSTS_N_INSNS (10), /* MSGR */
110 COSTS_N_INSNS (4), /* MSR */
111 COSTS_N_INSNS (7), /* multiplication in DFmode */
112 COSTS_N_INSNS (13), /* MXBR */
113 COSTS_N_INSNS (136), /* SQXBR */
114 COSTS_N_INSNS (44), /* SQDBR */
115 COSTS_N_INSNS (35), /* SQEBR */
116 COSTS_N_INSNS (18), /* MADBR */
117 COSTS_N_INSNS (13), /* MAEBR */
118 COSTS_N_INSNS (134), /* DXBR */
119 COSTS_N_INSNS (30), /* DDBR */
120 COSTS_N_INSNS (27), /* DEBR */
121 COSTS_N_INSNS (220), /* DLGR */
122 COSTS_N_INSNS (34), /* DLR */
123 COSTS_N_INSNS (34), /* DR */
124 COSTS_N_INSNS (32), /* DSGFR */
125 COSTS_N_INSNS (32), /* DSGR */
126 };
127
128 static const
129 struct processor_costs z990_cost =
130 {
131 COSTS_N_INSNS (4), /* M */
132 COSTS_N_INSNS (2), /* MGHI */
133 COSTS_N_INSNS (2), /* MH */
134 COSTS_N_INSNS (2), /* MHI */
135 COSTS_N_INSNS (4), /* ML */
136 COSTS_N_INSNS (4), /* MR */
137 COSTS_N_INSNS (5), /* MS */
138 COSTS_N_INSNS (6), /* MSG */
139 COSTS_N_INSNS (4), /* MSGF */
140 COSTS_N_INSNS (4), /* MSGFR */
141 COSTS_N_INSNS (4), /* MSGR */
142 COSTS_N_INSNS (4), /* MSR */
143 COSTS_N_INSNS (1), /* multiplication in DFmode */
144 COSTS_N_INSNS (28), /* MXBR */
145 COSTS_N_INSNS (130), /* SQXBR */
146 COSTS_N_INSNS (66), /* SQDBR */
147 COSTS_N_INSNS (38), /* SQEBR */
148 COSTS_N_INSNS (1), /* MADBR */
149 COSTS_N_INSNS (1), /* MAEBR */
150 COSTS_N_INSNS (60), /* DXBR */
151 COSTS_N_INSNS (40), /* DDBR */
152 COSTS_N_INSNS (26), /* DEBR */
153 COSTS_N_INSNS (176), /* DLGR */
154 COSTS_N_INSNS (31), /* DLR */
155 COSTS_N_INSNS (31), /* DR */
156 COSTS_N_INSNS (31), /* DSGFR */
157 COSTS_N_INSNS (31), /* DSGR */
158 };
159
160 static const
161 struct processor_costs z9_109_cost =
162 {
163 COSTS_N_INSNS (4), /* M */
164 COSTS_N_INSNS (2), /* MGHI */
165 COSTS_N_INSNS (2), /* MH */
166 COSTS_N_INSNS (2), /* MHI */
167 COSTS_N_INSNS (4), /* ML */
168 COSTS_N_INSNS (4), /* MR */
169 COSTS_N_INSNS (5), /* MS */
170 COSTS_N_INSNS (6), /* MSG */
171 COSTS_N_INSNS (4), /* MSGF */
172 COSTS_N_INSNS (4), /* MSGFR */
173 COSTS_N_INSNS (4), /* MSGR */
174 COSTS_N_INSNS (4), /* MSR */
175 COSTS_N_INSNS (1), /* multiplication in DFmode */
176 COSTS_N_INSNS (28), /* MXBR */
177 COSTS_N_INSNS (130), /* SQXBR */
178 COSTS_N_INSNS (66), /* SQDBR */
179 COSTS_N_INSNS (38), /* SQEBR */
180 COSTS_N_INSNS (1), /* MADBR */
181 COSTS_N_INSNS (1), /* MAEBR */
182 COSTS_N_INSNS (60), /* DXBR */
183 COSTS_N_INSNS (40), /* DDBR */
184 COSTS_N_INSNS (26), /* DEBR */
185 COSTS_N_INSNS (30), /* DLGR */
186 COSTS_N_INSNS (23), /* DLR */
187 COSTS_N_INSNS (23), /* DR */
188 COSTS_N_INSNS (24), /* DSGFR */
189 COSTS_N_INSNS (24), /* DSGR */
190 };
191
192 static const
193 struct processor_costs z10_cost =
194 {
195 COSTS_N_INSNS (10), /* M */
196 COSTS_N_INSNS (10), /* MGHI */
197 COSTS_N_INSNS (10), /* MH */
198 COSTS_N_INSNS (10), /* MHI */
199 COSTS_N_INSNS (10), /* ML */
200 COSTS_N_INSNS (10), /* MR */
201 COSTS_N_INSNS (10), /* MS */
202 COSTS_N_INSNS (10), /* MSG */
203 COSTS_N_INSNS (10), /* MSGF */
204 COSTS_N_INSNS (10), /* MSGFR */
205 COSTS_N_INSNS (10), /* MSGR */
206 COSTS_N_INSNS (10), /* MSR */
207 COSTS_N_INSNS (1) , /* multiplication in DFmode */
208 COSTS_N_INSNS (50), /* MXBR */
209 COSTS_N_INSNS (120), /* SQXBR */
210 COSTS_N_INSNS (52), /* SQDBR */
211 COSTS_N_INSNS (38), /* SQEBR */
212 COSTS_N_INSNS (1), /* MADBR */
213 COSTS_N_INSNS (1), /* MAEBR */
214 COSTS_N_INSNS (111), /* DXBR */
215 COSTS_N_INSNS (39), /* DDBR */
216 COSTS_N_INSNS (32), /* DEBR */
217 COSTS_N_INSNS (160), /* DLGR */
218 COSTS_N_INSNS (71), /* DLR */
219 COSTS_N_INSNS (71), /* DR */
220 COSTS_N_INSNS (71), /* DSGFR */
221 COSTS_N_INSNS (71), /* DSGR */
222 };
223
224 static const
225 struct processor_costs z196_cost =
226 {
227 COSTS_N_INSNS (7), /* M */
228 COSTS_N_INSNS (5), /* MGHI */
229 COSTS_N_INSNS (5), /* MH */
230 COSTS_N_INSNS (5), /* MHI */
231 COSTS_N_INSNS (7), /* ML */
232 COSTS_N_INSNS (7), /* MR */
233 COSTS_N_INSNS (6), /* MS */
234 COSTS_N_INSNS (8), /* MSG */
235 COSTS_N_INSNS (6), /* MSGF */
236 COSTS_N_INSNS (6), /* MSGFR */
237 COSTS_N_INSNS (8), /* MSGR */
238 COSTS_N_INSNS (6), /* MSR */
239 COSTS_N_INSNS (1) , /* multiplication in DFmode */
240 COSTS_N_INSNS (40), /* MXBR B+40 */
241 COSTS_N_INSNS (100), /* SQXBR B+100 */
242 COSTS_N_INSNS (42), /* SQDBR B+42 */
243 COSTS_N_INSNS (28), /* SQEBR B+28 */
244 COSTS_N_INSNS (1), /* MADBR B */
245 COSTS_N_INSNS (1), /* MAEBR B */
246 COSTS_N_INSNS (101), /* DXBR B+101 */
247 COSTS_N_INSNS (29), /* DDBR */
248 COSTS_N_INSNS (22), /* DEBR */
249 COSTS_N_INSNS (160), /* DLGR cracked */
250 COSTS_N_INSNS (160), /* DLR cracked */
251 COSTS_N_INSNS (160), /* DR expanded */
252 COSTS_N_INSNS (160), /* DSGFR cracked */
253 COSTS_N_INSNS (160), /* DSGR cracked */
254 };
255
256 static const
257 struct processor_costs zEC12_cost =
258 {
259 COSTS_N_INSNS (7), /* M */
260 COSTS_N_INSNS (5), /* MGHI */
261 COSTS_N_INSNS (5), /* MH */
262 COSTS_N_INSNS (5), /* MHI */
263 COSTS_N_INSNS (7), /* ML */
264 COSTS_N_INSNS (7), /* MR */
265 COSTS_N_INSNS (6), /* MS */
266 COSTS_N_INSNS (8), /* MSG */
267 COSTS_N_INSNS (6), /* MSGF */
268 COSTS_N_INSNS (6), /* MSGFR */
269 COSTS_N_INSNS (8), /* MSGR */
270 COSTS_N_INSNS (6), /* MSR */
271 COSTS_N_INSNS (1) , /* multiplication in DFmode */
272 COSTS_N_INSNS (40), /* MXBR B+40 */
273 COSTS_N_INSNS (100), /* SQXBR B+100 */
274 COSTS_N_INSNS (42), /* SQDBR B+42 */
275 COSTS_N_INSNS (28), /* SQEBR B+28 */
276 COSTS_N_INSNS (1), /* MADBR B */
277 COSTS_N_INSNS (1), /* MAEBR B */
278 COSTS_N_INSNS (131), /* DXBR B+131 */
279 COSTS_N_INSNS (29), /* DDBR */
280 COSTS_N_INSNS (22), /* DEBR */
281 COSTS_N_INSNS (160), /* DLGR cracked */
282 COSTS_N_INSNS (160), /* DLR cracked */
283 COSTS_N_INSNS (160), /* DR expanded */
284 COSTS_N_INSNS (160), /* DSGFR cracked */
285 COSTS_N_INSNS (160), /* DSGR cracked */
286 };
287
288 extern int reload_completed;
289
290 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
291 static rtx last_scheduled_insn;
292
293 /* Structure used to hold the components of a S/390 memory
294 address. A legitimate address on S/390 is of the general
295 form
296 base + index + displacement
297 where any of the components is optional.
298
299 base and index are registers of the class ADDR_REGS;
300 the displacement is an unsigned 12-bit immediate constant. */
301
302 struct s390_address
303 {
304 rtx base;
305 rtx indx;
306 rtx disp;
307 bool pointer;
308 bool literal_pool;
309 };
310
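/* Editorial illustration, not part of the original source: for a
   canonical address rtx of the form

       (plus (plus (reg %r2) (reg %r3)) (const_int 40))

   s390_decompose_address below fills this structure with
   indx = %r2, base = %r3 and disp = (const_int 40), matching the
   base + index + displacement form described above.  */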
311 /* The following structure is embedded in the machine
312 specific part of struct function. */
313
314 struct GTY (()) s390_frame_layout
315 {
316 /* Offset within stack frame. */
317 HOST_WIDE_INT gprs_offset;
318 HOST_WIDE_INT f0_offset;
319 HOST_WIDE_INT f4_offset;
320 HOST_WIDE_INT f8_offset;
321 HOST_WIDE_INT backchain_offset;
322
323 /* Numbers of the first and last GPRs for which slots in the
324 register save area are reserved. */
325 int first_save_gpr_slot;
326 int last_save_gpr_slot;
327
328 /* Location (FP register number) where GPRs (r0-r15) should
329 be saved to.
330 0 - does not need to be saved at all
331 -1 - stack slot */
332 signed char gpr_save_slots[16];
333
334 /* Numbers of the first and last GPRs to be saved and restored. */
335 int first_save_gpr;
336 int first_restore_gpr;
337 int last_save_gpr;
338 int last_restore_gpr;
339
340 /* Bits standing for floating point registers. Set, if the
341 respective register has to be saved. Starting with reg 16 (f0)
342 at the rightmost bit.
343 Bit 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
344 fpr 15 13 11 9 14 12 10 8 7 5 3 1 6 4 2 0
345 reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */
346 unsigned int fpr_bitmap;
347
348 /* Number of floating point registers f8-f15 which must be saved. */
349 int high_fprs;
350
351 /* Set if return address needs to be saved.
352 This flag is set by s390_return_addr_rtx if it could not use
353 the initial value of r14 and therefore depends on r14 saved
354 to the stack. */
355 bool save_return_addr_p;
356
357 /* Size of stack frame. */
358 HOST_WIDE_INT frame_size;
359 };
360
361 /* Define the structure for the machine field in struct function. */
362
363 struct GTY(()) machine_function
364 {
365 struct s390_frame_layout frame_layout;
366
367 /* Literal pool base register. */
368 rtx base_reg;
369
370 /* True if we may need to perform branch splitting. */
371 bool split_branches_pending_p;
372
373 /* Some local-dynamic TLS symbol name. */
374 const char *some_ld_name;
375
376 bool has_landing_pad_p;
377
378 /* True if the current function may contain a tbegin clobbering
379 FPRs. */
380 bool tbegin_p;
381 };
382
383 /* A few accessor macros for struct cfun->machine->s390_frame_layout. */
384
385 #define cfun_frame_layout (cfun->machine->frame_layout)
386 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
387 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
388 ? cfun_frame_layout.fpr_bitmap & 0x0f \
389 : cfun_frame_layout.fpr_bitmap & 0x03))
390 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
391 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
392 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
393 (1 << (REGNO - FPR0_REGNUM)))
394 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
395 (1 << (REGNO - FPR0_REGNUM))))
396 #define cfun_gpr_save_slot(REGNO) \
397 cfun->machine->frame_layout.gpr_save_slots[REGNO]
398
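/* Editorial illustration, not part of the original source: with f0
   being hard register 16 (see regclass_map further down), the call

       cfun_set_fpr_save (FPR0_REGNUM + 8);

   sets bit 8 of fpr_bitmap, which according to the table above
   stands for f8 (hard reg 24), and cfun_fpr_save_p (FPR0_REGNUM + 8)
   subsequently returns true.  */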
399 /* Number of GPRs and FPRs used for argument passing. */
400 #define GP_ARG_NUM_REG 5
401 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
402
403 /* A couple of shortcuts. */
404 #define CONST_OK_FOR_J(x) \
405 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
406 #define CONST_OK_FOR_K(x) \
407 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
408 #define CONST_OK_FOR_Os(x) \
409 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
410 #define CONST_OK_FOR_Op(x) \
411 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
412 #define CONST_OK_FOR_On(x) \
413 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
414
415 #define REGNO_PAIR_OK(REGNO, MODE) \
416 (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
417
418 /* The read-ahead distance of the dynamic branch prediction unit,
419 in bytes, on a z10 (or higher) CPU. */
420 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
421
422 /* Return the alignment for LABEL. We default to the -falign-labels
423 value except for the literal pool base label. */
424 int
425 s390_label_align (rtx label)
426 {
427 rtx prev_insn = prev_active_insn (label);
428
429 if (prev_insn == NULL_RTX)
430 goto old;
431
432 prev_insn = single_set (prev_insn);
433
434 if (prev_insn == NULL_RTX)
435 goto old;
436
437 prev_insn = SET_SRC (prev_insn);
438
439 /* Don't align literal pool base labels. */
440 if (GET_CODE (prev_insn) == UNSPEC
441 && XINT (prev_insn, 1) == UNSPEC_MAIN_BASE)
442 return 0;
443
444 old:
445 return align_labels_log;
446 }
447
448 static enum machine_mode
449 s390_libgcc_cmp_return_mode (void)
450 {
451 return TARGET_64BIT ? DImode : SImode;
452 }
453
454 static enum machine_mode
455 s390_libgcc_shift_count_mode (void)
456 {
457 return TARGET_64BIT ? DImode : SImode;
458 }
459
460 static enum machine_mode
461 s390_unwind_word_mode (void)
462 {
463 return TARGET_64BIT ? DImode : SImode;
464 }
465
466 /* Return true if the back end supports mode MODE. */
467 static bool
468 s390_scalar_mode_supported_p (enum machine_mode mode)
469 {
470 /* In contrast to the default implementation, reject TImode constants on
471 31-bit TARGET_ZARCH for ABI compliance. */
472 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
473 return false;
474
475 if (DECIMAL_FLOAT_MODE_P (mode))
476 return default_decimal_float_supported_p ();
477
478 return default_scalar_mode_supported_p (mode);
479 }
480
481 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
482
483 void
484 s390_set_has_landing_pad_p (bool value)
485 {
486 cfun->machine->has_landing_pad_p = value;
487 }
488
489 /* If two condition code modes are compatible, return a condition code
490 mode which is compatible with both. Otherwise, return
491 VOIDmode. */
492
493 static enum machine_mode
494 s390_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
495 {
496 if (m1 == m2)
497 return m1;
498
499 switch (m1)
500 {
501 case CCZmode:
502 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
503 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
504 return m2;
505 return VOIDmode;
506
507 case CCSmode:
508 case CCUmode:
509 case CCTmode:
510 case CCSRmode:
511 case CCURmode:
512 case CCZ1mode:
513 if (m2 == CCZmode)
514 return m1;
515
516 return VOIDmode;
517
518 default:
519 return VOIDmode;
520 }
521 return VOIDmode;
522 }
523
524 /* Return true if SET either doesn't set the CC register, or else
525 the source and destination have matching CC modes and that
526 CC mode is at least as constrained as REQ_MODE. */
527
528 static bool
529 s390_match_ccmode_set (rtx set, enum machine_mode req_mode)
530 {
531 enum machine_mode set_mode;
532
533 gcc_assert (GET_CODE (set) == SET);
534
535 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
536 return 1;
537
538 set_mode = GET_MODE (SET_DEST (set));
539 switch (set_mode)
540 {
541 case CCSmode:
542 case CCSRmode:
543 case CCUmode:
544 case CCURmode:
545 case CCLmode:
546 case CCL1mode:
547 case CCL2mode:
548 case CCL3mode:
549 case CCT1mode:
550 case CCT2mode:
551 case CCT3mode:
552 if (req_mode != set_mode)
553 return 0;
554 break;
555
556 case CCZmode:
557 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
558 && req_mode != CCSRmode && req_mode != CCURmode)
559 return 0;
560 break;
561
562 case CCAPmode:
563 case CCANmode:
564 if (req_mode != CCAmode)
565 return 0;
566 break;
567
568 default:
569 gcc_unreachable ();
570 }
571
572 return (GET_MODE (SET_SRC (set)) == set_mode);
573 }
574
575 /* Return true if every SET in INSN that sets the CC register
576 has source and destination with matching CC modes and that
577 CC mode is at least as constrained as REQ_MODE.
578 If REQ_MODE is VOIDmode, always return false. */
579
580 bool
581 s390_match_ccmode (rtx insn, enum machine_mode req_mode)
582 {
583 int i;
584
585 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
586 if (req_mode == VOIDmode)
587 return false;
588
589 if (GET_CODE (PATTERN (insn)) == SET)
590 return s390_match_ccmode_set (PATTERN (insn), req_mode);
591
592 if (GET_CODE (PATTERN (insn)) == PARALLEL)
593 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
594 {
595 rtx set = XVECEXP (PATTERN (insn), 0, i);
596 if (GET_CODE (set) == SET)
597 if (!s390_match_ccmode_set (set, req_mode))
598 return false;
599 }
600
601 return true;
602 }
603
604 /* If a test-under-mask instruction can be used to implement
605 (compare (and ... OP1) OP2), return the CC mode required
606 to do that. Otherwise, return VOIDmode.
607 MIXED is true if the instruction can distinguish between
608 CC1 and CC2 for mixed selected bits (TMxx); it is false
609 if the instruction cannot (TM). */
610
611 enum machine_mode
612 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
613 {
614 int bit0, bit1;
615
616 /* ??? Fixme: should work on CONST_DOUBLE as well. */
617 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
618 return VOIDmode;
619
620 /* Selected bits all zero: CC0.
621 e.g.: int a; if ((a & (16 + 128)) == 0) */
622 if (INTVAL (op2) == 0)
623 return CCTmode;
624
625 /* Selected bits all one: CC3.
626 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
627 if (INTVAL (op2) == INTVAL (op1))
628 return CCT3mode;
629
630 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
631 int a;
632 if ((a & (16 + 128)) == 16) -> CCT1
633 if ((a & (16 + 128)) == 128) -> CCT2 */
634 if (mixed)
635 {
636 bit1 = exact_log2 (INTVAL (op2));
637 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
638 if (bit0 != -1 && bit1 != -1)
639 return bit0 > bit1 ? CCT1mode : CCT2mode;
640 }
641
642 return VOIDmode;
643 }
644
645 /* Given a comparison code OP (EQ, NE, etc.) and the operands
646 OP0 and OP1 of a COMPARE, return the mode to be used for the
647 comparison. */
648
649 enum machine_mode
650 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
651 {
652 switch (code)
653 {
654 case EQ:
655 case NE:
656 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
657 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
658 return CCAPmode;
659 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
660 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
661 return CCAPmode;
662 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
663 || GET_CODE (op1) == NEG)
664 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
665 return CCLmode;
666
667 if (GET_CODE (op0) == AND)
668 {
669 /* Check whether we can potentially do it via TM. */
670 enum machine_mode ccmode;
671 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
672 if (ccmode != VOIDmode)
673 {
674 /* Relax CCTmode to CCZmode to allow fall-back to AND
675 if that turns out to be beneficial. */
676 return ccmode == CCTmode ? CCZmode : ccmode;
677 }
678 }
679
680 if (register_operand (op0, HImode)
681 && GET_CODE (op1) == CONST_INT
682 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
683 return CCT3mode;
684 if (register_operand (op0, QImode)
685 && GET_CODE (op1) == CONST_INT
686 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
687 return CCT3mode;
688
689 return CCZmode;
690
691 case LE:
692 case LT:
693 case GE:
694 case GT:
695 /* The only overflow condition of NEG and ABS happens when INT_MIN is
696 used as the operand: the result wraps around and stays negative,
697 i.e. we overflow from an expected positive value to a negative one.
698 Using CCAPmode the resulting cc can be used for comparisons. */
699 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
700 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
701 return CCAPmode;
702
703 /* If constants are involved in an add instruction it is possible to use
704 the resulting cc for comparisons with zero. Knowing the sign of the
705 constant the overflow behavior gets predictable. e.g.:
706 int a, b; if ((b = a + c) > 0)
707 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
708 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
709 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
710 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
711 /* Avoid INT32_MIN on 32 bit. */
712 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
713 {
714 if (INTVAL (XEXP((op0), 1)) < 0)
715 return CCANmode;
716 else
717 return CCAPmode;
718 }
719 /* Fall through. */
720 case UNORDERED:
721 case ORDERED:
722 case UNEQ:
723 case UNLE:
724 case UNLT:
725 case UNGE:
726 case UNGT:
727 case LTGT:
728 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
729 && GET_CODE (op1) != CONST_INT)
730 return CCSRmode;
731 return CCSmode;
732
733 case LTU:
734 case GEU:
735 if (GET_CODE (op0) == PLUS
736 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
737 return CCL1mode;
738
739 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
740 && GET_CODE (op1) != CONST_INT)
741 return CCURmode;
742 return CCUmode;
743
744 case LEU:
745 case GTU:
746 if (GET_CODE (op0) == MINUS
747 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
748 return CCL2mode;
749
750 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
751 && GET_CODE (op1) != CONST_INT)
752 return CCURmode;
753 return CCUmode;
754
755 default:
756 gcc_unreachable ();
757 }
758 }
759
760 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
761 that we can implement more efficiently. */
762
763 static void
764 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
765 bool op0_preserve_value)
766 {
767 if (op0_preserve_value)
768 return;
769
770 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
771 if ((*code == EQ || *code == NE)
772 && *op1 == const0_rtx
773 && GET_CODE (*op0) == ZERO_EXTRACT
774 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
775 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
776 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
777 {
778 rtx inner = XEXP (*op0, 0);
779 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
780 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
781 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
782
783 if (len > 0 && len < modesize
784 && pos >= 0 && pos + len <= modesize
785 && modesize <= HOST_BITS_PER_WIDE_INT)
786 {
787 unsigned HOST_WIDE_INT block;
788 block = ((unsigned HOST_WIDE_INT) 1 << len) - 1;
789 block <<= modesize - pos - len;
790
791 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
792 gen_int_mode (block, GET_MODE (inner)));
793 }
794 }
795
796 /* Narrow AND of memory against immediate to enable TM. */
797 if ((*code == EQ || *code == NE)
798 && *op1 == const0_rtx
799 && GET_CODE (*op0) == AND
800 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
801 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
802 {
803 rtx inner = XEXP (*op0, 0);
804 rtx mask = XEXP (*op0, 1);
805
806 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
807 if (GET_CODE (inner) == SUBREG
808 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
809 && (GET_MODE_SIZE (GET_MODE (inner))
810 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
811 && ((INTVAL (mask)
812 & GET_MODE_MASK (GET_MODE (inner))
813 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
814 == 0))
815 inner = SUBREG_REG (inner);
816
817 /* Do not change volatile MEMs. */
818 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
819 {
820 int part = s390_single_part (XEXP (*op0, 1),
821 GET_MODE (inner), QImode, 0);
822 if (part >= 0)
823 {
824 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
825 inner = adjust_address_nv (inner, QImode, part);
826 *op0 = gen_rtx_AND (QImode, inner, mask);
827 }
828 }
829 }
830
831 /* Narrow comparisons against 0xffff to HImode if possible. */
832 if ((*code == EQ || *code == NE)
833 && GET_CODE (*op1) == CONST_INT
834 && INTVAL (*op1) == 0xffff
835 && SCALAR_INT_MODE_P (GET_MODE (*op0))
836 && (nonzero_bits (*op0, GET_MODE (*op0))
837 & ~(unsigned HOST_WIDE_INT) 0xffff) == 0)
838 {
839 *op0 = gen_lowpart (HImode, *op0);
840 *op1 = constm1_rtx;
841 }
842
843 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
844 if (GET_CODE (*op0) == UNSPEC
845 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
846 && XVECLEN (*op0, 0) == 1
847 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
848 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
849 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
850 && *op1 == const0_rtx)
851 {
852 enum rtx_code new_code = UNKNOWN;
853 switch (*code)
854 {
855 case EQ: new_code = EQ; break;
856 case NE: new_code = NE; break;
857 case LT: new_code = GTU; break;
858 case GT: new_code = LTU; break;
859 case LE: new_code = GEU; break;
860 case GE: new_code = LEU; break;
861 default: break;
862 }
863
864 if (new_code != UNKNOWN)
865 {
866 *op0 = XVECEXP (*op0, 0, 0);
867 *code = new_code;
868 }
869 }
870
871 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
872 if (GET_CODE (*op0) == UNSPEC
873 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
874 && XVECLEN (*op0, 0) == 1
875 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
876 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
877 && CONST_INT_P (*op1))
878 {
879 enum rtx_code new_code = UNKNOWN;
880 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
881 {
882 case CCZmode:
883 case CCRAWmode:
884 switch (*code)
885 {
886 case EQ: new_code = EQ; break;
887 case NE: new_code = NE; break;
888 default: break;
889 }
890 break;
891 default: break;
892 }
893
894 if (new_code != UNKNOWN)
895 {
896 /* For CCRAWmode put the required cc mask into the second
897 operand. */
898 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode)
899 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
900 *op0 = XVECEXP (*op0, 0, 0);
901 *code = new_code;
902 }
903 }
904
905 /* Simplify cascaded EQ, NE with const0_rtx. */
906 if ((*code == NE || *code == EQ)
907 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
908 && GET_MODE (*op0) == SImode
909 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
910 && REG_P (XEXP (*op0, 0))
911 && XEXP (*op0, 1) == const0_rtx
912 && *op1 == const0_rtx)
913 {
914 if ((*code == EQ && GET_CODE (*op0) == NE)
915 || (*code == NE && GET_CODE (*op0) == EQ))
916 *code = EQ;
917 else
918 *code = NE;
919 *op0 = XEXP (*op0, 0);
920 }
921
922 /* Prefer register over memory as first operand. */
923 if (MEM_P (*op0) && REG_P (*op1))
924 {
925 rtx tem = *op0; *op0 = *op1; *op1 = tem;
926 *code = (int)swap_condition ((enum rtx_code)*code);
927 }
928 }
929
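/* Editorial illustration, not part of the original source: for an
   SImode value X whose upper 16 bits are known to be zero, the
   comparison (eq X 0xffff) is narrowed above to an HImode comparison
   against constm1_rtx.  s390_select_ccmode then matches the
   register-against-minus-one case and picks CCT3mode, i.e. a
   test-under-mask checking that all selected bits are one.  */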
930 /* Emit a compare instruction suitable to implement the comparison
931 OP0 CODE OP1. Return the correct condition RTL to be placed in
932 the IF_THEN_ELSE of the conditional branch testing the result. */
933
934 rtx
935 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
936 {
937 enum machine_mode mode = s390_select_ccmode (code, op0, op1);
938 rtx cc;
939
940 /* Do not output a redundant compare instruction if a compare_and_swap
941 pattern already computed the result and the machine modes are compatible. */
942 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
943 {
944 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
945 == GET_MODE (op0));
946 cc = op0;
947 }
948 else
949 {
950 cc = gen_rtx_REG (mode, CC_REGNUM);
951 emit_insn (gen_rtx_SET (VOIDmode, cc, gen_rtx_COMPARE (mode, op0, op1)));
952 }
953
954 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
955 }
956
957 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
958 matches CMP.
959 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
960 conditional branch testing the result. */
961
962 static rtx
963 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
964 rtx cmp, rtx new_rtx)
965 {
966 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp, new_rtx));
967 return s390_emit_compare (code, gen_rtx_REG (CCZ1mode, CC_REGNUM),
968 const0_rtx);
969 }
970
971 /* Emit a jump instruction to TARGET and return it. If COND is
972 NULL_RTX, emit an unconditional jump, else a conditional jump under
973 condition COND. */
974
975 rtx
976 s390_emit_jump (rtx target, rtx cond)
977 {
978 rtx insn;
979
980 target = gen_rtx_LABEL_REF (VOIDmode, target);
981 if (cond)
982 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
983
984 insn = gen_rtx_SET (VOIDmode, pc_rtx, target);
985 return emit_jump_insn (insn);
986 }
987
988 /* Return branch condition mask to implement a branch
989 specified by CODE. Return -1 for invalid comparisons. */
990
991 int
992 s390_branch_condition_mask (rtx code)
993 {
994 const int CC0 = 1 << 3;
995 const int CC1 = 1 << 2;
996 const int CC2 = 1 << 1;
997 const int CC3 = 1 << 0;
998
999 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1000 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1001 gcc_assert (XEXP (code, 1) == const0_rtx
1002 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1003 && CONST_INT_P (XEXP (code, 1))));
1004
1005
1006 switch (GET_MODE (XEXP (code, 0)))
1007 {
1008 case CCZmode:
1009 case CCZ1mode:
1010 switch (GET_CODE (code))
1011 {
1012 case EQ: return CC0;
1013 case NE: return CC1 | CC2 | CC3;
1014 default: return -1;
1015 }
1016 break;
1017
1018 case CCT1mode:
1019 switch (GET_CODE (code))
1020 {
1021 case EQ: return CC1;
1022 case NE: return CC0 | CC2 | CC3;
1023 default: return -1;
1024 }
1025 break;
1026
1027 case CCT2mode:
1028 switch (GET_CODE (code))
1029 {
1030 case EQ: return CC2;
1031 case NE: return CC0 | CC1 | CC3;
1032 default: return -1;
1033 }
1034 break;
1035
1036 case CCT3mode:
1037 switch (GET_CODE (code))
1038 {
1039 case EQ: return CC3;
1040 case NE: return CC0 | CC1 | CC2;
1041 default: return -1;
1042 }
1043 break;
1044
1045 case CCLmode:
1046 switch (GET_CODE (code))
1047 {
1048 case EQ: return CC0 | CC2;
1049 case NE: return CC1 | CC3;
1050 default: return -1;
1051 }
1052 break;
1053
1054 case CCL1mode:
1055 switch (GET_CODE (code))
1056 {
1057 case LTU: return CC2 | CC3; /* carry */
1058 case GEU: return CC0 | CC1; /* no carry */
1059 default: return -1;
1060 }
1061 break;
1062
1063 case CCL2mode:
1064 switch (GET_CODE (code))
1065 {
1066 case GTU: return CC0 | CC1; /* borrow */
1067 case LEU: return CC2 | CC3; /* no borrow */
1068 default: return -1;
1069 }
1070 break;
1071
1072 case CCL3mode:
1073 switch (GET_CODE (code))
1074 {
1075 case EQ: return CC0 | CC2;
1076 case NE: return CC1 | CC3;
1077 case LTU: return CC1;
1078 case GTU: return CC3;
1079 case LEU: return CC1 | CC2;
1080 case GEU: return CC2 | CC3;
1081 default: return -1;
1082 }
1083
1084 case CCUmode:
1085 switch (GET_CODE (code))
1086 {
1087 case EQ: return CC0;
1088 case NE: return CC1 | CC2 | CC3;
1089 case LTU: return CC1;
1090 case GTU: return CC2;
1091 case LEU: return CC0 | CC1;
1092 case GEU: return CC0 | CC2;
1093 default: return -1;
1094 }
1095 break;
1096
1097 case CCURmode:
1098 switch (GET_CODE (code))
1099 {
1100 case EQ: return CC0;
1101 case NE: return CC2 | CC1 | CC3;
1102 case LTU: return CC2;
1103 case GTU: return CC1;
1104 case LEU: return CC0 | CC2;
1105 case GEU: return CC0 | CC1;
1106 default: return -1;
1107 }
1108 break;
1109
1110 case CCAPmode:
1111 switch (GET_CODE (code))
1112 {
1113 case EQ: return CC0;
1114 case NE: return CC1 | CC2 | CC3;
1115 case LT: return CC1 | CC3;
1116 case GT: return CC2;
1117 case LE: return CC0 | CC1 | CC3;
1118 case GE: return CC0 | CC2;
1119 default: return -1;
1120 }
1121 break;
1122
1123 case CCANmode:
1124 switch (GET_CODE (code))
1125 {
1126 case EQ: return CC0;
1127 case NE: return CC1 | CC2 | CC3;
1128 case LT: return CC1;
1129 case GT: return CC2 | CC3;
1130 case LE: return CC0 | CC1;
1131 case GE: return CC0 | CC2 | CC3;
1132 default: return -1;
1133 }
1134 break;
1135
1136 case CCSmode:
1137 switch (GET_CODE (code))
1138 {
1139 case EQ: return CC0;
1140 case NE: return CC1 | CC2 | CC3;
1141 case LT: return CC1;
1142 case GT: return CC2;
1143 case LE: return CC0 | CC1;
1144 case GE: return CC0 | CC2;
1145 case UNORDERED: return CC3;
1146 case ORDERED: return CC0 | CC1 | CC2;
1147 case UNEQ: return CC0 | CC3;
1148 case UNLT: return CC1 | CC3;
1149 case UNGT: return CC2 | CC3;
1150 case UNLE: return CC0 | CC1 | CC3;
1151 case UNGE: return CC0 | CC2 | CC3;
1152 case LTGT: return CC1 | CC2;
1153 default: return -1;
1154 }
1155 break;
1156
1157 case CCSRmode:
1158 switch (GET_CODE (code))
1159 {
1160 case EQ: return CC0;
1161 case NE: return CC2 | CC1 | CC3;
1162 case LT: return CC2;
1163 case GT: return CC1;
1164 case LE: return CC0 | CC2;
1165 case GE: return CC0 | CC1;
1166 case UNORDERED: return CC3;
1167 case ORDERED: return CC0 | CC2 | CC1;
1168 case UNEQ: return CC0 | CC3;
1169 case UNLT: return CC2 | CC3;
1170 case UNGT: return CC1 | CC3;
1171 case UNLE: return CC0 | CC2 | CC3;
1172 case UNGE: return CC0 | CC1 | CC3;
1173 case LTGT: return CC2 | CC1;
1174 default: return -1;
1175 }
1176 break;
1177
1178 case CCRAWmode:
1179 switch (GET_CODE (code))
1180 {
1181 case EQ:
1182 return INTVAL (XEXP (code, 1));
1183 case NE:
1184 return (INTVAL (XEXP (code, 1))) ^ 0xf;
1185 default:
1186 gcc_unreachable ();
1187 }
1188
1189 default:
1190 return -1;
1191 }
1192 }
1193
1194
1195 /* Return branch condition mask to implement a compare and branch
1196 specified by CODE. Return -1 for invalid comparisons. */
1197
1198 int
1199 s390_compare_and_branch_condition_mask (rtx code)
1200 {
1201 const int CC0 = 1 << 3;
1202 const int CC1 = 1 << 2;
1203 const int CC2 = 1 << 1;
1204
1205 switch (GET_CODE (code))
1206 {
1207 case EQ:
1208 return CC0;
1209 case NE:
1210 return CC1 | CC2;
1211 case LT:
1212 case LTU:
1213 return CC1;
1214 case GT:
1215 case GTU:
1216 return CC2;
1217 case LE:
1218 case LEU:
1219 return CC0 | CC1;
1220 case GE:
1221 case GEU:
1222 return CC0 | CC2;
1223 default:
1224 gcc_unreachable ();
1225 }
1226 return -1;
1227 }
1228
1229 /* If INV is false, return assembler mnemonic string to implement
1230 a branch specified by CODE. If INV is true, return mnemonic
1231 for the corresponding inverted branch. */
1232
1233 static const char *
1234 s390_branch_condition_mnemonic (rtx code, int inv)
1235 {
1236 int mask;
1237
1238 static const char *const mnemonic[16] =
1239 {
1240 NULL, "o", "h", "nle",
1241 "l", "nhe", "lh", "ne",
1242 "e", "nlh", "he", "nl",
1243 "le", "nh", "no", NULL
1244 };
1245
1246 if (GET_CODE (XEXP (code, 0)) == REG
1247 && REGNO (XEXP (code, 0)) == CC_REGNUM
1248 && (XEXP (code, 1) == const0_rtx
1249 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1250 && CONST_INT_P (XEXP (code, 1)))))
1251 mask = s390_branch_condition_mask (code);
1252 else
1253 mask = s390_compare_and_branch_condition_mask (code);
1254
1255 gcc_assert (mask >= 0);
1256
1257 if (inv)
1258 mask ^= 15;
1259
1260 gcc_assert (mask >= 1 && mask <= 14);
1261
1262 return mnemonic[mask];
1263 }
1264
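/* Editorial illustration, not part of the original source: for
   (eq (reg CC) (const_int 0)) in CCZmode, s390_branch_condition_mask
   returns CC0 = 1 << 3 = 8 and the table above yields
   mnemonic[8] == "e"; with INV set the mask is flipped to 7 and the
   inverted branch uses "ne".  */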
1265 /* Return the part of OP which has a value different from DEF.
1266 The size of the part is determined by MODE.
1267 Use this function only if you already know that OP really
1268 contains such a part. */
1269
1270 unsigned HOST_WIDE_INT
1271 s390_extract_part (rtx op, enum machine_mode mode, int def)
1272 {
1273 unsigned HOST_WIDE_INT value = 0;
1274 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
1275 int part_bits = GET_MODE_BITSIZE (mode);
1276 unsigned HOST_WIDE_INT part_mask
1277 = ((unsigned HOST_WIDE_INT)1 << part_bits) - 1;
1278 int i;
1279
1280 for (i = 0; i < max_parts; i++)
1281 {
1282 if (i == 0)
1283 value = (unsigned HOST_WIDE_INT) INTVAL (op);
1284 else
1285 value >>= part_bits;
1286
1287 if ((value & part_mask) != (def & part_mask))
1288 return value & part_mask;
1289 }
1290
1291 gcc_unreachable ();
1292 }
1293
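/* Editorial illustration, not part of the original source (assuming
   a 64-bit HOST_WIDE_INT): for OP = 0x0000ffff00000000, MODE = HImode
   and DEF = 0 the loop scans the four halfwords starting at the least
   significant end, finds the third one to differ from DEF and returns
   0xffff.  */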
1294 /* If OP is an integer constant of mode MODE with exactly one
1295 part of mode PART_MODE unequal to DEF, return the number of that
1296 part. Otherwise, return -1. */
1297
1298 int
1299 s390_single_part (rtx op,
1300 enum machine_mode mode,
1301 enum machine_mode part_mode,
1302 int def)
1303 {
1304 unsigned HOST_WIDE_INT value = 0;
1305 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
1306 unsigned HOST_WIDE_INT part_mask
1307 = ((unsigned HOST_WIDE_INT)1 << GET_MODE_BITSIZE (part_mode)) - 1;
1308 int i, part = -1;
1309
1310 if (GET_CODE (op) != CONST_INT)
1311 return -1;
1312
1313 for (i = 0; i < n_parts; i++)
1314 {
1315 if (i == 0)
1316 value = (unsigned HOST_WIDE_INT) INTVAL (op);
1317 else
1318 value >>= GET_MODE_BITSIZE (part_mode);
1319
1320 if ((value & part_mask) != (def & part_mask))
1321 {
1322 if (part != -1)
1323 return -1;
1324 else
1325 part = i;
1326 }
1327 }
1328 return part == -1 ? -1 : n_parts - 1 - part;
1329 }
1330
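/* Editorial illustration, not part of the original source: applied to
   the same constant 0x0000ffff00000000 with MODE = DImode,
   PART_MODE = HImode and DEF = 0, exactly one halfword differs from
   DEF and the function returns 1, the part number counted from the
   most significant halfword (part 0 being bits 48..63).  */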
1331 /* Return true if IN contains a contiguous bitfield in the lower SIZE
1332 bits and no other bits are set in IN. POS and LENGTH can be used
1333 to obtain the start position and the length of the bitfield.
1334
1335 POS gives the position of the first bit of the bitfield counting
1336 from the lowest order bit starting with zero. In order to use this
1337 value for S/390 instructions this has to be converted to "bits big
1338 endian" style. */
1339
1340 bool
1341 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, int size,
1342 int *pos, int *length)
1343 {
1344 int tmp_pos = 0;
1345 int tmp_length = 0;
1346 int i;
1347 unsigned HOST_WIDE_INT mask = 1ULL;
1348 bool contiguous = false;
1349
1350 for (i = 0; i < size; mask <<= 1, i++)
1351 {
1352 if (contiguous)
1353 {
1354 if (mask & in)
1355 tmp_length++;
1356 else
1357 break;
1358 }
1359 else
1360 {
1361 if (mask & in)
1362 {
1363 contiguous = true;
1364 tmp_length++;
1365 }
1366 else
1367 tmp_pos++;
1368 }
1369 }
1370
1371 if (!tmp_length)
1372 return false;
1373
1374 /* Calculate a mask for all bits beyond the contiguous bits. */
1375 mask = (-1LL & ~(((1ULL << (tmp_length + tmp_pos - 1)) << 1) - 1));
1376
1377 if (mask & in)
1378 return false;
1379
1380 if (tmp_length + tmp_pos - 1 > size)
1381 return false;
1382
1383 if (length)
1384 *length = tmp_length;
1385
1386 if (pos)
1387 *pos = tmp_pos;
1388
1389 return true;
1390 }
1391
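/* Editorial illustration, not part of the original source: for
   IN = 0x0ff0 and SIZE = 16 the scan above yields POS = 4 and
   LENGTH = 8; the eight contiguous one bits start at bit 4 (counting
   from the least significant bit) and no bit outside that field is
   set, so the function returns true.  */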
1392 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
1393 equivalent to a shift followed by the AND. In particular, CONTIG
1394 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
1395 for ROTL indicate a rotate to the right. */
1396
1397 bool
1398 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
1399 {
1400 int pos, len;
1401 bool ok;
1402
1403 ok = s390_contiguous_bitmask_p (contig, bitsize, &pos, &len);
1404 gcc_assert (ok);
1405
1406 return ((rotl >= 0 && rotl <= pos)
1407 || (rotl < 0 && -rotl <= bitsize - len - pos));
1408 }
1409
1410 /* Check whether we can (and want to) split a double-word
1411 move in mode MODE from SRC to DST into two single-word
1412 moves, moving the subword FIRST_SUBWORD first. */
1413
1414 bool
1415 s390_split_ok_p (rtx dst, rtx src, enum machine_mode mode, int first_subword)
1416 {
1417 /* Floating point registers cannot be split. */
1418 if (FP_REG_P (src) || FP_REG_P (dst))
1419 return false;
1420
1421 /* We don't need to split if operands are directly accessible. */
1422 if (s_operand (src, mode) || s_operand (dst, mode))
1423 return false;
1424
1425 /* Non-offsettable memory references cannot be split. */
1426 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
1427 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
1428 return false;
1429
1430 /* Moving the first subword must not clobber a register
1431 needed to move the second subword. */
1432 if (register_operand (dst, mode))
1433 {
1434 rtx subreg = operand_subword (dst, first_subword, 0, mode);
1435 if (reg_overlap_mentioned_p (subreg, src))
1436 return false;
1437 }
1438
1439 return true;
1440 }
1441
1442 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
1443 and [MEM2, MEM2 + SIZE] do overlap and false
1444 otherwise. */
1445
1446 bool
1447 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
1448 {
1449 rtx addr1, addr2, addr_delta;
1450 HOST_WIDE_INT delta;
1451
1452 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
1453 return true;
1454
1455 if (size == 0)
1456 return false;
1457
1458 addr1 = XEXP (mem1, 0);
1459 addr2 = XEXP (mem2, 0);
1460
1461 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
1462
1463 /* This overlapping check is used by peepholes merging memory block operations.
1464 Overlapping operations would otherwise be recognized by the S/390 hardware
1465 and would fall back to a slower implementation. Allowing overlapping
1466 operations would lead to slow code but not to wrong code. Therefore we are
1467 somewhat optimistic if we cannot prove that the memory blocks are
1468 overlapping.
1469 That's why we return false here although this may accept operations on
1470 overlapping memory areas. */
1471 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
1472 return false;
1473
1474 delta = INTVAL (addr_delta);
1475
1476 if (delta == 0
1477 || (delta > 0 && delta < size)
1478 || (delta < 0 && -delta < size))
1479 return true;
1480
1481 return false;
1482 }
1483
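/* Editorial illustration, not part of the original source: if MEM1 is
   addressed by %r1 and MEM2 by %r1 + 8, the address delta is the
   constant 8.  With SIZE = 16 the delta is smaller than SIZE, the
   blocks provably overlap and true is returned; with SIZE = 8 the
   blocks are merely adjacent and false is returned.  */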
1484 /* Check whether the address of memory reference MEM2 equals exactly
1485 the address of memory reference MEM1 plus DELTA. Return true if
1486 we can prove this to be the case, false otherwise. */
1487
1488 bool
1489 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
1490 {
1491 rtx addr1, addr2, addr_delta;
1492
1493 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
1494 return false;
1495
1496 addr1 = XEXP (mem1, 0);
1497 addr2 = XEXP (mem2, 0);
1498
1499 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
1500 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
1501 return false;
1502
1503 return true;
1504 }
1505
1506 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
1507
1508 void
1509 s390_expand_logical_operator (enum rtx_code code, enum machine_mode mode,
1510 rtx *operands)
1511 {
1512 enum machine_mode wmode = mode;
1513 rtx dst = operands[0];
1514 rtx src1 = operands[1];
1515 rtx src2 = operands[2];
1516 rtx op, clob, tem;
1517
1518 /* If we cannot handle the operation directly, use a temp register. */
1519 if (!s390_logical_operator_ok_p (operands))
1520 dst = gen_reg_rtx (mode);
1521
1522 /* QImode and HImode patterns make sense only if we have a destination
1523 in memory. Otherwise perform the operation in SImode. */
1524 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
1525 wmode = SImode;
1526
1527 /* Widen operands if required. */
1528 if (mode != wmode)
1529 {
1530 if (GET_CODE (dst) == SUBREG
1531 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
1532 dst = tem;
1533 else if (REG_P (dst))
1534 dst = gen_rtx_SUBREG (wmode, dst, 0);
1535 else
1536 dst = gen_reg_rtx (wmode);
1537
1538 if (GET_CODE (src1) == SUBREG
1539 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
1540 src1 = tem;
1541 else if (GET_MODE (src1) != VOIDmode)
1542 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
1543
1544 if (GET_CODE (src2) == SUBREG
1545 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
1546 src2 = tem;
1547 else if (GET_MODE (src2) != VOIDmode)
1548 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
1549 }
1550
1551 /* Emit the instruction. */
1552 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
1553 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
1554 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
1555
1556 /* Fix up the destination if needed. */
1557 if (dst != operands[0])
1558 emit_move_insn (operands[0], gen_lowpart (mode, dst));
1559 }
1560
1561 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
1562
1563 bool
1564 s390_logical_operator_ok_p (rtx *operands)
1565 {
1566 /* If the destination operand is in memory, it needs to coincide
1567 with one of the source operands. After reload, it has to be
1568 the first source operand. */
1569 if (GET_CODE (operands[0]) == MEM)
1570 return rtx_equal_p (operands[0], operands[1])
1571 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
1572
1573 return true;
1574 }
1575
1576 /* Narrow logical operation CODE of memory operand MEMOP with immediate
1577 operand IMMOP to switch from SS to SI type instructions. */
1578
1579 void
1580 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
1581 {
1582 int def = code == AND ? -1 : 0;
1583 HOST_WIDE_INT mask;
1584 int part;
1585
1586 gcc_assert (GET_CODE (*memop) == MEM);
1587 gcc_assert (!MEM_VOLATILE_P (*memop));
1588
1589 mask = s390_extract_part (*immop, QImode, def);
1590 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
1591 gcc_assert (part >= 0);
1592
1593 *memop = adjust_address (*memop, QImode, part);
1594 *immop = gen_int_mode (mask, QImode);
1595 }
1596
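/* Editorial illustration, not part of the original source: for the
   big-endian memory operation

       (ior:SI (mem:SI X) (const_int 0x40000000))

   s390_single_part reports part 0 (the most significant byte),
   s390_extract_part yields the byte value 0x40, and the operation is
   rewritten as an IOR of 0x40 on the QImode memory at offset 0, so
   that an SI-format or-immediate such as OI can be used instead of
   an SS-format OC.  */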
1597
1598 /* How to allocate a 'struct machine_function'. */
1599
1600 static struct machine_function *
1601 s390_init_machine_status (void)
1602 {
1603 return ggc_alloc_cleared_machine_function ();
1604 }
1605
1606 static void
1607 s390_option_override (void)
1608 {
1609 /* Set up function hooks. */
1610 init_machine_status = s390_init_machine_status;
1611
1612 /* Architecture mode defaults according to ABI. */
1613 if (!(target_flags_explicit & MASK_ZARCH))
1614 {
1615 if (TARGET_64BIT)
1616 target_flags |= MASK_ZARCH;
1617 else
1618 target_flags &= ~MASK_ZARCH;
1619 }
1620
1621 /* Set the -march default in case it hasn't been specified on the
1622 command line. */
1623 if (s390_arch == PROCESSOR_max)
1624 {
1625 s390_arch_string = TARGET_ZARCH? "z900" : "g5";
1626 s390_arch = TARGET_ZARCH ? PROCESSOR_2064_Z900 : PROCESSOR_9672_G5;
1627 s390_arch_flags = processor_flags_table[(int)s390_arch];
1628 }
1629
1630 /* Determine processor to tune for. */
1631 if (s390_tune == PROCESSOR_max)
1632 {
1633 s390_tune = s390_arch;
1634 s390_tune_flags = s390_arch_flags;
1635 }
1636
1637 /* Sanity checks. */
1638 if (TARGET_ZARCH && !TARGET_CPU_ZARCH)
1639 error ("z/Architecture mode not supported on %s", s390_arch_string);
1640 if (TARGET_64BIT && !TARGET_ZARCH)
1641 error ("64-bit ABI not supported in ESA/390 mode");
1642
1643 /* Use hardware DFP if available and not explicitly disabled by
1644 user. E.g. with -m31 -march=z10 -mzarch */
1645 if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
1646 target_flags |= MASK_HARD_DFP;
1647
1648 /* Enable hardware transactions if available and not explicitly
1649 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
1650 if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH)
1651 target_flags |= MASK_OPT_HTM;
1652
1653 if (TARGET_HARD_DFP && !TARGET_DFP)
1654 {
1655 if (target_flags_explicit & MASK_HARD_DFP)
1656 {
1657 if (!TARGET_CPU_DFP)
1658 error ("hardware decimal floating point instructions"
1659 " not available on %s", s390_arch_string);
1660 if (!TARGET_ZARCH)
1661 error ("hardware decimal floating point instructions"
1662 " not available in ESA/390 mode");
1663 }
1664 else
1665 target_flags &= ~MASK_HARD_DFP;
1666 }
1667
1668 if ((target_flags_explicit & MASK_SOFT_FLOAT) && TARGET_SOFT_FLOAT)
1669 {
1670 if ((target_flags_explicit & MASK_HARD_DFP) && TARGET_HARD_DFP)
1671 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
1672
1673 target_flags &= ~MASK_HARD_DFP;
1674 }
1675
1676 /* Set processor cost function. */
1677 switch (s390_tune)
1678 {
1679 case PROCESSOR_2084_Z990:
1680 s390_cost = &z990_cost;
1681 break;
1682 case PROCESSOR_2094_Z9_109:
1683 s390_cost = &z9_109_cost;
1684 break;
1685 case PROCESSOR_2097_Z10:
1686 s390_cost = &z10_cost;
1687 break;
1688 case PROCESSOR_2817_Z196:
1689 s390_cost = &z196_cost;
1690 break;
1691 case PROCESSOR_2827_ZEC12:
1692 s390_cost = &zEC12_cost;
1693 break;
1694 default:
1695 s390_cost = &z900_cost;
1696 }
1697
1698 if (TARGET_BACKCHAIN && TARGET_PACKED_STACK && TARGET_HARD_FLOAT)
1699 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
1700 "in combination");
1701
1702 if (s390_stack_size)
1703 {
1704 if (s390_stack_guard >= s390_stack_size)
1705 error ("stack size must be greater than the stack guard value");
1706 else if (s390_stack_size > 1 << 16)
1707 error ("stack size must not be greater than 64k");
1708 }
1709 else if (s390_stack_guard)
1710 error ("-mstack-guard implies use of -mstack-size");
1711
1712 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1713 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1714 target_flags |= MASK_LONG_DOUBLE_128;
1715 #endif
1716
1717 if (s390_tune == PROCESSOR_2097_Z10
1718 || s390_tune == PROCESSOR_2817_Z196
1719 || s390_tune == PROCESSOR_2827_ZEC12)
1720 {
1721 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
1722 global_options.x_param_values,
1723 global_options_set.x_param_values);
1724 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
1725 global_options.x_param_values,
1726 global_options_set.x_param_values);
1727 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
1728 global_options.x_param_values,
1729 global_options_set.x_param_values);
1730 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
1731 global_options.x_param_values,
1732 global_options_set.x_param_values);
1733 }
1734
1735 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
1736 global_options.x_param_values,
1737 global_options_set.x_param_values);
1738 /* values for loop prefetching */
1739 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
1740 global_options.x_param_values,
1741 global_options_set.x_param_values);
1742 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
1743 global_options.x_param_values,
1744 global_options_set.x_param_values);
1745 /* s390 has more than 2 cache levels and the sizes are much larger.
1746 Since we are always running virtualized, assume that we only get a
1747 small part of the caches above L1. */
1748 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
1749 global_options.x_param_values,
1750 global_options_set.x_param_values);
1751 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
1752 global_options.x_param_values,
1753 global_options_set.x_param_values);
1754 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
1755 global_options.x_param_values,
1756 global_options_set.x_param_values);
1757
1758 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
1759 requires the arch flags to be evaluated already. Since prefetching
1760 is beneficial on s390, we enable it if available. */
1761 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
1762 flag_prefetch_loop_arrays = 1;
1763
1764 /* Use the alternative scheduling-pressure algorithm by default. */
1765 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
1766 global_options.x_param_values,
1767 global_options_set.x_param_values);
1768
1769 if (TARGET_TPF)
1770 {
1771 /* Don't emit DWARF3/4 unless specifically selected. The TPF
1772 debuggers do not yet support DWARF 3/4. */
1773 if (!global_options_set.x_dwarf_strict)
1774 dwarf_strict = 1;
1775 if (!global_options_set.x_dwarf_version)
1776 dwarf_version = 2;
1777 }
1778 }
1779
1780 /* Map for smallest class containing reg regno. */
1781
1782 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
1783 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1784 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1785 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1786 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,
1787 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1788 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1789 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1790 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
1791 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS,
1792 ACCESS_REGS, ACCESS_REGS
1793 };
1794
1795 /* Return attribute type of insn. */
1796
1797 static enum attr_type
1798 s390_safe_attr_type (rtx insn)
1799 {
1800 if (recog_memoized (insn) >= 0)
1801 return get_attr_type (insn);
1802 else
1803 return TYPE_NONE;
1804 }
1805
1806 /* Return true if DISP is a valid short displacement. */
1807
1808 static bool
1809 s390_short_displacement (rtx disp)
1810 {
1811 /* No displacement is OK. */
1812 if (!disp)
1813 return true;
1814
1815 /* Without the long displacement facility we don't need to
1816 distinguish between long and short displacements. */
1817 if (!TARGET_LONG_DISPLACEMENT)
1818 return true;
1819
1820 /* Integer displacement in range. */
1821 if (GET_CODE (disp) == CONST_INT)
1822 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
1823
1824 /* GOT offset is not OK, the GOT can be large. */
1825 if (GET_CODE (disp) == CONST
1826 && GET_CODE (XEXP (disp, 0)) == UNSPEC
1827 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
1828 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
1829 return false;
1830
1831 /* All other symbolic constants are literal pool references,
1832 which are OK as the literal pool must be small. */
1833 if (GET_CODE (disp) == CONST)
1834 return true;
1835
1836 return false;
1837 }
1838
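/* Editorial illustration, not part of the original source: with the
   long displacement facility available, (const_int 4000) is still a
   short displacement since it fits the unsigned 12-bit range
   0..4095, whereas (const_int 4096) or any negative value requires
   the long (20-bit signed) displacement instruction formats.  */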
1839 /* Decompose a RTL expression ADDR for a memory address into
1840 its components, returned in OUT.
1841
1842 Returns false if ADDR is not a valid memory address, true
1843 otherwise. If OUT is NULL, don't return the components,
1844 but check for validity only.
1845
1846 Note: Only addresses in canonical form are recognized.
1847 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
1848 canonical form so that they will be recognized. */
1849
1850 static int
1851 s390_decompose_address (rtx addr, struct s390_address *out)
1852 {
1853 HOST_WIDE_INT offset = 0;
1854 rtx base = NULL_RTX;
1855 rtx indx = NULL_RTX;
1856 rtx disp = NULL_RTX;
1857 rtx orig_disp;
1858 bool pointer = false;
1859 bool base_ptr = false;
1860 bool indx_ptr = false;
1861 bool literal_pool = false;
1862
1863 /* We may need to substitute the literal pool base register into the address
1864 below. However, at this point we do not know which register is going to
1865 be used as base, so we substitute the arg pointer register. This is going
1866 to be treated as holding a pointer below -- it shouldn't be used for any
1867 other purpose. */
1868 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
1869
1870 /* Decompose address into base + index + displacement. */
1871
1872 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
1873 base = addr;
1874
1875 else if (GET_CODE (addr) == PLUS)
1876 {
1877 rtx op0 = XEXP (addr, 0);
1878 rtx op1 = XEXP (addr, 1);
1879 enum rtx_code code0 = GET_CODE (op0);
1880 enum rtx_code code1 = GET_CODE (op1);
1881
1882 if (code0 == REG || code0 == UNSPEC)
1883 {
1884 if (code1 == REG || code1 == UNSPEC)
1885 {
1886 indx = op0; /* index + base */
1887 base = op1;
1888 }
1889
1890 else
1891 {
1892 base = op0; /* base + displacement */
1893 disp = op1;
1894 }
1895 }
1896
1897 else if (code0 == PLUS)
1898 {
1899 indx = XEXP (op0, 0); /* index + base + disp */
1900 base = XEXP (op0, 1);
1901 disp = op1;
1902 }
1903
1904 else
1905 {
1906 return false;
1907 }
1908 }
1909
1910 else
1911 disp = addr; /* displacement */
1912
1913 /* Extract integer part of displacement. */
1914 orig_disp = disp;
1915 if (disp)
1916 {
1917 if (GET_CODE (disp) == CONST_INT)
1918 {
1919 offset = INTVAL (disp);
1920 disp = NULL_RTX;
1921 }
1922 else if (GET_CODE (disp) == CONST
1923 && GET_CODE (XEXP (disp, 0)) == PLUS
1924 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
1925 {
1926 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
1927 disp = XEXP (XEXP (disp, 0), 0);
1928 }
1929 }
1930
1931 /* Strip off CONST here to avoid special case tests later. */
1932 if (disp && GET_CODE (disp) == CONST)
1933 disp = XEXP (disp, 0);
1934
1935 /* We can convert literal pool addresses to
1936 displacements by basing them off the base register. */
1937 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
1938 {
1939 /* Either base or index must be free to hold the base register. */
1940 if (!base)
1941 base = fake_pool_base, literal_pool = true;
1942 else if (!indx)
1943 indx = fake_pool_base, literal_pool = true;
1944 else
1945 return false;
1946
1947 /* Mark up the displacement. */
1948 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
1949 UNSPEC_LTREL_OFFSET);
1950 }
1951
1952 /* Validate base register. */
1953 if (base)
1954 {
1955 if (GET_CODE (base) == UNSPEC)
1956 switch (XINT (base, 1))
1957 {
1958 case UNSPEC_LTREF:
1959 if (!disp)
1960 disp = gen_rtx_UNSPEC (Pmode,
1961 gen_rtvec (1, XVECEXP (base, 0, 0)),
1962 UNSPEC_LTREL_OFFSET);
1963 else
1964 return false;
1965
1966 base = XVECEXP (base, 0, 1);
1967 break;
1968
1969 case UNSPEC_LTREL_BASE:
1970 if (XVECLEN (base, 0) == 1)
1971 base = fake_pool_base, literal_pool = true;
1972 else
1973 base = XVECEXP (base, 0, 1);
1974 break;
1975
1976 default:
1977 return false;
1978 }
1979
1980 if (!REG_P (base)
1981 || (GET_MODE (base) != SImode
1982 && GET_MODE (base) != Pmode))
1983 return false;
1984
1985 if (REGNO (base) == STACK_POINTER_REGNUM
1986 || REGNO (base) == FRAME_POINTER_REGNUM
1987 || ((reload_completed || reload_in_progress)
1988 && frame_pointer_needed
1989 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
1990 || REGNO (base) == ARG_POINTER_REGNUM
1991 || (flag_pic
1992 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
1993 pointer = base_ptr = true;
1994
1995 if ((reload_completed || reload_in_progress)
1996 && base == cfun->machine->base_reg)
1997 pointer = base_ptr = literal_pool = true;
1998 }
1999
2000 /* Validate index register. */
2001 if (indx)
2002 {
2003 if (GET_CODE (indx) == UNSPEC)
2004 switch (XINT (indx, 1))
2005 {
2006 case UNSPEC_LTREF:
2007 if (!disp)
2008 disp = gen_rtx_UNSPEC (Pmode,
2009 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2010 UNSPEC_LTREL_OFFSET);
2011 else
2012 return false;
2013
2014 indx = XVECEXP (indx, 0, 1);
2015 break;
2016
2017 case UNSPEC_LTREL_BASE:
2018 if (XVECLEN (indx, 0) == 1)
2019 indx = fake_pool_base, literal_pool = true;
2020 else
2021 indx = XVECEXP (indx, 0, 1);
2022 break;
2023
2024 default:
2025 return false;
2026 }
2027
2028 if (!REG_P (indx)
2029 || (GET_MODE (indx) != SImode
2030 && GET_MODE (indx) != Pmode))
2031 return false;
2032
2033 if (REGNO (indx) == STACK_POINTER_REGNUM
2034 || REGNO (indx) == FRAME_POINTER_REGNUM
2035 || ((reload_completed || reload_in_progress)
2036 && frame_pointer_needed
2037 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2038 || REGNO (indx) == ARG_POINTER_REGNUM
2039 || (flag_pic
2040 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2041 pointer = indx_ptr = true;
2042
2043 if ((reload_completed || reload_in_progress)
2044 && indx == cfun->machine->base_reg)
2045 pointer = indx_ptr = literal_pool = true;
2046 }
2047
2048 /* Prefer to use pointer as base, not index. */
2049 if (base && indx && !base_ptr
2050 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2051 {
2052 rtx tmp = base;
2053 base = indx;
2054 indx = tmp;
2055 }
2056
2057 /* Validate displacement. */
2058 if (!disp)
2059 {
2060 /* If virtual registers are involved, the displacement will change later
2061 anyway as the virtual registers get eliminated. This could make a
2062 valid displacement invalid, but it is more likely to make an invalid
2063 displacement valid, because we sometimes access the register save area
2064 via negative offsets to one of those registers.
2065 Thus we don't check the displacement for validity here. If after
2066 elimination the displacement turns out to be invalid after all,
2067 this is fixed up by reload in any case. */
2068 /* LRA always keeps displacements up to date, and we need to know
2069 that the displacement is valid during all of LRA, not only at the
2070 final elimination. */
2071 if (lra_in_progress
2072 || (base != arg_pointer_rtx
2073 && indx != arg_pointer_rtx
2074 && base != return_address_pointer_rtx
2075 && indx != return_address_pointer_rtx
2076 && base != frame_pointer_rtx
2077 && indx != frame_pointer_rtx
2078 && base != virtual_stack_vars_rtx
2079 && indx != virtual_stack_vars_rtx))
2080 if (!DISP_IN_RANGE (offset))
2081 return false;
2082 }
2083 else
2084 {
2085 /* All the special cases are pointers. */
2086 pointer = true;
2087
2088 /* In the small-PIC case, the linker converts @GOT
2089 and @GOTNTPOFF offsets to possible displacements. */
2090 if (GET_CODE (disp) == UNSPEC
2091 && (XINT (disp, 1) == UNSPEC_GOT
2092 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2093 && flag_pic == 1)
2094 {
2095 ;
2096 }
2097
2098 /* Accept pool label offsets. */
2099 else if (GET_CODE (disp) == UNSPEC
2100 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2101 ;
2102
2103 /* Accept literal pool references. */
2104 else if (GET_CODE (disp) == UNSPEC
2105 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2106 {
2107 /* In case CSE pulled a non-literal-pool reference out of
2108 the pool we have to reject the address. This is
2109 especially important when loading the GOT pointer on
2110 non-zarch CPUs. In this case the literal pool contains an
2111 lt-relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2112 will most likely exceed the displacement range. */
2113 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2114 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2115 return false;
2116
2117 orig_disp = gen_rtx_CONST (Pmode, disp);
2118 if (offset)
2119 {
2120 /* If we have an offset, make sure it does not
2121 exceed the size of the constant pool entry. */
2122 rtx sym = XVECEXP (disp, 0, 0);
2123 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2124 return false;
2125
2126 orig_disp = plus_constant (Pmode, orig_disp, offset);
2127 }
2128 }
2129
2130 else
2131 return false;
2132 }
2133
2134 if (!base && !indx)
2135 pointer = true;
2136
2137 if (out)
2138 {
2139 out->base = base;
2140 out->indx = indx;
2141 out->disp = orig_disp;
2142 out->pointer = pointer;
2143 out->literal_pool = literal_pool;
2144 }
2145
2146 return true;
2147 }
2148
2149 /* Decompose a RTL expression OP for a shift count into its components,
2150 and return the base register in BASE and the offset in OFFSET.
2151
2152 Return true if OP is a valid shift count, false if not. */
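/* For example, (plus (reg r1) (const_int 7)) yields BASE = r1 and
   OFFSET = 7, while a plain (const_int 63) yields BASE = NULL_RTX and
   OFFSET = 63; SUBREGs wrapped around the register are stripped.  */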
2153
2154 bool
2155 s390_decompose_shift_count (rtx op, rtx *base, HOST_WIDE_INT *offset)
2156 {
2157 HOST_WIDE_INT off = 0;
2158
2159 /* We can have an integer constant, an address register,
2160 or a sum of the two. */
2161 if (GET_CODE (op) == CONST_INT)
2162 {
2163 off = INTVAL (op);
2164 op = NULL_RTX;
2165 }
2166 if (op && GET_CODE (op) == PLUS && GET_CODE (XEXP (op, 1)) == CONST_INT)
2167 {
2168 off = INTVAL (XEXP (op, 1));
2169 op = XEXP (op, 0);
2170 }
2171 while (op && GET_CODE (op) == SUBREG)
2172 op = SUBREG_REG (op);
2173
2174 if (op && GET_CODE (op) != REG)
2175 return false;
2176
2177 if (offset)
2178 *offset = off;
2179 if (base)
2180 *base = op;
2181
2182 return true;
2183 }
2184
2185
2186 /* Return true if the address in OP is valid and has no index register. */
2187
2188 bool
2189 s390_legitimate_address_without_index_p (rtx op)
2190 {
2191 struct s390_address addr;
2192
2193 if (!s390_decompose_address (XEXP (op, 0), &addr))
2194 return false;
2195 if (addr.indx)
2196 return false;
2197
2198 return true;
2199 }
2200
2201
2202 /* Return TRUE if ADDR is an operand valid for a load/store relative
2203 instruction. Be aware that the alignment of the operand needs to
2204 be checked separately.
2205 Valid addresses are single references or a sum of a reference and a
2206 constant integer. Return these parts in SYMREF and ADDEND. You can
2207 pass NULL in SYMREF and/or ADDEND if you are not interested in these
2208 values. Literal pool references are *not* considered symbol
2209 references. */
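/* For example, (const (plus (symbol_ref "foo") (const_int 8)))
   yields SYMREF = (symbol_ref "foo") and ADDEND = 8, while a
   symbol_ref that points into the literal pool is rejected.  */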
2210
2211 static bool
2212 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
2213 {
2214 HOST_WIDE_INT tmpaddend = 0;
2215
2216 if (GET_CODE (addr) == CONST)
2217 addr = XEXP (addr, 0);
2218
2219 if (GET_CODE (addr) == PLUS)
2220 {
2221 if (!CONST_INT_P (XEXP (addr, 1)))
2222 return false;
2223
2224 tmpaddend = INTVAL (XEXP (addr, 1));
2225 addr = XEXP (addr, 0);
2226 }
2227
2228 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
2229 || (GET_CODE (addr) == UNSPEC
2230 && (XINT (addr, 1) == UNSPEC_GOTENT
2231 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
2232 {
2233 if (symref)
2234 *symref = addr;
2235 if (addend)
2236 *addend = tmpaddend;
2237
2238 return true;
2239 }
2240 return false;
2241 }
2242
2243 /* Return true if the address in OP is valid for constraint letter C
2244 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
2245 pool MEMs should be accepted. Only the Q, R, S, T constraint
2246 letters are allowed for C. */
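/* The four letters select the address format that is required:

     Q  no index register, short displacement
     R  index register allowed, short displacement
     S  no index register, long displacement
     T  index register allowed, long displacement  */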
2247
2248 static int
2249 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
2250 {
2251 struct s390_address addr;
2252 bool decomposed = false;
2253
2254 /* This check makes sure that no symbolic address (except literal
2255 pool references) are accepted by the R or T constraints. */
2256 if (s390_loadrelative_operand_p (op, NULL, NULL))
2257 return 0;
2258
2259 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
2260 if (!lit_pool_ok)
2261 {
2262 if (!s390_decompose_address (op, &addr))
2263 return 0;
2264 if (addr.literal_pool)
2265 return 0;
2266 decomposed = true;
2267 }
2268
2269 switch (c)
2270 {
2271 case 'Q': /* no index short displacement */
2272 if (!decomposed && !s390_decompose_address (op, &addr))
2273 return 0;
2274 if (addr.indx)
2275 return 0;
2276 if (!s390_short_displacement (addr.disp))
2277 return 0;
2278 break;
2279
2280 case 'R': /* with index short displacement */
2281 if (TARGET_LONG_DISPLACEMENT)
2282 {
2283 if (!decomposed && !s390_decompose_address (op, &addr))
2284 return 0;
2285 if (!s390_short_displacement (addr.disp))
2286 return 0;
2287 }
2288 /* Any invalid address here will be fixed up by reload,
2289 so accept it for the most generic constraint. */
2290 break;
2291
2292 case 'S': /* no index long displacement */
2293 if (!TARGET_LONG_DISPLACEMENT)
2294 return 0;
2295 if (!decomposed && !s390_decompose_address (op, &addr))
2296 return 0;
2297 if (addr.indx)
2298 return 0;
2299 if (s390_short_displacement (addr.disp))
2300 return 0;
2301 break;
2302
2303 case 'T': /* with index long displacement */
2304 if (!TARGET_LONG_DISPLACEMENT)
2305 return 0;
2306 /* Any invalid address here will be fixed up by reload,
2307 so accept it for the most generic constraint. */
2308 if ((decomposed || s390_decompose_address (op, &addr))
2309 && s390_short_displacement (addr.disp))
2310 return 0;
2311 break;
2312 default:
2313 return 0;
2314 }
2315 return 1;
2316 }
2317
2318
2319 /* Evaluates constraint strings described by the regular expression
2320 ([A|B|Z](Q|R|S|T))|U|W|Y and returns 1 if OP is a valid operand for
2321 the constraint given in STR, and 0 otherwise. */
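/* For example, "AQ" accepts an offsettable MEM whose address
   satisfies the Q constraint, and "BR" a MEM outside the literal
   pool whose address satisfies R. "U" accepts an address matching
   Q or R, and "W" one matching S or T.  */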
2322
2323 int
2324 s390_mem_constraint (const char *str, rtx op)
2325 {
2326 char c = str[0];
2327
2328 switch (c)
2329 {
2330 case 'A':
2331 /* Check for offsettable variants of memory constraints. */
2332 if (!MEM_P (op) || MEM_VOLATILE_P (op))
2333 return 0;
2334 if ((reload_completed || reload_in_progress)
2335 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
2336 return 0;
2337 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
2338 case 'B':
2339 /* Check for non-literal-pool variants of memory constraints. */
2340 if (!MEM_P (op))
2341 return 0;
2342 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
2343 case 'Q':
2344 case 'R':
2345 case 'S':
2346 case 'T':
2347 if (GET_CODE (op) != MEM)
2348 return 0;
2349 return s390_check_qrst_address (c, XEXP (op, 0), true);
2350 case 'U':
2351 return (s390_check_qrst_address ('Q', op, true)
2352 || s390_check_qrst_address ('R', op, true));
2353 case 'W':
2354 return (s390_check_qrst_address ('S', op, true)
2355 || s390_check_qrst_address ('T', op, true));
2356 case 'Y':
2357 /* Simply check for the basic form of a shift count. Reload will
2358 take care of making sure we have a proper base register. */
2359 if (!s390_decompose_shift_count (op, NULL, NULL))
2360 return 0;
2361 break;
2362 case 'Z':
2363 return s390_check_qrst_address (str[1], op, true);
2364 default:
2365 return 0;
2366 }
2367 return 1;
2368 }
2369
2370
2371 /* Evaluates constraint strings starting with letter O. Input
2372 parameter C is the letter following the "O" in the constraint
2373 string. Returns 1 if VALUE meets the respective constraint and 0
2374 otherwise. */
2375
2376 int
2377 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
2378 {
2379 if (!TARGET_EXTIMM)
2380 return 0;
2381
2382 switch (c)
2383 {
2384 case 's':
2385 return trunc_int_for_mode (value, SImode) == value;
2386
2387 case 'p':
2388 return value == 0
2389 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
2390
2391 case 'n':
2392 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
2393
2394 default:
2395 gcc_unreachable ();
2396 }
2397 }
2398
2399
2400 /* Evaluates constraint strings starting with letter N. Parameter STR
2401 contains the letters following letter "N" in the constraint string.
2402 Returns true if VALUE matches the constraint. */
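/* STR therefore has the form <part><part_mode><mode><def>, e.g. "3HD0"
   for the constraint "N3HD0": STR[0] selects which part must differ
   ('x' for any part), STR[1] gives the mode of that part (Q, H or S),
   STR[2] the mode of the whole value (H, S or D), and STR[3] the
   required value of all remaining parts ('0' for zero, 'F' for all
   one bits).  */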
2403
2404 int
2405 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
2406 {
2407 enum machine_mode mode, part_mode;
2408 int def;
2409 int part, part_goal;
2410
2411
2412 if (str[0] == 'x')
2413 part_goal = -1;
2414 else
2415 part_goal = str[0] - '0';
2416
2417 switch (str[1])
2418 {
2419 case 'Q':
2420 part_mode = QImode;
2421 break;
2422 case 'H':
2423 part_mode = HImode;
2424 break;
2425 case 'S':
2426 part_mode = SImode;
2427 break;
2428 default:
2429 return 0;
2430 }
2431
2432 switch (str[2])
2433 {
2434 case 'H':
2435 mode = HImode;
2436 break;
2437 case 'S':
2438 mode = SImode;
2439 break;
2440 case 'D':
2441 mode = DImode;
2442 break;
2443 default:
2444 return 0;
2445 }
2446
2447 switch (str[3])
2448 {
2449 case '0':
2450 def = 0;
2451 break;
2452 case 'F':
2453 def = -1;
2454 break;
2455 default:
2456 return 0;
2457 }
2458
2459 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
2460 return 0;
2461
2462 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
2463 if (part < 0)
2464 return 0;
2465 if (part_goal != -1 && part_goal != part)
2466 return 0;
2467
2468 return 1;
2469 }
2470
2471
2472 /* Returns true if the input parameter VALUE is a float zero. */
2473
2474 int
2475 s390_float_const_zero_p (rtx value)
2476 {
2477 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
2478 && value == CONST0_RTX (GET_MODE (value)));
2479 }
2480
2481 /* Implement TARGET_REGISTER_MOVE_COST. */
2482
2483 static int
2484 s390_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
2485 reg_class_t from, reg_class_t to)
2486 {
2487 /* On s390, copy between fprs and gprs is expensive as long as no
2488 ldgr/lgdr can be used. */
2489 if ((!TARGET_Z10 || GET_MODE_SIZE (mode) != 8)
2490 && ((reg_classes_intersect_p (from, GENERAL_REGS)
2491 && reg_classes_intersect_p (to, FP_REGS))
2492 || (reg_classes_intersect_p (from, FP_REGS)
2493 && reg_classes_intersect_p (to, GENERAL_REGS))))
2494 return 10;
2495
2496 return 1;
2497 }
2498
2499 /* Implement TARGET_MEMORY_MOVE_COST. */
2500
2501 static int
2502 s390_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
2503 reg_class_t rclass ATTRIBUTE_UNUSED,
2504 bool in ATTRIBUTE_UNUSED)
2505 {
2506 return 1;
2507 }
2508
2509 /* Compute a (partial) cost for rtx X. Return true if the complete
2510 cost has been computed, and false if subexpressions should be
2511 scanned. In either case, *TOTAL contains the cost result.
2512 CODE contains GET_CODE (x), OUTER_CODE contains the code
2513 of the superexpression of x. */
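/* The instruction costs used below come from the processor_costs
   table that s390_cost points to for the CPU being tuned for, so the
   relative weights differ between CPU levels.  */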
2514
2515 static bool
2516 s390_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
2517 int *total, bool speed ATTRIBUTE_UNUSED)
2518 {
2519 switch (code)
2520 {
2521 case CONST:
2522 case CONST_INT:
2523 case LABEL_REF:
2524 case SYMBOL_REF:
2525 case CONST_DOUBLE:
2526 case MEM:
2527 *total = 0;
2528 return true;
2529
2530 case ASHIFT:
2531 case ASHIFTRT:
2532 case LSHIFTRT:
2533 case ROTATE:
2534 case ROTATERT:
2535 case AND:
2536 case IOR:
2537 case XOR:
2538 case NEG:
2539 case NOT:
2540 *total = COSTS_N_INSNS (1);
2541 return false;
2542
2543 case PLUS:
2544 case MINUS:
2545 *total = COSTS_N_INSNS (1);
2546 return false;
2547
2548 case MULT:
2549 switch (GET_MODE (x))
2550 {
2551 case SImode:
2552 {
2553 rtx left = XEXP (x, 0);
2554 rtx right = XEXP (x, 1);
2555 if (GET_CODE (right) == CONST_INT
2556 && CONST_OK_FOR_K (INTVAL (right)))
2557 *total = s390_cost->mhi;
2558 else if (GET_CODE (left) == SIGN_EXTEND)
2559 *total = s390_cost->mh;
2560 else
2561 *total = s390_cost->ms; /* msr, ms, msy */
2562 break;
2563 }
2564 case DImode:
2565 {
2566 rtx left = XEXP (x, 0);
2567 rtx right = XEXP (x, 1);
2568 if (TARGET_ZARCH)
2569 {
2570 if (GET_CODE (right) == CONST_INT
2571 && CONST_OK_FOR_K (INTVAL (right)))
2572 *total = s390_cost->mghi;
2573 else if (GET_CODE (left) == SIGN_EXTEND)
2574 *total = s390_cost->msgf;
2575 else
2576 *total = s390_cost->msg; /* msgr, msg */
2577 }
2578 else /* TARGET_31BIT */
2579 {
2580 if (GET_CODE (left) == SIGN_EXTEND
2581 && GET_CODE (right) == SIGN_EXTEND)
2582 /* mulsidi case: mr, m */
2583 *total = s390_cost->m;
2584 else if (GET_CODE (left) == ZERO_EXTEND
2585 && GET_CODE (right) == ZERO_EXTEND
2586 && TARGET_CPU_ZARCH)
2587 /* umulsidi case: ml, mlr */
2588 *total = s390_cost->ml;
2589 else
2590 /* Complex calculation is required. */
2591 *total = COSTS_N_INSNS (40);
2592 }
2593 break;
2594 }
2595 case SFmode:
2596 case DFmode:
2597 *total = s390_cost->mult_df;
2598 break;
2599 case TFmode:
2600 *total = s390_cost->mxbr;
2601 break;
2602 default:
2603 return false;
2604 }
2605 return false;
2606
2607 case FMA:
2608 switch (GET_MODE (x))
2609 {
2610 case DFmode:
2611 *total = s390_cost->madbr;
2612 break;
2613 case SFmode:
2614 *total = s390_cost->maebr;
2615 break;
2616 default:
2617 return false;
2618 }
2619 /* A negation in the third argument is free: FMSUB. */
2620 if (GET_CODE (XEXP (x, 2)) == NEG)
2621 {
2622 *total += (rtx_cost (XEXP (x, 0), FMA, 0, speed)
2623 + rtx_cost (XEXP (x, 1), FMA, 1, speed)
2624 + rtx_cost (XEXP (XEXP (x, 2), 0), FMA, 2, speed));
2625 return true;
2626 }
2627 return false;
2628
2629 case UDIV:
2630 case UMOD:
2631 if (GET_MODE (x) == TImode) /* 128 bit division */
2632 *total = s390_cost->dlgr;
2633 else if (GET_MODE (x) == DImode)
2634 {
2635 rtx right = XEXP (x, 1);
2636 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2637 *total = s390_cost->dlr;
2638 else /* 64 by 64 bit division */
2639 *total = s390_cost->dlgr;
2640 }
2641 else if (GET_MODE (x) == SImode) /* 32 bit division */
2642 *total = s390_cost->dlr;
2643 return false;
2644
2645 case DIV:
2646 case MOD:
2647 if (GET_MODE (x) == DImode)
2648 {
2649 rtx right = XEXP (x, 1);
2650 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
2651 if (TARGET_ZARCH)
2652 *total = s390_cost->dsgfr;
2653 else
2654 *total = s390_cost->dr;
2655 else /* 64 by 64 bit division */
2656 *total = s390_cost->dsgr;
2657 }
2658 else if (GET_MODE (x) == SImode) /* 32 bit division */
2659 *total = s390_cost->dlr;
2660 else if (GET_MODE (x) == SFmode)
2661 {
2662 *total = s390_cost->debr;
2663 }
2664 else if (GET_MODE (x) == DFmode)
2665 {
2666 *total = s390_cost->ddbr;
2667 }
2668 else if (GET_MODE (x) == TFmode)
2669 {
2670 *total = s390_cost->dxbr;
2671 }
2672 return false;
2673
2674 case SQRT:
2675 if (GET_MODE (x) == SFmode)
2676 *total = s390_cost->sqebr;
2677 else if (GET_MODE (x) == DFmode)
2678 *total = s390_cost->sqdbr;
2679 else /* TFmode */
2680 *total = s390_cost->sqxbr;
2681 return false;
2682
2683 case SIGN_EXTEND:
2684 case ZERO_EXTEND:
2685 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
2686 || outer_code == PLUS || outer_code == MINUS
2687 || outer_code == COMPARE)
2688 *total = 0;
2689 return false;
2690
2691 case COMPARE:
2692 *total = COSTS_N_INSNS (1);
2693 if (GET_CODE (XEXP (x, 0)) == AND
2694 && GET_CODE (XEXP (x, 1)) == CONST_INT
2695 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
2696 {
2697 rtx op0 = XEXP (XEXP (x, 0), 0);
2698 rtx op1 = XEXP (XEXP (x, 0), 1);
2699 rtx op2 = XEXP (x, 1);
2700
2701 if (memory_operand (op0, GET_MODE (op0))
2702 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
2703 return true;
2704 if (register_operand (op0, GET_MODE (op0))
2705 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
2706 return true;
2707 }
2708 return false;
2709
2710 default:
2711 return false;
2712 }
2713 }
2714
2715 /* Return the cost of an address rtx ADDR. */
2716
2717 static int
2718 s390_address_cost (rtx addr, enum machine_mode mode ATTRIBUTE_UNUSED,
2719 addr_space_t as ATTRIBUTE_UNUSED,
2720 bool speed ATTRIBUTE_UNUSED)
2721 {
2722 struct s390_address ad;
2723 if (!s390_decompose_address (addr, &ad))
2724 return 1000;
2725
2726 return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
2727 }
2728
2729 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
2730 otherwise return 0. */
2731
2732 int
2733 tls_symbolic_operand (rtx op)
2734 {
2735 if (GET_CODE (op) != SYMBOL_REF)
2736 return 0;
2737 return SYMBOL_REF_TLS_MODEL (op);
2738 }
2739 \f
2740 /* Split DImode access register reference REG (on 64-bit) into its constituent
2741 low and high parts, and store them into LO and HI. Note that gen_lowpart/
2742 gen_highpart cannot be used as they assume all registers are word-sized,
2743 while our access registers have only half that size. */
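/* The high SImode word is returned for the even-numbered access
   register and the low word for the following odd-numbered one,
   i.e. the 64-bit value is stored big-endian across the pair.  */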
2744
2745 void
2746 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
2747 {
2748 gcc_assert (TARGET_64BIT);
2749 gcc_assert (ACCESS_REG_P (reg));
2750 gcc_assert (GET_MODE (reg) == DImode);
2751 gcc_assert (!(REGNO (reg) & 1));
2752
2753 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
2754 *hi = gen_rtx_REG (SImode, REGNO (reg));
2755 }
2756
2757 /* Return true if OP contains a symbol reference. */
2758
2759 bool
2760 symbolic_reference_mentioned_p (rtx op)
2761 {
2762 const char *fmt;
2763 int i;
2764
2765 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
2766 return 1;
2767
2768 fmt = GET_RTX_FORMAT (GET_CODE (op));
2769 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2770 {
2771 if (fmt[i] == 'E')
2772 {
2773 int j;
2774
2775 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2776 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2777 return 1;
2778 }
2779
2780 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
2781 return 1;
2782 }
2783
2784 return 0;
2785 }
2786
2787 /* Return true if OP contains a reference to a thread-local symbol. */
2788
2789 bool
2790 tls_symbolic_reference_mentioned_p (rtx op)
2791 {
2792 const char *fmt;
2793 int i;
2794
2795 if (GET_CODE (op) == SYMBOL_REF)
2796 return tls_symbolic_operand (op);
2797
2798 fmt = GET_RTX_FORMAT (GET_CODE (op));
2799 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2800 {
2801 if (fmt[i] == 'E')
2802 {
2803 int j;
2804
2805 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2806 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2807 return true;
2808 }
2809
2810 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
2811 return true;
2812 }
2813
2814 return false;
2815 }
2816
2817
2818 /* Return true if OP is a legitimate general operand when
2819 generating PIC code. It is given that flag_pic is on
2820 and that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
2821
2822 int
2823 legitimate_pic_operand_p (rtx op)
2824 {
2825 /* Accept all non-symbolic constants. */
2826 if (!SYMBOLIC_CONST (op))
2827 return 1;
2828
2829 /* Reject everything else; must be handled
2830 via emit_symbolic_move. */
2831 return 0;
2832 }
2833
2834 /* Returns true if the constant value OP is a legitimate general operand.
2835 It is given that OP satisfies CONSTANT_P or is a CONST_DOUBLE. */
2836
2837 static bool
2838 s390_legitimate_constant_p (enum machine_mode mode, rtx op)
2839 {
2840 /* Accept all non-symbolic constants. */
2841 if (!SYMBOLIC_CONST (op))
2842 return 1;
2843
2844 /* Accept immediate LARL operands. */
2845 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
2846 return 1;
2847
2848 /* Thread-local symbols are never legal constants. This is
2849 so that emit_call knows that computing such addresses
2850 might require a function call. */
2851 if (TLS_SYMBOLIC_CONST (op))
2852 return 0;
2853
2854 /* In the PIC case, symbolic constants must *not* be
2855 forced into the literal pool. We accept them here,
2856 so that they will be handled by emit_symbolic_move. */
2857 if (flag_pic)
2858 return 1;
2859
2860 /* All remaining non-PIC symbolic constants are
2861 forced into the literal pool. */
2862 return 0;
2863 }
2864
2865 /* Determine if it's legal to put X into the constant pool. This
2866 is not possible if X contains the address of a symbol that is
2867 not constant (TLS) or not known at final link time (PIC). */
2868
2869 static bool
2870 s390_cannot_force_const_mem (enum machine_mode mode, rtx x)
2871 {
2872 switch (GET_CODE (x))
2873 {
2874 case CONST_INT:
2875 case CONST_DOUBLE:
2876 /* Accept all non-symbolic constants. */
2877 return false;
2878
2879 case LABEL_REF:
2880 /* Labels are OK iff we are non-PIC. */
2881 return flag_pic != 0;
2882
2883 case SYMBOL_REF:
2884 /* 'Naked' TLS symbol references are never OK,
2885 non-TLS symbols are OK iff we are non-PIC. */
2886 if (tls_symbolic_operand (x))
2887 return true;
2888 else
2889 return flag_pic != 0;
2890
2891 case CONST:
2892 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
2893 case PLUS:
2894 case MINUS:
2895 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
2896 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
2897
2898 case UNSPEC:
2899 switch (XINT (x, 1))
2900 {
2901 /* Only lt-relative or GOT-relative UNSPECs are OK. */
2902 case UNSPEC_LTREL_OFFSET:
2903 case UNSPEC_GOT:
2904 case UNSPEC_GOTOFF:
2905 case UNSPEC_PLTOFF:
2906 case UNSPEC_TLSGD:
2907 case UNSPEC_TLSLDM:
2908 case UNSPEC_NTPOFF:
2909 case UNSPEC_DTPOFF:
2910 case UNSPEC_GOTNTPOFF:
2911 case UNSPEC_INDNTPOFF:
2912 return false;
2913
2914 /* If the literal pool shares the code section, execute-template
2915 placeholders may be put into the pool as well. */
2916 case UNSPEC_INSN:
2917 return TARGET_CPU_ZARCH;
2918
2919 default:
2920 return true;
2921 }
2922 break;
2923
2924 default:
2925 gcc_unreachable ();
2926 }
2927 }
2928
2929 /* Returns true if the constant value OP is a legitimate general
2930 operand during and after reload. The difference from
2931 legitimate_constant_p is that this function will not accept
2932 a constant that would need to be forced to the literal pool
2933 before it can be used as an operand.
2934 This function accepts all constants which can be loaded directly
2935 into a GPR. */
2936
2937 bool
2938 legitimate_reload_constant_p (rtx op)
2939 {
2940 /* Accept la(y) operands. */
2941 if (GET_CODE (op) == CONST_INT
2942 && DISP_IN_RANGE (INTVAL (op)))
2943 return true;
2944
2945 /* Accept l(g)hi/l(g)fi operands. */
2946 if (GET_CODE (op) == CONST_INT
2947 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
2948 return true;
2949
2950 /* Accept lliXX operands. */
2951 if (TARGET_ZARCH
2952 && GET_CODE (op) == CONST_INT
2953 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
2954 && s390_single_part (op, word_mode, HImode, 0) >= 0)
2955 return true;
2956
2957 if (TARGET_EXTIMM
2958 && GET_CODE (op) == CONST_INT
2959 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
2960 && s390_single_part (op, word_mode, SImode, 0) >= 0)
2961 return true;
2962
2963 /* Accept larl operands. */
2964 if (TARGET_CPU_ZARCH
2965 && larl_operand (op, VOIDmode))
2966 return true;
2967
2968 /* Accept floating-point zero operands that fit into a single GPR. */
2969 if (GET_CODE (op) == CONST_DOUBLE
2970 && s390_float_const_zero_p (op)
2971 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
2972 return true;
2973
2974 /* Accept double-word operands that can be split. */
2975 if (GET_CODE (op) == CONST_INT
2976 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op))
2977 {
2978 enum machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
2979 rtx hi = operand_subword (op, 0, 0, dword_mode);
2980 rtx lo = operand_subword (op, 1, 0, dword_mode);
2981 return legitimate_reload_constant_p (hi)
2982 && legitimate_reload_constant_p (lo);
2983 }
2984
2985 /* Everything else cannot be handled without reload. */
2986 return false;
2987 }
2988
2989 /* Returns true if the constant value OP is a legitimate fp operand
2990 during and after reload.
2991 This function accepts all constants which can be loaded directly
2992 into an FPR. */
2993
2994 static bool
2995 legitimate_reload_fp_constant_p (rtx op)
2996 {
2997 /* Accept floating-point zero operands if the load zero instruction
2998 can be used. Prior to z196 the load fp zero instruction caused a
2999 performance penalty if the result is used as BFP number. */
3000 if (TARGET_Z196
3001 && GET_CODE (op) == CONST_DOUBLE
3002 && s390_float_const_zero_p (op))
3003 return true;
3004
3005 return false;
3006 }
3007
3008 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
3009 return the class of reg to actually use. */
3010
3011 static reg_class_t
3012 s390_preferred_reload_class (rtx op, reg_class_t rclass)
3013 {
3014 switch (GET_CODE (op))
3015 {
3016 /* Constants we cannot reload into general registers
3017 must be forced into the literal pool. */
3018 case CONST_DOUBLE:
3019 case CONST_INT:
3020 if (reg_class_subset_p (GENERAL_REGS, rclass)
3021 && legitimate_reload_constant_p (op))
3022 return GENERAL_REGS;
3023 else if (reg_class_subset_p (ADDR_REGS, rclass)
3024 && legitimate_reload_constant_p (op))
3025 return ADDR_REGS;
3026 else if (reg_class_subset_p (FP_REGS, rclass)
3027 && legitimate_reload_fp_constant_p (op))
3028 return FP_REGS;
3029 return NO_REGS;
3030
3031 /* If a symbolic constant or a PLUS is reloaded,
3032 it is most likely being used as an address, so
3033 prefer ADDR_REGS. If RCLASS is not a superset
3034 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
3035 case CONST:
3036 /* A larl operand with odd addend will get fixed via secondary
3037 reload. So don't request it to be pushed into the literal
3038 pool. */
3039 if (TARGET_CPU_ZARCH
3040 && GET_CODE (XEXP (op, 0)) == PLUS
3041 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
3042 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
3043 {
3044 if (reg_class_subset_p (ADDR_REGS, rclass))
3045 return ADDR_REGS;
3046 else
3047 return NO_REGS;
3048 }
3049 /* fallthrough */
3050 case LABEL_REF:
3051 case SYMBOL_REF:
3052 if (!legitimate_reload_constant_p (op))
3053 return NO_REGS;
3054 /* fallthrough */
3055 case PLUS:
3056 /* load address will be used. */
3057 if (reg_class_subset_p (ADDR_REGS, rclass))
3058 return ADDR_REGS;
3059 else
3060 return NO_REGS;
3061
3062 default:
3063 break;
3064 }
3065
3066 return rclass;
3067 }
3068
3069 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
3070 multiple of ALIGNMENT and the SYMBOL_REF being naturally
3071 aligned. */
3072
3073 bool
3074 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
3075 {
3076 HOST_WIDE_INT addend;
3077 rtx symref;
3078
3079 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3080 return false;
3081
3082 if (addend & (alignment - 1))
3083 return false;
3084
3085 if (GET_CODE (symref) == SYMBOL_REF
3086 && !SYMBOL_REF_NOT_NATURALLY_ALIGNED_P (symref))
3087 return true;
3088
3089 if (GET_CODE (symref) == UNSPEC
3090 && alignment <= UNITS_PER_LONG)
3091 return true;
3092
3093 return false;
3094 }
3095
3096 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
3097 operand, SCRATCH is used to load the even part of the address,
3098 and one is then added with la. */
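/* For example, loading sym + 5 is emitted roughly as

     larl <scratch>, sym+4
     la <reg>, 1(<scratch>)

   because larl cannot handle odd offsets.  */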
3099
3100 void
3101 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
3102 {
3103 HOST_WIDE_INT addend;
3104 rtx symref;
3105
3106 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
3107 gcc_unreachable ();
3108
3109 if (!(addend & 1))
3110 /* Easy case. The addend is even so larl will do fine. */
3111 emit_move_insn (reg, addr);
3112 else
3113 {
3114 /* We can leave the scratch register untouched if the target
3115 register is a valid base register. */
3116 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
3117 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
3118 scratch = reg;
3119
3120 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
3121 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
3122
3123 if (addend != 1)
3124 emit_move_insn (scratch,
3125 gen_rtx_CONST (Pmode,
3126 gen_rtx_PLUS (Pmode, symref,
3127 GEN_INT (addend - 1))));
3128 else
3129 emit_move_insn (scratch, symref);
3130
3131 /* Increment the address using la in order to avoid clobbering cc. */
3132 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
3133 }
3134 }
3135
3136 /* Generate what is necessary to move between REG and MEM using
3137 SCRATCH. The direction is given by TOMEM. */
3138
3139 void
3140 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
3141 {
3142 /* Reload might have pulled a constant out of the literal pool.
3143 Force it back in. */
3144 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
3145 || GET_CODE (mem) == CONST)
3146 mem = force_const_mem (GET_MODE (reg), mem);
3147
3148 gcc_assert (MEM_P (mem));
3149
3150 /* For a load from memory we can leave the scratch register
3151 untouched if the target register is a valid base register. */
3152 if (!tomem
3153 && REGNO (reg) < FIRST_PSEUDO_REGISTER
3154 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
3155 && GET_MODE (reg) == GET_MODE (scratch))
3156 scratch = reg;
3157
3158 /* Load address into scratch register. Since we can't have a
3159 secondary reload for a secondary reload we have to cover the case
3160 where larl would need a secondary reload here as well. */
3161 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
3162
3163 /* Now we can use a standard load/store to do the move. */
3164 if (tomem)
3165 emit_move_insn (replace_equiv_address (mem, scratch), reg);
3166 else
3167 emit_move_insn (reg, replace_equiv_address (mem, scratch));
3168 }
3169
3170 /* Inform reload about cases where moving X with a mode MODE to a register in
3171 RCLASS requires an extra scratch or immediate register. Return the class
3172 needed for the immediate register. */
3173
3174 static reg_class_t
3175 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
3176 enum machine_mode mode, secondary_reload_info *sri)
3177 {
3178 enum reg_class rclass = (enum reg_class) rclass_i;
3179
3180 /* Intermediate register needed. */
3181 if (reg_classes_intersect_p (CC_REGS, rclass))
3182 return GENERAL_REGS;
3183
3184 if (TARGET_Z10)
3185 {
3186 HOST_WIDE_INT offset;
3187 rtx symref;
3188
3189 /* On z10 several optimizer steps may generate larl operands with
3190 an odd addend. */
3191 if (in_p
3192 && s390_loadrelative_operand_p (x, &symref, &offset)
3193 && mode == Pmode
3194 && !SYMBOL_REF_ALIGN1_P (symref)
3195 && (offset & 1) == 1)
3196 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
3197 : CODE_FOR_reloadsi_larl_odd_addend_z10);
3198
3199 /* On z10 we need a scratch register when moving QI, TI or floating
3200 point mode values from or to a memory location with a SYMBOL_REF
3201 or if the symref addend of a SI or DI move is not aligned to the
3202 width of the access. */
3203 if (MEM_P (x)
3204 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
3205 && (mode == QImode || mode == TImode || FLOAT_MODE_P (mode)
3206 || (!TARGET_ZARCH && mode == DImode)
3207 || ((mode == HImode || mode == SImode || mode == DImode)
3208 && (!s390_check_symref_alignment (XEXP (x, 0),
3209 GET_MODE_SIZE (mode))))))
3210 {
3211 #define __SECONDARY_RELOAD_CASE(M,m) \
3212 case M##mode: \
3213 if (TARGET_64BIT) \
3214 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
3215 CODE_FOR_reload##m##di_tomem_z10; \
3216 else \
3217 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
3218 CODE_FOR_reload##m##si_tomem_z10; \
3219 break;
3220
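/* Each use of __SECONDARY_RELOAD_CASE below expands to one switch
   case that picks the reload<m>di_*_z10 or reload<m>si_*_z10 pattern
   according to the direction of the reload and the address width.  */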
3221 switch (GET_MODE (x))
3222 {
3223 __SECONDARY_RELOAD_CASE (QI, qi);
3224 __SECONDARY_RELOAD_CASE (HI, hi);
3225 __SECONDARY_RELOAD_CASE (SI, si);
3226 __SECONDARY_RELOAD_CASE (DI, di);
3227 __SECONDARY_RELOAD_CASE (TI, ti);
3228 __SECONDARY_RELOAD_CASE (SF, sf);
3229 __SECONDARY_RELOAD_CASE (DF, df);
3230 __SECONDARY_RELOAD_CASE (TF, tf);
3231 __SECONDARY_RELOAD_CASE (SD, sd);
3232 __SECONDARY_RELOAD_CASE (DD, dd);
3233 __SECONDARY_RELOAD_CASE (TD, td);
3234
3235 default:
3236 gcc_unreachable ();
3237 }
3238 #undef __SECONDARY_RELOAD_CASE
3239 }
3240 }
3241
3242 /* We need a scratch register when loading a PLUS expression which
3243 is not a legitimate operand of the LOAD ADDRESS instruction. */
3244 /* LRA can deal with the transformation of a PLUS operand very well --
3245 so we don't need to prompt it in this case. */
3246 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
3247 sri->icode = (TARGET_64BIT ?
3248 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
3249
3250 /* Performing a multiword move from or to memory we have to make sure the
3251 second chunk in memory is addressable without causing a displacement
3252 overflow. If that would be the case we calculate the address in
3253 a scratch register. */
3254 if (MEM_P (x)
3255 && GET_CODE (XEXP (x, 0)) == PLUS
3256 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
3257 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
3258 + GET_MODE_SIZE (mode) - 1))
3259 {
3260 /* For GENERAL_REGS a displacement overflow is no problem if it occurs
3261 in an s_operand address, since we can fall back to lm/stm. So we only
3262 have to care about overflows in the b+i+d case. */
3263 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
3264 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
3265 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
3266 /* For FP_REGS no lm/stm is available so this check is triggered
3267 for displacement overflows in b+i+d and b+d like addresses. */
3268 || (reg_classes_intersect_p (FP_REGS, rclass)
3269 && s390_class_max_nregs (FP_REGS, mode) > 1))
3270 {
3271 if (in_p)
3272 sri->icode = (TARGET_64BIT ?
3273 CODE_FOR_reloaddi_nonoffmem_in :
3274 CODE_FOR_reloadsi_nonoffmem_in);
3275 else
3276 sri->icode = (TARGET_64BIT ?
3277 CODE_FOR_reloaddi_nonoffmem_out :
3278 CODE_FOR_reloadsi_nonoffmem_out);
3279 }
3280 }
3281
3282 /* A scratch address register is needed when a symbolic constant is
3283 copied to r0 when compiling with -fPIC. In other cases the target
3284 register might be used as temporary (see legitimize_pic_address). */
3285 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
3286 sri->icode = (TARGET_64BIT ?
3287 CODE_FOR_reloaddi_PIC_addr :
3288 CODE_FOR_reloadsi_PIC_addr);
3289
3290 /* Either scratch or no register needed. */
3291 return NO_REGS;
3292 }
3293
3294 /* Generate code to load SRC, which is PLUS that is not a
3295 legitimate operand for the LA instruction, into TARGET.
3296 SCRATCH may be used as scratch register. */
3297
3298 void
3299 s390_expand_plus_operand (rtx target, rtx src,
3300 rtx scratch)
3301 {
3302 rtx sum1, sum2;
3303 struct s390_address ad;
3304
3305 /* src must be a PLUS; get its two operands. */
3306 gcc_assert (GET_CODE (src) == PLUS);
3307 gcc_assert (GET_MODE (src) == Pmode);
3308
3309 /* Check if any of the two operands is already scheduled
3310 for replacement by reload. This can happen e.g. when
3311 float registers occur in an address. */
3312 sum1 = find_replacement (&XEXP (src, 0));
3313 sum2 = find_replacement (&XEXP (src, 1));
3314 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3315
3316 /* If the address is already strictly valid, there's nothing to do. */
3317 if (!s390_decompose_address (src, &ad)
3318 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3319 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
3320 {
3321 /* Otherwise, one of the operands cannot be an address register;
3322 we reload its value into the scratch register. */
3323 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
3324 {
3325 emit_move_insn (scratch, sum1);
3326 sum1 = scratch;
3327 }
3328 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
3329 {
3330 emit_move_insn (scratch, sum2);
3331 sum2 = scratch;
3332 }
3333
3334 /* According to the way these invalid addresses are generated
3335 in reload.c, it should never happen (at least on s390) that
3336 *neither* of the PLUS components, after find_replacements
3337 was applied, is an address register. */
3338 if (sum1 == scratch && sum2 == scratch)
3339 {
3340 debug_rtx (src);
3341 gcc_unreachable ();
3342 }
3343
3344 src = gen_rtx_PLUS (Pmode, sum1, sum2);
3345 }
3346
3347 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
3348 is only ever performed on addresses, so we can mark the
3349 sum as legitimate for LA in any case. */
3350 s390_load_address (target, src);
3351 }
3352
3353
3354 /* Return true if ADDR is a valid memory address.
3355 STRICT specifies whether strict register checking applies. */
3356
3357 static bool
3358 s390_legitimate_address_p (enum machine_mode mode, rtx addr, bool strict)
3359 {
3360 struct s390_address ad;
3361
3362 if (TARGET_Z10
3363 && larl_operand (addr, VOIDmode)
3364 && (mode == VOIDmode
3365 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
3366 return true;
3367
3368 if (!s390_decompose_address (addr, &ad))
3369 return false;
3370
3371 if (strict)
3372 {
3373 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
3374 return false;
3375
3376 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
3377 return false;
3378 }
3379 else
3380 {
3381 if (ad.base
3382 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
3383 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
3384 return false;
3385
3386 if (ad.indx
3387 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
3388 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
3389 return false;
3390 }
3391 return true;
3392 }
3393
3394 /* Return true if OP is a valid operand for the LA instruction.
3395 In 31-bit, we need to prove that the result is used as an
3396 address, as LA performs only a 31-bit addition. */
3397
3398 bool
3399 legitimate_la_operand_p (rtx op)
3400 {
3401 struct s390_address addr;
3402 if (!s390_decompose_address (op, &addr))
3403 return false;
3404
3405 return (TARGET_64BIT || addr.pointer);
3406 }
3407
3408 /* Return true if it is valid *and* preferable to use LA to
3409 compute the sum of OP1 and OP2. */
3410
3411 bool
3412 preferred_la_operand_p (rtx op1, rtx op2)
3413 {
3414 struct s390_address addr;
3415
3416 if (op2 != const0_rtx)
3417 op1 = gen_rtx_PLUS (Pmode, op1, op2);
3418
3419 if (!s390_decompose_address (op1, &addr))
3420 return false;
3421 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
3422 return false;
3423 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
3424 return false;
3425
3426 /* Avoid LA instructions with index register on z196; it is
3427 preferable to use regular add instructions when possible.
3428 Starting with zEC12 the la with index register is "uncracked"
3429 again. */
3430 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
3431 return false;
3432
3433 if (!TARGET_64BIT && !addr.pointer)
3434 return false;
3435
3436 if (addr.pointer)
3437 return true;
3438
3439 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
3440 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
3441 return true;
3442
3443 return false;
3444 }
3445
3446 /* Emit a forced load-address operation to load SRC into DST.
3447 This will use the LOAD ADDRESS instruction even in situations
3448 where legitimate_la_operand_p (SRC) returns false. */
3449
3450 void
3451 s390_load_address (rtx dst, rtx src)
3452 {
3453 if (TARGET_64BIT)
3454 emit_move_insn (dst, src);
3455 else
3456 emit_insn (gen_force_la_31 (dst, src));
3457 }
3458
3459 /* Return a legitimate reference for ORIG (an address) using the
3460 register REG. If REG is 0, a new pseudo is generated.
3461
3462 There are two types of references that must be handled:
3463
3464 1. Global data references must load the address from the GOT, via
3465 the PIC reg. An insn is emitted to do this load, and the reg is
3466 returned.
3467
3468 2. Static data references, constant pool addresses, and code labels
3469 compute the address as an offset from the GOT, whose base is in
3470 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
3471 differentiate them from global data objects. The returned
3472 address is the PIC reg + an unspec constant.
3473
3474 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
3475 reg also appears in the address. */
3476
3477 rtx
3478 legitimize_pic_address (rtx orig, rtx reg)
3479 {
3480 rtx addr = orig;
3481 rtx addend = const0_rtx;
3482 rtx new_rtx = orig;
3483
3484 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
3485
3486 if (GET_CODE (addr) == CONST)
3487 addr = XEXP (addr, 0);
3488
3489 if (GET_CODE (addr) == PLUS)
3490 {
3491 addend = XEXP (addr, 1);
3492 addr = XEXP (addr, 0);
3493 }
3494
3495 if ((GET_CODE (addr) == LABEL_REF
3496 || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
3497 || (GET_CODE (addr) == UNSPEC &&
3498 (XINT (addr, 1) == UNSPEC_GOTENT
3499 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3500 && GET_CODE (addend) == CONST_INT)
3501 {
3502 /* This can be locally addressed. */
3503
3504 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
3505 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
3506 gen_rtx_CONST (Pmode, addr) : addr);
3507
3508 if (TARGET_CPU_ZARCH
3509 && larl_operand (const_addr, VOIDmode)
3510 && INTVAL (addend) < (HOST_WIDE_INT)1 << 31
3511 && INTVAL (addend) >= -((HOST_WIDE_INT)1 << 31))
3512 {
3513 if (INTVAL (addend) & 1)
3514 {
3515 /* LARL can't handle odd offsets, so emit a pair of LARL
3516 and LA. */
3517 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3518
3519 if (!DISP_IN_RANGE (INTVAL (addend)))
3520 {
3521 HOST_WIDE_INT even = INTVAL (addend) - 1;
3522 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
3523 addr = gen_rtx_CONST (Pmode, addr);
3524 addend = const1_rtx;
3525 }
3526
3527 emit_move_insn (temp, addr);
3528 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
3529
3530 if (reg != 0)
3531 {
3532 s390_load_address (reg, new_rtx);
3533 new_rtx = reg;
3534 }
3535 }
3536 else
3537 {
3538 /* If the offset is even, we can just use LARL. This
3539 will happen automatically. */
3540 }
3541 }
3542 else
3543 {
3544 /* No larl - Access local symbols relative to the GOT. */
3545
3546 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3547
3548 if (reload_in_progress || reload_completed)
3549 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3550
3551 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
3552 if (addend != const0_rtx)
3553 addr = gen_rtx_PLUS (Pmode, addr, addend);
3554 addr = gen_rtx_CONST (Pmode, addr);
3555 addr = force_const_mem (Pmode, addr);
3556 emit_move_insn (temp, addr);
3557
3558 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3559 if (reg != 0)
3560 {
3561 s390_load_address (reg, new_rtx);
3562 new_rtx = reg;
3563 }
3564 }
3565 }
3566 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
3567 {
3568 /* A non-local symbol reference without addend.
3569
3570 The symbol ref is wrapped into an UNSPEC to make sure the
3571 proper operand modifier (@GOT or @GOTENT) will be emitted.
3572 This will tell the linker to put the symbol into the GOT.
3573
3574 Additionally the code dereferencing the GOT slot is emitted here.
3575
3576 An addend to the symref needs to be added afterwards.
3577 legitimize_pic_address calls itself recursively to handle
3578 that case. So no need to do it here. */
3579
3580 if (reg == 0)
3581 reg = gen_reg_rtx (Pmode);
3582
3583 if (TARGET_Z10)
3584 {
3585 /* Use load relative if possible.
3586 lgrl <target>, sym@GOTENT */
3587 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
3588 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3589 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
3590
3591 emit_move_insn (reg, new_rtx);
3592 new_rtx = reg;
3593 }
3594 else if (flag_pic == 1)
3595 {
3596 /* Assume GOT offset is a valid displacement operand (< 4k
3597 or < 512k with z990). This is handled the same way in
3598 both 31- and 64-bit code (@GOT).
3599 lg <target>, sym@GOT(r12) */
3600
3601 if (reload_in_progress || reload_completed)
3602 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3603
3604 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3605 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3606 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3607 new_rtx = gen_const_mem (Pmode, new_rtx);
3608 emit_move_insn (reg, new_rtx);
3609 new_rtx = reg;
3610 }
3611 else if (TARGET_CPU_ZARCH)
3612 {
3613 /* If the GOT offset might be >= 4k, we determine the position
3614 of the GOT entry via a PC-relative LARL (@GOTENT).
3615 larl temp, sym@GOTENT
3616 lg <target>, 0(temp) */
3617
3618 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3619
3620 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3621 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3622
3623 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
3624 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3625 emit_move_insn (temp, new_rtx);
3626
3627 new_rtx = gen_const_mem (Pmode, temp);
3628 emit_move_insn (reg, new_rtx);
3629
3630 new_rtx = reg;
3631 }
3632 else
3633 {
3634 /* If the GOT offset might be >= 4k, we have to load it
3635 from the literal pool (@GOT).
3636
3637 lg temp, lit-litbase(r13)
3638 lg <target>, 0(temp)
3639 lit: .long sym@GOT */
3640
3641 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
3642
3643 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
3644 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
3645
3646 if (reload_in_progress || reload_completed)
3647 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3648
3649 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
3650 addr = gen_rtx_CONST (Pmode, addr);
3651 addr = force_const_mem (Pmode, addr);
3652 emit_move_insn (temp, addr);
3653
3654 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3655 new_rtx = gen_const_mem (Pmode, new_rtx);
3656 emit_move_insn (reg, new_rtx);
3657 new_rtx = reg;
3658 }
3659 }
3660 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
3661 {
3662 gcc_assert (XVECLEN (addr, 0) == 1);
3663 switch (XINT (addr, 1))
3664 {
3665 /* These address symbols (or PLT slots) relative to the GOT
3666 (not GOT slots!). In general this will exceed the
3667 displacement range, so these values belong in the literal
3668 pool. */
3669 case UNSPEC_GOTOFF:
3670 case UNSPEC_PLTOFF:
3671 new_rtx = force_const_mem (Pmode, orig);
3672 break;
3673
3674 /* For -fPIC the GOT size might exceed the displacement
3675 range so make sure the value is in the literal pool. */
3676 case UNSPEC_GOT:
3677 if (flag_pic == 2)
3678 new_rtx = force_const_mem (Pmode, orig);
3679 break;
3680
3681 /* For @GOTENT larl is used. This is handled like local
3682 symbol refs. */
3683 case UNSPEC_GOTENT:
3684 gcc_unreachable ();
3685 break;
3686
3687 /* @PLT is OK as is on 64-bit, must be converted to
3688 GOT-relative @PLTOFF on 31-bit. */
3689 case UNSPEC_PLT:
3690 if (!TARGET_CPU_ZARCH)
3691 {
3692 rtx temp = reg? reg : gen_reg_rtx (Pmode);
3693
3694 if (reload_in_progress || reload_completed)
3695 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3696
3697 addr = XVECEXP (addr, 0, 0);
3698 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
3699 UNSPEC_PLTOFF);
3700 if (addend != const0_rtx)
3701 addr = gen_rtx_PLUS (Pmode, addr, addend);
3702 addr = gen_rtx_CONST (Pmode, addr);
3703 addr = force_const_mem (Pmode, addr);
3704 emit_move_insn (temp, addr);
3705
3706 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3707 if (reg != 0)
3708 {
3709 s390_load_address (reg, new_rtx);
3710 new_rtx = reg;
3711 }
3712 }
3713 else
3714 /* On 64 bit larl can be used. This case is handled like
3715 local symbol refs. */
3716 gcc_unreachable ();
3717 break;
3718
3719 /* Everything else cannot happen. */
3720 default:
3721 gcc_unreachable ();
3722 }
3723 }
3724 else if (addend != const0_rtx)
3725 {
3726 /* Otherwise, compute the sum. */
3727
3728 rtx base = legitimize_pic_address (addr, reg);
3729 new_rtx = legitimize_pic_address (addend,
3730 base == reg ? NULL_RTX : reg);
3731 if (GET_CODE (new_rtx) == CONST_INT)
3732 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
3733 else
3734 {
3735 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
3736 {
3737 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
3738 new_rtx = XEXP (new_rtx, 1);
3739 }
3740 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
3741 }
3742
3743 if (GET_CODE (new_rtx) == CONST)
3744 new_rtx = XEXP (new_rtx, 0);
3745 new_rtx = force_operand (new_rtx, 0);
3746 }
3747
3748 return new_rtx;
3749 }
3750
3751 /* Load the thread pointer into a register. */
3752
3753 rtx
3754 s390_get_thread_pointer (void)
3755 {
3756 rtx tp = gen_reg_rtx (Pmode);
3757
3758 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
3759 mark_reg_pointer (tp, BITS_PER_WORD);
3760
3761 return tp;
3762 }
3763
3764 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
3765 in s390_tls_symbol which always refers to __tls_get_offset.
3766 The returned offset is written to RESULT_REG and a USE rtx is
3767 generated for TLS_CALL. */
3768
3769 static GTY(()) rtx s390_tls_symbol;
3770
3771 static void
3772 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
3773 {
3774 rtx insn;
3775
3776 if (!flag_pic)
3777 emit_insn (s390_load_got ());
3778
3779 if (!s390_tls_symbol)
3780 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
3781
3782 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
3783 gen_rtx_REG (Pmode, RETURN_REGNUM));
3784
3785 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
3786 RTL_CONST_CALL_P (insn) = 1;
3787 }
3788
3789 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
3790 this (thread-local) address. REG may be used as temporary. */
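/* The four TLS access models are handled below: global-dynamic and
   local-dynamic call __tls_get_offset, initial-exec reads the offset
   from the GOT, and local-exec uses a literal-pool constant; each
   result is finally added to the thread pointer.  */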
3791
3792 static rtx
3793 legitimize_tls_address (rtx addr, rtx reg)
3794 {
3795 rtx new_rtx, tls_call, temp, base, r2, insn;
3796
3797 if (GET_CODE (addr) == SYMBOL_REF)
3798 switch (tls_symbolic_operand (addr))
3799 {
3800 case TLS_MODEL_GLOBAL_DYNAMIC:
3801 start_sequence ();
3802 r2 = gen_rtx_REG (Pmode, 2);
3803 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
3804 new_rtx = gen_rtx_CONST (Pmode, tls_call);
3805 new_rtx = force_const_mem (Pmode, new_rtx);
3806 emit_move_insn (r2, new_rtx);
3807 s390_emit_tls_call_insn (r2, tls_call);
3808 insn = get_insns ();
3809 end_sequence ();
3810
3811 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
3812 temp = gen_reg_rtx (Pmode);
3813 emit_libcall_block (insn, temp, r2, new_rtx);
3814
3815 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3816 if (reg != 0)
3817 {
3818 s390_load_address (reg, new_rtx);
3819 new_rtx = reg;
3820 }
3821 break;
3822
3823 case TLS_MODEL_LOCAL_DYNAMIC:
3824 start_sequence ();
3825 r2 = gen_rtx_REG (Pmode, 2);
3826 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
3827 new_rtx = gen_rtx_CONST (Pmode, tls_call);
3828 new_rtx = force_const_mem (Pmode, new_rtx);
3829 emit_move_insn (r2, new_rtx);
3830 s390_emit_tls_call_insn (r2, tls_call);
3831 insn = get_insns ();
3832 end_sequence ();
3833
3834 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
3835 temp = gen_reg_rtx (Pmode);
3836 emit_libcall_block (insn, temp, r2, new_rtx);
3837
3838 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3839 base = gen_reg_rtx (Pmode);
3840 s390_load_address (base, new_rtx);
3841
3842 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
3843 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3844 new_rtx = force_const_mem (Pmode, new_rtx);
3845 temp = gen_reg_rtx (Pmode);
3846 emit_move_insn (temp, new_rtx);
3847
3848 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
3849 if (reg != 0)
3850 {
3851 s390_load_address (reg, new_rtx);
3852 new_rtx = reg;
3853 }
3854 break;
3855
3856 case TLS_MODEL_INITIAL_EXEC:
3857 if (flag_pic == 1)
3858 {
3859 /* Assume GOT offset < 4k. This is handled the same way
3860 in both 31- and 64-bit code. */
3861
3862 if (reload_in_progress || reload_completed)
3863 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3864
3865 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
3866 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3867 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
3868 new_rtx = gen_const_mem (Pmode, new_rtx);
3869 temp = gen_reg_rtx (Pmode);
3870 emit_move_insn (temp, new_rtx);
3871 }
3872 else if (TARGET_CPU_ZARCH)
3873 {
3874 /* If the GOT offset might be >= 4k, we determine the position
3875 of the GOT entry via a PC-relative LARL. */
3876
3877 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
3878 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3879 temp = gen_reg_rtx (Pmode);
3880 emit_move_insn (temp, new_rtx);
3881
3882 new_rtx = gen_const_mem (Pmode, temp);
3883 temp = gen_reg_rtx (Pmode);
3884 emit_move_insn (temp, new_rtx);
3885 }
3886 else if (flag_pic)
3887 {
3888 /* If the GOT offset might be >= 4k, we have to load it
3889 from the literal pool. */
3890
3891 if (reload_in_progress || reload_completed)
3892 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
3893
3894 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
3895 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3896 new_rtx = force_const_mem (Pmode, new_rtx);
3897 temp = gen_reg_rtx (Pmode);
3898 emit_move_insn (temp, new_rtx);
3899
3900 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
3901 new_rtx = gen_const_mem (Pmode, new_rtx);
3902
3903 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
3904 temp = gen_reg_rtx (Pmode);
3905 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
3906 }
3907 else
3908 {
3909 /* In position-dependent code, load the absolute address of
3910 the GOT entry from the literal pool. */
3911
3912 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
3913 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3914 new_rtx = force_const_mem (Pmode, new_rtx);
3915 temp = gen_reg_rtx (Pmode);
3916 emit_move_insn (temp, new_rtx);
3917
3918 new_rtx = temp;
3919 new_rtx = gen_const_mem (Pmode, new_rtx);
3920 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
3921 temp = gen_reg_rtx (Pmode);
3922 emit_insn (gen_rtx_SET (Pmode, temp, new_rtx));
3923 }
3924
3925 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3926 if (reg != 0)
3927 {
3928 s390_load_address (reg, new_rtx);
3929 new_rtx = reg;
3930 }
3931 break;
3932
3933 case TLS_MODEL_LOCAL_EXEC:
3934 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
3935 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3936 new_rtx = force_const_mem (Pmode, new_rtx);
3937 temp = gen_reg_rtx (Pmode);
3938 emit_move_insn (temp, new_rtx);
3939
3940 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
3941 if (reg != 0)
3942 {
3943 s390_load_address (reg, new_rtx);
3944 new_rtx = reg;
3945 }
3946 break;
3947
3948 default:
3949 gcc_unreachable ();
3950 }
3951
3952 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
3953 {
3954 switch (XINT (XEXP (addr, 0), 1))
3955 {
3956 case UNSPEC_INDNTPOFF:
3957 gcc_assert (TARGET_CPU_ZARCH);
3958 new_rtx = addr;
3959 break;
3960
3961 default:
3962 gcc_unreachable ();
3963 }
3964 }
3965
3966 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
3967 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3968 {
3969 new_rtx = XEXP (XEXP (addr, 0), 0);
3970 if (GET_CODE (new_rtx) != SYMBOL_REF)
3971 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
3972
3973 new_rtx = legitimize_tls_address (new_rtx, reg);
3974 new_rtx = plus_constant (Pmode, new_rtx,
3975 INTVAL (XEXP (XEXP (addr, 0), 1)));
3976 new_rtx = force_operand (new_rtx, 0);
3977 }
3978
3979 else
3980 gcc_unreachable (); /* for now ... */
3981
3982 return new_rtx;
3983 }
3984
3985 /* Emit insns making the address in operands[1] valid for a standard
3986 move to operands[0]. operands[1] is replaced by an address which
3987 should be used instead of the former RTX to emit the move
3988 pattern. */
3989
3990 void
3991 emit_symbolic_move (rtx *operands)
3992 {
3993 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
3994
3995 if (GET_CODE (operands[0]) == MEM)
3996 operands[1] = force_reg (Pmode, operands[1]);
3997 else if (TLS_SYMBOLIC_CONST (operands[1]))
3998 operands[1] = legitimize_tls_address (operands[1], temp);
3999 else if (flag_pic)
4000 operands[1] = legitimize_pic_address (operands[1], temp);
4001 }
4002
4003 /* Try machine-dependent ways of modifying an illegitimate address X
4004 to be legitimate. If we find one, return the new, valid address.
4005
4006 OLDX is the address as it was before break_out_memory_refs was called.
4007 In some cases it is useful to look at this to decide what needs to be done.
4008
4009 MODE is the mode of the operand pointed to by X.
4010
4011 When -fpic is used, special handling is needed for symbolic references.
4012 See comments by legitimize_pic_address for details. */
4013
4014 static rtx
4015 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4016 enum machine_mode mode ATTRIBUTE_UNUSED)
4017 {
4018 rtx constant_term = const0_rtx;
4019
4020 if (TLS_SYMBOLIC_CONST (x))
4021 {
4022 x = legitimize_tls_address (x, 0);
4023
4024 if (s390_legitimate_address_p (mode, x, FALSE))
4025 return x;
4026 }
4027 else if (GET_CODE (x) == PLUS
4028 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
4029 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
4030 {
4031 return x;
4032 }
4033 else if (flag_pic)
4034 {
4035 if (SYMBOLIC_CONST (x)
4036 || (GET_CODE (x) == PLUS
4037 && (SYMBOLIC_CONST (XEXP (x, 0))
4038 || SYMBOLIC_CONST (XEXP (x, 1)))))
4039 x = legitimize_pic_address (x, 0);
4040
4041 if (s390_legitimate_address_p (mode, x, FALSE))
4042 return x;
4043 }
4044
4045 x = eliminate_constant_term (x, &constant_term);
4046
4047 /* Optimize loading of large displacements by splitting them
4048 into the multiple of 4K and the rest; this allows the
4049 former to be CSE'd if possible.
4050
4051 Don't do this if the displacement is added to a register
4052 pointing into the stack frame, as the offsets will
4053 change later anyway. */
4054
4055 if (GET_CODE (constant_term) == CONST_INT
4056 && !TARGET_LONG_DISPLACEMENT
4057 && !DISP_IN_RANGE (INTVAL (constant_term))
4058 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
4059 {
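/* LOWER is the rightmost 12 bits of the displacement; XORing it back out
   leaves UPPER as the 4K-aligned part that gets loaded into a register
   and can therefore be CSE'd.  */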
4060 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
4061 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
4062
4063 rtx temp = gen_reg_rtx (Pmode);
4064 rtx val = force_operand (GEN_INT (upper), temp);
4065 if (val != temp)
4066 emit_move_insn (temp, val);
4067
4068 x = gen_rtx_PLUS (Pmode, x, temp);
4069 constant_term = GEN_INT (lower);
4070 }
4071
4072 if (GET_CODE (x) == PLUS)
4073 {
4074 if (GET_CODE (XEXP (x, 0)) == REG)
4075 {
4076 rtx temp = gen_reg_rtx (Pmode);
4077 rtx val = force_operand (XEXP (x, 1), temp);
4078 if (val != temp)
4079 emit_move_insn (temp, val);
4080
4081 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
4082 }
4083
4084 else if (GET_CODE (XEXP (x, 1)) == REG)
4085 {
4086 rtx temp = gen_reg_rtx (Pmode);
4087 rtx val = force_operand (XEXP (x, 0), temp);
4088 if (val != temp)
4089 emit_move_insn (temp, val);
4090
4091 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
4092 }
4093 }
4094
4095 if (constant_term != const0_rtx)
4096 x = gen_rtx_PLUS (Pmode, x, constant_term);
4097
4098 return x;
4099 }
4100
4101 /* Try a machine-dependent way of reloading an illegitimate address AD
4102 operand. If we find one, push the reload and return the new address.
4103
4104 MODE is the mode of the enclosing MEM. OPNUM is the operand number
4105 and TYPE is the reload type of the current reload. */
4106
4107 rtx
4108 legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
4109 int opnum, int type)
4110 {
4111 if (!optimize || TARGET_LONG_DISPLACEMENT)
4112 return NULL_RTX;
4113
4114 if (GET_CODE (ad) == PLUS)
4115 {
4116 rtx tem = simplify_binary_operation (PLUS, Pmode,
4117 XEXP (ad, 0), XEXP (ad, 1));
4118 if (tem)
4119 ad = tem;
4120 }
4121
4122 if (GET_CODE (ad) == PLUS
4123 && GET_CODE (XEXP (ad, 0)) == REG
4124 && GET_CODE (XEXP (ad, 1)) == CONST_INT
4125 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
4126 {
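/* Split the out-of-range displacement as above: LOWER keeps the 12-bit
   part that still fits the instruction, UPPER holds the 4K-aligned
   remainder that is reloaded into an address register.  */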
4127 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
4128 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
4129 rtx cst, tem, new_rtx;
4130
4131 cst = GEN_INT (upper);
4132 if (!legitimate_reload_constant_p (cst))
4133 cst = force_const_mem (Pmode, cst);
4134
4135 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
4136 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
4137
4138 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
4139 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
4140 opnum, (enum reload_type) type);
4141 return new_rtx;
4142 }
4143
4144 return NULL_RTX;
4145 }
4146
4147 /* Emit code to move LEN bytes from SRC to DST. */
4148
4149 bool
4150 s390_expand_movmem (rtx dst, rtx src, rtx len)
4151 {
4152 /* When tuning for z10 or higher we rely on the Glibc functions to
4153 do the right thing. Inline code is generated only for constant
4154 lengths of at most 64k. */
4155 if (s390_tune >= PROCESSOR_2097_Z10
4156 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4157 return false;
4158
4159 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4160 {
4161 if (INTVAL (len) > 0)
4162 emit_insn (gen_movmem_short (dst, src, GEN_INT (INTVAL (len) - 1)));
4163 }
4164
4165 else if (TARGET_MVCLE)
4166 {
4167 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
4168 }
4169
4170 else
4171 {
4172 rtx dst_addr, src_addr, count, blocks, temp;
4173 rtx loop_start_label = gen_label_rtx ();
4174 rtx loop_end_label = gen_label_rtx ();
4175 rtx end_label = gen_label_rtx ();
4176 enum machine_mode mode;
4177
4178 mode = GET_MODE (len);
4179 if (mode == VOIDmode)
4180 mode = Pmode;
4181
4182 dst_addr = gen_reg_rtx (Pmode);
4183 src_addr = gen_reg_rtx (Pmode);
4184 count = gen_reg_rtx (mode);
4185 blocks = gen_reg_rtx (mode);
4186
4187 convert_move (count, len, 1);
4188 emit_cmp_and_jump_insns (count, const0_rtx,
4189 EQ, NULL_RTX, mode, 1, end_label);
4190
4191 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4192 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
4193 dst = change_address (dst, VOIDmode, dst_addr);
4194 src = change_address (src, VOIDmode, src_addr);
4195
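/* The movmem_short pattern encodes lengths 1..256 as 0..255, so work
   with COUNT = LEN - 1.  BLOCKS = COUNT >> 8 is the number of full
   256-byte chunks copied in the loop below; the trailing bytes are
   handled by the final movmem_short after the loop.  */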
4196 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4197 OPTAB_DIRECT);
4198 if (temp != count)
4199 emit_move_insn (count, temp);
4200
4201 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4202 OPTAB_DIRECT);
4203 if (temp != blocks)
4204 emit_move_insn (blocks, temp);
4205
4206 emit_cmp_and_jump_insns (blocks, const0_rtx,
4207 EQ, NULL_RTX, mode, 1, loop_end_label);
4208
4209 emit_label (loop_start_label);
4210
4211 if (TARGET_Z10
4212 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
4213 {
4214 rtx prefetch;
4215
4216 /* Issue a read prefetch for the +3 cache line. */
4217 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
4218 const0_rtx, const0_rtx);
4219 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4220 emit_insn (prefetch);
4221
4222 /* Issue a write prefetch for the +3 cache line. */
4223 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
4224 const1_rtx, const0_rtx);
4225 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4226 emit_insn (prefetch);
4227 }
4228
4229 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
4230 s390_load_address (dst_addr,
4231 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4232 s390_load_address (src_addr,
4233 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
4234
4235 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4236 OPTAB_DIRECT);
4237 if (temp != blocks)
4238 emit_move_insn (blocks, temp);
4239
4240 emit_cmp_and_jump_insns (blocks, const0_rtx,
4241 EQ, NULL_RTX, mode, 1, loop_end_label);
4242
4243 emit_jump (loop_start_label);
4244 emit_label (loop_end_label);
4245
4246 emit_insn (gen_movmem_short (dst, src,
4247 convert_to_mode (Pmode, count, 1)));
4248 emit_label (end_label);
4249 }
4250 return true;
4251 }
4252
4253 /* Emit code to set LEN bytes at DST to VAL.
4254 Make use of clrmem if VAL is zero. */
4255
4256 void
4257 s390_expand_setmem (rtx dst, rtx len, rtx val)
4258 {
4259 if (GET_CODE (len) == CONST_INT && INTVAL (len) == 0)
4260 return;
4261
4262 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
4263
4264 if (GET_CODE (len) == CONST_INT && INTVAL (len) > 0 && INTVAL (len) <= 257)
4265 {
4266 if (val == const0_rtx && INTVAL (len) <= 256)
4267 emit_insn (gen_clrmem_short (dst, GEN_INT (INTVAL (len) - 1)));
4268 else
4269 {
4270 /* Initialize memory by storing the first byte. */
4271 emit_move_insn (adjust_address (dst, QImode, 0), val);
4272
4273 if (INTVAL (len) > 1)
4274 {
4275 /* Initiate 1 byte overlap move.
4276 The first byte of DST is propagated through DSTP1.
4277 Prepare a movmem for: DST+1 = DST (length = LEN - 1).
4278 DST is set to size 1 so the rest of the memory location
4279 does not count as a source operand. */
4280 rtx dstp1 = adjust_address (dst, VOIDmode, 1);
4281 set_mem_size (dst, 1);
4282
4283 emit_insn (gen_movmem_short (dstp1, dst,
4284 GEN_INT (INTVAL (len) - 2)));
4285 }
4286 }
4287 }
4288
4289 else if (TARGET_MVCLE)
4290 {
4291 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
4292 emit_insn (gen_setmem_long (dst, convert_to_mode (Pmode, len, 1), val));
4293 }
4294
4295 else
4296 {
4297 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
4298 rtx loop_start_label = gen_label_rtx ();
4299 rtx loop_end_label = gen_label_rtx ();
4300 rtx end_label = gen_label_rtx ();
4301 enum machine_mode mode;
4302
4303 mode = GET_MODE (len);
4304 if (mode == VOIDmode)
4305 mode = Pmode;
4306
4307 dst_addr = gen_reg_rtx (Pmode);
4308 count = gen_reg_rtx (mode);
4309 blocks = gen_reg_rtx (mode);
4310
4311 convert_move (count, len, 1);
4312 emit_cmp_and_jump_insns (count, const0_rtx,
4313 EQ, NULL_RTX, mode, 1, end_label);
4314
4315 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
4316 dst = change_address (dst, VOIDmode, dst_addr);
4317
4318 if (val == const0_rtx)
4319 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4320 OPTAB_DIRECT);
4321 else
4322 {
4323 dstp1 = adjust_address (dst, VOIDmode, 1);
4324 set_mem_size (dst, 1);
4325
4326 /* Initialize memory by storing the first byte. */
4327 emit_move_insn (adjust_address (dst, QImode, 0), val);
4328
4329 /* If count is 1 we are done. */
4330 emit_cmp_and_jump_insns (count, const1_rtx,
4331 EQ, NULL_RTX, mode, 1, end_label);
4332
4333 temp = expand_binop (mode, add_optab, count, GEN_INT (-2), count, 1,
4334 OPTAB_DIRECT);
4335 }
4336 if (temp != count)
4337 emit_move_insn (count, temp);
4338
4339 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4340 OPTAB_DIRECT);
4341 if (temp != blocks)
4342 emit_move_insn (blocks, temp);
4343
4344 emit_cmp_and_jump_insns (blocks, const0_rtx,
4345 EQ, NULL_RTX, mode, 1, loop_end_label);
4346
4347 emit_label (loop_start_label);
4348
4349 if (TARGET_Z10
4350 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
4351 {
4352 /* Issue a write prefetch for the +4 cache line. */
4353 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
4354 GEN_INT (1024)),
4355 const1_rtx, const0_rtx);
4356 emit_insn (prefetch);
4357 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4358 }
4359
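/* Clear or fill one 256-byte block per iteration; the non-zero case
   reuses the overlapping DST+1 = DST move to propagate the byte that
   was stored at the start of the buffer.  */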
4360 if (val == const0_rtx)
4361 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
4362 else
4363 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (255)));
4364 s390_load_address (dst_addr,
4365 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
4366
4367 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4368 OPTAB_DIRECT);
4369 if (temp != blocks)
4370 emit_move_insn (blocks, temp);
4371
4372 emit_cmp_and_jump_insns (blocks, const0_rtx,
4373 EQ, NULL_RTX, mode, 1, loop_end_label);
4374
4375 emit_jump (loop_start_label);
4376 emit_label (loop_end_label);
4377
4378 if (val == const0_rtx)
4379 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
4380 else
4381 emit_insn (gen_movmem_short (dstp1, dst, convert_to_mode (Pmode, count, 1)));
4382 emit_label (end_label);
4383 }
4384 }
4385
4386 /* Emit code to compare LEN bytes at OP0 with those at OP1,
4387 and return the result in TARGET. */
4388
4389 bool
4390 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
4391 {
4392 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
4393 rtx tmp;
4394
4395 /* When tuning for z10 or higher we rely on the Glibc functions to
4396 do the right thing. Inline code is generated only for constant
4397 lengths of at most 64k. */
4398 if (s390_tune >= PROCESSOR_2097_Z10
4399 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
4400 return false;
4401
4402 /* As the result of CMPINT is inverted compared to what we need,
4403 we have to swap the operands. */
4404 tmp = op0; op0 = op1; op1 = tmp;
4405
4406 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
4407 {
4408 if (INTVAL (len) > 0)
4409 {
4410 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
4411 emit_insn (gen_cmpint (target, ccreg));
4412 }
4413 else
4414 emit_move_insn (target, const0_rtx);
4415 }
4416 else if (TARGET_MVCLE)
4417 {
4418 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
4419 emit_insn (gen_cmpint (target, ccreg));
4420 }
4421 else
4422 {
4423 rtx addr0, addr1, count, blocks, temp;
4424 rtx loop_start_label = gen_label_rtx ();
4425 rtx loop_end_label = gen_label_rtx ();
4426 rtx end_label = gen_label_rtx ();
4427 enum machine_mode mode;
4428
4429 mode = GET_MODE (len);
4430 if (mode == VOIDmode)
4431 mode = Pmode;
4432
4433 addr0 = gen_reg_rtx (Pmode);
4434 addr1 = gen_reg_rtx (Pmode);
4435 count = gen_reg_rtx (mode);
4436 blocks = gen_reg_rtx (mode);
4437
4438 convert_move (count, len, 1);
4439 emit_cmp_and_jump_insns (count, const0_rtx,
4440 EQ, NULL_RTX, mode, 1, end_label);
4441
4442 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
4443 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
4444 op0 = change_address (op0, VOIDmode, addr0);
4445 op1 = change_address (op1, VOIDmode, addr1);
4446
4447 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
4448 OPTAB_DIRECT);
4449 if (temp != count)
4450 emit_move_insn (count, temp);
4451
4452 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
4453 OPTAB_DIRECT);
4454 if (temp != blocks)
4455 emit_move_insn (blocks, temp);
4456
4457 emit_cmp_and_jump_insns (blocks, const0_rtx,
4458 EQ, NULL_RTX, mode, 1, loop_end_label);
4459
4460 emit_label (loop_start_label);
4461
4462 if (TARGET_Z10
4463 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
4464 {
4465 rtx prefetch;
4466
4467 /* Issue a read prefetch for the +2 cache line of operand 1. */
4468 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
4469 const0_rtx, const0_rtx);
4470 emit_insn (prefetch);
4471 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4472
4473 /* Issue a read prefetch for the +2 cache line of operand 2. */
4474 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
4475 const0_rtx, const0_rtx);
4476 emit_insn (prefetch);
4477 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
4478 }
4479
4480 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
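/* A nonzero CC means the block compare found a difference; leave the
   loop early and let the CMPINT after END_LABEL turn CC into the
   result.  */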
4481 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
4482 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
4483 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
4484 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
4485 emit_jump_insn (temp);
4486
4487 s390_load_address (addr0,
4488 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
4489 s390_load_address (addr1,
4490 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
4491
4492 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
4493 OPTAB_DIRECT);
4494 if (temp != blocks)
4495 emit_move_insn (blocks, temp);
4496
4497 emit_cmp_and_jump_insns (blocks, const0_rtx,
4498 EQ, NULL_RTX, mode, 1, loop_end_label);
4499
4500 emit_jump (loop_start_label);
4501 emit_label (loop_end_label);
4502
4503 emit_insn (gen_cmpmem_short (op0, op1,
4504 convert_to_mode (Pmode, count, 1)));
4505 emit_label (end_label);
4506
4507 emit_insn (gen_cmpint (target, ccreg));
4508 }
4509 return true;
4510 }
4511
4512
4513 /* Expand conditional increment or decrement using alc/slb instructions.
4514 Should generate code setting DST to either SRC or SRC + INCREMENT,
4515 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
4516 Returns true if successful, false otherwise.
4517
4518 That makes it possible to implement some if-constructs without jumps e.g.:
4519 (borrow = CC0 | CC1 and carry = CC2 | CC3)
4520 unsigned int a, b, c;
4521 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
4522 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
4523 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
4524 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
4525
4526 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
4527 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
4528 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
4529 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
4530 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
4531
4532 bool
4533 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
4534 rtx dst, rtx src, rtx increment)
4535 {
4536 enum machine_mode cmp_mode;
4537 enum machine_mode cc_mode;
4538 rtx op_res;
4539 rtx insn;
4540 rtvec p;
4541 int ret;
4542
4543 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
4544 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
4545 cmp_mode = SImode;
4546 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
4547 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
4548 cmp_mode = DImode;
4549 else
4550 return false;
4551
4552 /* Try ADD LOGICAL WITH CARRY. */
4553 if (increment == const1_rtx)
4554 {
4555 /* Determine CC mode to use. */
4556 if (cmp_code == EQ || cmp_code == NE)
4557 {
4558 if (cmp_op1 != const0_rtx)
4559 {
4560 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4561 NULL_RTX, 0, OPTAB_WIDEN);
4562 cmp_op1 = const0_rtx;
4563 }
4564
4565 cmp_code = cmp_code == EQ ? LEU : GTU;
4566 }
4567
4568 if (cmp_code == LTU || cmp_code == LEU)
4569 {
4570 rtx tem = cmp_op0;
4571 cmp_op0 = cmp_op1;
4572 cmp_op1 = tem;
4573 cmp_code = swap_condition (cmp_code);
4574 }
4575
4576 switch (cmp_code)
4577 {
4578 case GTU:
4579 cc_mode = CCUmode;
4580 break;
4581
4582 case GEU:
4583 cc_mode = CCL3mode;
4584 break;
4585
4586 default:
4587 return false;
4588 }
4589
4590 /* Emit comparison instruction pattern. */
4591 if (!register_operand (cmp_op0, cmp_mode))
4592 cmp_op0 = force_reg (cmp_mode, cmp_op0);
4593
4594 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
4595 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
4596 /* We use insn_invalid_p here to add clobbers if required. */
4597 ret = insn_invalid_p (emit_insn (insn), false);
4598 gcc_assert (!ret);
4599
4600 /* Emit ALC instruction pattern. */
4601 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
4602 gen_rtx_REG (cc_mode, CC_REGNUM),
4603 const0_rtx);
4604
4605 if (src != const0_rtx)
4606 {
4607 if (!register_operand (src, GET_MODE (dst)))
4608 src = force_reg (GET_MODE (dst), src);
4609
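/* Add SRC and an explicit zero addend so the expression has the
   nested-PLUS shape the alc insn patterns expect.  */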
4610 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
4611 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
4612 }
4613
4614 p = rtvec_alloc (2);
4615 RTVEC_ELT (p, 0) =
4616 gen_rtx_SET (VOIDmode, dst, op_res);
4617 RTVEC_ELT (p, 1) =
4618 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4619 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
4620
4621 return true;
4622 }
4623
4624 /* Try SUBTRACT LOGICAL WITH BORROW. */
4625 if (increment == constm1_rtx)
4626 {
4627 /* Determine CC mode to use. */
4628 if (cmp_code == EQ || cmp_code == NE)
4629 {
4630 if (cmp_op1 != const0_rtx)
4631 {
4632 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
4633 NULL_RTX, 0, OPTAB_WIDEN);
4634 cmp_op1 = const0_rtx;
4635 }
4636
4637 cmp_code = cmp_code == EQ ? LEU : GTU;
4638 }
4639
4640 if (cmp_code == GTU || cmp_code == GEU)
4641 {
4642 rtx tem = cmp_op0;
4643 cmp_op0 = cmp_op1;
4644 cmp_op1 = tem;
4645 cmp_code = swap_condition (cmp_code);
4646 }
4647
4648 switch (cmp_code)
4649 {
4650 case LEU:
4651 cc_mode = CCUmode;
4652 break;
4653
4654 case LTU:
4655 cc_mode = CCL3mode;
4656 break;
4657
4658 default:
4659 return false;
4660 }
4661
4662 /* Emit comparison instruction pattern. */
4663 if (!register_operand (cmp_op0, cmp_mode))
4664 cmp_op0 = force_reg (cmp_mode, cmp_op0);
4665
4666 insn = gen_rtx_SET (VOIDmode, gen_rtx_REG (cc_mode, CC_REGNUM),
4667 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
4668 /* We use insn_invalid_p here to add clobbers if required. */
4669 ret = insn_invalid_p (emit_insn (insn), false);
4670 gcc_assert (!ret);
4671
4672 /* Emit SLB instruction pattern. */
4673 if (!register_operand (src, GET_MODE (dst)))
4674 src = force_reg (GET_MODE (dst), src);
4675
4676 op_res = gen_rtx_MINUS (GET_MODE (dst),
4677 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
4678 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
4679 gen_rtx_REG (cc_mode, CC_REGNUM),
4680 const0_rtx));
4681 p = rtvec_alloc (2);
4682 RTVEC_ELT (p, 0) =
4683 gen_rtx_SET (VOIDmode, dst, op_res);
4684 RTVEC_ELT (p, 1) =
4685 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4686 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
4687
4688 return true;
4689 }
4690
4691 return false;
4692 }
4693
4694 /* Expand code for the insv template. Return true if successful. */
4695
4696 bool
4697 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
4698 {
4699 int bitsize = INTVAL (op1);
4700 int bitpos = INTVAL (op2);
4701 enum machine_mode mode = GET_MODE (dest);
4702 enum machine_mode smode;
4703 int smode_bsize, mode_bsize;
4704 rtx op, clobber;
4705
4706 if (bitsize + bitpos > GET_MODE_SIZE (mode))
4707 return false;
4708
4709 /* Generate INSERT IMMEDIATE (IILL et al). */
4710 /* (set (ze (reg)) (const_int)). */
4711 if (TARGET_ZARCH
4712 && register_operand (dest, word_mode)
4713 && (bitpos % 16) == 0
4714 && (bitsize % 16) == 0
4715 && const_int_operand (src, VOIDmode))
4716 {
4717 HOST_WIDE_INT val = INTVAL (src);
4718 int regpos = bitpos + bitsize;
4719
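/* Work downwards from the most significant end of the field, inserting
   32-bit (with TARGET_EXTIMM) or 16-bit immediate chunks until the whole
   field has been written.  */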
4720 while (regpos > bitpos)
4721 {
4722 enum machine_mode putmode;
4723 int putsize;
4724
4725 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
4726 putmode = SImode;
4727 else
4728 putmode = HImode;
4729
4730 putsize = GET_MODE_BITSIZE (putmode);
4731 regpos -= putsize;
4732 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
4733 GEN_INT (putsize),
4734 GEN_INT (regpos)),
4735 gen_int_mode (val, putmode));
4736 val >>= putsize;
4737 }
4738 gcc_assert (regpos == bitpos);
4739 return true;
4740 }
4741
4742 smode = smallest_mode_for_size (bitsize, MODE_INT);
4743 smode_bsize = GET_MODE_BITSIZE (smode);
4744 mode_bsize = GET_MODE_BITSIZE (mode);
4745
4746 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
4747 if (bitpos == 0
4748 && (bitsize % BITS_PER_UNIT) == 0
4749 && MEM_P (dest)
4750 && (register_operand (src, word_mode)
4751 || const_int_operand (src, VOIDmode)))
4752 {
4753 /* Emit standard pattern if possible. */
4754 if (smode_bsize == bitsize)
4755 {
4756 emit_move_insn (adjust_address (dest, smode, 0),
4757 gen_lowpart (smode, src));
4758 return true;
4759 }
4760
4761 /* (set (ze (mem)) (const_int)). */
4762 else if (const_int_operand (src, VOIDmode))
4763 {
4764 int size = bitsize / BITS_PER_UNIT;
4765 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
4766 BLKmode,
4767 UNITS_PER_WORD - size);
4768
4769 dest = adjust_address (dest, BLKmode, 0);
4770 set_mem_size (dest, size);
4771 s390_expand_movmem (dest, src_mem, GEN_INT (size));
4772 return true;
4773 }
4774
4775 /* (set (ze (mem)) (reg)). */
4776 else if (register_operand (src, word_mode))
4777 {
4778 if (bitsize <= 32)
4779 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
4780 const0_rtx), src);
4781 else
4782 {
4783 /* Emit st,stcmh sequence. */
4784 int stcmh_width = bitsize - 32;
4785 int size = stcmh_width / BITS_PER_UNIT;
4786
4787 emit_move_insn (adjust_address (dest, SImode, size),
4788 gen_lowpart (SImode, src));
4789 set_mem_size (dest, size);
4790 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
4791 GEN_INT (stcmh_width),
4792 const0_rtx),
4793 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
4794 }
4795 return true;
4796 }
4797 }
4798
4799 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
4800 if ((bitpos % BITS_PER_UNIT) == 0
4801 && (bitsize % BITS_PER_UNIT) == 0
4802 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
4803 && MEM_P (src)
4804 && (mode == DImode || mode == SImode)
4805 && register_operand (dest, mode))
4806 {
4807 /* Emit a strict_low_part pattern if possible. */
4808 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
4809 {
4810 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
4811 op = gen_rtx_SET (VOIDmode, op, gen_lowpart (smode, src));
4812 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4813 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
4814 return true;
4815 }
4816
4817 /* ??? There are more powerful versions of ICM that are not
4818 completely represented in the md file. */
4819 }
4820
4821 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
4822 if (TARGET_Z10 && (mode == DImode || mode == SImode))
4823 {
4824 enum machine_mode mode_s = GET_MODE (src);
4825
4826 if (mode_s == VOIDmode)
4827 {
4828 /* Assume const_int etc already in the proper mode. */
4829 src = force_reg (mode, src);
4830 }
4831 else if (mode_s != mode)
4832 {
4833 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
4834 src = force_reg (mode_s, src);
4835 src = gen_lowpart (mode, src);
4836 }
4837
4838 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
4839 op = gen_rtx_SET (VOIDmode, op, src);
4840
4841 if (!TARGET_ZEC12)
4842 {
4843 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
4844 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
4845 }
4846 emit_insn (op);
4847
4848 return true;
4849 }
4850
4851 return false;
4852 }
4853
4854 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
4855 register that holds VAL of mode MODE shifted by COUNT bits. */
4856
4857 static inline rtx
4858 s390_expand_mask_and_shift (rtx val, enum machine_mode mode, rtx count)
4859 {
4860 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
4861 NULL_RTX, 1, OPTAB_DIRECT);
4862 return expand_simple_binop (SImode, ASHIFT, val, count,
4863 NULL_RTX, 1, OPTAB_DIRECT);
4864 }
4865
4866 /* Structure to hold the initial parameters for a compare_and_swap operation
4867 in HImode and QImode. */
4868
4869 struct alignment_context
4870 {
4871 rtx memsi; /* SI aligned memory location. */
4872 rtx shift; /* Bit offset with regard to lsb. */
4873 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
4874 rtx modemaski; /* ~modemask */
4875 bool aligned; /* True if memory is aligned, false otherwise. */
4876 };
4877
4878 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
4879 structure AC for transparent simplification if the memory alignment is known
4880 to be at least 32bit. MEM is the memory location for the actual operation
4881 and MODE its mode. */
4882
4883 static void
4884 init_alignment_context (struct alignment_context *ac, rtx mem,
4885 enum machine_mode mode)
4886 {
4887 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
4888 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
4889
4890 if (ac->aligned)
4891 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
4892 else
4893 {
4894 /* Alignment is unknown. */
4895 rtx byteoffset, addr, align;
4896
4897 /* Force the address into a register. */
4898 addr = force_reg (Pmode, XEXP (mem, 0));
4899
4900 /* Align it to SImode. */
4901 align = expand_simple_binop (Pmode, AND, addr,
4902 GEN_INT (-GET_MODE_SIZE (SImode)),
4903 NULL_RTX, 1, OPTAB_DIRECT);
4904 /* Generate MEM. */
4905 ac->memsi = gen_rtx_MEM (SImode, align);
4906 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
4907 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
4908 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
4909
4910 /* Calculate shiftcount. */
4911 byteoffset = expand_simple_binop (Pmode, AND, addr,
4912 GEN_INT (GET_MODE_SIZE (SImode) - 1),
4913 NULL_RTX, 1, OPTAB_DIRECT);
4914 /* As we already have some offset, evaluate the remaining distance. */
4915 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
4916 NULL_RTX, 1, OPTAB_DIRECT);
4917 }
4918
4919 /* Shift is the byte count, but we need the bitcount. */
4920 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
4921 NULL_RTX, 1, OPTAB_DIRECT);
4922
4923 /* Calculate masks. */
4924 ac->modemask = expand_simple_binop (SImode, ASHIFT,
4925 GEN_INT (GET_MODE_MASK (mode)),
4926 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
4927 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
4928 NULL_RTX, 1);
4929 }
4930
4931 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
4932 emit a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
4933 perform the merge in SEQ2. */
4934
4935 static rtx
4936 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
4937 enum machine_mode mode, rtx val, rtx ins)
4938 {
4939 rtx tmp;
4940
4941 if (ac->aligned)
4942 {
4943 start_sequence ();
4944 tmp = copy_to_mode_reg (SImode, val);
4945 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
4946 const0_rtx, ins))
4947 {
4948 *seq1 = NULL;
4949 *seq2 = get_insns ();
4950 end_sequence ();
4951 return tmp;
4952 }
4953 end_sequence ();
4954 }
4955
4956 /* Failed to use insv. Generate a two part shift and mask. */
4957 start_sequence ();
4958 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
4959 *seq1 = get_insns ();
4960 end_sequence ();
4961
4962 start_sequence ();
4963 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
4964 *seq2 = get_insns ();
4965 end_sequence ();
4966
4967 return tmp;
4968 }
4969
4970 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
4971 the memory location, CMP the old value to compare MEM with and NEW_RTX the
4972 value to set if CMP == MEM. */
4973
4974 void
4975 s390_expand_cs_hqi (enum machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
4976 rtx cmp, rtx new_rtx, bool is_weak)
4977 {
4978 struct alignment_context ac;
4979 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
4980 rtx res = gen_reg_rtx (SImode);
4981 rtx csloop = NULL, csend = NULL;
4982
4983 gcc_assert (MEM_P (mem));
4984
4985 init_alignment_context (&ac, mem, mode);
4986
4987 /* Load full word. Subsequent loads are performed by CS. */
4988 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
4989 NULL_RTX, 1, OPTAB_DIRECT);
4990
4991 /* Prepare insertions of cmp and new_rtx into the loaded value. When
4992 possible, we try to use insv to make this happen efficiently. If
4993 that fails we'll generate code both inside and outside the loop. */
4994 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
4995 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
4996
4997 if (seq0)
4998 emit_insn (seq0);
4999 if (seq1)
5000 emit_insn (seq1);
5001
5002 /* Start CS loop. */
5003 if (!is_weak)
5004 {
5005 /* Begin assuming success. */
5006 emit_move_insn (btarget, const1_rtx);
5007
5008 csloop = gen_label_rtx ();
5009 csend = gen_label_rtx ();
5010 emit_label (csloop);
5011 }
5012
5013 /* val = "<mem>00..0<mem>"
5014 cmp = "00..0<cmp>00..0"
5015 new = "00..0<new>00..0"
5016 */
5017
5018 emit_insn (seq2);
5019 emit_insn (seq3);
5020
5021 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv);
5022 if (is_weak)
5023 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
5024 else
5025 {
5026 rtx tmp;
5027
5028 /* Jump to end if we're done (likely?). */
5029 s390_emit_jump (csend, cc);
5030
5031 /* Check for changes outside mode, and loop internally if so.
5032 Arrange the moves so that the compare is adjacent to the
5033 branch so that we can generate CRJ. */
5034 tmp = copy_to_reg (val);
5035 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
5036 1, OPTAB_DIRECT);
5037 cc = s390_emit_compare (NE, val, tmp);
5038 s390_emit_jump (csloop, cc);
5039
5040 /* Failed. */
5041 emit_move_insn (btarget, const0_rtx);
5042 emit_label (csend);
5043 }
5044
5045 /* Return the correct part of the bitfield. */
5046 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
5047 NULL_RTX, 1, OPTAB_DIRECT), 1);
5048 }
5049
5050 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
5051 and VAL the value to play with. If AFTER is true then store the value
5052 MEM holds after the operation, if AFTER is false then store the value MEM
5053 holds before the operation. If TARGET is zero then discard that value, else
5054 store it to TARGET. */
5055
5056 void
5057 s390_expand_atomic (enum machine_mode mode, enum rtx_code code,
5058 rtx target, rtx mem, rtx val, bool after)
5059 {
5060 struct alignment_context ac;
5061 rtx cmp;
5062 rtx new_rtx = gen_reg_rtx (SImode);
5063 rtx orig = gen_reg_rtx (SImode);
5064 rtx csloop = gen_label_rtx ();
5065
5066 gcc_assert (!target || register_operand (target, VOIDmode));
5067 gcc_assert (MEM_P (mem));
5068
5069 init_alignment_context (&ac, mem, mode);
5070
5071 /* Shift val to the correct bit positions.
5072 Preserve "icm", but prevent "ex icm". */
5073 if (!(ac.aligned && code == SET && MEM_P (val)))
5074 val = s390_expand_mask_and_shift (val, mode, ac.shift);
5075
5076 /* Further preparation insns. */
5077 if (code == PLUS || code == MINUS)
5078 emit_move_insn (orig, val);
5079 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
5080 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
5081 NULL_RTX, 1, OPTAB_DIRECT);
5082
5083 /* Load full word. Subsequent loads are performed by CS. */
5084 cmp = force_reg (SImode, ac.memsi);
5085
5086 /* Start CS loop. */
5087 emit_label (csloop);
5088 emit_move_insn (new_rtx, cmp);
5089
5090 /* Patch new with val at correct position. */
5091 switch (code)
5092 {
5093 case PLUS:
5094 case MINUS:
5095 val = expand_simple_binop (SImode, code, new_rtx, orig,
5096 NULL_RTX, 1, OPTAB_DIRECT);
5097 val = expand_simple_binop (SImode, AND, val, ac.modemask,
5098 NULL_RTX, 1, OPTAB_DIRECT);
5099 /* FALLTHRU */
5100 case SET:
5101 if (ac.aligned && MEM_P (val))
5102 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
5103 0, 0, SImode, val);
5104 else
5105 {
5106 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
5107 NULL_RTX, 1, OPTAB_DIRECT);
5108 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
5109 NULL_RTX, 1, OPTAB_DIRECT);
5110 }
5111 break;
5112 case AND:
5113 case IOR:
5114 case XOR:
5115 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
5116 NULL_RTX, 1, OPTAB_DIRECT);
5117 break;
5118 case MULT: /* NAND */
5119 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
5120 NULL_RTX, 1, OPTAB_DIRECT);
5121 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
5122 NULL_RTX, 1, OPTAB_DIRECT);
5123 break;
5124 default:
5125 gcc_unreachable ();
5126 }
5127
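/* CS stores NEW_RTX only if MEM still contains CMP; on failure CMP is
   reloaded with the current memory contents, so just loop back and
   retry with the refreshed value.  */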
5128 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
5129 ac.memsi, cmp, new_rtx));
5130
5131 /* Return the correct part of the bitfield. */
5132 if (target)
5133 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
5134 after ? new_rtx : cmp, ac.shift,
5135 NULL_RTX, 1, OPTAB_DIRECT), 1);
5136 }
5137
5138 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
5139 We need to emit DTP-relative relocations. */
5140
5141 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
5142
5143 static void
5144 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
5145 {
5146 switch (size)
5147 {
5148 case 4:
5149 fputs ("\t.long\t", file);
5150 break;
5151 case 8:
5152 fputs ("\t.quad\t", file);
5153 break;
5154 default:
5155 gcc_unreachable ();
5156 }
5157 output_addr_const (file, x);
5158 fputs ("@DTPOFF", file);
5159 }
5160
5161 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
5162 /* Implement TARGET_MANGLE_TYPE. */
5163
5164 static const char *
5165 s390_mangle_type (const_tree type)
5166 {
5167 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
5168 && TARGET_LONG_DOUBLE_128)
5169 return "g";
5170
5171 /* For all other types, use normal C++ mangling. */
5172 return NULL;
5173 }
5174 #endif
5175
5176 /* In the name of slightly smaller debug output, and to cater to
5177 general assembler lossage, recognize various UNSPEC sequences
5178 and turn them back into a direct symbol reference. */
5179
5180 static rtx
5181 s390_delegitimize_address (rtx orig_x)
5182 {
5183 rtx x, y;
5184
5185 orig_x = delegitimize_mem_from_attrs (orig_x);
5186 x = orig_x;
5187
5188 /* Extract the symbol ref from:
5189 (plus:SI (reg:SI 12 %r12)
5190 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
5191 UNSPEC_GOTOFF/PLTOFF)))
5192 and
5193 (plus:SI (reg:SI 12 %r12)
5194 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
5195 UNSPEC_GOTOFF/PLTOFF)
5196 (const_int 4 [0x4])))) */
5197 if (GET_CODE (x) == PLUS
5198 && REG_P (XEXP (x, 0))
5199 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
5200 && GET_CODE (XEXP (x, 1)) == CONST)
5201 {
5202 HOST_WIDE_INT offset = 0;
5203
5204 /* The const operand. */
5205 y = XEXP (XEXP (x, 1), 0);
5206
5207 if (GET_CODE (y) == PLUS
5208 && GET_CODE (XEXP (y, 1)) == CONST_INT)
5209 {
5210 offset = INTVAL (XEXP (y, 1));
5211 y = XEXP (y, 0);
5212 }
5213
5214 if (GET_CODE (y) == UNSPEC
5215 && (XINT (y, 1) == UNSPEC_GOTOFF
5216 || XINT (y, 1) == UNSPEC_PLTOFF))
5217 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
5218 }
5219
5220 if (GET_CODE (x) != MEM)
5221 return orig_x;
5222
5223 x = XEXP (x, 0);
5224 if (GET_CODE (x) == PLUS
5225 && GET_CODE (XEXP (x, 1)) == CONST
5226 && GET_CODE (XEXP (x, 0)) == REG
5227 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
5228 {
5229 y = XEXP (XEXP (x, 1), 0);
5230 if (GET_CODE (y) == UNSPEC
5231 && XINT (y, 1) == UNSPEC_GOT)
5232 y = XVECEXP (y, 0, 0);
5233 else
5234 return orig_x;
5235 }
5236 else if (GET_CODE (x) == CONST)
5237 {
5238 /* Extract the symbol ref from:
5239 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
5240 UNSPEC_PLT/GOTENT))) */
5241
5242 y = XEXP (x, 0);
5243 if (GET_CODE (y) == UNSPEC
5244 && (XINT (y, 1) == UNSPEC_GOTENT
5245 || XINT (y, 1) == UNSPEC_PLT))
5246 y = XVECEXP (y, 0, 0);
5247 else
5248 return orig_x;
5249 }
5250 else
5251 return orig_x;
5252
5253 if (GET_MODE (orig_x) != Pmode)
5254 {
5255 if (GET_MODE (orig_x) == BLKmode)
5256 return orig_x;
5257 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
5258 if (y == NULL_RTX)
5259 return orig_x;
5260 }
5261 return y;
5262 }
5263
5264 /* Output operand OP to stdio stream FILE.
5265 OP is an address (register + offset) which is not used to address data;
5266 instead the rightmost bits are interpreted as the value. */
5267
5268 static void
5269 print_shift_count_operand (FILE *file, rtx op)
5270 {
5271 HOST_WIDE_INT offset;
5272 rtx base;
5273
5274 /* Extract base register and offset. */
5275 if (!s390_decompose_shift_count (op, &base, &offset))
5276 gcc_unreachable ();
5277
5278 /* Sanity check. */
5279 if (base)
5280 {
5281 gcc_assert (GET_CODE (base) == REG);
5282 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
5283 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
5284 }
5285
5286 /* Offsets are restricted to twelve bits. */
5287 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
5288 if (base)
5289 fprintf (file, "(%s)", reg_names[REGNO (base)]);
5290 }
5291
5292 /* See 'get_some_local_dynamic_name'. */
5293
5294 static int
5295 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
5296 {
5297 rtx x = *px;
5298
5299 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
5300 {
5301 x = get_pool_constant (x);
5302 return for_each_rtx (&x, get_some_local_dynamic_name_1, 0);
5303 }
5304
5305 if (GET_CODE (x) == SYMBOL_REF
5306 && tls_symbolic_operand (x) == TLS_MODEL_LOCAL_DYNAMIC)
5307 {
5308 cfun->machine->some_ld_name = XSTR (x, 0);
5309 return 1;
5310 }
5311
5312 return 0;
5313 }
5314
5315 /* Locate some local-dynamic symbol still in use by this function
5316 so that we can print its name in local-dynamic base patterns. */
5317
5318 static const char *
5319 get_some_local_dynamic_name (void)
5320 {
5321 rtx insn;
5322
5323 if (cfun->machine->some_ld_name)
5324 return cfun->machine->some_ld_name;
5325
5326 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
5327 if (INSN_P (insn)
5328 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
5329 return cfun->machine->some_ld_name;
5330
5331 gcc_unreachable ();
5332 }
5333
5334 /* Output machine-dependent UNSPECs occurring in address constant X
5335 in assembler syntax to stdio stream FILE. Returns true if the
5336 constant X could be recognized, false otherwise. */
5337
5338 static bool
5339 s390_output_addr_const_extra (FILE *file, rtx x)
5340 {
5341 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
5342 switch (XINT (x, 1))
5343 {
5344 case UNSPEC_GOTENT:
5345 output_addr_const (file, XVECEXP (x, 0, 0));
5346 fprintf (file, "@GOTENT");
5347 return true;
5348 case UNSPEC_GOT:
5349 output_addr_const (file, XVECEXP (x, 0, 0));
5350 fprintf (file, "@GOT");
5351 return true;
5352 case UNSPEC_GOTOFF:
5353 output_addr_const (file, XVECEXP (x, 0, 0));
5354 fprintf (file, "@GOTOFF");
5355 return true;
5356 case UNSPEC_PLT:
5357 output_addr_const (file, XVECEXP (x, 0, 0));
5358 fprintf (file, "@PLT");
5359 return true;
5360 case UNSPEC_PLTOFF:
5361 output_addr_const (file, XVECEXP (x, 0, 0));
5362 fprintf (file, "@PLTOFF");
5363 return true;
5364 case UNSPEC_TLSGD:
5365 output_addr_const (file, XVECEXP (x, 0, 0));
5366 fprintf (file, "@TLSGD");
5367 return true;
5368 case UNSPEC_TLSLDM:
5369 assemble_name (file, get_some_local_dynamic_name ());
5370 fprintf (file, "@TLSLDM");
5371 return true;
5372 case UNSPEC_DTPOFF:
5373 output_addr_const (file, XVECEXP (x, 0, 0));
5374 fprintf (file, "@DTPOFF");
5375 return true;
5376 case UNSPEC_NTPOFF:
5377 output_addr_const (file, XVECEXP (x, 0, 0));
5378 fprintf (file, "@NTPOFF");
5379 return true;
5380 case UNSPEC_GOTNTPOFF:
5381 output_addr_const (file, XVECEXP (x, 0, 0));
5382 fprintf (file, "@GOTNTPOFF");
5383 return true;
5384 case UNSPEC_INDNTPOFF:
5385 output_addr_const (file, XVECEXP (x, 0, 0));
5386 fprintf (file, "@INDNTPOFF");
5387 return true;
5388 }
5389
5390 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
5391 switch (XINT (x, 1))
5392 {
5393 case UNSPEC_POOL_OFFSET:
5394 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
5395 output_addr_const (file, x);
5396 return true;
5397 }
5398 return false;
5399 }
5400
5401 /* Output address operand ADDR in assembler syntax to
5402 stdio stream FILE. */
5403
5404 void
5405 print_operand_address (FILE *file, rtx addr)
5406 {
5407 struct s390_address ad;
5408
5409 if (s390_loadrelative_operand_p (addr, NULL, NULL))
5410 {
5411 if (!TARGET_Z10)
5412 {
5413 output_operand_lossage ("symbolic memory references are "
5414 "only supported on z10 or later");
5415 return;
5416 }
5417 output_addr_const (file, addr);
5418 return;
5419 }
5420
5421 if (!s390_decompose_address (addr, &ad)
5422 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5423 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
5424 output_operand_lossage ("cannot decompose address");
5425
5426 if (ad.disp)
5427 output_addr_const (file, ad.disp);
5428 else
5429 fprintf (file, "0");
5430
5431 if (ad.base && ad.indx)
5432 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
5433 reg_names[REGNO (ad.base)]);
5434 else if (ad.base)
5435 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
5436 }
5437
5438 /* Output operand X in assembler syntax to stdio stream FILE.
5439 CODE specifies the format flag. The following format flags
5440 are recognized:
5441
5442 'C': print opcode suffix for branch condition.
5443 'D': print opcode suffix for inverse branch condition.
5444 'E': print opcode suffix for branch on index instruction.
5445 'G': print the size of the operand in bytes.
5446 'J': print tls_load/tls_gdcall/tls_ldcall suffix
5447 'M': print the second word of a TImode operand.
5448 'N': print the second word of a DImode operand.
5449 'O': print only the displacement of a memory reference.
5450 'R': print only the base register of a memory reference.
5451 'S': print S-type memory reference (base+displacement).
5452 'Y': print shift count operand.
5453
5454 'b': print integer X as if it's an unsigned byte.
5455 'c': print integer X as if it's a signed byte.
5456 'e': "end" of DImode contiguous bitmask X.
5457 'f': "end" of SImode contiguous bitmask X.
5458 'h': print integer X as if it's a signed halfword.
5459 'i': print the first nonzero HImode part of X.
5460 'j': print the first HImode part unequal to -1 of X.
5461 'k': print the first nonzero SImode part of X.
5462 'm': print the first SImode part unequal to -1 of X.
5463 'o': print integer X as if it's an unsigned 32bit word.
5464 's': "start" of DImode contiguous bitmask X.
5465 't': "start" of SImode contiguous bitmask X.
5466 'x': print integer X as if it's an unsigned halfword.
5467 */
5468
5469 void
5470 print_operand (FILE *file, rtx x, int code)
5471 {
5472 HOST_WIDE_INT ival;
5473
5474 switch (code)
5475 {
5476 case 'C':
5477 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
5478 return;
5479
5480 case 'D':
5481 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
5482 return;
5483
5484 case 'E':
5485 if (GET_CODE (x) == LE)
5486 fprintf (file, "l");
5487 else if (GET_CODE (x) == GT)
5488 fprintf (file, "h");
5489 else
5490 output_operand_lossage ("invalid comparison operator "
5491 "for 'E' output modifier");
5492 return;
5493
5494 case 'J':
5495 if (GET_CODE (x) == SYMBOL_REF)
5496 {
5497 fprintf (file, "%s", ":tls_load:");
5498 output_addr_const (file, x);
5499 }
5500 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
5501 {
5502 fprintf (file, "%s", ":tls_gdcall:");
5503 output_addr_const (file, XVECEXP (x, 0, 0));
5504 }
5505 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
5506 {
5507 fprintf (file, "%s", ":tls_ldcall:");
5508 assemble_name (file, get_some_local_dynamic_name ());
5509 }
5510 else
5511 output_operand_lossage ("invalid reference for 'J' output modifier");
5512 return;
5513
5514 case 'G':
5515 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
5516 return;
5517
5518 case 'O':
5519 {
5520 struct s390_address ad;
5521 int ret;
5522
5523 if (!MEM_P (x))
5524 {
5525 output_operand_lossage ("memory reference expected for "
5526 "'O' output modifier");
5527 return;
5528 }
5529
5530 ret = s390_decompose_address (XEXP (x, 0), &ad);
5531
5532 if (!ret
5533 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5534 || ad.indx)
5535 {
5536 output_operand_lossage ("invalid address for 'O' output modifier");
5537 return;
5538 }
5539
5540 if (ad.disp)
5541 output_addr_const (file, ad.disp);
5542 else
5543 fprintf (file, "0");
5544 }
5545 return;
5546
5547 case 'R':
5548 {
5549 struct s390_address ad;
5550 int ret;
5551
5552 if (!MEM_P (x))
5553 {
5554 output_operand_lossage ("memory reference expected for "
5555 "'R' output modifier");
5556 return;
5557 }
5558
5559 ret = s390_decompose_address (XEXP (x, 0), &ad);
5560
5561 if (!ret
5562 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5563 || ad.indx)
5564 {
5565 output_operand_lossage ("invalid address for 'R' output modifier");
5566 return;
5567 }
5568
5569 if (ad.base)
5570 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
5571 else
5572 fprintf (file, "0");
5573 }
5574 return;
5575
5576 case 'S':
5577 {
5578 struct s390_address ad;
5579 int ret;
5580
5581 if (!MEM_P (x))
5582 {
5583 output_operand_lossage ("memory reference expected for "
5584 "'S' output modifier");
5585 return;
5586 }
5587 ret = s390_decompose_address (XEXP (x, 0), &ad);
5588
5589 if (!ret
5590 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
5591 || ad.indx)
5592 {
5593 output_operand_lossage ("invalid address for 'S' output modifier");
5594 return;
5595 }
5596
5597 if (ad.disp)
5598 output_addr_const (file, ad.disp);
5599 else
5600 fprintf (file, "0");
5601
5602 if (ad.base)
5603 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
5604 }
5605 return;
5606
5607 case 'N':
5608 if (GET_CODE (x) == REG)
5609 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
5610 else if (GET_CODE (x) == MEM)
5611 x = change_address (x, VOIDmode,
5612 plus_constant (Pmode, XEXP (x, 0), 4));
5613 else
5614 output_operand_lossage ("register or memory expression expected "
5615 "for 'N' output modifier");
5616 break;
5617
5618 case 'M':
5619 if (GET_CODE (x) == REG)
5620 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
5621 else if (GET_CODE (x) == MEM)
5622 x = change_address (x, VOIDmode,
5623 plus_constant (Pmode, XEXP (x, 0), 8));
5624 else
5625 output_operand_lossage ("register or memory expression expected "
5626 "for 'M' output modifier");
5627 break;
5628
5629 case 'Y':
5630 print_shift_count_operand (file, x);
5631 return;
5632 }
5633
5634 switch (GET_CODE (x))
5635 {
5636 case REG:
5637 fprintf (file, "%s", reg_names[REGNO (x)]);
5638 break;
5639
5640 case MEM:
5641 output_address (XEXP (x, 0));
5642 break;
5643
5644 case CONST:
5645 case CODE_LABEL:
5646 case LABEL_REF:
5647 case SYMBOL_REF:
5648 output_addr_const (file, x);
5649 break;
5650
5651 case CONST_INT:
5652 ival = INTVAL (x);
5653 switch (code)
5654 {
5655 case 0:
5656 break;
5657 case 'b':
5658 ival &= 0xff;
5659 break;
5660 case 'c':
5661 ival = ((ival & 0xff) ^ 0x80) - 0x80;
5662 break;
5663 case 'x':
5664 ival &= 0xffff;
5665 break;
5666 case 'h':
5667 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
5668 break;
5669 case 'i':
5670 ival = s390_extract_part (x, HImode, 0);
5671 break;
5672 case 'j':
5673 ival = s390_extract_part (x, HImode, -1);
5674 break;
5675 case 'k':
5676 ival = s390_extract_part (x, SImode, 0);
5677 break;
5678 case 'm':
5679 ival = s390_extract_part (x, SImode, -1);
5680 break;
5681 case 'o':
5682 ival &= 0xffffffff;
5683 break;
5684 case 'e': case 'f':
5685 case 's': case 't':
5686 {
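/* s390_contiguous_bitmask_p returns the LSB-relative position and
   length of the mask; convert them to the MSB-relative start
   ('s'/'t') and end ('e'/'f') bit numbers used by the instruction
   encodings.  */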
5687 int pos, len;
5688 bool ok;
5689
5690 len = (code == 's' || code == 'e' ? 64 : 32);
5691 ok = s390_contiguous_bitmask_p (ival, len, &pos, &len);
5692 gcc_assert (ok);
5693 if (code == 's' || code == 't')
5694 ival = 64 - pos - len;
5695 else
5696 ival = 64 - 1 - pos;
5697 }
5698 break;
5699 default:
5700 output_operand_lossage ("invalid constant for output modifier '%c'", code);
5701 }
5702 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
5703 break;
5704
5705 case CONST_DOUBLE:
5706 gcc_assert (GET_MODE (x) == VOIDmode);
5707 if (code == 'b')
5708 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xff);
5709 else if (code == 'x')
5710 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x) & 0xffff);
5711 else if (code == 'h')
5712 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
5713 ((CONST_DOUBLE_LOW (x) & 0xffff) ^ 0x8000) - 0x8000);
5714 else
5715 {
5716 if (code == 0)
5717 output_operand_lossage ("invalid constant - try using "
5718 "an output modifier");
5719 else
5720 output_operand_lossage ("invalid constant for output modifier '%c'",
5721 code);
5722 }
5723 break;
5724
5725 default:
5726 if (code == 0)
5727 output_operand_lossage ("invalid expression - try using "
5728 "an output modifier");
5729 else
5730 output_operand_lossage ("invalid expression for output "
5731 "modifier '%c'", code);
5732 break;
5733 }
5734 }
5735
5736 /* Target hook for assembling integer objects. We need to define it
5737 here to work around a bug in some versions of GAS, which couldn't
5738 handle values smaller than INT_MIN when printed in decimal. */
5739
5740 static bool
5741 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
5742 {
5743 if (size == 8 && aligned_p
5744 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
5745 {
5746 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
5747 INTVAL (x));
5748 return true;
5749 }
5750 return default_assemble_integer (x, size, aligned_p);
5751 }
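/* For illustration only (a hedged sketch, not part of the original code):
   with the workaround above, a 64-bit constant such as -2147483649
   (one below INT_MIN) would be emitted as
       .quad	0xffffffff7fffffff
   in hexadecimal, instead of the decimal form that triggers the GAS bug.  */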
5752
5753 /* Returns true if register REGNO is used for forming
5754 a memory address in expression X. */
5755
5756 static bool
5757 reg_used_in_mem_p (int regno, rtx x)
5758 {
5759 enum rtx_code code = GET_CODE (x);
5760 int i, j;
5761 const char *fmt;
5762
5763 if (code == MEM)
5764 {
5765 if (refers_to_regno_p (regno, regno+1,
5766 XEXP (x, 0), 0))
5767 return true;
5768 }
5769 else if (code == SET
5770 && GET_CODE (SET_DEST (x)) == PC)
5771 {
5772 if (refers_to_regno_p (regno, regno+1,
5773 SET_SRC (x), 0))
5774 return true;
5775 }
5776
5777 fmt = GET_RTX_FORMAT (code);
5778 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5779 {
5780 if (fmt[i] == 'e'
5781 && reg_used_in_mem_p (regno, XEXP (x, i)))
5782 return true;
5783
5784 else if (fmt[i] == 'E')
5785 for (j = 0; j < XVECLEN (x, i); j++)
5786 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
5787 return true;
5788 }
5789 return false;
5790 }
5791
5792 /* Returns true if expression DEP_RTX sets an address register
5793 used by instruction INSN to address memory. */
5794
5795 static bool
5796 addr_generation_dependency_p (rtx dep_rtx, rtx insn)
5797 {
5798 rtx target, pat;
5799
5800 if (NONJUMP_INSN_P (dep_rtx))
5801 dep_rtx = PATTERN (dep_rtx);
5802
5803 if (GET_CODE (dep_rtx) == SET)
5804 {
5805 target = SET_DEST (dep_rtx);
5806 if (GET_CODE (target) == STRICT_LOW_PART)
5807 target = XEXP (target, 0);
5808 while (GET_CODE (target) == SUBREG)
5809 target = SUBREG_REG (target);
5810
5811 if (GET_CODE (target) == REG)
5812 {
5813 int regno = REGNO (target);
5814
5815 if (s390_safe_attr_type (insn) == TYPE_LA)
5816 {
5817 pat = PATTERN (insn);
5818 if (GET_CODE (pat) == PARALLEL)
5819 {
5820 gcc_assert (XVECLEN (pat, 0) == 2);
5821 pat = XVECEXP (pat, 0, 0);
5822 }
5823 gcc_assert (GET_CODE (pat) == SET);
5824 return refers_to_regno_p (regno, regno+1, SET_SRC (pat), 0);
5825 }
5826 else if (get_attr_atype (insn) == ATYPE_AGEN)
5827 return reg_used_in_mem_p (regno, PATTERN (insn));
5828 }
5829 }
5830 return false;
5831 }
5832
5833 /* Return 1 if DEP_INSN sets a register used by INSN in the agen unit. */
5834
5835 int
5836 s390_agen_dep_p (rtx dep_insn, rtx insn)
5837 {
5838 rtx dep_rtx = PATTERN (dep_insn);
5839 int i;
5840
5841 if (GET_CODE (dep_rtx) == SET
5842 && addr_generation_dependency_p (dep_rtx, insn))
5843 return 1;
5844 else if (GET_CODE (dep_rtx) == PARALLEL)
5845 {
5846 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
5847 {
5848 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
5849 return 1;
5850 }
5851 }
5852 return 0;
5853 }
5854
5855
5856 /* A C statement (sans semicolon) to update the integer scheduling priority
5857 INSN_PRIORITY (INSN). Increase the priority to execute INSN earlier;
5858 reduce the priority to execute INSN later. Do not define this macro if
5859 you do not need to adjust the scheduling priorities of insns.
5860
5861 A STD instruction should be scheduled earlier,
5862 in order to use the bypass. */
5863 static int
5864 s390_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
5865 {
5866 if (! INSN_P (insn))
5867 return priority;
5868
5869 if (s390_tune != PROCESSOR_2084_Z990
5870 && s390_tune != PROCESSOR_2094_Z9_109
5871 && s390_tune != PROCESSOR_2097_Z10
5872 && s390_tune != PROCESSOR_2817_Z196
5873 && s390_tune != PROCESSOR_2827_ZEC12)
5874 return priority;
5875
5876 switch (s390_safe_attr_type (insn))
5877 {
5878 case TYPE_FSTOREDF:
5879 case TYPE_FSTORESF:
5880 priority = priority << 3;
5881 break;
5882 case TYPE_STORE:
5883 case TYPE_STM:
5884 priority = priority << 1;
5885 break;
5886 default:
5887 break;
5888 }
5889 return priority;
5890 }
5891
5892
5893 /* The number of instructions that can be issued per cycle. */
5894
5895 static int
5896 s390_issue_rate (void)
5897 {
5898 switch (s390_tune)
5899 {
5900 case PROCESSOR_2084_Z990:
5901 case PROCESSOR_2094_Z9_109:
5902 case PROCESSOR_2817_Z196:
5903 return 3;
5904 case PROCESSOR_2097_Z10:
5905 case PROCESSOR_2827_ZEC12:
5906 return 2;
5907 default:
5908 return 1;
5909 }
5910 }
5911
5912 static int
5913 s390_first_cycle_multipass_dfa_lookahead (void)
5914 {
5915 return 4;
5916 }
5917
5918 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
5919 Fix up MEMs as required. */
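/* For illustration (a sketch, not taken from the original sources): a
   literal pool reference of the form

     (mem (symbol_ref [constant pool address]))

   is rewritten into

     (mem (unspec [(symbol_ref ...) (reg base)] UNSPEC_LTREF))

   so that the dependence on the pool base register becomes explicit; the
   CONST + offset and SET (load-address) variants below are handled
   analogously.  */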
5920
5921 static void
5922 annotate_constant_pool_refs (rtx *x)
5923 {
5924 int i, j;
5925 const char *fmt;
5926
5927 gcc_assert (GET_CODE (*x) != SYMBOL_REF
5928 || !CONSTANT_POOL_ADDRESS_P (*x));
5929
5930 /* Literal pool references can only occur inside a MEM ... */
5931 if (GET_CODE (*x) == MEM)
5932 {
5933 rtx memref = XEXP (*x, 0);
5934
5935 if (GET_CODE (memref) == SYMBOL_REF
5936 && CONSTANT_POOL_ADDRESS_P (memref))
5937 {
5938 rtx base = cfun->machine->base_reg;
5939 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
5940 UNSPEC_LTREF);
5941
5942 *x = replace_equiv_address (*x, addr);
5943 return;
5944 }
5945
5946 if (GET_CODE (memref) == CONST
5947 && GET_CODE (XEXP (memref, 0)) == PLUS
5948 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
5949 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
5950 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
5951 {
5952 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
5953 rtx sym = XEXP (XEXP (memref, 0), 0);
5954 rtx base = cfun->machine->base_reg;
5955 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
5956 UNSPEC_LTREF);
5957
5958 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
5959 return;
5960 }
5961 }
5962
5963 /* ... or a load-address type pattern. */
5964 if (GET_CODE (*x) == SET)
5965 {
5966 rtx addrref = SET_SRC (*x);
5967
5968 if (GET_CODE (addrref) == SYMBOL_REF
5969 && CONSTANT_POOL_ADDRESS_P (addrref))
5970 {
5971 rtx base = cfun->machine->base_reg;
5972 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
5973 UNSPEC_LTREF);
5974
5975 SET_SRC (*x) = addr;
5976 return;
5977 }
5978
5979 if (GET_CODE (addrref) == CONST
5980 && GET_CODE (XEXP (addrref, 0)) == PLUS
5981 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
5982 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
5983 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
5984 {
5985 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
5986 rtx sym = XEXP (XEXP (addrref, 0), 0);
5987 rtx base = cfun->machine->base_reg;
5988 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
5989 UNSPEC_LTREF);
5990
5991 SET_SRC (*x) = plus_constant (Pmode, addr, off);
5992 return;
5993 }
5994 }
5995
5996 /* Annotate LTREL_BASE as well. */
5997 if (GET_CODE (*x) == UNSPEC
5998 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
5999 {
6000 rtx base = cfun->machine->base_reg;
6001 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
6002 UNSPEC_LTREL_BASE);
6003 return;
6004 }
6005
6006 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6007 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6008 {
6009 if (fmt[i] == 'e')
6010 {
6011 annotate_constant_pool_refs (&XEXP (*x, i));
6012 }
6013 else if (fmt[i] == 'E')
6014 {
6015 for (j = 0; j < XVECLEN (*x, i); j++)
6016 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
6017 }
6018 }
6019 }
6020
6021 /* Split all branches that exceed the maximum distance.
6022 Returns true if this created a new literal pool entry. */
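/* Illustrative sketch (an interpretation of the code below, not part of
   the original comment): a conditional branch whose target lies outside
   the +/-64KB range of the relative branch instructions is rewritten so
   that the target address is first loaded into the return register (from
   the literal pool, or via an LTREL base/offset pair for PIC code) and
   the branch then goes through that register.  */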
6023
6024 static int
6025 s390_split_branches (void)
6026 {
6027 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
6028 int new_literal = 0, ret;
6029 rtx insn, pat, tmp, target;
6030 rtx *label;
6031
6032 /* We need correct insn addresses. */
6033
6034 shorten_branches (get_insns ());
6035
6036 /* Find all branches that exceed 64KB, and split them. */
6037
6038 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6039 {
6040 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
6041 continue;
6042
6043 pat = PATTERN (insn);
6044 if (GET_CODE (pat) == PARALLEL)
6045 pat = XVECEXP (pat, 0, 0);
6046 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
6047 continue;
6048
6049 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
6050 {
6051 label = &SET_SRC (pat);
6052 }
6053 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
6054 {
6055 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
6056 label = &XEXP (SET_SRC (pat), 1);
6057 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
6058 label = &XEXP (SET_SRC (pat), 2);
6059 else
6060 continue;
6061 }
6062 else
6063 continue;
6064
6065 if (get_attr_length (insn) <= 4)
6066 continue;
6067
6068 /* We are going to use the return register as scratch register,
6069 make sure it will be saved/restored by the prologue/epilogue. */
6070 cfun_frame_layout.save_return_addr_p = 1;
6071
6072 if (!flag_pic)
6073 {
6074 new_literal = 1;
6075 tmp = force_const_mem (Pmode, *label);
6076 tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, tmp), insn);
6077 INSN_ADDRESSES_NEW (tmp, -1);
6078 annotate_constant_pool_refs (&PATTERN (tmp));
6079
6080 target = temp_reg;
6081 }
6082 else
6083 {
6084 new_literal = 1;
6085 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
6086 UNSPEC_LTREL_OFFSET);
6087 target = gen_rtx_CONST (Pmode, target);
6088 target = force_const_mem (Pmode, target);
6089 tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, target), insn);
6090 INSN_ADDRESSES_NEW (tmp, -1);
6091 annotate_constant_pool_refs (&PATTERN (tmp));
6092
6093 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
6094 cfun->machine->base_reg),
6095 UNSPEC_LTREL_BASE);
6096 target = gen_rtx_PLUS (Pmode, temp_reg, target);
6097 }
6098
6099 ret = validate_change (insn, label, target, 0);
6100 gcc_assert (ret);
6101 }
6102
6103 return new_literal;
6104 }
6105
6106
6107 /* Find an annotated literal pool symbol referenced in RTX X,
6108 and store it at REF. Will abort if X contains references to
6109 more than one such pool symbol; multiple references to the same
6110 symbol are allowed, however.
6111
6112 The rtx pointed to by REF must be initialized to NULL_RTX
6113 by the caller before calling this routine. */
6114
6115 static void
6116 find_constant_pool_ref (rtx x, rtx *ref)
6117 {
6118 int i, j;
6119 const char *fmt;
6120
6121 /* Ignore LTREL_BASE references. */
6122 if (GET_CODE (x) == UNSPEC
6123 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6124 return;
6125 /* Likewise POOL_ENTRY insns. */
6126 if (GET_CODE (x) == UNSPEC_VOLATILE
6127 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
6128 return;
6129
6130 gcc_assert (GET_CODE (x) != SYMBOL_REF
6131 || !CONSTANT_POOL_ADDRESS_P (x));
6132
6133 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
6134 {
6135 rtx sym = XVECEXP (x, 0, 0);
6136 gcc_assert (GET_CODE (sym) == SYMBOL_REF
6137 && CONSTANT_POOL_ADDRESS_P (sym));
6138
6139 if (*ref == NULL_RTX)
6140 *ref = sym;
6141 else
6142 gcc_assert (*ref == sym);
6143
6144 return;
6145 }
6146
6147 fmt = GET_RTX_FORMAT (GET_CODE (x));
6148 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6149 {
6150 if (fmt[i] == 'e')
6151 {
6152 find_constant_pool_ref (XEXP (x, i), ref);
6153 }
6154 else if (fmt[i] == 'E')
6155 {
6156 for (j = 0; j < XVECLEN (x, i); j++)
6157 find_constant_pool_ref (XVECEXP (x, i, j), ref);
6158 }
6159 }
6160 }
6161
6162 /* Replace every reference to the annotated literal pool
6163 symbol REF in X by its base plus OFFSET. */
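/* Sketch of the rewrite performed below (for illustration only):

     (unspec [(symbol_ref pool-sym) (reg base)] UNSPEC_LTREF)
       -->  (plus (reg base) offset)

   and, when the reference already carries a CONST_INT addend, that
   addend is folded into the resulting address with plus_constant.  */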
6164
6165 static void
6166 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
6167 {
6168 int i, j;
6169 const char *fmt;
6170
6171 gcc_assert (*x != ref);
6172
6173 if (GET_CODE (*x) == UNSPEC
6174 && XINT (*x, 1) == UNSPEC_LTREF
6175 && XVECEXP (*x, 0, 0) == ref)
6176 {
6177 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
6178 return;
6179 }
6180
6181 if (GET_CODE (*x) == PLUS
6182 && GET_CODE (XEXP (*x, 1)) == CONST_INT
6183 && GET_CODE (XEXP (*x, 0)) == UNSPEC
6184 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
6185 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
6186 {
6187 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
6188 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
6189 return;
6190 }
6191
6192 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6193 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6194 {
6195 if (fmt[i] == 'e')
6196 {
6197 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
6198 }
6199 else if (fmt[i] == 'E')
6200 {
6201 for (j = 0; j < XVECLEN (*x, i); j++)
6202 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
6203 }
6204 }
6205 }
6206
6207 /* Check whether X contains an UNSPEC_LTREL_BASE.
6208 Return its constant pool symbol if found, NULL_RTX otherwise. */
6209
6210 static rtx
6211 find_ltrel_base (rtx x)
6212 {
6213 int i, j;
6214 const char *fmt;
6215
6216 if (GET_CODE (x) == UNSPEC
6217 && XINT (x, 1) == UNSPEC_LTREL_BASE)
6218 return XVECEXP (x, 0, 0);
6219
6220 fmt = GET_RTX_FORMAT (GET_CODE (x));
6221 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
6222 {
6223 if (fmt[i] == 'e')
6224 {
6225 rtx fnd = find_ltrel_base (XEXP (x, i));
6226 if (fnd)
6227 return fnd;
6228 }
6229 else if (fmt[i] == 'E')
6230 {
6231 for (j = 0; j < XVECLEN (x, i); j++)
6232 {
6233 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
6234 if (fnd)
6235 return fnd;
6236 }
6237 }
6238 }
6239
6240 return NULL_RTX;
6241 }
6242
6243 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
6244
6245 static void
6246 replace_ltrel_base (rtx *x)
6247 {
6248 int i, j;
6249 const char *fmt;
6250
6251 if (GET_CODE (*x) == UNSPEC
6252 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
6253 {
6254 *x = XVECEXP (*x, 0, 1);
6255 return;
6256 }
6257
6258 fmt = GET_RTX_FORMAT (GET_CODE (*x));
6259 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
6260 {
6261 if (fmt[i] == 'e')
6262 {
6263 replace_ltrel_base (&XEXP (*x, i));
6264 }
6265 else if (fmt[i] == 'E')
6266 {
6267 for (j = 0; j < XVECLEN (*x, i); j++)
6268 replace_ltrel_base (&XVECEXP (*x, i, j));
6269 }
6270 }
6271 }
6272
6273
6274 /* We keep a list of constants which we have to add to internal
6275 constant tables in the middle of large functions. */
6276
6277 #define NR_C_MODES 11
6278 enum machine_mode constant_modes[NR_C_MODES] =
6279 {
6280 TFmode, TImode, TDmode,
6281 DFmode, DImode, DDmode,
6282 SFmode, SImode, SDmode,
6283 HImode,
6284 QImode
6285 };
6286
6287 struct constant
6288 {
6289 struct constant *next;
6290 rtx value;
6291 rtx label;
6292 };
6293
6294 struct constant_pool
6295 {
6296 struct constant_pool *next;
6297 rtx first_insn;
6298 rtx pool_insn;
6299 bitmap insns;
6300 rtx emit_pool_after;
6301
6302 struct constant *constants[NR_C_MODES];
6303 struct constant *execute;
6304 rtx label;
6305 int size;
6306 };
6307
6308 /* Allocate new constant_pool structure. */
6309
6310 static struct constant_pool *
6311 s390_alloc_pool (void)
6312 {
6313 struct constant_pool *pool;
6314 int i;
6315
6316 pool = (struct constant_pool *) xmalloc (sizeof *pool);
6317 pool->next = NULL;
6318 for (i = 0; i < NR_C_MODES; i++)
6319 pool->constants[i] = NULL;
6320
6321 pool->execute = NULL;
6322 pool->label = gen_label_rtx ();
6323 pool->first_insn = NULL_RTX;
6324 pool->pool_insn = NULL_RTX;
6325 pool->insns = BITMAP_ALLOC (NULL);
6326 pool->size = 0;
6327 pool->emit_pool_after = NULL_RTX;
6328
6329 return pool;
6330 }
6331
6332 /* Create new constant pool covering instructions starting at INSN
6333 and chain it to the end of POOL_LIST. */
6334
6335 static struct constant_pool *
6336 s390_start_pool (struct constant_pool **pool_list, rtx insn)
6337 {
6338 struct constant_pool *pool, **prev;
6339
6340 pool = s390_alloc_pool ();
6341 pool->first_insn = insn;
6342
6343 for (prev = pool_list; *prev; prev = &(*prev)->next)
6344 ;
6345 *prev = pool;
6346
6347 return pool;
6348 }
6349
6350 /* End range of instructions covered by POOL at INSN and emit
6351 placeholder insn representing the pool. */
6352
6353 static void
6354 s390_end_pool (struct constant_pool *pool, rtx insn)
6355 {
6356 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
6357
6358 if (!insn)
6359 insn = get_last_insn ();
6360
6361 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
6362 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6363 }
6364
6365 /* Add INSN to the list of insns covered by POOL. */
6366
6367 static void
6368 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
6369 {
6370 bitmap_set_bit (pool->insns, INSN_UID (insn));
6371 }
6372
6373 /* Return pool out of POOL_LIST that covers INSN. */
6374
6375 static struct constant_pool *
6376 s390_find_pool (struct constant_pool *pool_list, rtx insn)
6377 {
6378 struct constant_pool *pool;
6379
6380 for (pool = pool_list; pool; pool = pool->next)
6381 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
6382 break;
6383
6384 return pool;
6385 }
6386
6387 /* Add constant VAL of mode MODE to the constant pool POOL. */
6388
6389 static void
6390 s390_add_constant (struct constant_pool *pool, rtx val, enum machine_mode mode)
6391 {
6392 struct constant *c;
6393 int i;
6394
6395 for (i = 0; i < NR_C_MODES; i++)
6396 if (constant_modes[i] == mode)
6397 break;
6398 gcc_assert (i != NR_C_MODES);
6399
6400 for (c = pool->constants[i]; c != NULL; c = c->next)
6401 if (rtx_equal_p (val, c->value))
6402 break;
6403
6404 if (c == NULL)
6405 {
6406 c = (struct constant *) xmalloc (sizeof *c);
6407 c->value = val;
6408 c->label = gen_label_rtx ();
6409 c->next = pool->constants[i];
6410 pool->constants[i] = c;
6411 pool->size += GET_MODE_SIZE (mode);
6412 }
6413 }
6414
6415 /* Return an rtx that represents the offset of X from the start of
6416 pool POOL. */
6417
6418 static rtx
6419 s390_pool_offset (struct constant_pool *pool, rtx x)
6420 {
6421 rtx label;
6422
6423 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
6424 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
6425 UNSPEC_POOL_OFFSET);
6426 return gen_rtx_CONST (GET_MODE (x), x);
6427 }
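/* The returned expression has the shape (illustrative sketch only):

     (const (unspec [X (label_ref pool-label)] UNSPEC_POOL_OFFSET))

   i.e. the offset stays symbolic until the pool is actually output.  */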
6428
6429 /* Find constant VAL of mode MODE in the constant pool POOL.
6430 Return an RTX describing the distance from the start of
6431 the pool to the location of the constant. */
6432
6433 static rtx
6434 s390_find_constant (struct constant_pool *pool, rtx val,
6435 enum machine_mode mode)
6436 {
6437 struct constant *c;
6438 int i;
6439
6440 for (i = 0; i < NR_C_MODES; i++)
6441 if (constant_modes[i] == mode)
6442 break;
6443 gcc_assert (i != NR_C_MODES);
6444
6445 for (c = pool->constants[i]; c != NULL; c = c->next)
6446 if (rtx_equal_p (val, c->value))
6447 break;
6448
6449 gcc_assert (c);
6450
6451 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
6452 }
6453
6454 /* Check whether INSN is an execute. Return the label_ref to its
6455 execute target template if so, NULL_RTX otherwise. */
6456
6457 static rtx
6458 s390_execute_label (rtx insn)
6459 {
6460 if (NONJUMP_INSN_P (insn)
6461 && GET_CODE (PATTERN (insn)) == PARALLEL
6462 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
6463 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
6464 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
6465
6466 return NULL_RTX;
6467 }
6468
6469 /* Add execute target for INSN to the constant pool POOL. */
6470
6471 static void
6472 s390_add_execute (struct constant_pool *pool, rtx insn)
6473 {
6474 struct constant *c;
6475
6476 for (c = pool->execute; c != NULL; c = c->next)
6477 if (INSN_UID (insn) == INSN_UID (c->value))
6478 break;
6479
6480 if (c == NULL)
6481 {
6482 c = (struct constant *) xmalloc (sizeof *c);
6483 c->value = insn;
6484 c->label = gen_label_rtx ();
6485 c->next = pool->execute;
6486 pool->execute = c;
6487 pool->size += 6;
6488 }
6489 }
6490
6491 /* Find execute target for INSN in the constant pool POOL.
6492 Return an RTX describing the distance from the start of
6493 the pool to the location of the execute target. */
6494
6495 static rtx
6496 s390_find_execute (struct constant_pool *pool, rtx insn)
6497 {
6498 struct constant *c;
6499
6500 for (c = pool->execute; c != NULL; c = c->next)
6501 if (INSN_UID (insn) == INSN_UID (c->value))
6502 break;
6503
6504 gcc_assert (c);
6505
6506 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
6507 }
6508
6509 /* For an execute INSN, extract the execute target template. */
6510
6511 static rtx
6512 s390_execute_target (rtx insn)
6513 {
6514 rtx pattern = PATTERN (insn);
6515 gcc_assert (s390_execute_label (insn));
6516
6517 if (XVECLEN (pattern, 0) == 2)
6518 {
6519 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
6520 }
6521 else
6522 {
6523 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
6524 int i;
6525
6526 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
6527 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
6528
6529 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
6530 }
6531
6532 return pattern;
6533 }
6534
6535 /* Indicate that INSN cannot be duplicated. This is the case for
6536 execute insns that carry a unique label. */
6537
6538 static bool
6539 s390_cannot_copy_insn_p (rtx insn)
6540 {
6541 rtx label = s390_execute_label (insn);
6542 return label && label != const0_rtx;
6543 }
6544
6545 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
6546 do not emit the pool base label. */
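/* The emitted pool has roughly the following layout (an illustrative
   sketch derived from the code below, not a verbatim description):

     [.rodata section start]        (TARGET_CPU_ZARCH only)
     [8- or 4-byte alignment]
     [pool base label]              (unless REMOTE_LABEL)
     [constants, largest alignment requirement first]
     [2-byte alignment]
     [execute target templates]
     [section end / barrier]  */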
6547
6548 static void
6549 s390_dump_pool (struct constant_pool *pool, bool remote_label)
6550 {
6551 struct constant *c;
6552 rtx insn = pool->pool_insn;
6553 int i;
6554
6555 /* Switch to rodata section. */
6556 if (TARGET_CPU_ZARCH)
6557 {
6558 insn = emit_insn_after (gen_pool_section_start (), insn);
6559 INSN_ADDRESSES_NEW (insn, -1);
6560 }
6561
6562 /* Ensure minimum pool alignment. */
6563 if (TARGET_CPU_ZARCH)
6564 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
6565 else
6566 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
6567 INSN_ADDRESSES_NEW (insn, -1);
6568
6569 /* Emit pool base label. */
6570 if (!remote_label)
6571 {
6572 insn = emit_label_after (pool->label, insn);
6573 INSN_ADDRESSES_NEW (insn, -1);
6574 }
6575
6576 /* Dump constants in descending alignment requirement order,
6577 ensuring proper alignment for every constant. */
6578 for (i = 0; i < NR_C_MODES; i++)
6579 for (c = pool->constants[i]; c; c = c->next)
6580 {
6581 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
6582 rtx value = copy_rtx (c->value);
6583 if (GET_CODE (value) == CONST
6584 && GET_CODE (XEXP (value, 0)) == UNSPEC
6585 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
6586 && XVECLEN (XEXP (value, 0), 0) == 1)
6587 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
6588
6589 insn = emit_label_after (c->label, insn);
6590 INSN_ADDRESSES_NEW (insn, -1);
6591
6592 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
6593 gen_rtvec (1, value),
6594 UNSPECV_POOL_ENTRY);
6595 insn = emit_insn_after (value, insn);
6596 INSN_ADDRESSES_NEW (insn, -1);
6597 }
6598
6599 /* Ensure minimum alignment for instructions. */
6600 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
6601 INSN_ADDRESSES_NEW (insn, -1);
6602
6603 /* Output in-pool execute template insns. */
6604 for (c = pool->execute; c; c = c->next)
6605 {
6606 insn = emit_label_after (c->label, insn);
6607 INSN_ADDRESSES_NEW (insn, -1);
6608
6609 insn = emit_insn_after (s390_execute_target (c->value), insn);
6610 INSN_ADDRESSES_NEW (insn, -1);
6611 }
6612
6613 /* Switch back to previous section. */
6614 if (TARGET_CPU_ZARCH)
6615 {
6616 insn = emit_insn_after (gen_pool_section_end (), insn);
6617 INSN_ADDRESSES_NEW (insn, -1);
6618 }
6619
6620 insn = emit_barrier_after (insn);
6621 INSN_ADDRESSES_NEW (insn, -1);
6622
6623 /* Remove placeholder insn. */
6624 remove_insn (pool->pool_insn);
6625 }
6626
6627 /* Free all memory used by POOL. */
6628
6629 static void
6630 s390_free_pool (struct constant_pool *pool)
6631 {
6632 struct constant *c, *next;
6633 int i;
6634
6635 for (i = 0; i < NR_C_MODES; i++)
6636 for (c = pool->constants[i]; c; c = next)
6637 {
6638 next = c->next;
6639 free (c);
6640 }
6641
6642 for (c = pool->execute; c; c = next)
6643 {
6644 next = c->next;
6645 free (c);
6646 }
6647
6648 BITMAP_FREE (pool->insns);
6649 free (pool);
6650 }
6651
6652
6653 /* Collect main literal pool. Return NULL on overflow. */
6654
6655 static struct constant_pool *
6656 s390_mainpool_start (void)
6657 {
6658 struct constant_pool *pool;
6659 rtx insn;
6660
6661 pool = s390_alloc_pool ();
6662
6663 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6664 {
6665 if (NONJUMP_INSN_P (insn)
6666 && GET_CODE (PATTERN (insn)) == SET
6667 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
6668 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
6669 {
6670 gcc_assert (!pool->pool_insn);
6671 pool->pool_insn = insn;
6672 }
6673
6674 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
6675 {
6676 s390_add_execute (pool, insn);
6677 }
6678 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
6679 {
6680 rtx pool_ref = NULL_RTX;
6681 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6682 if (pool_ref)
6683 {
6684 rtx constant = get_pool_constant (pool_ref);
6685 enum machine_mode mode = get_pool_mode (pool_ref);
6686 s390_add_constant (pool, constant, mode);
6687 }
6688 }
6689
6690 /* If hot/cold partitioning is enabled we have to make sure that
6691 the literal pool is emitted in the same section where the
6692 initialization of the literal pool base pointer takes place.
6693 emit_pool_after is only used in the non-overflow case on non-zarch
6694 CPUs, where we can emit the literal pool at the end of the
6695 function body within the text section. */
6696 if (NOTE_P (insn)
6697 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
6698 && !pool->emit_pool_after)
6699 pool->emit_pool_after = PREV_INSN (insn);
6700 }
6701
6702 gcc_assert (pool->pool_insn || pool->size == 0);
6703
6704 if (pool->size >= 4096)
6705 {
6706 /* We're going to chunkify the pool, so remove the main
6707 pool placeholder insn. */
6708 remove_insn (pool->pool_insn);
6709
6710 s390_free_pool (pool);
6711 pool = NULL;
6712 }
6713
6714 /* If the function ends with the section where the literal pool
6715 should be emitted, set the marker to its end. */
6716 if (pool && !pool->emit_pool_after)
6717 pool->emit_pool_after = get_last_insn ();
6718
6719 return pool;
6720 }
6721
6722 /* POOL holds the main literal pool as collected by s390_mainpool_start.
6723 Modify the current function to output the pool constants as well as
6724 the pool register setup instruction. */
6725
6726 static void
6727 s390_mainpool_finish (struct constant_pool *pool)
6728 {
6729 rtx base_reg = cfun->machine->base_reg;
6730 rtx insn;
6731
6732 /* If the pool is empty, we're done. */
6733 if (pool->size == 0)
6734 {
6735 /* We don't actually need a base register after all. */
6736 cfun->machine->base_reg = NULL_RTX;
6737
6738 if (pool->pool_insn)
6739 remove_insn (pool->pool_insn);
6740 s390_free_pool (pool);
6741 return;
6742 }
6743
6744 /* We need correct insn addresses. */
6745 shorten_branches (get_insns ());
6746
6747 /* On zSeries, we use a LARL to load the pool register. The pool is
6748 located in the .rodata section, so we emit it after the function. */
6749 if (TARGET_CPU_ZARCH)
6750 {
6751 insn = gen_main_base_64 (base_reg, pool->label);
6752 insn = emit_insn_after (insn, pool->pool_insn);
6753 INSN_ADDRESSES_NEW (insn, -1);
6754 remove_insn (pool->pool_insn);
6755
6756 insn = get_last_insn ();
6757 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6758 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6759
6760 s390_dump_pool (pool, 0);
6761 }
6762
6763 /* On S/390, if the total size of the function's code plus literal pool
6764 does not exceed 4096 bytes, we use BASR to set up a function base
6765 pointer, and emit the literal pool at the end of the function. */
6766 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
6767 + pool->size + 8 /* alignment slop */ < 4096)
6768 {
6769 insn = gen_main_base_31_small (base_reg, pool->label);
6770 insn = emit_insn_after (insn, pool->pool_insn);
6771 INSN_ADDRESSES_NEW (insn, -1);
6772 remove_insn (pool->pool_insn);
6773
6774 insn = emit_label_after (pool->label, insn);
6775 INSN_ADDRESSES_NEW (insn, -1);
6776
6777 /* emit_pool_after will be set by s390_mainpool_start to the
6778 last insn of the section where the literal pool should be
6779 emitted. */
6780 insn = pool->emit_pool_after;
6781
6782 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6783 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6784
6785 s390_dump_pool (pool, 1);
6786 }
6787
6788 /* Otherwise, we emit an inline literal pool and use BASR to branch
6789 over it, setting up the pool register at the same time. */
6790 else
6791 {
6792 rtx pool_end = gen_label_rtx ();
6793
6794 insn = gen_main_base_31_large (base_reg, pool->label, pool_end);
6795 insn = emit_jump_insn_after (insn, pool->pool_insn);
6796 JUMP_LABEL (insn) = pool_end;
6797 INSN_ADDRESSES_NEW (insn, -1);
6798 remove_insn (pool->pool_insn);
6799
6800 insn = emit_label_after (pool->label, insn);
6801 INSN_ADDRESSES_NEW (insn, -1);
6802
6803 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
6804 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
6805
6806 insn = emit_label_after (pool_end, pool->pool_insn);
6807 INSN_ADDRESSES_NEW (insn, -1);
6808
6809 s390_dump_pool (pool, 1);
6810 }
6811
6812
6813 /* Replace all literal pool references. */
6814
6815 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6816 {
6817 if (INSN_P (insn))
6818 replace_ltrel_base (&PATTERN (insn));
6819
6820 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
6821 {
6822 rtx addr, pool_ref = NULL_RTX;
6823 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6824 if (pool_ref)
6825 {
6826 if (s390_execute_label (insn))
6827 addr = s390_find_execute (pool, insn);
6828 else
6829 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
6830 get_pool_mode (pool_ref));
6831
6832 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
6833 INSN_CODE (insn) = -1;
6834 }
6835 }
6836 }
6837
6838
6839 /* Free the pool. */
6840 s390_free_pool (pool);
6841 }
6842
6843 /* POOL holds the main literal pool as collected by s390_mainpool_start.
6844 We have decided we cannot use this pool, so revert all changes
6845 to the current function that were done by s390_mainpool_start. */
6846 static void
6847 s390_mainpool_cancel (struct constant_pool *pool)
6848 {
6849 /* We didn't actually change the instruction stream, so simply
6850 free the pool memory. */
6851 s390_free_pool (pool);
6852 }
6853
6854
6855 /* Chunkify the literal pool. */
6856
6857 #define S390_POOL_CHUNK_MIN 0xc00
6858 #define S390_POOL_CHUNK_MAX 0xe00
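/* A pool chunk is preferably closed once it reaches S390_POOL_CHUNK_MIN
   (0xc00) bytes and is forced to end at S390_POOL_CHUNK_MAX (0xe00).
   Presumably the gap below the 4096-byte displacement limit leaves room
   for alignment padding and the base-register reload insns inserted
   later; this reading of the constants is an interpretation, not taken
   from the original comments.  */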
6859
6860 static struct constant_pool *
6861 s390_chunkify_start (void)
6862 {
6863 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
6864 int extra_size = 0;
6865 bitmap far_labels;
6866 rtx pending_ltrel = NULL_RTX;
6867 rtx insn;
6868
6869 rtx (*gen_reload_base) (rtx, rtx) =
6870 TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
6871
6872
6873 /* We need correct insn addresses. */
6874
6875 shorten_branches (get_insns ());
6876
6877 /* Scan all insns and move literals to pool chunks. */
6878
6879 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
6880 {
6881 bool section_switch_p = false;
6882
6883 /* Check for pending LTREL_BASE. */
6884 if (INSN_P (insn))
6885 {
6886 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
6887 if (ltrel_base)
6888 {
6889 gcc_assert (ltrel_base == pending_ltrel);
6890 pending_ltrel = NULL_RTX;
6891 }
6892 }
6893
6894 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
6895 {
6896 if (!curr_pool)
6897 curr_pool = s390_start_pool (&pool_list, insn);
6898
6899 s390_add_execute (curr_pool, insn);
6900 s390_add_pool_insn (curr_pool, insn);
6901 }
6902 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
6903 {
6904 rtx pool_ref = NULL_RTX;
6905 find_constant_pool_ref (PATTERN (insn), &pool_ref);
6906 if (pool_ref)
6907 {
6908 rtx constant = get_pool_constant (pool_ref);
6909 enum machine_mode mode = get_pool_mode (pool_ref);
6910
6911 if (!curr_pool)
6912 curr_pool = s390_start_pool (&pool_list, insn);
6913
6914 s390_add_constant (curr_pool, constant, mode);
6915 s390_add_pool_insn (curr_pool, insn);
6916
6917 /* Don't split the pool chunk between a LTREL_OFFSET load
6918 and the corresponding LTREL_BASE. */
6919 if (GET_CODE (constant) == CONST
6920 && GET_CODE (XEXP (constant, 0)) == UNSPEC
6921 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
6922 {
6923 gcc_assert (!pending_ltrel);
6924 pending_ltrel = pool_ref;
6925 }
6926 }
6927 }
6928
6929 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
6930 {
6931 if (curr_pool)
6932 s390_add_pool_insn (curr_pool, insn);
6933 /* An LTREL_BASE must follow within the same basic block. */
6934 gcc_assert (!pending_ltrel);
6935 }
6936
6937 if (NOTE_P (insn))
6938 switch (NOTE_KIND (insn))
6939 {
6940 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
6941 section_switch_p = true;
6942 break;
6943 case NOTE_INSN_VAR_LOCATION:
6944 case NOTE_INSN_CALL_ARG_LOCATION:
6945 continue;
6946 default:
6947 break;
6948 }
6949
6950 if (!curr_pool
6951 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
6952 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
6953 continue;
6954
6955 if (TARGET_CPU_ZARCH)
6956 {
6957 if (curr_pool->size < S390_POOL_CHUNK_MAX)
6958 continue;
6959
6960 s390_end_pool (curr_pool, NULL_RTX);
6961 curr_pool = NULL;
6962 }
6963 else
6964 {
6965 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
6966 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
6967 + extra_size;
6968
6969 /* We will later have to insert base register reload insns.
6970 Those will have an effect on code size, which we need to
6971 consider here. This calculation makes rather pessimistic
6972 worst-case assumptions. */
6973 if (LABEL_P (insn))
6974 extra_size += 6;
6975
6976 if (chunk_size < S390_POOL_CHUNK_MIN
6977 && curr_pool->size < S390_POOL_CHUNK_MIN
6978 && !section_switch_p)
6979 continue;
6980
6981 /* Pool chunks can only be inserted after BARRIERs ... */
6982 if (BARRIER_P (insn))
6983 {
6984 s390_end_pool (curr_pool, insn);
6985 curr_pool = NULL;
6986 extra_size = 0;
6987 }
6988
6989 /* ... so if we don't find one in time, create one. */
6990 else if (chunk_size > S390_POOL_CHUNK_MAX
6991 || curr_pool->size > S390_POOL_CHUNK_MAX
6992 || section_switch_p)
6993 {
6994 rtx label, jump, barrier, next, prev;
6995
6996 if (!section_switch_p)
6997 {
6998 /* We can insert the barrier only after a 'real' insn. */
6999 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
7000 continue;
7001 if (get_attr_length (insn) == 0)
7002 continue;
7003 /* Don't separate LTREL_BASE from the corresponding
7004 LTREL_OFFSET load. */
7005 if (pending_ltrel)
7006 continue;
7007 next = insn;
7008 do
7009 {
7010 insn = next;
7011 next = NEXT_INSN (insn);
7012 }
7013 while (next
7014 && NOTE_P (next)
7015 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
7016 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
7017 }
7018 else
7019 {
7020 gcc_assert (!pending_ltrel);
7021
7022 /* The old pool has to end before the section switch
7023 note in order to make it part of the current
7024 section. */
7025 insn = PREV_INSN (insn);
7026 }
7027
7028 label = gen_label_rtx ();
7029 prev = insn;
7030 if (prev && NOTE_P (prev))
7031 prev = prev_nonnote_insn (prev);
7032 if (prev)
7033 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
7034 INSN_LOCATION (prev));
7035 else
7036 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
7037 barrier = emit_barrier_after (jump);
7038 insn = emit_label_after (label, barrier);
7039 JUMP_LABEL (jump) = label;
7040 LABEL_NUSES (label) = 1;
7041
7042 INSN_ADDRESSES_NEW (jump, -1);
7043 INSN_ADDRESSES_NEW (barrier, -1);
7044 INSN_ADDRESSES_NEW (insn, -1);
7045
7046 s390_end_pool (curr_pool, barrier);
7047 curr_pool = NULL;
7048 extra_size = 0;
7049 }
7050 }
7051 }
7052
7053 if (curr_pool)
7054 s390_end_pool (curr_pool, NULL_RTX);
7055 gcc_assert (!pending_ltrel);
7056
7057 /* Find all labels that are branched into
7058 from an insn belonging to a different chunk. */
7059
7060 far_labels = BITMAP_ALLOC (NULL);
7061
7062 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7063 {
7064 rtx table;
7065
7066 /* Labels marked with LABEL_PRESERVE_P can be the target
7067 of non-local jumps, so we have to mark them.
7068 The same holds for named labels.
7069
7070 Don't do that, however, if it is the label before
7071 a jump table. */
7072
7073 if (LABEL_P (insn)
7074 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
7075 {
7076 rtx vec_insn = NEXT_INSN (insn);
7077 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
7078 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
7079 }
7080 /* Check potential targets in a table jump (casesi_jump). */
7081 else if (tablejump_p (insn, NULL, &table))
7082 {
7083 rtx vec_pat = PATTERN (table);
7084 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
7085
7086 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
7087 {
7088 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
7089
7090 if (s390_find_pool (pool_list, label)
7091 != s390_find_pool (pool_list, insn))
7092 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7093 }
7094 }
7095 /* If we have a direct jump (conditional or unconditional),
7096 check all potential targets. */
7097 else if (JUMP_P (insn))
7098 {
7099 rtx pat = PATTERN (insn);
7100
7101 if (GET_CODE (pat) == PARALLEL)
7102 pat = XVECEXP (pat, 0, 0);
7103
7104 if (GET_CODE (pat) == SET)
7105 {
7106 rtx label = JUMP_LABEL (insn);
7107 if (label)
7108 {
7109 if (s390_find_pool (pool_list, label)
7110 != s390_find_pool (pool_list, insn))
7111 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
7112 }
7113 }
7114 }
7115 }
7116
7117 /* Insert base register reload insns before every pool. */
7118
7119 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7120 {
7121 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7122 curr_pool->label);
7123 rtx insn = curr_pool->first_insn;
7124 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
7125 }
7126
7127 /* Insert base register reload insns at every far label. */
7128
7129 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7130 if (LABEL_P (insn)
7131 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
7132 {
7133 struct constant_pool *pool = s390_find_pool (pool_list, insn);
7134 if (pool)
7135 {
7136 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
7137 pool->label);
7138 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
7139 }
7140 }
7141
7142
7143 BITMAP_FREE (far_labels);
7144
7145
7146 /* Recompute insn addresses. */
7147
7148 init_insn_lengths ();
7149 shorten_branches (get_insns ());
7150
7151 return pool_list;
7152 }
7153
7154 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7155 After we have decided to use this list, finish implementing
7156 all changes to the current function as required. */
7157
7158 static void
7159 s390_chunkify_finish (struct constant_pool *pool_list)
7160 {
7161 struct constant_pool *curr_pool = NULL;
7162 rtx insn;
7163
7164
7165 /* Replace all literal pool references. */
7166
7167 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
7168 {
7169 if (INSN_P (insn))
7170 replace_ltrel_base (&PATTERN (insn));
7171
7172 curr_pool = s390_find_pool (pool_list, insn);
7173 if (!curr_pool)
7174 continue;
7175
7176 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
7177 {
7178 rtx addr, pool_ref = NULL_RTX;
7179 find_constant_pool_ref (PATTERN (insn), &pool_ref);
7180 if (pool_ref)
7181 {
7182 if (s390_execute_label (insn))
7183 addr = s390_find_execute (curr_pool, insn);
7184 else
7185 addr = s390_find_constant (curr_pool,
7186 get_pool_constant (pool_ref),
7187 get_pool_mode (pool_ref));
7188
7189 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
7190 INSN_CODE (insn) = -1;
7191 }
7192 }
7193 }
7194
7195 /* Dump out all literal pools. */
7196
7197 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7198 s390_dump_pool (curr_pool, 0);
7199
7200 /* Free pool list. */
7201
7202 while (pool_list)
7203 {
7204 struct constant_pool *next = pool_list->next;
7205 s390_free_pool (pool_list);
7206 pool_list = next;
7207 }
7208 }
7209
7210 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
7211 We have decided we cannot use this list, so revert all changes
7212 to the current function that were done by s390_chunkify_start. */
7213
7214 static void
7215 s390_chunkify_cancel (struct constant_pool *pool_list)
7216 {
7217 struct constant_pool *curr_pool = NULL;
7218 rtx insn;
7219
7220 /* Remove all pool placeholder insns. */
7221
7222 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
7223 {
7224 /* Did we insert an extra barrier? Remove it. */
7225 rtx barrier = PREV_INSN (curr_pool->pool_insn);
7226 rtx jump = barrier? PREV_INSN (barrier) : NULL_RTX;
7227 rtx label = NEXT_INSN (curr_pool->pool_insn);
7228
7229 if (jump && JUMP_P (jump)
7230 && barrier && BARRIER_P (barrier)
7231 && label && LABEL_P (label)
7232 && GET_CODE (PATTERN (jump)) == SET
7233 && SET_DEST (PATTERN (jump)) == pc_rtx
7234 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
7235 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
7236 {
7237 remove_insn (jump);
7238 remove_insn (barrier);
7239 remove_insn (label);
7240 }
7241
7242 remove_insn (curr_pool->pool_insn);
7243 }
7244
7245 /* Remove all base register reload insns. */
7246
7247 for (insn = get_insns (); insn; )
7248 {
7249 rtx next_insn = NEXT_INSN (insn);
7250
7251 if (NONJUMP_INSN_P (insn)
7252 && GET_CODE (PATTERN (insn)) == SET
7253 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
7254 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
7255 remove_insn (insn);
7256
7257 insn = next_insn;
7258 }
7259
7260 /* Free pool list. */
7261
7262 while (pool_list)
7263 {
7264 struct constant_pool *next = pool_list->next;
7265 s390_free_pool (pool_list);
7266 pool_list = next;
7267 }
7268 }
7269
7270 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
7271
7272 void
7273 s390_output_pool_entry (rtx exp, enum machine_mode mode, unsigned int align)
7274 {
7275 REAL_VALUE_TYPE r;
7276
7277 switch (GET_MODE_CLASS (mode))
7278 {
7279 case MODE_FLOAT:
7280 case MODE_DECIMAL_FLOAT:
7281 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
7282
7283 REAL_VALUE_FROM_CONST_DOUBLE (r, exp);
7284 assemble_real (r, mode, align);
7285 break;
7286
7287 case MODE_INT:
7288 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
7289 mark_symbol_refs_as_used (exp);
7290 break;
7291
7292 default:
7293 gcc_unreachable ();
7294 }
7295 }
7296
7297
7298 /* Return an RTL expression representing the value of the return address
7299 for the frame COUNT steps up from the current frame. FRAME is the
7300 frame pointer of that frame. */
7301
7302 rtx
7303 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
7304 {
7305 int offset;
7306 rtx addr;
7307
7308 /* Without backchain, we fail for all but the current frame. */
7309
7310 if (!TARGET_BACKCHAIN && count > 0)
7311 return NULL_RTX;
7312
7313 /* For the current frame, we need to make sure the initial
7314 value of RETURN_REGNUM is actually saved. */
7315
7316 if (count == 0)
7317 {
7318 /* On non-z architectures branch splitting could overwrite r14. */
7319 if (TARGET_CPU_ZARCH)
7320 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
7321 else
7322 {
7323 cfun_frame_layout.save_return_addr_p = true;
7324 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
7325 }
7326 }
7327
7328 if (TARGET_PACKED_STACK)
7329 offset = -2 * UNITS_PER_LONG;
7330 else
7331 offset = RETURN_REGNUM * UNITS_PER_LONG;
7332
7333 addr = plus_constant (Pmode, frame, offset);
7334 addr = memory_address (Pmode, addr);
7335 return gen_rtx_MEM (Pmode, addr);
7336 }
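/* Worked example (for illustration; the numbers follow from the code
   above rather than from the original comments): without
   TARGET_PACKED_STACK the return address of frame COUNT is found at
   offset RETURN_REGNUM * UNITS_PER_LONG from FRAME, i.e. at 112 on
   64-bit targets (14 * 8) and 56 on 31-bit targets (14 * 4); with a
   packed stack it sits at -2 * UNITS_PER_LONG instead.  */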
7337
7338 /* Return an RTL expression representing the back chain stored in
7339 the current stack frame. */
7340
7341 rtx
7342 s390_back_chain_rtx (void)
7343 {
7344 rtx chain;
7345
7346 gcc_assert (TARGET_BACKCHAIN);
7347
7348 if (TARGET_PACKED_STACK)
7349 chain = plus_constant (Pmode, stack_pointer_rtx,
7350 STACK_POINTER_OFFSET - UNITS_PER_LONG);
7351 else
7352 chain = stack_pointer_rtx;
7353
7354 chain = gen_rtx_MEM (Pmode, chain);
7355 return chain;
7356 }
7357
7358 /* Find the first call-clobbered register unused in the current function.
7359 It could be used as base register in a leaf function
7360 or for holding the return address before the epilogue. */
7361
7362 static int
7363 find_unused_clobbered_reg (void)
7364 {
7365 int i;
7366 for (i = 0; i < 6; i++)
7367 if (!df_regs_ever_live_p (i))
7368 return i;
7369 return 0;
7370 }
7371
7372
7373 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
7374 clobbered hard regs in SETREG. */
7375
7376 static void
7377 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
7378 {
7379 char *regs_ever_clobbered = (char *)data;
7380 unsigned int i, regno;
7381 enum machine_mode mode = GET_MODE (setreg);
7382
7383 if (GET_CODE (setreg) == SUBREG)
7384 {
7385 rtx inner = SUBREG_REG (setreg);
7386 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
7387 return;
7388 regno = subreg_regno (setreg);
7389 }
7390 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
7391 regno = REGNO (setreg);
7392 else
7393 return;
7394
7395 for (i = regno;
7396 i < regno + HARD_REGNO_NREGS (regno, mode);
7397 i++)
7398 regs_ever_clobbered[i] = 1;
7399 }
7400
7401 /* Walks through all basic blocks of the current function looking
7402 for clobbered hard regs using s390_reg_clobbered_rtx. The entries
7403 of the passed array REGS_EVER_CLOBBERED are set to one for
7404 each of those regs. */
7405
7406 static void
7407 s390_regs_ever_clobbered (char regs_ever_clobbered[])
7408 {
7409 basic_block cur_bb;
7410 rtx cur_insn;
7411 unsigned int i;
7412
7413 memset (regs_ever_clobbered, 0, 32);
7414
7415 /* For non-leaf functions we have to consider all call clobbered regs to be
7416 clobbered. */
7417 if (!crtl->is_leaf)
7418 {
7419 for (i = 0; i < 32; i++)
7420 regs_ever_clobbered[i] = call_really_used_regs[i];
7421 }
7422
7423 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
7424 this work is done by liveness analysis (mark_regs_live_at_end).
7425 Special care is needed for functions containing landing pads. Landing pads
7426 may use the eh registers, but the code which sets these registers is not
7427 contained in that function. Hence s390_regs_ever_clobbered is not able to
7428 deal with this automatically. */
7429 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
7430 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
7431 if (crtl->calls_eh_return
7432 || (cfun->machine->has_landing_pad_p
7433 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
7434 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
7435
7436 /* For nonlocal gotos all call-saved registers have to be saved.
7437 This flag is also set for the unwinding code in libgcc.
7438 See expand_builtin_unwind_init. For regs_ever_live this is done by
7439 reload. */
7440 if (crtl->saves_all_registers)
7441 for (i = 0; i < 32; i++)
7442 if (!call_really_used_regs[i])
7443 regs_ever_clobbered[i] = 1;
7444
7445 FOR_EACH_BB (cur_bb)
7446 {
7447 FOR_BB_INSNS (cur_bb, cur_insn)
7448 {
7449 rtx pat;
7450
7451 if (!INSN_P (cur_insn))
7452 continue;
7453
7454 pat = PATTERN (cur_insn);
7455
7456 /* Ignore GPR restore insns. */
7457 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
7458 {
7459 if (GET_CODE (pat) == SET
7460 && GENERAL_REG_P (SET_DEST (pat)))
7461 {
7462 /* lgdr */
7463 if (GET_MODE (SET_SRC (pat)) == DImode
7464 && FP_REG_P (SET_SRC (pat)))
7465 continue;
7466
7467 /* l / lg */
7468 if (GET_CODE (SET_SRC (pat)) == MEM)
7469 continue;
7470 }
7471
7472 /* lm / lmg */
7473 if (GET_CODE (pat) == PARALLEL
7474 && load_multiple_operation (pat, VOIDmode))
7475 continue;
7476 }
7477
7478 note_stores (pat,
7479 s390_reg_clobbered_rtx,
7480 regs_ever_clobbered);
7481 }
7482 }
7483 }
7484
7485 /* Determine the frame area which actually has to be accessed
7486 in the function epilogue. The values are stored at the
7487 given pointers AREA_BOTTOM (address of the lowest used stack
7488 address) and AREA_TOP (address of the first item which does
7489 not belong to the stack frame). */
7490
7491 static void
7492 s390_frame_area (int *area_bottom, int *area_top)
7493 {
7494 int b, t;
7495
7496 b = INT_MAX;
7497 t = INT_MIN;
7498
7499 if (cfun_frame_layout.first_restore_gpr != -1)
7500 {
7501 b = (cfun_frame_layout.gprs_offset
7502 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
7503 t = b + (cfun_frame_layout.last_restore_gpr
7504 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
7505 }
7506
7507 if (TARGET_64BIT && cfun_save_high_fprs_p)
7508 {
7509 b = MIN (b, cfun_frame_layout.f8_offset);
7510 t = MAX (t, (cfun_frame_layout.f8_offset
7511 + cfun_frame_layout.high_fprs * 8));
7512 }
7513
7514 if (!TARGET_64BIT)
7515 {
7516 if (cfun_fpr_save_p (FPR4_REGNUM))
7517 {
7518 b = MIN (b, cfun_frame_layout.f4_offset);
7519 t = MAX (t, cfun_frame_layout.f4_offset + 8);
7520 }
7521 if (cfun_fpr_save_p (FPR6_REGNUM))
7522 {
7523 b = MIN (b, cfun_frame_layout.f4_offset + 8);
7524 t = MAX (t, cfun_frame_layout.f4_offset + 16);
7525 }
7526 }
7527 *area_bottom = b;
7528 *area_top = t;
7529 }
7530 /* Update gpr_save_slots in the frame layout trying to make use of
7531 FPRs as GPR save slots.
7532 This is a helper routine of s390_register_info. */
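/* For example (an illustrative sketch, not part of the original
   comment): in a leaf function on z10 or later, a clobbered r14 may be
   assigned FPR0 as its save slot, so the prologue/epilogue can use the
   register-to-register ldgr/lgdr moves instead of touching the stack;
   if not every candidate GPR gets an FPR slot, the allocation is undone
   below and the stack slots are used as before.  */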
7533
7534 static void
7535 s390_register_info_gprtofpr ()
7536 {
7537 int save_reg_slot = FPR0_REGNUM;
7538 int i, j;
7539
7540 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
7541 return;
7542
7543 for (i = 15; i >= 6; i--)
7544 {
7545 if (cfun_gpr_save_slot (i) == 0)
7546 continue;
7547
7548 /* Advance to the next FP register which can be used as a
7549 GPR save slot. */
7550 while ((!call_really_used_regs[save_reg_slot]
7551 || df_regs_ever_live_p (save_reg_slot)
7552 || cfun_fpr_save_p (save_reg_slot))
7553 && FP_REGNO_P (save_reg_slot))
7554 save_reg_slot++;
7555 if (!FP_REGNO_P (save_reg_slot))
7556 {
7557 /* We only want to use ldgr/lgdr if we can get rid of
7558 stm/lm entirely. So undo the gpr slot allocation in
7559 case we ran out of FPR save slots. */
7560 for (j = 6; j <= 15; j++)
7561 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
7562 cfun_gpr_save_slot (j) = -1;
7563 break;
7564 }
7565 cfun_gpr_save_slot (i) = save_reg_slot++;
7566 }
7567 }
7568
7569 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
7570 stdarg.
7571 This is a helper routine for s390_register_info. */
7572
7573 static void
7574 s390_register_info_stdarg_fpr ()
7575 {
7576 int i;
7577 int min_fpr;
7578 int max_fpr;
7579
7580 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
7581 f0, f2, f4 and f6 for 64 bit. */
7582 if (!cfun->stdarg
7583 || !TARGET_HARD_FLOAT
7584 || !cfun->va_list_fpr_size
7585 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
7586 return;
7587
7588 min_fpr = crtl->args.info.fprs;
7589 max_fpr = min_fpr + cfun->va_list_fpr_size;
7590 if (max_fpr > FP_ARG_NUM_REG)
7591 max_fpr = FP_ARG_NUM_REG;
7592
7593 for (i = min_fpr; i < max_fpr; i++)
7594 cfun_set_fpr_save (i + FPR0_REGNUM);
7595 }
7596
7597 /* Reserve the GPR save slots for GPRs which need to be saved due to
7598 stdarg.
7599 This is a helper routine for s390_register_info. */
7600
7601 static void
7602 s390_register_info_stdarg_gpr ()
7603 {
7604 int i;
7605 int min_gpr;
7606 int max_gpr;
7607
7608 if (!cfun->stdarg
7609 || !cfun->va_list_gpr_size
7610 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
7611 return;
7612
7613 min_gpr = crtl->args.info.gprs;
7614 max_gpr = min_gpr + cfun->va_list_gpr_size;
7615 if (max_gpr > GP_ARG_NUM_REG)
7616 max_gpr = GP_ARG_NUM_REG;
7617
7618 for (i = min_gpr; i < max_gpr; i++)
7619 cfun_gpr_save_slot (2 + i) = -1;
7620 }
7621
7622 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
7623 for registers which need to be saved in function prologue.
7624 This function can be used until the insns emitted for save/restore
7625 of the regs are visible in the RTL stream. */
7626
7627 static void
7628 s390_register_info ()
7629 {
7630 int i, j;
7631 char clobbered_regs[32];
7632
7633 gcc_assert (!epilogue_completed);
7634
7635 if (reload_completed)
7636 /* After reload we rely on our own routine to determine which
7637 registers need saving. */
7638 s390_regs_ever_clobbered (clobbered_regs);
7639 else
7640 /* During reload we use regs_ever_live as a base since reload
7641 does changes in there which we otherwise would not be aware
7642 of. */
7643 for (i = 0; i < 32; i++)
7644 clobbered_regs[i] = df_regs_ever_live_p (i);
7645
7646 for (i = 0; i < 32; i++)
7647 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
7648
7649 /* Mark the call-saved FPRs which need to be saved.
7650 This needs to be done before checking the special GPRs since the
7651 stack pointer usage depends on whether high FPRs have to be saved
7652 or not. */
7653 cfun_frame_layout.fpr_bitmap = 0;
7654 cfun_frame_layout.high_fprs = 0;
7655 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
7656 if (clobbered_regs[i] && !call_really_used_regs[i])
7657 {
7658 cfun_set_fpr_save (i);
7659 if (i >= FPR8_REGNUM)
7660 cfun_frame_layout.high_fprs++;
7661 }
7662
7663 if (flag_pic)
7664 clobbered_regs[PIC_OFFSET_TABLE_REGNUM]
7665 |= !!df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7666
7667 clobbered_regs[BASE_REGNUM]
7668 |= (cfun->machine->base_reg
7669 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
7670
7671 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
7672 |= !!frame_pointer_needed;
7673
7674 /* On pre-z900 machines this decision might not be made until the
7675 machine dependent reorg pass.
7676 save_return_addr_p will only be set on non-zarch machines so
7677 there is no risk that r14 goes into an FPR instead of a stack
7678 slot. */
7679 clobbered_regs[RETURN_REGNUM]
7680 |= (!crtl->is_leaf
7681 || TARGET_TPF_PROFILING
7682 || cfun->machine->split_branches_pending_p
7683 || cfun_frame_layout.save_return_addr_p
7684 || crtl->calls_eh_return);
7685
7686 clobbered_regs[STACK_POINTER_REGNUM]
7687 |= (!crtl->is_leaf
7688 || TARGET_TPF_PROFILING
7689 || cfun_save_high_fprs_p
7690 || get_frame_size () > 0
7691 || (reload_completed && cfun_frame_layout.frame_size > 0)
7692 || cfun->calls_alloca);
7693
7694 memset (cfun_frame_layout.gpr_save_slots, 0, 16);
7695
7696 for (i = 6; i < 16; i++)
7697 if (clobbered_regs[i])
7698 cfun_gpr_save_slot (i) = -1;
7699
7700 s390_register_info_stdarg_fpr ();
7701 s390_register_info_gprtofpr ();
7702
7703 /* First find the range of GPRs to be restored. Vararg regs don't
7704 need to be restored so we do it before assigning slots to the
7705 vararg GPRs. */
7706 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
7707 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
7708 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
7709 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
7710
7711 /* stdarg functions might need to save GPRs 2 to 6. This might
7712 override the GPR->FPR save decision made above for r6 since
7713 vararg regs must go to the stack. */
7714 s390_register_info_stdarg_gpr ();
7715
7716 /* Now the range of GPRs which need saving. */
7717 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
7718 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
7719 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
7720 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
7721 }
7722
7723 /* This function is called by s390_optimize_prologue in order to get
7724 rid of unnecessary GPR save/restore instructions. The register info
7725 for the GPRs is re-computed and the ranges are re-calculated. */
7726
7727 static void
7728 s390_optimize_register_info ()
7729 {
7730 char clobbered_regs[32];
7731 int i, j;
7732
7733 gcc_assert (epilogue_completed);
7734 gcc_assert (!cfun->machine->split_branches_pending_p);
7735
7736 s390_regs_ever_clobbered (clobbered_regs);
7737
7738 for (i = 0; i < 32; i++)
7739 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
7740
7741 /* There is still special treatment needed for cases invisible to
7742 s390_regs_ever_clobbered. */
7743 clobbered_regs[RETURN_REGNUM]
7744 |= (TARGET_TPF_PROFILING
7745 /* When expanding builtin_return_addr in ESA mode we do not
7746 know whether r14 will later be needed as scratch reg when
7747 doing branch splitting. So the builtin always accesses the
7748 r14 save slot and we need to stick to the save/restore
7749 decision for r14 even if it turns out that it didn't get
7750 clobbered. */
7751 || cfun_frame_layout.save_return_addr_p
7752 || crtl->calls_eh_return);
7753
7754 memset (cfun_frame_layout.gpr_save_slots, 0, 6);
7755
7756 for (i = 6; i < 16; i++)
7757 if (!clobbered_regs[i])
7758 cfun_gpr_save_slot (i) = 0;
7759
7760 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
7761 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
7762 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
7763 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
7764
7765 s390_register_info_stdarg_gpr ();
7766
7767 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != -1; i++);
7768 for (j = 15; j > i && cfun_gpr_save_slot (j) != -1; j--);
7769 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
7770 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
7771 }
7772
7773 /* Fill cfun->machine with info about frame of current function. */
7774
7775 static void
7776 s390_frame_info (void)
7777 {
7778 HOST_WIDE_INT lowest_offset;
7779
7780 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
7781 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
7782
7783 /* The va_arg builtin uses a constant distance of 16 *
7784 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
7785 pointer. So even if we are going to save the stack pointer in an
7786 FPR we need the stack space in order to keep the offsets
7787 correct. */
7788 if (cfun->stdarg && cfun_save_arg_fprs_p)
7789 {
7790 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
7791
7792 if (cfun_frame_layout.first_save_gpr_slot == -1)
7793 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
7794 }
7795
7796 cfun_frame_layout.frame_size = get_frame_size ();
7797 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
7798 fatal_error ("total size of local variables exceeds architecture limit");
7799
7800 if (!TARGET_PACKED_STACK)
7801 {
7802 /* Fixed stack layout. */
7803 cfun_frame_layout.backchain_offset = 0;
7804 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
7805 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
7806 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
7807 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
7808 * UNITS_PER_LONG);
7809 }
7810 else if (TARGET_BACKCHAIN)
7811 {
7812 /* Kernel stack layout - packed stack, backchain, no float */
7813 gcc_assert (TARGET_SOFT_FLOAT);
7814 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
7815 - UNITS_PER_LONG);
7816
7817 /* The distance between the backchain and the return address
7818 save slot must not change. So we always need a slot for the
7819 stack pointer which resides in between. */
7820 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
7821
7822 cfun_frame_layout.gprs_offset
7823 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
7824
7825 /* FPRs will not be saved. Nevertheless pick sane values to
7826 keep area calculations valid. */
7827 cfun_frame_layout.f0_offset =
7828 cfun_frame_layout.f4_offset =
7829 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
7830 }
7831 else
7832 {
7833 int num_fprs;
7834
7835 /* Packed stack layout without backchain. */
7836
7837 /* With stdarg FPRs need their dedicated slots. */
7838 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
7839 : (cfun_fpr_save_p (FPR4_REGNUM) +
7840 cfun_fpr_save_p (FPR6_REGNUM)));
7841 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
7842
7843 num_fprs = (cfun->stdarg ? 2
7844 : (cfun_fpr_save_p (FPR0_REGNUM)
7845 + cfun_fpr_save_p (FPR2_REGNUM)));
7846 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
7847
7848 cfun_frame_layout.gprs_offset
7849 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
7850
7851 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
7852 - cfun_frame_layout.high_fprs * 8);
7853 }
7854
7855 if (cfun_save_high_fprs_p)
7856 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
7857
7858 if (!crtl->is_leaf)
7859 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
7860
7861 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
7862 sized area at the bottom of the stack. This is required also for
7863 leaf functions. When GCC generates a local stack reference it
7864 will always add STACK_POINTER_OFFSET to all these references. */
7865 if (crtl->is_leaf
7866 && !TARGET_TPF_PROFILING
7867 && cfun_frame_layout.frame_size == 0
7868 && !cfun->calls_alloca)
7869 return;
7870
7871 /* Calculate the number of bytes we have used in our own register
7872 save area. With the packed stack layout we can re-use the
7873 remaining bytes for normal stack elements. */
7874
7875 if (TARGET_PACKED_STACK)
7876 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
7877 cfun_frame_layout.f4_offset),
7878 cfun_frame_layout.gprs_offset);
7879 else
7880 lowest_offset = 0;
7881
7882 if (TARGET_BACKCHAIN)
7883 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
7884
7885 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
7886
7887 /* Under 31 bit, if an odd number of GPRs has to be saved, we have to
7888 adjust the frame size to maintain 8-byte alignment of stack
7889 frames. */
7890 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
7891 STACK_BOUNDARY / BITS_PER_UNIT - 1)
7892 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
7893 }
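
/* Illustrative sketch, not part of the original code: the frame-size
   rounding at the end of s390_frame_info above is the usual
   round-up-to-a-power-of-two-boundary idiom.  Assuming
   STACK_BOUNDARY / BITS_PER_UNIT == 8, a raw frame size of 92 bytes
   becomes 96, while 96 stays 96.  The helper name below is purely
   hypothetical.  */

static inline HOST_WIDE_INT
s390_doc_round_up_to_boundary (HOST_WIDE_INT size, HOST_WIDE_INT boundary)
{
  /* BOUNDARY must be a power of two.  */
  return (size + boundary - 1) & ~(boundary - 1);
}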
7894
7895 /* Generate frame layout. Fills in register and frame data for the current
7896 function in cfun->machine. This routine can be called multiple times;
7897 it will re-do the complete frame layout every time. */
7898
7899 static void
7900 s390_init_frame_layout (void)
7901 {
7902 HOST_WIDE_INT frame_size;
7903 int base_used;
7904
7905 gcc_assert (!reload_completed);
7906
7907 /* On S/390 machines, we may need to perform branch splitting, which
7908 will require both base and return address register. We have no
7909 choice but to assume we're going to need them until right at the
7910 end of the machine dependent reorg phase. */
7911 if (!TARGET_CPU_ZARCH)
7912 cfun->machine->split_branches_pending_p = true;
7913
7914 do
7915 {
7916 frame_size = cfun_frame_layout.frame_size;
7917
7918 /* Try to predict whether we'll need the base register. */
7919 base_used = cfun->machine->split_branches_pending_p
7920 || crtl->uses_const_pool
7921 || (!DISP_IN_RANGE (frame_size)
7922 && !CONST_OK_FOR_K (frame_size));
7923
7924 /* Decide which register to use as literal pool base. In small
7925 leaf functions, try to use an unused call-clobbered register
7926 as base register to avoid save/restore overhead. */
7927 if (!base_used)
7928 cfun->machine->base_reg = NULL_RTX;
7929 else if (crtl->is_leaf && !df_regs_ever_live_p (5))
7930 cfun->machine->base_reg = gen_rtx_REG (Pmode, 5);
7931 else
7932 cfun->machine->base_reg = gen_rtx_REG (Pmode, BASE_REGNUM);
7933
7934 s390_register_info ();
7935 s390_frame_info ();
7936 }
7937 while (frame_size != cfun_frame_layout.frame_size);
7938 }
7939
7940 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
7941 the TX is nonescaping. A transaction is considered escaping if
7942 there is at least one path from tbegin returning CC0 to the
7943 function exit block without a tend.
7944
7945 The check so far has some limitations:
7946 - only single tbegin/tend BBs are supported
7947 - the first cond jump after tbegin must separate the CC0 path from ~CC0
7948 - when CC is copied to a GPR and the CC0 check is done with the GPR
7949 this is not supported
7950 */
7951
7952 static void
7953 s390_optimize_nonescaping_tx (void)
7954 {
7955 const unsigned int CC0 = 1 << 3;
7956 basic_block tbegin_bb = NULL;
7957 basic_block tend_bb = NULL;
7958 basic_block bb;
7959 rtx insn;
7960 bool result = true;
7961 int bb_index;
7962 rtx tbegin_insn = NULL_RTX;
7963
7964 if (!cfun->machine->tbegin_p)
7965 return;
7966
7967 for (bb_index = 0; bb_index < n_basic_blocks; bb_index++)
7968 {
7969 bb = BASIC_BLOCK (bb_index);
7970
7971 FOR_BB_INSNS (bb, insn)
7972 {
7973 rtx ite, cc, pat, target;
7974 unsigned HOST_WIDE_INT mask;
7975
7976 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
7977 continue;
7978
7979 pat = PATTERN (insn);
7980
7981 if (GET_CODE (pat) == PARALLEL)
7982 pat = XVECEXP (pat, 0, 0);
7983
7984 if (GET_CODE (pat) != SET
7985 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
7986 continue;
7987
7988 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
7989 {
7990 rtx tmp;
7991
7992 tbegin_insn = insn;
7993
7994 /* Just return if the tbegin doesn't have clobbers. */
7995 if (GET_CODE (PATTERN (insn)) != PARALLEL)
7996 return;
7997
7998 if (tbegin_bb != NULL)
7999 return;
8000
8001 /* Find the next conditional jump. */
8002 for (tmp = NEXT_INSN (insn);
8003 tmp != NULL_RTX;
8004 tmp = NEXT_INSN (tmp))
8005 {
8006 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
8007 return;
8008 if (!JUMP_P (tmp))
8009 continue;
8010
8011 ite = SET_SRC (PATTERN (tmp));
8012 if (GET_CODE (ite) != IF_THEN_ELSE)
8013 continue;
8014
8015 cc = XEXP (XEXP (ite, 0), 0);
8016 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
8017 || GET_MODE (cc) != CCRAWmode
8018 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
8019 return;
8020
8021 if (bb->succs->length () != 2)
8022 return;
8023
8024 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
8025 if (GET_CODE (XEXP (ite, 0)) == NE)
8026 mask ^= 0xf;
8027
8028 if (mask == CC0)
8029 target = XEXP (ite, 1);
8030 else if (mask == (CC0 ^ 0xf))
8031 target = XEXP (ite, 2);
8032 else
8033 return;
8034
8035 {
8036 edge_iterator ei;
8037 edge e1, e2;
8038
8039 ei = ei_start (bb->succs);
8040 e1 = ei_safe_edge (ei);
8041 ei_next (&ei);
8042 e2 = ei_safe_edge (ei);
8043
8044 if (e2->flags & EDGE_FALLTHRU)
8045 {
8046 e2 = e1;
8047 e1 = ei_safe_edge (ei);
8048 }
8049
8050 if (!(e1->flags & EDGE_FALLTHRU))
8051 return;
8052
8053 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
8054 }
8055 if (tmp == BB_END (bb))
8056 break;
8057 }
8058 }
8059
8060 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
8061 {
8062 if (tend_bb != NULL)
8063 return;
8064 tend_bb = bb;
8065 }
8066 }
8067 }
8068
8069 /* Either we successfully remove the FPR clobbers here or we are not
8070 able to do anything for this TX. In neither case does it qualify
8071 for another look. */
8072 cfun->machine->tbegin_p = false;
8073
8074 if (tbegin_bb == NULL || tend_bb == NULL)
8075 return;
8076
8077 calculate_dominance_info (CDI_POST_DOMINATORS);
8078 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
8079 free_dominance_info (CDI_POST_DOMINATORS);
8080
8081 if (!result)
8082 return;
8083
8084 PATTERN (tbegin_insn) = XVECEXP (PATTERN (tbegin_insn), 0, 0);
8085 INSN_CODE (tbegin_insn) = -1;
8086 df_insn_rescan (tbegin_insn);
8087
8088 return;
8089 }
8090
8091 /* Return true if it is legal to put a value with MODE into REGNO. */
8092
8093 bool
8094 s390_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
8095 {
8096 switch (REGNO_REG_CLASS (regno))
8097 {
8098 case FP_REGS:
8099 if (REGNO_PAIR_OK (regno, mode))
8100 {
8101 if (mode == SImode || mode == DImode)
8102 return true;
8103
8104 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
8105 return true;
8106 }
8107 break;
8108 case ADDR_REGS:
8109 if (FRAME_REGNO_P (regno) && mode == Pmode)
8110 return true;
8111
8112 /* fallthrough */
8113 case GENERAL_REGS:
8114 if (REGNO_PAIR_OK (regno, mode))
8115 {
8116 if (TARGET_ZARCH
8117 || (mode != TFmode && mode != TCmode && mode != TDmode))
8118 return true;
8119 }
8120 break;
8121 case CC_REGS:
8122 if (GET_MODE_CLASS (mode) == MODE_CC)
8123 return true;
8124 break;
8125 case ACCESS_REGS:
8126 if (REGNO_PAIR_OK (regno, mode))
8127 {
8128 if (mode == SImode || mode == Pmode)
8129 return true;
8130 }
8131 break;
8132 default:
8133 return false;
8134 }
8135
8136 return false;
8137 }
8138
8139 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
8140
8141 bool
8142 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
8143 {
8144 /* Once we've decided upon a register to use as base register, it must
8145 no longer be used for any other purpose. */
8146 if (cfun->machine->base_reg)
8147 if (REGNO (cfun->machine->base_reg) == old_reg
8148 || REGNO (cfun->machine->base_reg) == new_reg)
8149 return false;
8150
8151 /* Prevent regrename from using call-saved regs which haven't
8152 actually been saved. This is necessary since regrename assumes
8153 the backend save/restore decisions are based on
8154 df_regs_ever_live. Since we have our own routine we have to tell
8155 regrename manually about it. */
8156 if (GENERAL_REGNO_P (new_reg)
8157 && !call_really_used_regs[new_reg]
8158 && cfun_gpr_save_slot (new_reg) == 0)
8159 return false;
8160
8161 return true;
8162 }
8163
8164 /* Return nonzero if register REGNO can be used as a scratch register
8165 in peephole2. */
8166
8167 static bool
8168 s390_hard_regno_scratch_ok (unsigned int regno)
8169 {
8170 /* See s390_hard_regno_rename_ok. */
8171 if (GENERAL_REGNO_P (regno)
8172 && !call_really_used_regs[regno]
8173 && cfun_gpr_save_slot (regno) == 0)
8174 return false;
8175
8176 return true;
8177 }
8178
8179 /* Maximum number of registers to represent a value of mode MODE
8180 in a register of class RCLASS. */
8181
8182 int
8183 s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
8184 {
8185 switch (rclass)
8186 {
8187 case FP_REGS:
8188 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
8189 return 2 * ((GET_MODE_SIZE (mode) / 2 + 8 - 1) / 8);
8190 else
8191 return (GET_MODE_SIZE (mode) + 8 - 1) / 8;
8192 case ACCESS_REGS:
8193 return (GET_MODE_SIZE (mode) + 4 - 1) / 4;
8194 default:
8195 break;
8196 }
8197 return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
8198 }
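
/* Illustrative sketch, not part of the original code: for FP_REGS the
   calculation above is a ceiling division by the 8-byte FPR width, so
   DFmode (8 bytes) needs one FPR while TFmode (16 bytes) needs two,
   and a complex DFmode value splits into two single-FPR halves.  The
   helper name below is hypothetical.  */

static inline int
s390_doc_fp_nregs (int size_in_bytes)
{
  /* Same ceiling division as the FP_REGS case of s390_class_max_nregs.  */
  return (size_in_bytes + 8 - 1) / 8;
}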
8199
8200 /* Return true if we use LRA instead of reload pass. */
8201 static bool
8202 s390_lra_p (void)
8203 {
8204 return s390_lra_flag;
8205 }
8206
8207 /* Return true if register FROM can be eliminated via register TO. */
8208
8209 static bool
8210 s390_can_eliminate (const int from, const int to)
8211 {
8212 /* On zSeries machines, we have not marked the base register as fixed.
8213 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
8214 If a function requires the base register, we say here that this
8215 elimination cannot be performed. This will cause reload to free
8216 up the base register (as if it were fixed). On the other hand,
8217 if the current function does *not* require the base register, we
8218 say here the elimination succeeds, which in turn allows reload
8219 to allocate the base register for any other purpose. */
8220 if (from == BASE_REGNUM && to == BASE_REGNUM)
8221 {
8222 if (TARGET_CPU_ZARCH)
8223 {
8224 s390_init_frame_layout ();
8225 return cfun->machine->base_reg == NULL_RTX;
8226 }
8227
8228 return false;
8229 }
8230
8231 /* Everything else must point into the stack frame. */
8232 gcc_assert (to == STACK_POINTER_REGNUM
8233 || to == HARD_FRAME_POINTER_REGNUM);
8234
8235 gcc_assert (from == FRAME_POINTER_REGNUM
8236 || from == ARG_POINTER_REGNUM
8237 || from == RETURN_ADDRESS_POINTER_REGNUM);
8238
8239 /* Make sure we actually saved the return address. */
8240 if (from == RETURN_ADDRESS_POINTER_REGNUM)
8241 if (!crtl->calls_eh_return
8242 && !cfun->stdarg
8243 && !cfun_frame_layout.save_return_addr_p)
8244 return false;
8245
8246 return true;
8247 }
8248
8249 /* Return offset between register FROM and TO initially after prolog. */
8250
8251 HOST_WIDE_INT
8252 s390_initial_elimination_offset (int from, int to)
8253 {
8254 HOST_WIDE_INT offset;
8255
8256 /* ??? Why are we called for non-eliminable pairs? */
8257 if (!s390_can_eliminate (from, to))
8258 return 0;
8259
8260 switch (from)
8261 {
8262 case FRAME_POINTER_REGNUM:
8263 offset = (get_frame_size()
8264 + STACK_POINTER_OFFSET
8265 + crtl->outgoing_args_size);
8266 break;
8267
8268 case ARG_POINTER_REGNUM:
8269 s390_init_frame_layout ();
8270 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
8271 break;
8272
8273 case RETURN_ADDRESS_POINTER_REGNUM:
8274 s390_init_frame_layout ();
8275
8276 if (cfun_frame_layout.first_save_gpr_slot == -1)
8277 {
8278 /* If it turns out that for stdarg nothing went into the reg
8279 save area we also do not need the return address
8280 pointer. */
8281 if (cfun->stdarg && !cfun_save_arg_fprs_p)
8282 return 0;
8283
8284 gcc_unreachable ();
8285 }
8286
8287 /* In order to make the following work it is not necessary for
8288 r14 to have a save slot. It is sufficient if one other GPR
8289 got one. Since the GPRs are always stored without gaps we
8290 are able to calculate where the r14 save slot would
8291 reside. */
8292 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
8293 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
8294 UNITS_PER_LONG);
8295 break;
8296
8297 case BASE_REGNUM:
8298 offset = 0;
8299 break;
8300
8301 default:
8302 gcc_unreachable ();
8303 }
8304
8305 return offset;
8306 }
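
/* Illustrative sketch, not part of the original code: because the GPRs
   are saved without gaps, the r14 slot offset used above can be derived
   from the slot of the first saved GPR.  For example, with
   first_save_gpr_slot == 6, RETURN_REGNUM == 14 and UNITS_PER_LONG == 8,
   the return address slot lies 64 bytes into the GPR save area.  The
   helper name below is hypothetical.  */

static inline HOST_WIDE_INT
s390_doc_r14_slot_offset (HOST_WIDE_INT frame_size,
			  HOST_WIDE_INT gprs_offset,
			  int first_save_gpr_slot)
{
  return (frame_size + gprs_offset
	  + (RETURN_REGNUM - first_save_gpr_slot) * UNITS_PER_LONG);
}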
8307
8308 /* Emit insn to save fpr REGNUM at offset OFFSET relative
8309 to register BASE. Return generated insn. */
8310
8311 static rtx
8312 save_fpr (rtx base, int offset, int regnum)
8313 {
8314 rtx addr;
8315 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
8316
8317 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
8318 set_mem_alias_set (addr, get_varargs_alias_set ());
8319 else
8320 set_mem_alias_set (addr, get_frame_alias_set ());
8321
8322 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
8323 }
8324
8325 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
8326 to register BASE. Return generated insn. */
8327
8328 static rtx
8329 restore_fpr (rtx base, int offset, int regnum)
8330 {
8331 rtx addr;
8332 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
8333 set_mem_alias_set (addr, get_frame_alias_set ());
8334
8335 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
8336 }
8337
8338 /* Return true if REGNO is a global register, but not one
8339 of the special ones that need to be saved/restored anyway. */
8340
8341 static inline bool
8342 global_not_special_regno_p (int regno)
8343 {
8344 return (global_regs[regno]
8345 /* These registers are special and need to be
8346 restored in any case. */
8347 && !(regno == STACK_POINTER_REGNUM
8348 || regno == RETURN_REGNUM
8349 || regno == BASE_REGNUM
8350 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
8351 }
8352
8353 /* Generate insn to save registers FIRST to LAST into
8354 the register save area located at offset OFFSET
8355 relative to register BASE. */
8356
8357 static rtx
8358 save_gprs (rtx base, int offset, int first, int last)
8359 {
8360 rtx addr, insn, note;
8361 int i;
8362
8363 addr = plus_constant (Pmode, base, offset);
8364 addr = gen_rtx_MEM (Pmode, addr);
8365
8366 set_mem_alias_set (addr, get_frame_alias_set ());
8367
8368 /* Special-case single register. */
8369 if (first == last)
8370 {
8371 if (TARGET_64BIT)
8372 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
8373 else
8374 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
8375
8376 if (!global_not_special_regno_p (first))
8377 RTX_FRAME_RELATED_P (insn) = 1;
8378 return insn;
8379 }
8380
8381
8382 insn = gen_store_multiple (addr,
8383 gen_rtx_REG (Pmode, first),
8384 GEN_INT (last - first + 1));
8385
8386 if (first <= 6 && cfun->stdarg)
8387 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8388 {
8389 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
8390
8391 if (first + i <= 6)
8392 set_mem_alias_set (mem, get_varargs_alias_set ());
8393 }
8394
8395 /* We need to set the FRAME_RELATED flag on all SETs
8396 inside the store-multiple pattern.
8397
8398 However, we must not emit DWARF records for registers 2..5
8399 if they are stored for use by variable arguments ...
8400
8401 ??? Unfortunately, it is not enough to simply not set the
8402 FRAME_RELATED flags for those SETs, because the first SET
8403 of the PARALLEL is always treated as if it had the flag
8404 set, even if it does not. Therefore we emit a new pattern
8405 without those registers as a REG_FRAME_RELATED_EXPR note. */
8406
8407 if (first >= 6 && !global_not_special_regno_p (first))
8408 {
8409 rtx pat = PATTERN (insn);
8410
8411 for (i = 0; i < XVECLEN (pat, 0); i++)
8412 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
8413 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
8414 0, i)))))
8415 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
8416
8417 RTX_FRAME_RELATED_P (insn) = 1;
8418 }
8419 else if (last >= 6)
8420 {
8421 int start;
8422
8423 for (start = first >= 6 ? first : 6; start <= last; start++)
8424 if (!global_not_special_regno_p (start))
8425 break;
8426
8427 if (start > last)
8428 return insn;
8429
8430 addr = plus_constant (Pmode, base,
8431 offset + (start - first) * UNITS_PER_LONG);
8432
8433 if (start == last)
8434 {
8435 if (TARGET_64BIT)
8436 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
8437 gen_rtx_REG (Pmode, start));
8438 else
8439 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
8440 gen_rtx_REG (Pmode, start));
8441 note = PATTERN (note);
8442
8443 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
8444 RTX_FRAME_RELATED_P (insn) = 1;
8445
8446 return insn;
8447 }
8448
8449 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
8450 gen_rtx_REG (Pmode, start),
8451 GEN_INT (last - start + 1));
8452 note = PATTERN (note);
8453
8454 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
8455
8456 for (i = 0; i < XVECLEN (note, 0); i++)
8457 if (GET_CODE (XVECEXP (note, 0, i)) == SET
8458 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
8459 0, i)))))
8460 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
8461
8462 RTX_FRAME_RELATED_P (insn) = 1;
8463 }
8464
8465 return insn;
8466 }
8467
8468 /* Generate insn to restore registers FIRST to LAST from
8469 the register save area located at offset OFFSET
8470 relative to register BASE. */
8471
8472 static rtx
8473 restore_gprs (rtx base, int offset, int first, int last)
8474 {
8475 rtx addr, insn;
8476
8477 addr = plus_constant (Pmode, base, offset);
8478 addr = gen_rtx_MEM (Pmode, addr);
8479 set_mem_alias_set (addr, get_frame_alias_set ());
8480
8481 /* Special-case single register. */
8482 if (first == last)
8483 {
8484 if (TARGET_64BIT)
8485 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
8486 else
8487 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
8488
8489 RTX_FRAME_RELATED_P (insn) = 1;
8490 return insn;
8491 }
8492
8493 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
8494 addr,
8495 GEN_INT (last - first + 1));
8496 RTX_FRAME_RELATED_P (insn) = 1;
8497 return insn;
8498 }
8499
8500 /* Return insn sequence to load the GOT register. */
8501
8502 static GTY(()) rtx got_symbol;
8503 rtx
8504 s390_load_got (void)
8505 {
8506 rtx insns;
8507
8508 /* We cannot use pic_offset_table_rtx here since this function is
8509 also used for non-PIC code when __tls_get_offset is called; in
8510 that case neither PIC_OFFSET_TABLE_REGNUM nor pic_offset_table_rtx
8511 is usable. */
8512 rtx got_rtx = gen_rtx_REG (Pmode, 12);
8513
8514 if (!got_symbol)
8515 {
8516 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
8517 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
8518 }
8519
8520 start_sequence ();
8521
8522 if (TARGET_CPU_ZARCH)
8523 {
8524 emit_move_insn (got_rtx, got_symbol);
8525 }
8526 else
8527 {
8528 rtx offset;
8529
8530 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol),
8531 UNSPEC_LTREL_OFFSET);
8532 offset = gen_rtx_CONST (Pmode, offset);
8533 offset = force_const_mem (Pmode, offset);
8534
8535 emit_move_insn (got_rtx, offset);
8536
8537 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
8538 UNSPEC_LTREL_BASE);
8539 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
8540
8541 emit_move_insn (got_rtx, offset);
8542 }
8543
8544 insns = get_insns ();
8545 end_sequence ();
8546 return insns;
8547 }
8548
8549 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
8550 and the change to the stack pointer. */
8551
8552 static void
8553 s390_emit_stack_tie (void)
8554 {
8555 rtx mem = gen_frame_mem (BLKmode,
8556 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
8557
8558 emit_insn (gen_stack_tie (mem));
8559 }
8560
8561 /* Copy GPRS into FPR save slots. */
8562
8563 static void
8564 s390_save_gprs_to_fprs (void)
8565 {
8566 int i;
8567
8568 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
8569 return;
8570
8571 for (i = 6; i < 16; i++)
8572 {
8573 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
8574 {
8575 rtx insn =
8576 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
8577 gen_rtx_REG (DImode, i));
8578 RTX_FRAME_RELATED_P (insn) = 1;
8579 }
8580 }
8581 }
8582
8583 /* Restore GPRs from FPR save slots. */
8584
8585 static void
8586 s390_restore_gprs_from_fprs (void)
8587 {
8588 int i;
8589
8590 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
8591 return;
8592
8593 for (i = 6; i < 16; i++)
8594 {
8595 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
8596 {
8597 rtx insn =
8598 emit_move_insn (gen_rtx_REG (DImode, i),
8599 gen_rtx_REG (DImode, cfun_gpr_save_slot (i)));
8600 df_set_regs_ever_live (i, true);
8601 /* The frame related flag is only required on the save
8602 operations. We nevertheless set it also for the restore
8603 in order to recognize these instructions in
8604 s390_optimize_prologue. The flag will then be
8605 deleted. */
8606 RTX_FRAME_RELATED_P (insn) = 1;
8607 }
8608 }
8609 }
8610
8611 /* Expand the prologue into a bunch of separate insns. */
8612
8613 void
8614 s390_emit_prologue (void)
8615 {
8616 rtx insn, addr;
8617 rtx temp_reg;
8618 int i;
8619 int offset;
8620 int next_fpr = 0;
8621
8622 /* Try to get rid of the FPR clobbers. */
8623 s390_optimize_nonescaping_tx ();
8624
8625 /* Re-compute register info. */
8626 s390_register_info ();
8627
8628 /* Annotate all constant pool references to let the scheduler know
8629 they implicitly use the base register. */
8630
8631 push_topmost_sequence ();
8632
8633 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8634 if (INSN_P (insn))
8635 {
8636 annotate_constant_pool_refs (&PATTERN (insn));
8637 df_insn_rescan (insn);
8638 }
8639
8640 pop_topmost_sequence ();
8641
8642 /* Choose the best register to use as a temporary within the prologue.
8643 See below for why TPF must use register 1. */
8644
8645 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
8646 && !crtl->is_leaf
8647 && !TARGET_TPF_PROFILING)
8648 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8649 else
8650 temp_reg = gen_rtx_REG (Pmode, 1);
8651
8652 s390_save_gprs_to_fprs ();
8653
8654 /* Save call saved gprs. */
8655 if (cfun_frame_layout.first_save_gpr != -1)
8656 {
8657 insn = save_gprs (stack_pointer_rtx,
8658 cfun_frame_layout.gprs_offset +
8659 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
8660 - cfun_frame_layout.first_save_gpr_slot),
8661 cfun_frame_layout.first_save_gpr,
8662 cfun_frame_layout.last_save_gpr);
8663 emit_insn (insn);
8664 }
8665
8666 /* Dummy insn to mark literal pool slot. */
8667
8668 if (cfun->machine->base_reg)
8669 emit_insn (gen_main_pool (cfun->machine->base_reg));
8670
8671 offset = cfun_frame_layout.f0_offset;
8672
8673 /* Save f0 and f2. */
8674 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
8675 {
8676 if (cfun_fpr_save_p (i))
8677 {
8678 save_fpr (stack_pointer_rtx, offset, i);
8679 offset += 8;
8680 }
8681 else if (!TARGET_PACKED_STACK || cfun->stdarg)
8682 offset += 8;
8683 }
8684
8685 /* Save f4 and f6. */
8686 offset = cfun_frame_layout.f4_offset;
8687 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
8688 {
8689 if (cfun_fpr_save_p (i))
8690 {
8691 insn = save_fpr (stack_pointer_rtx, offset, i);
8692 offset += 8;
8693
8694 /* If f4 and f6 are call-clobbered they are saved due to
8695 stdarg and therefore are not frame related. */
8696 if (!call_really_used_regs[i])
8697 RTX_FRAME_RELATED_P (insn) = 1;
8698 }
8699 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
8700 offset += 8;
8701 }
8702
8703 if (TARGET_PACKED_STACK
8704 && cfun_save_high_fprs_p
8705 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
8706 {
8707 offset = (cfun_frame_layout.f8_offset
8708 + (cfun_frame_layout.high_fprs - 1) * 8);
8709
8710 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
8711 if (cfun_fpr_save_p (i))
8712 {
8713 insn = save_fpr (stack_pointer_rtx, offset, i);
8714
8715 RTX_FRAME_RELATED_P (insn) = 1;
8716 offset -= 8;
8717 }
8718 if (offset >= cfun_frame_layout.f8_offset)
8719 next_fpr = i;
8720 }
8721
8722 if (!TARGET_PACKED_STACK)
8723 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
8724
8725 if (flag_stack_usage_info)
8726 current_function_static_stack_size = cfun_frame_layout.frame_size;
8727
8728 /* Decrement stack pointer. */
8729
8730 if (cfun_frame_layout.frame_size > 0)
8731 {
8732 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
8733 rtx real_frame_off;
8734
8735 if (s390_stack_size)
8736 {
8737 HOST_WIDE_INT stack_guard;
8738
8739 if (s390_stack_guard)
8740 stack_guard = s390_stack_guard;
8741 else
8742 {
8743 /* If no value for the stack guard is provided, the smallest power
8744 of 2 that is no smaller than the current frame size is chosen. */
8745 stack_guard = 1;
8746 while (stack_guard < cfun_frame_layout.frame_size)
8747 stack_guard <<= 1;
8748 }
8749
8750 if (cfun_frame_layout.frame_size >= s390_stack_size)
8751 {
8752 warning (0, "frame size of function %qs is %wd"
8753 " bytes exceeding user provided stack limit of "
8754 "%d bytes. "
8755 "An unconditional trap is added.",
8756 current_function_name(), cfun_frame_layout.frame_size,
8757 s390_stack_size);
8758 emit_insn (gen_trap ());
8759 }
8760 else
8761 {
8762 /* stack_guard has to be smaller than s390_stack_size.
8763 Otherwise we would emit an AND with zero which would
8764 not match the test under mask pattern. */
8765 if (stack_guard >= s390_stack_size)
8766 {
8767 warning (0, "frame size of function %qs is %wd"
8768 " bytes which is more than half the stack size. "
8769 "The dynamic check would not be reliable. "
8770 "No check emitted for this function.",
8771 current_function_name(),
8772 cfun_frame_layout.frame_size);
8773 }
8774 else
8775 {
8776 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
8777 & ~(stack_guard - 1));
8778
8779 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
8780 GEN_INT (stack_check_mask));
8781 if (TARGET_64BIT)
8782 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
8783 t, const0_rtx),
8784 t, const0_rtx, const0_rtx));
8785 else
8786 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
8787 t, const0_rtx),
8788 t, const0_rtx, const0_rtx));
8789 }
8790 }
8791 }
8792
8793 if (s390_warn_framesize > 0
8794 && cfun_frame_layout.frame_size >= s390_warn_framesize)
8795 warning (0, "frame size of %qs is %wd bytes",
8796 current_function_name (), cfun_frame_layout.frame_size);
8797
8798 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
8799 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
8800
8801 /* Save incoming stack pointer into temp reg. */
8802 if (TARGET_BACKCHAIN || next_fpr)
8803 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
8804
8805 /* Subtract frame size from stack pointer. */
8806
8807 if (DISP_IN_RANGE (INTVAL (frame_off)))
8808 {
8809 insn = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8810 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8811 frame_off));
8812 insn = emit_insn (insn);
8813 }
8814 else
8815 {
8816 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
8817 frame_off = force_const_mem (Pmode, frame_off);
8818
8819 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
8820 annotate_constant_pool_refs (&PATTERN (insn));
8821 }
8822
8823 RTX_FRAME_RELATED_P (insn) = 1;
8824 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
8825 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8826 gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8827 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
8828 real_frame_off)));
8829
8830 /* Set backchain. */
8831
8832 if (TARGET_BACKCHAIN)
8833 {
8834 if (cfun_frame_layout.backchain_offset)
8835 addr = gen_rtx_MEM (Pmode,
8836 plus_constant (Pmode, stack_pointer_rtx,
8837 cfun_frame_layout.backchain_offset));
8838 else
8839 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
8840 set_mem_alias_set (addr, get_frame_alias_set ());
8841 insn = emit_insn (gen_move_insn (addr, temp_reg));
8842 }
8843
8844 /* If we support non-call exceptions (e.g. for Java),
8845 we need to make sure the backchain pointer is set up
8846 before any possibly trapping memory access. */
8847 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
8848 {
8849 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
8850 emit_clobber (addr);
8851 }
8852 }
8853
8854 /* Save fprs 8 - 15 (64 bit ABI). */
8855
8856 if (cfun_save_high_fprs_p && next_fpr)
8857 {
8858 /* If the stack might be accessed through a different register
8859 we have to make sure that the stack pointer decrement is not
8860 moved below the use of the stack slots. */
8861 s390_emit_stack_tie ();
8862
8863 insn = emit_insn (gen_add2_insn (temp_reg,
8864 GEN_INT (cfun_frame_layout.f8_offset)));
8865
8866 offset = 0;
8867
8868 for (i = FPR8_REGNUM; i <= next_fpr; i++)
8869 if (cfun_fpr_save_p (i))
8870 {
8871 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
8872 cfun_frame_layout.frame_size
8873 + cfun_frame_layout.f8_offset
8874 + offset);
8875
8876 insn = save_fpr (temp_reg, offset, i);
8877 offset += 8;
8878 RTX_FRAME_RELATED_P (insn) = 1;
8879 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
8880 gen_rtx_SET (VOIDmode,
8881 gen_rtx_MEM (DFmode, addr),
8882 gen_rtx_REG (DFmode, i)));
8883 }
8884 }
8885
8886 /* Set frame pointer, if needed. */
8887
8888 if (frame_pointer_needed)
8889 {
8890 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8891 RTX_FRAME_RELATED_P (insn) = 1;
8892 }
8893
8894 /* Set up got pointer, if needed. */
8895
8896 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
8897 {
8898 rtx insns = s390_load_got ();
8899
8900 for (insn = insns; insn; insn = NEXT_INSN (insn))
8901 annotate_constant_pool_refs (&PATTERN (insn));
8902
8903 emit_insn (insns);
8904 }
8905
8906 if (TARGET_TPF_PROFILING)
8907 {
8908 /* Generate a BAS instruction to serve as a function
8909 entry intercept to facilitate the use of tracing
8910 algorithms located at the branch target. */
8911 emit_insn (gen_prologue_tpf ());
8912
8913 /* Emit a blockage here so that all code
8914 lies between the profiling mechanisms. */
8915 emit_insn (gen_blockage ());
8916 }
8917 }
8918
8919 /* Expand the epilogue into a bunch of separate insns. */
8920
8921 void
8922 s390_emit_epilogue (bool sibcall)
8923 {
8924 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
8925 int area_bottom, area_top, offset = 0;
8926 int next_offset;
8927 rtvec p;
8928 int i;
8929
8930 if (TARGET_TPF_PROFILING)
8931 {
8932
8933 /* Generate a BAS instruction to serve as a function
8934 entry intercept to facilitate the use of tracing
8935 algorithms located at the branch target. */
8936
8937 /* Emit a blockage here so that all code
8938 lies between the profiling mechanisms. */
8939 emit_insn (gen_blockage ());
8940
8941 emit_insn (gen_epilogue_tpf ());
8942 }
8943
8944 /* Check whether to use frame or stack pointer for restore. */
8945
8946 frame_pointer = (frame_pointer_needed
8947 ? hard_frame_pointer_rtx : stack_pointer_rtx);
8948
8949 s390_frame_area (&area_bottom, &area_top);
8950
8951 /* Check whether we can access the register save area.
8952 If not, increment the frame pointer as required. */
8953
8954 if (area_top <= area_bottom)
8955 {
8956 /* Nothing to restore. */
8957 }
8958 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
8959 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
8960 {
8961 /* Area is in range. */
8962 offset = cfun_frame_layout.frame_size;
8963 }
8964 else
8965 {
8966 rtx insn, frame_off, cfa;
8967
8968 offset = area_bottom < 0 ? -area_bottom : 0;
8969 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
8970
8971 cfa = gen_rtx_SET (VOIDmode, frame_pointer,
8972 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
8973 if (DISP_IN_RANGE (INTVAL (frame_off)))
8974 {
8975 insn = gen_rtx_SET (VOIDmode, frame_pointer,
8976 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
8977 insn = emit_insn (insn);
8978 }
8979 else
8980 {
8981 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
8982 frame_off = force_const_mem (Pmode, frame_off);
8983
8984 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
8985 annotate_constant_pool_refs (&PATTERN (insn));
8986 }
8987 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
8988 RTX_FRAME_RELATED_P (insn) = 1;
8989 }
8990
8991 /* Restore call saved fprs. */
8992
8993 if (TARGET_64BIT)
8994 {
8995 if (cfun_save_high_fprs_p)
8996 {
8997 next_offset = cfun_frame_layout.f8_offset;
8998 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
8999 {
9000 if (cfun_fpr_save_p (i))
9001 {
9002 restore_fpr (frame_pointer,
9003 offset + next_offset, i);
9004 cfa_restores
9005 = alloc_reg_note (REG_CFA_RESTORE,
9006 gen_rtx_REG (DFmode, i), cfa_restores);
9007 next_offset += 8;
9008 }
9009 }
9010 }
9011
9012 }
9013 else
9014 {
9015 next_offset = cfun_frame_layout.f4_offset;
9016 /* f4, f6 */
9017 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
9018 {
9019 if (cfun_fpr_save_p (i))
9020 {
9021 restore_fpr (frame_pointer,
9022 offset + next_offset, i);
9023 cfa_restores
9024 = alloc_reg_note (REG_CFA_RESTORE,
9025 gen_rtx_REG (DFmode, i), cfa_restores);
9026 next_offset += 8;
9027 }
9028 else if (!TARGET_PACKED_STACK)
9029 next_offset += 8;
9030 }
9031
9032 }
9033
9034 /* Return register. */
9035
9036 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
9037
9038 /* Restore call saved gprs. */
9039
9040 if (cfun_frame_layout.first_restore_gpr != -1)
9041 {
9042 rtx insn, addr;
9043 int i;
9044
9045 /* Check for global registers and save them
9046 to the stack location from where they get restored. */
9047
9048 for (i = cfun_frame_layout.first_restore_gpr;
9049 i <= cfun_frame_layout.last_restore_gpr;
9050 i++)
9051 {
9052 if (global_not_special_regno_p (i))
9053 {
9054 addr = plus_constant (Pmode, frame_pointer,
9055 offset + cfun_frame_layout.gprs_offset
9056 + (i - cfun_frame_layout.first_save_gpr_slot)
9057 * UNITS_PER_LONG);
9058 addr = gen_rtx_MEM (Pmode, addr);
9059 set_mem_alias_set (addr, get_frame_alias_set ());
9060 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
9061 }
9062 else
9063 cfa_restores
9064 = alloc_reg_note (REG_CFA_RESTORE,
9065 gen_rtx_REG (Pmode, i), cfa_restores);
9066 }
9067
9068 if (! sibcall)
9069 {
9070 /* Fetch the return address from the stack before the load multiple;
9071 this helps scheduling. */
9072
9073 if (cfun_frame_layout.save_return_addr_p
9074 || (cfun_frame_layout.first_restore_gpr < BASE_REGNUM
9075 && cfun_frame_layout.last_restore_gpr > RETURN_REGNUM))
9076 {
9077 int return_regnum = find_unused_clobbered_reg();
9078 if (!return_regnum)
9079 return_regnum = 4;
9080 return_reg = gen_rtx_REG (Pmode, return_regnum);
9081
9082 addr = plus_constant (Pmode, frame_pointer,
9083 offset + cfun_frame_layout.gprs_offset
9084 + (RETURN_REGNUM
9085 - cfun_frame_layout.first_save_gpr_slot)
9086 * UNITS_PER_LONG);
9087 addr = gen_rtx_MEM (Pmode, addr);
9088 set_mem_alias_set (addr, get_frame_alias_set ());
9089 emit_move_insn (return_reg, addr);
9090 }
9091 }
9092
9093 insn = restore_gprs (frame_pointer,
9094 offset + cfun_frame_layout.gprs_offset
9095 + (cfun_frame_layout.first_restore_gpr
9096 - cfun_frame_layout.first_save_gpr_slot)
9097 * UNITS_PER_LONG,
9098 cfun_frame_layout.first_restore_gpr,
9099 cfun_frame_layout.last_restore_gpr);
9100 insn = emit_insn (insn);
9101 REG_NOTES (insn) = cfa_restores;
9102 add_reg_note (insn, REG_CFA_DEF_CFA,
9103 plus_constant (Pmode, stack_pointer_rtx,
9104 STACK_POINTER_OFFSET));
9105 RTX_FRAME_RELATED_P (insn) = 1;
9106 }
9107
9108 s390_restore_gprs_from_fprs ();
9109
9110 if (! sibcall)
9111 {
9112
9113 /* Return to caller. */
9114
9115 p = rtvec_alloc (2);
9116
9117 RTVEC_ELT (p, 0) = ret_rtx;
9118 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
9119 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
9120 }
9121 }
9122
9123
9124 /* Return the size in bytes of a function argument of
9125 type TYPE and/or mode MODE. At least one of TYPE or
9126 MODE must be specified. */
9127
9128 static int
9129 s390_function_arg_size (enum machine_mode mode, const_tree type)
9130 {
9131 if (type)
9132 return int_size_in_bytes (type);
9133
9134 /* No type info available for some library calls ... */
9135 if (mode != BLKmode)
9136 return GET_MODE_SIZE (mode);
9137
9138 /* If we have neither type nor mode, abort. */
9139 gcc_unreachable ();
9140 }
9141
9142 /* Return true if a function argument of type TYPE and mode MODE
9143 is to be passed in a floating-point register, if available. */
9144
9145 static bool
9146 s390_function_arg_float (enum machine_mode mode, const_tree type)
9147 {
9148 int size = s390_function_arg_size (mode, type);
9149 if (size > 8)
9150 return false;
9151
9152 /* Soft-float changes the ABI: no floating-point registers are used. */
9153 if (TARGET_SOFT_FLOAT)
9154 return false;
9155
9156 /* No type info available for some library calls ... */
9157 if (!type)
9158 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
9159
9160 /* The ABI says that record types with a single member are treated
9161 just like that member would be. */
9162 while (TREE_CODE (type) == RECORD_TYPE)
9163 {
9164 tree field, single = NULL_TREE;
9165
9166 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
9167 {
9168 if (TREE_CODE (field) != FIELD_DECL)
9169 continue;
9170
9171 if (single == NULL_TREE)
9172 single = TREE_TYPE (field);
9173 else
9174 return false;
9175 }
9176
9177 if (single == NULL_TREE)
9178 return false;
9179 else
9180 type = single;
9181 }
9182
9183 return TREE_CODE (type) == REAL_TYPE;
9184 }
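
/* Illustrative sketch, not part of the original code: per the
   single-member record rule above, the first (hypothetical) struct
   below is passed exactly like a plain double, i.e. in an FPR when
   hard float is available, while the second is not a float argument
   at all.  */

struct s390_doc_wrapped_double { double d; };         /* passed like double */
struct s390_doc_double_and_int { double d; int i; };  /* not a float arg */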
9185
9186 /* Return true if a function argument of type TYPE and mode MODE
9187 is to be passed in an integer register, or a pair of integer
9188 registers, if available. */
9189
9190 static bool
9191 s390_function_arg_integer (enum machine_mode mode, const_tree type)
9192 {
9193 int size = s390_function_arg_size (mode, type);
9194 if (size > 8)
9195 return false;
9196
9197 /* No type info available for some library calls ... */
9198 if (!type)
9199 return GET_MODE_CLASS (mode) == MODE_INT
9200 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
9201
9202 /* We accept small integral (and similar) types. */
9203 if (INTEGRAL_TYPE_P (type)
9204 || POINTER_TYPE_P (type)
9205 || TREE_CODE (type) == NULLPTR_TYPE
9206 || TREE_CODE (type) == OFFSET_TYPE
9207 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
9208 return true;
9209
9210 /* We also accept structs of size 1, 2, 4, 8 that are not
9211 passed in floating-point registers. */
9212 if (AGGREGATE_TYPE_P (type)
9213 && exact_log2 (size) >= 0
9214 && !s390_function_arg_float (mode, type))
9215 return true;
9216
9217 return false;
9218 }
9219
9220 /* Return 1 if a function argument of type TYPE and mode MODE
9221 is to be passed by reference. The ABI specifies that only
9222 structures of size 1, 2, 4, or 8 bytes are passed by value,
9223 all other structures (and complex numbers) are passed by
9224 reference. */
9225
9226 static bool
9227 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
9228 enum machine_mode mode, const_tree type,
9229 bool named ATTRIBUTE_UNUSED)
9230 {
9231 int size = s390_function_arg_size (mode, type);
9232 if (size > 8)
9233 return true;
9234
9235 if (type)
9236 {
9237 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
9238 return 1;
9239
9240 if (TREE_CODE (type) == COMPLEX_TYPE
9241 || TREE_CODE (type) == VECTOR_TYPE)
9242 return 1;
9243 }
9244
9245 return 0;
9246 }
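
/* Illustrative sketch, not part of the original code: only aggregates
   whose size is exactly 1, 2, 4 or 8 bytes are passed by value; any
   other aggregate size, and all complex and vector types, are passed
   by reference.  The hypothetical structs below illustrate the two
   cases.  */

struct s390_doc_by_value { int a; int b; };             /* 8 bytes: by value */
struct s390_doc_by_reference { int a; int b; int c; };  /* 12 bytes: by reference */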
9247
9248 /* Update the data in CUM to advance over an argument of mode MODE and
9249 data type TYPE. (TYPE is null for libcalls where that information
9250 may not be available.) The boolean NAMED specifies whether the
9251 argument is a named argument (as opposed to an unnamed argument
9252 matching an ellipsis). */
9253
9254 static void
9255 s390_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
9256 const_tree type, bool named ATTRIBUTE_UNUSED)
9257 {
9258 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9259
9260 if (s390_function_arg_float (mode, type))
9261 {
9262 cum->fprs += 1;
9263 }
9264 else if (s390_function_arg_integer (mode, type))
9265 {
9266 int size = s390_function_arg_size (mode, type);
9267 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
9268 }
9269 else
9270 gcc_unreachable ();
9271 }
9272
9273 /* Define where to put the arguments to a function.
9274 Value is zero to push the argument on the stack,
9275 or a hard register in which to store the argument.
9276
9277 MODE is the argument's machine mode.
9278 TYPE is the data type of the argument (as a tree).
9279 This is null for libcalls where that information may
9280 not be available.
9281 CUM is a variable of type CUMULATIVE_ARGS which gives info about
9282 the preceding args and about the function being called.
9283 NAMED is nonzero if this argument is a named parameter
9284 (otherwise it is an extra parameter matching an ellipsis).
9285
9286 On S/390, we use general purpose registers 2 through 6 to
9287 pass integer, pointer, and certain structure arguments, and
9288 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
9289 to pass floating point arguments. All remaining arguments
9290 are pushed to the stack. */
9291
9292 static rtx
9293 s390_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
9294 const_tree type, bool named ATTRIBUTE_UNUSED)
9295 {
9296 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
9297
9298 if (s390_function_arg_float (mode, type))
9299 {
9300 if (cum->fprs + 1 > FP_ARG_NUM_REG)
9301 return 0;
9302 else
9303 return gen_rtx_REG (mode, cum->fprs + 16);
9304 }
9305 else if (s390_function_arg_integer (mode, type))
9306 {
9307 int size = s390_function_arg_size (mode, type);
9308 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
9309
9310 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
9311 return 0;
9312 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
9313 return gen_rtx_REG (mode, cum->gprs + 2);
9314 else if (n_gprs == 2)
9315 {
9316 rtvec p = rtvec_alloc (2);
9317
9318 RTVEC_ELT (p, 0)
9319 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
9320 const0_rtx);
9321 RTVEC_ELT (p, 1)
9322 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
9323 GEN_INT (4));
9324
9325 return gen_rtx_PARALLEL (mode, p);
9326 }
9327 }
9328
9329 /* After the real arguments, expand_call calls us once again
9330 with a void_type_node type. Whatever we return here is
9331 passed as operand 2 to the call expanders.
9332
9333 We don't need this feature ... */
9334 else if (type == void_type_node)
9335 return const0_rtx;
9336
9337 gcc_unreachable ();
9338 }
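
/* Illustrative sketch, not part of the original code: for a
   hypothetical 64-bit call such as the one declared below, the two
   integer arguments would be assigned to GPRs 2 and 3 and the float
   argument to FPR 0; once GPRs 2-6 or the FP argument registers are
   exhausted, the remaining arguments go to the stack.  */

extern void s390_doc_example_callee (int a, double b, long c);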
9339
9340 /* Return true if return values of type TYPE should be returned
9341 in a memory buffer whose address is passed by the caller as
9342 hidden first argument. */
9343
9344 static bool
9345 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
9346 {
9347 /* We accept small integral (and similar) types. */
9348 if (INTEGRAL_TYPE_P (type)
9349 || POINTER_TYPE_P (type)
9350 || TREE_CODE (type) == OFFSET_TYPE
9351 || TREE_CODE (type) == REAL_TYPE)
9352 return int_size_in_bytes (type) > 8;
9353
9354 /* Aggregates and similar constructs are always returned
9355 in memory. */
9356 if (AGGREGATE_TYPE_P (type)
9357 || TREE_CODE (type) == COMPLEX_TYPE
9358 || TREE_CODE (type) == VECTOR_TYPE)
9359 return true;
9360
9361 /* ??? We get called on all sorts of random stuff from
9362 aggregate_value_p. We can't abort, but it's not clear
9363 what's safe to return. Pretend it's a struct I guess. */
9364 return true;
9365 }
9366
9367 /* Function arguments and return values are promoted to word size. */
9368
9369 static enum machine_mode
9370 s390_promote_function_mode (const_tree type, enum machine_mode mode,
9371 int *punsignedp,
9372 const_tree fntype ATTRIBUTE_UNUSED,
9373 int for_return ATTRIBUTE_UNUSED)
9374 {
9375 if (INTEGRAL_MODE_P (mode)
9376 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
9377 {
9378 if (type != NULL_TREE && POINTER_TYPE_P (type))
9379 *punsignedp = POINTERS_EXTEND_UNSIGNED;
9380 return Pmode;
9381 }
9382
9383 return mode;
9384 }
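
/* Illustrative sketch, not part of the original code: a sub-word
   integer argument of a hypothetical function like the one declared
   below is promoted to Pmode, i.e. it occupies a full word-sized
   register; pointer arguments are extended according to
   POINTERS_EXTEND_UNSIGNED.  */

extern void s390_doc_takes_short (short x);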
9385
9386 /* Define where to return a (scalar) value of type RET_TYPE.
9387 If RET_TYPE is null, define where to return a (scalar)
9388 value of mode MODE from a libcall. */
9389
9390 static rtx
9391 s390_function_and_libcall_value (enum machine_mode mode,
9392 const_tree ret_type,
9393 const_tree fntype_or_decl,
9394 bool outgoing ATTRIBUTE_UNUSED)
9395 {
9396 /* For normal functions perform the promotion as
9397 promote_function_mode would do. */
9398 if (ret_type)
9399 {
9400 int unsignedp = TYPE_UNSIGNED (ret_type);
9401 mode = promote_function_mode (ret_type, mode, &unsignedp,
9402 fntype_or_decl, 1);
9403 }
9404
9405 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT || SCALAR_FLOAT_MODE_P (mode));
9406 gcc_assert (GET_MODE_SIZE (mode) <= 8);
9407
9408 if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
9409 return gen_rtx_REG (mode, 16);
9410 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
9411 || UNITS_PER_LONG == UNITS_PER_WORD)
9412 return gen_rtx_REG (mode, 2);
9413 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
9414 {
9415 /* This case is triggered when returning a 64 bit value with
9416 -m31 -mzarch. Although the value would fit into a single
9417 register it has to be forced into a 32 bit register pair in
9418 order to match the ABI. */
9419 rtvec p = rtvec_alloc (2);
9420
9421 RTVEC_ELT (p, 0)
9422 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
9423 RTVEC_ELT (p, 1)
9424 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
9425
9426 return gen_rtx_PARALLEL (mode, p);
9427 }
9428
9429 gcc_unreachable ();
9430 }
9431
9432 /* Define where to return a scalar return value of type RET_TYPE. */
9433
9434 static rtx
9435 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
9436 bool outgoing)
9437 {
9438 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
9439 fn_decl_or_type, outgoing);
9440 }
9441
9442 /* Define where to return a scalar libcall return value of mode
9443 MODE. */
9444
9445 static rtx
9446 s390_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9447 {
9448 return s390_function_and_libcall_value (mode, NULL_TREE,
9449 NULL_TREE, true);
9450 }
9451
9452
9453 /* Create and return the va_list datatype.
9454
9455 On S/390, va_list is an array type equivalent to
9456
9457 typedef struct __va_list_tag
9458 {
9459 long __gpr;
9460 long __fpr;
9461 void *__overflow_arg_area;
9462 void *__reg_save_area;
9463 } va_list[1];
9464
9465 where __gpr and __fpr hold the number of general purpose
9466 or floating point arguments used up to now, respectively,
9467 __overflow_arg_area points to the stack location of the
9468 next argument passed on the stack, and __reg_save_area
9469 always points to the start of the register area in the
9470 call frame of the current function. The function prologue
9471 saves all registers used for argument passing into this
9472 area if the function uses variable arguments. */
9473
9474 static tree
9475 s390_build_builtin_va_list (void)
9476 {
9477 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
9478
9479 record = lang_hooks.types.make_type (RECORD_TYPE);
9480
9481 type_decl =
9482 build_decl (BUILTINS_LOCATION,
9483 TYPE_DECL, get_identifier ("__va_list_tag"), record);
9484
9485 f_gpr = build_decl (BUILTINS_LOCATION,
9486 FIELD_DECL, get_identifier ("__gpr"),
9487 long_integer_type_node);
9488 f_fpr = build_decl (BUILTINS_LOCATION,
9489 FIELD_DECL, get_identifier ("__fpr"),
9490 long_integer_type_node);
9491 f_ovf = build_decl (BUILTINS_LOCATION,
9492 FIELD_DECL, get_identifier ("__overflow_arg_area"),
9493 ptr_type_node);
9494 f_sav = build_decl (BUILTINS_LOCATION,
9495 FIELD_DECL, get_identifier ("__reg_save_area"),
9496 ptr_type_node);
9497
9498 va_list_gpr_counter_field = f_gpr;
9499 va_list_fpr_counter_field = f_fpr;
9500
9501 DECL_FIELD_CONTEXT (f_gpr) = record;
9502 DECL_FIELD_CONTEXT (f_fpr) = record;
9503 DECL_FIELD_CONTEXT (f_ovf) = record;
9504 DECL_FIELD_CONTEXT (f_sav) = record;
9505
9506 TYPE_STUB_DECL (record) = type_decl;
9507 TYPE_NAME (record) = type_decl;
9508 TYPE_FIELDS (record) = f_gpr;
9509 DECL_CHAIN (f_gpr) = f_fpr;
9510 DECL_CHAIN (f_fpr) = f_ovf;
9511 DECL_CHAIN (f_ovf) = f_sav;
9512
9513 layout_type (record);
9514
9515 /* The correct type is an array type of one element. */
9516 return build_array_type (record, build_index_type (size_zero_node));
9517 }
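
/* Illustrative sketch, not part of the original code: a user-level
   varargs function compiled for this target relies on __gpr/__fpr to
   count the named register arguments already consumed, on
   __overflow_arg_area for arguments that spilled to the stack, and on
   __reg_save_area for the registers stored by the prologue.  A
   hypothetical source-level example:

     int
     s390_doc_sum (int n, ...)
     {
       va_list ap;
       int i, s = 0;

       va_start (ap, n);
       for (i = 0; i < n; i++)
         s += va_arg (ap, int);
       va_end (ap);
       return s;
     }
*/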
9518
9519 /* Implement va_start by filling the va_list structure VALIST.
9520 STDARG_P is always true, and ignored.
9521 NEXTARG points to the first anonymous stack argument.
9522
9523 The following global variables are used to initialize
9524 the va_list structure:
9525
9526 crtl->args.info:
9527 holds number of gprs and fprs used for named arguments.
9528 crtl->args.arg_offset_rtx:
9529 holds the offset of the first anonymous stack argument
9530 (relative to the virtual arg pointer). */
9531
9532 static void
9533 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
9534 {
9535 HOST_WIDE_INT n_gpr, n_fpr;
9536 int off;
9537 tree f_gpr, f_fpr, f_ovf, f_sav;
9538 tree gpr, fpr, ovf, sav, t;
9539
9540 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
9541 f_fpr = DECL_CHAIN (f_gpr);
9542 f_ovf = DECL_CHAIN (f_fpr);
9543 f_sav = DECL_CHAIN (f_ovf);
9544
9545 valist = build_simple_mem_ref (valist);
9546 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
9547 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9548 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9549 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9550
9551 /* Count number of gp and fp argument registers used. */
9552
9553 n_gpr = crtl->args.info.gprs;
9554 n_fpr = crtl->args.info.fprs;
9555
9556 if (cfun->va_list_gpr_size)
9557 {
9558 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
9559 build_int_cst (NULL_TREE, n_gpr));
9560 TREE_SIDE_EFFECTS (t) = 1;
9561 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9562 }
9563
9564 if (cfun->va_list_fpr_size)
9565 {
9566 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
9567 build_int_cst (NULL_TREE, n_fpr));
9568 TREE_SIDE_EFFECTS (t) = 1;
9569 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9570 }
9571
9572 /* Find the overflow area. */
9573 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
9574 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG)
9575 {
9576 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
9577
9578 off = INTVAL (crtl->args.arg_offset_rtx);
9579 off = off < 0 ? 0 : off;
9580 if (TARGET_DEBUG_ARG)
9581 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
9582 (int)n_gpr, (int)n_fpr, off);
9583
9584 t = fold_build_pointer_plus_hwi (t, off);
9585
9586 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
9587 TREE_SIDE_EFFECTS (t) = 1;
9588 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9589 }
9590
9591 /* Find the register save area. */
9592 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
9593 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
9594 {
9595 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
9596 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
9597
9598 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
9599 TREE_SIDE_EFFECTS (t) = 1;
9600 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
9601 }
9602 }
9603
9604 /* Implement va_arg by updating the va_list structure
9605 VALIST as required to retrieve an argument of type
9606 TYPE, and returning that argument.
9607
9608 Generates code equivalent to:
9609
9610 if (integral value) {
9611 if (size <= 4 && args.gpr < 5 ||
9612 size > 4 && args.gpr < 4 )
9613 ret = args.reg_save_area[args.gpr+8]
9614 else
9615 ret = *args.overflow_arg_area++;
9616 } else if (float value) {
9617 if (args.fgpr < 2)
9618 ret = args.reg_save_area[args.fpr+64]
9619 else
9620 ret = *args.overflow_arg_area++;
9621 } else if (aggregate value) {
9622 if (args.gpr < 5)
9623 ret = *args.reg_save_area[args.gpr]
9624 else
9625 ret = **args.overflow_arg_area++;
9626 } */
9627
9628 static tree
9629 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
9630 gimple_seq *post_p ATTRIBUTE_UNUSED)
9631 {
9632 tree f_gpr, f_fpr, f_ovf, f_sav;
9633 tree gpr, fpr, ovf, sav, reg, t, u;
9634 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
9635 tree lab_false, lab_over, addr;
9636
9637 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
9638 f_fpr = DECL_CHAIN (f_gpr);
9639 f_ovf = DECL_CHAIN (f_fpr);
9640 f_sav = DECL_CHAIN (f_ovf);
9641
9642 valist = build_va_arg_indirect_ref (valist);
9643 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
9644 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
9645 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
9646
9647 /* The tree for args* cannot be shared between gpr/fpr and ovf since
9648 both appear on a lhs. */
9649 valist = unshare_expr (valist);
9650 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
9651
9652 size = int_size_in_bytes (type);
9653
9654 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
9655 {
9656 if (TARGET_DEBUG_ARG)
9657 {
9658 fprintf (stderr, "va_arg: aggregate type");
9659 debug_tree (type);
9660 }
9661
9662 /* Aggregates are passed by reference. */
9663 indirect_p = 1;
9664 reg = gpr;
9665 n_reg = 1;
9666
9667 /* Kernel stack layout on 31 bit: it is assumed here that no padding
9668 will be added by s390_frame_info because for va_args an even number
9669 of GPRs always has to be saved (r15-r2 = 14 regs). */
9670 sav_ofs = 2 * UNITS_PER_LONG;
9671 sav_scale = UNITS_PER_LONG;
9672 size = UNITS_PER_LONG;
9673 max_reg = GP_ARG_NUM_REG - n_reg;
9674 }
9675 else if (s390_function_arg_float (TYPE_MODE (type), type))
9676 {
9677 if (TARGET_DEBUG_ARG)
9678 {
9679 fprintf (stderr, "va_arg: float type");
9680 debug_tree (type);
9681 }
9682
9683 /* FP args go in FP registers, if present. */
9684 indirect_p = 0;
9685 reg = fpr;
9686 n_reg = 1;
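/* FPR arguments are located after the 16 GPR slots of the register
   save area, and each FPR save slot is 8 bytes wide; that is what the
   sav_ofs and sav_scale values below encode.  */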
9687 sav_ofs = 16 * UNITS_PER_LONG;
9688 sav_scale = 8;
9689 max_reg = FP_ARG_NUM_REG - n_reg;
9690 }
9691 else
9692 {
9693 if (TARGET_DEBUG_ARG)
9694 {
9695 fprintf (stderr, "va_arg: other type");
9696 debug_tree (type);
9697 }
9698
9699 /* Otherwise into GP registers. */
9700 indirect_p = 0;
9701 reg = gpr;
9702 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
9703
9704 /* Kernel stack layout on 31 bit: it is assumed here that no padding
9705 will be added by s390_frame_info because for va_args an even number
9706 of GPRs always has to be saved (r15-r2 = 14 regs). */
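/* The first GPR argument register is r2, so GPR argument N is found at
   offset (2 + N) * UNITS_PER_LONG within the register save area; small
   arguments are additionally right-justified within their slot.  That
   is what the sav_ofs / sav_scale values below encode.  */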
9707 sav_ofs = 2 * UNITS_PER_LONG;
9708
9709 if (size < UNITS_PER_LONG)
9710 sav_ofs += UNITS_PER_LONG - size;
9711
9712 sav_scale = UNITS_PER_LONG;
9713 max_reg = GP_ARG_NUM_REG - n_reg;
9714 }
9715
9716 /* Pull the value out of the saved registers ... */
9717
9718 lab_false = create_artificial_label (UNKNOWN_LOCATION);
9719 lab_over = create_artificial_label (UNKNOWN_LOCATION);
9720 addr = create_tmp_var (ptr_type_node, "addr");
9721
9722 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
9723 t = build2 (GT_EXPR, boolean_type_node, reg, t);
9724 u = build1 (GOTO_EXPR, void_type_node, lab_false);
9725 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
9726 gimplify_and_add (t, pre_p);
9727
9728 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
9729 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
9730 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
9731 t = fold_build_pointer_plus (t, u);
9732
9733 gimplify_assign (addr, t, pre_p);
9734
9735 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
9736
9737 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
9738
9739
9740 /* ... Otherwise out of the overflow area. */
9741
9742 t = ovf;
9743 if (size < UNITS_PER_LONG)
9744 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
9745
9746 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
9747
9748 gimplify_assign (addr, t, pre_p);
9749
9750 t = fold_build_pointer_plus_hwi (t, size);
9751 gimplify_assign (ovf, t, pre_p);
9752
9753 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
9754
9755
9756 /* Increment register save count. */
9757
9758 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
9759 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
9760 gimplify_and_add (u, pre_p);
9761
9762 if (indirect_p)
9763 {
9764 t = build_pointer_type_for_mode (build_pointer_type (type),
9765 ptr_mode, true);
9766 addr = fold_convert (t, addr);
9767 addr = build_va_arg_indirect_ref (addr);
9768 }
9769 else
9770 {
9771 t = build_pointer_type_for_mode (type, ptr_mode, true);
9772 addr = fold_convert (t, addr);
9773 }
9774
9775 return build_va_arg_indirect_ref (addr);
9776 }
9777
9778 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
9779 expanders.
9780 DEST - Register location where CC will be stored.
9781 TDB - Pointer to a 256 byte area where the transaction
9782 diagnostic block is stored. NULL if TDB is not needed.
9783 RETRY - Retry count value. If non-NULL a retry loop for CC2
9784 is emitted.
9785 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
9786 of the tbegin instruction pattern. */
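
/* For reference, the condition codes set by TBEGIN (represented below
   as CCRAW mask bits, CC0 being the most significant of the four):
     CC0 - transaction successfully started
     CC1 - indeterminate condition; not retried below
     CC2 - transient condition; handled by the retry loop below
     CC3 - persistent condition; not retried below
   The retry loop emitted for RETRY therefore re-executes the tbegin
   only for CC2 and issues a PPA (perform processor assist) between
   the attempts.  */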
9787
9788 void
9789 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
9790 {
9791 const int CC0 = 1 << 3;
9792 const int CC1 = 1 << 2;
9793 const int CC3 = 1 << 0;
9794 rtx abort_label = gen_label_rtx ();
9795 rtx leave_label = gen_label_rtx ();
9796 rtx retry_reg = gen_reg_rtx (SImode);
9797 rtx retry_label = NULL_RTX;
9798 rtx jump;
9799 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
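/* very_unlikely corresponds to a branch probability of just under 1%
   (REG_BR_PROB_BASE being the 100% mark).  */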
9800
9801 if (retry != NULL_RTX)
9802 {
9803 emit_move_insn (retry_reg, retry);
9804 retry_label = gen_label_rtx ();
9805 emit_label (retry_label);
9806 }
9807
9808 if (clobber_fprs_p)
9809 emit_insn (gen_tbegin_1 (tdb,
9810 gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK)));
9811 else
9812 emit_insn (gen_tbegin_nofloat_1 (tdb,
9813 gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK)));
9814
9815 jump = s390_emit_jump (abort_label,
9816 gen_rtx_NE (VOIDmode,
9817 gen_rtx_REG (CCRAWmode, CC_REGNUM),
9818 gen_rtx_CONST_INT (VOIDmode, CC0)));
9819
9820 JUMP_LABEL (jump) = abort_label;
9821 LABEL_NUSES (abort_label) = 1;
9822 add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
9823
9824 /* Initialize CC return value. */
9825 emit_move_insn (dest, const0_rtx);
9826
9827 s390_emit_jump (leave_label, NULL_RTX);
9828 LABEL_NUSES (leave_label) = 1;
9829 emit_barrier ();
9830
9831 /* Abort handler code. */
9832
9833 emit_label (abort_label);
9834 if (retry != NULL_RTX)
9835 {
9836 rtx count = gen_reg_rtx (SImode);
9837 jump = s390_emit_jump (leave_label,
9838 gen_rtx_EQ (VOIDmode,
9839 gen_rtx_REG (CCRAWmode, CC_REGNUM),
9840 gen_rtx_CONST_INT (VOIDmode, CC1 | CC3)));
9841 LABEL_NUSES (leave_label) = 2;
9842 add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
9843
9844 /* CC2 - transient failure. Perform retry with ppa. */
9845 emit_move_insn (count, retry);
9846 emit_insn (gen_subsi3 (count, count, retry_reg));
9847 emit_insn (gen_tx_assist (count));
9848 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
9849 retry_reg,
9850 retry_reg));
9851 JUMP_LABEL (jump) = retry_label;
9852 LABEL_NUSES (retry_label) = 1;
9853 }
9854
9855 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
9856 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
9857 CC_REGNUM)),
9858 UNSPEC_CC_TO_INT));
9859 emit_label (leave_label);
9860 }
9861
9862 /* Builtins. */
9863
9864 enum s390_builtin
9865 {
9866 S390_BUILTIN_TBEGIN,
9867 S390_BUILTIN_TBEGIN_NOFLOAT,
9868 S390_BUILTIN_TBEGIN_RETRY,
9869 S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
9870 S390_BUILTIN_TBEGINC,
9871 S390_BUILTIN_TEND,
9872 S390_BUILTIN_TABORT,
9873 S390_BUILTIN_NON_TX_STORE,
9874 S390_BUILTIN_TX_NESTING_DEPTH,
9875 S390_BUILTIN_TX_ASSIST,
9876
9877 S390_BUILTIN_max
9878 };
9879
9880 static enum insn_code const code_for_builtin[S390_BUILTIN_max] = {
9881 CODE_FOR_tbegin,
9882 CODE_FOR_tbegin_nofloat,
9883 CODE_FOR_tbegin_retry,
9884 CODE_FOR_tbegin_retry_nofloat,
9885 CODE_FOR_tbeginc,
9886 CODE_FOR_tend,
9887 CODE_FOR_tabort,
9888 CODE_FOR_ntstg,
9889 CODE_FOR_etnd,
9890 CODE_FOR_tx_assist
9891 };
9892
9893 static void
9894 s390_init_builtins (void)
9895 {
9896 tree ftype, uint64_type;
9897
9898 /* void foo (void) */
9899 ftype = build_function_type_list (void_type_node, NULL_TREE);
9900 add_builtin_function ("__builtin_tbeginc", ftype, S390_BUILTIN_TBEGINC,
9901 BUILT_IN_MD, NULL, NULL_TREE);
9902
9903 /* void foo (int) */
9904 ftype = build_function_type_list (void_type_node, integer_type_node,
9905 NULL_TREE);
9906 add_builtin_function ("__builtin_tabort", ftype,
9907 S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, NULL_TREE);
9908 add_builtin_function ("__builtin_tx_assist", ftype,
9909 S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE);
9910
9911 /* int foo (void *) */
9912 ftype = build_function_type_list (integer_type_node, ptr_type_node, NULL_TREE);
9913 add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN,
9914 BUILT_IN_MD, NULL, NULL_TREE);
9915 add_builtin_function ("__builtin_tbegin_nofloat", ftype,
9916 S390_BUILTIN_TBEGIN_NOFLOAT,
9917 BUILT_IN_MD, NULL, NULL_TREE);
9918
9919 /* int foo (void *, int) */
9920 ftype = build_function_type_list (integer_type_node, ptr_type_node,
9921 integer_type_node, NULL_TREE);
9922 add_builtin_function ("__builtin_tbegin_retry", ftype,
9923 S390_BUILTIN_TBEGIN_RETRY,
9924 BUILT_IN_MD,
9925 NULL, NULL_TREE);
9926 add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype,
9927 S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
9928 BUILT_IN_MD,
9929 NULL, NULL_TREE);
9930
9931 /* int foo (void) */
9932 ftype = build_function_type_list (integer_type_node, NULL_TREE);
9933 add_builtin_function ("__builtin_tx_nesting_depth", ftype,
9934 S390_BUILTIN_TX_NESTING_DEPTH,
9935 BUILT_IN_MD, NULL, NULL_TREE);
9936 add_builtin_function ("__builtin_tend", ftype,
9937 S390_BUILTIN_TEND, BUILT_IN_MD, NULL, NULL_TREE);
9938
9939 /* void foo (uint64_t *, uint64_t) */
9940 if (TARGET_64BIT)
9941 uint64_type = long_unsigned_type_node;
9942 else
9943 uint64_type = long_long_unsigned_type_node;
9944
9945 ftype = build_function_type_list (void_type_node,
9946 build_pointer_type (uint64_type),
9947 uint64_type, NULL_TREE);
9948 add_builtin_function ("__builtin_non_tx_store", ftype,
9949 S390_BUILTIN_NON_TX_STORE,
9950 BUILT_IN_MD, NULL, NULL_TREE);
9951 }
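
/* A minimal usage sketch of the transactional-execution builtins
   registered above (for illustration only; error handling omitted):

     if (__builtin_tbegin ((void *) 0) == 0)
       {
         // Transaction successfully started (condition code 0);
         // memory accesses below are transactional.
         counter++;
         __builtin_tend ();
       }
     else
       {
         // The transaction aborted or could not be started;
         // take a fallback path, e.g. grab a lock.
       }

   __builtin_tbegin returns the TBEGIN condition code, so a return
   value of 0 means the transaction is active.  */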
9952
9953 /* Expand an expression EXP that calls a built-in function,
9954 with result going to TARGET if that's convenient
9955 (and in mode MODE if that's convenient).
9956 SUBTARGET may be used as the target for computing one of EXP's operands.
9957 IGNORE is nonzero if the value is to be ignored. */
9958
9959 static rtx
9960 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9961 enum machine_mode mode ATTRIBUTE_UNUSED,
9962 int ignore ATTRIBUTE_UNUSED)
9963 {
9964 #define MAX_ARGS 2
9965
9966 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
9967 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9968 enum insn_code icode;
9969 rtx op[MAX_ARGS], pat;
9970 int arity;
9971 bool nonvoid;
9972 tree arg;
9973 call_expr_arg_iterator iter;
9974
9975 if (fcode >= S390_BUILTIN_max)
9976 internal_error ("bad builtin fcode");
9977 icode = code_for_builtin[fcode];
9978 if (icode == 0)
9979 internal_error ("bad builtin fcode");
9980
9981 if (!TARGET_HTM)
9982 error ("transactional execution builtins not enabled (-mhtm)");
9983
9984 /* Set a flag in the machine specific cfun part in order to support
9985 saving/restoring of FPRs. */
9986 if (fcode == S390_BUILTIN_TBEGIN || fcode == S390_BUILTIN_TBEGIN_RETRY)
9987 cfun->machine->tbegin_p = true;
9988
9989 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
9990
9991 arity = 0;
9992 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
9993 {
9994 const struct insn_operand_data *insn_op;
9995
9996 if (arg == error_mark_node)
9997 return NULL_RTX;
9998 if (arity >= MAX_ARGS)
9999 return NULL_RTX;
10000
10001 insn_op = &insn_data[icode].operand[arity + nonvoid];
10002
10003 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
10004
10005 if (!(*insn_op->predicate) (op[arity], insn_op->mode))
10006 {
10007 if (insn_op->predicate == memory_operand)
10008 {
10009 /* Don't move a NULL pointer into a register. Otherwise
10010 we have to rely on combine being able to move it back
10011 in order to get an immediate 0 in the instruction. */
10012 if (op[arity] != const0_rtx)
10013 op[arity] = copy_to_mode_reg (Pmode, op[arity]);
10014 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
10015 }
10016 else
10017 op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
10018 }
10019
10020 arity++;
10021 }
10022
10023 if (nonvoid)
10024 {
10025 enum machine_mode tmode = insn_data[icode].operand[0].mode;
10026 if (!target
10027 || GET_MODE (target) != tmode
10028 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
10029 target = gen_reg_rtx (tmode);
10030 }
10031
10032 switch (arity)
10033 {
10034 case 0:
10035 pat = GEN_FCN (icode) (target);
10036 break;
10037 case 1:
10038 if (nonvoid)
10039 pat = GEN_FCN (icode) (target, op[0]);
10040 else
10041 pat = GEN_FCN (icode) (op[0]);
10042 break;
10043 case 2:
10044 if (nonvoid)
10045 pat = GEN_FCN (icode) (target, op[0], op[1]);
10046 else
10047 pat = GEN_FCN (icode) (op[0], op[1]);
10048 break;
10049 default:
10050 gcc_unreachable ();
10051 }
10052 if (!pat)
10053 return NULL_RTX;
10054 emit_insn (pat);
10055
10056 if (nonvoid)
10057 return target;
10058 else
10059 return const0_rtx;
10060 }
10061
10062
10063 /* Output assembly code for the trampoline template to
10064 stdio stream FILE.
10065
10066 On S/390, we use gpr 1 internally in the trampoline code;
10067 gpr 0 is used to hold the static chain. */
10068
10069 static void
10070 s390_asm_trampoline_template (FILE *file)
10071 {
10072 rtx op[2];
10073 op[0] = gen_rtx_REG (Pmode, 0);
10074 op[1] = gen_rtx_REG (Pmode, 1);
10075
10076 if (TARGET_64BIT)
10077 {
10078 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
10079 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
10080 output_asm_insn ("br\t%1", op); /* 2 byte */
10081 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
10082 }
10083 else
10084 {
10085 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
10086 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
10087 output_asm_insn ("br\t%1", op); /* 2 byte */
10088 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
10089 }
10090 }
10091
10092 /* Emit RTL insns to initialize the variable parts of a trampoline.
10093 FNADDR is an RTX for the address of the function's pure code.
10094 CXT is an RTX for the static chain value for the function. */
10095
10096 static void
10097 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10098 {
10099 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10100 rtx mem;
10101
10102 emit_block_move (m_tramp, assemble_trampoline_template (),
10103 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
10104
10105 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
10106 emit_move_insn (mem, cxt);
10107 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
10108 emit_move_insn (mem, fnaddr);
10109 }
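
/* For illustration, the initialized 64-bit trampoline looks roughly
   like this (byte offsets on the left; the last two slots are the
   ones filled in by s390_trampoline_init above):

      0:  basr %r1,0            # %r1 <- address of the lmg below
      2:  lmg  %r0,%r1,14(%r1)  # %r0 <- static chain (offset 16)
                                # %r1 <- target address (offset 24)
      8:  br   %r1              # branch to the target function
     16:  <static chain value>
     24:  <target function address>

   The 31 bit variant is analogous with 4 byte slots.  */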
10110
10111 /* Output assembler code to FILE to increment profiler label # LABELNO
10112 for profiling a function entry. */
10113
10114 void
10115 s390_function_profiler (FILE *file, int labelno)
10116 {
10117 rtx op[7];
10118
10119 char label[128];
10120 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
10121
10122 fprintf (file, "# function profiler\n");
10123
10124 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
10125 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
10126 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
10127
10128 op[2] = gen_rtx_REG (Pmode, 1);
10129 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
10130 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
10131
10132 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
10133 if (flag_pic)
10134 {
10135 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
10136 op[4] = gen_rtx_CONST (Pmode, op[4]);
10137 }
10138
10139 if (TARGET_64BIT)
10140 {
10141 output_asm_insn ("stg\t%0,%1", op);
10142 output_asm_insn ("larl\t%2,%3", op);
10143 output_asm_insn ("brasl\t%0,%4", op);
10144 output_asm_insn ("lg\t%0,%1", op);
10145 }
10146 else if (!flag_pic)
10147 {
10148 op[6] = gen_label_rtx ();
10149
10150 output_asm_insn ("st\t%0,%1", op);
10151 output_asm_insn ("bras\t%2,%l6", op);
10152 output_asm_insn (".long\t%4", op);
10153 output_asm_insn (".long\t%3", op);
10154 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
10155 output_asm_insn ("l\t%0,0(%2)", op);
10156 output_asm_insn ("l\t%2,4(%2)", op);
10157 output_asm_insn ("basr\t%0,%0", op);
10158 output_asm_insn ("l\t%0,%1", op);
10159 }
10160 else
10161 {
10162 op[5] = gen_label_rtx ();
10163 op[6] = gen_label_rtx ();
10164
10165 output_asm_insn ("st\t%0,%1", op);
10166 output_asm_insn ("bras\t%2,%l6", op);
10167 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
10168 output_asm_insn (".long\t%4-%l5", op);
10169 output_asm_insn (".long\t%3-%l5", op);
10170 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
10171 output_asm_insn ("lr\t%0,%2", op);
10172 output_asm_insn ("a\t%0,0(%2)", op);
10173 output_asm_insn ("a\t%2,4(%2)", op);
10174 output_asm_insn ("basr\t%0,%0", op);
10175 output_asm_insn ("l\t%0,%1", op);
10176 }
10177 }
10178
10179 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
10180 into its SYMBOL_REF_FLAGS. */
10181
10182 static void
10183 s390_encode_section_info (tree decl, rtx rtl, int first)
10184 {
10185 default_encode_section_info (decl, rtl, first);
10186
10187 if (TREE_CODE (decl) == VAR_DECL)
10188 {
10189 /* If a variable has a forced alignment to < 2 bytes, mark it
10190 with SYMBOL_FLAG_ALIGN1 to prevent it from being used as a
10191 LARL operand. */
10192 if (DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16)
10193 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
10194 if (!DECL_SIZE (decl)
10195 || !DECL_ALIGN (decl)
10196 || !host_integerp (DECL_SIZE (decl), 0)
10197 || (DECL_ALIGN (decl) <= 64
10198 && DECL_ALIGN (decl) != tree_low_cst (DECL_SIZE (decl), 0)))
10199 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
10200 }
10201
10202 /* Literal pool references don't have a decl so they are handled
10203 differently here. We rely on the information in the MEM_ALIGN
10204 entry to decide upon natural alignment. */
10205 if (MEM_P (rtl)
10206 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
10207 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0))
10208 && (MEM_ALIGN (rtl) == 0
10209 || GET_MODE_BITSIZE (GET_MODE (rtl)) == 0
10210 || MEM_ALIGN (rtl) < GET_MODE_BITSIZE (GET_MODE (rtl))))
10211 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_NOT_NATURALLY_ALIGNED;
10212 }
10213
10214 /* Output thunk to FILE that implements a C++ virtual function call (with
10215 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
10216 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
10217 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
10218 relative to the resulting this pointer. */
10219
10220 static void
10221 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
10222 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10223 tree function)
10224 {
10225 rtx op[10];
10226 int nonlocal = 0;
10227
10228 /* Make sure unwind info is emitted for the thunk if needed. */
10229 final_start_function (emit_barrier (), file, 1);
10230
10231 /* Operand 0 is the target function. */
10232 op[0] = XEXP (DECL_RTL (function), 0);
10233 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
10234 {
10235 nonlocal = 1;
10236 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
10237 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
10238 op[0] = gen_rtx_CONST (Pmode, op[0]);
10239 }
10240
10241 /* Operand 1 is the 'this' pointer. */
10242 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10243 op[1] = gen_rtx_REG (Pmode, 3);
10244 else
10245 op[1] = gen_rtx_REG (Pmode, 2);
10246
10247 /* Operand 2 is the delta. */
10248 op[2] = GEN_INT (delta);
10249
10250 /* Operand 3 is the vcall_offset. */
10251 op[3] = GEN_INT (vcall_offset);
10252
10253 /* Operand 4 is the temporary register. */
10254 op[4] = gen_rtx_REG (Pmode, 1);
10255
10256 /* Operands 5 to 8 can be used as labels. */
10257 op[5] = NULL_RTX;
10258 op[6] = NULL_RTX;
10259 op[7] = NULL_RTX;
10260 op[8] = NULL_RTX;
10261
10262 /* Operand 9 can be used for temporary register. */
10263 op[9] = NULL_RTX;
10264
10265 /* Generate code. */
10266 if (TARGET_64BIT)
10267 {
10268 /* Setup literal pool pointer if required. */
10269 if ((!DISP_IN_RANGE (delta)
10270 && !CONST_OK_FOR_K (delta)
10271 && !CONST_OK_FOR_Os (delta))
10272 || (!DISP_IN_RANGE (vcall_offset)
10273 && !CONST_OK_FOR_K (vcall_offset)
10274 && !CONST_OK_FOR_Os (vcall_offset)))
10275 {
10276 op[5] = gen_label_rtx ();
10277 output_asm_insn ("larl\t%4,%5", op);
10278 }
10279
10280 /* Add DELTA to this pointer. */
10281 if (delta)
10282 {
10283 if (CONST_OK_FOR_J (delta))
10284 output_asm_insn ("la\t%1,%2(%1)", op);
10285 else if (DISP_IN_RANGE (delta))
10286 output_asm_insn ("lay\t%1,%2(%1)", op);
10287 else if (CONST_OK_FOR_K (delta))
10288 output_asm_insn ("aghi\t%1,%2", op);
10289 else if (CONST_OK_FOR_Os (delta))
10290 output_asm_insn ("agfi\t%1,%2", op);
10291 else
10292 {
10293 op[6] = gen_label_rtx ();
10294 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
10295 }
10296 }
10297
10298 /* Perform vcall adjustment. */
10299 if (vcall_offset)
10300 {
10301 if (DISP_IN_RANGE (vcall_offset))
10302 {
10303 output_asm_insn ("lg\t%4,0(%1)", op);
10304 output_asm_insn ("ag\t%1,%3(%4)", op);
10305 }
10306 else if (CONST_OK_FOR_K (vcall_offset))
10307 {
10308 output_asm_insn ("lghi\t%4,%3", op);
10309 output_asm_insn ("ag\t%4,0(%1)", op);
10310 output_asm_insn ("ag\t%1,0(%4)", op);
10311 }
10312 else if (CONST_OK_FOR_Os (vcall_offset))
10313 {
10314 output_asm_insn ("lgfi\t%4,%3", op);
10315 output_asm_insn ("ag\t%4,0(%1)", op);
10316 output_asm_insn ("ag\t%1,0(%4)", op);
10317 }
10318 else
10319 {
10320 op[7] = gen_label_rtx ();
10321 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
10322 output_asm_insn ("ag\t%4,0(%1)", op);
10323 output_asm_insn ("ag\t%1,0(%4)", op);
10324 }
10325 }
10326
10327 /* Jump to target. */
10328 output_asm_insn ("jg\t%0", op);
10329
10330 /* Output literal pool if required. */
10331 if (op[5])
10332 {
10333 output_asm_insn (".align\t4", op);
10334 targetm.asm_out.internal_label (file, "L",
10335 CODE_LABEL_NUMBER (op[5]));
10336 }
10337 if (op[6])
10338 {
10339 targetm.asm_out.internal_label (file, "L",
10340 CODE_LABEL_NUMBER (op[6]));
10341 output_asm_insn (".long\t%2", op);
10342 }
10343 if (op[7])
10344 {
10345 targetm.asm_out.internal_label (file, "L",
10346 CODE_LABEL_NUMBER (op[7]));
10347 output_asm_insn (".long\t%3", op);
10348 }
10349 }
10350 else
10351 {
10352 /* Setup base pointer if required. */
10353 if (!vcall_offset
10354 || (!DISP_IN_RANGE (delta)
10355 && !CONST_OK_FOR_K (delta)
10356 && !CONST_OK_FOR_Os (delta))
10357 || (!DISP_IN_RANGE (delta)
10358 && !CONST_OK_FOR_K (vcall_offset)
10359 && !CONST_OK_FOR_Os (vcall_offset)))
10360 {
10361 op[5] = gen_label_rtx ();
10362 output_asm_insn ("basr\t%4,0", op);
10363 targetm.asm_out.internal_label (file, "L",
10364 CODE_LABEL_NUMBER (op[5]));
10365 }
10366
10367 /* Add DELTA to this pointer. */
10368 if (delta)
10369 {
10370 if (CONST_OK_FOR_J (delta))
10371 output_asm_insn ("la\t%1,%2(%1)", op);
10372 else if (DISP_IN_RANGE (delta))
10373 output_asm_insn ("lay\t%1,%2(%1)", op);
10374 else if (CONST_OK_FOR_K (delta))
10375 output_asm_insn ("ahi\t%1,%2", op);
10376 else if (CONST_OK_FOR_Os (delta))
10377 output_asm_insn ("afi\t%1,%2", op);
10378 else
10379 {
10380 op[6] = gen_label_rtx ();
10381 output_asm_insn ("a\t%1,%6-%5(%4)", op);
10382 }
10383 }
10384
10385 /* Perform vcall adjustment. */
10386 if (vcall_offset)
10387 {
10388 if (CONST_OK_FOR_J (vcall_offset))
10389 {
10390 output_asm_insn ("l\t%4,0(%1)", op);
10391 output_asm_insn ("a\t%1,%3(%4)", op);
10392 }
10393 else if (DISP_IN_RANGE (vcall_offset))
10394 {
10395 output_asm_insn ("l\t%4,0(%1)", op);
10396 output_asm_insn ("ay\t%1,%3(%4)", op);
10397 }
10398 else if (CONST_OK_FOR_K (vcall_offset))
10399 {
10400 output_asm_insn ("lhi\t%4,%3", op);
10401 output_asm_insn ("a\t%4,0(%1)", op);
10402 output_asm_insn ("a\t%1,0(%4)", op);
10403 }
10404 else if (CONST_OK_FOR_Os (vcall_offset))
10405 {
10406 output_asm_insn ("iilf\t%4,%3", op);
10407 output_asm_insn ("a\t%4,0(%1)", op);
10408 output_asm_insn ("a\t%1,0(%4)", op);
10409 }
10410 else
10411 {
10412 op[7] = gen_label_rtx ();
10413 output_asm_insn ("l\t%4,%7-%5(%4)", op);
10414 output_asm_insn ("a\t%4,0(%1)", op);
10415 output_asm_insn ("a\t%1,0(%4)", op);
10416 }
10417
10418 /* We had to clobber the base pointer register.
10419 Re-setup the base pointer (with a different base). */
10420 op[5] = gen_label_rtx ();
10421 output_asm_insn ("basr\t%4,0", op);
10422 targetm.asm_out.internal_label (file, "L",
10423 CODE_LABEL_NUMBER (op[5]));
10424 }
10425
10426 /* Jump to target. */
10427 op[8] = gen_label_rtx ();
10428
10429 if (!flag_pic)
10430 output_asm_insn ("l\t%4,%8-%5(%4)", op);
10431 else if (!nonlocal)
10432 output_asm_insn ("a\t%4,%8-%5(%4)", op);
10433 /* We cannot call through .plt, since .plt requires %r12 loaded. */
10434 else if (flag_pic == 1)
10435 {
10436 output_asm_insn ("a\t%4,%8-%5(%4)", op);
10437 output_asm_insn ("l\t%4,%0(%4)", op);
10438 }
10439 else if (flag_pic == 2)
10440 {
10441 op[9] = gen_rtx_REG (Pmode, 0);
10442 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
10443 output_asm_insn ("a\t%4,%8-%5(%4)", op);
10444 output_asm_insn ("ar\t%4,%9", op);
10445 output_asm_insn ("l\t%4,0(%4)", op);
10446 }
10447
10448 output_asm_insn ("br\t%4", op);
10449
10450 /* Output literal pool. */
10451 output_asm_insn (".align\t4", op);
10452
10453 if (nonlocal && flag_pic == 2)
10454 output_asm_insn (".long\t%0", op);
10455 if (nonlocal)
10456 {
10457 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
10458 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
10459 }
10460
10461 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
10462 if (!flag_pic)
10463 output_asm_insn (".long\t%0", op);
10464 else
10465 output_asm_insn (".long\t%0-%5", op);
10466
10467 if (op[6])
10468 {
10469 targetm.asm_out.internal_label (file, "L",
10470 CODE_LABEL_NUMBER (op[6]));
10471 output_asm_insn (".long\t%2", op);
10472 }
10473 if (op[7])
10474 {
10475 targetm.asm_out.internal_label (file, "L",
10476 CODE_LABEL_NUMBER (op[7]));
10477 output_asm_insn (".long\t%3", op);
10478 }
10479 }
10480 final_end_function ();
10481 }
10482
10483 static bool
10484 s390_valid_pointer_mode (enum machine_mode mode)
10485 {
10486 return (mode == SImode || (TARGET_64BIT && mode == DImode));
10487 }
10488
10489 /* Checks whether the given CALL_EXPR would use a caller
10490 saved register. This is used to decide whether sibling call
10491 optimization could be performed on the respective function
10492 call. */
10493
10494 static bool
10495 s390_call_saved_register_used (tree call_expr)
10496 {
10497 CUMULATIVE_ARGS cum_v;
10498 cumulative_args_t cum;
10499 tree parameter;
10500 enum machine_mode mode;
10501 tree type;
10502 rtx parm_rtx;
10503 int reg, i;
10504
10505 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
10506 cum = pack_cumulative_args (&cum_v);
10507
10508 for (i = 0; i < call_expr_nargs (call_expr); i++)
10509 {
10510 parameter = CALL_EXPR_ARG (call_expr, i);
10511 gcc_assert (parameter);
10512
10513 /* For an undeclared variable passed as a parameter we will get
10514 an ERROR_MARK node here. */
10515 if (TREE_CODE (parameter) == ERROR_MARK)
10516 return true;
10517
10518 type = TREE_TYPE (parameter);
10519 gcc_assert (type);
10520
10521 mode = TYPE_MODE (type);
10522 gcc_assert (mode);
10523
10524 if (pass_by_reference (&cum_v, mode, type, true))
10525 {
10526 mode = Pmode;
10527 type = build_pointer_type (type);
10528 }
10529
10530 parm_rtx = s390_function_arg (cum, mode, type, 0);
10531
10532 s390_function_arg_advance (cum, mode, type, 0);
10533
10534 if (!parm_rtx)
10535 continue;
10536
10537 if (REG_P (parm_rtx))
10538 {
10539 for (reg = 0;
10540 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
10541 reg++)
10542 if (!call_used_regs[reg + REGNO (parm_rtx)])
10543 return true;
10544 }
10545
10546 if (GET_CODE (parm_rtx) == PARALLEL)
10547 {
10548 int i;
10549
10550 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
10551 {
10552 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
10553
10554 gcc_assert (REG_P (r));
10555
10556 for (reg = 0;
10557 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
10558 reg++)
10559 if (!call_used_regs[reg + REGNO (r)])
10560 return true;
10561 }
10562 }
10563
10564 }
10565 return false;
10566 }
10567
10568 /* Return true if the given call expression can be
10569 turned into a sibling call.
10570 DECL holds the declaration of the function to be called whereas
10571 EXP is the call expression itself. */
10572
10573 static bool
10574 s390_function_ok_for_sibcall (tree decl, tree exp)
10575 {
10576 /* The TPF epilogue uses register 1. */
10577 if (TARGET_TPF_PROFILING)
10578 return false;
10579
10580 /* The 31 bit PLT code uses register 12 (GOT pointer - call-saved)
10581 which would have to be restored before the sibcall. */
10582 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
10583 return false;
10584
10585 /* Register 6 on s390 is available as an argument register but is
10586 unfortunately call-saved. This makes functions needing this register
10587 for arguments not suitable for sibcalls. */
10588 return !s390_call_saved_register_used (exp);
10589 }
10590
10591 /* Return the fixed registers used for condition codes. */
10592
10593 static bool
10594 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10595 {
10596 *p1 = CC_REGNUM;
10597 *p2 = INVALID_REGNUM;
10598
10599 return true;
10600 }
10601
10602 /* This function is used by the call expanders of the machine description.
10603 It emits the call insn itself together with the necessary operations
10604 to adjust the target address and returns the emitted insn.
10605 ADDR_LOCATION is the target address rtx
10606 TLS_CALL the location of the thread-local symbol
10607 RESULT_REG the register where the result of the call should be stored
10608 RETADDR_REG the register where the return address should be stored
10609 If this parameter is NULL_RTX the call is considered
10610 to be a sibling call. */
10611
10612 rtx
10613 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
10614 rtx retaddr_reg)
10615 {
10616 bool plt_call = false;
10617 rtx insn;
10618 rtx call;
10619 rtx clobber;
10620 rtvec vec;
10621
10622 /* Direct function calls need special treatment. */
10623 if (GET_CODE (addr_location) == SYMBOL_REF)
10624 {
10625 /* When calling a global routine in PIC mode, we must
10626 replace the symbol itself with the PLT stub. */
10627 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
10628 {
10629 if (retaddr_reg != NULL_RTX)
10630 {
10631 addr_location = gen_rtx_UNSPEC (Pmode,
10632 gen_rtvec (1, addr_location),
10633 UNSPEC_PLT);
10634 addr_location = gen_rtx_CONST (Pmode, addr_location);
10635 plt_call = true;
10636 }
10637 else
10638 /* For -fpic code the PLT entries might use r12 which is
10639 call-saved. Therefore we cannot do a sibcall when
10640 calling directly using a symbol ref. When reaching
10641 this point we decided (in s390_function_ok_for_sibcall)
10642 to do a sibcall for a function pointer but one of the
10643 optimizers was able to get rid of the function pointer
10644 by propagating the symbol ref into the call. This
10645 optimization is illegal for S/390 so we turn the direct
10646 call into an indirect call again. */
10647 addr_location = force_reg (Pmode, addr_location);
10648 }
10649
10650 /* Unless we can use the bras(l) insn, force the
10651 routine address into a register. */
10652 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
10653 {
10654 if (flag_pic)
10655 addr_location = legitimize_pic_address (addr_location, 0);
10656 else
10657 addr_location = force_reg (Pmode, addr_location);
10658 }
10659 }
10660
10661 /* If it is already an indirect call or the code above moved the
10662 SYMBOL_REF to somewhere else make sure the address can be found in
10663 register 1. */
10664 if (retaddr_reg == NULL_RTX
10665 && GET_CODE (addr_location) != SYMBOL_REF
10666 && !plt_call)
10667 {
10668 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
10669 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
10670 }
10671
10672 addr_location = gen_rtx_MEM (QImode, addr_location);
10673 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
10674
10675 if (result_reg != NULL_RTX)
10676 call = gen_rtx_SET (VOIDmode, result_reg, call);
10677
10678 if (retaddr_reg != NULL_RTX)
10679 {
10680 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
10681
10682 if (tls_call != NULL_RTX)
10683 vec = gen_rtvec (3, call, clobber,
10684 gen_rtx_USE (VOIDmode, tls_call));
10685 else
10686 vec = gen_rtvec (2, call, clobber);
10687
10688 call = gen_rtx_PARALLEL (VOIDmode, vec);
10689 }
10690
10691 insn = emit_call_insn (call);
10692
10693 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
10694 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
10695 {
10696 /* s390_function_ok_for_sibcall should
10697 have denied sibcalls in this case. */
10698 gcc_assert (retaddr_reg != NULL_RTX);
10699 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
10700 }
10701 return insn;
10702 }
10703
10704 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
10705
10706 static void
10707 s390_conditional_register_usage (void)
10708 {
10709 int i;
10710
10711 if (flag_pic)
10712 {
10713 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10714 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
10715 }
10716 if (TARGET_CPU_ZARCH)
10717 {
10718 fixed_regs[BASE_REGNUM] = 0;
10719 call_used_regs[BASE_REGNUM] = 0;
10720 fixed_regs[RETURN_REGNUM] = 0;
10721 call_used_regs[RETURN_REGNUM] = 0;
10722 }
10723 if (TARGET_64BIT)
10724 {
10725 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
10726 call_used_regs[i] = call_really_used_regs[i] = 0;
10727 }
10728 else
10729 {
10730 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
10731 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
10732 }
10733
10734 if (TARGET_SOFT_FLOAT)
10735 {
10736 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
10737 call_used_regs[i] = fixed_regs[i] = 1;
10738 }
10739 }
10740
10741 /* Corresponding function to eh_return expander. */
10742
10743 static GTY(()) rtx s390_tpf_eh_return_symbol;
10744 void
10745 s390_emit_tpf_eh_return (rtx target)
10746 {
10747 rtx insn, reg;
10748
10749 if (!s390_tpf_eh_return_symbol)
10750 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
10751
10752 reg = gen_rtx_REG (Pmode, 2);
10753
10754 emit_move_insn (reg, target);
10755 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
10756 gen_rtx_REG (Pmode, RETURN_REGNUM));
10757 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
10758
10759 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
10760 }
10761
10762 /* Rework the prologue/epilogue to avoid saving/restoring
10763 registers unnecessarily. */
10764
10765 static void
10766 s390_optimize_prologue (void)
10767 {
10768 rtx insn, new_insn, next_insn;
10769
10770 /* Do a final recompute of the frame-related data. */
10771 s390_optimize_register_info ();
10772
10773 /* If all special registers are in fact used, there's nothing we
10774 can do, so no point in walking the insn list. */
10775
10776 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
10777 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
10778 && (TARGET_CPU_ZARCH
10779 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
10780 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
10781 return;
10782
10783 /* Search for prologue/epilogue insns and replace them. */
10784
10785 for (insn = get_insns (); insn; insn = next_insn)
10786 {
10787 int first, last, off;
10788 rtx set, base, offset;
10789 rtx pat;
10790
10791 next_insn = NEXT_INSN (insn);
10792
10793 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
10794 continue;
10795
10796 pat = PATTERN (insn);
10797
10798 /* Remove ldgr/lgdr instructions used for saving and restoring
10799 GPRs if possible. */
10800 if (TARGET_Z10
10801 && GET_CODE (pat) == SET
10802 && GET_MODE (SET_SRC (pat)) == DImode
10803 && REG_P (SET_SRC (pat))
10804 && REG_P (SET_DEST (pat)))
10805 {
10806 int src_regno = REGNO (SET_SRC (pat));
10807 int dest_regno = REGNO (SET_DEST (pat));
10808 int gpr_regno;
10809 int fpr_regno;
10810
10811 if (!((GENERAL_REGNO_P (src_regno) && FP_REGNO_P (dest_regno))
10812 || (FP_REGNO_P (src_regno) && GENERAL_REGNO_P (dest_regno))))
10813 continue;
10814
10815 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
10816 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
10817
10818 /* GPR must be call-saved, FPR must be call-clobbered. */
10819 if (!call_really_used_regs[fpr_regno]
10820 || call_really_used_regs[gpr_regno])
10821 continue;
10822
10823 /* For restores we have to revert the frame related flag
10824 since no debug info is supposed to be generated for
10825 these. */
10826 if (dest_regno == gpr_regno)
10827 RTX_FRAME_RELATED_P (insn) = 0;
10828
10829 /* It must not happen that what we once saved in an FPR now
10830 needs a stack slot. */
10831 gcc_assert (cfun_gpr_save_slot (gpr_regno) != -1);
10832
10833 if (cfun_gpr_save_slot (gpr_regno) == 0)
10834 {
10835 remove_insn (insn);
10836 continue;
10837 }
10838 }
10839
10840 if (GET_CODE (pat) == PARALLEL
10841 && store_multiple_operation (pat, VOIDmode))
10842 {
10843 set = XVECEXP (pat, 0, 0);
10844 first = REGNO (SET_SRC (set));
10845 last = first + XVECLEN (pat, 0) - 1;
10846 offset = const0_rtx;
10847 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
10848 off = INTVAL (offset);
10849
10850 if (GET_CODE (base) != REG || off < 0)
10851 continue;
10852 if (cfun_frame_layout.first_save_gpr != -1
10853 && (cfun_frame_layout.first_save_gpr < first
10854 || cfun_frame_layout.last_save_gpr > last))
10855 continue;
10856 if (REGNO (base) != STACK_POINTER_REGNUM
10857 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10858 continue;
10859 if (first > BASE_REGNUM || last < BASE_REGNUM)
10860 continue;
10861
10862 if (cfun_frame_layout.first_save_gpr != -1)
10863 {
10864 new_insn = save_gprs (base,
10865 off + (cfun_frame_layout.first_save_gpr
10866 - first) * UNITS_PER_LONG,
10867 cfun_frame_layout.first_save_gpr,
10868 cfun_frame_layout.last_save_gpr);
10869 new_insn = emit_insn_before (new_insn, insn);
10870 INSN_ADDRESSES_NEW (new_insn, -1);
10871 }
10872
10873 remove_insn (insn);
10874 continue;
10875 }
10876
10877 if (cfun_frame_layout.first_save_gpr == -1
10878 && GET_CODE (pat) == SET
10879 && GENERAL_REG_P (SET_SRC (pat))
10880 && GET_CODE (SET_DEST (pat)) == MEM)
10881 {
10882 set = pat;
10883 first = REGNO (SET_SRC (set));
10884 offset = const0_rtx;
10885 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
10886 off = INTVAL (offset);
10887
10888 if (GET_CODE (base) != REG || off < 0)
10889 continue;
10890 if (REGNO (base) != STACK_POINTER_REGNUM
10891 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10892 continue;
10893
10894 remove_insn (insn);
10895 continue;
10896 }
10897
10898 if (GET_CODE (pat) == PARALLEL
10899 && load_multiple_operation (pat, VOIDmode))
10900 {
10901 set = XVECEXP (pat, 0, 0);
10902 first = REGNO (SET_DEST (set));
10903 last = first + XVECLEN (pat, 0) - 1;
10904 offset = const0_rtx;
10905 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
10906 off = INTVAL (offset);
10907
10908 if (GET_CODE (base) != REG || off < 0)
10909 continue;
10910
10911 RTX_FRAME_RELATED_P (insn) = 0;
10912
10913 if (cfun_frame_layout.first_restore_gpr != -1
10914 && (cfun_frame_layout.first_restore_gpr < first
10915 || cfun_frame_layout.last_restore_gpr > last))
10916 continue;
10917 if (REGNO (base) != STACK_POINTER_REGNUM
10918 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10919 continue;
10920 if (first > BASE_REGNUM || last < BASE_REGNUM)
10921 continue;
10922
10923 if (cfun_frame_layout.first_restore_gpr != -1)
10924 {
10925 new_insn = restore_gprs (base,
10926 off + (cfun_frame_layout.first_restore_gpr
10927 - first) * UNITS_PER_LONG,
10928 cfun_frame_layout.first_restore_gpr,
10929 cfun_frame_layout.last_restore_gpr);
10930 RTX_FRAME_RELATED_P (new_insn) = 0;
10931 new_insn = emit_insn_before (new_insn, insn);
10932 INSN_ADDRESSES_NEW (new_insn, -1);
10933 }
10934
10935 remove_insn (insn);
10936 continue;
10937 }
10938
10939 if (cfun_frame_layout.first_restore_gpr == -1
10940 && GET_CODE (pat) == SET
10941 && GENERAL_REG_P (SET_DEST (pat))
10942 && GET_CODE (SET_SRC (pat)) == MEM)
10943 {
10944 set = pat;
10945 first = REGNO (SET_DEST (set));
10946 offset = const0_rtx;
10947 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
10948 off = INTVAL (offset);
10949
10950 if (GET_CODE (base) != REG || off < 0)
10951 continue;
10952
10953 RTX_FRAME_RELATED_P (insn) = 0;
10954
10955 if (REGNO (base) != STACK_POINTER_REGNUM
10956 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
10957 continue;
10958
10959 remove_insn (insn);
10960 continue;
10961 }
10962 }
10963 }
10964
10965 /* On z10 and later the dynamic branch prediction must see the
10966 backward jump within a certain window. If not, it falls back to
10967 the static prediction. This function rearranges the loop backward
10968 branch in a way which makes the static prediction always correct.
10969 The function returns true if it added an instruction. */
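/* More concretely, the rewrite performed below turns (a sketch for
   illustration only)

     loop:  ...
            if (cond) goto loop;     // far backward conditional branch

   into

     loop:  ...
            if (!cond) goto skip;    // short forward branch, statically
            goto loop;               //   predicted as not taken
     skip:

   so that the taken backward branch becomes unconditional.  */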
10970 static bool
10971 s390_fix_long_loop_prediction (rtx insn)
10972 {
10973 rtx set = single_set (insn);
10974 rtx code_label, label_ref, new_label;
10975 rtx uncond_jump;
10976 rtx cur_insn;
10977 rtx tmp;
10978 int distance;
10979
10980 /* This will exclude branch on count and branch on index patterns
10981 since these are correctly statically predicted. */
10982 if (!set
10983 || SET_DEST (set) != pc_rtx
10984 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
10985 return false;
10986
10987 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
10988 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
10989
10990 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
10991
10992 code_label = XEXP (label_ref, 0);
10993
10994 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
10995 || INSN_ADDRESSES (INSN_UID (insn)) == -1
10996 || (INSN_ADDRESSES (INSN_UID (insn))
10997 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
10998 return false;
10999
11000 for (distance = 0, cur_insn = PREV_INSN (insn);
11001 distance < PREDICT_DISTANCE - 6;
11002 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
11003 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
11004 return false;
11005
11006 new_label = gen_label_rtx ();
11007 uncond_jump = emit_jump_insn_after (
11008 gen_rtx_SET (VOIDmode, pc_rtx,
11009 gen_rtx_LABEL_REF (VOIDmode, code_label)),
11010 insn);
11011 emit_label_after (new_label, uncond_jump);
11012
11013 tmp = XEXP (SET_SRC (set), 1);
11014 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
11015 XEXP (SET_SRC (set), 2) = tmp;
11016 INSN_CODE (insn) = -1;
11017
11018 XEXP (label_ref, 0) = new_label;
11019 JUMP_LABEL (insn) = new_label;
11020 JUMP_LABEL (uncond_jump) = code_label;
11021
11022 return true;
11023 }
11024
11025 /* Returns 1 if INSN reads the value of REG for purposes not related
11026 to addressing of memory, and 0 otherwise. */
11027 static int
11028 s390_non_addr_reg_read_p (rtx reg, rtx insn)
11029 {
11030 return reg_referenced_p (reg, PATTERN (insn))
11031 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
11032 }
11033
11034 /* Starting from INSN find_cond_jump looks downwards in the insn
11035 stream for a single jump insn which is the last user of the
11036 condition code set in INSN. */
11037 static rtx
11038 find_cond_jump (rtx insn)
11039 {
11040 for (; insn; insn = NEXT_INSN (insn))
11041 {
11042 rtx ite, cc;
11043
11044 if (LABEL_P (insn))
11045 break;
11046
11047 if (!JUMP_P (insn))
11048 {
11049 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
11050 break;
11051 continue;
11052 }
11053
11054 /* This will be triggered by a return. */
11055 if (GET_CODE (PATTERN (insn)) != SET)
11056 break;
11057
11058 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
11059 ite = SET_SRC (PATTERN (insn));
11060
11061 if (GET_CODE (ite) != IF_THEN_ELSE)
11062 break;
11063
11064 cc = XEXP (XEXP (ite, 0), 0);
11065 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
11066 break;
11067
11068 if (find_reg_note (insn, REG_DEAD, cc))
11069 return insn;
11070 break;
11071 }
11072
11073 return NULL_RTX;
11074 }
11075
11076 /* Swap the condition in COND and the operands in OP0 and OP1 so that
11077 the semantics does not change. If NULL_RTX is passed as COND the
11078 function tries to find the conditional jump starting with INSN. */
11079 static void
11080 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx insn)
11081 {
11082 rtx tmp = *op0;
11083
11084 if (cond == NULL_RTX)
11085 {
11086 rtx jump = find_cond_jump (NEXT_INSN (insn));
11087 jump = jump ? single_set (jump) : NULL_RTX;
11088
11089 if (jump == NULL_RTX)
11090 return;
11091
11092 cond = XEXP (XEXP (jump, 1), 0);
11093 }
11094
11095 *op0 = *op1;
11096 *op1 = tmp;
11097 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
11098 }
11099
11100 /* On z10, instructions of the compare-and-branch family have the
11101 property of accessing the register occurring as second operand with
11102 its bits complemented. If such a compare is grouped with a second
11103 instruction that accesses the same register non-complemented, and
11104 if that register's value is delivered via a bypass, then the
11105 pipeline recycles, thereby causing significant performance decline.
11106 This function locates such situations and exchanges the two
11107 operands of the compare. The function returns true whenever it
11108 added an insn. */
11109 static bool
11110 s390_z10_optimize_cmp (rtx insn)
11111 {
11112 rtx prev_insn, next_insn;
11113 bool insn_added_p = false;
11114 rtx cond, *op0, *op1;
11115
11116 if (GET_CODE (PATTERN (insn)) == PARALLEL)
11117 {
11118 /* Handle compare and branch and branch on count
11119 instructions. */
11120 rtx pattern = single_set (insn);
11121
11122 if (!pattern
11123 || SET_DEST (pattern) != pc_rtx
11124 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
11125 return false;
11126
11127 cond = XEXP (SET_SRC (pattern), 0);
11128 op0 = &XEXP (cond, 0);
11129 op1 = &XEXP (cond, 1);
11130 }
11131 else if (GET_CODE (PATTERN (insn)) == SET)
11132 {
11133 rtx src, dest;
11134
11135 /* Handle normal compare instructions. */
11136 src = SET_SRC (PATTERN (insn));
11137 dest = SET_DEST (PATTERN (insn));
11138
11139 if (!REG_P (dest)
11140 || !CC_REGNO_P (REGNO (dest))
11141 || GET_CODE (src) != COMPARE)
11142 return false;
11143
11144 /* s390_swap_cmp will try to find the conditional
11145 jump when passing NULL_RTX as condition. */
11146 cond = NULL_RTX;
11147 op0 = &XEXP (src, 0);
11148 op1 = &XEXP (src, 1);
11149 }
11150 else
11151 return false;
11152
11153 if (!REG_P (*op0) || !REG_P (*op1))
11154 return false;
11155
11156 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
11157 return false;
11158
11159 /* Swap the COMPARE arguments and its mask if there is a
11160 conflicting access in the previous insn. */
11161 prev_insn = prev_active_insn (insn);
11162 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
11163 && reg_referenced_p (*op1, PATTERN (prev_insn)))
11164 s390_swap_cmp (cond, op0, op1, insn);
11165
11166 /* Check if there is a conflict with the next insn. If there
11167 was no conflict with the previous insn, then swap the
11168 COMPARE arguments and its mask. If we already swapped
11169 the operands, or if swapping them would cause a conflict
11170 with the previous insn, issue a NOP after the COMPARE in
11171 order to separate the two instructions. */
11172 next_insn = next_active_insn (insn);
11173 if (next_insn != NULL_RTX && INSN_P (next_insn)
11174 && s390_non_addr_reg_read_p (*op1, next_insn))
11175 {
11176 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
11177 && s390_non_addr_reg_read_p (*op0, prev_insn))
11178 {
11179 if (REGNO (*op1) == 0)
11180 emit_insn_after (gen_nop1 (), insn);
11181 else
11182 emit_insn_after (gen_nop (), insn);
11183 insn_added_p = true;
11184 }
11185 else
11186 s390_swap_cmp (cond, op0, op1, insn);
11187 }
11188 return insn_added_p;
11189 }
11190
11191 /* Perform machine-dependent processing. */
11192
11193 static void
11194 s390_reorg (void)
11195 {
11196 bool pool_overflow = false;
11197
11198 /* Make sure all splits have been performed; splits after
11199 machine_dependent_reorg might confuse insn length counts. */
11200 split_all_insns_noflow ();
11201
11202 /* Install the main literal pool and the associated base
11203 register load insns.
11204
11205 In addition, there are two problematic situations we need
11206 to correct:
11207
11208 - the literal pool might be > 4096 bytes in size, so that
11209 some of its elements cannot be directly accessed
11210
11211 - a branch target might be > 64K away from the branch, so that
11212 it is not possible to use a PC-relative instruction.
11213
11214 To fix those, we split the single literal pool into multiple
11215 pool chunks, reloading the pool base register at various
11216 points throughout the function to ensure it always points to
11217 the pool chunk the following code expects, and / or replace
11218 PC-relative branches by absolute branches.
11219
11220 However, the two problems are interdependent: splitting the
11221 literal pool can move a branch further away from its target,
11222 causing the 64K limit to overflow, and on the other hand,
11223 replacing a PC-relative branch by an absolute branch means
11224 we need to put the branch target address into the literal
11225 pool, possibly causing it to overflow.
11226
11227 So, we loop trying to fix up both problems until we manage
11228 to satisfy both conditions at the same time. Note that the
11229 loop is guaranteed to terminate as every pass of the loop
11230 strictly decreases the total number of PC-relative branches
11231 in the function. (This is not completely true as there
11232 might be branch-over-pool insns introduced by chunkify_start.
11233 Those never need to be split however.) */
11234
11235 for (;;)
11236 {
11237 struct constant_pool *pool = NULL;
11238
11239 /* Collect the literal pool. */
11240 if (!pool_overflow)
11241 {
11242 pool = s390_mainpool_start ();
11243 if (!pool)
11244 pool_overflow = true;
11245 }
11246
11247 /* If literal pool overflowed, start to chunkify it. */
11248 if (pool_overflow)
11249 pool = s390_chunkify_start ();
11250
11251 /* Split out-of-range branches. If this has created new
11252 literal pool entries, cancel current chunk list and
11253 recompute it. zSeries machines have large branch
11254 instructions, so we never need to split a branch. */
11255 if (!TARGET_CPU_ZARCH && s390_split_branches ())
11256 {
11257 if (pool_overflow)
11258 s390_chunkify_cancel (pool);
11259 else
11260 s390_mainpool_cancel (pool);
11261
11262 continue;
11263 }
11264
11265 /* If we made it up to here, both conditions are satisfied.
11266 Finish up literal pool related changes. */
11267 if (pool_overflow)
11268 s390_chunkify_finish (pool);
11269 else
11270 s390_mainpool_finish (pool);
11271
11272 /* We're done splitting branches. */
11273 cfun->machine->split_branches_pending_p = false;
11274 break;
11275 }
11276
11277 /* Generate out-of-pool execute target insns. */
11278 if (TARGET_CPU_ZARCH)
11279 {
11280 rtx insn, label, target;
11281
11282 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11283 {
11284 label = s390_execute_label (insn);
11285 if (!label)
11286 continue;
11287
11288 gcc_assert (label != const0_rtx);
11289
11290 target = emit_label (XEXP (label, 0));
11291 INSN_ADDRESSES_NEW (target, -1);
11292
11293 target = emit_insn (s390_execute_target (insn));
11294 INSN_ADDRESSES_NEW (target, -1);
11295 }
11296 }
11297
11298 /* Try to optimize prologue and epilogue further. */
11299 s390_optimize_prologue ();
11300
11301 /* Walk over the insns and do some >=z10 specific changes. */
11302 if (s390_tune == PROCESSOR_2097_Z10
11303 || s390_tune == PROCESSOR_2817_Z196
11304 || s390_tune == PROCESSOR_2827_ZEC12)
11305 {
11306 rtx insn;
11307 bool insn_added_p = false;
11308
11309 /* The insn lengths and addresses have to be up to date for the
11310 following manipulations. */
11311 shorten_branches (get_insns ());
11312
11313 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11314 {
11315 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
11316 continue;
11317
11318 if (JUMP_P (insn))
11319 insn_added_p |= s390_fix_long_loop_prediction (insn);
11320
11321 if ((GET_CODE (PATTERN (insn)) == PARALLEL
11322 || GET_CODE (PATTERN (insn)) == SET)
11323 && s390_tune == PROCESSOR_2097_Z10)
11324 insn_added_p |= s390_z10_optimize_cmp (insn);
11325 }
11326
11327 /* Adjust branches if we added new instructions. */
11328 if (insn_added_p)
11329 shorten_branches (get_insns ());
11330 }
11331 }
11332
11333 /* Return true if INSN is a fp load insn writing register REGNO. */
11334 static inline bool
11335 s390_fpload_toreg (rtx insn, unsigned int regno)
11336 {
11337 rtx set;
11338 enum attr_type flag = s390_safe_attr_type (insn);
11339
11340 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
11341 return false;
11342
11343 set = single_set (insn);
11344
11345 if (set == NULL_RTX)
11346 return false;
11347
11348 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
11349 return false;
11350
11351 if (REGNO (SET_DEST (set)) != regno)
11352 return false;
11353
11354 return true;
11355 }
11356
11357 /* This value describes the distance to be avoided between an
11358 arithmetic fp instruction and an fp load writing the same register.
11359 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
11360 fine but the exact value has to be avoided. Otherwise the FP
11361 pipeline will throw an exception causing a major penalty. */
11362 #define Z10_EARLYLOAD_DISTANCE 7
11363
11364 /* Rearrange the ready list in order to avoid the situation described
11365 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
11366 moved to the very end of the ready list. */
11367 static void
11368 s390_z10_prevent_earlyload_conflicts (rtx *ready, int *nready_p)
11369 {
11370 unsigned int regno;
11371 int nready = *nready_p;
11372 rtx tmp;
11373 int i;
11374 rtx insn;
11375 rtx set;
11376 enum attr_type flag;
11377 int distance;
11378
11379 /* Skip DISTANCE - 1 active insns. */
11380 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
11381 distance > 0 && insn != NULL_RTX;
11382 distance--, insn = prev_active_insn (insn))
11383 if (CALL_P (insn) || JUMP_P (insn))
11384 return;
11385
11386 if (insn == NULL_RTX)
11387 return;
11388
11389 set = single_set (insn);
11390
11391 if (set == NULL_RTX || !REG_P (SET_DEST (set))
11392 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
11393 return;
11394
11395 flag = s390_safe_attr_type (insn);
11396
11397 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
11398 return;
11399
11400 regno = REGNO (SET_DEST (set));
11401 i = nready - 1;
11402
11403 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
11404 i--;
11405
11406 if (!i)
11407 return;
11408
11409 tmp = ready[i];
11410 memmove (&ready[1], &ready[0], sizeof (rtx) * i);
11411 ready[0] = tmp;
11412 }
11413
11414
11415 /* The s390_sched_state variable tracks the state of the current or
11416 the last instruction group.
11417
11418 0,1,2 number of instructions scheduled in the current group
11419 3 the last group is complete - normal insns
11420 4 the last group was a cracked/expanded insn */
11421
11422 static int s390_sched_state;
11423
11424 #define S390_OOO_SCHED_STATE_NORMAL 3
11425 #define S390_OOO_SCHED_STATE_CRACKED 4
11426
11427 #define S390_OOO_SCHED_ATTR_MASK_CRACKED 0x1
11428 #define S390_OOO_SCHED_ATTR_MASK_EXPANDED 0x2
11429 #define S390_OOO_SCHED_ATTR_MASK_ENDGROUP 0x4
11430 #define S390_OOO_SCHED_ATTR_MASK_GROUPALONE 0x8
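
/* On the out-of-order cores (e.g. zEC12) instructions are dispatched
   in groups of at most three.  Cracked and expanded instructions are
   split into several internal operations and hence preferably start a
   group, while groupalone instructions occupy a dispatch group on
   their own; endgroup instructions terminate the current group.  The
   scoring in s390_sched_score below encodes these preferences (this
   summary is added for clarity).  */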
11431
11432 static unsigned int
11433 s390_get_sched_attrmask (rtx insn)
11434 {
11435 unsigned int mask = 0;
11436
11437 if (get_attr_ooo_cracked (insn))
11438 mask |= S390_OOO_SCHED_ATTR_MASK_CRACKED;
11439 if (get_attr_ooo_expanded (insn))
11440 mask |= S390_OOO_SCHED_ATTR_MASK_EXPANDED;
11441 if (get_attr_ooo_endgroup (insn))
11442 mask |= S390_OOO_SCHED_ATTR_MASK_ENDGROUP;
11443 if (get_attr_ooo_groupalone (insn))
11444 mask |= S390_OOO_SCHED_ATTR_MASK_GROUPALONE;
11445 return mask;
11446 }
11447
11448 /* Return the scheduling score for INSN. The higher the score the
11449 better. The score is calculated from the OOO scheduling attributes
11450 of INSN and the scheduling state s390_sched_state. */
11451 static int
11452 s390_sched_score (rtx insn)
11453 {
11454 unsigned int mask = s390_get_sched_attrmask (insn);
11455 int score = 0;
11456
11457 switch (s390_sched_state)
11458 {
11459 case 0:
11460 /* Try to put insns into the first slot which would otherwise
11461 break a group. */
11462 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
11463 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
11464 score += 5;
11465 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
11466 score += 10;
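/* Fall through (no break above): the state 1 preferences below are
   applied in state 0 as well.  */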
11467 case 1:
11468 /* Prefer not cracked insns while trying to put together a
11469 group. */
11470 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
11471 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
11472 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
11473 score += 10;
11474 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) == 0)
11475 score += 5;
11476 break;
11477 case 2:
11478 /* Prefer not cracked insns while trying to put together a
11479 group. */
11480 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
11481 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0
11482 && (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) == 0)
11483 score += 10;
11484 /* Prefer endgroup insns in the last slot. */
11485 if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0)
11486 score += 10;
11487 break;
11488 case S390_OOO_SCHED_STATE_NORMAL:
11489 /* Prefer not cracked insns if the last was not cracked. */
11490 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) == 0
11491 && (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) == 0)
11492 score += 5;
11493 if ((mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
11494 score += 10;
11495 break;
11496 case S390_OOO_SCHED_STATE_CRACKED:
11497 /* Try to keep cracked insns together to prevent them from
11498 interrupting groups. */
11499 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
11500 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
11501 score += 5;
11502 break;
11503 }
11504 return score;
11505 }
11506
11507 /* This function is called via hook TARGET_SCHED_REORDER before
11508 issuing one insn from list READY which contains *NREADYP entries.
11509 For z10 it reorders loads to avoid early load conflicts in the FP
11510 pipeline; for zEC12 it moves the best-scoring insn to the top.  */
11511 static int
11512 s390_sched_reorder (FILE *file, int verbose,
11513 rtx *ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
11514 {
11515 if (s390_tune == PROCESSOR_2097_Z10)
11516 if (reload_completed && *nreadyp > 1)
11517 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
11518
11519 if (s390_tune == PROCESSOR_2827_ZEC12
11520 && reload_completed
11521 && *nreadyp > 1)
11522 {
11523 int i;
11524 int last_index = *nreadyp - 1;
11525 int max_index = -1;
11526 int max_score = -1;
11527 rtx tmp;
11528
11529 /* Just move the insn with the highest score to the top (the
11530 end) of the list.  A full sort is not needed since a conflict
11531 in the hazard recognizer cannot happen, so the insn at the top
11532 of the ready list will always be taken. */
11533 for (i = last_index; i >= 0; i--)
11534 {
11535 int score;
11536
11537 if (recog_memoized (ready[i]) < 0)
11538 continue;
11539
11540 score = s390_sched_score (ready[i]);
11541 if (score > max_score)
11542 {
11543 max_score = score;
11544 max_index = i;
11545 }
11546 }
11547
11548 if (max_index != -1)
11549 {
11550 if (max_index != last_index)
11551 {
11552 tmp = ready[max_index];
11553 ready[max_index] = ready[last_index];
11554 ready[last_index] = tmp;
11555
11556 if (verbose > 5)
11557 fprintf (file,
11558 "move insn %d to the top of list\n",
11559 INSN_UID (ready[last_index]));
11560 }
11561 else if (verbose > 5)
11562 fprintf (file,
11563 "best insn %d already on top\n",
11564 INSN_UID (ready[last_index]));
11565 }
11566
11567 if (verbose > 5)
11568 {
11569 fprintf (file, "ready list ooo attributes - sched state: %d\n",
11570 s390_sched_state);
11571
11572 for (i = last_index; i >= 0; i--)
11573 {
11574 if (recog_memoized (ready[i]) < 0)
11575 continue;
11576 fprintf (file, "insn %d score: %d: ", INSN_UID (ready[i]),
11577 s390_sched_score (ready[i]));
11578 #define PRINT_OOO_ATTR(ATTR) fprintf (file, "%s ", get_attr_##ATTR (ready[i]) ? #ATTR : "!" #ATTR);
11579 PRINT_OOO_ATTR (ooo_cracked);
11580 PRINT_OOO_ATTR (ooo_expanded);
11581 PRINT_OOO_ATTR (ooo_endgroup);
11582 PRINT_OOO_ATTR (ooo_groupalone);
11583 #undef PRINT_OOO_ATTR
11584 fprintf (file, "\n");
11585 }
11586 }
11587 }
11588
11589 return s390_issue_rate ();
11590 }
11591
11592
11593 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
11594 the scheduler has issued INSN. It stores the last issued insn into
11595 last_scheduled_insn to make it available for s390_sched_reorder and,
11596 for zEC12, updates the s390_sched_state group tracking. */
11597 static int
11598 s390_sched_variable_issue (FILE *file, int verbose, rtx insn, int more)
11599 {
11600 last_scheduled_insn = insn;
11601
11602 if (s390_tune == PROCESSOR_2827_ZEC12
11603 && reload_completed
11604 && recog_memoized (insn) >= 0)
11605 {
11606 unsigned int mask = s390_get_sched_attrmask (insn);
11607
11608 if ((mask & S390_OOO_SCHED_ATTR_MASK_CRACKED) != 0
11609 || (mask & S390_OOO_SCHED_ATTR_MASK_EXPANDED) != 0)
11610 s390_sched_state = S390_OOO_SCHED_STATE_CRACKED;
11611 else if ((mask & S390_OOO_SCHED_ATTR_MASK_ENDGROUP) != 0
11612 || (mask & S390_OOO_SCHED_ATTR_MASK_GROUPALONE) != 0)
11613 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
11614 else
11615 {
11616 /* Only normal insns are left (mask == 0). */
11617 switch (s390_sched_state)
11618 {
11619 case 0:
11620 case 1:
11621 case 2:
11622 case S390_OOO_SCHED_STATE_NORMAL:
11623 if (s390_sched_state == S390_OOO_SCHED_STATE_NORMAL)
11624 s390_sched_state = 1;
11625 else
11626 s390_sched_state++;
11627
11628 break;
11629 case S390_OOO_SCHED_STATE_CRACKED:
11630 s390_sched_state = S390_OOO_SCHED_STATE_NORMAL;
11631 break;
11632 }
11633 }
11634 if (verbose > 5)
11635 {
11636 fprintf (file, "insn %d: ", INSN_UID (insn));
11637 #define PRINT_OOO_ATTR(ATTR) \
11638 fprintf (file, "%s ", get_attr_##ATTR (insn) ? #ATTR : "");
11639 PRINT_OOO_ATTR (ooo_cracked);
11640 PRINT_OOO_ATTR (ooo_expanded);
11641 PRINT_OOO_ATTR (ooo_endgroup);
11642 PRINT_OOO_ATTR (ooo_groupalone);
11643 #undef PRINT_OOO_ATTR
11644 fprintf (file, "\n");
11645 fprintf (file, "sched state: %d\n", s390_sched_state);
11646 }
11647 }
11648
11649 if (GET_CODE (PATTERN (insn)) != USE
11650 && GET_CODE (PATTERN (insn)) != CLOBBER)
11651 return more - 1;
11652 else
11653 return more;
11654 }
11655
11656 static void
11657 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
11658 int verbose ATTRIBUTE_UNUSED,
11659 int max_ready ATTRIBUTE_UNUSED)
11660 {
11661 last_scheduled_insn = NULL_RTX;
11662 s390_sched_state = 0;
11663 }
11664
11665 /* Check a single rtx *X for a memory reference, incrementing the
11666 counter variable MEM_COUNT when *X is a MEM.  The function always
11667 returns zero because the for_each_rtx framework it is called from
11668 would stop recursively analyzing the insn upon a return value
11669 other than zero. */
11670 static int
11671 check_dpu (rtx *x, unsigned *mem_count)
11672 {
11673 if (*x != NULL_RTX && MEM_P (*x))
11674 (*mem_count)++;
11675 return 0;
11676 }
11677
11678 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
11679 the number of times struct loop *loop should be unrolled when tuned for
11680 cpus with a built-in stride prefetcher.
11681 The loop is analyzed for memory accesses by calling check_dpu for
11682 each rtx of the loop. Depending on the loop_depth and the number of
11683 memory accesses a new count <= nunroll is returned to improve the
11684 behaviour of the hardware prefetch unit. */
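/* Illustrative arithmetic (editorial note): with the thresholds used
   below, a depth-1 loop containing 7 memory references would be
   unrolled at most MIN (nunroll, 28 / 7) = MIN (nunroll, 4) times.  */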
11685 static unsigned
11686 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
11687 {
11688 basic_block *bbs;
11689 rtx insn;
11690 unsigned i;
11691 unsigned mem_count = 0;
11692
11693 if (s390_tune != PROCESSOR_2097_Z10
11694 && s390_tune != PROCESSOR_2817_Z196
11695 && s390_tune != PROCESSOR_2827_ZEC12)
11696 return nunroll;
11697
11698 /* Count the number of memory references within the loop body. */
11699 bbs = get_loop_body (loop);
11700 for (i = 0; i < loop->num_nodes; i++)
11701 {
11702 for (insn = BB_HEAD (bbs[i]); insn != BB_END (bbs[i]); insn = NEXT_INSN (insn))
11703 if (INSN_P (insn) && INSN_CODE (insn) != -1)
11704 for_each_rtx (&insn, (rtx_function) check_dpu, &mem_count);
11705 }
11706 free (bbs);
11707
11708 /* Prevent division by zero; no adjustment of nunroll is needed in this case. */
11709 if (mem_count == 0)
11710 return nunroll;
11711
11712 switch (loop_depth(loop))
11713 {
11714 case 1:
11715 return MIN (nunroll, 28 / mem_count);
11716 case 2:
11717 return MIN (nunroll, 22 / mem_count);
11718 default:
11719 return MIN (nunroll, 16 / mem_count);
11720 }
11721 }
11722
11723 /* Initialize GCC target structure. */
11724
11725 #undef TARGET_ASM_ALIGNED_HI_OP
11726 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
11727 #undef TARGET_ASM_ALIGNED_DI_OP
11728 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
11729 #undef TARGET_ASM_INTEGER
11730 #define TARGET_ASM_INTEGER s390_assemble_integer
11731
11732 #undef TARGET_ASM_OPEN_PAREN
11733 #define TARGET_ASM_OPEN_PAREN ""
11734
11735 #undef TARGET_ASM_CLOSE_PAREN
11736 #define TARGET_ASM_CLOSE_PAREN ""
11737
11738 #undef TARGET_OPTION_OVERRIDE
11739 #define TARGET_OPTION_OVERRIDE s390_option_override
11740
11741 #undef TARGET_ENCODE_SECTION_INFO
11742 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
11743
11744 #undef TARGET_SCALAR_MODE_SUPPORTED_P
11745 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
11746
11747 #ifdef HAVE_AS_TLS
11748 #undef TARGET_HAVE_TLS
11749 #define TARGET_HAVE_TLS true
11750 #endif
11751 #undef TARGET_CANNOT_FORCE_CONST_MEM
11752 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
11753
11754 #undef TARGET_DELEGITIMIZE_ADDRESS
11755 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
11756
11757 #undef TARGET_LEGITIMIZE_ADDRESS
11758 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
11759
11760 #undef TARGET_RETURN_IN_MEMORY
11761 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
11762
11763 #undef TARGET_INIT_BUILTINS
11764 #define TARGET_INIT_BUILTINS s390_init_builtins
11765 #undef TARGET_EXPAND_BUILTIN
11766 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
11767
11768 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
11769 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
11770
11771 #undef TARGET_ASM_OUTPUT_MI_THUNK
11772 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
11773 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
11774 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
11775
11776 #undef TARGET_SCHED_ADJUST_PRIORITY
11777 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
11778 #undef TARGET_SCHED_ISSUE_RATE
11779 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
11780 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
11781 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
11782
11783 #undef TARGET_SCHED_VARIABLE_ISSUE
11784 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
11785 #undef TARGET_SCHED_REORDER
11786 #define TARGET_SCHED_REORDER s390_sched_reorder
11787 #undef TARGET_SCHED_INIT
11788 #define TARGET_SCHED_INIT s390_sched_init
11789
11790 #undef TARGET_CANNOT_COPY_INSN_P
11791 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
11792 #undef TARGET_RTX_COSTS
11793 #define TARGET_RTX_COSTS s390_rtx_costs
11794 #undef TARGET_ADDRESS_COST
11795 #define TARGET_ADDRESS_COST s390_address_cost
11796 #undef TARGET_REGISTER_MOVE_COST
11797 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
11798 #undef TARGET_MEMORY_MOVE_COST
11799 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
11800
11801 #undef TARGET_MACHINE_DEPENDENT_REORG
11802 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
11803
11804 #undef TARGET_VALID_POINTER_MODE
11805 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
11806
11807 #undef TARGET_BUILD_BUILTIN_VA_LIST
11808 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
11809 #undef TARGET_EXPAND_BUILTIN_VA_START
11810 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
11811 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
11812 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
11813
11814 #undef TARGET_PROMOTE_FUNCTION_MODE
11815 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
11816 #undef TARGET_PASS_BY_REFERENCE
11817 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
11818
11819 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
11820 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
11821 #undef TARGET_FUNCTION_ARG
11822 #define TARGET_FUNCTION_ARG s390_function_arg
11823 #undef TARGET_FUNCTION_ARG_ADVANCE
11824 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
11825 #undef TARGET_FUNCTION_VALUE
11826 #define TARGET_FUNCTION_VALUE s390_function_value
11827 #undef TARGET_LIBCALL_VALUE
11828 #define TARGET_LIBCALL_VALUE s390_libcall_value
11829
11830 #undef TARGET_FIXED_CONDITION_CODE_REGS
11831 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
11832
11833 #undef TARGET_CC_MODES_COMPATIBLE
11834 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
11835
11836 #undef TARGET_INVALID_WITHIN_DOLOOP
11837 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_null
11838
11839 #ifdef HAVE_AS_TLS
11840 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
11841 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
11842 #endif
11843
11844 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
11845 #undef TARGET_MANGLE_TYPE
11846 #define TARGET_MANGLE_TYPE s390_mangle_type
11847 #endif
11848
11849 #undef TARGET_SCALAR_MODE_SUPPORTED_P
11850 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
11851
11852 #undef TARGET_PREFERRED_RELOAD_CLASS
11853 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
11854
11855 #undef TARGET_SECONDARY_RELOAD
11856 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
11857
11858 #undef TARGET_LIBGCC_CMP_RETURN_MODE
11859 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
11860
11861 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
11862 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
11863
11864 #undef TARGET_LEGITIMATE_ADDRESS_P
11865 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
11866
11867 #undef TARGET_LEGITIMATE_CONSTANT_P
11868 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
11869
11870 #undef TARGET_LRA_P
11871 #define TARGET_LRA_P s390_lra_p
11872
11873 #undef TARGET_CAN_ELIMINATE
11874 #define TARGET_CAN_ELIMINATE s390_can_eliminate
11875
11876 #undef TARGET_CONDITIONAL_REGISTER_USAGE
11877 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
11878
11879 #undef TARGET_LOOP_UNROLL_ADJUST
11880 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
11881
11882 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
11883 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
11884 #undef TARGET_TRAMPOLINE_INIT
11885 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
11886
11887 #undef TARGET_UNWIND_WORD_MODE
11888 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
11889
11890 #undef TARGET_CANONICALIZE_COMPARISON
11891 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
11892
11893 #undef TARGET_HARD_REGNO_SCRATCH_OK
11894 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
11895
11896 struct gcc_target targetm = TARGET_INITIALIZER;
11897
11898 #include "gt-s390.h"