1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2022 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "target-globals.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "gimple.h"
34 #include "cfghooks.h"
35 #include "cfgloop.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "expmed.h"
42 #include "optabs.h"
43 #include "regs.h"
44 #include "emit-rtl.h"
45 #include "recog.h"
46 #include "cgraph.h"
47 #include "diagnostic-core.h"
48 #include "diagnostic.h"
49 #include "alias.h"
50 #include "fold-const.h"
51 #include "print-tree.h"
52 #include "stor-layout.h"
53 #include "varasm.h"
54 #include "calls.h"
55 #include "conditions.h"
56 #include "output.h"
57 #include "insn-attr.h"
58 #include "flags.h"
59 #include "except.h"
60 #include "dojump.h"
61 #include "explow.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "reload.h"
65 #include "cfgrtl.h"
66 #include "cfganal.h"
67 #include "lcm.h"
68 #include "cfgbuild.h"
69 #include "cfgcleanup.h"
70 #include "debug.h"
71 #include "langhooks.h"
72 #include "internal-fn.h"
73 #include "gimple-fold.h"
74 #include "tree-eh.h"
75 #include "gimplify.h"
76 #include "opts.h"
77 #include "tree-pass.h"
78 #include "context.h"
79 #include "builtins.h"
80 #include "rtl-iter.h"
81 #include "intl.h"
82 #include "tm-constrs.h"
83 #include "tree-vrp.h"
84 #include "symbol-summary.h"
85 #include "ipa-prop.h"
86 #include "ipa-fnsummary.h"
87 #include "sched-int.h"
88
89 /* This file should be included last. */
90 #include "target-def.h"
91
92 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
93
94 /* Remember the last target of s390_set_current_function. */
95 static GTY(()) tree s390_previous_fndecl;
96
97 /* Define the specific costs for a given cpu. */
98
99 struct processor_costs
100 {
101 /* multiplication */
102 const int m; /* cost of an M instruction. */
103 const int mghi; /* cost of an MGHI instruction. */
104 const int mh; /* cost of an MH instruction. */
105 const int mhi; /* cost of an MHI instruction. */
106 const int ml; /* cost of an ML instruction. */
107 const int mr; /* cost of an MR instruction. */
108 const int ms; /* cost of an MS instruction. */
109 const int msg; /* cost of an MSG instruction. */
110 const int msgf; /* cost of an MSGF instruction. */
111 const int msgfr; /* cost of an MSGFR instruction. */
112 const int msgr; /* cost of an MSGR instruction. */
113 const int msr; /* cost of an MSR instruction. */
114 const int mult_df; /* cost of multiplication in DFmode. */
115 const int mxbr;
116 /* square root */
117 const int sqxbr; /* cost of square root in TFmode. */
118 const int sqdbr; /* cost of square root in DFmode. */
119 const int sqebr; /* cost of square root in SFmode. */
120 /* multiply and add */
121 const int madbr; /* cost of multiply and add in DFmode. */
122 const int maebr; /* cost of multiply and add in SFmode. */
123 /* division */
124 const int dxbr;
125 const int ddbr;
126 const int debr;
127 const int dlgr;
128 const int dlr;
129 const int dr;
130 const int dsgfr;
131 const int dsgr;
132 };
133
134 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
135
136 static const
137 struct processor_costs z900_cost =
138 {
139 COSTS_N_INSNS (5), /* M */
140 COSTS_N_INSNS (10), /* MGHI */
141 COSTS_N_INSNS (5), /* MH */
142 COSTS_N_INSNS (4), /* MHI */
143 COSTS_N_INSNS (5), /* ML */
144 COSTS_N_INSNS (5), /* MR */
145 COSTS_N_INSNS (4), /* MS */
146 COSTS_N_INSNS (15), /* MSG */
147 COSTS_N_INSNS (7), /* MSGF */
148 COSTS_N_INSNS (7), /* MSGFR */
149 COSTS_N_INSNS (10), /* MSGR */
150 COSTS_N_INSNS (4), /* MSR */
151 COSTS_N_INSNS (7), /* multiplication in DFmode */
152 COSTS_N_INSNS (13), /* MXBR */
153 COSTS_N_INSNS (136), /* SQXBR */
154 COSTS_N_INSNS (44), /* SQDBR */
155 COSTS_N_INSNS (35), /* SQEBR */
156 COSTS_N_INSNS (18), /* MADBR */
157 COSTS_N_INSNS (13), /* MAEBR */
158 COSTS_N_INSNS (134), /* DXBR */
159 COSTS_N_INSNS (30), /* DDBR */
160 COSTS_N_INSNS (27), /* DEBR */
161 COSTS_N_INSNS (220), /* DLGR */
162 COSTS_N_INSNS (34), /* DLR */
163 COSTS_N_INSNS (34), /* DR */
164 COSTS_N_INSNS (32), /* DSGFR */
165 COSTS_N_INSNS (32), /* DSGR */
166 };
167
168 static const
169 struct processor_costs z990_cost =
170 {
171 COSTS_N_INSNS (4), /* M */
172 COSTS_N_INSNS (2), /* MGHI */
173 COSTS_N_INSNS (2), /* MH */
174 COSTS_N_INSNS (2), /* MHI */
175 COSTS_N_INSNS (4), /* ML */
176 COSTS_N_INSNS (4), /* MR */
177 COSTS_N_INSNS (5), /* MS */
178 COSTS_N_INSNS (6), /* MSG */
179 COSTS_N_INSNS (4), /* MSGF */
180 COSTS_N_INSNS (4), /* MSGFR */
181 COSTS_N_INSNS (4), /* MSGR */
182 COSTS_N_INSNS (4), /* MSR */
183 COSTS_N_INSNS (1), /* multiplication in DFmode */
184 COSTS_N_INSNS (28), /* MXBR */
185 COSTS_N_INSNS (130), /* SQXBR */
186 COSTS_N_INSNS (66), /* SQDBR */
187 COSTS_N_INSNS (38), /* SQEBR */
188 COSTS_N_INSNS (1), /* MADBR */
189 COSTS_N_INSNS (1), /* MAEBR */
190 COSTS_N_INSNS (60), /* DXBR */
191 COSTS_N_INSNS (40), /* DDBR */
192 COSTS_N_INSNS (26), /* DEBR */
193 COSTS_N_INSNS (176), /* DLGR */
194 COSTS_N_INSNS (31), /* DLR */
195 COSTS_N_INSNS (31), /* DR */
196 COSTS_N_INSNS (31), /* DSGFR */
197 COSTS_N_INSNS (31), /* DSGR */
198 };
199
200 static const
201 struct processor_costs z9_109_cost =
202 {
203 COSTS_N_INSNS (4), /* M */
204 COSTS_N_INSNS (2), /* MGHI */
205 COSTS_N_INSNS (2), /* MH */
206 COSTS_N_INSNS (2), /* MHI */
207 COSTS_N_INSNS (4), /* ML */
208 COSTS_N_INSNS (4), /* MR */
209 COSTS_N_INSNS (5), /* MS */
210 COSTS_N_INSNS (6), /* MSG */
211 COSTS_N_INSNS (4), /* MSGF */
212 COSTS_N_INSNS (4), /* MSGFR */
213 COSTS_N_INSNS (4), /* MSGR */
214 COSTS_N_INSNS (4), /* MSR */
215 COSTS_N_INSNS (1), /* multiplication in DFmode */
216 COSTS_N_INSNS (28), /* MXBR */
217 COSTS_N_INSNS (130), /* SQXBR */
218 COSTS_N_INSNS (66), /* SQDBR */
219 COSTS_N_INSNS (38), /* SQEBR */
220 COSTS_N_INSNS (1), /* MADBR */
221 COSTS_N_INSNS (1), /* MAEBR */
222 COSTS_N_INSNS (60), /* DXBR */
223 COSTS_N_INSNS (40), /* DDBR */
224 COSTS_N_INSNS (26), /* DEBR */
225 COSTS_N_INSNS (30), /* DLGR */
226 COSTS_N_INSNS (23), /* DLR */
227 COSTS_N_INSNS (23), /* DR */
228 COSTS_N_INSNS (24), /* DSGFR */
229 COSTS_N_INSNS (24), /* DSGR */
230 };
231
232 static const
233 struct processor_costs z10_cost =
234 {
235 COSTS_N_INSNS (10), /* M */
236 COSTS_N_INSNS (10), /* MGHI */
237 COSTS_N_INSNS (10), /* MH */
238 COSTS_N_INSNS (10), /* MHI */
239 COSTS_N_INSNS (10), /* ML */
240 COSTS_N_INSNS (10), /* MR */
241 COSTS_N_INSNS (10), /* MS */
242 COSTS_N_INSNS (10), /* MSG */
243 COSTS_N_INSNS (10), /* MSGF */
244 COSTS_N_INSNS (10), /* MSGFR */
245 COSTS_N_INSNS (10), /* MSGR */
246 COSTS_N_INSNS (10), /* MSR */
247 COSTS_N_INSNS (1), /* multiplication in DFmode */
248 COSTS_N_INSNS (50), /* MXBR */
249 COSTS_N_INSNS (120), /* SQXBR */
250 COSTS_N_INSNS (52), /* SQDBR */
251 COSTS_N_INSNS (38), /* SQEBR */
252 COSTS_N_INSNS (1), /* MADBR */
253 COSTS_N_INSNS (1), /* MAEBR */
254 COSTS_N_INSNS (111), /* DXBR */
255 COSTS_N_INSNS (39), /* DDBR */
256 COSTS_N_INSNS (32), /* DEBR */
257 COSTS_N_INSNS (160), /* DLGR */
258 COSTS_N_INSNS (71), /* DLR */
259 COSTS_N_INSNS (71), /* DR */
260 COSTS_N_INSNS (71), /* DSGFR */
261 COSTS_N_INSNS (71), /* DSGR */
262 };
263
264 static const
265 struct processor_costs z196_cost =
266 {
267 COSTS_N_INSNS (7), /* M */
268 COSTS_N_INSNS (5), /* MGHI */
269 COSTS_N_INSNS (5), /* MH */
270 COSTS_N_INSNS (5), /* MHI */
271 COSTS_N_INSNS (7), /* ML */
272 COSTS_N_INSNS (7), /* MR */
273 COSTS_N_INSNS (6), /* MS */
274 COSTS_N_INSNS (8), /* MSG */
275 COSTS_N_INSNS (6), /* MSGF */
276 COSTS_N_INSNS (6), /* MSGFR */
277 COSTS_N_INSNS (8), /* MSGR */
278 COSTS_N_INSNS (6), /* MSR */
279 COSTS_N_INSNS (1), /* multiplication in DFmode */
280 COSTS_N_INSNS (40), /* MXBR B+40 */
281 COSTS_N_INSNS (100), /* SQXBR B+100 */
282 COSTS_N_INSNS (42), /* SQDBR B+42 */
283 COSTS_N_INSNS (28), /* SQEBR B+28 */
284 COSTS_N_INSNS (1), /* MADBR B */
285 COSTS_N_INSNS (1), /* MAEBR B */
286 COSTS_N_INSNS (101), /* DXBR B+101 */
287 COSTS_N_INSNS (29), /* DDBR */
288 COSTS_N_INSNS (22), /* DEBR */
289 COSTS_N_INSNS (160), /* DLGR cracked */
290 COSTS_N_INSNS (160), /* DLR cracked */
291 COSTS_N_INSNS (160), /* DR expanded */
292 COSTS_N_INSNS (160), /* DSGFR cracked */
293 COSTS_N_INSNS (160), /* DSGR cracked */
294 };
295
296 static const
297 struct processor_costs zEC12_cost =
298 {
299 COSTS_N_INSNS (7), /* M */
300 COSTS_N_INSNS (5), /* MGHI */
301 COSTS_N_INSNS (5), /* MH */
302 COSTS_N_INSNS (5), /* MHI */
303 COSTS_N_INSNS (7), /* ML */
304 COSTS_N_INSNS (7), /* MR */
305 COSTS_N_INSNS (6), /* MS */
306 COSTS_N_INSNS (8), /* MSG */
307 COSTS_N_INSNS (6), /* MSGF */
308 COSTS_N_INSNS (6), /* MSGFR */
309 COSTS_N_INSNS (8), /* MSGR */
310 COSTS_N_INSNS (6), /* MSR */
311 COSTS_N_INSNS (1), /* multiplication in DFmode */
312 COSTS_N_INSNS (40), /* MXBR B+40 */
313 COSTS_N_INSNS (100), /* SQXBR B+100 */
314 COSTS_N_INSNS (42), /* SQDBR B+42 */
315 COSTS_N_INSNS (28), /* SQEBR B+28 */
316 COSTS_N_INSNS (1), /* MADBR B */
317 COSTS_N_INSNS (1), /* MAEBR B */
318 COSTS_N_INSNS (131), /* DXBR B+131 */
319 COSTS_N_INSNS (29), /* DDBR */
320 COSTS_N_INSNS (22), /* DEBR */
321 COSTS_N_INSNS (160), /* DLGR cracked */
322 COSTS_N_INSNS (160), /* DLR cracked */
323 COSTS_N_INSNS (160), /* DR expanded */
324 COSTS_N_INSNS (160), /* DSGFR cracked */
325 COSTS_N_INSNS (160), /* DSGR cracked */
326 };
327
328 const struct s390_processor processor_table[] =
329 {
330 { "z900", "z900", PROCESSOR_2064_Z900, &z900_cost, 5 },
331 { "z990", "z990", PROCESSOR_2084_Z990, &z990_cost, 6 },
332 { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7 },
333 { "z9-ec", "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost, 7 },
334 { "z10", "z10", PROCESSOR_2097_Z10, &z10_cost, 8 },
335 { "z196", "z196", PROCESSOR_2817_Z196, &z196_cost, 9 },
336 { "zEC12", "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost, 10 },
337 { "z13", "z13", PROCESSOR_2964_Z13, &zEC12_cost, 11 },
338 { "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 },
339 { "z15", "arch13", PROCESSOR_8561_Z15, &zEC12_cost, 13 },
340 { "arch14", "arch14", PROCESSOR_ARCH14, &zEC12_cost, 14 },
341 { "native", "", PROCESSOR_NATIVE, NULL, 0 }
342 };
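/* Reading the table: each entry pairs the -march= name with an
   architecture-level name (e.g. "z14" maps to "arch12"), the
   PROCESSOR_* enumerator, one of the cost tables above and an
   architecture level.  For instance, -march=z13 selects
   PROCESSOR_2964_Z13 with the zEC12 cost table and level 11; the
   "native" entry is a placeholder presumably resolved by host CPU
   detection.  */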
343
344 extern int reload_completed;
345
346 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
347 static rtx_insn *last_scheduled_insn;
348 #define NUM_SIDES 2
349
350 #define MAX_SCHED_UNITS 4
351 static int last_scheduled_unit_distance[MAX_SCHED_UNITS][NUM_SIDES];
352
353 /* Estimate of the number of cycles a long-running insn occupies an
354 execution unit. */
355 static int fxd_longrunning[NUM_SIDES];
356 static int fpd_longrunning[NUM_SIDES];
357
358 /* The maximum score added for an instruction whose unit hasn't been
359 in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to
360 give instruction mix scheduling more priority over instruction
361 grouping. */
362 #define MAX_SCHED_MIX_SCORE 2
363
364 /* The maximum distance up to which individual scores will be
365 calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE.
366 Increase this with the OOO window size of the machine. */
367 #define MAX_SCHED_MIX_DISTANCE 70
368
369 /* Structure used to hold the components of a S/390 memory
370 address. A legitimate address on S/390 is of the general
371 form
372 base + index + displacement
373 where any of the components is optional.
374
375 base and index are registers of the class ADDR_REGS,
376 displacement is an unsigned 12-bit immediate constant. */
377
378 /* The maximum number of insns in backend-generated memset/memcpy/memcmp
379 loops. This value is used in the unroll adjust hook to detect such
380 loops. The current maximum of 9 comes from the memcmp loop. */
381 #define BLOCK_MEM_OPS_LOOP_INSNS 9
382
383 struct s390_address
384 {
385 rtx base;
386 rtx indx;
387 rtx disp;
388 bool pointer;
389 bool literal_pool;
390 };
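/* For illustration: an access like a[i].x with the array in the
   current stack frame might decompose into base = the frame/stack
   pointer register, indx = a register holding the byte offset
   i * sizeof (a[0]), and disp = the constant offset of member x,
   which has to fit into the unsigned 12-bit displacement described
   above.  */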
391
392 /* A few accessor macros for struct cfun->machine->s390_frame_layout. */
393
394 #define cfun_frame_layout (cfun->machine->frame_layout)
395 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
396 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
397 ? cfun_frame_layout.fpr_bitmap & 0x0f \
398 : cfun_frame_layout.fpr_bitmap & 0x03))
399 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
400 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
401 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
402 (1 << (REGNO - FPR0_REGNUM)))
403 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
404 (1 << (REGNO - FPR0_REGNUM))))
405 #define cfun_gpr_save_slot(REGNO) \
406 cfun->machine->frame_layout.gpr_save_slots[REGNO]
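/* Example of the accessors above (with made-up slot numbers): if
   first_save_gpr_slot is 6 and last_save_gpr_slot is 15,
   cfun_gprs_save_area_size is (15 - 6 + 1) * UNITS_PER_LONG, i.e. ten
   register-sized slots; cfun_set_fpr_save (FPR0_REGNUM + 4) sets bit 4
   of the fpr_bitmap and cfun_fpr_save_p (FPR0_REGNUM + 4) tests it.  */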
407
408 /* Number of GPRs and FPRs used for argument passing. */
409 #define GP_ARG_NUM_REG 5
410 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
411 #define VEC_ARG_NUM_REG 8
412
413 /* A couple of shortcuts. */
414 #define CONST_OK_FOR_J(x) \
415 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
416 #define CONST_OK_FOR_K(x) \
417 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
418 #define CONST_OK_FOR_Os(x) \
419 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
420 #define CONST_OK_FOR_Op(x) \
421 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
422 #define CONST_OK_FOR_On(x) \
423 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
424
425 #define REGNO_PAIR_OK(REGNO, MODE) \
426 (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
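/* In other words: a value occupying a single hard register is fine in
   any register, while a multi-register value must start in an
   even-numbered register, so e.g. REGNO_PAIR_OK (3, MODE) fails for
   any MODE that needs a register pair.  */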
427
428 /* That's the read-ahead distance, in bytes, of the dynamic branch
429 prediction unit on a z10 (or newer) CPU. */
430 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
431
432 /* Masks per jump target register indicating which thunks need to be
433 generated. */
434 static GTY(()) int indirect_branch_prez10thunk_mask = 0;
435 static GTY(()) int indirect_branch_z10thunk_mask = 0;
436
437 #define INDIRECT_BRANCH_NUM_OPTIONS 4
438
439 enum s390_indirect_branch_option
440 {
441 s390_opt_indirect_branch_jump = 0,
442 s390_opt_indirect_branch_call,
443 s390_opt_function_return_reg,
444 s390_opt_function_return_mem
445 };
446
447 static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
448 const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
449 { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
450 const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] = \
451 { ".s390_indirect_jump", ".s390_indirect_call",
452 ".s390_return_reg", ".s390_return_mem" };
453
454 bool
455 s390_return_addr_from_memory ()
456 {
457 return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
458 }
459
460 /* Return nonzero if it's OK to use fused multiply-add for MODE. */
461 bool
462 s390_fma_allowed_p (machine_mode mode)
463 {
464 if (TARGET_VXE && mode == TFmode)
465 return flag_vx_long_double_fma;
466
467 return true;
468 }
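/* Example: on a TARGET_VXE machine a TFmode (long double) fused
   multiply-add is only allowed if flag_vx_long_double_fma is set;
   every other mode, and every non-VXE target, unconditionally gets
   true.  */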
469
470 /* Indicate which ABI has been used for passing vector args.
471 0 - no vector type arguments have been passed where the ABI is relevant
472 1 - the old ABI has been used
473 2 - a vector type argument has been passed either in a vector register
474 or on the stack by value */
475 static int s390_vector_abi = 0;
476
477 /* Set the vector ABI marker if TYPE is subject to the vector ABI
478 switch. The vector ABI affects only vector data types. There are
479 two aspects of the vector ABI relevant here:
480
481 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
482 ABI and natural alignment with the old.
483
484 2. vectors <= 16 bytes are passed in VRs or by value on the stack
485 with the new ABI but by reference on the stack with the old.
486
487 If ARG_P is true TYPE is used for a function argument or return
488 value. The ABI marker then is set for all vector data types. If
489 ARG_P is false only type 1 vectors are being checked. */
490
491 static void
492 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
493 {
494 static hash_set<const_tree> visited_types_hash;
495
496 if (s390_vector_abi)
497 return;
498
499 if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
500 return;
501
502 if (visited_types_hash.contains (type))
503 return;
504
505 visited_types_hash.add (type);
506
507 if (VECTOR_TYPE_P (type))
508 {
509 int type_size = int_size_in_bytes (type);
510
511 /* Outside of arguments only the alignment changes, and this
512 only happens for vector types >= 16 bytes. */
513 if (!arg_p && type_size < 16)
514 return;
515
516 /* In arguments, vector types > 16 bytes are passed as before (GCC
517 never enforced the bigger alignment for arguments which was
518 required by the old vector ABI). However, it might still be
519 ABI relevant due to the changed alignment if it is a struct
520 member. */
521 if (arg_p && type_size > 16 && !in_struct_p)
522 return;
523
524 s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
525 }
526 else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
527 {
528 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
529 natural alignment there will never be ABI dependent padding
530 in an array type. That's why we do not set in_struct_p to
531 true here. */
532 s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
533 }
534 else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
535 {
536 tree arg_chain;
537
538 /* Check the return type. */
539 s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
540
541 for (arg_chain = TYPE_ARG_TYPES (type);
542 arg_chain;
543 arg_chain = TREE_CHAIN (arg_chain))
544 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
545 }
546 else if (RECORD_OR_UNION_TYPE_P (type))
547 {
548 tree field;
549
550 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
551 {
552 if (TREE_CODE (field) != FIELD_DECL)
553 continue;
554
555 s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
556 }
557 }
558 }
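/* Rough example of what the marker is about: given

     typedef double v4df __attribute__ ((vector_size (32)));
     struct s { char c; v4df v; };

   the member v has 8-byte alignment under the new vector ABI but its
   natural alignment under the old one, so the layout of struct s -- and
   any interface passing it -- can differ between the two ABIs, which is
   why such members set the marker above.  */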
559
560
561 /* System z builtins. */
562
563 #include "s390-builtins.h"
564
565 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
566 {
567 #undef B_DEF
568 #undef OB_DEF
569 #undef OB_DEF_VAR
570 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
571 #define OB_DEF(...)
572 #define OB_DEF_VAR(...)
573 #include "s390-builtins.def"
574 0
575 };
576
577 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
578 {
579 #undef B_DEF
580 #undef OB_DEF
581 #undef OB_DEF_VAR
582 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
583 #define OB_DEF(...)
584 #define OB_DEF_VAR(...)
585 #include "s390-builtins.def"
586 0
587 };
588
589 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
590 {
591 #undef B_DEF
592 #undef OB_DEF
593 #undef OB_DEF_VAR
594 #define B_DEF(...)
595 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
596 #define OB_DEF_VAR(...)
597 #include "s390-builtins.def"
598 0
599 };
600
601 const unsigned int
602 bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
603 {
604 #undef B_DEF
605 #undef OB_DEF
606 #undef OB_DEF_VAR
607 #define B_DEF(...)
608 #define OB_DEF(...)
609 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
610 #include "s390-builtins.def"
611 0
612 };
613
614 const unsigned int
615 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
616 {
617 #undef B_DEF
618 #undef OB_DEF
619 #undef OB_DEF_VAR
620 #define B_DEF(...)
621 #define OB_DEF(...)
622 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
623 #include "s390-builtins.def"
624 0
625 };
626
627 tree s390_builtin_types[BT_MAX];
628 tree s390_builtin_fn_types[BT_FN_MAX];
629 tree s390_builtin_decls[S390_BUILTIN_MAX +
630 S390_OVERLOADED_BUILTIN_MAX +
631 S390_OVERLOADED_BUILTIN_VAR_MAX];
632
633 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
634 #undef B_DEF
635 #undef OB_DEF
636 #undef OB_DEF_VAR
637 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
638 #define OB_DEF(...)
639 #define OB_DEF_VAR(...)
640
641 #include "s390-builtins.def"
642 CODE_FOR_nothing
643 };
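/* All of the tables above use the same "X macro" technique:
   s390-builtins.def is included repeatedly, each time with B_DEF /
   OB_DEF / OB_DEF_VAR redefined so that every entry expands to just
   the field of interest (builtin flags, operand flags, insn code, ...)
   followed by a terminating dummy element, e.g.

     #undef B_DEF
     #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
     #include "s390-builtins.def"

   which keeps all the tables in sync with the .def file by
   construction.  */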
644
645 static void
646 s390_init_builtins (void)
647 {
648 /* These definitions are being used in s390-builtins.def. */
649 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
650 NULL, NULL);
651 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
652 tree c_uint64_type_node;
653
654 /* The uint64_type_node from tree.c is not compatible with the C99
655 uint64_t data type. What we want is c_uint64_type_node from
656 c-common.c. But since backend code is not supposed to interface
657 with the frontend we recreate it here. */
658 if (TARGET_64BIT)
659 c_uint64_type_node = long_unsigned_type_node;
660 else
661 c_uint64_type_node = long_long_unsigned_type_node;
662
663 #undef DEF_TYPE
664 #define DEF_TYPE(INDEX, NODE, CONST_P) \
665 if (s390_builtin_types[INDEX] == NULL) \
666 s390_builtin_types[INDEX] = (!CONST_P) ? \
667 (NODE) : build_type_variant ((NODE), 1, 0);
668
669 #undef DEF_POINTER_TYPE
670 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
671 if (s390_builtin_types[INDEX] == NULL) \
672 s390_builtin_types[INDEX] = \
673 build_pointer_type (s390_builtin_types[INDEX_BASE]);
674
675 #undef DEF_DISTINCT_TYPE
676 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
677 if (s390_builtin_types[INDEX] == NULL) \
678 s390_builtin_types[INDEX] = \
679 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
680
681 #undef DEF_VECTOR_TYPE
682 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
683 if (s390_builtin_types[INDEX] == NULL) \
684 s390_builtin_types[INDEX] = \
685 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
686
687 #undef DEF_OPAQUE_VECTOR_TYPE
688 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
689 if (s390_builtin_types[INDEX] == NULL) \
690 s390_builtin_types[INDEX] = \
691 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
692
693 #undef DEF_FN_TYPE
694 #define DEF_FN_TYPE(INDEX, args...) \
695 if (s390_builtin_fn_types[INDEX] == NULL) \
696 s390_builtin_fn_types[INDEX] = \
697 build_function_type_list (args, NULL_TREE);
698 #undef DEF_OV_TYPE
699 #define DEF_OV_TYPE(...)
700 #include "s390-builtin-types.def"
701
702 #undef B_DEF
703 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
704 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
705 s390_builtin_decls[S390_BUILTIN_##NAME] = \
706 add_builtin_function ("__builtin_" #NAME, \
707 s390_builtin_fn_types[FNTYPE], \
708 S390_BUILTIN_##NAME, \
709 BUILT_IN_MD, \
710 NULL, \
711 ATTRS);
712 #undef OB_DEF
713 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
714 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
715 == NULL) \
716 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
717 add_builtin_function ("__builtin_" #NAME, \
718 s390_builtin_fn_types[FNTYPE], \
719 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
720 BUILT_IN_MD, \
721 NULL, \
722 0);
723 #undef OB_DEF_VAR
724 #define OB_DEF_VAR(...)
725 #include "s390-builtins.def"
726
727 }
728
729 /* Return true if ARG is appropriate as argument number ARGNUM of
730 builtin DECL. The operand flags from s390-builtins.def have to
731 be passed as OP_FLAGS. */
732 bool
733 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
734 {
735 if (O_UIMM_P (op_flags))
736 {
737 unsigned HOST_WIDE_INT bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 4 };
738 unsigned HOST_WIDE_INT bitmasks[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 };
739 unsigned HOST_WIDE_INT bitwidth = bitwidths[op_flags - O_U1];
740 unsigned HOST_WIDE_INT bitmask = bitmasks[op_flags - O_U1];
741
742 gcc_assert(ARRAY_SIZE(bitwidths) == (O_M12 - O_U1 + 1));
743 gcc_assert(ARRAY_SIZE(bitmasks) == (O_M12 - O_U1 + 1));
744
745 if (!tree_fits_uhwi_p (arg)
746 || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1
747 || (bitmask && tree_to_uhwi (arg) & ~bitmask))
748 {
749 if (bitmask)
750 {
751 gcc_assert (bitmask < 16);
752 char values[120] = "";
753
754 for (unsigned HOST_WIDE_INT i = 0; i <= bitmask; i++)
755 {
756 char buf[5];
757 if (i & ~bitmask)
758 continue;
759 int ret = snprintf (buf, 5, HOST_WIDE_INT_PRINT_UNSIGNED, i & bitmask);
760 gcc_assert (ret < 5);
761 strcat (values, buf);
762 if (i < bitmask)
763 strcat (values, ", ");
764 }
765 error ("constant argument %d for builtin %qF is invalid (%s)",
766 argnum, decl, values);
767 }
768 else
769 error ("constant argument %d for builtin %qF is out of range (0..%wu)",
770 argnum, decl, (HOST_WIDE_INT_1U << bitwidth) - 1);
771
772 return false;
773 }
774 }
775
776 if (O_SIMM_P (op_flags))
777 {
778 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
779 int bitwidth = bitwidths[op_flags - O_S2];
780
781 if (!tree_fits_shwi_p (arg)
782 || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
783 || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
784 {
785 error ("constant argument %d for builtin %qF is out of range "
786 "(%wd..%wd)", argnum, decl,
787 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
788 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
789 return false;
790 }
791 }
792 return true;
793 }
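/* Two concrete cases of the checks above: a 4-bit unsigned operand
   accepts 0..15, anything else yields the "out of range (0..15)"
   diagnostic; the O_M12 variant (bitwidth 4, bitmask 12) additionally
   rejects values with bits outside the mask, so only 0, 4, 8 and 12
   pass and the error message lists exactly those values.  */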
794
795 /* Expand an expression EXP that calls a built-in function,
796 with result going to TARGET if that's convenient
797 (and in mode MODE if that's convenient).
798 SUBTARGET may be used as the target for computing one of EXP's operands.
799 IGNORE is nonzero if the value is to be ignored. */
800
801 static rtx
802 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
803 machine_mode mode ATTRIBUTE_UNUSED,
804 int ignore ATTRIBUTE_UNUSED)
805 {
806 #define MAX_ARGS 6
807
808 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
809 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
810 enum insn_code icode;
811 rtx op[MAX_ARGS], pat;
812 int arity;
813 bool nonvoid;
814 tree arg;
815 call_expr_arg_iterator iter;
816 unsigned int all_op_flags = opflags_for_builtin (fcode);
817 machine_mode last_vec_mode = VOIDmode;
818
819 if (TARGET_DEBUG_ARG)
820 {
821 fprintf (stderr,
822 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
823 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
824 bflags_for_builtin (fcode));
825 }
826
827 if (S390_USE_TARGET_ATTRIBUTE)
828 {
829 unsigned int bflags;
830
831 bflags = bflags_for_builtin (fcode);
832 if ((bflags & B_HTM) && !TARGET_HTM)
833 {
834 error ("builtin %qF is not supported without %<-mhtm%> "
835 "(default with %<-march=zEC12%> and higher).", fndecl);
836 return const0_rtx;
837 }
838 if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
839 {
840 error ("builtin %qF requires %<-mvx%> "
841 "(default with %<-march=z13%> and higher).", fndecl);
842 return const0_rtx;
843 }
844
845 if ((bflags & B_VXE) && !TARGET_VXE)
846 {
847 error ("Builtin %qF requires z14 or higher.", fndecl);
848 return const0_rtx;
849 }
850
851 if ((bflags & B_VXE2) && !TARGET_VXE2)
852 {
853 error ("Builtin %qF requires z15 or higher.", fndecl);
854 return const0_rtx;
855 }
856
857 if ((bflags & B_NNPA) && !TARGET_NNPA)
858 {
859 error ("Builtin %qF requires arch14 or higher.", fndecl);
860 return const0_rtx;
861 }
862 }
863 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
864 && fcode < S390_ALL_BUILTIN_MAX)
865 {
866 gcc_unreachable ();
867 }
868 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
869 {
870 icode = code_for_builtin[fcode];
871 /* Set a flag in the machine specific cfun part in order to support
872 saving/restoring of FPRs. */
873 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
874 cfun->machine->tbegin_p = true;
875 }
876 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
877 {
878 error ("unresolved overloaded builtin");
879 return const0_rtx;
880 }
881 else
882 internal_error ("bad builtin fcode");
883
884 if (icode == 0)
885 internal_error ("bad builtin icode");
886
887 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
888
889 if (nonvoid)
890 {
891 machine_mode tmode = insn_data[icode].operand[0].mode;
892 if (!target
893 || GET_MODE (target) != tmode
894 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
895 target = gen_reg_rtx (tmode);
896
897 /* There are builtins (e.g. vec_promote) with no vector
898 arguments but an element selector. So we have to also look
899 at the vector return type when emitting the modulo
900 operation. */
901 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
902 last_vec_mode = insn_data[icode].operand[0].mode;
903 }
904
905 arity = 0;
906 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
907 {
908 rtx tmp_rtx;
909 const struct insn_operand_data *insn_op;
910 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
911
912 all_op_flags = all_op_flags >> O_SHIFT;
913
914 if (arg == error_mark_node)
915 return NULL_RTX;
916 if (arity >= MAX_ARGS)
917 return NULL_RTX;
918
919 if (O_IMM_P (op_flags)
920 && TREE_CODE (arg) != INTEGER_CST)
921 {
922 error ("constant value required for builtin %qF argument %d",
923 fndecl, arity + 1);
924 return const0_rtx;
925 }
926
927 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
928 return const0_rtx;
929
930 insn_op = &insn_data[icode].operand[arity + nonvoid];
931 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
932
933 /* expand_expr truncates constants to the target mode only if it
934 is "convenient". However, our checks below rely on this
935 being done. */
936 if (CONST_INT_P (op[arity])
937 && SCALAR_INT_MODE_P (insn_op->mode)
938 && GET_MODE (op[arity]) != insn_op->mode)
939 op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
940 insn_op->mode));
941
942 /* Wrap the expanded RTX for pointer types into a MEM expr with
943 the proper mode. This allows us to use e.g. (match_operand
944 "memory_operand"..) in the insn patterns instead of (mem
945 (match_operand "address_operand")). This is helpful for
946 patterns not just accepting MEMs. */
947 if (POINTER_TYPE_P (TREE_TYPE (arg))
948 && insn_op->predicate != address_operand)
949 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
950
951 /* Expand the modulo operation required on element selectors. */
952 if (op_flags == O_ELEM)
953 {
954 gcc_assert (last_vec_mode != VOIDmode);
955 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
956 op[arity],
957 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
958 NULL_RTX, 1, OPTAB_DIRECT);
959 }
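/* For example, if the vector return type or a preceding vector
   operand had mode V4SImode, an element selector of 5 is wrapped to
   5 & (4 - 1) = 1 here instead of being rejected.  */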
960
961 /* Record the vector mode used for an element selector. This assumes:
962 1. There is no builtin with two different vector modes and an element selector
963 2. The element selector comes after the vector type it is referring to.
964 This is currently true for all the builtins, but FIXME: we
965 should check that more thoroughly. */
966 if (VECTOR_MODE_P (insn_op->mode))
967 last_vec_mode = insn_op->mode;
968
969 if (insn_op->predicate (op[arity], insn_op->mode))
970 {
971 arity++;
972 continue;
973 }
974
975 /* A memory operand is rejected by the memory_operand predicate.
976 Try making the address legal by copying it into a register. */
977 if (MEM_P (op[arity])
978 && insn_op->predicate == memory_operand
979 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
980 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
981 {
982 op[arity] = replace_equiv_address (op[arity],
983 copy_to_mode_reg (Pmode,
984 XEXP (op[arity], 0)));
985 }
986 /* Some of the builtins require different modes/types than the
987 pattern in order to implement a specific API. Instead of
988 adding many expanders which do the mode change we do it here.
989 E.g. s390_vec_add_u128, which is required to have vector unsigned
990 char arguments, is mapped to addti3. */
991 else if (insn_op->mode != VOIDmode
992 && GET_MODE (op[arity]) != VOIDmode
993 && GET_MODE (op[arity]) != insn_op->mode
994 && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
995 GET_MODE (op[arity]), 0))
996 != NULL_RTX))
997 {
998 op[arity] = tmp_rtx;
999 }
1000
1001 /* The predicate rejects the operand although the mode is fine.
1002 Copy the operand to a register. */
1003 if (!insn_op->predicate (op[arity], insn_op->mode)
1004 && (GET_MODE (op[arity]) == insn_op->mode
1005 || GET_MODE (op[arity]) == VOIDmode
1006 || (insn_op->predicate == address_operand
1007 && GET_MODE (op[arity]) == Pmode)))
1008 {
1009 /* An address_operand usually has VOIDmode in the expander
1010 so we cannot use this. */
1011 machine_mode target_mode =
1012 (insn_op->predicate == address_operand
1013 ? (machine_mode) Pmode : insn_op->mode);
1014 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
1015 }
1016
1017 if (!insn_op->predicate (op[arity], insn_op->mode))
1018 {
1019 error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
1020 return const0_rtx;
1021 }
1022 arity++;
1023 }
1024
1025 switch (arity)
1026 {
1027 case 0:
1028 pat = GEN_FCN (icode) (target);
1029 break;
1030 case 1:
1031 if (nonvoid)
1032 pat = GEN_FCN (icode) (target, op[0]);
1033 else
1034 pat = GEN_FCN (icode) (op[0]);
1035 break;
1036 case 2:
1037 if (nonvoid)
1038 pat = GEN_FCN (icode) (target, op[0], op[1]);
1039 else
1040 pat = GEN_FCN (icode) (op[0], op[1]);
1041 break;
1042 case 3:
1043 if (nonvoid)
1044 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
1045 else
1046 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1047 break;
1048 case 4:
1049 if (nonvoid)
1050 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1051 else
1052 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1053 break;
1054 case 5:
1055 if (nonvoid)
1056 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1057 else
1058 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1059 break;
1060 case 6:
1061 if (nonvoid)
1062 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1063 else
1064 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1065 break;
1066 default:
1067 gcc_unreachable ();
1068 }
1069 if (!pat)
1070 return NULL_RTX;
1071 emit_insn (pat);
1072
1073 if (nonvoid)
1074 return target;
1075 else
1076 return const0_rtx;
1077 }
1078
1079
1080 static const int s390_hotpatch_hw_max = 1000000;
1081 static int s390_hotpatch_hw_before_label = 0;
1082 static int s390_hotpatch_hw_after_label = 0;
1083
1084 /* Check whether the hotpatch attribute is applied to a function and, if it has
1085 arguments, whether the arguments are valid. */
1086
1087 static tree
1088 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1089 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1090 {
1091 tree expr;
1092 tree expr2;
1093 int err;
1094
1095 if (TREE_CODE (*node) != FUNCTION_DECL)
1096 {
1097 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1098 name);
1099 *no_add_attrs = true;
1100 }
1101 if (args != NULL && TREE_CHAIN (args) != NULL)
1102 {
1103 expr = TREE_VALUE (args);
1104 expr2 = TREE_VALUE (TREE_CHAIN (args));
1105 }
1106 if (args == NULL || TREE_CHAIN (args) == NULL)
1107 err = 1;
1108 else if (TREE_CODE (expr) != INTEGER_CST
1109 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1110 || wi::gtu_p (wi::to_wide (expr), s390_hotpatch_hw_max))
1111 err = 1;
1112 else if (TREE_CODE (expr2) != INTEGER_CST
1113 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1114 || wi::gtu_p (wi::to_wide (expr2), s390_hotpatch_hw_max))
1115 err = 1;
1116 else
1117 err = 0;
1118 if (err)
1119 {
1120 error ("requested %qE attribute is not a comma separated pair of"
1121 " non-negative integer constants or too large (max. %d)", name,
1122 s390_hotpatch_hw_max);
1123 *no_add_attrs = true;
1124 }
1125
1126 return NULL_TREE;
1127 }
1128
1129 /* Expand the s390_vector_bool type attribute. */
1130
1131 static tree
1132 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1133 tree args ATTRIBUTE_UNUSED,
1134 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1135 {
1136 tree type = *node, result = NULL_TREE;
1137 machine_mode mode;
1138
1139 while (POINTER_TYPE_P (type)
1140 || TREE_CODE (type) == FUNCTION_TYPE
1141 || TREE_CODE (type) == METHOD_TYPE
1142 || TREE_CODE (type) == ARRAY_TYPE)
1143 type = TREE_TYPE (type);
1144
1145 mode = TYPE_MODE (type);
1146 switch (mode)
1147 {
1148 case E_DImode: case E_V2DImode:
1149 result = s390_builtin_types[BT_BV2DI];
1150 break;
1151 case E_SImode: case E_V4SImode:
1152 result = s390_builtin_types[BT_BV4SI];
1153 break;
1154 case E_HImode: case E_V8HImode:
1155 result = s390_builtin_types[BT_BV8HI];
1156 break;
1157 case E_QImode: case E_V16QImode:
1158 result = s390_builtin_types[BT_BV16QI];
1159 break;
1160 default:
1161 break;
1162 }
1163
1164 *no_add_attrs = true; /* No need to hang on to the attribute. */
1165
1166 if (result)
1167 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1168
1169 return NULL_TREE;
1170 }
1171
1172 /* Check syntax of function decl attributes having a string type value. */
1173
1174 static tree
1175 s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1176 tree args ATTRIBUTE_UNUSED,
1177 int flags ATTRIBUTE_UNUSED,
1178 bool *no_add_attrs)
1179 {
1180 tree cst;
1181
1182 if (TREE_CODE (*node) != FUNCTION_DECL)
1183 {
1184 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1185 name);
1186 *no_add_attrs = true;
1187 }
1188
1189 cst = TREE_VALUE (args);
1190
1191 if (TREE_CODE (cst) != STRING_CST)
1192 {
1193 warning (OPT_Wattributes,
1194 "%qE attribute requires a string constant argument",
1195 name);
1196 *no_add_attrs = true;
1197 }
1198
1199 if (is_attribute_p ("indirect_branch", name)
1200 || is_attribute_p ("indirect_branch_call", name)
1201 || is_attribute_p ("function_return", name)
1202 || is_attribute_p ("function_return_reg", name)
1203 || is_attribute_p ("function_return_mem", name))
1204 {
1205 if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1206 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1207 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1208 {
1209 warning (OPT_Wattributes,
1210 "argument to %qE attribute is not "
1211 "(keep|thunk|thunk-extern)", name);
1212 *no_add_attrs = true;
1213 }
1214 }
1215
1216 if (is_attribute_p ("indirect_branch_jump", name)
1217 && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
1218 && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
1219 && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
1220 && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
1221 {
1222 warning (OPT_Wattributes,
1223 "argument to %qE attribute is not "
1224 "(keep|thunk|thunk-inline|thunk-extern)", name);
1225 *no_add_attrs = true;
1226 }
1227
1228 return NULL_TREE;
1229 }
1230
1231 static const struct attribute_spec s390_attribute_table[] = {
1232 { "hotpatch", 2, 2, true, false, false, false,
1233 s390_handle_hotpatch_attribute, NULL },
1234 { "s390_vector_bool", 0, 0, false, true, false, true,
1235 s390_handle_vectorbool_attribute, NULL },
1236 { "indirect_branch", 1, 1, true, false, false, false,
1237 s390_handle_string_attribute, NULL },
1238 { "indirect_branch_jump", 1, 1, true, false, false, false,
1239 s390_handle_string_attribute, NULL },
1240 { "indirect_branch_call", 1, 1, true, false, false, false,
1241 s390_handle_string_attribute, NULL },
1242 { "function_return", 1, 1, true, false, false, false,
1243 s390_handle_string_attribute, NULL },
1244 { "function_return_reg", 1, 1, true, false, false, false,
1245 s390_handle_string_attribute, NULL },
1246 { "function_return_mem", 1, 1, true, false, false, false,
1247 s390_handle_string_attribute, NULL },
1248
1249 /* End element. */
1250 { NULL, 0, 0, false, false, false, false, NULL, NULL }
1251 };
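/* Usage sketch for the attributes registered above (illustrative
   declarations only):

     void f (void) __attribute__ ((hotpatch (1, 2)));
     void g (void) __attribute__ ((indirect_branch ("thunk")));

   hotpatch takes exactly two non-negative integer constants (checked by
   s390_handle_hotpatch_attribute); the string-valued attributes accept
   "keep", "thunk" and "thunk-extern" -- plus "thunk-inline" for
   indirect_branch_jump -- as checked by s390_handle_string_attribute.  */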
1252
1253 /* Return the alignment for LABEL. We default to the -falign-labels
1254 value except for the literal pool base label. */
1255 int
1256 s390_label_align (rtx_insn *label)
1257 {
1258 rtx_insn *prev_insn = prev_active_insn (label);
1259 rtx set, src;
1260
1261 if (prev_insn == NULL_RTX)
1262 goto old;
1263
1264 set = single_set (prev_insn);
1265
1266 if (set == NULL_RTX)
1267 goto old;
1268
1269 src = SET_SRC (set);
1270
1271 /* Don't align literal pool base labels. */
1272 if (GET_CODE (src) == UNSPEC
1273 && XINT (src, 1) == UNSPEC_MAIN_BASE)
1274 return 0;
1275
1276 old:
1277 return align_labels.levels[0].log;
1278 }
1279
1280 static GTY(()) rtx got_symbol;
1281
1282 /* Return the GOT table symbol. The symbol will be created when the
1283 function is invoked for the first time. */
1284
1285 static rtx
1286 s390_got_symbol (void)
1287 {
1288 if (!got_symbol)
1289 {
1290 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1291 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
1292 }
1293
1294 return got_symbol;
1295 }
1296
1297 static scalar_int_mode
1298 s390_libgcc_cmp_return_mode (void)
1299 {
1300 return TARGET_64BIT ? DImode : SImode;
1301 }
1302
1303 static scalar_int_mode
1304 s390_libgcc_shift_count_mode (void)
1305 {
1306 return TARGET_64BIT ? DImode : SImode;
1307 }
1308
1309 static scalar_int_mode
1310 s390_unwind_word_mode (void)
1311 {
1312 return TARGET_64BIT ? DImode : SImode;
1313 }
1314
1315 /* Return true if the back end supports mode MODE. */
1316 static bool
1317 s390_scalar_mode_supported_p (scalar_mode mode)
1318 {
1319 /* In contrast to the default implementation, reject TImode constants on
1320 31-bit TARGET_ZARCH for ABI compliance. */
1321 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1322 return false;
1323
1324 if (DECIMAL_FLOAT_MODE_P (mode))
1325 return default_decimal_float_supported_p ();
1326
1327 return default_scalar_mode_supported_p (mode);
1328 }
1329
1330 /* Return true if the back end supports vector mode MODE. */
1331 static bool
1332 s390_vector_mode_supported_p (machine_mode mode)
1333 {
1334 machine_mode inner;
1335
1336 if (!VECTOR_MODE_P (mode)
1337 || !TARGET_VX
1338 || GET_MODE_SIZE (mode) > 16)
1339 return false;
1340
1341 inner = GET_MODE_INNER (mode);
1342
1343 switch (inner)
1344 {
1345 case E_QImode:
1346 case E_HImode:
1347 case E_SImode:
1348 case E_DImode:
1349 case E_TImode:
1350 case E_SFmode:
1351 case E_DFmode:
1352 case E_TFmode:
1353 return true;
1354 default:
1355 return false;
1356 }
1357 }
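/* Examples: with vector support enabled, V16QImode, V4SImode and
   V2DFmode (16 bytes, inner modes listed above) are supported, while
   V4DFmode is rejected for exceeding 16 bytes, and every vector mode
   is rejected without TARGET_VX.  */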
1358
1359 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1360
1361 void
1362 s390_set_has_landing_pad_p (bool value)
1363 {
1364 cfun->machine->has_landing_pad_p = value;
1365 }
1366
1367 /* If two condition code modes are compatible, return a condition code
1368 mode which is compatible with both. Otherwise, return
1369 VOIDmode. */
1370
1371 static machine_mode
1372 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1373 {
1374 if (m1 == m2)
1375 return m1;
1376
1377 switch (m1)
1378 {
1379 case E_CCZmode:
1380 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1381 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1382 return m2;
1383 return VOIDmode;
1384
1385 case E_CCSmode:
1386 case E_CCUmode:
1387 case E_CCTmode:
1388 case E_CCSRmode:
1389 case E_CCURmode:
1390 case E_CCZ1mode:
1391 if (m2 == CCZmode)
1392 return m1;
1393
1394 return VOIDmode;
1395
1396 default:
1397 return VOIDmode;
1398 }
1399 return VOIDmode;
1400 }
1401
1402 /* Return true if SET either doesn't set the CC register, or else
1403 the source and destination have matching CC modes and that
1404 CC mode is at least as constrained as REQ_MODE. */
1405
1406 static bool
1407 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1408 {
1409 machine_mode set_mode;
1410
1411 gcc_assert (GET_CODE (set) == SET);
1412
1413 /* These modes are supposed to be used only in CC consumer
1414 patterns. */
1415 gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
1416 && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
1417
1418 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1419 return 1;
1420
1421 set_mode = GET_MODE (SET_DEST (set));
1422 switch (set_mode)
1423 {
1424 case E_CCZ1mode:
1425 case E_CCSmode:
1426 case E_CCSRmode:
1427 case E_CCSFPSmode:
1428 case E_CCUmode:
1429 case E_CCURmode:
1430 case E_CCOmode:
1431 case E_CCLmode:
1432 case E_CCL1mode:
1433 case E_CCL2mode:
1434 case E_CCL3mode:
1435 case E_CCT1mode:
1436 case E_CCT2mode:
1437 case E_CCT3mode:
1438 case E_CCVEQmode:
1439 case E_CCVIHmode:
1440 case E_CCVIHUmode:
1441 case E_CCVFHmode:
1442 case E_CCVFHEmode:
1443 if (req_mode != set_mode)
1444 return 0;
1445 break;
1446
1447 case E_CCZmode:
1448 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1449 && req_mode != CCSRmode && req_mode != CCURmode
1450 && req_mode != CCZ1mode)
1451 return 0;
1452 break;
1453
1454 case E_CCAPmode:
1455 case E_CCANmode:
1456 if (req_mode != CCAmode)
1457 return 0;
1458 break;
1459
1460 default:
1461 gcc_unreachable ();
1462 }
1463
1464 return (GET_MODE (SET_SRC (set)) == set_mode);
1465 }
1466
1467 /* Return true if every SET in INSN that sets the CC register
1468 has source and destination with matching CC modes and that
1469 CC mode is at least as constrained as REQ_MODE.
1470 If REQ_MODE is VOIDmode, always return false. */
1471
1472 bool
1473 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1474 {
1475 int i;
1476
1477 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1478 if (req_mode == VOIDmode)
1479 return false;
1480
1481 if (GET_CODE (PATTERN (insn)) == SET)
1482 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1483
1484 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1485 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1486 {
1487 rtx set = XVECEXP (PATTERN (insn), 0, i);
1488 if (GET_CODE (set) == SET)
1489 if (!s390_match_ccmode_set (set, req_mode))
1490 return false;
1491 }
1492
1493 return true;
1494 }
1495
1496 /* If a test-under-mask instruction can be used to implement
1497 (compare (and ... OP1) OP2), return the CC mode required
1498 to do that. Otherwise, return VOIDmode.
1499 MIXED is true if the instruction can distinguish between
1500 CC1 and CC2 for mixed selected bits (TMxx), it is false
1501 if the instruction cannot (TM). */
1502
1503 machine_mode
1504 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1505 {
1506 int bit0, bit1;
1507
1508 /* ??? Fixme: should work on CONST_WIDE_INT as well. */
1509 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1510 return VOIDmode;
1511
1512 /* Selected bits all zero: CC0.
1513 e.g.: int a; if ((a & (16 + 128)) == 0) */
1514 if (INTVAL (op2) == 0)
1515 return CCTmode;
1516
1517 /* Selected bits all one: CC3.
1518 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1519 if (INTVAL (op2) == INTVAL (op1))
1520 return CCT3mode;
1521
1522 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1523 int a;
1524 if ((a & (16 + 128)) == 16) -> CCT1
1525 if ((a & (16 + 128)) == 128) -> CCT2 */
1526 if (mixed)
1527 {
1528 bit1 = exact_log2 (INTVAL (op2));
1529 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1530 if (bit0 != -1 && bit1 != -1)
1531 return bit0 > bit1 ? CCT1mode : CCT2mode;
1532 }
1533
1534 return VOIDmode;
1535 }
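/* Worked example for the mixed case: with op1 = 16 + 128 and op2 = 16
   we get bit1 = exact_log2 (16) = 4 and bit0 = exact_log2 (144 ^ 16)
   = exact_log2 (128) = 7; bit0 > bit1, so the result is CCT1mode,
   matching the "== 16 -> CCT1" example in the comment above.  */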
1536
1537 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1538 OP0 and OP1 of a COMPARE, return the mode to be used for the
1539 comparison. */
1540
1541 machine_mode
1542 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1543 {
1544 switch (code)
1545 {
1546 case EQ:
1547 case NE:
1548 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1549 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1550 return CCAPmode;
1551 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1552 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1553 return CCAPmode;
1554 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1555 || GET_CODE (op1) == NEG)
1556 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1557 return CCLmode;
1558
1559 if (GET_CODE (op0) == AND)
1560 {
1561 /* Check whether we can potentially do it via TM. */
1562 machine_mode ccmode;
1563 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1564 if (ccmode != VOIDmode)
1565 {
1566 /* Relax CCTmode to CCZmode to allow fall-back to AND
1567 if that turns out to be beneficial. */
1568 return ccmode == CCTmode ? CCZmode : ccmode;
1569 }
1570 }
1571
1572 if (register_operand (op0, HImode)
1573 && GET_CODE (op1) == CONST_INT
1574 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1575 return CCT3mode;
1576 if (register_operand (op0, QImode)
1577 && GET_CODE (op1) == CONST_INT
1578 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1579 return CCT3mode;
1580
1581 return CCZmode;
1582
1583 case LE:
1584 case LT:
1585 case GE:
1586 case GT:
1587 /* The only overflow condition of NEG and ABS happens when the most
1588 negative value (INT_MIN) is used as parameter: its negation cannot
1589 be represented, so the expected positive result overflows to a
1590 negative one. Using CCAP mode the resulting cc can be used for comparisons. */
1591 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1592 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1593 return CCAPmode;
1594
1595 /* If a constant is involved in an add instruction, it is possible to use
1596 the resulting cc for comparisons with zero. Knowing the sign of the
1597 constant makes the overflow behavior predictable. e.g.:
1598 int a, b; if ((b = a + c) > 0)
1599 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1600 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1601 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1602 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1603 /* Avoid INT32_MIN on 32 bit. */
1604 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1605 {
1606 if (INTVAL (XEXP((op0), 1)) < 0)
1607 return CCANmode;
1608 else
1609 return CCAPmode;
1610 }
1611
1612 /* Fall through. */
1613 case LTGT:
1614 if (HONOR_NANS (op0) || HONOR_NANS (op1))
1615 return CCSFPSmode;
1616
1617 /* Fall through. */
1618 case UNORDERED:
1619 case ORDERED:
1620 case UNEQ:
1621 case UNLE:
1622 case UNLT:
1623 case UNGE:
1624 case UNGT:
1625 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1626 && GET_CODE (op1) != CONST_INT)
1627 return CCSRmode;
1628 return CCSmode;
1629
1630 case LTU:
1631 case GEU:
1632 if (GET_CODE (op0) == PLUS
1633 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1634 return CCL1mode;
1635
1636 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1637 && GET_CODE (op1) != CONST_INT)
1638 return CCURmode;
1639 return CCUmode;
1640
1641 case LEU:
1642 case GTU:
1643 if (GET_CODE (op0) == MINUS
1644 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1645 return CCL2mode;
1646
1647 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1648 && GET_CODE (op1) != CONST_INT)
1649 return CCURmode;
1650 return CCUmode;
1651
1652 default:
1653 gcc_unreachable ();
1654 }
1655 }
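/* Example: for (a + 42) > 0 with a in SImode, op0 is a PLUS whose
   constant passes the CONST_OK_FOR_K/Os check above, and since 42 is
   non-negative the function returns CCAPmode -- the "c >= 0 -> CCAP"
   case from the comment.  */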
1656
1657 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1658 that we can implement more efficiently. */
1659
1660 static void
1661 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1662 bool op0_preserve_value)
1663 {
1664 if (op0_preserve_value)
1665 return;
1666
1667 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1668 if ((*code == EQ || *code == NE)
1669 && *op1 == const0_rtx
1670 && GET_CODE (*op0) == ZERO_EXTRACT
1671 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1672 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1673 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1674 {
1675 rtx inner = XEXP (*op0, 0);
1676 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1677 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1678 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1679
1680 if (len > 0 && len < modesize
1681 && pos >= 0 && pos + len <= modesize
1682 && modesize <= HOST_BITS_PER_WIDE_INT)
1683 {
1684 unsigned HOST_WIDE_INT block;
1685 block = (HOST_WIDE_INT_1U << len) - 1;
1686 block <<= modesize - pos - len;
1687
1688 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1689 gen_int_mode (block, GET_MODE (inner)));
1690 }
1691 }
1692
1693 /* Narrow AND of memory against immediate to enable TM. */
1694 if ((*code == EQ || *code == NE)
1695 && *op1 == const0_rtx
1696 && GET_CODE (*op0) == AND
1697 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1698 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1699 {
1700 rtx inner = XEXP (*op0, 0);
1701 rtx mask = XEXP (*op0, 1);
1702
1703 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1704 if (GET_CODE (inner) == SUBREG
1705 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1706 && (GET_MODE_SIZE (GET_MODE (inner))
1707 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1708 && ((INTVAL (mask)
1709 & GET_MODE_MASK (GET_MODE (inner))
1710 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1711 == 0))
1712 inner = SUBREG_REG (inner);
1713
1714 /* Do not change volatile MEMs. */
1715 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1716 {
1717 int part = s390_single_part (XEXP (*op0, 1),
1718 GET_MODE (inner), QImode, 0);
1719 if (part >= 0)
1720 {
1721 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1722 inner = adjust_address_nv (inner, QImode, part);
1723 *op0 = gen_rtx_AND (QImode, inner, mask);
1724 }
1725 }
1726 }
1727
1728 /* Narrow comparisons against 0xffff to HImode if possible. */
1729 if ((*code == EQ || *code == NE)
1730 && GET_CODE (*op1) == CONST_INT
1731 && INTVAL (*op1) == 0xffff
1732 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1733 && (nonzero_bits (*op0, GET_MODE (*op0))
1734 & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1735 {
1736 *op0 = gen_lowpart (HImode, *op0);
1737 *op1 = constm1_rtx;
1738 }
1739
1740 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1741 if (GET_CODE (*op0) == UNSPEC
1742 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1743 && XVECLEN (*op0, 0) == 1
1744 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1745 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1746 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1747 && *op1 == const0_rtx)
1748 {
1749 enum rtx_code new_code = UNKNOWN;
1750 switch (*code)
1751 {
1752 case EQ: new_code = EQ; break;
1753 case NE: new_code = NE; break;
1754 case LT: new_code = GTU; break;
1755 case GT: new_code = LTU; break;
1756 case LE: new_code = GEU; break;
1757 case GE: new_code = LEU; break;
1758 default: break;
1759 }
1760
1761 if (new_code != UNKNOWN)
1762 {
1763 *op0 = XVECEXP (*op0, 0, 0);
1764 *code = new_code;
1765 }
1766 }
1767
1768 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1769 if (GET_CODE (*op0) == UNSPEC
1770 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1771 && XVECLEN (*op0, 0) == 1
1772 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1773 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1774 && CONST_INT_P (*op1))
1775 {
1776 enum rtx_code new_code = UNKNOWN;
1777 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1778 {
1779 case E_CCZmode:
1780 case E_CCRAWmode:
1781 switch (*code)
1782 {
1783 case EQ: new_code = EQ; break;
1784 case NE: new_code = NE; break;
1785 default: break;
1786 }
1787 break;
1788 default: break;
1789 }
1790
1791 if (new_code != UNKNOWN)
1792 {
1793 /* For CCRAWmode put the required cc mask into the second
1794 operand. */
1795 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1796 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1797 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1798 *op0 = XVECEXP (*op0, 0, 0);
1799 *code = new_code;
1800 }
1801 }
1802
1803 /* Simplify cascaded EQ, NE with const0_rtx. */
1804 if ((*code == NE || *code == EQ)
1805 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1806 && GET_MODE (*op0) == SImode
1807 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1808 && REG_P (XEXP (*op0, 0))
1809 && XEXP (*op0, 1) == const0_rtx
1810 && *op1 == const0_rtx)
1811 {
1812 if ((*code == EQ && GET_CODE (*op0) == NE)
1813 || (*code == NE && GET_CODE (*op0) == EQ))
1814 *code = EQ;
1815 else
1816 *code = NE;
1817 *op0 = XEXP (*op0, 0);
1818 }
1819
1820 /* Prefer register over memory as first operand. */
1821 if (MEM_P (*op0) && REG_P (*op1))
1822 {
1823 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1824 *code = (int)swap_condition ((enum rtx_code)*code);
1825 }
1826
1827 /* A comparison result is compared against zero. Replace it with
1828 the (perhaps inverted) original comparison.
1829 This probably should be done by simplify_relational_operation. */
1830 if ((*code == EQ || *code == NE)
1831 && *op1 == const0_rtx
1832 && COMPARISON_P (*op0)
1833 && CC_REG_P (XEXP (*op0, 0)))
1834 {
1835 enum rtx_code new_code;
1836
1837 if (*code == EQ)
1838 new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1839 XEXP (*op0, 0),
1840 XEXP (*op0, 1), NULL);
1841 else
1842 new_code = GET_CODE (*op0);
1843
1844 if (new_code != UNKNOWN)
1845 {
1846 *code = new_code;
1847 *op1 = XEXP (*op0, 1);
1848 *op0 = XEXP (*op0, 0);
1849 }
1850 }
1851
1852 /* ~a==b -> ~(a^b)==0 ~a!=b -> ~(a^b)!=0 */
1853 if (TARGET_Z15
1854 && (*code == EQ || *code == NE)
1855 && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
1856 && GET_CODE (*op0) == NOT)
1857 {
1858 machine_mode mode = GET_MODE (*op0);
1859 *op0 = gen_rtx_XOR (mode, XEXP (*op0, 0), *op1);
1860 *op0 = gen_rtx_NOT (mode, *op0);
1861 *op1 = const0_rtx;
1862 }
1863
1864 /* a&b == -1 -> ~a|~b == 0 a|b == -1 -> ~a&~b == 0 */
1865 if (TARGET_Z15
1866 && (*code == EQ || *code == NE)
1867 && (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR)
1868 && (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
1869 && CONST_INT_P (*op1)
1870 && *op1 == constm1_rtx)
1871 {
1872 machine_mode mode = GET_MODE (*op0);
1873 rtx op00 = gen_rtx_NOT (mode, XEXP (*op0, 0));
1874 rtx op01 = gen_rtx_NOT (mode, XEXP (*op0, 1));
1875
1876 if (GET_CODE (*op0) == AND)
1877 *op0 = gen_rtx_IOR (mode, op00, op01);
1878 else
1879 *op0 = gen_rtx_AND (mode, op00, op01);
1880
1881 *op1 = const0_rtx;
1882 }
1883 }
1884
1885
1886 /* Emit a compare instruction suitable to implement the comparison
1887 OP0 CODE OP1. Return the correct condition RTL to be placed in
1888 the IF_THEN_ELSE of the conditional branch testing the result. */
1889
1890 rtx
1891 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1892 {
1893 machine_mode mode = s390_select_ccmode (code, op0, op1);
1894 rtx cc;
1895
1896 /* Force OP1 into a register in order to satisfy VXE TFmode patterns. */
1897 if (TARGET_VXE && GET_MODE (op1) == TFmode)
1898 op1 = force_reg (TFmode, op1);
1899
1900 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1901 {
1902 /* Do not output a redundant compare instruction if a
1903 compare_and_swap pattern already computed the result and the
1904 machine modes are compatible. */
1905 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1906 == GET_MODE (op0));
1907 cc = op0;
1908 }
1909 else
1910 {
1911 cc = gen_rtx_REG (mode, CC_REGNUM);
1912 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1913 }
1914
1915 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1916 }
1917
1918 /* If MEM is not a legitimate compare-and-swap memory operand, return a new
1919 MEM, whose address is a pseudo containing the original MEM's address. */
1920
1921 static rtx
1922 s390_legitimize_cs_operand (rtx mem)
1923 {
1924 rtx tmp;
1925
1926 if (!contains_symbol_ref_p (mem))
1927 return mem;
1928 tmp = gen_reg_rtx (Pmode);
1929 emit_move_insn (tmp, copy_rtx (XEXP (mem, 0)));
1930 return change_address (mem, VOIDmode, tmp);
1931 }
1932
1933 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1934 matches CMP.
1935 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1936 conditional branch testing the result. */
1937
1938 static rtx
1939 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1940 rtx cmp, rtx new_rtx, machine_mode ccmode)
1941 {
1942 rtx cc;
1943
1944 mem = s390_legitimize_cs_operand (mem);
1945 cc = gen_rtx_REG (ccmode, CC_REGNUM);
1946 switch (GET_MODE (mem))
1947 {
1948 case E_SImode:
1949 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1950 new_rtx, cc));
1951 break;
1952 case E_DImode:
1953 emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1954 new_rtx, cc));
1955 break;
1956 case E_TImode:
1957 emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1958 new_rtx, cc));
1959 break;
1960 case E_QImode:
1961 case E_HImode:
1962 default:
1963 gcc_unreachable ();
1964 }
1965 return s390_emit_compare (code, cc, const0_rtx);
1966 }
1967
1968 /* Emit a jump instruction to TARGET and return it. If COND is
1969 NULL_RTX, emit an unconditional jump, else a conditional jump under
1970 condition COND. */
1971
1972 rtx_insn *
1973 s390_emit_jump (rtx target, rtx cond)
1974 {
1975 rtx insn;
1976
1977 target = gen_rtx_LABEL_REF (VOIDmode, target);
1978 if (cond)
1979 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1980
1981 insn = gen_rtx_SET (pc_rtx, target);
1982 return emit_jump_insn (insn);
1983 }
1984
1985 /* Return branch condition mask to implement a branch
1986 specified by CODE. Return -1 for invalid comparisons. */
1987
1988 int
1989 s390_branch_condition_mask (rtx code)
1990 {
1991 const int CC0 = 1 << 3;
1992 const int CC1 = 1 << 2;
1993 const int CC2 = 1 << 1;
1994 const int CC3 = 1 << 0;
1995
1996 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1997 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1998 gcc_assert (XEXP (code, 1) == const0_rtx
1999 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2000 && CONST_INT_P (XEXP (code, 1))));
2001
2002
2003 switch (GET_MODE (XEXP (code, 0)))
2004 {
2005 case E_CCZmode:
2006 case E_CCZ1mode:
2007 switch (GET_CODE (code))
2008 {
2009 case EQ: return CC0;
2010 case NE: return CC1 | CC2 | CC3;
2011 default: return -1;
2012 }
2013 break;
2014
2015 case E_CCT1mode:
2016 switch (GET_CODE (code))
2017 {
2018 case EQ: return CC1;
2019 case NE: return CC0 | CC2 | CC3;
2020 default: return -1;
2021 }
2022 break;
2023
2024 case E_CCT2mode:
2025 switch (GET_CODE (code))
2026 {
2027 case EQ: return CC2;
2028 case NE: return CC0 | CC1 | CC3;
2029 default: return -1;
2030 }
2031 break;
2032
2033 case E_CCT3mode:
2034 switch (GET_CODE (code))
2035 {
2036 case EQ: return CC3;
2037 case NE: return CC0 | CC1 | CC2;
2038 default: return -1;
2039 }
2040 break;
2041
2042 case E_CCLmode:
2043 switch (GET_CODE (code))
2044 {
2045 case EQ: return CC0 | CC2;
2046 case NE: return CC1 | CC3;
2047 default: return -1;
2048 }
2049 break;
2050
2051 case E_CCL1mode:
2052 switch (GET_CODE (code))
2053 {
2054 case LTU: return CC2 | CC3; /* carry */
2055 case GEU: return CC0 | CC1; /* no carry */
2056 default: return -1;
2057 }
2058 break;
2059
2060 case E_CCL2mode:
2061 switch (GET_CODE (code))
2062 {
2063 case GTU: return CC0 | CC1; /* borrow */
2064 case LEU: return CC2 | CC3; /* no borrow */
2065 default: return -1;
2066 }
2067 break;
2068
2069 case E_CCL3mode:
2070 switch (GET_CODE (code))
2071 {
2072 case EQ: return CC0 | CC2;
2073 case NE: return CC1 | CC3;
2074 case LTU: return CC1;
2075 case GTU: return CC3;
2076 case LEU: return CC1 | CC2;
2077 case GEU: return CC2 | CC3;
2078 default: return -1;
2079 }
2080
2081 case E_CCUmode:
2082 switch (GET_CODE (code))
2083 {
2084 case EQ: return CC0;
2085 case NE: return CC1 | CC2 | CC3;
2086 case LTU: return CC1;
2087 case GTU: return CC2;
2088 case LEU: return CC0 | CC1;
2089 case GEU: return CC0 | CC2;
2090 default: return -1;
2091 }
2092 break;
2093
2094 case E_CCURmode:
2095 switch (GET_CODE (code))
2096 {
2097 case EQ: return CC0;
2098 case NE: return CC2 | CC1 | CC3;
2099 case LTU: return CC2;
2100 case GTU: return CC1;
2101 case LEU: return CC0 | CC2;
2102 case GEU: return CC0 | CC1;
2103 default: return -1;
2104 }
2105 break;
2106
2107 case E_CCAPmode:
2108 switch (GET_CODE (code))
2109 {
2110 case EQ: return CC0;
2111 case NE: return CC1 | CC2 | CC3;
2112 case LT: return CC1 | CC3;
2113 case GT: return CC2;
2114 case LE: return CC0 | CC1 | CC3;
2115 case GE: return CC0 | CC2;
2116 default: return -1;
2117 }
2118 break;
2119
2120 case E_CCANmode:
2121 switch (GET_CODE (code))
2122 {
2123 case EQ: return CC0;
2124 case NE: return CC1 | CC2 | CC3;
2125 case LT: return CC1;
2126 case GT: return CC2 | CC3;
2127 case LE: return CC0 | CC1;
2128 case GE: return CC0 | CC2 | CC3;
2129 default: return -1;
2130 }
2131 break;
2132
2133 case E_CCOmode:
2134 switch (GET_CODE (code))
2135 {
2136 case EQ: return CC0 | CC1 | CC2;
2137 case NE: return CC3;
2138 default: return -1;
2139 }
2140 break;
2141
2142 case E_CCSmode:
2143 case E_CCSFPSmode:
2144 switch (GET_CODE (code))
2145 {
2146 case EQ: return CC0;
2147 case NE: return CC1 | CC2 | CC3;
2148 case LT: return CC1;
2149 case GT: return CC2;
2150 case LE: return CC0 | CC1;
2151 case GE: return CC0 | CC2;
2152 case UNORDERED: return CC3;
2153 case ORDERED: return CC0 | CC1 | CC2;
2154 case UNEQ: return CC0 | CC3;
2155 case UNLT: return CC1 | CC3;
2156 case UNGT: return CC2 | CC3;
2157 case UNLE: return CC0 | CC1 | CC3;
2158 case UNGE: return CC0 | CC2 | CC3;
2159 case LTGT: return CC1 | CC2;
2160 default: return -1;
2161 }
2162 break;
2163
2164 case E_CCSRmode:
2165 switch (GET_CODE (code))
2166 {
2167 case EQ: return CC0;
2168 case NE: return CC2 | CC1 | CC3;
2169 case LT: return CC2;
2170 case GT: return CC1;
2171 case LE: return CC0 | CC2;
2172 case GE: return CC0 | CC1;
2173 case UNORDERED: return CC3;
2174 case ORDERED: return CC0 | CC2 | CC1;
2175 case UNEQ: return CC0 | CC3;
2176 case UNLT: return CC2 | CC3;
2177 case UNGT: return CC1 | CC3;
2178 case UNLE: return CC0 | CC2 | CC3;
2179 case UNGE: return CC0 | CC1 | CC3;
2180 case LTGT: return CC2 | CC1;
2181 default: return -1;
2182 }
2183 break;
2184
2185 /* Vector comparison modes. */
2186 /* CC2 will never be set. It is, however, part of the negated
2187 masks. */
2188 case E_CCVIALLmode:
2189 switch (GET_CODE (code))
2190 {
2191 case EQ:
2192 case GTU:
2193 case GT:
2194 case GE: return CC0;
2195 /* The inverted modes are in fact *any* modes. */
2196 case NE:
2197 case LEU:
2198 case LE:
2199 case LT: return CC3 | CC1 | CC2;
2200 default: return -1;
2201 }
2202
2203 case E_CCVIANYmode:
2204 switch (GET_CODE (code))
2205 {
2206 case EQ:
2207 case GTU:
2208 case GT:
2209 case GE: return CC0 | CC1;
2210 /* The inverted modes are in fact *all* modes. */
2211 case NE:
2212 case LEU:
2213 case LE:
2214 case LT: return CC3 | CC2;
2215 default: return -1;
2216 }
2217 case E_CCVFALLmode:
2218 switch (GET_CODE (code))
2219 {
2220 case EQ:
2221 case GT:
2222 case GE: return CC0;
2223 /* The inverted modes are in fact *any* modes. */
2224 case NE:
2225 case UNLE:
2226 case UNLT: return CC3 | CC1 | CC2;
2227 default: return -1;
2228 }
2229
2230 case E_CCVFANYmode:
2231 switch (GET_CODE (code))
2232 {
2233 case EQ:
2234 case GT:
2235 case GE: return CC0 | CC1;
2236 /* The inverted modes are in fact *all* modes. */
2237 case NE:
2238 case UNLE:
2239 case UNLT: return CC3 | CC2;
2240 default: return -1;
2241 }
2242
2243 case E_CCRAWmode:
2244 switch (GET_CODE (code))
2245 {
2246 case EQ:
2247 return INTVAL (XEXP (code, 1));
2248 case NE:
2249 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2250 default:
2251 gcc_unreachable ();
2252 }
2253
2254 default:
2255 return -1;
2256 }
2257 }
2258
2259
2260 /* Return branch condition mask to implement a compare and branch
2261 specified by CODE. Return -1 for invalid comparisons. */
2262
2263 int
2264 s390_compare_and_branch_condition_mask (rtx code)
2265 {
2266 const int CC0 = 1 << 3;
2267 const int CC1 = 1 << 2;
2268 const int CC2 = 1 << 1;
2269
2270 switch (GET_CODE (code))
2271 {
2272 case EQ:
2273 return CC0;
2274 case NE:
2275 return CC1 | CC2;
2276 case LT:
2277 case LTU:
2278 return CC1;
2279 case GT:
2280 case GTU:
2281 return CC2;
2282 case LE:
2283 case LEU:
2284 return CC0 | CC1;
2285 case GE:
2286 case GEU:
2287 return CC0 | CC2;
2288 default:
2289 gcc_unreachable ();
2290 }
2291 return -1;
2292 }
2293
2294 /* If INV is false, return assembler mnemonic string to implement
2295 a branch specified by CODE. If INV is true, return mnemonic
2296 for the corresponding inverted branch. */
2297
2298 static const char *
2299 s390_branch_condition_mnemonic (rtx code, int inv)
2300 {
2301 int mask;
2302
2303 static const char *const mnemonic[16] =
2304 {
2305 NULL, "o", "h", "nle",
2306 "l", "nhe", "lh", "ne",
2307 "e", "nlh", "he", "nl",
2308 "le", "nh", "no", NULL
2309 };
2310
2311 if (GET_CODE (XEXP (code, 0)) == REG
2312 && REGNO (XEXP (code, 0)) == CC_REGNUM
2313 && (XEXP (code, 1) == const0_rtx
2314 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2315 && CONST_INT_P (XEXP (code, 1)))))
2316 mask = s390_branch_condition_mask (code);
2317 else
2318 mask = s390_compare_and_branch_condition_mask (code);
2319
2320 gcc_assert (mask >= 0);
2321
2322 if (inv)
2323 mask ^= 15;
2324
2325 gcc_assert (mask >= 1 && mask <= 14);
2326
2327 return mnemonic[mask];
2328 }
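
/* Illustrative sketch, not part of the original file: how the CC mask and
   the extended mnemonic relate for a plain EQ test of the CC register in
   CCZmode.  EQ selects CC0, i.e. mask 8, which prints as "e"; the inverted
   mask 8 ^ 0xf = 7 prints as "ne".  The helper name below is hypothetical
   and only meant as a usage example.  */
#if 0
static void
s390_example_branch_mask (void)
{
  /* (eq (reg:CCZ CC_REGNUM) (const_int 0))  */
  rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
  rtx cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);

  gcc_assert (s390_branch_condition_mask (cond) == 8);
  gcc_assert (strcmp (s390_branch_condition_mnemonic (cond, 0), "e") == 0);
  gcc_assert (strcmp (s390_branch_condition_mnemonic (cond, 1), "ne") == 0);
}
#endif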
2329
2330 /* Return the part of op which has a value different from def.
2331 The size of the part is determined by mode.
2332 Use this function only if you already know that op really
2333 contains such a part. */
2334
2335 unsigned HOST_WIDE_INT
2336 s390_extract_part (rtx op, machine_mode mode, int def)
2337 {
2338 unsigned HOST_WIDE_INT value = 0;
2339 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2340 int part_bits = GET_MODE_BITSIZE (mode);
2341 unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2342 int i;
2343
2344 for (i = 0; i < max_parts; i++)
2345 {
2346 if (i == 0)
2347 value = UINTVAL (op);
2348 else
2349 value >>= part_bits;
2350
2351 if ((value & part_mask) != (def & part_mask))
2352 return value & part_mask;
2353 }
2354
2355 gcc_unreachable ();
2356 }
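
/* Illustrative sketch (hypothetical helper, not part of the original file):
   for the constant 0x0000ff00 the first QImode part (the low byte) equals
   the default 0, the second byte differs, so s390_extract_part returns
   0xff.  */
#if 0
static void
s390_example_extract_part (void)
{
  gcc_assert (s390_extract_part (GEN_INT (0x0000ff00), QImode, 0) == 0xff);
}
#endif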
2357
2358 /* If OP is an integer constant of mode MODE with exactly one
2359 part of mode PART_MODE unequal to DEF, return the number of that
2360 part. Otherwise, return -1. */
2361
2362 int
2363 s390_single_part (rtx op,
2364 machine_mode mode,
2365 machine_mode part_mode,
2366 int def)
2367 {
2368 unsigned HOST_WIDE_INT value = 0;
2369 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2370 unsigned HOST_WIDE_INT part_mask
2371 = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2372 int i, part = -1;
2373
2374 if (GET_CODE (op) != CONST_INT)
2375 return -1;
2376
2377 for (i = 0; i < n_parts; i++)
2378 {
2379 if (i == 0)
2380 value = UINTVAL (op);
2381 else
2382 value >>= GET_MODE_BITSIZE (part_mode);
2383
2384 if ((value & part_mask) != (def & part_mask))
2385 {
2386 if (part != -1)
2387 return -1;
2388 else
2389 part = i;
2390 }
2391 }
2392 return part == -1 ? -1 : n_parts - 1 - part;
2393 }
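
/* Illustrative sketch (hypothetical helper): for the SImode constant
   0x0000ff00 exactly one QImode part differs from 0.  Counting the bytes
   from the most significant end (00 00 ff 00), that part has index 2,
   which is the value returned.  */
#if 0
static void
s390_example_single_part (void)
{
  gcc_assert (s390_single_part (GEN_INT (0x0000ff00), SImode, QImode, 0) == 2);
}
#endif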
2394
2395 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2396 bits and no other bits are set in (the lower SIZE bits of) IN.
2397
2398 PSTART and PEND can be used to obtain the start and end
2399 position (inclusive) of the bitfield relative to 64
2400 bits. *PSTART / *PEND give the position of the first/last bit
2401 of the bitfield counting from the highest order bit starting
2402 with zero. */
2403
2404 bool
2405 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2406 int *pstart, int *pend)
2407 {
2408 int start;
2409 int end = -1;
2410 int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2411 int highbit = HOST_BITS_PER_WIDE_INT - size;
2412 unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2413
2414 gcc_assert (!!pstart == !!pend);
2415 for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2416 if (end == -1)
2417 {
2418 /* Look for the rightmost bit of a contiguous range of ones. */
2419 if (bitmask & in)
2420 /* Found it. */
2421 end = start;
2422 }
2423 else
2424 {
2425 /* Look for the first zero bit after the range of ones. */
2426 if (! (bitmask & in))
2427 /* Found it. */
2428 break;
2429 }
2430 /* We're one past the last one-bit. */
2431 start++;
2432
2433 if (end == -1)
2434 /* No one-bits found. */
2435 return false;
2436
2437 if (start > highbit)
2438 {
2439 unsigned HOST_WIDE_INT mask;
2440
2441 /* Calculate a mask for all bits beyond the contiguous bits. */
2442 mask = ((~HOST_WIDE_INT_0U >> highbit)
2443 & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2444 if (mask & in)
2445 /* There are more bits set beyond the first range of one bits. */
2446 return false;
2447 }
2448
2449 if (pstart)
2450 {
2451 *pstart = start;
2452 *pend = end;
2453 }
2454
2455 return true;
2456 }
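
/* Illustrative sketch (hypothetical helper): 0x0ff0 within a 16-bit field
   is a single contiguous range of ones.  Relative to 64 bits and counted
   from the most significant bit, the range starts at bit 52 and ends at
   bit 59 (inclusive).  */
#if 0
static void
s390_example_contiguous_bitmask (void)
{
  int start, end;
  gcc_assert (s390_contiguous_bitmask_nowrap_p (0x0ff0, 16, &start, &end));
  gcc_assert (start == 52 && end == 59);
}
#endif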
2457
2458 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2459 if ~IN contains a contiguous bitfield. In that case, *END is <
2460 *START.
2461
2462 If WRAP_P is true, a bitmask that wraps around is also tested.
2463 When a wraparound occurs, *START is greater than *END (in
2464 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2465 part of the range. If WRAP_P is false, no wraparound is
2466 tested. */
2467
2468 bool
2469 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2470 int size, int *start, int *end)
2471 {
2472 int bs = HOST_BITS_PER_WIDE_INT;
2473 bool b;
2474
2475 gcc_assert (!!start == !!end);
2476 if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2477 /* This cannot be expressed as a contiguous bitmask. Exit early because
2478 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2479 a valid bitmask. */
2480 return false;
2481 b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2482 if (b)
2483 return true;
2484 if (! wrap_p)
2485 return false;
2486 b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2487 if (b && start)
2488 {
2489 int s = *start;
2490 int e = *end;
2491
2492 gcc_assert (s >= 1);
2493 *start = ((e + 1) & (bs - 1));
2494 *end = ((s - 1 + bs) & (bs - 1));
2495 }
2496
2497 return b;
2498 }
2499
2500 /* Return true if OP contains the same contiguous bitfield in *all*
2501 its elements. START and END can be used to obtain the start and
2502 end position of the bitfield.
2503
2504 START/END give the position of the first/last bit of the bitfield
2505 counting from the lowest order bit starting with zero. In order to
2506 use these values for S/390 instructions this has to be converted to
2507 "bits big endian" style. */
2508
2509 bool
2510 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2511 {
2512 unsigned HOST_WIDE_INT mask;
2513 int size;
2514 rtx elt;
2515 bool b;
2516
2517 /* Handle floats by bitcasting them to ints. */
2518 op = gen_lowpart (related_int_vector_mode (GET_MODE (op)).require (), op);
2519
2520 gcc_assert (!!start == !!end);
2521 if (!const_vec_duplicate_p (op, &elt)
2522 || !CONST_INT_P (elt))
2523 return false;
2524
2525 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2526
2527 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2528 if (size > 64)
2529 return false;
2530
2531 mask = UINTVAL (elt);
2532
2533 b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2534 if (b)
2535 {
2536 if (start)
2537 {
2538 *start -= (HOST_BITS_PER_WIDE_INT - size);
2539 *end -= (HOST_BITS_PER_WIDE_INT - size);
2540 }
2541 return true;
2542 }
2543 else
2544 return false;
2545 }
2546
2547 /* Return true if C consists only of byte chunks being either 0 or
2548 0xff. If MASK is != NULL, a byte mask is generated which is
2549 appropriate for the vector generate byte mask instruction. */
2550
2551 bool
2552 s390_bytemask_vector_p (rtx op, unsigned *mask)
2553 {
2554 int i;
2555 unsigned tmp_mask = 0;
2556 int nunit, unit_size;
2557
2558 if (!VECTOR_MODE_P (GET_MODE (op))
2559 || GET_CODE (op) != CONST_VECTOR
2560 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2561 return false;
2562
2563 nunit = GET_MODE_NUNITS (GET_MODE (op));
2564 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2565
2566 for (i = 0; i < nunit; i++)
2567 {
2568 unsigned HOST_WIDE_INT c;
2569 int j;
2570
2571 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2572 return false;
2573
2574 c = UINTVAL (XVECEXP (op, 0, i));
2575 for (j = 0; j < unit_size; j++)
2576 {
2577 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2578 return false;
2579 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2580 c = c >> BITS_PER_UNIT;
2581 }
2582 }
2583
2584 if (mask != NULL)
2585 *mask = tmp_mask;
2586
2587 return true;
2588 }
2589
2590 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2591 equivalent to a shift followed by the AND. In particular, CONTIG
2592 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2593 for ROTL indicate a rotate to the right. */
2594
2595 bool
2596 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2597 {
2598 int start, end;
2599 bool ok;
2600
2601 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2602 gcc_assert (ok);
2603
2604 if (rotl >= 0)
2605 return (64 - end >= rotl);
2606 else
2607 {
2608 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2609 DImode. */
2610 rotl = -rotl + (64 - bitsize);
2611 return (start >= rotl);
2612 }
2613 }
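
/* Illustrative sketch (hypothetical helper): within an 8-bit field, a
   rotate left by 4 combined with an AND of 0xf0 passes the check because
   the mask stays clear of the bits that wrap around, whereas an AND of
   0xff does not.  */
#if 0
static void
s390_example_extzv_shift (void)
{
  gcc_assert (s390_extzv_shift_ok (8, 4, 0xf0));
  gcc_assert (!s390_extzv_shift_ok (8, 4, 0xff));
}
#endif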
2614
2615 /* Check whether we can (and want to) split a double-word
2616 move in mode MODE from SRC to DST into two single-word
2617 moves, moving the subword FIRST_SUBWORD first. */
2618
2619 bool
2620 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2621 {
2622 /* Floating point and vector registers cannot be split. */
2623 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2624 return false;
2625
2626 /* Non-offsettable memory references cannot be split. */
2627 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2628 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2629 return false;
2630
2631 /* Moving the first subword must not clobber a register
2632 needed to move the second subword. */
2633 if (register_operand (dst, mode))
2634 {
2635 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2636 if (reg_overlap_mentioned_p (subreg, src))
2637 return false;
2638 }
2639
2640 return true;
2641 }
2642
2643 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2644 and [MEM2, MEM2 + SIZE] do overlap and false
2645 otherwise. */
2646
2647 bool
2648 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2649 {
2650 rtx addr1, addr2, addr_delta;
2651 HOST_WIDE_INT delta;
2652
2653 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2654 return true;
2655
2656 if (size == 0)
2657 return false;
2658
2659 addr1 = XEXP (mem1, 0);
2660 addr2 = XEXP (mem2, 0);
2661
2662 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2663
2664 /* This overlapping check is used by peepholes merging memory block operations.
2665 Overlapping operations would otherwise be recognized by the S/390 hardware
2666 and would fall back to a slower implementation. Allowing overlapping
2667 operations would lead to slow code but not to wrong code. Therefore we are
2668 somewhat optimistic if we cannot prove that the memory blocks are
2669 overlapping.
2670 That's why we return false here although this may accept operations on
2671 overlapping memory areas. */
2672 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2673 return false;
2674
2675 delta = INTVAL (addr_delta);
2676
2677 if (delta == 0
2678 || (delta > 0 && delta < size)
2679 || (delta < 0 && -delta < size))
2680 return true;
2681
2682 return false;
2683 }
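
/* Illustrative sketch (hypothetical helper, BASE assumed to be a Pmode
   register): two accesses at BASE and BASE + 4 overlap for an 8 byte
   block size but not for a 4 byte one.  */
#if 0
static void
s390_example_overlap (rtx base)
{
  rtx m1 = gen_rtx_MEM (BLKmode, base);
  rtx m2 = gen_rtx_MEM (BLKmode, plus_constant (Pmode, base, 4));

  gcc_assert (s390_overlap_p (m1, m2, 8));   /* [0, 8) and [4, 12) overlap.  */
  gcc_assert (!s390_overlap_p (m1, m2, 4));  /* [0, 4) and [4, 8) do not.  */
}
#endif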
2684
2685 /* Check whether the address of memory reference MEM2 equals exactly
2686 the address of memory reference MEM1 plus DELTA. Return true if
2687 we can prove this to be the case, false otherwise. */
2688
2689 bool
2690 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2691 {
2692 rtx addr1, addr2, addr_delta;
2693
2694 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2695 return false;
2696
2697 addr1 = XEXP (mem1, 0);
2698 addr2 = XEXP (mem2, 0);
2699
2700 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2701 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2702 return false;
2703
2704 return true;
2705 }
2706
2707 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2708
2709 void
2710 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2711 rtx *operands)
2712 {
2713 machine_mode wmode = mode;
2714 rtx dst = operands[0];
2715 rtx src1 = operands[1];
2716 rtx src2 = operands[2];
2717 rtx op, clob, tem;
2718
2719 /* If we cannot handle the operation directly, use a temp register. */
2720 if (!s390_logical_operator_ok_p (operands))
2721 dst = gen_reg_rtx (mode);
2722
2723 /* QImode and HImode patterns make sense only if we have a destination
2724 in memory. Otherwise perform the operation in SImode. */
2725 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2726 wmode = SImode;
2727
2728 /* Widen operands if required. */
2729 if (mode != wmode)
2730 {
2731 if (GET_CODE (dst) == SUBREG
2732 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2733 dst = tem;
2734 else if (REG_P (dst))
2735 dst = gen_rtx_SUBREG (wmode, dst, 0);
2736 else
2737 dst = gen_reg_rtx (wmode);
2738
2739 if (GET_CODE (src1) == SUBREG
2740 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2741 src1 = tem;
2742 else if (GET_MODE (src1) != VOIDmode)
2743 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2744
2745 if (GET_CODE (src2) == SUBREG
2746 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2747 src2 = tem;
2748 else if (GET_MODE (src2) != VOIDmode)
2749 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2750 }
2751
2752 /* Emit the instruction. */
2753 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2754 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2755 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2756
2757 /* Fix up the destination if needed. */
2758 if (dst != operands[0])
2759 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2760 }
2761
2762 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2763
2764 bool
2765 s390_logical_operator_ok_p (rtx *operands)
2766 {
2767 /* If the destination operand is in memory, it needs to coincide
2768 with one of the source operands. After reload, it has to be
2769 the first source operand. */
2770 if (GET_CODE (operands[0]) == MEM)
2771 return rtx_equal_p (operands[0], operands[1])
2772 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2773
2774 return true;
2775 }
2776
2777 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2778 operand IMMOP to switch from SS to SI type instructions. */
2779
2780 void
2781 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2782 {
2783 int def = code == AND ? -1 : 0;
2784 HOST_WIDE_INT mask;
2785 int part;
2786
2787 gcc_assert (GET_CODE (*memop) == MEM);
2788 gcc_assert (!MEM_VOLATILE_P (*memop));
2789
2790 mask = s390_extract_part (*immop, QImode, def);
2791 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2792 gcc_assert (part >= 0);
2793
2794 *memop = adjust_address (*memop, QImode, part);
2795 *immop = gen_int_mode (mask, QImode);
2796 }
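
/* Illustrative sketch (hypothetical helper, MEM_SI assumed to be a
   non-volatile SImode MEM): an SImode AND with 0xffff00ff clears exactly
   one byte, so the operation is narrowed to a QImode access of big-endian
   byte 2 with the immediate 0.  */
#if 0
static void
s390_example_narrow_logical (rtx mem_si)
{
  rtx memop = mem_si;
  rtx immop = gen_int_mode (0xffff00ff, SImode);

  s390_narrow_logical_operator (AND, &memop, &immop);
  gcc_assert (GET_MODE (memop) == QImode && immop == const0_rtx);
}
#endif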
2797
2798
2799 /* How to allocate a 'struct machine_function'. */
2800
2801 static struct machine_function *
2802 s390_init_machine_status (void)
2803 {
2804 return ggc_cleared_alloc<machine_function> ();
2805 }
2806
2807 /* Map for smallest class containing reg regno. */
2808
2809 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2810 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2811 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2812 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2813 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2814 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2815 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2816 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2817 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2818 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2819 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2820 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2821 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2822 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2823 VEC_REGS, VEC_REGS /* 52 */
2824 };
2825
2826 /* Return attribute type of insn. */
2827
2828 static enum attr_type
2829 s390_safe_attr_type (rtx_insn *insn)
2830 {
2831 if (recog_memoized (insn) >= 0)
2832 return get_attr_type (insn);
2833 else
2834 return TYPE_NONE;
2835 }
2836
2837 /* Return attribute relative_long of insn. */
2838
2839 static bool
2840 s390_safe_relative_long_p (rtx_insn *insn)
2841 {
2842 if (recog_memoized (insn) >= 0)
2843 return get_attr_relative_long (insn) == RELATIVE_LONG_YES;
2844 else
2845 return false;
2846 }
2847
2848 /* Return true if DISP is a valid short displacement. */
2849
2850 static bool
2851 s390_short_displacement (rtx disp)
2852 {
2853 /* No displacement is OK. */
2854 if (!disp)
2855 return true;
2856
2857 /* Without the long displacement facility we don't need to
2858 distinguish between long and short displacements.
2859 if (!TARGET_LONG_DISPLACEMENT)
2860 return true;
2861
2862 /* Integer displacement in range. */
2863 if (GET_CODE (disp) == CONST_INT)
2864 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2865
2866 /* GOT offset is not OK, the GOT can be large. */
2867 if (GET_CODE (disp) == CONST
2868 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2869 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2870 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2871 return false;
2872
2873 /* All other symbolic constants are literal pool references,
2874 which are OK as the literal pool must be small. */
2875 if (GET_CODE (disp) == CONST)
2876 return true;
2877
2878 return false;
2879 }
2880
2881 /* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp').
2882 If successful, also determines the
2883 following characteristics of `ref': `is_ptr' - whether it can be an
2884 LA argument, `is_base_ptr' - whether the resulting base is a well-known
2885 base register (stack/frame pointer, etc), `is_pool_ptr` - whether it is
2886 considered a literal pool pointer for purposes of avoiding two different
2887 literal pool pointers per insn during or after reload (`B' constraint). */
2888 static bool
2889 s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr,
2890 bool *is_base_ptr, bool *is_pool_ptr)
2891 {
2892 if (!*ref)
2893 return true;
2894
2895 if (GET_CODE (*ref) == UNSPEC)
2896 switch (XINT (*ref, 1))
2897 {
2898 case UNSPEC_LTREF:
2899 if (!*disp)
2900 *disp = gen_rtx_UNSPEC (Pmode,
2901 gen_rtvec (1, XVECEXP (*ref, 0, 0)),
2902 UNSPEC_LTREL_OFFSET);
2903 else
2904 return false;
2905
2906 *ref = XVECEXP (*ref, 0, 1);
2907 break;
2908
2909 default:
2910 return false;
2911 }
2912
2913 if (!REG_P (*ref) || GET_MODE (*ref) != Pmode)
2914 return false;
2915
2916 if (REGNO (*ref) == STACK_POINTER_REGNUM
2917 || REGNO (*ref) == FRAME_POINTER_REGNUM
2918 || ((reload_completed || reload_in_progress)
2919 && frame_pointer_needed
2920 && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM)
2921 || REGNO (*ref) == ARG_POINTER_REGNUM
2922 || (flag_pic
2923 && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM))
2924 *is_ptr = *is_base_ptr = true;
2925
2926 if ((reload_completed || reload_in_progress)
2927 && *ref == cfun->machine->base_reg)
2928 *is_ptr = *is_base_ptr = *is_pool_ptr = true;
2929
2930 return true;
2931 }
2932
2933 /* Decompose a RTL expression ADDR for a memory address into
2934 its components, returned in OUT.
2935
2936 Returns false if ADDR is not a valid memory address, true
2937 otherwise. If OUT is NULL, don't return the components,
2938 but check for validity only.
2939
2940 Note: Only addresses in canonical form are recognized.
2941 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2942 canonical form so that they will be recognized. */
2943
2944 static int
2945 s390_decompose_address (rtx addr, struct s390_address *out)
2946 {
2947 HOST_WIDE_INT offset = 0;
2948 rtx base = NULL_RTX;
2949 rtx indx = NULL_RTX;
2950 rtx disp = NULL_RTX;
2951 rtx orig_disp;
2952 bool pointer = false;
2953 bool base_ptr = false;
2954 bool indx_ptr = false;
2955 bool literal_pool = false;
2956
2957 /* We may need to substitute the literal pool base register into the address
2958 below. However, at this point we do not know which register is going to
2959 be used as base, so we substitute the arg pointer register. This is going
2960 to be treated as holding a pointer below -- it shouldn't be used for any
2961 other purpose. */
2962 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2963
2964 /* Decompose address into base + index + displacement. */
2965
2966 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2967 base = addr;
2968
2969 else if (GET_CODE (addr) == PLUS)
2970 {
2971 rtx op0 = XEXP (addr, 0);
2972 rtx op1 = XEXP (addr, 1);
2973 enum rtx_code code0 = GET_CODE (op0);
2974 enum rtx_code code1 = GET_CODE (op1);
2975
2976 if (code0 == REG || code0 == UNSPEC)
2977 {
2978 if (code1 == REG || code1 == UNSPEC)
2979 {
2980 indx = op0; /* index + base */
2981 base = op1;
2982 }
2983
2984 else
2985 {
2986 base = op0; /* base + displacement */
2987 disp = op1;
2988 }
2989 }
2990
2991 else if (code0 == PLUS)
2992 {
2993 indx = XEXP (op0, 0); /* index + base + disp */
2994 base = XEXP (op0, 1);
2995 disp = op1;
2996 }
2997
2998 else
2999 {
3000 return false;
3001 }
3002 }
3003
3004 else
3005 disp = addr; /* displacement */
3006
3007 /* Extract integer part of displacement. */
3008 orig_disp = disp;
3009 if (disp)
3010 {
3011 if (GET_CODE (disp) == CONST_INT)
3012 {
3013 offset = INTVAL (disp);
3014 disp = NULL_RTX;
3015 }
3016 else if (GET_CODE (disp) == CONST
3017 && GET_CODE (XEXP (disp, 0)) == PLUS
3018 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
3019 {
3020 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
3021 disp = XEXP (XEXP (disp, 0), 0);
3022 }
3023 }
3024
3025 /* Strip off CONST here to avoid special case tests later. */
3026 if (disp && GET_CODE (disp) == CONST)
3027 disp = XEXP (disp, 0);
3028
3029 /* We can convert literal pool addresses to
3030 displacements by basing them off the base register. */
3031 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
3032 {
3033 if (base || indx)
3034 return false;
3035
3036 base = fake_pool_base, literal_pool = true;
3037
3038 /* Mark up the displacement. */
3039 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
3040 UNSPEC_LTREL_OFFSET);
3041 }
3042
3043 /* Validate base register. */
3044 if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr,
3045 &literal_pool))
3046 return false;
3047
3048 /* Validate index register. */
3049 if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr,
3050 &literal_pool))
3051 return false;
3052
3053 /* Prefer to use pointer as base, not index. */
3054 if (base && indx && !base_ptr
3055 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
3056 {
3057 rtx tmp = base;
3058 base = indx;
3059 indx = tmp;
3060 }
3061
3062 /* Validate displacement. */
3063 if (!disp)
3064 {
3065 /* If virtual registers are involved, the displacement will change later
3066 anyway as the virtual registers get eliminated. This could make a
3067 valid displacement invalid, but it is more likely to make an invalid
3068 displacement valid, because we sometimes access the register save area
3069 via negative offsets to one of those registers.
3070 Thus we don't check the displacement for validity here. If after
3071 elimination the displacement turns out to be invalid after all,
3072 this is fixed up by reload in any case. */
3073 /* LRA always keeps displacements up to date, and we need to
3074 know that the displacement is right throughout LRA, not only at
3075 the final elimination. */
3076 if (lra_in_progress
3077 || (base != arg_pointer_rtx
3078 && indx != arg_pointer_rtx
3079 && base != return_address_pointer_rtx
3080 && indx != return_address_pointer_rtx
3081 && base != frame_pointer_rtx
3082 && indx != frame_pointer_rtx
3083 && base != virtual_stack_vars_rtx
3084 && indx != virtual_stack_vars_rtx))
3085 if (!DISP_IN_RANGE (offset))
3086 return false;
3087 }
3088 else
3089 {
3090 /* All the special cases are pointers. */
3091 pointer = true;
3092
3093 /* In the small-PIC case, the linker converts @GOT
3094 and @GOTNTPOFF offsets to possible displacements. */
3095 if (GET_CODE (disp) == UNSPEC
3096 && (XINT (disp, 1) == UNSPEC_GOT
3097 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
3098 && flag_pic == 1)
3099 {
3100 ;
3101 }
3102
3103 /* Accept pool label offsets. */
3104 else if (GET_CODE (disp) == UNSPEC
3105 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
3106 ;
3107
3108 /* Accept literal pool references. */
3109 else if (GET_CODE (disp) == UNSPEC
3110 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
3111 {
3112 /* In case CSE pulled a non literal pool reference out of
3113 the pool we have to reject the address. This is
3114 especially important when loading the GOT pointer on non
3115 zarch CPUs. In this case the literal pool contains an lt
3116 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
3117 will most likely exceed the displacement. */
3118 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
3119 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
3120 return false;
3121
3122 orig_disp = gen_rtx_CONST (Pmode, disp);
3123 if (offset)
3124 {
3125 /* If we have an offset, make sure it does not
3126 exceed the size of the constant pool entry.
3127 Otherwise we might generate an out-of-range
3128 displacement for the base register form. */
3129 rtx sym = XVECEXP (disp, 0, 0);
3130 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
3131 return false;
3132
3133 orig_disp = plus_constant (Pmode, orig_disp, offset);
3134 }
3135 }
3136
3137 else
3138 return false;
3139 }
3140
3141 if (!base && !indx)
3142 pointer = true;
3143
3144 if (out)
3145 {
3146 out->base = base;
3147 out->indx = indx;
3148 out->disp = orig_disp;
3149 out->pointer = pointer;
3150 out->literal_pool = literal_pool;
3151 }
3152
3153 return true;
3154 }
3155
3156 /* Decompose a RTL expression OP for an address style operand into its
3157 components, and return the base register in BASE and the offset in
3158 OFFSET. While OP looks like an address it is never supposed to be
3159 used as such.
3160
3161 Return true if OP is a valid address operand, false if not. */
3162
3163 bool
3164 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3165 HOST_WIDE_INT *offset)
3166 {
3167 rtx off = NULL_RTX;
3168
3169 /* We can have an integer constant, an address register,
3170 or a sum of the two. */
3171 if (CONST_SCALAR_INT_P (op))
3172 {
3173 off = op;
3174 op = NULL_RTX;
3175 }
3176 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3177 {
3178 off = XEXP (op, 1);
3179 op = XEXP (op, 0);
3180 }
3181 while (op && GET_CODE (op) == SUBREG)
3182 op = SUBREG_REG (op);
3183
3184 if (op && GET_CODE (op) != REG)
3185 return false;
3186
3187 if (offset)
3188 {
3189 if (off == NULL_RTX)
3190 *offset = 0;
3191 else if (CONST_INT_P (off))
3192 *offset = INTVAL (off);
3193 else if (CONST_WIDE_INT_P (off))
3194 /* The offset will be cut down to 12 bits anyway, so just take
3195 the lowest-order chunk of the wide int. */
3196 *offset = CONST_WIDE_INT_ELT (off, 0);
3197 else
3198 gcc_unreachable ();
3199 }
3200 if (base)
3201 *base = op;
3202
3203 return true;
3204 }
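
/* Illustrative sketch (hypothetical helper, REG assumed to be an SImode
   register): an address-style operand (plus (reg) (const_int 15)) is
   decomposed into the register as base and 15 as offset.  */
#if 0
static void
s390_example_addrstyle (rtx reg)
{
  rtx op = gen_rtx_PLUS (SImode, reg, GEN_INT (15));
  rtx base;
  HOST_WIDE_INT offset;

  gcc_assert (s390_decompose_addrstyle_without_index (op, &base, &offset));
  gcc_assert (base == reg && offset == 15);
}
#endif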
3205
3206 /* Check that OP is a valid shift count operand.
3207 It should be of the following structure:
3208 (subreg (and (plus (reg imm_op)) 2^k-1) 7)
3209 where the subreg, and, and plus are optional.
3210
3211 If IMPLICIT_MASK is > 0 and OP contains an
3212 (AND ... immediate),
3213 it is checked whether IMPLICIT_MASK and the immediate match.
3214 Otherwise, no checking is performed.
3215 */
3216 bool
3217 s390_valid_shift_count (rtx op, HOST_WIDE_INT implicit_mask)
3218 {
3219 /* Strip subreg. */
3220 while (GET_CODE (op) == SUBREG && subreg_lowpart_p (op))
3221 op = XEXP (op, 0);
3222
3223 /* Check for an and with proper constant. */
3224 if (GET_CODE (op) == AND)
3225 {
3226 rtx op1 = XEXP (op, 0);
3227 rtx imm = XEXP (op, 1);
3228
3229 if (GET_CODE (op1) == SUBREG && subreg_lowpart_p (op1))
3230 op1 = XEXP (op1, 0);
3231
3232 if (!(register_operand (op1, GET_MODE (op1)) || GET_CODE (op1) == PLUS))
3233 return false;
3234
3235 if (!immediate_operand (imm, GET_MODE (imm)))
3236 return false;
3237
3238 HOST_WIDE_INT val = INTVAL (imm);
3239 if (implicit_mask > 0
3240 && (val & implicit_mask) != implicit_mask)
3241 return false;
3242
3243 op = op1;
3244 }
3245
3246 /* Check the rest. */
3247 return s390_decompose_addrstyle_without_index (op, NULL, NULL);
3248 }
3249
3250 /* Return true if OP is a valid address without index. */
3251
3252 bool
3253 s390_legitimate_address_without_index_p (rtx op)
3254 {
3255 struct s390_address addr;
3256
3257 if (!s390_decompose_address (XEXP (op, 0), &addr))
3258 return false;
3259 if (addr.indx)
3260 return false;
3261
3262 return true;
3263 }
3264
3265
3266 /* Return TRUE if ADDR is an operand valid for a load/store relative
3267 instruction. Be aware that the alignment of the operand needs to
3268 be checked separately.
3269 Valid addresses are single references or a sum of a reference and a
3270 constant integer. Return these parts in SYMREF and ADDEND. You can
3271 pass NULL in REF and/or ADDEND if you are not interested in these
3272 values. */
3273
3274 static bool
3275 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3276 {
3277 HOST_WIDE_INT tmpaddend = 0;
3278
3279 if (GET_CODE (addr) == CONST)
3280 addr = XEXP (addr, 0);
3281
3282 if (GET_CODE (addr) == PLUS)
3283 {
3284 if (!CONST_INT_P (XEXP (addr, 1)))
3285 return false;
3286
3287 tmpaddend = INTVAL (XEXP (addr, 1));
3288 addr = XEXP (addr, 0);
3289 }
3290
3291 if (GET_CODE (addr) == SYMBOL_REF
3292 || (GET_CODE (addr) == UNSPEC
3293 && (XINT (addr, 1) == UNSPEC_GOTENT
3294 || XINT (addr, 1) == UNSPEC_PLT31)))
3295 {
3296 if (symref)
3297 *symref = addr;
3298 if (addend)
3299 *addend = tmpaddend;
3300
3301 return true;
3302 }
3303 return false;
3304 }
3305
3306 /* Return true if the address in OP is valid for constraint letter C
3307 if wrapped in a MEM rtx. Pass LIT_POOL_OK as true if literal
3308 pool MEMs should be accepted. Only the Q, R, S, T constraint
3309 letters are allowed for C. */
3310
3311 static int
3312 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3313 {
3314 rtx symref;
3315 struct s390_address addr;
3316 bool decomposed = false;
3317
3318 if (!address_operand (op, GET_MODE (op)))
3319 return 0;
3320
3321 /* This check makes sure that no symbolic address (except literal
3322 pool references) are accepted by the R or T constraints. */
3323 if (s390_loadrelative_operand_p (op, &symref, NULL)
3324 && (!lit_pool_ok
3325 || !SYMBOL_REF_P (symref)
3326 || !CONSTANT_POOL_ADDRESS_P (symref)))
3327 return 0;
3328
3329 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3330 if (!lit_pool_ok)
3331 {
3332 if (!s390_decompose_address (op, &addr))
3333 return 0;
3334 if (addr.literal_pool)
3335 return 0;
3336 decomposed = true;
3337 }
3338
3339 /* With reload, we sometimes get intermediate address forms that are
3340 actually invalid as-is, but we need to accept them in the most
3341 generic cases below ('R' or 'T'), since reload will in fact fix
3342 them up. LRA behaves differently here; we never see such forms,
3343 but on the other hand, we need to strictly reject every invalid
3344 address form. After both reload and LRA invalid address forms
3345 must be rejected, because nothing will fix them up later. Perform
3346 this check right up front. */
3347 if (lra_in_progress || reload_completed)
3348 {
3349 if (!decomposed && !s390_decompose_address (op, &addr))
3350 return 0;
3351 decomposed = true;
3352 }
3353
3354 switch (c)
3355 {
3356 case 'Q': /* no index short displacement */
3357 if (!decomposed && !s390_decompose_address (op, &addr))
3358 return 0;
3359 if (addr.indx)
3360 return 0;
3361 if (!s390_short_displacement (addr.disp))
3362 return 0;
3363 break;
3364
3365 case 'R': /* with index short displacement */
3366 if (TARGET_LONG_DISPLACEMENT)
3367 {
3368 if (!decomposed && !s390_decompose_address (op, &addr))
3369 return 0;
3370 if (!s390_short_displacement (addr.disp))
3371 return 0;
3372 }
3373 /* Any invalid address here will be fixed up by reload,
3374 so accept it for the most generic constraint. */
3375 break;
3376
3377 case 'S': /* no index long displacement */
3378 if (!decomposed && !s390_decompose_address (op, &addr))
3379 return 0;
3380 if (addr.indx)
3381 return 0;
3382 break;
3383
3384 case 'T': /* with index long displacement */
3385 /* Any invalid address here will be fixed up by reload,
3386 so accept it for the most generic constraint. */
3387 break;
3388
3389 default:
3390 return 0;
3391 }
3392 return 1;
3393 }
3394
3395
3396 /* Evaluates constraint strings described by the regular expression
3397 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3398 the constraint given in STR, and 0 otherwise. */
3399
3400 int
3401 s390_mem_constraint (const char *str, rtx op)
3402 {
3403 char c = str[0];
3404
3405 switch (c)
3406 {
3407 case 'A':
3408 /* Check for offsettable variants of memory constraints. */
3409 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3410 return 0;
3411 if ((reload_completed || reload_in_progress)
3412 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3413 return 0;
3414 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3415 case 'B':
3416 /* Check for non-literal-pool variants of memory constraints. */
3417 if (!MEM_P (op))
3418 return 0;
3419 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3420 case 'Q':
3421 case 'R':
3422 case 'S':
3423 case 'T':
3424 if (GET_CODE (op) != MEM)
3425 return 0;
3426 return s390_check_qrst_address (c, XEXP (op, 0), true);
3427 case 'Y':
3428 /* Simply check for the basic form of a shift count. Reload will
3429 take care of making sure we have a proper base register. */
3430 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3431 return 0;
3432 break;
3433 case 'Z':
3434 return s390_check_qrst_address (str[1], op, true);
3435 default:
3436 return 0;
3437 }
3438 return 1;
3439 }
3440
3441
3442 /* Evaluates constraint strings starting with letter O. Input
3443 parameter C is the second letter following the "O" in the constraint
3444 string. Returns 1 if VALUE meets the respective constraint and 0
3445 otherwise. */
3446
3447 int
3448 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3449 {
3450 if (!TARGET_EXTIMM)
3451 return 0;
3452
3453 switch (c)
3454 {
3455 case 's':
3456 return trunc_int_for_mode (value, SImode) == value;
3457
3458 case 'p':
3459 return value == 0
3460 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3461
3462 case 'n':
3463 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3464
3465 default:
3466 gcc_unreachable ();
3467 }
3468 }
3469
3470
3471 /* Evaluates constraint strings starting with letter N. Parameter STR
3472 contains the letters following letter "N" in the constraint string.
3473 Returns true if VALUE matches the constraint. */
3474
3475 int
3476 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3477 {
3478 machine_mode mode, part_mode;
3479 int def;
3480 int part, part_goal;
3481
3482
3483 if (str[0] == 'x')
3484 part_goal = -1;
3485 else
3486 part_goal = str[0] - '0';
3487
3488 switch (str[1])
3489 {
3490 case 'Q':
3491 part_mode = QImode;
3492 break;
3493 case 'H':
3494 part_mode = HImode;
3495 break;
3496 case 'S':
3497 part_mode = SImode;
3498 break;
3499 default:
3500 return 0;
3501 }
3502
3503 switch (str[2])
3504 {
3505 case 'H':
3506 mode = HImode;
3507 break;
3508 case 'S':
3509 mode = SImode;
3510 break;
3511 case 'D':
3512 mode = DImode;
3513 break;
3514 default:
3515 return 0;
3516 }
3517
3518 switch (str[3])
3519 {
3520 case '0':
3521 def = 0;
3522 break;
3523 case 'F':
3524 def = -1;
3525 break;
3526 default:
3527 return 0;
3528 }
3529
3530 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3531 return 0;
3532
3533 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3534 if (part < 0)
3535 return 0;
3536 if (part_goal != -1 && part_goal != part)
3537 return 0;
3538
3539 return 1;
3540 }
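
/* Illustrative sketch (hypothetical helper): the constraint string "1SD0"
   accepts a DImode value whose only SImode part different from 0 is part 1,
   i.e. the low word when counting parts from the most significant end.  */
#if 0
static void
s390_example_N_constraint (void)
{
  gcc_assert (s390_N_constraint_str ("1SD0", 0x12345678));
  gcc_assert (!s390_N_constraint_str ("0SD0", 0x12345678));
}
#endif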
3541
3542
3543 /* Returns true if the input parameter VALUE is a float zero. */
3544
3545 int
3546 s390_float_const_zero_p (rtx value)
3547 {
3548 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3549 && value == CONST0_RTX (GET_MODE (value)));
3550 }
3551
3552 /* Implement TARGET_REGISTER_MOVE_COST. */
3553
3554 static int
3555 s390_register_move_cost (machine_mode mode,
3556 reg_class_t from, reg_class_t to)
3557 {
3558 /* On s390, copy between fprs and gprs is expensive. */
3559
3560 /* It becomes somewhat faster having ldgr/lgdr. */
3561 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3562 {
3563 /* ldgr is single cycle. */
3564 if (reg_classes_intersect_p (from, GENERAL_REGS)
3565 && reg_classes_intersect_p (to, FP_REGS))
3566 return 1;
3567 /* lgdr needs 3 cycles. */
3568 if (reg_classes_intersect_p (to, GENERAL_REGS)
3569 && reg_classes_intersect_p (from, FP_REGS))
3570 return 3;
3571 }
3572
3573 /* Otherwise copying is done via memory. */
3574 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3575 && reg_classes_intersect_p (to, FP_REGS))
3576 || (reg_classes_intersect_p (from, FP_REGS)
3577 && reg_classes_intersect_p (to, GENERAL_REGS)))
3578 return 10;
3579
3580 /* We usually do not want to copy via CC. */
3581 if (reg_classes_intersect_p (from, CC_REGS)
3582 || reg_classes_intersect_p (to, CC_REGS))
3583 return 5;
3584
3585 return 1;
3586 }
3587
3588 /* Implement TARGET_MEMORY_MOVE_COST. */
3589
3590 static int
3591 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3592 reg_class_t rclass ATTRIBUTE_UNUSED,
3593 bool in ATTRIBUTE_UNUSED)
3594 {
3595 return 2;
3596 }
3597
3598 /* Compute a (partial) cost for rtx X. Return true if the complete
3599 cost has been computed, and false if subexpressions should be
3600 scanned. In either case, *TOTAL contains the cost result. The
3601 initial value of *TOTAL is the default value computed by
3602 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3603 code of the superexpression of x. */
3604
3605 static bool
3606 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3607 int opno ATTRIBUTE_UNUSED,
3608 int *total, bool speed ATTRIBUTE_UNUSED)
3609 {
3610 int code = GET_CODE (x);
3611 switch (code)
3612 {
3613 case CONST:
3614 case CONST_INT:
3615 case LABEL_REF:
3616 case SYMBOL_REF:
3617 case CONST_DOUBLE:
3618 case CONST_WIDE_INT:
3619 case MEM:
3620 *total = 0;
3621 return true;
3622
3623 case SET:
3624 {
3625 /* Without this a conditional move instruction would be
3626 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3627 comparison operator). That's a bit pessimistic. */
3628
3629 if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3630 return false;
3631
3632 rtx cond = XEXP (SET_SRC (x), 0);
3633
3634 if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3635 return false;
3636
3637 /* It is going to be a load/store on condition. Make it
3638 slightly more expensive than a normal load. */
3639 *total = COSTS_N_INSNS (1) + 1;
3640
3641 rtx dst = SET_DEST (x);
3642 rtx then = XEXP (SET_SRC (x), 1);
3643 rtx els = XEXP (SET_SRC (x), 2);
3644
3645 /* It is a real IF-THEN-ELSE. An additional move will be
3646 needed to implement that. */
3647 if (!TARGET_Z15
3648 && reload_completed
3649 && !rtx_equal_p (dst, then)
3650 && !rtx_equal_p (dst, els))
3651 *total += COSTS_N_INSNS (1) / 2;
3652
3653 /* A minor penalty for constants we cannot directly handle. */
3654 if ((CONST_INT_P (then) || CONST_INT_P (els))
3655 && (!TARGET_Z13 || MEM_P (dst)
3656 || (CONST_INT_P (then) && !satisfies_constraint_K (then))
3657 || (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3658 *total += COSTS_N_INSNS (1) / 2;
3659
3660 /* A store on condition can only handle register src operands. */
3661 if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3662 *total += COSTS_N_INSNS (1) / 2;
3663
3664 return true;
3665 }
3666 case IOR:
3667
3668 /* nnrk, nngrk */
3669 if (TARGET_Z15
3670 && (mode == SImode || mode == DImode)
3671 && GET_CODE (XEXP (x, 0)) == NOT
3672 && GET_CODE (XEXP (x, 1)) == NOT)
3673 {
3674 *total = COSTS_N_INSNS (1);
3675 if (!REG_P (XEXP (XEXP (x, 0), 0)))
3676 *total += 1;
3677 if (!REG_P (XEXP (XEXP (x, 1), 0)))
3678 *total += 1;
3679 return true;
3680 }
3681
3682 /* risbg */
3683 if (GET_CODE (XEXP (x, 0)) == AND
3684 && GET_CODE (XEXP (x, 1)) == ASHIFT
3685 && REG_P (XEXP (XEXP (x, 0), 0))
3686 && REG_P (XEXP (XEXP (x, 1), 0))
3687 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3688 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3689 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3690 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3691 {
3692 *total = COSTS_N_INSNS (2);
3693 return true;
3694 }
3695
3696 /* ~AND on a 128 bit mode. This can be done using a vector
3697 instruction. */
3698 if (TARGET_VXE
3699 && GET_CODE (XEXP (x, 0)) == NOT
3700 && GET_CODE (XEXP (x, 1)) == NOT
3701 && REG_P (XEXP (XEXP (x, 0), 0))
3702 && REG_P (XEXP (XEXP (x, 1), 0))
3703 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3704 && s390_hard_regno_mode_ok (VR0_REGNUM,
3705 GET_MODE (XEXP (XEXP (x, 0), 0))))
3706 {
3707 *total = COSTS_N_INSNS (1);
3708 return true;
3709 }
3710
3711 *total = COSTS_N_INSNS (1);
3712 return false;
3713
3714 case AND:
3715 /* nork, nogrk */
3716 if (TARGET_Z15
3717 && (mode == SImode || mode == DImode)
3718 && GET_CODE (XEXP (x, 0)) == NOT
3719 && GET_CODE (XEXP (x, 1)) == NOT)
3720 {
3721 *total = COSTS_N_INSNS (1);
3722 if (!REG_P (XEXP (XEXP (x, 0), 0)))
3723 *total += 1;
3724 if (!REG_P (XEXP (XEXP (x, 1), 0)))
3725 *total += 1;
3726 return true;
3727 }
3728 /* fallthrough */
3729 case ASHIFT:
3730 case ASHIFTRT:
3731 case LSHIFTRT:
3732 case ROTATE:
3733 case ROTATERT:
3734 case XOR:
3735 case NEG:
3736 case NOT:
3737 case PLUS:
3738 case MINUS:
3739 *total = COSTS_N_INSNS (1);
3740 return false;
3741
3742 case MULT:
3743 switch (mode)
3744 {
3745 case E_SImode:
3746 {
3747 rtx left = XEXP (x, 0);
3748 rtx right = XEXP (x, 1);
3749 if (GET_CODE (right) == CONST_INT
3750 && CONST_OK_FOR_K (INTVAL (right)))
3751 *total = s390_cost->mhi;
3752 else if (GET_CODE (left) == SIGN_EXTEND)
3753 *total = s390_cost->mh;
3754 else
3755 *total = s390_cost->ms; /* msr, ms, msy */
3756 break;
3757 }
3758 case E_DImode:
3759 {
3760 rtx left = XEXP (x, 0);
3761 rtx right = XEXP (x, 1);
3762 if (TARGET_ZARCH)
3763 {
3764 if (GET_CODE (right) == CONST_INT
3765 && CONST_OK_FOR_K (INTVAL (right)))
3766 *total = s390_cost->mghi;
3767 else if (GET_CODE (left) == SIGN_EXTEND)
3768 *total = s390_cost->msgf;
3769 else
3770 *total = s390_cost->msg; /* msgr, msg */
3771 }
3772 else /* TARGET_31BIT */
3773 {
3774 if (GET_CODE (left) == SIGN_EXTEND
3775 && GET_CODE (right) == SIGN_EXTEND)
3776 /* mulsidi case: mr, m */
3777 *total = s390_cost->m;
3778 else if (GET_CODE (left) == ZERO_EXTEND
3779 && GET_CODE (right) == ZERO_EXTEND)
3780 /* umulsidi case: ml, mlr */
3781 *total = s390_cost->ml;
3782 else
3783 /* Complex calculation is required. */
3784 *total = COSTS_N_INSNS (40);
3785 }
3786 break;
3787 }
3788 case E_SFmode:
3789 case E_DFmode:
3790 *total = s390_cost->mult_df;
3791 break;
3792 case E_TFmode:
3793 *total = s390_cost->mxbr;
3794 break;
3795 default:
3796 return false;
3797 }
3798 return false;
3799
3800 case FMA:
3801 switch (mode)
3802 {
3803 case E_DFmode:
3804 *total = s390_cost->madbr;
3805 break;
3806 case E_SFmode:
3807 *total = s390_cost->maebr;
3808 break;
3809 default:
3810 return false;
3811 }
3812 /* Negate in the third argument is free: FMSUB. */
3813 if (GET_CODE (XEXP (x, 2)) == NEG)
3814 {
3815 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3816 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3817 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3818 return true;
3819 }
3820 return false;
3821
3822 case UDIV:
3823 case UMOD:
3824 if (mode == TImode) /* 128 bit division */
3825 *total = s390_cost->dlgr;
3826 else if (mode == DImode)
3827 {
3828 rtx right = XEXP (x, 1);
3829 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3830 *total = s390_cost->dlr;
3831 else /* 64 by 64 bit division */
3832 *total = s390_cost->dlgr;
3833 }
3834 else if (mode == SImode) /* 32 bit division */
3835 *total = s390_cost->dlr;
3836 return false;
3837
3838 case DIV:
3839 case MOD:
3840 if (mode == DImode)
3841 {
3842 rtx right = XEXP (x, 1);
3843 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3844 if (TARGET_ZARCH)
3845 *total = s390_cost->dsgfr;
3846 else
3847 *total = s390_cost->dr;
3848 else /* 64 by 64 bit division */
3849 *total = s390_cost->dsgr;
3850 }
3851 else if (mode == SImode) /* 32 bit division */
3852 *total = s390_cost->dlr;
3853 else if (mode == SFmode)
3854 {
3855 *total = s390_cost->debr;
3856 }
3857 else if (mode == DFmode)
3858 {
3859 *total = s390_cost->ddbr;
3860 }
3861 else if (mode == TFmode)
3862 {
3863 *total = s390_cost->dxbr;
3864 }
3865 return false;
3866
3867 case SQRT:
3868 if (mode == SFmode)
3869 *total = s390_cost->sqebr;
3870 else if (mode == DFmode)
3871 *total = s390_cost->sqdbr;
3872 else /* TFmode */
3873 *total = s390_cost->sqxbr;
3874 return false;
3875
3876 case SIGN_EXTEND:
3877 case ZERO_EXTEND:
3878 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3879 || outer_code == PLUS || outer_code == MINUS
3880 || outer_code == COMPARE)
3881 *total = 0;
3882 return false;
3883
3884 case COMPARE:
3885 *total = COSTS_N_INSNS (1);
3886
3887 /* nxrk, nxgrk ~(a^b)==0 */
3888 if (TARGET_Z15
3889 && GET_CODE (XEXP (x, 0)) == NOT
3890 && XEXP (x, 1) == const0_rtx
3891 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3892 && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3893 && mode == CCZmode)
3894 {
3895 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3896 *total += 1;
3897 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3898 *total += 1;
3899 return true;
3900 }
3901
3902 /* nnrk, nngrk, nork, nogrk */
3903 if (TARGET_Z15
3904 && (GET_CODE (XEXP (x, 0)) == AND || GET_CODE (XEXP (x, 0)) == IOR)
3905 && XEXP (x, 1) == const0_rtx
3906 && (GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == DImode)
3907 && GET_CODE (XEXP (XEXP (x, 0), 0)) == NOT
3908 && GET_CODE (XEXP (XEXP (x, 0), 1)) == NOT
3909 && mode == CCZmode)
3910 {
3911 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 0), 0)))
3912 *total += 1;
3913 if (!REG_P (XEXP (XEXP (XEXP (x, 0), 1), 0)))
3914 *total += 1;
3915 return true;
3916 }
3917
3918 if (GET_CODE (XEXP (x, 0)) == AND
3919 && GET_CODE (XEXP (x, 1)) == CONST_INT
3920 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3921 {
3922 rtx op0 = XEXP (XEXP (x, 0), 0);
3923 rtx op1 = XEXP (XEXP (x, 0), 1);
3924 rtx op2 = XEXP (x, 1);
3925
3926 if (memory_operand (op0, GET_MODE (op0))
3927 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3928 return true;
3929 if (register_operand (op0, GET_MODE (op0))
3930 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3931 return true;
3932 }
3933 return false;
3934
3935 default:
3936 return false;
3937 }
3938 }
3939
3940 /* Return the cost of an address rtx ADDR. */
3941
3942 static int
3943 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3944 addr_space_t as ATTRIBUTE_UNUSED,
3945 bool speed ATTRIBUTE_UNUSED)
3946 {
3947 struct s390_address ad;
3948 if (!s390_decompose_address (addr, &ad))
3949 return 1000;
3950
3951 return ad.indx ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3952 }
3953
3954 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3955 static int
3956 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3957 tree vectype,
3958 int misalign ATTRIBUTE_UNUSED)
3959 {
3960 switch (type_of_cost)
3961 {
3962 case scalar_stmt:
3963 case scalar_load:
3964 case scalar_store:
3965 case vector_stmt:
3966 case vector_load:
3967 case vector_store:
3968 case vector_gather_load:
3969 case vector_scatter_store:
3970 case vec_to_scalar:
3971 case scalar_to_vec:
3972 case cond_branch_not_taken:
3973 case vec_perm:
3974 case vec_promote_demote:
3975 case unaligned_load:
3976 case unaligned_store:
3977 return 1;
3978
3979 case cond_branch_taken:
3980 return 3;
3981
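      /* E.g. constructing a V4SImode vector from scalar elements is
	 costed below as 4 - 1 = 3, i.e. one unit for every element
	 beyond the first.  */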
3982 case vec_construct:
3983 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3984
3985 default:
3986 gcc_unreachable ();
3987 }
3988 }
3989
3990 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3991 otherwise return 0. */
3992
3993 int
3994 tls_symbolic_operand (rtx op)
3995 {
3996 if (GET_CODE (op) != SYMBOL_REF)
3997 return 0;
3998 return SYMBOL_REF_TLS_MODEL (op);
3999 }
4000 \f
4001 /* Split DImode access register reference REG (on 64-bit) into its constituent
4002 low and high parts, and store them into LO and HI. Note that gen_lowpart/
4003 gen_highpart cannot be used as they assume all registers are word-sized,
4004 while our access registers have only half that size. */
4005
4006 void
4007 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
4008 {
4009 gcc_assert (TARGET_64BIT);
4010 gcc_assert (ACCESS_REG_P (reg));
4011 gcc_assert (GET_MODE (reg) == DImode);
4012 gcc_assert (!(REGNO (reg) & 1));
4013
4014 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
4015 *hi = gen_rtx_REG (SImode, REGNO (reg));
4016 }
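/* For instance, given a DImode access-register pair starting at hard
   register R, this yields *HI = (reg:SI R) and *LO = (reg:SI R + 1).  */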
4017
4018 /* Return true if OP contains a symbol reference. */
4019
4020 bool
4021 symbolic_reference_mentioned_p (rtx op)
4022 {
4023 const char *fmt;
4024 int i;
4025
4026 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4027 return 1;
4028
4029 fmt = GET_RTX_FORMAT (GET_CODE (op));
4030 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4031 {
4032 if (fmt[i] == 'E')
4033 {
4034 int j;
4035
4036 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4037 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4038 return 1;
4039 }
4040
4041 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4042 return 1;
4043 }
4044
4045 return 0;
4046 }
4047
4048 /* Return true if OP contains a reference to a thread-local symbol. */
4049
4050 bool
4051 tls_symbolic_reference_mentioned_p (rtx op)
4052 {
4053 const char *fmt;
4054 int i;
4055
4056 if (GET_CODE (op) == SYMBOL_REF)
4057 return tls_symbolic_operand (op);
4058
4059 fmt = GET_RTX_FORMAT (GET_CODE (op));
4060 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4061 {
4062 if (fmt[i] == 'E')
4063 {
4064 int j;
4065
4066 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4067 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4068 return true;
4069 }
4070
4071 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
4072 return true;
4073 }
4074
4075 return false;
4076 }
4077
4078
4079 /* Return true if OP is a legitimate general operand when
4080 generating PIC code. It is given that flag_pic is on
4081 and that OP satisfies CONSTANT_P. */
4082
4083 int
4084 legitimate_pic_operand_p (rtx op)
4085 {
4086 /* Accept all non-symbolic constants. */
4087 if (!SYMBOLIC_CONST (op))
4088 return 1;
4089
4090 /* Accept addresses that can be expressed relative to (pc). */
4091 if (larl_operand (op, VOIDmode))
4092 return 1;
4093
4094 /* Reject everything else; must be handled
4095 via emit_symbolic_move. */
4096 return 0;
4097 }
4098
4099 /* Returns true if the constant value OP is a legitimate general operand.
4100 It is given that OP satisfies CONSTANT_P. */
4101
4102 static bool
4103 s390_legitimate_constant_p (machine_mode mode, rtx op)
4104 {
4105 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
4106 {
4107 if (GET_MODE_SIZE (mode) != 16)
4108 return 0;
4109
4110 if (!satisfies_constraint_j00 (op)
4111 && !satisfies_constraint_jm1 (op)
4112 && !satisfies_constraint_jKK (op)
4113 && !satisfies_constraint_jxx (op)
4114 && !satisfies_constraint_jyy (op))
4115 return 0;
4116 }
4117
4118 /* Accept all non-symbolic constants. */
4119 if (!SYMBOLIC_CONST (op))
4120 return 1;
4121
4122 /* Accept immediate LARL operands. */
4123 if (larl_operand (op, mode))
4124 return 1;
4125
4126 /* Thread-local symbols are never legal constants. This is
4127 so that emit_call knows that computing such addresses
4128 might require a function call. */
4129 if (TLS_SYMBOLIC_CONST (op))
4130 return 0;
4131
4132 /* In the PIC case, symbolic constants must *not* be
4133 forced into the literal pool. We accept them here,
4134 so that they will be handled by emit_symbolic_move. */
4135 if (flag_pic)
4136 return 1;
4137
4138 /* All remaining non-PIC symbolic constants are
4139 forced into the literal pool. */
4140 return 0;
4141 }
4142
4143 /* Determine if it's legal to put X into the constant pool. This
4144 is not possible if X contains the address of a symbol that is
4145 not constant (TLS) or not known at final link time (PIC). */
4146
4147 static bool
4148 s390_cannot_force_const_mem (machine_mode mode, rtx x)
4149 {
4150 switch (GET_CODE (x))
4151 {
4152 case CONST_INT:
4153 case CONST_DOUBLE:
4154 case CONST_WIDE_INT:
4155 case CONST_VECTOR:
4156 /* Accept all non-symbolic constants. */
4157 return false;
4158
4159 case NEG:
4160 /* Accept a unary '-' only on scalar numeric constants. */
4161 switch (GET_CODE (XEXP (x, 0)))
4162 {
4163 case CONST_INT:
4164 case CONST_DOUBLE:
4165 case CONST_WIDE_INT:
4166 return false;
4167 default:
4168 return true;
4169 }
4170
4171 case LABEL_REF:
4172 /* Labels are OK iff we are non-PIC. */
4173 return flag_pic != 0;
4174
4175 case SYMBOL_REF:
4176 /* 'Naked' TLS symbol references are never OK,
4177 non-TLS symbols are OK iff we are non-PIC. */
4178 if (tls_symbolic_operand (x))
4179 return true;
4180 else
4181 return flag_pic != 0;
4182
4183 case CONST:
4184 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
4185 case PLUS:
4186 case MINUS:
4187 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
4188 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
4189
4190 case UNSPEC:
4191 switch (XINT (x, 1))
4192 {
4193 /* Only lt-relative or GOT-relative UNSPECs are OK. */
4194 case UNSPEC_LTREL_OFFSET:
4195 case UNSPEC_GOT:
4196 case UNSPEC_GOTOFF:
4197 case UNSPEC_PLTOFF:
4198 case UNSPEC_TLSGD:
4199 case UNSPEC_TLSLDM:
4200 case UNSPEC_NTPOFF:
4201 case UNSPEC_DTPOFF:
4202 case UNSPEC_GOTNTPOFF:
4203 case UNSPEC_INDNTPOFF:
4204 return false;
4205
4206 /* If the literal pool shares the code section, execute template
4207 placeholders must not be put into the pool as well. */
4208 case UNSPEC_INSN:
4209 default:
4210 return true;
4211 }
4212 break;
4213
4214 default:
4215 gcc_unreachable ();
4216 }
4217 }
4218
4219 /* Returns true if the constant value OP is a legitimate general
4220 operand during and after reload. The difference to
4221 legitimate_constant_p is that this function will not accept
4222 a constant that would need to be forced to the literal pool
4223 before it can be used as operand.
4224 This function accepts all constants which can be loaded directly
4225 into a GPR. */
4226
4227 bool
4228 legitimate_reload_constant_p (rtx op)
4229 {
4230 /* Accept la(y) operands. */
4231 if (GET_CODE (op) == CONST_INT
4232 && DISP_IN_RANGE (INTVAL (op)))
4233 return true;
4234
4235 /* Accept l(g)hi/l(g)fi operands. */
4236 if (GET_CODE (op) == CONST_INT
4237 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
4238 return true;
4239
4240 /* Accept lliXX operands. */
4241 if (TARGET_ZARCH
4242 && GET_CODE (op) == CONST_INT
4243 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4244 && s390_single_part (op, word_mode, HImode, 0) >= 0)
4245 return true;
4246
4247 if (TARGET_EXTIMM
4248 && GET_CODE (op) == CONST_INT
4249 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
4250 && s390_single_part (op, word_mode, SImode, 0) >= 0)
4251 return true;
4252
4253 /* Accept larl operands. */
4254 if (larl_operand (op, VOIDmode))
4255 return true;
4256
4257 /* Accept floating-point zero operands that fit into a single GPR. */
4258 if (GET_CODE (op) == CONST_DOUBLE
4259 && s390_float_const_zero_p (op)
4260 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
4261 return true;
4262
4263 /* Accept double-word operands that can be split. */
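  /* E.g. on 64-bit a 128-bit CONST_WIDE_INT is accepted only if both of
     its 64-bit subwords are themselves directly loadable into a GPR.  */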
4264 if (GET_CODE (op) == CONST_WIDE_INT
4265 || (GET_CODE (op) == CONST_INT
4266 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4267 {
4268 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4269 rtx hi = operand_subword (op, 0, 0, dword_mode);
4270 rtx lo = operand_subword (op, 1, 0, dword_mode);
4271 return legitimate_reload_constant_p (hi)
4272 && legitimate_reload_constant_p (lo);
4273 }
4274
4275 /* Everything else cannot be handled without reload. */
4276 return false;
4277 }
4278
4279 /* Returns true if the constant value OP is a legitimate fp operand
4280 during and after reload.
4281 This function accepts all constants which can be loaded directly
4282 into an FPR. */
4283
4284 static bool
4285 legitimate_reload_fp_constant_p (rtx op)
4286 {
4287 /* Accept floating-point zero operands if the load zero instruction
4288 can be used. Prior to z196 the load fp zero instruction caused a
4289 performance penalty if the result is used as BFP number. */
4290 if (TARGET_Z196
4291 && GET_CODE (op) == CONST_DOUBLE
4292 && s390_float_const_zero_p (op))
4293 return true;
4294
4295 return false;
4296 }
4297
4298 /* Returns true if the constant value OP is a legitimate vector operand
4299 during and after reload.
4300 This function accepts all constants which can be loaded directly
4301 into a VR. */
4302
4303 static bool
4304 legitimate_reload_vector_constant_p (rtx op)
4305 {
4306 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4307 && (satisfies_constraint_j00 (op)
4308 || satisfies_constraint_jm1 (op)
4309 || satisfies_constraint_jKK (op)
4310 || satisfies_constraint_jxx (op)
4311 || satisfies_constraint_jyy (op)))
4312 return true;
4313
4314 return false;
4315 }
4316
4317 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4318 return the class of reg to actually use. */
4319
4320 static reg_class_t
4321 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4322 {
4323 switch (GET_CODE (op))
4324 {
4325 /* Constants we cannot reload into general registers
4326 must be forced into the literal pool. */
4327 case CONST_VECTOR:
4328 case CONST_DOUBLE:
4329 case CONST_INT:
4330 case CONST_WIDE_INT:
4331 if (reg_class_subset_p (GENERAL_REGS, rclass)
4332 && legitimate_reload_constant_p (op))
4333 return GENERAL_REGS;
4334 else if (reg_class_subset_p (ADDR_REGS, rclass)
4335 && legitimate_reload_constant_p (op))
4336 return ADDR_REGS;
4337 else if (reg_class_subset_p (FP_REGS, rclass)
4338 && legitimate_reload_fp_constant_p (op))
4339 return FP_REGS;
4340 else if (reg_class_subset_p (VEC_REGS, rclass)
4341 && legitimate_reload_vector_constant_p (op))
4342 return VEC_REGS;
4343
4344 return NO_REGS;
4345
4346 /* If a symbolic constant or a PLUS is reloaded,
4347 it is most likely being used as an address, so
4348 prefer ADDR_REGS. If RCLASS is not a superset
4349 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4350 case CONST:
4351 /* Symrefs cannot be pushed into the literal pool with -fPIC
4352 so we *MUST NOT* return NO_REGS for these cases
4353 (s390_cannot_force_const_mem will return true).
4354
4355 On the other hand we MUST return NO_REGS for symrefs with
4356 an invalid addend which might have been pushed to the literal
4357 pool (no -fPIC). Usually we would expect them to be
4358 handled via secondary reload but this does not happen if
4359 they are used as literal pool slot replacement in reload
4360 inheritance (see emit_input_reload_insns). */
4361 if (GET_CODE (XEXP (op, 0)) == PLUS
4362 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4363 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4364 {
4365 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4366 return ADDR_REGS;
4367 else
4368 return NO_REGS;
4369 }
4370 /* fallthrough */
4371 case LABEL_REF:
4372 case SYMBOL_REF:
4373 if (!legitimate_reload_constant_p (op))
4374 return NO_REGS;
4375 /* fallthrough */
4376 case PLUS:
4377 /* load address will be used. */
4378 if (reg_class_subset_p (ADDR_REGS, rclass))
4379 return ADDR_REGS;
4380 else
4381 return NO_REGS;
4382
4383 default:
4384 break;
4385 }
4386
4387 return rclass;
4388 }
4389
4390 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4391 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4392 aligned. */
4393
4394 bool
4395 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4396 {
4397 HOST_WIDE_INT addend;
4398 rtx symref;
4399
4400 /* The "required alignment" might be 0 (e.g. for certain structs
4401 accessed via BLKmode). Early abort in this case, as well as when
4402 an alignment > 8 is required. */
4403 if (alignment < 2 || alignment > 8)
4404 return false;
4405
4406 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4407 return false;
4408
4409 if (addend & (alignment - 1))
4410 return false;
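  /* E.g. SYM + 6 with a required alignment of 4 is rejected by the check
     above, even if SYM itself is suitably aligned.  */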
4411
4412 if (GET_CODE (symref) == SYMBOL_REF)
4413 {
4414 /* s390_encode_section_info is not called for anchors, since they don't
4415 have corresponding VAR_DECLs. Therefore, we cannot rely on
4416 SYMBOL_FLAG_NOTALIGN{2,4,8}_P returning useful information. */
4417 if (SYMBOL_REF_ANCHOR_P (symref))
4418 {
4419 HOST_WIDE_INT block_offset = SYMBOL_REF_BLOCK_OFFSET (symref);
4420 unsigned int block_alignment = (SYMBOL_REF_BLOCK (symref)->alignment
4421 / BITS_PER_UNIT);
4422
4423 gcc_assert (block_offset >= 0);
4424 return ((block_offset & (alignment - 1)) == 0
4425 && block_alignment >= alignment);
4426 }
4427
4428 /* We have load-relative instructions for 2-byte, 4-byte, and
4429 8-byte alignment so allow only these. */
4430 switch (alignment)
4431 {
4432 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4433 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4434 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4435 default: return false;
4436 }
4437 }
4438
4439 if (GET_CODE (symref) == UNSPEC
4440 && alignment <= UNITS_PER_LONG)
4441 return true;
4442
4443 return false;
4444 }
4445
4446 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4447 operand, SCRATCH is used to load the even part of the address,
4448 and one is added to it afterwards. */
4449
4450 void
4451 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4452 {
4453 HOST_WIDE_INT addend;
4454 rtx symref;
4455
4456 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4457 gcc_unreachable ();
4458
4459 if (!(addend & 1))
4460 /* Easy case. The addend is even so larl will do fine. */
4461 emit_move_insn (reg, addr);
4462 else
4463 {
4464 /* We can leave the scratch register untouched if the target
4465 register is a valid base register. */
4466 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4467 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4468 scratch = reg;
4469
4470 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4471 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4472
4473 if (addend != 1)
4474 emit_move_insn (scratch,
4475 gen_rtx_CONST (Pmode,
4476 gen_rtx_PLUS (Pmode, symref,
4477 GEN_INT (addend - 1))));
4478 else
4479 emit_move_insn (scratch, symref);
4480
4481 /* Increment the address using la in order to avoid clobbering cc. */
4482 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4483 }
4484 }
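/* For example, reloading the odd address SYM + 5 into %r2 will typically
   expand to
     larl %r2, SYM+4
     la %r2, 1(%r2)
   where the larl loads the even part and the la adds the remaining one
   without clobbering the condition code.  */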
4485
4486 /* Generate what is necessary to move between REG and MEM using
4487 SCRATCH. The direction is given by TOMEM. */
4488
4489 void
4490 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4491 {
4492 /* Reload might have pulled a constant out of the literal pool.
4493 Force it back in. */
4494 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4495 || GET_CODE (mem) == CONST_WIDE_INT
4496 || GET_CODE (mem) == CONST_VECTOR
4497 || GET_CODE (mem) == CONST)
4498 mem = force_const_mem (GET_MODE (reg), mem);
4499
4500 gcc_assert (MEM_P (mem));
4501
4502 /* For a load from memory we can leave the scratch register
4503 untouched if the target register is a valid base register. */
4504 if (!tomem
4505 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4506 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4507 && GET_MODE (reg) == GET_MODE (scratch))
4508 scratch = reg;
4509
4510 /* Load address into scratch register. Since we can't have a
4511 secondary reload for a secondary reload we have to cover the case
4512 where larl would need a secondary reload here as well. */
4513 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4514
4515 /* Now we can use a standard load/store to do the move. */
4516 if (tomem)
4517 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4518 else
4519 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4520 }
4521
4522 /* Inform reload about cases where moving X with a mode MODE to a register in
4523 RCLASS requires an extra scratch or immediate register. Return the class
4524 needed for the immediate register. */
4525
4526 static reg_class_t
4527 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4528 machine_mode mode, secondary_reload_info *sri)
4529 {
4530 enum reg_class rclass = (enum reg_class) rclass_i;
4531
4532 /* Intermediate register needed. */
4533 if (reg_classes_intersect_p (CC_REGS, rclass))
4534 return GENERAL_REGS;
4535
4536 if (TARGET_VX)
4537 {
4538 /* The vst/vl vector move instructions allow only for short
4539 displacements. */
4540 if (MEM_P (x)
4541 && GET_CODE (XEXP (x, 0)) == PLUS
4542 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4543 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4544 && reg_class_subset_p (rclass, VEC_REGS)
4545 && (!reg_class_subset_p (rclass, FP_REGS)
4546 || (GET_MODE_SIZE (mode) > 8
4547 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4548 {
4549 if (in_p)
4550 sri->icode = (TARGET_64BIT ?
4551 CODE_FOR_reloaddi_la_in :
4552 CODE_FOR_reloadsi_la_in);
4553 else
4554 sri->icode = (TARGET_64BIT ?
4555 CODE_FOR_reloaddi_la_out :
4556 CODE_FOR_reloadsi_la_out);
4557 }
4558 }
4559
4560 if (TARGET_Z10)
4561 {
4562 HOST_WIDE_INT offset;
4563 rtx symref;
4564
4565 /* On z10 several optimizer steps may generate larl operands with
4566 an odd addend. */
4567 if (in_p
4568 && s390_loadrelative_operand_p (x, &symref, &offset)
4569 && mode == Pmode
4570 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4571 && (offset & 1) == 1)
4572 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4573 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4574
4575 /* Handle all the (mem (symref)) accesses we cannot use the z10
4576 instructions for. */
4577 if (MEM_P (x)
4578 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4579 && (mode == QImode
4580 || !reg_class_subset_p (rclass, GENERAL_REGS)
4581 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4582 || !s390_check_symref_alignment (XEXP (x, 0),
4583 GET_MODE_SIZE (mode))))
4584 {
4585 #define __SECONDARY_RELOAD_CASE(M,m) \
4586 case E_##M##mode: \
4587 if (TARGET_64BIT) \
4588 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4589 CODE_FOR_reload##m##di_tomem_z10; \
4590 else \
4591 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4592 CODE_FOR_reload##m##si_tomem_z10; \
4593 break;
4594
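	  /* For instance, a QImode access on a 64-bit target takes the
	     E_QImode case below, which sets sri->icode to
	     CODE_FOR_reloadqidi_toreg_z10 for an input reload and to
	     CODE_FOR_reloadqidi_tomem_z10 for an output reload, simply
	     by pasting the mode suffixes together.  */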
4595 switch (GET_MODE (x))
4596 {
4597 __SECONDARY_RELOAD_CASE (QI, qi);
4598 __SECONDARY_RELOAD_CASE (HI, hi);
4599 __SECONDARY_RELOAD_CASE (SI, si);
4600 __SECONDARY_RELOAD_CASE (DI, di);
4601 __SECONDARY_RELOAD_CASE (TI, ti);
4602 __SECONDARY_RELOAD_CASE (SF, sf);
4603 __SECONDARY_RELOAD_CASE (DF, df);
4604 __SECONDARY_RELOAD_CASE (TF, tf);
4605 __SECONDARY_RELOAD_CASE (SD, sd);
4606 __SECONDARY_RELOAD_CASE (DD, dd);
4607 __SECONDARY_RELOAD_CASE (TD, td);
4608 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4609 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4610 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4611 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4612 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4613 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4614 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4615 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4616 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4617 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4618 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4619 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4620 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4621 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4622 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4623 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4624 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4625 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4626 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4627 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4628 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4629 default:
4630 gcc_unreachable ();
4631 }
4632 #undef __SECONDARY_RELOAD_CASE
4633 }
4634 }
4635
4636 /* We need a scratch register when loading a PLUS expression which
4637 is not a legitimate operand of the LOAD ADDRESS instruction. */
4638 /* LRA can deal with transformation of plus op very well -- so we
4639 don't need to prompt LRA in this case. */
4640 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4641 sri->icode = (TARGET_64BIT ?
4642 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4643
4644 /* Performing a multiword move from or to memory we have to make sure the
4645 second chunk in memory is addressable without causing a displacement
4646 overflow. If that would be the case we calculate the address in
4647 a scratch register. */
4648 if (MEM_P (x)
4649 && GET_CODE (XEXP (x, 0)) == PLUS
4650 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4651 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4652 + GET_MODE_SIZE (mode) - 1))
4653 {
4654 /* For GENERAL_REGS a displacement overflow is no problem if occurring
4655 in an s_operand address since we may fall back to lm/stm. So we only
4656 have to care about overflows in the b+i+d case. */
4657 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4658 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4659 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4660 /* For FP_REGS no lm/stm is available so this check is triggered
4661 for displacement overflows in b+i+d and b+d like addresses. */
4662 || (reg_classes_intersect_p (FP_REGS, rclass)
4663 && s390_class_max_nregs (FP_REGS, mode) > 1))
4664 {
4665 if (in_p)
4666 sri->icode = (TARGET_64BIT ?
4667 CODE_FOR_reloaddi_la_in :
4668 CODE_FOR_reloadsi_la_in);
4669 else
4670 sri->icode = (TARGET_64BIT ?
4671 CODE_FOR_reloaddi_la_out :
4672 CODE_FOR_reloadsi_la_out);
4673 }
4674 }
4675
4676 /* A scratch address register is needed when a symbolic constant is
4677 copied to r0 compiling with -fPIC. In other cases the target
4678 register might be used as temporary (see legitimize_pic_address). */
4679 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4680 sri->icode = (TARGET_64BIT ?
4681 CODE_FOR_reloaddi_PIC_addr :
4682 CODE_FOR_reloadsi_PIC_addr);
4683
4684 /* Either scratch or no register needed. */
4685 return NO_REGS;
4686 }
4687
4688 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
4689
4690 We need secondary memory to move data between GPRs and FPRs.
4691
4692 - With DFP the ldgr/lgdr instructions are available. Due to the
4693 different alignment we cannot use them for SFmode. For 31 bit a
4694 64 bit value in a GPR would be a register pair so here we still
4695 need to go via memory.
4696
4697 - With z13 we can do the SF/SImode moves with vlgvf. Due to the
4698 overlap of FPRs and VRs we still disallow TF/TD modes to be
4699 in full VRs, so these moves also go via memory on z13, just as
4700 before.
4701
4702 FIXME: Should we try splitting it into two vlgvg's/vlvg's instead? */
4703
4704 static bool
4705 s390_secondary_memory_needed (machine_mode mode,
4706 reg_class_t class1, reg_class_t class2)
4707 {
4708 return (((reg_classes_intersect_p (class1, VEC_REGS)
4709 && reg_classes_intersect_p (class2, GENERAL_REGS))
4710 || (reg_classes_intersect_p (class1, GENERAL_REGS)
4711 && reg_classes_intersect_p (class2, VEC_REGS)))
4712 && (TARGET_TPF || !TARGET_DFP || !TARGET_64BIT
4713 || GET_MODE_SIZE (mode) != 8)
4714 && (!TARGET_VX || (SCALAR_FLOAT_MODE_P (mode)
4715 && GET_MODE_SIZE (mode) > 8)));
4716 }
4717
4718 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
4719
4720 get_secondary_mem widens its argument to BITS_PER_WORD which loses on 64bit
4721 because the movsi and movsf patterns don't handle r/f moves. */
4722
4723 static machine_mode
4724 s390_secondary_memory_needed_mode (machine_mode mode)
4725 {
4726 if (GET_MODE_BITSIZE (mode) < 32)
4727 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
4728 return mode;
4729 }
4730
4731 /* Generate code to load SRC, which is PLUS that is not a
4732 legitimate operand for the LA instruction, into TARGET.
4733 SCRATCH may be used as scratch register. */
4734
4735 void
4736 s390_expand_plus_operand (rtx target, rtx src,
4737 rtx scratch)
4738 {
4739 rtx sum1, sum2;
4740 struct s390_address ad;
4741
4742 /* src must be a PLUS; get its two operands. */
4743 gcc_assert (GET_CODE (src) == PLUS);
4744 gcc_assert (GET_MODE (src) == Pmode);
4745
4746 /* Check if either of the two operands is already scheduled
4747 for replacement by reload. This can happen e.g. when
4748 float registers occur in an address. */
4749 sum1 = find_replacement (&XEXP (src, 0));
4750 sum2 = find_replacement (&XEXP (src, 1));
4751 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4752
4753 /* If the address is already strictly valid, there's nothing to do. */
4754 if (!s390_decompose_address (src, &ad)
4755 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4756 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4757 {
4758 /* Otherwise, one of the operands cannot be an address register;
4759 we reload its value into the scratch register. */
4760 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4761 {
4762 emit_move_insn (scratch, sum1);
4763 sum1 = scratch;
4764 }
4765 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4766 {
4767 emit_move_insn (scratch, sum2);
4768 sum2 = scratch;
4769 }
4770
4771 /* According to the way these invalid addresses are generated
4772 in reload.c, it should never happen (at least on s390) that
4773 *neither* of the PLUS components, after find_replacements
4774 was applied, is an address register. */
4775 if (sum1 == scratch && sum2 == scratch)
4776 {
4777 debug_rtx (src);
4778 gcc_unreachable ();
4779 }
4780
4781 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4782 }
4783
4784 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4785 is only ever performed on addresses, so we can mark the
4786 sum as legitimate for LA in any case. */
4787 s390_load_address (target, src);
4788 }
4789
4790
4791 /* Return true if ADDR is a valid memory address.
4792 STRICT specifies whether strict register checking applies. */
4793
4794 static bool
4795 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4796 {
4797 struct s390_address ad;
4798
4799 if (TARGET_Z10
4800 && larl_operand (addr, VOIDmode)
4801 && (mode == VOIDmode
4802 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4803 return true;
4804
4805 if (!s390_decompose_address (addr, &ad))
4806 return false;
4807
4808 /* The vector memory instructions only support short displacements.
4809 Reject invalid displacements early to prevent plenty of lay
4810 instructions to be generated later which then cannot be merged
4811 properly. */
4812 if (TARGET_VX
4813 && VECTOR_MODE_P (mode)
4814 && ad.disp != NULL_RTX
4815 && CONST_INT_P (ad.disp)
4816 && !SHORT_DISP_IN_RANGE (INTVAL (ad.disp)))
4817 return false;
4818
4819 if (strict)
4820 {
4821 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4822 return false;
4823
4824 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4825 return false;
4826 }
4827 else
4828 {
4829 if (ad.base
4830 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4831 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4832 return false;
4833
4834 if (ad.indx
4835 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4836 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4837 return false;
4838 }
4839 return true;
4840 }
4841
4842 /* Return true if OP is a valid operand for the LA instruction.
4843 In 31-bit, we need to prove that the result is used as an
4844 address, as LA performs only a 31-bit addition. */
4845
4846 bool
4847 legitimate_la_operand_p (rtx op)
4848 {
4849 struct s390_address addr;
4850 if (!s390_decompose_address (op, &addr))
4851 return false;
4852
4853 return (TARGET_64BIT || addr.pointer);
4854 }
4855
4856 /* Return true if it is valid *and* preferable to use LA to
4857 compute the sum of OP1 and OP2. */
4858
4859 bool
4860 preferred_la_operand_p (rtx op1, rtx op2)
4861 {
4862 struct s390_address addr;
4863
4864 if (op2 != const0_rtx)
4865 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4866
4867 if (!s390_decompose_address (op1, &addr))
4868 return false;
4869 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4870 return false;
4871 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4872 return false;
4873
4874 /* Avoid LA instructions with index (and base) register on z196 or
4875 later; it is preferable to use regular add instructions when
4876 possible. Starting with zEC12 the la with index register is
4877 "uncracked" again but still slower than a regular add. */
4878 if (addr.indx && s390_tune >= PROCESSOR_2817_Z196)
4879 return false;
4880
4881 if (!TARGET_64BIT && !addr.pointer)
4882 return false;
4883
4884 if (addr.pointer)
4885 return true;
4886
4887 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4888 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4889 return true;
4890
4891 return false;
4892 }
4893
4894 /* Emit a forced load-address operation to load SRC into DST.
4895 This will use the LOAD ADDRESS instruction even in situations
4896 where legitimate_la_operand_p (SRC) returns false. */
4897
4898 void
4899 s390_load_address (rtx dst, rtx src)
4900 {
4901 if (TARGET_64BIT)
4902 emit_move_insn (dst, src);
4903 else
4904 emit_insn (gen_force_la_31 (dst, src));
4905 }
4906
4907 /* Return true if it is ok to use SYMBOL_REF in a relative address. */
4908
4909 bool
4910 s390_rel_address_ok_p (rtx symbol_ref)
4911 {
4912 tree decl;
4913
4914 if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4915 return true;
4916
4917 decl = SYMBOL_REF_DECL (symbol_ref);
4918
4919 if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4920 return (s390_pic_data_is_text_relative
4921 || (decl
4922 && TREE_CODE (decl) == FUNCTION_DECL));
4923
4924 return false;
4925 }
4926
4927 /* Return a legitimate reference for ORIG (an address) using the
4928 register REG. If REG is 0, a new pseudo is generated.
4929
4930 There are two types of references that must be handled:
4931
4932 1. Global data references must load the address from the GOT, via
4933 the PIC reg. An insn is emitted to do this load, and the reg is
4934 returned.
4935
4936 2. Static data references, constant pool addresses, and code labels
4937 compute the address as an offset from the GOT, whose base is in
4938 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4939 differentiate them from global data objects. The returned
4940 address is the PIC reg + an unspec constant.
4941
4942 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4943 reg also appears in the address. */
4944
4945 rtx
4946 legitimize_pic_address (rtx orig, rtx reg)
4947 {
4948 rtx addr = orig;
4949 rtx addend = const0_rtx;
4950 rtx new_rtx = orig;
4951
4952 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4953
4954 if (GET_CODE (addr) == CONST)
4955 addr = XEXP (addr, 0);
4956
4957 if (GET_CODE (addr) == PLUS)
4958 {
4959 addend = XEXP (addr, 1);
4960 addr = XEXP (addr, 0);
4961 }
4962
4963 if ((GET_CODE (addr) == LABEL_REF
4964 || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4965 || (GET_CODE (addr) == UNSPEC &&
4966 (XINT (addr, 1) == UNSPEC_GOTENT
4967 || XINT (addr, 1) == UNSPEC_PLT31)))
4968 && GET_CODE (addend) == CONST_INT)
4969 {
4970 /* This can be locally addressed. */
4971
4972 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4973 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4974 gen_rtx_CONST (Pmode, addr) : addr);
4975
4976 if (larl_operand (const_addr, VOIDmode)
4977 && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4978 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4979 {
4980 if (INTVAL (addend) & 1)
4981 {
4982 /* LARL can't handle odd offsets, so emit a pair of LARL
4983 and LA. */
4984 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4985
4986 if (!DISP_IN_RANGE (INTVAL (addend)))
4987 {
4988 HOST_WIDE_INT even = INTVAL (addend) - 1;
4989 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4990 addr = gen_rtx_CONST (Pmode, addr);
4991 addend = const1_rtx;
4992 }
4993
4994 emit_move_insn (temp, addr);
4995 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4996
4997 if (reg != 0)
4998 {
4999 s390_load_address (reg, new_rtx);
5000 new_rtx = reg;
5001 }
5002 }
5003 else
5004 {
5005 /* If the offset is even, we can just use LARL. This
5006 will happen automatically. */
5007 }
5008 }
5009 else
5010 {
5011 /* No larl - Access local symbols relative to the GOT. */
5012
5013 rtx temp = reg? reg : gen_reg_rtx (Pmode);
5014
5015 if (reload_in_progress || reload_completed)
5016 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5017
5018 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5019 if (addend != const0_rtx)
5020 addr = gen_rtx_PLUS (Pmode, addr, addend);
5021 addr = gen_rtx_CONST (Pmode, addr);
5022 addr = force_const_mem (Pmode, addr);
5023 emit_move_insn (temp, addr);
5024
5025 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
5026 if (reg != 0)
5027 {
5028 s390_load_address (reg, new_rtx);
5029 new_rtx = reg;
5030 }
5031 }
5032 }
5033 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
5034 {
5035 /* A non-local symbol reference without addend.
5036
5037 The symbol ref is wrapped into an UNSPEC to make sure the
5038 proper operand modifier (@GOT or @GOTENT) will be emitted.
5039 This will tell the linker to put the symbol into the GOT.
5040
5041 Additionally the code dereferencing the GOT slot is emitted here.
5042
5043 An addend to the symref needs to be added afterwards.
5044 legitimize_pic_address calls itself recursively to handle
5045 that case. So no need to do it here. */
5046
5047 if (reg == 0)
5048 reg = gen_reg_rtx (Pmode);
5049
5050 if (TARGET_Z10)
5051 {
5052 /* Use load relative if possible.
5053 lgrl <target>, sym@GOTENT */
5054 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
5055 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5056 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
5057
5058 emit_move_insn (reg, new_rtx);
5059 new_rtx = reg;
5060 }
5061 else if (flag_pic == 1)
5062 {
5063 /* Assume GOT offset is a valid displacement operand (< 4k
5064 or < 512k with z990). This is handled the same way in
5065 both 31- and 64-bit code (@GOT).
5066 lg <target>, sym@GOT(r12) */
5067
5068 if (reload_in_progress || reload_completed)
5069 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5070
5071 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
5072 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5073 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5074 new_rtx = gen_const_mem (Pmode, new_rtx);
5075 emit_move_insn (reg, new_rtx);
5076 new_rtx = reg;
5077 }
5078 else
5079 {
5080 /* If the GOT offset might be >= 4k, we determine the position
5081 of the GOT entry via a PC-relative LARL (@GOTENT).
5082 larl temp, sym@GOTENT
5083 lg <target>, 0(temp) */
5084
5085 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
5086
5087 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
5088 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
5089
5090 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
5091 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5092 emit_move_insn (temp, new_rtx);
5093 new_rtx = gen_const_mem (Pmode, temp);
5094 emit_move_insn (reg, new_rtx);
5095
5096 new_rtx = reg;
5097 }
5098 }
5099 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
5100 {
5101 gcc_assert (XVECLEN (addr, 0) == 1);
5102 switch (XINT (addr, 1))
5103 {
5104 /* These UNSPECs address symbols (or PLT slots) relative to the GOT
5105 (not GOT slots!). In general this will exceed the
5106 displacement range, so these values belong in the literal
5107 pool. */
5108 case UNSPEC_GOTOFF:
5109 case UNSPEC_PLTOFF:
5110 new_rtx = force_const_mem (Pmode, orig);
5111 break;
5112
5113 /* For -fPIC the GOT size might exceed the displacement
5114 range so make sure the value is in the literal pool. */
5115 case UNSPEC_GOT:
5116 if (flag_pic == 2)
5117 new_rtx = force_const_mem (Pmode, orig);
5118 break;
5119
5120 /* For @GOTENT larl is used. This is handled like local
5121 symbol refs. */
5122 case UNSPEC_GOTENT:
5123 gcc_unreachable ();
5124 break;
5125
5126 /* For @PLT larl is used. This is handled like local
5127 symbol refs. */
5128 case UNSPEC_PLT31:
5129 gcc_unreachable ();
5130 break;
5131
5132 /* Everything else cannot happen. */
5133 default:
5134 gcc_unreachable ();
5135 }
5136 }
5137 else if (addend != const0_rtx)
5138 {
5139 /* Otherwise, compute the sum. */
5140
5141 rtx base = legitimize_pic_address (addr, reg);
5142 new_rtx = legitimize_pic_address (addend,
5143 base == reg ? NULL_RTX : reg);
5144 if (GET_CODE (new_rtx) == CONST_INT)
5145 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
5146 else
5147 {
5148 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
5149 {
5150 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
5151 new_rtx = XEXP (new_rtx, 1);
5152 }
5153 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
5154 }
5155
5156 if (GET_CODE (new_rtx) == CONST)
5157 new_rtx = XEXP (new_rtx, 0);
5158 new_rtx = force_operand (new_rtx, 0);
5159 }
5160
5161 return new_rtx;
5162 }
5163
5164 /* Load the thread pointer into a register. */
5165
5166 rtx
5167 s390_get_thread_pointer (void)
5168 {
5169 rtx tp = gen_reg_rtx (Pmode);
5170
5171 emit_insn (gen_get_thread_pointer (Pmode, tp));
5172
5173 mark_reg_pointer (tp, BITS_PER_WORD);
5174
5175 return tp;
5176 }
5177
5178 /* Emit a tls call insn. The call target is the SYMBOL_REF stored
5179 in s390_tls_symbol which always refers to __tls_get_offset.
5180 The returned offset is written to RESULT_REG and an USE rtx is
5181 generated for TLS_CALL. */
5182
5183 static GTY(()) rtx s390_tls_symbol;
5184
5185 static void
5186 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
5187 {
5188 rtx insn;
5189
5190 if (!flag_pic)
5191 emit_insn (s390_load_got ());
5192
5193 if (!s390_tls_symbol)
5194 {
5195 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
5196 SYMBOL_REF_FLAGS (s390_tls_symbol) |= SYMBOL_FLAG_FUNCTION;
5197 }
5198
5199 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
5200 gen_rtx_REG (Pmode, RETURN_REGNUM));
5201
5202 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
5203 RTL_CONST_CALL_P (insn) = 1;
5204 }
5205
5206 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
5207 this (thread-local) address. REG may be used as temporary. */
5208
5209 static rtx
5210 legitimize_tls_address (rtx addr, rtx reg)
5211 {
5212 rtx new_rtx, tls_call, temp, base, r2;
5213 rtx_insn *insn;
5214
5215 if (GET_CODE (addr) == SYMBOL_REF)
5216 switch (tls_symbolic_operand (addr))
5217 {
5218 case TLS_MODEL_GLOBAL_DYNAMIC:
5219 start_sequence ();
5220 r2 = gen_rtx_REG (Pmode, 2);
5221 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
5222 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5223 new_rtx = force_const_mem (Pmode, new_rtx);
5224 emit_move_insn (r2, new_rtx);
5225 s390_emit_tls_call_insn (r2, tls_call);
5226 insn = get_insns ();
5227 end_sequence ();
5228
5229 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5230 temp = gen_reg_rtx (Pmode);
5231 emit_libcall_block (insn, temp, r2, new_rtx);
5232
5233 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5234 if (reg != 0)
5235 {
5236 s390_load_address (reg, new_rtx);
5237 new_rtx = reg;
5238 }
5239 break;
5240
5241 case TLS_MODEL_LOCAL_DYNAMIC:
5242 start_sequence ();
5243 r2 = gen_rtx_REG (Pmode, 2);
5244 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
5245 new_rtx = gen_rtx_CONST (Pmode, tls_call);
5246 new_rtx = force_const_mem (Pmode, new_rtx);
5247 emit_move_insn (r2, new_rtx);
5248 s390_emit_tls_call_insn (r2, tls_call);
5249 insn = get_insns ();
5250 end_sequence ();
5251
5252 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
5253 temp = gen_reg_rtx (Pmode);
5254 emit_libcall_block (insn, temp, r2, new_rtx);
5255
5256 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5257 base = gen_reg_rtx (Pmode);
5258 s390_load_address (base, new_rtx);
5259
5260 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
5261 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5262 new_rtx = force_const_mem (Pmode, new_rtx);
5263 temp = gen_reg_rtx (Pmode);
5264 emit_move_insn (temp, new_rtx);
5265
5266 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
5267 if (reg != 0)
5268 {
5269 s390_load_address (reg, new_rtx);
5270 new_rtx = reg;
5271 }
5272 break;
5273
5274 case TLS_MODEL_INITIAL_EXEC:
5275 if (flag_pic == 1)
5276 {
5277 /* Assume GOT offset < 4k. This is handled the same way
5278 in both 31- and 64-bit code. */
5279
5280 if (reload_in_progress || reload_completed)
5281 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5282
5283 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5284 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5285 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5286 new_rtx = gen_const_mem (Pmode, new_rtx);
5287 temp = gen_reg_rtx (Pmode);
5288 emit_move_insn (temp, new_rtx);
5289 }
5290 else
5291 {
5292 /* If the GOT offset might be >= 4k, we determine the position
5293 of the GOT entry via a PC-relative LARL. */
5294
5295 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5296 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5297 temp = gen_reg_rtx (Pmode);
5298 emit_move_insn (temp, new_rtx);
5299
5300 new_rtx = gen_const_mem (Pmode, temp);
5301 temp = gen_reg_rtx (Pmode);
5302 emit_move_insn (temp, new_rtx);
5303 }
5304
5305 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5306 if (reg != 0)
5307 {
5308 s390_load_address (reg, new_rtx);
5309 new_rtx = reg;
5310 }
5311 break;
5312
5313 case TLS_MODEL_LOCAL_EXEC:
5314 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5315 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5316 new_rtx = force_const_mem (Pmode, new_rtx);
5317 temp = gen_reg_rtx (Pmode);
5318 emit_move_insn (temp, new_rtx);
5319
5320 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5321 if (reg != 0)
5322 {
5323 s390_load_address (reg, new_rtx);
5324 new_rtx = reg;
5325 }
5326 break;
5327
5328 default:
5329 gcc_unreachable ();
5330 }
5331
5332 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5333 {
5334 switch (XINT (XEXP (addr, 0), 1))
5335 {
5336 case UNSPEC_NTPOFF:
5337 case UNSPEC_INDNTPOFF:
5338 new_rtx = addr;
5339 break;
5340
5341 default:
5342 gcc_unreachable ();
5343 }
5344 }
5345
5346 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5347 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5348 {
5349 new_rtx = XEXP (XEXP (addr, 0), 0);
5350 if (GET_CODE (new_rtx) != SYMBOL_REF)
5351 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5352
5353 new_rtx = legitimize_tls_address (new_rtx, reg);
5354 new_rtx = plus_constant (Pmode, new_rtx,
5355 INTVAL (XEXP (XEXP (addr, 0), 1)));
5356 new_rtx = force_operand (new_rtx, 0);
5357 }
5358
5359 /* (const (neg (unspec (symbol_ref)))) -> (neg (const (unspec (symbol_ref)))) */
5360 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == NEG)
5361 {
5362 new_rtx = XEXP (XEXP (addr, 0), 0);
5363 if (GET_CODE (new_rtx) != SYMBOL_REF)
5364 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5365
5366 new_rtx = legitimize_tls_address (new_rtx, reg);
5367 new_rtx = gen_rtx_NEG (Pmode, new_rtx);
5368 new_rtx = force_operand (new_rtx, 0);
5369 }
5370
5371 else
5372 gcc_unreachable (); /* for now ... */
5373
5374 return new_rtx;
5375 }
5376
5377 /* Emit insns making the address in operands[1] valid for a standard
5378 move to operands[0]. operands[1] is replaced by an address which
5379 should be used instead of the former RTX to emit the move
5380 pattern. */
5381
5382 void
5383 emit_symbolic_move (rtx *operands)
5384 {
5385 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5386
5387 if (GET_CODE (operands[0]) == MEM)
5388 operands[1] = force_reg (Pmode, operands[1]);
5389 else if (TLS_SYMBOLIC_CONST (operands[1]))
5390 operands[1] = legitimize_tls_address (operands[1], temp);
5391 else if (flag_pic)
5392 operands[1] = legitimize_pic_address (operands[1], temp);
5393 }
5394
5395 /* Try machine-dependent ways of modifying an illegitimate address X
5396 to be legitimate. If we find one, return the new, valid address.
5397
5398 OLDX is the address as it was before break_out_memory_refs was called.
5399 In some cases it is useful to look at this to decide what needs to be done.
5400
5401 MODE is the mode of the operand pointed to by X.
5402
5403 When -fpic is used, special handling is needed for symbolic references.
5404 See comments by legitimize_pic_address for details. */
5405
5406 static rtx
5407 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5408 machine_mode mode ATTRIBUTE_UNUSED)
5409 {
5410 rtx constant_term = const0_rtx;
5411
5412 if (TLS_SYMBOLIC_CONST (x))
5413 {
5414 x = legitimize_tls_address (x, 0);
5415
5416 if (s390_legitimate_address_p (mode, x, FALSE))
5417 return x;
5418 }
5419 else if (GET_CODE (x) == PLUS
5420 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5421 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5422 {
5423 return x;
5424 }
5425 else if (flag_pic)
5426 {
5427 if (SYMBOLIC_CONST (x)
5428 || (GET_CODE (x) == PLUS
5429 && (SYMBOLIC_CONST (XEXP (x, 0))
5430 || SYMBOLIC_CONST (XEXP (x, 1)))))
5431 x = legitimize_pic_address (x, 0);
5432
5433 if (s390_legitimate_address_p (mode, x, FALSE))
5434 return x;
5435 }
5436
5437 x = eliminate_constant_term (x, &constant_term);
5438
5439 /* Optimize loading of large displacements by splitting them
5440 into the multiple of 4K and the rest; this allows the
5441 former to be CSE'd if possible.
5442
5443 Don't do this if the displacement is added to a register
5444 pointing into the stack frame, as the offsets will
5445 change later anyway. */
5446
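  /* E.g. a displacement of 0x12345 is split into lower = 0x345 and
     upper = 0x12000; the upper part is loaded into a fresh pseudo that
     can be shared between nearby references, while the lower part stays
     within the 12-bit displacement range.  */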
5447 if (GET_CODE (constant_term) == CONST_INT
5448 && !TARGET_LONG_DISPLACEMENT
5449 && !DISP_IN_RANGE (INTVAL (constant_term))
5450 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5451 {
5452 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5453 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5454
5455 rtx temp = gen_reg_rtx (Pmode);
5456 rtx val = force_operand (GEN_INT (upper), temp);
5457 if (val != temp)
5458 emit_move_insn (temp, val);
5459
5460 x = gen_rtx_PLUS (Pmode, x, temp);
5461 constant_term = GEN_INT (lower);
5462 }
5463
5464 if (GET_CODE (x) == PLUS)
5465 {
5466 if (GET_CODE (XEXP (x, 0)) == REG)
5467 {
5468 rtx temp = gen_reg_rtx (Pmode);
5469 rtx val = force_operand (XEXP (x, 1), temp);
5470 if (val != temp)
5471 emit_move_insn (temp, val);
5472
5473 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5474 }
5475
5476 else if (GET_CODE (XEXP (x, 1)) == REG)
5477 {
5478 rtx temp = gen_reg_rtx (Pmode);
5479 rtx val = force_operand (XEXP (x, 0), temp);
5480 if (val != temp)
5481 emit_move_insn (temp, val);
5482
5483 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5484 }
5485 }
5486
5487 if (constant_term != const0_rtx)
5488 x = gen_rtx_PLUS (Pmode, x, constant_term);
5489
5490 return x;
5491 }
5492
5493 /* Try a machine-dependent way of reloading an illegitimate address AD
5494 operand. If we find one, push the reload and return the new address.
5495
5496 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5497 and TYPE is the reload type of the current reload. */
5498
5499 rtx
5500 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5501 int opnum, int type)
5502 {
5503 if (!optimize || TARGET_LONG_DISPLACEMENT)
5504 return NULL_RTX;
5505
5506 if (GET_CODE (ad) == PLUS)
5507 {
5508 rtx tem = simplify_binary_operation (PLUS, Pmode,
5509 XEXP (ad, 0), XEXP (ad, 1));
5510 if (tem)
5511 ad = tem;
5512 }
5513
5514 if (GET_CODE (ad) == PLUS
5515 && GET_CODE (XEXP (ad, 0)) == REG
5516 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5517 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5518 {
5519 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5520 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5521 rtx cst, tem, new_rtx;
5522
5523 cst = GEN_INT (upper);
5524 if (!legitimate_reload_constant_p (cst))
5525 cst = force_const_mem (Pmode, cst);
5526
5527 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5528 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5529
5530 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5531 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5532 opnum, (enum reload_type) type);
5533 return new_rtx;
5534 }
5535
5536 return NULL_RTX;
5537 }
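/* E.g. the out-of-range address (plus (reg) (const_int 0x12345)) is
   rewritten above as (plus (plus (reg) (const_int 0x12000))
   (const_int 0x345)), with the 0x12000 part pushed as its own reload
   into a base register so that the remaining displacement fits.  */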
5538
5539 /* Emit code to copy LEN bytes from SRC to DST. */
5540
5541 bool
5542 s390_expand_cpymem (rtx dst, rtx src, rtx len)
5543 {
5544 /* When tuning for z10 or higher we rely on the Glibc functions to
5545 do the right thing. Only for constant lengths below 64k do we
5546 generate inline code. */
5547 if (s390_tune >= PROCESSOR_2097_Z10
5548 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5549 return false;
5550
5551 /* Expand memcpy for constant length operands without a loop if it
5552 is shorter that way.
5553
5554 With a constant length argument a
5555 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
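  /* E.g. a constant 700-byte copy is emitted as three mvc instructions
     (6 bytes each) covering 256 + 256 + 188 bytes, i.e. 18 bytes of code
     versus the 36-byte loop.  */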
5556 if (GET_CODE (len) == CONST_INT
5557 && INTVAL (len) >= 0
5558 && INTVAL (len) <= 256 * 6
5559 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5560 {
5561 HOST_WIDE_INT o, l;
5562
5563 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5564 {
5565 rtx newdst = adjust_address (dst, BLKmode, o);
5566 rtx newsrc = adjust_address (src, BLKmode, o);
5567 emit_insn (gen_cpymem_short (newdst, newsrc,
5568 GEN_INT (l > 256 ? 255 : l - 1)));
5569 }
5570 }
5571
5572 else if (TARGET_MVCLE)
5573 {
5574 emit_insn (gen_cpymem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5575 }
5576
5577 else
5578 {
5579 rtx dst_addr, src_addr, count, blocks, temp;
5580 rtx_code_label *loop_start_label = gen_label_rtx ();
5581 rtx_code_label *loop_end_label = gen_label_rtx ();
5582 rtx_code_label *end_label = gen_label_rtx ();
5583 machine_mode mode;
5584
5585 mode = GET_MODE (len);
5586 if (mode == VOIDmode)
5587 mode = Pmode;
5588
5589 dst_addr = gen_reg_rtx (Pmode);
5590 src_addr = gen_reg_rtx (Pmode);
5591 count = gen_reg_rtx (mode);
5592 blocks = gen_reg_rtx (mode);
5593
5594 convert_move (count, len, 1);
5595 emit_cmp_and_jump_insns (count, const0_rtx,
5596 EQ, NULL_RTX, mode, 1, end_label);
5597
5598 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5599 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5600 dst = change_address (dst, VOIDmode, dst_addr);
5601 src = change_address (src, VOIDmode, src_addr);
5602
5603 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5604 OPTAB_DIRECT);
5605 if (temp != count)
5606 emit_move_insn (count, temp);
5607
5608 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5609 OPTAB_DIRECT);
5610 if (temp != blocks)
5611 emit_move_insn (blocks, temp);
5612
5613 emit_cmp_and_jump_insns (blocks, const0_rtx,
5614 EQ, NULL_RTX, mode, 1, loop_end_label);
5615
5616 emit_label (loop_start_label);
5617
5618 if (TARGET_Z10
5619 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5620 {
5621 rtx prefetch;
5622
5623 /* Issue a read prefetch for the +3 cache line. */
5624 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5625 const0_rtx, const0_rtx);
5626 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5627 emit_insn (prefetch);
5628
5629 /* Issue a write prefetch for the +3 cache line. */
5630 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5631 const1_rtx, const0_rtx);
5632 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5633 emit_insn (prefetch);
5634 }
5635
5636 emit_insn (gen_cpymem_short (dst, src, GEN_INT (255)));
5637 s390_load_address (dst_addr,
5638 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5639 s390_load_address (src_addr,
5640 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5641
5642 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5643 OPTAB_DIRECT);
5644 if (temp != blocks)
5645 emit_move_insn (blocks, temp);
5646
5647 emit_cmp_and_jump_insns (blocks, const0_rtx,
5648 EQ, NULL_RTX, mode, 1, loop_end_label);
5649
5650 emit_jump (loop_start_label);
5651 emit_label (loop_end_label);
5652
5653 emit_insn (gen_cpymem_short (dst, src,
5654 convert_to_mode (Pmode, count, 1)));
5655 emit_label (end_label);
5656 }
5657 return true;
5658 }
5659
5660 /* Emit code to set LEN bytes at DST to VAL.
5661 Make use of clrmem if VAL is zero. */
5662
5663 void
5664 s390_expand_setmem (rtx dst, rtx len, rtx val)
5665 {
5666 if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5667 return;
5668
5669 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5670
5671 /* Expand setmem/clrmem for a constant length operand without a
5672 loop if it will be shorter that way.
5673 clrmem loop (with PFD) is 30 bytes -> 5 * xc
5674 clrmem loop (without PFD) is 24 bytes -> 4 * xc
5675 setmem loop (with PFD) is 38 bytes -> ~4 * (mvi/stc + mvc)
5676 setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
5677 if (GET_CODE (len) == CONST_INT
5678 && ((val == const0_rtx
5679 && (INTVAL (len) <= 256 * 4
5680 || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD(val,len))))
5681 || (val != const0_rtx && INTVAL (len) <= 257 * 4))
5682 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5683 {
5684 HOST_WIDE_INT o, l;
5685
5686 if (val == const0_rtx)
5687 /* clrmem: emit 256 byte blockwise XCs. */
5688 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5689 {
5690 rtx newdst = adjust_address (dst, BLKmode, o);
5691 emit_insn (gen_clrmem_short (newdst,
5692 GEN_INT (l > 256 ? 255 : l - 1)));
5693 }
5694 else
5695 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5696 setting first byte to val and using a 256 byte mvc with one
5697 byte overlap to propagate the byte. */
5698 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5699 {
5700 rtx newdst = adjust_address (dst, BLKmode, o);
5701 emit_move_insn (adjust_address (dst, QImode, o), val);
5702 if (l > 1)
5703 {
5704 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5705 emit_insn (gen_cpymem_short (newdstp1, newdst,
5706 GEN_INT (l > 257 ? 255 : l - 2)));
5707 }
5708 }
5709 }
5710
5711 else if (TARGET_MVCLE)
5712 {
5713 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5714 if (TARGET_64BIT)
5715 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5716 val));
5717 else
5718 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5719 val));
5720 }
5721
5722 else
5723 {
5724 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5725 rtx_code_label *loop_start_label = gen_label_rtx ();
5726 rtx_code_label *onebyte_end_label = gen_label_rtx ();
5727 rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5728 rtx_code_label *restbyte_end_label = gen_label_rtx ();
5729 machine_mode mode;
5730
5731 mode = GET_MODE (len);
5732 if (mode == VOIDmode)
5733 mode = Pmode;
5734
5735 dst_addr = gen_reg_rtx (Pmode);
5736 count = gen_reg_rtx (mode);
5737 blocks = gen_reg_rtx (mode);
5738
5739 convert_move (count, len, 1);
5740 emit_cmp_and_jump_insns (count, const0_rtx,
5741 EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5742 profile_probability::very_unlikely ());
5743
5744 /* We need to make a copy of the target address since memset is
5745 supposed to return it unmodified. We have to make it here
5746 already since the new reg is used at onebyte_end_label. */
5747 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5748 dst = change_address (dst, VOIDmode, dst_addr);
5749
5750 if (val != const0_rtx)
5751 {
5752 /* When using the overlapping mvc the original target
5753 address is only accessed as single byte entity (even by
5754 the mvc reading this value). */
5755 set_mem_size (dst, 1);
5756 dstp1 = adjust_address (dst, VOIDmode, 1);
5757 emit_cmp_and_jump_insns (count,
5758 const1_rtx, EQ, NULL_RTX, mode, 1,
5759 onebyte_end_label,
5760 profile_probability::very_unlikely ());
5761 }
5762
5763 /* There is one unconditional (mvi+mvc)/xc after the loop
5764 dealing with the rest of the bytes; subtracting two (mvi+mvc)
5765 or one (xc) here leaves this number of bytes to be handled by
5766 it. */
5767 temp = expand_binop (mode, add_optab, count,
5768 val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5769 count, 1, OPTAB_DIRECT);
5770 if (temp != count)
5771 emit_move_insn (count, temp);
5772
5773 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5774 OPTAB_DIRECT);
5775 if (temp != blocks)
5776 emit_move_insn (blocks, temp);
5777
5778 emit_cmp_and_jump_insns (blocks, const0_rtx,
5779 EQ, NULL_RTX, mode, 1, restbyte_end_label);
5780
5781 emit_jump (loop_start_label);
5782
5783 if (val != const0_rtx)
5784 {
5785 /* The 1 byte != 0 special case. Not handled efficiently
5786 since we require two jumps for that. However, this
5787 should be very rare. */
5788 emit_label (onebyte_end_label);
5789 emit_move_insn (adjust_address (dst, QImode, 0), val);
5790 emit_jump (zerobyte_end_label);
5791 }
5792
5793 emit_label (loop_start_label);
5794
5795 if (TARGET_SETMEM_PFD (val, len))
5796 {
5797 /* Issue a write prefetch. */
5798 rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE);
5799 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance),
5800 const1_rtx, const0_rtx);
5801 emit_insn (prefetch);
5802 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5803 }
5804
5805 if (val == const0_rtx)
5806 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5807 else
5808 {
5809 /* Set the first byte in the block to the value and use an
5810 overlapping mvc for the block. */
5811 emit_move_insn (adjust_address (dst, QImode, 0), val);
5812 emit_insn (gen_cpymem_short (dstp1, dst, GEN_INT (254)));
5813 }
5814 s390_load_address (dst_addr,
5815 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5816
5817 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5818 OPTAB_DIRECT);
5819 if (temp != blocks)
5820 emit_move_insn (blocks, temp);
5821
5822 emit_cmp_and_jump_insns (blocks, const0_rtx,
5823 NE, NULL_RTX, mode, 1, loop_start_label);
5824
5825 emit_label (restbyte_end_label);
5826
5827 if (val == const0_rtx)
5828 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5829 else
5830 {
5831 /* Set the first byte in the block to the value and use an
5832 overlapping mvc for the block. */
5833 emit_move_insn (adjust_address (dst, QImode, 0), val);
5834 /* EXECUTE only uses the lowest 8 bits of count, which is
5835 exactly what we need here. */
5836 emit_insn (gen_cpymem_short (dstp1, dst,
5837 convert_to_mode (Pmode, count, 1)));
5838 }
5839
5840 emit_label (zerobyte_end_label);
5841 }
5842 }
5843
5844 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5845 and return the result in TARGET. */
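/* E.g. a call with a small constant length such as the hypothetical

     int eq16 (const void *a, const void *b)
     { return __builtin_memcmp (a, b, 16); }

   should be handled by the first branch below: one cmpmem_short (clc)
   of 16 bytes followed by cmpint, which turns the resulting condition
   code into the usual negative/zero/positive integer value.  */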
5846
5847 bool
5848 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5849 {
5850 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5851 rtx tmp;
5852
5853 /* When tuning for z10 or higher we rely on the Glibc functions to
5854 do the right thing. Only for constant lengths below 64k will we
5855 generate inline code. */
5856 if (s390_tune >= PROCESSOR_2097_Z10
5857 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5858 return false;
5859
5860 /* As the result of CMPINT is inverted compared to what we need,
5861 we have to swap the operands. */
5862 tmp = op0; op0 = op1; op1 = tmp;
5863
5864 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5865 {
5866 if (INTVAL (len) > 0)
5867 {
5868 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5869 emit_insn (gen_cmpint (target, ccreg));
5870 }
5871 else
5872 emit_move_insn (target, const0_rtx);
5873 }
5874 else if (TARGET_MVCLE)
5875 {
5876 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5877 emit_insn (gen_cmpint (target, ccreg));
5878 }
5879 else
5880 {
5881 rtx addr0, addr1, count, blocks, temp;
5882 rtx_code_label *loop_start_label = gen_label_rtx ();
5883 rtx_code_label *loop_end_label = gen_label_rtx ();
5884 rtx_code_label *end_label = gen_label_rtx ();
5885 machine_mode mode;
5886
5887 mode = GET_MODE (len);
5888 if (mode == VOIDmode)
5889 mode = Pmode;
5890
5891 addr0 = gen_reg_rtx (Pmode);
5892 addr1 = gen_reg_rtx (Pmode);
5893 count = gen_reg_rtx (mode);
5894 blocks = gen_reg_rtx (mode);
5895
5896 convert_move (count, len, 1);
5897 emit_cmp_and_jump_insns (count, const0_rtx,
5898 EQ, NULL_RTX, mode, 1, end_label);
5899
5900 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5901 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5902 op0 = change_address (op0, VOIDmode, addr0);
5903 op1 = change_address (op1, VOIDmode, addr1);
5904
5905 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5906 OPTAB_DIRECT);
5907 if (temp != count)
5908 emit_move_insn (count, temp);
5909
5910 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5911 OPTAB_DIRECT);
5912 if (temp != blocks)
5913 emit_move_insn (blocks, temp);
5914
5915 emit_cmp_and_jump_insns (blocks, const0_rtx,
5916 EQ, NULL_RTX, mode, 1, loop_end_label);
5917
5918 emit_label (loop_start_label);
5919
5920 if (TARGET_Z10
5921 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5922 {
5923 rtx prefetch;
5924
5925 /* Issue a read prefetch for the +2 cache line of operand 1. */
5926 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5927 const0_rtx, const0_rtx);
5928 emit_insn (prefetch);
5929 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5930
5931 /* Issue a read prefetch for the +2 cache line of operand 2. */
5932 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5933 const0_rtx, const0_rtx);
5934 emit_insn (prefetch);
5935 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5936 }
5937
5938 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5939 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5940 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5941 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5942 temp = gen_rtx_SET (pc_rtx, temp);
5943 emit_jump_insn (temp);
5944
5945 s390_load_address (addr0,
5946 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5947 s390_load_address (addr1,
5948 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5949
5950 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5951 OPTAB_DIRECT);
5952 if (temp != blocks)
5953 emit_move_insn (blocks, temp);
5954
5955 emit_cmp_and_jump_insns (blocks, const0_rtx,
5956 EQ, NULL_RTX, mode, 1, loop_end_label);
5957
5958 emit_jump (loop_start_label);
5959 emit_label (loop_end_label);
5960
5961 emit_insn (gen_cmpmem_short (op0, op1,
5962 convert_to_mode (Pmode, count, 1)));
5963 emit_label (end_label);
5964
5965 emit_insn (gen_cmpint (target, ccreg));
5966 }
5967 return true;
5968 }
5969
5970 /* Emit a conditional jump to LABEL for condition code mask MASK using
5971 comparison operator COMPARISON. Return the emitted jump insn. */
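/* E.g. s390_emit_ccraw_jump (8, NE, label) emits

     (set (pc) (if_then_else (ne (reg:CCRAW CC_REGNUM) (const_int 8))
                             (label_ref label)
                             (pc)))

   i.e. a conditional branch on the raw condition code mask 8.  */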
5972
5973 static rtx_insn *
5974 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5975 {
5976 rtx temp;
5977
5978 gcc_assert (comparison == EQ || comparison == NE);
5979 gcc_assert (mask > 0 && mask < 15);
5980
5981 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5982 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5983 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5984 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5985 temp = gen_rtx_SET (pc_rtx, temp);
5986 return emit_jump_insn (temp);
5987 }
5988
5989 /* Emit the instructions to implement strlen of STRING and store the
5990 result in TARGET. The string has the known ALIGNMENT. This
5991 version uses vector instructions and is therefore not appropriate
5992 for targets prior to z13. */
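/* Rough sketch of the code emitted below for ALIGNMENT < 16: the
   bytes up to the next 16-byte boundary are loaded first with vll
   (bytes beyond the boundary read as zero), then an aligned loop
   loads full 16-byte chunks and scans them with vfenez for a zero
   byte; the byte index extracted from the vfenez result is added to
   the running string index to form the final length in TARGET.  */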
5993
5994 void
5995 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5996 {
5997 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5998 rtx str_reg = gen_reg_rtx (V16QImode);
5999 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
6000 rtx str_idx_reg = gen_reg_rtx (Pmode);
6001 rtx result_reg = gen_reg_rtx (V16QImode);
6002 rtx is_aligned_label = gen_label_rtx ();
6003 rtx into_loop_label = NULL_RTX;
6004 rtx loop_start_label = gen_label_rtx ();
6005 rtx temp;
6006 rtx len = gen_reg_rtx (QImode);
6007 rtx cond;
6008 rtx mem;
6009
6010 s390_load_address (str_addr_base_reg, XEXP (string, 0));
6011 emit_move_insn (str_idx_reg, const0_rtx);
6012
6013 if (INTVAL (alignment) < 16)
6014 {
6015 /* Check whether the address happens to be aligned properly and, if
6016 so, jump directly to the aligned loop. */
6017 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
6018 str_addr_base_reg, GEN_INT (15)),
6019 const0_rtx, EQ, NULL_RTX,
6020 Pmode, 1, is_aligned_label);
6021
6022 temp = gen_reg_rtx (Pmode);
6023 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
6024 GEN_INT (15), temp, 1, OPTAB_DIRECT);
6025 gcc_assert (REG_P (temp));
6026 highest_index_to_load_reg =
6027 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
6028 highest_index_to_load_reg, 1, OPTAB_DIRECT);
6029 gcc_assert (REG_P (highest_index_to_load_reg));
6030 emit_insn (gen_vllv16qi (str_reg,
6031 convert_to_mode (SImode, highest_index_to_load_reg, 1),
6032 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
6033
6034 into_loop_label = gen_label_rtx ();
6035 s390_emit_jump (into_loop_label, NULL_RTX);
6036 emit_barrier ();
6037 }
6038
6039 emit_label (is_aligned_label);
6040 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
6041
6042 /* Reaching this point we are only performing 16-byte aligned
6043 loads. */
6044 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
6045
6046 emit_label (loop_start_label);
6047 LABEL_NUSES (loop_start_label) = 1;
6048
6049 /* Load 16 bytes of the string into VR. */
6050 mem = gen_rtx_MEM (V16QImode,
6051 gen_rtx_PLUS (Pmode, str_idx_reg, str_addr_base_reg));
6052 set_mem_align (mem, 128);
6053 emit_move_insn (str_reg, mem);
6054 if (into_loop_label != NULL_RTX)
6055 {
6056 emit_label (into_loop_label);
6057 LABEL_NUSES (into_loop_label) = 1;
6058 }
6059
6060 /* Increment string index by 16 bytes. */
6061 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
6062 str_idx_reg, 1, OPTAB_DIRECT);
6063
6064 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
6065 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6066
6067 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
6068 REG_BR_PROB,
6069 profile_probability::very_likely ().to_reg_br_prob_note ());
6070 emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
6071
6072 /* If the string pointer wasn't aligned we have loaded less than 16
6073 bytes and the remaining bytes got filled with zeros (by vll).
6074 Now we have to check whether the resulting index lies within the
6075 bytes actually part of the string. */
6076
6077 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
6078 highest_index_to_load_reg);
6079 s390_load_address (highest_index_to_load_reg,
6080 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
6081 const1_rtx));
6082 if (TARGET_64BIT)
6083 emit_insn (gen_movdicc (str_idx_reg, cond,
6084 highest_index_to_load_reg, str_idx_reg));
6085 else
6086 emit_insn (gen_movsicc (str_idx_reg, cond,
6087 highest_index_to_load_reg, str_idx_reg));
6088
6089 add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
6090 profile_probability::very_unlikely ());
6091
6092 expand_binop (Pmode, add_optab, str_idx_reg,
6093 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
6094 /* FIXME: len is already zero extended - so avoid the llgcr emitted
6095 here. */
6096 temp = expand_binop (Pmode, add_optab, str_idx_reg,
6097 convert_to_mode (Pmode, len, 1),
6098 target, 1, OPTAB_DIRECT);
6099 if (temp != target)
6100 emit_move_insn (target, temp);
6101 }
6102
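/* Copy the zero-terminated string at SRC to DST, including the
   terminating zero byte, and set RESULT to the address of that zero
   byte within DST (presumably serving the movstr/stpcpy-style
   expansion).  */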
6103 void
6104 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
6105 {
6106 rtx temp = gen_reg_rtx (Pmode);
6107 rtx src_addr = XEXP (src, 0);
6108 rtx dst_addr = XEXP (dst, 0);
6109 rtx src_addr_reg = gen_reg_rtx (Pmode);
6110 rtx dst_addr_reg = gen_reg_rtx (Pmode);
6111 rtx offset = gen_reg_rtx (Pmode);
6112 rtx vsrc = gen_reg_rtx (V16QImode);
6113 rtx vpos = gen_reg_rtx (V16QImode);
6114 rtx loadlen = gen_reg_rtx (SImode);
6115 rtx gpos_qi = gen_reg_rtx(QImode);
6116 rtx gpos = gen_reg_rtx (SImode);
6117 rtx done_label = gen_label_rtx ();
6118 rtx loop_label = gen_label_rtx ();
6119 rtx exit_label = gen_label_rtx ();
6120 rtx full_label = gen_label_rtx ();
6121
6122 /* Perform a quick check for the string ending within the first (up
6123 to) 16 bytes and exit early if successful. */
6124
6125 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
6126 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
6127 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
6128 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6129 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6130 /* gpos is the byte index if a zero was found and 16 otherwise.
6131 So if it is lower than the loaded bytes we have a hit. */
6132 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
6133 full_label);
6134 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
6135
6136 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
6137 1, OPTAB_DIRECT);
6138 emit_jump (exit_label);
6139 emit_barrier ();
6140
6141 emit_label (full_label);
6142 LABEL_NUSES (full_label) = 1;
6143
6144 /* Calculate `offset' so that src + offset points to the last byte
6145 before 16 byte alignment. */
6146
6147 /* temp = src_addr & 0xf */
6148 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
6149 1, OPTAB_DIRECT);
6150
6151 /* offset = 0xf - temp */
6152 emit_move_insn (offset, GEN_INT (15));
6153 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
6154 1, OPTAB_DIRECT);
6155
6156 /* Store `offset' bytes in the destination string. The quick check
6157 has loaded at least `offset' bytes into vsrc. */
6158
6159 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
6160
6161 /* Advance to the next byte to be loaded. */
6162 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
6163 1, OPTAB_DIRECT);
6164
6165 /* Make sure the addresses are single regs which can be used as a
6166 base. */
6167 emit_move_insn (src_addr_reg, src_addr);
6168 emit_move_insn (dst_addr_reg, dst_addr);
6169
6170 /* MAIN LOOP */
6171
6172 emit_label (loop_label);
6173 LABEL_NUSES (loop_label) = 1;
6174
6175 emit_move_insn (vsrc,
6176 gen_rtx_MEM (V16QImode,
6177 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
6178
6179 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
6180 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
6181 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
6182 REG_BR_PROB, profile_probability::very_unlikely ()
6183 .to_reg_br_prob_note ());
6184
6185 emit_move_insn (gen_rtx_MEM (V16QImode,
6186 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
6187 vsrc);
6188 /* offset += 16 */
6189 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
6190 offset, 1, OPTAB_DIRECT);
6191
6192 emit_jump (loop_label);
6193 emit_barrier ();
6194
6195 /* REGULAR EXIT */
6196
6197 /* We are done. Add the offset of the zero character to the dst_addr
6198 pointer to get the result. */
6199
6200 emit_label (done_label);
6201 LABEL_NUSES (done_label) = 1;
6202
6203 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
6204 1, OPTAB_DIRECT);
6205
6206 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
6207 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
6208
6209 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
6210
6211 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
6212 1, OPTAB_DIRECT);
6213
6214 /* EARLY EXIT */
6215
6216 emit_label (exit_label);
6217 LABEL_NUSES (exit_label) = 1;
6218 }
6219
6220
6221 /* Expand conditional increment or decrement using alc/slb instructions.
6222 Should generate code setting DST to either SRC or SRC + INCREMENT,
6223 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
6224 Returns true if successful, false otherwise.
6225
6226 That makes it possible to implement some if-constructs without jumps e.g.:
6227 (borrow = CC0 | CC1 and carry = CC2 | CC3)
6228 unsigned int a, b, c;
6229 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
6230 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
6231 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
6232 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
6233
6234 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
6235 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
6236 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
6237 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
6238 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
6239
6240 bool
6241 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
6242 rtx dst, rtx src, rtx increment)
6243 {
6244 machine_mode cmp_mode;
6245 machine_mode cc_mode;
6246 rtx op_res;
6247 rtx insn;
6248 rtvec p;
6249 int ret;
6250
6251 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
6252 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
6253 cmp_mode = SImode;
6254 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
6255 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6256 cmp_mode = DImode;
6257 else
6258 return false;
6259
6260 /* Try ADD LOGICAL WITH CARRY. */
6261 if (increment == const1_rtx)
6262 {
6263 /* Determine CC mode to use. */
6264 if (cmp_code == EQ || cmp_code == NE)
6265 {
6266 if (cmp_op1 != const0_rtx)
6267 {
6268 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6269 NULL_RTX, 0, OPTAB_WIDEN);
6270 cmp_op1 = const0_rtx;
6271 }
6272
6273 cmp_code = cmp_code == EQ ? LEU : GTU;
6274 }
6275
6276 if (cmp_code == LTU || cmp_code == LEU)
6277 {
6278 rtx tem = cmp_op0;
6279 cmp_op0 = cmp_op1;
6280 cmp_op1 = tem;
6281 cmp_code = swap_condition (cmp_code);
6282 }
6283
6284 switch (cmp_code)
6285 {
6286 case GTU:
6287 cc_mode = CCUmode;
6288 break;
6289
6290 case GEU:
6291 cc_mode = CCL3mode;
6292 break;
6293
6294 default:
6295 return false;
6296 }
6297
6298 /* Emit comparison instruction pattern. */
6299 if (!register_operand (cmp_op0, cmp_mode))
6300 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6301
6302 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6303 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6304 /* We use insn_invalid_p here to add clobbers if required. */
6305 ret = insn_invalid_p (emit_insn (insn), false);
6306 gcc_assert (!ret);
6307
6308 /* Emit ALC instruction pattern. */
6309 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6310 gen_rtx_REG (cc_mode, CC_REGNUM),
6311 const0_rtx);
6312
6313 if (src != const0_rtx)
6314 {
6315 if (!register_operand (src, GET_MODE (dst)))
6316 src = force_reg (GET_MODE (dst), src);
6317
6318 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6319 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6320 }
6321
6322 p = rtvec_alloc (2);
6323 RTVEC_ELT (p, 0) =
6324 gen_rtx_SET (dst, op_res);
6325 RTVEC_ELT (p, 1) =
6326 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6327 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6328
6329 return true;
6330 }
6331
6332 /* Try SUBTRACT LOGICAL WITH BORROW. */
6333 if (increment == constm1_rtx)
6334 {
6335 /* Determine CC mode to use. */
6336 if (cmp_code == EQ || cmp_code == NE)
6337 {
6338 if (cmp_op1 != const0_rtx)
6339 {
6340 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6341 NULL_RTX, 0, OPTAB_WIDEN);
6342 cmp_op1 = const0_rtx;
6343 }
6344
6345 cmp_code = cmp_code == EQ ? LEU : GTU;
6346 }
6347
6348 if (cmp_code == GTU || cmp_code == GEU)
6349 {
6350 rtx tem = cmp_op0;
6351 cmp_op0 = cmp_op1;
6352 cmp_op1 = tem;
6353 cmp_code = swap_condition (cmp_code);
6354 }
6355
6356 switch (cmp_code)
6357 {
6358 case LEU:
6359 cc_mode = CCUmode;
6360 break;
6361
6362 case LTU:
6363 cc_mode = CCL3mode;
6364 break;
6365
6366 default:
6367 return false;
6368 }
6369
6370 /* Emit comparison instruction pattern. */
6371 if (!register_operand (cmp_op0, cmp_mode))
6372 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6373
6374 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6375 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6376 /* We use insn_invalid_p here to add clobbers if required. */
6377 ret = insn_invalid_p (emit_insn (insn), false);
6378 gcc_assert (!ret);
6379
6380 /* Emit SLB instruction pattern. */
6381 if (!register_operand (src, GET_MODE (dst)))
6382 src = force_reg (GET_MODE (dst), src);
6383
6384 op_res = gen_rtx_MINUS (GET_MODE (dst),
6385 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6386 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6387 gen_rtx_REG (cc_mode, CC_REGNUM),
6388 const0_rtx));
6389 p = rtvec_alloc (2);
6390 RTVEC_ELT (p, 0) =
6391 gen_rtx_SET (dst, op_res);
6392 RTVEC_ELT (p, 1) =
6393 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6394 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6395
6396 return true;
6397 }
6398
6399 return false;
6400 }
6401
6402 /* Expand code for the insv template. Return true if successful. */
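/* For example, with DEST a 64-bit register, a constant SRC, and a
   bit-field whose position and size are both multiples of 16, the
   INSERT IMMEDIATE path below emits one iill/iilh/iihl/iihh (or
   iilf/iihf) style insert per 16/32-bit chunk; non-constant cases
   fall through to the stcm/icm or (on z10 and later) risbg
   strategies.  */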
6403
6404 bool
6405 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6406 {
6407 int bitsize = INTVAL (op1);
6408 int bitpos = INTVAL (op2);
6409 machine_mode mode = GET_MODE (dest);
6410 machine_mode smode;
6411 int smode_bsize, mode_bsize;
6412 rtx op, clobber;
6413
6414 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6415 return false;
6416
6417 /* Just a move. */
6418 if (bitpos == 0
6419 && bitsize == GET_MODE_BITSIZE (GET_MODE (src))
6420 && mode == GET_MODE (src))
6421 {
6422 emit_move_insn (dest, src);
6423 return true;
6424 }
6425
6426 /* Generate INSERT IMMEDIATE (IILL et al). */
6427 /* (set (ze (reg)) (const_int)). */
6428 if (TARGET_ZARCH
6429 && register_operand (dest, word_mode)
6430 && (bitpos % 16) == 0
6431 && (bitsize % 16) == 0
6432 && const_int_operand (src, VOIDmode))
6433 {
6434 HOST_WIDE_INT val = INTVAL (src);
6435 int regpos = bitpos + bitsize;
6436
6437 while (regpos > bitpos)
6438 {
6439 machine_mode putmode;
6440 int putsize;
6441
6442 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6443 putmode = SImode;
6444 else
6445 putmode = HImode;
6446
6447 putsize = GET_MODE_BITSIZE (putmode);
6448 regpos -= putsize;
6449 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6450 GEN_INT (putsize),
6451 GEN_INT (regpos)),
6452 gen_int_mode (val, putmode));
6453 val >>= putsize;
6454 }
6455 gcc_assert (regpos == bitpos);
6456 return true;
6457 }
6458
6459 smode = smallest_int_mode_for_size (bitsize);
6460 smode_bsize = GET_MODE_BITSIZE (smode);
6461 mode_bsize = GET_MODE_BITSIZE (mode);
6462
6463 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6464 if (bitpos == 0
6465 && (bitsize % BITS_PER_UNIT) == 0
6466 && MEM_P (dest)
6467 && (register_operand (src, word_mode)
6468 || const_int_operand (src, VOIDmode)))
6469 {
6470 /* Emit standard pattern if possible. */
6471 if (smode_bsize == bitsize)
6472 {
6473 emit_move_insn (adjust_address (dest, smode, 0),
6474 gen_lowpart (smode, src));
6475 return true;
6476 }
6477
6478 /* (set (ze (mem)) (const_int)). */
6479 else if (const_int_operand (src, VOIDmode))
6480 {
6481 int size = bitsize / BITS_PER_UNIT;
6482 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6483 BLKmode,
6484 UNITS_PER_WORD - size);
6485
6486 dest = adjust_address (dest, BLKmode, 0);
6487 set_mem_size (dest, size);
6488 s390_expand_cpymem (dest, src_mem, GEN_INT (size));
6489 return true;
6490 }
6491
6492 /* (set (ze (mem)) (reg)). */
6493 else if (register_operand (src, word_mode))
6494 {
6495 if (bitsize <= 32)
6496 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6497 const0_rtx), src);
6498 else
6499 {
6500 /* Emit st,stcmh sequence. */
6501 int stcmh_width = bitsize - 32;
6502 int size = stcmh_width / BITS_PER_UNIT;
6503
6504 emit_move_insn (adjust_address (dest, SImode, size),
6505 gen_lowpart (SImode, src));
6506 set_mem_size (dest, size);
6507 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6508 GEN_INT (stcmh_width),
6509 const0_rtx),
6510 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6511 }
6512 return true;
6513 }
6514 }
6515
6516 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6517 if ((bitpos % BITS_PER_UNIT) == 0
6518 && (bitsize % BITS_PER_UNIT) == 0
6519 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6520 && MEM_P (src)
6521 && (mode == DImode || mode == SImode)
6522 && mode != smode
6523 && register_operand (dest, mode))
6524 {
6525 /* Emit a strict_low_part pattern if possible. */
6526 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6527 {
6528 rtx low_dest = gen_lowpart (smode, dest);
6529 rtx low_src = gen_lowpart (smode, src);
6530
6531 switch (smode)
6532 {
6533 case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); return true;
6534 case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); return true;
6535 case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); return true;
6536 default: break;
6537 }
6538 }
6539
6540 /* ??? There are more powerful versions of ICM that are not
6541 completely represented in the md file. */
6542 }
6543
6544 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6545 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6546 {
6547 machine_mode mode_s = GET_MODE (src);
6548
6549 if (CONSTANT_P (src))
6550 {
6551 /* For constant zero values the representation with AND
6552 appears to be folded in more situations than the (set
6553 (zero_extract) ...).
6554 We only do this when the start and end of the bitfield
6555 remain in the same SImode chunk. That way nihf or nilf
6556 can be used.
6557 The AND patterns might still generate a risbg for this. */
6558 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6559 return false;
6560 else
6561 src = force_reg (mode, src);
6562 }
6563 else if (mode_s != mode)
6564 {
6565 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6566 src = force_reg (mode_s, src);
6567 src = gen_lowpart (mode, src);
6568 }
6569
6570 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2),
6571 op = gen_rtx_SET (op, src);
6572
6573 if (!TARGET_ZEC12)
6574 {
6575 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6576 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6577 }
6578 emit_insn (op);
6579
6580 return true;
6581 }
6582
6583 return false;
6584 }
6585
6586 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6587 register that holds VAL of mode MODE shifted by COUNT bits. */
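/* E.g. for a QImode VAL and COUNT == 16 this returns an SImode
   register holding (VAL & 0xff) << 16.  */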
6588
6589 static inline rtx
6590 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6591 {
6592 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6593 NULL_RTX, 1, OPTAB_DIRECT);
6594 return expand_simple_binop (SImode, ASHIFT, val, count,
6595 NULL_RTX, 1, OPTAB_DIRECT);
6596 }
6597
6598 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6599 the result in TARGET. */
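/* E.g. an integer V4SI "a < b" is rewritten below as "b > a" (swap_p)
   and emitted as a single vector-compare-high pattern, whereas
   "a >= b" becomes "!(b > a)", which additionally needs a complement
   of the result vector (neg_p).  */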
6600
6601 void
6602 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6603 rtx cmp_op1, rtx cmp_op2)
6604 {
6605 machine_mode mode = GET_MODE (target);
6606 bool neg_p = false, swap_p = false;
6607 rtx tmp;
6608
6609 if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6610 {
6611 cmp_op2 = force_reg (GET_MODE (cmp_op1), cmp_op2);
6612 switch (cond)
6613 {
6614 /* NE a != b -> !(a == b) */
6615 case NE: cond = EQ; neg_p = true; break;
6616 case UNGT:
6617 emit_insn (gen_vec_cmpungt (target, cmp_op1, cmp_op2));
6618 return;
6619 case UNGE:
6620 emit_insn (gen_vec_cmpunge (target, cmp_op1, cmp_op2));
6621 return;
6622 case LE: cond = GE; swap_p = true; break;
6623 /* UNLE: (a u<= b) -> (b u>= a). */
6624 case UNLE:
6625 emit_insn (gen_vec_cmpunge (target, cmp_op2, cmp_op1));
6626 return;
6627 /* LT: a < b -> b > a */
6628 case LT: cond = GT; swap_p = true; break;
6629 /* UNLT: (a u< b) -> (b u> a). */
6630 case UNLT:
6631 emit_insn (gen_vec_cmpungt (target, cmp_op2, cmp_op1));
6632 return;
6633 case UNEQ:
6634 emit_insn (gen_vec_cmpuneq (target, cmp_op1, cmp_op2));
6635 return;
6636 case LTGT:
6637 emit_insn (gen_vec_cmpltgt (target, cmp_op1, cmp_op2));
6638 return;
6639 case ORDERED:
6640 emit_insn (gen_vec_cmpordered (target, cmp_op1, cmp_op2));
6641 return;
6642 case UNORDERED:
6643 emit_insn (gen_vec_cmpunordered (target, cmp_op1, cmp_op2));
6644 return;
6645 default: break;
6646 }
6647 }
6648 else
6649 {
6650 /* Turn x < 0 into x >> (bits per element - 1) */
6651 if (cond == LT && cmp_op2 == CONST0_RTX (mode))
6652 {
6653 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (mode)) - 1;
6654 rtx res = expand_simple_binop (mode, ASHIFTRT, cmp_op1,
6655 GEN_INT (shift), target,
6656 0, OPTAB_DIRECT);
6657 if (res != target)
6658 emit_move_insn (target, res);
6659 return;
6660 }
6661 cmp_op2 = force_reg (GET_MODE (cmp_op1), cmp_op2);
6662
6663 switch (cond)
6664 {
6665 /* NE: a != b -> !(a == b) */
6666 case NE: cond = EQ; neg_p = true; break;
6667 /* GE: a >= b -> !(b > a) */
6668 case GE: cond = GT; neg_p = true; swap_p = true; break;
6669 /* GEU: a >= b -> !(b > a) */
6670 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6671 /* LE: a <= b -> !(a > b) */
6672 case LE: cond = GT; neg_p = true; break;
6673 /* LEU: a <= b -> !(a > b) */
6674 case LEU: cond = GTU; neg_p = true; break;
6675 /* LT: a < b -> b > a */
6676 case LT: cond = GT; swap_p = true; break;
6677 /* LTU: a < b -> b > a */
6678 case LTU: cond = GTU; swap_p = true; break;
6679 default: break;
6680 }
6681 }
6682
6683 if (swap_p)
6684 {
6685 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6686 }
6687
6688 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6689 mode,
6690 cmp_op1, cmp_op2)));
6691 if (neg_p)
6692 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6693 }
6694
6695 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6696 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6697 elements in CMP1 and CMP2 fulfill the comparison.
6698 This function is only used to emit patterns for the vx builtins and
6699 therefore only handles comparison codes required by the
6700 builtins. */
6701 void
6702 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6703 rtx cmp1, rtx cmp2, bool all_p)
6704 {
6705 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6706 rtx tmp_reg = gen_reg_rtx (SImode);
6707 bool swap_p = false;
6708
6709 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6710 {
6711 switch (code)
6712 {
6713 case EQ:
6714 case NE:
6715 cc_producer_mode = CCVEQmode;
6716 break;
6717 case GE:
6718 case LT:
6719 code = swap_condition (code);
6720 swap_p = true;
6721 /* fallthrough */
6722 case GT:
6723 case LE:
6724 cc_producer_mode = CCVIHmode;
6725 break;
6726 case GEU:
6727 case LTU:
6728 code = swap_condition (code);
6729 swap_p = true;
6730 /* fallthrough */
6731 case GTU:
6732 case LEU:
6733 cc_producer_mode = CCVIHUmode;
6734 break;
6735 default:
6736 gcc_unreachable ();
6737 }
6738
6739 scratch_mode = GET_MODE (cmp1);
6740 /* These codes represent inverted CC interpretations. Inverting
6741 an ALL CC mode results in an ANY CC mode and the other way
6742 around. Invert the all_p flag here to compensate for
6743 that. */
6744 if (code == NE || code == LE || code == LEU)
6745 all_p = !all_p;
6746
6747 cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6748 }
6749 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6750 {
6751 bool inv_p = false;
6752
6753 switch (code)
6754 {
6755 case EQ: cc_producer_mode = CCVEQmode; break;
6756 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break;
6757 case GT: cc_producer_mode = CCVFHmode; break;
6758 case GE: cc_producer_mode = CCVFHEmode; break;
6759 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break;
6760 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6761 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break;
6762 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6763 default: gcc_unreachable ();
6764 }
6765 scratch_mode = related_int_vector_mode (GET_MODE (cmp1)).require ();
6766
6767 if (inv_p)
6768 all_p = !all_p;
6769
6770 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6771 }
6772 else
6773 gcc_unreachable ();
6774
6775 if (swap_p)
6776 {
6777 rtx tmp = cmp2;
6778 cmp2 = cmp1;
6779 cmp1 = tmp;
6780 }
6781
6782 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6783 gen_rtvec (2, gen_rtx_SET (
6784 gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6785 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6786 gen_rtx_CLOBBER (VOIDmode,
6787 gen_rtx_SCRATCH (scratch_mode)))));
6788 emit_move_insn (target, const0_rtx);
6789 emit_move_insn (tmp_reg, const1_rtx);
6790
6791 emit_move_insn (target,
6792 gen_rtx_IF_THEN_ELSE (SImode,
6793 gen_rtx_fmt_ee (code, VOIDmode,
6794 gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6795 const0_rtx),
6796 tmp_reg, target));
6797 }
6798
6799 /* Invert the comparison CODE applied to a CC mode. This is only safe
6800 if we know whether there result was created by a floating point
6801 compare or not. For the CCV modes this is encoded as part of the
6802 mode. */
6803 enum rtx_code
6804 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6805 {
6806 /* Reversal of FP compares takes care -- an ordered compare
6807 becomes an unordered compare and vice versa. */
6808 if (mode == CCVFALLmode || mode == CCVFANYmode || mode == CCSFPSmode)
6809 return reverse_condition_maybe_unordered (code);
6810 else if (mode == CCVIALLmode || mode == CCVIANYmode)
6811 return reverse_condition (code);
6812 else
6813 gcc_unreachable ();
6814 }
6815
6816 /* Generate a vector comparison expression loading either elements of
6817 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6818 and CMP_OP2. */
6819
6820 void
6821 s390_expand_vcond (rtx target, rtx then, rtx els,
6822 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6823 {
6824 rtx tmp;
6825 machine_mode result_mode;
6826 rtx result_target;
6827
6828 machine_mode target_mode = GET_MODE (target);
6829 machine_mode cmp_mode = GET_MODE (cmp_op1);
6830 rtx op = (cond == LT) ? els : then;
6831
6832 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6833 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6834 for short and byte (x >> 15 and x >> 7 respectively). */
6835 if ((cond == LT || cond == GE)
6836 && target_mode == cmp_mode
6837 && cmp_op2 == CONST0_RTX (cmp_mode)
6838 && op == CONST0_RTX (target_mode)
6839 && s390_vector_mode_supported_p (target_mode)
6840 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6841 {
6842 rtx negop = (cond == LT) ? then : els;
6843
6844 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6845
6846 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6847 if (negop == CONST1_RTX (target_mode))
6848 {
6849 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6850 GEN_INT (shift), target,
6851 1, OPTAB_DIRECT);
6852 if (res != target)
6853 emit_move_insn (target, res);
6854 return;
6855 }
6856
6857 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6858 else if (all_ones_operand (negop, target_mode))
6859 {
6860 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6861 GEN_INT (shift), target,
6862 0, OPTAB_DIRECT);
6863 if (res != target)
6864 emit_move_insn (target, res);
6865 return;
6866 }
6867 }
6868
6869 /* We always use an integral type vector to hold the comparison
6870 result. */
6871 result_mode = related_int_vector_mode (cmp_mode).require ();
6872 result_target = gen_reg_rtx (result_mode);
6873
6874 /* We allow vector immediates as comparison operands that
6875 can be handled by the optimization above but not by the
6876 following code. Hence, force them into registers here. */
6877 if (!REG_P (cmp_op1))
6878 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6879
6880 s390_expand_vec_compare (result_target, cond, cmp_op1, cmp_op2);
6881
6882 /* If the results are supposed to be either -1 or 0 we are done
6883 since this is what our compare instructions generate anyway. */
6884 if (all_ones_operand (then, GET_MODE (then))
6885 && const0_operand (els, GET_MODE (els)))
6886 {
6887 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6888 result_target, 0));
6889 return;
6890 }
6891
6892 /* Otherwise we will do a vsel afterwards. */
6893 /* This gets triggered e.g.
6894 with gcc.c-torture/compile/pr53410-1.c */
6895 if (!REG_P (then))
6896 then = force_reg (target_mode, then);
6897
6898 if (!REG_P (els))
6899 els = force_reg (target_mode, els);
6900
6901 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6902 result_target,
6903 CONST0_RTX (result_mode));
6904
6905 /* We compared the result against zero above so we have to swap then
6906 and els here. */
6907 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6908
6909 gcc_assert (target_mode == GET_MODE (then));
6910 emit_insn (gen_rtx_SET (target, tmp));
6911 }
6912
6913 /* Emit the RTX necessary to initialize the vector TARGET with values
6914 in VALS. */
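/* E.g. { 0, 0, 0, 0 } is emitted directly as a vector constant,
   { x, x, x, x } as a vec_duplicate (vlrep/vrepi/vrep), and a pair of
   DImode registers as a vec_concat (vector load GR pair); anything
   else falls back to zeroing the register and inserting the elements
   one by one.  */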
6915 void
6916 s390_expand_vec_init (rtx target, rtx vals)
6917 {
6918 machine_mode mode = GET_MODE (target);
6919 machine_mode inner_mode = GET_MODE_INNER (mode);
6920 int n_elts = GET_MODE_NUNITS (mode);
6921 bool all_same = true, all_regs = true, all_const_int = true;
6922 rtx x;
6923 int i;
6924
6925 for (i = 0; i < n_elts; ++i)
6926 {
6927 x = XVECEXP (vals, 0, i);
6928
6929 if (!CONST_INT_P (x))
6930 all_const_int = false;
6931
6932 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6933 all_same = false;
6934
6935 if (!REG_P (x))
6936 all_regs = false;
6937 }
6938
6939 /* Use vector gen mask or vector gen byte mask if possible. */
6940 if (all_same && all_const_int)
6941 {
6942 rtx vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
6943 if (XVECEXP (vals, 0, 0) == const0_rtx
6944 || s390_contiguous_bitmask_vector_p (vec, NULL, NULL)
6945 || s390_bytemask_vector_p (vec, NULL))
6946 {
6947 emit_insn (gen_rtx_SET (target, vec));
6948 return;
6949 }
6950 }
6951
6952 /* Use vector replicate instructions. vlrep/vrepi/vrep */
6953 if (all_same)
6954 {
6955 rtx elem = XVECEXP (vals, 0, 0);
6956
6957 /* vec_splats accepts general_operand as source. */
6958 if (!general_operand (elem, GET_MODE (elem)))
6959 elem = force_reg (inner_mode, elem);
6960
6961 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
6962 return;
6963 }
6964
6965 if (all_regs
6966 && REG_P (target)
6967 && n_elts == 2
6968 && GET_MODE_SIZE (inner_mode) == 8)
6969 {
6970 /* Use vector load pair. */
6971 emit_insn (gen_rtx_SET (target,
6972 gen_rtx_VEC_CONCAT (mode,
6973 XVECEXP (vals, 0, 0),
6974 XVECEXP (vals, 0, 1))));
6975 return;
6976 }
6977
6978 /* Use vector load logical element and zero. */
6979 if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6980 {
6981 bool found = true;
6982
6983 x = XVECEXP (vals, 0, 0);
6984 if (memory_operand (x, inner_mode))
6985 {
6986 for (i = 1; i < n_elts; ++i)
6987 found = found && XVECEXP (vals, 0, i) == const0_rtx;
6988
6989 if (found)
6990 {
6991 machine_mode half_mode = (inner_mode == SFmode
6992 ? V2SFmode : V2SImode);
6993 emit_insn (gen_rtx_SET (target,
6994 gen_rtx_VEC_CONCAT (mode,
6995 gen_rtx_VEC_CONCAT (half_mode,
6996 x,
6997 const0_rtx),
6998 gen_rtx_VEC_CONCAT (half_mode,
6999 const0_rtx,
7000 const0_rtx))));
7001 return;
7002 }
7003 }
7004 }
7005
7006 /* We are about to set the vector elements one by one. Zero out the
7007 full register first in order to help the data flow framework to
7008 detect it as a full VR set. */
7009 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
7010
7011 /* Unfortunately the vec_init expander is not allowed to fail. So
7012 we have to implement the fallback ourselves. */
7013 for (i = 0; i < n_elts; i++)
7014 {
7015 rtx elem = XVECEXP (vals, 0, i);
7016 if (!general_operand (elem, GET_MODE (elem)))
7017 elem = force_reg (inner_mode, elem);
7018
7019 emit_insn (gen_rtx_SET (target,
7020 gen_rtx_UNSPEC (mode,
7021 gen_rtvec (3, elem,
7022 GEN_INT (i), target),
7023 UNSPEC_VEC_SET)));
7024 }
7025 }
7026
7027 /* Return a parallel of constant integers to be used as permutation
7028 vector for a vector merge operation in MODE. If HIGH_P is true the
7029 left-most elements of the source vectors are merged otherwise the
7030 right-most elements. */
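/* E.g. for V4SImode this returns the selector { 0, 4, 1, 5 } if
   HIGH_P and { 2, 6, 3, 7 } otherwise, matching the element order of
   a vector merge high/low.  */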
7031 rtx
7032 s390_expand_merge_perm_const (machine_mode mode, bool high_p)
7033 {
7034 int nelts = GET_MODE_NUNITS (mode);
7035 rtx perm[16];
7036 int addend = high_p ? 0 : nelts;
7037
7038 for (int i = 0; i < nelts; i++)
7039 perm[i] = GEN_INT ((i + addend) / 2 + (i % 2) * nelts);
7040
7041 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelts, perm));
7042 }
7043
7044 /* Emit RTL to implement a vector merge operation of SRC1 and SRC2
7045 which creates the result in TARGET. HIGH_P determines whether a
7046 merge hi or lo will be generated. */
7047 void
7048 s390_expand_merge (rtx target, rtx src1, rtx src2, bool high_p)
7049 {
7050 machine_mode mode = GET_MODE (target);
7051 opt_machine_mode opt_mode_2x = mode_for_vector (GET_MODE_INNER (mode),
7052 2 * GET_MODE_NUNITS (mode));
7053 gcc_assert (opt_mode_2x.exists ());
7054 machine_mode mode_double_nelts = opt_mode_2x.require ();
7055 rtx constv = s390_expand_merge_perm_const (mode, high_p);
7056 src1 = force_reg (GET_MODE (src1), src1);
7057 src2 = force_reg (GET_MODE (src2), src2);
7058 rtx x = gen_rtx_VEC_CONCAT (mode_double_nelts, src1, src2);
7059 x = gen_rtx_VEC_SELECT (mode, x, constv);
7060 emit_insn (gen_rtx_SET (target, x));
7061 }
7062
7063 /* Emit a vector constant that contains 1s in each element's sign bit position
7064 and 0s in other positions. MODE is the desired constant's mode. */
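/* E.g. for V2DFmode the result is a CONST_VECTOR whose two elements
   carry the DFmode bit pattern 0x8000000000000000, i.e. -0.0.  */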
7065 extern rtx
7066 s390_build_signbit_mask (machine_mode mode)
7067 {
7068 if (mode == TFmode && TARGET_VXE)
7069 {
7070 wide_int mask_val = wi::set_bit_in_zero (127, 128);
7071 rtx mask = immed_wide_int_const (mask_val, TImode);
7072 return gen_lowpart (TFmode, mask);
7073 }
7074
7075 /* Generate the integral element mask value. */
7076 machine_mode inner_mode = GET_MODE_INNER (mode);
7077 int inner_bitsize = GET_MODE_BITSIZE (inner_mode);
7078 wide_int mask_val = wi::set_bit_in_zero (inner_bitsize - 1, inner_bitsize);
7079
7080 /* Emit the element mask rtx. Use gen_lowpart in order to cast the integral
7081 value to the desired mode. */
7082 machine_mode int_mode = related_int_vector_mode (mode).require ();
7083 rtx mask = immed_wide_int_const (mask_val, GET_MODE_INNER (int_mode));
7084 mask = gen_lowpart (inner_mode, mask);
7085
7086 /* Build the vector mask rtx by replicating the element mask rtx. */
7087 int nunits = GET_MODE_NUNITS (mode);
7088 rtvec v = rtvec_alloc (nunits);
7089 for (int i = 0; i < nunits; i++)
7090 RTVEC_ELT (v, i) = mask;
7091 return gen_rtx_CONST_VECTOR (mode, v);
7092 }
7093
7094 /* Structure to hold the initial parameters for a compare_and_swap operation
7095 in HImode and QImode. */
7096
7097 struct alignment_context
7098 {
7099 rtx memsi; /* SI aligned memory location. */
7100 rtx shift; /* Bit offset with regard to lsb. */
7101 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
7102 rtx modemaski; /* ~modemask */
7103 bool aligned; /* True if memory is aligned, false else. */
7104 };
7105
7106 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
7107 structure AC for transparent simplifying, if the memory alignment is known
7108 to be at least 32bit. MEM is the memory location for the actual operation
7109 and MODE its mode. */
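/* E.g. for a QImode MEM at an unknown byte address A this yields
   memsi = the SImode word at (A & -4), shift = (3 - (A & 3)) * 8
   (the big-endian bit offset of the byte within that word), and
   modemask = 0xff << shift.  */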
7110
7111 static void
7112 init_alignment_context (struct alignment_context *ac, rtx mem,
7113 machine_mode mode)
7114 {
7115 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
7116 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
7117
7118 if (ac->aligned)
7119 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
7120 else
7121 {
7122 /* Alignment is unknown. */
7123 rtx byteoffset, addr, align;
7124
7125 /* Force the address into a register. */
7126 addr = force_reg (Pmode, XEXP (mem, 0));
7127
7128 /* Align it to SImode. */
7129 align = expand_simple_binop (Pmode, AND, addr,
7130 GEN_INT (-GET_MODE_SIZE (SImode)),
7131 NULL_RTX, 1, OPTAB_DIRECT);
7132 /* Generate MEM. */
7133 ac->memsi = gen_rtx_MEM (SImode, align);
7134 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
7135 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
7136 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
7137
7138 /* Calculate shiftcount. */
7139 byteoffset = expand_simple_binop (Pmode, AND, addr,
7140 GEN_INT (GET_MODE_SIZE (SImode) - 1),
7141 NULL_RTX, 1, OPTAB_DIRECT);
7142 /* As we already have some offset, evaluate the remaining distance. */
7143 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
7144 NULL_RTX, 1, OPTAB_DIRECT);
7145 }
7146
7147 /* Shift is the byte count, but we need the bitcount. */
7148 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
7149 NULL_RTX, 1, OPTAB_DIRECT);
7150
7151 /* Calculate masks. */
7152 ac->modemask = expand_simple_binop (SImode, ASHIFT,
7153 GEN_INT (GET_MODE_MASK (mode)),
7154 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
7155 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
7156 NULL_RTX, 1);
7157 }
7158
7159 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
7160 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
7161 perform the merge in SEQ2. */
7162
7163 static rtx
7164 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
7165 machine_mode mode, rtx val, rtx ins)
7166 {
7167 rtx tmp;
7168
7169 if (ac->aligned)
7170 {
7171 start_sequence ();
7172 tmp = copy_to_mode_reg (SImode, val);
7173 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
7174 const0_rtx, ins))
7175 {
7176 *seq1 = NULL;
7177 *seq2 = get_insns ();
7178 end_sequence ();
7179 return tmp;
7180 }
7181 end_sequence ();
7182 }
7183
7184 /* Failed to use insv. Generate a two part shift and mask. */
7185 start_sequence ();
7186 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
7187 *seq1 = get_insns ();
7188 end_sequence ();
7189
7190 start_sequence ();
7191 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
7192 *seq2 = get_insns ();
7193 end_sequence ();
7194
7195 return tmp;
7196 }
7197
7198 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
7199 the memory location, CMP the old value to compare MEM with and NEW_RTX the
7200 value to set if CMP == MEM. */
7201
7202 static void
7203 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7204 rtx cmp, rtx new_rtx, bool is_weak)
7205 {
7206 struct alignment_context ac;
7207 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
7208 rtx res = gen_reg_rtx (SImode);
7209 rtx_code_label *csloop = NULL, *csend = NULL;
7210
7211 gcc_assert (MEM_P (mem));
7212
7213 init_alignment_context (&ac, mem, mode);
7214
7215 /* Load full word. Subsequent loads are performed by CS. */
7216 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
7217 NULL_RTX, 1, OPTAB_DIRECT);
7218
7219 /* Prepare insertions of cmp and new_rtx into the loaded value. When
7220 possible, we try to use insv to make this happen efficiently. If
7221 that fails we'll generate code both inside and outside the loop. */
7222 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
7223 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
7224
7225 if (seq0)
7226 emit_insn (seq0);
7227 if (seq1)
7228 emit_insn (seq1);
7229
7230 /* Start CS loop. */
7231 if (!is_weak)
7232 {
7233 /* Begin assuming success. */
7234 emit_move_insn (btarget, const1_rtx);
7235
7236 csloop = gen_label_rtx ();
7237 csend = gen_label_rtx ();
7238 emit_label (csloop);
7239 }
7240
7241 /* val = "<mem>00..0<mem>"
7242 * cmp = "00..0<cmp>00..0"
7243 * new = "00..0<new>00..0"
7244 */
7245
7246 emit_insn (seq2);
7247 emit_insn (seq3);
7248
7249 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
7250 if (is_weak)
7251 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
7252 else
7253 {
7254 rtx tmp;
7255
7256 /* Jump to end if we're done (likely?). */
7257 s390_emit_jump (csend, cc);
7258
7259 /* Check for changes outside the mode, and loop internally if so.
7260 Arrange the moves so that the compare is adjacent to the
7261 branch so that we can generate CRJ. */
7262 tmp = copy_to_reg (val);
7263 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
7264 1, OPTAB_DIRECT);
7265 cc = s390_emit_compare (NE, val, tmp);
7266 s390_emit_jump (csloop, cc);
7267
7268 /* Failed. */
7269 emit_move_insn (btarget, const0_rtx);
7270 emit_label (csend);
7271 }
7272
7273 /* Return the correct part of the bitfield. */
7274 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
7275 NULL_RTX, 1, OPTAB_DIRECT), 1);
7276 }
7277
7278 /* Variant of s390_expand_cs for SI, DI and TI modes. */
7279 static void
7280 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7281 rtx cmp, rtx new_rtx, bool is_weak)
7282 {
7283 rtx output = vtarget;
7284 rtx_code_label *skip_cs_label = NULL;
7285 bool do_const_opt = false;
7286
7287 if (!register_operand (output, mode))
7288 output = gen_reg_rtx (mode);
7289
7290 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
7291 with the constant first and skip the compare_and_swap because it's very
7292 expensive and likely to fail anyway.
7293 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
7294 cause spurious failures in that case.
7295 Note 2: It may be useful to do this also for non-constant INPUT.
7296 Note 3: Currently only targets with "load on condition" are supported
7297 (z196 and newer). */
7298
7299 if (TARGET_Z196
7300 && (mode == SImode || mode == DImode))
7301 do_const_opt = (is_weak && CONST_INT_P (cmp));
7302
7303 if (do_const_opt)
7304 {
7305 rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7306
7307 skip_cs_label = gen_label_rtx ();
7308 emit_move_insn (btarget, const0_rtx);
7309 if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
7310 {
7311 rtvec lt = rtvec_alloc (2);
7312
7313 /* Load-and-test + conditional jump. */
7314 RTVEC_ELT (lt, 0)
7315 = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
7316 RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
7317 emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
7318 }
7319 else
7320 {
7321 emit_move_insn (output, mem);
7322 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
7323 }
7324 s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
7325 add_reg_br_prob_note (get_last_insn (),
7326 profile_probability::very_unlikely ());
7327 /* If the jump is not taken, OUTPUT is the expected value. */
7328 cmp = output;
7329 /* Reload newval to a register manually, *after* the compare and jump
7330 above. Otherwise Reload might place it before the jump. */
7331 }
7332 else
7333 cmp = force_reg (mode, cmp);
7334 new_rtx = force_reg (mode, new_rtx);
7335 s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
7336 (do_const_opt) ? CCZmode : CCZ1mode);
7337 if (skip_cs_label != NULL)
7338 emit_label (skip_cs_label);
7339
7340 /* We deliberately accept non-register operands in the predicate
7341 to ensure the write back to the output operand happens *before*
7342 the store-flags code below. This makes it easier for combine
7343 to merge the store-flags code with a potential test-and-branch
7344 pattern following (immediately!) afterwards. */
7345 if (output != vtarget)
7346 emit_move_insn (vtarget, output);
7347
7348 if (do_const_opt)
7349 {
7350 rtx cc, cond, ite;
7351
7352 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
7353 btarget has already been initialized with 0 above. */
7354 cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7355 cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7356 ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7357 emit_insn (gen_rtx_SET (btarget, ite));
7358 }
7359 else
7360 {
7361 rtx cc, cond;
7362
7363 cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7364 cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7365 emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7366 }
7367 }
7368
7369 /* Expand an atomic compare and swap operation. MEM is the memory location,
7370 CMP the old value to compare MEM with and NEW_RTX the value to set if
7371 CMP == MEM. */
7372
7373 void
7374 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7375 rtx cmp, rtx new_rtx, bool is_weak)
7376 {
7377 switch (mode)
7378 {
7379 case E_TImode:
7380 case E_DImode:
7381 case E_SImode:
7382 s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7383 break;
7384 case E_HImode:
7385 case E_QImode:
7386 s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7387 break;
7388 default:
7389 gcc_unreachable ();
7390 }
7391 }
7392
7393 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7394 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
7395 of MEM. */
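/* E.g. exchanging a DImode location with the constant zero is handled
   by the special case below as an atomic fetch-and-AND with zero
   (presumably a single interlocked-access instruction on z196 and
   later) rather than as a compare-and-swap loop.  */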
7396
7397 void
7398 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7399 {
7400 machine_mode mode = GET_MODE (mem);
7401 rtx_code_label *csloop;
7402
7403 if (TARGET_Z196
7404 && (mode == DImode || mode == SImode)
7405 && CONST_INT_P (input) && INTVAL (input) == 0)
7406 {
7407 emit_move_insn (output, const0_rtx);
7408 if (mode == DImode)
7409 emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7410 else
7411 emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7412 return;
7413 }
7414
7415 input = force_reg (mode, input);
7416 emit_move_insn (output, mem);
7417 csloop = gen_label_rtx ();
7418 emit_label (csloop);
7419 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7420 input, CCZ1mode));
7421 }
7422
7423 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7424 and VAL the value to play with. If AFTER is true then store the value
7425 MEM holds after the operation, if AFTER is false then store the value MEM
7426 holds before the operation. If TARGET is zero then discard that value, else
7427 store it to TARGET. */
7428
7429 void
7430 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7431 rtx target, rtx mem, rtx val, bool after)
7432 {
7433 struct alignment_context ac;
7434 rtx cmp;
7435 rtx new_rtx = gen_reg_rtx (SImode);
7436 rtx orig = gen_reg_rtx (SImode);
7437 rtx_code_label *csloop = gen_label_rtx ();
7438
7439 gcc_assert (!target || register_operand (target, VOIDmode));
7440 gcc_assert (MEM_P (mem));
7441
7442 init_alignment_context (&ac, mem, mode);
7443
7444 /* Shift val to the correct bit positions.
7445 Preserve "icm", but prevent "ex icm". */
7446 if (!(ac.aligned && code == SET && MEM_P (val)))
7447 val = s390_expand_mask_and_shift (val, mode, ac.shift);
7448
7449 /* Further preparation insns. */
7450 if (code == PLUS || code == MINUS)
7451 emit_move_insn (orig, val);
7452 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7453 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7454 NULL_RTX, 1, OPTAB_DIRECT);
7455
7456 /* Load full word. Subsequent loads are performed by CS. */
7457 cmp = force_reg (SImode, ac.memsi);
7458
7459 /* Start CS loop. */
7460 emit_label (csloop);
7461 emit_move_insn (new_rtx, cmp);
7462
7463 /* Patch new_rtx with val at the correct position. */
7464 switch (code)
7465 {
7466 case PLUS:
7467 case MINUS:
7468 val = expand_simple_binop (SImode, code, new_rtx, orig,
7469 NULL_RTX, 1, OPTAB_DIRECT);
7470 val = expand_simple_binop (SImode, AND, val, ac.modemask,
7471 NULL_RTX, 1, OPTAB_DIRECT);
7472 /* FALLTHRU */
7473 case SET:
7474 if (ac.aligned && MEM_P (val))
7475 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7476 0, 0, SImode, val, false);
7477 else
7478 {
7479 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7480 NULL_RTX, 1, OPTAB_DIRECT);
7481 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7482 NULL_RTX, 1, OPTAB_DIRECT);
7483 }
7484 break;
7485 case AND:
7486 case IOR:
7487 case XOR:
7488 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7489 NULL_RTX, 1, OPTAB_DIRECT);
7490 break;
7491 case MULT: /* NAND */
7492 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7493 NULL_RTX, 1, OPTAB_DIRECT);
7494 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7495 NULL_RTX, 1, OPTAB_DIRECT);
7496 break;
7497 default:
7498 gcc_unreachable ();
7499 }
7500
7501 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7502 ac.memsi, cmp, new_rtx,
7503 CCZ1mode));
7504
7505 /* Return the correct part of the bitfield. */
7506 if (target)
7507 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7508 after ? new_rtx : cmp, ac.shift,
7509 NULL_RTX, 1, OPTAB_DIRECT), 1);
7510 }
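/* As a rough sketch, a QImode fetch-and-OR expands into a loop that
   behaves like the following C, where compare_and_swap_si is merely a
   placeholder for the word-sized CS instruction and memsi/shift come
   from the alignment context set up above:

     unsigned int old = *memsi, patched;
     do
       patched = old | (val << shift);
     while (!compare_and_swap_si (memsi, &old, patched));

   The value stored to TARGET is then either OLD or PATCHED, shifted
   back down by SHIFT, depending on AFTER.  */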
7511
7512 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7513 We need to emit DTP-relative relocations. */
7514
7515 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7516
7517 static void
7518 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7519 {
7520 switch (size)
7521 {
7522 case 4:
7523 fputs ("\t.long\t", file);
7524 break;
7525 case 8:
7526 fputs ("\t.quad\t", file);
7527 break;
7528 default:
7529 gcc_unreachable ();
7530 }
7531 output_addr_const (file, x);
7532 fputs ("@DTPOFF", file);
7533 }
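/* For example, for a TLS symbol foo and SIZE == 8 this emits

     .quad	foo@DTPOFF

   while the 4-byte case uses .long instead.  */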
7534
7535 /* Return the proper mode for REGNO being represented in the dwarf
7536 unwind table. */
7537 machine_mode
7538 s390_dwarf_frame_reg_mode (int regno)
7539 {
7540 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7541
7542 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7543 if (GENERAL_REGNO_P (regno))
7544 save_mode = Pmode;
7545
7546 /* The rightmost 64 bits of vector registers are call-clobbered. */
7547 if (GET_MODE_SIZE (save_mode) > 8)
7548 save_mode = DImode;
7549
7550 return save_mode;
7551 }
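/* For example, with -m64 a GPR is described as DImode (Pmode), with
   -m31 -mzarch it is described as SImode, and a 16-byte vector register
   is described as DImode because its rightmost 64 bits are
   call-clobbered anyway.  */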
7552
7553 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7554 /* Implement TARGET_MANGLE_TYPE. */
7555
7556 static const char *
7557 s390_mangle_type (const_tree type)
7558 {
7559 type = TYPE_MAIN_VARIANT (type);
7560
7561 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7562 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7563 return NULL;
7564
7565 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7566 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7567 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7568 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7569
7570 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7571 && TARGET_LONG_DOUBLE_128)
7572 return "g";
7573
7574 /* For all other types, use normal C++ mangling. */
7575 return NULL;
7576 }
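/* For example, a parameter of type __vector __bool char contributes the
   string "U6__boolc" to the mangled name of a C++ function, and with
   128-bit long double support enabled a long double parameter mangles
   as "g".  */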
7577 #endif
7578
7579 /* In the name of slightly smaller debug output, and to cater to
7580 general assembler lossage, recognize various UNSPEC sequences
7581 and turn them back into a direct symbol reference. */
7582
7583 static rtx
7584 s390_delegitimize_address (rtx orig_x)
7585 {
7586 rtx x, y;
7587
7588 orig_x = delegitimize_mem_from_attrs (orig_x);
7589 x = orig_x;
7590
7591 /* Extract the symbol ref from:
7592 (plus:SI (reg:SI 12 %r12)
7593 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7594 UNSPEC_GOTOFF/PLTOFF)))
7595 and
7596 (plus:SI (reg:SI 12 %r12)
7597 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7598 UNSPEC_GOTOFF/PLTOFF)
7599 (const_int 4 [0x4])))) */
7600 if (GET_CODE (x) == PLUS
7601 && REG_P (XEXP (x, 0))
7602 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7603 && GET_CODE (XEXP (x, 1)) == CONST)
7604 {
7605 HOST_WIDE_INT offset = 0;
7606
7607 /* The const operand. */
7608 y = XEXP (XEXP (x, 1), 0);
7609
7610 if (GET_CODE (y) == PLUS
7611 && GET_CODE (XEXP (y, 1)) == CONST_INT)
7612 {
7613 offset = INTVAL (XEXP (y, 1));
7614 y = XEXP (y, 0);
7615 }
7616
7617 if (GET_CODE (y) == UNSPEC
7618 && (XINT (y, 1) == UNSPEC_GOTOFF
7619 || XINT (y, 1) == UNSPEC_PLTOFF))
7620 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7621 }
7622
7623 if (GET_CODE (x) != MEM)
7624 return orig_x;
7625
7626 x = XEXP (x, 0);
7627 if (GET_CODE (x) == PLUS
7628 && GET_CODE (XEXP (x, 1)) == CONST
7629 && GET_CODE (XEXP (x, 0)) == REG
7630 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7631 {
7632 y = XEXP (XEXP (x, 1), 0);
7633 if (GET_CODE (y) == UNSPEC
7634 && XINT (y, 1) == UNSPEC_GOT)
7635 y = XVECEXP (y, 0, 0);
7636 else
7637 return orig_x;
7638 }
7639 else if (GET_CODE (x) == CONST)
7640 {
7641 /* Extract the symbol ref from:
7642 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7643 UNSPEC_PLT/GOTENT))) */
7644
7645 y = XEXP (x, 0);
7646 if (GET_CODE (y) == UNSPEC
7647 && (XINT (y, 1) == UNSPEC_GOTENT
7648 || XINT (y, 1) == UNSPEC_PLT31))
7649 y = XVECEXP (y, 0, 0);
7650 else
7651 return orig_x;
7652 }
7653 else
7654 return orig_x;
7655
7656 if (GET_MODE (orig_x) != Pmode)
7657 {
7658 if (GET_MODE (orig_x) == BLKmode)
7659 return orig_x;
7660 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7661 if (y == NULL_RTX)
7662 return orig_x;
7663 }
7664 return y;
7665 }
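/* For example, a GOT-offset reference such as

     (plus:SI (reg:SI 12 %r12)
	      (const:SI (unspec:SI [(symbol_ref:SI ("foo"))] UNSPEC_GOTOFF)))

   is turned back into the plain (symbol_ref:SI ("foo")).  */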
7666
7667 /* Output operand OP to stdio stream FILE.
7668 OP is an address (register + offset) which is not used to address data;
7669 instead the rightmost bits are interpreted as the value. */
7670
7671 static void
7672 print_addrstyle_operand (FILE *file, rtx op)
7673 {
7674 HOST_WIDE_INT offset;
7675 rtx base;
7676
7677 /* Extract base register and offset. */
7678 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7679 gcc_unreachable ();
7680
7681 /* Sanity check. */
7682 if (base)
7683 {
7684 gcc_assert (GET_CODE (base) == REG);
7685 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7686 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7687 }
7688
7689 /* Offsets are restricted to twelve bits. */
7690 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7691 if (base)
7692 fprintf (file, "(%s)", reg_names[REGNO (base)]);
7693 }
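/* For example, if OP is (plus (reg %r1) (const_int 4100)), this prints
   "4(%r1)", since only the low twelve bits of the offset are emitted
   (4100 & 0xfff == 4).  */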
7694
7695 /* Print the shift count operand OP to FILE.
7696 OP is an address-style operand in a form which
7697 s390_valid_shift_count permits. Subregs and no-op
7698 and-masking of the operand are stripped. */
7699
7700 static void
7701 print_shift_count_operand (FILE *file, rtx op)
7702 {
7703 /* No checking of the and mask required here. */
7704 if (!s390_valid_shift_count (op, 0))
7705 gcc_unreachable ();
7706
7707 while (op && GET_CODE (op) == SUBREG)
7708 op = SUBREG_REG (op);
7709
7710 if (GET_CODE (op) == AND)
7711 op = XEXP (op, 0);
7712
7713 print_addrstyle_operand (file, op);
7714 }
7715
7716 /* Assigns the number of NOP halfwords to be emitted before and after the
7717 function label to *HW_BEFORE and *HW_AFTER. Neither pointer may be NULL.
7718 If hotpatching is disabled for the function, both values are set to
7719 zero. */
7720
7721 static void
7722 s390_function_num_hotpatch_hw (tree decl,
7723 int *hw_before,
7724 int *hw_after)
7725 {
7726 tree attr;
7727
7728 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7729
7730 /* Handle the arguments of the hotpatch attribute. The values
7731 specified via attribute might override the cmdline argument
7732 values. */
7733 if (attr)
7734 {
7735 tree args = TREE_VALUE (attr);
7736
7737 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7738 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7739 }
7740 else
7741 {
7742 /* Use the values specified by the cmdline arguments. */
7743 *hw_before = s390_hotpatch_hw_before_label;
7744 *hw_after = s390_hotpatch_hw_after_label;
7745 }
7746 }
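/* For example, a function declared as

     __attribute__ ((hotpatch (1, 2))) void f (void);

   gets *HW_BEFORE == 1 and *HW_AFTER == 2, regardless of the
   command-line defaults.  */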
7747
7748 /* Write the current .machine and .machinemode specification to the assembler
7749 file. */
7750
7751 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7752 static void
7753 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7754 {
7755 fprintf (asm_out_file, "\t.machinemode %s\n",
7756 (TARGET_ZARCH) ? "zarch" : "esa");
7757 fprintf (asm_out_file, "\t.machine \"%s",
7758 processor_table[s390_arch].binutils_name);
7759 if (S390_USE_ARCHITECTURE_MODIFIERS)
7760 {
7761 int cpu_flags;
7762
7763 cpu_flags = processor_flags_table[(int) s390_arch];
7764 if (TARGET_HTM && !(cpu_flags & PF_TX))
7765 fprintf (asm_out_file, "+htm");
7766 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7767 fprintf (asm_out_file, "+nohtm");
7768 if (TARGET_VX && !(cpu_flags & PF_VX))
7769 fprintf (asm_out_file, "+vx");
7770 else if (!TARGET_VX && (cpu_flags & PF_VX))
7771 fprintf (asm_out_file, "+novx");
7772 }
7773 fprintf (asm_out_file, "\"\n");
7774 }
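/* A typical result, assuming -march=z13 -mzarch and no explicit HTM/VX
   overrides, would be

     .machinemode zarch
     .machine "z13"

   with "+htm"/"+nohtm" or "+vx"/"+novx" appended only when the selected
   facilities differ from the architecture defaults.  */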
7775
7776 /* Write an extra function header before the very start of the function. */
7777
7778 void
7779 s390_asm_output_function_prefix (FILE *asm_out_file,
7780 const char *fnname ATTRIBUTE_UNUSED)
7781 {
7782 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7783 return;
7784 /* Since only the function-specific options are saved, but not an indication of
7785 which options were explicitly set, it's too much work here to figure out which
7786 options have actually changed. Thus, generate .machine and .machinemode
7787 whenever a function has the target attribute or pragma. */
7788 fprintf (asm_out_file, "\t.machinemode push\n");
7789 fprintf (asm_out_file, "\t.machine push\n");
7790 s390_asm_output_machine_for_arch (asm_out_file);
7791 }
7792
7793 /* Write an extra function footer after the very end of the function. */
7794
7795 void
7796 s390_asm_declare_function_size (FILE *asm_out_file,
7797 const char *fnname, tree decl)
7798 {
7799 if (!flag_inhibit_size_directive)
7800 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7801 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7802 return;
7803 fprintf (asm_out_file, "\t.machine pop\n");
7804 fprintf (asm_out_file, "\t.machinemode pop\n");
7805 }
7806 #endif
7807
7808 /* Write the extra assembler code needed to declare a function properly. */
7809
7810 void
7811 s390_asm_output_function_label (FILE *out_file, const char *fname,
7812 tree decl)
7813 {
7814 int hw_before, hw_after;
7815
7816 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7817 if (hw_before > 0)
7818 {
7819 unsigned int function_alignment;
7820 int i;
7821
7822 /* Add a trampoline code area before the function label and initialize it
7823 with two-byte nop instructions. This area can be overwritten with code
7824 that jumps to a patched version of the function. */
7825 asm_fprintf (out_file, "\tnopr\t%%r0"
7826 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7827 hw_before);
7828 for (i = 1; i < hw_before; i++)
7829 fputs ("\tnopr\t%r0\n", out_file);
7830
7831 /* Note: The function label must be aligned so that (a) the bytes of the
7832 following nop do not cross a cacheline boundary, and (b) a jump address
7833 (eight bytes for 64-bit targets, four bytes for 32-bit targets) can be
7834 stored directly before the label without crossing a cacheline
7835 boundary. All this is necessary to make sure the trampoline code can
7836 be changed atomically.
7837 This alignment is done automatically using the FUNCTION_BOUNDARY, but
7838 if there are NOPs before the function label, the alignment is placed
7839 before them. So it is necessary to duplicate the alignment after the
7840 NOPs. */
7841 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7842 if (! DECL_USER_ALIGN (decl))
7843 function_alignment
7844 = MAX (function_alignment,
7845 (unsigned int) align_functions.levels[0].get_value ());
7846 fputs ("\t# alignment for hotpatch\n", out_file);
7847 ASM_OUTPUT_ALIGN (out_file, align_functions.levels[0].log);
7848 }
7849
7850 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7851 {
7852 asm_fprintf (out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7853 asm_fprintf (out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7854 asm_fprintf (out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7855 asm_fprintf (out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7856 asm_fprintf (out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7857 asm_fprintf (out_file, "\t# fn:%s wf%d\n", fname,
7858 s390_warn_framesize);
7859 asm_fprintf (out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7860 asm_fprintf (out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7861 asm_fprintf (out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7862 asm_fprintf (out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7863 asm_fprintf (out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7864 asm_fprintf (out_file, "\t# fn:%s ps%d\n", fname,
7865 TARGET_PACKED_STACK);
7866 asm_fprintf (out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7867 asm_fprintf (out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7868 asm_fprintf (out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7869 asm_fprintf (out_file, "\t# fn:%s wd%d\n", fname,
7870 s390_warn_dynamicstack_p);
7871 }
7872 ASM_OUTPUT_LABEL (out_file, fname);
7873 if (hw_after > 0)
7874 asm_fprintf (out_file,
7875 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7876 hw_after);
7877 }
7878
7879 /* Output machine-dependent UNSPECs occurring in address constant X
7880 in assembler syntax to stdio stream FILE. Returns true if the
7881 constant X could be recognized, false otherwise. */
7882
7883 static bool
7884 s390_output_addr_const_extra (FILE *file, rtx x)
7885 {
7886 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7887 switch (XINT (x, 1))
7888 {
7889 case UNSPEC_GOTENT:
7890 output_addr_const (file, XVECEXP (x, 0, 0));
7891 fprintf (file, "@GOTENT");
7892 return true;
7893 case UNSPEC_GOT:
7894 output_addr_const (file, XVECEXP (x, 0, 0));
7895 fprintf (file, "@GOT");
7896 return true;
7897 case UNSPEC_GOTOFF:
7898 output_addr_const (file, XVECEXP (x, 0, 0));
7899 fprintf (file, "@GOTOFF");
7900 return true;
7901 case UNSPEC_PLT31:
7902 output_addr_const (file, XVECEXP (x, 0, 0));
7903 fprintf (file, "@PLT");
7904 return true;
7905 case UNSPEC_PLTOFF:
7906 output_addr_const (file, XVECEXP (x, 0, 0));
7907 fprintf (file, "@PLTOFF");
7908 return true;
7909 case UNSPEC_TLSGD:
7910 output_addr_const (file, XVECEXP (x, 0, 0));
7911 fprintf (file, "@TLSGD");
7912 return true;
7913 case UNSPEC_TLSLDM:
7914 assemble_name (file, get_some_local_dynamic_name ());
7915 fprintf (file, "@TLSLDM");
7916 return true;
7917 case UNSPEC_DTPOFF:
7918 output_addr_const (file, XVECEXP (x, 0, 0));
7919 fprintf (file, "@DTPOFF");
7920 return true;
7921 case UNSPEC_NTPOFF:
7922 output_addr_const (file, XVECEXP (x, 0, 0));
7923 fprintf (file, "@NTPOFF");
7924 return true;
7925 case UNSPEC_GOTNTPOFF:
7926 output_addr_const (file, XVECEXP (x, 0, 0));
7927 fprintf (file, "@GOTNTPOFF");
7928 return true;
7929 case UNSPEC_INDNTPOFF:
7930 output_addr_const (file, XVECEXP (x, 0, 0));
7931 fprintf (file, "@INDNTPOFF");
7932 return true;
7933 }
7934
7935 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7936 switch (XINT (x, 1))
7937 {
7938 case UNSPEC_POOL_OFFSET:
7939 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7940 output_addr_const (file, x);
7941 return true;
7942 }
7943 return false;
7944 }
7945
7946 /* Output address operand ADDR in assembler syntax to
7947 stdio stream FILE. */
7948
7949 void
7950 print_operand_address (FILE *file, rtx addr)
7951 {
7952 struct s390_address ad;
7953 memset (&ad, 0, sizeof (s390_address));
7954
7955 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7956 {
7957 if (!TARGET_Z10)
7958 {
7959 output_operand_lossage ("symbolic memory references are "
7960 "only supported on z10 or later");
7961 return;
7962 }
7963 output_addr_const (file, addr);
7964 return;
7965 }
7966
7967 if (!s390_decompose_address (addr, &ad)
7968 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7969 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7970 output_operand_lossage ("cannot decompose address");
7971
7972 if (ad.disp)
7973 output_addr_const (file, ad.disp);
7974 else
7975 fprintf (file, "0");
7976
7977 if (ad.base && ad.indx)
7978 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7979 reg_names[REGNO (ad.base)]);
7980 else if (ad.base)
7981 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7982 }
7983
7984 /* Output operand X in assembler syntax to stdio stream FILE.
7985 CODE specified the format flag. The following format flags
7986 are recognized:
7987
7988 'A': On z14 or higher: If operand is a mem print the alignment
7989 hint usable with vl/vst prefixed by a comma.
7990 'C': print opcode suffix for branch condition.
7991 'D': print opcode suffix for inverse branch condition.
7992 'E': print opcode suffix for branch on index instruction.
7993 'G': print the size of the operand in bytes.
7994 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7995 'K': print @PLT suffix for call targets and load address values.
7996 'M': print the second word of a TImode operand.
7997 'N': print the second word of a DImode operand.
7998 'O': print only the displacement of a memory reference or address.
7999 'R': print only the base register of a memory reference or address.
8000 'S': print S-type memory reference (base+displacement).
8001 'Y': print address style operand without index (e.g. shift count or setmem
8002 operand).
8003
8004 'b': print integer X as if it's an unsigned byte.
8005 'c': print integer X as if it's a signed byte.
8006 'e': "end" contiguous bitmask X in either DImode or vector inner mode.
8007 'f': "end" contiguous bitmask X in SImode.
8008 'h': print integer X as if it's a signed halfword.
8009 'i': print the first nonzero HImode part of X.
8010 'j': print the first HImode part unequal to -1 of X.
8011 'k': print the first nonzero SImode part of X.
8012 'm': print the first SImode part unequal to -1 of X.
8013 'o': print integer X as if it's an unsigned 32-bit word.
8014 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
8015 't': CONST_INT: "start" of contiguous bitmask X in SImode.
8016 CONST_VECTOR: Generate a bitmask for vgbm instruction.
8017 'x': print integer X as if it's an unsigned halfword.
8018 'v': print register number as vector register (v1 instead of f1).
8019 'V': print the second word of a TFmode operand as vector register.
8020 */
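/* For example, for the CONST_INT 0xfedc the integer modifiers above
   print 65244 for 'x' (unsigned halfword), -292 for 'h' (signed
   halfword), 220 for 'b' (unsigned byte) and -36 for 'c' (signed
   byte).  */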
8021
8022 void
8023 print_operand (FILE *file, rtx x, int code)
8024 {
8025 HOST_WIDE_INT ival;
8026
8027 switch (code)
8028 {
8029 case 'A':
8030 if (TARGET_VECTOR_LOADSTORE_ALIGNMENT_HINTS && MEM_P (x))
8031 {
8032 if (MEM_ALIGN (x) >= 128)
8033 fprintf (file, ",4");
8034 else if (MEM_ALIGN (x) == 64)
8035 fprintf (file, ",3");
8036 }
8037 return;
8038 case 'C':
8039 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
8040 return;
8041
8042 case 'D':
8043 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
8044 return;
8045
8046 case 'E':
8047 if (GET_CODE (x) == LE)
8048 fprintf (file, "l");
8049 else if (GET_CODE (x) == GT)
8050 fprintf (file, "h");
8051 else
8052 output_operand_lossage ("invalid comparison operator "
8053 "for 'E' output modifier");
8054 return;
8055
8056 case 'J':
8057 if (GET_CODE (x) == SYMBOL_REF)
8058 {
8059 fprintf (file, "%s", ":tls_load:");
8060 output_addr_const (file, x);
8061 }
8062 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
8063 {
8064 fprintf (file, "%s", ":tls_gdcall:");
8065 output_addr_const (file, XVECEXP (x, 0, 0));
8066 }
8067 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
8068 {
8069 fprintf (file, "%s", ":tls_ldcall:");
8070 const char *name = get_some_local_dynamic_name ();
8071 gcc_assert (name);
8072 assemble_name (file, name);
8073 }
8074 else
8075 output_operand_lossage ("invalid reference for 'J' output modifier");
8076 return;
8077
8078 case 'G':
8079 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
8080 return;
8081
8082 case 'O':
8083 {
8084 struct s390_address ad;
8085 int ret;
8086
8087 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
8088
8089 if (!ret
8090 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
8091 || ad.indx)
8092 {
8093 output_operand_lossage ("invalid address for 'O' output modifier");
8094 return;
8095 }
8096
8097 if (ad.disp)
8098 output_addr_const (file, ad.disp);
8099 else
8100 fprintf (file, "0");
8101 }
8102 return;
8103
8104 case 'R':
8105 {
8106 struct s390_address ad;
8107 int ret;
8108
8109 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
8110
8111 if (!ret
8112 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
8113 || ad.indx)
8114 {
8115 output_operand_lossage ("invalid address for 'R' output modifier");
8116 return;
8117 }
8118
8119 if (ad.base)
8120 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
8121 else
8122 fprintf (file, "0");
8123 }
8124 return;
8125
8126 case 'S':
8127 {
8128 struct s390_address ad;
8129 int ret;
8130
8131 if (!MEM_P (x))
8132 {
8133 output_operand_lossage ("memory reference expected for "
8134 "'S' output modifier");
8135 return;
8136 }
8137 ret = s390_decompose_address (XEXP (x, 0), &ad);
8138
8139 if (!ret
8140 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
8141 || ad.indx)
8142 {
8143 output_operand_lossage ("invalid address for 'S' output modifier");
8144 return;
8145 }
8146
8147 if (ad.disp)
8148 output_addr_const (file, ad.disp);
8149 else
8150 fprintf (file, "0");
8151
8152 if (ad.base)
8153 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
8154 }
8155 return;
8156
8157 case 'N':
8158 if (GET_CODE (x) == REG)
8159 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
8160 else if (GET_CODE (x) == MEM)
8161 x = change_address (x, VOIDmode,
8162 plus_constant (Pmode, XEXP (x, 0), 4));
8163 else
8164 output_operand_lossage ("register or memory expression expected "
8165 "for 'N' output modifier");
8166 break;
8167
8168 case 'M':
8169 if (GET_CODE (x) == REG)
8170 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
8171 else if (GET_CODE (x) == MEM)
8172 x = change_address (x, VOIDmode,
8173 plus_constant (Pmode, XEXP (x, 0), 8));
8174 else
8175 output_operand_lossage ("register or memory expression expected "
8176 "for 'M' output modifier");
8177 break;
8178
8179 case 'Y':
8180 print_shift_count_operand (file, x);
8181 return;
8182
8183 case 'K':
8184 /* Append @PLT to both local and non-local symbols in order to support
8185 Linux Kernel livepatching: patches contain individual functions and
8186 are loaded further than 2G away from vmlinux, and therefore they must
8187 call even static functions via PLT. ld will optimize @PLT away for
8188 normal code, and keep it for patches.
8189
8190 Do not indiscriminately add @PLT in 31-bit mode due to the %r12
8191 restriction, use UNSPEC_PLT31 instead.
8192
8193 @PLT only makes sense for functions, data is taken care of by
8194 -mno-pic-data-is-text-relative.
8195
8196 Adding @PLT interferes with handling of weak symbols in non-PIC code,
8197 since their addresses are loaded with larl, which then always produces
8198 a non-NULL result, so skip them here as well. */
8199 if (TARGET_64BIT
8200 && GET_CODE (x) == SYMBOL_REF
8201 && SYMBOL_REF_FUNCTION_P (x)
8202 && !(SYMBOL_REF_WEAK (x) && !flag_pic))
8203 fprintf (file, "@PLT");
8204 return;
8205 }
8206
8207 switch (GET_CODE (x))
8208 {
8209 case REG:
8210 /* Print FP regs as fx instead of vx when they are accessed
8211 through non-vector mode. */
8212 if ((code == 'v' || code == 'V')
8213 || VECTOR_NOFP_REG_P (x)
8214 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
8215 || (VECTOR_REG_P (x)
8216 && (GET_MODE_SIZE (GET_MODE (x)) /
8217 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
8218 fprintf (file, "%%v%s", reg_names[REGNO (x) + (code == 'V')] + 2);
8219 else
8220 fprintf (file, "%s", reg_names[REGNO (x)]);
8221 break;
8222
8223 case MEM:
8224 output_address (GET_MODE (x), XEXP (x, 0));
8225 break;
8226
8227 case CONST:
8228 case CODE_LABEL:
8229 case LABEL_REF:
8230 case SYMBOL_REF:
8231 output_addr_const (file, x);
8232 break;
8233
8234 case CONST_INT:
8235 ival = INTVAL (x);
8236 switch (code)
8237 {
8238 case 0:
8239 break;
8240 case 'b':
8241 ival &= 0xff;
8242 break;
8243 case 'c':
8244 ival = ((ival & 0xff) ^ 0x80) - 0x80;
8245 break;
8246 case 'x':
8247 ival &= 0xffff;
8248 break;
8249 case 'h':
8250 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
8251 break;
8252 case 'i':
8253 ival = s390_extract_part (x, HImode, 0);
8254 break;
8255 case 'j':
8256 ival = s390_extract_part (x, HImode, -1);
8257 break;
8258 case 'k':
8259 ival = s390_extract_part (x, SImode, 0);
8260 break;
8261 case 'm':
8262 ival = s390_extract_part (x, SImode, -1);
8263 break;
8264 case 'o':
8265 ival &= 0xffffffff;
8266 break;
8267 case 'e': case 'f':
8268 case 's': case 't':
8269 {
8270 int start, end;
8271 int len;
8272 bool ok;
8273
8274 len = (code == 's' || code == 'e' ? 64 : 32);
8275 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
8276 gcc_assert (ok);
8277 if (code == 's' || code == 't')
8278 ival = start;
8279 else
8280 ival = end;
8281 }
8282 break;
8283 default:
8284 output_operand_lossage ("invalid constant for output modifier '%c'", code);
8285 }
8286 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8287 break;
8288
8289 case CONST_WIDE_INT:
8290 if (code == 'b')
8291 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8292 CONST_WIDE_INT_ELT (x, 0) & 0xff);
8293 else if (code == 'x')
8294 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8295 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
8296 else if (code == 'h')
8297 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8298 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
8299 else
8300 {
8301 if (code == 0)
8302 output_operand_lossage ("invalid constant - try using "
8303 "an output modifier");
8304 else
8305 output_operand_lossage ("invalid constant for output modifier '%c'",
8306 code);
8307 }
8308 break;
8309 case CONST_VECTOR:
8310 switch (code)
8311 {
8312 case 'h':
8313 gcc_assert (const_vec_duplicate_p (x));
8314 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
8315 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
8316 break;
8317 case 'e':
8318 case 's':
8319 {
8320 int start, end;
8321 bool ok;
8322
8323 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
8324 gcc_assert (ok);
8325 ival = (code == 's') ? start : end;
8326 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
8327 }
8328 break;
8329 case 't':
8330 {
8331 unsigned mask;
8332 bool ok = s390_bytemask_vector_p (x, &mask);
8333 gcc_assert (ok);
8334 fprintf (file, "%u", mask);
8335 }
8336 break;
8337
8338 default:
8339 output_operand_lossage ("invalid constant vector for output "
8340 "modifier '%c'", code);
8341 }
8342 break;
8343
8344 default:
8345 if (code == 0)
8346 output_operand_lossage ("invalid expression - try using "
8347 "an output modifier");
8348 else
8349 output_operand_lossage ("invalid expression for output "
8350 "modifier '%c'", code);
8351 break;
8352 }
8353 }
8354
8355 /* Target hook for assembling integer objects. We need to define it
8356 here to work around a bug in some versions of GAS, which couldn't
8357 handle values smaller than INT_MIN when printed in decimal. */
8358
8359 static bool
8360 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
8361 {
8362 if (size == 8 && aligned_p
8363 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
8364 {
8365 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
8366 INTVAL (x));
8367 return true;
8368 }
8369 return default_assemble_integer (x, size, aligned_p);
8370 }
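/* For instance, an aligned 8-byte integer with value -3000000000 (which
   is smaller than INT_MIN) is emitted as

     .quad	0xffffffff4d2fa200

   instead of the decimal form that older GAS versions mishandled.  */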
8371
8372 /* Returns true if register REGNO is used for forming
8373 a memory address in expression X. */
8374
8375 static bool
8376 reg_used_in_mem_p (int regno, rtx x)
8377 {
8378 enum rtx_code code = GET_CODE (x);
8379 int i, j;
8380 const char *fmt;
8381
8382 if (code == MEM)
8383 {
8384 if (refers_to_regno_p (regno, XEXP (x, 0)))
8385 return true;
8386 }
8387 else if (code == SET
8388 && GET_CODE (SET_DEST (x)) == PC)
8389 {
8390 if (refers_to_regno_p (regno, SET_SRC (x)))
8391 return true;
8392 }
8393
8394 fmt = GET_RTX_FORMAT (code);
8395 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8396 {
8397 if (fmt[i] == 'e'
8398 && reg_used_in_mem_p (regno, XEXP (x, i)))
8399 return true;
8400
8401 else if (fmt[i] == 'E')
8402 for (j = 0; j < XVECLEN (x, i); j++)
8403 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
8404 return true;
8405 }
8406 return false;
8407 }
8408
8409 /* Returns true if expression DEP_RTX sets an address register
8410 used by instruction INSN to address memory. */
8411
8412 static bool
8413 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8414 {
8415 rtx target, pat;
8416
8417 if (NONJUMP_INSN_P (dep_rtx))
8418 dep_rtx = PATTERN (dep_rtx);
8419
8420 if (GET_CODE (dep_rtx) == SET)
8421 {
8422 target = SET_DEST (dep_rtx);
8423 if (GET_CODE (target) == STRICT_LOW_PART)
8424 target = XEXP (target, 0);
8425 while (GET_CODE (target) == SUBREG)
8426 target = SUBREG_REG (target);
8427
8428 if (GET_CODE (target) == REG)
8429 {
8430 int regno = REGNO (target);
8431
8432 if (s390_safe_attr_type (insn) == TYPE_LA)
8433 {
8434 pat = PATTERN (insn);
8435 if (GET_CODE (pat) == PARALLEL)
8436 {
8437 gcc_assert (XVECLEN (pat, 0) == 2);
8438 pat = XVECEXP (pat, 0, 0);
8439 }
8440 gcc_assert (GET_CODE (pat) == SET);
8441 return refers_to_regno_p (regno, SET_SRC (pat));
8442 }
8443 else if (get_attr_atype (insn) == ATYPE_AGEN)
8444 return reg_used_in_mem_p (regno, PATTERN (insn));
8445 }
8446 }
8447 return false;
8448 }
8449
8450 /* Return 1, if dep_insn sets register used in insn in the agen unit. */
8451
8452 int
8453 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8454 {
8455 rtx dep_rtx = PATTERN (dep_insn);
8456 int i;
8457
8458 if (GET_CODE (dep_rtx) == SET
8459 && addr_generation_dependency_p (dep_rtx, insn))
8460 return 1;
8461 else if (GET_CODE (dep_rtx) == PARALLEL)
8462 {
8463 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8464 {
8465 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8466 return 1;
8467 }
8468 }
8469 return 0;
8470 }
8471
8472
8473 /* A C statement (sans semicolon) to update the integer scheduling priority
8474 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier,
8475 reduce the priority to execute INSN later. Do not define this macro if
8476 you do not need to adjust the scheduling priorities of insns.
8477
8478 A STD instruction should be scheduled earlier,
8479 in order to use the bypass. */
8480 static int
8481 s390_adjust_priority (rtx_insn *insn, int priority)
8482 {
8483 if (! INSN_P (insn))
8484 return priority;
8485
8486 if (s390_tune <= PROCESSOR_2064_Z900)
8487 return priority;
8488
8489 switch (s390_safe_attr_type (insn))
8490 {
8491 case TYPE_FSTOREDF:
8492 case TYPE_FSTORESF:
8493 priority = priority << 3;
8494 break;
8495 case TYPE_STORE:
8496 case TYPE_STM:
8497 priority = priority << 1;
8498 break;
8499 default:
8500 break;
8501 }
8502 return priority;
8503 }
8504
8505
8506 /* The number of instructions that can be issued per cycle. */
8507
8508 static int
8509 s390_issue_rate (void)
8510 {
8511 switch (s390_tune)
8512 {
8513 case PROCESSOR_2084_Z990:
8514 case PROCESSOR_2094_Z9_109:
8515 case PROCESSOR_2094_Z9_EC:
8516 case PROCESSOR_2817_Z196:
8517 return 3;
8518 case PROCESSOR_2097_Z10:
8519 return 2;
8520 case PROCESSOR_2064_Z900:
8521 /* Starting with EC12 we use the sched_reorder hook to take care
8522 of instruction dispatch constraints. The algorithm only
8523 picks the best instruction and assumes only a single
8524 instruction gets issued per cycle. */
8525 case PROCESSOR_2827_ZEC12:
8526 case PROCESSOR_2964_Z13:
8527 case PROCESSOR_3906_Z14:
8528 case PROCESSOR_ARCH14:
8529 default:
8530 return 1;
8531 }
8532 }
8533
8534 static int
8535 s390_first_cycle_multipass_dfa_lookahead (void)
8536 {
8537 return 4;
8538 }
8539
8540 static void
8541 annotate_constant_pool_refs_1 (rtx *x)
8542 {
8543 int i, j;
8544 const char *fmt;
8545
8546 gcc_assert (GET_CODE (*x) != SYMBOL_REF
8547 || !CONSTANT_POOL_ADDRESS_P (*x));
8548
8549 /* Literal pool references can only occur inside a MEM ... */
8550 if (GET_CODE (*x) == MEM)
8551 {
8552 rtx memref = XEXP (*x, 0);
8553
8554 if (GET_CODE (memref) == SYMBOL_REF
8555 && CONSTANT_POOL_ADDRESS_P (memref))
8556 {
8557 rtx base = cfun->machine->base_reg;
8558 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8559 UNSPEC_LTREF);
8560
8561 *x = replace_equiv_address (*x, addr);
8562 return;
8563 }
8564
8565 if (GET_CODE (memref) == CONST
8566 && GET_CODE (XEXP (memref, 0)) == PLUS
8567 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8568 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8569 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8570 {
8571 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8572 rtx sym = XEXP (XEXP (memref, 0), 0);
8573 rtx base = cfun->machine->base_reg;
8574 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8575 UNSPEC_LTREF);
8576
8577 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8578 return;
8579 }
8580 }
8581
8582 /* ... or a load-address type pattern. */
8583 if (GET_CODE (*x) == SET)
8584 {
8585 rtx addrref = SET_SRC (*x);
8586
8587 if (GET_CODE (addrref) == SYMBOL_REF
8588 && CONSTANT_POOL_ADDRESS_P (addrref))
8589 {
8590 rtx base = cfun->machine->base_reg;
8591 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8592 UNSPEC_LTREF);
8593
8594 SET_SRC (*x) = addr;
8595 return;
8596 }
8597
8598 if (GET_CODE (addrref) == CONST
8599 && GET_CODE (XEXP (addrref, 0)) == PLUS
8600 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8601 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8602 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8603 {
8604 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8605 rtx sym = XEXP (XEXP (addrref, 0), 0);
8606 rtx base = cfun->machine->base_reg;
8607 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8608 UNSPEC_LTREF);
8609
8610 SET_SRC (*x) = plus_constant (Pmode, addr, off);
8611 return;
8612 }
8613 }
8614
8615 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8616 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8617 {
8618 if (fmt[i] == 'e')
8619 {
8620 annotate_constant_pool_refs_1 (&XEXP (*x, i));
8621 }
8622 else if (fmt[i] == 'E')
8623 {
8624 for (j = 0; j < XVECLEN (*x, i); j++)
8625 annotate_constant_pool_refs_1 (&XVECEXP (*x, i, j));
8626 }
8627 }
8628 }
8629
8630 /* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
8631 Fix up MEMs as required.
8632 Skip insns which support relative addressing, because they do not use a base
8633 register. */
8634
8635 static void
8636 annotate_constant_pool_refs (rtx_insn *insn)
8637 {
8638 if (s390_safe_relative_long_p (insn))
8639 return;
8640 annotate_constant_pool_refs_1 (&PATTERN (insn));
8641 }
8642
8643 static void
8644 find_constant_pool_ref_1 (rtx x, rtx *ref)
8645 {
8646 int i, j;
8647 const char *fmt;
8648
8649 /* Do not look for pool references inside POOL_ENTRY insns. */
8650 if (GET_CODE (x) == UNSPEC_VOLATILE
8651 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8652 return;
8653
8654 gcc_assert (GET_CODE (x) != SYMBOL_REF
8655 || !CONSTANT_POOL_ADDRESS_P (x));
8656
8657 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8658 {
8659 rtx sym = XVECEXP (x, 0, 0);
8660 gcc_assert (GET_CODE (sym) == SYMBOL_REF
8661 && CONSTANT_POOL_ADDRESS_P (sym));
8662
8663 if (*ref == NULL_RTX)
8664 *ref = sym;
8665 else
8666 gcc_assert (*ref == sym);
8667
8668 return;
8669 }
8670
8671 fmt = GET_RTX_FORMAT (GET_CODE (x));
8672 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8673 {
8674 if (fmt[i] == 'e')
8675 {
8676 find_constant_pool_ref_1 (XEXP (x, i), ref);
8677 }
8678 else if (fmt[i] == 'E')
8679 {
8680 for (j = 0; j < XVECLEN (x, i); j++)
8681 find_constant_pool_ref_1 (XVECEXP (x, i, j), ref);
8682 }
8683 }
8684 }
8685
8686 /* Find an annotated literal pool symbol referenced in INSN,
8687 and store it at REF. Will abort if INSN contains references to
8688 more than one such pool symbol; multiple references to the same
8689 symbol are allowed, however.
8690
8691 The rtx pointed to by REF must be initialized to NULL_RTX
8692 by the caller before calling this routine.
8693
8694 Skip insns which support relative addressing, because they do not use a base
8695 register. */
8696
8697 static void
8698 find_constant_pool_ref (rtx_insn *insn, rtx *ref)
8699 {
8700 if (s390_safe_relative_long_p (insn))
8701 return;
8702 find_constant_pool_ref_1 (PATTERN (insn), ref);
8703 }
8704
8705 static void
8706 replace_constant_pool_ref_1 (rtx *x, rtx ref, rtx offset)
8707 {
8708 int i, j;
8709 const char *fmt;
8710
8711 gcc_assert (*x != ref);
8712
8713 if (GET_CODE (*x) == UNSPEC
8714 && XINT (*x, 1) == UNSPEC_LTREF
8715 && XVECEXP (*x, 0, 0) == ref)
8716 {
8717 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8718 return;
8719 }
8720
8721 if (GET_CODE (*x) == PLUS
8722 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8723 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8724 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8725 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8726 {
8727 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8728 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8729 return;
8730 }
8731
8732 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8733 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8734 {
8735 if (fmt[i] == 'e')
8736 {
8737 replace_constant_pool_ref_1 (&XEXP (*x, i), ref, offset);
8738 }
8739 else if (fmt[i] == 'E')
8740 {
8741 for (j = 0; j < XVECLEN (*x, i); j++)
8742 replace_constant_pool_ref_1 (&XVECEXP (*x, i, j), ref, offset);
8743 }
8744 }
8745 }
8746
8747 /* Replace every reference to the annotated literal pool
8748 symbol REF in INSN by its base plus OFFSET.
8749 Skip insns which support relative addressing, because they do not use a base
8750 register. */
8751
8752 static void
8753 replace_constant_pool_ref (rtx_insn *insn, rtx ref, rtx offset)
8754 {
8755 if (s390_safe_relative_long_p (insn))
8756 return;
8757 replace_constant_pool_ref_1 (&PATTERN (insn), ref, offset);
8758 }
8759
8760 /* We keep a list of constants which we have to add to internal
8761 constant tables in the middle of large functions. */
8762
8763 static machine_mode constant_modes[] =
8764 {
8765 TFmode, FPRX2mode, TImode, TDmode,
8766 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8767 V4SFmode, V2DFmode, V1TFmode,
8768 DFmode, DImode, DDmode,
8769 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8770 SFmode, SImode, SDmode,
8771 V4QImode, V2HImode, V1SImode, V1SFmode,
8772 HImode,
8773 V2QImode, V1HImode,
8774 QImode,
8775 V1QImode
8776 };
8777 #define NR_C_MODES (sizeof (constant_modes) / sizeof (constant_modes[0]))
8778
8779 struct constant
8780 {
8781 struct constant *next;
8782 rtx value;
8783 rtx_code_label *label;
8784 };
8785
8786 struct constant_pool
8787 {
8788 struct constant_pool *next;
8789 rtx_insn *first_insn;
8790 rtx_insn *pool_insn;
8791 bitmap insns;
8792 rtx_insn *emit_pool_after;
8793
8794 struct constant *constants[NR_C_MODES];
8795 struct constant *execute;
8796 rtx_code_label *label;
8797 int size;
8798 };
8799
8800 /* Allocate new constant_pool structure. */
8801
8802 static struct constant_pool *
8803 s390_alloc_pool (void)
8804 {
8805 struct constant_pool *pool;
8806 size_t i;
8807
8808 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8809 pool->next = NULL;
8810 for (i = 0; i < NR_C_MODES; i++)
8811 pool->constants[i] = NULL;
8812
8813 pool->execute = NULL;
8814 pool->label = gen_label_rtx ();
8815 pool->first_insn = NULL;
8816 pool->pool_insn = NULL;
8817 pool->insns = BITMAP_ALLOC (NULL);
8818 pool->size = 0;
8819 pool->emit_pool_after = NULL;
8820
8821 return pool;
8822 }
8823
8824 /* Create new constant pool covering instructions starting at INSN
8825 and chain it to the end of POOL_LIST. */
8826
8827 static struct constant_pool *
8828 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8829 {
8830 struct constant_pool *pool, **prev;
8831
8832 pool = s390_alloc_pool ();
8833 pool->first_insn = insn;
8834
8835 for (prev = pool_list; *prev; prev = &(*prev)->next)
8836 ;
8837 *prev = pool;
8838
8839 return pool;
8840 }
8841
8842 /* End range of instructions covered by POOL at INSN and emit
8843 placeholder insn representing the pool. */
8844
8845 static void
8846 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8847 {
8848 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8849
8850 if (!insn)
8851 insn = get_last_insn ();
8852
8853 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8854 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8855 }
8856
8857 /* Add INSN to the list of insns covered by POOL. */
8858
8859 static void
8860 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8861 {
8862 bitmap_set_bit (pool->insns, INSN_UID (insn));
8863 }
8864
8865 /* Return pool out of POOL_LIST that covers INSN. */
8866
8867 static struct constant_pool *
8868 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8869 {
8870 struct constant_pool *pool;
8871
8872 for (pool = pool_list; pool; pool = pool->next)
8873 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8874 break;
8875
8876 return pool;
8877 }
8878
8879 /* Add constant VAL of mode MODE to the constant pool POOL. */
8880
8881 static void
8882 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8883 {
8884 struct constant *c;
8885 size_t i;
8886
8887 for (i = 0; i < NR_C_MODES; i++)
8888 if (constant_modes[i] == mode)
8889 break;
8890 gcc_assert (i != NR_C_MODES);
8891
8892 for (c = pool->constants[i]; c != NULL; c = c->next)
8893 if (rtx_equal_p (val, c->value))
8894 break;
8895
8896 if (c == NULL)
8897 {
8898 c = (struct constant *) xmalloc (sizeof *c);
8899 c->value = val;
8900 c->label = gen_label_rtx ();
8901 c->next = pool->constants[i];
8902 pool->constants[i] = c;
8903 pool->size += GET_MODE_SIZE (mode);
8904 }
8905 }
8906
8907 /* Return an rtx that represents the offset of X from the start of
8908 pool POOL. */
8909
8910 static rtx
8911 s390_pool_offset (struct constant_pool *pool, rtx x)
8912 {
8913 rtx label;
8914
8915 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8916 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8917 UNSPEC_POOL_OFFSET);
8918 return gen_rtx_CONST (GET_MODE (x), x);
8919 }
8920
8921 /* Find constant VAL of mode MODE in the constant pool POOL.
8922 Return an RTX describing the distance from the start of
8923 the pool to the location of the new constant. */
8924
8925 static rtx
8926 s390_find_constant (struct constant_pool *pool, rtx val,
8927 machine_mode mode)
8928 {
8929 struct constant *c;
8930 size_t i;
8931
8932 for (i = 0; i < NR_C_MODES; i++)
8933 if (constant_modes[i] == mode)
8934 break;
8935 gcc_assert (i != NR_C_MODES);
8936
8937 for (c = pool->constants[i]; c != NULL; c = c->next)
8938 if (rtx_equal_p (val, c->value))
8939 break;
8940
8941 gcc_assert (c);
8942
8943 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8944 }
8945
8946 /* Check whether INSN is an execute. Return the label_ref to its
8947 execute target template if so, NULL_RTX otherwise. */
8948
8949 static rtx
8950 s390_execute_label (rtx insn)
8951 {
8952 if (INSN_P (insn)
8953 && GET_CODE (PATTERN (insn)) == PARALLEL
8954 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8955 && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
8956 || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
8957 {
8958 if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8959 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8960 else
8961 {
8962 gcc_assert (JUMP_P (insn));
8963 /* For jump insns as execute target:
8964 - There is one operand less in the parallel (the
8965 modification register of the execute is always 0).
8966 - The execute target label is wrapped into an
8967 if_then_else in order to hide it from jump analysis. */
8968 return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
8969 }
8970 }
8971
8972 return NULL_RTX;
8973 }
8974
8975 /* Find execute target for INSN in the constant pool POOL.
8976 Return an RTX describing the distance from the start of
8977 the pool to the location of the execute target. */
8978
8979 static rtx
8980 s390_find_execute (struct constant_pool *pool, rtx insn)
8981 {
8982 struct constant *c;
8983
8984 for (c = pool->execute; c != NULL; c = c->next)
8985 if (INSN_UID (insn) == INSN_UID (c->value))
8986 break;
8987
8988 gcc_assert (c);
8989
8990 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8991 }
8992
8993 /* For an execute INSN, extract the execute target template. */
8994
8995 static rtx
8996 s390_execute_target (rtx insn)
8997 {
8998 rtx pattern = PATTERN (insn);
8999 gcc_assert (s390_execute_label (insn));
9000
9001 if (XVECLEN (pattern, 0) == 2)
9002 {
9003 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
9004 }
9005 else
9006 {
9007 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
9008 int i;
9009
9010 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
9011 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
9012
9013 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
9014 }
9015
9016 return pattern;
9017 }
9018
9019 /* Indicate that INSN cannot be duplicated. This is the case for
9020 execute insns that carry a unique label. */
9021
9022 static bool
9023 s390_cannot_copy_insn_p (rtx_insn *insn)
9024 {
9025 rtx label = s390_execute_label (insn);
9026 return label && label != const0_rtx;
9027 }
9028
9029 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
9030 do not emit the pool base label. */
9031
9032 static void
9033 s390_dump_pool (struct constant_pool *pool, bool remote_label)
9034 {
9035 struct constant *c;
9036 rtx_insn *insn = pool->pool_insn;
9037 size_t i;
9038
9039 /* Switch to rodata section. */
9040 insn = emit_insn_after (gen_pool_section_start (), insn);
9041 INSN_ADDRESSES_NEW (insn, -1);
9042
9043 /* Ensure minimum pool alignment. */
9044 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
9045 INSN_ADDRESSES_NEW (insn, -1);
9046
9047 /* Emit pool base label. */
9048 if (!remote_label)
9049 {
9050 insn = emit_label_after (pool->label, insn);
9051 INSN_ADDRESSES_NEW (insn, -1);
9052 }
9053
9054 /* Dump constants in descending alignment requirement order,
9055 ensuring proper alignment for every constant. */
9056 for (i = 0; i < NR_C_MODES; i++)
9057 for (c = pool->constants[i]; c; c = c->next)
9058 {
9059 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
9060 rtx value = copy_rtx (c->value);
9061 if (GET_CODE (value) == CONST
9062 && GET_CODE (XEXP (value, 0)) == UNSPEC
9063 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
9064 && XVECLEN (XEXP (value, 0), 0) == 1)
9065 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
9066
9067 insn = emit_label_after (c->label, insn);
9068 INSN_ADDRESSES_NEW (insn, -1);
9069
9070 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
9071 gen_rtvec (1, value),
9072 UNSPECV_POOL_ENTRY);
9073 insn = emit_insn_after (value, insn);
9074 INSN_ADDRESSES_NEW (insn, -1);
9075 }
9076
9077 /* Ensure minimum alignment for instructions. */
9078 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
9079 INSN_ADDRESSES_NEW (insn, -1);
9080
9081 /* Output in-pool execute template insns. */
9082 for (c = pool->execute; c; c = c->next)
9083 {
9084 insn = emit_label_after (c->label, insn);
9085 INSN_ADDRESSES_NEW (insn, -1);
9086
9087 insn = emit_insn_after (s390_execute_target (c->value), insn);
9088 INSN_ADDRESSES_NEW (insn, -1);
9089 }
9090
9091 /* Switch back to previous section. */
9092 insn = emit_insn_after (gen_pool_section_end (), insn);
9093 INSN_ADDRESSES_NEW (insn, -1);
9094
9095 insn = emit_barrier_after (insn);
9096 INSN_ADDRESSES_NEW (insn, -1);
9097
9098 /* Remove placeholder insn. */
9099 remove_insn (pool->pool_insn);
9100 }
9101
9102 /* Free all memory used by POOL. */
9103
9104 static void
9105 s390_free_pool (struct constant_pool *pool)
9106 {
9107 struct constant *c, *next;
9108 size_t i;
9109
9110 for (i = 0; i < NR_C_MODES; i++)
9111 for (c = pool->constants[i]; c; c = next)
9112 {
9113 next = c->next;
9114 free (c);
9115 }
9116
9117 for (c = pool->execute; c; c = next)
9118 {
9119 next = c->next;
9120 free (c);
9121 }
9122
9123 BITMAP_FREE (pool->insns);
9124 free (pool);
9125 }
9126
9127
9128 /* Collect main literal pool. Return NULL on overflow. */
9129
9130 static struct constant_pool *
9131 s390_mainpool_start (void)
9132 {
9133 struct constant_pool *pool;
9134 rtx_insn *insn;
9135
9136 pool = s390_alloc_pool ();
9137
9138 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9139 {
9140 if (NONJUMP_INSN_P (insn)
9141 && GET_CODE (PATTERN (insn)) == SET
9142 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
9143 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
9144 {
9145 /* There might be two main_pool instructions if base_reg
9146 is call-clobbered; one for shrink-wrapped code and one
9147 for the rest. We want to keep the first. */
9148 if (pool->pool_insn)
9149 {
9150 insn = PREV_INSN (insn);
9151 delete_insn (NEXT_INSN (insn));
9152 continue;
9153 }
9154 pool->pool_insn = insn;
9155 }
9156
9157 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9158 {
9159 rtx pool_ref = NULL_RTX;
9160 find_constant_pool_ref (insn, &pool_ref);
9161 if (pool_ref)
9162 {
9163 rtx constant = get_pool_constant (pool_ref);
9164 machine_mode mode = get_pool_mode (pool_ref);
9165 s390_add_constant (pool, constant, mode);
9166 }
9167 }
9168
9169 /* If hot/cold partitioning is enabled we have to make sure that
9170 the literal pool is emitted in the same section where the
9171 initialization of the literal pool base pointer takes place.
9172 emit_pool_after is only used in the non-overflow case on
9173 non-Z CPUs where we can emit the literal pool at the end of the
9174 function body within the text section. */
9175 if (NOTE_P (insn)
9176 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
9177 && !pool->emit_pool_after)
9178 pool->emit_pool_after = PREV_INSN (insn);
9179 }
9180
9181 gcc_assert (pool->pool_insn || pool->size == 0);
9182
9183 if (pool->size >= 4096)
9184 {
9185 /* We're going to chunkify the pool, so remove the main
9186 pool placeholder insn. */
9187 remove_insn (pool->pool_insn);
9188
9189 s390_free_pool (pool);
9190 pool = NULL;
9191 }
9192
9193 /* If the function ends with the section where the literal pool
9194 should be emitted set the marker to its end. */
9195 if (pool && !pool->emit_pool_after)
9196 pool->emit_pool_after = get_last_insn ();
9197
9198 return pool;
9199 }
9200
9201 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9202 Modify the current function to output the pool constants as well as
9203 the pool register setup instruction. */
9204
9205 static void
9206 s390_mainpool_finish (struct constant_pool *pool)
9207 {
9208 rtx base_reg = cfun->machine->base_reg;
9209 rtx set;
9210 rtx_insn *insn;
9211
9212 /* If the pool is empty, we're done. */
9213 if (pool->size == 0)
9214 {
9215 /* We don't actually need a base register after all. */
9216 cfun->machine->base_reg = NULL_RTX;
9217
9218 if (pool->pool_insn)
9219 remove_insn (pool->pool_insn);
9220 s390_free_pool (pool);
9221 return;
9222 }
9223
9224 /* We need correct insn addresses. */
9225 shorten_branches (get_insns ());
9226
9227 /* Use a LARL to load the pool register. The pool is
9228 located in the .rodata section, so we emit it after the function. */
9229 set = gen_main_base_64 (base_reg, pool->label);
9230 insn = emit_insn_after (set, pool->pool_insn);
9231 INSN_ADDRESSES_NEW (insn, -1);
9232 remove_insn (pool->pool_insn);
9233
9234 insn = get_last_insn ();
9235 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9236 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9237
9238 s390_dump_pool (pool, 0);
9239
9240 /* Replace all literal pool references. */
9241
9242 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9243 {
9244 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9245 {
9246 rtx addr, pool_ref = NULL_RTX;
9247 find_constant_pool_ref (insn, &pool_ref);
9248 if (pool_ref)
9249 {
9250 if (s390_execute_label (insn))
9251 addr = s390_find_execute (pool, insn);
9252 else
9253 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
9254 get_pool_mode (pool_ref));
9255
9256 replace_constant_pool_ref (insn, pool_ref, addr);
9257 INSN_CODE (insn) = -1;
9258 }
9259 }
9260 }
9261
9262
9263 /* Free the pool. */
9264 s390_free_pool (pool);
9265 }
9266
9267 /* Chunkify the literal pool. */
9268
9269 #define S390_POOL_CHUNK_MIN 0xc00
9270 #define S390_POOL_CHUNK_MAX 0xe00
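/* As a sketch of the chunking strategy below: constants are accumulated
   into the current chunk until its size reaches S390_POOL_CHUNK_MAX
   (0xe00 == 3584 bytes), at which point the chunk is closed and a new
   one is started.  The single main pool is only used as long as the
   total pool size stays below 4096 bytes (see s390_mainpool_start).  */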
9271
9272 static struct constant_pool *
9273 s390_chunkify_start (void)
9274 {
9275 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
9276 bitmap far_labels;
9277 rtx_insn *insn;
9278
9279 /* We need correct insn addresses. */
9280
9281 shorten_branches (get_insns ());
9282
9283 /* Scan all insns and move literals to pool chunks. */
9284
9285 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9286 {
9287 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9288 {
9289 rtx pool_ref = NULL_RTX;
9290 find_constant_pool_ref (insn, &pool_ref);
9291 if (pool_ref)
9292 {
9293 rtx constant = get_pool_constant (pool_ref);
9294 machine_mode mode = get_pool_mode (pool_ref);
9295
9296 if (!curr_pool)
9297 curr_pool = s390_start_pool (&pool_list, insn);
9298
9299 s390_add_constant (curr_pool, constant, mode);
9300 s390_add_pool_insn (curr_pool, insn);
9301 }
9302 }
9303
9304 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9305 {
9306 if (curr_pool)
9307 s390_add_pool_insn (curr_pool, insn);
9308 }
9309
9310 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
9311 continue;
9312
9313 if (!curr_pool
9314 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9315 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9316 continue;
9317
9318 if (curr_pool->size < S390_POOL_CHUNK_MAX)
9319 continue;
9320
9321 s390_end_pool (curr_pool, NULL);
9322 curr_pool = NULL;
9323 }
9324
9325 if (curr_pool)
9326 s390_end_pool (curr_pool, NULL);
9327
9328 /* Find all labels that are branched into
9329 from an insn belonging to a different chunk. */
9330
9331 far_labels = BITMAP_ALLOC (NULL);
9332
9333 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9334 {
9335 rtx_jump_table_data *table;
9336
9337 /* Labels marked with LABEL_PRESERVE_P can be the target
9338 of non-local jumps, so we have to mark them.
9339 The same holds for named labels.
9340
9341 Don't do that, however, if it is the label before
9342 a jump table. */
9343
9344 if (LABEL_P (insn)
9345 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9346 {
9347 rtx_insn *vec_insn = NEXT_INSN (insn);
9348 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9349 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9350 }
9351 /* Check potential targets in a table jump (casesi_jump). */
9352 else if (tablejump_p (insn, NULL, &table))
9353 {
9354 rtx vec_pat = PATTERN (table);
9355 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9356
9357 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9358 {
9359 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9360
9361 if (s390_find_pool (pool_list, label)
9362 != s390_find_pool (pool_list, insn))
9363 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9364 }
9365 }
9366 /* If we have a direct jump (conditional or unconditional),
9367 check all potential targets. */
9368 else if (JUMP_P (insn))
9369 {
9370 rtx pat = PATTERN (insn);
9371
9372 if (GET_CODE (pat) == PARALLEL)
9373 pat = XVECEXP (pat, 0, 0);
9374
9375 if (GET_CODE (pat) == SET)
9376 {
9377 rtx label = JUMP_LABEL (insn);
9378 if (label && !ANY_RETURN_P (label))
9379 {
9380 if (s390_find_pool (pool_list, label)
9381 != s390_find_pool (pool_list, insn))
9382 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9383 }
9384 }
9385 }
9386 }
9387
9388 /* Insert base register reload insns before every pool. */
9389
9390 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9391 {
9392 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9393 curr_pool->label);
9394 rtx_insn *insn = curr_pool->first_insn;
9395 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9396 }
9397
9398 /* Insert base register reload insns at every far label. */
9399
9400 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9401 if (LABEL_P (insn)
9402 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9403 {
9404 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9405 if (pool)
9406 {
9407 rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
9408 pool->label);
9409 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9410 }
9411 }
9412
9413
9414 BITMAP_FREE (far_labels);
9415
9416
9417 /* Recompute insn addresses. */
9418
9419 init_insn_lengths ();
9420 shorten_branches (get_insns ());
9421
9422 return pool_list;
9423 }
9424
9425 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9426 After we have decided to use this list, finish implementing
9427 all changes to the current function as required. */
9428
9429 static void
9430 s390_chunkify_finish (struct constant_pool *pool_list)
9431 {
9432 struct constant_pool *curr_pool = NULL;
9433 rtx_insn *insn;
9434
9435
9436 /* Replace all literal pool references. */
9437
9438 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9439 {
9440 curr_pool = s390_find_pool (pool_list, insn);
9441 if (!curr_pool)
9442 continue;
9443
9444 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9445 {
9446 rtx addr, pool_ref = NULL_RTX;
9447 find_constant_pool_ref (insn, &pool_ref);
9448 if (pool_ref)
9449 {
9450 if (s390_execute_label (insn))
9451 addr = s390_find_execute (curr_pool, insn);
9452 else
9453 addr = s390_find_constant (curr_pool,
9454 get_pool_constant (pool_ref),
9455 get_pool_mode (pool_ref));
9456
9457 replace_constant_pool_ref (insn, pool_ref, addr);
9458 INSN_CODE (insn) = -1;
9459 }
9460 }
9461 }
9462
9463 /* Dump out all literal pools. */
9464
9465 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9466 s390_dump_pool (curr_pool, 0);
9467
9468 /* Free pool list. */
9469
9470 while (pool_list)
9471 {
9472 struct constant_pool *next = pool_list->next;
9473 s390_free_pool (pool_list);
9474 pool_list = next;
9475 }
9476 }
9477
9478 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9479
9480 void
9481 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9482 {
9483 switch (GET_MODE_CLASS (mode))
9484 {
9485 case MODE_FLOAT:
9486 case MODE_DECIMAL_FLOAT:
9487 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9488
9489 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9490 as_a <scalar_float_mode> (mode), align);
9491 break;
9492
9493 case MODE_INT:
9494 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9495 mark_symbol_refs_as_used (exp);
9496 break;
9497
9498 case MODE_VECTOR_INT:
9499 case MODE_VECTOR_FLOAT:
9500 {
9501 int i;
9502 machine_mode inner_mode;
9503 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9504
9505 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9506 for (i = 0; i < XVECLEN (exp, 0); i++)
9507 s390_output_pool_entry (XVECEXP (exp, 0, i),
9508 inner_mode,
9509 i == 0
9510 ? align
9511 : GET_MODE_BITSIZE (inner_mode));
9512 }
9513 break;
9514
9515 default:
9516 gcc_unreachable ();
9517 }
9518 }
9519
9520 /* Return true if MEM refers to an integer constant in the literal pool. If
9521 VAL is not nullptr, then also fill it with the constant's value. */
9522
9523 bool
9524 s390_const_int_pool_entry_p (rtx mem, HOST_WIDE_INT *val)
9525 {
9526 /* Try to match the following:
9527 - (mem (unspec [(symbol_ref) (reg)] UNSPEC_LTREF)).
9528 - (mem (symbol_ref)). */
9529
9530 if (!MEM_P (mem))
9531 return false;
9532
9533 rtx addr = XEXP (mem, 0);
9534 rtx sym;
9535 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LTREF)
9536 sym = XVECEXP (addr, 0, 0);
9537 else
9538 sym = addr;
9539
9540 if (!SYMBOL_REF_P (sym) || !CONSTANT_POOL_ADDRESS_P (sym))
9541 return false;
9542
9543 rtx val_rtx = get_pool_constant (sym);
9544 if (!CONST_INT_P (val_rtx))
9545 return false;
9546
9547 if (val != nullptr)
9548 *val = INTVAL (val_rtx);
9549 return true;
9550 }
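/* Illustrative usage sketch (added, not from the original sources; PAT is
   a hypothetical single_set pattern):

     HOST_WIDE_INT value;
     if (s390_const_int_pool_entry_p (SET_SRC (pat), &value))
       ... operate on VALUE instead of reloading it from the pool ...
*/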
9551
9552 /* Return an RTL expression representing the value of the return address
9553 for the frame COUNT steps up from the current frame. FRAME is the
9554 frame pointer of that frame. */
9555
9556 rtx
9557 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9558 {
9559 int offset;
9560 rtx addr;
9561
9562 /* Without backchain, we fail for all but the current frame. */
9563
9564 if (!TARGET_BACKCHAIN && count > 0)
9565 return NULL_RTX;
9566
9567 /* For the current frame, we need to make sure the initial
9568 value of RETURN_REGNUM is actually saved. */
9569
9570 if (count == 0)
9571 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9572
9573 if (TARGET_PACKED_STACK)
9574 offset = -2 * UNITS_PER_LONG;
9575 else
9576 offset = RETURN_REGNUM * UNITS_PER_LONG;
9577
9578 addr = plus_constant (Pmode, frame, offset);
9579 addr = memory_address (Pmode, addr);
9580 return gen_rtx_MEM (Pmode, addr);
9581 }
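/* Worked example (added, not from the original sources): with the default
   64-bit layout, UNITS_PER_LONG is 8 and RETURN_REGNUM is r14, so a
   parent frame's return address is loaded from FRAME + 112; with
   TARGET_PACKED_STACK it is loaded from FRAME - 16 instead.  */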
9582
9583 /* Return an RTL expression representing the back chain stored in
9584 the current stack frame. */
9585
9586 rtx
9587 s390_back_chain_rtx (void)
9588 {
9589 rtx chain;
9590
9591 gcc_assert (TARGET_BACKCHAIN);
9592
9593 if (TARGET_PACKED_STACK)
9594 chain = plus_constant (Pmode, stack_pointer_rtx,
9595 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9596 else
9597 chain = stack_pointer_rtx;
9598
9599 chain = gen_rtx_MEM (Pmode, chain);
9600 return chain;
9601 }
9602
9603 /* Find the first call-clobbered register unused in a function.
9604 This could be used as base register in a leaf function
9605 or for holding the return address before epilogue. */
9606
9607 static int
9608 find_unused_clobbered_reg (void)
9609 {
9610 int i;
9611 for (i = 0; i < 6; i++)
9612 if (!df_regs_ever_live_p (i))
9613 return i;
9614 return 0;
9615 }
9616
9617
9618 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9619 clobbered hard regs in SETREG. */
9620
9621 static void
9622 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9623 {
9624 char *regs_ever_clobbered = (char *)data;
9625 unsigned int i, regno;
9626 machine_mode mode = GET_MODE (setreg);
9627
9628 if (GET_CODE (setreg) == SUBREG)
9629 {
9630 rtx inner = SUBREG_REG (setreg);
9631 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9632 return;
9633 regno = subreg_regno (setreg);
9634 }
9635 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9636 regno = REGNO (setreg);
9637 else
9638 return;
9639
9640 for (i = regno;
9641 i < end_hard_regno (mode, regno);
9642 i++)
9643 regs_ever_clobbered[i] = 1;
9644 }
9645
9646 /* Walks through all basic blocks of the current function looking
9647 for clobbered hard regs using s390_reg_clobbered_rtx. The fields
9648 of the passed char array REGS_EVER_CLOBBERED are set to one for
9649 each of those regs. */
9650
9651 static void
9652 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9653 {
9654 basic_block cur_bb;
9655 rtx_insn *cur_insn;
9656 unsigned int i;
9657
9658 memset (regs_ever_clobbered, 0, 32);
9659
9660 /* For non-leaf functions we have to consider all call clobbered regs to be
9661 clobbered. */
9662 if (!crtl->is_leaf)
9663 {
9664 for (i = 0; i < 32; i++)
9665 regs_ever_clobbered[i] = call_used_regs[i];
9666 }
9667
9668 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9669 this work is done by liveness analysis (mark_regs_live_at_end).
9670 Special care is needed for functions containing landing pads. Landing pads
9671 may use the eh registers, but the code which sets these registers is not
9672 contained in that function. Hence s390_regs_ever_clobbered is not able to
9673 deal with this automatically. */
9674 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9675 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9676 if (crtl->calls_eh_return
9677 || (cfun->machine->has_landing_pad_p
9678 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9679 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9680
9681 /* For nonlocal gotos all call-saved registers have to be saved.
9682 This flag is also set for the unwinding code in libgcc.
9683 See expand_builtin_unwind_init. For regs_ever_live this is done by
9684 reload. */
9685 if (crtl->saves_all_registers)
9686 for (i = 0; i < 32; i++)
9687 if (!call_used_regs[i])
9688 regs_ever_clobbered[i] = 1;
9689
9690 FOR_EACH_BB_FN (cur_bb, cfun)
9691 {
9692 FOR_BB_INSNS (cur_bb, cur_insn)
9693 {
9694 rtx pat;
9695
9696 if (!INSN_P (cur_insn))
9697 continue;
9698
9699 pat = PATTERN (cur_insn);
9700
9701 /* Ignore GPR restore insns. */
9702 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9703 {
9704 if (GET_CODE (pat) == SET
9705 && GENERAL_REG_P (SET_DEST (pat)))
9706 {
9707 /* lgdr */
9708 if (GET_MODE (SET_SRC (pat)) == DImode
9709 && FP_REG_P (SET_SRC (pat)))
9710 continue;
9711
9712 /* l / lg */
9713 if (GET_CODE (SET_SRC (pat)) == MEM)
9714 continue;
9715 }
9716
9717 /* lm / lmg */
9718 if (GET_CODE (pat) == PARALLEL
9719 && load_multiple_operation (pat, VOIDmode))
9720 continue;
9721 }
9722
9723 note_stores (cur_insn,
9724 s390_reg_clobbered_rtx,
9725 regs_ever_clobbered);
9726 }
9727 }
9728 }
9729
9730 /* Determine the frame area which actually has to be accessed
9731 in the function epilogue. The values are stored at the
9732 given pointers AREA_BOTTOM (address of the lowest used stack
9733 address) and AREA_TOP (address of the first item which does
9734 not belong to the stack frame). */
9735
9736 static void
9737 s390_frame_area (int *area_bottom, int *area_top)
9738 {
9739 int b, t;
9740
9741 b = INT_MAX;
9742 t = INT_MIN;
9743
9744 if (cfun_frame_layout.first_restore_gpr != -1)
9745 {
9746 b = (cfun_frame_layout.gprs_offset
9747 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9748 t = b + (cfun_frame_layout.last_restore_gpr
9749 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9750 }
9751
9752 if (TARGET_64BIT && cfun_save_high_fprs_p)
9753 {
9754 b = MIN (b, cfun_frame_layout.f8_offset);
9755 t = MAX (t, (cfun_frame_layout.f8_offset
9756 + cfun_frame_layout.high_fprs * 8));
9757 }
9758
9759 if (!TARGET_64BIT)
9760 {
9761 if (cfun_fpr_save_p (FPR4_REGNUM))
9762 {
9763 b = MIN (b, cfun_frame_layout.f4_offset);
9764 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9765 }
9766 if (cfun_fpr_save_p (FPR6_REGNUM))
9767 {
9768 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9769 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9770 }
9771 }
9772 *area_bottom = b;
9773 *area_top = t;
9774 }
9775 /* Update gpr_save_slots in the frame layout trying to make use of
9776 FPRs as GPR save slots.
9777 This is a helper routine of s390_register_info. */
9778
9779 static void
9780 s390_register_info_gprtofpr ()
9781 {
9782 int save_reg_slot = FPR0_REGNUM;
9783 int i, j;
9784
9785 if (TARGET_TPF || !TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9786 return;
9787
9788 /* builtin_eh_return needs to be able to modify the return address
9789 on the stack. It could also adjust the FPR save slot instead but
9790 is it worth the trouble?! */
9791 if (crtl->calls_eh_return)
9792 return;
9793
9794 for (i = 15; i >= 6; i--)
9795 {
9796 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9797 continue;
9798
9799 /* Advance to the next FP register which can be used as a
9800 GPR save slot. */
9801 while ((!call_used_regs[save_reg_slot]
9802 || df_regs_ever_live_p (save_reg_slot)
9803 || cfun_fpr_save_p (save_reg_slot))
9804 && FP_REGNO_P (save_reg_slot))
9805 save_reg_slot++;
9806 if (!FP_REGNO_P (save_reg_slot))
9807 {
9808 /* We only want to use ldgr/lgdr if we can get rid of
9809 stm/lm entirely. So undo the gpr slot allocation in
9810 case we ran out of FPR save slots. */
9811 for (j = 6; j <= 15; j++)
9812 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9813 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9814 break;
9815 }
9816 cfun_gpr_save_slot (i) = save_reg_slot++;
9817 }
9818 }
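/* Illustrative example (added, not from the original sources): a leaf
   function on z10 or later that clobbers r11 and r14 but no FPRs can keep
   both GPRs in call-clobbered FPRs (lgdr/ldgr) instead of spilling them,
   so no stm/lm is needed at all.  */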
9819
9820 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9821 stdarg.
9822 This is a helper routine for s390_register_info. */
9823
9824 static void
9825 s390_register_info_stdarg_fpr ()
9826 {
9827 int i;
9828 int min_fpr;
9829 int max_fpr;
9830
9831 /* Save the FP argument regs for stdarg. f0 and f2 for 31 bit;
9832 f0, f2, f4, and f6 for 64 bit. */
9833 if (!cfun->stdarg
9834 || !TARGET_HARD_FLOAT
9835 || !cfun->va_list_fpr_size
9836 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9837 return;
9838
9839 min_fpr = crtl->args.info.fprs;
9840 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9841 if (max_fpr >= FP_ARG_NUM_REG)
9842 max_fpr = FP_ARG_NUM_REG - 1;
9843
9844 /* FPR argument regs start at f0. */
9845 min_fpr += FPR0_REGNUM;
9846 max_fpr += FPR0_REGNUM;
9847
9848 for (i = min_fpr; i <= max_fpr; i++)
9849 cfun_set_fpr_save (i);
9850 }
9851
9852 /* Reserve the GPR save slots for GPRs which need to be saved due to
9853 stdarg.
9854 This is a helper routine for s390_register_info. */
9855
9856 static void
9857 s390_register_info_stdarg_gpr ()
9858 {
9859 int i;
9860 int min_gpr;
9861 int max_gpr;
9862
9863 if (!cfun->stdarg
9864 || !cfun->va_list_gpr_size
9865 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9866 return;
9867
9868 min_gpr = crtl->args.info.gprs;
9869 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9870 if (max_gpr >= GP_ARG_NUM_REG)
9871 max_gpr = GP_ARG_NUM_REG - 1;
9872
9873 /* GPR argument regs start at r2. */
9874 min_gpr += GPR2_REGNUM;
9875 max_gpr += GPR2_REGNUM;
9876
9877 /* If r6 was supposed to be saved into an FPR and now needs to go to
9878 the stack for vararg we have to adjust the restore range to make
9879 sure that the restore is done from stack as well. */
9880 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9881 && min_gpr <= GPR6_REGNUM
9882 && max_gpr >= GPR6_REGNUM)
9883 {
9884 if (cfun_frame_layout.first_restore_gpr == -1
9885 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9886 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9887 if (cfun_frame_layout.last_restore_gpr == -1
9888 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9889 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9890 }
9891
9892 if (cfun_frame_layout.first_save_gpr == -1
9893 || cfun_frame_layout.first_save_gpr > min_gpr)
9894 cfun_frame_layout.first_save_gpr = min_gpr;
9895
9896 if (cfun_frame_layout.last_save_gpr == -1
9897 || cfun_frame_layout.last_save_gpr < max_gpr)
9898 cfun_frame_layout.last_save_gpr = max_gpr;
9899
9900 for (i = min_gpr; i <= max_gpr; i++)
9901 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9902 }
9903
9904 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9905 prologue and epilogue. */
9906
9907 static void
9908 s390_register_info_set_ranges ()
9909 {
9910 int i, j;
9911
9912 /* Find the first and the last save slot supposed to use the stack
9913 to set the restore range.
9914 Vararg regs might be marked as saved to the stack but only the
9915 call-saved regs really need restoring (i.e. r6). This code
9916 assumes that the vararg regs have not yet been recorded in
9917 cfun_gpr_save_slot. */
9918 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9919 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
9920 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9921 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9922 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9923 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9924 }
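/* Illustrative example (added, not from the original sources): if only r6
   and r14 end up with SAVE_SLOT_STACK, the ranges become first = 6 and
   last = 14; since stm(g)/lm(g) operate on a contiguous register range,
   r7..r13 are stored and reloaded as well even though they do not
   strictly need it.  */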
9925
9926 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9927 for registers which need to be saved in function prologue.
9928 This function can be used until the insns emitted for save/restore
9929 of the regs are visible in the RTL stream. */
9930
9931 static void
9932 s390_register_info ()
9933 {
9934 int i;
9935 char clobbered_regs[32];
9936
9937 gcc_assert (!epilogue_completed);
9938
9939 if (reload_completed)
9940 /* After reload we rely on our own routine to determine which
9941 registers need saving. */
9942 s390_regs_ever_clobbered (clobbered_regs);
9943 else
9944 /* During reload we use regs_ever_live as a base since reload
9945 does changes in there which we otherwise would not be aware
9946 of. */
9947 for (i = 0; i < 32; i++)
9948 clobbered_regs[i] = df_regs_ever_live_p (i);
9949
9950 for (i = 0; i < 32; i++)
9951 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9952
9953 /* Mark the call-saved FPRs which need to be saved.
9954 This needs to be done before checking the special GPRs since the
9955 stack pointer usage depends on whether high FPRs have to be saved
9956 or not. */
9957 cfun_frame_layout.fpr_bitmap = 0;
9958 cfun_frame_layout.high_fprs = 0;
9959 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9960 if (clobbered_regs[i] && !call_used_regs[i])
9961 {
9962 cfun_set_fpr_save (i);
9963 if (i >= FPR8_REGNUM)
9964 cfun_frame_layout.high_fprs++;
9965 }
9966
9967 /* Register 12 is used for GOT address, but also as temp in prologue
9968 for split-stack stdarg functions (unless r14 is available). */
9969 clobbered_regs[12]
9970 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9971 || (flag_split_stack && cfun->stdarg
9972 && (crtl->is_leaf || TARGET_TPF_PROFILING
9973 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9974
9975 clobbered_regs[BASE_REGNUM]
9976 |= (cfun->machine->base_reg
9977 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9978
9979 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9980 |= !!frame_pointer_needed;
9981
9982 /* On pre-z900 machines this might not be decided until
9983 machine-dependent reorg.
9984 save_return_addr_p will only be set on non-zarch machines so
9985 there is no risk that r14 goes into an FPR instead of a stack
9986 slot. */
9987 clobbered_regs[RETURN_REGNUM]
9988 |= (!crtl->is_leaf
9989 || TARGET_TPF_PROFILING
9990 || cfun_frame_layout.save_return_addr_p
9991 || crtl->calls_eh_return);
9992
9993 clobbered_regs[STACK_POINTER_REGNUM]
9994 |= (!crtl->is_leaf
9995 || TARGET_TPF_PROFILING
9996 || cfun_save_high_fprs_p
9997 || get_frame_size () > 0
9998 || (reload_completed && cfun_frame_layout.frame_size > 0)
9999 || cfun->calls_alloca);
10000
10001 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
10002
10003 for (i = 6; i < 16; i++)
10004 if (clobbered_regs[i])
10005 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
10006
10007 s390_register_info_stdarg_fpr ();
10008 s390_register_info_gprtofpr ();
10009 s390_register_info_set_ranges ();
10010 /* stdarg functions might need to save GPRs 2 to 6. This might
10011 override the GPR->FPR save decision made by
10012 s390_register_info_gprtofpr for r6 since vararg regs must go to
10013 the stack. */
10014 s390_register_info_stdarg_gpr ();
10015 }
10016
10017 /* Return true if REGNO is a global register, but not one
10018 of the special ones that need to be saved/restored anyway. */
10019
10020 static inline bool
10021 global_not_special_regno_p (int regno)
10022 {
10023 return (global_regs[regno]
10024 /* These registers are special and need to be
10025 restored in any case. */
10026 && !(regno == STACK_POINTER_REGNUM
10027 || regno == RETURN_REGNUM
10028 || regno == BASE_REGNUM
10029 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10030 }
10031
10032 /* This function is called by s390_optimize_prologue in order to get
10033 rid of unnecessary GPR save/restore instructions. The register info
10034 for the GPRs is re-computed and the ranges are re-calculated. */
10035
10036 static void
10037 s390_optimize_register_info ()
10038 {
10039 char clobbered_regs[32];
10040 int i;
10041
10042 gcc_assert (epilogue_completed);
10043
10044 s390_regs_ever_clobbered (clobbered_regs);
10045
10046 /* Global registers do not need to be saved and restored unless it
10047 is one of our special regs. (r12, r13, r14, or r15). */
10048 for (i = 0; i < 32; i++)
10049 clobbered_regs[i] = clobbered_regs[i] && !global_not_special_regno_p (i);
10050
10051 /* There is still special treatment needed for cases invisible to
10052 s390_regs_ever_clobbered. */
10053 clobbered_regs[RETURN_REGNUM]
10054 |= (TARGET_TPF_PROFILING
10055 /* When expanding builtin_return_addr in ESA mode we do not
10056 know whether r14 will later be needed as scratch reg when
10057 doing branch splitting. So the builtin always accesses the
10058 r14 save slot and we need to stick to the save/restore
10059 decision for r14 even if it turns out that it didn't get
10060 clobbered. */
10061 || cfun_frame_layout.save_return_addr_p
10062 || crtl->calls_eh_return);
10063
10064 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
10065
10066 for (i = 6; i < 16; i++)
10067 if (!clobbered_regs[i])
10068 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
10069
10070 s390_register_info_set_ranges ();
10071 s390_register_info_stdarg_gpr ();
10072 }
10073
10074 /* Fill cfun->machine with info about frame of current function. */
10075
10076 static void
10077 s390_frame_info (void)
10078 {
10079 HOST_WIDE_INT lowest_offset;
10080
10081 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
10082 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
10083
10084 /* The va_arg builtin uses a constant distance of 16 *
10085 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
10086 pointer. So even if we are going to save the stack pointer in an
10087 FPR we need the stack space in order to keep the offsets
10088 correct. */
10089 if (cfun->stdarg && cfun_save_arg_fprs_p)
10090 {
10091 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10092
10093 if (cfun_frame_layout.first_save_gpr_slot == -1)
10094 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
10095 }
10096
10097 cfun_frame_layout.frame_size = get_frame_size ();
10098 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
10099 fatal_error (input_location,
10100 "total size of local variables exceeds architecture limit");
10101
10102 if (!TARGET_PACKED_STACK)
10103 {
10104 /* Fixed stack layout. */
10105 cfun_frame_layout.backchain_offset = 0;
10106 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
10107 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
10108 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
10109 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
10110 * UNITS_PER_LONG);
10111 }
10112 else if (TARGET_BACKCHAIN)
10113 {
10114 /* Kernel stack layout - packed stack, backchain, no float */
10115 gcc_assert (TARGET_SOFT_FLOAT);
10116 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
10117 - UNITS_PER_LONG);
10118
10119 /* The distance between the backchain and the return address
10120 save slot must not change. So we always need a slot for the
10121 stack pointer which resides in between. */
10122 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10123
10124 cfun_frame_layout.gprs_offset
10125 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
10126
10127 /* FPRs will not be saved. Nevertheless pick sane values to
10128 keep area calculations valid. */
10129 cfun_frame_layout.f0_offset =
10130 cfun_frame_layout.f4_offset =
10131 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
10132 }
10133 else
10134 {
10135 int num_fprs;
10136
10137 /* Packed stack layout without backchain. */
10138
10139 /* With stdarg FPRs need their dedicated slots. */
10140 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
10141 : (cfun_fpr_save_p (FPR4_REGNUM) +
10142 cfun_fpr_save_p (FPR6_REGNUM)));
10143 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
10144
10145 num_fprs = (cfun->stdarg ? 2
10146 : (cfun_fpr_save_p (FPR0_REGNUM)
10147 + cfun_fpr_save_p (FPR2_REGNUM)));
10148 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
10149
10150 cfun_frame_layout.gprs_offset
10151 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
10152
10153 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
10154 - cfun_frame_layout.high_fprs * 8);
10155 }
10156
10157 if (cfun_save_high_fprs_p)
10158 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
10159
10160 if (!crtl->is_leaf)
10161 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
10162
10163 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
10164 sized area at the bottom of the stack. This is required also for
10165 leaf functions. When GCC generates a local stack reference it
10166 will always add STACK_POINTER_OFFSET to all these references. */
10167 if (crtl->is_leaf
10168 && !TARGET_TPF_PROFILING
10169 && cfun_frame_layout.frame_size == 0
10170 && !cfun->calls_alloca)
10171 return;
10172
10173 /* Calculate the number of bytes we have used in our own register
10174 save area. With the packed stack layout we can re-use the
10175 remaining bytes for normal stack elements. */
10176
10177 if (TARGET_PACKED_STACK)
10178 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
10179 cfun_frame_layout.f4_offset),
10180 cfun_frame_layout.gprs_offset);
10181 else
10182 lowest_offset = 0;
10183
10184 if (TARGET_BACKCHAIN)
10185 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
10186
10187 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
10188
10189 /* Under 31 bit, if an odd number of GPRs has to be saved, we have
10190 to adjust the frame size to sustain 8-byte alignment of stack
10191 frames. */
10192 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
10193 STACK_BOUNDARY / BITS_PER_UNIT - 1)
10194 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
10195 }
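/* Worked example (added, not from the original sources): with
   STACK_BOUNDARY == 64 the final statement rounds the frame size up to a
   multiple of 8 bytes, e.g. a raw size of 100 becomes 104.  */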
10196
10197 /* Generate frame layout. Fills in register and frame data for the current
10198 function in cfun->machine. This routine can be called multiple times;
10199 it will re-do the complete frame layout every time. */
10200
10201 static void
10202 s390_init_frame_layout (void)
10203 {
10204 HOST_WIDE_INT frame_size;
10205 int base_used;
10206
10207 /* After LRA the frame layout is supposed to be read-only and should
10208 not be re-computed. */
10209 if (reload_completed)
10210 return;
10211
10212 do
10213 {
10214 frame_size = cfun_frame_layout.frame_size;
10215
10216 /* Try to predict whether we'll need the base register. */
10217 base_used = crtl->uses_const_pool
10218 || (!DISP_IN_RANGE (frame_size)
10219 && !CONST_OK_FOR_K (frame_size));
10220
10221 /* Decide which register to use as literal pool base. In small
10222 leaf functions, try to use an unused call-clobbered register
10223 as base register to avoid save/restore overhead. */
10224 if (!base_used)
10225 cfun->machine->base_reg = NULL_RTX;
10226 else
10227 {
10228 int br = 0;
10229
10230 if (crtl->is_leaf)
10231 /* Prefer r5 (most likely to be free). */
10232 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10233 ;
10234 cfun->machine->base_reg =
10235 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
10236 }
10237
10238 s390_register_info ();
10239 s390_frame_info ();
10240 }
10241 while (frame_size != cfun_frame_layout.frame_size);
10242 }
10243
10244 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10245 the TX is nonescaping. A transaction is considered escaping if
10246 there is at least one path from tbegin returning CC0 to the
10247 function exit block without a tend.
10248
10249 The check so far has some limitations:
10250 - only single tbegin/tend BBs are supported
10251 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10252 - when CC is copied to a GPR and the CC0 check is done with the GPR
10253 this is not supported
10254 */
10255
10256 static void
10257 s390_optimize_nonescaping_tx (void)
10258 {
10259 const unsigned int CC0 = 1 << 3;
10260 basic_block tbegin_bb = NULL;
10261 basic_block tend_bb = NULL;
10262 basic_block bb;
10263 rtx_insn *insn;
10264 bool result = true;
10265 int bb_index;
10266 rtx_insn *tbegin_insn = NULL;
10267
10268 if (!cfun->machine->tbegin_p)
10269 return;
10270
10271 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10272 {
10273 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10274
10275 if (!bb)
10276 continue;
10277
10278 FOR_BB_INSNS (bb, insn)
10279 {
10280 rtx ite, cc, pat, target;
10281 unsigned HOST_WIDE_INT mask;
10282
10283 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10284 continue;
10285
10286 pat = PATTERN (insn);
10287
10288 if (GET_CODE (pat) == PARALLEL)
10289 pat = XVECEXP (pat, 0, 0);
10290
10291 if (GET_CODE (pat) != SET
10292 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10293 continue;
10294
10295 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10296 {
10297 rtx_insn *tmp;
10298
10299 tbegin_insn = insn;
10300
10301 /* Just return if the tbegin doesn't have clobbers. */
10302 if (GET_CODE (PATTERN (insn)) != PARALLEL)
10303 return;
10304
10305 if (tbegin_bb != NULL)
10306 return;
10307
10308 /* Find the next conditional jump. */
10309 for (tmp = NEXT_INSN (insn);
10310 tmp != NULL_RTX;
10311 tmp = NEXT_INSN (tmp))
10312 {
10313 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10314 return;
10315 if (!JUMP_P (tmp))
10316 continue;
10317
10318 ite = SET_SRC (PATTERN (tmp));
10319 if (GET_CODE (ite) != IF_THEN_ELSE)
10320 continue;
10321
10322 cc = XEXP (XEXP (ite, 0), 0);
10323 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10324 || GET_MODE (cc) != CCRAWmode
10325 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10326 return;
10327
10328 if (bb->succs->length () != 2)
10329 return;
10330
10331 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10332 if (GET_CODE (XEXP (ite, 0)) == NE)
10333 mask ^= 0xf;
10334
10335 if (mask == CC0)
10336 target = XEXP (ite, 1);
10337 else if (mask == (CC0 ^ 0xf))
10338 target = XEXP (ite, 2);
10339 else
10340 return;
10341
10342 {
10343 edge_iterator ei;
10344 edge e1, e2;
10345
10346 ei = ei_start (bb->succs);
10347 e1 = ei_safe_edge (ei);
10348 ei_next (&ei);
10349 e2 = ei_safe_edge (ei);
10350
10351 if (e2->flags & EDGE_FALLTHRU)
10352 {
10353 e2 = e1;
10354 e1 = ei_safe_edge (ei);
10355 }
10356
10357 if (!(e1->flags & EDGE_FALLTHRU))
10358 return;
10359
10360 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10361 }
10362 if (tmp == BB_END (bb))
10363 break;
10364 }
10365 }
10366
10367 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10368 {
10369 if (tend_bb != NULL)
10370 return;
10371 tend_bb = bb;
10372 }
10373 }
10374 }
10375
10376 /* Either we successfully remove the FPR clobbers here or we are not
10377 able to do anything for this TX. Both cases don't qualify for
10378 another look. */
10379 cfun->machine->tbegin_p = false;
10380
10381 if (tbegin_bb == NULL || tend_bb == NULL)
10382 return;
10383
10384 calculate_dominance_info (CDI_POST_DOMINATORS);
10385 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10386 free_dominance_info (CDI_POST_DOMINATORS);
10387
10388 if (!result)
10389 return;
10390
10391 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10392 gen_rtvec (2,
10393 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10394 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10395 INSN_CODE (tbegin_insn) = -1;
10396 df_insn_rescan (tbegin_insn);
10397
10398 return;
10399 }
10400
10401 /* Implement TARGET_HARD_REGNO_NREGS. Because all registers in a class
10402 have the same size, this is equivalent to CLASS_MAX_NREGS. */
10403
10404 static unsigned int
10405 s390_hard_regno_nregs (unsigned int regno, machine_mode mode)
10406 {
10407 return s390_class_max_nregs (REGNO_REG_CLASS (regno), mode);
10408 }
10409
10410 /* Implement TARGET_HARD_REGNO_MODE_OK.
10411
10412 Integer modes <= word size fit into any GPR.
10413 Integer modes > word size fit into successive GPRs, starting with
10414 an even-numbered register.
10415 SImode and DImode fit into FPRs as well.
10416
10417 Floating point modes <= word size fit into any FPR or GPR.
10418 Floating point modes > word size (i.e. DFmode on 32-bit) fit
10419 into any FPR, or an even-odd GPR pair.
10420 TFmode fits only into an even-odd FPR pair.
10421
10422 Complex floating point modes fit either into two FPRs, or into
10423 successive GPRs (again starting with an even number).
10424 TCmode fits only into two successive even-odd FPR pairs.
10425
10426 Condition code modes fit only into the CC register. */
10427
10428 static bool
10429 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10430 {
10431 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10432 return false;
10433
10434 switch (REGNO_REG_CLASS (regno))
10435 {
10436 case VEC_REGS:
10437 return ((GET_MODE_CLASS (mode) == MODE_INT
10438 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10439 || mode == DFmode
10440 || (TARGET_VXE && mode == SFmode)
10441 || s390_vector_mode_supported_p (mode));
10442 break;
10443 case FP_REGS:
10444 if (TARGET_VX
10445 && ((GET_MODE_CLASS (mode) == MODE_INT
10446 && s390_class_max_nregs (FP_REGS, mode) == 1)
10447 || mode == DFmode
10448 || s390_vector_mode_supported_p (mode)))
10449 return true;
10450
10451 if (REGNO_PAIR_OK (regno, mode))
10452 {
10453 if (mode == SImode || mode == DImode)
10454 return true;
10455
10456 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10457 return true;
10458 }
10459 break;
10460 case ADDR_REGS:
10461 if (FRAME_REGNO_P (regno) && mode == Pmode)
10462 return true;
10463
10464 /* fallthrough */
10465 case GENERAL_REGS:
10466 if (REGNO_PAIR_OK (regno, mode))
10467 {
10468 if (TARGET_ZARCH
10469 || (mode != TFmode && mode != TCmode && mode != TDmode))
10470 return true;
10471 }
10472 break;
10473 case CC_REGS:
10474 if (GET_MODE_CLASS (mode) == MODE_CC)
10475 return true;
10476 break;
10477 case ACCESS_REGS:
10478 if (REGNO_PAIR_OK (regno, mode))
10479 {
10480 if (mode == SImode || mode == Pmode)
10481 return true;
10482 }
10483 break;
10484 default:
10485 return false;
10486 }
10487
10488 return false;
10489 }
10490
10491 /* Implement TARGET_MODES_TIEABLE_P. */
10492
10493 static bool
10494 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10495 {
10496 return ((mode1 == SFmode || mode1 == DFmode)
10497 == (mode2 == SFmode || mode2 == DFmode));
10498 }
10499
10500 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10501
10502 bool
10503 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10504 {
10505 /* Once we've decided upon a register to use as base register, it must
10506 no longer be used for any other purpose. */
10507 if (cfun->machine->base_reg)
10508 if (REGNO (cfun->machine->base_reg) == old_reg
10509 || REGNO (cfun->machine->base_reg) == new_reg)
10510 return false;
10511
10512 /* Prevent regrename from using call-saved regs which haven't
10513 actually been saved. This is necessary since regrename assumes
10514 the backend save/restore decisions are based on
10515 df_regs_ever_live. Since we have our own routine we have to tell
10516 regrename manually about it. */
10517 if (GENERAL_REGNO_P (new_reg)
10518 && !call_used_regs[new_reg]
10519 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10520 return false;
10521
10522 return true;
10523 }
10524
10525 /* Return nonzero if register REGNO can be used as a scratch register
10526 in peephole2. */
10527
10528 static bool
10529 s390_hard_regno_scratch_ok (unsigned int regno)
10530 {
10531 /* See s390_hard_regno_rename_ok. */
10532 if (GENERAL_REGNO_P (regno)
10533 && !call_used_regs[regno]
10534 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10535 return false;
10536
10537 return true;
10538 }
10539
10540 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
10541 code that runs in z/Architecture mode, but conforms to the 31-bit
10542 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10543 bytes are saved across calls, however. */
10544
10545 static bool
10546 s390_hard_regno_call_part_clobbered (unsigned int, unsigned int regno,
10547 machine_mode mode)
10548 {
10549 /* For r12 we know that the only bits we actually care about are
10550 preserved across function calls. Since r12 is a fixed reg all
10551 accesses to r12 are generated by the backend.
10552
10553 This workaround is necessary until gcse implements proper
10554 tracking of partially clobbered registers. */
10555 if (!TARGET_64BIT
10556 && TARGET_ZARCH
10557 && GET_MODE_SIZE (mode) > 4
10558 && (!flag_pic || regno != PIC_OFFSET_TABLE_REGNUM)
10559 && ((regno >= 6 && regno <= 15) || regno == 32))
10560 return true;
10561
10562 if (TARGET_VX
10563 && GET_MODE_SIZE (mode) > 8
10564 && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10565 || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10566 return true;
10567
10568 return false;
10569 }
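/* Illustrative example (added, not from the original sources): in -m31
   -mzarch code a DImode value living in r7 across a call only has its low
   32 bits preserved by the callee, which is why the hook reports a
   partial clobber for modes wider than 4 bytes in r6..r15.  */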
10570
10571 /* Maximum number of registers to represent a value of mode MODE
10572 in a register of class RCLASS. */
10573
10574 int
10575 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10576 {
10577 int reg_size;
10578 bool reg_pair_required_p = false;
10579
10580 switch (rclass)
10581 {
10582 case FP_REGS:
10583 case VEC_REGS:
10584 reg_size = TARGET_VX ? 16 : 8;
10585
10586 /* TF and TD modes would fit into a VR but we put them into a
10587 register pair since we do not have 128bit FP instructions on
10588 full VRs. */
10589 if (TARGET_VX
10590 && SCALAR_FLOAT_MODE_P (mode)
10591 && GET_MODE_SIZE (mode) >= 16
10592 && !(TARGET_VXE && mode == TFmode))
10593 reg_pair_required_p = true;
10594
10595 /* Even if complex types would fit into a single FPR/VR we force
10596 them into a register pair to deal with the parts more easily.
10597 (FIXME: What about complex ints?) */
10598 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10599 reg_pair_required_p = true;
10600 break;
10601 case ACCESS_REGS:
10602 reg_size = 4;
10603 break;
10604 default:
10605 reg_size = UNITS_PER_WORD;
10606 break;
10607 }
10608
10609 if (reg_pair_required_p)
10610 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10611
10612 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10613 }
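/* Worked examples (added, not from the original sources): with TARGET_VX
   but not TARGET_VXE, TFmode needs a register pair in FP_REGS/VEC_REGS:
   2 * ((16/2 + 16 - 1) / 16) == 2.  Without TARGET_VX, reg_size is 8 and
   TFmode simply occupies (16 + 8 - 1) / 8 == 2 FPRs.  */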
10614
10615 /* Return nonzero if mode M describes a 128-bit float in a floating point
10616 register pair. */
10617
10618 static bool
10619 s390_is_fpr128 (machine_mode m)
10620 {
10621 return m == FPRX2mode || (!TARGET_VXE && m == TFmode);
10622 }
10623
10624 /* Return nonzero if mode M describes a 128-bit float in a vector
10625 register. */
10626
10627 static bool
10628 s390_is_vr128 (machine_mode m)
10629 {
10630 return m == V1TFmode || (TARGET_VXE && m == TFmode);
10631 }
10632
10633 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10634
10635 static bool
10636 s390_can_change_mode_class (machine_mode from_mode,
10637 machine_mode to_mode,
10638 reg_class_t rclass)
10639 {
10640 machine_mode small_mode;
10641 machine_mode big_mode;
10642
10643 /* 128-bit values have different representations in floating point and
10644 vector registers. */
10645 if (reg_classes_intersect_p (VEC_REGS, rclass)
10646 && ((s390_is_fpr128 (from_mode) && s390_is_vr128 (to_mode))
10647 || (s390_is_vr128 (from_mode) && s390_is_fpr128 (to_mode))))
10648 return false;
10649
10650 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10651 return true;
10652
10653 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10654 {
10655 small_mode = from_mode;
10656 big_mode = to_mode;
10657 }
10658 else
10659 {
10660 small_mode = to_mode;
10661 big_mode = from_mode;
10662 }
10663
10664 /* Values residing in VRs are little-endian style. All modes are
10665 placed left-aligned in a VR. This means that we cannot allow
10666 switching between modes with differing sizes. Also if the vector
10667 facility is available we still place TFmode values in VR register
10668 pairs, since the only instructions we have operating on TFmodes
10669 only deal with register pairs. Therefore we have to allow DFmode
10670 subregs of TFmodes to enable the TFmode splitters. */
10671 if (reg_classes_intersect_p (VEC_REGS, rclass)
10672 && (GET_MODE_SIZE (small_mode) < 8
10673 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10674 return false;
10675
10676 /* Likewise for access registers, since they have only half the
10677 word size on 64-bit. */
10678 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10679 return false;
10680
10681 return true;
10682 }
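/* Illustrative examples (added, not from the original sources): for
   VEC_REGS a mode change from V4SImode to SImode is rejected because the
   smaller mode is below 8 bytes, while a DFmode subreg of a TFmode value
   is allowed whenever TFmode occupies a register pair
   (s390_class_max_nregs != 1), as described above.  */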
10683
10684 /* Return true if we use LRA instead of reload pass. */
10685 static bool
10686 s390_lra_p (void)
10687 {
10688 return s390_lra_flag;
10689 }
10690
10691 /* Return true if register FROM can be eliminated via register TO. */
10692
10693 static bool
10694 s390_can_eliminate (const int from, const int to)
10695 {
10696 /* We have not marked the base register as fixed.
10697 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10698 If a function requires the base register, we say here that this
10699 elimination cannot be performed. This will cause reload to free
10700 up the base register (as if it were fixed). On the other hand,
10701 if the current function does *not* require the base register, we
10702 say here the elimination succeeds, which in turn allows reload
10703 to allocate the base register for any other purpose. */
10704 if (from == BASE_REGNUM && to == BASE_REGNUM)
10705 {
10706 s390_init_frame_layout ();
10707 return cfun->machine->base_reg == NULL_RTX;
10708 }
10709
10710 /* Everything else must point into the stack frame. */
10711 gcc_assert (to == STACK_POINTER_REGNUM
10712 || to == HARD_FRAME_POINTER_REGNUM);
10713
10714 gcc_assert (from == FRAME_POINTER_REGNUM
10715 || from == ARG_POINTER_REGNUM
10716 || from == RETURN_ADDRESS_POINTER_REGNUM);
10717
10718 /* Make sure we actually saved the return address. */
10719 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10720 if (!crtl->calls_eh_return
10721 && !cfun->stdarg
10722 && !cfun_frame_layout.save_return_addr_p)
10723 return false;
10724
10725 return true;
10726 }
10727
10728 /* Return offset between register FROM and TO initially after prolog. */
10729
10730 HOST_WIDE_INT
10731 s390_initial_elimination_offset (int from, int to)
10732 {
10733 HOST_WIDE_INT offset;
10734
10735 /* ??? Why are we called for non-eliminable pairs? */
10736 if (!s390_can_eliminate (from, to))
10737 return 0;
10738
10739 switch (from)
10740 {
10741 case FRAME_POINTER_REGNUM:
10742 offset = (get_frame_size()
10743 + STACK_POINTER_OFFSET
10744 + crtl->outgoing_args_size);
10745 break;
10746
10747 case ARG_POINTER_REGNUM:
10748 s390_init_frame_layout ();
10749 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10750 break;
10751
10752 case RETURN_ADDRESS_POINTER_REGNUM:
10753 s390_init_frame_layout ();
10754
10755 if (cfun_frame_layout.first_save_gpr_slot == -1)
10756 {
10757 /* If it turns out that for stdarg nothing went into the reg
10758 save area we also do not need the return address
10759 pointer. */
10760 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10761 return 0;
10762
10763 gcc_unreachable ();
10764 }
10765
10766 /* In order to make the following work it is not necessary for
10767 r14 to have a save slot. It is sufficient if one other GPR
10768 got one. Since the GPRs are always stored without gaps we
10769 are able to calculate where the r14 save slot would
10770 reside. */
10771 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10772 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10773 UNITS_PER_LONG);
10774 break;
10775
10776 case BASE_REGNUM:
10777 offset = 0;
10778 break;
10779
10780 default:
10781 gcc_unreachable ();
10782 }
10783
10784 return offset;
10785 }
10786
10787 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10788 to register BASE. Return generated insn. */
10789
10790 static rtx
10791 save_fpr (rtx base, int offset, int regnum)
10792 {
10793 rtx addr;
10794 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10795
10796 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10797 set_mem_alias_set (addr, get_varargs_alias_set ());
10798 else
10799 set_mem_alias_set (addr, get_frame_alias_set ());
10800
10801 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10802 }
10803
10804 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10805 to register BASE. Return generated insn. */
10806
10807 static rtx
10808 restore_fpr (rtx base, int offset, int regnum)
10809 {
10810 rtx addr;
10811 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10812 set_mem_alias_set (addr, get_frame_alias_set ());
10813
10814 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10815 }
10816
10817 /* Generate insn to save registers FIRST to LAST into
10818 the register save area located at offset OFFSET
10819 relative to register BASE. */
10820
10821 static rtx
10822 save_gprs (rtx base, int offset, int first, int last)
10823 {
10824 rtx addr, insn, note;
10825 int i;
10826
10827 addr = plus_constant (Pmode, base, offset);
10828 addr = gen_rtx_MEM (Pmode, addr);
10829
10830 set_mem_alias_set (addr, get_frame_alias_set ());
10831
10832 /* Special-case single register. */
10833 if (first == last)
10834 {
10835 if (TARGET_64BIT)
10836 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10837 else
10838 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10839
10840 if (!global_not_special_regno_p (first))
10841 RTX_FRAME_RELATED_P (insn) = 1;
10842 return insn;
10843 }
10844
10845
10846 insn = gen_store_multiple (addr,
10847 gen_rtx_REG (Pmode, first),
10848 GEN_INT (last - first + 1));
10849
10850 if (first <= 6 && cfun->stdarg)
10851 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10852 {
10853 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10854
10855 if (first + i <= 6)
10856 set_mem_alias_set (mem, get_varargs_alias_set ());
10857 }
10858
10859 /* We need to set the FRAME_RELATED flag on all SETs
10860 inside the store-multiple pattern.
10861
10862 However, we must not emit DWARF records for registers 2..5
10863 if they are stored for use by variable arguments ...
10864
10865 ??? Unfortunately, it is not enough to simply not set the
10866 FRAME_RELATED flags for those SETs, because the first SET
10867 of the PARALLEL is always treated as if it had the flag
10868 set, even if it does not. Therefore we emit a new pattern
10869 without those registers as REG_FRAME_RELATED_EXPR note. */
10870
10871 if (first >= 6 && !global_not_special_regno_p (first))
10872 {
10873 rtx pat = PATTERN (insn);
10874
10875 for (i = 0; i < XVECLEN (pat, 0); i++)
10876 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10877 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10878 0, i)))))
10879 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10880
10881 RTX_FRAME_RELATED_P (insn) = 1;
10882 }
10883 else if (last >= 6)
10884 {
10885 int start;
10886
10887 for (start = first >= 6 ? first : 6; start <= last; start++)
10888 if (!global_not_special_regno_p (start))
10889 break;
10890
10891 if (start > last)
10892 return insn;
10893
10894 addr = plus_constant (Pmode, base,
10895 offset + (start - first) * UNITS_PER_LONG);
10896
10897 if (start == last)
10898 {
10899 if (TARGET_64BIT)
10900 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10901 gen_rtx_REG (Pmode, start));
10902 else
10903 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10904 gen_rtx_REG (Pmode, start));
10905 note = PATTERN (note);
10906
10907 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10908 RTX_FRAME_RELATED_P (insn) = 1;
10909
10910 return insn;
10911 }
10912
10913 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10914 gen_rtx_REG (Pmode, start),
10915 GEN_INT (last - start + 1));
10916 note = PATTERN (note);
10917
10918 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10919
10920 for (i = 0; i < XVECLEN (note, 0); i++)
10921 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10922 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10923 0, i)))))
10924 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10925
10926 RTX_FRAME_RELATED_P (insn) = 1;
10927 }
10928
10929 return insn;
10930 }
10931
10932 /* Generate insn to restore registers FIRST to LAST from
10933 the register save area located at offset OFFSET
10934 relative to register BASE. */
10935
10936 static rtx
10937 restore_gprs (rtx base, int offset, int first, int last)
10938 {
10939 rtx addr, insn;
10940
10941 addr = plus_constant (Pmode, base, offset);
10942 addr = gen_rtx_MEM (Pmode, addr);
10943 set_mem_alias_set (addr, get_frame_alias_set ());
10944
10945 /* Special-case single register. */
10946 if (first == last)
10947 {
10948 if (TARGET_64BIT)
10949 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10950 else
10951 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10952
10953 RTX_FRAME_RELATED_P (insn) = 1;
10954 return insn;
10955 }
10956
10957 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10958 addr,
10959 GEN_INT (last - first + 1));
10960 RTX_FRAME_RELATED_P (insn) = 1;
10961 return insn;
10962 }
10963
10964 /* Return insn sequence to load the GOT register. */
10965
10966 rtx_insn *
10967 s390_load_got (void)
10968 {
10969 rtx_insn *insns;
10970
10971 /* We cannot use pic_offset_table_rtx here since we use this
10972 function also for non-PIC code when __tls_get_offset is called;
10973 in that case neither PIC_OFFSET_TABLE_REGNUM nor
10974 pic_offset_table_rtx is usable. */
10975 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10976
10977 start_sequence ();
10978
10979 emit_move_insn (got_rtx, s390_got_symbol ());
10980
10981 insns = get_insns ();
10982 end_sequence ();
10983 return insns;
10984 }
10985
10986 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10987 and the change to the stack pointer. */
10988
10989 static void
10990 s390_emit_stack_tie (void)
10991 {
10992 rtx mem = gen_frame_mem (BLKmode,
10993 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10994
10995 emit_insn (gen_stack_tie (mem));
10996 }
10997
10998 /* Copy GPRS into FPR save slots. */
10999
11000 static void
11001 s390_save_gprs_to_fprs (void)
11002 {
11003 int i;
11004
11005 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
11006 return;
11007
11008 for (i = 6; i < 16; i++)
11009 {
11010 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
11011 {
11012 rtx_insn *insn =
11013 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
11014 gen_rtx_REG (DImode, i));
11015 RTX_FRAME_RELATED_P (insn) = 1;
11016 /* This prevents dwarf2cfi from interpreting the set. If it
11017 did, it might emit def_cfa_register infos setting an FPR as
11018 the new CFA. */
11019 add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
11020 }
11021 }
11022 }
11023
11024 /* Restore GPRs from FPR save slots. */
11025
11026 static void
11027 s390_restore_gprs_from_fprs (void)
11028 {
11029 int i;
11030
11031 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
11032 return;
11033
11034 /* Restore the GPRs starting with the stack pointer. That way the
11035 stack pointer already has its original value when it comes to
11036 restoring the hard frame pointer. So we can set the cfa reg back
11037 to the stack pointer. */
11038 for (i = STACK_POINTER_REGNUM; i >= 6; i--)
11039 {
11040 rtx_insn *insn;
11041
11042 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
11043 continue;
11044
11045 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
11046
11047 if (i == STACK_POINTER_REGNUM)
11048 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
11049 else
11050 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
11051
11052 df_set_regs_ever_live (i, true);
11053 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
11054
11055 /* If either the stack pointer or the frame pointer gets restored,
11056 set the CFA value to its value at function start. Doing this
11057 for the frame pointer results in .cfi_def_cfa_register 15,
11058 which is ok since if the stack pointer got modified it has
11059 been restored already. */
11060 if (i == STACK_POINTER_REGNUM || i == HARD_FRAME_POINTER_REGNUM)
11061 add_reg_note (insn, REG_CFA_DEF_CFA,
11062 plus_constant (Pmode, stack_pointer_rtx,
11063 STACK_POINTER_OFFSET));
11064 RTX_FRAME_RELATED_P (insn) = 1;
11065 }
11066 }
11067
11068
11069 /* A pass run immediately before shrink-wrapping and prologue and epilogue
11070 generation. */
11071
11072 namespace {
11073
11074 const pass_data pass_data_s390_early_mach =
11075 {
11076 RTL_PASS, /* type */
11077 "early_mach", /* name */
11078 OPTGROUP_NONE, /* optinfo_flags */
11079 TV_MACH_DEP, /* tv_id */
11080 0, /* properties_required */
11081 0, /* properties_provided */
11082 0, /* properties_destroyed */
11083 0, /* todo_flags_start */
11084 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
11085 };
11086
11087 class pass_s390_early_mach : public rtl_opt_pass
11088 {
11089 public:
11090 pass_s390_early_mach (gcc::context *ctxt)
11091 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
11092 {}
11093
11094 /* opt_pass methods: */
11095 virtual unsigned int execute (function *);
11096
11097 }; // class pass_s390_early_mach
11098
11099 unsigned int
11100 pass_s390_early_mach::execute (function *fun)
11101 {
11102 rtx_insn *insn;
11103
11104 /* Try to get rid of the FPR clobbers. */
11105 s390_optimize_nonescaping_tx ();
11106
11107 /* Re-compute register info. */
11108 s390_register_info ();
11109
11110 /* If we're using a base register, ensure that it is always valid for
11111 the first non-prologue instruction. */
11112 if (fun->machine->base_reg)
11113 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
11114
11115 /* Annotate all constant pool references to let the scheduler know
11116 they implicitly use the base register. */
11117 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11118 if (INSN_P (insn))
11119 {
11120 annotate_constant_pool_refs (insn);
11121 df_insn_rescan (insn);
11122 }
11123 return 0;
11124 }
11125
11126 } // anon namespace
11127
11128 rtl_opt_pass *
11129 make_pass_s390_early_mach (gcc::context *ctxt)
11130 {
11131 return new pass_s390_early_mach (ctxt);
11132 }
11133
11134 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
11135 - force immediates that are too large into the literal pool and annotate the refs
11136 - emit frame related notes for stack pointer changes. */
11137
11138 static rtx
11139 s390_prologue_plus_offset (rtx target, rtx reg, rtx offset, bool frame_related_p)
11140 {
11141 rtx_insn *insn;
11142 rtx orig_offset = offset;
11143
11144 gcc_assert (REG_P (target));
11145 gcc_assert (REG_P (reg));
11146 gcc_assert (CONST_INT_P (offset));
11147
11148 if (offset == const0_rtx) /* lr/lgr */
11149 {
11150 insn = emit_move_insn (target, reg);
11151 }
11152 else if (DISP_IN_RANGE (INTVAL (offset))) /* la */
11153 {
11154 insn = emit_move_insn (target, gen_rtx_PLUS (Pmode, reg,
11155 offset));
11156 }
11157 else
11158 {
11159 if (!satisfies_constraint_K (offset) /* ahi/aghi */
11160 && (!TARGET_EXTIMM
11161 || (!satisfies_constraint_Op (offset) /* alfi/algfi */
11162 && !satisfies_constraint_On (offset)))) /* slfi/slgfi */
11163 offset = force_const_mem (Pmode, offset);
11164
11165 if (target != reg)
11166 {
11167 insn = emit_move_insn (target, reg);
11168 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
11169 }
11170
11171 insn = emit_insn (gen_add2_insn (target, offset));
11172
11173 if (!CONST_INT_P (offset))
11174 {
11175 annotate_constant_pool_refs (insn);
11176
11177 if (frame_related_p)
11178 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11179 gen_rtx_SET (target,
11180 gen_rtx_PLUS (Pmode, target,
11181 orig_offset)));
11182 }
11183 }
11184
11185 RTX_FRAME_RELATED_P (insn) = frame_related_p ? 1 : 0;
11186
11187 /* If this is a stack adjustment and we are generating a stack clash
11188 prologue, then add a REG_STACK_CHECK note to signal that this insn
11189 should be left alone. */
11190 if (flag_stack_clash_protection && target == stack_pointer_rtx)
11191 add_reg_note (insn, REG_STACK_CHECK, const0_rtx);
11192
11193 return insn;
11194 }
11195
11196 /* Emit a compare instruction with a volatile memory access as stack
11197 probe. It does not waste store tags and does not clobber any
11198 registers apart from the condition code. */
11199 static void
11200 s390_emit_stack_probe (rtx addr)
11201 {
11202 rtx mem = gen_rtx_MEM (word_mode, addr);
11203 MEM_VOLATILE_P (mem) = 1;
11204 emit_insn (gen_probe_stack (mem));
11205 }
11206
11207 /* Use a runtime loop if we have to emit more probes than this. */
11208 #define MIN_UNROLL_PROBES 3
11209
11210 /* Allocate SIZE bytes of stack space, using TEMP_REG as a temporary
11211 if necessary. LAST_PROBE_OFFSET contains the offset of the closest
11212 probe relative to the stack pointer.
11213
11214 Note that SIZE is negative.
11215
11216 The return value is true if TEMP_REG has been clobbered. */
11217 static bool
11218 allocate_stack_space (rtx size, HOST_WIDE_INT last_probe_offset,
11219 rtx temp_reg)
11220 {
11221 bool temp_reg_clobbered_p = false;
11222 HOST_WIDE_INT probe_interval
11223 = 1 << param_stack_clash_protection_probe_interval;
11224 HOST_WIDE_INT guard_size
11225 = 1 << param_stack_clash_protection_guard_size;
11226
11227 if (flag_stack_clash_protection)
11228 {
11229 if (last_probe_offset + -INTVAL (size) < guard_size)
11230 dump_stack_clash_frame_info (NO_PROBE_SMALL_FRAME, true);
11231 else
11232 {
11233 rtx offset = GEN_INT (probe_interval - UNITS_PER_LONG);
11234 HOST_WIDE_INT rounded_size = -INTVAL (size) & -probe_interval;
11235 HOST_WIDE_INT num_probes = rounded_size / probe_interval;
11236 HOST_WIDE_INT residual = -INTVAL (size) - rounded_size;
11237
11238 if (num_probes < MIN_UNROLL_PROBES)
11239 {
11240 /* Emit unrolled probe statements. */
11241
11242 for (unsigned int i = 0; i < num_probes; i++)
11243 {
11244 s390_prologue_plus_offset (stack_pointer_rtx,
11245 stack_pointer_rtx,
11246 GEN_INT (-probe_interval), true);
11247 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11248 stack_pointer_rtx,
11249 offset));
11250 }
11251 if (num_probes > 0)
11252 last_probe_offset = INTVAL (offset);
11253 dump_stack_clash_frame_info (PROBE_INLINE, residual != 0);
11254 }
11255 else
11256 {
11257 /* Emit a loop probing the pages. */
11258
11259 rtx_code_label *loop_start_label = gen_label_rtx ();
11260
11261 /* From now on temp_reg will be the CFA register. */
11262 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11263 GEN_INT (-rounded_size), true);
11264 emit_label (loop_start_label);
11265
11266 s390_prologue_plus_offset (stack_pointer_rtx,
11267 stack_pointer_rtx,
11268 GEN_INT (-probe_interval), false);
11269 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11270 stack_pointer_rtx,
11271 offset));
11272 emit_cmp_and_jump_insns (stack_pointer_rtx, temp_reg,
11273 GT, NULL_RTX,
11274 Pmode, 1, loop_start_label);
11275
11276 /* Without this make_edges ICEs. */
11277 JUMP_LABEL (get_last_insn ()) = loop_start_label;
11278 LABEL_NUSES (loop_start_label) = 1;
11279
11280 /* That's going to be a NOP since stack pointer and
11281 temp_reg are supposed to be the same here. We just
11282 emit it to set the CFA reg back to r15. */
11283 s390_prologue_plus_offset (stack_pointer_rtx, temp_reg,
11284 const0_rtx, true);
11285 temp_reg_clobbered_p = true;
11286 last_probe_offset = INTVAL (offset);
11287 dump_stack_clash_frame_info (PROBE_LOOP, residual != 0);
11288 }
11289
11290 /* Handle any residual allocation request. */
11291 s390_prologue_plus_offset (stack_pointer_rtx,
11292 stack_pointer_rtx,
11293 GEN_INT (-residual), true);
11294 last_probe_offset += residual;
11295 if (last_probe_offset >= probe_interval)
11296 s390_emit_stack_probe (gen_rtx_PLUS (Pmode,
11297 stack_pointer_rtx,
11298 GEN_INT (residual
11299 - UNITS_PER_LONG)));
11300
11301 return temp_reg_clobbered_p;
11302 }
11303 }
11304
11305 /* Subtract frame size from stack pointer. */
11306 s390_prologue_plus_offset (stack_pointer_rtx,
11307 stack_pointer_rtx,
11308 size, true);
11309
11310 return temp_reg_clobbered_p;
11311 }
11312
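/* For illustration, assume the usual 4096 byte probe interval and a
   hypothetical frame of 10000 bytes.  rounded_size is then 8192, so two
   probes (fewer than MIN_UNROLL_PROBES) are emitted inline, roughly:

     aghi %r15,-4096     <probe 4088(%r15)>
     aghi %r15,-4096     <probe 4088(%r15)>
     aghi %r15,-1808     <probe 1800(%r15)>   residual allocation; probed
                                              again because the last probe
                                              would otherwise end up more
                                              than one interval away

   A larger frame, say 64 KiB, gives 16 probes and the loop form is used
   instead: TEMP_REG is set to r15 - rounded_size and the stack pointer is
   decremented and probed until it reaches TEMP_REG.  The probe itself is
   whatever gen_probe_stack expands to; the mnemonics are just a sketch.  */
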
11313 /* Expand the prologue into a bunch of separate insns. */
11314
11315 void
11316 s390_emit_prologue (void)
11317 {
11318 rtx insn, addr;
11319 rtx temp_reg;
11320 int i;
11321 int offset;
11322 int next_fpr = 0;
11323
11324 /* Choose the best register to use as a temporary within the prologue.
11325 TPF with profiling must avoid register 14 - the tracing function
11326 needs the original contents of r14 to be preserved. */
11327
11328 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
11329 && !crtl->is_leaf
11330 && !TARGET_TPF_PROFILING)
11331 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11332 else if (flag_split_stack && cfun->stdarg)
11333 temp_reg = gen_rtx_REG (Pmode, 12);
11334 else
11335 temp_reg = gen_rtx_REG (Pmode, 1);
11336
11337 /* When probing for stack-clash mitigation, we have to track the distance
11338 between the stack pointer and closest known reference.
11339
11340 Most of the time we have to make a worst case assumption. The
11341 only exception is when TARGET_BACKCHAIN is active, in which case
11342 we know *sp (offset 0) was written. */
11343 HOST_WIDE_INT probe_interval
11344 = 1 << param_stack_clash_protection_probe_interval;
11345 HOST_WIDE_INT last_probe_offset
11346 = (TARGET_BACKCHAIN
11347 ? (TARGET_PACKED_STACK ? STACK_POINTER_OFFSET - UNITS_PER_LONG : 0)
11348 : probe_interval - (STACK_BOUNDARY / UNITS_PER_WORD));
11349
11350 s390_save_gprs_to_fprs ();
11351
11352 /* Save call saved gprs. */
11353 if (cfun_frame_layout.first_save_gpr != -1)
11354 {
11355 insn = save_gprs (stack_pointer_rtx,
11356 cfun_frame_layout.gprs_offset +
11357 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11358 - cfun_frame_layout.first_save_gpr_slot),
11359 cfun_frame_layout.first_save_gpr,
11360 cfun_frame_layout.last_save_gpr);
11361
11362 /* This is not 100% correct. If we have more than one register saved,
11363 then LAST_PROBE_OFFSET can move even closer to sp. */
11364 last_probe_offset
11365 = (cfun_frame_layout.gprs_offset +
11366 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11367 - cfun_frame_layout.first_save_gpr_slot));
11368
11369 emit_insn (insn);
11370 }
11371
11372 /* Dummy insn to mark literal pool slot. */
11373
11374 if (cfun->machine->base_reg)
11375 emit_insn (gen_main_pool (cfun->machine->base_reg));
11376
11377 offset = cfun_frame_layout.f0_offset;
11378
11379 /* Save f0 and f2. */
11380 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11381 {
11382 if (cfun_fpr_save_p (i))
11383 {
11384 save_fpr (stack_pointer_rtx, offset, i);
11385 if (offset < last_probe_offset)
11386 last_probe_offset = offset;
11387 offset += 8;
11388 }
11389 else if (!TARGET_PACKED_STACK || cfun->stdarg)
11390 offset += 8;
11391 }
11392
11393 /* Save f4 and f6. */
11394 offset = cfun_frame_layout.f4_offset;
11395 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11396 {
11397 if (cfun_fpr_save_p (i))
11398 {
11399 insn = save_fpr (stack_pointer_rtx, offset, i);
11400 if (offset < last_probe_offset)
11401 last_probe_offset = offset;
11402 offset += 8;
11403
11404 /* If f4 and f6 are call clobbered they are saved due to
11405 stdargs and therefore are not frame related. */
11406 if (!call_used_regs[i])
11407 RTX_FRAME_RELATED_P (insn) = 1;
11408 }
11409 else if (!TARGET_PACKED_STACK || call_used_regs[i])
11410 offset += 8;
11411 }
11412
11413 if (TARGET_PACKED_STACK
11414 && cfun_save_high_fprs_p
11415 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11416 {
11417 offset = (cfun_frame_layout.f8_offset
11418 + (cfun_frame_layout.high_fprs - 1) * 8);
11419
11420 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11421 if (cfun_fpr_save_p (i))
11422 {
11423 insn = save_fpr (stack_pointer_rtx, offset, i);
11424 if (offset < last_probe_offset)
11425 last_probe_offset = offset;
11426
11427 RTX_FRAME_RELATED_P (insn) = 1;
11428 offset -= 8;
11429 }
11430 if (offset >= cfun_frame_layout.f8_offset)
11431 next_fpr = i;
11432 }
11433
11434 if (!TARGET_PACKED_STACK)
11435 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11436
11437 if (flag_stack_usage_info)
11438 current_function_static_stack_size = cfun_frame_layout.frame_size;
11439
11440 /* Decrement stack pointer. */
11441
11442 if (cfun_frame_layout.frame_size > 0)
11443 {
11444 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11445 rtx_insn *stack_pointer_backup_loc;
11446 bool temp_reg_clobbered_p;
11447
11448 if (s390_stack_size)
11449 {
11450 HOST_WIDE_INT stack_guard;
11451
11452 if (s390_stack_guard)
11453 stack_guard = s390_stack_guard;
11454 else
11455 {
11456 /* If no value for the stack guard is provided, the smallest power of 2
11457 that is at least the current frame size is chosen. */
11458 stack_guard = 1;
11459 while (stack_guard < cfun_frame_layout.frame_size)
11460 stack_guard <<= 1;
11461 }
11462
11463 if (cfun_frame_layout.frame_size >= s390_stack_size)
11464 {
11465 warning (0, "frame size of function %qs is %wd"
11466 " bytes exceeding user provided stack limit of "
11467 "%d bytes. "
11468 "An unconditional trap is added.",
11469 current_function_name(), cfun_frame_layout.frame_size,
11470 s390_stack_size);
11471 emit_insn (gen_trap ());
11472 emit_barrier ();
11473 }
11474 else
11475 {
11476 /* stack_guard has to be smaller than s390_stack_size.
11477 Otherwise we would emit an AND with zero which would
11478 not match the test under mask pattern. */
11479 if (stack_guard >= s390_stack_size)
11480 {
11481 warning (0, "frame size of function %qs is %wd"
11482 " bytes which is more than half the stack size. "
11483 "The dynamic check would not be reliable. "
11484 "No check emitted for this function.",
11485 current_function_name(),
11486 cfun_frame_layout.frame_size);
11487 }
11488 else
11489 {
11490 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11491 & ~(stack_guard - 1));
11492
11493 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11494 GEN_INT (stack_check_mask));
11495 if (TARGET_64BIT)
11496 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11497 t, const0_rtx),
11498 t, const0_rtx, const0_rtx));
11499 else
11500 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11501 t, const0_rtx),
11502 t, const0_rtx, const0_rtx));
11503 }
11504 }
11505 }
11506
11507 if (s390_warn_framesize > 0
11508 && cfun_frame_layout.frame_size >= s390_warn_framesize)
11509 warning (0, "frame size of %qs is %wd bytes",
11510 current_function_name (), cfun_frame_layout.frame_size);
11511
11512 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11513 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11514
11515 /* Save the location where we could backup the incoming stack
11516 pointer. */
11517 stack_pointer_backup_loc = get_last_insn ();
11518
11519 temp_reg_clobbered_p = allocate_stack_space (frame_off, last_probe_offset,
11520 temp_reg);
11521
11522 if (TARGET_BACKCHAIN || next_fpr)
11523 {
11524 if (temp_reg_clobbered_p)
11525 {
11526 /* allocate_stack_space had to make use of temp_reg and
11527 we need it to hold a backup of the incoming stack
11528 pointer. Calculate back that value from the current
11529 stack pointer. */
11530 s390_prologue_plus_offset (temp_reg, stack_pointer_rtx,
11531 GEN_INT (cfun_frame_layout.frame_size),
11532 false);
11533 }
11534 else
11535 {
11536 /* allocate_stack_space didn't actually require
11537 temp_reg. Insert the stack pointer backup insn
11538 before the stack pointer decrement code - knowing now
11539 that the value will survive. */
11540 emit_insn_after (gen_move_insn (temp_reg, stack_pointer_rtx),
11541 stack_pointer_backup_loc);
11542 }
11543 }
11544
11545 /* Set backchain. */
11546
11547 if (TARGET_BACKCHAIN)
11548 {
11549 if (cfun_frame_layout.backchain_offset)
11550 addr = gen_rtx_MEM (Pmode,
11551 plus_constant (Pmode, stack_pointer_rtx,
11552 cfun_frame_layout.backchain_offset));
11553 else
11554 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11555 set_mem_alias_set (addr, get_frame_alias_set ());
11556 insn = emit_insn (gen_move_insn (addr, temp_reg));
11557 }
11558
11559 /* If we support non-call exceptions (e.g. for Java),
11560 we need to make sure the backchain pointer is set up
11561 before any possibly trapping memory access. */
11562 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11563 {
11564 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11565 emit_clobber (addr);
11566 }
11567 }
11568 else if (flag_stack_clash_protection)
11569 dump_stack_clash_frame_info (NO_PROBE_NO_FRAME, false);
11570
11571 /* Save fprs 8 - 15 (64 bit ABI). */
11572
11573 if (cfun_save_high_fprs_p && next_fpr)
11574 {
11575 /* If the stack might be accessed through a different register
11576 we have to make sure that the stack pointer decrement is not
11577 moved below the use of the stack slots. */
11578 s390_emit_stack_tie ();
11579
11580 insn = emit_insn (gen_add2_insn (temp_reg,
11581 GEN_INT (cfun_frame_layout.f8_offset)));
11582
11583 offset = 0;
11584
11585 for (i = FPR8_REGNUM; i <= next_fpr; i++)
11586 if (cfun_fpr_save_p (i))
11587 {
11588 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11589 cfun_frame_layout.frame_size
11590 + cfun_frame_layout.f8_offset
11591 + offset);
11592
11593 insn = save_fpr (temp_reg, offset, i);
11594 offset += 8;
11595 RTX_FRAME_RELATED_P (insn) = 1;
11596 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11597 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11598 gen_rtx_REG (DFmode, i)));
11599 }
11600 }
11601
11602 /* Set frame pointer, if needed. */
11603
11604 if (frame_pointer_needed)
11605 {
11606 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11607 RTX_FRAME_RELATED_P (insn) = 1;
11608 }
11609
11610 /* Set up got pointer, if needed. */
11611
11612 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11613 {
11614 rtx_insn *insns = s390_load_got ();
11615
11616 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11617 annotate_constant_pool_refs (insn);
11618
11619 emit_insn (insns);
11620 }
11621
11622 #if TARGET_TPF != 0
11623 if (TARGET_TPF_PROFILING)
11624 {
11625 /* Generate a BAS instruction to serve as a function entry
11626 intercept to facilitate the use of tracing algorithms located
11627 at the branch target. */
11628 emit_insn (gen_prologue_tpf (
11629 GEN_INT (s390_tpf_trace_hook_prologue_check),
11630 GEN_INT (s390_tpf_trace_hook_prologue_target)));
11631
11632 /* Emit a blockage here so that all code lies between the
11633 profiling mechanisms. */
11634 emit_insn (gen_blockage ());
11635 }
11636 #endif
11637 }
11638
11639 /* Expand the epilogue into a bunch of separate insns. */
11640
11641 void
11642 s390_emit_epilogue (bool sibcall)
11643 {
11644 rtx frame_pointer, return_reg = NULL_RTX, cfa_restores = NULL_RTX;
11645 int area_bottom, area_top, offset = 0;
11646 int next_offset;
11647 int i;
11648
11649 #if TARGET_TPF != 0
11650 if (TARGET_TPF_PROFILING)
11651 {
11652 /* Generate a BAS instruction to serve as a function entry
11653 intercept to facilitate the use of tracing algorithms located
11654 at the branch target. */
11655
11656 /* Emit a blockage here so that all code lies between the
11657 profiling mechanisms. */
11658 emit_insn (gen_blockage ());
11659
11660 emit_insn (gen_epilogue_tpf (
11661 GEN_INT (s390_tpf_trace_hook_epilogue_check),
11662 GEN_INT (s390_tpf_trace_hook_epilogue_target)));
11663 }
11664 #endif
11665
11666 /* Check whether to use frame or stack pointer for restore. */
11667
11668 frame_pointer = (frame_pointer_needed
11669 ? hard_frame_pointer_rtx : stack_pointer_rtx);
11670
11671 s390_frame_area (&area_bottom, &area_top);
11672
11673 /* Check whether we can access the register save area.
11674 If not, increment the frame pointer as required. */
11675
11676 if (area_top <= area_bottom)
11677 {
11678 /* Nothing to restore. */
11679 }
11680 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11681 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11682 {
11683 /* Area is in range. */
11684 offset = cfun_frame_layout.frame_size;
11685 }
11686 else
11687 {
11688 rtx_insn *insn;
11689 rtx frame_off, cfa;
11690
11691 offset = area_bottom < 0 ? -area_bottom : 0;
11692 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11693
11694 cfa = gen_rtx_SET (frame_pointer,
11695 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11696 if (DISP_IN_RANGE (INTVAL (frame_off)))
11697 {
11698 rtx set;
11699
11700 set = gen_rtx_SET (frame_pointer,
11701 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11702 insn = emit_insn (set);
11703 }
11704 else
11705 {
11706 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11707 frame_off = force_const_mem (Pmode, frame_off);
11708
11709 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11710 annotate_constant_pool_refs (insn);
11711 }
11712 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11713 RTX_FRAME_RELATED_P (insn) = 1;
11714 }
11715
11716 /* Restore call saved fprs. */
11717
11718 if (TARGET_64BIT)
11719 {
11720 if (cfun_save_high_fprs_p)
11721 {
11722 next_offset = cfun_frame_layout.f8_offset;
11723 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11724 {
11725 if (cfun_fpr_save_p (i))
11726 {
11727 restore_fpr (frame_pointer,
11728 offset + next_offset, i);
11729 cfa_restores
11730 = alloc_reg_note (REG_CFA_RESTORE,
11731 gen_rtx_REG (DFmode, i), cfa_restores);
11732 next_offset += 8;
11733 }
11734 }
11735 }
11736
11737 }
11738 else
11739 {
11740 next_offset = cfun_frame_layout.f4_offset;
11741 /* f4, f6 */
11742 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11743 {
11744 if (cfun_fpr_save_p (i))
11745 {
11746 restore_fpr (frame_pointer,
11747 offset + next_offset, i);
11748 cfa_restores
11749 = alloc_reg_note (REG_CFA_RESTORE,
11750 gen_rtx_REG (DFmode, i), cfa_restores);
11751 next_offset += 8;
11752 }
11753 else if (!TARGET_PACKED_STACK)
11754 next_offset += 8;
11755 }
11756
11757 }
11758
11759 /* Restore call saved gprs. */
11760
11761 if (cfun_frame_layout.first_restore_gpr != -1)
11762 {
11763 rtx insn, addr;
11764 int i;
11765
11766 /* Check for global registers and save them
11767 to the stack locations from where they get restored. */
11768
11769 for (i = cfun_frame_layout.first_restore_gpr;
11770 i <= cfun_frame_layout.last_restore_gpr;
11771 i++)
11772 {
11773 if (global_not_special_regno_p (i))
11774 {
11775 addr = plus_constant (Pmode, frame_pointer,
11776 offset + cfun_frame_layout.gprs_offset
11777 + (i - cfun_frame_layout.first_save_gpr_slot)
11778 * UNITS_PER_LONG);
11779 addr = gen_rtx_MEM (Pmode, addr);
11780 set_mem_alias_set (addr, get_frame_alias_set ());
11781 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11782 }
11783 else
11784 cfa_restores
11785 = alloc_reg_note (REG_CFA_RESTORE,
11786 gen_rtx_REG (Pmode, i), cfa_restores);
11787 }
11788
11789 /* Fetch the return address from the stack before the load multiple;
11790 this helps scheduling.
11791
11792 Only do this if we already decided that r14 needs to be
11793 saved to a stack slot. (And not just because r14 happens to
11794 be in between two GPRs which need saving.) Otherwise it
11795 would be difficult to take that decision back in
11796 s390_optimize_prologue.
11797
11798 This optimization is only helpful on in-order machines. */
11799 if (! sibcall
11800 && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11801 && s390_tune <= PROCESSOR_2097_Z10)
11802 {
11803 int return_regnum = find_unused_clobbered_reg();
11804 if (!return_regnum
11805 || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
11806 && !TARGET_CPU_Z10
11807 && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
11808 {
11809 gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
11810 return_regnum = 4;
11811 }
11812 return_reg = gen_rtx_REG (Pmode, return_regnum);
11813
11814 addr = plus_constant (Pmode, frame_pointer,
11815 offset + cfun_frame_layout.gprs_offset
11816 + (RETURN_REGNUM
11817 - cfun_frame_layout.first_save_gpr_slot)
11818 * UNITS_PER_LONG);
11819 addr = gen_rtx_MEM (Pmode, addr);
11820 set_mem_alias_set (addr, get_frame_alias_set ());
11821 emit_move_insn (return_reg, addr);
11822
11823 /* Once we did that optimization we have to make sure
11824 s390_optimize_prologue does not try to remove the store
11825 of r14 since we will not be able to find the load issued
11826 here. */
11827 cfun_frame_layout.save_return_addr_p = true;
11828 }
11829
11830 insn = restore_gprs (frame_pointer,
11831 offset + cfun_frame_layout.gprs_offset
11832 + (cfun_frame_layout.first_restore_gpr
11833 - cfun_frame_layout.first_save_gpr_slot)
11834 * UNITS_PER_LONG,
11835 cfun_frame_layout.first_restore_gpr,
11836 cfun_frame_layout.last_restore_gpr);
11837 insn = emit_insn (insn);
11838 REG_NOTES (insn) = cfa_restores;
11839 add_reg_note (insn, REG_CFA_DEF_CFA,
11840 plus_constant (Pmode, stack_pointer_rtx,
11841 STACK_POINTER_OFFSET));
11842 RTX_FRAME_RELATED_P (insn) = 1;
11843 }
11844
11845 s390_restore_gprs_from_fprs ();
11846
11847 if (! sibcall)
11848 {
11849 if (!return_reg && !s390_can_use_return_insn ())
11850 /* We planned to emit (return), but we are not allowed to. */
11851 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11852
11853 if (return_reg)
11854 /* Emit (return) and (use). */
11855 emit_jump_insn (gen_return_use (return_reg));
11856 else
11857 /* The fact that RETURN_REGNUM is used is already reflected by
11858 EPILOGUE_USES. Emit plain (return). */
11859 emit_jump_insn (gen_return ());
11860 }
11861 }
11862
11863 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11864
11865 static void
11866 s300_set_up_by_prologue (hard_reg_set_container *regs)
11867 {
11868 if (cfun->machine->base_reg
11869 && !call_used_regs[REGNO (cfun->machine->base_reg)])
11870 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11871 }
11872
11873 /* -fsplit-stack support. */
11874
11875 /* A SYMBOL_REF for __morestack. */
11876 static GTY(()) rtx morestack_ref;
11877
11878 /* When using -fsplit-stack, the allocation routines set a field in
11879 the TCB to the bottom of the stack plus this much space, measured
11880 in bytes. */
11881
11882 #define SPLIT_STACK_AVAILABLE 1024
11883
11884 /* Emit the parmblock for __morestack into .rodata section. It
11885 consists of 3 pointer size entries:
11886 - frame size
11887 - size of stack arguments
11888 - offset between parm block and __morestack return label */
11889
11890 void
11891 s390_output_split_stack_data (rtx parm_block, rtx call_done,
11892 rtx frame_size, rtx args_size)
11893 {
11894 rtx ops[] = { parm_block, call_done };
11895
11896 switch_to_section (targetm.asm_out.function_rodata_section
11897 (current_function_decl, false));
11898
11899 if (TARGET_64BIT)
11900 output_asm_insn (".align\t8", NULL);
11901 else
11902 output_asm_insn (".align\t4", NULL);
11903
11904 (*targetm.asm_out.internal_label) (asm_out_file, "L",
11905 CODE_LABEL_NUMBER (parm_block));
11906 if (TARGET_64BIT)
11907 {
11908 output_asm_insn (".quad\t%0", &frame_size);
11909 output_asm_insn (".quad\t%0", &args_size);
11910 output_asm_insn (".quad\t%1-%0", ops);
11911 }
11912 else
11913 {
11914 output_asm_insn (".long\t%0", &frame_size);
11915 output_asm_insn (".long\t%0", &args_size);
11916 output_asm_insn (".long\t%1-%0", ops);
11917 }
11918
11919 switch_to_section (current_function_section ());
11920 }
11921
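/* For illustration, on 64 bit the routine above produces roughly the
   following assembly, with .LPB standing for the parm block label and
   .LCD for the call_done label (hypothetical label names):

       .section .rodata
       .align  8
   .LPB:
       .quad   <frame size>
       .quad   <stack argument size>
       .quad   .LCD-.LPB

   __morestack receives the address of this block in %r1 and uses the
   third entry to find the label it has to return to.  */
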
11922 /* Emit -fsplit-stack prologue, which goes before the regular function
11923 prologue. */
11924
11925 void
11926 s390_expand_split_stack_prologue (void)
11927 {
11928 rtx r1, guard, cc = NULL;
11929 rtx_insn *insn;
11930 /* Offset from thread pointer to __private_ss. */
11931 int psso = TARGET_64BIT ? 0x38 : 0x20;
11932 /* Pointer size in bytes. */
11933 /* Frame size and argument size - the two parameters to __morestack. */
11934 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11935 /* Align argument size to 8 bytes - simplifies __morestack code. */
11936 HOST_WIDE_INT args_size = crtl->args.size >= 0
11937 ? ((crtl->args.size + 7) & ~7)
11938 : 0;
11939 /* Label to be called by __morestack. */
11940 rtx_code_label *call_done = NULL;
11941 rtx_code_label *parm_base = NULL;
11942 rtx tmp;
11943
11944 gcc_assert (flag_split_stack && reload_completed);
11945
11946 r1 = gen_rtx_REG (Pmode, 1);
11947
11948 /* If no stack frame will be allocated, don't do anything. */
11949 if (!frame_size)
11950 {
11951 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11952 {
11953 /* If va_start is used, just use r15. */
11954 emit_move_insn (r1,
11955 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11956 GEN_INT (STACK_POINTER_OFFSET)));
11957
11958 }
11959 return;
11960 }
11961
11962 if (morestack_ref == NULL_RTX)
11963 {
11964 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11965 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11966 | SYMBOL_FLAG_FUNCTION);
11967 }
11968
11969 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11970 {
11971 /* If frame_size will fit in an add instruction, do a stack space
11972 check, and only call __morestack if there's not enough space. */
11973
11974 /* Get thread pointer. r1 is the only register we can always destroy - r0
11975 could contain a static chain (and cannot be used to address memory
11976 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11977 emit_insn (gen_get_thread_pointer (Pmode, r1));
11978 /* Aim at __private_ss. */
11979 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11980
11981 /* If less than 1 KiB is used, skip the addition and compare directly with
11982 __private_ss. */
11983 if (frame_size > SPLIT_STACK_AVAILABLE)
11984 {
11985 emit_move_insn (r1, guard);
11986 if (TARGET_64BIT)
11987 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11988 else
11989 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11990 guard = r1;
11991 }
11992
11993 /* Compare the (maybe adjusted) guard with the stack pointer. */
11994 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11995 }
11996
11997 call_done = gen_label_rtx ();
11998 parm_base = gen_label_rtx ();
11999 LABEL_NUSES (parm_base)++;
12000 LABEL_NUSES (call_done)++;
12001
12002 /* %r1 = litbase. */
12003 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
12004 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
12005 LABEL_NUSES (parm_base)++;
12006
12007 /* Now, we need to call __morestack. It has very special calling
12008 conventions: it preserves param/return/static chain registers for
12009 calling main function body, and looks for its own parameters at %r1. */
12010 if (cc != NULL)
12011 tmp = gen_split_stack_cond_call (Pmode,
12012 morestack_ref,
12013 parm_base,
12014 call_done,
12015 GEN_INT (frame_size),
12016 GEN_INT (args_size),
12017 cc);
12018 else
12019 tmp = gen_split_stack_call (Pmode,
12020 morestack_ref,
12021 parm_base,
12022 call_done,
12023 GEN_INT (frame_size),
12024 GEN_INT (args_size));
12025
12026 insn = emit_jump_insn (tmp);
12027 JUMP_LABEL (insn) = call_done;
12028 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
12029 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
12030
12031 if (cc != NULL)
12032 {
12033 /* Mark the jump as very unlikely to be taken. */
12034 add_reg_br_prob_note (insn,
12035 profile_probability::very_unlikely ());
12036
12037 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12038 {
12039 /* If va_start is used, and __morestack was not called, just use
12040 r15. */
12041 emit_move_insn (r1,
12042 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
12043 GEN_INT (STACK_POINTER_OFFSET)));
12044 }
12045 }
12046 else
12047 {
12048 emit_barrier ();
12049 }
12050
12051 /* __morestack will call us here. */
12052
12053 emit_label (call_done);
12054 }
12055
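/* For illustration, the fast path set up above corresponds roughly to
   this pseudo code (psso is the __private_ss offset in the TCB; the
   names are only for exposition):

     available = *(thread_pointer + psso);         available = __private_ss
     if (frame_size > SPLIT_STACK_AVAILABLE)
       available += frame_size;
     if (%r15 < available)                         not enough stack left
       __morestack (parm block in %r1);            returns to call_done

   If frame_size does not fit the add-immediate constraints checked with
   CONST_OK_FOR_K/CONST_OK_FOR_Op, the comparison is skipped and
   __morestack is called unconditionally.  */
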
12056 /* We may have to tell the dataflow pass that the split stack prologue
12057 is initializing a register. */
12058
12059 static void
12060 s390_live_on_entry (bitmap regs)
12061 {
12062 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
12063 {
12064 gcc_assert (flag_split_stack);
12065 bitmap_set_bit (regs, 1);
12066 }
12067 }
12068
12069 /* Return true if the function can use simple_return to return outside
12070 of a shrink-wrapped region. At present shrink-wrapping is supported
12071 in all cases. */
12072
12073 bool
12074 s390_can_use_simple_return_insn (void)
12075 {
12076 return true;
12077 }
12078
12079 /* Return true if the epilogue is guaranteed to contain only a return
12080 instruction and if a direct return can therefore be used instead.
12081 One of the main advantages of using direct return instructions
12082 is that we can then use conditional returns. */
12083
12084 bool
12085 s390_can_use_return_insn (void)
12086 {
12087 int i;
12088
12089 if (!reload_completed)
12090 return false;
12091
12092 if (crtl->profile)
12093 return false;
12094
12095 if (TARGET_TPF_PROFILING)
12096 return false;
12097
12098 for (i = 0; i < 16; i++)
12099 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
12100 return false;
12101
12102 /* For 31 bit this is not covered by the frame_size check below
12103 since f4, f6 are saved in the register save area without needing
12104 additional stack space. */
12105 if (!TARGET_64BIT
12106 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
12107 return false;
12108
12109 if (cfun->machine->base_reg
12110 && !call_used_regs[REGNO (cfun->machine->base_reg)])
12111 return false;
12112
12113 return cfun_frame_layout.frame_size == 0;
12114 }
12115
12116 /* The VX ABI differs for vararg functions. Therefore we need the
12117 prototype of the callee to be available when passing vector type
12118 values. */
12119 static const char *
12120 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
12121 {
12122 return ((TARGET_VX_ABI
12123 && typelist == 0
12124 && VECTOR_TYPE_P (TREE_TYPE (val))
12125 && (funcdecl == NULL_TREE
12126 || (TREE_CODE (funcdecl) == FUNCTION_DECL
12127 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
12128 ? N_("vector argument passed to unprototyped function")
12129 : NULL);
12130 }
12131
12132
12133 /* Return the size in bytes of a function argument of
12134 type TYPE and/or mode MODE. At least one of TYPE or
12135 MODE must be specified. */
12136
12137 static int
12138 s390_function_arg_size (machine_mode mode, const_tree type)
12139 {
12140 if (type)
12141 return int_size_in_bytes (type);
12142
12143 /* No type info available for some library calls ... */
12144 if (mode != BLKmode)
12145 return GET_MODE_SIZE (mode);
12146
12147 /* If we have neither type nor mode, abort */
12148 gcc_unreachable ();
12149 }
12150
12151 /* Return true if a function argument of type TYPE and mode MODE
12152 is to be passed in a vector register, if available. */
12153
12154 bool
12155 s390_function_arg_vector (machine_mode mode, const_tree type)
12156 {
12157 if (!TARGET_VX_ABI)
12158 return false;
12159
12160 if (s390_function_arg_size (mode, type) > 16)
12161 return false;
12162
12163 /* No type info available for some library calls ... */
12164 if (!type)
12165 return VECTOR_MODE_P (mode);
12166
12167 /* The ABI says that record types with a single member are treated
12168 just like that member would be. */
12169 int empty_base_seen = 0;
12170 const_tree orig_type = type;
12171 while (TREE_CODE (type) == RECORD_TYPE)
12172 {
12173 tree field, single = NULL_TREE;
12174
12175 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12176 {
12177 if (TREE_CODE (field) != FIELD_DECL)
12178 continue;
12179
12180 if (DECL_FIELD_ABI_IGNORED (field))
12181 {
12182 if (lookup_attribute ("no_unique_address",
12183 DECL_ATTRIBUTES (field)))
12184 empty_base_seen |= 2;
12185 else
12186 empty_base_seen |= 1;
12187 continue;
12188 }
12189
12190 if (single == NULL_TREE)
12191 single = TREE_TYPE (field);
12192 else
12193 return false;
12194 }
12195
12196 if (single == NULL_TREE)
12197 return false;
12198 else
12199 {
12200 /* If the field declaration adds extra bytes due to
12201 e.g. padding, it is not accepted as a vector type. */
12202 if (int_size_in_bytes (single) <= 0
12203 || int_size_in_bytes (single) != int_size_in_bytes (type))
12204 return false;
12205 type = single;
12206 }
12207 }
12208
12209 if (!VECTOR_TYPE_P (type))
12210 return false;
12211
12212 if (warn_psabi && empty_base_seen)
12213 {
12214 static unsigned last_reported_type_uid;
12215 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
12216 if (uid != last_reported_type_uid)
12217 {
12218 const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
12219 last_reported_type_uid = uid;
12220 if (empty_base_seen & 1)
12221 inform (input_location,
12222 "parameter passing for argument of type %qT when C++17 "
12223 "is enabled changed to match C++14 %{in GCC 10.1%}",
12224 orig_type, url);
12225 else
12226 inform (input_location,
12227 "parameter passing for argument of type %qT with "
12228 "%<[[no_unique_address]]%> members changed "
12229 "%{in GCC 10.1%}", orig_type, url);
12230 }
12231 }
12232 return true;
12233 }
12234
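/* For illustration, under the VX ABI the hypothetical wrapper below is
   passed exactly like the bare vector, because the single-member record
   rule above strips the struct layer:

     typedef int v4si __attribute__ ((vector_size (16)));
     struct wrapped { v4si v; };       -- passed like a plain v4si

   A second data member, or padding that makes the struct larger than its
   only member, makes the argument fall back to the non-vector rules.  */
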
12235 /* Return true if a function argument of type TYPE and mode MODE
12236 is to be passed in a floating-point register, if available. */
12237
12238 static bool
12239 s390_function_arg_float (machine_mode mode, const_tree type)
12240 {
12241 if (s390_function_arg_size (mode, type) > 8)
12242 return false;
12243
12244 /* Soft-float changes the ABI: no floating-point registers are used. */
12245 if (TARGET_SOFT_FLOAT)
12246 return false;
12247
12248 /* No type info available for some library calls ... */
12249 if (!type)
12250 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
12251
12252 /* The ABI says that record types with a single member are treated
12253 just like that member would be. */
12254 int empty_base_seen = 0;
12255 const_tree orig_type = type;
12256 while (TREE_CODE (type) == RECORD_TYPE)
12257 {
12258 tree field, single = NULL_TREE;
12259
12260 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
12261 {
12262 if (TREE_CODE (field) != FIELD_DECL)
12263 continue;
12264 if (DECL_FIELD_ABI_IGNORED (field))
12265 {
12266 if (lookup_attribute ("no_unique_address",
12267 DECL_ATTRIBUTES (field)))
12268 empty_base_seen |= 2;
12269 else
12270 empty_base_seen |= 1;
12271 continue;
12272 }
12273
12274 if (single == NULL_TREE)
12275 single = TREE_TYPE (field);
12276 else
12277 return false;
12278 }
12279
12280 if (single == NULL_TREE)
12281 return false;
12282 else
12283 type = single;
12284 }
12285
12286 if (TREE_CODE (type) != REAL_TYPE)
12287 return false;
12288
12289 if (warn_psabi && empty_base_seen)
12290 {
12291 static unsigned last_reported_type_uid;
12292 unsigned uid = TYPE_UID (TYPE_MAIN_VARIANT (orig_type));
12293 if (uid != last_reported_type_uid)
12294 {
12295 const char *url = CHANGES_ROOT_URL "gcc-10/changes.html#empty_base";
12296 last_reported_type_uid = uid;
12297 if (empty_base_seen & 1)
12298 inform (input_location,
12299 "parameter passing for argument of type %qT when C++17 "
12300 "is enabled changed to match C++14 %{in GCC 10.1%}",
12301 orig_type, url);
12302 else
12303 inform (input_location,
12304 "parameter passing for argument of type %qT with "
12305 "%<[[no_unique_address]]%> members changed "
12306 "%{in GCC 10.1%}", orig_type, url);
12307 }
12308 }
12309
12310 return true;
12311 }
12312
12313 /* Return true if a function argument of type TYPE and mode MODE
12314 is to be passed in an integer register, or a pair of integer
12315 registers, if available. */
12316
12317 static bool
12318 s390_function_arg_integer (machine_mode mode, const_tree type)
12319 {
12320 int size = s390_function_arg_size (mode, type);
12321 if (size > 8)
12322 return false;
12323
12324 /* No type info available for some library calls ... */
12325 if (!type)
12326 return GET_MODE_CLASS (mode) == MODE_INT
12327 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
12328
12329 /* We accept small integral (and similar) types. */
12330 if (INTEGRAL_TYPE_P (type)
12331 || POINTER_TYPE_P (type)
12332 || TREE_CODE (type) == NULLPTR_TYPE
12333 || TREE_CODE (type) == OFFSET_TYPE
12334 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
12335 return true;
12336
12337 /* We also accept structs of size 1, 2, 4, 8 that are not
12338 passed in floating-point registers. */
12339 if (AGGREGATE_TYPE_P (type)
12340 && exact_log2 (size) >= 0
12341 && !s390_function_arg_float (mode, type))
12342 return true;
12343
12344 return false;
12345 }
12346
12347 /* Return 1 if a function argument ARG is to be passed by reference.
12348 The ABI specifies that only structures of size 1, 2, 4, or 8 bytes
12349 are passed by value, all other structures (and complex numbers) are
12350 passed by reference. */
12351
12352 static bool
12353 s390_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
12354 {
12355 int size = s390_function_arg_size (arg.mode, arg.type);
12356
12357 if (s390_function_arg_vector (arg.mode, arg.type))
12358 return false;
12359
12360 if (size > 8)
12361 return true;
12362
12363 if (tree type = arg.type)
12364 {
12365 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
12366 return true;
12367
12368 if (TREE_CODE (type) == COMPLEX_TYPE
12369 || TREE_CODE (type) == VECTOR_TYPE)
12370 return true;
12371 }
12372
12373 return false;
12374 }
12375
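/* For illustration (hypothetical types): a 4 or 8 byte struct is copied
   into a register and passed by value, while a 12 byte struct, a
   _Complex double, or a vector type that does not qualify for the VX ABI
   is passed as the address of a copy made in the caller.  */
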
12376 /* Update the data in CUM to advance over argument ARG. */
12377
12378 static void
12379 s390_function_arg_advance (cumulative_args_t cum_v,
12380 const function_arg_info &arg)
12381 {
12382 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12383
12384 if (s390_function_arg_vector (arg.mode, arg.type))
12385 {
12386 /* We are called for unnamed vector stdarg arguments which are
12387 passed on the stack. In this case this hook does not have to
12388 do anything since stack arguments are tracked by common
12389 code. */
12390 if (!arg.named)
12391 return;
12392 cum->vrs += 1;
12393 }
12394 else if (s390_function_arg_float (arg.mode, arg.type))
12395 {
12396 cum->fprs += 1;
12397 }
12398 else if (s390_function_arg_integer (arg.mode, arg.type))
12399 {
12400 int size = s390_function_arg_size (arg.mode, arg.type);
12401 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
12402 }
12403 else
12404 gcc_unreachable ();
12405 }
12406
12407 /* Define where to put the arguments to a function.
12408 Value is zero to push the argument on the stack,
12409 or a hard register in which to store the argument.
12410
12411 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12412 the preceding args and about the function being called.
12413 ARG is a description of the argument.
12414
12415 On S/390, we use general purpose registers 2 through 6 to
12416 pass integer, pointer, and certain structure arguments, and
12417 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12418 to pass floating point arguments. All remaining arguments
12419 are pushed to the stack. */
12420
12421 static rtx
12422 s390_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
12423 {
12424 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12425
12426 if (!arg.named)
12427 s390_check_type_for_vector_abi (arg.type, true, false);
12428
12429 if (s390_function_arg_vector (arg.mode, arg.type))
12430 {
12431 /* Vector arguments being part of the ellipsis are passed on the
12432 stack. */
12433 if (!arg.named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
12434 return NULL_RTX;
12435
12436 return gen_rtx_REG (arg.mode, cum->vrs + FIRST_VEC_ARG_REGNO);
12437 }
12438 else if (s390_function_arg_float (arg.mode, arg.type))
12439 {
12440 if (cum->fprs + 1 > FP_ARG_NUM_REG)
12441 return NULL_RTX;
12442 else
12443 return gen_rtx_REG (arg.mode, cum->fprs + 16);
12444 }
12445 else if (s390_function_arg_integer (arg.mode, arg.type))
12446 {
12447 int size = s390_function_arg_size (arg.mode, arg.type);
12448 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12449
12450 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12451 return NULL_RTX;
12452 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12453 return gen_rtx_REG (arg.mode, cum->gprs + 2);
12454 else if (n_gprs == 2)
12455 {
12456 rtvec p = rtvec_alloc (2);
12457
12458 RTVEC_ELT (p, 0)
12459 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12460 const0_rtx);
12461 RTVEC_ELT (p, 1)
12462 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12463 GEN_INT (4));
12464
12465 return gen_rtx_PARALLEL (arg.mode, p);
12466 }
12467 }
12468
12469 /* After the real arguments, expand_call calls us once again with an
12470 end marker. Whatever we return here is passed as operand 2 to the
12471 call expanders.
12472
12473 We don't need this feature ... */
12474 else if (arg.end_marker_p ())
12475 return const0_rtx;
12476
12477 gcc_unreachable ();
12478 }
12479
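/* For illustration, a hypothetical 64-bit call such as

     void f (int a, double b, long c, double d, struct { char s[8]; } e);

   is assigned registers by the hook above as follows: a in %r2, c in
   %r3, e in %r4 (an 8 byte aggregate is passed like an integer), b in
   %f0 and d in %f2.  Arguments for which no register is left are passed
   on the stack (NULL_RTX is returned).  */
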
12480 /* Implement TARGET_FUNCTION_ARG_PADDING. Vector arguments are
12481 left-justified when placed on the stack during parameter passing. */
12482
12483 static pad_direction
12484 s390_function_arg_padding (machine_mode mode, const_tree type)
12485 {
12486 if (s390_function_arg_vector (mode, type))
12487 return PAD_UPWARD;
12488
12489 return default_function_arg_padding (mode, type);
12490 }
12491
12492 /* Return true if return values of type TYPE should be returned
12493 in a memory buffer whose address is passed by the caller as
12494 hidden first argument. */
12495
12496 static bool
12497 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12498 {
12499 /* We accept small integral (and similar) types. */
12500 if (INTEGRAL_TYPE_P (type)
12501 || POINTER_TYPE_P (type)
12502 || TREE_CODE (type) == OFFSET_TYPE
12503 || TREE_CODE (type) == REAL_TYPE)
12504 return int_size_in_bytes (type) > 8;
12505
12506 /* vector types which fit into a VR. */
12507 if (TARGET_VX_ABI
12508 && VECTOR_TYPE_P (type)
12509 && int_size_in_bytes (type) <= 16)
12510 return false;
12511
12512 /* Aggregates and similar constructs are always returned
12513 in memory. */
12514 if (AGGREGATE_TYPE_P (type)
12515 || TREE_CODE (type) == COMPLEX_TYPE
12516 || VECTOR_TYPE_P (type))
12517 return true;
12518
12519 /* ??? We get called on all sorts of random stuff from
12520 aggregate_value_p. We can't abort, but it's not clear
12521 what's safe to return. Pretend it's a struct I guess. */
12522 return true;
12523 }
12524
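/* For illustration (hypothetical types): a long, a double or any pointer
   is returned in a register, and a 16 byte vector is returned in a
   vector register under the VX ABI, but even a small aggregate such as

     struct pair { int a, b; };

   is returned through a hidden buffer whose address the caller passes,
   as are complex values.  */
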
12525 /* Function arguments and return values are promoted to word size. */
12526
12527 static machine_mode
12528 s390_promote_function_mode (const_tree type, machine_mode mode,
12529 int *punsignedp,
12530 const_tree fntype ATTRIBUTE_UNUSED,
12531 int for_return ATTRIBUTE_UNUSED)
12532 {
12533 if (INTEGRAL_MODE_P (mode)
12534 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12535 {
12536 if (type != NULL_TREE && POINTER_TYPE_P (type))
12537 *punsignedp = POINTERS_EXTEND_UNSIGNED;
12538 return Pmode;
12539 }
12540
12541 return mode;
12542 }
12543
12544 /* Define where to return a (scalar) value of type RET_TYPE.
12545 If RET_TYPE is null, define where to return a (scalar)
12546 value of mode MODE from a libcall. */
12547
12548 static rtx
12549 s390_function_and_libcall_value (machine_mode mode,
12550 const_tree ret_type,
12551 const_tree fntype_or_decl,
12552 bool outgoing ATTRIBUTE_UNUSED)
12553 {
12554 /* For vector return types it is important to use the RET_TYPE
12555 argument whenever available since the middle-end might have
12556 changed the mode to a scalar mode. */
12557 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12558 || (!ret_type && VECTOR_MODE_P (mode)));
12559
12560 /* For normal functions perform the promotion as
12561 promote_function_mode would do. */
12562 if (ret_type)
12563 {
12564 int unsignedp = TYPE_UNSIGNED (ret_type);
12565 mode = promote_function_mode (ret_type, mode, &unsignedp,
12566 fntype_or_decl, 1);
12567 }
12568
12569 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12570 || SCALAR_FLOAT_MODE_P (mode)
12571 || (TARGET_VX_ABI && vector_ret_type_p));
12572 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12573
12574 if (TARGET_VX_ABI && vector_ret_type_p)
12575 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12576 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12577 return gen_rtx_REG (mode, 16);
12578 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12579 || UNITS_PER_LONG == UNITS_PER_WORD)
12580 return gen_rtx_REG (mode, 2);
12581 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12582 {
12583 /* This case is triggered when returning a 64 bit value with
12584 -m31 -mzarch. Although the value would fit into a single
12585 register it has to be forced into a 32 bit register pair in
12586 order to match the ABI. */
12587 rtvec p = rtvec_alloc (2);
12588
12589 RTVEC_ELT (p, 0)
12590 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12591 RTVEC_ELT (p, 1)
12592 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12593
12594 return gen_rtx_PARALLEL (mode, p);
12595 }
12596
12597 gcc_unreachable ();
12598 }
12599
12600 /* Define where to return a scalar return value of type RET_TYPE. */
12601
12602 static rtx
12603 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12604 bool outgoing)
12605 {
12606 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12607 fn_decl_or_type, outgoing);
12608 }
12609
12610 /* Define where to return a scalar libcall return value of mode
12611 MODE. */
12612
12613 static rtx
12614 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12615 {
12616 return s390_function_and_libcall_value (mode, NULL_TREE,
12617 NULL_TREE, true);
12618 }
12619
12620
12621 /* Create and return the va_list datatype.
12622
12623 On S/390, va_list is an array type equivalent to
12624
12625 typedef struct __va_list_tag
12626 {
12627 long __gpr;
12628 long __fpr;
12629 void *__overflow_arg_area;
12630 void *__reg_save_area;
12631 } va_list[1];
12632
12633 where __gpr and __fpr hold the number of general purpose
12634 or floating point arguments used up to now, respectively,
12635 __overflow_arg_area points to the stack location of the
12636 next argument passed on the stack, and __reg_save_area
12637 always points to the start of the register area in the
12638 call frame of the current function. The function prologue
12639 saves all registers used for argument passing into this
12640 area if the function uses variable arguments. */
12641
12642 static tree
12643 s390_build_builtin_va_list (void)
12644 {
12645 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12646
12647 record = lang_hooks.types.make_type (RECORD_TYPE);
12648
12649 type_decl =
12650 build_decl (BUILTINS_LOCATION,
12651 TYPE_DECL, get_identifier ("__va_list_tag"), record);
12652
12653 f_gpr = build_decl (BUILTINS_LOCATION,
12654 FIELD_DECL, get_identifier ("__gpr"),
12655 long_integer_type_node);
12656 f_fpr = build_decl (BUILTINS_LOCATION,
12657 FIELD_DECL, get_identifier ("__fpr"),
12658 long_integer_type_node);
12659 f_ovf = build_decl (BUILTINS_LOCATION,
12660 FIELD_DECL, get_identifier ("__overflow_arg_area"),
12661 ptr_type_node);
12662 f_sav = build_decl (BUILTINS_LOCATION,
12663 FIELD_DECL, get_identifier ("__reg_save_area"),
12664 ptr_type_node);
12665
12666 va_list_gpr_counter_field = f_gpr;
12667 va_list_fpr_counter_field = f_fpr;
12668
12669 DECL_FIELD_CONTEXT (f_gpr) = record;
12670 DECL_FIELD_CONTEXT (f_fpr) = record;
12671 DECL_FIELD_CONTEXT (f_ovf) = record;
12672 DECL_FIELD_CONTEXT (f_sav) = record;
12673
12674 TYPE_STUB_DECL (record) = type_decl;
12675 TYPE_NAME (record) = type_decl;
12676 TYPE_FIELDS (record) = f_gpr;
12677 DECL_CHAIN (f_gpr) = f_fpr;
12678 DECL_CHAIN (f_fpr) = f_ovf;
12679 DECL_CHAIN (f_ovf) = f_sav;
12680
12681 layout_type (record);
12682
12683 /* The correct type is an array type of one element. */
12684 return build_array_type (record, build_index_type (size_zero_node));
12685 }
12686
12687 /* Implement va_start by filling the va_list structure VALIST.
12688 STDARG_P is always true, and ignored.
12689 NEXTARG points to the first anonymous stack argument.
12690
12691 The following global variables are used to initialize
12692 the va_list structure:
12693
12694 crtl->args.info:
12695 holds number of gprs and fprs used for named arguments.
12696 crtl->args.arg_offset_rtx:
12697 holds the offset of the first anonymous stack argument
12698 (relative to the virtual arg pointer). */
12699
12700 static void
12701 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12702 {
12703 HOST_WIDE_INT n_gpr, n_fpr;
12704 int off;
12705 tree f_gpr, f_fpr, f_ovf, f_sav;
12706 tree gpr, fpr, ovf, sav, t;
12707
12708 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12709 f_fpr = DECL_CHAIN (f_gpr);
12710 f_ovf = DECL_CHAIN (f_fpr);
12711 f_sav = DECL_CHAIN (f_ovf);
12712
12713 valist = build_simple_mem_ref (valist);
12714 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12715 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12716 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12717 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12718
12719 /* Count number of gp and fp argument registers used. */
12720
12721 n_gpr = crtl->args.info.gprs;
12722 n_fpr = crtl->args.info.fprs;
12723
12724 if (cfun->va_list_gpr_size)
12725 {
12726 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12727 build_int_cst (NULL_TREE, n_gpr));
12728 TREE_SIDE_EFFECTS (t) = 1;
12729 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12730 }
12731
12732 if (cfun->va_list_fpr_size)
12733 {
12734 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12735 build_int_cst (NULL_TREE, n_fpr));
12736 TREE_SIDE_EFFECTS (t) = 1;
12737 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12738 }
12739
12740 if (flag_split_stack
12741 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12742 == NULL)
12743 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12744 {
12745 rtx reg;
12746 rtx_insn *seq;
12747
12748 reg = gen_reg_rtx (Pmode);
12749 cfun->machine->split_stack_varargs_pointer = reg;
12750
12751 start_sequence ();
12752 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12753 seq = get_insns ();
12754 end_sequence ();
12755
12756 push_topmost_sequence ();
12757 emit_insn_after (seq, entry_of_function ());
12758 pop_topmost_sequence ();
12759 }
12760
12761 /* Find the overflow area.
12762 FIXME: This currently is too pessimistic when the vector ABI is
12763 enabled. In that case we *always* set up the overflow area
12764 pointer. */
12765 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12766 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12767 || TARGET_VX_ABI)
12768 {
12769 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12770 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12771 else
12772 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12773
12774 off = INTVAL (crtl->args.arg_offset_rtx);
12775 off = off < 0 ? 0 : off;
12776 if (TARGET_DEBUG_ARG)
12777 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12778 (int)n_gpr, (int)n_fpr, off);
12779
12780 t = fold_build_pointer_plus_hwi (t, off);
12781
12782 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12783 TREE_SIDE_EFFECTS (t) = 1;
12784 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12785 }
12786
12787 /* Find the register save area. */
12788 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12789 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12790 {
12791 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12792 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12793
12794 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12795 TREE_SIDE_EFFECTS (t) = 1;
12796 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12797 }
12798 }
12799
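/* For illustration, for a hypothetical variadic function

     void log_it (const char *fmt, int level, ...);

   compiled for 64 bit, the code above records __gpr = 2 and __fpr = 0
   (fmt and level use %r2 and %r3), and - where the conditions above
   require it - points __overflow_arg_area at the first stack-passed
   argument and __reg_save_area at the register save area, so that
   va_arg can fetch the anonymous arguments from %r4-%r6, %f0/%f2/%f4/%f6
   or the overflow area.  */
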
12800 /* Implement va_arg by updating the va_list structure
12801 VALIST as required to retrieve an argument of type
12802 TYPE, and returning that argument.
12803
12804 Generates code equivalent to:
12805
12806 if (integral value) {
12807 if (size <= 4 && args.gpr < 5 ||
12808 size > 4 && args.gpr < 4 )
12809 ret = args.reg_save_area[args.gpr+8]
12810 else
12811 ret = *args.overflow_arg_area++;
12812 } else if (vector value) {
12813 ret = *args.overflow_arg_area;
12814 args.overflow_arg_area += size / 8;
12815 } else if (float value) {
12816 if (args.fpr < 2)
12817 ret = args.reg_save_area[args.fpr+64]
12818 else
12819 ret = *args.overflow_arg_area++;
12820 } else if (aggregate value) {
12821 if (args.gpr < 5)
12822 ret = *args.reg_save_area[args.gpr]
12823 else
12824 ret = **args.overflow_arg_area++;
12825 } */
12826
12827 static tree
12828 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12829 gimple_seq *post_p ATTRIBUTE_UNUSED)
12830 {
12831 tree f_gpr, f_fpr, f_ovf, f_sav;
12832 tree gpr, fpr, ovf, sav, reg, t, u;
12833 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12834 tree lab_false, lab_over = NULL_TREE;
12835 tree addr = create_tmp_var (ptr_type_node, "addr");
12836 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12837 a stack slot. */
12838
12839 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12840 f_fpr = DECL_CHAIN (f_gpr);
12841 f_ovf = DECL_CHAIN (f_fpr);
12842 f_sav = DECL_CHAIN (f_ovf);
12843
12844 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12845 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12846 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12847
12848 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12849 both appear on a lhs. */
12850 valist = unshare_expr (valist);
12851 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12852
12853 size = int_size_in_bytes (type);
12854
12855 s390_check_type_for_vector_abi (type, true, false);
12856
12857 if (pass_va_arg_by_reference (type))
12858 {
12859 if (TARGET_DEBUG_ARG)
12860 {
12861 fprintf (stderr, "va_arg: aggregate type");
12862 debug_tree (type);
12863 }
12864
12865 /* Aggregates are passed by reference. */
12866 indirect_p = 1;
12867 reg = gpr;
12868 n_reg = 1;
12869
12870 /* kernel stack layout on 31 bit: It is assumed here that no padding
12871 will be added by s390_frame_info because for va_args an even
12872 number of gprs always has to be saved (r15-r2 = 14 regs). */
12873 sav_ofs = 2 * UNITS_PER_LONG;
12874 sav_scale = UNITS_PER_LONG;
12875 size = UNITS_PER_LONG;
12876 max_reg = GP_ARG_NUM_REG - n_reg;
12877 left_align_p = false;
12878 }
12879 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12880 {
12881 if (TARGET_DEBUG_ARG)
12882 {
12883 fprintf (stderr, "va_arg: vector type");
12884 debug_tree (type);
12885 }
12886
12887 indirect_p = 0;
12888 reg = NULL_TREE;
12889 n_reg = 0;
12890 sav_ofs = 0;
12891 sav_scale = 8;
12892 max_reg = 0;
12893 left_align_p = true;
12894 }
12895 else if (s390_function_arg_float (TYPE_MODE (type), type))
12896 {
12897 if (TARGET_DEBUG_ARG)
12898 {
12899 fprintf (stderr, "va_arg: float type");
12900 debug_tree (type);
12901 }
12902
12903 /* FP args go in FP registers, if present. */
12904 indirect_p = 0;
12905 reg = fpr;
12906 n_reg = 1;
12907 sav_ofs = 16 * UNITS_PER_LONG;
12908 sav_scale = 8;
12909 max_reg = FP_ARG_NUM_REG - n_reg;
12910 left_align_p = false;
12911 }
12912 else
12913 {
12914 if (TARGET_DEBUG_ARG)
12915 {
12916 fprintf (stderr, "va_arg: other type");
12917 debug_tree (type);
12918 }
12919
12920 /* Otherwise into GP registers. */
12921 indirect_p = 0;
12922 reg = gpr;
12923 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12924
12925 /* kernel stack layout on 31 bit: It is assumed here that no padding
12926 will be added by s390_frame_info because for va_args an even
12927 number of gprs always has to be saved (r15-r2 = 14 regs). */
12928 sav_ofs = 2 * UNITS_PER_LONG;
12929
12930 if (size < UNITS_PER_LONG)
12931 sav_ofs += UNITS_PER_LONG - size;
12932
12933 sav_scale = UNITS_PER_LONG;
12934 max_reg = GP_ARG_NUM_REG - n_reg;
12935 left_align_p = false;
12936 }
12937
12938 /* Pull the value out of the saved registers ... */
12939
12940 if (reg != NULL_TREE)
12941 {
12942 /*
12943 if (reg > ((typeof (reg))max_reg))
12944 goto lab_false;
12945
12946 addr = sav + sav_ofs + reg * save_scale;
12947
12948 goto lab_over;
12949
12950 lab_false:
12951 */
12952
12953 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12954 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12955
12956 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12957 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12958 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12959 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12960 gimplify_and_add (t, pre_p);
12961
12962 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12963 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12964 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12965 t = fold_build_pointer_plus (t, u);
12966
12967 gimplify_assign (addr, t, pre_p);
12968
12969 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12970
12971 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12972 }
12973
12974 /* ... Otherwise out of the overflow area. */
12975
12976 t = ovf;
12977 if (size < UNITS_PER_LONG && !left_align_p)
12978 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12979
12980 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12981
12982 gimplify_assign (addr, t, pre_p);
12983
12984 if (size < UNITS_PER_LONG && left_align_p)
12985 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12986 else
12987 t = fold_build_pointer_plus_hwi (t, size);
12988
12989 gimplify_assign (ovf, t, pre_p);
12990
12991 if (reg != NULL_TREE)
12992 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12993
12994
12995 /* Increment register save count. */
12996
12997 if (n_reg > 0)
12998 {
12999 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
13000 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
13001 gimplify_and_add (u, pre_p);
13002 }
13003
13004 if (indirect_p)
13005 {
13006 t = build_pointer_type_for_mode (build_pointer_type (type),
13007 ptr_mode, true);
13008 addr = fold_convert (t, addr);
13009 addr = build_va_arg_indirect_ref (addr);
13010 }
13011 else
13012 {
13013 t = build_pointer_type_for_mode (type, ptr_mode, true);
13014 addr = fold_convert (t, addr);
13015 }
13016
13017 return build_va_arg_indirect_ref (addr);
13018 }
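
/* A rough sketch of the sequence gimplified above for an argument that
   may live in GPRs (the names refer to the local variables above, not
   to emitted identifiers):

       if (gpr > max_reg)
         goto lab_false;
       addr = sav + sav_ofs + gpr * sav_scale;
       goto lab_over;
     lab_false:
       addr = ovf;                (right-justified if size < UNITS_PER_LONG)
       ovf = ovf + size;
     lab_over:
       gpr = gpr + n_reg;
       return *(type *) addr;     (one more indirection if passed by reference)

   FP arguments use the same skeleton with the FPR save area; vector
   arguments are always taken from the overflow area.  */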
13019
13020 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
13021 expanders.
13022 DEST - Register location where CC will be stored.
13023 TDB - Pointer to a 256 byte area where to store the transaction
13024 diagnostic block. NULL if TDB is not needed.
13025 RETRY - Retry count value. If non-NULL a retry loop for CC2
13026 is emitted.
13027 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
13028 of the tbegin instruction pattern. */
13029
13030 void
13031 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
13032 {
13033 rtx retry_plus_two = gen_reg_rtx (SImode);
13034 rtx retry_reg = gen_reg_rtx (SImode);
13035 rtx_code_label *retry_label = NULL;
13036
13037 if (retry != NULL_RTX)
13038 {
13039 emit_move_insn (retry_reg, retry);
13040 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
13041 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
13042 retry_label = gen_label_rtx ();
13043 emit_label (retry_label);
13044 }
13045
13046 if (clobber_fprs_p)
13047 {
13048 if (TARGET_VX)
13049 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13050 tdb));
13051 else
13052 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13053 tdb));
13054 }
13055 else
13056 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
13057 tdb));
13058
13059 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
13060 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
13061 CC_REGNUM)),
13062 UNSPEC_CC_TO_INT));
13063 if (retry != NULL_RTX)
13064 {
13065 const int CC0 = 1 << 3;
13066 const int CC1 = 1 << 2;
13067 const int CC3 = 1 << 0;
13068 rtx jump;
13069 rtx count = gen_reg_rtx (SImode);
13070 rtx_code_label *leave_label = gen_label_rtx ();
13071
13072 /* Exit for success and permanent failures. */
13073 jump = s390_emit_jump (leave_label,
13074 gen_rtx_EQ (VOIDmode,
13075 gen_rtx_REG (CCRAWmode, CC_REGNUM),
13076 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
13077 LABEL_NUSES (leave_label) = 1;
13078
13079 /* CC2 - transient failure. Perform retry with ppa. */
13080 emit_move_insn (count, retry_plus_two);
13081 emit_insn (gen_subsi3 (count, count, retry_reg));
13082 emit_insn (gen_tx_assist (count));
13083 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
13084 retry_reg,
13085 retry_reg));
13086 JUMP_LABEL (jump) = retry_label;
13087 LABEL_NUSES (retry_label) = 1;
13088 emit_label (leave_label);
13089 }
13090 }
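
/* A rough sketch of the retry variant emitted above, assuming the usual
   TBEGIN condition codes (CC 0, 1 and 3 mean success or permanent
   failure, CC 2 means transient failure):

       retry_plus_two = RETRY + 2;
       retry_reg      = RETRY + 1;
     retry_label:
       DEST = CC of (tbegin TBEGIN_MASK, TDB);
       if (CC is 0, 1 or 3)
         goto leave_label;
       ppa (retry_plus_two - retry_reg);    (transaction-abort assist with
                                             the current attempt number)
       if (--retry_reg != 0)
         goto retry_label;
     leave_label:  */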
13091
13092
13093 /* Return the decl for the target specific builtin with the function
13094 code FCODE. */
13095
13096 static tree
13097 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
13098 {
13099 if (fcode >= S390_BUILTIN_MAX)
13100 return error_mark_node;
13101
13102 return s390_builtin_decls[fcode];
13103 }
13104
13105 /* We call mcount before the function prologue. So a profiled leaf
13106 function should stay a leaf function. */
13107
13108 static bool
13109 s390_keep_leaf_when_profiled ()
13110 {
13111 return true;
13112 }
13113
13114 /* Output assembly code for the trampoline template to
13115 stdio stream FILE.
13116
13117 On S/390, we use gpr 1 internally in the trampoline code;
13118 gpr 0 is used to hold the static chain. */
13119
13120 static void
13121 s390_asm_trampoline_template (FILE *file)
13122 {
13123 rtx op[2];
13124 op[0] = gen_rtx_REG (Pmode, 0);
13125 op[1] = gen_rtx_REG (Pmode, 1);
13126
13127 if (TARGET_64BIT)
13128 {
13129 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
13130 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
13131 output_asm_insn ("br\t%1", op); /* 2 byte */
13132 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
13133 }
13134 else
13135 {
13136 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
13137 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
13138 output_asm_insn ("br\t%1", op); /* 2 byte */
13139 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
13140 }
13141 }
13142
13143 /* Emit RTL insns to initialize the variable parts of a trampoline.
13144 FNADDR is an RTX for the address of the function's pure code.
13145 CXT is an RTX for the static chain value for the function. */
13146
13147 static void
13148 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
13149 {
13150 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
13151 rtx mem;
13152
13153 emit_block_move (m_tramp, assemble_trampoline_template (),
13154 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
13155
13156 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
13157 emit_move_insn (mem, cxt);
13158 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
13159 emit_move_insn (mem, fnaddr);
13160 }
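
/* A rough sketch of the resulting 64-bit trampoline, assuming
   UNITS_PER_LONG == 8 (the 31-bit variant is analogous, with 4-byte
   slots and an "lm" loading from offset 8):

       0:  basr  %r1,0             (%r1 = address of the lmg below)
       2:  lmg   %r0,%r1,14(%r1)   (%r0 = static chain, %r1 = target)
       8:  br    %r1               (branch to the target function)
      10:  <padding>
      16:  <static chain value>    (stored by s390_trampoline_init)
      24:  <target function address>  */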
13161
13162 static void
13163 output_asm_nops (const char *user, int hw)
13164 {
13165 asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw);
13166 while (hw > 0)
13167 {
13168 if (hw >= 3)
13169 {
13170 output_asm_insn ("brcl\t0,0", NULL);
13171 hw -= 3;
13172 }
13173 else if (hw >= 2)
13174 {
13175 output_asm_insn ("bc\t0,0", NULL);
13176 hw -= 2;
13177 }
13178 else
13179 {
13180 output_asm_insn ("bcr\t0,0", NULL);
13181 hw -= 1;
13182 }
13183 }
13184 }
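
/* For example, a request for 5 halfwords of padding is emitted as one
   6-byte "brcl 0,0" (3 halfwords) followed by one 4-byte "bc 0,0"
   (2 halfwords).  */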
13185
13186 /* Output assembler code to FILE to call a profiler hook. */
13187
13188 void
13189 s390_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
13190 {
13191 rtx op[4];
13192
13193 fprintf (file, "# function profiler \n");
13194
13195 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
13196 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
13197 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
13198 op[3] = GEN_INT (UNITS_PER_LONG);
13199
13200 op[2] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount");
13201 SYMBOL_REF_FLAGS (op[2]) |= SYMBOL_FLAG_FUNCTION;
13202 if (flag_pic && !TARGET_64BIT)
13203 {
13204 op[2] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[2]), UNSPEC_PLT31);
13205 op[2] = gen_rtx_CONST (Pmode, op[2]);
13206 }
13207
13208 if (flag_record_mcount)
13209 fprintf (file, "1:\n");
13210
13211 if (flag_fentry)
13212 {
13213 if (flag_nop_mcount)
13214 output_asm_nops ("-mnop-mcount", /* brasl */ 3);
13215 else if (cfun->static_chain_decl)
13216 warning (OPT_Wcannot_profile, "nested functions cannot be profiled "
13217 "with %<-mfentry%> on s390");
13218 else
13219 output_asm_insn ("brasl\t0,%2%K2", op);
13220 }
13221 else if (TARGET_64BIT)
13222 {
13223 if (flag_nop_mcount)
13224 output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* brasl */ 3 +
13225 /* lg */ 3);
13226 else
13227 {
13228 output_asm_insn ("stg\t%0,%1", op);
13229 if (flag_dwarf2_cfi_asm)
13230 output_asm_insn (".cfi_rel_offset\t%0,%3", op);
13231 output_asm_insn ("brasl\t%0,%2%K2", op);
13232 output_asm_insn ("lg\t%0,%1", op);
13233 if (flag_dwarf2_cfi_asm)
13234 output_asm_insn (".cfi_restore\t%0", op);
13235 }
13236 }
13237 else
13238 {
13239 if (flag_nop_mcount)
13240 output_asm_nops ("-mnop-mcount", /* st */ 2 + /* brasl */ 3 +
13241 /* l */ 2);
13242 else
13243 {
13244 output_asm_insn ("st\t%0,%1", op);
13245 if (flag_dwarf2_cfi_asm)
13246 output_asm_insn (".cfi_rel_offset\t%0,%3", op);
13247 output_asm_insn ("brasl\t%0,%2%K2", op);
13248 output_asm_insn ("l\t%0,%1", op);
13249 if (flag_dwarf2_cfi_asm)
13250 output_asm_insn (".cfi_restore\t%0", op);
13251 }
13252 }
13253
13254 if (flag_record_mcount)
13255 {
13256 fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
13257 fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
13258 fprintf (file, "\t.previous\n");
13259 }
13260 }
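
/* A rough sketch of the output for the common 64-bit case (no
   -mfentry, -mnop-mcount or -mrecord-mcount):

       stg    %r14,8(%r15)      (spill the return address)
       brasl  %r14,_mcount      (call the profiling routine)
       lg     %r14,8(%r15)      (restore the return address)

   with matching .cfi_rel_offset/.cfi_restore directives when CFI
   assembler directives are enabled.  */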
13261
13262 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
13263 into its SYMBOL_REF_FLAGS. */
13264
13265 static void
13266 s390_encode_section_info (tree decl, rtx rtl, int first)
13267 {
13268 default_encode_section_info (decl, rtl, first);
13269
13270 if (TREE_CODE (decl) == VAR_DECL)
13271 {
13272 /* Store the alignment to be able to check if we can use
13273 a larl/load-relative instruction. We only handle the cases
13274 that can go wrong (i.e. no FUNC_DECLs). */
13275 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
13276 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13277 else if (DECL_ALIGN (decl) % 32)
13278 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13279 else if (DECL_ALIGN (decl) % 64)
13280 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13281 }
13282
13283 /* Literal pool references don't have a decl so they are handled
13284 differently here. We rely on the information in the MEM_ALIGN
13285 entry to decide upon the alignment. */
13286 if (MEM_P (rtl)
13287 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
13288 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
13289 {
13290 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
13291 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
13292 else if (MEM_ALIGN (rtl) % 32)
13293 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
13294 else if (MEM_ALIGN (rtl) % 64)
13295 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
13296 }
13297 }
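
/* For example, a global with DECL_ALIGN == 8, i.e. a 1-byte aligned
   object, gets SYMBOL_FLAG_NOTALIGN2 set, which prevents larl (whose
   relative offset is counted in halfwords and can therefore only form
   even addresses) from being used to address it directly; the
   NOTALIGN4/NOTALIGN8 flags play the same role for 4- and 8-byte
   load-relative accesses.  */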
13298
13299 /* Output thunk to FILE that implements a C++ virtual function call (with
13300 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
13301 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
13302 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
13303 relative to the resulting this pointer. */
13304
13305 static void
13306 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
13307 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
13308 tree function)
13309 {
13310 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
13311 rtx op[10];
13312 int nonlocal = 0;
13313
13314 assemble_start_function (thunk, fnname);
13315 /* Make sure unwind info is emitted for the thunk if needed. */
13316 final_start_function (emit_barrier (), file, 1);
13317
13318 /* Operand 0 is the target function. */
13319 op[0] = XEXP (DECL_RTL (function), 0);
13320 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
13321 {
13322 nonlocal = 1;
13323 if (!TARGET_64BIT)
13324 {
13325 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]), UNSPEC_GOT);
13326 op[0] = gen_rtx_CONST (Pmode, op[0]);
13327 }
13328 }
13329
13330 /* Operand 1 is the 'this' pointer. */
13331 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
13332 op[1] = gen_rtx_REG (Pmode, 3);
13333 else
13334 op[1] = gen_rtx_REG (Pmode, 2);
13335
13336 /* Operand 2 is the delta. */
13337 op[2] = GEN_INT (delta);
13338
13339 /* Operand 3 is the vcall_offset. */
13340 op[3] = GEN_INT (vcall_offset);
13341
13342 /* Operand 4 is the temporary register. */
13343 op[4] = gen_rtx_REG (Pmode, 1);
13344
13345 /* Operands 5 to 8 can be used as labels. */
13346 op[5] = NULL_RTX;
13347 op[6] = NULL_RTX;
13348 op[7] = NULL_RTX;
13349 op[8] = NULL_RTX;
13350
13351 /* Operand 9 can be used for temporary register. */
13352 op[9] = NULL_RTX;
13353
13354 /* Generate code. */
13355 if (TARGET_64BIT)
13356 {
13357 /* Setup literal pool pointer if required. */
13358 if ((!DISP_IN_RANGE (delta)
13359 && !CONST_OK_FOR_K (delta)
13360 && !CONST_OK_FOR_Os (delta))
13361 || (!DISP_IN_RANGE (vcall_offset)
13362 && !CONST_OK_FOR_K (vcall_offset)
13363 && !CONST_OK_FOR_Os (vcall_offset)))
13364 {
13365 op[5] = gen_label_rtx ();
13366 output_asm_insn ("larl\t%4,%5", op);
13367 }
13368
13369 /* Add DELTA to this pointer. */
13370 if (delta)
13371 {
13372 if (CONST_OK_FOR_J (delta))
13373 output_asm_insn ("la\t%1,%2(%1)", op);
13374 else if (DISP_IN_RANGE (delta))
13375 output_asm_insn ("lay\t%1,%2(%1)", op);
13376 else if (CONST_OK_FOR_K (delta))
13377 output_asm_insn ("aghi\t%1,%2", op);
13378 else if (CONST_OK_FOR_Os (delta))
13379 output_asm_insn ("agfi\t%1,%2", op);
13380 else
13381 {
13382 op[6] = gen_label_rtx ();
13383 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
13384 }
13385 }
13386
13387 /* Perform vcall adjustment. */
13388 if (vcall_offset)
13389 {
13390 if (DISP_IN_RANGE (vcall_offset))
13391 {
13392 output_asm_insn ("lg\t%4,0(%1)", op);
13393 output_asm_insn ("ag\t%1,%3(%4)", op);
13394 }
13395 else if (CONST_OK_FOR_K (vcall_offset))
13396 {
13397 output_asm_insn ("lghi\t%4,%3", op);
13398 output_asm_insn ("ag\t%4,0(%1)", op);
13399 output_asm_insn ("ag\t%1,0(%4)", op);
13400 }
13401 else if (CONST_OK_FOR_Os (vcall_offset))
13402 {
13403 output_asm_insn ("lgfi\t%4,%3", op);
13404 output_asm_insn ("ag\t%4,0(%1)", op);
13405 output_asm_insn ("ag\t%1,0(%4)", op);
13406 }
13407 else
13408 {
13409 op[7] = gen_label_rtx ();
13410 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
13411 output_asm_insn ("ag\t%4,0(%1)", op);
13412 output_asm_insn ("ag\t%1,0(%4)", op);
13413 }
13414 }
13415
13416 /* Jump to target. */
13417 output_asm_insn ("jg\t%0%K0", op);
13418
13419 /* Output literal pool if required. */
13420 if (op[5])
13421 {
13422 output_asm_insn (".align\t4", op);
13423 targetm.asm_out.internal_label (file, "L",
13424 CODE_LABEL_NUMBER (op[5]));
13425 }
13426 if (op[6])
13427 {
13428 targetm.asm_out.internal_label (file, "L",
13429 CODE_LABEL_NUMBER (op[6]));
13430 output_asm_insn (".long\t%2", op);
13431 }
13432 if (op[7])
13433 {
13434 targetm.asm_out.internal_label (file, "L",
13435 CODE_LABEL_NUMBER (op[7]));
13436 output_asm_insn (".long\t%3", op);
13437 }
13438 }
13439 else
13440 {
13441 /* Setup base pointer if required. */
13442 if (!vcall_offset
13443 || (!DISP_IN_RANGE (delta)
13444 && !CONST_OK_FOR_K (delta)
13445 && !CONST_OK_FOR_Os (delta))
13446 || (!DISP_IN_RANGE (delta)
13447 && !CONST_OK_FOR_K (vcall_offset)
13448 && !CONST_OK_FOR_Os (vcall_offset)))
13449 {
13450 op[5] = gen_label_rtx ();
13451 output_asm_insn ("basr\t%4,0", op);
13452 targetm.asm_out.internal_label (file, "L",
13453 CODE_LABEL_NUMBER (op[5]));
13454 }
13455
13456 /* Add DELTA to this pointer. */
13457 if (delta)
13458 {
13459 if (CONST_OK_FOR_J (delta))
13460 output_asm_insn ("la\t%1,%2(%1)", op);
13461 else if (DISP_IN_RANGE (delta))
13462 output_asm_insn ("lay\t%1,%2(%1)", op);
13463 else if (CONST_OK_FOR_K (delta))
13464 output_asm_insn ("ahi\t%1,%2", op);
13465 else if (CONST_OK_FOR_Os (delta))
13466 output_asm_insn ("afi\t%1,%2", op);
13467 else
13468 {
13469 op[6] = gen_label_rtx ();
13470 output_asm_insn ("a\t%1,%6-%5(%4)", op);
13471 }
13472 }
13473
13474 /* Perform vcall adjustment. */
13475 if (vcall_offset)
13476 {
13477 if (CONST_OK_FOR_J (vcall_offset))
13478 {
13479 output_asm_insn ("l\t%4,0(%1)", op);
13480 output_asm_insn ("a\t%1,%3(%4)", op);
13481 }
13482 else if (DISP_IN_RANGE (vcall_offset))
13483 {
13484 output_asm_insn ("l\t%4,0(%1)", op);
13485 output_asm_insn ("ay\t%1,%3(%4)", op);
13486 }
13487 else if (CONST_OK_FOR_K (vcall_offset))
13488 {
13489 output_asm_insn ("lhi\t%4,%3", op);
13490 output_asm_insn ("a\t%4,0(%1)", op);
13491 output_asm_insn ("a\t%1,0(%4)", op);
13492 }
13493 else if (CONST_OK_FOR_Os (vcall_offset))
13494 {
13495 output_asm_insn ("iilf\t%4,%3", op);
13496 output_asm_insn ("a\t%4,0(%1)", op);
13497 output_asm_insn ("a\t%1,0(%4)", op);
13498 }
13499 else
13500 {
13501 op[7] = gen_label_rtx ();
13502 output_asm_insn ("l\t%4,%7-%5(%4)", op);
13503 output_asm_insn ("a\t%4,0(%1)", op);
13504 output_asm_insn ("a\t%1,0(%4)", op);
13505 }
13506
13507 /* We had to clobber the base pointer register.
13508 Re-setup the base pointer (with a different base). */
13509 op[5] = gen_label_rtx ();
13510 output_asm_insn ("basr\t%4,0", op);
13511 targetm.asm_out.internal_label (file, "L",
13512 CODE_LABEL_NUMBER (op[5]));
13513 }
13514
13515 /* Jump to target. */
13516 op[8] = gen_label_rtx ();
13517
13518 if (!flag_pic)
13519 output_asm_insn ("l\t%4,%8-%5(%4)", op);
13520 else if (!nonlocal)
13521 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13522 /* We cannot call through .plt, since .plt requires %r12 loaded. */
13523 else if (flag_pic == 1)
13524 {
13525 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13526 output_asm_insn ("l\t%4,%0(%4)", op);
13527 }
13528 else if (flag_pic == 2)
13529 {
13530 op[9] = gen_rtx_REG (Pmode, 0);
13531 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13532 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13533 output_asm_insn ("ar\t%4,%9", op);
13534 output_asm_insn ("l\t%4,0(%4)", op);
13535 }
13536
13537 output_asm_insn ("br\t%4", op);
13538
13539 /* Output literal pool. */
13540 output_asm_insn (".align\t4", op);
13541
13542 if (nonlocal && flag_pic == 2)
13543 output_asm_insn (".long\t%0", op);
13544 if (nonlocal)
13545 {
13546 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13547 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13548 }
13549
13550 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13551 if (!flag_pic)
13552 output_asm_insn (".long\t%0", op);
13553 else
13554 output_asm_insn (".long\t%0-%5", op);
13555
13556 if (op[6])
13557 {
13558 targetm.asm_out.internal_label (file, "L",
13559 CODE_LABEL_NUMBER (op[6]));
13560 output_asm_insn (".long\t%2", op);
13561 }
13562 if (op[7])
13563 {
13564 targetm.asm_out.internal_label (file, "L",
13565 CODE_LABEL_NUMBER (op[7]));
13566 output_asm_insn (".long\t%3", op);
13567 }
13568 }
13569 final_end_function ();
13570 assemble_end_function (thunk, fnname);
13571 }
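
/* A rough sketch of a 64-bit thunk for the hypothetical values
   delta == 16 and vcall_offset == 24 (both within displacement range,
   so no literal pool entries are needed), with the this pointer in
   %r2:

       la   %r2,16(%r2)        (this += delta)
       lg   %r1,0(%r2)         (load the vtable pointer)
       ag   %r2,24(%r1)        (this += *(vtable + vcall_offset))
       jg   function           (tail call the target)  */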
13572
13573 /* Output either an indirect jump or an indirect call
13574 (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO
13575 using a branch trampoline disabling branch target prediction. */
13576
13577 void
13578 s390_indirect_branch_via_thunk (unsigned int regno,
13579 unsigned int return_addr_regno,
13580 rtx comparison_operator,
13581 enum s390_indirect_branch_type type)
13582 {
13583 enum s390_indirect_branch_option option;
13584
13585 if (type == s390_indirect_branch_type_return)
13586 {
13587 if (s390_return_addr_from_memory ())
13588 option = s390_opt_function_return_mem;
13589 else
13590 option = s390_opt_function_return_reg;
13591 }
13592 else if (type == s390_indirect_branch_type_jump)
13593 option = s390_opt_indirect_branch_jump;
13594 else if (type == s390_indirect_branch_type_call)
13595 option = s390_opt_indirect_branch_call;
13596 else
13597 gcc_unreachable ();
13598
13599 if (TARGET_INDIRECT_BRANCH_TABLE)
13600 {
13601 char label[32];
13602
13603 ASM_GENERATE_INTERNAL_LABEL (label,
13604 indirect_branch_table_label[option],
13605 indirect_branch_table_label_no[option]++);
13606 ASM_OUTPUT_LABEL (asm_out_file, label);
13607 }
13608
13609 if (return_addr_regno != INVALID_REGNUM)
13610 {
13611 gcc_assert (comparison_operator == NULL_RTX);
13612 fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
13613 }
13614 else
13615 {
13616 fputs (" \tjg", asm_out_file);
13617 if (comparison_operator != NULL_RTX)
13618 print_operand (asm_out_file, comparison_operator, 'C');
13619
13620 fputs ("\t", asm_out_file);
13621 }
13622
13623 if (TARGET_CPU_Z10)
13624 fprintf (asm_out_file,
13625 TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
13626 regno);
13627 else
13628 fprintf (asm_out_file,
13629 TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
13630 INDIRECT_BRANCH_THUNK_REGNUM, regno);
13631
13632 if ((option == s390_opt_indirect_branch_jump
13633 && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
13634 || (option == s390_opt_indirect_branch_call
13635 && cfun->machine->indirect_branch_call == indirect_branch_thunk)
13636 || (option == s390_opt_function_return_reg
13637 && cfun->machine->function_return_reg == indirect_branch_thunk)
13638 || (option == s390_opt_function_return_mem
13639 && cfun->machine->function_return_mem == indirect_branch_thunk))
13640 {
13641 if (TARGET_CPU_Z10)
13642 indirect_branch_z10thunk_mask |= (1 << regno);
13643 else
13644 indirect_branch_prez10thunk_mask |= (1 << regno);
13645 }
13646 }
13647
13648 /* Output an inline thunk for indirect jumps. EXECUTE_TARGET can
13649 either be an address register or a label pointing to the location
13650 of the jump instruction. */
13651
13652 void
13653 s390_indirect_branch_via_inline_thunk (rtx execute_target)
13654 {
13655 if (TARGET_INDIRECT_BRANCH_TABLE)
13656 {
13657 char label[32];
13658
13659 ASM_GENERATE_INTERNAL_LABEL (label,
13660 indirect_branch_table_label[s390_opt_indirect_branch_jump],
13661 indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
13662 ASM_OUTPUT_LABEL (asm_out_file, label);
13663 }
13664
13665 if (!TARGET_ZARCH)
13666 fputs ("\t.machinemode zarch\n", asm_out_file);
13667
13668 if (REG_P (execute_target))
13669 fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
13670 else
13671 output_asm_insn ("\texrl\t%%r0,%0", &execute_target);
13672
13673 if (!TARGET_ZARCH)
13674 fputs ("\t.machinemode esa\n", asm_out_file);
13675
13676 fputs ("0:\tj\t0b\n", asm_out_file);
13677 }
13678
13679 static bool
13680 s390_valid_pointer_mode (scalar_int_mode mode)
13681 {
13682 return (mode == SImode || (TARGET_64BIT && mode == DImode));
13683 }
13684
13685 /* Checks whether the given CALL_EXPR would use a caller
13686 saved register. This is used to decide whether sibling call
13687 optimization could be performed on the respective function
13688 call. */
13689
13690 static bool
13691 s390_call_saved_register_used (tree call_expr)
13692 {
13693 CUMULATIVE_ARGS cum_v;
13694 cumulative_args_t cum;
13695 tree parameter;
13696 rtx parm_rtx;
13697 int reg, i;
13698
13699 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13700 cum = pack_cumulative_args (&cum_v);
13701
13702 for (i = 0; i < call_expr_nargs (call_expr); i++)
13703 {
13704 parameter = CALL_EXPR_ARG (call_expr, i);
13705 gcc_assert (parameter);
13706
13707 /* For an undeclared variable passed as parameter we will get
13708 an ERROR_MARK node here. */
13709 if (TREE_CODE (parameter) == ERROR_MARK)
13710 return true;
13711
13712 /* We assume that in the target function all parameters are
13713 named. This only has an impact on vector argument register
13714 usage none of which is call-saved. */
13715 function_arg_info arg (TREE_TYPE (parameter), /*named=*/true);
13716 apply_pass_by_reference_rules (&cum_v, arg);
13717
13718 parm_rtx = s390_function_arg (cum, arg);
13719
13720 s390_function_arg_advance (cum, arg);
13721
13722 if (!parm_rtx)
13723 continue;
13724
13725 if (REG_P (parm_rtx))
13726 {
13727 for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
13728 if (!call_used_or_fixed_reg_p (reg + REGNO (parm_rtx)))
13729 return true;
13730 }
13731
13732 if (GET_CODE (parm_rtx) == PARALLEL)
13733 {
13734 int i;
13735
13736 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13737 {
13738 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13739
13740 gcc_assert (REG_P (r));
13741
13742 for (reg = 0; reg < REG_NREGS (r); reg++)
13743 if (!call_used_or_fixed_reg_p (reg + REGNO (r)))
13744 return true;
13745 }
13746 }
13747
13748 }
13749 return false;
13750 }
13751
13752 /* Return true if the given call expression can be
13753 turned into a sibling call.
13754 DECL holds the declaration of the function to be called whereas
13755 EXP is the call expression itself. */
13756
13757 static bool
13758 s390_function_ok_for_sibcall (tree decl, tree exp)
13759 {
13760 /* The TPF epilogue uses register 1. */
13761 if (TARGET_TPF_PROFILING)
13762 return false;
13763
13764 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13765 which would have to be restored before the sibcall. */
13766 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13767 return false;
13768
13769 /* The thunks for indirect branches require r1 if no exrl is
13770 available. r1 might not be available when doing a sibling
13771 call. */
13772 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13773 && !TARGET_CPU_Z10
13774 && !decl)
13775 return false;
13776
13777 /* Register 6 on s390 is available as an argument register but unfortunately
13778 "caller saved". This makes functions needing this register for arguments
13779 not suitable for sibcalls. */
13780 return !s390_call_saved_register_used (exp);
13781 }
13782
13783 /* Return the fixed registers used for condition codes. */
13784
13785 static bool
13786 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13787 {
13788 *p1 = CC_REGNUM;
13789 *p2 = INVALID_REGNUM;
13790
13791 return true;
13792 }
13793
13794 /* This function is used by the call expanders of the machine description.
13795 It emits the call insn itself together with the necessary operations
13796 to adjust the target address and returns the emitted insn.
13797 ADDR_LOCATION is the target address rtx
13798 TLS_CALL the location of the thread-local symbol
13799 RESULT_REG the register where the result of the call should be stored
13800 RETADDR_REG the register where the return address should be stored
13801 If this parameter is NULL_RTX the call is considered
13802 to be a sibling call. */
13803
13804 rtx_insn *
13805 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13806 rtx retaddr_reg)
13807 {
13808 bool plt31_call_p = false;
13809 rtx_insn *insn;
13810 rtx vec[4] = { NULL_RTX };
13811 int elts = 0;
13812 rtx *call = &vec[0];
13813 rtx *clobber_ret_reg = &vec[1];
13814 rtx *use = &vec[2];
13815 rtx *clobber_thunk_reg = &vec[3];
13816 int i;
13817
13818 /* Direct function calls need special treatment. */
13819 if (GET_CODE (addr_location) == SYMBOL_REF)
13820 {
13821 /* When calling a global routine in PIC mode, we must
13822 replace the symbol itself with the PLT stub. */
13823 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location) && !TARGET_64BIT)
13824 {
13825 if (retaddr_reg != NULL_RTX)
13826 {
13827 addr_location = gen_rtx_UNSPEC (Pmode,
13828 gen_rtvec (1, addr_location),
13829 UNSPEC_PLT31);
13830 addr_location = gen_rtx_CONST (Pmode, addr_location);
13831 plt31_call_p = true;
13832 }
13833 else
13834 /* For -fpic code the PLT entries might use r12 which is
13835 call-saved. Therefore we cannot do a sibcall when
13836 calling directly using a symbol ref. When reaching
13837 this point we decided (in s390_function_ok_for_sibcall)
13838 to do a sibcall for a function pointer but one of the
13839 optimizers was able to get rid of the function pointer
13840 by propagating the symbol ref into the call. This
13841 optimization is illegal for S/390 so we turn the direct
13842 call into an indirect call again. */
13843 addr_location = force_reg (Pmode, addr_location);
13844 }
13845 }
13846
13847 /* If it is already an indirect call or the code above moved the
13848 SYMBOL_REF to somewhere else make sure the address can be found in
13849 register 1. */
13850 if (retaddr_reg == NULL_RTX
13851 && GET_CODE (addr_location) != SYMBOL_REF
13852 && !plt31_call_p)
13853 {
13854 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13855 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13856 }
13857
13858 if (TARGET_INDIRECT_BRANCH_NOBP_CALL
13859 && GET_CODE (addr_location) != SYMBOL_REF
13860 && !plt31_call_p)
13861 {
13862 /* Indirect branch thunks require the target to be a single GPR. */
13863 addr_location = force_reg (Pmode, addr_location);
13864
13865 /* Without exrl the indirect branch thunks need an additional
13866 register for larl;ex. */
13867 if (!TARGET_CPU_Z10)
13868 {
13869 *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
13870 *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
13871 }
13872 }
13873
13874 addr_location = gen_rtx_MEM (QImode, addr_location);
13875 *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13876
13877 if (result_reg != NULL_RTX)
13878 *call = gen_rtx_SET (result_reg, *call);
13879
13880 if (retaddr_reg != NULL_RTX)
13881 {
13882 *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13883
13884 if (tls_call != NULL_RTX)
13885 *use = gen_rtx_USE (VOIDmode, tls_call);
13886 }
13887
13888
13889 for (i = 0; i < 4; i++)
13890 if (vec[i] != NULL_RTX)
13891 elts++;
13892
13893 if (elts > 1)
13894 {
13895 rtvec v;
13896 int e = 0;
13897
13898 v = rtvec_alloc (elts);
13899 for (i = 0; i < 4; i++)
13900 if (vec[i] != NULL_RTX)
13901 {
13902 RTVEC_ELT (v, e) = vec[i];
13903 e++;
13904 }
13905
13906 *call = gen_rtx_PARALLEL (VOIDmode, v);
13907 }
13908
13909 insn = emit_call_insn (*call);
13910
13911 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13912 if (plt31_call_p || tls_call != NULL_RTX)
13913 {
13914 /* s390_function_ok_for_sibcall should
13915 have denied sibcalls in this case. */
13916 gcc_assert (retaddr_reg != NULL_RTX);
13917 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13918 }
13919 return insn;
13920 }
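
/* The insn emitted above is either a plain CALL or a PARALLEL
   combining the elements collected in VEC, roughly:

       (parallel
         [(set (reg result) (call (mem:QI addr) (const_int 0)))
          (clobber (reg retaddr))
          (use tls_call)              ; TLS calls only
          (clobber (reg 1))])         ; pre-z10 indirect-branch thunks only  */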
13921
13922 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13923
13924 static void
13925 s390_conditional_register_usage (void)
13926 {
13927 int i;
13928
13929 if (flag_pic)
13930 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13931 fixed_regs[BASE_REGNUM] = 0;
13932 fixed_regs[RETURN_REGNUM] = 0;
13933 if (TARGET_64BIT)
13934 {
13935 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13936 call_used_regs[i] = 0;
13937 }
13938 else
13939 {
13940 call_used_regs[FPR4_REGNUM] = 0;
13941 call_used_regs[FPR6_REGNUM] = 0;
13942 }
13943
13944 if (TARGET_SOFT_FLOAT)
13945 {
13946 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13947 fixed_regs[i] = 1;
13948 }
13949
13950 /* Disable v16 - v31 for non-vector target. */
13951 if (!TARGET_VX)
13952 {
13953 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13954 fixed_regs[i] = call_used_regs[i] = 1;
13955 }
13956 }
13957
13958 /* Corresponding function to eh_return expander. */
13959
13960 static GTY(()) rtx s390_tpf_eh_return_symbol;
13961 void
13962 s390_emit_tpf_eh_return (rtx target)
13963 {
13964 rtx_insn *insn;
13965 rtx reg, orig_ra;
13966
13967 if (!s390_tpf_eh_return_symbol)
13968 {
13969 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13970 SYMBOL_REF_FLAGS (s390_tpf_eh_return_symbol) |= SYMBOL_FLAG_FUNCTION;
13971 }
13972
13973 reg = gen_rtx_REG (Pmode, 2);
13974 orig_ra = gen_rtx_REG (Pmode, 3);
13975
13976 emit_move_insn (reg, target);
13977 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13978 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13979 gen_rtx_REG (Pmode, RETURN_REGNUM));
13980 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13981 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13982
13983 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13984 }
13985
13986 /* Rework the prologue/epilogue to avoid saving/restoring
13987 registers unnecessarily. */
13988
13989 static void
13990 s390_optimize_prologue (void)
13991 {
13992 rtx_insn *insn, *new_insn, *next_insn;
13993
13994 /* Do a final recompute of the frame-related data. */
13995 s390_optimize_register_info ();
13996
13997 /* If all special registers are in fact used, there's nothing we
13998 can do, so no point in walking the insn list. */
13999
14000 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
14001 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM)
14002 return;
14003
14004 /* Search for prologue/epilogue insns and replace them. */
14005 for (insn = get_insns (); insn; insn = next_insn)
14006 {
14007 int first, last, off;
14008 rtx set, base, offset;
14009 rtx pat;
14010
14011 next_insn = NEXT_INSN (insn);
14012
14013 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
14014 continue;
14015
14016 pat = PATTERN (insn);
14017
14018 /* Remove ldgr/lgdr instructions used for saving and restoring
14019 GPRs if possible. */
14020 if (TARGET_Z10)
14021 {
14022 rtx tmp_pat = pat;
14023
14024 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
14025 tmp_pat = XVECEXP (pat, 0, 0);
14026
14027 if (GET_CODE (tmp_pat) == SET
14028 && GET_MODE (SET_SRC (tmp_pat)) == DImode
14029 && REG_P (SET_SRC (tmp_pat))
14030 && REG_P (SET_DEST (tmp_pat)))
14031 {
14032 int src_regno = REGNO (SET_SRC (tmp_pat));
14033 int dest_regno = REGNO (SET_DEST (tmp_pat));
14034 int gpr_regno;
14035 int fpr_regno;
14036
14037 if (!((GENERAL_REGNO_P (src_regno)
14038 && FP_REGNO_P (dest_regno))
14039 || (FP_REGNO_P (src_regno)
14040 && GENERAL_REGNO_P (dest_regno))))
14041 continue;
14042
14043 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
14044 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
14045
14046 /* GPR must be call-saved, FPR must be call-clobbered. */
14047 if (!call_used_regs[fpr_regno]
14048 || call_used_regs[gpr_regno])
14049 continue;
14050
14051 /* It must not happen that what we once saved in an FPR now
14052 needs a stack slot. */
14053 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
14054
14055 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
14056 {
14057 remove_insn (insn);
14058 continue;
14059 }
14060 }
14061 }
14062
14063 if (GET_CODE (pat) == PARALLEL
14064 && store_multiple_operation (pat, VOIDmode))
14065 {
14066 set = XVECEXP (pat, 0, 0);
14067 first = REGNO (SET_SRC (set));
14068 last = first + XVECLEN (pat, 0) - 1;
14069 offset = const0_rtx;
14070 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
14071 off = INTVAL (offset);
14072
14073 if (GET_CODE (base) != REG || off < 0)
14074 continue;
14075 if (cfun_frame_layout.first_save_gpr != -1
14076 && (cfun_frame_layout.first_save_gpr < first
14077 || cfun_frame_layout.last_save_gpr > last))
14078 continue;
14079 if (REGNO (base) != STACK_POINTER_REGNUM
14080 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14081 continue;
14082 if (first > BASE_REGNUM || last < BASE_REGNUM)
14083 continue;
14084
14085 if (cfun_frame_layout.first_save_gpr != -1)
14086 {
14087 rtx s_pat = save_gprs (base,
14088 off + (cfun_frame_layout.first_save_gpr
14089 - first) * UNITS_PER_LONG,
14090 cfun_frame_layout.first_save_gpr,
14091 cfun_frame_layout.last_save_gpr);
14092 new_insn = emit_insn_before (s_pat, insn);
14093 INSN_ADDRESSES_NEW (new_insn, -1);
14094 }
14095
14096 remove_insn (insn);
14097 continue;
14098 }
14099
14100 if (cfun_frame_layout.first_save_gpr == -1
14101 && GET_CODE (pat) == SET
14102 && GENERAL_REG_P (SET_SRC (pat))
14103 && GET_CODE (SET_DEST (pat)) == MEM)
14104 {
14105 set = pat;
14106 first = REGNO (SET_SRC (set));
14107 offset = const0_rtx;
14108 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
14109 off = INTVAL (offset);
14110
14111 if (GET_CODE (base) != REG || off < 0)
14112 continue;
14113 if (REGNO (base) != STACK_POINTER_REGNUM
14114 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14115 continue;
14116
14117 remove_insn (insn);
14118 continue;
14119 }
14120
14121 if (GET_CODE (pat) == PARALLEL
14122 && load_multiple_operation (pat, VOIDmode))
14123 {
14124 set = XVECEXP (pat, 0, 0);
14125 first = REGNO (SET_DEST (set));
14126 last = first + XVECLEN (pat, 0) - 1;
14127 offset = const0_rtx;
14128 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
14129 off = INTVAL (offset);
14130
14131 if (GET_CODE (base) != REG || off < 0)
14132 continue;
14133
14134 if (cfun_frame_layout.first_restore_gpr != -1
14135 && (cfun_frame_layout.first_restore_gpr < first
14136 || cfun_frame_layout.last_restore_gpr > last))
14137 continue;
14138 if (REGNO (base) != STACK_POINTER_REGNUM
14139 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14140 continue;
14141 if (first > BASE_REGNUM || last < BASE_REGNUM)
14142 continue;
14143
14144 if (cfun_frame_layout.first_restore_gpr != -1)
14145 {
14146 rtx rpat = restore_gprs (base,
14147 off + (cfun_frame_layout.first_restore_gpr
14148 - first) * UNITS_PER_LONG,
14149 cfun_frame_layout.first_restore_gpr,
14150 cfun_frame_layout.last_restore_gpr);
14151
14152 /* Remove REG_CFA_RESTOREs for registers that we no
14153 longer need to save. */
14154 REG_NOTES (rpat) = REG_NOTES (insn);
14155 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
14156 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
14157 && ((int) REGNO (XEXP (*ptr, 0))
14158 < cfun_frame_layout.first_restore_gpr))
14159 *ptr = XEXP (*ptr, 1);
14160 else
14161 ptr = &XEXP (*ptr, 1);
14162 new_insn = emit_insn_before (rpat, insn);
14163 RTX_FRAME_RELATED_P (new_insn) = 1;
14164 INSN_ADDRESSES_NEW (new_insn, -1);
14165 }
14166
14167 remove_insn (insn);
14168 continue;
14169 }
14170
14171 if (cfun_frame_layout.first_restore_gpr == -1
14172 && GET_CODE (pat) == SET
14173 && GENERAL_REG_P (SET_DEST (pat))
14174 && GET_CODE (SET_SRC (pat)) == MEM)
14175 {
14176 set = pat;
14177 first = REGNO (SET_DEST (set));
14178 offset = const0_rtx;
14179 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
14180 off = INTVAL (offset);
14181
14182 if (GET_CODE (base) != REG || off < 0)
14183 continue;
14184
14185 if (REGNO (base) != STACK_POINTER_REGNUM
14186 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
14187 continue;
14188
14189 remove_insn (insn);
14190 continue;
14191 }
14192 }
14193 }
14194
14195 /* On z10 and later the dynamic branch prediction must see the
14196 backward jump within a certain window. If not it falls back to
14197 the static prediction. This function rearranges the loop backward
14198 branch in a way which makes the static prediction always correct.
14199 The function returns true if it added an instruction. */
14200 static bool
14201 s390_fix_long_loop_prediction (rtx_insn *insn)
14202 {
14203 rtx set = single_set (insn);
14204 rtx code_label, label_ref;
14205 rtx_insn *uncond_jump;
14206 rtx_insn *cur_insn;
14207 rtx tmp;
14208 int distance;
14209
14210 /* This will exclude branch on count and branch on index patterns
14211 since these are correctly statically predicted.
14212
14213 The additional check for a PARALLEL is required here since
14214 single_set might be != NULL for PARALLELs where the set of the
14215 iteration variable is dead. */
14216 if (GET_CODE (PATTERN (insn)) == PARALLEL
14217 || !set
14218 || SET_DEST (set) != pc_rtx
14219 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
14220 return false;
14221
14222 /* Skip conditional returns. */
14223 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
14224 && XEXP (SET_SRC (set), 2) == pc_rtx)
14225 return false;
14226
14227 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
14228 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
14229
14230 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
14231
14232 code_label = XEXP (label_ref, 0);
14233
14234 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
14235 || INSN_ADDRESSES (INSN_UID (insn)) == -1
14236 || (INSN_ADDRESSES (INSN_UID (insn))
14237 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
14238 return false;
14239
14240 for (distance = 0, cur_insn = PREV_INSN (insn);
14241 distance < PREDICT_DISTANCE - 6;
14242 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
14243 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
14244 return false;
14245
14246 rtx_code_label *new_label = gen_label_rtx ();
14247 uncond_jump = emit_jump_insn_after (
14248 gen_rtx_SET (pc_rtx,
14249 gen_rtx_LABEL_REF (VOIDmode, code_label)),
14250 insn);
14251 emit_label_after (new_label, uncond_jump);
14252
14253 tmp = XEXP (SET_SRC (set), 1);
14254 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
14255 XEXP (SET_SRC (set), 2) = tmp;
14256 INSN_CODE (insn) = -1;
14257
14258 XEXP (label_ref, 0) = new_label;
14259 JUMP_LABEL (insn) = new_label;
14260 JUMP_LABEL (uncond_jump) = code_label;
14261
14262 return true;
14263 }
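
/* A rough sketch of the rewrite performed above: a conditional backward
   branch whose target is more than PREDICT_DISTANCE away,

     code_label:  ...
                  jCC   code_label

   becomes a short forward branch with inverted condition over an
   unconditional backward jump, which the static predictor gets right:

     code_label:  ...
                  j!CC  new_label
                  j     code_label
     new_label:   ...  */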
14264
14265 /* Returns 1 if INSN reads the value of REG for purposes not related
14266 to addressing of memory, and 0 otherwise. */
14267 static int
14268 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
14269 {
14270 return reg_referenced_p (reg, PATTERN (insn))
14271 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
14272 }
14273
14274 /* Starting from INSN find_cond_jump looks downwards in the insn
14275 stream for a single jump insn which is the last user of the
14276 condition code set in INSN. */
14277 static rtx_insn *
14278 find_cond_jump (rtx_insn *insn)
14279 {
14280 for (; insn; insn = NEXT_INSN (insn))
14281 {
14282 rtx ite, cc;
14283
14284 if (LABEL_P (insn))
14285 break;
14286
14287 if (!JUMP_P (insn))
14288 {
14289 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
14290 break;
14291 continue;
14292 }
14293
14294 /* This will be triggered by a return. */
14295 if (GET_CODE (PATTERN (insn)) != SET)
14296 break;
14297
14298 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
14299 ite = SET_SRC (PATTERN (insn));
14300
14301 if (GET_CODE (ite) != IF_THEN_ELSE)
14302 break;
14303
14304 cc = XEXP (XEXP (ite, 0), 0);
14305 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
14306 break;
14307
14308 if (find_reg_note (insn, REG_DEAD, cc))
14309 return insn;
14310 break;
14311 }
14312
14313 return NULL;
14314 }
14315
14316 /* Swap the condition in COND and the operands in OP0 and OP1 so that
14317 the semantics does not change. If NULL_RTX is passed as COND the
14318 function tries to find the conditional jump starting with INSN. */
14319 static void
14320 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
14321 {
14322 rtx tmp = *op0;
14323
14324 if (cond == NULL_RTX)
14325 {
14326 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
14327 rtx set = jump ? single_set (jump) : NULL_RTX;
14328
14329 if (set == NULL_RTX)
14330 return;
14331
14332 cond = XEXP (SET_SRC (set), 0);
14333 }
14334
14335 *op0 = *op1;
14336 *op1 = tmp;
14337 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
14338 }
14339
14340 /* On z10, instructions of the compare-and-branch family have the
14341 property to access the register occurring as second operand with
14342 its bits complemented. If such a compare is grouped with a second
14343 instruction that accesses the same register non-complemented, and
14344 if that register's value is delivered via a bypass, then the
14345 pipeline recycles, thereby causing significant performance decline.
14346 This function locates such situations and exchanges the two
14347 operands of the compare. The function returns true whenever it
14348 added an insn. */
14349 static bool
14350 s390_z10_optimize_cmp (rtx_insn *insn)
14351 {
14352 rtx_insn *prev_insn, *next_insn;
14353 bool insn_added_p = false;
14354 rtx cond, *op0, *op1;
14355
14356 if (GET_CODE (PATTERN (insn)) == PARALLEL)
14357 {
14358 /* Handle compare and branch and branch on count
14359 instructions. */
14360 rtx pattern = single_set (insn);
14361
14362 if (!pattern
14363 || SET_DEST (pattern) != pc_rtx
14364 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
14365 return false;
14366
14367 cond = XEXP (SET_SRC (pattern), 0);
14368 op0 = &XEXP (cond, 0);
14369 op1 = &XEXP (cond, 1);
14370 }
14371 else if (GET_CODE (PATTERN (insn)) == SET)
14372 {
14373 rtx src, dest;
14374
14375 /* Handle normal compare instructions. */
14376 src = SET_SRC (PATTERN (insn));
14377 dest = SET_DEST (PATTERN (insn));
14378
14379 if (!REG_P (dest)
14380 || !CC_REGNO_P (REGNO (dest))
14381 || GET_CODE (src) != COMPARE)
14382 return false;
14383
14384 /* s390_swap_cmp will try to find the conditional
14385 jump when passing NULL_RTX as condition. */
14386 cond = NULL_RTX;
14387 op0 = &XEXP (src, 0);
14388 op1 = &XEXP (src, 1);
14389 }
14390 else
14391 return false;
14392
14393 if (!REG_P (*op0) || !REG_P (*op1))
14394 return false;
14395
14396 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
14397 return false;
14398
14399 /* Swap the COMPARE arguments and its mask if there is a
14400 conflicting access in the previous insn. */
14401 prev_insn = prev_active_insn (insn);
14402 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14403 && reg_referenced_p (*op1, PATTERN (prev_insn)))
14404 s390_swap_cmp (cond, op0, op1, insn);
14405
14406 /* Check if there is a conflict with the next insn. If there
14407 was no conflict with the previous insn, then swap the
14408 COMPARE arguments and its mask. If we already swapped
14409 the operands, or if swapping them would cause a conflict
14410 with the previous insn, issue a NOP after the COMPARE in
14411 order to separate the two instructions. */
14412 next_insn = next_active_insn (insn);
14413 if (next_insn != NULL_RTX && INSN_P (next_insn)
14414 && s390_non_addr_reg_read_p (*op1, next_insn))
14415 {
14416 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
14417 && s390_non_addr_reg_read_p (*op0, prev_insn))
14418 {
14419 if (REGNO (*op1) == 0)
14420 emit_insn_after (gen_nop_lr1 (), insn);
14421 else
14422 emit_insn_after (gen_nop_lr0 (), insn);
14423 insn_added_p = true;
14424 }
14425 else
14426 s390_swap_cmp (cond, op0, op1, insn);
14427 }
14428 return insn_added_p;
14429 }
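
/* For example, a compare-and-branch such as "crj %r2,%r3,..." grouped
   with a neighbouring instruction that also reads %r3 exhibits the
   hazard described above; the code above either turns it into
   "crj %r3,%r2,..." with the condition swapped accordingly, or, if
   swapping would create a conflict with the previous insn, inserts a
   register-to-register NOP after the compare to split the group.  */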
14430
14431 /* Number of INSNs to be scanned backward in the last BB of the loop
14432 and forward in the first BB of the loop. This usually should be a
14433 bit more than the number of INSNs which could go into one
14434 group. */
14435 #define S390_OSC_SCAN_INSN_NUM 5
14436
14437 /* Scan LOOP for static OSC collisions and return true if an osc_break
14438 should be issued for this loop. */
14439 static bool
14440 s390_adjust_loop_scan_osc (struct loop* loop)
14442 {
14443 HARD_REG_SET modregs, newregs;
14444 rtx_insn *insn, *store_insn = NULL;
14445 rtx set;
14446 struct s390_address addr_store, addr_load;
14447 subrtx_iterator::array_type array;
14448 int insn_count;
14449
14450 CLEAR_HARD_REG_SET (modregs);
14451
14452 insn_count = 0;
14453 FOR_BB_INSNS_REVERSE (loop->latch, insn)
14454 {
14455 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14456 continue;
14457
14458 insn_count++;
14459 if (insn_count > S390_OSC_SCAN_INSN_NUM)
14460 return false;
14461
14462 find_all_hard_reg_sets (insn, &newregs, true);
14463 modregs |= newregs;
14464
14465 set = single_set (insn);
14466 if (!set)
14467 continue;
14468
14469 if (MEM_P (SET_DEST (set))
14470 && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
14471 {
14472 store_insn = insn;
14473 break;
14474 }
14475 }
14476
14477 if (store_insn == NULL_RTX)
14478 return false;
14479
14480 insn_count = 0;
14481 FOR_BB_INSNS (loop->header, insn)
14482 {
14483 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14484 continue;
14485
14486 if (insn == store_insn)
14487 return false;
14488
14489 insn_count++;
14490 if (insn_count > S390_OSC_SCAN_INSN_NUM)
14491 return false;
14492
14493 find_all_hard_reg_sets (insn, &newregs, true);
14494 modregs |= newregs;
14495
14496 set = single_set (insn);
14497 if (!set)
14498 continue;
14499
14500 /* An intermediate store disrupts static OSC checking
14501 anyway. */
14502 if (MEM_P (SET_DEST (set))
14503 && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
14504 return false;
14505
14506 FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
14507 if (MEM_P (*iter)
14508 && s390_decompose_address (XEXP (*iter, 0), &addr_load)
14509 && rtx_equal_p (addr_load.base, addr_store.base)
14510 && rtx_equal_p (addr_load.indx, addr_store.indx)
14511 && rtx_equal_p (addr_load.disp, addr_store.disp))
14512 {
14513 if ((addr_load.base != NULL_RTX
14514 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
14515 || (addr_load.indx != NULL_RTX
14516 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
14517 return true;
14518 }
14519 }
14520 return false;
14521 }
14522
14523 /* Look for adjustments which can be done on simple innermost
14524 loops. */
14525 static void
14526 s390_adjust_loops ()
14527 {
14528 df_analyze ();
14529 compute_bb_for_insn ();
14530
14531 /* Find the loops. */
14532 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
14533
14534 for (auto loop : loops_list (cfun, LI_ONLY_INNERMOST))
14535 {
14536 if (dump_file)
14537 {
14538 flow_loop_dump (loop, dump_file, NULL, 0);
14539 fprintf (dump_file, ";; OSC loop scan Loop: ");
14540 }
14541 if (loop->latch == NULL
14542 || pc_set (BB_END (loop->latch)) == NULL_RTX
14543 || !s390_adjust_loop_scan_osc (loop))
14544 {
14545 if (dump_file)
14546 {
14547 if (loop->latch == NULL)
14548 fprintf (dump_file, " multiple backward jumps\n");
14549 else
14550 {
14551 fprintf (dump_file, " header insn: %d latch insn: %d ",
14552 INSN_UID (BB_HEAD (loop->header)),
14553 INSN_UID (BB_END (loop->latch)));
14554 if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14555 fprintf (dump_file, " loop does not end with jump\n");
14556 else
14557 fprintf (dump_file, " not instrumented\n");
14558 }
14559 }
14560 }
14561 else
14562 {
14563 rtx_insn *new_insn;
14564
14565 if (dump_file)
14566 fprintf (dump_file, " adding OSC break insn: ");
14567 new_insn = emit_insn_before (gen_osc_break (),
14568 BB_END (loop->latch));
14569 INSN_ADDRESSES_NEW (new_insn, -1);
14570 }
14571 }
14572
14573 loop_optimizer_finalize ();
14574
14575 df_finish_pass (false);
14576 }
14577
14578 /* Perform machine-dependent processing. */
14579
14580 static void
14581 s390_reorg (void)
14582 {
14583 struct constant_pool *pool;
14584 rtx_insn *insn;
14585 int hw_before, hw_after;
14586
14587 if (s390_tune == PROCESSOR_2964_Z13)
14588 s390_adjust_loops ();
14589
14590 /* Make sure all splits have been performed; splits after
14591 machine_dependent_reorg might confuse insn length counts. */
14592 split_all_insns_noflow ();
14593
14594 /* Install the main literal pool and the associated base
14595 register load insns. The literal pool might be > 4096 bytes in
14596 size, so that some of its elements cannot be directly accessed.
14597
14598 To fix this, we split the single literal pool into multiple
14599 pool chunks, reloading the pool base register at various
14600 points throughout the function to ensure it always points to
14601 the pool chunk the following code expects. */
14602
14603 /* Collect the literal pool. */
14604 pool = s390_mainpool_start ();
14605 if (pool)
14606 {
14607 /* Finish up literal pool related changes. */
14608 s390_mainpool_finish (pool);
14609 }
14610 else
14611 {
14612 /* If literal pool overflowed, chunkify it. */
14613 pool = s390_chunkify_start ();
14614 s390_chunkify_finish (pool);
14615 }
14616
14617 /* Generate out-of-pool execute target insns. */
14618 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14619 {
14620 rtx label;
14621 rtx_insn *target;
14622
14623 label = s390_execute_label (insn);
14624 if (!label)
14625 continue;
14626
14627 gcc_assert (label != const0_rtx);
14628
14629 target = emit_label (XEXP (label, 0));
14630 INSN_ADDRESSES_NEW (target, -1);
14631
14632 if (JUMP_P (insn))
14633 {
14634 target = emit_jump_insn (s390_execute_target (insn));
14635 /* This is important in order to keep a table jump
14636 pointing at the jump table label. Only this makes it
14637 being recognized as table jump. */
14638 JUMP_LABEL (target) = JUMP_LABEL (insn);
14639 }
14640 else
14641 target = emit_insn (s390_execute_target (insn));
14642 INSN_ADDRESSES_NEW (target, -1);
14643 }
14644
14645 /* Try to optimize prologue and epilogue further. */
14646 s390_optimize_prologue ();
14647
14648 /* Walk over the insns and do some >=z10 specific changes. */
14649 if (s390_tune >= PROCESSOR_2097_Z10)
14650 {
14651 rtx_insn *insn;
14652 bool insn_added_p = false;
14653
14654 /* The insn lengths and addresses have to be up to date for the
14655 following manipulations. */
14656 shorten_branches (get_insns ());
14657
14658 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14659 {
14660 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14661 continue;
14662
14663 if (JUMP_P (insn))
14664 insn_added_p |= s390_fix_long_loop_prediction (insn);
14665
14666 if ((GET_CODE (PATTERN (insn)) == PARALLEL
14667 || GET_CODE (PATTERN (insn)) == SET)
14668 && s390_tune == PROCESSOR_2097_Z10)
14669 insn_added_p |= s390_z10_optimize_cmp (insn);
14670 }
14671
14672 /* Adjust branches if we added new instructions. */
14673 if (insn_added_p)
14674 shorten_branches (get_insns ());
14675 }
14676
14677 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14678 if (hw_after > 0)
14679 {
14680 rtx_insn *insn;
14681
14682 /* Insert NOPs for hotpatching. */
14683 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14684 /* Emit NOPs
14685 1. inside the area covered by debug information to allow setting
14686 breakpoints at the NOPs,
14687 2. before any insn which results in an asm instruction,
14688 3. before in-function labels to avoid jumping to the NOPs, for
14689 example as part of a loop,
14690 4. before any barrier in case the function is completely empty
14691 (__builtin_unreachable ()) and has neither internal labels nor
14692 active insns.
14693 */
14694 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14695 break;
14696 /* Output a series of NOPs before the first active insn. */
14697 while (insn && hw_after > 0)
14698 {
14699 if (hw_after >= 3)
14700 {
14701 emit_insn_before (gen_nop_6_byte (), insn);
14702 hw_after -= 3;
14703 }
14704 else if (hw_after >= 2)
14705 {
14706 emit_insn_before (gen_nop_4_byte (), insn);
14707 hw_after -= 2;
14708 }
14709 else
14710 {
14711 emit_insn_before (gen_nop_2_byte (), insn);
14712 hw_after -= 1;
14713 }
14714 }
14715 }
14716 }
14717
14718 /* Return true if INSN is a fp load insn writing register REGNO. */
14719 static inline bool
14720 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14721 {
14722 rtx set;
14723 enum attr_type flag = s390_safe_attr_type (insn);
14724
14725 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14726 return false;
14727
14728 set = single_set (insn);
14729
14730 if (set == NULL_RTX)
14731 return false;
14732
14733 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14734 return false;
14735
14736 if (REGNO (SET_DEST (set)) != regno)
14737 return false;
14738
14739 return true;
14740 }
14741
14742 /* This value describes the distance to be avoided between an
14743 arithmetic fp instruction and an fp load writing the same register.
14744 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 are
14745 fine, but the exact value has to be avoided. Otherwise the FP
14746 pipeline will throw an exception causing a major penalty. */
14747 #define Z10_EARLYLOAD_DISTANCE 7
14748
14749 /* Rearrange the ready list in order to avoid the situation described
14750 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
14751 moved to the very end of the ready list. */
14752 static void
14753 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14754 {
14755 unsigned int regno;
14756 int nready = *nready_p;
14757 rtx_insn *tmp;
14758 int i;
14759 rtx_insn *insn;
14760 rtx set;
14761 enum attr_type flag;
14762 int distance;
14763
14764 /* Skip DISTANCE - 1 active insns. */
14765 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14766 distance > 0 && insn != NULL_RTX;
14767 distance--, insn = prev_active_insn (insn))
14768 if (CALL_P (insn) || JUMP_P (insn))
14769 return;
14770
14771 if (insn == NULL_RTX)
14772 return;
14773
14774 set = single_set (insn);
14775
14776 if (set == NULL_RTX || !REG_P (SET_DEST (set))
14777 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14778 return;
14779
14780 flag = s390_safe_attr_type (insn);
14781
14782 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14783 return;
14784
14785 regno = REGNO (SET_DEST (set));
14786 i = nready - 1;
14787
14788 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14789 i--;
14790
14791 if (!i)
14792 return;
14793
14794 tmp = ready[i];
14795 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14796 ready[0] = tmp;
14797 }
14798
14799 /* Returns TRUE if BB is entered via a fallthru edge and all other
14800 incoming edges are less than likely. */
14801 static bool
14802 s390_bb_fallthru_entry_likely (basic_block bb)
14803 {
14804 edge e, fallthru_edge;
14805 edge_iterator ei;
14806
14807 if (!bb)
14808 return false;
14809
14810 fallthru_edge = find_fallthru_edge (bb->preds);
14811 if (!fallthru_edge)
14812 return false;
14813
14814 FOR_EACH_EDGE (e, ei, bb->preds)
14815 if (e != fallthru_edge
14816 && e->probability >= profile_probability::likely ())
14817 return false;
14818
14819 return true;
14820 }
14821
14822 struct s390_sched_state
14823 {
14824 /* Number of insns in the group. */
14825 int group_state;
14826 /* Execution side of the group. */
14827 int side;
14828 /* Group can only hold two insns. */
14829 bool group_of_two;
14830 } s390_sched_state;
14831
14832 static struct s390_sched_state sched_state = {0, 1, false};
14833
14834 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14835 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14836 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14837 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
14838 #define S390_SCHED_ATTR_MASK_GROUPOFTWO 0x10
14839
14840 static unsigned int
14841 s390_get_sched_attrmask (rtx_insn *insn)
14842 {
14843 unsigned int mask = 0;
14844
14845 switch (s390_tune)
14846 {
14847 case PROCESSOR_2827_ZEC12:
14848 if (get_attr_zEC12_cracked (insn))
14849 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14850 if (get_attr_zEC12_expanded (insn))
14851 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14852 if (get_attr_zEC12_endgroup (insn))
14853 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14854 if (get_attr_zEC12_groupalone (insn))
14855 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14856 break;
14857 case PROCESSOR_2964_Z13:
14858 if (get_attr_z13_cracked (insn))
14859 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14860 if (get_attr_z13_expanded (insn))
14861 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14862 if (get_attr_z13_endgroup (insn))
14863 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14864 if (get_attr_z13_groupalone (insn))
14865 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14866 if (get_attr_z13_groupoftwo (insn))
14867 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14868 break;
14869 case PROCESSOR_3906_Z14:
14870 if (get_attr_z14_cracked (insn))
14871 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14872 if (get_attr_z14_expanded (insn))
14873 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14874 if (get_attr_z14_endgroup (insn))
14875 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14876 if (get_attr_z14_groupalone (insn))
14877 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14878 if (get_attr_z14_groupoftwo (insn))
14879 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14880 break;
14881 case PROCESSOR_8561_Z15:
14882 case PROCESSOR_ARCH14:
14883 if (get_attr_z15_cracked (insn))
14884 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14885 if (get_attr_z15_expanded (insn))
14886 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14887 if (get_attr_z15_endgroup (insn))
14888 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14889 if (get_attr_z15_groupalone (insn))
14890 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14891 if (get_attr_z15_groupoftwo (insn))
14892 mask |= S390_SCHED_ATTR_MASK_GROUPOFTWO;
14893 break;
14894 default:
14895 gcc_unreachable ();
14896 }
14897 return mask;
14898 }
14899
14900 static unsigned int
14901 s390_get_unit_mask (rtx_insn *insn, int *units)
14902 {
14903 unsigned int mask = 0;
14904
14905 switch (s390_tune)
14906 {
14907 case PROCESSOR_2964_Z13:
14908 *units = 4;
14909 if (get_attr_z13_unit_lsu (insn))
14910 mask |= 1 << 0;
14911 if (get_attr_z13_unit_fxa (insn))
14912 mask |= 1 << 1;
14913 if (get_attr_z13_unit_fxb (insn))
14914 mask |= 1 << 2;
14915 if (get_attr_z13_unit_vfu (insn))
14916 mask |= 1 << 3;
14917 break;
14918 case PROCESSOR_3906_Z14:
14919 *units = 4;
14920 if (get_attr_z14_unit_lsu (insn))
14921 mask |= 1 << 0;
14922 if (get_attr_z14_unit_fxa (insn))
14923 mask |= 1 << 1;
14924 if (get_attr_z14_unit_fxb (insn))
14925 mask |= 1 << 2;
14926 if (get_attr_z14_unit_vfu (insn))
14927 mask |= 1 << 3;
14928 break;
14929 case PROCESSOR_8561_Z15:
14930 case PROCESSOR_ARCH14:
14931 *units = 4;
14932 if (get_attr_z15_unit_lsu (insn))
14933 mask |= 1 << 0;
14934 if (get_attr_z15_unit_fxa (insn))
14935 mask |= 1 << 1;
14936 if (get_attr_z15_unit_fxb (insn))
14937 mask |= 1 << 2;
14938 if (get_attr_z15_unit_vfu (insn))
14939 mask |= 1 << 3;
14940 break;
14941 default:
14942 gcc_unreachable ();
14943 }
14944 return mask;
14945 }
14946
14947 static bool
14948 s390_is_fpd (rtx_insn *insn)
14949 {
14950 if (insn == NULL_RTX)
14951 return false;
14952
14953 return get_attr_z13_unit_fpd (insn) || get_attr_z14_unit_fpd (insn)
14954 || get_attr_z15_unit_fpd (insn);
14955 }
14956
14957 static bool
14958 s390_is_fxd (rtx_insn *insn)
14959 {
14960 if (insn == NULL_RTX)
14961 return false;
14962
14963 return get_attr_z13_unit_fxd (insn) || get_attr_z14_unit_fxd (insn)
14964 || get_attr_z15_unit_fxd (insn);
14965 }
14966
14967 /* Returns TRUE if INSN is a long-running instruction. */
14968 static bool
14969 s390_is_longrunning (rtx_insn *insn)
14970 {
14971 if (insn == NULL_RTX)
14972 return false;
14973
14974 return s390_is_fxd (insn) || s390_is_fpd (insn);
14975 }
14976
14977
14978 /* Return the scheduling score for INSN. The higher the score the
14979 better. The score is calculated from the OOO scheduling attributes
14980 of INSN and the scheduling state sched_state. */
14981 static int
14982 s390_sched_score (rtx_insn *insn)
14983 {
14984 unsigned int mask = s390_get_sched_attrmask (insn);
14985 int score = 0;
14986
14987 switch (sched_state.group_state)
14988 {
14989 case 0:
14990 /* Try to put insns into the first slot which would otherwise
14991 break a group. */
14992 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14993 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14994 score += 5;
14995 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14996 score += 10;
14997 break;
14998 case 1:
14999 /* Prefer not cracked insns while trying to put together a
15000 group. */
15001 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
15002 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
15003 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
15004 score += 10;
15005 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
15006 score += 5;
15007 /* If we are in a group of two already, try to schedule another
15008 group-of-two insn to avoid shortening another group. */
15009 if (sched_state.group_of_two
15010 && (mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
15011 score += 15;
15012 break;
15013 case 2:
15014 /* Prefer not cracked insns while trying to put together a
15015 group. */
15016 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
15017 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
15018 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
15019 score += 10;
15020 /* Prefer endgroup insns in the last slot. */
15021 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
15022 score += 10;
15023 /* Try to avoid group-of-two insns in the last slot as they will
15024 shorten this group as well as the next one. */
15025 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
15026 score = MAX (0, score - 15);
15027 break;
15028 }
15029
15030 if (s390_tune >= PROCESSOR_2964_Z13)
15031 {
15032 int units, i;
15033 unsigned unit_mask, m = 1;
15034
15035 unit_mask = s390_get_unit_mask (insn, &units);
15036 gcc_assert (units <= MAX_SCHED_UNITS);
15037
15038 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
15039 ago the last insn of this unit type got scheduled. This is
15040 supposed to help provide a proper instruction mix to the
15041 CPU. */
15042 for (i = 0; i < units; i++, m <<= 1)
15043 if (m & unit_mask)
15044 score += (last_scheduled_unit_distance[i][sched_state.side]
15045 * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE);
15046
15047 int other_side = 1 - sched_state.side;
15048
15049 /* Try to delay long-running insns when side is busy. */
15050 if (s390_is_longrunning (insn))
15051 {
15052 if (s390_is_fxd (insn))
15053 {
15054 if (fxd_longrunning[sched_state.side]
15055 && fxd_longrunning[other_side]
15056 <= fxd_longrunning[sched_state.side])
15057 score = MAX (0, score - 10);
15058
15059 else if (fxd_longrunning[other_side]
15060 >= fxd_longrunning[sched_state.side])
15061 score += 10;
15062 }
15063
15064 if (s390_is_fpd (insn))
15065 {
15066 if (fpd_longrunning[sched_state.side]
15067 && fpd_longrunning[other_side]
15068 <= fpd_longrunning[sched_state.side])
15069 score = MAX (0, score - 10);
15070
15071 else if (fpd_longrunning[other_side]
15072 >= fpd_longrunning[sched_state.side])
15073 score += 10;
15074 }
15075 }
15076 }
15077
15078 return score;
15079 }
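
/* Editorial sketch (illustrative only, not used by the backend) of the
   instruction-mix term added above: the longer a unit has been idle on the
   current side, the larger its contribution, capped at MAX_SCHED_MIX_SCORE
   once the idle distance reaches MAX_SCHED_MIX_DISTANCE (both macros are
   defined earlier in this file; the distance itself is capped in
   s390_sched_variable_issue below).  */
static int ATTRIBUTE_UNUSED
example_mix_score (int idle_distance)
{
  return idle_distance * MAX_SCHED_MIX_SCORE / MAX_SCHED_MIX_DISTANCE;
}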
15080
15081 /* This function is called via hook TARGET_SCHED_REORDER before
15082 issuing one insn from list READY which contains *NREADYP entries.
15083 For target z10 it reorders load instructions to avoid early load
15084 conflicts in the floating point pipeline. */
15085 static int
15086 s390_sched_reorder (FILE *file, int verbose,
15087 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
15088 {
15089 if (s390_tune == PROCESSOR_2097_Z10
15090 && reload_completed
15091 && *nreadyp > 1)
15092 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
15093
15094 if (s390_tune >= PROCESSOR_2827_ZEC12
15095 && reload_completed
15096 && *nreadyp > 1)
15097 {
15098 int i;
15099 int last_index = *nreadyp - 1;
15100 int max_index = -1;
15101 int max_score = -1;
15102 rtx_insn *tmp;
15103
15104 /* Just move the insn with the highest score to the top (the
15105 end) of the list. A full sort is not needed since a conflict
15106 in the hazard recognition cannot happen. So the top insn in
15107 the ready list will always be taken. */
15108 for (i = last_index; i >= 0; i--)
15109 {
15110 int score;
15111
15112 if (recog_memoized (ready[i]) < 0)
15113 continue;
15114
15115 score = s390_sched_score (ready[i]);
15116 if (score > max_score)
15117 {
15118 max_score = score;
15119 max_index = i;
15120 }
15121 }
15122
15123 if (max_index != -1)
15124 {
15125 if (max_index != last_index)
15126 {
15127 tmp = ready[max_index];
15128 ready[max_index] = ready[last_index];
15129 ready[last_index] = tmp;
15130
15131 if (verbose > 5)
15132 fprintf (file,
15133 ";;\t\tBACKEND: move insn %d to the top of list\n",
15134 INSN_UID (ready[last_index]));
15135 }
15136 else if (verbose > 5)
15137 fprintf (file,
15138 ";;\t\tBACKEND: best insn %d already on top\n",
15139 INSN_UID (ready[last_index]));
15140 }
15141
15142 if (verbose > 5)
15143 {
15144 fprintf (file, "ready list ooo attributes - sched state: %d\n",
15145 sched_state.group_state);
15146
15147 for (i = last_index; i >= 0; i--)
15148 {
15149 unsigned int sched_mask;
15150 rtx_insn *insn = ready[i];
15151
15152 if (recog_memoized (insn) < 0)
15153 continue;
15154
15155 sched_mask = s390_get_sched_attrmask (insn);
15156 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
15157 INSN_UID (insn),
15158 s390_sched_score (insn));
15159 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
15160 ((M) & sched_mask) ? #ATTR : "");
15161 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15162 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15163 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15164 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15165 #undef PRINT_SCHED_ATTR
15166 if (s390_tune >= PROCESSOR_2964_Z13)
15167 {
15168 unsigned int unit_mask, m = 1;
15169 int units, j;
15170
15171 unit_mask = s390_get_unit_mask (insn, &units);
15172 fprintf (file, "(units:");
15173 for (j = 0; j < units; j++, m <<= 1)
15174 if (m & unit_mask)
15175 fprintf (file, " u%d", j);
15176 fprintf (file, ")");
15177 }
15178 fprintf (file, "\n");
15179 }
15180 }
15181 }
15182
15183 return s390_issue_rate ();
15184 }
15185
15186
15187 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
15188 the scheduler has issued INSN. It stores the last issued insn into
15189 last_scheduled_insn in order to make it available for
15190 s390_sched_reorder. */
15191 static int
15192 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
15193 {
15194 last_scheduled_insn = insn;
15195
15196 bool ends_group = false;
15197
15198 if (s390_tune >= PROCESSOR_2827_ZEC12
15199 && reload_completed
15200 && recog_memoized (insn) >= 0)
15201 {
15202 unsigned int mask = s390_get_sched_attrmask (insn);
15203
15204 if ((mask & S390_SCHED_ATTR_MASK_GROUPOFTWO) != 0)
15205 sched_state.group_of_two = true;
15206
15207 /* If this is a group-of-two insn, we actually ended the last group
15208 and this insn is the first one of the new group. */
15209 if (sched_state.group_state == 2 && sched_state.group_of_two)
15210 {
15211 sched_state.side = sched_state.side ? 0 : 1;
15212 sched_state.group_state = 0;
15213 }
15214
15215 /* Longrunning and side bookkeeping. */
15216 for (int i = 0; i < 2; i++)
15217 {
15218 fxd_longrunning[i] = MAX (0, fxd_longrunning[i] - 1);
15219 fpd_longrunning[i] = MAX (0, fpd_longrunning[i] - 1);
15220 }
15221
15222 unsigned latency = insn_default_latency (insn);
15223 if (s390_is_longrunning (insn))
15224 {
15225 if (s390_is_fxd (insn))
15226 fxd_longrunning[sched_state.side] = latency;
15227 else
15228 fpd_longrunning[sched_state.side] = latency;
15229 }
15230
15231 if (s390_tune >= PROCESSOR_2964_Z13)
15232 {
15233 int units, i;
15234 unsigned unit_mask, m = 1;
15235
15236 unit_mask = s390_get_unit_mask (insn, &units);
15237 gcc_assert (units <= MAX_SCHED_UNITS);
15238
15239 for (i = 0; i < units; i++, m <<= 1)
15240 if (m & unit_mask)
15241 last_scheduled_unit_distance[i][sched_state.side] = 0;
15242 else if (last_scheduled_unit_distance[i][sched_state.side]
15243 < MAX_SCHED_MIX_DISTANCE)
15244 last_scheduled_unit_distance[i][sched_state.side]++;
15245 }
15246
15247 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
15248 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0
15249 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0
15250 || (mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
15251 {
15252 sched_state.group_state = 0;
15253 ends_group = true;
15254 }
15255 else
15256 {
15257 switch (sched_state.group_state)
15258 {
15259 case 0:
15260 sched_state.group_state++;
15261 break;
15262 case 1:
15263 sched_state.group_state++;
15264 if (sched_state.group_of_two)
15265 {
15266 sched_state.group_state = 0;
15267 ends_group = true;
15268 }
15269 break;
15270 case 2:
15271 sched_state.group_state++;
15272 ends_group = true;
15273 break;
15274 }
15275 }
15276
15277 if (verbose > 5)
15278 {
15279 unsigned int sched_mask;
15280
15281 sched_mask = s390_get_sched_attrmask (insn);
15282
15283 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
15284 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
15285 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
15286 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
15287 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
15288 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
15289 #undef PRINT_SCHED_ATTR
15290
15291 if (s390_tune >= PROCESSOR_2964_Z13)
15292 {
15293 unsigned int unit_mask, m = 1;
15294 int units, j;
15295
15296 unit_mask = s390_get_unit_mask (insn, &units);
15297 fprintf (file, "(units:");
15298 for (j = 0; j < units; j++, m <<= 1)
15299 if (m & unit_mask)
15300 fprintf (file, " %d", j);
15301 fprintf (file, ")");
15302 }
15303 fprintf (file, " sched state: %d\n", sched_state.group_state);
15304
15305 if (s390_tune >= PROCESSOR_2964_Z13)
15306 {
15307 int units, j;
15308
15309 s390_get_unit_mask (insn, &units);
15310
15311 fprintf (file, ";;\t\tBACKEND: units on this side unused for: ");
15312 for (j = 0; j < units; j++)
15313 fprintf (file, "%d:%d ", j,
15314 last_scheduled_unit_distance[j][sched_state.side]);
15315 fprintf (file, "\n");
15316 }
15317 }
15318
15319 /* If this insn ended a group, the next will be on the other side. */
15320 if (ends_group)
15321 {
15322 sched_state.group_state = 0;
15323 sched_state.side = sched_state.side ? 0 : 1;
15324 sched_state.group_of_two = false;
15325 }
15326 }
15327
15328 if (GET_CODE (PATTERN (insn)) != USE
15329 && GET_CODE (PATTERN (insn)) != CLOBBER)
15330 return more - 1;
15331 else
15332 return more;
15333 }
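
/* Editorial sketch (illustrative only, not used by the backend) of the
   dispatch-group state machine driven by the switch above.  BREAKS_GROUP
   stands for the cracked/expanded/groupalone/endgroup case that
   unconditionally closes the current group; GROUP_OF_TWO mirrors
   sched_state.group_of_two.  */
static int ATTRIBUTE_UNUSED
example_next_group_state (int group_state, bool breaks_group,
			  bool group_of_two)
{
  if (breaks_group)
    return 0;
  if (group_state == 1 && group_of_two)
    return 0;
  return (group_state + 1) % 3;
}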
15334
15335 static void
15336 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
15337 int verbose ATTRIBUTE_UNUSED,
15338 int max_ready ATTRIBUTE_UNUSED)
15339 {
15340 /* If the next basic block is most likely entered via a fallthru edge
15341 we keep the last sched state. Otherwise we start a new group.
15342 The scheduler traverses basic blocks in "instruction stream" ordering,
15343 so if we see a fallthru edge here, sched_state will be that of its
15344 source block.
15345
15346 current_sched_info->prev_head is the insn before the first insn of the
15347 block of insns to be scheduled.
15348 */
15349 rtx_insn *insn = current_sched_info->prev_head
15350 ? NEXT_INSN (current_sched_info->prev_head) : NULL;
15351 basic_block bb = insn ? BLOCK_FOR_INSN (insn) : NULL;
15352 if (s390_tune < PROCESSOR_2964_Z13 || !s390_bb_fallthru_entry_likely (bb))
15353 {
15354 last_scheduled_insn = NULL;
15355 memset (last_scheduled_unit_distance, 0,
15356 MAX_SCHED_UNITS * NUM_SIDES * sizeof (int));
15357 sched_state.group_state = 0;
15358 sched_state.group_of_two = false;
15359 }
15360 }
15361
15362 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
15363 the number of times struct loop *loop should be unrolled when tuning for
15364 CPUs with a built-in stride prefetcher.
15365 The loop body is analyzed for memory accesses by walking every rtx of
15366 the loop. Depending on the loop_depth and the number of memory accesses
15367 a new number <= nunroll is returned to improve the behavior of the
15368 hardware prefetch unit. */
15369 static unsigned
15370 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
15371 {
15372 basic_block *bbs;
15373 rtx_insn *insn;
15374 unsigned i;
15375 unsigned mem_count = 0;
15376
15377 if (s390_tune < PROCESSOR_2097_Z10)
15378 return nunroll;
15379
15380 /* Count the number of memory references within the loop body. */
15381 bbs = get_loop_body (loop);
15382 subrtx_iterator::array_type array;
15383 for (i = 0; i < loop->num_nodes; i++)
15384 FOR_BB_INSNS (bbs[i], insn)
15385 if (INSN_P (insn) && INSN_CODE (insn) != -1)
15386 {
15387 rtx set;
15388
15389 /* The runtime of small loops with memory block operations
15390 will be determined by the memory operation. Unrolling
15391 doesn't help here. Measurements to confirm this were
15392 only done on recent CPU levels, so better not change
15393 anything for older CPUs. */
15394 if (s390_tune >= PROCESSOR_2964_Z13
15395 && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
15396 && ((set = single_set (insn)) != NULL_RTX)
15397 && ((GET_MODE (SET_DEST (set)) == BLKmode
15398 && (GET_MODE (SET_SRC (set)) == BLKmode
15399 || SET_SRC (set) == const0_rtx))
15400 || (GET_CODE (SET_SRC (set)) == COMPARE
15401 && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
15402 && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
15403 {
15404 free (bbs);
15405 return 1;
15406 }
15407
15408 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
15409 if (MEM_P (*iter))
15410 mem_count += 1;
15411 }
15412 free (bbs);
15413
15414 /* Prevent a division by zero below; nunroll does not need adjusting in this case. */
15415 if (mem_count == 0)
15416 return nunroll;
15417
15418 switch (loop_depth (loop))
15419 {
15420 case 1:
15421 return MIN (nunroll, 28 / mem_count);
15422 case 2:
15423 return MIN (nunroll, 22 / mem_count);
15424 default:
15425 return MIN (nunroll, 16 / mem_count);
15426 }
15427 }
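
/* Editorial example (illustrative only, not used by the backend) of the
   heuristic above: the per-iteration memory budget shrinks with loop depth,
   and the unroll factor is limited so that budget / mem_count copies fit.
   An innermost loop with 7 memory references, for instance, is limited to
   MIN (nunroll, 28 / 7) == MIN (nunroll, 4) copies.  */
static unsigned ATTRIBUTE_UNUSED
example_unroll_limit (unsigned nunroll, unsigned mem_count, unsigned depth)
{
  if (mem_count == 0)
    return nunroll;
  unsigned budget = (depth == 1) ? 28 : (depth == 2) ? 22 : 16;
  return MIN (nunroll, budget / mem_count);
}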
15428
15429 /* Restore the current options. This is a hook function and also called
15430 internally. */
15431
15432 static void
15433 s390_function_specific_restore (struct gcc_options *opts,
15434 struct gcc_options */* opts_set */,
15435 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
15436 {
15437 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
15438 }
15439
15440 static void
15441 s390_default_align (struct gcc_options *opts)
15442 {
15443 /* Set the default function alignment to 16 in order to get rid of
15444 some unwanted performance effects. */
15445 if (opts->x_flag_align_functions && !opts->x_str_align_functions
15446 && opts->x_s390_tune >= PROCESSOR_2964_Z13)
15447 opts->x_str_align_functions = "16";
15448 }
15449
15450 static void
15451 s390_override_options_after_change (void)
15452 {
15453 s390_default_align (&global_options);
15454 }
15455
15456 static void
15457 s390_option_override_internal (struct gcc_options *opts,
15458 struct gcc_options *opts_set)
15459 {
15460 /* Architecture mode defaults according to ABI. */
15461 if (!(opts_set->x_target_flags & MASK_ZARCH))
15462 {
15463 if (TARGET_64BIT)
15464 opts->x_target_flags |= MASK_ZARCH;
15465 else
15466 opts->x_target_flags &= ~MASK_ZARCH;
15467 }
15468
15469 /* Set the march default in case it hasn't been specified on cmdline. */
15470 if (!opts_set->x_s390_arch)
15471 opts->x_s390_arch = PROCESSOR_2064_Z900;
15472
15473 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
15474
15475 /* Determine processor to tune for. */
15476 if (!opts_set->x_s390_tune)
15477 opts->x_s390_tune = opts->x_s390_arch;
15478
15479 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
15480
15481 /* Sanity checks. */
15482 if (opts->x_s390_arch == PROCESSOR_NATIVE
15483 || opts->x_s390_tune == PROCESSOR_NATIVE)
15484 gcc_unreachable ();
15485 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
15486 error ("64-bit ABI not supported in ESA/390 mode");
15487
15488 if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
15489 || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
15490 || opts->x_s390_function_return == indirect_branch_thunk_inline
15491 || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
15492 || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
15493 error ("thunk-inline is only supported with %<-mindirect-branch-jump%>");
15494
15495 if (opts->x_s390_indirect_branch != indirect_branch_keep)
15496 {
15497 if (!opts_set->x_s390_indirect_branch_call)
15498 opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;
15499
15500 if (!opts_set->x_s390_indirect_branch_jump)
15501 opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
15502 }
15503
15504 if (opts->x_s390_function_return != indirect_branch_keep)
15505 {
15506 if (!opts_set->x_s390_function_return_reg)
15507 opts->x_s390_function_return_reg = opts->x_s390_function_return;
15508
15509 if (!opts_set->x_s390_function_return_mem)
15510 opts->x_s390_function_return_mem = opts->x_s390_function_return;
15511 }
15512
15513 /* Enable hardware transactions if available and not explicitly
15514 disabled by the user, e.g. with -m31 -march=zEC12 -mzarch. */
15515 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
15516 {
15517 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
15518 opts->x_target_flags |= MASK_OPT_HTM;
15519 else
15520 opts->x_target_flags &= ~MASK_OPT_HTM;
15521 }
15522
15523 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
15524 {
15525 if (TARGET_OPT_VX_P (opts->x_target_flags))
15526 {
15527 if (!TARGET_CPU_VX_P (opts))
15528 error ("hardware vector support not available on %s",
15529 processor_table[(int)opts->x_s390_arch].name);
15530 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15531 error ("hardware vector support not available with "
15532 "%<-msoft-float%>");
15533 }
15534 }
15535 else
15536 {
15537 if (TARGET_CPU_VX_P (opts))
15538 /* Enable vector support if available and not explicitly disabled
15539 by the user, e.g. with -m31 -march=z13 -mzarch. */
15540 opts->x_target_flags |= MASK_OPT_VX;
15541 else
15542 opts->x_target_flags &= ~MASK_OPT_VX;
15543 }
15544
15545 /* Use hardware DFP if available and not explicitly disabled by
15546 the user, e.g. with -m31 -march=z10 -mzarch. */
15547 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
15548 {
15549 if (TARGET_DFP_P (opts))
15550 opts->x_target_flags |= MASK_HARD_DFP;
15551 else
15552 opts->x_target_flags &= ~MASK_HARD_DFP;
15553 }
15554
15555 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
15556 {
15557 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
15558 {
15559 if (!TARGET_CPU_DFP_P (opts))
15560 error ("hardware decimal floating point instructions"
15561 " not available on %s",
15562 processor_table[(int)opts->x_s390_arch].name);
15563 if (!TARGET_ZARCH_P (opts->x_target_flags))
15564 error ("hardware decimal floating point instructions"
15565 " not available in ESA/390 mode");
15566 }
15567 else
15568 opts->x_target_flags &= ~MASK_HARD_DFP;
15569 }
15570
15571 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
15572 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
15573 {
15574 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
15575 && TARGET_HARD_DFP_P (opts->x_target_flags))
15576 error ("%<-mhard-dfp%> can%'t be used in conjunction with "
15577 "%<-msoft-float%>");
15578
15579 opts->x_target_flags &= ~MASK_HARD_DFP;
15580 }
15581
15582 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
15583 && TARGET_PACKED_STACK_P (opts->x_target_flags)
15584 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
15585 error ("%<-mbackchain%> %<-mpacked-stack%> %<-mhard-float%> are not "
15586 "supported in combination");
15587
15588 if (opts->x_s390_stack_size)
15589 {
15590 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
15591 error ("stack size must be greater than the stack guard value");
15592 else if (opts->x_s390_stack_size > 1 << 16)
15593 error ("stack size must not be greater than 64k");
15594 }
15595 else if (opts->x_s390_stack_guard)
15596 error ("%<-mstack-guard%> implies use of %<-mstack-size%>");
15597
15598 /* Our implementation of the stack probe requires the probe interval
15599 to be used as displacement in an address operand. The maximum
15600 probe interval currently is 64k. This would exceed short
15601 displacements. Trim that value down to 4k if that happens. This
15602 might result in too many probes being generated, but only on the
15603 oldest supported machine level, z900. */
15604 if (!DISP_IN_RANGE ((1 << param_stack_clash_protection_probe_interval)))
15605 param_stack_clash_protection_probe_interval = 12;
15606
15607 #if TARGET_TPF != 0
15608 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_check))
15609 error ("-mtpf-trace-hook-prologue-check requires integer in range 0..4095");
15610
15611 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_prologue_target))
15612 error ("-mtpf-trace-hook-prologue-target requires integer in range 0..4095");
15613
15614 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_check))
15615 error ("-mtpf-trace-hook-epilogue-check requires integer in range 0..4095");
15616
15617 if (!CONST_OK_FOR_J (opts->x_s390_tpf_trace_hook_epilogue_target))
15618 error ("-mtpf-trace-hook-epilogue-target requires integer in range 0..4095");
15619
15620 if (s390_tpf_trace_skip)
15621 {
15622 opts->x_s390_tpf_trace_hook_prologue_target = TPF_TRACE_PROLOGUE_SKIP_TARGET;
15623 opts->x_s390_tpf_trace_hook_epilogue_target = TPF_TRACE_EPILOGUE_SKIP_TARGET;
15624 }
15625 #endif
15626
15627 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
15628 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
15629 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
15630 #endif
15631
15632 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
15633 {
15634 SET_OPTION_IF_UNSET (opts, opts_set, param_max_unrolled_insns,
15635 100);
15636 SET_OPTION_IF_UNSET (opts, opts_set, param_max_unroll_times, 32);
15637 SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peeled_insns,
15638 2000);
15639 SET_OPTION_IF_UNSET (opts, opts_set, param_max_completely_peel_times,
15640 64);
15641 }
15642
15643 SET_OPTION_IF_UNSET (opts, opts_set, param_max_pending_list_length,
15644 256);
15645 /* Values for loop prefetching. */
15646 SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_line_size, 256);
15647 SET_OPTION_IF_UNSET (opts, opts_set, param_l1_cache_size, 128);
15648 /* s390 has more than 2 cache levels and their sizes are much larger.
15649 Since we are always running virtualized, assume that we only get a
15650 small part of the caches above L1. */
15651 SET_OPTION_IF_UNSET (opts, opts_set, param_l2_cache_size, 1500);
15652 SET_OPTION_IF_UNSET (opts, opts_set,
15653 param_prefetch_min_insn_to_mem_ratio, 2);
15654 SET_OPTION_IF_UNSET (opts, opts_set, param_simultaneous_prefetches, 6);
15655
15656 /* Use the alternative scheduling-pressure algorithm by default. */
15657 SET_OPTION_IF_UNSET (opts, opts_set, param_sched_pressure_algorithm, 2);
15658 SET_OPTION_IF_UNSET (opts, opts_set, param_min_vect_loop_bound, 2);
15659
15660 /* Set the default alignment. */
15661 s390_default_align (opts);
15662
15663 /* Call target specific restore function to do post-init work. At the moment,
15664 this just sets opts->x_s390_cost_pointer. */
15665 s390_function_specific_restore (opts, opts_set, NULL);
15666
15667 /* Check whether -mfentry is supported. It cannot be used in 31-bit mode,
15668 because 31-bit PLT stubs assume that %r12 contains the GOT address, which is
15669 not the case when the code runs before the prolog. */
15670 if (opts->x_flag_fentry && !TARGET_64BIT)
15671 error ("%<-mfentry%> is supported only for 64-bit CPUs");
15672 }
15673
15674 static void
15675 s390_option_override (void)
15676 {
15677 unsigned int i;
15678 cl_deferred_option *opt;
15679 vec<cl_deferred_option> *v =
15680 (vec<cl_deferred_option> *) s390_deferred_options;
15681
15682 if (v)
15683 FOR_EACH_VEC_ELT (*v, i, opt)
15684 {
15685 switch (opt->opt_index)
15686 {
15687 case OPT_mhotpatch_:
15688 {
15689 int val1;
15690 int val2;
15691 char *s = strtok (ASTRDUP (opt->arg), ",");
15692 char *t = strtok (NULL, "\0");
15693
15694 if (t != NULL)
15695 {
15696 val1 = integral_argument (s);
15697 val2 = integral_argument (t);
15698 }
15699 else
15700 {
15701 val1 = -1;
15702 val2 = -1;
15703 }
15704 if (val1 == -1 || val2 == -1)
15705 {
15706 /* Argument is not a plain number. */
15707 error ("arguments to %qs should be non-negative integers",
15708 "-mhotpatch=n,m");
15709 break;
15710 }
15711 else if (val1 > s390_hotpatch_hw_max
15712 || val2 > s390_hotpatch_hw_max)
15713 {
15714 error ("argument to %qs is too large (max. %d)",
15715 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15716 break;
15717 }
15718 s390_hotpatch_hw_before_label = val1;
15719 s390_hotpatch_hw_after_label = val2;
15720 break;
15721 }
15722 default:
15723 gcc_unreachable ();
15724 }
15725 }
15726
15727 /* Set up function hooks. */
15728 init_machine_status = s390_init_machine_status;
15729
15730 s390_option_override_internal (&global_options, &global_options_set);
15731
15732 /* Save the initial options in case the user does function specific
15733 options. */
15734 target_option_default_node
15735 = build_target_option_node (&global_options, &global_options_set);
15736 target_option_current_node = target_option_default_node;
15737
15738 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15739 requires the arch flags to be evaluated already. Since prefetching
15740 is beneficial on s390, we enable it if available. */
15741 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15742 flag_prefetch_loop_arrays = 1;
15743
15744 if (!s390_pic_data_is_text_relative && !flag_pic)
15745 error ("%<-mno-pic-data-is-text-relative%> cannot be used without "
15746 "%<-fpic%>/%<-fPIC%>");
15747
15748 if (TARGET_TPF)
15749 {
15750 /* Don't emit DWARF3/4 unless specifically selected. The TPF
15751 debuggers do not yet support DWARF 3/4. */
15752 if (!OPTION_SET_P (dwarf_strict))
15753 dwarf_strict = 1;
15754 if (!OPTION_SET_P (dwarf_version))
15755 dwarf_version = 2;
15756 }
15757 }
15758
15759 #if S390_USE_TARGET_ATTRIBUTE
15760 /* Inner function to process the attribute((target(...))): take an argument and
15761 set the current options from the argument. If we have a list, recursively go
15762 over the list. */
15763
15764 static bool
15765 s390_valid_target_attribute_inner_p (tree args,
15766 struct gcc_options *opts,
15767 struct gcc_options *new_opts_set,
15768 bool force_pragma)
15769 {
15770 char *next_optstr;
15771 bool ret = true;
15772
15773 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
15774 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
15775 static const struct
15776 {
15777 const char *string;
15778 size_t len;
15779 int opt;
15780 int has_arg;
15781 int only_as_pragma;
15782 } attrs[] = {
15783 /* enum options */
15784 S390_ATTRIB ("arch=", OPT_march_, 1),
15785 S390_ATTRIB ("tune=", OPT_mtune_, 1),
15786 /* uinteger options */
15787 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15788 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15789 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15790 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15791 /* flag options */
15792 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15793 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15794 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15795 S390_ATTRIB ("htm", OPT_mhtm, 0),
15796 S390_ATTRIB ("vx", OPT_mvx, 0),
15797 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15798 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15799 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15800 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15801 S390_PRAGMA ("zvector", OPT_mzvector, 0),
15802 /* boolean options */
15803 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15804 };
15805 #undef S390_ATTRIB
15806 #undef S390_PRAGMA
15807
15808 /* If this is a list, recurse to get the options. */
15809 if (TREE_CODE (args) == TREE_LIST)
15810 {
15811 bool ret = true;
15812 int num_pragma_values;
15813 int i;
15814
15815 /* Note: attribs.c:decl_attributes prepends the values from
15816 current_target_pragma to the list of target attributes. To determine
15817 whether we're looking at a value of the attribute or the pragma we
15818 assume that the first [list_length (current_target_pragma)] values in
15819 the list are the values from the pragma. */
15820 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15821 ? list_length (current_target_pragma) : 0;
15822 for (i = 0; args; args = TREE_CHAIN (args), i++)
15823 {
15824 bool is_pragma;
15825
15826 is_pragma = (force_pragma || i < num_pragma_values);
15827 if (TREE_VALUE (args)
15828 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15829 opts, new_opts_set,
15830 is_pragma))
15831 {
15832 ret = false;
15833 }
15834 }
15835 return ret;
15836 }
15837
15838 else if (TREE_CODE (args) != STRING_CST)
15839 {
15840 error ("attribute %<target%> argument not a string");
15841 return false;
15842 }
15843
15844 /* Handle multiple arguments separated by commas. */
15845 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15846
15847 while (next_optstr && *next_optstr != '\0')
15848 {
15849 char *p = next_optstr;
15850 char *orig_p = p;
15851 char *comma = strchr (next_optstr, ',');
15852 size_t len, opt_len;
15853 int opt;
15854 bool opt_set_p;
15855 char ch;
15856 unsigned i;
15857 int mask = 0;
15858 enum cl_var_type var_type;
15859 bool found;
15860
15861 if (comma)
15862 {
15863 *comma = '\0';
15864 len = comma - next_optstr;
15865 next_optstr = comma + 1;
15866 }
15867 else
15868 {
15869 len = strlen (p);
15870 next_optstr = NULL;
15871 }
15872
15873 /* Recognize no-xxx. */
15874 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15875 {
15876 opt_set_p = false;
15877 p += 3;
15878 len -= 3;
15879 }
15880 else
15881 opt_set_p = true;
15882
15883 /* Find the option. */
15884 ch = *p;
15885 found = false;
15886 for (i = 0; i < ARRAY_SIZE (attrs); i++)
15887 {
15888 opt_len = attrs[i].len;
15889 if (ch == attrs[i].string[0]
15890 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15891 && memcmp (p, attrs[i].string, opt_len) == 0)
15892 {
15893 opt = attrs[i].opt;
15894 if (!opt_set_p && cl_options[opt].cl_reject_negative)
15895 continue;
15896 mask = cl_options[opt].var_value;
15897 var_type = cl_options[opt].var_type;
15898 found = true;
15899 break;
15900 }
15901 }
15902
15903 /* Process the option. */
15904 if (!found)
15905 {
15906 error ("attribute(target(\"%s\")) is unknown", orig_p);
15907 return false;
15908 }
15909 else if (attrs[i].only_as_pragma && !force_pragma)
15910 {
15911 /* Value is not allowed for the target attribute. */
15912 error ("value %qs is not supported by attribute %<target%>",
15913 attrs[i].string);
15914 return false;
15915 }
15916
15917 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15918 {
15919 if (var_type == CLVC_BIT_CLEAR)
15920 opt_set_p = !opt_set_p;
15921
15922 if (opt_set_p)
15923 opts->x_target_flags |= mask;
15924 else
15925 opts->x_target_flags &= ~mask;
15926 new_opts_set->x_target_flags |= mask;
15927 }
15928
15929 else if (cl_options[opt].var_type == CLVC_INTEGER)
15930 {
15931 int value;
15932
15933 if (cl_options[opt].cl_uinteger)
15934 {
15935 /* Unsigned integer argument. Code based on the function
15936 decode_cmdline_option () in opts-common.c. */
15937 value = integral_argument (p + opt_len);
15938 }
15939 else
15940 value = (opt_set_p) ? 1 : 0;
15941
15942 if (value != -1)
15943 {
15944 struct cl_decoded_option decoded;
15945
15946 /* Value range check; only implemented for numeric and boolean
15947 options at the moment. */
15948 generate_option (opt, NULL, value, CL_TARGET, &decoded);
15949 s390_handle_option (opts, new_opts_set, &decoded, input_location);
15950 set_option (opts, new_opts_set, opt, value,
15951 p + opt_len, DK_UNSPECIFIED, input_location,
15952 global_dc);
15953 }
15954 else
15955 {
15956 error ("attribute(target(\"%s\")) is unknown", orig_p);
15957 ret = false;
15958 }
15959 }
15960
15961 else if (cl_options[opt].var_type == CLVC_ENUM)
15962 {
15963 bool arg_ok;
15964 int value;
15965
15966 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15967 if (arg_ok)
15968 set_option (opts, new_opts_set, opt, value,
15969 p + opt_len, DK_UNSPECIFIED, input_location,
15970 global_dc);
15971 else
15972 {
15973 error ("attribute(target(\"%s\")) is unknown", orig_p);
15974 ret = false;
15975 }
15976 }
15977
15978 else
15979 gcc_unreachable ();
15980 }
15981 return ret;
15982 }
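
/* Editorial usage illustration (hypothetical declaration, not part of the
   backend): a target attribute string of the shape parsed above.  Entries
   are separated by commas, flag options can be negated with a "no-" prefix,
   and "name=value" forms feed the uinteger/enum entries of the table.  */
extern void example_attr_target_fn (void)
  __attribute__ ((target ("arch=z13,no-vx")));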
15983
15984 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
15985
15986 tree
15987 s390_valid_target_attribute_tree (tree args,
15988 struct gcc_options *opts,
15989 const struct gcc_options *opts_set,
15990 bool force_pragma)
15991 {
15992 tree t = NULL_TREE;
15993 struct gcc_options new_opts_set;
15994
15995 memset (&new_opts_set, 0, sizeof (new_opts_set));
15996
15997 /* Process each of the options on the chain. */
15998 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15999 force_pragma))
16000 return error_mark_node;
16001
16002 /* If some option was set (even if it has not changed), rerun
16003 s390_option_override_internal, and then save the options away. */
16004 if (new_opts_set.x_target_flags
16005 || new_opts_set.x_s390_arch
16006 || new_opts_set.x_s390_tune
16007 || new_opts_set.x_s390_stack_guard
16008 || new_opts_set.x_s390_stack_size
16009 || new_opts_set.x_s390_branch_cost
16010 || new_opts_set.x_s390_warn_framesize
16011 || new_opts_set.x_s390_warn_dynamicstack_p)
16012 {
16013 const unsigned char *src = (const unsigned char *)opts_set;
16014 unsigned char *dest = (unsigned char *)&new_opts_set;
16015 unsigned int i;
16016
16017 /* Merge the original option flags into the new ones. */
16018 for (i = 0; i < sizeof(*opts_set); i++)
16019 dest[i] |= src[i];
16020
16021 /* Do any overrides, such as arch=xxx or tune=xxx support. */
16022 s390_option_override_internal (opts, &new_opts_set);
16023 /* Save the current options unless we are validating options for
16024 #pragma. */
16025 t = build_target_option_node (opts, &new_opts_set);
16026 }
16027 return t;
16028 }
16029
16030 /* Hook to validate attribute((target("string"))). */
16031
16032 static bool
16033 s390_valid_target_attribute_p (tree fndecl,
16034 tree ARG_UNUSED (name),
16035 tree args,
16036 int ARG_UNUSED (flags))
16037 {
16038 struct gcc_options func_options, func_options_set;
16039 tree new_target, new_optimize;
16040 bool ret = true;
16041
16042 /* attribute((target("default"))) does nothing, beyond
16043 affecting multi-versioning. */
16044 if (TREE_VALUE (args)
16045 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
16046 && TREE_CHAIN (args) == NULL_TREE
16047 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
16048 return true;
16049
16050 tree old_optimize
16051 = build_optimization_node (&global_options, &global_options_set);
16052
16053 /* Get the optimization options of the current function. */
16054 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
16055
16056 if (!func_optimize)
16057 func_optimize = old_optimize;
16058
16059 /* Init func_options. */
16060 memset (&func_options, 0, sizeof (func_options));
16061 init_options_struct (&func_options, NULL);
16062 lang_hooks.init_options_struct (&func_options);
16063 memset (&func_options_set, 0, sizeof (func_options_set));
16064
16065 cl_optimization_restore (&func_options, &func_options_set,
16066 TREE_OPTIMIZATION (func_optimize));
16067
16068 /* Initialize func_options to the default before its target options can
16069 be set. */
16070 cl_target_option_restore (&func_options, &func_options_set,
16071 TREE_TARGET_OPTION (target_option_default_node));
16072
16073 new_target = s390_valid_target_attribute_tree (args, &func_options,
16074 &global_options_set,
16075 (args ==
16076 current_target_pragma));
16077 new_optimize = build_optimization_node (&func_options, &func_options_set);
16078 if (new_target == error_mark_node)
16079 ret = false;
16080 else if (fndecl && new_target)
16081 {
16082 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
16083 if (old_optimize != new_optimize)
16084 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
16085 }
16086 return ret;
16087 }
16088
16089 /* Hook to determine if one function can safely inline another. */
16090
16091 static bool
16092 s390_can_inline_p (tree caller, tree callee)
16093 {
16094 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
16095 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
16096
16097 if (!callee_tree)
16098 callee_tree = target_option_default_node;
16099 if (!caller_tree)
16100 caller_tree = target_option_default_node;
16101 if (callee_tree == caller_tree)
16102 return true;
16103
16104 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
16105 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
16106 bool ret = true;
16107
16108 if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
16109 != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
16110 ret = false;
16111
16112 /* Don't inline functions to be compiled for a more recent arch into a
16113 function for an older arch. */
16114 else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
16115 ret = false;
16116
16117 /* Inlining a hard float function into a soft float function is only
16118 allowed if the hard float function doesn't actually make use of
16119 floating point.
16120
16121 We are called from FEs for multi-versioning call optimization, so
16122 beware of ipa_fn_summaries not available. */
16123 else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
16124 && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
16125 || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
16126 && TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
16127 && (! ipa_fn_summaries
16128 || ipa_fn_summaries->get
16129 (cgraph_node::get (callee))->fp_expressions))
16130 ret = false;
16131
16132 return ret;
16133 }
16134 #endif
16135
16136 /* Set VAL to correct enum value according to the indirect-branch or
16137 function-return attribute in ATTR. */
16138
16139 static inline void
16140 s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
16141 {
16142 const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
16143 if (strcmp (str, "keep") == 0)
16144 *val = indirect_branch_keep;
16145 else if (strcmp (str, "thunk") == 0)
16146 *val = indirect_branch_thunk;
16147 else if (strcmp (str, "thunk-inline") == 0)
16148 *val = indirect_branch_thunk_inline;
16149 else if (strcmp (str, "thunk-extern") == 0)
16150 *val = indirect_branch_thunk_extern;
16151 }
16152
16153 /* Memorize in cfun->machine the settings for -mindirect-branch* and
16154 -mfunction-return*, taken from either the cmdline or the function
16155 attributes. */
16156
16157 static void
16158 s390_indirect_branch_settings (tree fndecl)
16159 {
16160 tree attr;
16161
16162 if (!fndecl)
16163 return;
16164
16165 /* Initialize with the cmdline options and let the attributes
16166 override it. */
16167 cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
16168 cfun->machine->indirect_branch_call = s390_indirect_branch_call;
16169
16170 cfun->machine->function_return_reg = s390_function_return_reg;
16171 cfun->machine->function_return_mem = s390_function_return_mem;
16172
16173 if ((attr = lookup_attribute ("indirect_branch",
16174 DECL_ATTRIBUTES (fndecl))))
16175 {
16176 s390_indirect_branch_attrvalue (attr,
16177 &cfun->machine->indirect_branch_jump);
16178 s390_indirect_branch_attrvalue (attr,
16179 &cfun->machine->indirect_branch_call);
16180 }
16181
16182 if ((attr = lookup_attribute ("indirect_branch_jump",
16183 DECL_ATTRIBUTES (fndecl))))
16184 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);
16185
16186 if ((attr = lookup_attribute ("indirect_branch_call",
16187 DECL_ATTRIBUTES (fndecl))))
16188 s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);
16189
16190 if ((attr = lookup_attribute ("function_return",
16191 DECL_ATTRIBUTES (fndecl))))
16192 {
16193 s390_indirect_branch_attrvalue (attr,
16194 &cfun->machine->function_return_reg);
16195 s390_indirect_branch_attrvalue (attr,
16196 &cfun->machine->function_return_mem);
16197 }
16198
16199 if ((attr = lookup_attribute ("function_return_reg",
16200 DECL_ATTRIBUTES (fndecl))))
16201 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);
16202
16203 if ((attr = lookup_attribute ("function_return_mem",
16204 DECL_ATTRIBUTES (fndecl))))
16205 s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
16206 }
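
/* Editorial usage illustration (hypothetical declaration, not part of the
   backend): the function attributes looked up above override the
   -mindirect-branch* and -mfunction-return* command-line defaults per
   function, accepting the same "keep", "thunk", "thunk-inline" and
   "thunk-extern" values.  */
extern void example_indirect_branch_fn (void)
  __attribute__ ((indirect_branch ("thunk"), function_return ("keep")));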
16207
16208 #if S390_USE_TARGET_ATTRIBUTE
16209 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
16210 cache. */
16211
16212 void
16213 s390_activate_target_options (tree new_tree)
16214 {
16215 cl_target_option_restore (&global_options, &global_options_set,
16216 TREE_TARGET_OPTION (new_tree));
16217 if (TREE_TARGET_GLOBALS (new_tree))
16218 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
16219 else if (new_tree == target_option_default_node)
16220 restore_target_globals (&default_target_globals);
16221 else
16222 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
16223 s390_previous_fndecl = NULL_TREE;
16224 }
16225 #endif
16226
16227 /* Establish appropriate back-end context for processing the function
16228 FNDECL. The argument might be NULL to indicate processing at top
16229 level, outside of any function scope. */
16230 static void
16231 s390_set_current_function (tree fndecl)
16232 {
16233 #if S390_USE_TARGET_ATTRIBUTE
16234 /* Only change the context if the function changes. This hook is called
16235 several times in the course of compiling a function, and we don't want to
16236 slow things down too much or call target_reinit when it isn't safe. */
16237 if (fndecl == s390_previous_fndecl)
16238 {
16239 s390_indirect_branch_settings (fndecl);
16240 return;
16241 }
16242
16243 tree old_tree;
16244 if (s390_previous_fndecl == NULL_TREE)
16245 old_tree = target_option_current_node;
16246 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
16247 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
16248 else
16249 old_tree = target_option_default_node;
16250
16251 if (fndecl == NULL_TREE)
16252 {
16253 if (old_tree != target_option_current_node)
16254 s390_activate_target_options (target_option_current_node);
16255 return;
16256 }
16257
16258 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
16259 if (new_tree == NULL_TREE)
16260 new_tree = target_option_default_node;
16261
16262 if (old_tree != new_tree)
16263 s390_activate_target_options (new_tree);
16264 s390_previous_fndecl = fndecl;
16265 #endif
16266 s390_indirect_branch_settings (fndecl);
16267 }
16268
16269 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
16270
16271 static bool
16272 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
16273 unsigned int align ATTRIBUTE_UNUSED,
16274 enum by_pieces_operation op ATTRIBUTE_UNUSED,
16275 bool speed_p ATTRIBUTE_UNUSED)
16276 {
16277 return (size == 1 || size == 2
16278 || size == 4 || (TARGET_ZARCH && size == 8));
16279 }
16280
16281 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
16282
16283 static void
16284 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
16285 {
16286 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
16287 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
16288 tree call_efpc = build_call_expr (efpc, 0);
16289 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
16290
16291 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
16292 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
16293 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
16294 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
16295 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
16296 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
16297
16298 /* Generates the equivalent of feholdexcept (&fenv_var)
16299
16300 fenv_var = __builtin_s390_efpc ();
16301 __builtin_s390_sfpc (fenv_var & mask) */
16302 tree old_fpc = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, call_efpc,
16303 NULL_TREE, NULL_TREE);
16304 tree new_fpc
16305 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
16306 build_int_cst (unsigned_type_node,
16307 ~(FPC_DXC_MASK | FPC_FLAGS_MASK
16308 | FPC_EXCEPTION_MASK)));
16309 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
16310 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
16311
16312 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
16313
16314 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
16315 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
16316 build_int_cst (unsigned_type_node,
16317 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
16318 *clear = build_call_expr (sfpc, 1, new_fpc);
16319
16320 /* Generates the equivalent of feupdateenv (fenv_var)
16321
16322 old_fpc = __builtin_s390_efpc ();
16323 __builtin_s390_sfpc (fenv_var);
16324 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
16325
16326 old_fpc = create_tmp_var_raw (unsigned_type_node);
16327 tree store_old_fpc = build4 (TARGET_EXPR, void_type_node, old_fpc, call_efpc,
16328 NULL_TREE, NULL_TREE);
16329
16330 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
16331
16332 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
16333 build_int_cst (unsigned_type_node,
16334 FPC_FLAGS_MASK));
16335 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
16336 build_int_cst (unsigned_type_node,
16337 FPC_FLAGS_SHIFT));
16338 tree atomic_feraiseexcept
16339 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
16340 raise_old_except = build_call_expr (atomic_feraiseexcept,
16341 1, raise_old_except);
16342
16343 *update = build2 (COMPOUND_EXPR, void_type_node,
16344 build2 (COMPOUND_EXPR, void_type_node,
16345 store_old_fpc, set_new_fpc),
16346 raise_old_except);
16347
16348 #undef FPC_EXCEPTION_MASK
16349 #undef FPC_FLAGS_MASK
16350 #undef FPC_DXC_MASK
16351 #undef FPC_EXCEPTION_MASK_SHIFT
16352 #undef FPC_FLAGS_SHIFT
16353 #undef FPC_DXC_SHIFT
16354 }
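
/* Editorial usage illustration (hypothetical user-level C11 code, not part
   of the backend): the hold/clear/update sequences built above are emitted
   by the middle end around atomic compound assignments on floating point
   objects, so that exception flags raised by a retried compare-and-swap
   iteration are not left set spuriously.  */
static _Atomic double example_atomic_double;

static void ATTRIBUTE_UNUSED
example_atomic_fp_add (double x)
{
  example_atomic_double += x;
}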
16355
16356 /* Return the vector mode to be used for inner mode MODE when doing
16357 vectorization. */
16358 static machine_mode
16359 s390_preferred_simd_mode (scalar_mode mode)
16360 {
16361 if (TARGET_VXE)
16362 switch (mode)
16363 {
16364 case E_SFmode:
16365 return V4SFmode;
16366 default:;
16367 }
16368
16369 if (TARGET_VX)
16370 switch (mode)
16371 {
16372 case E_DFmode:
16373 return V2DFmode;
16374 case E_DImode:
16375 return V2DImode;
16376 case E_SImode:
16377 return V4SImode;
16378 case E_HImode:
16379 return V8HImode;
16380 case E_QImode:
16381 return V16QImode;
16382 default:;
16383 }
16384 return word_mode;
16385 }
16386
16387 /* Our hardware does not require vectors to be strictly aligned. */
16388 static bool
16389 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
16390 const_tree type ATTRIBUTE_UNUSED,
16391 int misalignment ATTRIBUTE_UNUSED,
16392 bool is_packed ATTRIBUTE_UNUSED)
16393 {
16394 if (TARGET_VX)
16395 return true;
16396
16397 return default_builtin_support_vector_misalignment (mode, type, misalignment,
16398 is_packed);
16399 }
16400
16401 /* The vector ABI requires vector types to be aligned on an 8 byte
16402 boundary (our stack alignment). However, we allow this to be
16403 overridden by the user, although this definitely breaks the ABI. */
16404 static HOST_WIDE_INT
16405 s390_vector_alignment (const_tree type)
16406 {
16407 tree size = TYPE_SIZE (type);
16408
16409 if (!TARGET_VX_ABI)
16410 return default_vector_alignment (type);
16411
16412 if (TYPE_USER_ALIGN (type))
16413 return TYPE_ALIGN (type);
16414
16415 if (tree_fits_uhwi_p (size)
16416 && tree_to_uhwi (size) < BIGGEST_ALIGNMENT)
16417 return tree_to_uhwi (size);
16418
16419 return BIGGEST_ALIGNMENT;
16420 }
16421
16422 /* Implement TARGET_CONSTANT_ALIGNMENT. Constants are aligned on even
16423 addresses so that they can be referenced via the LARL instruction. */
16424
16425 static HOST_WIDE_INT
16426 s390_constant_alignment (const_tree, HOST_WIDE_INT align)
16427 {
16428 return MAX (align, 16);
16429 }
16430
16431 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16432 /* Implement TARGET_ASM_FILE_START. */
16433 static void
16434 s390_asm_file_start (void)
16435 {
16436 default_file_start ();
16437 s390_asm_output_machine_for_arch (asm_out_file);
16438 }
16439 #endif
16440
16441 /* Implement TARGET_ASM_FILE_END. */
16442 static void
16443 s390_asm_file_end (void)
16444 {
16445 #ifdef HAVE_AS_GNU_ATTRIBUTE
16446 varpool_node *vnode;
16447 cgraph_node *cnode;
16448
16449 FOR_EACH_VARIABLE (vnode)
16450 if (TREE_PUBLIC (vnode->decl))
16451 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
16452
16453 FOR_EACH_FUNCTION (cnode)
16454 if (TREE_PUBLIC (cnode->decl))
16455 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
16456
16457
16458 if (s390_vector_abi != 0)
16459 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
16460 s390_vector_abi);
16461 #endif
16462 file_end_indicate_exec_stack ();
16463
16464 if (flag_split_stack)
16465 file_end_indicate_split_stack ();
16466 }
16467
16468 /* Return true if TYPE is a vector bool type. */
16469 static inline bool
16470 s390_vector_bool_type_p (const_tree type)
16471 {
16472 return TYPE_VECTOR_OPAQUE (type);
16473 }
16474
16475 /* Return the diagnostic message string if the binary operation OP is
16476 not permitted on TYPE1 and TYPE2, NULL otherwise. */
16477 static const char*
16478 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
16479 {
16480 bool bool1_p, bool2_p;
16481 bool plusminus_p;
16482 bool muldiv_p;
16483 bool compare_p;
16484 machine_mode mode1, mode2;
16485
16486 if (!TARGET_ZVECTOR)
16487 return NULL;
16488
16489 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
16490 return NULL;
16491
16492 bool1_p = s390_vector_bool_type_p (type1);
16493 bool2_p = s390_vector_bool_type_p (type2);
16494
16495 /* Mixing signed and unsigned types is forbidden for all
16496 operators. */
16497 if (!bool1_p && !bool2_p
16498 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
16499 return N_("types differ in signedness");
16500
16501 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
16502 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
16503 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
16504 || op == ROUND_DIV_EXPR);
16505 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
16506 || op == EQ_EXPR || op == NE_EXPR);
16507
16508 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
16509 return N_("binary operator does not support two vector bool operands");
16510
16511 if (bool1_p != bool2_p && (muldiv_p || compare_p))
16512 return N_("binary operator does not support vector bool operand");
16513
16514 mode1 = TYPE_MODE (type1);
16515 mode2 = TYPE_MODE (type2);
16516
16517 if (bool1_p != bool2_p && plusminus_p
16518 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
16519 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
16520 return N_("binary operator does not support mixing vector "
16521 "bool with floating point vector operands");
16522
16523 return NULL;
16524 }
16525
16526 #if ENABLE_S390_EXCESS_FLOAT_PRECISION == 1
16527 /* Implement TARGET_C_EXCESS_PRECISION to maintain historic behavior with older
16528 glibc versions.
16529
16530 For historical reasons, float_t and double_t had been typedef'ed to
16531 double on s390, causing operations on float_t to operate in a higher
16532 precision than is necessary. However, it is not the case that SFmode
16533 operations have implicit excess precision, and we generate more optimal
16534 code if we let the compiler know no implicit extra precision is added.
16535
16536 With a glibc that uses this "historic" definition, configure will enable this
16537 hook to set FLT_EVAL_METHOD to 1 for -fexcess-precision=standard (e.g., as
16538 implied by -std=cXY). That means that, when compiling with
16539 -fexcess-precision=fast, the value we set for FLT_EVAL_METHOD will be out of
16540 line with the actual precision of float_t.
16541
16542 Newer versions of glibc will be modified to derive the definition of float_t
16543 from FLT_EVAL_METHOD on s390x, as on many other architectures. There,
16544 configure will disable this hook by default, so that we defer to the default
16545 of FLT_EVAL_METHOD_PROMOTE_TO_FLOAT and a resulting typedef of float_t to
16546 float. Note that in that scenario, float_t and FLT_EVAL_METHOD will be in
16547 line independent of -fexcess-precision. */
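/* Concretely, with the historic glibc definition and
   -fexcess-precision=standard (FLT_EVAL_METHOD == 1), in

     float a, b, c;
     c = a * b;

   the multiplication is performed in double precision and only rounded
   back to float on the assignment to c, whereas with
   -fexcess-precision=fast it is performed in single precision.  */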
16548
16549 static enum flt_eval_method
16550 s390_excess_precision (enum excess_precision_type type)
16551 {
16552 switch (type)
16553 {
16554 case EXCESS_PRECISION_TYPE_IMPLICIT:
16555 case EXCESS_PRECISION_TYPE_FAST:
16556 /* The fastest type to promote to will always be the native type,
16557 whether that occurs with implicit excess precision or
16558 otherwise. */
16559 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
16560 case EXCESS_PRECISION_TYPE_STANDARD:
16561 /* Otherwise, when we are in a standards compliant mode, to
16562 ensure consistency with the implementation in glibc, report that
16563 float is evaluated to the range and precision of double. */
16564 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
16565 case EXCESS_PRECISION_TYPE_FLOAT16:
16566 error ("%<-fexcess-precision=16%> is not supported on this target");
16567 break;
16568 default:
16569 gcc_unreachable ();
16570 }
16571 return FLT_EVAL_METHOD_UNPREDICTABLE;
16572 }
16573 #endif
16574
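/* Expand a vectorized rawmemchr for ELT_MODE sized elements: search the
   memory starting at SRC for the first element equal to the constant PAT
   and store the address of the match in DST.  As with rawmemchr, a match
   is assumed to exist, so no length check is done.  The first, possibly
   unaligned, chunk is handled via VLBB/LCBB; afterwards the loop below
   scans one aligned 16-byte block per iteration using VFEE.  */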
16575 void
16576 s390_rawmemchr (machine_mode elt_mode, rtx dst, rtx src, rtx pat)
16577 {
16578 machine_mode vec_mode = mode_for_vector (as_a <scalar_int_mode> (elt_mode),
16579 16 / GET_MODE_SIZE (elt_mode)).require ();
16580 rtx lens = gen_reg_rtx (V16QImode);
16581 rtx pattern = gen_reg_rtx (vec_mode);
16582 rtx loop_start = gen_label_rtx ();
16583 rtx loop_end = gen_label_rtx ();
16584 rtx addr = gen_reg_rtx (Pmode);
16585 rtx offset = gen_reg_rtx (Pmode);
16586 rtx loadlen = gen_reg_rtx (SImode);
16587 rtx matchlen = gen_reg_rtx (SImode);
16588 rtx mem;
16589
16590 pat = GEN_INT (trunc_int_for_mode (INTVAL (pat), elt_mode));
16591 emit_insn (gen_rtx_SET (pattern, gen_rtx_VEC_DUPLICATE (vec_mode, pat)));
16592
16593 emit_move_insn (addr, XEXP (src, 0));
16594
16595 // Handle the first, possibly unaligned, chunk up to the next block boundary.
16596 emit_insn (gen_vlbb (lens, gen_rtx_MEM (BLKmode, addr), GEN_INT (6)));
16597 emit_insn (gen_lcbb (loadlen, addr, GEN_INT (6)));
16598 lens = convert_to_mode (vec_mode, lens, 1);
16599 emit_insn (gen_vec_vfees (vec_mode, lens, lens, pattern, GEN_INT (0)));
16600 lens = convert_to_mode (V4SImode, lens, 1);
16601 emit_insn (gen_vec_extractv4sisi (matchlen, lens, GEN_INT (1)));
16602 lens = convert_to_mode (vec_mode, lens, 1);
16603 emit_cmp_and_jump_insns (matchlen, loadlen, LT, NULL_RTX, SImode, 1, loop_end);
16604 force_expand_binop (Pmode, add_optab, addr, GEN_INT (16), addr, 1, OPTAB_DIRECT);
16605 force_expand_binop (Pmode, and_optab, addr, GEN_INT (~HOST_WIDE_INT_UC (0xf)), addr, 1, OPTAB_DIRECT);
16606 // now, addr is 16-byte aligned
16607
16608 mem = gen_rtx_MEM (vec_mode, addr);
16609 set_mem_align (mem, 128);
16610 emit_move_insn (lens, mem);
16611 emit_insn (gen_vec_vfees (vec_mode, lens, lens, pattern, GEN_INT (VSTRING_FLAG_CS)));
16612 add_int_reg_note (s390_emit_ccraw_jump (4, EQ, loop_end),
16613 REG_BR_PROB,
16614 profile_probability::very_unlikely ().to_reg_br_prob_note ());
16615
16616 emit_label (loop_start);
16617 LABEL_NUSES (loop_start) = 1;
16618
16619 force_expand_binop (Pmode, add_optab, addr, GEN_INT (16), addr, 1, OPTAB_DIRECT);
16620 mem = gen_rtx_MEM (vec_mode, addr);
16621 set_mem_align (mem, 128);
16622 emit_move_insn (lens, mem);
16623 emit_insn (gen_vec_vfees (vec_mode, lens, lens, pattern, GEN_INT (VSTRING_FLAG_CS)));
16624 add_int_reg_note (s390_emit_ccraw_jump (4, NE, loop_start),
16625 REG_BR_PROB,
16626 profile_probability::very_likely ().to_reg_br_prob_note ());
16627
16628 emit_label (loop_end);
16629 LABEL_NUSES (loop_end) = 1;
16630
16631 if (TARGET_64BIT)
16632 {
16633 lens = convert_to_mode (V2DImode, lens, 1);
16634 emit_insn (gen_vec_extractv2didi (offset, lens, GEN_INT (0)));
16635 }
16636 else
16637 {
16638 lens = convert_to_mode (V4SImode, lens, 1);
16639 emit_insn (gen_vec_extractv4sisi (offset, lens, GEN_INT (1)));
16640 }
16641 force_expand_binop (Pmode, add_optab, addr, offset, dst, 1, OPTAB_DIRECT);
16642 }
16643
16644 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
16645
16646 static unsigned HOST_WIDE_INT
16647 s390_asan_shadow_offset (void)
16648 {
16649 return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
16650 }
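/* AddressSanitizer computes shadow addresses as roughly

     shadow = (addr >> ASAN_SHADOW_SHIFT) + s390_asan_shadow_offset ()

   so with the usual 8-byte shadow granularity the offset above is simply
   added to the scaled application address.  */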
16651
16652 #ifdef HAVE_GAS_HIDDEN
16653 # define USE_HIDDEN_LINKONCE 1
16654 #else
16655 # define USE_HIDDEN_LINKONCE 0
16656 #endif
16657
16658 /* Output an indirect branch thunk for target register REGNO. */
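/* For example, for REGNO 2 the body of the emitted thunk is roughly

       exrl    0,1f
   0:  j       0b
   1:  br      %r2

   in the z10 (Z10_P) variant, and

       larl    %r1,1f
       ex      0,0(%r1)
   0:  j       0b
   1:  br      %r2

   otherwise: the indirect branch is executed out of line via EXRL/EX so
   that it is not subject to branch prediction, while the inline jump
   only spins.  */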
16659
16660 static void
16661 s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
16662 {
16663 tree decl;
16664 char thunk_label[32];
16665 int i;
16666
16667 if (z10_p)
16668 sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
16669 else
16670 sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
16671 INDIRECT_BRANCH_THUNK_REGNUM, regno);
16672
16673 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
16674 get_identifier (thunk_label),
16675 build_function_type_list (void_type_node, NULL_TREE));
16676 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
16677 NULL_TREE, void_type_node);
16678 TREE_PUBLIC (decl) = 1;
16679 TREE_STATIC (decl) = 1;
16680 DECL_IGNORED_P (decl) = 1;
16681
16682 if (USE_HIDDEN_LINKONCE)
16683 {
16684 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
16685
16686 targetm.asm_out.unique_section (decl, 0);
16687 switch_to_section (get_named_section (decl, NULL, 0));
16688
16689 targetm.asm_out.globalize_label (asm_out_file, thunk_label);
16690 fputs ("\t.hidden\t", asm_out_file);
16691 assemble_name (asm_out_file, thunk_label);
16692 putc ('\n', asm_out_file);
16693 ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
16694 }
16695 else
16696 {
16697 switch_to_section (text_section);
16698 ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
16699 }
16700
16701 DECL_INITIAL (decl) = make_node (BLOCK);
16702 current_function_decl = decl;
16703 allocate_struct_function (decl, false);
16704 init_function_start (decl);
16705 cfun->is_thunk = true;
16706 first_function_block_is_cold = false;
16707 final_start_function (emit_barrier (), asm_out_file, 1);
16708
16709 /* This makes CFI at least usable for indirect jumps.
16710
16711 When stopping in the thunk, a backtrace will point to the thunk
16712 target as if it had been interrupted by a signal. For a call this
16713 means that the call chain will be: caller->callee->thunk */
16714 if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
16715 {
16716 fputs ("\t.cfi_signal_frame\n", asm_out_file);
16717 fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
16718 for (i = 0; i < FPR15_REGNUM; i++)
16719 fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
16720 }
16721
16722 if (z10_p)
16723 {
16724 /* exrl 0,1f */
16725
16726 /* The thunk is used by code compiled for z10, but z10 is not
16727 enabled at this point, so tell the assembler to accept the
16728 exrl instruction anyway. */
16729 if (!TARGET_CPU_Z10)
16730 {
16731 fputs ("\t.machine push\n", asm_out_file);
16732 fputs ("\t.machine z10\n", asm_out_file);
16733 }
16734 /* We use exrl even if -mzarch hasn't been specified on the
16735 command line so we have to tell the assembler to accept
16736 it. */
16737 if (!TARGET_ZARCH)
16738 fputs ("\t.machinemode zarch\n", asm_out_file);
16739
16740 fputs ("\texrl\t0,1f\n", asm_out_file);
16741
16742 if (!TARGET_ZARCH)
16743 fputs ("\t.machinemode esa\n", asm_out_file);
16744
16745 if (!TARGET_CPU_Z10)
16746 fputs ("\t.machine pop\n", asm_out_file);
16747 }
16748 else
16749 {
16750 /* larl %r1,1f */
16751 fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
16752 INDIRECT_BRANCH_THUNK_REGNUM);
16753
16754 /* ex 0,0(%r1) */
16755 fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
16756 INDIRECT_BRANCH_THUNK_REGNUM);
16757 }
16758
16759 /* 0: j 0b */
16760 fputs ("0:\tj\t0b\n", asm_out_file);
16761
16762 /* 1: br <regno> */
16763 fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);
16764
16765 final_end_function ();
16766 init_insn_lengths ();
16767 free_after_compilation (cfun);
16768 set_cfun (NULL);
16769 current_function_decl = NULL;
16770 }
16771
16772 /* Implement the TARGET_ASM_CODE_END hook. */
16773
16774 static void
16775 s390_code_end (void)
16776 {
16777 int i;
16778
16779 for (i = 1; i < 16; i++)
16780 {
16781 if (indirect_branch_z10thunk_mask & (1 << i))
16782 s390_output_indirect_thunk_function (i, true);
16783
16784 if (indirect_branch_prez10thunk_mask & (1 << i))
16785 s390_output_indirect_thunk_function (i, false);
16786 }
16787
16788 if (TARGET_INDIRECT_BRANCH_TABLE)
16789 {
16790 int o;
16791 int i;
16792
16793 for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
16794 {
16795 if (indirect_branch_table_label_no[o] == 0)
16796 continue;
16797
16798 switch_to_section (get_section (indirect_branch_table_name[o],
16799 0,
16800 NULL_TREE));
16801 for (i = 0; i < indirect_branch_table_label_no[o]; i++)
16802 {
16803 char label_start[32];
16804
16805 ASM_GENERATE_INTERNAL_LABEL (label_start,
16806 indirect_branch_table_label[o], i);
16807
16808 fputs ("\t.long\t", asm_out_file);
16809 assemble_name_raw (asm_out_file, label_start);
16810 fputs ("-.\n", asm_out_file);
16811 }
16812 switch_to_section (current_function_section ());
16813 }
16814 }
16815 }
16816
16817 /* Implement the TARGET_CASE_VALUES_THRESHOLD target hook. */
16818
16819 unsigned int
16820 s390_case_values_threshold (void)
16821 {
16822 /* Disabling branch prediction for indirect jumps makes jump tables
16823 much more expensive. */
16824 if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
16825 return 20;
16826
16827 return default_case_values_threshold ();
16828 }
16829
16830 /* Evaluate the insns between HEAD and TAIL and install back-end
16831 specific dependencies.
16832
16833 Establish an ANTI dependency between the r11 and r15 restores from
16834 FPRs to prevent the instruction scheduler from reordering them,
16835 since this would break CFI. No further handling in the
16836 sched_reorder hook is required since, with that change, the r11
16837 and r15 restores will never appear in the same ready list. */
16838 void
16839 s390_sched_dependencies_evaluation (rtx_insn *head, rtx_insn *tail)
16840 {
16841 if (!frame_pointer_needed || !epilogue_completed)
16842 return;
16843
16844 while (head != tail && DEBUG_INSN_P (head))
16845 head = NEXT_INSN (head);
16846
16847 rtx_insn *r15_restore = NULL, *r11_restore = NULL;
16848
16849 for (rtx_insn *insn = tail; insn != head; insn = PREV_INSN (insn))
16850 {
16851 rtx set = single_set (insn);
16852 if (!INSN_P (insn)
16853 || !RTX_FRAME_RELATED_P (insn)
16854 || set == NULL_RTX
16855 || !REG_P (SET_DEST (set))
16856 || !FP_REG_P (SET_SRC (set)))
16857 continue;
16858
16859 if (REGNO (SET_DEST (set)) == HARD_FRAME_POINTER_REGNUM)
16860 r11_restore = insn;
16861
16862 if (REGNO (SET_DEST (set)) == STACK_POINTER_REGNUM)
16863 r15_restore = insn;
16864 }
16865
16866 if (r11_restore == NULL || r15_restore == NULL)
16867 return;
16868 add_dependence (r11_restore, r15_restore, REG_DEP_ANTI);
16869 }
16870
16871 /* Implement TARGET_SHIFT_TRUNCATION_MASK for integer shifts. */
16872
16873 static unsigned HOST_WIDE_INT
16874 s390_shift_truncation_mask (machine_mode mode)
16875 {
16876 return mode == DImode || mode == SImode ? 63 : 0;
16877 }
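/* That is, shift counts for SImode and DImode shifts are only used
   modulo 64 (truncated to their low 6 bits), matching what the hardware
   shift instructions do with a register-specified amount.  */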
16878
16879 /* Return TRUE iff CONSTRAINT is an "f" constraint, possibly with additional
16880 modifiers. */
16881
16882 static bool
16883 f_constraint_p (const char *constraint)
16884 {
16885 bool seen_f_p = false;
16886 bool seen_v_p = false;
16887
16888 for (size_t i = 0, c_len = strlen (constraint); i < c_len;
16889 i += CONSTRAINT_LEN (constraint[i], constraint + i))
16890 {
16891 if (constraint[i] == 'f')
16892 seen_f_p = true;
16893 if (constraint[i] == 'v')
16894 seen_v_p = true;
16895 }
16896
16897 /* Treat "fv" constraints as "v", because LRA will choose the widest register
16898 * class. */
16899 return seen_f_p && !seen_v_p;
16900 }
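/* For instance, "f" and "=f" are considered plain FPR constraints here,
   while "v", "fv" and "=fv" are not, because once "v" is present LRA may
   pick a full vector register anyway.  */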
16901
16902 /* Return TRUE iff X is a hard floating-point (and not a vector) register. */
16903
16904 static bool
16905 s390_hard_fp_reg_p (rtx x)
16906 {
16907 if (!(REG_P (x) && HARD_REGISTER_P (x) && REG_ATTRS (x)))
16908 return false;
16909
16910 tree decl = REG_EXPR (x);
16911 if (!(HAS_DECL_ASSEMBLER_NAME_P (decl) && DECL_ASSEMBLER_NAME_SET_P (decl)))
16912 return false;
16913
16914 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
16915
16916 return name[0] == '*' && name[1] == 'f';
16917 }
16918
16919 /* Implement TARGET_MD_ASM_ADJUST hook in order to fix up "f"
16920 constraints when long doubles are stored in vector registers. */
16921
16922 static rtx_insn *
16923 s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
16924 vec<machine_mode> &input_modes,
16925 vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
16926 HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
16927 {
16928 if (!TARGET_VXE)
16929 /* Long doubles are stored in FPR pairs - nothing to do. */
16930 return NULL;
16931
16932 rtx_insn *after_md_seq = NULL, *after_md_end = NULL;
16933
16934 unsigned ninputs = inputs.length ();
16935 unsigned noutputs = outputs.length ();
16936 for (unsigned i = 0; i < noutputs; i++)
16937 {
16938 if (GET_MODE (outputs[i]) != TFmode)
16939 /* Not a long double - nothing to do. */
16940 continue;
16941 const char *constraint = constraints[i];
16942 bool allows_mem, allows_reg, is_inout;
16943 bool ok = parse_output_constraint (&constraint, i, ninputs, noutputs,
16944 &allows_mem, &allows_reg, &is_inout);
16945 gcc_assert (ok);
16946 if (!f_constraint_p (constraint))
16947 /* Long double with a constraint other than "=f" - nothing to do. */
16948 continue;
16949 gcc_assert (allows_reg);
16950 gcc_assert (!is_inout);
16951 /* Copy output value from a FPR pair into a vector register. */
16952 rtx fprx2;
16953 push_to_sequence2 (after_md_seq, after_md_end);
16954 if (s390_hard_fp_reg_p (outputs[i]))
16955 {
16956 fprx2 = gen_rtx_REG (FPRX2mode, REGNO (outputs[i]));
16957 /* The first half is already at the correct location, copy only the
16958 * second one. Use the UNSPEC pattern instead of the SUBREG one,
16959 * since s390_can_change_mode_class() rejects
16960 * (subreg:DF (reg:TF %fN) 8) and thus subreg validation fails. */
16961 rtx v1 = gen_rtx_REG (V2DFmode, REGNO (outputs[i]));
16962 rtx v3 = gen_rtx_REG (V2DFmode, REGNO (outputs[i]) + 1);
16963 emit_insn (gen_vec_permiv2df (v1, v1, v3, const0_rtx));
16964 }
16965 else
16966 {
16967 fprx2 = gen_reg_rtx (FPRX2mode);
16968 emit_insn (gen_fprx2_to_tf (outputs[i], fprx2));
16969 }
16970 after_md_seq = get_insns ();
16971 after_md_end = get_last_insn ();
16972 end_sequence ();
16973 outputs[i] = fprx2;
16974 }
16975
16976 for (unsigned i = 0; i < ninputs; i++)
16977 {
16978 if (GET_MODE (inputs[i]) != TFmode)
16979 /* Not a long double - nothing to do. */
16980 continue;
16981 const char *constraint = constraints[noutputs + i];
16982 bool allows_mem, allows_reg;
16983 bool ok = parse_input_constraint (&constraint, i, ninputs, noutputs, 0,
16984 constraints.address (), &allows_mem,
16985 &allows_reg);
16986 gcc_assert (ok);
16987 if (!f_constraint_p (constraint))
16988 /* Long double with a constraint other than "f" (or "=f" for inout
16989 operands) - nothing to do. */
16990 continue;
16991 gcc_assert (allows_reg);
16992 /* Copy input value from a vector register into a FPR pair. */
16993 rtx fprx2;
16994 if (s390_hard_fp_reg_p (inputs[i]))
16995 {
16996 fprx2 = gen_rtx_REG (FPRX2mode, REGNO (inputs[i]));
16997 /* Copy only the second half. */
16998 rtx v1 = gen_rtx_REG (V2DFmode, REGNO (inputs[i]) + 1);
16999 rtx v2 = gen_rtx_REG (V2DFmode, REGNO (inputs[i]));
17000 emit_insn (gen_vec_permiv2df (v1, v2, v1, GEN_INT (3)));
17001 }
17002 else
17003 {
17004 fprx2 = gen_reg_rtx (FPRX2mode);
17005 emit_insn (gen_tf_to_fprx2 (fprx2, inputs[i]));
17006 }
17007 inputs[i] = fprx2;
17008 input_modes[i] = FPRX2mode;
17009 }
17010
17011 return after_md_seq;
17012 }
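/* As an illustration, with long doubles stored in vector registers an
   asm statement such as

     long double res, x;
     __asm__ ("sqxbr\t%0,%1" : "=f" (res) : "f" (x));

   keeps working: the TFmode input is copied into a floating-point
   register pair before the asm, and the output pair is copied back into
   a vector register afterwards.  */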
17013
17014 #define MAX_VECT_LEN 16
17015
17016 struct expand_vec_perm_d
17017 {
17018 rtx target, op0, op1;
17019 unsigned char perm[MAX_VECT_LEN];
17020 machine_mode vmode;
17021 unsigned char nelt;
17022 bool testing_p;
17023 };
17024
17025 /* Try to expand the vector permute operation described by D using the
17026 vector merge instructions vmrl and vmrh. Return true if vector
17027 merge could be used. */
17028 static bool
17029 expand_perm_with_merge (const struct expand_vec_perm_d &d)
17030 {
17031 bool merge_lo_p = true;
17032 bool merge_hi_p = true;
17033
17034 if (d.nelt % 2)
17035 return false;
17036
17037 // For V4SI this checks for: { 0, 4, 1, 5 }
17038 for (int telt = 0; telt < d.nelt; telt++)
17039 if (d.perm[telt] != telt / 2 + (telt % 2) * d.nelt)
17040 {
17041 merge_hi_p = false;
17042 break;
17043 }
17044
17045 if (!merge_hi_p)
17046 {
17047 // For V4SI this checks for: { 2, 6, 3, 7 }
17048 for (int telt = 0; telt < d.nelt; telt++)
17049 if (d.perm[telt] != (telt + d.nelt) / 2 + (telt % 2) * d.nelt)
17050 {
17051 merge_lo_p = false;
17052 break;
17053 }
17054 }
17055 else
17056 merge_lo_p = false;
17057
17058 if (d.testing_p)
17059 return merge_lo_p || merge_hi_p;
17060
17061 if (merge_lo_p || merge_hi_p)
17062 s390_expand_merge (d.target, d.op0, d.op1, merge_hi_p);
17063
17064 return merge_lo_p || merge_hi_p;
17065 }
17066
17067 /* Try to expand the vector permute operation described by D using the
17068 vector permute doubleword immediate instruction vpdi. Return true
17069 if vpdi could be used.
17070
17071 VPDI allows 4 different immediate values (0, 1, 4, 5). The 0 and 5
17072 cases are covered by vmrhg and vmrlg already. So we only care
17073 about the 1, 4 cases here.
17074 1 - First element of src1 and second of src2
17075 4 - Second element of src1 and first of src2 */
17076 static bool
17077 expand_perm_with_vpdi (const struct expand_vec_perm_d &d)
17078 {
17079 bool vpdi1_p = false;
17080 bool vpdi4_p = false;
17081 rtx op0_reg, op1_reg;
17082
17083 // Only V2DI and V2DF are supported here.
17084 if (d.nelt != 2)
17085 return false;
17086
17087 if (d.perm[0] == 0 && d.perm[1] == 3)
17088 vpdi1_p = true;
17089
17090 if (d.perm[0] == 1 && d.perm[1] == 2)
17091 vpdi4_p = true;
17092
17093 if (!vpdi1_p && !vpdi4_p)
17094 return false;
17095
17096 if (d.testing_p)
17097 return true;
17098
17099 op0_reg = force_reg (GET_MODE (d.op0), d.op0);
17100 op1_reg = force_reg (GET_MODE (d.op1), d.op1);
17101
17102 if (vpdi1_p)
17103 emit_insn (gen_vpdi1 (d.vmode, d.target, op0_reg, op1_reg));
17104
17105 if (vpdi4_p)
17106 emit_insn (gen_vpdi4 (d.vmode, d.target, op0_reg, op1_reg));
17107
17108 return true;
17109 }
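/* E.g. for V2DFmode the constant permutation { 0, 3 } (element 0 of op0
   and element 1 of op1) maps to vpdi with immediate 1, and { 1, 2 } maps
   to vpdi with immediate 4.  */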
17110
17111 /* Try to find the best sequence for the vector permute operation
17112 described by D. Return true if the operation could be
17113 expanded. */
17114 static bool
17115 vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d)
17116 {
17117 if (expand_perm_with_merge (d))
17118 return true;
17119
17120 if (expand_perm_with_vpdi (d))
17121 return true;
17122
17123 return false;
17124 }
17125
17126 /* Return true if we can emit instructions for the constant
17127 permutation vector in SEL. If TARGET, OP0 and OP1 are non-null,
17128 the hook is supposed to emit the required INSNs. */
17129
17130 bool
17131 s390_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
17132 const vec_perm_indices &sel)
17133 {
17134 struct expand_vec_perm_d d;
17135 unsigned int i, nelt;
17136
17137 if (!s390_vector_mode_supported_p (vmode) || GET_MODE_SIZE (vmode) != 16)
17138 return false;
17139
17140 d.target = target;
17141 d.op0 = op0;
17142 d.op1 = op1;
17143
17144 d.vmode = vmode;
17145 gcc_assert (VECTOR_MODE_P (d.vmode));
17146 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
17147 d.testing_p = target == NULL_RTX;
17148
17149 gcc_assert (target == NULL_RTX || REG_P (target));
17150 gcc_assert (sel.length () == nelt);
17151
17152 for (i = 0; i < nelt; i++)
17153 {
17154 unsigned char e = sel[i];
17155 gcc_assert (e < 2 * nelt);
17156 d.perm[i] = e;
17157 }
17158
17159 return vectorize_vec_perm_const_1 (d);
17160 }
17161
17162 /* Initialize GCC target structure. */
17163
17164 #undef TARGET_ASM_ALIGNED_HI_OP
17165 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
17166 #undef TARGET_ASM_ALIGNED_DI_OP
17167 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
17168 #undef TARGET_ASM_INTEGER
17169 #define TARGET_ASM_INTEGER s390_assemble_integer
17170
17171 #undef TARGET_ASM_OPEN_PAREN
17172 #define TARGET_ASM_OPEN_PAREN ""
17173
17174 #undef TARGET_ASM_CLOSE_PAREN
17175 #define TARGET_ASM_CLOSE_PAREN ""
17176
17177 #undef TARGET_OPTION_OVERRIDE
17178 #define TARGET_OPTION_OVERRIDE s390_option_override
17179
17180 #ifdef TARGET_THREAD_SSP_OFFSET
17181 #undef TARGET_STACK_PROTECT_GUARD
17182 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
17183 #endif
17184
17185 #undef TARGET_ENCODE_SECTION_INFO
17186 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
17187
17188 #undef TARGET_SCALAR_MODE_SUPPORTED_P
17189 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
17190
17191 #ifdef HAVE_AS_TLS
17192 #undef TARGET_HAVE_TLS
17193 #define TARGET_HAVE_TLS true
17194 #endif
17195 #undef TARGET_CANNOT_FORCE_CONST_MEM
17196 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
17197
17198 #undef TARGET_DELEGITIMIZE_ADDRESS
17199 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
17200
17201 #undef TARGET_LEGITIMIZE_ADDRESS
17202 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
17203
17204 #undef TARGET_RETURN_IN_MEMORY
17205 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
17206
17207 #undef TARGET_INIT_BUILTINS
17208 #define TARGET_INIT_BUILTINS s390_init_builtins
17209 #undef TARGET_EXPAND_BUILTIN
17210 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
17211 #undef TARGET_BUILTIN_DECL
17212 #define TARGET_BUILTIN_DECL s390_builtin_decl
17213
17214 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
17215 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
17216
17217 #undef TARGET_ASM_OUTPUT_MI_THUNK
17218 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
17219 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
17220 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
17221
17222 #if ENABLE_S390_EXCESS_FLOAT_PRECISION == 1
17223 /* This hook is only needed to maintain the historic behavior with glibc
17224 versions that typedef float_t to double. */
17225 #undef TARGET_C_EXCESS_PRECISION
17226 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
17227 #endif
17228
17229 #undef TARGET_SCHED_ADJUST_PRIORITY
17230 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
17231 #undef TARGET_SCHED_ISSUE_RATE
17232 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
17233 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
17234 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
17235
17236 #undef TARGET_SCHED_VARIABLE_ISSUE
17237 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
17238 #undef TARGET_SCHED_REORDER
17239 #define TARGET_SCHED_REORDER s390_sched_reorder
17240 #undef TARGET_SCHED_INIT
17241 #define TARGET_SCHED_INIT s390_sched_init
17242
17243 #undef TARGET_CANNOT_COPY_INSN_P
17244 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
17245 #undef TARGET_RTX_COSTS
17246 #define TARGET_RTX_COSTS s390_rtx_costs
17247 #undef TARGET_ADDRESS_COST
17248 #define TARGET_ADDRESS_COST s390_address_cost
17249 #undef TARGET_REGISTER_MOVE_COST
17250 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
17251 #undef TARGET_MEMORY_MOVE_COST
17252 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
17253 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
17254 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
17255 s390_builtin_vectorization_cost
17256
17257 #undef TARGET_MACHINE_DEPENDENT_REORG
17258 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
17259
17260 #undef TARGET_VALID_POINTER_MODE
17261 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
17262
17263 #undef TARGET_BUILD_BUILTIN_VA_LIST
17264 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
17265 #undef TARGET_EXPAND_BUILTIN_VA_START
17266 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
17267 #undef TARGET_ASAN_SHADOW_OFFSET
17268 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
17269 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
17270 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
17271
17272 #undef TARGET_PROMOTE_FUNCTION_MODE
17273 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
17274 #undef TARGET_PASS_BY_REFERENCE
17275 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
17276
17277 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
17278 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change
17279
17280 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
17281 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
17282 #undef TARGET_FUNCTION_ARG
17283 #define TARGET_FUNCTION_ARG s390_function_arg
17284 #undef TARGET_FUNCTION_ARG_ADVANCE
17285 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
17286 #undef TARGET_FUNCTION_ARG_PADDING
17287 #define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
17288 #undef TARGET_FUNCTION_VALUE
17289 #define TARGET_FUNCTION_VALUE s390_function_value
17290 #undef TARGET_LIBCALL_VALUE
17291 #define TARGET_LIBCALL_VALUE s390_libcall_value
17292 #undef TARGET_STRICT_ARGUMENT_NAMING
17293 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
17294
17295 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
17296 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
17297
17298 #undef TARGET_FIXED_CONDITION_CODE_REGS
17299 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
17300
17301 #undef TARGET_CC_MODES_COMPATIBLE
17302 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
17303
17304 #undef TARGET_INVALID_WITHIN_DOLOOP
17305 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
17306
17307 #ifdef HAVE_AS_TLS
17308 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
17309 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
17310 #endif
17311
17312 #undef TARGET_DWARF_FRAME_REG_MODE
17313 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
17314
17315 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
17316 #undef TARGET_MANGLE_TYPE
17317 #define TARGET_MANGLE_TYPE s390_mangle_type
17318 #endif
17319
17323 #undef TARGET_VECTOR_MODE_SUPPORTED_P
17324 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
17325
17326 #undef TARGET_PREFERRED_RELOAD_CLASS
17327 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
17328
17329 #undef TARGET_SECONDARY_RELOAD
17330 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
17331 #undef TARGET_SECONDARY_MEMORY_NEEDED
17332 #define TARGET_SECONDARY_MEMORY_NEEDED s390_secondary_memory_needed
17333 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
17334 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE s390_secondary_memory_needed_mode
17335
17336 #undef TARGET_LIBGCC_CMP_RETURN_MODE
17337 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
17338
17339 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
17340 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
17341
17342 #undef TARGET_LEGITIMATE_ADDRESS_P
17343 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
17344
17345 #undef TARGET_LEGITIMATE_CONSTANT_P
17346 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
17347
17348 #undef TARGET_LRA_P
17349 #define TARGET_LRA_P s390_lra_p
17350
17351 #undef TARGET_CAN_ELIMINATE
17352 #define TARGET_CAN_ELIMINATE s390_can_eliminate
17353
17354 #undef TARGET_CONDITIONAL_REGISTER_USAGE
17355 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
17356
17357 #undef TARGET_LOOP_UNROLL_ADJUST
17358 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
17359
17360 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
17361 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
17362 #undef TARGET_TRAMPOLINE_INIT
17363 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
17364
17365 /* PR 79421 */
17366 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
17367 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
17368
17369 #undef TARGET_UNWIND_WORD_MODE
17370 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
17371
17372 #undef TARGET_CANONICALIZE_COMPARISON
17373 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
17374
17375 #undef TARGET_HARD_REGNO_SCRATCH_OK
17376 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
17377
17378 #undef TARGET_HARD_REGNO_NREGS
17379 #define TARGET_HARD_REGNO_NREGS s390_hard_regno_nregs
17380 #undef TARGET_HARD_REGNO_MODE_OK
17381 #define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
17382 #undef TARGET_MODES_TIEABLE_P
17383 #define TARGET_MODES_TIEABLE_P s390_modes_tieable_p
17384
17385 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
17386 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
17387 s390_hard_regno_call_part_clobbered
17388
17389 #undef TARGET_ATTRIBUTE_TABLE
17390 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
17391
17392 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
17393 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
17394
17395 #undef TARGET_SET_UP_BY_PROLOGUE
17396 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
17397
17398 #undef TARGET_EXTRA_LIVE_ON_ENTRY
17399 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
17400
17401 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
17402 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
17403 s390_use_by_pieces_infrastructure_p
17404
17405 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
17406 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
17407
17408 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
17409 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
17410
17411 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
17412 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
17413
17414 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
17415 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
17416
17417 #undef TARGET_VECTOR_ALIGNMENT
17418 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
17419
17420 #undef TARGET_INVALID_BINARY_OP
17421 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
17422
17423 #ifdef HAVE_AS_MACHINE_MACHINEMODE
17424 #undef TARGET_ASM_FILE_START
17425 #define TARGET_ASM_FILE_START s390_asm_file_start
17426 #endif
17427
17428 #undef TARGET_ASM_FILE_END
17429 #define TARGET_ASM_FILE_END s390_asm_file_end
17430
17431 #undef TARGET_SET_CURRENT_FUNCTION
17432 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
17433
17434 #if S390_USE_TARGET_ATTRIBUTE
17435 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
17436 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
17437
17438 #undef TARGET_CAN_INLINE_P
17439 #define TARGET_CAN_INLINE_P s390_can_inline_p
17440 #endif
17441
17442 #undef TARGET_OPTION_RESTORE
17443 #define TARGET_OPTION_RESTORE s390_function_specific_restore
17444
17445 #undef TARGET_CAN_CHANGE_MODE_CLASS
17446 #define TARGET_CAN_CHANGE_MODE_CLASS s390_can_change_mode_class
17447
17448 #undef TARGET_CONSTANT_ALIGNMENT
17449 #define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
17450
17451 #undef TARGET_ASM_CODE_END
17452 #define TARGET_ASM_CODE_END s390_code_end
17453
17454 #undef TARGET_CASE_VALUES_THRESHOLD
17455 #define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold
17456
17457 #undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
17458 #define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
17459 s390_sched_dependencies_evaluation
17460
17461 #undef TARGET_SHIFT_TRUNCATION_MASK
17462 #define TARGET_SHIFT_TRUNCATION_MASK s390_shift_truncation_mask
17463
17464 /* Use only short displacement, since long displacement is not available for
17465 the floating point instructions. */
17466 #undef TARGET_MAX_ANCHOR_OFFSET
17467 #define TARGET_MAX_ANCHOR_OFFSET 0xfff
17468
17469 #undef TARGET_MD_ASM_ADJUST
17470 #define TARGET_MD_ASM_ADJUST s390_md_asm_adjust
17471
17472 #undef TARGET_VECTORIZE_VEC_PERM_CONST
17473 #define TARGET_VECTORIZE_VEC_PERM_CONST s390_vectorize_vec_perm_const
17474
17475
17476 struct gcc_target targetm = TARGET_INITIALIZER;
17477
17478 #include "gt-s390.h"