]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/gimple-ssa-store-merging.cc
fortran: Add -finline-intrinsics flag for MINLOC/MAXLOC [PR90608]
[thirdparty/gcc.git] / gcc / gimple-ssa-store-merging.cc
CommitLineData
dffec8eb 1/* GIMPLE store merging and byte swapping passes.
a945c346 2 Copyright (C) 2009-2024 Free Software Foundation, Inc.
f663d9ad
KT
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
c94c3532
EB
21/* The purpose of the store merging pass is to combine multiple memory stores
22 of constant values, values loaded from memory, bitwise operations on those,
23 or bit-field values, to consecutive locations, into fewer wider stores.
24
f663d9ad
KT
25 For example, if we have a sequence peforming four byte stores to
26 consecutive memory locations:
27 [p ] := imm1;
28 [p + 1B] := imm2;
29 [p + 2B] := imm3;
30 [p + 3B] := imm4;
31 we can transform this into a single 4-byte store if the target supports it:
c94c3532 32 [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
f663d9ad 33
245f6de1
JJ
34 Or:
35 [p ] := [q ];
36 [p + 1B] := [q + 1B];
37 [p + 2B] := [q + 2B];
38 [p + 3B] := [q + 3B];
39 if there is no overlap can be transformed into a single 4-byte
40 load followed by single 4-byte store.
41
42 Or:
43 [p ] := [q ] ^ imm1;
44 [p + 1B] := [q + 1B] ^ imm2;
45 [p + 2B] := [q + 2B] ^ imm3;
46 [p + 3B] := [q + 3B] ^ imm4;
47 if there is no overlap can be transformed into a single 4-byte
48 load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
49
c94c3532
EB
50 Or:
51 [p:1 ] := imm;
52 [p:31] := val & 0x7FFFFFFF;
53 we can transform this into a single 4-byte store if the target supports it:
54 [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
55
f663d9ad
KT
56 The algorithm is applied to each basic block in three phases:
57
c94c3532
EB
58 1) Scan through the basic block and record assignments to destinations
59 that can be expressed as a store to memory of a certain size at a certain
60 bit offset from base expressions we can handle. For bit-fields we also
61 record the surrounding bit region, i.e. bits that could be stored in
245f6de1
JJ
62 a read-modify-write operation when storing the bit-field. Record store
63 chains to different bases in a hash_map (m_stores) and make sure to
700d4cb0 64 terminate such chains when appropriate (for example when the stored
245f6de1 65 values get used subsequently).
f663d9ad
KT
66 These stores can be a result of structure element initializers, array stores
67 etc. A store_immediate_info object is recorded for every such store.
68 Record as many such assignments to a single base as possible until a
69 statement that interferes with the store sequence is encountered.
c94c3532
EB
70 Each store has up to 2 operands, which can be a either constant, a memory
71 load or an SSA name, from which the value to be stored can be computed.
245f6de1
JJ
72 At most one of the operands can be a constant. The operands are recorded
73 in store_operand_info struct.
f663d9ad 74
c94c3532 75 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
f663d9ad 76 store_immediate_info objects) and coalesce contiguous stores into
c94c3532 77 merged_store_group objects. For bit-field stores, we don't need to
245f6de1
JJ
78 require the stores to be contiguous, just their surrounding bit regions
79 have to be contiguous. If the expression being stored is different
80 between adjacent stores, such as one store storing a constant and
81 following storing a value loaded from memory, or if the loaded memory
82 objects are not adjacent, a new merged_store_group is created as well.
f663d9ad
KT
83
84 For example, given the stores:
85 [p ] := 0;
86 [p + 1B] := 1;
87 [p + 3B] := 0;
88 [p + 4B] := 1;
89 [p + 5B] := 0;
90 [p + 6B] := 0;
91 This phase would produce two merged_store_group objects, one recording the
92 two bytes stored in the memory region [p : p + 1] and another
93 recording the four bytes stored in the memory region [p + 3 : p + 6].
94
95 3) The merged_store_group objects produced in phase 2) are processed
96 to generate the sequence of wider stores that set the contiguous memory
97 regions to the sequence of bytes that correspond to it. This may emit
98 multiple stores per store group to handle contiguous stores that are not
99 of a size that is a power of 2. For example it can try to emit a 40-bit
100 store as a 32-bit store followed by an 8-bit store.
c94c3532
EB
101 We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
102 or TARGET_SLOW_UNALIGNED_ACCESS settings.
f663d9ad
KT
103
104 Note on endianness and example:
105 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106 [p ] := 0x1234;
107 [p + 2B] := 0x5678;
108 [p + 4B] := 0xab;
109 [p + 5B] := 0xcd;
110
111 The memory layout for little-endian (LE) and big-endian (BE) must be:
112 p |LE|BE|
113 ---------
114 0 |34|12|
115 1 |12|34|
116 2 |78|56|
117 3 |56|78|
118 4 |ab|ab|
119 5 |cd|cd|
120
121 To merge these into a single 48-bit merged value 'val' in phase 2)
122 on little-endian we insert stores to higher (consecutive) bitpositions
123 into the most significant bits of the merged value.
124 The final merged value would be: 0xcdab56781234
125
126 For big-endian we insert stores to higher bitpositions into the least
127 significant bits of the merged value.
128 The final merged value would be: 0x12345678abcd
129
130 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131 followed by a 16-bit store. Again, we must consider endianness when
132 breaking down the 48-bit value 'val' computed above.
133 For little endian we emit:
134 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
136
137 Whereas for big-endian we emit:
138 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
140
141#include "config.h"
142#include "system.h"
143#include "coretypes.h"
144#include "backend.h"
145#include "tree.h"
146#include "gimple.h"
147#include "builtins.h"
148#include "fold-const.h"
149#include "tree-pass.h"
150#include "ssa.h"
151#include "gimple-pretty-print.h"
152#include "alias.h"
153#include "fold-const.h"
f663d9ad
KT
154#include "print-tree.h"
155#include "tree-hash-traits.h"
156#include "gimple-iterator.h"
157#include "gimplify.h"
c94c3532 158#include "gimple-fold.h"
f663d9ad
KT
159#include "stor-layout.h"
160#include "timevar.h"
629387a6
EB
161#include "cfganal.h"
162#include "cfgcleanup.h"
f663d9ad 163#include "tree-cfg.h"
629387a6 164#include "except.h"
f663d9ad
KT
165#include "tree-eh.h"
166#include "target.h"
aa55dc0c 167#include "gimplify-me.h"
a62b3dc5
JJ
168#include "rtl.h"
169#include "expr.h" /* For get_bit_range. */
dffec8eb 170#include "optabs-tree.h"
a95b474a 171#include "dbgcnt.h"
c22d8787 172#include "selftest.h"
f663d9ad
KT
173
174/* The maximum size (in bits) of the stores this pass should generate. */
175#define MAX_STORE_BITSIZE (BITS_PER_WORD)
176#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
177
245f6de1
JJ
178/* Limit to bound the number of aliasing checks for loads with the same
179 vuse as the corresponding store. */
180#define MAX_STORE_ALIAS_CHECKS 64
181
f663d9ad
KT
182namespace {
183
bebadeca 184struct bswap_stat
dffec8eb
JJ
185{
186 /* Number of hand-written 16-bit nop / bswaps found. */
187 int found_16bit;
188
189 /* Number of hand-written 32-bit nop / bswaps found. */
190 int found_32bit;
191
192 /* Number of hand-written 64-bit nop / bswaps found. */
193 int found_64bit;
194} nop_stats, bswap_stats;
195
196/* A symbolic number structure is used to detect byte permutation and selection
197 patterns of a source. To achieve that, its field N contains an artificial
198 number consisting of BITS_PER_MARKER sized markers tracking where does each
199 byte come from in the source:
200
201 0 - target byte has the value 0
202 FF - target byte has an unknown value (eg. due to sign extension)
203 1..size - marker value is the byte index in the source (0 for lsb).
204
205 To detect permutations on memory sources (arrays and structures), a symbolic
206 number is also associated:
207 - a base address BASE_ADDR and an OFFSET giving the address of the source;
208 - a range which gives the difference between the highest and lowest accessed
209 memory location to make such a symbolic number;
210 - the address SRC of the source element of lowest address as a convenience
211 to easily get BASE_ADDR + offset + lowest bytepos;
212 - number of expressions N_OPS bitwise ored together to represent
213 approximate cost of the computation.
214
215 Note 1: the range is different from size as size reflects the size of the
216 type of the current expression. For instance, for an array char a[],
217 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
219 time a range of 1.
220
221 Note 2: for non-memory sources, range holds the same value as size.
222
223 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
224
225struct symbolic_number {
226 uint64_t n;
227 tree type;
228 tree base_addr;
229 tree offset;
eaa41a6d 230 poly_int64 bytepos;
dffec8eb
JJ
231 tree src;
232 tree alias_set;
233 tree vuse;
234 unsigned HOST_WIDE_INT range;
235 int n_ops;
236};
237
238#define BITS_PER_MARKER 8
239#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240#define MARKER_BYTE_UNKNOWN MARKER_MASK
241#define HEAD_MARKER(n, size) \
242 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
243
244/* The number which the find_bswap_or_nop_1 result should match in
245 order to have a nop. The number is masked according to the size of
246 the symbolic number before using it. */
247#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248 (uint64_t)0x08070605 << 32 | 0x04030201)
249
250/* The number which the find_bswap_or_nop_1 result should match in
251 order to have a byte swap. The number is masked according to the
252 size of the symbolic number before using it. */
253#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254 (uint64_t)0x01020304 << 32 | 0x05060708)
255
256/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257 number N. Return false if the requested operation is not permitted
258 on a symbolic number. */
259
260inline bool
261do_shift_rotate (enum tree_code code,
262 struct symbolic_number *n,
263 int count)
264{
265 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
f675afa4 266 uint64_t head_marker;
dffec8eb 267
444cda74
JJ
268 if (count < 0
269 || count >= TYPE_PRECISION (n->type)
270 || count % BITS_PER_UNIT != 0)
dffec8eb
JJ
271 return false;
272 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
273
274 /* Zero out the extra bits of N in order to avoid them being shifted
275 into the significant bits. */
276 if (size < 64 / BITS_PER_MARKER)
277 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
278
279 switch (code)
280 {
281 case LSHIFT_EXPR:
282 n->n <<= count;
283 break;
284 case RSHIFT_EXPR:
285 head_marker = HEAD_MARKER (n->n, size);
286 n->n >>= count;
287 /* Arithmetic shift of signed type: result is dependent on the value. */
288 if (!TYPE_UNSIGNED (n->type) && head_marker)
289 for (i = 0; i < count / BITS_PER_MARKER; i++)
290 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
291 << ((size - 1 - i) * BITS_PER_MARKER);
292 break;
293 case LROTATE_EXPR:
294 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
295 break;
296 case RROTATE_EXPR:
297 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
298 break;
299 default:
300 return false;
301 }
302 /* Zero unused bits for size. */
303 if (size < 64 / BITS_PER_MARKER)
304 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
305 return true;
306}
307
308/* Perform sanity checking for the symbolic number N and the gimple
309 statement STMT. */
310
311inline bool
312verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
313{
314 tree lhs_type;
315
650c70a9 316 lhs_type = TREE_TYPE (gimple_get_lhs (stmt));
dffec8eb 317
5ea39b24
JJ
318 if (TREE_CODE (lhs_type) != INTEGER_TYPE
319 && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
dffec8eb
JJ
320 return false;
321
322 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
323 return false;
324
325 return true;
326}
327
328/* Initialize the symbolic number N for the bswap pass from the base element
329 SRC manipulated by the bitwise OR expression. */
330
331bool
332init_symbolic_number (struct symbolic_number *n, tree src)
333{
334 int size;
335
5b9a65ec 336 if (!INTEGRAL_TYPE_P (TREE_TYPE (src)) && !POINTER_TYPE_P (TREE_TYPE (src)))
dffec8eb
JJ
337 return false;
338
339 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
340 n->src = src;
341
342 /* Set up the symbolic number N by setting each byte to a value between 1 and
343 the byte size of rhs1. The highest order byte is set to n->size and the
344 lowest order byte to 1. */
345 n->type = TREE_TYPE (src);
346 size = TYPE_PRECISION (n->type);
347 if (size % BITS_PER_UNIT != 0)
348 return false;
349 size /= BITS_PER_UNIT;
350 if (size > 64 / BITS_PER_MARKER)
351 return false;
352 n->range = size;
353 n->n = CMPNOP;
354 n->n_ops = 1;
355
356 if (size < 64 / BITS_PER_MARKER)
357 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
358
359 return true;
360}
361
362/* Check if STMT might be a byte swap or a nop from a memory source and returns
363 the answer. If so, REF is that memory source and the base of the memory area
364 accessed and the offset of the access from that base are recorded in N. */
365
d8b05aef 366static bool
dffec8eb
JJ
367find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
368{
369 /* Leaf node is an array or component ref. Memorize its base and
370 offset from base to compare to other such leaf node. */
f37fac2b 371 poly_int64 bitsize, bitpos, bytepos;
dffec8eb
JJ
372 machine_mode mode;
373 int unsignedp, reversep, volatilep;
374 tree offset, base_addr;
375
376 /* Not prepared to handle PDP endian. */
377 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
378 return false;
379
380 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
381 return false;
382
383 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
384 &unsignedp, &reversep, &volatilep);
385
4b84d9b8
JJ
386 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
387 /* Do not rewrite TARGET_MEM_REF. */
388 return false;
389 else if (TREE_CODE (base_addr) == MEM_REF)
dffec8eb 390 {
3fed2ce9 391 poly_offset_int bit_offset = 0;
dffec8eb
JJ
392 tree off = TREE_OPERAND (base_addr, 1);
393
394 if (!integer_zerop (off))
395 {
3fed2ce9
RS
396 poly_offset_int boff = mem_ref_offset (base_addr);
397 boff <<= LOG2_BITS_PER_UNIT;
dffec8eb
JJ
398 bit_offset += boff;
399 }
400
401 base_addr = TREE_OPERAND (base_addr, 0);
402
403 /* Avoid returning a negative bitpos as this may wreak havoc later. */
3fed2ce9 404 if (maybe_lt (bit_offset, 0))
dffec8eb 405 {
3fed2ce9
RS
406 tree byte_offset = wide_int_to_tree
407 (sizetype, bits_to_bytes_round_down (bit_offset));
408 bit_offset = num_trailing_bits (bit_offset);
dffec8eb 409 if (offset)
3fed2ce9 410 offset = size_binop (PLUS_EXPR, offset, byte_offset);
dffec8eb 411 else
3fed2ce9 412 offset = byte_offset;
dffec8eb
JJ
413 }
414
3fed2ce9 415 bitpos += bit_offset.force_shwi ();
dffec8eb 416 }
4b84d9b8
JJ
417 else
418 base_addr = build_fold_addr_expr (base_addr);
dffec8eb 419
f37fac2b 420 if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
dffec8eb 421 return false;
f37fac2b 422 if (!multiple_p (bitsize, BITS_PER_UNIT))
dffec8eb
JJ
423 return false;
424 if (reversep)
425 return false;
426
427 if (!init_symbolic_number (n, ref))
428 return false;
429 n->base_addr = base_addr;
430 n->offset = offset;
f37fac2b 431 n->bytepos = bytepos;
dffec8eb
JJ
432 n->alias_set = reference_alias_ptr_type (ref);
433 n->vuse = gimple_vuse (stmt);
434 return true;
435}
436
04eccbbe
JJ
437/* Compute the symbolic number N representing the result of a bitwise OR,
438 bitwise XOR or plus on 2 symbolic number N1 and N2 whose source statements
439 are respectively SOURCE_STMT1 and SOURCE_STMT2. CODE is the operation. */
dffec8eb
JJ
440
441gimple *
442perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
443 gimple *source_stmt2, struct symbolic_number *n2,
04eccbbe 444 struct symbolic_number *n, enum tree_code code)
dffec8eb
JJ
445{
446 int i, size;
447 uint64_t mask;
448 gimple *source_stmt;
449 struct symbolic_number *n_start;
450
451 tree rhs1 = gimple_assign_rhs1 (source_stmt1);
452 if (TREE_CODE (rhs1) == BIT_FIELD_REF
453 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
454 rhs1 = TREE_OPERAND (rhs1, 0);
455 tree rhs2 = gimple_assign_rhs1 (source_stmt2);
456 if (TREE_CODE (rhs2) == BIT_FIELD_REF
457 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
458 rhs2 = TREE_OPERAND (rhs2, 0);
459
460 /* Sources are different, cancel bswap if they are not memory location with
461 the same base (array, structure, ...). */
462 if (rhs1 != rhs2)
463 {
464 uint64_t inc;
4a022c70 465 HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
dffec8eb
JJ
466 struct symbolic_number *toinc_n_ptr, *n_end;
467 basic_block bb1, bb2;
468
469 if (!n1->base_addr || !n2->base_addr
470 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
471 return NULL;
472
473 if (!n1->offset != !n2->offset
474 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
475 return NULL;
476
4a022c70
RS
477 start1 = 0;
478 if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
479 return NULL;
480
481 if (start1 < start2)
dffec8eb
JJ
482 {
483 n_start = n1;
4a022c70 484 start_sub = start2 - start1;
dffec8eb
JJ
485 }
486 else
487 {
488 n_start = n2;
4a022c70 489 start_sub = start1 - start2;
dffec8eb
JJ
490 }
491
492 bb1 = gimple_bb (source_stmt1);
493 bb2 = gimple_bb (source_stmt2);
494 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
495 source_stmt = source_stmt1;
496 else
497 source_stmt = source_stmt2;
498
499 /* Find the highest address at which a load is performed and
500 compute related info. */
4a022c70
RS
501 end1 = start1 + (n1->range - 1);
502 end2 = start2 + (n2->range - 1);
dffec8eb
JJ
503 if (end1 < end2)
504 {
505 end = end2;
506 end_sub = end2 - end1;
507 }
508 else
509 {
510 end = end1;
511 end_sub = end1 - end2;
512 }
513 n_end = (end2 > end1) ? n2 : n1;
514
515 /* Find symbolic number whose lsb is the most significant. */
516 if (BYTES_BIG_ENDIAN)
517 toinc_n_ptr = (n_end == n1) ? n2 : n1;
518 else
519 toinc_n_ptr = (n_start == n1) ? n2 : n1;
520
4a022c70 521 n->range = end - MIN (start1, start2) + 1;
dffec8eb
JJ
522
523 /* Check that the range of memory covered can be represented by
524 a symbolic number. */
525 if (n->range > 64 / BITS_PER_MARKER)
526 return NULL;
527
528 /* Reinterpret byte marks in symbolic number holding the value of
529 bigger weight according to target endianness. */
530 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
531 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
532 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
533 {
534 unsigned marker
535 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
536 if (marker && marker != MARKER_BYTE_UNKNOWN)
537 toinc_n_ptr->n += inc;
538 }
539 }
540 else
541 {
542 n->range = n1->range;
543 n_start = n1;
544 source_stmt = source_stmt1;
545 }
546
547 if (!n1->alias_set
548 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
549 n->alias_set = n1->alias_set;
550 else
551 n->alias_set = ptr_type_node;
552 n->vuse = n_start->vuse;
553 n->base_addr = n_start->base_addr;
554 n->offset = n_start->offset;
555 n->src = n_start->src;
556 n->bytepos = n_start->bytepos;
557 n->type = n_start->type;
558 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
531dae29 559 uint64_t res_n = n1->n | n2->n;
dffec8eb
JJ
560
561 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
562 {
563 uint64_t masked1, masked2;
564
565 masked1 = n1->n & mask;
566 masked2 = n2->n & mask;
531dae29
JJ
567 /* If at least one byte is 0, all of 0 | x == 0 ^ x == 0 + x == x. */
568 if (masked1 && masked2)
569 {
570 /* + can carry into upper bits, just punt. */
571 if (code == PLUS_EXPR)
572 return NULL;
573 /* x | x is still x. */
574 if (code == BIT_IOR_EXPR && masked1 == masked2)
575 continue;
576 if (code == BIT_XOR_EXPR)
577 {
578 /* x ^ x is 0, but MARKER_BYTE_UNKNOWN stands for
579 unknown values and unknown ^ unknown is unknown. */
580 if (masked1 == masked2
581 && masked1 != ((uint64_t) MARKER_BYTE_UNKNOWN
582 << i * BITS_PER_MARKER))
583 {
584 res_n &= ~mask;
585 continue;
586 }
587 }
588 /* Otherwise set the byte to unknown, it might still be
589 later masked off. */
590 res_n |= mask;
591 }
dffec8eb 592 }
531dae29 593 n->n = res_n;
dffec8eb
JJ
594 n->n_ops = n1->n_ops + n2->n_ops;
595
596 return source_stmt;
597}
598
599/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
600 the operation given by the rhs of STMT on the result. If the operation
601 could successfully be executed the function returns a gimple stmt whose
602 rhs's first tree is the expression of the source operand and NULL
603 otherwise. */
604
605gimple *
606find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
607{
608 enum tree_code code;
609 tree rhs1, rhs2 = NULL;
610 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
611 enum gimple_rhs_class rhs_class;
612
d8b05aef
RS
613 if (!limit
614 || !is_gimple_assign (stmt)
615 || stmt_can_throw_internal (cfun, stmt))
dffec8eb
JJ
616 return NULL;
617
618 rhs1 = gimple_assign_rhs1 (stmt);
619
620 if (find_bswap_or_nop_load (stmt, rhs1, n))
621 return stmt;
622
623 /* Handle BIT_FIELD_REF. */
624 if (TREE_CODE (rhs1) == BIT_FIELD_REF
625 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
626 {
35cf3c55
KZ
627 if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
628 || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
629 return NULL;
630
dffec8eb
JJ
631 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
632 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
633 if (bitpos % BITS_PER_UNIT == 0
634 && bitsize % BITS_PER_UNIT == 0
635 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
636 {
637 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
638 if (BYTES_BIG_ENDIAN)
639 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
640
641 /* Shift. */
642 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
643 return NULL;
644
645 /* Mask. */
646 uint64_t mask = 0;
647 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
648 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
649 i++, tmp <<= BITS_PER_UNIT)
650 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
651 n->n &= mask;
652
653 /* Convert. */
654 n->type = TREE_TYPE (rhs1);
4f8e31e0
RB
655 if (!verify_symbolic_number_p (n, stmt))
656 return NULL;
657
dffec8eb
JJ
658 if (!n->base_addr)
659 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
660
4f8e31e0 661 return stmt;
dffec8eb
JJ
662 }
663
664 return NULL;
665 }
666
667 if (TREE_CODE (rhs1) != SSA_NAME)
668 return NULL;
669
670 code = gimple_assign_rhs_code (stmt);
671 rhs_class = gimple_assign_rhs_class (stmt);
672 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
673
674 if (rhs_class == GIMPLE_BINARY_RHS)
675 rhs2 = gimple_assign_rhs2 (stmt);
676
677 /* Handle unary rhs and binary rhs with integer constants as second
678 operand. */
679
680 if (rhs_class == GIMPLE_UNARY_RHS
681 || (rhs_class == GIMPLE_BINARY_RHS
682 && TREE_CODE (rhs2) == INTEGER_CST))
683 {
684 if (code != BIT_AND_EXPR
685 && code != LSHIFT_EXPR
686 && code != RSHIFT_EXPR
687 && code != LROTATE_EXPR
688 && code != RROTATE_EXPR
689 && !CONVERT_EXPR_CODE_P (code))
690 return NULL;
691
692 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
693
694 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
695 we have to initialize the symbolic number. */
696 if (!source_stmt1)
697 {
698 if (gimple_assign_load_p (stmt)
699 || !init_symbolic_number (n, rhs1))
700 return NULL;
701 source_stmt1 = stmt;
702 }
703
704 switch (code)
705 {
706 case BIT_AND_EXPR:
707 {
708 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
709 uint64_t val = int_cst_value (rhs2), mask = 0;
710 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
711
712 /* Only constants masking full bytes are allowed. */
713 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
714 if ((val & tmp) != 0 && (val & tmp) != tmp)
715 return NULL;
716 else if (val & tmp)
717 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
718
719 n->n &= mask;
720 }
721 break;
722 case LSHIFT_EXPR:
723 case RSHIFT_EXPR:
724 case LROTATE_EXPR:
725 case RROTATE_EXPR:
726 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
727 return NULL;
728 break;
729 CASE_CONVERT:
730 {
731 int i, type_size, old_type_size;
732 tree type;
733
650c70a9 734 type = TREE_TYPE (gimple_assign_lhs (stmt));
dffec8eb
JJ
735 type_size = TYPE_PRECISION (type);
736 if (type_size % BITS_PER_UNIT != 0)
737 return NULL;
738 type_size /= BITS_PER_UNIT;
739 if (type_size > 64 / BITS_PER_MARKER)
740 return NULL;
741
742 /* Sign extension: result is dependent on the value. */
743 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
744 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
745 && HEAD_MARKER (n->n, old_type_size))
746 for (i = 0; i < type_size - old_type_size; i++)
747 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
748 << ((type_size - 1 - i) * BITS_PER_MARKER);
749
750 if (type_size < 64 / BITS_PER_MARKER)
751 {
752 /* If STMT casts to a smaller type mask out the bits not
753 belonging to the target type. */
754 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
755 }
756 n->type = type;
757 if (!n->base_addr)
758 n->range = type_size;
759 }
760 break;
761 default:
762 return NULL;
763 };
764 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
765 }
766
767 /* Handle binary rhs. */
768
769 if (rhs_class == GIMPLE_BINARY_RHS)
770 {
771 struct symbolic_number n1, n2;
772 gimple *source_stmt, *source_stmt2;
773
a944b5de 774 if (!rhs2 || TREE_CODE (rhs2) != SSA_NAME)
dffec8eb
JJ
775 return NULL;
776
777 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
778
779 switch (code)
780 {
781 case BIT_IOR_EXPR:
a944b5de
RS
782 case BIT_XOR_EXPR:
783 case PLUS_EXPR:
dffec8eb
JJ
784 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
785
786 if (!source_stmt1)
787 return NULL;
788
789 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
790
791 if (!source_stmt2)
792 return NULL;
793
794 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
795 return NULL;
796
4b84d9b8 797 if (n1.vuse != n2.vuse)
dffec8eb
JJ
798 return NULL;
799
800 source_stmt
04eccbbe
JJ
801 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
802 code);
dffec8eb
JJ
803
804 if (!source_stmt)
805 return NULL;
806
807 if (!verify_symbolic_number_p (n, stmt))
808 return NULL;
809
810 break;
811 default:
812 return NULL;
813 }
814 return source_stmt;
815 }
816 return NULL;
817}
818
4b84d9b8
JJ
819/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
820 *CMPXCHG, *CMPNOP and adjust *N. */
dffec8eb 821
4b84d9b8
JJ
822void
823find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
b320edc0 824 uint64_t *cmpnop, bool *cast64_to_32)
dffec8eb
JJ
825{
826 unsigned rsize;
827 uint64_t tmpn, mask;
dffec8eb 828
4b84d9b8
JJ
829 /* The number which the find_bswap_or_nop_1 result should match in order
830 to have a full byte swap. The number is shifted to the right
831 according to the size of the symbolic number before using it. */
832 *cmpxchg = CMPXCHG;
833 *cmpnop = CMPNOP;
b320edc0 834 *cast64_to_32 = false;
dffec8eb
JJ
835
836 /* Find real size of result (highest non-zero byte). */
837 if (n->base_addr)
838 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
839 else
840 rsize = n->range;
841
842 /* Zero out the bits corresponding to untouched bytes in original gimple
843 expression. */
844 if (n->range < (int) sizeof (int64_t))
845 {
846 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
b320edc0
JJ
847 if (n->base_addr == NULL
848 && n->range == 4
849 && int_size_in_bytes (TREE_TYPE (n->src)) == 8)
850 {
851 /* If all bytes in n->n are either 0 or in [5..8] range, this
852 might be a candidate for (unsigned) __builtin_bswap64 (src).
853 It is not worth it for (unsigned short) __builtin_bswap64 (src)
854 or (unsigned short) __builtin_bswap32 (src). */
855 *cast64_to_32 = true;
856 for (tmpn = n->n; tmpn; tmpn >>= BITS_PER_MARKER)
857 if ((tmpn & MARKER_MASK)
858 && ((tmpn & MARKER_MASK) <= 4 || (tmpn & MARKER_MASK) > 8))
859 {
860 *cast64_to_32 = false;
861 break;
862 }
863 }
864 if (*cast64_to_32)
865 *cmpxchg &= mask;
866 else
867 *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
4b84d9b8 868 *cmpnop &= mask;
dffec8eb
JJ
869 }
870
871 /* Zero out the bits corresponding to unused bytes in the result of the
872 gimple expression. */
873 if (rsize < n->range)
874 {
875 if (BYTES_BIG_ENDIAN)
876 {
877 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
4b84d9b8 878 *cmpxchg &= mask;
567d5f3d
JJ
879 if (n->range - rsize == sizeof (int64_t))
880 *cmpnop = 0;
881 else
882 *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
dffec8eb
JJ
883 }
884 else
885 {
886 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
567d5f3d
JJ
887 if (n->range - rsize == sizeof (int64_t))
888 *cmpxchg = 0;
889 else
890 *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
4b84d9b8 891 *cmpnop &= mask;
dffec8eb
JJ
892 }
893 n->range = rsize;
894 }
895
b320edc0
JJ
896 if (*cast64_to_32)
897 n->range = 8;
4b84d9b8
JJ
898 n->range *= BITS_PER_UNIT;
899}
900
d8545fb2 901/* Helper function for find_bswap_or_nop,
902 Return true if N is a swap or nop with MASK. */
903static bool
904is_bswap_or_nop_p (uint64_t n, uint64_t cmpxchg,
905 uint64_t cmpnop, uint64_t* mask,
906 bool* bswap)
907{
908 *mask = ~(uint64_t) 0;
909 if (n == cmpnop)
910 *bswap = false;
911 else if (n == cmpxchg)
912 *bswap = true;
913 else
914 {
915 int set = 0;
916 for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER)
917 if ((n & msk) == 0)
918 *mask &= ~msk;
919 else if ((n & msk) == (cmpxchg & msk))
920 set++;
921 else
922 return false;
923
924 if (set < 2)
925 return false;
926 *bswap = true;
927 }
928 return true;
929}
930
931
4b84d9b8
JJ
932/* Check if STMT completes a bswap implementation or a read in a given
933 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
934 accordingly. It also sets N to represent the kind of operations
935 performed: size of the resulting expression and whether it works on
936 a memory source, and if so alias-set and vuse. At last, the
937 function returns a stmt whose rhs's first tree is the source
938 expression. */
939
940gimple *
b320edc0 941find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap,
d8545fb2 942 bool *cast64_to_32, uint64_t *mask, uint64_t* l_rotate)
4b84d9b8 943{
650c70a9 944 tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
7f0ce82a
KT
945 if (!tree_fits_uhwi_p (type_size))
946 return NULL;
947
4b84d9b8
JJ
948 /* The last parameter determines the depth search limit. It usually
949 correlates directly to the number n of bytes to be touched. We
0f507a36 950 increase that number by 2 * (log2(n) + 1) here in order to also
4b84d9b8
JJ
951 cover signed -> unsigned conversions of the src operand as can be seen
952 in libgcc, and for initial shift/and operation of the src operand. */
7f0ce82a 953 int limit = tree_to_uhwi (type_size);
0f507a36 954 limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
4b84d9b8
JJ
955 gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
956
957 if (!ins_stmt)
cd676dfa
JJ
958 {
959 if (gimple_assign_rhs_code (stmt) != CONSTRUCTOR
960 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
961 return NULL;
962 unsigned HOST_WIDE_INT sz = tree_to_uhwi (type_size) * BITS_PER_UNIT;
963 if (sz != 16 && sz != 32 && sz != 64)
964 return NULL;
965 tree rhs = gimple_assign_rhs1 (stmt);
9032d2b2
JJ
966 if (CONSTRUCTOR_NELTS (rhs) == 0)
967 return NULL;
cd676dfa
JJ
968 tree eltype = TREE_TYPE (TREE_TYPE (rhs));
969 unsigned HOST_WIDE_INT eltsz
970 = int_size_in_bytes (eltype) * BITS_PER_UNIT;
971 if (TYPE_PRECISION (eltype) != eltsz)
972 return NULL;
973 constructor_elt *elt;
974 unsigned int i;
975 tree type = build_nonstandard_integer_type (sz, 1);
976 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
977 {
978 if (TREE_CODE (elt->value) != SSA_NAME
979 || !INTEGRAL_TYPE_P (TREE_TYPE (elt->value)))
980 return NULL;
981 struct symbolic_number n1;
982 gimple *source_stmt
983 = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt->value), &n1,
984 limit - 1);
985
986 if (!source_stmt)
987 return NULL;
988
989 n1.type = type;
990 if (!n1.base_addr)
991 n1.range = sz / BITS_PER_UNIT;
992
993 if (i == 0)
994 {
995 ins_stmt = source_stmt;
996 *n = n1;
997 }
998 else
999 {
1000 if (n->vuse != n1.vuse)
1001 return NULL;
1002
1003 struct symbolic_number n0 = *n;
1004
1005 if (!BYTES_BIG_ENDIAN)
1006 {
1007 if (!do_shift_rotate (LSHIFT_EXPR, &n1, i * eltsz))
1008 return NULL;
1009 }
1010 else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
1011 return NULL;
1012 ins_stmt
04eccbbe
JJ
1013 = perform_symbolic_merge (ins_stmt, &n0, source_stmt, &n1, n,
1014 BIT_IOR_EXPR);
cd676dfa
JJ
1015
1016 if (!ins_stmt)
1017 return NULL;
1018 }
1019 }
1020 }
4b84d9b8
JJ
1021
1022 uint64_t cmpxchg, cmpnop;
d8545fb2 1023 uint64_t orig_range = n->range * BITS_PER_UNIT;
b320edc0 1024 find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop, cast64_to_32);
4b84d9b8 1025
dffec8eb
JJ
1026 /* A complete byte swap should make the symbolic number to start with
1027 the largest digit in the highest order byte. Unchanged symbolic
1028 number indicates a read with same endianness as target architecture. */
d8545fb2 1029 *l_rotate = 0;
1030 uint64_t tmp_n = n->n;
1031 if (!is_bswap_or_nop_p (tmp_n, cmpxchg, cmpnop, mask, bswap))
b320edc0 1032 {
d8545fb2 1033 /* Try bswap + lrotate. */
1034 /* TODO, handle cast64_to_32 and big/litte_endian memory
1035 source when rsize < range. */
1036 if (n->range == orig_range
57b30f01 1037 /* There're case like 0x300000200 for uint32->uint64 cast,
1038 Don't hanlde this. */
1039 && n->range == TYPE_PRECISION (n->type)
d8545fb2 1040 && ((orig_range == 32
1041 && optab_handler (rotl_optab, SImode) != CODE_FOR_nothing)
1042 || (orig_range == 64
1043 && optab_handler (rotl_optab, DImode) != CODE_FOR_nothing))
1044 && (tmp_n & MARKER_MASK) < orig_range / BITS_PER_UNIT)
1045 {
1046 uint64_t range = (orig_range / BITS_PER_UNIT) * BITS_PER_MARKER;
1047 uint64_t count = (tmp_n & MARKER_MASK) * BITS_PER_MARKER;
1048 /* .i.e. hanlde 0x203040506070800 when lower byte is zero. */
1049 if (!count)
1050 {
1051 for (uint64_t i = 1; i != range / BITS_PER_MARKER; i++)
1052 {
1053 count = (tmp_n >> i * BITS_PER_MARKER) & MARKER_MASK;
1054 if (count)
1055 {
1056 /* Count should be meaningful not 0xff. */
1057 if (count <= range / BITS_PER_MARKER)
1058 {
1059 count = (count + i) * BITS_PER_MARKER % range;
1060 break;
1061 }
1062 else
1063 return NULL;
1064 }
1065 }
1066 }
1067 tmp_n = tmp_n >> count | tmp_n << (range - count);
1068 if (orig_range == 32)
1069 tmp_n &= (1ULL << 32) - 1;
1070 if (!is_bswap_or_nop_p (tmp_n, cmpxchg, cmpnop, mask, bswap))
1071 return NULL;
1072 *l_rotate = count / BITS_PER_MARKER * BITS_PER_UNIT;
1073 gcc_assert (*bswap);
1074 }
1075 else
b320edc0 1076 return NULL;
b320edc0 1077 }
dffec8eb
JJ
1078
1079 /* Useless bit manipulation performed by code. */
1080 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
1081 return NULL;
1082
dffec8eb
JJ
1083 return ins_stmt;
1084}
1085
1086const pass_data pass_data_optimize_bswap =
1087{
1088 GIMPLE_PASS, /* type */
1089 "bswap", /* name */
1090 OPTGROUP_NONE, /* optinfo_flags */
1091 TV_NONE, /* tv_id */
1092 PROP_ssa, /* properties_required */
1093 0, /* properties_provided */
1094 0, /* properties_destroyed */
1095 0, /* todo_flags_start */
1096 0, /* todo_flags_finish */
1097};
1098
1099class pass_optimize_bswap : public gimple_opt_pass
1100{
1101public:
1102 pass_optimize_bswap (gcc::context *ctxt)
1103 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
1104 {}
1105
1106 /* opt_pass methods: */
725793af 1107 bool gate (function *) final override
dffec8eb
JJ
1108 {
1109 return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
1110 }
1111
725793af 1112 unsigned int execute (function *) final override;
dffec8eb
JJ
1113
1114}; // class pass_optimize_bswap
1115
d02a8b63
JJ
1116/* Helper function for bswap_replace. Build VIEW_CONVERT_EXPR from
1117 VAL to TYPE. If VAL has different type size, emit a NOP_EXPR cast
1118 first. */
1119
1120static tree
45ff1251
JJ
1121bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val,
1122 bool before)
d02a8b63 1123{
a4001578
JJ
1124 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val))
1125 || POINTER_TYPE_P (TREE_TYPE (val)));
d02a8b63
JJ
1126 if (TYPE_SIZE (type) != TYPE_SIZE (TREE_TYPE (val)))
1127 {
1128 HOST_WIDE_INT prec = TREE_INT_CST_LOW (TYPE_SIZE (type));
a4001578
JJ
1129 if (POINTER_TYPE_P (TREE_TYPE (val)))
1130 {
1131 gimple *g
1132 = gimple_build_assign (make_ssa_name (pointer_sized_int_node),
1133 NOP_EXPR, val);
45ff1251
JJ
1134 if (before)
1135 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1136 else
1137 gsi_insert_after (gsi, g, GSI_NEW_STMT);
a4001578
JJ
1138 val = gimple_assign_lhs (g);
1139 }
d02a8b63
JJ
1140 tree itype = build_nonstandard_integer_type (prec, 1);
1141 gimple *g = gimple_build_assign (make_ssa_name (itype), NOP_EXPR, val);
45ff1251
JJ
1142 if (before)
1143 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1144 else
1145 gsi_insert_after (gsi, g, GSI_NEW_STMT);
d02a8b63
JJ
1146 val = gimple_assign_lhs (g);
1147 }
1148 return build1 (VIEW_CONVERT_EXPR, type, val);
1149}
1150
dffec8eb 1151/* Perform the bswap optimization: replace the expression computed in the rhs
4b84d9b8
JJ
1152 of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
1153 bswap, load or load + bswap expression.
dffec8eb
JJ
1154 Which of these alternatives replace the rhs is given by N->base_addr (non
1155 null if a load is needed) and BSWAP. The type, VUSE and set-alias of the
1156 load to perform are also given in N while the builtin bswap invoke is given
4b84d9b8
JJ
1157 in FNDEL. Finally, if a load is involved, INS_STMT refers to one of the
1158 load statements involved to construct the rhs in gsi_stmt (GSI) and
1159 N->range gives the size of the rhs expression for maintaining some
1160 statistics.
dffec8eb 1161
4b84d9b8
JJ
1162 Note that if the replacement involve a load and if gsi_stmt (GSI) is
1163 non-NULL, that stmt is moved just after INS_STMT to do the load with the
1164 same VUSE which can lead to gsi_stmt (GSI) changing of basic block. */
dffec8eb 1165
4b84d9b8
JJ
1166tree
1167bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
dffec8eb 1168 tree bswap_type, tree load_type, struct symbolic_number *n,
d8545fb2 1169 bool bswap, uint64_t mask, uint64_t l_rotate)
dffec8eb 1170{
4b84d9b8 1171 tree src, tmp, tgt = NULL_TREE;
d8545fb2 1172 gimple *bswap_stmt, *mask_stmt = NULL, *rotl_stmt = NULL;
cd676dfa 1173 tree_code conv_code = NOP_EXPR;
dffec8eb 1174
4b84d9b8 1175 gimple *cur_stmt = gsi_stmt (gsi);
dffec8eb 1176 src = n->src;
4b84d9b8 1177 if (cur_stmt)
cd676dfa
JJ
1178 {
1179 tgt = gimple_assign_lhs (cur_stmt);
1180 if (gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR
1181 && tgt
1182 && VECTOR_TYPE_P (TREE_TYPE (tgt)))
1183 conv_code = VIEW_CONVERT_EXPR;
1184 }
dffec8eb
JJ
1185
1186 /* Need to load the value from memory first. */
1187 if (n->base_addr)
1188 {
4b84d9b8
JJ
1189 gimple_stmt_iterator gsi_ins = gsi;
1190 if (ins_stmt)
1191 gsi_ins = gsi_for_stmt (ins_stmt);
dffec8eb
JJ
1192 tree addr_expr, addr_tmp, val_expr, val_tmp;
1193 tree load_offset_ptr, aligned_load_type;
4b84d9b8
JJ
1194 gimple *load_stmt;
1195 unsigned align = get_object_alignment (src);
4a022c70 1196 poly_int64 load_offset = 0;
dffec8eb 1197
4b84d9b8
JJ
1198 if (cur_stmt)
1199 {
1200 basic_block ins_bb = gimple_bb (ins_stmt);
1201 basic_block cur_bb = gimple_bb (cur_stmt);
1202 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
1203 return NULL_TREE;
1204
1205 /* Move cur_stmt just before one of the load of the original
1206 to ensure it has the same VUSE. See PR61517 for what could
1207 go wrong. */
1208 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
1209 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
1210 gsi_move_before (&gsi, &gsi_ins);
1211 gsi = gsi_for_stmt (cur_stmt);
1212 }
1213 else
1214 gsi = gsi_ins;
dffec8eb
JJ
1215
1216 /* Compute address to load from and cast according to the size
1217 of the load. */
4b84d9b8 1218 addr_expr = build_fold_addr_expr (src);
dffec8eb 1219 if (is_gimple_mem_ref_addr (addr_expr))
4b84d9b8 1220 addr_tmp = unshare_expr (addr_expr);
dffec8eb
JJ
1221 else
1222 {
4b84d9b8
JJ
1223 addr_tmp = unshare_expr (n->base_addr);
1224 if (!is_gimple_mem_ref_addr (addr_tmp))
1225 addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
1226 is_gimple_mem_ref_addr,
1227 NULL_TREE, true,
1228 GSI_SAME_STMT);
1229 load_offset = n->bytepos;
1230 if (n->offset)
1231 {
1232 tree off
1233 = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
1234 true, NULL_TREE, true,
1235 GSI_SAME_STMT);
1236 gimple *stmt
1237 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
1238 POINTER_PLUS_EXPR, addr_tmp, off);
1239 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1240 addr_tmp = gimple_assign_lhs (stmt);
1241 }
dffec8eb
JJ
1242 }
1243
1244 /* Perform the load. */
1245 aligned_load_type = load_type;
1246 if (align < TYPE_ALIGN (load_type))
1247 aligned_load_type = build_aligned_type (load_type, align);
1248 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1249 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1250 load_offset_ptr);
1251
1252 if (!bswap)
1253 {
1254 if (n->range == 16)
1255 nop_stats.found_16bit++;
1256 else if (n->range == 32)
1257 nop_stats.found_32bit++;
1258 else
1259 {
1260 gcc_assert (n->range == 64);
1261 nop_stats.found_64bit++;
1262 }
1263
1264 /* Convert the result of load if necessary. */
4b84d9b8 1265 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
dffec8eb
JJ
1266 {
1267 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1268 "load_dst");
1269 load_stmt = gimple_build_assign (val_tmp, val_expr);
1270 gimple_set_vuse (load_stmt, n->vuse);
1271 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
cd676dfa 1272 if (conv_code == VIEW_CONVERT_EXPR)
45ff1251
JJ
1273 val_tmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), val_tmp,
1274 true);
cd676dfa 1275 gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
4b84d9b8 1276 update_stmt (cur_stmt);
dffec8eb 1277 }
4b84d9b8 1278 else if (cur_stmt)
dffec8eb
JJ
1279 {
1280 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1281 gimple_set_vuse (cur_stmt, n->vuse);
4b84d9b8
JJ
1282 update_stmt (cur_stmt);
1283 }
1284 else
1285 {
1286 tgt = make_ssa_name (load_type);
1287 cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1288 gimple_set_vuse (cur_stmt, n->vuse);
1289 gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
dffec8eb 1290 }
dffec8eb
JJ
1291
1292 if (dump_file)
1293 {
1294 fprintf (dump_file,
1295 "%d bit load in target endianness found at: ",
1296 (int) n->range);
1297 print_gimple_stmt (dump_file, cur_stmt, 0);
1298 }
4b84d9b8 1299 return tgt;
dffec8eb
JJ
1300 }
1301 else
1302 {
1303 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1304 load_stmt = gimple_build_assign (val_tmp, val_expr);
1305 gimple_set_vuse (load_stmt, n->vuse);
1306 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1307 }
1308 src = val_tmp;
1309 }
1310 else if (!bswap)
1311 {
4b84d9b8
JJ
1312 gimple *g = NULL;
1313 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
dffec8eb
JJ
1314 {
1315 if (!is_gimple_val (src))
4b84d9b8 1316 return NULL_TREE;
cd676dfa 1317 if (conv_code == VIEW_CONVERT_EXPR)
45ff1251 1318 src = bswap_view_convert (&gsi, TREE_TYPE (tgt), src, true);
cd676dfa 1319 g = gimple_build_assign (tgt, conv_code, src);
dffec8eb 1320 }
4b84d9b8 1321 else if (cur_stmt)
dffec8eb 1322 g = gimple_build_assign (tgt, src);
4b84d9b8
JJ
1323 else
1324 tgt = src;
dffec8eb
JJ
1325 if (n->range == 16)
1326 nop_stats.found_16bit++;
1327 else if (n->range == 32)
1328 nop_stats.found_32bit++;
1329 else
1330 {
1331 gcc_assert (n->range == 64);
1332 nop_stats.found_64bit++;
1333 }
1334 if (dump_file)
1335 {
1336 fprintf (dump_file,
1337 "%d bit reshuffle in target endianness found at: ",
1338 (int) n->range);
4b84d9b8
JJ
1339 if (cur_stmt)
1340 print_gimple_stmt (dump_file, cur_stmt, 0);
1341 else
1342 {
4af78ef8 1343 print_generic_expr (dump_file, tgt, TDF_NONE);
4b84d9b8
JJ
1344 fprintf (dump_file, "\n");
1345 }
dffec8eb 1346 }
4b84d9b8
JJ
1347 if (cur_stmt)
1348 gsi_replace (&gsi, g, true);
1349 return tgt;
dffec8eb
JJ
1350 }
1351 else if (TREE_CODE (src) == BIT_FIELD_REF)
1352 src = TREE_OPERAND (src, 0);
1353
1354 if (n->range == 16)
1355 bswap_stats.found_16bit++;
1356 else if (n->range == 32)
1357 bswap_stats.found_32bit++;
1358 else
1359 {
1360 gcc_assert (n->range == 64);
1361 bswap_stats.found_64bit++;
1362 }
1363
1364 tmp = src;
1365
1366 /* Convert the src expression if necessary. */
1367 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1368 {
1369 gimple *convert_stmt;
1370
1371 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1372 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1373 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1374 }
1375
1376 /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values
1377 are considered as rotation of 2N bit values by N bits is generally not
1378 equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
1379 gives 0x03040102 while a bswap for that value is 0x04030201. */
1380 if (bswap && n->range == 16)
1381 {
1382 tree count = build_int_cst (NULL, BITS_PER_UNIT);
1383 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1384 bswap_stmt = gimple_build_assign (NULL, src);
1385 }
1386 else
1387 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1388
4b84d9b8
JJ
1389 if (tgt == NULL_TREE)
1390 tgt = make_ssa_name (bswap_type);
dffec8eb
JJ
1391 tmp = tgt;
1392
b320edc0
JJ
1393 if (mask != ~(uint64_t) 0)
1394 {
1395 tree m = build_int_cst (bswap_type, mask);
1396 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1397 gimple_set_lhs (bswap_stmt, tmp);
1398 mask_stmt = gimple_build_assign (tgt, BIT_AND_EXPR, tmp, m);
1399 tmp = tgt;
1400 }
1401
d8545fb2 1402 if (l_rotate)
1403 {
1404 tree m = build_int_cst (bswap_type, l_rotate);
1405 tmp = make_temp_ssa_name (bswap_type, NULL,
1406 mask_stmt ? "bswapmaskdst" : "bswapdst");
1407 gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp);
1408 rotl_stmt = gimple_build_assign (tgt, LROTATE_EXPR, tmp, m);
1409 tmp = tgt;
1410 }
1411
dffec8eb
JJ
1412 /* Convert the result if necessary. */
1413 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1414 {
dffec8eb 1415 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
cd676dfa 1416 tree atmp = tmp;
45ff1251 1417 gimple_stmt_iterator gsi2 = gsi;
cd676dfa 1418 if (conv_code == VIEW_CONVERT_EXPR)
45ff1251
JJ
1419 atmp = bswap_view_convert (&gsi2, TREE_TYPE (tgt), tmp, false);
1420 gimple *convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
1421 gsi_insert_after (&gsi2, convert_stmt, GSI_SAME_STMT);
dffec8eb
JJ
1422 }
1423
d8545fb2 1424 gimple_set_lhs (rotl_stmt ? rotl_stmt
1425 : mask_stmt ? mask_stmt : bswap_stmt, tmp);
dffec8eb
JJ
1426
1427 if (dump_file)
1428 {
1429 fprintf (dump_file, "%d bit bswap implementation found at: ",
1430 (int) n->range);
4b84d9b8
JJ
1431 if (cur_stmt)
1432 print_gimple_stmt (dump_file, cur_stmt, 0);
1433 else
1434 {
4af78ef8 1435 print_generic_expr (dump_file, tgt, TDF_NONE);
4b84d9b8
JJ
1436 fprintf (dump_file, "\n");
1437 }
dffec8eb
JJ
1438 }
1439
4b84d9b8
JJ
1440 if (cur_stmt)
1441 {
d8545fb2 1442 if (rotl_stmt)
1443 gsi_insert_after (&gsi, rotl_stmt, GSI_SAME_STMT);
b320edc0
JJ
1444 if (mask_stmt)
1445 gsi_insert_after (&gsi, mask_stmt, GSI_SAME_STMT);
4b84d9b8
JJ
1446 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1447 gsi_remove (&gsi, true);
1448 }
1449 else
b320edc0
JJ
1450 {
1451 gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1452 if (mask_stmt)
1453 gsi_insert_before (&gsi, mask_stmt, GSI_SAME_STMT);
d8545fb2 1454 if (rotl_stmt)
1455 gsi_insert_after (&gsi, rotl_stmt, GSI_SAME_STMT);
b320edc0 1456 }
4b84d9b8 1457 return tgt;
dffec8eb
JJ
1458}
1459
a7553ad6
JJ
1460/* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
1461 using bswap optimizations. CDI_DOMINATORS need to be
1462 computed on entry. Return true if it has been optimized and
1463 TODO_update_ssa is needed. */
1464
1465static bool
1466maybe_optimize_vector_constructor (gimple *cur_stmt)
1467{
1468 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1469 struct symbolic_number n;
1470 bool bswap;
1471
1472 gcc_assert (is_gimple_assign (cur_stmt)
1473 && gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR);
1474
1475 tree rhs = gimple_assign_rhs1 (cur_stmt);
1476 if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
1477 || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
1478 || gimple_assign_lhs (cur_stmt) == NULL_TREE)
1479 return false;
1480
1481 HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
1482 switch (sz)
1483 {
1484 case 16:
1485 load_type = bswap_type = uint16_type_node;
1486 break;
1487 case 32:
1488 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1489 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
1490 {
1491 load_type = uint32_type_node;
1492 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1493 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1494 }
1495 else
1496 return false;
1497 break;
1498 case 64:
1499 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1500 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1501 || (word_mode == SImode
1502 && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1503 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
1504 {
1505 load_type = uint64_type_node;
1506 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1507 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1508 }
1509 else
1510 return false;
1511 break;
1512 default:
1513 return false;
1514 }
1515
b320edc0 1516 bool cast64_to_32;
d8545fb2 1517 uint64_t mask, l_rotate;
b320edc0 1518 gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
d8545fb2 1519 &cast64_to_32, &mask, &l_rotate);
b320edc0
JJ
1520 if (!ins_stmt
1521 || n.range != (unsigned HOST_WIDE_INT) sz
1522 || cast64_to_32
1523 || mask != ~(uint64_t) 0)
a7553ad6
JJ
1524 return false;
1525
1526 if (bswap && !fndecl && n.range != 16)
1527 return false;
1528
1529 memset (&nop_stats, 0, sizeof (nop_stats));
1530 memset (&bswap_stats, 0, sizeof (bswap_stats));
1531 return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
d8545fb2 1532 bswap_type, load_type, &n, bswap, mask,
1533 l_rotate) != NULL_TREE;
a7553ad6
JJ
1534}
1535
dffec8eb
JJ
1536/* Find manual byte swap implementations as well as load in a given
1537 endianness. Byte swaps are turned into a bswap builtin invokation
1538 while endian loads are converted to bswap builtin invokation or
1539 simple load according to the target endianness. */
1540
1541unsigned int
1542pass_optimize_bswap::execute (function *fun)
1543{
1544 basic_block bb;
1545 bool bswap32_p, bswap64_p;
1546 bool changed = false;
1547 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1548
1549 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1550 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1551 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1552 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1553 || (bswap32_p && word_mode == SImode)));
1554
1555 /* Determine the argument type of the builtins. The code later on
1556 assumes that the return and argument type are the same. */
1557 if (bswap32_p)
1558 {
1559 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1560 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1561 }
1562
1563 if (bswap64_p)
1564 {
1565 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1566 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1567 }
1568
1569 memset (&nop_stats, 0, sizeof (nop_stats));
1570 memset (&bswap_stats, 0, sizeof (bswap_stats));
1571 calculate_dominance_info (CDI_DOMINATORS);
1572
1573 FOR_EACH_BB_FN (bb, fun)
1574 {
1575 gimple_stmt_iterator gsi;
1576
1577 /* We do a reverse scan for bswap patterns to make sure we get the
1578 widest match. As bswap pattern matching doesn't handle previously
1579 inserted smaller bswap replacements as sub-patterns, the wider
1580 variant wouldn't be detected. */
1581 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1582 {
1583 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1584 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1585 enum tree_code code;
1586 struct symbolic_number n;
b320edc0 1587 bool bswap, cast64_to_32;
d8545fb2 1588 uint64_t mask, l_rotate;
dffec8eb
JJ
1589
1590 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1591 might be moved to a different basic block by bswap_replace and gsi
1592 must not points to it if that's the case. Moving the gsi_prev
1593 there make sure that gsi points to the statement previous to
1594 cur_stmt while still making sure that all statements are
1595 considered in this basic block. */
1596 gsi_prev (&gsi);
1597
1598 if (!is_gimple_assign (cur_stmt))
1599 continue;
1600
1601 code = gimple_assign_rhs_code (cur_stmt);
1602 switch (code)
1603 {
1604 case LROTATE_EXPR:
1605 case RROTATE_EXPR:
1606 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1607 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1608 % BITS_PER_UNIT)
1609 continue;
1610 /* Fall through. */
1611 case BIT_IOR_EXPR:
a944b5de
RS
1612 case BIT_XOR_EXPR:
1613 case PLUS_EXPR:
dffec8eb 1614 break;
cd676dfa
JJ
1615 case CONSTRUCTOR:
1616 {
1617 tree rhs = gimple_assign_rhs1 (cur_stmt);
1618 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1619 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs))))
1620 break;
1621 }
1622 continue;
dffec8eb
JJ
1623 default:
1624 continue;
1625 }
1626
b320edc0 1627 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
d8545fb2 1628 &cast64_to_32, &mask, &l_rotate);
dffec8eb
JJ
1629
1630 if (!ins_stmt)
1631 continue;
1632
1633 switch (n.range)
1634 {
1635 case 16:
1636 /* Already in canonical form, nothing to do. */
1637 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1638 continue;
1639 load_type = bswap_type = uint16_type_node;
1640 break;
1641 case 32:
1642 load_type = uint32_type_node;
1643 if (bswap32_p)
1644 {
1645 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1646 bswap_type = bswap32_type;
1647 }
1648 break;
1649 case 64:
1650 load_type = uint64_type_node;
1651 if (bswap64_p)
1652 {
1653 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1654 bswap_type = bswap64_type;
1655 }
1656 break;
1657 default:
1658 continue;
1659 }
1660
1661 if (bswap && !fndecl && n.range != 16)
1662 continue;
1663
4b84d9b8 1664 if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
d8545fb2 1665 bswap_type, load_type, &n, bswap, mask,
1666 l_rotate))
dffec8eb
JJ
1667 changed = true;
1668 }
1669 }
1670
1671 statistics_counter_event (fun, "16-bit nop implementations found",
1672 nop_stats.found_16bit);
1673 statistics_counter_event (fun, "32-bit nop implementations found",
1674 nop_stats.found_32bit);
1675 statistics_counter_event (fun, "64-bit nop implementations found",
1676 nop_stats.found_64bit);
1677 statistics_counter_event (fun, "16-bit bswap implementations found",
1678 bswap_stats.found_16bit);
1679 statistics_counter_event (fun, "32-bit bswap implementations found",
1680 bswap_stats.found_32bit);
1681 statistics_counter_event (fun, "64-bit bswap implementations found",
1682 bswap_stats.found_64bit);
1683
1684 return (changed ? TODO_update_ssa : 0);
1685}
1686
1687} // anon namespace
1688
1689gimple_opt_pass *
1690make_pass_optimize_bswap (gcc::context *ctxt)
1691{
1692 return new pass_optimize_bswap (ctxt);
1693}
1694
1695namespace {
1696
245f6de1 1697/* Struct recording one operand for the store, which is either a constant,
c94c3532
EB
1698 then VAL represents the constant and all the other fields are zero, or
1699 a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1700 and the other fields also reflect the memory load, or an SSA name, then
617be7ba 1701 VAL represents the SSA name and all the other fields are zero. */
245f6de1 1702
6c1dae73 1703class store_operand_info
245f6de1 1704{
6c1dae73 1705public:
245f6de1
JJ
1706 tree val;
1707 tree base_addr;
8a91d545
RS
1708 poly_uint64 bitsize;
1709 poly_uint64 bitpos;
1710 poly_uint64 bitregion_start;
1711 poly_uint64 bitregion_end;
245f6de1 1712 gimple *stmt;
383ac8dc 1713 bool bit_not_p;
245f6de1
JJ
1714 store_operand_info ();
1715};
1716
1717store_operand_info::store_operand_info ()
1718 : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
383ac8dc 1719 bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
245f6de1
JJ
1720{
1721}
1722
f663d9ad
KT
1723/* Struct recording the information about a single store of an immediate
1724 to memory. These are created in the first phase and coalesced into
1725 merged_store_group objects in the second phase. */
1726
6c1dae73 1727class store_immediate_info
f663d9ad 1728{
6c1dae73 1729public:
f663d9ad
KT
1730 unsigned HOST_WIDE_INT bitsize;
1731 unsigned HOST_WIDE_INT bitpos;
a62b3dc5
JJ
1732 unsigned HOST_WIDE_INT bitregion_start;
1733 /* This is one past the last bit of the bit region. */
1734 unsigned HOST_WIDE_INT bitregion_end;
f663d9ad
KT
1735 gimple *stmt;
1736 unsigned int order;
e362a897
EB
1737 /* INTEGER_CST for constant store, STRING_CST for string store,
1738 MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
1739 BIT_INSERT_EXPR for bit insertion.
4b84d9b8
JJ
1740 LROTATE_EXPR if it can be only bswap optimized and
1741 ops are not really meaningful.
1742 NOP_EXPR if bswap optimization detected identity, ops
1743 are not meaningful. */
245f6de1 1744 enum tree_code rhs_code;
4b84d9b8
JJ
1745 /* Two fields for bswap optimization purposes. */
1746 struct symbolic_number n;
1747 gimple *ins_stmt;
127ef369 1748 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
d60edaba 1749 bool bit_not_p;
127ef369
JJ
1750 /* True if ops have been swapped and thus ops[1] represents
1751 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1752 bool ops_swapped_p;
629387a6
EB
1753 /* The index number of the landing pad, or 0 if there is none. */
1754 int lp_nr;
245f6de1
JJ
1755 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1756 just the first one. */
1757 store_operand_info ops[2];
b5926e23 1758 store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
a62b3dc5 1759 unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
4b84d9b8 1760 gimple *, unsigned int, enum tree_code,
629387a6 1761 struct symbolic_number &, gimple *, bool, int,
245f6de1
JJ
1762 const store_operand_info &,
1763 const store_operand_info &);
f663d9ad
KT
1764};
1765
1766store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
b5926e23 1767 unsigned HOST_WIDE_INT bp,
a62b3dc5
JJ
1768 unsigned HOST_WIDE_INT brs,
1769 unsigned HOST_WIDE_INT bre,
b5926e23 1770 gimple *st,
245f6de1
JJ
1771 unsigned int ord,
1772 enum tree_code rhscode,
4b84d9b8
JJ
1773 struct symbolic_number &nr,
1774 gimple *ins_stmtp,
d60edaba 1775 bool bitnotp,
629387a6 1776 int nr2,
245f6de1
JJ
1777 const store_operand_info &op0r,
1778 const store_operand_info &op1r)
a62b3dc5 1779 : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
4b84d9b8 1780 stmt (st), order (ord), rhs_code (rhscode), n (nr),
629387a6 1781 ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
4bc6fb21 1782 lp_nr (nr2), ops { op0r, op1r }
245f6de1
JJ
1783{
1784}
f663d9ad
KT
1785
1786/* Struct representing a group of stores to contiguous memory locations.
1787 These are produced by the second phase (coalescing) and consumed in the
1788 third phase that outputs the widened stores. */
1789
6c1dae73 1790class merged_store_group
f663d9ad 1791{
6c1dae73 1792public:
f663d9ad
KT
1793 unsigned HOST_WIDE_INT start;
1794 unsigned HOST_WIDE_INT width;
a62b3dc5
JJ
1795 unsigned HOST_WIDE_INT bitregion_start;
1796 unsigned HOST_WIDE_INT bitregion_end;
1797 /* The size of the allocated memory for val and mask. */
f663d9ad 1798 unsigned HOST_WIDE_INT buf_size;
a62b3dc5 1799 unsigned HOST_WIDE_INT align_base;
8a91d545 1800 poly_uint64 load_align_base[2];
f663d9ad
KT
1801
1802 unsigned int align;
245f6de1 1803 unsigned int load_align[2];
f663d9ad
KT
1804 unsigned int first_order;
1805 unsigned int last_order;
7f5a3982 1806 bool bit_insertion;
e362a897 1807 bool string_concatenation;
18e0c3d1 1808 bool only_constants;
1b3c9813 1809 bool consecutive;
18e0c3d1 1810 unsigned int first_nonmergeable_order;
629387a6 1811 int lp_nr;
f663d9ad 1812
a62b3dc5 1813 auto_vec<store_immediate_info *> stores;
f663d9ad
KT
1814 /* We record the first and last original statements in the sequence because
1815 we'll need their vuse/vdef and replacement position. It's easier to keep
1816 track of them separately as 'stores' is reordered by apply_stores. */
1817 gimple *last_stmt;
1818 gimple *first_stmt;
1819 unsigned char *val;
a62b3dc5 1820 unsigned char *mask;
f663d9ad
KT
1821
1822 merged_store_group (store_immediate_info *);
1823 ~merged_store_group ();
7f5a3982 1824 bool can_be_merged_into (store_immediate_info *);
f663d9ad
KT
1825 void merge_into (store_immediate_info *);
1826 void merge_overlapping (store_immediate_info *);
1827 bool apply_stores ();
a62b3dc5
JJ
1828private:
1829 void do_merge (store_immediate_info *);
f663d9ad
KT
1830};
1831
1832/* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1833
1834static void
1835dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1836{
1837 if (!fd)
1838 return;
1839
1840 for (unsigned int i = 0; i < len; i++)
c94c3532 1841 fprintf (fd, "%02x ", ptr[i]);
f663d9ad
KT
1842 fprintf (fd, "\n");
1843}
1844
f663d9ad
KT
1845/* Clear out LEN bits starting from bit START in the byte array
1846 PTR. This clears the bits to the *right* from START.
1847 START must be within [0, BITS_PER_UNIT) and counts starting from
1848 the least significant bit. */
1849
1850static void
1851clear_bit_region_be (unsigned char *ptr, unsigned int start,
1852 unsigned int len)
1853{
1854 if (len == 0)
1855 return;
1856 /* Clear len bits to the right of start. */
1857 else if (len <= start + 1)
1858 {
1859 unsigned char mask = (~(~0U << len));
1860 mask = mask << (start + 1U - len);
1861 ptr[0] &= ~mask;
1862 }
1863 else if (start != BITS_PER_UNIT - 1)
1864 {
1865 clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1866 clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1867 len - (start % BITS_PER_UNIT) - 1);
1868 }
1869 else if (start == BITS_PER_UNIT - 1
1870 && len > BITS_PER_UNIT)
1871 {
1872 unsigned int nbytes = len / BITS_PER_UNIT;
a62b3dc5 1873 memset (ptr, 0, nbytes);
f663d9ad
KT
1874 if (len % BITS_PER_UNIT != 0)
1875 clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1876 len % BITS_PER_UNIT);
1877 }
1878 else
1879 gcc_unreachable ();
1880}
1881
1882/* In the byte array PTR clear the bit region starting at bit
1883 START and is LEN bits wide.
1884 For regions spanning multiple bytes do this recursively until we reach
1885 zero LEN or a region contained within a single byte. */
1886
1887static void
1888clear_bit_region (unsigned char *ptr, unsigned int start,
1889 unsigned int len)
1890{
1891 /* Degenerate base case. */
1892 if (len == 0)
1893 return;
1894 else if (start >= BITS_PER_UNIT)
1895 clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1896 /* Second base case. */
1897 else if ((start + len) <= BITS_PER_UNIT)
1898 {
46a61395 1899 unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
f663d9ad
KT
1900 mask >>= BITS_PER_UNIT - (start + len);
1901
1902 ptr[0] &= ~mask;
1903
1904 return;
1905 }
1906 /* Clear most significant bits in a byte and proceed with the next byte. */
1907 else if (start != 0)
1908 {
1909 clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1f069ef5 1910 clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
f663d9ad
KT
1911 }
1912 /* Whole bytes need to be cleared. */
1913 else if (start == 0 && len > BITS_PER_UNIT)
1914 {
1915 unsigned int nbytes = len / BITS_PER_UNIT;
a848c710
KT
1916 /* We could recurse on each byte but we clear whole bytes, so a simple
1917 memset will do. */
46a61395 1918 memset (ptr, '\0', nbytes);
f663d9ad
KT
1919 /* Clear the remaining sub-byte region if there is one. */
1920 if (len % BITS_PER_UNIT != 0)
1921 clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1922 }
1923 else
1924 gcc_unreachable ();
1925}
1926
1927/* Write BITLEN bits of EXPR to the byte array PTR at
1928 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1929 Return true if the operation succeeded. */
1930
1931static bool
1932encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
46a61395 1933 unsigned int total_bytes)
f663d9ad
KT
1934{
1935 unsigned int first_byte = bitpos / BITS_PER_UNIT;
ad1de652
JJ
1936 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1937 || (bitpos % BITS_PER_UNIT)
f4b31647 1938 || !int_mode_for_size (bitlen, 0).exists ());
3afd514b
JJ
1939 bool empty_ctor_p
1940 = (TREE_CODE (expr) == CONSTRUCTOR
1941 && CONSTRUCTOR_NELTS (expr) == 0
1942 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1943 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
f663d9ad
KT
1944
1945 if (!sub_byte_op_p)
3afd514b
JJ
1946 {
1947 if (first_byte >= total_bytes)
1948 return false;
1949 total_bytes -= first_byte;
1950 if (empty_ctor_p)
1951 {
1952 unsigned HOST_WIDE_INT rhs_bytes
1953 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1954 if (rhs_bytes > total_bytes)
1955 return false;
1956 memset (ptr + first_byte, '\0', rhs_bytes);
1957 return true;
1958 }
1959 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1960 }
f663d9ad
KT
1961
1962 /* LITTLE-ENDIAN
1963 We are writing a non byte-sized quantity or at a position that is not
1964 at a byte boundary.
1965 |--------|--------|--------| ptr + first_byte
1966 ^ ^
1967 xxx xxxxxxxx xxx< bp>
1968 |______EXPR____|
1969
46a61395 1970 First native_encode_expr EXPR into a temporary buffer and shift each
f663d9ad
KT
1971 byte in the buffer by 'bp' (carrying the bits over as necessary).
1972 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1973 <------bitlen---->< bp>
1974 Then we clear the destination bits:
1975 |---00000|00000000|000-----| ptr + first_byte
1976 <-------bitlen--->< bp>
1977
1978 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1979 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1980
1981 BIG-ENDIAN
1982 We are writing a non byte-sized quantity or at a position that is not
1983 at a byte boundary.
1984 ptr + first_byte |--------|--------|--------|
1985 ^ ^
1986 <bp >xxx xxxxxxxx xxx
1987 |_____EXPR_____|
1988
46a61395 1989 First native_encode_expr EXPR into a temporary buffer and shift each
f663d9ad
KT
1990 byte in the buffer to the right by (carrying the bits over as necessary).
1991 We shift by as much as needed to align the most significant bit of EXPR
1992 with bitpos:
1993 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1994 <---bitlen----> <bp ><-----bitlen----->
1995 Then we clear the destination bits:
1996 ptr + first_byte |-----000||00000000||00000---|
1997 <bp ><-------bitlen----->
1998
1999 Finally we ORR the bytes of the shifted EXPR into the cleared region:
2000 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
2001 The awkwardness comes from the fact that bitpos is counted from the
2002 most significant bit of a byte. */
2003
ef1d3b57
RS
2004 /* We must be dealing with fixed-size data at this point, since the
2005 total size is also fixed. */
3afd514b
JJ
2006 unsigned int byte_size;
2007 if (empty_ctor_p)
2008 {
2009 unsigned HOST_WIDE_INT rhs_bytes
2010 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
2011 if (rhs_bytes > total_bytes)
2012 return false;
2013 byte_size = rhs_bytes;
2014 }
2015 else
2016 {
2017 fixed_size_mode mode
2018 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
e362a897
EB
2019 byte_size
2020 = mode == BLKmode
2021 ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
2022 : GET_MODE_SIZE (mode);
3afd514b 2023 }
f663d9ad 2024 /* Allocate an extra byte so that we have space to shift into. */
3afd514b 2025 byte_size++;
f663d9ad 2026 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
46a61395 2027 memset (tmpbuf, '\0', byte_size);
f663d9ad 2028 /* The store detection code should only have allowed constants that are
3afd514b
JJ
2029 accepted by native_encode_expr or empty ctors. */
2030 if (!empty_ctor_p
2031 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
f663d9ad
KT
2032 gcc_unreachable ();
2033
2034 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
2035 bytes to write. This means it can write more than
2036 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
2037 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
2038 bitlen and zero out the bits that are not relevant as well (that may
2039 contain a sign bit due to sign-extension). */
2040 unsigned int padding
2041 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
ad1de652
JJ
2042 /* On big-endian the padding is at the 'front' so just skip the initial
2043 bytes. */
2044 if (BYTES_BIG_ENDIAN)
2045 tmpbuf += padding;
2046
2047 byte_size -= padding;
2048
2049 if (bitlen % BITS_PER_UNIT != 0)
f663d9ad 2050 {
4b2c06f4 2051 if (BYTES_BIG_ENDIAN)
ad1de652
JJ
2052 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
2053 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
2054 else
2055 clear_bit_region (tmpbuf, bitlen,
2056 byte_size * BITS_PER_UNIT - bitlen);
f663d9ad 2057 }
ad1de652
JJ
2058 /* Left shifting relies on the last byte being clear if bitlen is
2059 a multiple of BITS_PER_UNIT, which might not be clear if
2060 there are padding bytes. */
2061 else if (!BYTES_BIG_ENDIAN)
2062 tmpbuf[byte_size - 1] = '\0';
f663d9ad
KT
2063
2064 /* Clear the bit region in PTR where the bits from TMPBUF will be
46a61395 2065 inserted into. */
f663d9ad
KT
2066 if (BYTES_BIG_ENDIAN)
2067 clear_bit_region_be (ptr + first_byte,
2068 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
2069 else
2070 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
2071
2072 int shift_amnt;
2073 int bitlen_mod = bitlen % BITS_PER_UNIT;
2074 int bitpos_mod = bitpos % BITS_PER_UNIT;
2075
2076 bool skip_byte = false;
2077 if (BYTES_BIG_ENDIAN)
2078 {
2079 /* BITPOS and BITLEN are exactly aligned and no shifting
2080 is necessary. */
2081 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
2082 || (bitpos_mod == 0 && bitlen_mod == 0))
2083 shift_amnt = 0;
2084 /* |. . . . . . . .|
2085 <bp > <blen >.
2086 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
2087 of the value until it aligns with 'bp' in the next byte over. */
2088 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
2089 {
2090 shift_amnt = bitlen_mod + bitpos_mod;
2091 skip_byte = bitlen_mod != 0;
2092 }
2093 /* |. . . . . . . .|
2094 <----bp--->
2095 <---blen---->.
2096 Shift the value right within the same byte so it aligns with 'bp'. */
2097 else
2098 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
2099 }
2100 else
2101 shift_amnt = bitpos % BITS_PER_UNIT;
2102
2103 /* Create the shifted version of EXPR. */
2104 if (!BYTES_BIG_ENDIAN)
46a61395 2105 {
8aba425f 2106 shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
46a61395
JJ
2107 if (shift_amnt == 0)
2108 byte_size--;
2109 }
f663d9ad
KT
2110 else
2111 {
2112 gcc_assert (BYTES_BIG_ENDIAN);
2113 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
2114 /* If shifting right forced us to move into the next byte skip the now
2115 empty byte. */
2116 if (skip_byte)
2117 {
2118 tmpbuf++;
2119 byte_size--;
2120 }
2121 }
2122
2123 /* Insert the bits from TMPBUF. */
2124 for (unsigned int i = 0; i < byte_size; i++)
2125 ptr[first_byte + i] |= tmpbuf[i];
2126
2127 return true;
2128}
2129
2130/* Sorting function for store_immediate_info objects.
2131 Sorts them by bitposition. */
2132
2133static int
2134sort_by_bitpos (const void *x, const void *y)
2135{
2136 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2137 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2138
109cca3b 2139 if ((*tmp)->bitpos < (*tmp2)->bitpos)
f663d9ad
KT
2140 return -1;
2141 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
2142 return 1;
109cca3b 2143 else
0f0027d1
KT
2144 /* If they are the same let's use the order which is guaranteed to
2145 be different. */
2146 return (*tmp)->order - (*tmp2)->order;
f663d9ad
KT
2147}
2148
2149/* Sorting function for store_immediate_info objects.
2150 Sorts them by the order field. */
2151
2152static int
2153sort_by_order (const void *x, const void *y)
2154{
2155 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2156 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2157
2158 if ((*tmp)->order < (*tmp2)->order)
2159 return -1;
2160 else if ((*tmp)->order > (*tmp2)->order)
2161 return 1;
2162
2163 gcc_unreachable ();
2164}
2165
2166/* Initialize a merged_store_group object from a store_immediate_info
2167 object. */
2168
2169merged_store_group::merged_store_group (store_immediate_info *info)
2170{
2171 start = info->bitpos;
2172 width = info->bitsize;
a62b3dc5
JJ
2173 bitregion_start = info->bitregion_start;
2174 bitregion_end = info->bitregion_end;
f663d9ad
KT
2175 /* VAL has memory allocated for it in apply_stores once the group
2176 width has been finalized. */
2177 val = NULL;
a62b3dc5 2178 mask = NULL;
e362a897
EB
2179 bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
2180 string_concatenation = info->rhs_code == STRING_CST;
18e0c3d1 2181 only_constants = info->rhs_code == INTEGER_CST;
1b3c9813 2182 consecutive = true;
18e0c3d1 2183 first_nonmergeable_order = ~0U;
629387a6 2184 lp_nr = info->lp_nr;
a62b3dc5
JJ
2185 unsigned HOST_WIDE_INT align_bitpos = 0;
2186 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2187 &align, &align_bitpos);
2188 align_base = start - align_bitpos;
245f6de1
JJ
2189 for (int i = 0; i < 2; ++i)
2190 {
2191 store_operand_info &op = info->ops[i];
2192 if (op.base_addr == NULL_TREE)
2193 {
2194 load_align[i] = 0;
2195 load_align_base[i] = 0;
2196 }
2197 else
2198 {
2199 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
2200 load_align_base[i] = op.bitpos - align_bitpos;
2201 }
2202 }
f663d9ad
KT
2203 stores.create (1);
2204 stores.safe_push (info);
2205 last_stmt = info->stmt;
2206 last_order = info->order;
2207 first_stmt = last_stmt;
2208 first_order = last_order;
2209 buf_size = 0;
2210}
2211
2212merged_store_group::~merged_store_group ()
2213{
2214 if (val)
2215 XDELETEVEC (val);
2216}
2217
7f5a3982
EB
2218/* Return true if the store described by INFO can be merged into the group. */
2219
2220bool
2221merged_store_group::can_be_merged_into (store_immediate_info *info)
2222{
2223 /* Do not merge bswap patterns. */
2224 if (info->rhs_code == LROTATE_EXPR)
2225 return false;
2226
629387a6
EB
2227 if (info->lp_nr != lp_nr)
2228 return false;
2229
7f5a3982
EB
2230 /* The canonical case. */
2231 if (info->rhs_code == stores[0]->rhs_code)
2232 return true;
2233
e362a897 2234 /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST. */
7f5a3982 2235 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
e362a897 2236 return !string_concatenation;
7f5a3982
EB
2237
2238 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
e362a897 2239 return !string_concatenation;
7f5a3982 2240
ed01d707
EB
2241 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
2242 only for small regions since this can generate a lot of instructions. */
7f5a3982
EB
2243 if (info->rhs_code == MEM_REF
2244 && (stores[0]->rhs_code == INTEGER_CST
2245 || stores[0]->rhs_code == BIT_INSERT_EXPR)
2246 && info->bitregion_start == stores[0]->bitregion_start
ed01d707 2247 && info->bitregion_end == stores[0]->bitregion_end
2815558a 2248 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
e362a897 2249 return !string_concatenation;
7f5a3982
EB
2250
2251 if (stores[0]->rhs_code == MEM_REF
2252 && (info->rhs_code == INTEGER_CST
2253 || info->rhs_code == BIT_INSERT_EXPR)
2254 && info->bitregion_start == stores[0]->bitregion_start
ed01d707 2255 && info->bitregion_end == stores[0]->bitregion_end
2815558a 2256 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
e362a897
EB
2257 return !string_concatenation;
2258
2259 /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR. */
2260 if (info->rhs_code == STRING_CST
2261 && stores[0]->rhs_code == INTEGER_CST
2262 && stores[0]->bitsize == CHAR_BIT)
2263 return !bit_insertion;
2264
2265 if (stores[0]->rhs_code == STRING_CST
2266 && info->rhs_code == INTEGER_CST
2267 && info->bitsize == CHAR_BIT)
2268 return !bit_insertion;
7f5a3982
EB
2269
2270 return false;
2271}
2272
a62b3dc5
JJ
2273/* Helper method for merge_into and merge_overlapping to do
2274 the common part. */
7f5a3982 2275
f663d9ad 2276void
a62b3dc5 2277merged_store_group::do_merge (store_immediate_info *info)
f663d9ad 2278{
a62b3dc5
JJ
2279 bitregion_start = MIN (bitregion_start, info->bitregion_start);
2280 bitregion_end = MAX (bitregion_end, info->bitregion_end);
2281
2282 unsigned int this_align;
2283 unsigned HOST_WIDE_INT align_bitpos = 0;
2284 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2285 &this_align, &align_bitpos);
2286 if (this_align > align)
2287 {
2288 align = this_align;
2289 align_base = info->bitpos - align_bitpos;
2290 }
245f6de1
JJ
2291 for (int i = 0; i < 2; ++i)
2292 {
2293 store_operand_info &op = info->ops[i];
2294 if (!op.base_addr)
2295 continue;
2296
2297 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
2298 if (this_align > load_align[i])
2299 {
2300 load_align[i] = this_align;
2301 load_align_base[i] = op.bitpos - align_bitpos;
2302 }
2303 }
f663d9ad 2304
f663d9ad
KT
2305 gimple *stmt = info->stmt;
2306 stores.safe_push (info);
2307 if (info->order > last_order)
2308 {
2309 last_order = info->order;
2310 last_stmt = stmt;
2311 }
2312 else if (info->order < first_order)
2313 {
2314 first_order = info->order;
2315 first_stmt = stmt;
2316 }
e362a897 2317
1b3c9813
EB
2318 if (info->bitpos != start + width)
2319 consecutive = false;
2320
e362a897
EB
2321 /* We need to use extraction if there is any bit-field. */
2322 if (info->rhs_code == BIT_INSERT_EXPR)
2323 {
2324 bit_insertion = true;
2325 gcc_assert (!string_concatenation);
2326 }
2327
1b3c9813 2328 /* We want to use concatenation if there is any string. */
e362a897
EB
2329 if (info->rhs_code == STRING_CST)
2330 {
2331 string_concatenation = true;
2332 gcc_assert (!bit_insertion);
2333 }
2334
1b3c9813
EB
2335 /* But we cannot use it if we don't have consecutive stores. */
2336 if (!consecutive)
2337 string_concatenation = false;
2338
18e0c3d1
JJ
2339 if (info->rhs_code != INTEGER_CST)
2340 only_constants = false;
f663d9ad
KT
2341}
2342
a62b3dc5
JJ
2343/* Merge a store recorded by INFO into this merged store.
2344 The store is not overlapping with the existing recorded
2345 stores. */
2346
2347void
2348merged_store_group::merge_into (store_immediate_info *info)
2349{
1b3c9813
EB
2350 do_merge (info);
2351
a62b3dc5
JJ
2352 /* Make sure we're inserting in the position we think we're inserting. */
2353 gcc_assert (info->bitpos >= start + width
2354 && info->bitregion_start <= bitregion_end);
2355
c5679c37 2356 width = info->bitpos + info->bitsize - start;
a62b3dc5
JJ
2357}
2358
f663d9ad
KT
2359/* Merge a store described by INFO into this merged store.
2360 INFO overlaps in some way with the current store (i.e. it's not contiguous
2361 which is handled by merged_store_group::merge_into). */
2362
2363void
2364merged_store_group::merge_overlapping (store_immediate_info *info)
2365{
1b3c9813
EB
2366 do_merge (info);
2367
f663d9ad 2368 /* If the store extends the size of the group, extend the width. */
a62b3dc5 2369 if (info->bitpos + info->bitsize > start + width)
c5679c37 2370 width = info->bitpos + info->bitsize - start;
f663d9ad
KT
2371}
2372
2373/* Go through all the recorded stores in this group in program order and
2374 apply their values to the VAL byte array to create the final merged
2375 value. Return true if the operation succeeded. */
2376
2377bool
2378merged_store_group::apply_stores ()
2379{
e362a897
EB
2380 store_immediate_info *info;
2381 unsigned int i;
2382
a62b3dc5
JJ
2383 /* Make sure we have more than one store in the group, otherwise we cannot
2384 merge anything. */
2385 if (bitregion_start % BITS_PER_UNIT != 0
2386 || bitregion_end % BITS_PER_UNIT != 0
f663d9ad
KT
2387 || stores.length () == 1)
2388 return false;
2389
e362a897
EB
2390 buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;
2391
2392 /* Really do string concatenation for large strings only. */
2393 if (buf_size <= MOVE_MAX)
2394 string_concatenation = false;
2395
617be7ba
JJ
2396 /* String concatenation only works for byte aligned start and end. */
2397 if (start % BITS_PER_UNIT != 0 || width % BITS_PER_UNIT != 0)
2398 string_concatenation = false;
2399
c94c3532 2400 /* Create a power-of-2-sized buffer for native_encode_expr. */
e362a897
EB
2401 if (!string_concatenation)
2402 buf_size = 1 << ceil_log2 (buf_size);
2403
a62b3dc5
JJ
2404 val = XNEWVEC (unsigned char, 2 * buf_size);
2405 mask = val + buf_size;
2406 memset (val, 0, buf_size);
2407 memset (mask, ~0U, buf_size);
f663d9ad 2408
e362a897
EB
2409 stores.qsort (sort_by_order);
2410
f663d9ad
KT
2411 FOR_EACH_VEC_ELT (stores, i, info)
2412 {
a62b3dc5 2413 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
c94c3532 2414 tree cst;
245f6de1
JJ
2415 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2416 cst = info->ops[0].val;
2417 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2418 cst = info->ops[1].val;
c94c3532
EB
2419 else
2420 cst = NULL_TREE;
245f6de1 2421 bool ret = true;
e362a897
EB
2422 if (cst && info->rhs_code != BIT_INSERT_EXPR)
2423 ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
2424 buf_size);
c94c3532
EB
2425 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2426 if (BYTES_BIG_ENDIAN)
2427 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2428 - (pos_in_buffer % BITS_PER_UNIT)),
2429 info->bitsize);
2430 else
2431 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
245f6de1 2432 if (cst && dump_file && (dump_flags & TDF_DETAILS))
f663d9ad
KT
2433 {
2434 if (ret)
2435 {
c94c3532 2436 fputs ("After writing ", dump_file);
4af78ef8 2437 print_generic_expr (dump_file, cst, TDF_NONE);
f663d9ad 2438 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
c94c3532
EB
2439 " at position %d\n", info->bitsize, pos_in_buffer);
2440 fputs (" the merged value contains ", dump_file);
f663d9ad 2441 dump_char_array (dump_file, val, buf_size);
c94c3532
EB
2442 fputs (" the merged mask contains ", dump_file);
2443 dump_char_array (dump_file, mask, buf_size);
2444 if (bit_insertion)
2445 fputs (" bit insertion is required\n", dump_file);
e362a897
EB
2446 if (string_concatenation)
2447 fputs (" string concatenation is required\n", dump_file);
f663d9ad
KT
2448 }
2449 else
2450 fprintf (dump_file, "Failed to merge stores\n");
4b84d9b8 2451 }
f663d9ad
KT
2452 if (!ret)
2453 return false;
2454 }
4b84d9b8 2455 stores.qsort (sort_by_bitpos);
f663d9ad
KT
2456 return true;
2457}
2458
2459/* Structure describing the store chain. */
2460
6c1dae73 2461class imm_store_chain_info
f663d9ad 2462{
6c1dae73 2463public:
50b6d676
AO
2464 /* Doubly-linked list that imposes an order on chain processing.
2465 PNXP (prev's next pointer) points to the head of a list, or to
2466 the next field in the previous chain in the list.
2467 See pass_store_merging::m_stores_head for more rationale. */
2468 imm_store_chain_info *next, **pnxp;
b5926e23 2469 tree base_addr;
a62b3dc5 2470 auto_vec<store_immediate_info *> m_store_info;
f663d9ad
KT
2471 auto_vec<merged_store_group *> m_merged_store_groups;
2472
50b6d676
AO
2473 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2474 : next (inspt), pnxp (&inspt), base_addr (b_a)
2475 {
2476 inspt = this;
2477 if (next)
2478 {
2479 gcc_checking_assert (pnxp == next->pnxp);
2480 next->pnxp = &next;
2481 }
2482 }
2483 ~imm_store_chain_info ()
2484 {
2485 *pnxp = next;
2486 if (next)
2487 {
2488 gcc_checking_assert (&next == next->pnxp);
2489 next->pnxp = pnxp;
2490 }
2491 }
b5926e23 2492 bool terminate_and_process_chain ();
bd909071
JJ
2493 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2494 unsigned int);
f663d9ad 2495 bool coalesce_immediate_stores ();
b5926e23
RB
2496 bool output_merged_store (merged_store_group *);
2497 bool output_merged_stores ();
f663d9ad
KT
2498};
2499
2500const pass_data pass_data_tree_store_merging = {
2501 GIMPLE_PASS, /* type */
2502 "store-merging", /* name */
2503 OPTGROUP_NONE, /* optinfo_flags */
2504 TV_GIMPLE_STORE_MERGING, /* tv_id */
2505 PROP_ssa, /* properties_required */
2506 0, /* properties_provided */
2507 0, /* properties_destroyed */
2508 0, /* todo_flags_start */
2509 TODO_update_ssa, /* todo_flags_finish */
2510};
2511
2512class pass_store_merging : public gimple_opt_pass
2513{
2514public:
2515 pass_store_merging (gcc::context *ctxt)
95d94b52
RB
2516 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head (),
2517 m_n_chains (0), m_n_stores (0)
f663d9ad
KT
2518 {
2519 }
2520
c94c3532
EB
2521 /* Pass not supported for PDP-endian, nor for insane hosts or
2522 target character sizes where native_{encode,interpret}_expr
a62b3dc5 2523 doesn't work properly. */
725793af
DM
2524 bool
2525 gate (function *) final override
f663d9ad 2526 {
a62b3dc5 2527 return flag_store_merging
c94c3532 2528 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
a62b3dc5
JJ
2529 && CHAR_BIT == 8
2530 && BITS_PER_UNIT == 8;
f663d9ad
KT
2531 }
2532
725793af 2533 unsigned int execute (function *) final override;
f663d9ad
KT
2534
2535private:
99b1c316 2536 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
f663d9ad 2537
50b6d676
AO
2538 /* Form a doubly-linked stack of the elements of m_stores, so that
2539 we can iterate over them in a predictable way. Using this order
2540 avoids extraneous differences in the compiler output just because
2541 of tree pointer variations (e.g. different chains end up in
2542 different positions of m_stores, so they are handled in different
2543 orders, so they allocate or release SSA names in different
2544 orders, and when they get reused, subsequent passes end up
2545 getting different SSA names, which may ultimately change
2546 decisions when going out of SSA). */
2547 imm_store_chain_info *m_stores_head;
2548
95d94b52
RB
2549 /* The number of store chains currently tracked. */
2550 unsigned m_n_chains;
2551 /* The number of stores currently tracked. */
2552 unsigned m_n_stores;
2553
629387a6
EB
2554 bool process_store (gimple *);
2555 bool terminate_and_process_chain (imm_store_chain_info *);
383ac8dc 2556 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
629387a6 2557 bool terminate_and_process_all_chains ();
f663d9ad
KT
2558}; // class pass_store_merging
2559
2560/* Terminate and process all recorded chains. Return true if any changes
2561 were made. */
2562
2563bool
2564pass_store_merging::terminate_and_process_all_chains ()
2565{
f663d9ad 2566 bool ret = false;
50b6d676 2567 while (m_stores_head)
629387a6 2568 ret |= terminate_and_process_chain (m_stores_head);
b119c055 2569 gcc_assert (m_stores.is_empty ());
f663d9ad
KT
2570 return ret;
2571}
2572
383ac8dc
JJ
2573/* Terminate all chains that are affected by the statement STMT.
2574 CHAIN_INFO is the chain we should ignore from the checks if
629387a6 2575 non-NULL. Return true if any changes were made. */
f663d9ad
KT
2576
2577bool
20770eb8 2578pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
b5926e23 2579 **chain_info,
f663d9ad
KT
2580 gimple *stmt)
2581{
2582 bool ret = false;
2583
2584 /* If the statement doesn't touch memory it can't alias. */
2585 if (!gimple_vuse (stmt))
2586 return false;
2587
9e875fd8 2588 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
6b412bf6
RB
2589 ao_ref store_lhs_ref;
2590 ao_ref_init (&store_lhs_ref, store_lhs);
383ac8dc 2591 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
f663d9ad 2592 {
383ac8dc
JJ
2593 next = cur->next;
2594
2595 /* We already checked all the stores in chain_info and terminated the
2596 chain if necessary. Skip it here. */
2597 if (chain_info && *chain_info == cur)
2598 continue;
2599
245f6de1
JJ
2600 store_immediate_info *info;
2601 unsigned int i;
383ac8dc 2602 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
f663d9ad 2603 {
9e875fd8 2604 tree lhs = gimple_assign_lhs (info->stmt);
6b412bf6
RB
2605 ao_ref lhs_ref;
2606 ao_ref_init (&lhs_ref, lhs);
2607 if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2608 || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2609 || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2610 &lhs_ref, false)))
f663d9ad 2611 {
245f6de1 2612 if (dump_file && (dump_flags & TDF_DETAILS))
f663d9ad 2613 {
245f6de1
JJ
2614 fprintf (dump_file, "stmt causes chain termination:\n");
2615 print_gimple_stmt (dump_file, stmt, 0);
f663d9ad 2616 }
629387a6 2617 ret |= terminate_and_process_chain (cur);
245f6de1 2618 break;
f663d9ad
KT
2619 }
2620 }
2621 }
2622
f663d9ad
KT
2623 return ret;
2624}
2625
2626/* Helper function. Terminate the recorded chain storing to base object
2627 BASE. Return true if the merging and output was successful. The m_stores
2628 entry is removed after the processing in any case. */
2629
2630bool
629387a6 2631pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
f663d9ad 2632{
95d94b52
RB
2633 m_n_stores -= chain_info->m_store_info.length ();
2634 m_n_chains--;
b5926e23
RB
2635 bool ret = chain_info->terminate_and_process_chain ();
2636 m_stores.remove (chain_info->base_addr);
2637 delete chain_info;
f663d9ad
KT
2638 return ret;
2639}
2640
245f6de1 2641/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
629387a6
EB
2642 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2643 be able to sink load of REF across stores between FIRST and LAST, up
2644 to right before LAST. */
245f6de1
JJ
2645
2646bool
2647stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2648{
2649 ao_ref r;
2650 ao_ref_init (&r, ref);
2651 unsigned int count = 0;
2652 tree vop = gimple_vdef (last);
2653 gimple *stmt;
2654
629387a6
EB
2655 /* Return true conservatively if the basic blocks are different. */
2656 if (gimple_bb (first) != gimple_bb (last))
2657 return true;
2658
245f6de1
JJ
2659 do
2660 {
2661 stmt = SSA_NAME_DEF_STMT (vop);
2662 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2663 return true;
4b84d9b8
JJ
2664 if (gimple_store_p (stmt)
2665 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2666 return true;
245f6de1
JJ
2667 /* Avoid quadratic compile time by bounding the number of checks
2668 we perform. */
2669 if (++count > MAX_STORE_ALIAS_CHECKS)
2670 return true;
2671 vop = gimple_vuse (stmt);
2672 }
2673 while (stmt != first);
629387a6 2674
245f6de1
JJ
2675 return false;
2676}
2677
2678/* Return true if INFO->ops[IDX] is mergeable with the
2679 corresponding loads already in MERGED_STORE group.
2680 BASE_ADDR is the base address of the whole store group. */
2681
2682bool
2683compatible_load_p (merged_store_group *merged_store,
2684 store_immediate_info *info,
2685 tree base_addr, int idx)
2686{
2687 store_immediate_info *infof = merged_store->stores[0];
2688 if (!info->ops[idx].base_addr
8a91d545
RS
2689 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2690 info->bitpos - infof->bitpos)
245f6de1
JJ
2691 || !operand_equal_p (info->ops[idx].base_addr,
2692 infof->ops[idx].base_addr, 0))
2693 return false;
2694
2695 store_immediate_info *infol = merged_store->stores.last ();
2696 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2697 /* In this case all vuses should be the same, e.g.
2698 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2699 or
2700 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2701 and we can emit the coalesced load next to any of those loads. */
2702 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2703 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2704 return true;
2705
2706 /* Otherwise, at least for now require that the load has the same
2707 vuse as the store. See following examples. */
2708 if (gimple_vuse (info->stmt) != load_vuse)
2709 return false;
2710
2711 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2712 || (infof != infol
2713 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2714 return false;
2715
2716 /* If the load is from the same location as the store, already
2717 the construction of the immediate chain info guarantees no intervening
2718 stores, so no further checks are needed. Example:
2719 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
8a91d545 2720 if (known_eq (info->ops[idx].bitpos, info->bitpos)
245f6de1
JJ
2721 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2722 return true;
2723
2724 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2725 of the stores in the group, or any other stores in between those.
2726 Previous calls to compatible_load_p ensured that for all the
2727 merged_store->stores IDX loads, no stmts starting with
2728 merged_store->first_stmt and ending right before merged_store->last_stmt
2729 clobbers those loads. */
2730 gimple *first = merged_store->first_stmt;
2731 gimple *last = merged_store->last_stmt;
245f6de1
JJ
2732 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2733 comes before the so far first load, we'll be changing
2734 merged_store->first_stmt. In that case we need to give up if
2735 any of the earlier processed loads clobber with the stmts in the new
2736 range. */
2737 if (info->order < merged_store->first_order)
2738 {
3f207ab3 2739 for (store_immediate_info *infoc : merged_store->stores)
245f6de1
JJ
2740 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2741 return false;
2742 first = info->stmt;
2743 }
2744 /* Similarly, we could change merged_store->last_stmt, so ensure
2745 in that case no stmts in the new range clobber any of the earlier
2746 processed loads. */
2747 else if (info->order > merged_store->last_order)
2748 {
3f207ab3 2749 for (store_immediate_info *infoc : merged_store->stores)
245f6de1
JJ
2750 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2751 return false;
2752 last = info->stmt;
2753 }
2754 /* And finally, we'd be adding a new load to the set, ensure it isn't
2755 clobbered in the new range. */
2756 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2757 return false;
2758
2759 /* Otherwise, we are looking for:
2760 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2761 or
2762 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2763 return true;
2764}
2765
4b84d9b8
JJ
2766/* Add all refs loaded to compute VAL to REFS vector. */
2767
2768void
2769gather_bswap_load_refs (vec<tree> *refs, tree val)
2770{
2771 if (TREE_CODE (val) != SSA_NAME)
2772 return;
2773
2774 gimple *stmt = SSA_NAME_DEF_STMT (val);
2775 if (!is_gimple_assign (stmt))
2776 return;
2777
2778 if (gimple_assign_load_p (stmt))
2779 {
2780 refs->safe_push (gimple_assign_rhs1 (stmt));
2781 return;
2782 }
2783
2784 switch (gimple_assign_rhs_class (stmt))
2785 {
2786 case GIMPLE_BINARY_RHS:
2787 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2788 /* FALLTHRU */
2789 case GIMPLE_UNARY_RHS:
2790 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2791 break;
2792 default:
2793 gcc_unreachable ();
2794 }
2795}
2796
c5679c37
JJ
2797/* Check if there are any stores in M_STORE_INFO after index I
2798 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2799 a potential group ending with END that have their order
4d213bf6
JJ
2800 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2801 all the stores already merged and the one under consideration
2802 have rhs_code of INTEGER_CST. Return true if there are no such stores.
c5679c37
JJ
2803 Consider:
2804 MEM[(long long int *)p_28] = 0;
2805 MEM[(long long int *)p_28 + 8B] = 0;
2806 MEM[(long long int *)p_28 + 16B] = 0;
2807 MEM[(long long int *)p_28 + 24B] = 0;
2808 _129 = (int) _130;
2809 MEM[(int *)p_28 + 8B] = _129;
2810 MEM[(int *)p_28].a = -1;
2811 We already have
2812 MEM[(long long int *)p_28] = 0;
2813 MEM[(int *)p_28].a = -1;
2814 stmts in the current group and need to consider if it is safe to
2815 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2816 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2817 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2818 into the group and merging of those 3 stores is successful, merged
2819 stmts will be emitted at the latest store from that group, i.e.
2820 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2821 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2822 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2823 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2824 into the group. That way it will be its own store group and will
4d213bf6 2825 not be touched. If ALL_INTEGER_CST_P and there are overlapping
c5679c37 2826 INTEGER_CST stores, those are mergeable using merge_overlapping,
bd909071
JJ
2827 so don't return false for those.
2828
2829 Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2830 (exclusive), whether they don't overlap the bitrange START to END
2831 and have order in between FIRST_ORDER and LAST_ORDER. This is to
2832 prevent merging in cases like:
2833 MEM <char[12]> [&b + 8B] = {};
2834 MEM[(short *) &b] = 5;
2835 _5 = *x_4(D);
2836 MEM <long long unsigned int> [&b + 2B] = _5;
2837 MEM[(char *)&b + 16B] = 88;
2838 MEM[(int *)&b + 20B] = 1;
2839 The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2840 be merged with it, because the = _5 store overlaps these and is in between
2841 them in sort_by_order ordering. If it was merged, the merged store would
2842 go after the = _5 store and thus change behavior. */
c5679c37
JJ
2843
2844static bool
00dcc88a
MS
2845check_no_overlap (const vec<store_immediate_info *> &m_store_info,
2846 unsigned int i,
bd909071
JJ
2847 bool all_integer_cst_p, unsigned int first_order,
2848 unsigned int last_order, unsigned HOST_WIDE_INT start,
2849 unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2850 unsigned end_earlier)
c5679c37
JJ
2851{
2852 unsigned int len = m_store_info.length ();
bd909071
JJ
2853 for (unsigned int j = first_earlier; j < end_earlier; j++)
2854 {
2855 store_immediate_info *info = m_store_info[j];
2856 if (info->order > first_order
2857 && info->order < last_order
2858 && info->bitpos + info->bitsize > start)
2859 return false;
2860 }
c5679c37
JJ
2861 for (++i; i < len; ++i)
2862 {
2863 store_immediate_info *info = m_store_info[i];
2864 if (info->bitpos >= end)
2865 break;
2866 if (info->order < last_order
4d213bf6 2867 && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
c5679c37
JJ
2868 return false;
2869 }
2870 return true;
2871}
2872
4b84d9b8
JJ
2873/* Return true if m_store_info[first] and at least one following store
2874 form a group which store try_size bitsize value which is byte swapped
2875 from a memory load or some value, or identity from some value.
2876 This uses the bswap pass APIs. */
2877
2878bool
2879imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2880 unsigned int first,
bd909071
JJ
2881 unsigned int try_size,
2882 unsigned int first_earlier)
4b84d9b8
JJ
2883{
2884 unsigned int len = m_store_info.length (), last = first;
2885 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2886 if (width >= try_size)
2887 return false;
2888 for (unsigned int i = first + 1; i < len; ++i)
2889 {
2890 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
cb76fcd7 2891 || m_store_info[i]->lp_nr != merged_store->lp_nr
4b84d9b8
JJ
2892 || m_store_info[i]->ins_stmt == NULL)
2893 return false;
2894 width += m_store_info[i]->bitsize;
2895 if (width >= try_size)
2896 {
2897 last = i;
2898 break;
2899 }
2900 }
2901 if (width != try_size)
2902 return false;
2903
2904 bool allow_unaligned
028d4092 2905 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
4b84d9b8
JJ
2906 /* Punt if the combined store would not be aligned and we need alignment. */
2907 if (!allow_unaligned)
2908 {
2909 unsigned int align = merged_store->align;
2910 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2911 for (unsigned int i = first + 1; i <= last; ++i)
2912 {
2913 unsigned int this_align;
2914 unsigned HOST_WIDE_INT align_bitpos = 0;
2915 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2916 &this_align, &align_bitpos);
2917 if (this_align > align)
2918 {
2919 align = this_align;
2920 align_base = m_store_info[i]->bitpos - align_bitpos;
2921 }
2922 }
2923 unsigned HOST_WIDE_INT align_bitpos
2924 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2925 if (align_bitpos)
2926 align = least_bit_hwi (align_bitpos);
2927 if (align < try_size)
2928 return false;
2929 }
2930
2931 tree type;
2932 switch (try_size)
2933 {
2934 case 16: type = uint16_type_node; break;
2935 case 32: type = uint32_type_node; break;
2936 case 64: type = uint64_type_node; break;
2937 default: gcc_unreachable ();
2938 }
2939 struct symbolic_number n;
2940 gimple *ins_stmt = NULL;
2941 int vuse_store = -1;
2942 unsigned int first_order = merged_store->first_order;
2943 unsigned int last_order = merged_store->last_order;
2944 gimple *first_stmt = merged_store->first_stmt;
2945 gimple *last_stmt = merged_store->last_stmt;
c5679c37 2946 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
4b84d9b8
JJ
2947 store_immediate_info *infof = m_store_info[first];
2948
2949 for (unsigned int i = first; i <= last; ++i)
2950 {
2951 store_immediate_info *info = m_store_info[i];
2952 struct symbolic_number this_n = info->n;
2953 this_n.type = type;
2954 if (!this_n.base_addr)
2955 this_n.range = try_size / BITS_PER_UNIT;
30fa8e9c
JJ
2956 else
2957 /* Update vuse in case it has changed by output_merged_stores. */
2958 this_n.vuse = gimple_vuse (info->ins_stmt);
4b84d9b8
JJ
2959 unsigned int bitpos = info->bitpos - infof->bitpos;
2960 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2961 BYTES_BIG_ENDIAN
2962 ? try_size - info->bitsize - bitpos
2963 : bitpos))
2964 return false;
aa11164a 2965 if (this_n.base_addr && vuse_store)
4b84d9b8
JJ
2966 {
2967 unsigned int j;
2968 for (j = first; j <= last; ++j)
2969 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2970 break;
2971 if (j > last)
2972 {
2973 if (vuse_store == 1)
2974 return false;
2975 vuse_store = 0;
2976 }
2977 }
2978 if (i == first)
2979 {
2980 n = this_n;
2981 ins_stmt = info->ins_stmt;
2982 }
2983 else
2984 {
c5679c37 2985 if (n.base_addr && n.vuse != this_n.vuse)
4b84d9b8 2986 {
c5679c37
JJ
2987 if (vuse_store == 0)
2988 return false;
2989 vuse_store = 1;
4b84d9b8 2990 }
c5679c37
JJ
2991 if (info->order > last_order)
2992 {
2993 last_order = info->order;
2994 last_stmt = info->stmt;
2995 }
2996 else if (info->order < first_order)
2997 {
2998 first_order = info->order;
2999 first_stmt = info->stmt;
3000 }
3001 end = MAX (end, info->bitpos + info->bitsize);
4b84d9b8
JJ
3002
3003 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
04eccbbe 3004 &this_n, &n, BIT_IOR_EXPR);
4b84d9b8
JJ
3005 if (ins_stmt == NULL)
3006 return false;
3007 }
3008 }
3009
3010 uint64_t cmpxchg, cmpnop;
b320edc0
JJ
3011 bool cast64_to_32;
3012 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop, &cast64_to_32);
4b84d9b8
JJ
3013
3014 /* A complete byte swap should make the symbolic number to start with
3015 the largest digit in the highest order byte. Unchanged symbolic
3016 number indicates a read with same endianness as target architecture. */
3017 if (n.n != cmpnop && n.n != cmpxchg)
3018 return false;
3019
b320edc0
JJ
3020 /* For now. */
3021 if (cast64_to_32)
3022 return false;
3023
4b84d9b8
JJ
3024 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
3025 return false;
3026
bd909071
JJ
3027 if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
3028 merged_store->start, end, first_earlier, first))
c5679c37
JJ
3029 return false;
3030
4b84d9b8
JJ
3031 /* Don't handle memory copy this way if normal non-bswap processing
3032 would handle it too. */
3033 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
3034 {
3035 unsigned int i;
3036 for (i = first; i <= last; ++i)
3037 if (m_store_info[i]->rhs_code != MEM_REF)
3038 break;
3039 if (i == last + 1)
3040 return false;
3041 }
3042
3043 if (n.n == cmpxchg)
3044 switch (try_size)
3045 {
3046 case 16:
3047 /* Will emit LROTATE_EXPR. */
3048 break;
3049 case 32:
3050 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
3051 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
3052 break;
3053 return false;
3054 case 64:
3055 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
74bca21d
JJ
3056 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
3057 || (word_mode == SImode
3058 && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
3059 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
4b84d9b8
JJ
3060 break;
3061 return false;
3062 default:
3063 gcc_unreachable ();
3064 }
3065
3066 if (!allow_unaligned && n.base_addr)
3067 {
3068 unsigned int align = get_object_alignment (n.src);
3069 if (align < try_size)
3070 return false;
3071 }
3072
3073 /* If each load has vuse of the corresponding store, need to verify
3074 the loads can be sunk right before the last store. */
3075 if (vuse_store == 1)
3076 {
3077 auto_vec<tree, 64> refs;
3078 for (unsigned int i = first; i <= last; ++i)
3079 gather_bswap_load_refs (&refs,
3080 gimple_assign_rhs1 (m_store_info[i]->stmt));
3081
3f207ab3 3082 for (tree ref : refs)
4b84d9b8
JJ
3083 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
3084 return false;
3085 n.vuse = NULL_TREE;
3086 }
3087
3088 infof->n = n;
3089 infof->ins_stmt = ins_stmt;
3090 for (unsigned int i = first; i <= last; ++i)
3091 {
3092 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
3093 m_store_info[i]->ops[0].base_addr = NULL_TREE;
3094 m_store_info[i]->ops[1].base_addr = NULL_TREE;
3095 if (i != first)
3096 merged_store->merge_into (m_store_info[i]);
3097 }
3098
3099 return true;
3100}
3101
f663d9ad
KT
3102/* Go through the candidate stores recorded in m_store_info and merge them
3103 into merged_store_group objects recorded into m_merged_store_groups
3104 representing the widened stores. Return true if coalescing was successful
3105 and the number of widened stores is fewer than the original number
3106 of stores. */
3107
3108bool
3109imm_store_chain_info::coalesce_immediate_stores ()
3110{
3111 /* Anything less can't be processed. */
3112 if (m_store_info.length () < 2)
3113 return false;
3114
3115 if (dump_file && (dump_flags & TDF_DETAILS))
c94c3532 3116 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
f663d9ad
KT
3117 m_store_info.length ());
3118
3119 store_immediate_info *info;
4b84d9b8 3120 unsigned int i, ignore = 0;
bd909071
JJ
3121 unsigned int first_earlier = 0;
3122 unsigned int end_earlier = 0;
f663d9ad
KT
3123
3124 /* Order the stores by the bitposition they write to. */
3125 m_store_info.qsort (sort_by_bitpos);
3126
3127 info = m_store_info[0];
3128 merged_store_group *merged_store = new merged_store_group (info);
c94c3532
EB
3129 if (dump_file && (dump_flags & TDF_DETAILS))
3130 fputs ("New store group\n", dump_file);
f663d9ad
KT
3131
3132 FOR_EACH_VEC_ELT (m_store_info, i, info)
3133 {
3afd514b
JJ
3134 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
3135
4b84d9b8 3136 if (i <= ignore)
c94c3532 3137 goto done;
f663d9ad 3138
bd909071
JJ
3139 while (first_earlier < end_earlier
3140 && (m_store_info[first_earlier]->bitpos
3141 + m_store_info[first_earlier]->bitsize
3142 <= merged_store->start))
3143 first_earlier++;
3144
4b84d9b8
JJ
3145 /* First try to handle group of stores like:
3146 p[0] = data >> 24;
3147 p[1] = data >> 16;
3148 p[2] = data >> 8;
3149 p[3] = data;
3150 using the bswap framework. */
3151 if (info->bitpos == merged_store->start + merged_store->width
3152 && merged_store->stores.length () == 1
3153 && merged_store->stores[0]->ins_stmt != NULL
cb76fcd7 3154 && info->lp_nr == merged_store->lp_nr
4b84d9b8
JJ
3155 && info->ins_stmt != NULL)
3156 {
3157 unsigned int try_size;
3158 for (try_size = 64; try_size >= 16; try_size >>= 1)
bd909071
JJ
3159 if (try_coalesce_bswap (merged_store, i - 1, try_size,
3160 first_earlier))
4b84d9b8
JJ
3161 break;
3162
3163 if (try_size >= 16)
3164 {
3165 ignore = i + merged_store->stores.length () - 1;
3166 m_merged_store_groups.safe_push (merged_store);
3167 if (ignore < m_store_info.length ())
bd909071
JJ
3168 {
3169 merged_store = new merged_store_group (m_store_info[ignore]);
3170 end_earlier = ignore;
3171 }
4b84d9b8
JJ
3172 else
3173 merged_store = NULL;
c94c3532 3174 goto done;
4b84d9b8
JJ
3175 }
3176 }
3177
3afd514b
JJ
3178 new_bitregion_start
3179 = MIN (merged_store->bitregion_start, info->bitregion_start);
3180 new_bitregion_end
3181 = MAX (merged_store->bitregion_end, info->bitregion_end);
3182
3183 if (info->order >= merged_store->first_nonmergeable_order
3184 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
028d4092 3185 > (unsigned) param_store_merging_max_size))
18e0c3d1
JJ
3186 ;
3187
f663d9ad
KT
3188 /* |---store 1---|
3189 |---store 2---|
4b84d9b8 3190 Overlapping stores. */
18e0c3d1 3191 else if (IN_RANGE (info->bitpos, merged_store->start,
4d213bf6
JJ
3192 merged_store->start + merged_store->width - 1)
3193 /* |---store 1---||---store 2---|
3194 Handle also the consecutive INTEGER_CST stores case here,
3195 as we have here the code to deal with overlaps. */
3196 || (info->bitregion_start <= merged_store->bitregion_end
3197 && info->rhs_code == INTEGER_CST
3198 && merged_store->only_constants
3199 && merged_store->can_be_merged_into (info)))
f663d9ad 3200 {
245f6de1 3201 /* Only allow overlapping stores of constants. */
629387a6
EB
3202 if (info->rhs_code == INTEGER_CST
3203 && merged_store->only_constants
3204 && info->lp_nr == merged_store->lp_nr)
245f6de1 3205 {
bd909071
JJ
3206 unsigned int first_order
3207 = MIN (merged_store->first_order, info->order);
6cd4c66e
JJ
3208 unsigned int last_order
3209 = MAX (merged_store->last_order, info->order);
3210 unsigned HOST_WIDE_INT end
3211 = MAX (merged_store->start + merged_store->width,
3212 info->bitpos + info->bitsize);
bd909071
JJ
3213 if (check_no_overlap (m_store_info, i, true, first_order,
3214 last_order, merged_store->start, end,
3215 first_earlier, end_earlier))
6cd4c66e
JJ
3216 {
3217 /* check_no_overlap call above made sure there are no
3218 overlapping stores with non-INTEGER_CST rhs_code
3219 in between the first and last of the stores we've
3220 just merged. If there are any INTEGER_CST rhs_code
3221 stores in between, we need to merge_overlapping them
3222 even if in the sort_by_bitpos order there are other
3223 overlapping stores in between. Keep those stores as is.
3224 Example:
3225 MEM[(int *)p_28] = 0;
3226 MEM[(char *)p_28 + 3B] = 1;
3227 MEM[(char *)p_28 + 1B] = 2;
3228 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
3229 We can't merge the zero store with the store of two and
3230 not merge anything else, because the store of one is
3231 in the original order in between those two, but in
3232 store_by_bitpos order it comes after the last store that
3233 we can't merge with them. We can merge the first 3 stores
3234 and keep the last store as is though. */
18e0c3d1
JJ
3235 unsigned int len = m_store_info.length ();
3236 unsigned int try_order = last_order;
3237 unsigned int first_nonmergeable_order;
3238 unsigned int k;
3239 bool last_iter = false;
3240 int attempts = 0;
3241 do
6cd4c66e 3242 {
18e0c3d1 3243 unsigned int max_order = 0;
bd909071 3244 unsigned int min_order = first_order;
18e0c3d1
JJ
3245 unsigned first_nonmergeable_int_order = ~0U;
3246 unsigned HOST_WIDE_INT this_end = end;
3247 k = i;
3248 first_nonmergeable_order = ~0U;
3249 for (unsigned int j = i + 1; j < len; ++j)
6cd4c66e 3250 {
18e0c3d1
JJ
3251 store_immediate_info *info2 = m_store_info[j];
3252 if (info2->bitpos >= this_end)
3253 break;
3254 if (info2->order < try_order)
6cd4c66e 3255 {
4119cd69
JJ
3256 if (info2->rhs_code != INTEGER_CST
3257 || info2->lp_nr != merged_store->lp_nr)
18e0c3d1
JJ
3258 {
3259 /* Normally check_no_overlap makes sure this
3260 doesn't happen, but if end grows below,
3261 then we need to process more stores than
3262 check_no_overlap verified. Example:
3263 MEM[(int *)p_5] = 0;
3264 MEM[(short *)p_5 + 3B] = 1;
3265 MEM[(char *)p_5 + 4B] = _9;
3266 MEM[(char *)p_5 + 2B] = 2; */
3267 k = 0;
3268 break;
3269 }
3270 k = j;
bd909071 3271 min_order = MIN (min_order, info2->order);
18e0c3d1
JJ
3272 this_end = MAX (this_end,
3273 info2->bitpos + info2->bitsize);
6cd4c66e 3274 }
18e0c3d1 3275 else if (info2->rhs_code == INTEGER_CST
4119cd69 3276 && info2->lp_nr == merged_store->lp_nr
18e0c3d1
JJ
3277 && !last_iter)
3278 {
3279 max_order = MAX (max_order, info2->order + 1);
3280 first_nonmergeable_int_order
3281 = MIN (first_nonmergeable_int_order,
3282 info2->order);
3283 }
3284 else
3285 first_nonmergeable_order
3286 = MIN (first_nonmergeable_order, info2->order);
6cd4c66e 3287 }
bd909071
JJ
3288 if (k > i
3289 && !check_no_overlap (m_store_info, len - 1, true,
3290 min_order, try_order,
3291 merged_store->start, this_end,
3292 first_earlier, end_earlier))
3293 k = 0;
18e0c3d1
JJ
3294 if (k == 0)
3295 {
3296 if (last_order == try_order)
3297 break;
3298 /* If this failed, but only because we grew
3299 try_order, retry with the last working one,
3300 so that we merge at least something. */
3301 try_order = last_order;
3302 last_iter = true;
3303 continue;
3304 }
3305 last_order = try_order;
3306 /* Retry with a larger try_order to see if we could
3307 merge some further INTEGER_CST stores. */
3308 if (max_order
3309 && (first_nonmergeable_int_order
3310 < first_nonmergeable_order))
3311 {
3312 try_order = MIN (max_order,
3313 first_nonmergeable_order);
3314 try_order
3315 = MIN (try_order,
3316 merged_store->first_nonmergeable_order);
3317 if (try_order > last_order && ++attempts < 16)
3318 continue;
3319 }
3320 first_nonmergeable_order
3321 = MIN (first_nonmergeable_order,
3322 first_nonmergeable_int_order);
3323 end = this_end;
3324 break;
6cd4c66e 3325 }
18e0c3d1 3326 while (1);
6cd4c66e
JJ
3327
3328 if (k != 0)
3329 {
3330 merged_store->merge_overlapping (info);
3331
18e0c3d1
JJ
3332 merged_store->first_nonmergeable_order
3333 = MIN (merged_store->first_nonmergeable_order,
3334 first_nonmergeable_order);
3335
6cd4c66e
JJ
3336 for (unsigned int j = i + 1; j <= k; j++)
3337 {
3338 store_immediate_info *info2 = m_store_info[j];
3339 gcc_assert (info2->bitpos < end);
3340 if (info2->order < last_order)
3341 {
3342 gcc_assert (info2->rhs_code == INTEGER_CST);
18e0c3d1
JJ
3343 if (info != info2)
3344 merged_store->merge_overlapping (info2);
6cd4c66e
JJ
3345 }
3346 /* Other stores are kept and not merged in any
3347 way. */
3348 }
3349 ignore = k;
3350 goto done;
3351 }
3352 }
245f6de1 3353 }
f663d9ad 3354 }
245f6de1
JJ
3355 /* |---store 1---||---store 2---|
3356 This store is consecutive to the previous one.
3357 Merge it into the current store group. There can be gaps in between
3358 the stores, but there can't be gaps in between bitregions. */
c94c3532 3359 else if (info->bitregion_start <= merged_store->bitregion_end
7f5a3982 3360 && merged_store->can_be_merged_into (info))
f663d9ad 3361 {
245f6de1
JJ
3362 store_immediate_info *infof = merged_store->stores[0];
3363
3364 /* All the rhs_code ops that take 2 operands are commutative,
3365 swap the operands if it could make the operands compatible. */
3366 if (infof->ops[0].base_addr
3367 && infof->ops[1].base_addr
3368 && info->ops[0].base_addr
3369 && info->ops[1].base_addr
8a91d545
RS
3370 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
3371 info->bitpos - infof->bitpos)
245f6de1
JJ
3372 && operand_equal_p (info->ops[1].base_addr,
3373 infof->ops[0].base_addr, 0))
127ef369
JJ
3374 {
3375 std::swap (info->ops[0], info->ops[1]);
3376 info->ops_swapped_p = true;
3377 }
4d213bf6 3378 if (check_no_overlap (m_store_info, i, false,
bd909071 3379 MIN (merged_store->first_order, info->order),
a7fe6482 3380 MAX (merged_store->last_order, info->order),
bd909071 3381 merged_store->start,
a7fe6482 3382 MAX (merged_store->start + merged_store->width,
bd909071
JJ
3383 info->bitpos + info->bitsize),
3384 first_earlier, end_earlier))
245f6de1 3385 {
7f5a3982
EB
3386 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
3387 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
3388 {
3389 info->rhs_code = BIT_INSERT_EXPR;
3390 info->ops[0].val = gimple_assign_rhs1 (info->stmt);
3391 info->ops[0].base_addr = NULL_TREE;
3392 }
3393 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
3394 {
3f207ab3 3395 for (store_immediate_info *infoj : merged_store->stores)
7f5a3982
EB
3396 {
3397 infoj->rhs_code = BIT_INSERT_EXPR;
3398 infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
3399 infoj->ops[0].base_addr = NULL_TREE;
3400 }
e362a897 3401 merged_store->bit_insertion = true;
7f5a3982
EB
3402 }
3403 if ((infof->ops[0].base_addr
3404 ? compatible_load_p (merged_store, info, base_addr, 0)
3405 : !info->ops[0].base_addr)
3406 && (infof->ops[1].base_addr
3407 ? compatible_load_p (merged_store, info, base_addr, 1)
3408 : !info->ops[1].base_addr))
3409 {
3410 merged_store->merge_into (info);
3411 goto done;
3412 }
245f6de1
JJ
3413 }
3414 }
f663d9ad 3415
245f6de1
JJ
3416 /* |---store 1---| <gap> |---store 2---|.
3417 Gap between stores or the rhs not compatible. Start a new group. */
f663d9ad 3418
245f6de1
JJ
3419 /* Try to apply all the stores recorded for the group to determine
3420 the bitpattern they write and discard it if that fails.
3421 This will also reject single-store groups. */
c94c3532 3422 if (merged_store->apply_stores ())
245f6de1 3423 m_merged_store_groups.safe_push (merged_store);
c94c3532
EB
3424 else
3425 delete merged_store;
f663d9ad 3426
245f6de1 3427 merged_store = new merged_store_group (info);
bd909071 3428 end_earlier = i;
c94c3532
EB
3429 if (dump_file && (dump_flags & TDF_DETAILS))
3430 fputs ("New store group\n", dump_file);
3431
3432 done:
3433 if (dump_file && (dump_flags & TDF_DETAILS))
3434 {
3435 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3436 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3437 i, info->bitsize, info->bitpos);
3438 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3439 fputc ('\n', dump_file);
3440 }
f663d9ad
KT
3441 }
3442
a62b3dc5 3443 /* Record or discard the last store group. */
4b84d9b8
JJ
3444 if (merged_store)
3445 {
c94c3532 3446 if (merged_store->apply_stores ())
4b84d9b8 3447 m_merged_store_groups.safe_push (merged_store);
c94c3532
EB
3448 else
3449 delete merged_store;
4b84d9b8 3450 }
f663d9ad
KT
3451
3452 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
c94c3532 3453
f663d9ad
KT
3454 bool success
3455 = !m_merged_store_groups.is_empty ()
3456 && m_merged_store_groups.length () < m_store_info.length ();
3457
3458 if (success && dump_file)
c94c3532 3459 fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
a62b3dc5 3460 m_merged_store_groups.length ());
f663d9ad
KT
3461
3462 return success;
3463}
3464
245f6de1
JJ
3465/* Return the type to use for the merged stores or loads described by STMTS.
3466 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3467 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3468 of the MEM_REFs if any. */
f663d9ad
KT
3469
3470static tree
245f6de1
JJ
3471get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3472 unsigned short *cliquep, unsigned short *basep)
f663d9ad
KT
3473{
3474 gimple *stmt;
3475 unsigned int i;
245f6de1
JJ
3476 tree type = NULL_TREE;
3477 tree ret = NULL_TREE;
3478 *cliquep = 0;
3479 *basep = 0;
f663d9ad
KT
3480
3481 FOR_EACH_VEC_ELT (stmts, i, stmt)
3482 {
245f6de1
JJ
3483 tree ref = is_load ? gimple_assign_rhs1 (stmt)
3484 : gimple_assign_lhs (stmt);
3485 tree type1 = reference_alias_ptr_type (ref);
3486 tree base = get_base_address (ref);
f663d9ad 3487
245f6de1
JJ
3488 if (i == 0)
3489 {
3490 if (TREE_CODE (base) == MEM_REF)
3491 {
3492 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3493 *basep = MR_DEPENDENCE_BASE (base);
3494 }
3495 ret = type = type1;
3496 continue;
3497 }
f663d9ad 3498 if (!alias_ptr_types_compatible_p (type, type1))
245f6de1
JJ
3499 ret = ptr_type_node;
3500 if (TREE_CODE (base) != MEM_REF
3501 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3502 || *basep != MR_DEPENDENCE_BASE (base))
3503 {
3504 *cliquep = 0;
3505 *basep = 0;
3506 }
f663d9ad 3507 }
245f6de1 3508 return ret;
f663d9ad
KT
3509}
3510
3511/* Return the location_t information we can find among the statements
3512 in STMTS. */
3513
3514static location_t
245f6de1 3515get_location_for_stmts (vec<gimple *> &stmts)
f663d9ad 3516{
3f207ab3 3517 for (gimple *stmt : stmts)
f663d9ad
KT
3518 if (gimple_has_location (stmt))
3519 return gimple_location (stmt);
3520
3521 return UNKNOWN_LOCATION;
3522}
3523
3524/* Used to decribe a store resulting from splitting a wide store in smaller
3525 regularly-sized stores in split_group. */
3526
6c1dae73 3527class split_store
f663d9ad 3528{
6c1dae73 3529public:
f663d9ad
KT
3530 unsigned HOST_WIDE_INT bytepos;
3531 unsigned HOST_WIDE_INT size;
3532 unsigned HOST_WIDE_INT align;
245f6de1 3533 auto_vec<store_immediate_info *> orig_stores;
a62b3dc5
JJ
3534 /* True if there is a single orig stmt covering the whole split store. */
3535 bool orig;
f663d9ad
KT
3536 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3537 unsigned HOST_WIDE_INT);
3538};
3539
3540/* Simple constructor. */
3541
3542split_store::split_store (unsigned HOST_WIDE_INT bp,
3543 unsigned HOST_WIDE_INT sz,
3544 unsigned HOST_WIDE_INT al)
a62b3dc5 3545 : bytepos (bp), size (sz), align (al), orig (false)
f663d9ad 3546{
245f6de1 3547 orig_stores.create (0);
f663d9ad
KT
3548}
3549
245f6de1
JJ
3550/* Record all stores in GROUP that write to the region starting at BITPOS and
3551 is of size BITSIZE. Record infos for such statements in STORES if
3552 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
5384a802
JJ
3553 if there is exactly one original store in the range (in that case ignore
3554 clobber stmts, unless there are only clobber stmts). */
f663d9ad 3555
a62b3dc5 3556static store_immediate_info *
99b1c316 3557find_constituent_stores (class merged_store_group *group,
245f6de1
JJ
3558 vec<store_immediate_info *> *stores,
3559 unsigned int *first,
3560 unsigned HOST_WIDE_INT bitpos,
3561 unsigned HOST_WIDE_INT bitsize)
f663d9ad 3562{
a62b3dc5 3563 store_immediate_info *info, *ret = NULL;
f663d9ad 3564 unsigned int i;
a62b3dc5
JJ
3565 bool second = false;
3566 bool update_first = true;
f663d9ad 3567 unsigned HOST_WIDE_INT end = bitpos + bitsize;
a62b3dc5 3568 for (i = *first; group->stores.iterate (i, &info); ++i)
f663d9ad
KT
3569 {
3570 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3571 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
a62b3dc5
JJ
3572 if (stmt_end <= bitpos)
3573 {
3574 /* BITPOS passed to this function never decreases from within the
3575 same split_group call, so optimize and don't scan info records
3576 which are known to end before or at BITPOS next time.
3577 Only do it if all stores before this one also pass this. */
3578 if (update_first)
3579 *first = i + 1;
3580 continue;
3581 }
3582 else
3583 update_first = false;
3584
f663d9ad 3585 /* The stores in GROUP are ordered by bitposition so if we're past
a62b3dc5
JJ
3586 the region for this group return early. */
3587 if (stmt_start >= end)
3588 return ret;
3589
5384a802
JJ
3590 if (gimple_clobber_p (info->stmt))
3591 {
3592 if (stores)
3593 stores->safe_push (info);
3594 if (ret == NULL)
3595 ret = info;
3596 continue;
3597 }
245f6de1 3598 if (stores)
a62b3dc5 3599 {
245f6de1 3600 stores->safe_push (info);
5384a802 3601 if (ret && !gimple_clobber_p (ret->stmt))
a62b3dc5
JJ
3602 {
3603 ret = NULL;
3604 second = true;
3605 }
3606 }
5384a802 3607 else if (ret && !gimple_clobber_p (ret->stmt))
a62b3dc5
JJ
3608 return NULL;
3609 if (!second)
3610 ret = info;
f663d9ad 3611 }
a62b3dc5 3612 return ret;
f663d9ad
KT
3613}
3614
d7a9512e
JJ
3615/* Return how many SSA_NAMEs used to compute value to store in the INFO
3616 store have multiple uses. If any SSA_NAME has multiple uses, also
3617 count statements needed to compute it. */
3618
3619static unsigned
3620count_multiple_uses (store_immediate_info *info)
3621{
3622 gimple *stmt = info->stmt;
3623 unsigned ret = 0;
3624 switch (info->rhs_code)
3625 {
3626 case INTEGER_CST:
e362a897 3627 case STRING_CST:
d7a9512e
JJ
3628 return 0;
3629 case BIT_AND_EXPR:
3630 case BIT_IOR_EXPR:
3631 case BIT_XOR_EXPR:
d60edaba
JJ
3632 if (info->bit_not_p)
3633 {
3634 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3635 ret = 1; /* Fall through below to return
3636 the BIT_NOT_EXPR stmt and then
3637 BIT_{AND,IOR,XOR}_EXPR and anything it
3638 uses. */
3639 else
3640 /* stmt is after this the BIT_NOT_EXPR. */
3641 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3642 }
d7a9512e
JJ
3643 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3644 {
3645 ret += 1 + info->ops[0].bit_not_p;
3646 if (info->ops[1].base_addr)
3647 ret += 1 + info->ops[1].bit_not_p;
3648 return ret + 1;
3649 }
3650 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3651 /* stmt is now the BIT_*_EXPR. */
3652 if (!has_single_use (gimple_assign_rhs1 (stmt)))
127ef369
JJ
3653 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3654 else if (info->ops[info->ops_swapped_p].bit_not_p)
d7a9512e
JJ
3655 {
3656 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3657 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3658 ++ret;
3659 }
3660 if (info->ops[1].base_addr == NULL_TREE)
127ef369
JJ
3661 {
3662 gcc_checking_assert (!info->ops_swapped_p);
3663 return ret;
3664 }
d7a9512e 3665 if (!has_single_use (gimple_assign_rhs2 (stmt)))
127ef369
JJ
3666 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3667 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
d7a9512e
JJ
3668 {
3669 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3670 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3671 ++ret;
3672 }
3673 return ret;
3674 case MEM_REF:
3675 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3676 return 1 + info->ops[0].bit_not_p;
3677 else if (info->ops[0].bit_not_p)
3678 {
3679 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3680 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3681 return 1;
3682 }
3683 return 0;
c94c3532
EB
3684 case BIT_INSERT_EXPR:
3685 return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
d7a9512e
JJ
3686 default:
3687 gcc_unreachable ();
3688 }
3689}
3690
f663d9ad 3691/* Split a merged store described by GROUP by populating the SPLIT_STORES
a62b3dc5
JJ
3692 vector (if non-NULL) with split_store structs describing the byte offset
3693 (from the base), the bit size and alignment of each store as well as the
3694 original statements involved in each such split group.
f663d9ad
KT
3695 This is to separate the splitting strategy from the statement
3696 building/emission/linking done in output_merged_store.
a62b3dc5 3697 Return number of new stores.
245f6de1
JJ
3698 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3699 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3afd514b
JJ
3700 BZERO_FIRST may be true only when the first store covers the whole group
3701 and clears it; if BZERO_FIRST is true, keep that first store in the set
3702 unmodified and emit further stores for the overrides only.
a62b3dc5
JJ
3703 If SPLIT_STORES is NULL, it is just a dry run to count number of
3704 new stores. */
f663d9ad 3705
a62b3dc5 3706static unsigned int
245f6de1 3707split_group (merged_store_group *group, bool allow_unaligned_store,
3afd514b 3708 bool allow_unaligned_load, bool bzero_first,
99b1c316 3709 vec<split_store *> *split_stores,
d7a9512e
JJ
3710 unsigned *total_orig,
3711 unsigned *total_new)
f663d9ad 3712{
a62b3dc5
JJ
3713 unsigned HOST_WIDE_INT pos = group->bitregion_start;
3714 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
f663d9ad 3715 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
a62b3dc5
JJ
3716 unsigned HOST_WIDE_INT group_align = group->align;
3717 unsigned HOST_WIDE_INT align_base = group->align_base;
245f6de1 3718 unsigned HOST_WIDE_INT group_load_align = group_align;
d7a9512e 3719 bool any_orig = false;
f663d9ad 3720
f663d9ad
KT
3721 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3722
e362a897
EB
3723 /* For bswap framework using sets of stores, all the checking has been done
3724 earlier in try_coalesce_bswap and the result always needs to be emitted
617be7ba 3725 as a single store. Likewise for string concatenation. */
4b84d9b8 3726 if (group->stores[0]->rhs_code == LROTATE_EXPR
e362a897
EB
3727 || group->stores[0]->rhs_code == NOP_EXPR
3728 || group->string_concatenation)
4b84d9b8 3729 {
3afd514b 3730 gcc_assert (!bzero_first);
4b84d9b8
JJ
3731 if (total_orig)
3732 {
3733 /* Avoid the old/new stmt count heuristics. It should be
3734 always beneficial. */
3735 total_new[0] = 1;
3736 total_orig[0] = 2;
3737 }
3738
3739 if (split_stores)
3740 {
3741 unsigned HOST_WIDE_INT align_bitpos
3742 = (group->start - align_base) & (group_align - 1);
3743 unsigned HOST_WIDE_INT align = group_align;
3744 if (align_bitpos)
3745 align = least_bit_hwi (align_bitpos);
3746 bytepos = group->start / BITS_PER_UNIT;
99b1c316 3747 split_store *store
4b84d9b8
JJ
3748 = new split_store (bytepos, group->width, align);
3749 unsigned int first = 0;
3750 find_constituent_stores (group, &store->orig_stores,
3751 &first, group->start, group->width);
3752 split_stores->safe_push (store);
3753 }
3754
3755 return 1;
3756 }
3757
a62b3dc5 3758 unsigned int ret = 0, first = 0;
f663d9ad 3759 unsigned HOST_WIDE_INT try_pos = bytepos;
f663d9ad 3760
d7a9512e
JJ
3761 if (total_orig)
3762 {
3763 unsigned int i;
3764 store_immediate_info *info = group->stores[0];
3765
3766 total_new[0] = 0;
3767 total_orig[0] = 1; /* The orig store. */
3768 info = group->stores[0];
3769 if (info->ops[0].base_addr)
a6fbd154 3770 total_orig[0]++;
d7a9512e 3771 if (info->ops[1].base_addr)
a6fbd154 3772 total_orig[0]++;
d7a9512e
JJ
3773 switch (info->rhs_code)
3774 {
3775 case BIT_AND_EXPR:
3776 case BIT_IOR_EXPR:
3777 case BIT_XOR_EXPR:
3778 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3779 break;
3780 default:
3781 break;
3782 }
3783 total_orig[0] *= group->stores.length ();
3784
3785 FOR_EACH_VEC_ELT (group->stores, i, info)
a6fbd154
JJ
3786 {
3787 total_new[0] += count_multiple_uses (info);
3788 total_orig[0] += (info->bit_not_p
3789 + info->ops[0].bit_not_p
3790 + info->ops[1].bit_not_p);
3791 }
d7a9512e
JJ
3792 }
3793
245f6de1
JJ
3794 if (!allow_unaligned_load)
3795 for (int i = 0; i < 2; ++i)
3796 if (group->load_align[i])
3797 group_load_align = MIN (group_load_align, group->load_align[i]);
3798
3afd514b
JJ
3799 if (bzero_first)
3800 {
5384a802
JJ
3801 store_immediate_info *gstore;
3802 FOR_EACH_VEC_ELT (group->stores, first, gstore)
3803 if (!gimple_clobber_p (gstore->stmt))
3804 break;
3805 ++first;
3afd514b
JJ
3806 ret = 1;
3807 if (split_stores)
3808 {
99b1c316 3809 split_store *store
5384a802
JJ
3810 = new split_store (bytepos, gstore->bitsize, align_base);
3811 store->orig_stores.safe_push (gstore);
3afd514b
JJ
3812 store->orig = true;
3813 any_orig = true;
3814 split_stores->safe_push (store);
3815 }
3816 }
3817
f663d9ad
KT
3818 while (size > 0)
3819 {
245f6de1 3820 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3afd514b
JJ
3821 && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3822 || (bzero_first && group->val[try_pos - bytepos] == 0)))
a62b3dc5
JJ
3823 {
3824 /* Skip padding bytes. */
3825 ++try_pos;
3826 size -= BITS_PER_UNIT;
3827 continue;
3828 }
3829
f663d9ad 3830 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
a62b3dc5
JJ
3831 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3832 unsigned HOST_WIDE_INT align_bitpos
3833 = (try_bitpos - align_base) & (group_align - 1);
3834 unsigned HOST_WIDE_INT align = group_align;
5384a802 3835 bool found_orig = false;
a62b3dc5
JJ
3836 if (align_bitpos)
3837 align = least_bit_hwi (align_bitpos);
245f6de1 3838 if (!allow_unaligned_store)
a62b3dc5 3839 try_size = MIN (try_size, align);
245f6de1
JJ
3840 if (!allow_unaligned_load)
3841 {
3842 /* If we can't do or don't want to do unaligned stores
3843 as well as loads, we need to take the loads into account
3844 as well. */
3845 unsigned HOST_WIDE_INT load_align = group_load_align;
3846 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3847 if (align_bitpos)
3848 load_align = least_bit_hwi (align_bitpos);
3849 for (int i = 0; i < 2; ++i)
3850 if (group->load_align[i])
3851 {
8a91d545
RS
3852 align_bitpos
3853 = known_alignment (try_bitpos
3854 - group->stores[0]->bitpos
3855 + group->stores[0]->ops[i].bitpos
3856 - group->load_align_base[i]);
3857 if (align_bitpos & (group_load_align - 1))
245f6de1
JJ
3858 {
3859 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3860 load_align = MIN (load_align, a);
3861 }
3862 }
3863 try_size = MIN (try_size, load_align);
3864 }
a62b3dc5 3865 store_immediate_info *info
245f6de1 3866 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
5384a802 3867 if (info && !gimple_clobber_p (info->stmt))
a62b3dc5
JJ
3868 {
3869 /* If there is just one original statement for the range, see if
3870 we can just reuse the original store which could be even larger
3871 than try_size. */
3872 unsigned HOST_WIDE_INT stmt_end
3873 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
245f6de1
JJ
3874 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3875 stmt_end - try_bitpos);
a62b3dc5
JJ
3876 if (info && info->bitpos >= try_bitpos)
3877 {
5384a802
JJ
3878 store_immediate_info *info2 = NULL;
3879 unsigned int first_copy = first;
3880 if (info->bitpos > try_bitpos
3881 && stmt_end - try_bitpos <= try_size)
3882 {
3883 info2 = find_constituent_stores (group, NULL, &first_copy,
3884 try_bitpos,
3885 info->bitpos - try_bitpos);
3886 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3887 }
3888 if (info2 == NULL && stmt_end - try_bitpos < try_size)
3889 {
3890 info2 = find_constituent_stores (group, NULL, &first_copy,
3891 stmt_end,
3892 (try_bitpos + try_size)
3893 - stmt_end);
3894 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3895 }
3896 if (info2 == NULL)
3897 {
3898 try_size = stmt_end - try_bitpos;
3899 found_orig = true;
3900 goto found;
3901 }
a62b3dc5
JJ
3902 }
3903 }
f663d9ad 3904
a62b3dc5
JJ
3905 /* Approximate store bitsize for the case when there are no padding
3906 bits. */
3907 while (try_size > size)
3908 try_size /= 2;
3909 /* Now look for whole padding bytes at the end of that bitsize. */
3910 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3911 if (group->mask[try_pos - bytepos + nonmasked - 1]
3afd514b
JJ
3912 != (unsigned char) ~0U
3913 && (!bzero_first
3914 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
a62b3dc5 3915 break;
5384a802 3916 if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
a62b3dc5
JJ
3917 {
3918 /* If entire try_size range is padding, skip it. */
3919 try_pos += try_size / BITS_PER_UNIT;
3920 size -= try_size;
3921 continue;
3922 }
3923 /* Otherwise try to decrease try_size if second half, last 3 quarters
3924 etc. are padding. */
3925 nonmasked *= BITS_PER_UNIT;
3926 while (nonmasked <= try_size / 2)
3927 try_size /= 2;
245f6de1 3928 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
a62b3dc5
JJ
3929 {
3930 /* Now look for whole padding bytes at the start of that bitsize. */
3931 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3932 for (masked = 0; masked < try_bytesize; ++masked)
3afd514b
JJ
3933 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3934 && (!bzero_first
3935 || group->val[try_pos - bytepos + masked] != 0))
a62b3dc5
JJ
3936 break;
3937 masked *= BITS_PER_UNIT;
3938 gcc_assert (masked < try_size);
3939 if (masked >= try_size / 2)
3940 {
3941 while (masked >= try_size / 2)
3942 {
3943 try_size /= 2;
3944 try_pos += try_size / BITS_PER_UNIT;
3945 size -= try_size;
3946 masked -= try_size;
3947 }
3948 /* Need to recompute the alignment, so just retry at the new
3949 position. */
3950 continue;
3951 }
3952 }
3953
3954 found:
3955 ++ret;
f663d9ad 3956
a62b3dc5
JJ
3957 if (split_stores)
3958 {
99b1c316 3959 split_store *store
a62b3dc5 3960 = new split_store (try_pos, try_size, align);
245f6de1
JJ
3961 info = find_constituent_stores (group, &store->orig_stores,
3962 &first, try_bitpos, try_size);
a62b3dc5 3963 if (info
5384a802 3964 && !gimple_clobber_p (info->stmt)
a62b3dc5 3965 && info->bitpos >= try_bitpos
5384a802
JJ
3966 && info->bitpos + info->bitsize <= try_bitpos + try_size
3967 && (store->orig_stores.length () == 1
3968 || found_orig
3969 || (info->bitpos == try_bitpos
3970 && (info->bitpos + info->bitsize
3971 == try_bitpos + try_size))))
d7a9512e
JJ
3972 {
3973 store->orig = true;
3974 any_orig = true;
3975 }
a62b3dc5
JJ
3976 split_stores->safe_push (store);
3977 }
3978
3979 try_pos += try_size / BITS_PER_UNIT;
f663d9ad 3980 size -= try_size;
f663d9ad 3981 }
a62b3dc5 3982
d7a9512e
JJ
3983 if (total_orig)
3984 {
a6fbd154 3985 unsigned int i;
99b1c316 3986 split_store *store;
d7a9512e
JJ
3987 /* If we are reusing some original stores and any of the
3988 original SSA_NAMEs had multiple uses, we need to subtract
3989 those now before we add the new ones. */
3990 if (total_new[0] && any_orig)
3991 {
d7a9512e
JJ
3992 FOR_EACH_VEC_ELT (*split_stores, i, store)
3993 if (store->orig)
3994 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3995 }
3996 total_new[0] += ret; /* The new store. */
3997 store_immediate_info *info = group->stores[0];
3998 if (info->ops[0].base_addr)
a6fbd154 3999 total_new[0] += ret;
d7a9512e 4000 if (info->ops[1].base_addr)
a6fbd154 4001 total_new[0] += ret;
d7a9512e
JJ
4002 switch (info->rhs_code)
4003 {
4004 case BIT_AND_EXPR:
4005 case BIT_IOR_EXPR:
4006 case BIT_XOR_EXPR:
4007 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
4008 break;
4009 default:
4010 break;
4011 }
a6fbd154
JJ
4012 FOR_EACH_VEC_ELT (*split_stores, i, store)
4013 {
4014 unsigned int j;
4015 bool bit_not_p[3] = { false, false, false };
4016 /* If all orig_stores have certain bit_not_p set, then
4017 we'd use a BIT_NOT_EXPR stmt and need to account for it.
4018 If some orig_stores have certain bit_not_p set, then
4019 we'd use a BIT_XOR_EXPR with a mask and need to account for
4020 it. */
4021 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
4022 {
4023 if (info->ops[0].bit_not_p)
4024 bit_not_p[0] = true;
4025 if (info->ops[1].bit_not_p)
4026 bit_not_p[1] = true;
4027 if (info->bit_not_p)
4028 bit_not_p[2] = true;
4029 }
4030 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
4031 }
4032
d7a9512e
JJ
4033 }
4034
a62b3dc5 4035 return ret;
f663d9ad
KT
4036}
4037
a6fbd154
JJ
4038/* Return the operation through which the operand IDX (if < 2) or
4039 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
4040 is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
4041 the bits should be xored with mask. */
4042
4043static enum tree_code
4044invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
4045{
4046 unsigned int i;
4047 store_immediate_info *info;
4048 unsigned int cnt = 0;
e215422f 4049 bool any_paddings = false;
a6fbd154
JJ
4050 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
4051 {
4052 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
4053 if (bit_not_p)
e215422f
JJ
4054 {
4055 ++cnt;
4056 tree lhs = gimple_assign_lhs (info->stmt);
4057 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4058 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
4059 any_paddings = true;
4060 }
a6fbd154
JJ
4061 }
4062 mask = NULL_TREE;
4063 if (cnt == 0)
4064 return NOP_EXPR;
e215422f 4065 if (cnt == split_store->orig_stores.length () && !any_paddings)
a6fbd154
JJ
4066 return BIT_NOT_EXPR;
4067
4068 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
4069 unsigned buf_size = split_store->size / BITS_PER_UNIT;
4070 unsigned char *buf
4071 = XALLOCAVEC (unsigned char, buf_size);
4072 memset (buf, ~0U, buf_size);
4073 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
4074 {
4075 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
4076 if (!bit_not_p)
4077 continue;
4078 /* Clear regions with bit_not_p and invert afterwards, rather than
4079 clear regions with !bit_not_p, so that gaps in between stores aren't
4080 set in the mask. */
4081 unsigned HOST_WIDE_INT bitsize = info->bitsize;
e215422f 4082 unsigned HOST_WIDE_INT prec = bitsize;
a6fbd154 4083 unsigned int pos_in_buffer = 0;
e215422f
JJ
4084 if (any_paddings)
4085 {
4086 tree lhs = gimple_assign_lhs (info->stmt);
4087 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
4088 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
4089 prec = TYPE_PRECISION (TREE_TYPE (lhs));
4090 }
a6fbd154
JJ
4091 if (info->bitpos < try_bitpos)
4092 {
4093 gcc_assert (info->bitpos + bitsize > try_bitpos);
e215422f
JJ
4094 if (!BYTES_BIG_ENDIAN)
4095 {
4096 if (prec <= try_bitpos - info->bitpos)
4097 continue;
4098 prec -= try_bitpos - info->bitpos;
4099 }
4100 bitsize -= try_bitpos - info->bitpos;
4101 if (BYTES_BIG_ENDIAN && prec > bitsize)
4102 prec = bitsize;
a6fbd154
JJ
4103 }
4104 else
4105 pos_in_buffer = info->bitpos - try_bitpos;
e215422f
JJ
4106 if (prec < bitsize)
4107 {
4108 /* If this is a bool inversion, invert just the least significant
4109 prec bits rather than all bits of it. */
4110 if (BYTES_BIG_ENDIAN)
4111 {
4112 pos_in_buffer += bitsize - prec;
4113 if (pos_in_buffer >= split_store->size)
4114 continue;
4115 }
4116 bitsize = prec;
4117 }
a6fbd154
JJ
4118 if (pos_in_buffer + bitsize > split_store->size)
4119 bitsize = split_store->size - pos_in_buffer;
4120 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
4121 if (BYTES_BIG_ENDIAN)
4122 clear_bit_region_be (p, (BITS_PER_UNIT - 1
4123 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
4124 else
4125 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
4126 }
4127 for (unsigned int i = 0; i < buf_size; ++i)
4128 buf[i] = ~buf[i];
4129 mask = native_interpret_expr (int_type, buf, buf_size);
4130 return BIT_XOR_EXPR;
4131}
4132
f663d9ad
KT
4133/* Given a merged store group GROUP output the widened version of it.
4134 The store chain is against the base object BASE.
4135 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
4136 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
4137 Make sure that the number of statements output is less than the number of
4138 original statements. If a better sequence is possible emit it and
4139 return true. */
4140
4141bool
b5926e23 4142imm_store_chain_info::output_merged_store (merged_store_group *group)
f663d9ad 4143{
e362a897 4144 const unsigned HOST_WIDE_INT start_byte_pos
a62b3dc5 4145 = group->bitregion_start / BITS_PER_UNIT;
f663d9ad
KT
4146 unsigned int orig_num_stmts = group->stores.length ();
4147 if (orig_num_stmts < 2)
4148 return false;
4149
245f6de1 4150 bool allow_unaligned_store
028d4092 4151 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
245f6de1 4152 bool allow_unaligned_load = allow_unaligned_store;
3afd514b 4153 bool bzero_first = false;
5384a802
JJ
4154 store_immediate_info *store;
4155 unsigned int num_clobber_stmts = 0;
4156 if (group->stores[0]->rhs_code == INTEGER_CST)
4157 {
e362a897 4158 unsigned int i;
5384a802
JJ
4159 FOR_EACH_VEC_ELT (group->stores, i, store)
4160 if (gimple_clobber_p (store->stmt))
4161 num_clobber_stmts++;
4162 else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
4163 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
4164 && group->start == store->bitpos
4165 && group->width == store->bitsize
4166 && (group->start % BITS_PER_UNIT) == 0
4167 && (group->width % BITS_PER_UNIT) == 0)
4168 {
4169 bzero_first = true;
4170 break;
4171 }
4172 else
4173 break;
4174 FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
4175 if (gimple_clobber_p (store->stmt))
4176 num_clobber_stmts++;
4177 if (num_clobber_stmts == orig_num_stmts)
4178 return false;
4179 orig_num_stmts -= num_clobber_stmts;
4180 }
3afd514b 4181 if (allow_unaligned_store || bzero_first)
a62b3dc5
JJ
4182 {
4183 /* If unaligned stores are allowed, see how many stores we'd emit
4184 for unaligned and how many stores we'd emit for aligned stores.
3afd514b
JJ
4185 Only use unaligned stores if it allows fewer stores than aligned.
4186 Similarly, if there is a whole region clear first, prefer expanding
4187 it together compared to expanding clear first followed by merged
4188 further stores. */
21f65995 4189 unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
3afd514b
JJ
4190 int pass_min = 0;
4191 for (int pass = 0; pass < 4; ++pass)
4192 {
4193 if (!allow_unaligned_store && (pass & 1) != 0)
4194 continue;
4195 if (!bzero_first && (pass & 2) != 0)
4196 continue;
4197 cnt[pass] = split_group (group, (pass & 1) != 0,
4198 allow_unaligned_load, (pass & 2) != 0,
4199 NULL, NULL, NULL);
4200 if (cnt[pass] < cnt[pass_min])
4201 pass_min = pass;
4202 }
4203 if ((pass_min & 1) == 0)
245f6de1 4204 allow_unaligned_store = false;
3afd514b
JJ
4205 if ((pass_min & 2) == 0)
4206 bzero_first = false;
a62b3dc5 4207 }
e362a897
EB
4208
4209 auto_vec<class split_store *, 32> split_stores;
4210 split_store *split_store;
4211 unsigned total_orig, total_new, i;
3afd514b 4212 split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
d7a9512e 4213 &split_stores, &total_orig, &total_new);
a62b3dc5 4214
5384a802
JJ
4215 /* Determine if there is a clobber covering the whole group at the start,
4216 followed by proposed split stores that cover the whole group. In that
4217 case, prefer the transformation even if
4218 split_stores.length () == orig_num_stmts. */
4219 bool clobber_first = false;
4220 if (num_clobber_stmts
4221 && gimple_clobber_p (group->stores[0]->stmt)
4222 && group->start == group->stores[0]->bitpos
4223 && group->width == group->stores[0]->bitsize
4224 && (group->start % BITS_PER_UNIT) == 0
4225 && (group->width % BITS_PER_UNIT) == 0)
4226 {
4227 clobber_first = true;
4228 unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
4229 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4230 if (split_store->bytepos != pos)
4231 {
4232 clobber_first = false;
4233 break;
4234 }
4235 else
4236 pos += split_store->size / BITS_PER_UNIT;
4237 if (pos != (group->start + group->width) / BITS_PER_UNIT)
4238 clobber_first = false;
4239 }
4240
4241 if (split_stores.length () >= orig_num_stmts + clobber_first)
a62b3dc5 4242 {
5384a802 4243
a62b3dc5
JJ
4244 /* We didn't manage to reduce the number of statements. Bail out. */
4245 if (dump_file && (dump_flags & TDF_DETAILS))
d7a9512e
JJ
4246 fprintf (dump_file, "Exceeded original number of stmts (%u)."
4247 " Not profitable to emit new sequence.\n",
4248 orig_num_stmts);
dd172744
RB
4249 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4250 delete split_store;
a62b3dc5
JJ
4251 return false;
4252 }
d7a9512e
JJ
4253 if (total_orig <= total_new)
4254 {
4255 /* If number of estimated new statements is above estimated original
4256 statements, bail out too. */
4257 if (dump_file && (dump_flags & TDF_DETAILS))
4258 fprintf (dump_file, "Estimated number of original stmts (%u)"
4259 " not larger than estimated number of new"
4260 " stmts (%u).\n",
4261 total_orig, total_new);
dd172744
RB
4262 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4263 delete split_store;
4b84d9b8 4264 return false;
d7a9512e 4265 }
5384a802
JJ
4266 if (group->stores[0]->rhs_code == INTEGER_CST)
4267 {
4268 bool all_orig = true;
4269 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4270 if (!split_store->orig)
4271 {
4272 all_orig = false;
4273 break;
4274 }
4275 if (all_orig)
4276 {
4277 unsigned int cnt = split_stores.length ();
4278 store_immediate_info *store;
4279 FOR_EACH_VEC_ELT (group->stores, i, store)
4280 if (gimple_clobber_p (store->stmt))
4281 ++cnt;
4282 /* Punt if we wouldn't make any real changes, i.e. keep all
4283 orig stmts + all clobbers. */
4284 if (cnt == group->stores.length ())
4285 {
4286 if (dump_file && (dump_flags & TDF_DETAILS))
4287 fprintf (dump_file, "Exceeded original number of stmts (%u)."
4288 " Not profitable to emit new sequence.\n",
4289 orig_num_stmts);
4290 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4291 delete split_store;
4292 return false;
4293 }
4294 }
4295 }
f663d9ad
KT
4296
4297 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
4298 gimple_seq seq = NULL;
f663d9ad
KT
4299 tree last_vdef, new_vuse;
4300 last_vdef = gimple_vdef (group->last_stmt);
4301 new_vuse = gimple_vuse (group->last_stmt);
4b84d9b8
JJ
4302 tree bswap_res = NULL_TREE;
4303
5384a802
JJ
4304 /* Clobbers are not removed. */
4305 if (gimple_clobber_p (group->last_stmt))
4306 {
4307 new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
4308 gimple_set_vdef (group->last_stmt, new_vuse);
4309 }
4310
4b84d9b8
JJ
4311 if (group->stores[0]->rhs_code == LROTATE_EXPR
4312 || group->stores[0]->rhs_code == NOP_EXPR)
4313 {
4314 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
4315 gimple *ins_stmt = group->stores[0]->ins_stmt;
4316 struct symbolic_number *n = &group->stores[0]->n;
4317 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
4318
4319 switch (n->range)
4320 {
4321 case 16:
4322 load_type = bswap_type = uint16_type_node;
4323 break;
4324 case 32:
4325 load_type = uint32_type_node;
4326 if (bswap)
4327 {
4328 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
4329 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4330 }
4331 break;
4332 case 64:
4333 load_type = uint64_type_node;
4334 if (bswap)
4335 {
4336 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
4337 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4338 }
4339 break;
4340 default:
4341 gcc_unreachable ();
4342 }
4343
4344 /* If the loads have each vuse of the corresponding store,
4345 we've checked the aliasing already in try_coalesce_bswap and
4346 we want to sink the need load into seq. So need to use new_vuse
4347 on the load. */
30fa8e9c 4348 if (n->base_addr)
4b84d9b8 4349 {
30fa8e9c
JJ
4350 if (n->vuse == NULL)
4351 {
4352 n->vuse = new_vuse;
4353 ins_stmt = NULL;
4354 }
4355 else
4356 /* Update vuse in case it has changed by output_merged_stores. */
4357 n->vuse = gimple_vuse (ins_stmt);
4b84d9b8
JJ
4358 }
4359 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
b320edc0 4360 bswap_type, load_type, n, bswap,
d8545fb2 4361 ~(uint64_t) 0, 0);
4b84d9b8
JJ
4362 gcc_assert (bswap_res);
4363 }
f663d9ad
KT
4364
4365 gimple *stmt = NULL;
245f6de1 4366 auto_vec<gimple *, 32> orig_stmts;
4b84d9b8
JJ
4367 gimple_seq this_seq;
4368 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
aa55dc0c 4369 is_gimple_mem_ref_addr, NULL_TREE);
4b84d9b8 4370 gimple_seq_add_seq_without_update (&seq, this_seq);
245f6de1
JJ
4371
4372 tree load_addr[2] = { NULL_TREE, NULL_TREE };
4373 gimple_seq load_seq[2] = { NULL, NULL };
4374 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
4375 for (int j = 0; j < 2; ++j)
4376 {
4377 store_operand_info &op = group->stores[0]->ops[j];
4378 if (op.base_addr == NULL_TREE)
4379 continue;
4380
4381 store_immediate_info *infol = group->stores.last ();
4382 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
4383 {
97031af7
JJ
4384 /* We can't pick the location randomly; while we've verified
4385 all the loads have the same vuse, they can be still in different
4386 basic blocks and we need to pick the one from the last bb:
4387 int x = q[0];
4388 if (x == N) return;
4389 int y = q[1];
4390 p[0] = x;
4391 p[1] = y;
4392 otherwise if we put the wider load at the q[0] load, we might
4393 segfault if q[1] is not mapped. */
4394 basic_block bb = gimple_bb (op.stmt);
4395 gimple *ostmt = op.stmt;
4396 store_immediate_info *info;
4397 FOR_EACH_VEC_ELT (group->stores, i, info)
4398 {
4399 gimple *tstmt = info->ops[j].stmt;
4400 basic_block tbb = gimple_bb (tstmt);
4401 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
4402 {
4403 ostmt = tstmt;
4404 bb = tbb;
4405 }
4406 }
4407 load_gsi[j] = gsi_for_stmt (ostmt);
245f6de1
JJ
4408 load_addr[j]
4409 = force_gimple_operand_1 (unshare_expr (op.base_addr),
4410 &load_seq[j], is_gimple_mem_ref_addr,
4411 NULL_TREE);
4412 }
4413 else if (operand_equal_p (base_addr, op.base_addr, 0))
4414 load_addr[j] = addr;
4415 else
3e2927a1 4416 {
3e2927a1
JJ
4417 load_addr[j]
4418 = force_gimple_operand_1 (unshare_expr (op.base_addr),
4419 &this_seq, is_gimple_mem_ref_addr,
4420 NULL_TREE);
4421 gimple_seq_add_seq_without_update (&seq, this_seq);
4422 }
245f6de1
JJ
4423 }
4424
f663d9ad
KT
4425 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4426 {
e362a897
EB
4427 const unsigned HOST_WIDE_INT try_size = split_store->size;
4428 const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
4429 const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
4430 const unsigned HOST_WIDE_INT try_align = split_store->align;
4431 const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
a62b3dc5
JJ
4432 tree dest, src;
4433 location_t loc;
e362a897 4434
a62b3dc5
JJ
4435 if (split_store->orig)
4436 {
5384a802
JJ
4437 /* If there is just a single non-clobber constituent store
4438 which covers the whole area, just reuse the lhs and rhs. */
4439 gimple *orig_stmt = NULL;
4440 store_immediate_info *store;
4441 unsigned int j;
4442 FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4443 if (!gimple_clobber_p (store->stmt))
4444 {
4445 orig_stmt = store->stmt;
4446 break;
4447 }
245f6de1
JJ
4448 dest = gimple_assign_lhs (orig_stmt);
4449 src = gimple_assign_rhs1 (orig_stmt);
4450 loc = gimple_location (orig_stmt);
a62b3dc5
JJ
4451 }
4452 else
4453 {
245f6de1
JJ
4454 store_immediate_info *info;
4455 unsigned short clique, base;
4456 unsigned int k;
4457 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4458 orig_stmts.safe_push (info->stmt);
a62b3dc5 4459 tree offset_type
245f6de1 4460 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
e362a897 4461 tree dest_type;
245f6de1
JJ
4462 loc = get_location_for_stmts (orig_stmts);
4463 orig_stmts.truncate (0);
a62b3dc5 4464
e362a897
EB
4465 if (group->string_concatenation)
4466 dest_type
4467 = build_array_type_nelts (char_type_node,
4468 try_size / BITS_PER_UNIT);
4469 else
4470 {
4471 dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4472 dest_type = build_aligned_type (dest_type, try_align);
4473 }
4474 dest = fold_build2 (MEM_REF, dest_type, addr,
a62b3dc5 4475 build_int_cst (offset_type, try_pos));
245f6de1
JJ
4476 if (TREE_CODE (dest) == MEM_REF)
4477 {
4478 MR_DEPENDENCE_CLIQUE (dest) = clique;
4479 MR_DEPENDENCE_BASE (dest) = base;
4480 }
4481
c94c3532 4482 tree mask;
e362a897 4483 if (bswap_res || group->string_concatenation)
c94c3532
EB
4484 mask = integer_zero_node;
4485 else
e362a897
EB
4486 mask = native_interpret_expr (dest_type,
4487 group->mask + try_offset,
4b84d9b8 4488 group->buf_size);
245f6de1
JJ
4489
4490 tree ops[2];
4491 for (int j = 0;
4492 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4493 ++j)
4494 {
4495 store_operand_info &op = split_store->orig_stores[0]->ops[j];
4b84d9b8
JJ
4496 if (bswap_res)
4497 ops[j] = bswap_res;
e362a897
EB
4498 else if (group->string_concatenation)
4499 {
4500 ops[j] = build_string (try_size / BITS_PER_UNIT,
4501 (const char *) group->val + try_offset);
4502 TREE_TYPE (ops[j]) = dest_type;
4503 }
4b84d9b8 4504 else if (op.base_addr)
245f6de1
JJ
4505 {
4506 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4507 orig_stmts.safe_push (info->ops[j].stmt);
4508
4509 offset_type = get_alias_type_for_stmts (orig_stmts, true,
4510 &clique, &base);
4511 location_t load_loc = get_location_for_stmts (orig_stmts);
4512 orig_stmts.truncate (0);
4513
4514 unsigned HOST_WIDE_INT load_align = group->load_align[j];
4515 unsigned HOST_WIDE_INT align_bitpos
c94c3532 4516 = known_alignment (try_bitpos
8a91d545
RS
4517 - split_store->orig_stores[0]->bitpos
4518 + op.bitpos);
4519 if (align_bitpos & (load_align - 1))
245f6de1
JJ
4520 load_align = least_bit_hwi (align_bitpos);
4521
4522 tree load_int_type
4523 = build_nonstandard_integer_type (try_size, UNSIGNED);
4524 load_int_type
4525 = build_aligned_type (load_int_type, load_align);
4526
8a91d545 4527 poly_uint64 load_pos
c94c3532 4528 = exact_div (try_bitpos
8a91d545
RS
4529 - split_store->orig_stores[0]->bitpos
4530 + op.bitpos,
4531 BITS_PER_UNIT);
245f6de1
JJ
4532 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4533 build_int_cst (offset_type, load_pos));
4534 if (TREE_CODE (ops[j]) == MEM_REF)
4535 {
4536 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4537 MR_DEPENDENCE_BASE (ops[j]) = base;
4538 }
4539 if (!integer_zerop (mask))
e9e2bad7
MS
4540 {
4541 /* The load might load some bits (that will be masked
4542 off later on) uninitialized, avoid -W*uninitialized
4543 warnings in that case. */
4544 suppress_warning (ops[j], OPT_Wuninitialized);
4545 }
245f6de1 4546
e362a897 4547 stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
245f6de1
JJ
4548 gimple_set_location (stmt, load_loc);
4549 if (gsi_bb (load_gsi[j]))
4550 {
4551 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4552 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4553 }
4554 else
4555 {
4556 gimple_set_vuse (stmt, new_vuse);
4557 gimple_seq_add_stmt_without_update (&seq, stmt);
4558 }
4559 ops[j] = gimple_assign_lhs (stmt);
a6fbd154
JJ
4560 tree xor_mask;
4561 enum tree_code inv_op
e362a897 4562 = invert_op (split_store, j, dest_type, xor_mask);
a6fbd154 4563 if (inv_op != NOP_EXPR)
383ac8dc 4564 {
e362a897 4565 stmt = gimple_build_assign (make_ssa_name (dest_type),
a6fbd154 4566 inv_op, ops[j], xor_mask);
383ac8dc
JJ
4567 gimple_set_location (stmt, load_loc);
4568 ops[j] = gimple_assign_lhs (stmt);
4569
4570 if (gsi_bb (load_gsi[j]))
4571 gimple_seq_add_stmt_without_update (&load_seq[j],
4572 stmt);
4573 else
4574 gimple_seq_add_stmt_without_update (&seq, stmt);
4575 }
245f6de1
JJ
4576 }
4577 else
e362a897
EB
4578 ops[j] = native_interpret_expr (dest_type,
4579 group->val + try_offset,
245f6de1
JJ
4580 group->buf_size);
4581 }
4582
4583 switch (split_store->orig_stores[0]->rhs_code)
4584 {
4585 case BIT_AND_EXPR:
4586 case BIT_IOR_EXPR:
4587 case BIT_XOR_EXPR:
4588 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4589 {
4590 tree rhs1 = gimple_assign_rhs1 (info->stmt);
4591 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4592 }
4593 location_t bit_loc;
4594 bit_loc = get_location_for_stmts (orig_stmts);
4595 orig_stmts.truncate (0);
4596
4597 stmt
e362a897 4598 = gimple_build_assign (make_ssa_name (dest_type),
245f6de1
JJ
4599 split_store->orig_stores[0]->rhs_code,
4600 ops[0], ops[1]);
4601 gimple_set_location (stmt, bit_loc);
4602 /* If there is just one load and there is a separate
4603 load_seq[0], emit the bitwise op right after it. */
4604 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4605 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4606 /* Otherwise, if at least one load is in seq, we need to
4607 emit the bitwise op right before the store. If there
4608 are two loads and are emitted somewhere else, it would
4609 be better to emit the bitwise op as early as possible;
4610 we don't track where that would be possible right now
4611 though. */
4612 else
4613 gimple_seq_add_stmt_without_update (&seq, stmt);
4614 src = gimple_assign_lhs (stmt);
a6fbd154
JJ
4615 tree xor_mask;
4616 enum tree_code inv_op;
e362a897 4617 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
a6fbd154 4618 if (inv_op != NOP_EXPR)
d60edaba 4619 {
e362a897 4620 stmt = gimple_build_assign (make_ssa_name (dest_type),
a6fbd154 4621 inv_op, src, xor_mask);
d60edaba
JJ
4622 gimple_set_location (stmt, bit_loc);
4623 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4624 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4625 else
4626 gimple_seq_add_stmt_without_update (&seq, stmt);
4627 src = gimple_assign_lhs (stmt);
4628 }
245f6de1 4629 break;
4b84d9b8
JJ
4630 case LROTATE_EXPR:
4631 case NOP_EXPR:
4632 src = ops[0];
4633 if (!is_gimple_val (src))
4634 {
4635 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4636 src);
4637 gimple_seq_add_stmt_without_update (&seq, stmt);
4638 src = gimple_assign_lhs (stmt);
4639 }
e362a897 4640 if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
4b84d9b8 4641 {
e362a897 4642 stmt = gimple_build_assign (make_ssa_name (dest_type),
4b84d9b8
JJ
4643 NOP_EXPR, src);
4644 gimple_seq_add_stmt_without_update (&seq, stmt);
4645 src = gimple_assign_lhs (stmt);
4646 }
e362a897 4647 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
be52ac73
JJ
4648 if (inv_op != NOP_EXPR)
4649 {
e362a897 4650 stmt = gimple_build_assign (make_ssa_name (dest_type),
be52ac73
JJ
4651 inv_op, src, xor_mask);
4652 gimple_set_location (stmt, loc);
4653 gimple_seq_add_stmt_without_update (&seq, stmt);
4654 src = gimple_assign_lhs (stmt);
4655 }
4b84d9b8 4656 break;
245f6de1
JJ
4657 default:
4658 src = ops[0];
4659 break;
4660 }
4661
c94c3532
EB
4662 /* If bit insertion is required, we use the source as an accumulator
4663 into which the successive bit-field values are manually inserted.
4664 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4665 if (group->bit_insertion)
4666 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4667 if (info->rhs_code == BIT_INSERT_EXPR
4668 && info->bitpos < try_bitpos + try_size
4669 && info->bitpos + info->bitsize > try_bitpos)
4670 {
4671 /* Mask, truncate, convert to final type, shift and ior into
4672 the accumulator. Note that every step can be a no-op. */
4673 const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4674 const HOST_WIDE_INT end_gap
4675 = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4676 tree tem = info->ops[0].val;
ed01d707
EB
4677 if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
4678 {
4679 const unsigned HOST_WIDE_INT size
4680 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
4681 tree integer_type
4682 = build_nonstandard_integer_type (size, UNSIGNED);
4683 tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
4684 integer_type, tem);
4685 }
c14add82
EB
4686 if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4687 {
4688 tree bitfield_type
4689 = build_nonstandard_integer_type (info->bitsize,
4690 UNSIGNED);
4691 tem = gimple_convert (&seq, loc, bitfield_type, tem);
4692 }
4693 else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
c94c3532 4694 {
49a3b35c
JJ
4695 wide_int imask
4696 = wi::mask (info->bitsize, false,
4697 TYPE_PRECISION (TREE_TYPE (tem)));
c94c3532
EB
4698 tem = gimple_build (&seq, loc,
4699 BIT_AND_EXPR, TREE_TYPE (tem), tem,
49a3b35c
JJ
4700 wide_int_to_tree (TREE_TYPE (tem),
4701 imask));
c94c3532
EB
4702 }
4703 const HOST_WIDE_INT shift
4704 = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4705 if (shift < 0)
4706 tem = gimple_build (&seq, loc,
4707 RSHIFT_EXPR, TREE_TYPE (tem), tem,
4708 build_int_cst (NULL_TREE, -shift));
e362a897 4709 tem = gimple_convert (&seq, loc, dest_type, tem);
c94c3532
EB
4710 if (shift > 0)
4711 tem = gimple_build (&seq, loc,
e362a897 4712 LSHIFT_EXPR, dest_type, tem,
c94c3532
EB
4713 build_int_cst (NULL_TREE, shift));
4714 src = gimple_build (&seq, loc,
e362a897 4715 BIT_IOR_EXPR, dest_type, tem, src);
c94c3532
EB
4716 }
4717
a62b3dc5
JJ
4718 if (!integer_zerop (mask))
4719 {
e362a897 4720 tree tem = make_ssa_name (dest_type);
a62b3dc5
JJ
4721 tree load_src = unshare_expr (dest);
4722 /* The load might load some or all bits uninitialized,
4723 avoid -W*uninitialized warnings in that case.
4724 As optimization, it would be nice if all the bits are
4725 provably uninitialized (no stores at all yet or previous
4726 store a CLOBBER) we'd optimize away the load and replace
4727 it e.g. with 0. */
e9e2bad7 4728 suppress_warning (load_src, OPT_Wuninitialized);
a62b3dc5
JJ
4729 stmt = gimple_build_assign (tem, load_src);
4730 gimple_set_location (stmt, loc);
4731 gimple_set_vuse (stmt, new_vuse);
4732 gimple_seq_add_stmt_without_update (&seq, stmt);
4733
4734 /* FIXME: If there is a single chunk of zero bits in mask,
4735 perhaps use BIT_INSERT_EXPR instead? */
e362a897 4736 stmt = gimple_build_assign (make_ssa_name (dest_type),
a62b3dc5
JJ
4737 BIT_AND_EXPR, tem, mask);
4738 gimple_set_location (stmt, loc);
4739 gimple_seq_add_stmt_without_update (&seq, stmt);
4740 tem = gimple_assign_lhs (stmt);
4741
245f6de1 4742 if (TREE_CODE (src) == INTEGER_CST)
e362a897 4743 src = wide_int_to_tree (dest_type,
245f6de1
JJ
4744 wi::bit_and_not (wi::to_wide (src),
4745 wi::to_wide (mask)));
4746 else
4747 {
4748 tree nmask
e362a897 4749 = wide_int_to_tree (dest_type,
245f6de1 4750 wi::bit_not (wi::to_wide (mask)));
e362a897 4751 stmt = gimple_build_assign (make_ssa_name (dest_type),
245f6de1
JJ
4752 BIT_AND_EXPR, src, nmask);
4753 gimple_set_location (stmt, loc);
4754 gimple_seq_add_stmt_without_update (&seq, stmt);
4755 src = gimple_assign_lhs (stmt);
4756 }
e362a897 4757 stmt = gimple_build_assign (make_ssa_name (dest_type),
a62b3dc5
JJ
4758 BIT_IOR_EXPR, tem, src);
4759 gimple_set_location (stmt, loc);
4760 gimple_seq_add_stmt_without_update (&seq, stmt);
4761 src = gimple_assign_lhs (stmt);
4762 }
4763 }
f663d9ad
KT
4764
4765 stmt = gimple_build_assign (dest, src);
4766 gimple_set_location (stmt, loc);
4767 gimple_set_vuse (stmt, new_vuse);
4768 gimple_seq_add_stmt_without_update (&seq, stmt);
4769
629387a6
EB
4770 if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4771 add_stmt_to_eh_lp (stmt, group->lp_nr);
4772
f663d9ad
KT
4773 tree new_vdef;
4774 if (i < split_stores.length () - 1)
a62b3dc5 4775 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
f663d9ad
KT
4776 else
4777 new_vdef = last_vdef;
4778
4779 gimple_set_vdef (stmt, new_vdef);
4780 SSA_NAME_DEF_STMT (new_vdef) = stmt;
4781 new_vuse = new_vdef;
4782 }
4783
4784 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4785 delete split_store;
4786
f663d9ad
KT
4787 gcc_assert (seq);
4788 if (dump_file)
4789 {
4790 fprintf (dump_file,
c94c3532 4791 "New sequence of %u stores to replace old one of %u stores\n",
a62b3dc5 4792 split_stores.length (), orig_num_stmts);
f663d9ad
KT
4793 if (dump_flags & TDF_DETAILS)
4794 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4795 }
629387a6 4796
5384a802
JJ
4797 if (gimple_clobber_p (group->last_stmt))
4798 update_stmt (group->last_stmt);
4799
629387a6
EB
4800 if (group->lp_nr > 0)
4801 {
4802 /* We're going to insert a sequence of (potentially) throwing stores
4803 into an active EH region. This means that we're going to create
4804 new basic blocks with EH edges pointing to the post landing pad
4805 and, therefore, to have to update its PHI nodes, if any. For the
4806 virtual PHI node, we're going to use the VDEFs created above, but
4807 for the other nodes, we need to record the original reaching defs. */
4808 eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4809 basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4810 basic_block last_bb = gimple_bb (group->last_stmt);
4811 edge last_edge = find_edge (last_bb, lp_bb);
4812 auto_vec<tree, 16> last_defs;
4813 gphi_iterator gpi;
4814 for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4815 {
4816 gphi *phi = gpi.phi ();
4817 tree last_def;
4818 if (virtual_operand_p (gimple_phi_result (phi)))
4819 last_def = NULL_TREE;
4820 else
4821 last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4822 last_defs.safe_push (last_def);
4823 }
4824
4825 /* Do the insertion. Then, if new basic blocks have been created in the
4826 process, rewind the chain of VDEFs create above to walk the new basic
4827 blocks and update the corresponding arguments of the PHI nodes. */
4828 update_modified_stmts (seq);
4829 if (gimple_find_sub_bbs (seq, &last_gsi))
4830 while (last_vdef != gimple_vuse (group->last_stmt))
4831 {
4832 gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4833 if (stmt_could_throw_p (cfun, stmt))
4834 {
4835 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4836 unsigned int i;
4837 for (gpi = gsi_start_phis (lp_bb), i = 0;
4838 !gsi_end_p (gpi);
4839 gsi_next (&gpi), i++)
4840 {
4841 gphi *phi = gpi.phi ();
4842 tree new_def;
4843 if (virtual_operand_p (gimple_phi_result (phi)))
4844 new_def = last_vdef;
4845 else
4846 new_def = last_defs[i];
4847 add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4848 }
4849 }
4850 last_vdef = gimple_vuse (stmt);
4851 }
4852 }
4853 else
4854 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4855
245f6de1
JJ
4856 for (int j = 0; j < 2; ++j)
4857 if (load_seq[j])
4858 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
f663d9ad
KT
4859
4860 return true;
4861}
4862
4863/* Process the merged_store_group objects created in the coalescing phase.
4864 The stores are all against the base object BASE.
4865 Try to output the widened stores and delete the original statements if
4866 successful. Return true iff any changes were made. */
4867
4868bool
b5926e23 4869imm_store_chain_info::output_merged_stores ()
f663d9ad
KT
4870{
4871 unsigned int i;
4872 merged_store_group *merged_store;
4873 bool ret = false;
4874 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4875 {
a95b474a
ML
4876 if (dbg_cnt (store_merging)
4877 && output_merged_store (merged_store))
f663d9ad
KT
4878 {
4879 unsigned int j;
4880 store_immediate_info *store;
4881 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4882 {
4883 gimple *stmt = store->stmt;
4884 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5384a802
JJ
4885 /* Don't remove clobbers, they are still useful even if
4886 everything is overwritten afterwards. */
4887 if (gimple_clobber_p (stmt))
4888 continue;
f663d9ad 4889 gsi_remove (&gsi, true);
629387a6
EB
4890 if (store->lp_nr)
4891 remove_stmt_from_eh_lp (stmt);
f663d9ad
KT
4892 if (stmt != merged_store->last_stmt)
4893 {
4894 unlink_stmt_vdef (stmt);
4895 release_defs (stmt);
4896 }
4897 }
4898 ret = true;
4899 }
4900 }
4901 if (ret && dump_file)
4902 fprintf (dump_file, "Merging successful!\n");
4903
4904 return ret;
4905}
4906
4907/* Coalesce the store_immediate_info objects recorded against the base object
4908 BASE in the first phase and output them.
4909 Delete the allocated structures.
4910 Return true if any changes were made. */
4911
4912bool
b5926e23 4913imm_store_chain_info::terminate_and_process_chain ()
f663d9ad 4914{
95d94b52
RB
4915 if (dump_file && (dump_flags & TDF_DETAILS))
4916 fprintf (dump_file, "Terminating chain with %u stores\n",
4917 m_store_info.length ());
f663d9ad
KT
4918 /* Process store chain. */
4919 bool ret = false;
4920 if (m_store_info.length () > 1)
4921 {
4922 ret = coalesce_immediate_stores ();
4923 if (ret)
b5926e23 4924 ret = output_merged_stores ();
f663d9ad
KT
4925 }
4926
4927 /* Delete all the entries we allocated ourselves. */
4928 store_immediate_info *info;
4929 unsigned int i;
4930 FOR_EACH_VEC_ELT (m_store_info, i, info)
4931 delete info;
4932
4933 merged_store_group *merged_info;
4934 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4935 delete merged_info;
4936
4937 return ret;
4938}
4939
4940/* Return true iff LHS is a destination potentially interesting for
4941 store merging. In practice these are the codes that get_inner_reference
4942 can process. */
4943
4944static bool
4945lhs_valid_for_store_merging_p (tree lhs)
4946{
629387a6 4947 if (DECL_P (lhs))
f663d9ad
KT
4948 return true;
4949
629387a6
EB
4950 switch (TREE_CODE (lhs))
4951 {
4952 case ARRAY_REF:
4953 case ARRAY_RANGE_REF:
4954 case BIT_FIELD_REF:
4955 case COMPONENT_REF:
4956 case MEM_REF:
e362a897 4957 case VIEW_CONVERT_EXPR:
629387a6
EB
4958 return true;
4959 default:
4960 return false;
4961 }
f663d9ad
KT
4962}
4963
4964/* Return true if the tree RHS is a constant we want to consider
4965 during store merging. In practice accept all codes that
4966 native_encode_expr accepts. */
4967
4968static bool
4969rhs_valid_for_store_merging_p (tree rhs)
4970{
cf098191 4971 unsigned HOST_WIDE_INT size;
3afd514b 4972 if (TREE_CODE (rhs) == CONSTRUCTOR
3afd514b
JJ
4973 && CONSTRUCTOR_NELTS (rhs) == 0
4974 && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4975 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4976 return true;
cf098191
RS
4977 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4978 && native_encode_expr (rhs, NULL, size) != 0);
f663d9ad
KT
4979}
4980
629387a6
EB
4981/* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4982 and return true on success or false on failure. */
4983
4984static bool
4985adjust_bit_pos (poly_offset_int byte_off,
4986 poly_int64 *pbitpos,
4987 poly_uint64 *pbitregion_start,
4988 poly_uint64 *pbitregion_end)
4989{
4990 poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4991 bit_off += *pbitpos;
4992
4993 if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4994 {
4995 if (maybe_ne (*pbitregion_end, 0U))
4996 {
4997 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4998 bit_off += *pbitregion_start;
4999 if (bit_off.to_uhwi (pbitregion_start))
5000 {
5001 bit_off = byte_off << LOG2_BITS_PER_UNIT;
5002 bit_off += *pbitregion_end;
5003 if (!bit_off.to_uhwi (pbitregion_end))
5004 *pbitregion_end = 0;
5005 }
5006 else
5007 *pbitregion_end = 0;
5008 }
5009 return true;
5010 }
5011 else
5012 return false;
5013}
5014
245f6de1
JJ
5015/* If MEM is a memory reference usable for store merging (either as
5016 store destination or for loads), return the non-NULL base_addr
5017 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
5018 Otherwise return NULL, *PBITPOS should be still valid even for that
5019 case. */
5020
5021static tree
8a91d545
RS
5022mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
5023 poly_uint64 *pbitpos,
5024 poly_uint64 *pbitregion_start,
5025 poly_uint64 *pbitregion_end)
245f6de1 5026{
8a91d545
RS
5027 poly_int64 bitsize, bitpos;
5028 poly_uint64 bitregion_start = 0, bitregion_end = 0;
245f6de1
JJ
5029 machine_mode mode;
5030 int unsignedp = 0, reversep = 0, volatilep = 0;
5031 tree offset;
5032 tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
5033 &unsignedp, &reversep, &volatilep);
5034 *pbitsize = bitsize;
387e818c 5035 if (known_le (bitsize, 0))
245f6de1
JJ
5036 return NULL_TREE;
5037
5038 if (TREE_CODE (mem) == COMPONENT_REF
5039 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
5040 {
5041 get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
8a91d545
RS
5042 if (maybe_ne (bitregion_end, 0U))
5043 bitregion_end += 1;
245f6de1
JJ
5044 }
5045
5046 if (reversep)
5047 return NULL_TREE;
5048
5049 /* We do not want to rewrite TARGET_MEM_REFs. */
5050 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
5051 return NULL_TREE;
5052 /* In some cases get_inner_reference may return a
5053 MEM_REF [ptr + byteoffset]. For the purposes of this pass
5054 canonicalize the base_addr to MEM_REF [ptr] and take
5055 byteoffset into account in the bitpos. This occurs in
5056 PR 23684 and this way we can catch more chains. */
5057 else if (TREE_CODE (base_addr) == MEM_REF)
5058 {
629387a6
EB
5059 if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
5060 &bitregion_start, &bitregion_end))
245f6de1
JJ
5061 return NULL_TREE;
5062 base_addr = TREE_OPERAND (base_addr, 0);
5063 }
5064 /* get_inner_reference returns the base object, get at its
5065 address now. */
5066 else
5067 {
8a91d545 5068 if (maybe_lt (bitpos, 0))
245f6de1
JJ
5069 return NULL_TREE;
5070 base_addr = build_fold_addr_expr (base_addr);
5071 }
5072
629387a6 5073 if (offset)
245f6de1
JJ
5074 {
5075 /* If the access is variable offset then a base decl has to be
5076 address-taken to be able to emit pointer-based stores to it.
5077 ??? We might be able to get away with re-using the original
5078 base up to the first variable part and then wrapping that inside
5079 a BIT_FIELD_REF. */
5080 tree base = get_base_address (base_addr);
629387a6 5081 if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
245f6de1
JJ
5082 return NULL_TREE;
5083
629387a6
EB
5084 /* Similarly to above for the base, remove constant from the offset. */
5085 if (TREE_CODE (offset) == PLUS_EXPR
5086 && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
5087 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
5088 &bitpos, &bitregion_start, &bitregion_end))
5089 offset = TREE_OPERAND (offset, 0);
5090
245f6de1
JJ
5091 base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
5092 base_addr, offset);
5093 }
5094
629387a6
EB
5095 if (known_eq (bitregion_end, 0U))
5096 {
5097 bitregion_start = round_down_to_byte_boundary (bitpos);
5098 bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
5099 }
5100
245f6de1
JJ
5101 *pbitsize = bitsize;
5102 *pbitpos = bitpos;
5103 *pbitregion_start = bitregion_start;
5104 *pbitregion_end = bitregion_end;
5105 return base_addr;
5106}
5107
5108/* Return true if STMT is a load that can be used for store merging.
5109 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
5110 BITREGION_END are properties of the corresponding store. */
5111
5112static bool
5113handled_load (gimple *stmt, store_operand_info *op,
8a91d545
RS
5114 poly_uint64 bitsize, poly_uint64 bitpos,
5115 poly_uint64 bitregion_start, poly_uint64 bitregion_end)
245f6de1 5116{
383ac8dc 5117 if (!is_gimple_assign (stmt))
245f6de1 5118 return false;
383ac8dc
JJ
5119 if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
5120 {
5121 tree rhs1 = gimple_assign_rhs1 (stmt);
5122 if (TREE_CODE (rhs1) == SSA_NAME
383ac8dc
JJ
5123 && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
5124 bitregion_start, bitregion_end))
5125 {
d60edaba
JJ
5126 /* Don't allow _1 = load; _2 = ~1; _3 = ~_2; which should have
5127 been optimized earlier, but if allowed here, would confuse the
5128 multiple uses counting. */
5129 if (op->bit_not_p)
5130 return false;
383ac8dc
JJ
5131 op->bit_not_p = !op->bit_not_p;
5132 return true;
5133 }
5134 return false;
5135 }
5136 if (gimple_vuse (stmt)
5137 && gimple_assign_load_p (stmt)
36bbc05d 5138 && !stmt_can_throw_internal (cfun, stmt)
245f6de1
JJ
5139 && !gimple_has_volatile_ops (stmt))
5140 {
5141 tree mem = gimple_assign_rhs1 (stmt);
5142 op->base_addr
5143 = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
5144 &op->bitregion_start,
5145 &op->bitregion_end);
5146 if (op->base_addr != NULL_TREE
8a91d545
RS
5147 && known_eq (op->bitsize, bitsize)
5148 && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
5149 && known_ge (op->bitpos - op->bitregion_start,
5150 bitpos - bitregion_start)
5151 && known_ge (op->bitregion_end - op->bitpos,
5152 bitregion_end - bitpos))
245f6de1
JJ
5153 {
5154 op->stmt = stmt;
5155 op->val = mem;
383ac8dc 5156 op->bit_not_p = false;
245f6de1
JJ
5157 return true;
5158 }
5159 }
5160 return false;
5161}
5162
629387a6
EB
5163/* Return the index number of the landing pad for STMT, if any. */
5164
5165static int
5166lp_nr_for_store (gimple *stmt)
5167{
5168 if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
5169 return 0;
5170
5171 if (!stmt_could_throw_p (cfun, stmt))
5172 return 0;
5173
5174 return lookup_stmt_eh_lp (stmt);
5175}
5176
245f6de1 5177/* Record the store STMT for store merging optimization if it can be
629387a6 5178 optimized. Return true if any changes were made. */
245f6de1 5179
629387a6 5180bool
245f6de1
JJ
5181pass_store_merging::process_store (gimple *stmt)
5182{
5183 tree lhs = gimple_assign_lhs (stmt);
5184 tree rhs = gimple_assign_rhs1 (stmt);
2c832ffe
SSF
5185 poly_uint64 bitsize, bitpos = 0;
5186 poly_uint64 bitregion_start = 0, bitregion_end = 0;
245f6de1
JJ
5187 tree base_addr
5188 = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
5189 &bitregion_start, &bitregion_end);
8a91d545 5190 if (known_eq (bitsize, 0U))
629387a6 5191 return false;
245f6de1
JJ
5192
5193 bool invalid = (base_addr == NULL_TREE
8a91d545
RS
5194 || (maybe_gt (bitsize,
5195 (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
3afd514b
JJ
5196 && TREE_CODE (rhs) != INTEGER_CST
5197 && (TREE_CODE (rhs) != CONSTRUCTOR
5198 || CONSTRUCTOR_NELTS (rhs) != 0)));
245f6de1 5199 enum tree_code rhs_code = ERROR_MARK;
d60edaba 5200 bool bit_not_p = false;
4b84d9b8
JJ
5201 struct symbolic_number n;
5202 gimple *ins_stmt = NULL;
245f6de1
JJ
5203 store_operand_info ops[2];
5204 if (invalid)
5205 ;
e362a897
EB
5206 else if (TREE_CODE (rhs) == STRING_CST)
5207 {
5208 rhs_code = STRING_CST;
5209 ops[0].val = rhs;
5210 }
245f6de1
JJ
5211 else if (rhs_valid_for_store_merging_p (rhs))
5212 {
5213 rhs_code = INTEGER_CST;
5214 ops[0].val = rhs;
5215 }
e362a897 5216 else if (TREE_CODE (rhs) == SSA_NAME)
245f6de1
JJ
5217 {
5218 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
5219 if (!is_gimple_assign (def_stmt))
5220 invalid = true;
5221 else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
5222 bitregion_start, bitregion_end))
5223 rhs_code = MEM_REF;
d60edaba
JJ
5224 else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
5225 {
5226 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5227 if (TREE_CODE (rhs1) == SSA_NAME
5228 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
5229 {
5230 bit_not_p = true;
5231 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5232 }
5233 }
c94c3532 5234
d60edaba 5235 if (rhs_code == ERROR_MARK && !invalid)
245f6de1
JJ
5236 switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
5237 {
5238 case BIT_AND_EXPR:
5239 case BIT_IOR_EXPR:
5240 case BIT_XOR_EXPR:
5241 tree rhs1, rhs2;
5242 rhs1 = gimple_assign_rhs1 (def_stmt);
5243 rhs2 = gimple_assign_rhs2 (def_stmt);
5244 invalid = true;
d7a9512e 5245 if (TREE_CODE (rhs1) != SSA_NAME)
245f6de1
JJ
5246 break;
5247 def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
5248 if (!is_gimple_assign (def_stmt1)
5249 || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
5250 bitregion_start, bitregion_end))
5251 break;
5252 if (rhs_valid_for_store_merging_p (rhs2))
5253 ops[1].val = rhs2;
d7a9512e 5254 else if (TREE_CODE (rhs2) != SSA_NAME)
245f6de1
JJ
5255 break;
5256 else
5257 {
5258 def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
5259 if (!is_gimple_assign (def_stmt2))
5260 break;
5261 else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
5262 bitregion_start, bitregion_end))
5263 break;
5264 }
5265 invalid = false;
5266 break;
5267 default:
5268 invalid = true;
5269 break;
5270 }
c94c3532 5271
8a91d545
RS
5272 unsigned HOST_WIDE_INT const_bitsize;
5273 if (bitsize.is_constant (&const_bitsize)
c94c3532 5274 && (const_bitsize % BITS_PER_UNIT) == 0
8a91d545 5275 && const_bitsize <= 64
c94c3532 5276 && multiple_p (bitpos, BITS_PER_UNIT))
4b84d9b8
JJ
5277 {
5278 ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
5279 if (ins_stmt)
5280 {
5281 uint64_t nn = n.n;
5282 for (unsigned HOST_WIDE_INT i = 0;
8a91d545
RS
5283 i < const_bitsize;
5284 i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
4b84d9b8
JJ
5285 if ((nn & MARKER_MASK) == 0
5286 || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
5287 {
5288 ins_stmt = NULL;
5289 break;
5290 }
5291 if (ins_stmt)
5292 {
5293 if (invalid)
5294 {
5295 rhs_code = LROTATE_EXPR;
5296 ops[0].base_addr = NULL_TREE;
5297 ops[1].base_addr = NULL_TREE;
5298 }
5299 invalid = false;
5300 }
5301 }
5302 }
c94c3532
EB
5303
5304 if (invalid
5305 && bitsize.is_constant (&const_bitsize)
5306 && ((const_bitsize % BITS_PER_UNIT) != 0
5307 || !multiple_p (bitpos, BITS_PER_UNIT))
ed01d707 5308 && const_bitsize <= MAX_FIXED_MODE_SIZE)
c94c3532 5309 {
c14add82 5310 /* Bypass a conversion to the bit-field type. */
31a5d8c5
EB
5311 if (!bit_not_p
5312 && is_gimple_assign (def_stmt)
5313 && CONVERT_EXPR_CODE_P (rhs_code))
c94c3532
EB
5314 {
5315 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5316 if (TREE_CODE (rhs1) == SSA_NAME
c14add82 5317 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
c94c3532
EB
5318 rhs = rhs1;
5319 }
5320 rhs_code = BIT_INSERT_EXPR;
31a5d8c5 5321 bit_not_p = false;
c94c3532
EB
5322 ops[0].val = rhs;
5323 ops[0].base_addr = NULL_TREE;
5324 ops[1].base_addr = NULL_TREE;
5325 invalid = false;
5326 }
245f6de1 5327 }
e362a897
EB
5328 else
5329 invalid = true;
245f6de1 5330
8a91d545
RS
5331 unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
5332 unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
5333 if (invalid
5334 || !bitsize.is_constant (&const_bitsize)
5335 || !bitpos.is_constant (&const_bitpos)
5336 || !bitregion_start.is_constant (&const_bitregion_start)
5337 || !bitregion_end.is_constant (&const_bitregion_end))
629387a6 5338 return terminate_all_aliasing_chains (NULL, stmt);
245f6de1 5339
4b84d9b8
JJ
5340 if (!ins_stmt)
5341 memset (&n, 0, sizeof (n));
5342
99b1c316 5343 class imm_store_chain_info **chain_info = NULL;
629387a6 5344 bool ret = false;
383ac8dc
JJ
5345 if (base_addr)
5346 chain_info = m_stores.get (base_addr);
5347
245f6de1
JJ
5348 store_immediate_info *info;
5349 if (chain_info)
5350 {
5351 unsigned int ord = (*chain_info)->m_store_info.length ();
8a91d545
RS
5352 info = new store_immediate_info (const_bitsize, const_bitpos,
5353 const_bitregion_start,
5354 const_bitregion_end,
5355 stmt, ord, rhs_code, n, ins_stmt,
629387a6
EB
5356 bit_not_p, lp_nr_for_store (stmt),
5357 ops[0], ops[1]);
245f6de1
JJ
5358 if (dump_file && (dump_flags & TDF_DETAILS))
5359 {
5360 fprintf (dump_file, "Recording immediate store from stmt:\n");
5361 print_gimple_stmt (dump_file, stmt, 0);
5362 }
5363 (*chain_info)->m_store_info.safe_push (info);
95d94b52 5364 m_n_stores++;
629387a6 5365 ret |= terminate_all_aliasing_chains (chain_info, stmt);
245f6de1
JJ
5366 /* If we reach the limit of stores to merge in a chain terminate and
5367 process the chain now. */
5368 if ((*chain_info)->m_store_info.length ()
028d4092 5369 == (unsigned int) param_max_stores_to_merge)
245f6de1
JJ
5370 {
5371 if (dump_file && (dump_flags & TDF_DETAILS))
5372 fprintf (dump_file,
5373 "Reached maximum number of statements to merge:\n");
629387a6 5374 ret |= terminate_and_process_chain (*chain_info);
245f6de1 5375 }
245f6de1 5376 }
95d94b52
RB
5377 else
5378 {
5379 /* Store aliases any existing chain? */
5380 ret |= terminate_all_aliasing_chains (NULL, stmt);
245f6de1 5381
95d94b52
RB
5382 /* Start a new chain. */
5383 class imm_store_chain_info *new_chain
5384 = new imm_store_chain_info (m_stores_head, base_addr);
5385 info = new store_immediate_info (const_bitsize, const_bitpos,
5386 const_bitregion_start,
5387 const_bitregion_end,
5388 stmt, 0, rhs_code, n, ins_stmt,
5389 bit_not_p, lp_nr_for_store (stmt),
5390 ops[0], ops[1]);
5391 new_chain->m_store_info.safe_push (info);
5392 m_n_stores++;
5393 m_stores.put (base_addr, new_chain);
5394 m_n_chains++;
5395 if (dump_file && (dump_flags & TDF_DETAILS))
5396 {
5397 fprintf (dump_file, "Starting active chain number %u with statement:\n",
5398 m_n_chains);
5399 print_gimple_stmt (dump_file, stmt, 0);
5400 fprintf (dump_file, "The base object is:\n");
5401 print_generic_expr (dump_file, base_addr);
5402 fprintf (dump_file, "\n");
5403 }
5404 }
5405
5406 /* Prune oldest chains so that after adding the chain or store above
5407 we're again within the limits set by the params. */
5408 if (m_n_chains > (unsigned)param_max_store_chains_to_track
5409 || m_n_stores > (unsigned)param_max_stores_to_track)
245f6de1 5410 {
95d94b52
RB
5411 if (dump_file && (dump_flags & TDF_DETAILS))
5412 fprintf (dump_file, "Too many chains (%u > %d) or stores (%u > %d), "
5413 "terminating oldest chain(s).\n", m_n_chains,
5414 param_max_store_chains_to_track, m_n_stores,
5415 param_max_stores_to_track);
5416 imm_store_chain_info **e = &m_stores_head;
5417 unsigned idx = 0;
5418 unsigned n_stores = 0;
5419 while (*e)
5420 {
5421 if (idx >= (unsigned)param_max_store_chains_to_track
5422 || (n_stores + (*e)->m_store_info.length ()
5423 > (unsigned)param_max_stores_to_track))
8a8eee6b 5424 ret |= terminate_and_process_chain (*e);
95d94b52
RB
5425 else
5426 {
5427 n_stores += (*e)->m_store_info.length ();
5428 e = &(*e)->next;
5429 ++idx;
5430 }
5431 }
245f6de1 5432 }
95d94b52 5433
629387a6
EB
5434 return ret;
5435}
5436
5437/* Return true if STMT is a store valid for store merging. */
5438
5439static bool
5440store_valid_for_store_merging_p (gimple *stmt)
5441{
5442 return gimple_assign_single_p (stmt)
5443 && gimple_vdef (stmt)
5444 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
5384a802 5445 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
629387a6
EB
5446}
5447
5448enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
5449
5450/* Return the status of basic block BB wrt store merging. */
5451
5452static enum basic_block_status
5453get_status_for_store_merging (basic_block bb)
5454{
5455 unsigned int num_statements = 0;
a7553ad6 5456 unsigned int num_constructors = 0;
629387a6
EB
5457 gimple_stmt_iterator gsi;
5458 edge e;
a591c71b 5459 gimple *last_stmt = NULL;
629387a6
EB
5460
5461 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5462 {
5463 gimple *stmt = gsi_stmt (gsi);
5464
5465 if (is_gimple_debug (stmt))
5466 continue;
5467
a591c71b
JJ
5468 last_stmt = stmt;
5469
629387a6
EB
5470 if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
5471 break;
a7553ad6
JJ
5472
5473 if (is_gimple_assign (stmt)
5474 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR)
5475 {
5476 tree rhs = gimple_assign_rhs1 (stmt);
5477 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
5478 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
5479 && gimple_assign_lhs (stmt) != NULL_TREE)
5480 {
5481 HOST_WIDE_INT sz
5482 = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
5483 if (sz == 16 || sz == 32 || sz == 64)
5484 {
5485 num_constructors = 1;
5486 break;
5487 }
5488 }
5489 }
629387a6
EB
5490 }
5491
a7553ad6 5492 if (num_statements == 0 && num_constructors == 0)
629387a6
EB
5493 return BB_INVALID;
5494
5495 if (cfun->can_throw_non_call_exceptions && cfun->eh
a591c71b 5496 && store_valid_for_store_merging_p (last_stmt)
629387a6
EB
5497 && (e = find_fallthru_edge (bb->succs))
5498 && e->dest == bb->next_bb)
5499 return BB_EXTENDED_VALID;
5500
a7553ad6 5501 return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
245f6de1
JJ
5502}
5503
f663d9ad 5504/* Entry point for the pass. Go over each basic block recording chains of
245f6de1
JJ
5505 immediate stores. Upon encountering a terminating statement (as defined
5506 by stmt_terminates_chain_p) process the recorded stores and emit the widened
5507 variants. */
f663d9ad
KT
5508
5509unsigned int
5510pass_store_merging::execute (function *fun)
5511{
5512 basic_block bb;
5513 hash_set<gimple *> orig_stmts;
629387a6
EB
5514 bool changed = false, open_chains = false;
5515
5516 /* If the function can throw and catch non-call exceptions, we'll be trying
5517 to merge stores across different basic blocks so we need to first unsplit
5518 the EH edges in order to streamline the CFG of the function. */
5519 if (cfun->can_throw_non_call_exceptions && cfun->eh)
5520 unsplit_eh_edges ();
f663d9ad 5521
4b84d9b8
JJ
5522 calculate_dominance_info (CDI_DOMINATORS);
5523
f663d9ad
KT
5524 FOR_EACH_BB_FN (bb, fun)
5525 {
629387a6 5526 const basic_block_status bb_status = get_status_for_store_merging (bb);
f663d9ad 5527 gimple_stmt_iterator gsi;
f663d9ad 5528
629387a6
EB
5529 if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
5530 {
5531 changed |= terminate_and_process_all_chains ();
5532 open_chains = false;
f663d9ad
KT
5533 }
5534
629387a6 5535 if (bb_status == BB_INVALID)
f663d9ad
KT
5536 continue;
5537
5538 if (dump_file && (dump_flags & TDF_DETAILS))
5539 fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
5540
a7553ad6 5541 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
f663d9ad
KT
5542 {
5543 gimple *stmt = gsi_stmt (gsi);
a7553ad6 5544 gsi_next (&gsi);
f663d9ad 5545
50b6d676
AO
5546 if (is_gimple_debug (stmt))
5547 continue;
5548
5384a802 5549 if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
f663d9ad
KT
5550 {
5551 /* Terminate all chains. */
5552 if (dump_file && (dump_flags & TDF_DETAILS))
5553 fprintf (dump_file, "Volatile access terminates "
5554 "all chains\n");
629387a6
EB
5555 changed |= terminate_and_process_all_chains ();
5556 open_chains = false;
f663d9ad
KT
5557 continue;
5558 }
5559
a7553ad6
JJ
5560 if (is_gimple_assign (stmt)
5561 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
5562 && maybe_optimize_vector_constructor (stmt))
5563 continue;
5564
629387a6
EB
5565 if (store_valid_for_store_merging_p (stmt))
5566 changed |= process_store (stmt);
245f6de1 5567 else
629387a6
EB
5568 changed |= terminate_all_aliasing_chains (NULL, stmt);
5569 }
5570
5571 if (bb_status == BB_EXTENDED_VALID)
5572 open_chains = true;
5573 else
5574 {
5575 changed |= terminate_and_process_all_chains ();
5576 open_chains = false;
f663d9ad 5577 }
f663d9ad 5578 }
629387a6
EB
5579
5580 if (open_chains)
5581 changed |= terminate_and_process_all_chains ();
5582
5583 /* If the function can throw and catch non-call exceptions and something
5584 changed during the pass, then the CFG has (very likely) changed too. */
5585 if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5586 {
5587 free_dominance_info (CDI_DOMINATORS);
5588 return TODO_cleanup_cfg;
5589 }
5590
f663d9ad
KT
5591 return 0;
5592}
5593
5594} // anon namespace
5595
5596/* Construct and return a store merging pass object. */
5597
5598gimple_opt_pass *
5599make_pass_store_merging (gcc::context *ctxt)
5600{
5601 return new pass_store_merging (ctxt);
5602}
c22d8787
KT
5603
5604#if CHECKING_P
5605
5606namespace selftest {
5607
5608/* Selftests for store merging helpers. */
5609
5610/* Assert that all elements of the byte arrays X and Y, both of length N
5611 are equal. */
5612
5613static void
5614verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5615{
5616 for (unsigned int i = 0; i < n; i++)
5617 {
5618 if (x[i] != y[i])
5619 {
5620 fprintf (stderr, "Arrays do not match. X:\n");
5621 dump_char_array (stderr, x, n);
5622 fprintf (stderr, "Y:\n");
5623 dump_char_array (stderr, y, n);
5624 }
5625 ASSERT_EQ (x[i], y[i]);
5626 }
5627}
5628
8aba425f 5629/* Test shift_bytes_in_array_left and that it carries bits across between
c22d8787
KT
5630 bytes correctly. */
5631
5632static void
8aba425f 5633verify_shift_bytes_in_array_left (void)
c22d8787
KT
5634{
5635 /* byte 1 | byte 0
5636 00011111 | 11100000. */
5637 unsigned char orig[2] = { 0xe0, 0x1f };
5638 unsigned char in[2];
5639 memcpy (in, orig, sizeof orig);
5640
5641 unsigned char expected[2] = { 0x80, 0x7f };
8aba425f 5642 shift_bytes_in_array_left (in, sizeof (in), 2);
c22d8787
KT
5643 verify_array_eq (in, expected, sizeof (in));
5644
5645 memcpy (in, orig, sizeof orig);
5646 memcpy (expected, orig, sizeof orig);
5647 /* Check that shifting by zero doesn't change anything. */
8aba425f 5648 shift_bytes_in_array_left (in, sizeof (in), 0);
c22d8787
KT
5649 verify_array_eq (in, expected, sizeof (in));
5650
5651}
5652
5653/* Test shift_bytes_in_array_right and that it carries bits across between
5654 bytes correctly. */
5655
5656static void
5657verify_shift_bytes_in_array_right (void)
5658{
5659 /* byte 1 | byte 0
5660 00011111 | 11100000. */
5661 unsigned char orig[2] = { 0x1f, 0xe0};
5662 unsigned char in[2];
5663 memcpy (in, orig, sizeof orig);
5664 unsigned char expected[2] = { 0x07, 0xf8};
5665 shift_bytes_in_array_right (in, sizeof (in), 2);
5666 verify_array_eq (in, expected, sizeof (in));
5667
5668 memcpy (in, orig, sizeof orig);
5669 memcpy (expected, orig, sizeof orig);
5670 /* Check that shifting by zero doesn't change anything. */
5671 shift_bytes_in_array_right (in, sizeof (in), 0);
5672 verify_array_eq (in, expected, sizeof (in));
5673}
5674
5675/* Test clear_bit_region that it clears exactly the bits asked and
5676 nothing more. */
5677
5678static void
5679verify_clear_bit_region (void)
5680{
5681 /* Start with all bits set and test clearing various patterns in them. */
5682 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5683 unsigned char in[3];
5684 unsigned char expected[3];
5685 memcpy (in, orig, sizeof in);
5686
5687 /* Check zeroing out all the bits. */
5688 clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5689 expected[0] = expected[1] = expected[2] = 0;
5690 verify_array_eq (in, expected, sizeof in);
5691
5692 memcpy (in, orig, sizeof in);
5693 /* Leave the first and last bits intact. */
5694 clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5695 expected[0] = 0x1;
5696 expected[1] = 0;
5697 expected[2] = 0x80;
5698 verify_array_eq (in, expected, sizeof in);
5699}
5700
5384a802 5701/* Test clear_bit_region_be that it clears exactly the bits asked and
c22d8787
KT
5702 nothing more. */
5703
5704static void
5705verify_clear_bit_region_be (void)
5706{
5707 /* Start with all bits set and test clearing various patterns in them. */
5708 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5709 unsigned char in[3];
5710 unsigned char expected[3];
5711 memcpy (in, orig, sizeof in);
5712
5713 /* Check zeroing out all the bits. */
5714 clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5715 expected[0] = expected[1] = expected[2] = 0;
5716 verify_array_eq (in, expected, sizeof in);
5717
5718 memcpy (in, orig, sizeof in);
5719 /* Leave the first and last bits intact. */
5720 clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5721 expected[0] = 0x80;
5722 expected[1] = 0;
5723 expected[2] = 0x1;
5724 verify_array_eq (in, expected, sizeof in);
5725}
5726
5727
5728/* Run all of the selftests within this file. */
5729
5730void
d5148d4f 5731store_merging_cc_tests (void)
c22d8787 5732{
8aba425f 5733 verify_shift_bytes_in_array_left ();
c22d8787
KT
5734 verify_shift_bytes_in_array_right ();
5735 verify_clear_bit_region ();
5736 verify_clear_bit_region_be ();
5737}
5738
5739} // namespace selftest
5740#endif /* CHECKING_P. */