/* GIMPLE store merging and byte swapping passes.
   Copyright (C) 2009-2021 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* The purpose of the store merging pass is to combine multiple memory stores
   of constant values, values loaded from memory, bitwise operations on those,
   or bit-field values, to consecutive locations, into fewer wider stores.

   For example, if we have a sequence performing four byte stores to
   consecutive memory locations:
   [p     ] := imm1;
   [p + 1B] := imm2;
   [p + 2B] := imm3;
   [p + 3B] := imm4;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.

   Or:
   [p     ] := [q     ];
   [p + 1B] := [q + 1B];
   [p + 2B] := [q + 2B];
   [p + 3B] := [q + 3B];
   if there is no overlap, this can be transformed into a single 4-byte
   load followed by a single 4-byte store.

   Or:
   [p     ] := [q     ] ^ imm1;
   [p + 1B] := [q + 1B] ^ imm2;
   [p + 2B] := [q + 2B] ^ imm3;
   [p + 3B] := [q + 3B] ^ imm4;
   if there is no overlap, this can be transformed into a single 4-byte
   load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.

   Or:
   [p:1 ] := imm;
   [p:31] := val & 0x7FFFFFFF;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.

   The algorithm is applied to each basic block in three phases:

   1) Scan through the basic block and record assignments to destinations
   that can be expressed as a store to memory of a certain size at a certain
   bit offset from base expressions we can handle.  For bit-fields we also
   record the surrounding bit region, i.e. bits that could be stored in
   a read-modify-write operation when storing the bit-field.  Record store
   chains to different bases in a hash_map (m_stores) and make sure to
   terminate such chains when appropriate (for example when the stored
   values get used subsequently).
   These stores can be a result of structure element initializers, array stores
   etc.  A store_immediate_info object is recorded for every such store.
   Record as many such assignments to a single base as possible until a
   statement that interferes with the store sequence is encountered.
   Each store has up to 2 operands, which can be either a constant, a memory
   load or an SSA name, from which the value to be stored can be computed.
   At most one of the operands can be a constant.  The operands are recorded
   in the store_operand_info struct.

   2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
   store_immediate_info objects) and coalesce contiguous stores into
   merged_store_group objects.  For bit-field stores, we don't need to
   require the stores to be contiguous, only their surrounding bit regions
   have to be contiguous.  If the expression being stored is different
   between adjacent stores, such as one store storing a constant and the
   following one storing a value loaded from memory, or if the loaded memory
   objects are not adjacent, a new merged_store_group is created as well.

   For example, given the stores:
   [p     ] := 0;
   [p + 1B] := 1;
   [p + 3B] := 0;
   [p + 4B] := 1;
   [p + 5B] := 0;
   [p + 6B] := 0;
   This phase would produce two merged_store_group objects, one recording the
   two bytes stored in the memory region [p : p + 1] and another
   recording the four bytes stored in the memory region [p + 3 : p + 6].

   3) The merged_store_group objects produced in phase 2) are processed
   to generate the sequence of wider stores that set the contiguous memory
   regions to the sequence of bytes that correspond to them.  This may emit
   multiple stores per store group to handle contiguous stores that are not
   of a size that is a power of 2.  For example it can try to emit a 40-bit
   store as a 32-bit store followed by an 8-bit store.
   We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
   or TARGET_SLOW_UNALIGNED_ACCESS settings.

   Note on endianness and example:
   Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
   [p     ] := 0x1234;
   [p + 2B] := 0x5678;
   [p + 4B] := 0xab;
   [p + 5B] := 0xcd;

   The memory layout for little-endian (LE) and big-endian (BE) must be:
   p |LE|BE|
   ---------
   0 |34|12|
   1 |12|34|
   2 |78|56|
   3 |56|78|
   4 |ab|ab|
   5 |cd|cd|

   To merge these into a single 48-bit merged value 'val' in phase 2)
   on little-endian we insert stores to higher (consecutive) bitpositions
   into the most significant bits of the merged value.
   The final merged value would be: 0xcdab56781234

   For big-endian we insert stores to higher bitpositions into the least
   significant bits of the merged value.
   The final merged value would be: 0x12345678abcd

   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
   followed by a 16-bit store.  Again, we must consider endianness when
   breaking down the 48-bit value 'val' computed above.
   For little endian we emit:
   [p]      (32-bit) := 0x56781234; // val & 0x0000ffffffff;
   [p + 4B] (16-bit) := 0xcdab;     // (val & 0xffff00000000) >> 32;

   Whereas for big-endian we emit:
   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
   [p + 4B] (16-bit) := 0xabcd;     // val & 0x00000000ffff;  */
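
/* Illustrative sketch (not part of the pass itself): a hypothetical source
   function like the following is the kind of input that phase 1) records
   and phase 3) rewrites into a single wider store when the target allows
   unaligned or aligned 4-byte accesses:

     void
     set_magic (unsigned char *p)
     {
       p[0] = 0x12;
       p[1] = 0x34;
       p[2] = 0x56;
       p[3] = 0x78;
     }

   On a little-endian target the four byte stores above can become a single
   4-byte store of 0x78563412; on a big-endian target, of 0x12345678.  */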

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "builtins.h"
#include "fold-const.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "print-tree.h"
#include "tree-hash-traits.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "stor-layout.h"
#include "timevar.h"
#include "cfganal.h"
#include "cfgcleanup.h"
#include "tree-cfg.h"
#include "except.h"
#include "tree-eh.h"
#include "target.h"
#include "gimplify-me.h"
#include "rtl.h"
#include "expr.h"	/* For get_bit_range.  */
#include "optabs-tree.h"
#include "dbgcnt.h"
#include "selftest.h"

/* The maximum size (in bits) of the stores this pass should generate.  */
#define MAX_STORE_BITSIZE (BITS_PER_WORD)
#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)

/* Limit to bound the number of aliasing checks for loads with the same
   vuse as the corresponding store.  */
#define MAX_STORE_ALIAS_CHECKS 64

namespace {

struct bswap_stat
{
  /* Number of hand-written 16-bit nop / bswaps found.  */
  int found_16bit;

  /* Number of hand-written 32-bit nop / bswaps found.  */
  int found_32bit;

  /* Number of hand-written 64-bit nop / bswaps found.  */
  int found_64bit;
} nop_stats, bswap_stats;

/* A symbolic number structure is used to detect byte permutation and selection
   patterns of a source.  To achieve that, its field N contains an artificial
   number consisting of BITS_PER_MARKER sized markers tracking where each
   byte comes from in the source:

   0	   - target byte has the value 0
   FF	   - target byte has an unknown value (e.g. due to sign extension)
   1..size - marker value is the byte index in the source (0 for lsb).

   To detect permutations on memory sources (arrays and structures), a symbolic
   number is also associated:
   - a base address BASE_ADDR and an OFFSET giving the address of the source;
   - a range which gives the difference between the highest and lowest accessed
     memory location to make such a symbolic number;
   - the address SRC of the source element of lowest address as a convenience
     to easily get BASE_ADDR + offset + lowest bytepos;
   - number of expressions N_OPS bitwise ored together to represent
     approximate cost of the computation.

   Note 1: the range is different from size as size reflects the size of the
   type of the current expression.  For instance, for an array char a[],
   (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
   (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
   time a range of 1.

   Note 2: for non-memory sources, range holds the same value as size.

   Note 3: SRC points to the SSA_NAME in case of non-memory source.  */

struct symbolic_number {
  uint64_t n;
  tree type;
  tree base_addr;
  tree offset;
  poly_int64_pod bytepos;
  tree src;
  tree alias_set;
  tree vuse;
  unsigned HOST_WIDE_INT range;
  int n_ops;
};

#define BITS_PER_MARKER 8
#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
#define MARKER_BYTE_UNKNOWN MARKER_MASK
#define HEAD_MARKER(n, size) \
  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a nop.  The number is masked according to the size of
   the symbolic number before using it.  */
#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x08070605 << 32 | 0x04030201)

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a byte swap.  The number is masked according to the
   size of the symbolic number before using it.  */
#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x01020304 << 32 | 0x05060708)
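
/* Illustrative note (a worked example, not used by the code): for a 32-bit
   source the masked CMPNOP value is 0x04030201, i.e. byte i of the result
   comes from byte i of the source, so the expression is an identity or a
   native-endian load; the corresponding CMPXCHG value is 0x01020304, i.e.
   the byte order is fully reversed, which is what a bswap produces.  */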

/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
   number N.  Return false if the requested operation is not permitted
   on a symbolic number.  */

inline bool
do_shift_rotate (enum tree_code code,
		 struct symbolic_number *n,
		 int count)
{
  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
  unsigned head_marker;

  if (count < 0
      || count >= TYPE_PRECISION (n->type)
      || count % BITS_PER_UNIT != 0)
    return false;
  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;

  /* Zero out the extra bits of N in order to avoid them being shifted
     into the significant bits.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  switch (code)
    {
    case LSHIFT_EXPR:
      n->n <<= count;
      break;
    case RSHIFT_EXPR:
      head_marker = HEAD_MARKER (n->n, size);
      n->n >>= count;
      /* Arithmetic shift of signed type: result is dependent on the value.  */
      if (!TYPE_UNSIGNED (n->type) && head_marker)
	for (i = 0; i < count / BITS_PER_MARKER; i++)
	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
		  << ((size - 1 - i) * BITS_PER_MARKER);
      break;
    case LROTATE_EXPR:
      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
      break;
    case RROTATE_EXPR:
      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
      break;
    default:
      return false;
    }
  /* Zero unused bits for size.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
  return true;
}
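
/* Worked example (illustration only): for a 32-bit unsigned source whose
   symbolic number is the identity 0x04030201, an LSHIFT_EXPR by 8 bits
   shifts the markers up one position and yields 0x03020100, i.e. the low
   byte of the result is now known to be zero and the old top byte has been
   shifted out.  */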

/* Perform sanity checking for the symbolic number N and the gimple
   statement STMT.  */

inline bool
verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
{
  tree lhs_type;

  lhs_type = gimple_expr_type (stmt);

  if (TREE_CODE (lhs_type) != INTEGER_TYPE
      && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
    return false;

  if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
    return false;

  return true;
}

/* Initialize the symbolic number N for the bswap pass from the base element
   SRC manipulated by the bitwise OR expression.  */

bool
init_symbolic_number (struct symbolic_number *n, tree src)
{
  int size;

  if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
    return false;

  n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
  n->src = src;

  /* Set up the symbolic number N by setting each byte to a value between 1 and
     the byte size of rhs1.  The highest order byte is set to n->size and the
     lowest order byte to 1.  */
  n->type = TREE_TYPE (src);
  size = TYPE_PRECISION (n->type);
  if (size % BITS_PER_UNIT != 0)
    return false;
  size /= BITS_PER_UNIT;
  if (size > 64 / BITS_PER_MARKER)
    return false;
  n->range = size;
  n->n = CMPNOP;
  n->n_ops = 1;

  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  return true;
}

/* Check if STMT might be a byte swap or a nop from a memory source and return
   the answer.  If so, REF is that memory source and the base of the memory
   area accessed and the offset of the access from that base are recorded
   in N.  */

bool
find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
{
  /* Leaf node is an array or component ref.  Memorize its base and
     offset from base to compare to other such leaf node.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset, base_addr;

  /* Not prepared to handle PDP endian.  */
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return false;

  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
    return false;

  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
				   &unsignedp, &reversep, &volatilep);

  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
    /* Do not rewrite TARGET_MEM_REF.  */
    return false;
  else if (TREE_CODE (base_addr) == MEM_REF)
    {
      poly_offset_int bit_offset = 0;
      tree off = TREE_OPERAND (base_addr, 1);

      if (!integer_zerop (off))
	{
	  poly_offset_int boff = mem_ref_offset (base_addr);
	  boff <<= LOG2_BITS_PER_UNIT;
	  bit_offset += boff;
	}

      base_addr = TREE_OPERAND (base_addr, 0);

      /* Avoid returning a negative bitpos as this may wreak havoc later.  */
      if (maybe_lt (bit_offset, 0))
	{
	  tree byte_offset = wide_int_to_tree
	    (sizetype, bits_to_bytes_round_down (bit_offset));
	  bit_offset = num_trailing_bits (bit_offset);
	  if (offset)
	    offset = size_binop (PLUS_EXPR, offset, byte_offset);
	  else
	    offset = byte_offset;
	}

      bitpos += bit_offset.force_shwi ();
    }
  else
    base_addr = build_fold_addr_expr (base_addr);

  if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return false;
  if (!multiple_p (bitsize, BITS_PER_UNIT))
    return false;
  if (reversep)
    return false;

  if (!init_symbolic_number (n, ref))
    return false;
  n->base_addr = base_addr;
  n->offset = offset;
  n->bytepos = bytepos;
  n->alias_set = reference_alias_ptr_type (ref);
  n->vuse = gimple_vuse (stmt);
  return true;
}

/* Compute the symbolic number N representing the result of a bitwise OR on 2
   symbolic numbers N1 and N2 whose source statements are respectively
   SOURCE_STMT1 and SOURCE_STMT2.  */

gimple *
perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
			gimple *source_stmt2, struct symbolic_number *n2,
			struct symbolic_number *n)
{
  int i, size;
  uint64_t mask;
  gimple *source_stmt;
  struct symbolic_number *n_start;

  tree rhs1 = gimple_assign_rhs1 (source_stmt1);
  if (TREE_CODE (rhs1) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
    rhs1 = TREE_OPERAND (rhs1, 0);
  tree rhs2 = gimple_assign_rhs1 (source_stmt2);
  if (TREE_CODE (rhs2) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
    rhs2 = TREE_OPERAND (rhs2, 0);

  /* Sources are different, cancel bswap if they are not memory locations with
     the same base (array, structure, ...).  */
  if (rhs1 != rhs2)
    {
      uint64_t inc;
      HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
      struct symbolic_number *toinc_n_ptr, *n_end;
      basic_block bb1, bb2;

      if (!n1->base_addr || !n2->base_addr
	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
	return NULL;

      if (!n1->offset != !n2->offset
	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
	return NULL;

      start1 = 0;
      if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
	return NULL;

      if (start1 < start2)
	{
	  n_start = n1;
	  start_sub = start2 - start1;
	}
      else
	{
	  n_start = n2;
	  start_sub = start1 - start2;
	}

      bb1 = gimple_bb (source_stmt1);
      bb2 = gimple_bb (source_stmt2);
      if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
	source_stmt = source_stmt1;
      else
	source_stmt = source_stmt2;

      /* Find the highest address at which a load is performed and
	 compute related info.  */
      end1 = start1 + (n1->range - 1);
      end2 = start2 + (n2->range - 1);
      if (end1 < end2)
	{
	  end = end2;
	  end_sub = end2 - end1;
	}
      else
	{
	  end = end1;
	  end_sub = end1 - end2;
	}
      n_end = (end2 > end1) ? n2 : n1;

      /* Find symbolic number whose lsb is the most significant.  */
      if (BYTES_BIG_ENDIAN)
	toinc_n_ptr = (n_end == n1) ? n2 : n1;
      else
	toinc_n_ptr = (n_start == n1) ? n2 : n1;

      n->range = end - MIN (start1, start2) + 1;

      /* Check that the range of memory covered can be represented by
	 a symbolic number.  */
      if (n->range > 64 / BITS_PER_MARKER)
	return NULL;

      /* Reinterpret byte marks in symbolic number holding the value of
	 bigger weight according to target endianness.  */
      inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
      size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
      for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
	{
	  unsigned marker
	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
	  if (marker && marker != MARKER_BYTE_UNKNOWN)
	    toinc_n_ptr->n += inc;
	}
    }
  else
    {
      n->range = n1->range;
      n_start = n1;
      source_stmt = source_stmt1;
    }

  if (!n1->alias_set
      || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
    n->alias_set = n1->alias_set;
  else
    n->alias_set = ptr_type_node;
  n->vuse = n_start->vuse;
  n->base_addr = n_start->base_addr;
  n->offset = n_start->offset;
  n->src = n_start->src;
  n->bytepos = n_start->bytepos;
  n->type = n_start->type;
  size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

  for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
    {
      uint64_t masked1, masked2;

      masked1 = n1->n & mask;
      masked2 = n2->n & mask;
      if (masked1 && masked2 && masked1 != masked2)
	return NULL;
    }
  n->n = n1->n | n2->n;
  n->n_ops = n1->n_ops + n2->n_ops;

  return source_stmt;
}
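
/* Illustrative example (not used by the code): when merging the operands of
   'a[0] | ((uint32_t) a[1] << 8)' for unsigned char a[], N1 describes the
   load of a[0] (markers 0x01, bytepos 0) and N2 the shifted load of a[1]
   (markers 0x0100, bytepos 1).  On little-endian the merge relabels N2's
   marker relative to the lowest accessed address and produces N with markers
   0x0201 and a range of 2 bytes, which matches the 2-byte CMPNOP pattern,
   i.e. a native-endian 16-bit load of a[0..1].  */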

/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   could successfully be executed the function returns a gimple stmt whose
   rhs's first tree is the expression of the source operand, and NULL
   otherwise.  */

gimple *
find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
{
  enum tree_code code;
  tree rhs1, rhs2 = NULL;
  gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
  enum gimple_rhs_class rhs_class;

  if (!limit || !is_gimple_assign (stmt))
    return NULL;

  rhs1 = gimple_assign_rhs1 (stmt);

  if (find_bswap_or_nop_load (stmt, rhs1, n))
    return stmt;

  /* Handle BIT_FIELD_REF.  */
  if (TREE_CODE (rhs1) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
    {
      if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
	  || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
	return NULL;

      unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
      unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
      if (bitpos % BITS_PER_UNIT == 0
	  && bitsize % BITS_PER_UNIT == 0
	  && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
	{
	  /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
	  if (BYTES_BIG_ENDIAN)
	    bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;

	  /* Shift.  */
	  if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
	    return NULL;

	  /* Mask.  */
	  uint64_t mask = 0;
	  uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
	  for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
	       i++, tmp <<= BITS_PER_UNIT)
	    mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
	  n->n &= mask;

	  /* Convert.  */
	  n->type = TREE_TYPE (rhs1);
	  if (!n->base_addr)
	    n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

	  return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
	}

      return NULL;
    }

  if (TREE_CODE (rhs1) != SSA_NAME)
    return NULL;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = gimple_assign_rhs_class (stmt);
  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);

  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  /* Handle unary rhs and binary rhs with integer constants as second
     operand.  */

  if (rhs_class == GIMPLE_UNARY_RHS
      || (rhs_class == GIMPLE_BINARY_RHS
	  && TREE_CODE (rhs2) == INTEGER_CST))
    {
      if (code != BIT_AND_EXPR
	  && code != LSHIFT_EXPR
	  && code != RSHIFT_EXPR
	  && code != LROTATE_EXPR
	  && code != RROTATE_EXPR
	  && !CONVERT_EXPR_CODE_P (code))
	return NULL;

      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);

      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
	 we have to initialize the symbolic number.  */
      if (!source_stmt1)
	{
	  if (gimple_assign_load_p (stmt)
	      || !init_symbolic_number (n, rhs1))
	    return NULL;
	  source_stmt1 = stmt;
	}

      switch (code)
	{
	case BIT_AND_EXPR:
	  {
	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	    uint64_t val = int_cst_value (rhs2), mask = 0;
	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;

	    /* Only constants masking full bytes are allowed.  */
	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
	      if ((val & tmp) != 0 && (val & tmp) != tmp)
		return NULL;
	      else if (val & tmp)
		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);

	    n->n &= mask;
	  }
	  break;
	case LSHIFT_EXPR:
	case RSHIFT_EXPR:
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
	    return NULL;
	  break;
	CASE_CONVERT:
	  {
	    int i, type_size, old_type_size;
	    tree type;

	    type = gimple_expr_type (stmt);
	    type_size = TYPE_PRECISION (type);
	    if (type_size % BITS_PER_UNIT != 0)
	      return NULL;
	    type_size /= BITS_PER_UNIT;
	    if (type_size > 64 / BITS_PER_MARKER)
	      return NULL;

	    /* Sign extension: result is dependent on the value.  */
	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
		&& HEAD_MARKER (n->n, old_type_size))
	      for (i = 0; i < type_size - old_type_size; i++)
		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
			<< ((type_size - 1 - i) * BITS_PER_MARKER);

	    if (type_size < 64 / BITS_PER_MARKER)
	      {
		/* If STMT casts to a smaller type mask out the bits not
		   belonging to the target type.  */
		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
	      }
	    n->type = type;
	    if (!n->base_addr)
	      n->range = type_size;
	  }
	  break;
	default:
	  return NULL;
	};
      return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
    }

  /* Handle binary rhs.  */

  if (rhs_class == GIMPLE_BINARY_RHS)
    {
      struct symbolic_number n1, n2;
      gimple *source_stmt, *source_stmt2;

      if (code != BIT_IOR_EXPR)
	return NULL;

      if (TREE_CODE (rhs2) != SSA_NAME)
	return NULL;

      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);

      switch (code)
	{
	case BIT_IOR_EXPR:
	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);

	  if (!source_stmt1)
	    return NULL;

	  source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);

	  if (!source_stmt2)
	    return NULL;

	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
	    return NULL;

	  if (n1.vuse != n2.vuse)
	    return NULL;

	  source_stmt
	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);

	  if (!source_stmt)
	    return NULL;

	  if (!verify_symbolic_number_p (n, stmt))
	    return NULL;

	  break;
	default:
	  return NULL;
	}
      return source_stmt;
    }
  return NULL;
}

/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
   *CMPXCHG, *CMPNOP and adjust *N.  */

void
find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
			    uint64_t *cmpnop)
{
  unsigned rsize;
  uint64_t tmpn, mask;

  /* The number which the find_bswap_or_nop_1 result should match in order
     to have a full byte swap.  The number is shifted to the right
     according to the size of the symbolic number before using it.  */
  *cmpxchg = CMPXCHG;
  *cmpnop = CMPNOP;

  /* Find real size of result (highest non-zero byte).  */
  if (n->base_addr)
    for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
  else
    rsize = n->range;

  /* Zero out the bits corresponding to untouched bytes in original gimple
     expression.  */
  if (n->range < (int) sizeof (int64_t))
    {
      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
      *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
      *cmpnop &= mask;
    }

  /* Zero out the bits corresponding to unused bytes in the result of the
     gimple expression.  */
  if (rsize < n->range)
    {
      if (BYTES_BIG_ENDIAN)
	{
	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
	  *cmpxchg &= mask;
	  *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
	}
      else
	{
	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
	  *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
	  *cmpnop &= mask;
	}
      n->range = rsize;
    }

  n->range *= BITS_PER_UNIT;
}

/* Check if STMT completes a bswap implementation or a read in a given
   endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
   accordingly.  It also sets N to represent the kind of operations
   performed: size of the resulting expression and whether it works on
   a memory source, and if so alias-set and vuse.  Finally, the
   function returns a stmt whose rhs's first tree is the source
   expression.  */

gimple *
find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
{
  tree type_size = TYPE_SIZE_UNIT (gimple_expr_type (stmt));
  if (!tree_fits_uhwi_p (type_size))
    return NULL;

  /* The last parameter determines the depth search limit.  It usually
     correlates directly to the number n of bytes to be touched.  We
     increase that number by 2 * (log2(n) + 1) here in order to also
     cover signed -> unsigned conversions of the src operand as can be seen
     in libgcc, and the initial shift/and operation of the src operand.  */
  int limit = tree_to_uhwi (type_size);
  limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
  gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);

  if (!ins_stmt)
    {
      if (gimple_assign_rhs_code (stmt) != CONSTRUCTOR
	  || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
	return NULL;
      unsigned HOST_WIDE_INT sz = tree_to_uhwi (type_size) * BITS_PER_UNIT;
      if (sz != 16 && sz != 32 && sz != 64)
	return NULL;
      tree rhs = gimple_assign_rhs1 (stmt);
      if (CONSTRUCTOR_NELTS (rhs) == 0)
	return NULL;
      tree eltype = TREE_TYPE (TREE_TYPE (rhs));
      unsigned HOST_WIDE_INT eltsz
	= int_size_in_bytes (eltype) * BITS_PER_UNIT;
      if (TYPE_PRECISION (eltype) != eltsz)
	return NULL;
      constructor_elt *elt;
      unsigned int i;
      tree type = build_nonstandard_integer_type (sz, 1);
      FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
	{
	  if (TREE_CODE (elt->value) != SSA_NAME
	      || !INTEGRAL_TYPE_P (TREE_TYPE (elt->value)))
	    return NULL;
	  struct symbolic_number n1;
	  gimple *source_stmt
	    = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt->value), &n1,
				   limit - 1);

	  if (!source_stmt)
	    return NULL;

	  n1.type = type;
	  if (!n1.base_addr)
	    n1.range = sz / BITS_PER_UNIT;

	  if (i == 0)
	    {
	      ins_stmt = source_stmt;
	      *n = n1;
	    }
	  else
	    {
	      if (n->vuse != n1.vuse)
		return NULL;

	      struct symbolic_number n0 = *n;

	      if (!BYTES_BIG_ENDIAN)
		{
		  if (!do_shift_rotate (LSHIFT_EXPR, &n1, i * eltsz))
		    return NULL;
		}
	      else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
		return NULL;
	      ins_stmt
		= perform_symbolic_merge (ins_stmt, &n0, source_stmt, &n1, n);

	      if (!ins_stmt)
		return NULL;
	    }
	}
    }

  uint64_t cmpxchg, cmpnop;
  find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);

  /* A complete byte swap should make the symbolic number start with
     the largest digit in the highest order byte.  An unchanged symbolic
     number indicates a read with the same endianness as the target
     architecture.  */
  if (n->n == cmpnop)
    *bswap = false;
  else if (n->n == cmpxchg)
    *bswap = true;
  else
    return NULL;

  /* Useless bit manipulation performed by code.  */
  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
    return NULL;

  return ins_stmt;
}
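
/* Illustrative sketch (not part of this file): a hand-coded byte swap such
   as the following hypothetical function is the kind of pattern the
   detection above is meant to recognize, so that the replacement below can
   turn it into a single __builtin_bswap32 call or bswap instruction:

     uint32_t
     my_bswap32 (uint32_t x)
     {
       return ((x & 0x000000ffu) << 24)
	      | ((x & 0x0000ff00u) << 8)
	      | ((x & 0x00ff0000u) >> 8)
	      | ((x & 0xff000000u) >> 24);
     }

   Reading a 32-bit big-endian value from a char buffer with shifts and ORs
   is matched the same way and becomes either a plain load or a load followed
   by a bswap, depending on the target endianness.  */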

const pass_data pass_data_optimize_bswap =
{
  GIMPLE_PASS, /* type */
  "bswap", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_optimize_bswap : public gimple_opt_pass
{
public:
  pass_optimize_bswap (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
    }

  virtual unsigned int execute (function *);

}; // class pass_optimize_bswap

/* Perform the bswap optimization: replace the expression computed in the rhs
   of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
   bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr (non
   null if a load is needed) and BSWAP.  The type, VUSE and alias-set of the
   load to perform are also given in N while the builtin bswap invocation is
   given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one of
   the load statements involved to construct the rhs in gsi_stmt (GSI) and
   N->range gives the size of the rhs expression for maintaining some
   statistics.

   Note that if the replacement involves a load and if gsi_stmt (GSI) is
   non-NULL, that stmt is moved just after INS_STMT to do the load with the
   same VUSE which can lead to gsi_stmt (GSI) changing basic block.  */

tree
bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
	       tree bswap_type, tree load_type, struct symbolic_number *n,
	       bool bswap)
{
  tree src, tmp, tgt = NULL_TREE;
  gimple *bswap_stmt;
  tree_code conv_code = NOP_EXPR;

  gimple *cur_stmt = gsi_stmt (gsi);
  src = n->src;
  if (cur_stmt)
    {
      tgt = gimple_assign_lhs (cur_stmt);
      if (gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR
	  && tgt
	  && VECTOR_TYPE_P (TREE_TYPE (tgt)))
	conv_code = VIEW_CONVERT_EXPR;
    }

  /* Need to load the value from memory first.  */
  if (n->base_addr)
    {
      gimple_stmt_iterator gsi_ins = gsi;
      if (ins_stmt)
	gsi_ins = gsi_for_stmt (ins_stmt);
      tree addr_expr, addr_tmp, val_expr, val_tmp;
      tree load_offset_ptr, aligned_load_type;
      gimple *load_stmt;
      unsigned align = get_object_alignment (src);
      poly_int64 load_offset = 0;

      if (cur_stmt)
	{
	  basic_block ins_bb = gimple_bb (ins_stmt);
	  basic_block cur_bb = gimple_bb (cur_stmt);
	  if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
	    return NULL_TREE;

	  /* Move cur_stmt just before one of the loads of the original
	     to ensure it has the same VUSE.  See PR61517 for what could
	     go wrong.  */
	  if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
	    reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
	  gsi_move_before (&gsi, &gsi_ins);
	  gsi = gsi_for_stmt (cur_stmt);
	}
      else
	gsi = gsi_ins;

      /* Compute address to load from and cast according to the size
	 of the load.  */
      addr_expr = build_fold_addr_expr (src);
      if (is_gimple_mem_ref_addr (addr_expr))
	addr_tmp = unshare_expr (addr_expr);
      else
	{
	  addr_tmp = unshare_expr (n->base_addr);
	  if (!is_gimple_mem_ref_addr (addr_tmp))
	    addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
						   is_gimple_mem_ref_addr,
						   NULL_TREE, true,
						   GSI_SAME_STMT);
	  load_offset = n->bytepos;
	  if (n->offset)
	    {
	      tree off
		= force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
					    true, NULL_TREE, true,
					    GSI_SAME_STMT);
	      gimple *stmt
		= gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
				       POINTER_PLUS_EXPR, addr_tmp, off);
	      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	      addr_tmp = gimple_assign_lhs (stmt);
	    }
	}

      /* Perform the load.  */
      aligned_load_type = load_type;
      if (align < TYPE_ALIGN (load_type))
	aligned_load_type = build_aligned_type (load_type, align);
      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
			      load_offset_ptr);

      if (!bswap)
	{
	  if (n->range == 16)
	    nop_stats.found_16bit++;
	  else if (n->range == 32)
	    nop_stats.found_32bit++;
	  else
	    {
	      gcc_assert (n->range == 64);
	      nop_stats.found_64bit++;
	    }

	  /* Convert the result of load if necessary.  */
	  if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
	    {
	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
					    "load_dst");
	      load_stmt = gimple_build_assign (val_tmp, val_expr);
	      gimple_set_vuse (load_stmt, n->vuse);
	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
	      if (conv_code == VIEW_CONVERT_EXPR)
		val_tmp = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (tgt), val_tmp);
	      gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
	      update_stmt (cur_stmt);
	    }
	  else if (cur_stmt)
	    {
	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
	      gimple_set_vuse (cur_stmt, n->vuse);
	      update_stmt (cur_stmt);
	    }
	  else
	    {
	      tgt = make_ssa_name (load_type);
	      cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
	      gimple_set_vuse (cur_stmt, n->vuse);
	      gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
	    }

	  if (dump_file)
	    {
	      fprintf (dump_file,
		       "%d bit load in target endianness found at: ",
		       (int) n->range);
	      print_gimple_stmt (dump_file, cur_stmt, 0);
	    }
	  return tgt;
	}
      else
	{
	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
	  load_stmt = gimple_build_assign (val_tmp, val_expr);
	  gimple_set_vuse (load_stmt, n->vuse);
	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
	}
      src = val_tmp;
    }
  else if (!bswap)
    {
      gimple *g = NULL;
      if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
	{
	  if (!is_gimple_val (src))
	    return NULL_TREE;
	  if (conv_code == VIEW_CONVERT_EXPR)
	    src = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (tgt), src);
	  g = gimple_build_assign (tgt, conv_code, src);
	}
      else if (cur_stmt)
	g = gimple_build_assign (tgt, src);
      else
	tgt = src;
      if (n->range == 16)
	nop_stats.found_16bit++;
      else if (n->range == 32)
	nop_stats.found_32bit++;
      else
	{
	  gcc_assert (n->range == 64);
	  nop_stats.found_64bit++;
	}
      if (dump_file)
	{
	  fprintf (dump_file,
		   "%d bit reshuffle in target endianness found at: ",
		   (int) n->range);
	  if (cur_stmt)
	    print_gimple_stmt (dump_file, cur_stmt, 0);
	  else
	    {
	      print_generic_expr (dump_file, tgt, TDF_NONE);
	      fprintf (dump_file, "\n");
	    }
	}
      if (cur_stmt)
	gsi_replace (&gsi, g, true);
      return tgt;
    }
  else if (TREE_CODE (src) == BIT_FIELD_REF)
    src = TREE_OPERAND (src, 0);

  if (n->range == 16)
    bswap_stats.found_16bit++;
  else if (n->range == 32)
    bswap_stats.found_32bit++;
  else
    {
      gcc_assert (n->range == 64);
      bswap_stats.found_64bit++;
    }

  tmp = src;

  /* Convert the src expression if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  /* Canonical form for 16-bit bswap is a rotate expression.  Only 16-bit
     values are considered, as rotation of 2N-bit values by N bits is
     generally not equivalent to a bswap.  Consider for instance
     0x01020304 r>> 16 which gives 0x03040102 while a bswap for that value
     is 0x04030201.  */
  if (bswap && n->range == 16)
    {
      tree count = build_int_cst (NULL, BITS_PER_UNIT);
      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
      bswap_stmt = gimple_build_assign (NULL, src);
    }
  else
    bswap_stmt = gimple_build_call (fndecl, 1, tmp);

  if (tgt == NULL_TREE)
    tgt = make_ssa_name (bswap_type);
  tmp = tgt;

  /* Convert the result if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
      tree atmp = tmp;
      if (conv_code == VIEW_CONVERT_EXPR)
	atmp = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (tgt), tmp);
      convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  gimple_set_lhs (bswap_stmt, tmp);

  if (dump_file)
    {
      fprintf (dump_file, "%d bit bswap implementation found at: ",
	       (int) n->range);
      if (cur_stmt)
	print_gimple_stmt (dump_file, cur_stmt, 0);
      else
	{
	  print_generic_expr (dump_file, tgt, TDF_NONE);
	  fprintf (dump_file, "\n");
	}
    }

  if (cur_stmt)
    {
      gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
      gsi_remove (&gsi, true);
    }
  else
    gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
  return tgt;
}

/* Find manual byte swap implementations as well as loads in a given
   endianness.  Byte swaps are turned into a bswap builtin invocation
   while endian loads are converted to a bswap builtin invocation or a
   simple load according to the target endianness.  */

unsigned int
pass_optimize_bswap::execute (function *fun)
{
  basic_block bb;
  bool bswap32_p, bswap64_p;
  bool changed = false;
  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;

  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
		   || (bswap32_p && word_mode == SImode)));

  /* Determine the argument type of the builtins.  The code later on
     assumes that the return and argument type are the same.  */
  if (bswap32_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  if (bswap64_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  memset (&nop_stats, 0, sizeof (nop_stats));
  memset (&bswap_stats, 0, sizeof (bswap_stats));
  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      /* We do a reverse scan for bswap patterns to make sure we get the
	 widest match.  As bswap pattern matching doesn't handle previously
	 inserted smaller bswap replacements as sub-patterns, the wider
	 variant wouldn't be detected.  */
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
	{
	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
	  enum tree_code code;
	  struct symbolic_number n;
	  bool bswap;

	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
	     might be moved to a different basic block by bswap_replace and gsi
	     must not point to it if that's the case.  Moving the gsi_prev
	     there makes sure that gsi points to the statement previous to
	     cur_stmt while still making sure that all statements are
	     considered in this basic block.  */
	  gsi_prev (&gsi);

	  if (!is_gimple_assign (cur_stmt))
	    continue;

	  code = gimple_assign_rhs_code (cur_stmt);
	  switch (code)
	    {
	    case LROTATE_EXPR:
	    case RROTATE_EXPR:
	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
		     % BITS_PER_UNIT)
		continue;
	      /* Fall through.  */
	    case BIT_IOR_EXPR:
	      break;
	    case CONSTRUCTOR:
	      {
		tree rhs = gimple_assign_rhs1 (cur_stmt);
		if (VECTOR_TYPE_P (TREE_TYPE (rhs))
		    && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs))))
		  break;
	      }
	      continue;
	    default:
	      continue;
	    }

	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);

	  if (!ins_stmt)
	    continue;

	  switch (n.range)
	    {
	    case 16:
	      /* Already in canonical form, nothing to do.  */
	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
		continue;
	      load_type = bswap_type = uint16_type_node;
	      break;
	    case 32:
	      load_type = uint32_type_node;
	      if (bswap32_p)
		{
		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
		  bswap_type = bswap32_type;
		}
	      break;
	    case 64:
	      load_type = uint64_type_node;
	      if (bswap64_p)
		{
		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
		  bswap_type = bswap64_type;
		}
	      break;
	    default:
	      continue;
	    }

	  if (bswap && !fndecl && n.range != 16)
	    continue;

	  if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
			     bswap_type, load_type, &n, bswap))
	    changed = true;
	}
    }

  statistics_counter_event (fun, "16-bit nop implementations found",
			    nop_stats.found_16bit);
  statistics_counter_event (fun, "32-bit nop implementations found",
			    nop_stats.found_32bit);
  statistics_counter_event (fun, "64-bit nop implementations found",
			    nop_stats.found_64bit);
  statistics_counter_event (fun, "16-bit bswap implementations found",
			    bswap_stats.found_16bit);
  statistics_counter_event (fun, "32-bit bswap implementations found",
			    bswap_stats.found_32bit);
  statistics_counter_event (fun, "64-bit bswap implementations found",
			    bswap_stats.found_64bit);

  return (changed ? TODO_update_ssa : 0);
}

} // anon namespace

gimple_opt_pass *
make_pass_optimize_bswap (gcc::context *ctxt)
{
  return new pass_optimize_bswap (ctxt);
}

namespace {

/* Struct recording one operand for the store, which is either a constant,
   then VAL represents the constant and all the other fields are zero, or
   a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
   and the other fields also reflect the memory load, or an SSA name, then
   VAL represents the SSA name and all the other fields are zero.  */

class store_operand_info
{
public:
  tree val;
  tree base_addr;
  poly_uint64 bitsize;
  poly_uint64 bitpos;
  poly_uint64 bitregion_start;
  poly_uint64 bitregion_end;
  gimple *stmt;
  bool bit_not_p;
  store_operand_info ();
};

store_operand_info::store_operand_info ()
  : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
    bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
{
}

/* Struct recording the information about a single store of an immediate
   to memory.  These are created in the first phase and coalesced into
   merged_store_group objects in the second phase.  */

class store_immediate_info
{
public:
  unsigned HOST_WIDE_INT bitsize;
  unsigned HOST_WIDE_INT bitpos;
  unsigned HOST_WIDE_INT bitregion_start;
  /* This is one past the last bit of the bit region.  */
  unsigned HOST_WIDE_INT bitregion_end;
  gimple *stmt;
  unsigned int order;
  /* INTEGER_CST for constant store, STRING_CST for string store,
     MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
     BIT_INSERT_EXPR for bit insertion.
     LROTATE_EXPR if it can be only bswap optimized and
     ops are not really meaningful.
     NOP_EXPR if bswap optimization detected identity, ops
     are not meaningful.  */
  enum tree_code rhs_code;
  /* Two fields for bswap optimization purposes.  */
  struct symbolic_number n;
  gimple *ins_stmt;
  /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing.  */
  bool bit_not_p;
  /* True if ops have been swapped and thus ops[1] represents
     rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2.  */
  bool ops_swapped_p;
  /* The index number of the landing pad, or 0 if there is none.  */
  int lp_nr;
  /* Operands.  For BIT_*_EXPR rhs_code both operands are used, otherwise
     just the first one.  */
  store_operand_info ops[2];
  store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
			unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
			gimple *, unsigned int, enum tree_code,
			struct symbolic_number &, gimple *, bool, int,
			const store_operand_info &,
			const store_operand_info &);
};

store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
					    unsigned HOST_WIDE_INT bp,
					    unsigned HOST_WIDE_INT brs,
					    unsigned HOST_WIDE_INT bre,
					    gimple *st,
					    unsigned int ord,
					    enum tree_code rhscode,
					    struct symbolic_number &nr,
					    gimple *ins_stmtp,
					    bool bitnotp,
					    int nr2,
					    const store_operand_info &op0r,
					    const store_operand_info &op1r)
  : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
    stmt (st), order (ord), rhs_code (rhscode), n (nr),
    ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
    lp_nr (nr2)
#if __cplusplus >= 201103L
  , ops { op0r, op1r }
{
}
#else
{
  ops[0] = op0r;
  ops[1] = op1r;
}
#endif

/* Struct representing a group of stores to contiguous memory locations.
   These are produced by the second phase (coalescing) and consumed in the
   third phase that outputs the widened stores.  */

class merged_store_group
{
public:
  unsigned HOST_WIDE_INT start;
  unsigned HOST_WIDE_INT width;
  unsigned HOST_WIDE_INT bitregion_start;
  unsigned HOST_WIDE_INT bitregion_end;
  /* The size of the allocated memory for val and mask.  */
  unsigned HOST_WIDE_INT buf_size;
  unsigned HOST_WIDE_INT align_base;
  poly_uint64 load_align_base[2];

  unsigned int align;
  unsigned int load_align[2];
  unsigned int first_order;
  unsigned int last_order;
  bool bit_insertion;
  bool string_concatenation;
  bool only_constants;
  bool consecutive;
  unsigned int first_nonmergeable_order;
  int lp_nr;

  auto_vec<store_immediate_info *> stores;
  /* We record the first and last original statements in the sequence because
     we'll need their vuse/vdef and replacement position.  It's easier to keep
     track of them separately as 'stores' is reordered by apply_stores.  */
  gimple *last_stmt;
  gimple *first_stmt;
  unsigned char *val;
  unsigned char *mask;

  merged_store_group (store_immediate_info *);
  ~merged_store_group ();
  bool can_be_merged_into (store_immediate_info *);
  void merge_into (store_immediate_info *);
  void merge_overlapping (store_immediate_info *);
  bool apply_stores ();
private:
  void do_merge (store_immediate_info *);
};

/* Debug helper.  Dump LEN elements of byte array PTR to FD in hex.  */

static void
dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
{
  if (!fd)
    return;

  for (unsigned int i = 0; i < len; i++)
    fprintf (fd, "%02x ", ptr[i]);
  fprintf (fd, "\n");
}

/* Clear out LEN bits starting from bit START in the byte array
   PTR.  This clears the bits to the *right* from START.
   START must be within [0, BITS_PER_UNIT) and counts starting from
   the least significant bit.  */

static void
clear_bit_region_be (unsigned char *ptr, unsigned int start,
		     unsigned int len)
{
  if (len == 0)
    return;
  /* Clear len bits to the right of start.  */
  else if (len <= start + 1)
    {
      unsigned char mask = (~(~0U << len));
      mask = mask << (start + 1U - len);
      ptr[0] &= ~mask;
    }
  else if (start != BITS_PER_UNIT - 1)
    {
      clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
      clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
			   len - (start % BITS_PER_UNIT) - 1);
    }
  else if (start == BITS_PER_UNIT - 1
	   && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      memset (ptr, 0, nbytes);
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
			     len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}

/* In the byte array PTR clear the bit region starting at bit
   START that is LEN bits wide.
   For regions spanning multiple bytes do this recursively until we reach
   zero LEN or a region contained within a single byte.  */

static void
clear_bit_region (unsigned char *ptr, unsigned int start,
		  unsigned int len)
{
  /* Degenerate base case.  */
  if (len == 0)
    return;
  else if (start >= BITS_PER_UNIT)
    clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
  /* Second base case.  */
  else if ((start + len) <= BITS_PER_UNIT)
    {
      unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
      mask >>= BITS_PER_UNIT - (start + len);

      ptr[0] &= ~mask;

      return;
    }
  /* Clear most significant bits in a byte and proceed with the next byte.  */
  else if (start != 0)
    {
      clear_bit_region (ptr, start, BITS_PER_UNIT - start);
      clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
    }
  /* Whole bytes need to be cleared.  */
  else if (start == 0 && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      /* We could recurse on each byte but we clear whole bytes, so a simple
	 memset will do.  */
      memset (ptr, '\0', nbytes);
      /* Clear the remaining sub-byte region if there is one.  */
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}
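
/* Worked example (illustration only): clear_bit_region (ptr, 3, 7) first
   clears the 5 most significant bits of ptr[0] (bits 3..7, counting from
   the least significant bit) and then recurses to clear the remaining
   2 bits, i.e. bits 0..1 of ptr[1].  clear_bit_region_be instead clears
   LEN bits moving from START toward the least significant bit of the
   first byte and then continues from the most significant bit of the
   following byte, matching the big-endian layout of the buffer.  */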
1653
1654/* Write BITLEN bits of EXPR to the byte array PTR at
1655 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1656 Return true if the operation succeeded. */
1657
1658static bool
1659encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
46a61395 1660 unsigned int total_bytes)
f663d9ad
KT
1661{
1662 unsigned int first_byte = bitpos / BITS_PER_UNIT;
ad1de652
JJ
1663 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1664 || (bitpos % BITS_PER_UNIT)
f4b31647 1665 || !int_mode_for_size (bitlen, 0).exists ());
3afd514b
JJ
1666 bool empty_ctor_p
1667 = (TREE_CODE (expr) == CONSTRUCTOR
1668 && CONSTRUCTOR_NELTS (expr) == 0
1669 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1670 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
f663d9ad
KT
1671
1672 if (!sub_byte_op_p)
3afd514b
JJ
1673 {
1674 if (first_byte >= total_bytes)
1675 return false;
1676 total_bytes -= first_byte;
1677 if (empty_ctor_p)
1678 {
1679 unsigned HOST_WIDE_INT rhs_bytes
1680 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1681 if (rhs_bytes > total_bytes)
1682 return false;
1683 memset (ptr + first_byte, '\0', rhs_bytes);
1684 return true;
1685 }
1686 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1687 }
f663d9ad
KT
1688
1689 /* LITTLE-ENDIAN
1690 We are writing a non byte-sized quantity or at a position that is not
1691 at a byte boundary.
1692 |--------|--------|--------| ptr + first_byte
1693 ^ ^
1694 xxx xxxxxxxx xxx< bp>
1695 |______EXPR____|
1696
46a61395 1697 First native_encode_expr EXPR into a temporary buffer and shift each
f663d9ad
KT
1698 byte in the buffer by 'bp' (carrying the bits over as necessary).
1699 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1700 <------bitlen---->< bp>
1701 Then we clear the destination bits:
1702 |---00000|00000000|000-----| ptr + first_byte
1703 <-------bitlen--->< bp>
1704
1705 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1706 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1707
1708 BIG-ENDIAN
1709 We are writing a non byte-sized quantity or at a position that is not
1710 at a byte boundary.
1711 ptr + first_byte |--------|--------|--------|
1712 ^ ^
1713 <bp >xxx xxxxxxxx xxx
1714 |_____EXPR_____|
1715
46a61395 1716 First native_encode_expr EXPR into a temporary buffer and shift each
f663d9ad
KT
1717 byte in the buffer to the right (carrying the bits over as necessary).
1718 We shift by as much as needed to align the most significant bit of EXPR
1719 with bitpos:
1720 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1721 <---bitlen----> <bp ><-----bitlen----->
1722 Then we clear the destination bits:
1723 ptr + first_byte |-----000||00000000||00000---|
1724 <bp ><-------bitlen----->
1725
1726 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1727 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1728 The awkwardness comes from the fact that bitpos is counted from the
1729 most significant bit of a byte. */
1730
ef1d3b57
RS
1731 /* We must be dealing with fixed-size data at this point, since the
1732 total size is also fixed. */
3afd514b
JJ
1733 unsigned int byte_size;
1734 if (empty_ctor_p)
1735 {
1736 unsigned HOST_WIDE_INT rhs_bytes
1737 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1738 if (rhs_bytes > total_bytes)
1739 return false;
1740 byte_size = rhs_bytes;
1741 }
1742 else
1743 {
1744 fixed_size_mode mode
1745 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
e362a897
EB
1746 byte_size
1747 = mode == BLKmode
1748 ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
1749 : GET_MODE_SIZE (mode);
3afd514b 1750 }
f663d9ad 1751 /* Allocate an extra byte so that we have space to shift into. */
3afd514b 1752 byte_size++;
f663d9ad 1753 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
46a61395 1754 memset (tmpbuf, '\0', byte_size);
f663d9ad 1755 /* The store detection code should only have allowed constants that are
3afd514b
JJ
1756 accepted by native_encode_expr or empty ctors. */
1757 if (!empty_ctor_p
1758 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
f663d9ad
KT
1759 gcc_unreachable ();
1760
1761 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1762 bytes to write. This means it can write more than
1763 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1764 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1765 bitlen and zero out the bits that are not relevant as well (that may
1766 contain a sign bit due to sign-extension). */
1767 unsigned int padding
1768 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
ad1de652
JJ
1769 /* On big-endian the padding is at the 'front' so just skip the initial
1770 bytes. */
1771 if (BYTES_BIG_ENDIAN)
1772 tmpbuf += padding;
1773
1774 byte_size -= padding;
1775
1776 if (bitlen % BITS_PER_UNIT != 0)
f663d9ad 1777 {
4b2c06f4 1778 if (BYTES_BIG_ENDIAN)
ad1de652
JJ
1779 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1780 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1781 else
1782 clear_bit_region (tmpbuf, bitlen,
1783 byte_size * BITS_PER_UNIT - bitlen);
f663d9ad 1784 }
ad1de652
JJ
1785 /* Left shifting relies on the last byte being clear if bitlen is
1786 a multiple of BITS_PER_UNIT, which might not be clear if
1787 there are padding bytes. */
1788 else if (!BYTES_BIG_ENDIAN)
1789 tmpbuf[byte_size - 1] = '\0';
f663d9ad
KT
1790
1791 /* Clear the bit region in PTR where the bits from TMPBUF will be
46a61395 1792 inserted into. */
f663d9ad
KT
1793 if (BYTES_BIG_ENDIAN)
1794 clear_bit_region_be (ptr + first_byte,
1795 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1796 else
1797 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1798
1799 int shift_amnt;
1800 int bitlen_mod = bitlen % BITS_PER_UNIT;
1801 int bitpos_mod = bitpos % BITS_PER_UNIT;
1802
1803 bool skip_byte = false;
1804 if (BYTES_BIG_ENDIAN)
1805 {
1806 /* BITPOS and BITLEN are exactly aligned and no shifting
1807 is necessary. */
1808 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1809 || (bitpos_mod == 0 && bitlen_mod == 0))
1810 shift_amnt = 0;
1811 /* |. . . . . . . .|
1812 <bp > <blen >.
1813 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1814 of the value until it aligns with 'bp' in the next byte over. */
1815 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1816 {
1817 shift_amnt = bitlen_mod + bitpos_mod;
1818 skip_byte = bitlen_mod != 0;
1819 }
1820 /* |. . . . . . . .|
1821 <----bp--->
1822 <---blen---->.
1823 Shift the value right within the same byte so it aligns with 'bp'. */
1824 else
1825 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1826 }
1827 else
1828 shift_amnt = bitpos % BITS_PER_UNIT;
1829
1830 /* Create the shifted version of EXPR. */
1831 if (!BYTES_BIG_ENDIAN)
46a61395 1832 {
8aba425f 1833 shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
46a61395
JJ
1834 if (shift_amnt == 0)
1835 byte_size--;
1836 }
f663d9ad
KT
1837 else
1838 {
1839 gcc_assert (BYTES_BIG_ENDIAN);
1840 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1841 /* If shifting right forced us to move into the next byte skip the now
1842 empty byte. */
1843 if (skip_byte)
1844 {
1845 tmpbuf++;
1846 byte_size--;
1847 }
1848 }
1849
1850 /* Insert the bits from TMPBUF. */
1851 for (unsigned int i = 0; i < byte_size; i++)
1852 ptr[first_byte + i] |= tmpbuf[i];
1853
1854 return true;
1855}
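
/* An illustrative, self-contained sketch (not part of this pass) of the
   little-endian byte-array shift that the code above relies on: shift the
   whole buffer left by AMNT bits (0 < AMNT < 8), carrying bits from each
   byte into the next higher-addressed byte.  Assumes 8-bit bytes; the
   function name is hypothetical.  For example, the buffer {0x34, 0x12}
   (the 16-bit value 0x1234 in little-endian order) shifted left by 4
   becomes {0x40, 0x23}, i.e. 0x2340.  */

static void
demo_shift_bytes_left (unsigned char *buf, unsigned int size,
                       unsigned int amnt)
{
  unsigned char carry = 0;
  for (unsigned int i = 0; i < size; i++)
    {
      /* Bits shifted out of this byte feed into the next one.  */
      unsigned char next_carry = (unsigned char) (buf[i] >> (8 - amnt));
      buf[i] = (unsigned char) ((buf[i] << amnt) | carry);
      carry = next_carry;
    }
}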
1856
1857/* Sorting function for store_immediate_info objects.
1858 Sorts them by bitposition. */
1859
1860static int
1861sort_by_bitpos (const void *x, const void *y)
1862{
1863 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1864 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1865
109cca3b 1866 if ((*tmp)->bitpos < (*tmp2)->bitpos)
f663d9ad
KT
1867 return -1;
1868 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
1869 return 1;
109cca3b 1870 else
0f0027d1
KT
1871 /* If the bit positions are the same, fall back to the order, which is
1872 guaranteed to be different. */
1873 return (*tmp)->order - (*tmp2)->order;
f663d9ad
KT
1874}
1875
1876/* Sorting function for store_immediate_info objects.
1877 Sorts them by the order field. */
1878
1879static int
1880sort_by_order (const void *x, const void *y)
1881{
1882 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1883 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1884
1885 if ((*tmp)->order < (*tmp2)->order)
1886 return -1;
1887 else if ((*tmp)->order > (*tmp2)->order)
1888 return 1;
1889
1890 gcc_unreachable ();
1891}
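
/* An illustrative, self-contained sketch (not part of this pass) of the
   same comparison scheme on a plain struct, usable with qsort: sort by bit
   position and break ties with the statement order, which is unique, so
   the result is a deterministic total order.  Names are hypothetical.  */

#include <stdlib.h>

struct demo_store
{
  unsigned long bitpos;         /* Bit position of the store.  */
  unsigned int order;           /* Original statement order, unique.  */
};

static int
demo_sort_by_bitpos (const void *x, const void *y)
{
  const struct demo_store *a = (const struct demo_store *) x;
  const struct demo_store *b = (const struct demo_store *) y;
  if (a->bitpos != b->bitpos)
    return a->bitpos < b->bitpos ? -1 : 1;
  /* Equal bit positions: fall back to the unique statement order.  */
  return a->order < b->order ? -1 : 1;
}

/* Usage: qsort (stores, nstores, sizeof (struct demo_store),
                 demo_sort_by_bitpos);  */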
1892
1893/* Initialize a merged_store_group object from a store_immediate_info
1894 object. */
1895
1896merged_store_group::merged_store_group (store_immediate_info *info)
1897{
1898 start = info->bitpos;
1899 width = info->bitsize;
a62b3dc5
JJ
1900 bitregion_start = info->bitregion_start;
1901 bitregion_end = info->bitregion_end;
f663d9ad
KT
1902 /* VAL has memory allocated for it in apply_stores once the group
1903 width has been finalized. */
1904 val = NULL;
a62b3dc5 1905 mask = NULL;
e362a897
EB
1906 bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
1907 string_concatenation = info->rhs_code == STRING_CST;
18e0c3d1 1908 only_constants = info->rhs_code == INTEGER_CST;
1b3c9813 1909 consecutive = true;
18e0c3d1 1910 first_nonmergeable_order = ~0U;
629387a6 1911 lp_nr = info->lp_nr;
a62b3dc5
JJ
1912 unsigned HOST_WIDE_INT align_bitpos = 0;
1913 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1914 &align, &align_bitpos);
1915 align_base = start - align_bitpos;
245f6de1
JJ
1916 for (int i = 0; i < 2; ++i)
1917 {
1918 store_operand_info &op = info->ops[i];
1919 if (op.base_addr == NULL_TREE)
1920 {
1921 load_align[i] = 0;
1922 load_align_base[i] = 0;
1923 }
1924 else
1925 {
1926 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
1927 load_align_base[i] = op.bitpos - align_bitpos;
1928 }
1929 }
f663d9ad
KT
1930 stores.create (1);
1931 stores.safe_push (info);
1932 last_stmt = info->stmt;
1933 last_order = info->order;
1934 first_stmt = last_stmt;
1935 first_order = last_order;
1936 buf_size = 0;
1937}
1938
1939merged_store_group::~merged_store_group ()
1940{
1941 if (val)
1942 XDELETEVEC (val);
1943}
1944
7f5a3982
EB
1945/* Return true if the store described by INFO can be merged into the group. */
1946
1947bool
1948merged_store_group::can_be_merged_into (store_immediate_info *info)
1949{
1950 /* Do not merge bswap patterns. */
1951 if (info->rhs_code == LROTATE_EXPR)
1952 return false;
1953
629387a6
EB
1954 if (info->lp_nr != lp_nr)
1955 return false;
1956
7f5a3982
EB
1957 /* The canonical case. */
1958 if (info->rhs_code == stores[0]->rhs_code)
1959 return true;
1960
e362a897 1961 /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST. */
7f5a3982 1962 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
e362a897 1963 return !string_concatenation;
7f5a3982
EB
1964
1965 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
e362a897 1966 return !string_concatenation;
7f5a3982 1967
ed01d707
EB
1968 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
1969 only for small regions since this can generate a lot of instructions. */
7f5a3982
EB
1970 if (info->rhs_code == MEM_REF
1971 && (stores[0]->rhs_code == INTEGER_CST
1972 || stores[0]->rhs_code == BIT_INSERT_EXPR)
1973 && info->bitregion_start == stores[0]->bitregion_start
ed01d707 1974 && info->bitregion_end == stores[0]->bitregion_end
2815558a 1975 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
e362a897 1976 return !string_concatenation;
7f5a3982
EB
1977
1978 if (stores[0]->rhs_code == MEM_REF
1979 && (info->rhs_code == INTEGER_CST
1980 || info->rhs_code == BIT_INSERT_EXPR)
1981 && info->bitregion_start == stores[0]->bitregion_start
ed01d707 1982 && info->bitregion_end == stores[0]->bitregion_end
2815558a 1983 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
e362a897
EB
1984 return !string_concatenation;
1985
1986 /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR. */
1987 if (info->rhs_code == STRING_CST
1988 && stores[0]->rhs_code == INTEGER_CST
1989 && stores[0]->bitsize == CHAR_BIT)
1990 return !bit_insertion;
1991
1992 if (stores[0]->rhs_code == STRING_CST
1993 && info->rhs_code == INTEGER_CST
1994 && info->bitsize == CHAR_BIT)
1995 return !bit_insertion;
7f5a3982
EB
1996
1997 return false;
1998}
1999
a62b3dc5
JJ
2000/* Helper method for merge_into and merge_overlapping to do
2001 the common part. */
7f5a3982 2002
f663d9ad 2003void
a62b3dc5 2004merged_store_group::do_merge (store_immediate_info *info)
f663d9ad 2005{
a62b3dc5
JJ
2006 bitregion_start = MIN (bitregion_start, info->bitregion_start);
2007 bitregion_end = MAX (bitregion_end, info->bitregion_end);
2008
2009 unsigned int this_align;
2010 unsigned HOST_WIDE_INT align_bitpos = 0;
2011 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2012 &this_align, &align_bitpos);
2013 if (this_align > align)
2014 {
2015 align = this_align;
2016 align_base = info->bitpos - align_bitpos;
2017 }
245f6de1
JJ
2018 for (int i = 0; i < 2; ++i)
2019 {
2020 store_operand_info &op = info->ops[i];
2021 if (!op.base_addr)
2022 continue;
2023
2024 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
2025 if (this_align > load_align[i])
2026 {
2027 load_align[i] = this_align;
2028 load_align_base[i] = op.bitpos - align_bitpos;
2029 }
2030 }
f663d9ad 2031
f663d9ad
KT
2032 gimple *stmt = info->stmt;
2033 stores.safe_push (info);
2034 if (info->order > last_order)
2035 {
2036 last_order = info->order;
2037 last_stmt = stmt;
2038 }
2039 else if (info->order < first_order)
2040 {
2041 first_order = info->order;
2042 first_stmt = stmt;
2043 }
e362a897 2044
1b3c9813
EB
2045 if (info->bitpos != start + width)
2046 consecutive = false;
2047
e362a897
EB
2048 /* We need to use extraction if there is any bit-field. */
2049 if (info->rhs_code == BIT_INSERT_EXPR)
2050 {
2051 bit_insertion = true;
2052 gcc_assert (!string_concatenation);
2053 }
2054
1b3c9813 2055 /* We want to use concatenation if there is any string. */
e362a897
EB
2056 if (info->rhs_code == STRING_CST)
2057 {
2058 string_concatenation = true;
2059 gcc_assert (!bit_insertion);
2060 }
2061
1b3c9813
EB
2062 /* But we cannot use it if we don't have consecutive stores. */
2063 if (!consecutive)
2064 string_concatenation = false;
2065
18e0c3d1
JJ
2066 if (info->rhs_code != INTEGER_CST)
2067 only_constants = false;
f663d9ad
KT
2068}
2069
a62b3dc5
JJ
2070/* Merge a store recorded by INFO into this merged store.
2071 The store is not overlapping with the existing recorded
2072 stores. */
2073
2074void
2075merged_store_group::merge_into (store_immediate_info *info)
2076{
1b3c9813
EB
2077 do_merge (info);
2078
a62b3dc5
JJ
2079 /* Make sure we're inserting in the position we think we're inserting. */
2080 gcc_assert (info->bitpos >= start + width
2081 && info->bitregion_start <= bitregion_end);
2082
c5679c37 2083 width = info->bitpos + info->bitsize - start;
a62b3dc5
JJ
2084}
2085
f663d9ad
KT
2086/* Merge a store described by INFO into this merged store.
2087 INFO overlaps in some way with the current store (i.e. it's not the contiguous
2088 case, which is handled by merged_store_group::merge_into). */
2089
2090void
2091merged_store_group::merge_overlapping (store_immediate_info *info)
2092{
1b3c9813
EB
2093 do_merge (info);
2094
f663d9ad 2095 /* If the store extends the size of the group, extend the width. */
a62b3dc5 2096 if (info->bitpos + info->bitsize > start + width)
c5679c37 2097 width = info->bitpos + info->bitsize - start;
f663d9ad
KT
2098}
2099
2100/* Go through all the recorded stores in this group in program order and
2101 apply their values to the VAL byte array to create the final merged
2102 value. Return true if the operation succeeded. */
2103
2104bool
2105merged_store_group::apply_stores ()
2106{
e362a897
EB
2107 store_immediate_info *info;
2108 unsigned int i;
2109
a62b3dc5
JJ
2110 /* Make sure we have more than one store in the group, otherwise we cannot
2111 merge anything. */
2112 if (bitregion_start % BITS_PER_UNIT != 0
2113 || bitregion_end % BITS_PER_UNIT != 0
f663d9ad
KT
2114 || stores.length () == 1)
2115 return false;
2116
e362a897
EB
2117 buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;
2118
2119 /* Really do string concatenation for large strings only. */
2120 if (buf_size <= MOVE_MAX)
2121 string_concatenation = false;
2122
c94c3532 2123 /* Create a power-of-2-sized buffer for native_encode_expr. */
e362a897
EB
2124 if (!string_concatenation)
2125 buf_size = 1 << ceil_log2 (buf_size);
2126
a62b3dc5
JJ
2127 val = XNEWVEC (unsigned char, 2 * buf_size);
2128 mask = val + buf_size;
2129 memset (val, 0, buf_size);
2130 memset (mask, ~0U, buf_size);
f663d9ad 2131
e362a897
EB
2132 stores.qsort (sort_by_order);
2133
f663d9ad
KT
2134 FOR_EACH_VEC_ELT (stores, i, info)
2135 {
a62b3dc5 2136 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
c94c3532 2137 tree cst;
245f6de1
JJ
2138 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2139 cst = info->ops[0].val;
2140 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2141 cst = info->ops[1].val;
c94c3532
EB
2142 else
2143 cst = NULL_TREE;
245f6de1 2144 bool ret = true;
e362a897
EB
2145 if (cst && info->rhs_code != BIT_INSERT_EXPR)
2146 ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
2147 buf_size);
c94c3532
EB
2148 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2149 if (BYTES_BIG_ENDIAN)
2150 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2151 - (pos_in_buffer % BITS_PER_UNIT)),
2152 info->bitsize);
2153 else
2154 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
245f6de1 2155 if (cst && dump_file && (dump_flags & TDF_DETAILS))
f663d9ad
KT
2156 {
2157 if (ret)
2158 {
c94c3532 2159 fputs ("After writing ", dump_file);
4af78ef8 2160 print_generic_expr (dump_file, cst, TDF_NONE);
f663d9ad 2161 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
c94c3532
EB
2162 " at position %d\n", info->bitsize, pos_in_buffer);
2163 fputs (" the merged value contains ", dump_file);
f663d9ad 2164 dump_char_array (dump_file, val, buf_size);
c94c3532
EB
2165 fputs (" the merged mask contains ", dump_file);
2166 dump_char_array (dump_file, mask, buf_size);
2167 if (bit_insertion)
2168 fputs (" bit insertion is required\n", dump_file);
e362a897
EB
2169 if (string_concatenation)
2170 fputs (" string concatenation is required\n", dump_file);
f663d9ad
KT
2171 }
2172 else
2173 fprintf (dump_file, "Failed to merge stores\n");
4b84d9b8 2174 }
f663d9ad
KT
2175 if (!ret)
2176 return false;
2177 }
4b84d9b8 2178 stores.qsort (sort_by_bitpos);
f663d9ad
KT
2179 return true;
2180}
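
/* An illustrative, self-contained sketch (not part of this pass) of how the
   VAL/MASK pair built by apply_stores can be consumed: VAL holds the bits
   actually written by the group (zero elsewhere) and MASK keeps a set bit
   for every bit the group did not write, so combining with the previous
   memory contents is a byte-wise AND/OR.  Byte-granular case only; names
   are hypothetical.  */

static void
demo_combine_group (unsigned char *out, const unsigned char *orig,
                    const unsigned char *val, const unsigned char *mask,
                    unsigned int nbytes)
{
  for (unsigned int i = 0; i < nbytes; i++)
    /* Keep the original bits wherever MASK is set, take the merged value
       everywhere else.  */
    out[i] = (unsigned char) ((orig[i] & mask[i]) | (val[i] & ~mask[i]));
}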
2181
2182/* Structure describing the store chain. */
2183
6c1dae73 2184class imm_store_chain_info
f663d9ad 2185{
6c1dae73 2186public:
50b6d676
AO
2187 /* Doubly-linked list that imposes an order on chain processing.
2188 PNXP (prev's next pointer) points to the head of a list, or to
2189 the next field in the previous chain in the list.
2190 See pass_store_merging::m_stores_head for more rationale. */
2191 imm_store_chain_info *next, **pnxp;
b5926e23 2192 tree base_addr;
a62b3dc5 2193 auto_vec<store_immediate_info *> m_store_info;
f663d9ad
KT
2194 auto_vec<merged_store_group *> m_merged_store_groups;
2195
50b6d676
AO
2196 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2197 : next (inspt), pnxp (&inspt), base_addr (b_a)
2198 {
2199 inspt = this;
2200 if (next)
2201 {
2202 gcc_checking_assert (pnxp == next->pnxp);
2203 next->pnxp = &next;
2204 }
2205 }
2206 ~imm_store_chain_info ()
2207 {
2208 *pnxp = next;
2209 if (next)
2210 {
2211 gcc_checking_assert (&next == next->pnxp);
2212 next->pnxp = pnxp;
2213 }
2214 }
b5926e23 2215 bool terminate_and_process_chain ();
bd909071
JJ
2216 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2217 unsigned int);
f663d9ad 2218 bool coalesce_immediate_stores ();
b5926e23
RB
2219 bool output_merged_store (merged_store_group *);
2220 bool output_merged_stores ();
f663d9ad
KT
2221};
2222
2223const pass_data pass_data_tree_store_merging = {
2224 GIMPLE_PASS, /* type */
2225 "store-merging", /* name */
2226 OPTGROUP_NONE, /* optinfo_flags */
2227 TV_GIMPLE_STORE_MERGING, /* tv_id */
2228 PROP_ssa, /* properties_required */
2229 0, /* properties_provided */
2230 0, /* properties_destroyed */
2231 0, /* todo_flags_start */
2232 TODO_update_ssa, /* todo_flags_finish */
2233};
2234
2235class pass_store_merging : public gimple_opt_pass
2236{
2237public:
2238 pass_store_merging (gcc::context *ctxt)
faec5f24 2239 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
f663d9ad
KT
2240 {
2241 }
2242
c94c3532
EB
2243 /* Pass not supported for PDP-endian, nor for insane hosts or
2244 target character sizes where native_{encode,interpret}_expr
a62b3dc5 2245 doesn't work properly. */
f663d9ad
KT
2246 virtual bool
2247 gate (function *)
2248 {
a62b3dc5 2249 return flag_store_merging
c94c3532 2250 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
a62b3dc5
JJ
2251 && CHAR_BIT == 8
2252 && BITS_PER_UNIT == 8;
f663d9ad
KT
2253 }
2254
2255 virtual unsigned int execute (function *);
2256
2257private:
99b1c316 2258 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
f663d9ad 2259
50b6d676
AO
2260 /* Form a doubly-linked stack of the elements of m_stores, so that
2261 we can iterate over them in a predictable way. Using this order
2262 avoids extraneous differences in the compiler output just because
2263 of tree pointer variations (e.g. different chains end up in
2264 different positions of m_stores, so they are handled in different
2265 orders, so they allocate or release SSA names in different
2266 orders, and when they get reused, subsequent passes end up
2267 getting different SSA names, which may ultimately change
2268 decisions when going out of SSA). */
2269 imm_store_chain_info *m_stores_head;
2270
629387a6
EB
2271 bool process_store (gimple *);
2272 bool terminate_and_process_chain (imm_store_chain_info *);
383ac8dc 2273 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
629387a6 2274 bool terminate_and_process_all_chains ();
f663d9ad
KT
2275}; // class pass_store_merging
2276
2277/* Terminate and process all recorded chains. Return true if any changes
2278 were made. */
2279
2280bool
2281pass_store_merging::terminate_and_process_all_chains ()
2282{
f663d9ad 2283 bool ret = false;
50b6d676 2284 while (m_stores_head)
629387a6 2285 ret |= terminate_and_process_chain (m_stores_head);
b119c055 2286 gcc_assert (m_stores.is_empty ());
f663d9ad
KT
2287 return ret;
2288}
2289
383ac8dc
JJ
2290/* Terminate all chains that are affected by the statement STMT.
2291 CHAIN_INFO is the chain we should ignore from the checks if
629387a6 2292 non-NULL. Return true if any changes were made. */
f663d9ad
KT
2293
2294bool
20770eb8 2295pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
b5926e23 2296 **chain_info,
f663d9ad
KT
2297 gimple *stmt)
2298{
2299 bool ret = false;
2300
2301 /* If the statement doesn't touch memory it can't alias. */
2302 if (!gimple_vuse (stmt))
2303 return false;
2304
9e875fd8 2305 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
6b412bf6
RB
2306 ao_ref store_lhs_ref;
2307 ao_ref_init (&store_lhs_ref, store_lhs);
383ac8dc 2308 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
f663d9ad 2309 {
383ac8dc
JJ
2310 next = cur->next;
2311
2312 /* We already checked all the stores in chain_info and terminated the
2313 chain if necessary. Skip it here. */
2314 if (chain_info && *chain_info == cur)
2315 continue;
2316
245f6de1
JJ
2317 store_immediate_info *info;
2318 unsigned int i;
383ac8dc 2319 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
f663d9ad 2320 {
9e875fd8 2321 tree lhs = gimple_assign_lhs (info->stmt);
6b412bf6
RB
2322 ao_ref lhs_ref;
2323 ao_ref_init (&lhs_ref, lhs);
2324 if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2325 || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2326 || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2327 &lhs_ref, false)))
f663d9ad 2328 {
245f6de1 2329 if (dump_file && (dump_flags & TDF_DETAILS))
f663d9ad 2330 {
245f6de1
JJ
2331 fprintf (dump_file, "stmt causes chain termination:\n");
2332 print_gimple_stmt (dump_file, stmt, 0);
f663d9ad 2333 }
629387a6 2334 ret |= terminate_and_process_chain (cur);
245f6de1 2335 break;
f663d9ad
KT
2336 }
2337 }
2338 }
2339
f663d9ad
KT
2340 return ret;
2341}
2342
2343/* Helper function. Terminate the recorded chain storing to base object
2344 BASE. Return true if the merging and output were successful. The m_stores
2345 entry is removed after the processing in any case. */
2346
2347bool
629387a6 2348pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
f663d9ad 2349{
b5926e23
RB
2350 bool ret = chain_info->terminate_and_process_chain ();
2351 m_stores.remove (chain_info->base_addr);
2352 delete chain_info;
f663d9ad
KT
2353 return ret;
2354}
2355
245f6de1 2356/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
629387a6
EB
2357 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2358 be able to sink load of REF across stores between FIRST and LAST, up
2359 to right before LAST. */
245f6de1
JJ
2360
2361bool
2362stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2363{
2364 ao_ref r;
2365 ao_ref_init (&r, ref);
2366 unsigned int count = 0;
2367 tree vop = gimple_vdef (last);
2368 gimple *stmt;
2369
629387a6
EB
2370 /* Return true conservatively if the basic blocks are different. */
2371 if (gimple_bb (first) != gimple_bb (last))
2372 return true;
2373
245f6de1
JJ
2374 do
2375 {
2376 stmt = SSA_NAME_DEF_STMT (vop);
2377 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2378 return true;
4b84d9b8
JJ
2379 if (gimple_store_p (stmt)
2380 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2381 return true;
245f6de1
JJ
2382 /* Avoid quadratic compile time by bounding the number of checks
2383 we perform. */
2384 if (++count > MAX_STORE_ALIAS_CHECKS)
2385 return true;
2386 vop = gimple_vuse (stmt);
2387 }
2388 while (stmt != first);
629387a6 2389
245f6de1
JJ
2390 return false;
2391}
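
/* An illustrative, self-contained sketch (not part of this pass) of the
   bounded backwards walk above, on a plain chain of definitions: inspect
   each definition from LAST back to (but not including) FIRST and fall
   back to a conservative "may clobber" answer once MAX_CHECKS links have
   been examined.  All names are hypothetical.  */

struct demo_def
{
  struct demo_def *prev;        /* Previous definition in the chain.  */
  int clobbers_ref;             /* Nonzero if this definition may clobber.  */
};

static int
demo_chain_may_clobber_p (const struct demo_def *first,
                          const struct demo_def *last,
                          unsigned int max_checks)
{
  unsigned int count = 0;
  for (const struct demo_def *d = last; d != first; d = d->prev)
    {
      if (d == NULL || d->clobbers_ref)
        return 1;
      /* Bound the number of checks to avoid quadratic compile time,
         answering conservatively when the bound is hit.  */
      if (++count > max_checks)
        return 1;
    }
  return 0;
}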
2392
2393/* Return true if INFO->ops[IDX] is mergeable with the
2394 corresponding loads already in MERGED_STORE group.
2395 BASE_ADDR is the base address of the whole store group. */
2396
2397bool
2398compatible_load_p (merged_store_group *merged_store,
2399 store_immediate_info *info,
2400 tree base_addr, int idx)
2401{
2402 store_immediate_info *infof = merged_store->stores[0];
2403 if (!info->ops[idx].base_addr
8a91d545
RS
2404 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2405 info->bitpos - infof->bitpos)
245f6de1
JJ
2406 || !operand_equal_p (info->ops[idx].base_addr,
2407 infof->ops[idx].base_addr, 0))
2408 return false;
2409
2410 store_immediate_info *infol = merged_store->stores.last ();
2411 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2412 /* In this case all vuses should be the same, e.g.
2413 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2414 or
2415 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2416 and we can emit the coalesced load next to any of those loads. */
2417 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2418 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2419 return true;
2420
2421 /* Otherwise, at least for now require that the load has the same
2422 vuse as the store. See following examples. */
2423 if (gimple_vuse (info->stmt) != load_vuse)
2424 return false;
2425
2426 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2427 || (infof != infol
2428 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2429 return false;
2430
2431 /* If the load is from the same location as the store, already
2432 the construction of the immediate chain info guarantees no intervening
2433 stores, so no further checks are needed. Example:
2434 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
8a91d545 2435 if (known_eq (info->ops[idx].bitpos, info->bitpos)
245f6de1
JJ
2436 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2437 return true;
2438
2439 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2440 of the stores in the group, or any other stores in between those.
2441 Previous calls to compatible_load_p ensured that for all the
2442 merged_store->stores IDX loads, no stmts starting with
2443 merged_store->first_stmt and ending right before merged_store->last_stmt
2444 clobbers those loads. */
2445 gimple *first = merged_store->first_stmt;
2446 gimple *last = merged_store->last_stmt;
2447 unsigned int i;
2448 store_immediate_info *infoc;
2449 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2450 comes before the so far first load, we'll be changing
2451 merged_store->first_stmt. In that case we need to give up if
2452 any of the earlier processed loads clobber with the stmts in the new
2453 range. */
2454 if (info->order < merged_store->first_order)
2455 {
2456 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2457 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2458 return false;
2459 first = info->stmt;
2460 }
2461 /* Similarly, we could change merged_store->last_stmt, so ensure
2462 in that case no stmts in the new range clobber any of the earlier
2463 processed loads. */
2464 else if (info->order > merged_store->last_order)
2465 {
2466 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2467 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2468 return false;
2469 last = info->stmt;
2470 }
2471 /* And finally, we'd be adding a new load to the set, ensure it isn't
2472 clobbered in the new range. */
2473 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2474 return false;
2475
2476 /* Otherwise, we are looking for:
2477 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2478 or
2479 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2480 return true;
2481}
2482
4b84d9b8
JJ
2483/* Add all refs loaded to compute VAL to REFS vector. */
2484
2485void
2486gather_bswap_load_refs (vec<tree> *refs, tree val)
2487{
2488 if (TREE_CODE (val) != SSA_NAME)
2489 return;
2490
2491 gimple *stmt = SSA_NAME_DEF_STMT (val);
2492 if (!is_gimple_assign (stmt))
2493 return;
2494
2495 if (gimple_assign_load_p (stmt))
2496 {
2497 refs->safe_push (gimple_assign_rhs1 (stmt));
2498 return;
2499 }
2500
2501 switch (gimple_assign_rhs_class (stmt))
2502 {
2503 case GIMPLE_BINARY_RHS:
2504 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2505 /* FALLTHRU */
2506 case GIMPLE_UNARY_RHS:
2507 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2508 break;
2509 default:
2510 gcc_unreachable ();
2511 }
2512}
2513
c5679c37
JJ
2514/* Check if there are any stores in M_STORE_INFO after index I
2515 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2516 a potential group ending with END that have their order
4d213bf6
JJ
2517 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2518 all the stores already merged and the one under consideration
2519 have rhs_code of INTEGER_CST. Return true if there are no such stores.
c5679c37
JJ
2520 Consider:
2521 MEM[(long long int *)p_28] = 0;
2522 MEM[(long long int *)p_28 + 8B] = 0;
2523 MEM[(long long int *)p_28 + 16B] = 0;
2524 MEM[(long long int *)p_28 + 24B] = 0;
2525 _129 = (int) _130;
2526 MEM[(int *)p_28 + 8B] = _129;
2527 MEM[(int *)p_28].a = -1;
2528 We already have
2529 MEM[(long long int *)p_28] = 0;
2530 MEM[(int *)p_28].a = -1;
2531 stmts in the current group and need to consider if it is safe to
2532 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2533 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2534 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2535 into the group and merging of those 3 stores is successful, merged
2536 stmts will be emitted at the latest store from that group, i.e.
2537 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2538 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2539 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2540 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2541 into the group. That way it will be its own store group and will
4d213bf6 2542 not be touched. If ALL_INTEGER_CST_P and there are overlapping
c5679c37 2543 INTEGER_CST stores, those are mergeable using merge_overlapping,
bd909071
JJ
2544 so don't return false for those.
2545
2546 Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2547 (exclusive), whether they don't overlap the bitrange START to END
2548 and have order in between FIRST_ORDER and LAST_ORDER. This is to
2549 prevent merging in cases like:
2550 MEM <char[12]> [&b + 8B] = {};
2551 MEM[(short *) &b] = 5;
2552 _5 = *x_4(D);
2553 MEM <long long unsigned int> [&b + 2B] = _5;
2554 MEM[(char *)&b + 16B] = 88;
2555 MEM[(int *)&b + 20B] = 1;
2556 The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2557 be merged with it, because the = _5 store overlaps these and is in between
2558 them in sort_by_order ordering. If it was merged, the merged store would
2559 go after the = _5 store and thus change behavior. */
c5679c37
JJ
2560
2561static bool
2562check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
bd909071
JJ
2563 bool all_integer_cst_p, unsigned int first_order,
2564 unsigned int last_order, unsigned HOST_WIDE_INT start,
2565 unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2566 unsigned end_earlier)
c5679c37
JJ
2567{
2568 unsigned int len = m_store_info.length ();
bd909071
JJ
2569 for (unsigned int j = first_earlier; j < end_earlier; j++)
2570 {
2571 store_immediate_info *info = m_store_info[j];
2572 if (info->order > first_order
2573 && info->order < last_order
2574 && info->bitpos + info->bitsize > start)
2575 return false;
2576 }
c5679c37
JJ
2577 for (++i; i < len; ++i)
2578 {
2579 store_immediate_info *info = m_store_info[i];
2580 if (info->bitpos >= end)
2581 break;
2582 if (info->order < last_order
4d213bf6 2583 && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
c5679c37
JJ
2584 return false;
2585 }
2586 return true;
2587}
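
/* An illustrative, self-contained sketch (not part of this pass) of the
   core test above, reduced to plain bit ranges: a store blocks the merge
   when it overlaps the candidate group's range but, by statement order,
   would have to execute before the group's last statement.  The sketch
   ignores the INTEGER_CST exception and the separate scan of earlier
   stores; names are hypothetical.  */

struct demo_range
{
  unsigned long start, end;     /* Half-open bit range [start, end).  */
  unsigned int order;           /* Original statement order.  */
};

static int
demo_no_blocking_overlap (const struct demo_range *stores, unsigned int n,
                          unsigned long group_start, unsigned long group_end,
                          unsigned int last_order)
{
  for (unsigned int i = 0; i < n; i++)
    if (stores[i].start < group_end
        && stores[i].end > group_start
        && stores[i].order < last_order)
      /* Emitting the merged group at LAST_ORDER would move this store
         past stores it originally preceded.  */
      return 0;
  return 1;
}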
2588
4b84d9b8
JJ
2589/* Return true if m_store_info[first] and at least one following store
2590 form a group which stores a try_size bitsize value that is byte swapped
2591 from a memory load or some value, or is the identity of some value.
2592 This uses the bswap pass APIs. */
2593
2594bool
2595imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2596 unsigned int first,
bd909071
JJ
2597 unsigned int try_size,
2598 unsigned int first_earlier)
4b84d9b8
JJ
2599{
2600 unsigned int len = m_store_info.length (), last = first;
2601 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2602 if (width >= try_size)
2603 return false;
2604 for (unsigned int i = first + 1; i < len; ++i)
2605 {
2606 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
cb76fcd7 2607 || m_store_info[i]->lp_nr != merged_store->lp_nr
4b84d9b8
JJ
2608 || m_store_info[i]->ins_stmt == NULL)
2609 return false;
2610 width += m_store_info[i]->bitsize;
2611 if (width >= try_size)
2612 {
2613 last = i;
2614 break;
2615 }
2616 }
2617 if (width != try_size)
2618 return false;
2619
2620 bool allow_unaligned
028d4092 2621 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
4b84d9b8
JJ
2622 /* Punt if the combined store would not be aligned and we need alignment. */
2623 if (!allow_unaligned)
2624 {
2625 unsigned int align = merged_store->align;
2626 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2627 for (unsigned int i = first + 1; i <= last; ++i)
2628 {
2629 unsigned int this_align;
2630 unsigned HOST_WIDE_INT align_bitpos = 0;
2631 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2632 &this_align, &align_bitpos);
2633 if (this_align > align)
2634 {
2635 align = this_align;
2636 align_base = m_store_info[i]->bitpos - align_bitpos;
2637 }
2638 }
2639 unsigned HOST_WIDE_INT align_bitpos
2640 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2641 if (align_bitpos)
2642 align = least_bit_hwi (align_bitpos);
2643 if (align < try_size)
2644 return false;
2645 }
2646
2647 tree type;
2648 switch (try_size)
2649 {
2650 case 16: type = uint16_type_node; break;
2651 case 32: type = uint32_type_node; break;
2652 case 64: type = uint64_type_node; break;
2653 default: gcc_unreachable ();
2654 }
2655 struct symbolic_number n;
2656 gimple *ins_stmt = NULL;
2657 int vuse_store = -1;
2658 unsigned int first_order = merged_store->first_order;
2659 unsigned int last_order = merged_store->last_order;
2660 gimple *first_stmt = merged_store->first_stmt;
2661 gimple *last_stmt = merged_store->last_stmt;
c5679c37 2662 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
4b84d9b8
JJ
2663 store_immediate_info *infof = m_store_info[first];
2664
2665 for (unsigned int i = first; i <= last; ++i)
2666 {
2667 store_immediate_info *info = m_store_info[i];
2668 struct symbolic_number this_n = info->n;
2669 this_n.type = type;
2670 if (!this_n.base_addr)
2671 this_n.range = try_size / BITS_PER_UNIT;
30fa8e9c
JJ
2672 else
2673 /* Update vuse in case it has changed by output_merged_stores. */
2674 this_n.vuse = gimple_vuse (info->ins_stmt);
4b84d9b8
JJ
2675 unsigned int bitpos = info->bitpos - infof->bitpos;
2676 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2677 BYTES_BIG_ENDIAN
2678 ? try_size - info->bitsize - bitpos
2679 : bitpos))
2680 return false;
aa11164a 2681 if (this_n.base_addr && vuse_store)
4b84d9b8
JJ
2682 {
2683 unsigned int j;
2684 for (j = first; j <= last; ++j)
2685 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2686 break;
2687 if (j > last)
2688 {
2689 if (vuse_store == 1)
2690 return false;
2691 vuse_store = 0;
2692 }
2693 }
2694 if (i == first)
2695 {
2696 n = this_n;
2697 ins_stmt = info->ins_stmt;
2698 }
2699 else
2700 {
c5679c37 2701 if (n.base_addr && n.vuse != this_n.vuse)
4b84d9b8 2702 {
c5679c37
JJ
2703 if (vuse_store == 0)
2704 return false;
2705 vuse_store = 1;
4b84d9b8 2706 }
c5679c37
JJ
2707 if (info->order > last_order)
2708 {
2709 last_order = info->order;
2710 last_stmt = info->stmt;
2711 }
2712 else if (info->order < first_order)
2713 {
2714 first_order = info->order;
2715 first_stmt = info->stmt;
2716 }
2717 end = MAX (end, info->bitpos + info->bitsize);
4b84d9b8
JJ
2718
2719 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2720 &this_n, &n);
2721 if (ins_stmt == NULL)
2722 return false;
2723 }
2724 }
2725
2726 uint64_t cmpxchg, cmpnop;
2727 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
2728
2729 /* A complete byte swap should make the symbolic number start with
2730 the largest digit in the highest order byte. An unchanged symbolic
2731 number indicates a read with the same endianness as the target architecture. */
2732 if (n.n != cmpnop && n.n != cmpxchg)
2733 return false;
2734
2735 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2736 return false;
2737
bd909071
JJ
2738 if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
2739 merged_store->start, end, first_earlier, first))
c5679c37
JJ
2740 return false;
2741
4b84d9b8
JJ
2742 /* Don't handle memory copy this way if normal non-bswap processing
2743 would handle it too. */
2744 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2745 {
2746 unsigned int i;
2747 for (i = first; i <= last; ++i)
2748 if (m_store_info[i]->rhs_code != MEM_REF)
2749 break;
2750 if (i == last + 1)
2751 return false;
2752 }
2753
2754 if (n.n == cmpxchg)
2755 switch (try_size)
2756 {
2757 case 16:
2758 /* Will emit LROTATE_EXPR. */
2759 break;
2760 case 32:
2761 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2762 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2763 break;
2764 return false;
2765 case 64:
2766 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2767 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2768 break;
2769 return false;
2770 default:
2771 gcc_unreachable ();
2772 }
2773
2774 if (!allow_unaligned && n.base_addr)
2775 {
2776 unsigned int align = get_object_alignment (n.src);
2777 if (align < try_size)
2778 return false;
2779 }
2780
2781 /* If each load has vuse of the corresponding store, need to verify
2782 the loads can be sunk right before the last store. */
2783 if (vuse_store == 1)
2784 {
2785 auto_vec<tree, 64> refs;
2786 for (unsigned int i = first; i <= last; ++i)
2787 gather_bswap_load_refs (&refs,
2788 gimple_assign_rhs1 (m_store_info[i]->stmt));
2789
2790 unsigned int i;
2791 tree ref;
2792 FOR_EACH_VEC_ELT (refs, i, ref)
2793 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2794 return false;
2795 n.vuse = NULL_TREE;
2796 }
2797
2798 infof->n = n;
2799 infof->ins_stmt = ins_stmt;
2800 for (unsigned int i = first; i <= last; ++i)
2801 {
2802 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2803 m_store_info[i]->ops[0].base_addr = NULL_TREE;
2804 m_store_info[i]->ops[1].base_addr = NULL_TREE;
2805 if (i != first)
2806 merged_store->merge_into (m_store_info[i]);
2807 }
2808
2809 return true;
2810}
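
/* An illustrative, self-contained sketch (not part of this pass) of the
   byte-marker idea behind the bswap framework used above, reduced to four
   one-byte stores on a little-endian target: tag the source bytes
   1 (LSB) .. 4 (MSB), record which tag lands in each destination byte and
   compare the pattern against the identity and byte-swap layouts.  The
   little-endian assumption and all names are mine, not the pass's.  */

#include <stdint.h>

/* SHIFT[i] is the right-shift amount whose low byte is stored to p[i],
   e.g. {0, 8, 16, 24} for p[0] = x; p[1] = x >> 8; ...  Return 1 for a
   plain 32-bit copy, 2 for a byte-swapped copy, 0 for neither.  */

static int
demo_classify_byte_group (const unsigned int shift[4])
{
  uint32_t n = 0;
  for (int i = 0; i < 4; i++)
    {
      uint32_t tag = shift[i] / 8 + 1;  /* Source byte, 1 = LSB.  */
      n |= tag << (8 * i);              /* Goes into destination byte i.  */
    }
  if (n == 0x04030201)  /* p[i] received source byte i: identity on LE.  */
    return 1;
  if (n == 0x01020304)  /* Reversed order: a byte swap on LE.  */
    return 2;
  return 0;
}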
2811
f663d9ad
KT
2812/* Go through the candidate stores recorded in m_store_info and merge them
2813 into merged_store_group objects recorded into m_merged_store_groups
2814 representing the widened stores. Return true if coalescing was successful
2815 and the number of widened stores is fewer than the original number
2816 of stores. */
2817
2818bool
2819imm_store_chain_info::coalesce_immediate_stores ()
2820{
2821 /* Anything less can't be processed. */
2822 if (m_store_info.length () < 2)
2823 return false;
2824
2825 if (dump_file && (dump_flags & TDF_DETAILS))
c94c3532 2826 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
f663d9ad
KT
2827 m_store_info.length ());
2828
2829 store_immediate_info *info;
4b84d9b8 2830 unsigned int i, ignore = 0;
bd909071
JJ
2831 unsigned int first_earlier = 0;
2832 unsigned int end_earlier = 0;
f663d9ad
KT
2833
2834 /* Order the stores by the bitposition they write to. */
2835 m_store_info.qsort (sort_by_bitpos);
2836
2837 info = m_store_info[0];
2838 merged_store_group *merged_store = new merged_store_group (info);
c94c3532
EB
2839 if (dump_file && (dump_flags & TDF_DETAILS))
2840 fputs ("New store group\n", dump_file);
f663d9ad
KT
2841
2842 FOR_EACH_VEC_ELT (m_store_info, i, info)
2843 {
3afd514b
JJ
2844 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
2845
4b84d9b8 2846 if (i <= ignore)
c94c3532 2847 goto done;
f663d9ad 2848
bd909071
JJ
2849 while (first_earlier < end_earlier
2850 && (m_store_info[first_earlier]->bitpos
2851 + m_store_info[first_earlier]->bitsize
2852 <= merged_store->start))
2853 first_earlier++;
2854
4b84d9b8
JJ
2855 /* First try to handle group of stores like:
2856 p[0] = data >> 24;
2857 p[1] = data >> 16;
2858 p[2] = data >> 8;
2859 p[3] = data;
2860 using the bswap framework. */
2861 if (info->bitpos == merged_store->start + merged_store->width
2862 && merged_store->stores.length () == 1
2863 && merged_store->stores[0]->ins_stmt != NULL
cb76fcd7 2864 && info->lp_nr == merged_store->lp_nr
4b84d9b8
JJ
2865 && info->ins_stmt != NULL)
2866 {
2867 unsigned int try_size;
2868 for (try_size = 64; try_size >= 16; try_size >>= 1)
bd909071
JJ
2869 if (try_coalesce_bswap (merged_store, i - 1, try_size,
2870 first_earlier))
4b84d9b8
JJ
2871 break;
2872
2873 if (try_size >= 16)
2874 {
2875 ignore = i + merged_store->stores.length () - 1;
2876 m_merged_store_groups.safe_push (merged_store);
2877 if (ignore < m_store_info.length ())
bd909071
JJ
2878 {
2879 merged_store = new merged_store_group (m_store_info[ignore]);
2880 end_earlier = ignore;
2881 }
4b84d9b8
JJ
2882 else
2883 merged_store = NULL;
c94c3532 2884 goto done;
4b84d9b8
JJ
2885 }
2886 }
2887
3afd514b
JJ
2888 new_bitregion_start
2889 = MIN (merged_store->bitregion_start, info->bitregion_start);
2890 new_bitregion_end
2891 = MAX (merged_store->bitregion_end, info->bitregion_end);
2892
2893 if (info->order >= merged_store->first_nonmergeable_order
2894 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
028d4092 2895 > (unsigned) param_store_merging_max_size))
18e0c3d1
JJ
2896 ;
2897
f663d9ad
KT
2898 /* |---store 1---|
2899 |---store 2---|
4b84d9b8 2900 Overlapping stores. */
18e0c3d1 2901 else if (IN_RANGE (info->bitpos, merged_store->start,
4d213bf6
JJ
2902 merged_store->start + merged_store->width - 1)
2903 /* |---store 1---||---store 2---|
2904 Handle also the consecutive INTEGER_CST stores case here,
2905 as we have here the code to deal with overlaps. */
2906 || (info->bitregion_start <= merged_store->bitregion_end
2907 && info->rhs_code == INTEGER_CST
2908 && merged_store->only_constants
2909 && merged_store->can_be_merged_into (info)))
f663d9ad 2910 {
245f6de1 2911 /* Only allow overlapping stores of constants. */
629387a6
EB
2912 if (info->rhs_code == INTEGER_CST
2913 && merged_store->only_constants
2914 && info->lp_nr == merged_store->lp_nr)
245f6de1 2915 {
bd909071
JJ
2916 unsigned int first_order
2917 = MIN (merged_store->first_order, info->order);
6cd4c66e
JJ
2918 unsigned int last_order
2919 = MAX (merged_store->last_order, info->order);
2920 unsigned HOST_WIDE_INT end
2921 = MAX (merged_store->start + merged_store->width,
2922 info->bitpos + info->bitsize);
bd909071
JJ
2923 if (check_no_overlap (m_store_info, i, true, first_order,
2924 last_order, merged_store->start, end,
2925 first_earlier, end_earlier))
6cd4c66e
JJ
2926 {
2927 /* check_no_overlap call above made sure there are no
2928 overlapping stores with non-INTEGER_CST rhs_code
2929 in between the first and last of the stores we've
2930 just merged. If there are any INTEGER_CST rhs_code
2931 stores in between, we need to merge_overlapping them
2932 even if in the sort_by_bitpos order there are other
2933 overlapping stores in between. Keep those stores as is.
2934 Example:
2935 MEM[(int *)p_28] = 0;
2936 MEM[(char *)p_28 + 3B] = 1;
2937 MEM[(char *)p_28 + 1B] = 2;
2938 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
2939 We can't merge the zero store with the store of two and
2940 not merge anything else, because the store of one is
2941 in the original order in between those two, but in
2942 sort_by_bitpos order it comes after the last store that
2943 we can't merge with them. We can merge the first 3 stores
2944 and keep the last store as is though. */
18e0c3d1
JJ
2945 unsigned int len = m_store_info.length ();
2946 unsigned int try_order = last_order;
2947 unsigned int first_nonmergeable_order;
2948 unsigned int k;
2949 bool last_iter = false;
2950 int attempts = 0;
2951 do
6cd4c66e 2952 {
18e0c3d1 2953 unsigned int max_order = 0;
bd909071 2954 unsigned int min_order = first_order;
18e0c3d1
JJ
2955 unsigned first_nonmergeable_int_order = ~0U;
2956 unsigned HOST_WIDE_INT this_end = end;
2957 k = i;
2958 first_nonmergeable_order = ~0U;
2959 for (unsigned int j = i + 1; j < len; ++j)
6cd4c66e 2960 {
18e0c3d1
JJ
2961 store_immediate_info *info2 = m_store_info[j];
2962 if (info2->bitpos >= this_end)
2963 break;
2964 if (info2->order < try_order)
6cd4c66e 2965 {
4119cd69
JJ
2966 if (info2->rhs_code != INTEGER_CST
2967 || info2->lp_nr != merged_store->lp_nr)
18e0c3d1
JJ
2968 {
2969 /* Normally check_no_overlap makes sure this
2970 doesn't happen, but if end grows below,
2971 then we need to process more stores than
2972 check_no_overlap verified. Example:
2973 MEM[(int *)p_5] = 0;
2974 MEM[(short *)p_5 + 3B] = 1;
2975 MEM[(char *)p_5 + 4B] = _9;
2976 MEM[(char *)p_5 + 2B] = 2; */
2977 k = 0;
2978 break;
2979 }
2980 k = j;
bd909071 2981 min_order = MIN (min_order, info2->order);
18e0c3d1
JJ
2982 this_end = MAX (this_end,
2983 info2->bitpos + info2->bitsize);
6cd4c66e 2984 }
18e0c3d1 2985 else if (info2->rhs_code == INTEGER_CST
4119cd69 2986 && info2->lp_nr == merged_store->lp_nr
18e0c3d1
JJ
2987 && !last_iter)
2988 {
2989 max_order = MAX (max_order, info2->order + 1);
2990 first_nonmergeable_int_order
2991 = MIN (first_nonmergeable_int_order,
2992 info2->order);
2993 }
2994 else
2995 first_nonmergeable_order
2996 = MIN (first_nonmergeable_order, info2->order);
6cd4c66e 2997 }
bd909071
JJ
2998 if (k > i
2999 && !check_no_overlap (m_store_info, len - 1, true,
3000 min_order, try_order,
3001 merged_store->start, this_end,
3002 first_earlier, end_earlier))
3003 k = 0;
18e0c3d1
JJ
3004 if (k == 0)
3005 {
3006 if (last_order == try_order)
3007 break;
3008 /* If this failed, but only because we grew
3009 try_order, retry with the last working one,
3010 so that we merge at least something. */
3011 try_order = last_order;
3012 last_iter = true;
3013 continue;
3014 }
3015 last_order = try_order;
3016 /* Retry with a larger try_order to see if we could
3017 merge some further INTEGER_CST stores. */
3018 if (max_order
3019 && (first_nonmergeable_int_order
3020 < first_nonmergeable_order))
3021 {
3022 try_order = MIN (max_order,
3023 first_nonmergeable_order);
3024 try_order
3025 = MIN (try_order,
3026 merged_store->first_nonmergeable_order);
3027 if (try_order > last_order && ++attempts < 16)
3028 continue;
3029 }
3030 first_nonmergeable_order
3031 = MIN (first_nonmergeable_order,
3032 first_nonmergeable_int_order);
3033 end = this_end;
3034 break;
6cd4c66e 3035 }
18e0c3d1 3036 while (1);
6cd4c66e
JJ
3037
3038 if (k != 0)
3039 {
3040 merged_store->merge_overlapping (info);
3041
18e0c3d1
JJ
3042 merged_store->first_nonmergeable_order
3043 = MIN (merged_store->first_nonmergeable_order,
3044 first_nonmergeable_order);
3045
6cd4c66e
JJ
3046 for (unsigned int j = i + 1; j <= k; j++)
3047 {
3048 store_immediate_info *info2 = m_store_info[j];
3049 gcc_assert (info2->bitpos < end);
3050 if (info2->order < last_order)
3051 {
3052 gcc_assert (info2->rhs_code == INTEGER_CST);
18e0c3d1
JJ
3053 if (info != info2)
3054 merged_store->merge_overlapping (info2);
6cd4c66e
JJ
3055 }
3056 /* Other stores are kept and not merged in any
3057 way. */
3058 }
3059 ignore = k;
3060 goto done;
3061 }
3062 }
245f6de1 3063 }
f663d9ad 3064 }
245f6de1
JJ
3065 /* |---store 1---||---store 2---|
3066 This store is consecutive to the previous one.
3067 Merge it into the current store group. There can be gaps in between
3068 the stores, but there can't be gaps in between bitregions. */
c94c3532 3069 else if (info->bitregion_start <= merged_store->bitregion_end
7f5a3982 3070 && merged_store->can_be_merged_into (info))
f663d9ad 3071 {
245f6de1
JJ
3072 store_immediate_info *infof = merged_store->stores[0];
3073
3074 /* All the rhs_code ops that take 2 operands are commutative,
3075 swap the operands if it could make the operands compatible. */
3076 if (infof->ops[0].base_addr
3077 && infof->ops[1].base_addr
3078 && info->ops[0].base_addr
3079 && info->ops[1].base_addr
8a91d545
RS
3080 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
3081 info->bitpos - infof->bitpos)
245f6de1
JJ
3082 && operand_equal_p (info->ops[1].base_addr,
3083 infof->ops[0].base_addr, 0))
127ef369
JJ
3084 {
3085 std::swap (info->ops[0], info->ops[1]);
3086 info->ops_swapped_p = true;
3087 }
4d213bf6 3088 if (check_no_overlap (m_store_info, i, false,
bd909071 3089 MIN (merged_store->first_order, info->order),
a7fe6482 3090 MAX (merged_store->last_order, info->order),
bd909071 3091 merged_store->start,
a7fe6482 3092 MAX (merged_store->start + merged_store->width,
bd909071
JJ
3093 info->bitpos + info->bitsize),
3094 first_earlier, end_earlier))
245f6de1 3095 {
7f5a3982
EB
3096 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
3097 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
3098 {
3099 info->rhs_code = BIT_INSERT_EXPR;
3100 info->ops[0].val = gimple_assign_rhs1 (info->stmt);
3101 info->ops[0].base_addr = NULL_TREE;
3102 }
3103 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
3104 {
3105 store_immediate_info *infoj;
3106 unsigned int j;
3107 FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
3108 {
3109 infoj->rhs_code = BIT_INSERT_EXPR;
3110 infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
3111 infoj->ops[0].base_addr = NULL_TREE;
3112 }
e362a897 3113 merged_store->bit_insertion = true;
7f5a3982
EB
3114 }
3115 if ((infof->ops[0].base_addr
3116 ? compatible_load_p (merged_store, info, base_addr, 0)
3117 : !info->ops[0].base_addr)
3118 && (infof->ops[1].base_addr
3119 ? compatible_load_p (merged_store, info, base_addr, 1)
3120 : !info->ops[1].base_addr))
3121 {
3122 merged_store->merge_into (info);
3123 goto done;
3124 }
245f6de1
JJ
3125 }
3126 }
f663d9ad 3127
245f6de1
JJ
3128 /* |---store 1---| <gap> |---store 2---|.
3129 Gap between stores or the rhs not compatible. Start a new group. */
f663d9ad 3130
245f6de1
JJ
3131 /* Try to apply all the stores recorded for the group to determine
3132 the bitpattern they write and discard it if that fails.
3133 This will also reject single-store groups. */
c94c3532 3134 if (merged_store->apply_stores ())
245f6de1 3135 m_merged_store_groups.safe_push (merged_store);
c94c3532
EB
3136 else
3137 delete merged_store;
f663d9ad 3138
245f6de1 3139 merged_store = new merged_store_group (info);
bd909071 3140 end_earlier = i;
c94c3532
EB
3141 if (dump_file && (dump_flags & TDF_DETAILS))
3142 fputs ("New store group\n", dump_file);
3143
3144 done:
3145 if (dump_file && (dump_flags & TDF_DETAILS))
3146 {
3147 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3148 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3149 i, info->bitsize, info->bitpos);
3150 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3151 fputc ('\n', dump_file);
3152 }
f663d9ad
KT
3153 }
3154
a62b3dc5 3155 /* Record or discard the last store group. */
4b84d9b8
JJ
3156 if (merged_store)
3157 {
c94c3532 3158 if (merged_store->apply_stores ())
4b84d9b8 3159 m_merged_store_groups.safe_push (merged_store);
c94c3532
EB
3160 else
3161 delete merged_store;
4b84d9b8 3162 }
f663d9ad
KT
3163
3164 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
c94c3532 3165
f663d9ad
KT
3166 bool success
3167 = !m_merged_store_groups.is_empty ()
3168 && m_merged_store_groups.length () < m_store_info.length ();
3169
3170 if (success && dump_file)
c94c3532 3171 fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
a62b3dc5 3172 m_merged_store_groups.length ());
f663d9ad
KT
3173
3174 return success;
3175}
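
/* An illustrative, self-contained sketch (not part of this pass) of the
   greedy grouping performed above, reduced to plain bit ranges: walk the
   stores sorted by position and start a new group whenever the next store
   leaves a gap after the current group.  The real pass additionally checks
   bit regions, rhs compatibility, overlaps and the bswap patterns; names
   are hypothetical.  */

struct demo_sstore
{
  unsigned long start, end;     /* Half-open bit range [start, end).  */
};

static unsigned int
demo_count_groups (const struct demo_sstore *stores, unsigned int n)
{
  if (n == 0)
    return 0;
  unsigned int groups = 1;
  unsigned long group_end = stores[0].end;
  for (unsigned int i = 1; i < n; i++)
    {
      if (stores[i].start <= group_end)
        {
          /* Adjacent or overlapping: extend the current group.  */
          if (stores[i].end > group_end)
            group_end = stores[i].end;
        }
      else
        {
          /* Gap: close the current group and start a new one.  */
          groups++;
          group_end = stores[i].end;
        }
    }
  return groups;
}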
3176
245f6de1
JJ
3177/* Return the type to use for the merged stores or loads described by STMTS.
3178 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3179 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3180 of the MEM_REFs if any. */
f663d9ad
KT
3181
3182static tree
245f6de1
JJ
3183get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3184 unsigned short *cliquep, unsigned short *basep)
f663d9ad
KT
3185{
3186 gimple *stmt;
3187 unsigned int i;
245f6de1
JJ
3188 tree type = NULL_TREE;
3189 tree ret = NULL_TREE;
3190 *cliquep = 0;
3191 *basep = 0;
f663d9ad
KT
3192
3193 FOR_EACH_VEC_ELT (stmts, i, stmt)
3194 {
245f6de1
JJ
3195 tree ref = is_load ? gimple_assign_rhs1 (stmt)
3196 : gimple_assign_lhs (stmt);
3197 tree type1 = reference_alias_ptr_type (ref);
3198 tree base = get_base_address (ref);
f663d9ad 3199
245f6de1
JJ
3200 if (i == 0)
3201 {
3202 if (TREE_CODE (base) == MEM_REF)
3203 {
3204 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3205 *basep = MR_DEPENDENCE_BASE (base);
3206 }
3207 ret = type = type1;
3208 continue;
3209 }
f663d9ad 3210 if (!alias_ptr_types_compatible_p (type, type1))
245f6de1
JJ
3211 ret = ptr_type_node;
3212 if (TREE_CODE (base) != MEM_REF
3213 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3214 || *basep != MR_DEPENDENCE_BASE (base))
3215 {
3216 *cliquep = 0;
3217 *basep = 0;
3218 }
f663d9ad 3219 }
245f6de1 3220 return ret;
f663d9ad
KT
3221}
3222
3223/* Return the first location_t information we can find among the statements
3224 in STMTS. */
3225
3226static location_t
245f6de1 3227get_location_for_stmts (vec<gimple *> &stmts)
f663d9ad
KT
3228{
3229 gimple *stmt;
3230 unsigned int i;
3231
3232 FOR_EACH_VEC_ELT (stmts, i, stmt)
3233 if (gimple_has_location (stmt))
3234 return gimple_location (stmt);
3235
3236 return UNKNOWN_LOCATION;
3237}
3238
3239/* Used to describe a store resulting from splitting a wide store into smaller
3240 regularly-sized stores in split_group. */
3241
6c1dae73 3242class split_store
f663d9ad 3243{
6c1dae73 3244public:
f663d9ad
KT
3245 unsigned HOST_WIDE_INT bytepos;
3246 unsigned HOST_WIDE_INT size;
3247 unsigned HOST_WIDE_INT align;
245f6de1 3248 auto_vec<store_immediate_info *> orig_stores;
a62b3dc5
JJ
3249 /* True if there is a single orig stmt covering the whole split store. */
3250 bool orig;
f663d9ad
KT
3251 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3252 unsigned HOST_WIDE_INT);
3253};
3254
3255/* Simple constructor. */
3256
3257split_store::split_store (unsigned HOST_WIDE_INT bp,
3258 unsigned HOST_WIDE_INT sz,
3259 unsigned HOST_WIDE_INT al)
a62b3dc5 3260 : bytepos (bp), size (sz), align (al), orig (false)
f663d9ad 3261{
245f6de1 3262 orig_stores.create (0);
f663d9ad
KT
3263}
3264
245f6de1
JJ
3265/* Record all stores in GROUP that write to the region that starts at BITPOS and
3266 is of size BITSIZE. Record infos for such statements in STORES if
3267 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
5384a802
JJ
3268 if there is exactly one original store in the range (in that case ignore
3269 clobber stmts, unless there are only clobber stmts). */
f663d9ad 3270
a62b3dc5 3271static store_immediate_info *
99b1c316 3272find_constituent_stores (class merged_store_group *group,
245f6de1
JJ
3273 vec<store_immediate_info *> *stores,
3274 unsigned int *first,
3275 unsigned HOST_WIDE_INT bitpos,
3276 unsigned HOST_WIDE_INT bitsize)
f663d9ad 3277{
a62b3dc5 3278 store_immediate_info *info, *ret = NULL;
f663d9ad 3279 unsigned int i;
a62b3dc5
JJ
3280 bool second = false;
3281 bool update_first = true;
f663d9ad 3282 unsigned HOST_WIDE_INT end = bitpos + bitsize;
a62b3dc5 3283 for (i = *first; group->stores.iterate (i, &info); ++i)
f663d9ad
KT
3284 {
3285 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3286 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
a62b3dc5
JJ
3287 if (stmt_end <= bitpos)
3288 {
3289 /* BITPOS passed to this function never decreases from within the
3290 same split_group call, so optimize and don't scan info records
3291 which are known to end before or at BITPOS next time.
3292 Only do it if all stores before this one also pass this. */
3293 if (update_first)
3294 *first = i + 1;
3295 continue;
3296 }
3297 else
3298 update_first = false;
3299
f663d9ad 3300 /* The stores in GROUP are ordered by bitposition so if we're past
a62b3dc5
JJ
3301 the region for this group return early. */
3302 if (stmt_start >= end)
3303 return ret;
3304
5384a802
JJ
3305 if (gimple_clobber_p (info->stmt))
3306 {
3307 if (stores)
3308 stores->safe_push (info);
3309 if (ret == NULL)
3310 ret = info;
3311 continue;
3312 }
245f6de1 3313 if (stores)
a62b3dc5 3314 {
245f6de1 3315 stores->safe_push (info);
5384a802 3316 if (ret && !gimple_clobber_p (ret->stmt))
a62b3dc5
JJ
3317 {
3318 ret = NULL;
3319 second = true;
3320 }
3321 }
5384a802 3322 else if (ret && !gimple_clobber_p (ret->stmt))
a62b3dc5
JJ
3323 return NULL;
3324 if (!second)
3325 ret = info;
f663d9ad 3326 }
a62b3dc5 3327 return ret;
f663d9ad
KT
3328}
3329
d7a9512e
JJ
3330/* Return how many SSA_NAMEs used to compute the value to store in the INFO
3331 store have multiple uses. If any SSA_NAME has multiple uses, also
3332 count statements needed to compute it. */
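/* E.g. for a copy _1 = *src_p; *dst_p = _1; (rhs_code MEM_REF) the
   result is 1 when _1 has uses besides this store, because the
   original load must then be kept, and 0 when the store is the only
   use of _1.  */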
3333
3334static unsigned
3335count_multiple_uses (store_immediate_info *info)
3336{
3337 gimple *stmt = info->stmt;
3338 unsigned ret = 0;
3339 switch (info->rhs_code)
3340 {
3341 case INTEGER_CST:
e362a897 3342 case STRING_CST:
d7a9512e
JJ
3343 return 0;
3344 case BIT_AND_EXPR:
3345 case BIT_IOR_EXPR:
3346 case BIT_XOR_EXPR:
d60edaba
JJ
3347 if (info->bit_not_p)
3348 {
3349 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3350 ret = 1; /* Fall through below to return
3351 the BIT_NOT_EXPR stmt and then
3352 BIT_{AND,IOR,XOR}_EXPR and anything it
3353 uses. */
3354 else
3355 /* stmt is after this the BIT_NOT_EXPR. */
3356 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3357 }
d7a9512e
JJ
3358 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3359 {
3360 ret += 1 + info->ops[0].bit_not_p;
3361 if (info->ops[1].base_addr)
3362 ret += 1 + info->ops[1].bit_not_p;
3363 return ret + 1;
3364 }
3365 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3366 /* stmt is now the BIT_*_EXPR. */
3367 if (!has_single_use (gimple_assign_rhs1 (stmt)))
127ef369
JJ
3368 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3369 else if (info->ops[info->ops_swapped_p].bit_not_p)
d7a9512e
JJ
3370 {
3371 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3372 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3373 ++ret;
3374 }
3375 if (info->ops[1].base_addr == NULL_TREE)
127ef369
JJ
3376 {
3377 gcc_checking_assert (!info->ops_swapped_p);
3378 return ret;
3379 }
d7a9512e 3380 if (!has_single_use (gimple_assign_rhs2 (stmt)))
127ef369
JJ
3381 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3382 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
d7a9512e
JJ
3383 {
3384 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3385 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3386 ++ret;
3387 }
3388 return ret;
3389 case MEM_REF:
3390 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3391 return 1 + info->ops[0].bit_not_p;
3392 else if (info->ops[0].bit_not_p)
3393 {
3394 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3395 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3396 return 1;
3397 }
3398 return 0;
c94c3532
EB
3399 case BIT_INSERT_EXPR:
3400 return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
d7a9512e
JJ
3401 default:
3402 gcc_unreachable ();
3403 }
3404}
3405
f663d9ad 3406/* Split a merged store described by GROUP by populating the SPLIT_STORES
a62b3dc5
JJ
3407 vector (if non-NULL) with split_store structs describing the byte offset
3408 (from the base), the bit size and alignment of each store as well as the
3409 original statements involved in each such split group.
f663d9ad
KT
3410 This is to separate the splitting strategy from the statement
3411 building/emission/linking done in output_merged_store.
a62b3dc5 3412 Return number of new stores.
245f6de1
JJ
3413 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3414 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3afd514b
JJ
3415 BZERO_FIRST may be true only when the first store covers the whole group
3416 and clears it; if BZERO_FIRST is true, keep that first store in the set
3417 unmodified and emit further stores for the overrides only.
a62b3dc5
JJ
3418 If SPLIT_STORES is NULL, it is just a dry run to count number of
3419 new stores. */
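/* As an illustration, assuming a 64-bit MAX_STORE_BITSIZE, no padding
   bytes and aligned stores only, a group of constant stores covering
   6 bytes at a 4-byte-aligned position is split into one 4-byte store
   followed by one 2-byte store, so the function returns 2.  */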
f663d9ad 3420
a62b3dc5 3421static unsigned int
245f6de1 3422split_group (merged_store_group *group, bool allow_unaligned_store,
3afd514b 3423 bool allow_unaligned_load, bool bzero_first,
99b1c316 3424 vec<split_store *> *split_stores,
d7a9512e
JJ
3425 unsigned *total_orig,
3426 unsigned *total_new)
f663d9ad 3427{
a62b3dc5
JJ
3428 unsigned HOST_WIDE_INT pos = group->bitregion_start;
3429 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
f663d9ad 3430 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
a62b3dc5
JJ
3431 unsigned HOST_WIDE_INT group_align = group->align;
3432 unsigned HOST_WIDE_INT align_base = group->align_base;
245f6de1 3433 unsigned HOST_WIDE_INT group_load_align = group_align;
d7a9512e 3434 bool any_orig = false;
f663d9ad 3435
f663d9ad
KT
3436 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3437
e362a897
EB
3438  /* For the bswap framework using sets of stores, all the checking has been
3439     done earlier in try_coalesce_bswap and the result always needs to be
3440     emitted as a single store.  Likewise for string concatenation.  */
4b84d9b8 3441 if (group->stores[0]->rhs_code == LROTATE_EXPR
e362a897
EB
3442 || group->stores[0]->rhs_code == NOP_EXPR
3443 || group->string_concatenation)
4b84d9b8 3444 {
3afd514b 3445 gcc_assert (!bzero_first);
4b84d9b8
JJ
3446 if (total_orig)
3447 {
3448 /* Avoid the old/new stmt count heuristics. It should be
3449 always beneficial. */
3450 total_new[0] = 1;
3451 total_orig[0] = 2;
3452 }
3453
3454 if (split_stores)
3455 {
3456 unsigned HOST_WIDE_INT align_bitpos
3457 = (group->start - align_base) & (group_align - 1);
3458 unsigned HOST_WIDE_INT align = group_align;
3459 if (align_bitpos)
3460 align = least_bit_hwi (align_bitpos);
3461 bytepos = group->start / BITS_PER_UNIT;
99b1c316 3462 split_store *store
4b84d9b8
JJ
3463 = new split_store (bytepos, group->width, align);
3464 unsigned int first = 0;
3465 find_constituent_stores (group, &store->orig_stores,
3466 &first, group->start, group->width);
3467 split_stores->safe_push (store);
3468 }
3469
3470 return 1;
3471 }
3472
a62b3dc5 3473 unsigned int ret = 0, first = 0;
f663d9ad 3474 unsigned HOST_WIDE_INT try_pos = bytepos;
f663d9ad 3475
d7a9512e
JJ
3476 if (total_orig)
3477 {
3478 unsigned int i;
3479 store_immediate_info *info = group->stores[0];
3480
3481 total_new[0] = 0;
3482 total_orig[0] = 1; /* The orig store. */
3483 info = group->stores[0];
3484 if (info->ops[0].base_addr)
a6fbd154 3485 total_orig[0]++;
d7a9512e 3486 if (info->ops[1].base_addr)
a6fbd154 3487 total_orig[0]++;
d7a9512e
JJ
3488 switch (info->rhs_code)
3489 {
3490 case BIT_AND_EXPR:
3491 case BIT_IOR_EXPR:
3492 case BIT_XOR_EXPR:
3493 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3494 break;
3495 default:
3496 break;
3497 }
3498 total_orig[0] *= group->stores.length ();
3499
3500 FOR_EACH_VEC_ELT (group->stores, i, info)
a6fbd154
JJ
3501 {
3502 total_new[0] += count_multiple_uses (info);
3503 total_orig[0] += (info->bit_not_p
3504 + info->ops[0].bit_not_p
3505 + info->ops[1].bit_not_p);
3506 }
d7a9512e
JJ
3507 }
3508
245f6de1
JJ
3509 if (!allow_unaligned_load)
3510 for (int i = 0; i < 2; ++i)
3511 if (group->load_align[i])
3512 group_load_align = MIN (group_load_align, group->load_align[i]);
3513
3afd514b
JJ
3514 if (bzero_first)
3515 {
5384a802
JJ
3516 store_immediate_info *gstore;
3517 FOR_EACH_VEC_ELT (group->stores, first, gstore)
3518 if (!gimple_clobber_p (gstore->stmt))
3519 break;
3520 ++first;
3afd514b
JJ
3521 ret = 1;
3522 if (split_stores)
3523 {
99b1c316 3524 split_store *store
5384a802
JJ
3525 = new split_store (bytepos, gstore->bitsize, align_base);
3526 store->orig_stores.safe_push (gstore);
3afd514b
JJ
3527 store->orig = true;
3528 any_orig = true;
3529 split_stores->safe_push (store);
3530 }
3531 }
3532
f663d9ad
KT
3533 while (size > 0)
3534 {
245f6de1 3535 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3afd514b
JJ
3536 && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3537 || (bzero_first && group->val[try_pos - bytepos] == 0)))
a62b3dc5
JJ
3538 {
3539 /* Skip padding bytes. */
3540 ++try_pos;
3541 size -= BITS_PER_UNIT;
3542 continue;
3543 }
3544
f663d9ad 3545 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
a62b3dc5
JJ
3546 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3547 unsigned HOST_WIDE_INT align_bitpos
3548 = (try_bitpos - align_base) & (group_align - 1);
3549 unsigned HOST_WIDE_INT align = group_align;
5384a802 3550 bool found_orig = false;
a62b3dc5
JJ
3551 if (align_bitpos)
3552 align = least_bit_hwi (align_bitpos);
245f6de1 3553 if (!allow_unaligned_store)
a62b3dc5 3554 try_size = MIN (try_size, align);
245f6de1
JJ
3555 if (!allow_unaligned_load)
3556 {
3557 /* If we can't do or don't want to do unaligned stores
3558 as well as loads, we need to take the loads into account
3559 as well. */
3560 unsigned HOST_WIDE_INT load_align = group_load_align;
3561 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3562 if (align_bitpos)
3563 load_align = least_bit_hwi (align_bitpos);
3564 for (int i = 0; i < 2; ++i)
3565 if (group->load_align[i])
3566 {
8a91d545
RS
3567 align_bitpos
3568 = known_alignment (try_bitpos
3569 - group->stores[0]->bitpos
3570 + group->stores[0]->ops[i].bitpos
3571 - group->load_align_base[i]);
3572 if (align_bitpos & (group_load_align - 1))
245f6de1
JJ
3573 {
3574 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3575 load_align = MIN (load_align, a);
3576 }
3577 }
3578 try_size = MIN (try_size, load_align);
3579 }
a62b3dc5 3580 store_immediate_info *info
245f6de1 3581 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
5384a802 3582 if (info && !gimple_clobber_p (info->stmt))
a62b3dc5
JJ
3583 {
3584 /* If there is just one original statement for the range, see if
3585 we can just reuse the original store which could be even larger
3586 than try_size. */
3587 unsigned HOST_WIDE_INT stmt_end
3588 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
245f6de1
JJ
3589 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3590 stmt_end - try_bitpos);
a62b3dc5
JJ
3591 if (info && info->bitpos >= try_bitpos)
3592 {
5384a802
JJ
3593 store_immediate_info *info2 = NULL;
3594 unsigned int first_copy = first;
3595 if (info->bitpos > try_bitpos
3596 && stmt_end - try_bitpos <= try_size)
3597 {
3598 info2 = find_constituent_stores (group, NULL, &first_copy,
3599 try_bitpos,
3600 info->bitpos - try_bitpos);
3601 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3602 }
3603 if (info2 == NULL && stmt_end - try_bitpos < try_size)
3604 {
3605 info2 = find_constituent_stores (group, NULL, &first_copy,
3606 stmt_end,
3607 (try_bitpos + try_size)
3608 - stmt_end);
3609 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3610 }
3611 if (info2 == NULL)
3612 {
3613 try_size = stmt_end - try_bitpos;
3614 found_orig = true;
3615 goto found;
3616 }
a62b3dc5
JJ
3617 }
3618 }
f663d9ad 3619
a62b3dc5
JJ
3620 /* Approximate store bitsize for the case when there are no padding
3621 bits. */
3622 while (try_size > size)
3623 try_size /= 2;
3624 /* Now look for whole padding bytes at the end of that bitsize. */
3625 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3626 if (group->mask[try_pos - bytepos + nonmasked - 1]
3afd514b
JJ
3627 != (unsigned char) ~0U
3628 && (!bzero_first
3629 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
a62b3dc5 3630 break;
5384a802 3631 if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
a62b3dc5
JJ
3632 {
3633 /* If entire try_size range is padding, skip it. */
3634 try_pos += try_size / BITS_PER_UNIT;
3635 size -= try_size;
3636 continue;
3637 }
3638 /* Otherwise try to decrease try_size if second half, last 3 quarters
3639 etc. are padding. */
3640 nonmasked *= BITS_PER_UNIT;
3641 while (nonmasked <= try_size / 2)
3642 try_size /= 2;
245f6de1 3643 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
a62b3dc5
JJ
3644 {
3645 /* Now look for whole padding bytes at the start of that bitsize. */
3646 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3647 for (masked = 0; masked < try_bytesize; ++masked)
3afd514b
JJ
3648 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3649 && (!bzero_first
3650 || group->val[try_pos - bytepos + masked] != 0))
a62b3dc5
JJ
3651 break;
3652 masked *= BITS_PER_UNIT;
3653 gcc_assert (masked < try_size);
3654 if (masked >= try_size / 2)
3655 {
3656 while (masked >= try_size / 2)
3657 {
3658 try_size /= 2;
3659 try_pos += try_size / BITS_PER_UNIT;
3660 size -= try_size;
3661 masked -= try_size;
3662 }
3663 /* Need to recompute the alignment, so just retry at the new
3664 position. */
3665 continue;
3666 }
3667 }
3668
3669 found:
3670 ++ret;
f663d9ad 3671
a62b3dc5
JJ
3672 if (split_stores)
3673 {
99b1c316 3674 split_store *store
a62b3dc5 3675 = new split_store (try_pos, try_size, align);
245f6de1
JJ
3676 info = find_constituent_stores (group, &store->orig_stores,
3677 &first, try_bitpos, try_size);
a62b3dc5 3678 if (info
5384a802 3679 && !gimple_clobber_p (info->stmt)
a62b3dc5 3680 && info->bitpos >= try_bitpos
5384a802
JJ
3681 && info->bitpos + info->bitsize <= try_bitpos + try_size
3682 && (store->orig_stores.length () == 1
3683 || found_orig
3684 || (info->bitpos == try_bitpos
3685 && (info->bitpos + info->bitsize
3686 == try_bitpos + try_size))))
d7a9512e
JJ
3687 {
3688 store->orig = true;
3689 any_orig = true;
3690 }
a62b3dc5
JJ
3691 split_stores->safe_push (store);
3692 }
3693
3694 try_pos += try_size / BITS_PER_UNIT;
f663d9ad 3695 size -= try_size;
f663d9ad 3696 }
a62b3dc5 3697
d7a9512e
JJ
3698 if (total_orig)
3699 {
a6fbd154 3700 unsigned int i;
99b1c316 3701 split_store *store;
d7a9512e
JJ
3702 /* If we are reusing some original stores and any of the
3703 original SSA_NAMEs had multiple uses, we need to subtract
3704 those now before we add the new ones. */
3705 if (total_new[0] && any_orig)
3706 {
d7a9512e
JJ
3707 FOR_EACH_VEC_ELT (*split_stores, i, store)
3708 if (store->orig)
3709 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3710 }
3711 total_new[0] += ret; /* The new store. */
3712 store_immediate_info *info = group->stores[0];
3713 if (info->ops[0].base_addr)
a6fbd154 3714 total_new[0] += ret;
d7a9512e 3715 if (info->ops[1].base_addr)
a6fbd154 3716 total_new[0] += ret;
d7a9512e
JJ
3717 switch (info->rhs_code)
3718 {
3719 case BIT_AND_EXPR:
3720 case BIT_IOR_EXPR:
3721 case BIT_XOR_EXPR:
3722 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
3723 break;
3724 default:
3725 break;
3726 }
a6fbd154
JJ
3727 FOR_EACH_VEC_ELT (*split_stores, i, store)
3728 {
3729 unsigned int j;
3730 bool bit_not_p[3] = { false, false, false };
3731 /* If all orig_stores have certain bit_not_p set, then
3732 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3733 If some orig_stores have certain bit_not_p set, then
3734 we'd use a BIT_XOR_EXPR with a mask and need to account for
3735 it. */
3736 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3737 {
3738 if (info->ops[0].bit_not_p)
3739 bit_not_p[0] = true;
3740 if (info->ops[1].bit_not_p)
3741 bit_not_p[1] = true;
3742 if (info->bit_not_p)
3743 bit_not_p[2] = true;
3744 }
3745 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3746 }
3747
d7a9512e
JJ
3748 }
3749
a62b3dc5 3750 return ret;
f663d9ad
KT
3751}
3752
a6fbd154
JJ
3753/* Return the operation through which the operand IDX (if < 2) or
3754 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3755 is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3756 the bits should be xored with mask. */
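/* For example, if a split store merges two byte-sized constituent
   stores and only the first one had its operand inverted, the result
   is BIT_XOR_EXPR with MASK having all bits set over that first byte,
   so only those bits get flipped; if both had been inverted and there
   is no padding from narrow-precision types, a single BIT_NOT_EXPR is
   used instead.  */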
3757
3758static enum tree_code
3759invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3760{
3761 unsigned int i;
3762 store_immediate_info *info;
3763 unsigned int cnt = 0;
e215422f 3764 bool any_paddings = false;
a6fbd154
JJ
3765 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3766 {
3767 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3768 if (bit_not_p)
e215422f
JJ
3769 {
3770 ++cnt;
3771 tree lhs = gimple_assign_lhs (info->stmt);
3772 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3773 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3774 any_paddings = true;
3775 }
a6fbd154
JJ
3776 }
3777 mask = NULL_TREE;
3778 if (cnt == 0)
3779 return NOP_EXPR;
e215422f 3780 if (cnt == split_store->orig_stores.length () && !any_paddings)
a6fbd154
JJ
3781 return BIT_NOT_EXPR;
3782
3783 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3784 unsigned buf_size = split_store->size / BITS_PER_UNIT;
3785 unsigned char *buf
3786 = XALLOCAVEC (unsigned char, buf_size);
3787 memset (buf, ~0U, buf_size);
3788 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3789 {
3790 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3791 if (!bit_not_p)
3792 continue;
3793 /* Clear regions with bit_not_p and invert afterwards, rather than
3794 clear regions with !bit_not_p, so that gaps in between stores aren't
3795 set in the mask. */
3796 unsigned HOST_WIDE_INT bitsize = info->bitsize;
e215422f 3797 unsigned HOST_WIDE_INT prec = bitsize;
a6fbd154 3798 unsigned int pos_in_buffer = 0;
e215422f
JJ
3799 if (any_paddings)
3800 {
3801 tree lhs = gimple_assign_lhs (info->stmt);
3802 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3803 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3804 prec = TYPE_PRECISION (TREE_TYPE (lhs));
3805 }
a6fbd154
JJ
3806 if (info->bitpos < try_bitpos)
3807 {
3808 gcc_assert (info->bitpos + bitsize > try_bitpos);
e215422f
JJ
3809 if (!BYTES_BIG_ENDIAN)
3810 {
3811 if (prec <= try_bitpos - info->bitpos)
3812 continue;
3813 prec -= try_bitpos - info->bitpos;
3814 }
3815 bitsize -= try_bitpos - info->bitpos;
3816 if (BYTES_BIG_ENDIAN && prec > bitsize)
3817 prec = bitsize;
a6fbd154
JJ
3818 }
3819 else
3820 pos_in_buffer = info->bitpos - try_bitpos;
e215422f
JJ
3821 if (prec < bitsize)
3822 {
3823 /* If this is a bool inversion, invert just the least significant
3824 prec bits rather than all bits of it. */
3825 if (BYTES_BIG_ENDIAN)
3826 {
3827 pos_in_buffer += bitsize - prec;
3828 if (pos_in_buffer >= split_store->size)
3829 continue;
3830 }
3831 bitsize = prec;
3832 }
a6fbd154
JJ
3833 if (pos_in_buffer + bitsize > split_store->size)
3834 bitsize = split_store->size - pos_in_buffer;
3835 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3836 if (BYTES_BIG_ENDIAN)
3837 clear_bit_region_be (p, (BITS_PER_UNIT - 1
3838 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
3839 else
3840 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
3841 }
3842 for (unsigned int i = 0; i < buf_size; ++i)
3843 buf[i] = ~buf[i];
3844 mask = native_interpret_expr (int_type, buf, buf_size);
3845 return BIT_XOR_EXPR;
3846}
3847
f663d9ad
KT
3848/* Given a merged store group GROUP output the widened version of it.
3849 The store chain is against the base object BASE.
3850 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3851 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3852 Make sure that the number of statements output is less than the number of
3853 original statements. If a better sequence is possible emit it and
3854 return true. */
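/* The profitability check below first dry-runs split_group for up to
   four combinations of the allow-unaligned-store and bzero-first
   strategies and keeps the one yielding the fewest stores, then bails
   out if the chosen split would not emit fewer (estimated) statements
   than the original sequence.  */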
3855
3856bool
b5926e23 3857imm_store_chain_info::output_merged_store (merged_store_group *group)
f663d9ad 3858{
e362a897 3859 const unsigned HOST_WIDE_INT start_byte_pos
a62b3dc5 3860 = group->bitregion_start / BITS_PER_UNIT;
f663d9ad
KT
3861 unsigned int orig_num_stmts = group->stores.length ();
3862 if (orig_num_stmts < 2)
3863 return false;
3864
245f6de1 3865 bool allow_unaligned_store
028d4092 3866 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
245f6de1 3867 bool allow_unaligned_load = allow_unaligned_store;
3afd514b 3868 bool bzero_first = false;
5384a802
JJ
3869 store_immediate_info *store;
3870 unsigned int num_clobber_stmts = 0;
3871 if (group->stores[0]->rhs_code == INTEGER_CST)
3872 {
e362a897 3873 unsigned int i;
5384a802
JJ
3874 FOR_EACH_VEC_ELT (group->stores, i, store)
3875 if (gimple_clobber_p (store->stmt))
3876 num_clobber_stmts++;
3877 else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
3878 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
3879 && group->start == store->bitpos
3880 && group->width == store->bitsize
3881 && (group->start % BITS_PER_UNIT) == 0
3882 && (group->width % BITS_PER_UNIT) == 0)
3883 {
3884 bzero_first = true;
3885 break;
3886 }
3887 else
3888 break;
3889 FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
3890 if (gimple_clobber_p (store->stmt))
3891 num_clobber_stmts++;
3892 if (num_clobber_stmts == orig_num_stmts)
3893 return false;
3894 orig_num_stmts -= num_clobber_stmts;
3895 }
3afd514b 3896 if (allow_unaligned_store || bzero_first)
a62b3dc5
JJ
3897 {
3898 /* If unaligned stores are allowed, see how many stores we'd emit
3899 for unaligned and how many stores we'd emit for aligned stores.
3afd514b
JJ
3900 Only use unaligned stores if it allows fewer stores than aligned.
3901 Similarly, if there is a whole region clear first, prefer expanding
3902 it together compared to expanding clear first followed by merged
3903 further stores. */
21f65995 3904 unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
3afd514b
JJ
3905 int pass_min = 0;
3906 for (int pass = 0; pass < 4; ++pass)
3907 {
3908 if (!allow_unaligned_store && (pass & 1) != 0)
3909 continue;
3910 if (!bzero_first && (pass & 2) != 0)
3911 continue;
3912 cnt[pass] = split_group (group, (pass & 1) != 0,
3913 allow_unaligned_load, (pass & 2) != 0,
3914 NULL, NULL, NULL);
3915 if (cnt[pass] < cnt[pass_min])
3916 pass_min = pass;
3917 }
3918 if ((pass_min & 1) == 0)
245f6de1 3919 allow_unaligned_store = false;
3afd514b
JJ
3920 if ((pass_min & 2) == 0)
3921 bzero_first = false;
a62b3dc5 3922 }
e362a897
EB
3923
3924 auto_vec<class split_store *, 32> split_stores;
3925 split_store *split_store;
3926 unsigned total_orig, total_new, i;
3afd514b 3927 split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
d7a9512e 3928 &split_stores, &total_orig, &total_new);
a62b3dc5 3929
5384a802
JJ
3930 /* Determine if there is a clobber covering the whole group at the start,
3931 followed by proposed split stores that cover the whole group. In that
3932 case, prefer the transformation even if
3933 split_stores.length () == orig_num_stmts. */
3934 bool clobber_first = false;
3935 if (num_clobber_stmts
3936 && gimple_clobber_p (group->stores[0]->stmt)
3937 && group->start == group->stores[0]->bitpos
3938 && group->width == group->stores[0]->bitsize
3939 && (group->start % BITS_PER_UNIT) == 0
3940 && (group->width % BITS_PER_UNIT) == 0)
3941 {
3942 clobber_first = true;
3943 unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
3944 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3945 if (split_store->bytepos != pos)
3946 {
3947 clobber_first = false;
3948 break;
3949 }
3950 else
3951 pos += split_store->size / BITS_PER_UNIT;
3952 if (pos != (group->start + group->width) / BITS_PER_UNIT)
3953 clobber_first = false;
3954 }
3955
3956 if (split_stores.length () >= orig_num_stmts + clobber_first)
a62b3dc5 3957 {
5384a802 3958
a62b3dc5
JJ
3959 /* We didn't manage to reduce the number of statements. Bail out. */
3960 if (dump_file && (dump_flags & TDF_DETAILS))
d7a9512e
JJ
3961 fprintf (dump_file, "Exceeded original number of stmts (%u)."
3962 " Not profitable to emit new sequence.\n",
3963 orig_num_stmts);
dd172744
RB
3964 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3965 delete split_store;
a62b3dc5
JJ
3966 return false;
3967 }
d7a9512e
JJ
3968 if (total_orig <= total_new)
3969 {
3970 /* If number of estimated new statements is above estimated original
3971 statements, bail out too. */
3972 if (dump_file && (dump_flags & TDF_DETAILS))
3973 fprintf (dump_file, "Estimated number of original stmts (%u)"
3974 " not larger than estimated number of new"
3975 " stmts (%u).\n",
3976 total_orig, total_new);
dd172744
RB
3977 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3978 delete split_store;
4b84d9b8 3979 return false;
d7a9512e 3980 }
5384a802
JJ
3981 if (group->stores[0]->rhs_code == INTEGER_CST)
3982 {
3983 bool all_orig = true;
3984 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3985 if (!split_store->orig)
3986 {
3987 all_orig = false;
3988 break;
3989 }
3990 if (all_orig)
3991 {
3992 unsigned int cnt = split_stores.length ();
3993 store_immediate_info *store;
3994 FOR_EACH_VEC_ELT (group->stores, i, store)
3995 if (gimple_clobber_p (store->stmt))
3996 ++cnt;
3997 /* Punt if we wouldn't make any real changes, i.e. keep all
3998 orig stmts + all clobbers. */
3999 if (cnt == group->stores.length ())
4000 {
4001 if (dump_file && (dump_flags & TDF_DETAILS))
4002 fprintf (dump_file, "Exceeded original number of stmts (%u)."
4003 " Not profitable to emit new sequence.\n",
4004 orig_num_stmts);
4005 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4006 delete split_store;
4007 return false;
4008 }
4009 }
4010 }
f663d9ad
KT
4011
4012 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
4013 gimple_seq seq = NULL;
f663d9ad
KT
4014 tree last_vdef, new_vuse;
4015 last_vdef = gimple_vdef (group->last_stmt);
4016 new_vuse = gimple_vuse (group->last_stmt);
4b84d9b8
JJ
4017 tree bswap_res = NULL_TREE;
4018
5384a802
JJ
4019 /* Clobbers are not removed. */
4020 if (gimple_clobber_p (group->last_stmt))
4021 {
4022 new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
4023 gimple_set_vdef (group->last_stmt, new_vuse);
4024 }
4025
4b84d9b8
JJ
4026 if (group->stores[0]->rhs_code == LROTATE_EXPR
4027 || group->stores[0]->rhs_code == NOP_EXPR)
4028 {
4029 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
4030 gimple *ins_stmt = group->stores[0]->ins_stmt;
4031 struct symbolic_number *n = &group->stores[0]->n;
4032 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
4033
4034 switch (n->range)
4035 {
4036 case 16:
4037 load_type = bswap_type = uint16_type_node;
4038 break;
4039 case 32:
4040 load_type = uint32_type_node;
4041 if (bswap)
4042 {
4043 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
4044 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4045 }
4046 break;
4047 case 64:
4048 load_type = uint64_type_node;
4049 if (bswap)
4050 {
4051 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
4052 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4053 }
4054 break;
4055 default:
4056 gcc_unreachable ();
4057 }
4058
4059      /* If each load has the vuse of the corresponding store,
4060	 we've checked the aliasing already in try_coalesce_bswap and
4061	 we want to sink the needed load into seq.  So we need to use
4062	 new_vuse on the load.  */
30fa8e9c 4063 if (n->base_addr)
4b84d9b8 4064 {
30fa8e9c
JJ
4065 if (n->vuse == NULL)
4066 {
4067 n->vuse = new_vuse;
4068 ins_stmt = NULL;
4069 }
4070 else
4071 /* Update vuse in case it has changed by output_merged_stores. */
4072 n->vuse = gimple_vuse (ins_stmt);
4b84d9b8
JJ
4073 }
4074 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
4075 bswap_type, load_type, n, bswap);
4076 gcc_assert (bswap_res);
4077 }
f663d9ad
KT
4078
4079 gimple *stmt = NULL;
245f6de1 4080 auto_vec<gimple *, 32> orig_stmts;
4b84d9b8
JJ
4081 gimple_seq this_seq;
4082 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
aa55dc0c 4083 is_gimple_mem_ref_addr, NULL_TREE);
4b84d9b8 4084 gimple_seq_add_seq_without_update (&seq, this_seq);
245f6de1
JJ
4085
4086 tree load_addr[2] = { NULL_TREE, NULL_TREE };
4087 gimple_seq load_seq[2] = { NULL, NULL };
4088 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
4089 for (int j = 0; j < 2; ++j)
4090 {
4091 store_operand_info &op = group->stores[0]->ops[j];
4092 if (op.base_addr == NULL_TREE)
4093 continue;
4094
4095 store_immediate_info *infol = group->stores.last ();
4096 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
4097 {
97031af7
JJ
4098 /* We can't pick the location randomly; while we've verified
4099	     all the loads have the same vuse, they can still be in different
4100 basic blocks and we need to pick the one from the last bb:
4101 int x = q[0];
4102 if (x == N) return;
4103 int y = q[1];
4104 p[0] = x;
4105 p[1] = y;
4106 otherwise if we put the wider load at the q[0] load, we might
4107 segfault if q[1] is not mapped. */
4108 basic_block bb = gimple_bb (op.stmt);
4109 gimple *ostmt = op.stmt;
4110 store_immediate_info *info;
4111 FOR_EACH_VEC_ELT (group->stores, i, info)
4112 {
4113 gimple *tstmt = info->ops[j].stmt;
4114 basic_block tbb = gimple_bb (tstmt);
4115 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
4116 {
4117 ostmt = tstmt;
4118 bb = tbb;
4119 }
4120 }
4121 load_gsi[j] = gsi_for_stmt (ostmt);
245f6de1
JJ
4122 load_addr[j]
4123 = force_gimple_operand_1 (unshare_expr (op.base_addr),
4124 &load_seq[j], is_gimple_mem_ref_addr,
4125 NULL_TREE);
4126 }
4127 else if (operand_equal_p (base_addr, op.base_addr, 0))
4128 load_addr[j] = addr;
4129 else
3e2927a1 4130 {
3e2927a1
JJ
4131 load_addr[j]
4132 = force_gimple_operand_1 (unshare_expr (op.base_addr),
4133 &this_seq, is_gimple_mem_ref_addr,
4134 NULL_TREE);
4135 gimple_seq_add_seq_without_update (&seq, this_seq);
4136 }
245f6de1
JJ
4137 }
4138
f663d9ad
KT
4139 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4140 {
e362a897
EB
4141 const unsigned HOST_WIDE_INT try_size = split_store->size;
4142 const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
4143 const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
4144 const unsigned HOST_WIDE_INT try_align = split_store->align;
4145 const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
a62b3dc5
JJ
4146 tree dest, src;
4147 location_t loc;
e362a897 4148
a62b3dc5
JJ
4149 if (split_store->orig)
4150 {
5384a802
JJ
4151 /* If there is just a single non-clobber constituent store
4152 which covers the whole area, just reuse the lhs and rhs. */
4153 gimple *orig_stmt = NULL;
4154 store_immediate_info *store;
4155 unsigned int j;
4156 FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4157 if (!gimple_clobber_p (store->stmt))
4158 {
4159 orig_stmt = store->stmt;
4160 break;
4161 }
245f6de1
JJ
4162 dest = gimple_assign_lhs (orig_stmt);
4163 src = gimple_assign_rhs1 (orig_stmt);
4164 loc = gimple_location (orig_stmt);
a62b3dc5
JJ
4165 }
4166 else
4167 {
245f6de1
JJ
4168 store_immediate_info *info;
4169 unsigned short clique, base;
4170 unsigned int k;
4171 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4172 orig_stmts.safe_push (info->stmt);
a62b3dc5 4173 tree offset_type
245f6de1 4174 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
e362a897 4175 tree dest_type;
245f6de1
JJ
4176 loc = get_location_for_stmts (orig_stmts);
4177 orig_stmts.truncate (0);
a62b3dc5 4178
e362a897
EB
4179 if (group->string_concatenation)
4180 dest_type
4181 = build_array_type_nelts (char_type_node,
4182 try_size / BITS_PER_UNIT);
4183 else
4184 {
4185 dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4186 dest_type = build_aligned_type (dest_type, try_align);
4187 }
4188 dest = fold_build2 (MEM_REF, dest_type, addr,
a62b3dc5 4189 build_int_cst (offset_type, try_pos));
245f6de1
JJ
4190 if (TREE_CODE (dest) == MEM_REF)
4191 {
4192 MR_DEPENDENCE_CLIQUE (dest) = clique;
4193 MR_DEPENDENCE_BASE (dest) = base;
4194 }
4195
c94c3532 4196 tree mask;
e362a897 4197 if (bswap_res || group->string_concatenation)
c94c3532
EB
4198 mask = integer_zero_node;
4199 else
e362a897
EB
4200 mask = native_interpret_expr (dest_type,
4201 group->mask + try_offset,
4b84d9b8 4202 group->buf_size);
245f6de1
JJ
4203
4204 tree ops[2];
4205 for (int j = 0;
4206 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4207 ++j)
4208 {
4209 store_operand_info &op = split_store->orig_stores[0]->ops[j];
4b84d9b8
JJ
4210 if (bswap_res)
4211 ops[j] = bswap_res;
e362a897
EB
4212 else if (group->string_concatenation)
4213 {
4214 ops[j] = build_string (try_size / BITS_PER_UNIT,
4215 (const char *) group->val + try_offset);
4216 TREE_TYPE (ops[j]) = dest_type;
4217 }
4b84d9b8 4218 else if (op.base_addr)
245f6de1
JJ
4219 {
4220 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4221 orig_stmts.safe_push (info->ops[j].stmt);
4222
4223 offset_type = get_alias_type_for_stmts (orig_stmts, true,
4224 &clique, &base);
4225 location_t load_loc = get_location_for_stmts (orig_stmts);
4226 orig_stmts.truncate (0);
4227
4228 unsigned HOST_WIDE_INT load_align = group->load_align[j];
4229 unsigned HOST_WIDE_INT align_bitpos
c94c3532 4230 = known_alignment (try_bitpos
8a91d545
RS
4231 - split_store->orig_stores[0]->bitpos
4232 + op.bitpos);
4233 if (align_bitpos & (load_align - 1))
245f6de1
JJ
4234 load_align = least_bit_hwi (align_bitpos);
4235
4236 tree load_int_type
4237 = build_nonstandard_integer_type (try_size, UNSIGNED);
4238 load_int_type
4239 = build_aligned_type (load_int_type, load_align);
4240
8a91d545 4241 poly_uint64 load_pos
c94c3532 4242 = exact_div (try_bitpos
8a91d545
RS
4243 - split_store->orig_stores[0]->bitpos
4244 + op.bitpos,
4245 BITS_PER_UNIT);
245f6de1
JJ
4246 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4247 build_int_cst (offset_type, load_pos));
4248 if (TREE_CODE (ops[j]) == MEM_REF)
4249 {
4250 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4251 MR_DEPENDENCE_BASE (ops[j]) = base;
4252 }
4253 if (!integer_zerop (mask))
4254 /* The load might load some bits (that will be masked off
4255 later on) uninitialized, avoid -W*uninitialized
4256 warnings in that case. */
4257 TREE_NO_WARNING (ops[j]) = 1;
4258
e362a897 4259 stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
245f6de1
JJ
4260 gimple_set_location (stmt, load_loc);
4261 if (gsi_bb (load_gsi[j]))
4262 {
4263 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4264 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4265 }
4266 else
4267 {
4268 gimple_set_vuse (stmt, new_vuse);
4269 gimple_seq_add_stmt_without_update (&seq, stmt);
4270 }
4271 ops[j] = gimple_assign_lhs (stmt);
a6fbd154
JJ
4272 tree xor_mask;
4273 enum tree_code inv_op
e362a897 4274 = invert_op (split_store, j, dest_type, xor_mask);
a6fbd154 4275 if (inv_op != NOP_EXPR)
383ac8dc 4276 {
e362a897 4277 stmt = gimple_build_assign (make_ssa_name (dest_type),
a6fbd154 4278 inv_op, ops[j], xor_mask);
383ac8dc
JJ
4279 gimple_set_location (stmt, load_loc);
4280 ops[j] = gimple_assign_lhs (stmt);
4281
4282 if (gsi_bb (load_gsi[j]))
4283 gimple_seq_add_stmt_without_update (&load_seq[j],
4284 stmt);
4285 else
4286 gimple_seq_add_stmt_without_update (&seq, stmt);
4287 }
245f6de1
JJ
4288 }
4289 else
e362a897
EB
4290 ops[j] = native_interpret_expr (dest_type,
4291 group->val + try_offset,
245f6de1
JJ
4292 group->buf_size);
4293 }
4294
4295 switch (split_store->orig_stores[0]->rhs_code)
4296 {
4297 case BIT_AND_EXPR:
4298 case BIT_IOR_EXPR:
4299 case BIT_XOR_EXPR:
4300 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4301 {
4302 tree rhs1 = gimple_assign_rhs1 (info->stmt);
4303 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4304 }
4305 location_t bit_loc;
4306 bit_loc = get_location_for_stmts (orig_stmts);
4307 orig_stmts.truncate (0);
4308
4309 stmt
e362a897 4310 = gimple_build_assign (make_ssa_name (dest_type),
245f6de1
JJ
4311 split_store->orig_stores[0]->rhs_code,
4312 ops[0], ops[1]);
4313 gimple_set_location (stmt, bit_loc);
4314 /* If there is just one load and there is a separate
4315 load_seq[0], emit the bitwise op right after it. */
4316 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4317 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4318 /* Otherwise, if at least one load is in seq, we need to
4319 emit the bitwise op right before the store. If there
4320	       are two loads and they are emitted somewhere else, it would
4321 be better to emit the bitwise op as early as possible;
4322 we don't track where that would be possible right now
4323 though. */
4324 else
4325 gimple_seq_add_stmt_without_update (&seq, stmt);
4326 src = gimple_assign_lhs (stmt);
a6fbd154
JJ
4327 tree xor_mask;
4328 enum tree_code inv_op;
e362a897 4329 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
a6fbd154 4330 if (inv_op != NOP_EXPR)
d60edaba 4331 {
e362a897 4332 stmt = gimple_build_assign (make_ssa_name (dest_type),
a6fbd154 4333 inv_op, src, xor_mask);
d60edaba
JJ
4334 gimple_set_location (stmt, bit_loc);
4335 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4336 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4337 else
4338 gimple_seq_add_stmt_without_update (&seq, stmt);
4339 src = gimple_assign_lhs (stmt);
4340 }
245f6de1 4341 break;
4b84d9b8
JJ
4342 case LROTATE_EXPR:
4343 case NOP_EXPR:
4344 src = ops[0];
4345 if (!is_gimple_val (src))
4346 {
4347 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4348 src);
4349 gimple_seq_add_stmt_without_update (&seq, stmt);
4350 src = gimple_assign_lhs (stmt);
4351 }
e362a897 4352 if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
4b84d9b8 4353 {
e362a897 4354 stmt = gimple_build_assign (make_ssa_name (dest_type),
4b84d9b8
JJ
4355 NOP_EXPR, src);
4356 gimple_seq_add_stmt_without_update (&seq, stmt);
4357 src = gimple_assign_lhs (stmt);
4358 }
e362a897 4359 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
be52ac73
JJ
4360 if (inv_op != NOP_EXPR)
4361 {
e362a897 4362 stmt = gimple_build_assign (make_ssa_name (dest_type),
be52ac73
JJ
4363 inv_op, src, xor_mask);
4364 gimple_set_location (stmt, loc);
4365 gimple_seq_add_stmt_without_update (&seq, stmt);
4366 src = gimple_assign_lhs (stmt);
4367 }
4b84d9b8 4368 break;
245f6de1
JJ
4369 default:
4370 src = ops[0];
4371 break;
4372 }
4373
c94c3532
EB
4374 /* If bit insertion is required, we use the source as an accumulator
4375 into which the successive bit-field values are manually inserted.
4376 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4377 if (group->bit_insertion)
4378 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4379 if (info->rhs_code == BIT_INSERT_EXPR
4380 && info->bitpos < try_bitpos + try_size
4381 && info->bitpos + info->bitsize > try_bitpos)
4382 {
4383 /* Mask, truncate, convert to final type, shift and ior into
4384 the accumulator. Note that every step can be a no-op. */
4385 const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4386 const HOST_WIDE_INT end_gap
4387 = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4388 tree tem = info->ops[0].val;
ed01d707
EB
4389 if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
4390 {
4391 const unsigned HOST_WIDE_INT size
4392 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
4393 tree integer_type
4394 = build_nonstandard_integer_type (size, UNSIGNED);
4395 tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
4396 integer_type, tem);
4397 }
c14add82
EB
4398 if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4399 {
4400 tree bitfield_type
4401 = build_nonstandard_integer_type (info->bitsize,
4402 UNSIGNED);
4403 tem = gimple_convert (&seq, loc, bitfield_type, tem);
4404 }
4405 else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
c94c3532
EB
4406 {
4407 const unsigned HOST_WIDE_INT imask
4408 = (HOST_WIDE_INT_1U << info->bitsize) - 1;
4409 tem = gimple_build (&seq, loc,
4410 BIT_AND_EXPR, TREE_TYPE (tem), tem,
4411 build_int_cst (TREE_TYPE (tem),
4412 imask));
4413 }
4414 const HOST_WIDE_INT shift
4415 = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4416 if (shift < 0)
4417 tem = gimple_build (&seq, loc,
4418 RSHIFT_EXPR, TREE_TYPE (tem), tem,
4419 build_int_cst (NULL_TREE, -shift));
e362a897 4420 tem = gimple_convert (&seq, loc, dest_type, tem);
c94c3532
EB
4421 if (shift > 0)
4422 tem = gimple_build (&seq, loc,
e362a897 4423 LSHIFT_EXPR, dest_type, tem,
c94c3532
EB
4424 build_int_cst (NULL_TREE, shift));
4425 src = gimple_build (&seq, loc,
e362a897 4426 BIT_IOR_EXPR, dest_type, tem, src);
c94c3532
EB
4427 }
4428
a62b3dc5
JJ
4429 if (!integer_zerop (mask))
4430 {
e362a897 4431 tree tem = make_ssa_name (dest_type);
a62b3dc5
JJ
4432 tree load_src = unshare_expr (dest);
4433 /* The load might load some or all bits uninitialized,
4434 avoid -W*uninitialized warnings in that case.
4435	     As an optimization, if all the bits were provably
4436	     uninitialized (no stores at all yet, or the previous
4437	     store was a CLOBBER), we could optimize away the load
4438	     and replace it e.g. with 0.  */
4439 TREE_NO_WARNING (load_src) = 1;
4440 stmt = gimple_build_assign (tem, load_src);
4441 gimple_set_location (stmt, loc);
4442 gimple_set_vuse (stmt, new_vuse);
4443 gimple_seq_add_stmt_without_update (&seq, stmt);
4444
4445 /* FIXME: If there is a single chunk of zero bits in mask,
4446 perhaps use BIT_INSERT_EXPR instead? */
e362a897 4447 stmt = gimple_build_assign (make_ssa_name (dest_type),
a62b3dc5
JJ
4448 BIT_AND_EXPR, tem, mask);
4449 gimple_set_location (stmt, loc);
4450 gimple_seq_add_stmt_without_update (&seq, stmt);
4451 tem = gimple_assign_lhs (stmt);
4452
245f6de1 4453 if (TREE_CODE (src) == INTEGER_CST)
e362a897 4454 src = wide_int_to_tree (dest_type,
245f6de1
JJ
4455 wi::bit_and_not (wi::to_wide (src),
4456 wi::to_wide (mask)));
4457 else
4458 {
4459 tree nmask
e362a897 4460 = wide_int_to_tree (dest_type,
245f6de1 4461 wi::bit_not (wi::to_wide (mask)));
e362a897 4462 stmt = gimple_build_assign (make_ssa_name (dest_type),
245f6de1
JJ
4463 BIT_AND_EXPR, src, nmask);
4464 gimple_set_location (stmt, loc);
4465 gimple_seq_add_stmt_without_update (&seq, stmt);
4466 src = gimple_assign_lhs (stmt);
4467 }
e362a897 4468 stmt = gimple_build_assign (make_ssa_name (dest_type),
a62b3dc5
JJ
4469 BIT_IOR_EXPR, tem, src);
4470 gimple_set_location (stmt, loc);
4471 gimple_seq_add_stmt_without_update (&seq, stmt);
4472 src = gimple_assign_lhs (stmt);
4473 }
4474 }
f663d9ad
KT
4475
4476 stmt = gimple_build_assign (dest, src);
4477 gimple_set_location (stmt, loc);
4478 gimple_set_vuse (stmt, new_vuse);
4479 gimple_seq_add_stmt_without_update (&seq, stmt);
4480
629387a6
EB
4481 if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4482 add_stmt_to_eh_lp (stmt, group->lp_nr);
4483
f663d9ad
KT
4484 tree new_vdef;
4485 if (i < split_stores.length () - 1)
a62b3dc5 4486 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
f663d9ad
KT
4487 else
4488 new_vdef = last_vdef;
4489
4490 gimple_set_vdef (stmt, new_vdef);
4491 SSA_NAME_DEF_STMT (new_vdef) = stmt;
4492 new_vuse = new_vdef;
4493 }
4494
4495 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4496 delete split_store;
4497
f663d9ad
KT
4498 gcc_assert (seq);
4499 if (dump_file)
4500 {
4501 fprintf (dump_file,
c94c3532 4502 "New sequence of %u stores to replace old one of %u stores\n",
a62b3dc5 4503 split_stores.length (), orig_num_stmts);
f663d9ad
KT
4504 if (dump_flags & TDF_DETAILS)
4505 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4506 }
629387a6 4507
5384a802
JJ
4508 if (gimple_clobber_p (group->last_stmt))
4509 update_stmt (group->last_stmt);
4510
629387a6
EB
4511 if (group->lp_nr > 0)
4512 {
4513 /* We're going to insert a sequence of (potentially) throwing stores
4514 into an active EH region. This means that we're going to create
4515 new basic blocks with EH edges pointing to the post landing pad
4516 and, therefore, to have to update its PHI nodes, if any. For the
4517 virtual PHI node, we're going to use the VDEFs created above, but
4518 for the other nodes, we need to record the original reaching defs. */
4519 eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4520 basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4521 basic_block last_bb = gimple_bb (group->last_stmt);
4522 edge last_edge = find_edge (last_bb, lp_bb);
4523 auto_vec<tree, 16> last_defs;
4524 gphi_iterator gpi;
4525 for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4526 {
4527 gphi *phi = gpi.phi ();
4528 tree last_def;
4529 if (virtual_operand_p (gimple_phi_result (phi)))
4530 last_def = NULL_TREE;
4531 else
4532 last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4533 last_defs.safe_push (last_def);
4534 }
4535
4536 /* Do the insertion. Then, if new basic blocks have been created in the
4537	 process, rewind the chain of VDEFs created above to walk the new basic
4538 blocks and update the corresponding arguments of the PHI nodes. */
4539 update_modified_stmts (seq);
4540 if (gimple_find_sub_bbs (seq, &last_gsi))
4541 while (last_vdef != gimple_vuse (group->last_stmt))
4542 {
4543 gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4544 if (stmt_could_throw_p (cfun, stmt))
4545 {
4546 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4547 unsigned int i;
4548 for (gpi = gsi_start_phis (lp_bb), i = 0;
4549 !gsi_end_p (gpi);
4550 gsi_next (&gpi), i++)
4551 {
4552 gphi *phi = gpi.phi ();
4553 tree new_def;
4554 if (virtual_operand_p (gimple_phi_result (phi)))
4555 new_def = last_vdef;
4556 else
4557 new_def = last_defs[i];
4558 add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4559 }
4560 }
4561 last_vdef = gimple_vuse (stmt);
4562 }
4563 }
4564 else
4565 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4566
245f6de1
JJ
4567 for (int j = 0; j < 2; ++j)
4568 if (load_seq[j])
4569 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
f663d9ad
KT
4570
4571 return true;
4572}
4573
4574/* Process the merged_store_group objects created in the coalescing phase.
4575 The stores are all against the base object BASE.
4576 Try to output the widened stores and delete the original statements if
4577 successful. Return true iff any changes were made. */
4578
4579bool
b5926e23 4580imm_store_chain_info::output_merged_stores ()
f663d9ad
KT
4581{
4582 unsigned int i;
4583 merged_store_group *merged_store;
4584 bool ret = false;
4585 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4586 {
a95b474a
ML
4587 if (dbg_cnt (store_merging)
4588 && output_merged_store (merged_store))
f663d9ad
KT
4589 {
4590 unsigned int j;
4591 store_immediate_info *store;
4592 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4593 {
4594 gimple *stmt = store->stmt;
4595 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5384a802
JJ
4596 /* Don't remove clobbers, they are still useful even if
4597 everything is overwritten afterwards. */
4598 if (gimple_clobber_p (stmt))
4599 continue;
f663d9ad 4600 gsi_remove (&gsi, true);
629387a6
EB
4601 if (store->lp_nr)
4602 remove_stmt_from_eh_lp (stmt);
f663d9ad
KT
4603 if (stmt != merged_store->last_stmt)
4604 {
4605 unlink_stmt_vdef (stmt);
4606 release_defs (stmt);
4607 }
4608 }
4609 ret = true;
4610 }
4611 }
4612 if (ret && dump_file)
4613 fprintf (dump_file, "Merging successful!\n");
4614
4615 return ret;
4616}
4617
4618/* Coalesce the store_immediate_info objects recorded against the base object
4619 BASE in the first phase and output them.
4620 Delete the allocated structures.
4621 Return true if any changes were made. */
4622
4623bool
b5926e23 4624imm_store_chain_info::terminate_and_process_chain ()
f663d9ad
KT
4625{
4626 /* Process store chain. */
4627 bool ret = false;
4628 if (m_store_info.length () > 1)
4629 {
4630 ret = coalesce_immediate_stores ();
4631 if (ret)
b5926e23 4632 ret = output_merged_stores ();
f663d9ad
KT
4633 }
4634
4635 /* Delete all the entries we allocated ourselves. */
4636 store_immediate_info *info;
4637 unsigned int i;
4638 FOR_EACH_VEC_ELT (m_store_info, i, info)
4639 delete info;
4640
4641 merged_store_group *merged_info;
4642 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4643 delete merged_info;
4644
4645 return ret;
4646}
4647
4648/* Return true iff LHS is a destination potentially interesting for
4649 store merging. In practice these are the codes that get_inner_reference
4650 can process. */
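/* E.g. a plain DECL destination as well as s.f = ... (COMPONENT_REF),
   a[i] = ... (ARRAY_REF) and *p = ... (MEM_REF) destinations are
   accepted here; the precise location of the access is worked out
   separately by mem_valid_for_store_merging.  */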
4651
4652static bool
4653lhs_valid_for_store_merging_p (tree lhs)
4654{
629387a6 4655 if (DECL_P (lhs))
f663d9ad
KT
4656 return true;
4657
629387a6
EB
4658 switch (TREE_CODE (lhs))
4659 {
4660 case ARRAY_REF:
4661 case ARRAY_RANGE_REF:
4662 case BIT_FIELD_REF:
4663 case COMPONENT_REF:
4664 case MEM_REF:
e362a897 4665 case VIEW_CONVERT_EXPR:
629387a6
EB
4666 return true;
4667 default:
4668 return false;
4669 }
4670
4671 gcc_unreachable ();
f663d9ad
KT
4672}
4673
4674/* Return true if the tree RHS is a constant we want to consider
4675 during store merging. In practice accept all codes that
4676 native_encode_expr accepts. */
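/* E.g. integer and floating-point constants of fixed-size modes
   qualify, and so does an empty CONSTRUCTOR ({}) clearing an object
   whose size is a compile-time constant number of bytes.  */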
4677
4678static bool
4679rhs_valid_for_store_merging_p (tree rhs)
4680{
cf098191 4681 unsigned HOST_WIDE_INT size;
3afd514b 4682 if (TREE_CODE (rhs) == CONSTRUCTOR
3afd514b
JJ
4683 && CONSTRUCTOR_NELTS (rhs) == 0
4684 && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4685 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4686 return true;
cf098191
RS
4687 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4688 && native_encode_expr (rhs, NULL, size) != 0);
f663d9ad
KT
4689}
4690
629387a6
EB
4691/* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4692 and return true on success or false on failure. */
4693
4694static bool
4695adjust_bit_pos (poly_offset_int byte_off,
4696 poly_int64 *pbitpos,
4697 poly_uint64 *pbitregion_start,
4698 poly_uint64 *pbitregion_end)
4699{
4700 poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4701 bit_off += *pbitpos;
4702
4703 if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4704 {
4705 if (maybe_ne (*pbitregion_end, 0U))
4706 {
4707 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4708 bit_off += *pbitregion_start;
4709 if (bit_off.to_uhwi (pbitregion_start))
4710 {
4711 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4712 bit_off += *pbitregion_end;
4713 if (!bit_off.to_uhwi (pbitregion_end))
4714 *pbitregion_end = 0;
4715 }
4716 else
4717 *pbitregion_end = 0;
4718 }
4719 return true;
4720 }
4721 else
4722 return false;
4723}
4724
245f6de1
JJ
4725/* If MEM is a memory reference usable for store merging (either as
4726 store destination or for loads), return the non-NULL base_addr
4727 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4728   Otherwise return NULL; *PBITSIZE is still valid even in that
4729 case. */
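/* For instance, for an access to MEM[p + 16] the returned base is the
   pointer P itself and the 16-byte offset is folded into *PBITPOS as
   128 bits, so stores through p + 16, p + 20, ... can all be chained
   against the same base.  */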
4730
4731static tree
8a91d545
RS
4732mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4733 poly_uint64 *pbitpos,
4734 poly_uint64 *pbitregion_start,
4735 poly_uint64 *pbitregion_end)
245f6de1 4736{
8a91d545
RS
4737 poly_int64 bitsize, bitpos;
4738 poly_uint64 bitregion_start = 0, bitregion_end = 0;
245f6de1
JJ
4739 machine_mode mode;
4740 int unsignedp = 0, reversep = 0, volatilep = 0;
4741 tree offset;
4742 tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4743 &unsignedp, &reversep, &volatilep);
4744 *pbitsize = bitsize;
8a91d545 4745 if (known_eq (bitsize, 0))
245f6de1
JJ
4746 return NULL_TREE;
4747
4748 if (TREE_CODE (mem) == COMPONENT_REF
4749 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4750 {
4751 get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
8a91d545
RS
4752 if (maybe_ne (bitregion_end, 0U))
4753 bitregion_end += 1;
245f6de1
JJ
4754 }
4755
4756 if (reversep)
4757 return NULL_TREE;
4758
4759 /* We do not want to rewrite TARGET_MEM_REFs. */
4760 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4761 return NULL_TREE;
4762 /* In some cases get_inner_reference may return a
4763 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4764 canonicalize the base_addr to MEM_REF [ptr] and take
4765 byteoffset into account in the bitpos. This occurs in
4766 PR 23684 and this way we can catch more chains. */
4767 else if (TREE_CODE (base_addr) == MEM_REF)
4768 {
629387a6
EB
4769 if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4770 &bitregion_start, &bitregion_end))
245f6de1
JJ
4771 return NULL_TREE;
4772 base_addr = TREE_OPERAND (base_addr, 0);
4773 }
4774 /* get_inner_reference returns the base object, get at its
4775 address now. */
4776 else
4777 {
8a91d545 4778 if (maybe_lt (bitpos, 0))
245f6de1
JJ
4779 return NULL_TREE;
4780 base_addr = build_fold_addr_expr (base_addr);
4781 }
4782
629387a6 4783 if (offset)
245f6de1
JJ
4784 {
4785 /* If the access is variable offset then a base decl has to be
4786 address-taken to be able to emit pointer-based stores to it.
4787 ??? We might be able to get away with re-using the original
4788 base up to the first variable part and then wrapping that inside
4789 a BIT_FIELD_REF. */
4790 tree base = get_base_address (base_addr);
629387a6 4791 if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
245f6de1
JJ
4792 return NULL_TREE;
4793
629387a6
EB
4794 /* Similarly to above for the base, remove constant from the offset. */
4795 if (TREE_CODE (offset) == PLUS_EXPR
4796 && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4797 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
4798 &bitpos, &bitregion_start, &bitregion_end))
4799 offset = TREE_OPERAND (offset, 0);
4800
245f6de1
JJ
4801 base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
4802 base_addr, offset);
4803 }
4804
629387a6
EB
4805 if (known_eq (bitregion_end, 0U))
4806 {
4807 bitregion_start = round_down_to_byte_boundary (bitpos);
4808 bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
4809 }
4810
245f6de1
JJ
4811 *pbitsize = bitsize;
4812 *pbitpos = bitpos;
4813 *pbitregion_start = bitregion_start;
4814 *pbitregion_end = bitregion_end;
4815 return base_addr;
4816}
4817
4818/* Return true if STMT is a load that can be used for store merging.
4819 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
4820 BITREGION_END are properties of the corresponding store. */
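/* E.g. for _1 = *src_p; *dst_p = _1; the load qualifies if it cannot
   throw internally, has no volatile operands, loads exactly as many
   bits as the store writes and sits at a whole-byte distance from it,
   so that adjacent copies can later be merged into one wider
   load/store pair.  */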
4821
4822static bool
4823handled_load (gimple *stmt, store_operand_info *op,
8a91d545
RS
4824 poly_uint64 bitsize, poly_uint64 bitpos,
4825 poly_uint64 bitregion_start, poly_uint64 bitregion_end)
245f6de1 4826{
383ac8dc 4827 if (!is_gimple_assign (stmt))
245f6de1 4828 return false;
383ac8dc
JJ
4829 if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
4830 {
4831 tree rhs1 = gimple_assign_rhs1 (stmt);
4832 if (TREE_CODE (rhs1) == SSA_NAME
383ac8dc
JJ
4833 && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
4834 bitregion_start, bitregion_end))
4835 {
d60edaba
JJ
4836 /* Don't allow _1 = load; _2 = ~1; _3 = ~_2; which should have
4837 been optimized earlier, but if allowed here, would confuse the
4838 multiple uses counting. */
4839 if (op->bit_not_p)
4840 return false;
383ac8dc
JJ
4841 op->bit_not_p = !op->bit_not_p;
4842 return true;
4843 }
4844 return false;
4845 }
4846 if (gimple_vuse (stmt)
4847 && gimple_assign_load_p (stmt)
36bbc05d 4848 && !stmt_can_throw_internal (cfun, stmt)
245f6de1
JJ
4849 && !gimple_has_volatile_ops (stmt))
4850 {
4851 tree mem = gimple_assign_rhs1 (stmt);
4852 op->base_addr
4853 = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
4854 &op->bitregion_start,
4855 &op->bitregion_end);
4856 if (op->base_addr != NULL_TREE
8a91d545
RS
4857 && known_eq (op->bitsize, bitsize)
4858 && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
4859 && known_ge (op->bitpos - op->bitregion_start,
4860 bitpos - bitregion_start)
4861 && known_ge (op->bitregion_end - op->bitpos,
4862 bitregion_end - bitpos))
245f6de1
JJ
4863 {
4864 op->stmt = stmt;
4865 op->val = mem;
383ac8dc 4866 op->bit_not_p = false;
245f6de1
JJ
4867 return true;
4868 }
4869 }
4870 return false;
4871}
4872
629387a6
EB
4873/* Return the index number of the landing pad for STMT, if any. */
4874
4875static int
4876lp_nr_for_store (gimple *stmt)
4877{
4878 if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
4879 return 0;
4880
4881 if (!stmt_could_throw_p (cfun, stmt))
4882 return 0;
4883
4884 return lookup_stmt_eh_lp (stmt);
4885}

/* Record the store STMT for store merging optimization if it can be
   optimized.  Return true if any changes were made.  */

bool
pass_store_merging::process_store (gimple *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  poly_uint64 bitsize, bitpos = 0;
  poly_uint64 bitregion_start = 0, bitregion_end = 0;
  tree base_addr
    = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
				   &bitregion_start, &bitregion_end);
  if (known_eq (bitsize, 0U))
    return false;

  bool invalid = (base_addr == NULL_TREE
		  || (maybe_gt (bitsize,
				(unsigned int) MAX_BITSIZE_MODE_ANY_INT)
		      && TREE_CODE (rhs) != INTEGER_CST
		      && (TREE_CODE (rhs) != CONSTRUCTOR
			  || CONSTRUCTOR_NELTS (rhs) != 0)));
  enum tree_code rhs_code = ERROR_MARK;
  bool bit_not_p = false;
  struct symbolic_number n;
  gimple *ins_stmt = NULL;
  store_operand_info ops[2];
  if (invalid)
    ;
  else if (TREE_CODE (rhs) == STRING_CST)
    {
      rhs_code = STRING_CST;
      ops[0].val = rhs;
    }
  else if (rhs_valid_for_store_merging_p (rhs))
    {
      rhs_code = INTEGER_CST;
      ops[0].val = rhs;
    }
  else if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
      if (!is_gimple_assign (def_stmt))
	invalid = true;
      else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
			     bitregion_start, bitregion_end))
	rhs_code = MEM_REF;
      else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
	{
	  tree rhs1 = gimple_assign_rhs1 (def_stmt);
	  if (TREE_CODE (rhs1) == SSA_NAME
	      && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
	    {
	      bit_not_p = true;
	      def_stmt = SSA_NAME_DEF_STMT (rhs1);
	    }
	}

      if (rhs_code == ERROR_MARK && !invalid)
	switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
	  {
	  case BIT_AND_EXPR:
	  case BIT_IOR_EXPR:
	  case BIT_XOR_EXPR:
	    tree rhs1, rhs2;
	    rhs1 = gimple_assign_rhs1 (def_stmt);
	    rhs2 = gimple_assign_rhs2 (def_stmt);
	    invalid = true;
	    if (TREE_CODE (rhs1) != SSA_NAME)
	      break;
	    def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
	    if (!is_gimple_assign (def_stmt1)
		|| !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
				  bitregion_start, bitregion_end))
	      break;
	    if (rhs_valid_for_store_merging_p (rhs2))
	      ops[1].val = rhs2;
	    else if (TREE_CODE (rhs2) != SSA_NAME)
	      break;
	    else
	      {
		def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
		if (!is_gimple_assign (def_stmt2))
		  break;
		else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
					bitregion_start, bitregion_end))
		  break;
	      }
	    invalid = false;
	    break;
	  default:
	    invalid = true;
	    break;
	  }

      unsigned HOST_WIDE_INT const_bitsize;
      if (bitsize.is_constant (&const_bitsize)
	  && (const_bitsize % BITS_PER_UNIT) == 0
	  && const_bitsize <= 64
	  && multiple_p (bitpos, BITS_PER_UNIT))
	{
	  ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
	  if (ins_stmt)
	    {
	      uint64_t nn = n.n;
	      for (unsigned HOST_WIDE_INT i = 0;
		   i < const_bitsize;
		   i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
		if ((nn & MARKER_MASK) == 0
		    || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
		  {
		    ins_stmt = NULL;
		    break;
		  }
	      if (ins_stmt)
		{
		  if (invalid)
		    {
		      rhs_code = LROTATE_EXPR;
		      ops[0].base_addr = NULL_TREE;
		      ops[1].base_addr = NULL_TREE;
		    }
		  invalid = false;
		}
	    }
	}

      if (invalid
	  && bitsize.is_constant (&const_bitsize)
	  && ((const_bitsize % BITS_PER_UNIT) != 0
	      || !multiple_p (bitpos, BITS_PER_UNIT))
	  && const_bitsize <= MAX_FIXED_MODE_SIZE)
	{
	  /* Bypass a conversion to the bit-field type.  */
	  if (!bit_not_p
	      && is_gimple_assign (def_stmt)
	      && CONVERT_EXPR_CODE_P (rhs_code))
	    {
	      tree rhs1 = gimple_assign_rhs1 (def_stmt);
	      if (TREE_CODE (rhs1) == SSA_NAME
		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
		rhs = rhs1;
	    }
	  rhs_code = BIT_INSERT_EXPR;
	  bit_not_p = false;
	  ops[0].val = rhs;
	  ops[0].base_addr = NULL_TREE;
	  ops[1].base_addr = NULL_TREE;
	  invalid = false;
	}
    }
  else
    invalid = true;

  unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
  unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
  if (invalid
      || !bitsize.is_constant (&const_bitsize)
      || !bitpos.is_constant (&const_bitpos)
      || !bitregion_start.is_constant (&const_bitregion_start)
      || !bitregion_end.is_constant (&const_bitregion_end))
    return terminate_all_aliasing_chains (NULL, stmt);

  if (!ins_stmt)
    memset (&n, 0, sizeof (n));

  class imm_store_chain_info **chain_info = NULL;
  bool ret = false;
  if (base_addr)
    chain_info = m_stores.get (base_addr);

  store_immediate_info *info;
  if (chain_info)
    {
      unsigned int ord = (*chain_info)->m_store_info.length ();
      info = new store_immediate_info (const_bitsize, const_bitpos,
				       const_bitregion_start,
				       const_bitregion_end,
				       stmt, ord, rhs_code, n, ins_stmt,
				       bit_not_p, lp_nr_for_store (stmt),
				       ops[0], ops[1]);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Recording immediate store from stmt:\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	}
      (*chain_info)->m_store_info.safe_push (info);
      ret |= terminate_all_aliasing_chains (chain_info, stmt);
      /* If we reach the limit of stores to merge in a chain terminate and
	 process the chain now.  */
      if ((*chain_info)->m_store_info.length ()
	  == (unsigned int) param_max_stores_to_merge)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Reached maximum number of statements to merge:\n");
	  ret |= terminate_and_process_chain (*chain_info);
	}
      return ret;
    }

  /* Store aliases any existing chain?  */
  ret |= terminate_all_aliasing_chains (NULL, stmt);
  /* Start a new chain.  */
  class imm_store_chain_info *new_chain
    = new imm_store_chain_info (m_stores_head, base_addr);
  info = new store_immediate_info (const_bitsize, const_bitpos,
				   const_bitregion_start,
				   const_bitregion_end,
				   stmt, 0, rhs_code, n, ins_stmt,
				   bit_not_p, lp_nr_for_store (stmt),
				   ops[0], ops[1]);
  new_chain->m_store_info.safe_push (info);
  m_stores.put (base_addr, new_chain);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Starting new chain with statement:\n");
      print_gimple_stmt (dump_file, stmt, 0);
      fprintf (dump_file, "The base object is:\n");
      print_generic_expr (dump_file, base_addr);
      fprintf (dump_file, "\n");
    }
  return ret;
}
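
/* For illustration (hypothetical stores, in the [p] := imm notation used
   earlier in this file), process_store classifies the rhs roughly as:

     [p] := imm;           rhs_code INTEGER_CST
     [p] := [q];           rhs_code MEM_REF (via handled_load)
     [p] := [q] ^ imm;     rhs_code BIT_XOR_EXPR, ops[0] the load,
			   ops[1] the constant
     [p:3] := val;         rhs_code BIT_INSERT_EXPR for a bit-field store

   Byte-swapped or rotated values detected by find_bswap_or_nop_1 may be
   recorded with rhs_code LROTATE_EXPR.  Anything else only terminates
   aliasing chains.  */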

/* Return true if STMT is a store valid for store merging.  */

static bool
store_valid_for_store_merging_p (gimple *stmt)
{
  return gimple_assign_single_p (stmt)
	 && gimple_vdef (stmt)
	 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
	 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
}
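
/* Illustrative note: a plain store to memory such as [p] := imm qualifies,
   and so does a clobber (whose volatile flag is deliberately tolerated
   above), whereas a volatile store or an assignment without a virtual
   definition does not.  */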

enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };

/* Return the status of basic block BB wrt store merging.  */

static enum basic_block_status
get_status_for_store_merging (basic_block bb)
{
  unsigned int num_statements = 0;
  gimple_stmt_iterator gsi;
  edge e;

  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (is_gimple_debug (stmt))
	continue;

      if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
	break;
    }

  if (num_statements == 0)
    return BB_INVALID;

  if (cfun->can_throw_non_call_exceptions && cfun->eh
      && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
      && (e = find_fallthru_edge (bb->succs))
      && e->dest == bb->next_bb)
    return BB_EXTENDED_VALID;

  return num_statements >= 2 ? BB_VALID : BB_INVALID;
}
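
/* Sketch of the classification above: a block with at least two candidate
   stores is BB_VALID.  When non-call exceptions are enabled, a block with at
   least one candidate store whose last statement is itself a candidate store
   and which falls through to the immediately following block is
   BB_EXTENDED_VALID, so the execute loop below may keep a chain open across
   that edge.  Everything else is BB_INVALID.  */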

/* Entry point for the pass.  Go over each basic block recording chains of
   immediate stores.  Upon encountering a terminating statement (as defined
   by stmt_terminates_chain_p) process the recorded stores and emit the
   widened variants.  */

unsigned int
pass_store_merging::execute (function *fun)
{
  basic_block bb;
  hash_set<gimple *> orig_stmts;
  bool changed = false, open_chains = false;

  /* If the function can throw and catch non-call exceptions, we'll be trying
     to merge stores across different basic blocks so we need to first unsplit
     the EH edges in order to streamline the CFG of the function.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh)
    unsplit_eh_edges ();

  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      const basic_block_status bb_status = get_status_for_store_merging (bb);
      gimple_stmt_iterator gsi;

      if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
	{
	  changed |= terminate_and_process_all_chains ();
	  open_chains = false;
	}

      if (bb_status == BB_INVALID)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (is_gimple_debug (stmt))
	    continue;

	  if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
	    {
	      /* Terminate all chains.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Volatile access terminates "
				    "all chains\n");
	      changed |= terminate_and_process_all_chains ();
	      open_chains = false;
	      continue;
	    }

	  if (store_valid_for_store_merging_p (stmt))
	    changed |= process_store (stmt);
	  else
	    changed |= terminate_all_aliasing_chains (NULL, stmt);
	}

      if (bb_status == BB_EXTENDED_VALID)
	open_chains = true;
      else
	{
	  changed |= terminate_and_process_all_chains ();
	  open_chains = false;
	}
    }

  if (open_chains)
    changed |= terminate_and_process_all_chains ();

  /* If the function can throw and catch non-call exceptions and something
     changed during the pass, then the CFG has (very likely) changed too.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
    {
      free_dominance_info (CDI_DOMINATORS);
      return TODO_cleanup_cfg;
    }

  return 0;
}

} // anon namespace

/* Construct and return a store merging pass object.  */

gimple_opt_pass *
make_pass_store_merging (gcc::context *ctxt)
{
  return new pass_store_merging (ctxt);
}

#if CHECKING_P

namespace selftest {

/* Selftests for store merging helpers.  */

/* Assert that all elements of the byte arrays X and Y, both of length N,
   are equal.  */

static void
verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
{
  for (unsigned int i = 0; i < n; i++)
    {
      if (x[i] != y[i])
	{
	  fprintf (stderr, "Arrays do not match. X:\n");
	  dump_char_array (stderr, x, n);
	  fprintf (stderr, "Y:\n");
	  dump_char_array (stderr, y, n);
	}
      ASSERT_EQ (x[i], y[i]);
    }
}

/* Test that shift_bytes_in_array_left carries bits across bytes
   correctly.  */

static void
verify_shift_bytes_in_array_left (void)
{
  /* byte 1   | byte 0
     00011111 | 11100000.  */
  unsigned char orig[2] = { 0xe0, 0x1f };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);

  unsigned char expected[2] = { 0x80, 0x7f };
  shift_bytes_in_array_left (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_left (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}
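
/* Worked out, assuming in[0] holds the least significant byte for the left
   shift: { 0xe0, 0x1f } represents 0x1fe0, and 0x1fe0 << 2 == 0x7f80,
   i.e. { 0x80, 0x7f } as checked above.  */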

/* Test that shift_bytes_in_array_right carries bits across bytes
   correctly.  */

static void
verify_shift_bytes_in_array_right (void)
{
  /* byte 0   | byte 1
     00011111 | 11100000.  */
  unsigned char orig[2] = { 0x1f, 0xe0 };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);
  unsigned char expected[2] = { 0x07, 0xf8 };
  shift_bytes_in_array_right (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_right (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}
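
/* Worked out, assuming in[0] holds the most significant byte for the right
   shift: { 0x1f, 0xe0 } represents 0x1fe0, and 0x1fe0 >> 2 == 0x07f8,
   i.e. { 0x07, 0xf8 } as checked above.  */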

/* Test that clear_bit_region clears exactly the bits asked and
   nothing more.  */

static void
verify_clear_bit_region (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x1;
  expected[1] = 0;
  expected[2] = 0x80;
  verify_array_eq (in, expected, sizeof in);
}
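
/* Illustrative note (assuming 8-bit bytes): bit positions here count from
   the least significant bit of in[0] upwards, so the second call clears
   bits 1 through 22 and leaves only bit 0 (0x01 in in[0]) and bit 23
   (0x80 in in[2]) set, matching the expected values above.  */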

/* Test that clear_bit_region_be clears exactly the bits asked and
   nothing more.  */

static void
verify_clear_bit_region_be (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x80;
  expected[1] = 0;
  expected[2] = 0x1;
  verify_array_eq (in, expected, sizeof in);
}
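
/* Illustrative note (assuming 8-bit bytes): in the big-endian variant the
   start position counts down from the most significant bit of in[0], so the
   second call above starts at bit BITS_PER_UNIT - 2 and clears 22 bits,
   leaving only the msb of in[0] (0x80) and the lsb of in[2] (0x01) set.  */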

/* Run all of the selftests within this file.  */

void
store_merging_c_tests (void)
{
  verify_shift_bytes_in_array_left ();
  verify_shift_bytes_in_array_right ();
  verify_clear_bit_region ();
  verify_clear_bit_region_be ();
}

} // namespace selftest
#endif /* CHECKING_P.  */