/* GIMPLE store merging and byte swapping passes.
   Copyright (C) 2009-2022 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* The purpose of the store merging pass is to combine multiple memory stores
   of constant values, values loaded from memory, bitwise operations on those,
   or bit-field values, to consecutive locations, into fewer wider stores.

   For example, if we have a sequence performing four byte stores to
   consecutive memory locations:
   [p     ] := imm1;
   [p + 1B] := imm2;
   [p + 2B] := imm3;
   [p + 3B] := imm4;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.

   Or:
   [p     ] := [q     ];
   [p + 1B] := [q + 1B];
   [p + 2B] := [q + 2B];
   [p + 3B] := [q + 3B];
   if there is no overlap can be transformed into a single 4-byte
   load followed by single 4-byte store.

   Or:
   [p     ] := [q     ] ^ imm1;
   [p + 1B] := [q + 1B] ^ imm2;
   [p + 2B] := [q + 2B] ^ imm3;
   [p + 3B] := [q + 3B] ^ imm4;
   if there is no overlap can be transformed into a single 4-byte
   load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.

   Or:
   [p:1 ] := imm;
   [p:31] := val & 0x7FFFFFFF;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.

   The algorithm is applied to each basic block in three phases:

   1) Scan through the basic block and record assignments to destinations
   that can be expressed as a store to memory of a certain size at a certain
   bit offset from base expressions we can handle.  For bit-fields we also
   record the surrounding bit region, i.e. bits that could be stored in
   a read-modify-write operation when storing the bit-field.  Record store
   chains to different bases in a hash_map (m_stores) and make sure to
   terminate such chains when appropriate (for example when the stored
   values get used subsequently).
   These stores can be a result of structure element initializers, array stores
   etc.  A store_immediate_info object is recorded for every such store.
   Record as many such assignments to a single base as possible until a
   statement that interferes with the store sequence is encountered.
   Each store has up to 2 operands, which can be either a constant, a memory
   load or an SSA name, from which the value to be stored can be computed.
   At most one of the operands can be a constant.  The operands are recorded
   in store_operand_info struct.

   2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
   store_immediate_info objects) and coalesce contiguous stores into
   merged_store_group objects.  For bit-field stores, we don't need to
   require the stores to be contiguous, just their surrounding bit regions
   have to be contiguous.  If the expression being stored is different
   between adjacent stores, such as one store storing a constant and the
   following one storing a value loaded from memory, or if the loaded memory
   objects are not adjacent, a new merged_store_group is created as well.

   For example, given the stores:
   [p     ] := 0;
   [p + 1B] := 1;
   [p + 3B] := 0;
   [p + 4B] := 1;
   [p + 5B] := 0;
   [p + 6B] := 0;
   This phase would produce two merged_store_group objects, one recording the
   two bytes stored in the memory region [p : p + 1] and another
   recording the four bytes stored in the memory region [p + 3 : p + 6].

   3) The merged_store_group objects produced in phase 2) are processed
   to generate the sequence of wider stores that set the contiguous memory
   regions to the sequence of bytes that correspond to it.  This may emit
   multiple stores per store group to handle contiguous stores that are not
   of a size that is a power of 2.  For example it can try to emit a 40-bit
   store as a 32-bit store followed by an 8-bit store.
   We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
   or TARGET_SLOW_UNALIGNED_ACCESS settings.

   Note on endianness and example:
   Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
   [p     ] := 0x1234;
   [p + 2B] := 0x5678;
   [p + 4B] := 0xab;
   [p + 5B] := 0xcd;

   The memory layout for little-endian (LE) and big-endian (BE) must be:
   p |LE|BE|
   ---------
   0 |34|12|
   1 |12|34|
   2 |78|56|
   3 |56|78|
   4 |ab|ab|
   5 |cd|cd|

   To merge these into a single 48-bit merged value 'val' in phase 2)
   on little-endian we insert stores to higher (consecutive) bitpositions
   into the most significant bits of the merged value.
   The final merged value would be: 0xcdab56781234

   For big-endian we insert stores to higher bitpositions into the least
   significant bits of the merged value.
   The final merged value would be: 0x12345678abcd

   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
   followed by a 16-bit store.  Again, we must consider endianness when
   breaking down the 48-bit value 'val' computed above.
   For little endian we emit:
   [p]      (32-bit) := 0x56781234; // val & 0x0000ffffffff;
   [p + 4B] (16-bit) := 0xcdab;     // (val & 0xffff00000000) >> 32;

   Whereas for big-endian we emit:
   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
   [p + 4B] (16-bit) := 0xabcd;     // val & 0x00000000ffff;  */

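/* As an illustrative sketch only (the helper below is hypothetical and not
   part of this file), source code of the following shape is what phase 1)
   records and phase 3) rewrites into a single wider store on targets that
   allow it:

     void
     set_bytes (unsigned char *p)
     {
       p[0] = 0x11;   // [p     ] := imm1;
       p[1] = 0x22;   // [p + 1B] := imm2;
       p[2] = 0x33;   // [p + 2B] := imm3;
       p[3] = 0x44;   // [p + 3B] := imm4;
     }

   On a little-endian target the four byte stores can become one 32-bit
   store of 0x44332211; on big-endian, of 0x11223344.  */
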
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "builtins.h"
#include "fold-const.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "print-tree.h"
#include "tree-hash-traits.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "stor-layout.h"
#include "timevar.h"
#include "cfganal.h"
#include "cfgcleanup.h"
#include "tree-cfg.h"
#include "except.h"
#include "tree-eh.h"
#include "target.h"
#include "gimplify-me.h"
#include "rtl.h"
#include "expr.h"	/* For get_bit_range.  */
#include "optabs-tree.h"
#include "dbgcnt.h"
#include "selftest.h"

/* The maximum size (in bits) of the stores this pass should generate.  */
#define MAX_STORE_BITSIZE (BITS_PER_WORD)
#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)

/* Limit to bound the number of aliasing checks for loads with the same
   vuse as the corresponding store.  */
#define MAX_STORE_ALIAS_CHECKS 64

namespace {

struct bswap_stat
{
  /* Number of hand-written 16-bit nop / bswaps found.  */
  int found_16bit;

  /* Number of hand-written 32-bit nop / bswaps found.  */
  int found_32bit;

  /* Number of hand-written 64-bit nop / bswaps found.  */
  int found_64bit;
} nop_stats, bswap_stats;

/* A symbolic number structure is used to detect byte permutation and selection
   patterns of a source.  To achieve that, its field N contains an artificial
   number consisting of BITS_PER_MARKER sized markers tracking where each
   byte comes from in the source:

   0	   - target byte has the value 0
   FF	   - target byte has an unknown value (e.g. due to sign extension)
   1..size - marker value is the byte index in the source (0 for lsb).

   To detect permutations on memory sources (arrays and structures), a symbolic
   number is also associated:
   - a base address BASE_ADDR and an OFFSET giving the address of the source;
   - a range which gives the difference between the highest and lowest accessed
     memory location to make such a symbolic number;
   - the address SRC of the source element of lowest address as a convenience
     to easily get BASE_ADDR + offset + lowest bytepos;
   - number of expressions N_OPS bitwise ored together to represent
     approximate cost of the computation.

   Note 1: the range is different from size as size reflects the size of the
   type of the current expression.  For instance, for an array char a[],
   (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
   (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
   time a range of 1.

   Note 2: for non-memory sources, range holds the same value as size.

   Note 3: SRC points to the SSA_NAME in case of non-memory source.  */

struct symbolic_number {
  uint64_t n;
  tree type;
  tree base_addr;
  tree offset;
  poly_int64_pod bytepos;
  tree src;
  tree alias_set;
  tree vuse;
  unsigned HOST_WIDE_INT range;
  int n_ops;
};

#define BITS_PER_MARKER 8
#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
#define MARKER_BYTE_UNKNOWN MARKER_MASK
#define HEAD_MARKER(n, size) \
  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a nop.  The number is masked according to the size of
   the symbolic number before using it.  */
#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x08070605 << 32 | 0x04030201)

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a byte swap.  The number is masked according to the
   size of the symbolic number before using it.  */
#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x01020304 << 32 | 0x05060708)
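
/* A worked example of the encoding above (illustrative only; the variable
   names are hypothetical).  For a 32-bit unsigned X, init_symbolic_number
   sets N to 0x04030201: the least significant byte of the value comes from
   source byte 1 (the lsb), the most significant from source byte 4.  For a
   hand-written byte swap such as

     y = (x >> 24)
	 | ((x >> 8) & 0x0000ff00)
	 | ((x << 8) & 0x00ff0000)
	 | (x << 24);

   the shifts and masks permute the markers of the four subexpressions into
   0x00000004, 0x00000300, 0x00020000 and 0x01000000, and ORing them yields
   0x01020304, which matches CMPXCHG masked to 4 bytes, so a bswap is
   recognized; an unchanged 0x04030201 would instead match CMPNOP.  */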

/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
   number N.  Return false if the requested operation is not permitted
   on a symbolic number.  */

inline bool
do_shift_rotate (enum tree_code code,
		 struct symbolic_number *n,
		 int count)
{
  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
  unsigned head_marker;

  if (count < 0
      || count >= TYPE_PRECISION (n->type)
      || count % BITS_PER_UNIT != 0)
    return false;
  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;

  /* Zero out the extra bits of N in order to avoid them being shifted
     into the significant bits.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  switch (code)
    {
    case LSHIFT_EXPR:
      n->n <<= count;
      break;
    case RSHIFT_EXPR:
      head_marker = HEAD_MARKER (n->n, size);
      n->n >>= count;
      /* Arithmetic shift of signed type: result is dependent on the value.  */
      if (!TYPE_UNSIGNED (n->type) && head_marker)
	for (i = 0; i < count / BITS_PER_MARKER; i++)
	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
		  << ((size - 1 - i) * BITS_PER_MARKER);
      break;
    case LROTATE_EXPR:
      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
      break;
    case RROTATE_EXPR:
      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
      break;
    default:
      return false;
    }
  /* Zero unused bits for size.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
  return true;
}

/* Perform sanity checking for the symbolic number N and the gimple
   statement STMT.  */

inline bool
verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
{
  tree lhs_type;

  lhs_type = TREE_TYPE (gimple_get_lhs (stmt));

  if (TREE_CODE (lhs_type) != INTEGER_TYPE
      && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
    return false;

  if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
    return false;

  return true;
}

/* Initialize the symbolic number N for the bswap pass from the base element
   SRC manipulated by the bitwise OR expression.  */

bool
init_symbolic_number (struct symbolic_number *n, tree src)
{
  int size;

  if (!INTEGRAL_TYPE_P (TREE_TYPE (src)) && !POINTER_TYPE_P (TREE_TYPE (src)))
    return false;

  n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
  n->src = src;

  /* Set up the symbolic number N by setting each byte to a value between 1 and
     the byte size of rhs1.  The highest order byte is set to n->size and the
     lowest order byte to 1.  */
  n->type = TREE_TYPE (src);
  size = TYPE_PRECISION (n->type);
  if (size % BITS_PER_UNIT != 0)
    return false;
  size /= BITS_PER_UNIT;
  if (size > 64 / BITS_PER_MARKER)
    return false;
  n->range = size;
  n->n = CMPNOP;
  n->n_ops = 1;

  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  return true;
}

/* Check if STMT might be a byte swap or a nop from a memory source and return
   the answer.  If so, REF is that memory source and the base of the memory
   area accessed and the offset of the access from that base are recorded
   in N.  */

bool
find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
{
  /* Leaf node is an array or component ref.  Memorize its base and
     offset from base to compare to other such leaf node.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset, base_addr;

  /* Not prepared to handle PDP endian.  */
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return false;

  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
    return false;

  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
				   &unsignedp, &reversep, &volatilep);

  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
    /* Do not rewrite TARGET_MEM_REF.  */
    return false;
  else if (TREE_CODE (base_addr) == MEM_REF)
    {
      poly_offset_int bit_offset = 0;
      tree off = TREE_OPERAND (base_addr, 1);

      if (!integer_zerop (off))
	{
	  poly_offset_int boff = mem_ref_offset (base_addr);
	  boff <<= LOG2_BITS_PER_UNIT;
	  bit_offset += boff;
	}

      base_addr = TREE_OPERAND (base_addr, 0);

      /* Avoid returning a negative bitpos as this may wreak havoc later.  */
      if (maybe_lt (bit_offset, 0))
	{
	  tree byte_offset = wide_int_to_tree
	    (sizetype, bits_to_bytes_round_down (bit_offset));
	  bit_offset = num_trailing_bits (bit_offset);
	  if (offset)
	    offset = size_binop (PLUS_EXPR, offset, byte_offset);
	  else
	    offset = byte_offset;
	}

      bitpos += bit_offset.force_shwi ();
    }
  else
    base_addr = build_fold_addr_expr (base_addr);

  if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return false;
  if (!multiple_p (bitsize, BITS_PER_UNIT))
    return false;
  if (reversep)
    return false;

  if (!init_symbolic_number (n, ref))
    return false;
  n->base_addr = base_addr;
  n->offset = offset;
  n->bytepos = bytepos;
  n->alias_set = reference_alias_ptr_type (ref);
  n->vuse = gimple_vuse (stmt);
  return true;
}

/* Compute the symbolic number N representing the result of a bitwise OR,
   bitwise XOR or plus on 2 symbolic numbers N1 and N2 whose source statements
   are respectively SOURCE_STMT1 and SOURCE_STMT2.  CODE is the operation.  */

gimple *
perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
			gimple *source_stmt2, struct symbolic_number *n2,
			struct symbolic_number *n, enum tree_code code)
{
  int i, size;
  uint64_t mask;
  gimple *source_stmt;
  struct symbolic_number *n_start;

  tree rhs1 = gimple_assign_rhs1 (source_stmt1);
  if (TREE_CODE (rhs1) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
    rhs1 = TREE_OPERAND (rhs1, 0);
  tree rhs2 = gimple_assign_rhs1 (source_stmt2);
  if (TREE_CODE (rhs2) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
    rhs2 = TREE_OPERAND (rhs2, 0);

  /* Sources are different, cancel bswap if they are not memory locations with
     the same base (array, structure, ...).  */
  if (rhs1 != rhs2)
    {
      uint64_t inc;
      HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
      struct symbolic_number *toinc_n_ptr, *n_end;
      basic_block bb1, bb2;

      if (!n1->base_addr || !n2->base_addr
	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
	return NULL;

      if (!n1->offset != !n2->offset
	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
	return NULL;

      start1 = 0;
      if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
	return NULL;

      if (start1 < start2)
	{
	  n_start = n1;
	  start_sub = start2 - start1;
	}
      else
	{
	  n_start = n2;
	  start_sub = start1 - start2;
	}

      bb1 = gimple_bb (source_stmt1);
      bb2 = gimple_bb (source_stmt2);
      if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
	source_stmt = source_stmt1;
      else
	source_stmt = source_stmt2;

      /* Find the highest address at which a load is performed and
	 compute related info.  */
      end1 = start1 + (n1->range - 1);
      end2 = start2 + (n2->range - 1);
      if (end1 < end2)
	{
	  end = end2;
	  end_sub = end2 - end1;
	}
      else
	{
	  end = end1;
	  end_sub = end1 - end2;
	}
      n_end = (end2 > end1) ? n2 : n1;

      /* Find symbolic number whose lsb is the most significant.  */
      if (BYTES_BIG_ENDIAN)
	toinc_n_ptr = (n_end == n1) ? n2 : n1;
      else
	toinc_n_ptr = (n_start == n1) ? n2 : n1;

      n->range = end - MIN (start1, start2) + 1;

      /* Check that the range of memory covered can be represented by
	 a symbolic number.  */
      if (n->range > 64 / BITS_PER_MARKER)
	return NULL;

      /* Reinterpret byte marks in symbolic number holding the value of
	 bigger weight according to target endianness.  */
      inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
      size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
      for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
	{
	  unsigned marker
	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
	  if (marker && marker != MARKER_BYTE_UNKNOWN)
	    toinc_n_ptr->n += inc;
	}
    }
  else
    {
      n->range = n1->range;
      n_start = n1;
      source_stmt = source_stmt1;
    }

  if (!n1->alias_set
      || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
    n->alias_set = n1->alias_set;
  else
    n->alias_set = ptr_type_node;
  n->vuse = n_start->vuse;
  n->base_addr = n_start->base_addr;
  n->offset = n_start->offset;
  n->src = n_start->src;
  n->bytepos = n_start->bytepos;
  n->type = n_start->type;
  size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
  uint64_t res_n = n1->n | n2->n;

  for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
    {
      uint64_t masked1, masked2;

      masked1 = n1->n & mask;
      masked2 = n2->n & mask;
      /* If at least one byte is 0, all of 0 | x == 0 ^ x == 0 + x == x.  */
      if (masked1 && masked2)
	{
	  /* + can carry into upper bits, just punt.  */
	  if (code == PLUS_EXPR)
	    return NULL;
	  /* x | x is still x.  */
	  if (code == BIT_IOR_EXPR && masked1 == masked2)
	    continue;
	  if (code == BIT_XOR_EXPR)
	    {
	      /* x ^ x is 0, but MARKER_BYTE_UNKNOWN stands for
		 unknown values and unknown ^ unknown is unknown.  */
	      if (masked1 == masked2
		  && masked1 != ((uint64_t) MARKER_BYTE_UNKNOWN
				 << i * BITS_PER_MARKER))
		{
		  res_n &= ~mask;
		  continue;
		}
	    }
	  /* Otherwise set the byte to unknown, it might still be
	     later masked off.  */
	  res_n |= mask;
	}
    }
  n->n = res_n;
  n->n_ops = n1->n_ops + n2->n_ops;

  return source_stmt;
}

/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   could successfully be executed the function returns a gimple stmt whose
   rhs's first tree is the expression of the source operand and NULL
   otherwise.  */

gimple *
find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
{
  enum tree_code code;
  tree rhs1, rhs2 = NULL;
  gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
  enum gimple_rhs_class rhs_class;

  if (!limit || !is_gimple_assign (stmt))
    return NULL;

  rhs1 = gimple_assign_rhs1 (stmt);

  if (find_bswap_or_nop_load (stmt, rhs1, n))
    return stmt;

  /* Handle BIT_FIELD_REF.  */
  if (TREE_CODE (rhs1) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
    {
      if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
	  || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
	return NULL;

      unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
      unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
      if (bitpos % BITS_PER_UNIT == 0
	  && bitsize % BITS_PER_UNIT == 0
	  && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
	{
	  /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
	  if (BYTES_BIG_ENDIAN)
	    bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;

	  /* Shift.  */
	  if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
	    return NULL;

	  /* Mask.  */
	  uint64_t mask = 0;
	  uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
	  for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
	       i++, tmp <<= BITS_PER_UNIT)
	    mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
	  n->n &= mask;

	  /* Convert.  */
	  n->type = TREE_TYPE (rhs1);
	  if (!n->base_addr)
	    n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

	  return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
	}

      return NULL;
    }

  if (TREE_CODE (rhs1) != SSA_NAME)
    return NULL;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = gimple_assign_rhs_class (stmt);
  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);

  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  /* Handle unary rhs and binary rhs with integer constants as second
     operand.  */

  if (rhs_class == GIMPLE_UNARY_RHS
      || (rhs_class == GIMPLE_BINARY_RHS
	  && TREE_CODE (rhs2) == INTEGER_CST))
    {
      if (code != BIT_AND_EXPR
	  && code != LSHIFT_EXPR
	  && code != RSHIFT_EXPR
	  && code != LROTATE_EXPR
	  && code != RROTATE_EXPR
	  && !CONVERT_EXPR_CODE_P (code))
	return NULL;

      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);

      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
	 we have to initialize the symbolic number.  */
      if (!source_stmt1)
	{
	  if (gimple_assign_load_p (stmt)
	      || !init_symbolic_number (n, rhs1))
	    return NULL;
	  source_stmt1 = stmt;
	}

      switch (code)
	{
	case BIT_AND_EXPR:
	  {
	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	    uint64_t val = int_cst_value (rhs2), mask = 0;
	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;

	    /* Only constants masking full bytes are allowed.  */
	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
	      if ((val & tmp) != 0 && (val & tmp) != tmp)
		return NULL;
	      else if (val & tmp)
		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);

	    n->n &= mask;
	  }
	  break;
	case LSHIFT_EXPR:
	case RSHIFT_EXPR:
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
	    return NULL;
	  break;
	CASE_CONVERT:
	  {
	    int i, type_size, old_type_size;
	    tree type;

	    type = TREE_TYPE (gimple_assign_lhs (stmt));
	    type_size = TYPE_PRECISION (type);
	    if (type_size % BITS_PER_UNIT != 0)
	      return NULL;
	    type_size /= BITS_PER_UNIT;
	    if (type_size > 64 / BITS_PER_MARKER)
	      return NULL;

	    /* Sign extension: result is dependent on the value.  */
	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
		&& HEAD_MARKER (n->n, old_type_size))
	      for (i = 0; i < type_size - old_type_size; i++)
		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
			<< ((type_size - 1 - i) * BITS_PER_MARKER);

	    if (type_size < 64 / BITS_PER_MARKER)
	      {
		/* If STMT casts to a smaller type mask out the bits not
		   belonging to the target type.  */
		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
	      }
	    n->type = type;
	    if (!n->base_addr)
	      n->range = type_size;
	  }
	  break;
	default:
	  return NULL;
	};
      return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
    }

  /* Handle binary rhs.  */

  if (rhs_class == GIMPLE_BINARY_RHS)
    {
      struct symbolic_number n1, n2;
      gimple *source_stmt, *source_stmt2;

      if (!rhs2 || TREE_CODE (rhs2) != SSA_NAME)
	return NULL;

      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);

      switch (code)
	{
	case BIT_IOR_EXPR:
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);

	  if (!source_stmt1)
	    return NULL;

	  source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);

	  if (!source_stmt2)
	    return NULL;

	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
	    return NULL;

	  if (n1.vuse != n2.vuse)
	    return NULL;

	  source_stmt
	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
				      code);

	  if (!source_stmt)
	    return NULL;

	  if (!verify_symbolic_number_p (n, stmt))
	    return NULL;

	  break;
	default:
	  return NULL;
	}
      return source_stmt;
    }
  return NULL;
}

/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
   *CMPXCHG, *CMPNOP and adjust *N.  */

void
find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
			    uint64_t *cmpnop, bool *cast64_to_32)
{
  unsigned rsize;
  uint64_t tmpn, mask;

  /* The number which the find_bswap_or_nop_1 result should match in order
     to have a full byte swap.  The number is shifted to the right
     according to the size of the symbolic number before using it.  */
  *cmpxchg = CMPXCHG;
  *cmpnop = CMPNOP;
  *cast64_to_32 = false;

  /* Find real size of result (highest non-zero byte).  */
  if (n->base_addr)
    for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
  else
    rsize = n->range;

  /* Zero out the bits corresponding to untouched bytes in original gimple
     expression.  */
  if (n->range < (int) sizeof (int64_t))
    {
      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
      if (n->base_addr == NULL
	  && n->range == 4
	  && int_size_in_bytes (TREE_TYPE (n->src)) == 8)
	{
	  /* If all bytes in n->n are either 0 or in [5..8] range, this
	     might be a candidate for (unsigned) __builtin_bswap64 (src).
	     It is not worth it for (unsigned short) __builtin_bswap64 (src)
	     or (unsigned short) __builtin_bswap32 (src).  */
	  *cast64_to_32 = true;
	  for (tmpn = n->n; tmpn; tmpn >>= BITS_PER_MARKER)
	    if ((tmpn & MARKER_MASK)
		&& ((tmpn & MARKER_MASK) <= 4 || (tmpn & MARKER_MASK) > 8))
	      {
		*cast64_to_32 = false;
		break;
	      }
	}
      if (*cast64_to_32)
	*cmpxchg &= mask;
      else
	*cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
      *cmpnop &= mask;
    }

  /* Zero out the bits corresponding to unused bytes in the result of the
     gimple expression.  */
  if (rsize < n->range)
    {
      if (BYTES_BIG_ENDIAN)
	{
	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
	  *cmpxchg &= mask;
	  if (n->range - rsize == sizeof (int64_t))
	    *cmpnop = 0;
	  else
	    *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
	}
      else
	{
	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
	  if (n->range - rsize == sizeof (int64_t))
	    *cmpxchg = 0;
	  else
	    *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
	  *cmpnop &= mask;
	}
      n->range = rsize;
    }

  if (*cast64_to_32)
    n->range = 8;
  n->range *= BITS_PER_UNIT;
}

/* Check if STMT completes a bswap implementation or a read in a given
   endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
   accordingly.  It also sets N to represent the kind of operations
   performed: size of the resulting expression and whether it works on
   a memory source, and if so alias-set and vuse.  At last, the
   function returns a stmt whose rhs's first tree is the source
   expression.  */

gimple *
find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap,
		   bool *cast64_to_32, uint64_t *mask)
{
  tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
  if (!tree_fits_uhwi_p (type_size))
    return NULL;

  /* The last parameter determines the depth search limit.  It usually
     correlates directly to the number n of bytes to be touched.  We
     increase that number by 2 * (log2(n) + 1) here in order to also
     cover signed -> unsigned conversions of the src operand as can be seen
     in libgcc, and for initial shift/and operation of the src operand.  */
  int limit = tree_to_uhwi (type_size);
  limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
  gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);

  if (!ins_stmt)
    {
      if (gimple_assign_rhs_code (stmt) != CONSTRUCTOR
	  || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
	return NULL;
      unsigned HOST_WIDE_INT sz = tree_to_uhwi (type_size) * BITS_PER_UNIT;
      if (sz != 16 && sz != 32 && sz != 64)
	return NULL;
      tree rhs = gimple_assign_rhs1 (stmt);
      if (CONSTRUCTOR_NELTS (rhs) == 0)
	return NULL;
      tree eltype = TREE_TYPE (TREE_TYPE (rhs));
      unsigned HOST_WIDE_INT eltsz
	= int_size_in_bytes (eltype) * BITS_PER_UNIT;
      if (TYPE_PRECISION (eltype) != eltsz)
	return NULL;
      constructor_elt *elt;
      unsigned int i;
      tree type = build_nonstandard_integer_type (sz, 1);
      FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
	{
	  if (TREE_CODE (elt->value) != SSA_NAME
	      || !INTEGRAL_TYPE_P (TREE_TYPE (elt->value)))
	    return NULL;
	  struct symbolic_number n1;
	  gimple *source_stmt
	    = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt->value), &n1,
				   limit - 1);

	  if (!source_stmt)
	    return NULL;

	  n1.type = type;
	  if (!n1.base_addr)
	    n1.range = sz / BITS_PER_UNIT;

	  if (i == 0)
	    {
	      ins_stmt = source_stmt;
	      *n = n1;
	    }
	  else
	    {
	      if (n->vuse != n1.vuse)
		return NULL;

	      struct symbolic_number n0 = *n;

	      if (!BYTES_BIG_ENDIAN)
		{
		  if (!do_shift_rotate (LSHIFT_EXPR, &n1, i * eltsz))
		    return NULL;
		}
	      else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
		return NULL;
	      ins_stmt
		= perform_symbolic_merge (ins_stmt, &n0, source_stmt, &n1, n,
					  BIT_IOR_EXPR);

	      if (!ins_stmt)
		return NULL;
	    }
	}
    }

  uint64_t cmpxchg, cmpnop;
  find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop, cast64_to_32);

  /* A complete byte swap should make the symbolic number start with
     the largest digit in the highest order byte.  Unchanged symbolic
     number indicates a read with same endianness as target architecture.  */
  *mask = ~(uint64_t) 0;
  if (n->n == cmpnop)
    *bswap = false;
  else if (n->n == cmpxchg)
    *bswap = true;
  else
    {
      int set = 0;
      for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER)
	if ((n->n & msk) == 0)
	  *mask &= ~msk;
	else if ((n->n & msk) == (cmpxchg & msk))
	  set++;
	else
	  return NULL;
      if (set < 2)
	return NULL;
      *bswap = true;
    }

  /* Useless bit manipulation performed by code.  */
  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
    return NULL;

  return ins_stmt;
}

const pass_data pass_data_optimize_bswap =
{
  GIMPLE_PASS, /* type */
  "bswap", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_optimize_bswap : public gimple_opt_pass
{
public:
  pass_optimize_bswap (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
    }

  virtual unsigned int execute (function *);

}; // class pass_optimize_bswap

/* Helper function for bswap_replace.  Build VIEW_CONVERT_EXPR from
   VAL to TYPE.  If VAL has different type size, emit a NOP_EXPR cast
   first.  */

static tree
bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val,
		    bool before)
{
  gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val))
	      || POINTER_TYPE_P (TREE_TYPE (val)));
  if (TYPE_SIZE (type) != TYPE_SIZE (TREE_TYPE (val)))
    {
      HOST_WIDE_INT prec = TREE_INT_CST_LOW (TYPE_SIZE (type));
      if (POINTER_TYPE_P (TREE_TYPE (val)))
	{
	  gimple *g
	    = gimple_build_assign (make_ssa_name (pointer_sized_int_node),
				   NOP_EXPR, val);
	  if (before)
	    gsi_insert_before (gsi, g, GSI_SAME_STMT);
	  else
	    gsi_insert_after (gsi, g, GSI_NEW_STMT);
	  val = gimple_assign_lhs (g);
	}
      tree itype = build_nonstandard_integer_type (prec, 1);
      gimple *g = gimple_build_assign (make_ssa_name (itype), NOP_EXPR, val);
      if (before)
	gsi_insert_before (gsi, g, GSI_SAME_STMT);
      else
	gsi_insert_after (gsi, g, GSI_NEW_STMT);
      val = gimple_assign_lhs (g);
    }
  return build1 (VIEW_CONVERT_EXPR, type, val);
}

/* Perform the bswap optimization: replace the expression computed in the rhs
   of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
   bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr (non
   null if a load is needed) and BSWAP.  The type, VUSE and alias-set of the
   load to perform are also given in N while the builtin bswap invoke is given
   in FNDECL.  Finally, if a load is involved, INS_STMT refers to one of the
   load statements involved to construct the rhs in gsi_stmt (GSI) and
   N->range gives the size of the rhs expression for maintaining some
   statistics.

   Note that if the replacement involves a load and if gsi_stmt (GSI) is
   non-NULL, that stmt is moved just after INS_STMT to do the load with the
   same VUSE, which can lead to gsi_stmt (GSI) changing basic block.  */
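
/* For instance (illustrative only), a manual byte swap of a 32-bit
   unsigned X such as

     y = (x << 24) | ((x & 0xff00) << 8) | ((x >> 8) & 0xff00) | (x >> 24);

   is replaced by a call to __builtin_bswap32, while a sequence that
   reassembles four consecutive memory bytes in target byte order collapses
   to a plain 32-bit load.  */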

tree
bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
	       tree bswap_type, tree load_type, struct symbolic_number *n,
	       bool bswap, uint64_t mask)
{
  tree src, tmp, tgt = NULL_TREE;
  gimple *bswap_stmt, *mask_stmt = NULL;
  tree_code conv_code = NOP_EXPR;

  gimple *cur_stmt = gsi_stmt (gsi);
  src = n->src;
  if (cur_stmt)
    {
      tgt = gimple_assign_lhs (cur_stmt);
      if (gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR
	  && tgt
	  && VECTOR_TYPE_P (TREE_TYPE (tgt)))
	conv_code = VIEW_CONVERT_EXPR;
    }

  /* Need to load the value from memory first.  */
  if (n->base_addr)
    {
      gimple_stmt_iterator gsi_ins = gsi;
      if (ins_stmt)
	gsi_ins = gsi_for_stmt (ins_stmt);
      tree addr_expr, addr_tmp, val_expr, val_tmp;
      tree load_offset_ptr, aligned_load_type;
      gimple *load_stmt;
      unsigned align = get_object_alignment (src);
      poly_int64 load_offset = 0;

      if (cur_stmt)
	{
	  basic_block ins_bb = gimple_bb (ins_stmt);
	  basic_block cur_bb = gimple_bb (cur_stmt);
	  if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
	    return NULL_TREE;

	  /* Move cur_stmt just before one of the load of the original
	     to ensure it has the same VUSE.  See PR61517 for what could
	     go wrong.  */
	  if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
	    reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
	  gsi_move_before (&gsi, &gsi_ins);
	  gsi = gsi_for_stmt (cur_stmt);
	}
      else
	gsi = gsi_ins;

      /* Compute address to load from and cast according to the size
	 of the load.  */
      addr_expr = build_fold_addr_expr (src);
      if (is_gimple_mem_ref_addr (addr_expr))
	addr_tmp = unshare_expr (addr_expr);
      else
	{
	  addr_tmp = unshare_expr (n->base_addr);
	  if (!is_gimple_mem_ref_addr (addr_tmp))
	    addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
						   is_gimple_mem_ref_addr,
						   NULL_TREE, true,
						   GSI_SAME_STMT);
	  load_offset = n->bytepos;
	  if (n->offset)
	    {
	      tree off
		= force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
					    true, NULL_TREE, true,
					    GSI_SAME_STMT);
	      gimple *stmt
		= gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
				       POINTER_PLUS_EXPR, addr_tmp, off);
	      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	      addr_tmp = gimple_assign_lhs (stmt);
	    }
	}

      /* Perform the load.  */
      aligned_load_type = load_type;
      if (align < TYPE_ALIGN (load_type))
	aligned_load_type = build_aligned_type (load_type, align);
      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
			      load_offset_ptr);

      if (!bswap)
	{
	  if (n->range == 16)
	    nop_stats.found_16bit++;
	  else if (n->range == 32)
	    nop_stats.found_32bit++;
	  else
	    {
	      gcc_assert (n->range == 64);
	      nop_stats.found_64bit++;
	    }

	  /* Convert the result of load if necessary.  */
	  if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
	    {
	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
					    "load_dst");
	      load_stmt = gimple_build_assign (val_tmp, val_expr);
	      gimple_set_vuse (load_stmt, n->vuse);
	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
	      if (conv_code == VIEW_CONVERT_EXPR)
		val_tmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), val_tmp,
					      true);
	      gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
	      update_stmt (cur_stmt);
	    }
	  else if (cur_stmt)
	    {
	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
	      gimple_set_vuse (cur_stmt, n->vuse);
	      update_stmt (cur_stmt);
	    }
	  else
	    {
	      tgt = make_ssa_name (load_type);
	      cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
	      gimple_set_vuse (cur_stmt, n->vuse);
	      gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
	    }

	  if (dump_file)
	    {
	      fprintf (dump_file,
		       "%d bit load in target endianness found at: ",
		       (int) n->range);
	      print_gimple_stmt (dump_file, cur_stmt, 0);
	    }
	  return tgt;
	}
      else
	{
	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
	  load_stmt = gimple_build_assign (val_tmp, val_expr);
	  gimple_set_vuse (load_stmt, n->vuse);
	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
	}
      src = val_tmp;
    }
  else if (!bswap)
    {
      gimple *g = NULL;
      if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
	{
	  if (!is_gimple_val (src))
	    return NULL_TREE;
	  if (conv_code == VIEW_CONVERT_EXPR)
	    src = bswap_view_convert (&gsi, TREE_TYPE (tgt), src, true);
	  g = gimple_build_assign (tgt, conv_code, src);
	}
      else if (cur_stmt)
	g = gimple_build_assign (tgt, src);
      else
	tgt = src;
      if (n->range == 16)
	nop_stats.found_16bit++;
      else if (n->range == 32)
	nop_stats.found_32bit++;
      else
	{
	  gcc_assert (n->range == 64);
	  nop_stats.found_64bit++;
	}
      if (dump_file)
	{
	  fprintf (dump_file,
		   "%d bit reshuffle in target endianness found at: ",
		   (int) n->range);
	  if (cur_stmt)
	    print_gimple_stmt (dump_file, cur_stmt, 0);
	  else
	    {
	      print_generic_expr (dump_file, tgt, TDF_NONE);
	      fprintf (dump_file, "\n");
	    }
	}
      if (cur_stmt)
	gsi_replace (&gsi, g, true);
      return tgt;
    }
  else if (TREE_CODE (src) == BIT_FIELD_REF)
    src = TREE_OPERAND (src, 0);

  if (n->range == 16)
    bswap_stats.found_16bit++;
  else if (n->range == 32)
    bswap_stats.found_32bit++;
  else
    {
      gcc_assert (n->range == 64);
      bswap_stats.found_64bit++;
    }

  tmp = src;

  /* Convert the src expression if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  /* Canonical form for 16 bit bswap is a rotate expression.  Only 16-bit
     values are handled this way, as rotation of 2N-bit values by N bits is
     generally not equivalent to a bswap.  Consider for instance
     0x01020304 r>> 16 which gives 0x03040102 while a bswap for that value
     is 0x04030201.  */
  if (bswap && n->range == 16)
    {
      tree count = build_int_cst (NULL, BITS_PER_UNIT);
      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
      bswap_stmt = gimple_build_assign (NULL, src);
    }
  else
    bswap_stmt = gimple_build_call (fndecl, 1, tmp);

  if (tgt == NULL_TREE)
    tgt = make_ssa_name (bswap_type);
  tmp = tgt;

  if (mask != ~(uint64_t) 0)
    {
      tree m = build_int_cst (bswap_type, mask);
      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
      gimple_set_lhs (bswap_stmt, tmp);
      mask_stmt = gimple_build_assign (tgt, BIT_AND_EXPR, tmp, m);
      tmp = tgt;
    }

  /* Convert the result if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
    {
      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
      tree atmp = tmp;
      gimple_stmt_iterator gsi2 = gsi;
      if (conv_code == VIEW_CONVERT_EXPR)
	atmp = bswap_view_convert (&gsi2, TREE_TYPE (tgt), tmp, false);
      gimple *convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
      gsi_insert_after (&gsi2, convert_stmt, GSI_SAME_STMT);
    }

  gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp);

  if (dump_file)
    {
      fprintf (dump_file, "%d bit bswap implementation found at: ",
	       (int) n->range);
      if (cur_stmt)
	print_gimple_stmt (dump_file, cur_stmt, 0);
      else
	{
	  print_generic_expr (dump_file, tgt, TDF_NONE);
	  fprintf (dump_file, "\n");
	}
    }

  if (cur_stmt)
    {
      if (mask_stmt)
	gsi_insert_after (&gsi, mask_stmt, GSI_SAME_STMT);
      gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
      gsi_remove (&gsi, true);
    }
  else
    {
      gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
      if (mask_stmt)
	gsi_insert_before (&gsi, mask_stmt, GSI_SAME_STMT);
    }
  return tgt;
}

/* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
   using bswap optimizations.  CDI_DOMINATORS need to be
   computed on entry.  Return true if it has been optimized and
   TODO_update_ssa is needed.  */

static bool
maybe_optimize_vector_constructor (gimple *cur_stmt)
{
  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
  struct symbolic_number n;
  bool bswap;

  gcc_assert (is_gimple_assign (cur_stmt)
	      && gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR);

  tree rhs = gimple_assign_rhs1 (cur_stmt);
  if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
      || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
      || gimple_assign_lhs (cur_stmt) == NULL_TREE)
    return false;

  HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
  switch (sz)
    {
    case 16:
      load_type = bswap_type = uint16_type_node;
      break;
    case 32:
      if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
	  && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
	{
	  load_type = uint32_type_node;
	  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
	  bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
	}
      else
	return false;
      break;
    case 64:
      if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
	  && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
	      || (word_mode == SImode
		  && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
		  && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
	{
	  load_type = uint64_type_node;
	  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
	  bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
	}
      else
	return false;
      break;
    default:
      return false;
    }

  bool cast64_to_32;
  uint64_t mask;
  gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
					&cast64_to_32, &mask);
  if (!ins_stmt
      || n.range != (unsigned HOST_WIDE_INT) sz
      || cast64_to_32
      || mask != ~(uint64_t) 0)
    return false;

  if (bswap && !fndecl && n.range != 16)
    return false;

  memset (&nop_stats, 0, sizeof (nop_stats));
  memset (&bswap_stats, 0, sizeof (bswap_stats));
  return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
			bswap_type, load_type, &n, bswap, mask) != NULL_TREE;
}

/* Find manual byte swap implementations as well as load in a given
   endianness.  Byte swaps are turned into a bswap builtin invocation
   while endian loads are converted to bswap builtin invocation or
   simple load according to the target endianness.  */
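
/* For example (illustrative only, assuming an unsigned char *P), a
   big-endian read assembled by hand as

     ((uint32_t) p[0] << 24) | ((uint32_t) p[1] << 16)
     | ((uint32_t) p[2] << 8) | p[3]

   becomes a plain 32-bit load on a big-endian target, and a 32-bit load
   followed by __builtin_bswap32 on a little-endian target when the bswap
   optab is available.  */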

unsigned int
pass_optimize_bswap::execute (function *fun)
{
  basic_block bb;
  bool bswap32_p, bswap64_p;
  bool changed = false;
  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;

  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
		   || (bswap32_p && word_mode == SImode)));

  /* Determine the argument type of the builtins.  The code later on
     assumes that the return and argument type are the same.  */
  if (bswap32_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  if (bswap64_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  memset (&nop_stats, 0, sizeof (nop_stats));
  memset (&bswap_stats, 0, sizeof (bswap_stats));
  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      /* We do a reverse scan for bswap patterns to make sure we get the
	 widest match.  As bswap pattern matching doesn't handle previously
	 inserted smaller bswap replacements as sub-patterns, the wider
	 variant wouldn't be detected.  */
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
	{
	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
	  enum tree_code code;
	  struct symbolic_number n;
	  bool bswap, cast64_to_32;
	  uint64_t mask;

	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
	     might be moved to a different basic block by bswap_replace and gsi
	     must not point to it if that's the case.  Moving the gsi_prev
	     there makes sure that gsi points to the statement previous to
	     cur_stmt while still making sure that all statements are
	     considered in this basic block.  */
	  gsi_prev (&gsi);

	  if (!is_gimple_assign (cur_stmt))
	    continue;

	  code = gimple_assign_rhs_code (cur_stmt);
	  switch (code)
	    {
	    case LROTATE_EXPR:
	    case RROTATE_EXPR:
	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
		     % BITS_PER_UNIT)
		continue;
	      /* Fall through.  */
	    case BIT_IOR_EXPR:
	    case BIT_XOR_EXPR:
	    case PLUS_EXPR:
	      break;
	    case CONSTRUCTOR:
	      {
		tree rhs = gimple_assign_rhs1 (cur_stmt);
		if (VECTOR_TYPE_P (TREE_TYPE (rhs))
		    && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs))))
		  break;
	      }
	      continue;
	    default:
	      continue;
	    }

	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
					&cast64_to_32, &mask);

	  if (!ins_stmt)
	    continue;

	  switch (n.range)
	    {
	    case 16:
	      /* Already in canonical form, nothing to do.  */
	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
		continue;
	      load_type = bswap_type = uint16_type_node;
	      break;
	    case 32:
	      load_type = uint32_type_node;
	      if (bswap32_p)
		{
		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
		  bswap_type = bswap32_type;
		}
	      break;
	    case 64:
	      load_type = uint64_type_node;
	      if (bswap64_p)
		{
		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
		  bswap_type = bswap64_type;
		}
	      break;
	    default:
	      continue;
	    }

	  if (bswap && !fndecl && n.range != 16)
	    continue;

	  if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
			     bswap_type, load_type, &n, bswap, mask))
	    changed = true;
	}
    }

  statistics_counter_event (fun, "16-bit nop implementations found",
			    nop_stats.found_16bit);
  statistics_counter_event (fun, "32-bit nop implementations found",
			    nop_stats.found_32bit);
  statistics_counter_event (fun, "64-bit nop implementations found",
			    nop_stats.found_64bit);
  statistics_counter_event (fun, "16-bit bswap implementations found",
			    bswap_stats.found_16bit);
  statistics_counter_event (fun, "32-bit bswap implementations found",
			    bswap_stats.found_32bit);
  statistics_counter_event (fun, "64-bit bswap implementations found",
			    bswap_stats.found_64bit);

  return (changed ? TODO_update_ssa : 0);
}

} // anon namespace

gimple_opt_pass *
make_pass_optimize_bswap (gcc::context *ctxt)
{
  return new pass_optimize_bswap (ctxt);
}

namespace {

/* Struct recording one operand for the store, which is either a constant,
   then VAL represents the constant and all the other fields are zero, or
   a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
   and the other fields also reflect the memory load, or an SSA name, then
   VAL represents the SSA name and all the other fields are zero.  */
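
/* For example (illustrative only), for a source statement like *p = *q ^ 3;
   the store records two operands: a memory-load operand for *q (VAL is the
   reference and BASE_ADDR is non-NULL) and a constant operand whose VAL
   is 3.  */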
245f6de1 1618
6c1dae73 1619class store_operand_info
245f6de1 1620{
6c1dae73 1621public:
245f6de1
JJ
1622 tree val;
1623 tree base_addr;
8a91d545
RS
1624 poly_uint64 bitsize;
1625 poly_uint64 bitpos;
1626 poly_uint64 bitregion_start;
1627 poly_uint64 bitregion_end;
245f6de1 1628 gimple *stmt;
383ac8dc 1629 bool bit_not_p;
245f6de1
JJ
1630 store_operand_info ();
1631};
1632
1633store_operand_info::store_operand_info ()
1634 : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
383ac8dc 1635 bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
245f6de1
JJ
1636{
1637}
1638
f663d9ad
KT
1639/* Struct recording the information about a single store of an immediate
1640 to memory. These are created in the first phase and coalesced into
1641 merged_store_group objects in the second phase. */
1642
6c1dae73 1643class store_immediate_info
f663d9ad 1644{
6c1dae73 1645public:
f663d9ad
KT
1646 unsigned HOST_WIDE_INT bitsize;
1647 unsigned HOST_WIDE_INT bitpos;
a62b3dc5
JJ
1648 unsigned HOST_WIDE_INT bitregion_start;
1649 /* This is one past the last bit of the bit region. */
1650 unsigned HOST_WIDE_INT bitregion_end;
f663d9ad
KT
1651 gimple *stmt;
1652 unsigned int order;
e362a897
EB
1653 /* INTEGER_CST for constant store, STRING_CST for string store,
1654 MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
1655 BIT_INSERT_EXPR for bit insertion.
4b84d9b8
JJ
1656 LROTATE_EXPR if it can be only bswap optimized and
1657 ops are not really meaningful.
1658 NOP_EXPR if bswap optimization detected identity, ops
1659 are not meaningful. */
245f6de1 1660 enum tree_code rhs_code;
4b84d9b8
JJ
1661 /* Two fields for bswap optimization purposes. */
1662 struct symbolic_number n;
1663 gimple *ins_stmt;
127ef369 1664 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
d60edaba 1665 bool bit_not_p;
127ef369
JJ
1666 /* True if ops have been swapped and thus ops[1] represents
1667 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1668 bool ops_swapped_p;
629387a6
EB
1669 /* The index number of the landing pad, or 0 if there is none. */
1670 int lp_nr;
245f6de1
JJ
1671 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1672 just the first one. */
1673 store_operand_info ops[2];
b5926e23 1674 store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
a62b3dc5 1675 unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
4b84d9b8 1676 gimple *, unsigned int, enum tree_code,
629387a6 1677 struct symbolic_number &, gimple *, bool, int,
245f6de1
JJ
1678 const store_operand_info &,
1679 const store_operand_info &);
f663d9ad
KT
1680};
1681
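/* Illustrative note (editorial addition, not part of the original source).
   For two adjacent constant stores to consecutive char-sized fields, e.g.
     s.a = 1;
     s.b = 2;
   phase 1 records roughly
     { bitsize = 8, bitpos = 0, rhs_code = INTEGER_CST, order = 0, ... }
     { bitsize = 8, bitpos = 8, rhs_code = INTEGER_CST, order = 1, ... }
   with bitregion_start/bitregion_end covering the surrounding byte region
   of each field.  */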
1682store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
b5926e23 1683 unsigned HOST_WIDE_INT bp,
a62b3dc5
JJ
1684 unsigned HOST_WIDE_INT brs,
1685 unsigned HOST_WIDE_INT bre,
b5926e23 1686 gimple *st,
245f6de1
JJ
1687 unsigned int ord,
1688 enum tree_code rhscode,
4b84d9b8
JJ
1689 struct symbolic_number &nr,
1690 gimple *ins_stmtp,
d60edaba 1691 bool bitnotp,
629387a6 1692 int nr2,
245f6de1
JJ
1693 const store_operand_info &op0r,
1694 const store_operand_info &op1r)
a62b3dc5 1695 : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
4b84d9b8 1696 stmt (st), order (ord), rhs_code (rhscode), n (nr),
629387a6 1697 ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
4bc6fb21 1698 lp_nr (nr2), ops { op0r, op1r }
245f6de1
JJ
1699{
1700}
f663d9ad
KT
1701
1702/* Struct representing a group of stores to contiguous memory locations.
1703 These are produced by the second phase (coalescing) and consumed in the
1704 third phase that outputs the widened stores. */
1705
6c1dae73 1706class merged_store_group
f663d9ad 1707{
6c1dae73 1708public:
f663d9ad
KT
1709 unsigned HOST_WIDE_INT start;
1710 unsigned HOST_WIDE_INT width;
a62b3dc5
JJ
1711 unsigned HOST_WIDE_INT bitregion_start;
1712 unsigned HOST_WIDE_INT bitregion_end;
1713 /* The size of the allocated memory for val and mask. */
f663d9ad 1714 unsigned HOST_WIDE_INT buf_size;
a62b3dc5 1715 unsigned HOST_WIDE_INT align_base;
8a91d545 1716 poly_uint64 load_align_base[2];
f663d9ad
KT
1717
1718 unsigned int align;
245f6de1 1719 unsigned int load_align[2];
f663d9ad
KT
1720 unsigned int first_order;
1721 unsigned int last_order;
7f5a3982 1722 bool bit_insertion;
e362a897 1723 bool string_concatenation;
18e0c3d1 1724 bool only_constants;
1b3c9813 1725 bool consecutive;
18e0c3d1 1726 unsigned int first_nonmergeable_order;
629387a6 1727 int lp_nr;
f663d9ad 1728
a62b3dc5 1729 auto_vec<store_immediate_info *> stores;
f663d9ad
KT
1730 /* We record the first and last original statements in the sequence because
1731 we'll need their vuse/vdef and replacement position. It's easier to keep
1732 track of them separately as 'stores' is reordered by apply_stores. */
1733 gimple *last_stmt;
1734 gimple *first_stmt;
1735 unsigned char *val;
a62b3dc5 1736 unsigned char *mask;
f663d9ad
KT
1737
1738 merged_store_group (store_immediate_info *);
1739 ~merged_store_group ();
7f5a3982 1740 bool can_be_merged_into (store_immediate_info *);
f663d9ad
KT
1741 void merge_into (store_immediate_info *);
1742 void merge_overlapping (store_immediate_info *);
1743 bool apply_stores ();
a62b3dc5
JJ
1744private:
1745 void do_merge (store_immediate_info *);
f663d9ad
KT
1746};
1747
1748/* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1749
1750static void
1751dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1752{
1753 if (!fd)
1754 return;
1755
1756 for (unsigned int i = 0; i < len; i++)
c94c3532 1757 fprintf (fd, "%02x ", ptr[i]);
f663d9ad
KT
1758 fprintf (fd, "\n");
1759}
1760
f663d9ad
KT
1761/* Clear out LEN bits starting from bit START in the byte array
1762 PTR. This clears the bits to the *right* from START.
1763 START must be within [0, BITS_PER_UNIT) and counts starting from
1764 the least significant bit. */
1765
1766static void
1767clear_bit_region_be (unsigned char *ptr, unsigned int start,
1768 unsigned int len)
1769{
1770 if (len == 0)
1771 return;
1772 /* Clear len bits to the right of start. */
1773 else if (len <= start + 1)
1774 {
1775 unsigned char mask = (~(~0U << len));
1776 mask = mask << (start + 1U - len);
1777 ptr[0] &= ~mask;
1778 }
1779 else if (start != BITS_PER_UNIT - 1)
1780 {
1781 clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1782 clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1783 len - (start % BITS_PER_UNIT) - 1);
1784 }
1785 else if (start == BITS_PER_UNIT - 1
1786 && len > BITS_PER_UNIT)
1787 {
1788 unsigned int nbytes = len / BITS_PER_UNIT;
a62b3dc5 1789 memset (ptr, 0, nbytes);
f663d9ad
KT
1790 if (len % BITS_PER_UNIT != 0)
1791 clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1792 len % BITS_PER_UNIT);
1793 }
1794 else
1795 gcc_unreachable ();
1796}
1797
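/* Worked example (editorial addition, not part of the original source).
   clear_bit_region_be (ptr, /*start=*/6, /*len=*/3) with ptr[0] == 0xff
   takes the len <= start + 1 branch:
     mask = ~(~0U << 3)      == 0b00000111
     mask <<= 6 + 1 - 3      == 0b01110000
     ptr[0] &= ~mask         -> 0x8f
   i.e. bits 6, 5 and 4, counted from the least significant bit, are
   cleared, as the comment above describes.  */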
1798/* In the byte array PTR clear the bit region starting at bit
1799 START that is LEN bits wide.
1800 For regions spanning multiple bytes do this recursively until we reach
1801 zero LEN or a region contained within a single byte. */
1802
1803static void
1804clear_bit_region (unsigned char *ptr, unsigned int start,
1805 unsigned int len)
1806{
1807 /* Degenerate base case. */
1808 if (len == 0)
1809 return;
1810 else if (start >= BITS_PER_UNIT)
1811 clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1812 /* Second base case. */
1813 else if ((start + len) <= BITS_PER_UNIT)
1814 {
46a61395 1815 unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
f663d9ad
KT
1816 mask >>= BITS_PER_UNIT - (start + len);
1817
1818 ptr[0] &= ~mask;
1819
1820 return;
1821 }
1822 /* Clear most significant bits in a byte and proceed with the next byte. */
1823 else if (start != 0)
1824 {
1825 clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1f069ef5 1826 clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
f663d9ad
KT
1827 }
1828 /* Whole bytes need to be cleared. */
1829 else if (start == 0 && len > BITS_PER_UNIT)
1830 {
1831 unsigned int nbytes = len / BITS_PER_UNIT;
a848c710
KT
1832 /* We could recurse on each byte but we clear whole bytes, so a simple
1833 memset will do. */
46a61395 1834 memset (ptr, '\0', nbytes);
f663d9ad
KT
1835 /* Clear the remaining sub-byte region if there is one. */
1836 if (len % BITS_PER_UNIT != 0)
1837 clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1838 }
1839 else
1840 gcc_unreachable ();
1841}
1842
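/* Worked example (editorial addition, not part of the original source).
   clear_bit_region (ptr, /*start=*/6, /*len=*/6) on ptr[] = { 0xff, 0xff }
   recurses as follows:
     start != 0 and start + len > BITS_PER_UNIT, so bits 6..7 of ptr[0]
     are cleared first                          -> ptr[0] == 0x3f
     then clear_bit_region (ptr + 1, 0, 4) clears bits 0..3 of ptr[1]
                                                -> ptr[1] == 0xf0
   leaving ptr[] = { 0x3f, 0xf0 }.  */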
1843/* Write BITLEN bits of EXPR to the byte array PTR at
1844 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1845 Return true if the operation succeeded. */
1846
1847static bool
1848encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
46a61395 1849 unsigned int total_bytes)
f663d9ad
KT
1850{
1851 unsigned int first_byte = bitpos / BITS_PER_UNIT;
ad1de652
JJ
1852 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1853 || (bitpos % BITS_PER_UNIT)
f4b31647 1854 || !int_mode_for_size (bitlen, 0).exists ());
3afd514b
JJ
1855 bool empty_ctor_p
1856 = (TREE_CODE (expr) == CONSTRUCTOR
1857 && CONSTRUCTOR_NELTS (expr) == 0
1858 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1859 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
f663d9ad
KT
1860
1861 if (!sub_byte_op_p)
3afd514b
JJ
1862 {
1863 if (first_byte >= total_bytes)
1864 return false;
1865 total_bytes -= first_byte;
1866 if (empty_ctor_p)
1867 {
1868 unsigned HOST_WIDE_INT rhs_bytes
1869 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1870 if (rhs_bytes > total_bytes)
1871 return false;
1872 memset (ptr + first_byte, '\0', rhs_bytes);
1873 return true;
1874 }
1875 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1876 }
f663d9ad
KT
1877
1878 /* LITTLE-ENDIAN
1879 We are writing a non byte-sized quantity or at a position that is not
1880 at a byte boundary.
1881 |--------|--------|--------| ptr + first_byte
1882 ^ ^
1883 xxx xxxxxxxx xxx< bp>
1884 |______EXPR____|
1885
46a61395 1886 First native_encode_expr EXPR into a temporary buffer and shift each
f663d9ad
KT
1887 byte in the buffer by 'bp' (carrying the bits over as necessary).
1888 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1889 <------bitlen---->< bp>
1890 Then we clear the destination bits:
1891 |---00000|00000000|000-----| ptr + first_byte
1892 <-------bitlen--->< bp>
1893
1894 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1895 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1896
1897 BIG-ENDIAN
1898 We are writing a non byte-sized quantity or at a position that is not
1899 at a byte boundary.
1900 ptr + first_byte |--------|--------|--------|
1901 ^ ^
1902 <bp >xxx xxxxxxxx xxx
1903 |_____EXPR_____|
1904
46a61395 1905 First native_encode_expr EXPR into a temporary buffer and shift each
f663d9ad
KT
1906 byte in the buffer to the right (carrying the bits over as necessary).
1907 We shift by as much as needed to align the most significant bit of EXPR
1908 with bitpos:
1909 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1910 <---bitlen----> <bp ><-----bitlen----->
1911 Then we clear the destination bits:
1912 ptr + first_byte |-----000||00000000||00000---|
1913 <bp ><-------bitlen----->
1914
1915 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1916 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1917 The awkwardness comes from the fact that bitpos is counted from the
1918 most significant bit of a byte. */
1919
ef1d3b57
RS
1920 /* We must be dealing with fixed-size data at this point, since the
1921 total size is also fixed. */
3afd514b
JJ
1922 unsigned int byte_size;
1923 if (empty_ctor_p)
1924 {
1925 unsigned HOST_WIDE_INT rhs_bytes
1926 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1927 if (rhs_bytes > total_bytes)
1928 return false;
1929 byte_size = rhs_bytes;
1930 }
1931 else
1932 {
1933 fixed_size_mode mode
1934 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
e362a897
EB
1935 byte_size
1936 = mode == BLKmode
1937 ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
1938 : GET_MODE_SIZE (mode);
3afd514b 1939 }
f663d9ad 1940 /* Allocate an extra byte so that we have space to shift into. */
3afd514b 1941 byte_size++;
f663d9ad 1942 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
46a61395 1943 memset (tmpbuf, '\0', byte_size);
f663d9ad 1944 /* The store detection code should only have allowed constants that are
3afd514b
JJ
1945 accepted by native_encode_expr or empty ctors. */
1946 if (!empty_ctor_p
1947 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
f663d9ad
KT
1948 gcc_unreachable ();
1949
1950 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1951 bytes to write. This means it can write more than
1952 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1953 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1954 bitlen and zero out the bits that are not relevant as well (that may
1955 contain a sign bit due to sign-extension). */
1956 unsigned int padding
1957 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
ad1de652
JJ
1958 /* On big-endian the padding is at the 'front' so just skip the initial
1959 bytes. */
1960 if (BYTES_BIG_ENDIAN)
1961 tmpbuf += padding;
1962
1963 byte_size -= padding;
1964
1965 if (bitlen % BITS_PER_UNIT != 0)
f663d9ad 1966 {
4b2c06f4 1967 if (BYTES_BIG_ENDIAN)
ad1de652
JJ
1968 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1969 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1970 else
1971 clear_bit_region (tmpbuf, bitlen,
1972 byte_size * BITS_PER_UNIT - bitlen);
f663d9ad 1973 }
ad1de652
JJ
1974 /* Left shifting relies on the last byte being clear if bitlen is
1975 a multiple of BITS_PER_UNIT, which might not be clear if
1976 there are padding bytes. */
1977 else if (!BYTES_BIG_ENDIAN)
1978 tmpbuf[byte_size - 1] = '\0';
f663d9ad
KT
1979
1980 /* Clear the bit region in PTR where the bits from TMPBUF will be
46a61395 1981 inserted into. */
f663d9ad
KT
1982 if (BYTES_BIG_ENDIAN)
1983 clear_bit_region_be (ptr + first_byte,
1984 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1985 else
1986 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1987
1988 int shift_amnt;
1989 int bitlen_mod = bitlen % BITS_PER_UNIT;
1990 int bitpos_mod = bitpos % BITS_PER_UNIT;
1991
1992 bool skip_byte = false;
1993 if (BYTES_BIG_ENDIAN)
1994 {
1995 /* BITPOS and BITLEN are exactly aligned and no shifting
1996 is necessary. */
1997 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1998 || (bitpos_mod == 0 && bitlen_mod == 0))
1999 shift_amnt = 0;
2000 /* |. . . . . . . .|
2001 <bp > <blen >.
2002 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
2003 of the value until it aligns with 'bp' in the next byte over. */
2004 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
2005 {
2006 shift_amnt = bitlen_mod + bitpos_mod;
2007 skip_byte = bitlen_mod != 0;
2008 }
2009 /* |. . . . . . . .|
2010 <----bp--->
2011 <---blen---->.
2012 Shift the value right within the same byte so it aligns with 'bp'. */
2013 else
2014 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
2015 }
2016 else
2017 shift_amnt = bitpos % BITS_PER_UNIT;
2018
2019 /* Create the shifted version of EXPR. */
2020 if (!BYTES_BIG_ENDIAN)
46a61395 2021 {
8aba425f 2022 shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
46a61395
JJ
2023 if (shift_amnt == 0)
2024 byte_size--;
2025 }
f663d9ad
KT
2026 else
2027 {
2028 gcc_assert (BYTES_BIG_ENDIAN);
2029 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
2030 /* If shifting right forced us to move into the next byte skip the now
2031 empty byte. */
2032 if (skip_byte)
2033 {
2034 tmpbuf++;
2035 byte_size--;
2036 }
2037 }
2038
2039 /* Insert the bits from TMPBUF. */
2040 for (unsigned int i = 0; i < byte_size; i++)
2041 ptr[first_byte + i] |= tmpbuf[i];
2042
2043 return true;
2044}
2045
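/* Worked example (editorial addition, not part of the original source;
   little-endian target and an 8-bit-typed constant assumed).  Encoding the
   4-bit value 0xb at bitpos 6 into a zeroed 2-byte buffer:
     native_encode_expr writes 0x0b into tmpbuf[0];
     the bits above bitlen are cleared, then the buffer is shifted left by
     bitpos % BITS_PER_UNIT == 6, giving tmpbuf[] = { 0xc0, 0x02 };
     destination bits 6..9 are cleared and tmpbuf is ORed in, so
     ptr[first_byte]     |= 0xc0;
     ptr[first_byte + 1] |= 0x02;  */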
2046/* Sorting function for store_immediate_info objects.
2047 Sorts them by bitposition. */
2048
2049static int
2050sort_by_bitpos (const void *x, const void *y)
2051{
2052 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2053 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2054
109cca3b 2055 if ((*tmp)->bitpos < (*tmp2)->bitpos)
f663d9ad
KT
2056 return -1;
2057 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
2058 return 1;
109cca3b 2059 else
0f0027d1
KT
2060 /* If they are the same let's use the order which is guaranteed to
2061 be different. */
2062 return (*tmp)->order - (*tmp2)->order;
f663d9ad
KT
2063}
2064
2065/* Sorting function for store_immediate_info objects.
2066 Sorts them by the order field. */
2067
2068static int
2069sort_by_order (const void *x, const void *y)
2070{
2071 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2072 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2073
2074 if ((*tmp)->order < (*tmp2)->order)
2075 return -1;
2076 else if ((*tmp)->order > (*tmp2)->order)
2077 return 1;
2078
2079 gcc_unreachable ();
2080}
2081
2082/* Initialize a merged_store_group object from a store_immediate_info
2083 object. */
2084
2085merged_store_group::merged_store_group (store_immediate_info *info)
2086{
2087 start = info->bitpos;
2088 width = info->bitsize;
a62b3dc5
JJ
2089 bitregion_start = info->bitregion_start;
2090 bitregion_end = info->bitregion_end;
f663d9ad
KT
2091 /* VAL has memory allocated for it in apply_stores once the group
2092 width has been finalized. */
2093 val = NULL;
a62b3dc5 2094 mask = NULL;
e362a897
EB
2095 bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
2096 string_concatenation = info->rhs_code == STRING_CST;
18e0c3d1 2097 only_constants = info->rhs_code == INTEGER_CST;
1b3c9813 2098 consecutive = true;
18e0c3d1 2099 first_nonmergeable_order = ~0U;
629387a6 2100 lp_nr = info->lp_nr;
a62b3dc5
JJ
2101 unsigned HOST_WIDE_INT align_bitpos = 0;
2102 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2103 &align, &align_bitpos);
2104 align_base = start - align_bitpos;
245f6de1
JJ
2105 for (int i = 0; i < 2; ++i)
2106 {
2107 store_operand_info &op = info->ops[i];
2108 if (op.base_addr == NULL_TREE)
2109 {
2110 load_align[i] = 0;
2111 load_align_base[i] = 0;
2112 }
2113 else
2114 {
2115 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
2116 load_align_base[i] = op.bitpos - align_bitpos;
2117 }
2118 }
f663d9ad
KT
2119 stores.create (1);
2120 stores.safe_push (info);
2121 last_stmt = info->stmt;
2122 last_order = info->order;
2123 first_stmt = last_stmt;
2124 first_order = last_order;
2125 buf_size = 0;
2126}
2127
2128merged_store_group::~merged_store_group ()
2129{
2130 if (val)
2131 XDELETEVEC (val);
2132}
2133
7f5a3982
EB
2134/* Return true if the store described by INFO can be merged into the group. */
2135
2136bool
2137merged_store_group::can_be_merged_into (store_immediate_info *info)
2138{
2139 /* Do not merge bswap patterns. */
2140 if (info->rhs_code == LROTATE_EXPR)
2141 return false;
2142
629387a6
EB
2143 if (info->lp_nr != lp_nr)
2144 return false;
2145
7f5a3982
EB
2146 /* The canonical case. */
2147 if (info->rhs_code == stores[0]->rhs_code)
2148 return true;
2149
e362a897 2150 /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST. */
7f5a3982 2151 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
e362a897 2152 return !string_concatenation;
7f5a3982
EB
2153
2154 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
e362a897 2155 return !string_concatenation;
7f5a3982 2156
ed01d707
EB
2157 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
2158 only for small regions since this can generate a lot of instructions. */
7f5a3982
EB
2159 if (info->rhs_code == MEM_REF
2160 && (stores[0]->rhs_code == INTEGER_CST
2161 || stores[0]->rhs_code == BIT_INSERT_EXPR)
2162 && info->bitregion_start == stores[0]->bitregion_start
ed01d707 2163 && info->bitregion_end == stores[0]->bitregion_end
2815558a 2164 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
e362a897 2165 return !string_concatenation;
7f5a3982
EB
2166
2167 if (stores[0]->rhs_code == MEM_REF
2168 && (info->rhs_code == INTEGER_CST
2169 || info->rhs_code == BIT_INSERT_EXPR)
2170 && info->bitregion_start == stores[0]->bitregion_start
ed01d707 2171 && info->bitregion_end == stores[0]->bitregion_end
2815558a 2172 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
e362a897
EB
2173 return !string_concatenation;
2174
2175 /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR. */
2176 if (info->rhs_code == STRING_CST
2177 && stores[0]->rhs_code == INTEGER_CST
2178 && stores[0]->bitsize == CHAR_BIT)
2179 return !bit_insertion;
2180
2181 if (stores[0]->rhs_code == STRING_CST
2182 && info->rhs_code == INTEGER_CST
2183 && info->bitsize == CHAR_BIT)
2184 return !bit_insertion;
7f5a3982
EB
2185
2186 return false;
2187}
2188
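/* Illustrative note (editorial addition, not part of the original source).
   For instance, a group started by the constant store  x.a = 5;
   (rhs_code INTEGER_CST) accepts a following bit-field store  x.b = _7;
   recorded as BIT_INSERT_EXPR, as long as the group has not switched to
   string concatenation, while a store detected as part of a bswap
   sequence (rhs_code LROTATE_EXPR) is never merged into an existing
   group.  */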
a62b3dc5
JJ
2189/* Helper method for merge_into and merge_overlapping to do
2190 the common part. */
7f5a3982 2191
f663d9ad 2192void
a62b3dc5 2193merged_store_group::do_merge (store_immediate_info *info)
f663d9ad 2194{
a62b3dc5
JJ
2195 bitregion_start = MIN (bitregion_start, info->bitregion_start);
2196 bitregion_end = MAX (bitregion_end, info->bitregion_end);
2197
2198 unsigned int this_align;
2199 unsigned HOST_WIDE_INT align_bitpos = 0;
2200 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2201 &this_align, &align_bitpos);
2202 if (this_align > align)
2203 {
2204 align = this_align;
2205 align_base = info->bitpos - align_bitpos;
2206 }
245f6de1
JJ
2207 for (int i = 0; i < 2; ++i)
2208 {
2209 store_operand_info &op = info->ops[i];
2210 if (!op.base_addr)
2211 continue;
2212
2213 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
2214 if (this_align > load_align[i])
2215 {
2216 load_align[i] = this_align;
2217 load_align_base[i] = op.bitpos - align_bitpos;
2218 }
2219 }
f663d9ad 2220
f663d9ad
KT
2221 gimple *stmt = info->stmt;
2222 stores.safe_push (info);
2223 if (info->order > last_order)
2224 {
2225 last_order = info->order;
2226 last_stmt = stmt;
2227 }
2228 else if (info->order < first_order)
2229 {
2230 first_order = info->order;
2231 first_stmt = stmt;
2232 }
e362a897 2233
1b3c9813
EB
2234 if (info->bitpos != start + width)
2235 consecutive = false;
2236
e362a897
EB
2237 /* We need to use extraction if there is any bit-field. */
2238 if (info->rhs_code == BIT_INSERT_EXPR)
2239 {
2240 bit_insertion = true;
2241 gcc_assert (!string_concatenation);
2242 }
2243
1b3c9813 2244 /* We want to use concatenation if there is any string. */
e362a897
EB
2245 if (info->rhs_code == STRING_CST)
2246 {
2247 string_concatenation = true;
2248 gcc_assert (!bit_insertion);
2249 }
2250
1b3c9813
EB
2251 /* But we cannot use it if we don't have consecutive stores. */
2252 if (!consecutive)
2253 string_concatenation = false;
2254
18e0c3d1
JJ
2255 if (info->rhs_code != INTEGER_CST)
2256 only_constants = false;
f663d9ad
KT
2257}
2258
a62b3dc5
JJ
2259/* Merge a store recorded by INFO into this merged store.
2260 The store is not overlapping with the existing recorded
2261 stores. */
2262
2263void
2264merged_store_group::merge_into (store_immediate_info *info)
2265{
1b3c9813
EB
2266 do_merge (info);
2267
a62b3dc5
JJ
2268 /* Make sure we're inserting in the position we think we're inserting. */
2269 gcc_assert (info->bitpos >= start + width
2270 && info->bitregion_start <= bitregion_end);
2271
c5679c37 2272 width = info->bitpos + info->bitsize - start;
a62b3dc5
JJ
2273}
2274
f663d9ad
KT
2275/* Merge a store described by INFO into this merged store.
2276 INFO overlaps in some way with the current store (i.e. it's not contiguous
2277 which is handled by merged_store_group::merge_into). */
2278
2279void
2280merged_store_group::merge_overlapping (store_immediate_info *info)
2281{
1b3c9813
EB
2282 do_merge (info);
2283
f663d9ad 2284 /* If the store extends the size of the group, extend the width. */
a62b3dc5 2285 if (info->bitpos + info->bitsize > start + width)
c5679c37 2286 width = info->bitpos + info->bitsize - start;
f663d9ad
KT
2287}
2288
2289/* Go through all the recorded stores in this group in program order and
2290 apply their values to the VAL byte array to create the final merged
2291 value. Return true if the operation succeeded. */
2292
2293bool
2294merged_store_group::apply_stores ()
2295{
e362a897
EB
2296 store_immediate_info *info;
2297 unsigned int i;
2298
a62b3dc5
JJ
2299 /* Make sure we have more than one store in the group, otherwise we cannot
2300 merge anything. */
2301 if (bitregion_start % BITS_PER_UNIT != 0
2302 || bitregion_end % BITS_PER_UNIT != 0
f663d9ad
KT
2303 || stores.length () == 1)
2304 return false;
2305
e362a897
EB
2306 buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;
2307
2308 /* Really do string concatenation for large strings only. */
2309 if (buf_size <= MOVE_MAX)
2310 string_concatenation = false;
2311
c94c3532 2312 /* Create a power-of-2-sized buffer for native_encode_expr. */
e362a897
EB
2313 if (!string_concatenation)
2314 buf_size = 1 << ceil_log2 (buf_size);
2315
a62b3dc5
JJ
2316 val = XNEWVEC (unsigned char, 2 * buf_size);
2317 mask = val + buf_size;
2318 memset (val, 0, buf_size);
2319 memset (mask, ~0U, buf_size);
f663d9ad 2320
e362a897
EB
2321 stores.qsort (sort_by_order);
2322
f663d9ad
KT
2323 FOR_EACH_VEC_ELT (stores, i, info)
2324 {
a62b3dc5 2325 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
c94c3532 2326 tree cst;
245f6de1
JJ
2327 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2328 cst = info->ops[0].val;
2329 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2330 cst = info->ops[1].val;
c94c3532
EB
2331 else
2332 cst = NULL_TREE;
245f6de1 2333 bool ret = true;
e362a897
EB
2334 if (cst && info->rhs_code != BIT_INSERT_EXPR)
2335 ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
2336 buf_size);
c94c3532
EB
2337 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2338 if (BYTES_BIG_ENDIAN)
2339 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2340 - (pos_in_buffer % BITS_PER_UNIT)),
2341 info->bitsize);
2342 else
2343 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
245f6de1 2344 if (cst && dump_file && (dump_flags & TDF_DETAILS))
f663d9ad
KT
2345 {
2346 if (ret)
2347 {
c94c3532 2348 fputs ("After writing ", dump_file);
4af78ef8 2349 print_generic_expr (dump_file, cst, TDF_NONE);
f663d9ad 2350 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
c94c3532
EB
2351 " at position %d\n", info->bitsize, pos_in_buffer);
2352 fputs (" the merged value contains ", dump_file);
f663d9ad 2353 dump_char_array (dump_file, val, buf_size);
c94c3532
EB
2354 fputs (" the merged mask contains ", dump_file);
2355 dump_char_array (dump_file, mask, buf_size);
2356 if (bit_insertion)
2357 fputs (" bit insertion is required\n", dump_file);
e362a897
EB
2358 if (string_concatenation)
2359 fputs (" string concatenation is required\n", dump_file);
f663d9ad
KT
2360 }
2361 else
2362 fprintf (dump_file, "Failed to merge stores\n");
4b84d9b8 2363 }
f663d9ad
KT
2364 if (!ret)
2365 return false;
2366 }
4b84d9b8 2367 stores.qsort (sort_by_bitpos);
f663d9ad
KT
2368 return true;
2369}
2370
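/* Worked example (editorial addition, not part of the original source;
   little-endian assumed).  For a group covering a 2-byte bit region with
     store 0: INTEGER_CST 0x12, bitsize 8, bitpos 0
     store 1: INTEGER_CST 0x4,  bitsize 4, bitpos 8
   apply_stores produces
     val[]  = { 0x12, 0x04 }
     mask[] = { 0x00, 0xf0 }
   i.e. mask keeps a 1 for every bit of the region that no recorded store
   writes, which the output phase consults for the padding bits.  */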
2371/* Structure describing the store chain. */
2372
6c1dae73 2373class imm_store_chain_info
f663d9ad 2374{
6c1dae73 2375public:
50b6d676
AO
2376 /* Doubly-linked list that imposes an order on chain processing.
2377 PNXP (prev's next pointer) points to the head of a list, or to
2378 the next field in the previous chain in the list.
2379 See pass_store_merging::m_stores_head for more rationale. */
2380 imm_store_chain_info *next, **pnxp;
b5926e23 2381 tree base_addr;
a62b3dc5 2382 auto_vec<store_immediate_info *> m_store_info;
f663d9ad
KT
2383 auto_vec<merged_store_group *> m_merged_store_groups;
2384
50b6d676
AO
2385 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2386 : next (inspt), pnxp (&inspt), base_addr (b_a)
2387 {
2388 inspt = this;
2389 if (next)
2390 {
2391 gcc_checking_assert (pnxp == next->pnxp);
2392 next->pnxp = &next;
2393 }
2394 }
2395 ~imm_store_chain_info ()
2396 {
2397 *pnxp = next;
2398 if (next)
2399 {
2400 gcc_checking_assert (&next == next->pnxp);
2401 next->pnxp = pnxp;
2402 }
2403 }
b5926e23 2404 bool terminate_and_process_chain ();
bd909071
JJ
2405 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2406 unsigned int);
f663d9ad 2407 bool coalesce_immediate_stores ();
b5926e23
RB
2408 bool output_merged_store (merged_store_group *);
2409 bool output_merged_stores ();
f663d9ad
KT
2410};
2411
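/* Illustrative note (editorial addition, not part of the original source).
   A minimal sketch of the pnxp invariant with two live chains A and B,
   where A was created after B:
     m_stores_head -> A -> B -> NULL
     A->pnxp == &m_stores_head      B->pnxp == &A->next
   Deleting A performs *A->pnxp = B and B->pnxp = &m_stores_head, so the
   list stays consistent no matter which chain is terminated first.  */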
2412const pass_data pass_data_tree_store_merging = {
2413 GIMPLE_PASS, /* type */
2414 "store-merging", /* name */
2415 OPTGROUP_NONE, /* optinfo_flags */
2416 TV_GIMPLE_STORE_MERGING, /* tv_id */
2417 PROP_ssa, /* properties_required */
2418 0, /* properties_provided */
2419 0, /* properties_destroyed */
2420 0, /* todo_flags_start */
2421 TODO_update_ssa, /* todo_flags_finish */
2422};
2423
2424class pass_store_merging : public gimple_opt_pass
2425{
2426public:
2427 pass_store_merging (gcc::context *ctxt)
95d94b52
RB
2428 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head (),
2429 m_n_chains (0), m_n_stores (0)
f663d9ad
KT
2430 {
2431 }
2432
c94c3532
EB
2433 /* Pass not supported for PDP-endian, nor for insane hosts or
2434 target character sizes where native_{encode,interpret}_expr
a62b3dc5 2435 doesn't work properly. */
f663d9ad
KT
2436 virtual bool
2437 gate (function *)
2438 {
a62b3dc5 2439 return flag_store_merging
c94c3532 2440 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
a62b3dc5
JJ
2441 && CHAR_BIT == 8
2442 && BITS_PER_UNIT == 8;
f663d9ad
KT
2443 }
2444
2445 virtual unsigned int execute (function *);
2446
2447private:
99b1c316 2448 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
f663d9ad 2449
50b6d676
AO
2450 /* Form a doubly-linked stack of the elements of m_stores, so that
2451 we can iterate over them in a predictable way. Using this order
2452 avoids extraneous differences in the compiler output just because
2453 of tree pointer variations (e.g. different chains end up in
2454 different positions of m_stores, so they are handled in different
2455 orders, so they allocate or release SSA names in different
2456 orders, and when they get reused, subsequent passes end up
2457 getting different SSA names, which may ultimately change
2458 decisions when going out of SSA). */
2459 imm_store_chain_info *m_stores_head;
2460
95d94b52
RB
2461 /* The number of store chains currently tracked. */
2462 unsigned m_n_chains;
2463 /* The number of stores currently tracked. */
2464 unsigned m_n_stores;
2465
629387a6
EB
2466 bool process_store (gimple *);
2467 bool terminate_and_process_chain (imm_store_chain_info *);
383ac8dc 2468 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
629387a6 2469 bool terminate_and_process_all_chains ();
f663d9ad
KT
2470}; // class pass_store_merging
2471
2472/* Terminate and process all recorded chains. Return true if any changes
2473 were made. */
2474
2475bool
2476pass_store_merging::terminate_and_process_all_chains ()
2477{
f663d9ad 2478 bool ret = false;
50b6d676 2479 while (m_stores_head)
629387a6 2480 ret |= terminate_and_process_chain (m_stores_head);
b119c055 2481 gcc_assert (m_stores.is_empty ());
f663d9ad
KT
2482 return ret;
2483}
2484
383ac8dc
JJ
2485/* Terminate all chains that are affected by the statement STMT.
2486 CHAIN_INFO is the chain we should ignore from the checks if
629387a6 2487 non-NULL. Return true if any changes were made. */
f663d9ad
KT
2488
2489bool
20770eb8 2490pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
b5926e23 2491 **chain_info,
f663d9ad
KT
2492 gimple *stmt)
2493{
2494 bool ret = false;
2495
2496 /* If the statement doesn't touch memory it can't alias. */
2497 if (!gimple_vuse (stmt))
2498 return false;
2499
9e875fd8 2500 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
6b412bf6
RB
2501 ao_ref store_lhs_ref;
2502 ao_ref_init (&store_lhs_ref, store_lhs);
383ac8dc 2503 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
f663d9ad 2504 {
383ac8dc
JJ
2505 next = cur->next;
2506
2507 /* We already checked all the stores in chain_info and terminated the
2508 chain if necessary. Skip it here. */
2509 if (chain_info && *chain_info == cur)
2510 continue;
2511
245f6de1
JJ
2512 store_immediate_info *info;
2513 unsigned int i;
383ac8dc 2514 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
f663d9ad 2515 {
9e875fd8 2516 tree lhs = gimple_assign_lhs (info->stmt);
6b412bf6
RB
2517 ao_ref lhs_ref;
2518 ao_ref_init (&lhs_ref, lhs);
2519 if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2520 || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2521 || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2522 &lhs_ref, false)))
f663d9ad 2523 {
245f6de1 2524 if (dump_file && (dump_flags & TDF_DETAILS))
f663d9ad 2525 {
245f6de1
JJ
2526 fprintf (dump_file, "stmt causes chain termination:\n");
2527 print_gimple_stmt (dump_file, stmt, 0);
f663d9ad 2528 }
629387a6 2529 ret |= terminate_and_process_chain (cur);
245f6de1 2530 break;
f663d9ad
KT
2531 }
2532 }
2533 }
2534
f663d9ad
KT
2535 return ret;
2536}
2537
2538/* Helper function. Terminate the chain recorded in CHAIN_INFO, which
2539 stores to its base object. Return true if the merging and output was
2540 successful. The m_stores entry is removed after the processing in any case. */
2541
2542bool
629387a6 2543pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
f663d9ad 2544{
95d94b52
RB
2545 m_n_stores -= chain_info->m_store_info.length ();
2546 m_n_chains--;
b5926e23
RB
2547 bool ret = chain_info->terminate_and_process_chain ();
2548 m_stores.remove (chain_info->base_addr);
2549 delete chain_info;
f663d9ad
KT
2550 return ret;
2551}
2552
245f6de1 2553/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
629387a6
EB
2554 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2555 be able to sink load of REF across stores between FIRST and LAST, up
2556 to right before LAST. */
245f6de1
JJ
2557
2558bool
2559stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2560{
2561 ao_ref r;
2562 ao_ref_init (&r, ref);
2563 unsigned int count = 0;
2564 tree vop = gimple_vdef (last);
2565 gimple *stmt;
2566
629387a6
EB
2567 /* Return true conservatively if the basic blocks are different. */
2568 if (gimple_bb (first) != gimple_bb (last))
2569 return true;
2570
245f6de1
JJ
2571 do
2572 {
2573 stmt = SSA_NAME_DEF_STMT (vop);
2574 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2575 return true;
4b84d9b8
JJ
2576 if (gimple_store_p (stmt)
2577 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2578 return true;
245f6de1
JJ
2579 /* Avoid quadratic compile time by bounding the number of checks
2580 we perform. */
2581 if (++count > MAX_STORE_ALIAS_CHECKS)
2582 return true;
2583 vop = gimple_vuse (stmt);
2584 }
2585 while (stmt != first);
629387a6 2586
245f6de1
JJ
2587 return false;
2588}
2589
2590/* Return true if INFO->ops[IDX] is mergeable with the
2591 corresponding loads already in MERGED_STORE group.
2592 BASE_ADDR is the base address of the whole store group. */
2593
2594bool
2595compatible_load_p (merged_store_group *merged_store,
2596 store_immediate_info *info,
2597 tree base_addr, int idx)
2598{
2599 store_immediate_info *infof = merged_store->stores[0];
2600 if (!info->ops[idx].base_addr
8a91d545
RS
2601 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2602 info->bitpos - infof->bitpos)
245f6de1
JJ
2603 || !operand_equal_p (info->ops[idx].base_addr,
2604 infof->ops[idx].base_addr, 0))
2605 return false;
2606
2607 store_immediate_info *infol = merged_store->stores.last ();
2608 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2609 /* In this case all vuses should be the same, e.g.
2610 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2611 or
2612 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2613 and we can emit the coalesced load next to any of those loads. */
2614 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2615 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2616 return true;
2617
2618 /* Otherwise, at least for now require that the load has the same
2619 vuse as the store. See following examples. */
2620 if (gimple_vuse (info->stmt) != load_vuse)
2621 return false;
2622
2623 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2624 || (infof != infol
2625 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2626 return false;
2627
2628 /* If the load is from the same location as the store, already
2629 the construction of the immediate chain info guarantees no intervening
2630 stores, so no further checks are needed. Example:
2631 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
8a91d545 2632 if (known_eq (info->ops[idx].bitpos, info->bitpos)
245f6de1
JJ
2633 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2634 return true;
2635
2636 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2637 of the stores in the group, or any other stores in between those.
2638 Previous calls to compatible_load_p ensured that for all the
2639 merged_store->stores IDX loads, no stmts starting with
2640 merged_store->first_stmt and ending right before merged_store->last_stmt
2641 clobbers those loads. */
2642 gimple *first = merged_store->first_stmt;
2643 gimple *last = merged_store->last_stmt;
245f6de1
JJ
2644 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2645 comes before the so far first load, we'll be changing
2646 merged_store->first_stmt. In that case we need to give up if
2647 any of the earlier processed loads clobber with the stmts in the new
2648 range. */
2649 if (info->order < merged_store->first_order)
2650 {
3f207ab3 2651 for (store_immediate_info *infoc : merged_store->stores)
245f6de1
JJ
2652 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2653 return false;
2654 first = info->stmt;
2655 }
2656 /* Similarly, we could change merged_store->last_stmt, so ensure
2657 in that case no stmts in the new range clobber any of the earlier
2658 processed loads. */
2659 else if (info->order > merged_store->last_order)
2660 {
3f207ab3 2661 for (store_immediate_info *infoc : merged_store->stores)
245f6de1
JJ
2662 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2663 return false;
2664 last = info->stmt;
2665 }
2666 /* And finally, we'd be adding a new load to the set, ensure it isn't
2667 clobbered in the new range. */
2668 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2669 return false;
2670
2671 /* Otherwise, we are looking for:
2672 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2673 or
2674 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2675 return true;
2676}
2677
4b84d9b8
JJ
2678/* Add all refs loaded to compute VAL to REFS vector. */
2679
2680void
2681gather_bswap_load_refs (vec<tree> *refs, tree val)
2682{
2683 if (TREE_CODE (val) != SSA_NAME)
2684 return;
2685
2686 gimple *stmt = SSA_NAME_DEF_STMT (val);
2687 if (!is_gimple_assign (stmt))
2688 return;
2689
2690 if (gimple_assign_load_p (stmt))
2691 {
2692 refs->safe_push (gimple_assign_rhs1 (stmt));
2693 return;
2694 }
2695
2696 switch (gimple_assign_rhs_class (stmt))
2697 {
2698 case GIMPLE_BINARY_RHS:
2699 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2700 /* FALLTHRU */
2701 case GIMPLE_UNARY_RHS:
2702 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2703 break;
2704 default:
2705 gcc_unreachable ();
2706 }
2707}
2708
c5679c37
JJ
2709/* Check if there are any stores in M_STORE_INFO after index I
2710 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2711 a potential group ending with END that have their order
4d213bf6
JJ
2712 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2713 all the stores already merged and the one under consideration
2714 have rhs_code of INTEGER_CST. Return true if there are no such stores.
c5679c37
JJ
2715 Consider:
2716 MEM[(long long int *)p_28] = 0;
2717 MEM[(long long int *)p_28 + 8B] = 0;
2718 MEM[(long long int *)p_28 + 16B] = 0;
2719 MEM[(long long int *)p_28 + 24B] = 0;
2720 _129 = (int) _130;
2721 MEM[(int *)p_28 + 8B] = _129;
2722 MEM[(int *)p_28].a = -1;
2723 We already have
2724 MEM[(long long int *)p_28] = 0;
2725 MEM[(int *)p_28].a = -1;
2726 stmts in the current group and need to consider if it is safe to
2727 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2728 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2729 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2730 into the group and merging of those 3 stores is successful, merged
2731 stmts will be emitted at the latest store from that group, i.e.
2732 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2733 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2734 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2735 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2736 into the group. That way it will be its own store group and will
4d213bf6 2737 not be touched. If ALL_INTEGER_CST_P and there are overlapping
c5679c37 2738 INTEGER_CST stores, those are mergeable using merge_overlapping,
bd909071
JJ
2739 so don't return false for those.
2740
2741 Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2742 (exclusive), whether they don't overlap the bitrange START to END
2743 and have order in between FIRST_ORDER and LAST_ORDER. This is to
2744 prevent merging in cases like:
2745 MEM <char[12]> [&b + 8B] = {};
2746 MEM[(short *) &b] = 5;
2747 _5 = *x_4(D);
2748 MEM <long long unsigned int> [&b + 2B] = _5;
2749 MEM[(char *)&b + 16B] = 88;
2750 MEM[(int *)&b + 20B] = 1;
2751 The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2752 be merged with it, because the = _5 store overlaps these and is in between
2753 them in sort_by_order ordering. If it was merged, the merged store would
2754 go after the = _5 store and thus change behavior. */
c5679c37
JJ
2755
2756static bool
00dcc88a
MS
2757check_no_overlap (const vec<store_immediate_info *> &m_store_info,
2758 unsigned int i,
bd909071
JJ
2759 bool all_integer_cst_p, unsigned int first_order,
2760 unsigned int last_order, unsigned HOST_WIDE_INT start,
2761 unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2762 unsigned end_earlier)
c5679c37
JJ
2763{
2764 unsigned int len = m_store_info.length ();
bd909071
JJ
2765 for (unsigned int j = first_earlier; j < end_earlier; j++)
2766 {
2767 store_immediate_info *info = m_store_info[j];
2768 if (info->order > first_order
2769 && info->order < last_order
2770 && info->bitpos + info->bitsize > start)
2771 return false;
2772 }
c5679c37
JJ
2773 for (++i; i < len; ++i)
2774 {
2775 store_immediate_info *info = m_store_info[i];
2776 if (info->bitpos >= end)
2777 break;
2778 if (info->order < last_order
4d213bf6 2779 && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
c5679c37
JJ
2780 return false;
2781 }
2782 return true;
2783}
2784
4b84d9b8
JJ
2785/* Return true if m_store_info[first] and at least one following store
2786 form a group that stores a try_size bitsize value which is byte swapped
2787 from a memory load or some value, or is the identity of some value.
2788 This uses the bswap pass APIs. */
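/* Illustrative note (editorial addition, not part of the original source).
   The canonical candidate is a group like
     p[0] = x >> 24; p[1] = x >> 16; p[2] = x >> 8; p[3] = x;
   with try_size == 32.  On a little-endian target the merged symbolic
   number matches cmpxchg, so the group can be emitted as a single 4-byte
   store of __builtin_bswap32 (x), provided the target advertises a bswap
   pattern for SImode; if it matches cmpnop instead, a plain 4-byte store
   of x suffices.  */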
2789
2790bool
2791imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2792 unsigned int first,
bd909071
JJ
2793 unsigned int try_size,
2794 unsigned int first_earlier)
4b84d9b8
JJ
2795{
2796 unsigned int len = m_store_info.length (), last = first;
2797 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2798 if (width >= try_size)
2799 return false;
2800 for (unsigned int i = first + 1; i < len; ++i)
2801 {
2802 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
cb76fcd7 2803 || m_store_info[i]->lp_nr != merged_store->lp_nr
4b84d9b8
JJ
2804 || m_store_info[i]->ins_stmt == NULL)
2805 return false;
2806 width += m_store_info[i]->bitsize;
2807 if (width >= try_size)
2808 {
2809 last = i;
2810 break;
2811 }
2812 }
2813 if (width != try_size)
2814 return false;
2815
2816 bool allow_unaligned
028d4092 2817 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
4b84d9b8
JJ
2818 /* Punt if the combined store would not be aligned and we need alignment. */
2819 if (!allow_unaligned)
2820 {
2821 unsigned int align = merged_store->align;
2822 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2823 for (unsigned int i = first + 1; i <= last; ++i)
2824 {
2825 unsigned int this_align;
2826 unsigned HOST_WIDE_INT align_bitpos = 0;
2827 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2828 &this_align, &align_bitpos);
2829 if (this_align > align)
2830 {
2831 align = this_align;
2832 align_base = m_store_info[i]->bitpos - align_bitpos;
2833 }
2834 }
2835 unsigned HOST_WIDE_INT align_bitpos
2836 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2837 if (align_bitpos)
2838 align = least_bit_hwi (align_bitpos);
2839 if (align < try_size)
2840 return false;
2841 }
2842
2843 tree type;
2844 switch (try_size)
2845 {
2846 case 16: type = uint16_type_node; break;
2847 case 32: type = uint32_type_node; break;
2848 case 64: type = uint64_type_node; break;
2849 default: gcc_unreachable ();
2850 }
2851 struct symbolic_number n;
2852 gimple *ins_stmt = NULL;
2853 int vuse_store = -1;
2854 unsigned int first_order = merged_store->first_order;
2855 unsigned int last_order = merged_store->last_order;
2856 gimple *first_stmt = merged_store->first_stmt;
2857 gimple *last_stmt = merged_store->last_stmt;
c5679c37 2858 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
4b84d9b8
JJ
2859 store_immediate_info *infof = m_store_info[first];
2860
2861 for (unsigned int i = first; i <= last; ++i)
2862 {
2863 store_immediate_info *info = m_store_info[i];
2864 struct symbolic_number this_n = info->n;
2865 this_n.type = type;
2866 if (!this_n.base_addr)
2867 this_n.range = try_size / BITS_PER_UNIT;
30fa8e9c
JJ
2868 else
2869 /* Update vuse in case it has changed by output_merged_stores. */
2870 this_n.vuse = gimple_vuse (info->ins_stmt);
4b84d9b8
JJ
2871 unsigned int bitpos = info->bitpos - infof->bitpos;
2872 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2873 BYTES_BIG_ENDIAN
2874 ? try_size - info->bitsize - bitpos
2875 : bitpos))
2876 return false;
aa11164a 2877 if (this_n.base_addr && vuse_store)
4b84d9b8
JJ
2878 {
2879 unsigned int j;
2880 for (j = first; j <= last; ++j)
2881 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2882 break;
2883 if (j > last)
2884 {
2885 if (vuse_store == 1)
2886 return false;
2887 vuse_store = 0;
2888 }
2889 }
2890 if (i == first)
2891 {
2892 n = this_n;
2893 ins_stmt = info->ins_stmt;
2894 }
2895 else
2896 {
c5679c37 2897 if (n.base_addr && n.vuse != this_n.vuse)
4b84d9b8 2898 {
c5679c37
JJ
2899 if (vuse_store == 0)
2900 return false;
2901 vuse_store = 1;
4b84d9b8 2902 }
c5679c37
JJ
2903 if (info->order > last_order)
2904 {
2905 last_order = info->order;
2906 last_stmt = info->stmt;
2907 }
2908 else if (info->order < first_order)
2909 {
2910 first_order = info->order;
2911 first_stmt = info->stmt;
2912 }
2913 end = MAX (end, info->bitpos + info->bitsize);
4b84d9b8
JJ
2914
2915 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
04eccbbe 2916 &this_n, &n, BIT_IOR_EXPR);
4b84d9b8
JJ
2917 if (ins_stmt == NULL)
2918 return false;
2919 }
2920 }
2921
2922 uint64_t cmpxchg, cmpnop;
b320edc0
JJ
2923 bool cast64_to_32;
2924 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop, &cast64_to_32);
4b84d9b8
JJ
2925
2926 /* A complete byte swap should make the symbolic number to start with
2927 the largest digit in the highest order byte. Unchanged symbolic
2928 number indicates a read with same endianness as target architecture. */
2929 if (n.n != cmpnop && n.n != cmpxchg)
2930 return false;
2931
b320edc0
JJ
2932 /* For now. */
2933 if (cast64_to_32)
2934 return false;
2935
4b84d9b8
JJ
2936 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2937 return false;
2938
bd909071
JJ
2939 if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
2940 merged_store->start, end, first_earlier, first))
c5679c37
JJ
2941 return false;
2942
4b84d9b8
JJ
2943 /* Don't handle memory copy this way if normal non-bswap processing
2944 would handle it too. */
2945 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2946 {
2947 unsigned int i;
2948 for (i = first; i <= last; ++i)
2949 if (m_store_info[i]->rhs_code != MEM_REF)
2950 break;
2951 if (i == last + 1)
2952 return false;
2953 }
2954
2955 if (n.n == cmpxchg)
2956 switch (try_size)
2957 {
2958 case 16:
2959 /* Will emit LROTATE_EXPR. */
2960 break;
2961 case 32:
2962 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2963 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2964 break;
2965 return false;
2966 case 64:
2967 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2968 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2969 break;
2970 return false;
2971 default:
2972 gcc_unreachable ();
2973 }
2974
2975 if (!allow_unaligned && n.base_addr)
2976 {
2977 unsigned int align = get_object_alignment (n.src);
2978 if (align < try_size)
2979 return false;
2980 }
2981
2982 /* If each load has vuse of the corresponding store, need to verify
2983 the loads can be sunk right before the last store. */
2984 if (vuse_store == 1)
2985 {
2986 auto_vec<tree, 64> refs;
2987 for (unsigned int i = first; i <= last; ++i)
2988 gather_bswap_load_refs (&refs,
2989 gimple_assign_rhs1 (m_store_info[i]->stmt));
2990
3f207ab3 2991 for (tree ref : refs)
4b84d9b8
JJ
2992 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2993 return false;
2994 n.vuse = NULL_TREE;
2995 }
2996
2997 infof->n = n;
2998 infof->ins_stmt = ins_stmt;
2999 for (unsigned int i = first; i <= last; ++i)
3000 {
3001 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
3002 m_store_info[i]->ops[0].base_addr = NULL_TREE;
3003 m_store_info[i]->ops[1].base_addr = NULL_TREE;
3004 if (i != first)
3005 merged_store->merge_into (m_store_info[i]);
3006 }
3007
3008 return true;
3009}
3010
f663d9ad
KT
3011/* Go through the candidate stores recorded in m_store_info and merge them
3012 into merged_store_group objects recorded into m_merged_store_groups
3013 representing the widened stores. Return true if coalescing was successful
3014 and the number of widened stores is fewer than the original number
3015 of stores. */
3016
3017bool
3018imm_store_chain_info::coalesce_immediate_stores ()
3019{
3020 /* Anything less can't be processed. */
3021 if (m_store_info.length () < 2)
3022 return false;
3023
3024 if (dump_file && (dump_flags & TDF_DETAILS))
c94c3532 3025 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
f663d9ad
KT
3026 m_store_info.length ());
3027
3028 store_immediate_info *info;
4b84d9b8 3029 unsigned int i, ignore = 0;
bd909071
JJ
3030 unsigned int first_earlier = 0;
3031 unsigned int end_earlier = 0;
f663d9ad
KT
3032
3033 /* Order the stores by the bitposition they write to. */
3034 m_store_info.qsort (sort_by_bitpos);
3035
3036 info = m_store_info[0];
3037 merged_store_group *merged_store = new merged_store_group (info);
c94c3532
EB
3038 if (dump_file && (dump_flags & TDF_DETAILS))
3039 fputs ("New store group\n", dump_file);
f663d9ad
KT
3040
3041 FOR_EACH_VEC_ELT (m_store_info, i, info)
3042 {
3afd514b
JJ
3043 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
3044
4b84d9b8 3045 if (i <= ignore)
c94c3532 3046 goto done;
f663d9ad 3047
bd909071
JJ
3048 while (first_earlier < end_earlier
3049 && (m_store_info[first_earlier]->bitpos
3050 + m_store_info[first_earlier]->bitsize
3051 <= merged_store->start))
3052 first_earlier++;
3053
4b84d9b8
JJ
3054 /* First try to handle group of stores like:
3055 p[0] = data >> 24;
3056 p[1] = data >> 16;
3057 p[2] = data >> 8;
3058 p[3] = data;
3059 using the bswap framework. */
3060 if (info->bitpos == merged_store->start + merged_store->width
3061 && merged_store->stores.length () == 1
3062 && merged_store->stores[0]->ins_stmt != NULL
cb76fcd7 3063 && info->lp_nr == merged_store->lp_nr
4b84d9b8
JJ
3064 && info->ins_stmt != NULL)
3065 {
3066 unsigned int try_size;
3067 for (try_size = 64; try_size >= 16; try_size >>= 1)
bd909071
JJ
3068 if (try_coalesce_bswap (merged_store, i - 1, try_size,
3069 first_earlier))
4b84d9b8
JJ
3070 break;
3071
3072 if (try_size >= 16)
3073 {
3074 ignore = i + merged_store->stores.length () - 1;
3075 m_merged_store_groups.safe_push (merged_store);
3076 if (ignore < m_store_info.length ())
bd909071
JJ
3077 {
3078 merged_store = new merged_store_group (m_store_info[ignore]);
3079 end_earlier = ignore;
3080 }
4b84d9b8
JJ
3081 else
3082 merged_store = NULL;
c94c3532 3083 goto done;
4b84d9b8
JJ
3084 }
3085 }
3086
3afd514b
JJ
3087 new_bitregion_start
3088 = MIN (merged_store->bitregion_start, info->bitregion_start);
3089 new_bitregion_end
3090 = MAX (merged_store->bitregion_end, info->bitregion_end);
3091
3092 if (info->order >= merged_store->first_nonmergeable_order
3093 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
028d4092 3094 > (unsigned) param_store_merging_max_size))
18e0c3d1
JJ
3095 ;
3096
f663d9ad
KT
3097 /* |---store 1---|
3098 |---store 2---|
4b84d9b8 3099 Overlapping stores. */
18e0c3d1 3100 else if (IN_RANGE (info->bitpos, merged_store->start,
4d213bf6
JJ
3101 merged_store->start + merged_store->width - 1)
3102 /* |---store 1---||---store 2---|
3103 Handle also the consecutive INTEGER_CST stores case here,
3104 as we have here the code to deal with overlaps. */
3105 || (info->bitregion_start <= merged_store->bitregion_end
3106 && info->rhs_code == INTEGER_CST
3107 && merged_store->only_constants
3108 && merged_store->can_be_merged_into (info)))
f663d9ad 3109 {
245f6de1 3110 /* Only allow overlapping stores of constants. */
629387a6
EB
3111 if (info->rhs_code == INTEGER_CST
3112 && merged_store->only_constants
3113 && info->lp_nr == merged_store->lp_nr)
245f6de1 3114 {
bd909071
JJ
3115 unsigned int first_order
3116 = MIN (merged_store->first_order, info->order);
6cd4c66e
JJ
3117 unsigned int last_order
3118 = MAX (merged_store->last_order, info->order);
3119 unsigned HOST_WIDE_INT end
3120 = MAX (merged_store->start + merged_store->width,
3121 info->bitpos + info->bitsize);
bd909071
JJ
3122 if (check_no_overlap (m_store_info, i, true, first_order,
3123 last_order, merged_store->start, end,
3124 first_earlier, end_earlier))
6cd4c66e
JJ
3125 {
3126 /* check_no_overlap call above made sure there are no
3127 overlapping stores with non-INTEGER_CST rhs_code
3128 in between the first and last of the stores we've
3129 just merged. If there are any INTEGER_CST rhs_code
3130 stores in between, we need to merge_overlapping them
3131 even if in the sort_by_bitpos order there are other
3132 overlapping stores in between. Keep those stores as is.
3133 Example:
3134 MEM[(int *)p_28] = 0;
3135 MEM[(char *)p_28 + 3B] = 1;
3136 MEM[(char *)p_28 + 1B] = 2;
3137 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
3138 We can't merge the zero store with the store of two and
3139 not merge anything else, because the store of one is
3140 in the original order in between those two, but in
3141 store_by_bitpos order it comes after the last store that
3142 we can't merge with them. We can merge the first 3 stores
3143 and keep the last store as is though. */
18e0c3d1
JJ
3144 unsigned int len = m_store_info.length ();
3145 unsigned int try_order = last_order;
3146 unsigned int first_nonmergeable_order;
3147 unsigned int k;
3148 bool last_iter = false;
3149 int attempts = 0;
3150 do
6cd4c66e 3151 {
18e0c3d1 3152 unsigned int max_order = 0;
bd909071 3153 unsigned int min_order = first_order;
18e0c3d1
JJ
3154 unsigned first_nonmergeable_int_order = ~0U;
3155 unsigned HOST_WIDE_INT this_end = end;
3156 k = i;
3157 first_nonmergeable_order = ~0U;
3158 for (unsigned int j = i + 1; j < len; ++j)
6cd4c66e 3159 {
18e0c3d1
JJ
3160 store_immediate_info *info2 = m_store_info[j];
3161 if (info2->bitpos >= this_end)
3162 break;
3163 if (info2->order < try_order)
6cd4c66e 3164 {
4119cd69
JJ
3165 if (info2->rhs_code != INTEGER_CST
3166 || info2->lp_nr != merged_store->lp_nr)
18e0c3d1
JJ
3167 {
3168 /* Normally check_no_overlap makes sure this
3169 doesn't happen, but if end grows below,
3170 then we need to process more stores than
3171 check_no_overlap verified. Example:
3172 MEM[(int *)p_5] = 0;
3173 MEM[(short *)p_5 + 3B] = 1;
3174 MEM[(char *)p_5 + 4B] = _9;
3175 MEM[(char *)p_5 + 2B] = 2; */
3176 k = 0;
3177 break;
3178 }
3179 k = j;
bd909071 3180 min_order = MIN (min_order, info2->order);
18e0c3d1
JJ
3181 this_end = MAX (this_end,
3182 info2->bitpos + info2->bitsize);
6cd4c66e 3183 }
18e0c3d1 3184 else if (info2->rhs_code == INTEGER_CST
4119cd69 3185 && info2->lp_nr == merged_store->lp_nr
18e0c3d1
JJ
3186 && !last_iter)
3187 {
3188 max_order = MAX (max_order, info2->order + 1);
3189 first_nonmergeable_int_order
3190 = MIN (first_nonmergeable_int_order,
3191 info2->order);
3192 }
3193 else
3194 first_nonmergeable_order
3195 = MIN (first_nonmergeable_order, info2->order);
6cd4c66e 3196 }
bd909071
JJ
3197 if (k > i
3198 && !check_no_overlap (m_store_info, len - 1, true,
3199 min_order, try_order,
3200 merged_store->start, this_end,
3201 first_earlier, end_earlier))
3202 k = 0;
18e0c3d1
JJ
3203 if (k == 0)
3204 {
3205 if (last_order == try_order)
3206 break;
3207 /* If this failed, but only because we grew
3208 try_order, retry with the last working one,
3209 so that we merge at least something. */
3210 try_order = last_order;
3211 last_iter = true;
3212 continue;
3213 }
3214 last_order = try_order;
3215 /* Retry with a larger try_order to see if we could
3216 merge some further INTEGER_CST stores. */
3217 if (max_order
3218 && (first_nonmergeable_int_order
3219 < first_nonmergeable_order))
3220 {
3221 try_order = MIN (max_order,
3222 first_nonmergeable_order);
3223 try_order
3224 = MIN (try_order,
3225 merged_store->first_nonmergeable_order);
3226 if (try_order > last_order && ++attempts < 16)
3227 continue;
3228 }
3229 first_nonmergeable_order
3230 = MIN (first_nonmergeable_order,
3231 first_nonmergeable_int_order);
3232 end = this_end;
3233 break;
6cd4c66e 3234 }
18e0c3d1 3235 while (1);
6cd4c66e
JJ
3236
3237 if (k != 0)
3238 {
3239 merged_store->merge_overlapping (info);
3240
18e0c3d1
JJ
3241 merged_store->first_nonmergeable_order
3242 = MIN (merged_store->first_nonmergeable_order,
3243 first_nonmergeable_order);
3244
6cd4c66e
JJ
3245 for (unsigned int j = i + 1; j <= k; j++)
3246 {
3247 store_immediate_info *info2 = m_store_info[j];
3248 gcc_assert (info2->bitpos < end);
3249 if (info2->order < last_order)
3250 {
3251 gcc_assert (info2->rhs_code == INTEGER_CST);
18e0c3d1
JJ
3252 if (info != info2)
3253 merged_store->merge_overlapping (info2);
6cd4c66e
JJ
3254 }
3255 /* Other stores are kept and not merged in any
3256 way. */
3257 }
3258 ignore = k;
3259 goto done;
3260 }
3261 }
245f6de1 3262 }
f663d9ad 3263 }
245f6de1
JJ
3264 /* |---store 1---||---store 2---|
3265 This store is consecutive to the previous one.
3266 Merge it into the current store group. There can be gaps in between
3267 the stores, but there can't be gaps in between bitregions. */
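/* Illustrative note (added for exposition, not part of the original
   sources): e.g. two bit-field stores that leave an unwritten bit
   between them are still merged by the branch below, as long as their
   bitregions touch or overlap, because the gap lies within a single
   bitregion.  */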
c94c3532 3268 else if (info->bitregion_start <= merged_store->bitregion_end
7f5a3982 3269 && merged_store->can_be_merged_into (info))
f663d9ad 3270 {
245f6de1
JJ
3271 store_immediate_info *infof = merged_store->stores[0];
3272
3273 /* All the rhs_code ops that take 2 operands are commutative,
3274 swap the operands if it could make the operands compatible. */
3275 if (infof->ops[0].base_addr
3276 && infof->ops[1].base_addr
3277 && info->ops[0].base_addr
3278 && info->ops[1].base_addr
8a91d545
RS
3279 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
3280 info->bitpos - infof->bitpos)
245f6de1
JJ
3281 && operand_equal_p (info->ops[1].base_addr,
3282 infof->ops[0].base_addr, 0))
127ef369
JJ
3283 {
3284 std::swap (info->ops[0], info->ops[1]);
3285 info->ops_swapped_p = true;
3286 }
4d213bf6 3287 if (check_no_overlap (m_store_info, i, false,
bd909071 3288 MIN (merged_store->first_order, info->order),
a7fe6482 3289 MAX (merged_store->last_order, info->order),
bd909071 3290 merged_store->start,
a7fe6482 3291 MAX (merged_store->start + merged_store->width,
bd909071
JJ
3292 info->bitpos + info->bitsize),
3293 first_earlier, end_earlier))
245f6de1 3294 {
7f5a3982
EB
3295 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
3296 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
3297 {
3298 info->rhs_code = BIT_INSERT_EXPR;
3299 info->ops[0].val = gimple_assign_rhs1 (info->stmt);
3300 info->ops[0].base_addr = NULL_TREE;
3301 }
3302 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
3303 {
3f207ab3 3304 for (store_immediate_info *infoj : merged_store->stores)
7f5a3982
EB
3305 {
3306 infoj->rhs_code = BIT_INSERT_EXPR;
3307 infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
3308 infoj->ops[0].base_addr = NULL_TREE;
3309 }
e362a897 3310 merged_store->bit_insertion = true;
7f5a3982
EB
3311 }
3312 if ((infof->ops[0].base_addr
3313 ? compatible_load_p (merged_store, info, base_addr, 0)
3314 : !info->ops[0].base_addr)
3315 && (infof->ops[1].base_addr
3316 ? compatible_load_p (merged_store, info, base_addr, 1)
3317 : !info->ops[1].base_addr))
3318 {
3319 merged_store->merge_into (info);
3320 goto done;
3321 }
245f6de1
JJ
3322 }
3323 }
f663d9ad 3324
245f6de1
JJ
3325 /* |---store 1---| <gap> |---store 2---|.
3326 Gap between stores or the rhs not compatible. Start a new group. */
f663d9ad 3327
245f6de1
JJ
3328 /* Try to apply all the stores recorded for the group to determine
3329 the bitpattern they write and discard it if that fails.
3330 This will also reject single-store groups. */
c94c3532 3331 if (merged_store->apply_stores ())
245f6de1 3332 m_merged_store_groups.safe_push (merged_store);
c94c3532
EB
3333 else
3334 delete merged_store;
f663d9ad 3335
245f6de1 3336 merged_store = new merged_store_group (info);
bd909071 3337 end_earlier = i;
c94c3532
EB
3338 if (dump_file && (dump_flags & TDF_DETAILS))
3339 fputs ("New store group\n", dump_file);
3340
3341 done:
3342 if (dump_file && (dump_flags & TDF_DETAILS))
3343 {
3344 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3345 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3346 i, info->bitsize, info->bitpos);
3347 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3348 fputc ('\n', dump_file);
3349 }
f663d9ad
KT
3350 }
3351
a62b3dc5 3352 /* Record or discard the last store group. */
4b84d9b8
JJ
3353 if (merged_store)
3354 {
c94c3532 3355 if (merged_store->apply_stores ())
4b84d9b8 3356 m_merged_store_groups.safe_push (merged_store);
c94c3532
EB
3357 else
3358 delete merged_store;
4b84d9b8 3359 }
f663d9ad
KT
3360
3361 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
c94c3532 3362
f663d9ad
KT
3363 bool success
3364 = !m_merged_store_groups.is_empty ()
3365 && m_merged_store_groups.length () < m_store_info.length ();
3366
3367 if (success && dump_file)
c94c3532 3368 fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
a62b3dc5 3369 m_merged_store_groups.length ());
f663d9ad
KT
3370
3371 return success;
3372}
3373
245f6de1
JJ
3374/* Return the type to use for the merged stores or loads described by STMTS.
3375 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3376 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3377 of the MEM_REFs if any. */
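/* Illustrative note (added for exposition, not part of the original
   sources): if one constituent access is through an int-typed reference
   and another through a float-typed one, their alias pointer types are
   not compatible and the function falls back to ptr_type_node; likewise
   any mismatch in MR_DEPENDENCE_CLIQUE / MR_DEPENDENCE_BASE among the
   MEM_REF bases clears *CLIQUEP and *BASEP to 0.  */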
f663d9ad
KT
3378
3379static tree
245f6de1
JJ
3380get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3381 unsigned short *cliquep, unsigned short *basep)
f663d9ad
KT
3382{
3383 gimple *stmt;
3384 unsigned int i;
245f6de1
JJ
3385 tree type = NULL_TREE;
3386 tree ret = NULL_TREE;
3387 *cliquep = 0;
3388 *basep = 0;
f663d9ad
KT
3389
3390 FOR_EACH_VEC_ELT (stmts, i, stmt)
3391 {
245f6de1
JJ
3392 tree ref = is_load ? gimple_assign_rhs1 (stmt)
3393 : gimple_assign_lhs (stmt);
3394 tree type1 = reference_alias_ptr_type (ref);
3395 tree base = get_base_address (ref);
f663d9ad 3396
245f6de1
JJ
3397 if (i == 0)
3398 {
3399 if (TREE_CODE (base) == MEM_REF)
3400 {
3401 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3402 *basep = MR_DEPENDENCE_BASE (base);
3403 }
3404 ret = type = type1;
3405 continue;
3406 }
f663d9ad 3407 if (!alias_ptr_types_compatible_p (type, type1))
245f6de1
JJ
3408 ret = ptr_type_node;
3409 if (TREE_CODE (base) != MEM_REF
3410 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3411 || *basep != MR_DEPENDENCE_BASE (base))
3412 {
3413 *cliquep = 0;
3414 *basep = 0;
3415 }
f663d9ad 3416 }
245f6de1 3417 return ret;
f663d9ad
KT
3418}
3419
3420/* Return the location_t information we can find among the statements
3421 in STMTS. */
3422
3423static location_t
245f6de1 3424get_location_for_stmts (vec<gimple *> &stmts)
f663d9ad 3425{
3f207ab3 3426 for (gimple *stmt : stmts)
f663d9ad
KT
3427 if (gimple_has_location (stmt))
3428 return gimple_location (stmt);
3429
3430 return UNKNOWN_LOCATION;
3431}
3432
3433/* Used to describe a store resulting from splitting a wide store into smaller
3434 regularly-sized stores in split_group. */
3435
6c1dae73 3436class split_store
f663d9ad 3437{
6c1dae73 3438public:
f663d9ad
KT
3439 unsigned HOST_WIDE_INT bytepos;
3440 unsigned HOST_WIDE_INT size;
3441 unsigned HOST_WIDE_INT align;
245f6de1 3442 auto_vec<store_immediate_info *> orig_stores;
a62b3dc5
JJ
3443 /* True if there is a single orig stmt covering the whole split store. */
3444 bool orig;
f663d9ad
KT
3445 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3446 unsigned HOST_WIDE_INT);
3447};
3448
3449/* Simple constructor. */
3450
3451split_store::split_store (unsigned HOST_WIDE_INT bp,
3452 unsigned HOST_WIDE_INT sz,
3453 unsigned HOST_WIDE_INT al)
a62b3dc5 3454 : bytepos (bp), size (sz), align (al), orig (false)
f663d9ad 3455{
245f6de1 3456 orig_stores.create (0);
f663d9ad
KT
3457}
3458
245f6de1
JJ
3459/* Record all stores in GROUP that write to the region starting at BITPOS
3460 and of size BITSIZE. Record infos for such statements in STORES if
3461 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
5384a802
JJ
3462 if there is exactly one original store in the range (in that case ignore
3463 clobber stmts, unless there are only clobber stmts). */
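/* Illustrative sketch (added for exposition; the bit ranges are a
   made-up scenario, not from the original sources): with GROUP holding
   stores at bit ranges [0,32) and [32,48), a query for BITPOS 0 and
   BITSIZE 32 pushes only the first store into *STORES and returns it,
   while a query covering [0,48) pushes both and returns NULL because
   more than one non-clobber store overlaps the range.  */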
f663d9ad 3464
a62b3dc5 3465static store_immediate_info *
99b1c316 3466find_constituent_stores (class merged_store_group *group,
245f6de1
JJ
3467 vec<store_immediate_info *> *stores,
3468 unsigned int *first,
3469 unsigned HOST_WIDE_INT bitpos,
3470 unsigned HOST_WIDE_INT bitsize)
f663d9ad 3471{
a62b3dc5 3472 store_immediate_info *info, *ret = NULL;
f663d9ad 3473 unsigned int i;
a62b3dc5
JJ
3474 bool second = false;
3475 bool update_first = true;
f663d9ad 3476 unsigned HOST_WIDE_INT end = bitpos + bitsize;
a62b3dc5 3477 for (i = *first; group->stores.iterate (i, &info); ++i)
f663d9ad
KT
3478 {
3479 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3480 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
a62b3dc5
JJ
3481 if (stmt_end <= bitpos)
3482 {
3483 /* BITPOS passed to this function never decreases from within the
3484 same split_group call, so optimize and don't scan info records
3485 which are known to end before or at BITPOS next time.
3486 Only do it if all stores before this one also pass this. */
3487 if (update_first)
3488 *first = i + 1;
3489 continue;
3490 }
3491 else
3492 update_first = false;
3493
f663d9ad 3494 /* The stores in GROUP are ordered by bitposition so if we're past
a62b3dc5
JJ
3495 the region for this group return early. */
3496 if (stmt_start >= end)
3497 return ret;
3498
5384a802
JJ
3499 if (gimple_clobber_p (info->stmt))
3500 {
3501 if (stores)
3502 stores->safe_push (info);
3503 if (ret == NULL)
3504 ret = info;
3505 continue;
3506 }
245f6de1 3507 if (stores)
a62b3dc5 3508 {
245f6de1 3509 stores->safe_push (info);
5384a802 3510 if (ret && !gimple_clobber_p (ret->stmt))
a62b3dc5
JJ
3511 {
3512 ret = NULL;
3513 second = true;
3514 }
3515 }
5384a802 3516 else if (ret && !gimple_clobber_p (ret->stmt))
a62b3dc5
JJ
3517 return NULL;
3518 if (!second)
3519 ret = info;
f663d9ad 3520 }
a62b3dc5 3521 return ret;
f663d9ad
KT
3522}
3523
d7a9512e
JJ
3524/* Return how many SSA_NAMEs used to compute the value to store in the INFO
3525 store have multiple uses. If any SSA_NAME has multiple uses, also
3526 count statements needed to compute it. */
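/* Illustrative sketch (added for exposition; 'p', 'q' and the tem_N
   names are made up, not from the original sources): for a store such as
     tem_1 = *q;
     tem_2 = tem_1 ^ 0x12;
     *p = tem_2;
   the count is 0 when tem_1 and tem_2 are single-use, but if tem_1 also
   feeds an unrelated statement, the load has to be kept anyway and the
   estimate grows by one.  */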
3527
3528static unsigned
3529count_multiple_uses (store_immediate_info *info)
3530{
3531 gimple *stmt = info->stmt;
3532 unsigned ret = 0;
3533 switch (info->rhs_code)
3534 {
3535 case INTEGER_CST:
e362a897 3536 case STRING_CST:
d7a9512e
JJ
3537 return 0;
3538 case BIT_AND_EXPR:
3539 case BIT_IOR_EXPR:
3540 case BIT_XOR_EXPR:
d60edaba
JJ
3541 if (info->bit_not_p)
3542 {
3543 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3544 ret = 1; /* Fall through below to return
3545 the BIT_NOT_EXPR stmt and then
3546 BIT_{AND,IOR,XOR}_EXPR and anything it
3547 uses. */
3548 else
3550 /* After this, stmt is the BIT_NOT_EXPR. */
3550 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3551 }
d7a9512e
JJ
3552 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3553 {
3554 ret += 1 + info->ops[0].bit_not_p;
3555 if (info->ops[1].base_addr)
3556 ret += 1 + info->ops[1].bit_not_p;
3557 return ret + 1;
3558 }
3559 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3560 /* stmt is now the BIT_*_EXPR. */
3561 if (!has_single_use (gimple_assign_rhs1 (stmt)))
127ef369
JJ
3562 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3563 else if (info->ops[info->ops_swapped_p].bit_not_p)
d7a9512e
JJ
3564 {
3565 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3566 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3567 ++ret;
3568 }
3569 if (info->ops[1].base_addr == NULL_TREE)
127ef369
JJ
3570 {
3571 gcc_checking_assert (!info->ops_swapped_p);
3572 return ret;
3573 }
d7a9512e 3574 if (!has_single_use (gimple_assign_rhs2 (stmt)))
127ef369
JJ
3575 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3576 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
d7a9512e
JJ
3577 {
3578 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3579 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3580 ++ret;
3581 }
3582 return ret;
3583 case MEM_REF:
3584 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3585 return 1 + info->ops[0].bit_not_p;
3586 else if (info->ops[0].bit_not_p)
3587 {
3588 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3589 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3590 return 1;
3591 }
3592 return 0;
c94c3532
EB
3593 case BIT_INSERT_EXPR:
3594 return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
d7a9512e
JJ
3595 default:
3596 gcc_unreachable ();
3597 }
3598}
3599
f663d9ad 3600/* Split a merged store described by GROUP by populating the SPLIT_STORES
a62b3dc5
JJ
3601 vector (if non-NULL) with split_store structs describing the byte offset
3602 (from the base), the bit size and alignment of each store as well as the
3603 original statements involved in each such split group.
f663d9ad
KT
3604 This is to separate the splitting strategy from the statement
3605 building/emission/linking done in output_merged_store.
a62b3dc5 3606 Return the number of new stores.
245f6de1
JJ
3607 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3608 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3afd514b
JJ
3609 BZERO_FIRST may be true only when the first store covers the whole group
3610 and clears it; if BZERO_FIRST is true, keep that first store in the set
3611 unmodified and emit further stores for the overrides only.
a62b3dc5
JJ
3612 If SPLIT_STORES is NULL, it is just a dry run to count the number of
3613 new stores. */
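/* Illustrative example (added for exposition; the numbers describe a
   plausible scenario only, they are not taken from the original
   sources): for a group of constant stores covering 6 bytes whose start
   is 4-byte aligned, on a strict-alignment target with
   MAX_STORE_BITSIZE == 32, a dry run would typically report two new
   stores, and with SPLIT_STORES non-NULL the vector would receive one
   split_store of size 32 at bytepos 0 and one of size 16 at bytepos 4,
   each with its constituent store_immediate_infos in orig_stores.  */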
f663d9ad 3614
a62b3dc5 3615static unsigned int
245f6de1 3616split_group (merged_store_group *group, bool allow_unaligned_store,
3afd514b 3617 bool allow_unaligned_load, bool bzero_first,
99b1c316 3618 vec<split_store *> *split_stores,
d7a9512e
JJ
3619 unsigned *total_orig,
3620 unsigned *total_new)
f663d9ad 3621{
a62b3dc5
JJ
3622 unsigned HOST_WIDE_INT pos = group->bitregion_start;
3623 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
f663d9ad 3624 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
a62b3dc5
JJ
3625 unsigned HOST_WIDE_INT group_align = group->align;
3626 unsigned HOST_WIDE_INT align_base = group->align_base;
245f6de1 3627 unsigned HOST_WIDE_INT group_load_align = group_align;
d7a9512e 3628 bool any_orig = false;
f663d9ad 3629
f663d9ad
KT
3630 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3631
e362a897
EB
3632 /* For bswap framework using sets of stores, all the checking has been done
3633 earlier in try_coalesce_bswap and the result always needs to be emitted
3634 as a single store. Likewise for string concatenation. */
4b84d9b8 3635 if (group->stores[0]->rhs_code == LROTATE_EXPR
e362a897
EB
3636 || group->stores[0]->rhs_code == NOP_EXPR
3637 || group->string_concatenation)
4b84d9b8 3638 {
3afd514b 3639 gcc_assert (!bzero_first);
4b84d9b8
JJ
3640 if (total_orig)
3641 {
3642 /* Avoid the old/new stmt count heuristics. It should
3643 always be beneficial. */
3644 total_new[0] = 1;
3645 total_orig[0] = 2;
3646 }
3647
3648 if (split_stores)
3649 {
3650 unsigned HOST_WIDE_INT align_bitpos
3651 = (group->start - align_base) & (group_align - 1);
3652 unsigned HOST_WIDE_INT align = group_align;
3653 if (align_bitpos)
3654 align = least_bit_hwi (align_bitpos);
3655 bytepos = group->start / BITS_PER_UNIT;
99b1c316 3656 split_store *store
4b84d9b8
JJ
3657 = new split_store (bytepos, group->width, align);
3658 unsigned int first = 0;
3659 find_constituent_stores (group, &store->orig_stores,
3660 &first, group->start, group->width);
3661 split_stores->safe_push (store);
3662 }
3663
3664 return 1;
3665 }
3666
a62b3dc5 3667 unsigned int ret = 0, first = 0;
f663d9ad 3668 unsigned HOST_WIDE_INT try_pos = bytepos;
f663d9ad 3669
d7a9512e
JJ
3670 if (total_orig)
3671 {
3672 unsigned int i;
3673 store_immediate_info *info = group->stores[0];
3674
3675 total_new[0] = 0;
3676 total_orig[0] = 1; /* The orig store. */
3677 info = group->stores[0];
3678 if (info->ops[0].base_addr)
a6fbd154 3679 total_orig[0]++;
d7a9512e 3680 if (info->ops[1].base_addr)
a6fbd154 3681 total_orig[0]++;
d7a9512e
JJ
3682 switch (info->rhs_code)
3683 {
3684 case BIT_AND_EXPR:
3685 case BIT_IOR_EXPR:
3686 case BIT_XOR_EXPR:
3687 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3688 break;
3689 default:
3690 break;
3691 }
3692 total_orig[0] *= group->stores.length ();
3693
3694 FOR_EACH_VEC_ELT (group->stores, i, info)
a6fbd154
JJ
3695 {
3696 total_new[0] += count_multiple_uses (info);
3697 total_orig[0] += (info->bit_not_p
3698 + info->ops[0].bit_not_p
3699 + info->ops[1].bit_not_p);
3700 }
d7a9512e
JJ
3701 }
3702
245f6de1
JJ
3703 if (!allow_unaligned_load)
3704 for (int i = 0; i < 2; ++i)
3705 if (group->load_align[i])
3706 group_load_align = MIN (group_load_align, group->load_align[i]);
3707
3afd514b
JJ
3708 if (bzero_first)
3709 {
5384a802
JJ
3710 store_immediate_info *gstore;
3711 FOR_EACH_VEC_ELT (group->stores, first, gstore)
3712 if (!gimple_clobber_p (gstore->stmt))
3713 break;
3714 ++first;
3afd514b
JJ
3715 ret = 1;
3716 if (split_stores)
3717 {
99b1c316 3718 split_store *store
5384a802
JJ
3719 = new split_store (bytepos, gstore->bitsize, align_base);
3720 store->orig_stores.safe_push (gstore);
3afd514b
JJ
3721 store->orig = true;
3722 any_orig = true;
3723 split_stores->safe_push (store);
3724 }
3725 }
3726
f663d9ad
KT
3727 while (size > 0)
3728 {
245f6de1 3729 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3afd514b
JJ
3730 && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3731 || (bzero_first && group->val[try_pos - bytepos] == 0)))
a62b3dc5
JJ
3732 {
3733 /* Skip padding bytes. */
3734 ++try_pos;
3735 size -= BITS_PER_UNIT;
3736 continue;
3737 }
3738
f663d9ad 3739 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
a62b3dc5
JJ
3740 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3741 unsigned HOST_WIDE_INT align_bitpos
3742 = (try_bitpos - align_base) & (group_align - 1);
3743 unsigned HOST_WIDE_INT align = group_align;
5384a802 3744 bool found_orig = false;
a62b3dc5
JJ
3745 if (align_bitpos)
3746 align = least_bit_hwi (align_bitpos);
245f6de1 3747 if (!allow_unaligned_store)
a62b3dc5 3748 try_size = MIN (try_size, align);
245f6de1
JJ
3749 if (!allow_unaligned_load)
3750 {
3751 /* If we can't do or don't want to do unaligned stores
3752 as well as loads, we need to take the loads into account
3753 as well. */
3754 unsigned HOST_WIDE_INT load_align = group_load_align;
3755 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3756 if (align_bitpos)
3757 load_align = least_bit_hwi (align_bitpos);
3758 for (int i = 0; i < 2; ++i)
3759 if (group->load_align[i])
3760 {
8a91d545
RS
3761 align_bitpos
3762 = known_alignment (try_bitpos
3763 - group->stores[0]->bitpos
3764 + group->stores[0]->ops[i].bitpos
3765 - group->load_align_base[i]);
3766 if (align_bitpos & (group_load_align - 1))
245f6de1
JJ
3767 {
3768 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3769 load_align = MIN (load_align, a);
3770 }
3771 }
3772 try_size = MIN (try_size, load_align);
3773 }
a62b3dc5 3774 store_immediate_info *info
245f6de1 3775 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
5384a802 3776 if (info && !gimple_clobber_p (info->stmt))
a62b3dc5
JJ
3777 {
3778 /* If there is just one original statement for the range, see if
3779 we can just reuse the original store which could be even larger
3780 than try_size. */
3781 unsigned HOST_WIDE_INT stmt_end
3782 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
245f6de1
JJ
3783 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3784 stmt_end - try_bitpos);
a62b3dc5
JJ
3785 if (info && info->bitpos >= try_bitpos)
3786 {
5384a802
JJ
3787 store_immediate_info *info2 = NULL;
3788 unsigned int first_copy = first;
3789 if (info->bitpos > try_bitpos
3790 && stmt_end - try_bitpos <= try_size)
3791 {
3792 info2 = find_constituent_stores (group, NULL, &first_copy,
3793 try_bitpos,
3794 info->bitpos - try_bitpos);
3795 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3796 }
3797 if (info2 == NULL && stmt_end - try_bitpos < try_size)
3798 {
3799 info2 = find_constituent_stores (group, NULL, &first_copy,
3800 stmt_end,
3801 (try_bitpos + try_size)
3802 - stmt_end);
3803 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3804 }
3805 if (info2 == NULL)
3806 {
3807 try_size = stmt_end - try_bitpos;
3808 found_orig = true;
3809 goto found;
3810 }
a62b3dc5
JJ
3811 }
3812 }
f663d9ad 3813
a62b3dc5
JJ
3814 /* Approximate store bitsize for the case when there are no padding
3815 bits. */
3816 while (try_size > size)
3817 try_size /= 2;
3818 /* Now look for whole padding bytes at the end of that bitsize. */
3819 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3820 if (group->mask[try_pos - bytepos + nonmasked - 1]
3afd514b
JJ
3821 != (unsigned char) ~0U
3822 && (!bzero_first
3823 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
a62b3dc5 3824 break;
5384a802 3825 if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
a62b3dc5
JJ
3826 {
3827 /* If entire try_size range is padding, skip it. */
3828 try_pos += try_size / BITS_PER_UNIT;
3829 size -= try_size;
3830 continue;
3831 }
3832 /* Otherwise try to decrease try_size if second half, last 3 quarters
3833 etc. are padding. */
3834 nonmasked *= BITS_PER_UNIT;
3835 while (nonmasked <= try_size / 2)
3836 try_size /= 2;
245f6de1 3837 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
a62b3dc5
JJ
3838 {
3839 /* Now look for whole padding bytes at the start of that bitsize. */
3840 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3841 for (masked = 0; masked < try_bytesize; ++masked)
3afd514b
JJ
3842 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3843 && (!bzero_first
3844 || group->val[try_pos - bytepos + masked] != 0))
a62b3dc5
JJ
3845 break;
3846 masked *= BITS_PER_UNIT;
3847 gcc_assert (masked < try_size);
3848 if (masked >= try_size / 2)
3849 {
3850 while (masked >= try_size / 2)
3851 {
3852 try_size /= 2;
3853 try_pos += try_size / BITS_PER_UNIT;
3854 size -= try_size;
3855 masked -= try_size;
3856 }
3857 /* Need to recompute the alignment, so just retry at the new
3858 position. */
3859 continue;
3860 }
3861 }
3862
3863 found:
3864 ++ret;
f663d9ad 3865
a62b3dc5
JJ
3866 if (split_stores)
3867 {
99b1c316 3868 split_store *store
a62b3dc5 3869 = new split_store (try_pos, try_size, align);
245f6de1
JJ
3870 info = find_constituent_stores (group, &store->orig_stores,
3871 &first, try_bitpos, try_size);
a62b3dc5 3872 if (info
5384a802 3873 && !gimple_clobber_p (info->stmt)
a62b3dc5 3874 && info->bitpos >= try_bitpos
5384a802
JJ
3875 && info->bitpos + info->bitsize <= try_bitpos + try_size
3876 && (store->orig_stores.length () == 1
3877 || found_orig
3878 || (info->bitpos == try_bitpos
3879 && (info->bitpos + info->bitsize
3880 == try_bitpos + try_size))))
d7a9512e
JJ
3881 {
3882 store->orig = true;
3883 any_orig = true;
3884 }
a62b3dc5
JJ
3885 split_stores->safe_push (store);
3886 }
3887
3888 try_pos += try_size / BITS_PER_UNIT;
f663d9ad 3889 size -= try_size;
f663d9ad 3890 }
a62b3dc5 3891
d7a9512e
JJ
3892 if (total_orig)
3893 {
a6fbd154 3894 unsigned int i;
99b1c316 3895 split_store *store;
d7a9512e
JJ
3896 /* If we are reusing some original stores and any of the
3897 original SSA_NAMEs had multiple uses, we need to subtract
3898 those now before we add the new ones. */
3899 if (total_new[0] && any_orig)
3900 {
d7a9512e
JJ
3901 FOR_EACH_VEC_ELT (*split_stores, i, store)
3902 if (store->orig)
3903 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3904 }
3905 total_new[0] += ret; /* The new store. */
3906 store_immediate_info *info = group->stores[0];
3907 if (info->ops[0].base_addr)
a6fbd154 3908 total_new[0] += ret;
d7a9512e 3909 if (info->ops[1].base_addr)
a6fbd154 3910 total_new[0] += ret;
d7a9512e
JJ
3911 switch (info->rhs_code)
3912 {
3913 case BIT_AND_EXPR:
3914 case BIT_IOR_EXPR:
3915 case BIT_XOR_EXPR:
3916 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
3917 break;
3918 default:
3919 break;
3920 }
a6fbd154
JJ
3921 FOR_EACH_VEC_ELT (*split_stores, i, store)
3922 {
3923 unsigned int j;
3924 bool bit_not_p[3] = { false, false, false };
3925 /* If all orig_stores have certain bit_not_p set, then
3926 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3927 If some orig_stores have certain bit_not_p set, then
3928 we'd use a BIT_XOR_EXPR with a mask and need to account for
3929 it. */
3930 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3931 {
3932 if (info->ops[0].bit_not_p)
3933 bit_not_p[0] = true;
3934 if (info->ops[1].bit_not_p)
3935 bit_not_p[1] = true;
3936 if (info->bit_not_p)
3937 bit_not_p[2] = true;
3938 }
3939 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3940 }
3941
d7a9512e
JJ
3942 }
3943
a62b3dc5 3944 return ret;
f663d9ad
KT
3945}
3946
a6fbd154
JJ
3947/* Return the operation through which the operand IDX (if < 2) or
3948 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3949 is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3950 the bits should be xored with mask. */
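/* Illustrative note (added for exposition, not part of the original
   sources): if no constituent store has the relevant bit_not_p set,
   NOP_EXPR is returned and nothing is inverted; if all of them have it
   set and none stores a value narrower than its bitsize, BIT_NOT_EXPR
   inverts the whole value; otherwise a mask with ones exactly over the
   to-be-inverted bits is built and BIT_XOR_EXPR with that mask is
   returned.  */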
3951
3952static enum tree_code
3953invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3954{
3955 unsigned int i;
3956 store_immediate_info *info;
3957 unsigned int cnt = 0;
e215422f 3958 bool any_paddings = false;
a6fbd154
JJ
3959 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3960 {
3961 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3962 if (bit_not_p)
e215422f
JJ
3963 {
3964 ++cnt;
3965 tree lhs = gimple_assign_lhs (info->stmt);
3966 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3967 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3968 any_paddings = true;
3969 }
a6fbd154
JJ
3970 }
3971 mask = NULL_TREE;
3972 if (cnt == 0)
3973 return NOP_EXPR;
e215422f 3974 if (cnt == split_store->orig_stores.length () && !any_paddings)
a6fbd154
JJ
3975 return BIT_NOT_EXPR;
3976
3977 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3978 unsigned buf_size = split_store->size / BITS_PER_UNIT;
3979 unsigned char *buf
3980 = XALLOCAVEC (unsigned char, buf_size);
3981 memset (buf, ~0U, buf_size);
3982 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3983 {
3984 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3985 if (!bit_not_p)
3986 continue;
3987 /* Clear regions with bit_not_p and invert afterwards, rather than
3988 clear regions with !bit_not_p, so that gaps in between stores aren't
3989 set in the mask. */
3990 unsigned HOST_WIDE_INT bitsize = info->bitsize;
e215422f 3991 unsigned HOST_WIDE_INT prec = bitsize;
a6fbd154 3992 unsigned int pos_in_buffer = 0;
e215422f
JJ
3993 if (any_paddings)
3994 {
3995 tree lhs = gimple_assign_lhs (info->stmt);
3996 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3997 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3998 prec = TYPE_PRECISION (TREE_TYPE (lhs));
3999 }
a6fbd154
JJ
4000 if (info->bitpos < try_bitpos)
4001 {
4002 gcc_assert (info->bitpos + bitsize > try_bitpos);
e215422f
JJ
4003 if (!BYTES_BIG_ENDIAN)
4004 {
4005 if (prec <= try_bitpos - info->bitpos)
4006 continue;
4007 prec -= try_bitpos - info->bitpos;
4008 }
4009 bitsize -= try_bitpos - info->bitpos;
4010 if (BYTES_BIG_ENDIAN && prec > bitsize)
4011 prec = bitsize;
a6fbd154
JJ
4012 }
4013 else
4014 pos_in_buffer = info->bitpos - try_bitpos;
e215422f
JJ
4015 if (prec < bitsize)
4016 {
4017 /* If this is a bool inversion, invert just the least significant
4018 prec bits rather than all bits of it. */
4019 if (BYTES_BIG_ENDIAN)
4020 {
4021 pos_in_buffer += bitsize - prec;
4022 if (pos_in_buffer >= split_store->size)
4023 continue;
4024 }
4025 bitsize = prec;
4026 }
a6fbd154
JJ
4027 if (pos_in_buffer + bitsize > split_store->size)
4028 bitsize = split_store->size - pos_in_buffer;
4029 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
4030 if (BYTES_BIG_ENDIAN)
4031 clear_bit_region_be (p, (BITS_PER_UNIT - 1
4032 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
4033 else
4034 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
4035 }
4036 for (unsigned int i = 0; i < buf_size; ++i)
4037 buf[i] = ~buf[i];
4038 mask = native_interpret_expr (int_type, buf, buf_size);
4039 return BIT_XOR_EXPR;
4040}
4041
f663d9ad
KT
4042/* Given a merged store group GROUP output the widened version of it.
4043 The store chain is against the base object BASE.
4044 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
4045 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
4046 Make sure that the number of statements output is less than the number of
4047 original statements. If a better sequence is possible emit it and
4048 return true. */
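/* Illustrative note (added for exposition, not part of the original
   sources): merging e.g. two adjacent 2-byte constant stores into one
   4-byte store replaces two statements by one and is kept, whereas a
   split that would need as many (or more) new statements as the
   original sequence, as estimated below via split_group and
   count_multiple_uses, is rejected and the group is left untouched.  */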
4049
4050bool
b5926e23 4051imm_store_chain_info::output_merged_store (merged_store_group *group)
f663d9ad 4052{
e362a897 4053 const unsigned HOST_WIDE_INT start_byte_pos
a62b3dc5 4054 = group->bitregion_start / BITS_PER_UNIT;
f663d9ad
KT
4055 unsigned int orig_num_stmts = group->stores.length ();
4056 if (orig_num_stmts < 2)
4057 return false;
4058
245f6de1 4059 bool allow_unaligned_store
028d4092 4060 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
245f6de1 4061 bool allow_unaligned_load = allow_unaligned_store;
3afd514b 4062 bool bzero_first = false;
5384a802
JJ
4063 store_immediate_info *store;
4064 unsigned int num_clobber_stmts = 0;
4065 if (group->stores[0]->rhs_code == INTEGER_CST)
4066 {
e362a897 4067 unsigned int i;
5384a802
JJ
4068 FOR_EACH_VEC_ELT (group->stores, i, store)
4069 if (gimple_clobber_p (store->stmt))
4070 num_clobber_stmts++;
4071 else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
4072 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
4073 && group->start == store->bitpos
4074 && group->width == store->bitsize
4075 && (group->start % BITS_PER_UNIT) == 0
4076 && (group->width % BITS_PER_UNIT) == 0)
4077 {
4078 bzero_first = true;
4079 break;
4080 }
4081 else
4082 break;
4083 FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
4084 if (gimple_clobber_p (store->stmt))
4085 num_clobber_stmts++;
4086 if (num_clobber_stmts == orig_num_stmts)
4087 return false;
4088 orig_num_stmts -= num_clobber_stmts;
4089 }
3afd514b 4090 if (allow_unaligned_store || bzero_first)
a62b3dc5
JJ
4091 {
4092 /* If unaligned stores are allowed, see how many stores we'd emit
4093 for unaligned and how many stores we'd emit for aligned stores.
3afd514b
JJ
4094 Only use unaligned stores if it allows fewer stores than aligned.
4095 Similarly, if there is a whole region clear first, prefer expanding
4096 it together compared to expanding clear first followed by merged
4097 further stores. */
21f65995 4098 unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
3afd514b
JJ
4099 int pass_min = 0;
4100 for (int pass = 0; pass < 4; ++pass)
4101 {
4102 if (!allow_unaligned_store && (pass & 1) != 0)
4103 continue;
4104 if (!bzero_first && (pass & 2) != 0)
4105 continue;
4106 cnt[pass] = split_group (group, (pass & 1) != 0,
4107 allow_unaligned_load, (pass & 2) != 0,
4108 NULL, NULL, NULL);
4109 if (cnt[pass] < cnt[pass_min])
4110 pass_min = pass;
4111 }
4112 if ((pass_min & 1) == 0)
245f6de1 4113 allow_unaligned_store = false;
3afd514b
JJ
4114 if ((pass_min & 2) == 0)
4115 bzero_first = false;
a62b3dc5 4116 }
e362a897
EB
4117
4118 auto_vec<class split_store *, 32> split_stores;
4119 split_store *split_store;
4120 unsigned total_orig, total_new, i;
3afd514b 4121 split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
d7a9512e 4122 &split_stores, &total_orig, &total_new);
a62b3dc5 4123
5384a802
JJ
4124 /* Determine if there is a clobber covering the whole group at the start,
4125 followed by proposed split stores that cover the whole group. In that
4126 case, prefer the transformation even if
4127 split_stores.length () == orig_num_stmts. */
4128 bool clobber_first = false;
4129 if (num_clobber_stmts
4130 && gimple_clobber_p (group->stores[0]->stmt)
4131 && group->start == group->stores[0]->bitpos
4132 && group->width == group->stores[0]->bitsize
4133 && (group->start % BITS_PER_UNIT) == 0
4134 && (group->width % BITS_PER_UNIT) == 0)
4135 {
4136 clobber_first = true;
4137 unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
4138 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4139 if (split_store->bytepos != pos)
4140 {
4141 clobber_first = false;
4142 break;
4143 }
4144 else
4145 pos += split_store->size / BITS_PER_UNIT;
4146 if (pos != (group->start + group->width) / BITS_PER_UNIT)
4147 clobber_first = false;
4148 }
4149
4150 if (split_stores.length () >= orig_num_stmts + clobber_first)
a62b3dc5 4151 {
5384a802 4152
a62b3dc5
JJ
4153 /* We didn't manage to reduce the number of statements. Bail out. */
4154 if (dump_file && (dump_flags & TDF_DETAILS))
d7a9512e
JJ
4155 fprintf (dump_file, "Exceeded original number of stmts (%u)."
4156 " Not profitable to emit new sequence.\n",
4157 orig_num_stmts);
dd172744
RB
4158 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4159 delete split_store;
a62b3dc5
JJ
4160 return false;
4161 }
d7a9512e
JJ
4162 if (total_orig <= total_new)
4163 {
4164 /* If number of estimated new statements is above estimated original
4165 statements, bail out too. */
4166 if (dump_file && (dump_flags & TDF_DETAILS))
4167 fprintf (dump_file, "Estimated number of original stmts (%u)"
4168 " not larger than estimated number of new"
4169 " stmts (%u).\n",
4170 total_orig, total_new);
dd172744
RB
4171 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4172 delete split_store;
4b84d9b8 4173 return false;
d7a9512e 4174 }
5384a802
JJ
4175 if (group->stores[0]->rhs_code == INTEGER_CST)
4176 {
4177 bool all_orig = true;
4178 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4179 if (!split_store->orig)
4180 {
4181 all_orig = false;
4182 break;
4183 }
4184 if (all_orig)
4185 {
4186 unsigned int cnt = split_stores.length ();
4187 store_immediate_info *store;
4188 FOR_EACH_VEC_ELT (group->stores, i, store)
4189 if (gimple_clobber_p (store->stmt))
4190 ++cnt;
4191 /* Punt if we wouldn't make any real changes, i.e. keep all
4192 orig stmts + all clobbers. */
4193 if (cnt == group->stores.length ())
4194 {
4195 if (dump_file && (dump_flags & TDF_DETAILS))
4196 fprintf (dump_file, "Exceeded original number of stmts (%u)."
4197 " Not profitable to emit new sequence.\n",
4198 orig_num_stmts);
4199 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4200 delete split_store;
4201 return false;
4202 }
4203 }
4204 }
f663d9ad
KT
4205
4206 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
4207 gimple_seq seq = NULL;
f663d9ad
KT
4208 tree last_vdef, new_vuse;
4209 last_vdef = gimple_vdef (group->last_stmt);
4210 new_vuse = gimple_vuse (group->last_stmt);
4b84d9b8
JJ
4211 tree bswap_res = NULL_TREE;
4212
5384a802
JJ
4213 /* Clobbers are not removed. */
4214 if (gimple_clobber_p (group->last_stmt))
4215 {
4216 new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
4217 gimple_set_vdef (group->last_stmt, new_vuse);
4218 }
4219
4b84d9b8
JJ
4220 if (group->stores[0]->rhs_code == LROTATE_EXPR
4221 || group->stores[0]->rhs_code == NOP_EXPR)
4222 {
4223 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
4224 gimple *ins_stmt = group->stores[0]->ins_stmt;
4225 struct symbolic_number *n = &group->stores[0]->n;
4226 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
4227
4228 switch (n->range)
4229 {
4230 case 16:
4231 load_type = bswap_type = uint16_type_node;
4232 break;
4233 case 32:
4234 load_type = uint32_type_node;
4235 if (bswap)
4236 {
4237 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
4238 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4239 }
4240 break;
4241 case 64:
4242 load_type = uint64_type_node;
4243 if (bswap)
4244 {
4245 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
4246 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4247 }
4248 break;
4249 default:
4250 gcc_unreachable ();
4251 }
4252
4253 /* If each load has the vuse of the corresponding store,
4254 we've checked the aliasing already in try_coalesce_bswap and
4255 we want to sink the needed load into seq. So we need to use
4256 new_vuse on the load. */
30fa8e9c 4257 if (n->base_addr)
4b84d9b8 4258 {
30fa8e9c
JJ
4259 if (n->vuse == NULL)
4260 {
4261 n->vuse = new_vuse;
4262 ins_stmt = NULL;
4263 }
4264 else
4265 /* Update vuse in case it has changed by output_merged_stores. */
4266 n->vuse = gimple_vuse (ins_stmt);
4b84d9b8
JJ
4267 }
4268 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
b320edc0
JJ
4269 bswap_type, load_type, n, bswap,
4270 ~(uint64_t) 0);
4b84d9b8
JJ
4271 gcc_assert (bswap_res);
4272 }
f663d9ad
KT
4273
4274 gimple *stmt = NULL;
245f6de1 4275 auto_vec<gimple *, 32> orig_stmts;
4b84d9b8
JJ
4276 gimple_seq this_seq;
4277 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
aa55dc0c 4278 is_gimple_mem_ref_addr, NULL_TREE);
4b84d9b8 4279 gimple_seq_add_seq_without_update (&seq, this_seq);
245f6de1
JJ
4280
4281 tree load_addr[2] = { NULL_TREE, NULL_TREE };
4282 gimple_seq load_seq[2] = { NULL, NULL };
4283 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
4284 for (int j = 0; j < 2; ++j)
4285 {
4286 store_operand_info &op = group->stores[0]->ops[j];
4287 if (op.base_addr == NULL_TREE)
4288 continue;
4289
4290 store_immediate_info *infol = group->stores.last ();
4291 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
4292 {
97031af7
JJ
4293 /* We can't pick the location randomly; while we've verified
4294 all the loads have the same vuse, they can still be in different
4295 basic blocks and we need to pick the one from the last bb:
4296 int x = q[0];
4297 if (x == N) return;
4298 int y = q[1];
4299 p[0] = x;
4300 p[1] = y;
4301 otherwise if we put the wider load at the q[0] load, we might
4302 segfault if q[1] is not mapped. */
4303 basic_block bb = gimple_bb (op.stmt);
4304 gimple *ostmt = op.stmt;
4305 store_immediate_info *info;
4306 FOR_EACH_VEC_ELT (group->stores, i, info)
4307 {
4308 gimple *tstmt = info->ops[j].stmt;
4309 basic_block tbb = gimple_bb (tstmt);
4310 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
4311 {
4312 ostmt = tstmt;
4313 bb = tbb;
4314 }
4315 }
4316 load_gsi[j] = gsi_for_stmt (ostmt);
245f6de1
JJ
4317 load_addr[j]
4318 = force_gimple_operand_1 (unshare_expr (op.base_addr),
4319 &load_seq[j], is_gimple_mem_ref_addr,
4320 NULL_TREE);
4321 }
4322 else if (operand_equal_p (base_addr, op.base_addr, 0))
4323 load_addr[j] = addr;
4324 else
3e2927a1 4325 {
3e2927a1
JJ
4326 load_addr[j]
4327 = force_gimple_operand_1 (unshare_expr (op.base_addr),
4328 &this_seq, is_gimple_mem_ref_addr,
4329 NULL_TREE);
4330 gimple_seq_add_seq_without_update (&seq, this_seq);
4331 }
245f6de1
JJ
4332 }
4333
f663d9ad
KT
4334 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4335 {
e362a897
EB
4336 const unsigned HOST_WIDE_INT try_size = split_store->size;
4337 const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
4338 const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
4339 const unsigned HOST_WIDE_INT try_align = split_store->align;
4340 const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
a62b3dc5
JJ
4341 tree dest, src;
4342 location_t loc;
e362a897 4343
a62b3dc5
JJ
4344 if (split_store->orig)
4345 {
5384a802
JJ
4346 /* If there is just a single non-clobber constituent store
4347 which covers the whole area, just reuse the lhs and rhs. */
4348 gimple *orig_stmt = NULL;
4349 store_immediate_info *store;
4350 unsigned int j;
4351 FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4352 if (!gimple_clobber_p (store->stmt))
4353 {
4354 orig_stmt = store->stmt;
4355 break;
4356 }
245f6de1
JJ
4357 dest = gimple_assign_lhs (orig_stmt);
4358 src = gimple_assign_rhs1 (orig_stmt);
4359 loc = gimple_location (orig_stmt);
a62b3dc5
JJ
4360 }
4361 else
4362 {
245f6de1
JJ
4363 store_immediate_info *info;
4364 unsigned short clique, base;
4365 unsigned int k;
4366 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4367 orig_stmts.safe_push (info->stmt);
a62b3dc5 4368 tree offset_type
245f6de1 4369 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
e362a897 4370 tree dest_type;
245f6de1
JJ
4371 loc = get_location_for_stmts (orig_stmts);
4372 orig_stmts.truncate (0);
a62b3dc5 4373
e362a897
EB
4374 if (group->string_concatenation)
4375 dest_type
4376 = build_array_type_nelts (char_type_node,
4377 try_size / BITS_PER_UNIT);
4378 else
4379 {
4380 dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4381 dest_type = build_aligned_type (dest_type, try_align);
4382 }
4383 dest = fold_build2 (MEM_REF, dest_type, addr,
a62b3dc5 4384 build_int_cst (offset_type, try_pos));
245f6de1
JJ
4385 if (TREE_CODE (dest) == MEM_REF)
4386 {
4387 MR_DEPENDENCE_CLIQUE (dest) = clique;
4388 MR_DEPENDENCE_BASE (dest) = base;
4389 }
4390
c94c3532 4391 tree mask;
e362a897 4392 if (bswap_res || group->string_concatenation)
c94c3532
EB
4393 mask = integer_zero_node;
4394 else
e362a897
EB
4395 mask = native_interpret_expr (dest_type,
4396 group->mask + try_offset,
4b84d9b8 4397 group->buf_size);
245f6de1
JJ
4398
4399 tree ops[2];
4400 for (int j = 0;
4401 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4402 ++j)
4403 {
4404 store_operand_info &op = split_store->orig_stores[0]->ops[j];
4b84d9b8
JJ
4405 if (bswap_res)
4406 ops[j] = bswap_res;
e362a897
EB
4407 else if (group->string_concatenation)
4408 {
4409 ops[j] = build_string (try_size / BITS_PER_UNIT,
4410 (const char *) group->val + try_offset);
4411 TREE_TYPE (ops[j]) = dest_type;
4412 }
4b84d9b8 4413 else if (op.base_addr)
245f6de1
JJ
4414 {
4415 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4416 orig_stmts.safe_push (info->ops[j].stmt);
4417
4418 offset_type = get_alias_type_for_stmts (orig_stmts, true,
4419 &clique, &base);
4420 location_t load_loc = get_location_for_stmts (orig_stmts);
4421 orig_stmts.truncate (0);
4422
4423 unsigned HOST_WIDE_INT load_align = group->load_align[j];
4424 unsigned HOST_WIDE_INT align_bitpos
c94c3532 4425 = known_alignment (try_bitpos
8a91d545
RS
4426 - split_store->orig_stores[0]->bitpos
4427 + op.bitpos);
4428 if (align_bitpos & (load_align - 1))
245f6de1
JJ
4429 load_align = least_bit_hwi (align_bitpos);
4430
4431 tree load_int_type
4432 = build_nonstandard_integer_type (try_size, UNSIGNED);
4433 load_int_type
4434 = build_aligned_type (load_int_type, load_align);
4435
8a91d545 4436 poly_uint64 load_pos
c94c3532 4437 = exact_div (try_bitpos
8a91d545
RS
4438 - split_store->orig_stores[0]->bitpos
4439 + op.bitpos,
4440 BITS_PER_UNIT);
245f6de1
JJ
4441 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4442 build_int_cst (offset_type, load_pos));
4443 if (TREE_CODE (ops[j]) == MEM_REF)
4444 {
4445 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4446 MR_DEPENDENCE_BASE (ops[j]) = base;
4447 }
4448 if (!integer_zerop (mask))
e9e2bad7
MS
4449 {
4450 /* The load might load some bits (that will be masked
4451 off later on) uninitialized, avoid -W*uninitialized
4452 warnings in that case. */
4453 suppress_warning (ops[j], OPT_Wuninitialized);
4454 }
245f6de1 4455
e362a897 4456 stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
245f6de1
JJ
4457 gimple_set_location (stmt, load_loc);
4458 if (gsi_bb (load_gsi[j]))
4459 {
4460 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4461 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4462 }
4463 else
4464 {
4465 gimple_set_vuse (stmt, new_vuse);
4466 gimple_seq_add_stmt_without_update (&seq, stmt);
4467 }
4468 ops[j] = gimple_assign_lhs (stmt);
a6fbd154
JJ
4469 tree xor_mask;
4470 enum tree_code inv_op
e362a897 4471 = invert_op (split_store, j, dest_type, xor_mask);
a6fbd154 4472 if (inv_op != NOP_EXPR)
383ac8dc 4473 {
e362a897 4474 stmt = gimple_build_assign (make_ssa_name (dest_type),
a6fbd154 4475 inv_op, ops[j], xor_mask);
383ac8dc
JJ
4476 gimple_set_location (stmt, load_loc);
4477 ops[j] = gimple_assign_lhs (stmt);
4478
4479 if (gsi_bb (load_gsi[j]))
4480 gimple_seq_add_stmt_without_update (&load_seq[j],
4481 stmt);
4482 else
4483 gimple_seq_add_stmt_without_update (&seq, stmt);
4484 }
245f6de1
JJ
4485 }
4486 else
e362a897
EB
4487 ops[j] = native_interpret_expr (dest_type,
4488 group->val + try_offset,
245f6de1
JJ
4489 group->buf_size);
4490 }
4491
4492 switch (split_store->orig_stores[0]->rhs_code)
4493 {
4494 case BIT_AND_EXPR:
4495 case BIT_IOR_EXPR:
4496 case BIT_XOR_EXPR:
4497 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4498 {
4499 tree rhs1 = gimple_assign_rhs1 (info->stmt);
4500 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4501 }
4502 location_t bit_loc;
4503 bit_loc = get_location_for_stmts (orig_stmts);
4504 orig_stmts.truncate (0);
4505
4506 stmt
e362a897 4507 = gimple_build_assign (make_ssa_name (dest_type),
245f6de1
JJ
4508 split_store->orig_stores[0]->rhs_code,
4509 ops[0], ops[1]);
4510 gimple_set_location (stmt, bit_loc);
4511 /* If there is just one load and there is a separate
4512 load_seq[0], emit the bitwise op right after it. */
4513 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4514 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4515 /* Otherwise, if at least one load is in seq, we need to
4516 emit the bitwise op right before the store. If there
4517 are two loads and they are emitted somewhere else, it would
4518 be better to emit the bitwise op as early as possible;
4519 we don't track where that would be possible right now
4520 though. */
4521 else
4522 gimple_seq_add_stmt_without_update (&seq, stmt);
4523 src = gimple_assign_lhs (stmt);
a6fbd154
JJ
4524 tree xor_mask;
4525 enum tree_code inv_op;
e362a897 4526 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
a6fbd154 4527 if (inv_op != NOP_EXPR)
d60edaba 4528 {
e362a897 4529 stmt = gimple_build_assign (make_ssa_name (dest_type),
a6fbd154 4530 inv_op, src, xor_mask);
d60edaba
JJ
4531 gimple_set_location (stmt, bit_loc);
4532 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4533 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4534 else
4535 gimple_seq_add_stmt_without_update (&seq, stmt);
4536 src = gimple_assign_lhs (stmt);
4537 }
245f6de1 4538 break;
4b84d9b8
JJ
4539 case LROTATE_EXPR:
4540 case NOP_EXPR:
4541 src = ops[0];
4542 if (!is_gimple_val (src))
4543 {
4544 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4545 src);
4546 gimple_seq_add_stmt_without_update (&seq, stmt);
4547 src = gimple_assign_lhs (stmt);
4548 }
e362a897 4549 if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
4b84d9b8 4550 {
e362a897 4551 stmt = gimple_build_assign (make_ssa_name (dest_type),
4b84d9b8
JJ
4552 NOP_EXPR, src);
4553 gimple_seq_add_stmt_without_update (&seq, stmt);
4554 src = gimple_assign_lhs (stmt);
4555 }
e362a897 4556 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
be52ac73
JJ
4557 if (inv_op != NOP_EXPR)
4558 {
e362a897 4559 stmt = gimple_build_assign (make_ssa_name (dest_type),
be52ac73
JJ
4560 inv_op, src, xor_mask);
4561 gimple_set_location (stmt, loc);
4562 gimple_seq_add_stmt_without_update (&seq, stmt);
4563 src = gimple_assign_lhs (stmt);
4564 }
4b84d9b8 4565 break;
245f6de1
JJ
4566 default:
4567 src = ops[0];
4568 break;
4569 }
4570
c94c3532
EB
4571 /* If bit insertion is required, we use the source as an accumulator
4572 into which the successive bit-field values are manually inserted.
4573 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4574 if (group->bit_insertion)
4575 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4576 if (info->rhs_code == BIT_INSERT_EXPR
4577 && info->bitpos < try_bitpos + try_size
4578 && info->bitpos + info->bitsize > try_bitpos)
4579 {
4580 /* Mask, truncate, convert to final type, shift and ior into
4581 the accumulator. Note that every step can be a no-op. */
4582 const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4583 const HOST_WIDE_INT end_gap
4584 = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4585 tree tem = info->ops[0].val;
ed01d707
EB
4586 if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
4587 {
4588 const unsigned HOST_WIDE_INT size
4589 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
4590 tree integer_type
4591 = build_nonstandard_integer_type (size, UNSIGNED);
4592 tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
4593 integer_type, tem);
4594 }
c14add82
EB
4595 if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4596 {
4597 tree bitfield_type
4598 = build_nonstandard_integer_type (info->bitsize,
4599 UNSIGNED);
4600 tem = gimple_convert (&seq, loc, bitfield_type, tem);
4601 }
4602 else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
c94c3532
EB
4603 {
4604 const unsigned HOST_WIDE_INT imask
4605 = (HOST_WIDE_INT_1U << info->bitsize) - 1;
4606 tem = gimple_build (&seq, loc,
4607 BIT_AND_EXPR, TREE_TYPE (tem), tem,
4608 build_int_cst (TREE_TYPE (tem),
4609 imask));
4610 }
4611 const HOST_WIDE_INT shift
4612 = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4613 if (shift < 0)
4614 tem = gimple_build (&seq, loc,
4615 RSHIFT_EXPR, TREE_TYPE (tem), tem,
4616 build_int_cst (NULL_TREE, -shift));
e362a897 4617 tem = gimple_convert (&seq, loc, dest_type, tem);
c94c3532
EB
4618 if (shift > 0)
4619 tem = gimple_build (&seq, loc,
e362a897 4620 LSHIFT_EXPR, dest_type, tem,
c94c3532
EB
4621 build_int_cst (NULL_TREE, shift));
4622 src = gimple_build (&seq, loc,
e362a897 4623 BIT_IOR_EXPR, dest_type, tem, src);
c94c3532
EB
4624 }
4625
a62b3dc5
JJ
4626 if (!integer_zerop (mask))
4627 {
e362a897 4628 tree tem = make_ssa_name (dest_type);
a62b3dc5
JJ
4629 tree load_src = unshare_expr (dest);
4630 /* The load might load some or all bits uninitialized,
4631 avoid -W*uninitialized warnings in that case.
4632 As an optimization, it would be nice if, when all the bits
4633 are provably uninitialized (no stores at all yet or the
4634 previous store is a CLOBBER), we optimized away the load and
4635 replaced it e.g. with 0. */
e9e2bad7 4636 suppress_warning (load_src, OPT_Wuninitialized);
a62b3dc5
JJ
4637 stmt = gimple_build_assign (tem, load_src);
4638 gimple_set_location (stmt, loc);
4639 gimple_set_vuse (stmt, new_vuse);
4640 gimple_seq_add_stmt_without_update (&seq, stmt);
4641
4642 /* FIXME: If there is a single chunk of zero bits in mask,
4643 perhaps use BIT_INSERT_EXPR instead? */
e362a897 4644 stmt = gimple_build_assign (make_ssa_name (dest_type),
a62b3dc5
JJ
4645 BIT_AND_EXPR, tem, mask);
4646 gimple_set_location (stmt, loc);
4647 gimple_seq_add_stmt_without_update (&seq, stmt);
4648 tem = gimple_assign_lhs (stmt);
4649
245f6de1 4650 if (TREE_CODE (src) == INTEGER_CST)
e362a897 4651 src = wide_int_to_tree (dest_type,
245f6de1
JJ
4652 wi::bit_and_not (wi::to_wide (src),
4653 wi::to_wide (mask)));
4654 else
4655 {
4656 tree nmask
e362a897 4657 = wide_int_to_tree (dest_type,
245f6de1 4658 wi::bit_not (wi::to_wide (mask)));
e362a897 4659 stmt = gimple_build_assign (make_ssa_name (dest_type),
245f6de1
JJ
4660 BIT_AND_EXPR, src, nmask);
4661 gimple_set_location (stmt, loc);
4662 gimple_seq_add_stmt_without_update (&seq, stmt);
4663 src = gimple_assign_lhs (stmt);
4664 }
e362a897 4665 stmt = gimple_build_assign (make_ssa_name (dest_type),
a62b3dc5
JJ
4666 BIT_IOR_EXPR, tem, src);
4667 gimple_set_location (stmt, loc);
4668 gimple_seq_add_stmt_without_update (&seq, stmt);
4669 src = gimple_assign_lhs (stmt);
4670 }
4671 }
f663d9ad
KT
4672
4673 stmt = gimple_build_assign (dest, src);
4674 gimple_set_location (stmt, loc);
4675 gimple_set_vuse (stmt, new_vuse);
4676 gimple_seq_add_stmt_without_update (&seq, stmt);
4677
629387a6
EB
4678 if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4679 add_stmt_to_eh_lp (stmt, group->lp_nr);
4680
f663d9ad
KT
4681 tree new_vdef;
4682 if (i < split_stores.length () - 1)
a62b3dc5 4683 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
f663d9ad
KT
4684 else
4685 new_vdef = last_vdef;
4686
4687 gimple_set_vdef (stmt, new_vdef);
4688 SSA_NAME_DEF_STMT (new_vdef) = stmt;
4689 new_vuse = new_vdef;
4690 }
4691
4692 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4693 delete split_store;
4694
f663d9ad
KT
4695 gcc_assert (seq);
4696 if (dump_file)
4697 {
4698 fprintf (dump_file,
c94c3532 4699 "New sequence of %u stores to replace old one of %u stores\n",
a62b3dc5 4700 split_stores.length (), orig_num_stmts);
f663d9ad
KT
4701 if (dump_flags & TDF_DETAILS)
4702 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4703 }
629387a6 4704
5384a802
JJ
4705 if (gimple_clobber_p (group->last_stmt))
4706 update_stmt (group->last_stmt);
4707
629387a6
EB
4708 if (group->lp_nr > 0)
4709 {
4710 /* We're going to insert a sequence of (potentially) throwing stores
4711 into an active EH region. This means that we're going to create
4712 new basic blocks with EH edges pointing to the post landing pad
4713 and, therefore, to have to update its PHI nodes, if any. For the
4714 virtual PHI node, we're going to use the VDEFs created above, but
4715 for the other nodes, we need to record the original reaching defs. */
4716 eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4717 basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4718 basic_block last_bb = gimple_bb (group->last_stmt);
4719 edge last_edge = find_edge (last_bb, lp_bb);
4720 auto_vec<tree, 16> last_defs;
4721 gphi_iterator gpi;
4722 for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4723 {
4724 gphi *phi = gpi.phi ();
4725 tree last_def;
4726 if (virtual_operand_p (gimple_phi_result (phi)))
4727 last_def = NULL_TREE;
4728 else
4729 last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4730 last_defs.safe_push (last_def);
4731 }
4732
4733 /* Do the insertion. Then, if new basic blocks have been created in the
4734 process, rewind the chain of VDEFs created above to walk the new basic
4735 blocks and update the corresponding arguments of the PHI nodes. */
4736 update_modified_stmts (seq);
4737 if (gimple_find_sub_bbs (seq, &last_gsi))
4738 while (last_vdef != gimple_vuse (group->last_stmt))
4739 {
4740 gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4741 if (stmt_could_throw_p (cfun, stmt))
4742 {
4743 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4744 unsigned int i;
4745 for (gpi = gsi_start_phis (lp_bb), i = 0;
4746 !gsi_end_p (gpi);
4747 gsi_next (&gpi), i++)
4748 {
4749 gphi *phi = gpi.phi ();
4750 tree new_def;
4751 if (virtual_operand_p (gimple_phi_result (phi)))
4752 new_def = last_vdef;
4753 else
4754 new_def = last_defs[i];
4755 add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4756 }
4757 }
4758 last_vdef = gimple_vuse (stmt);
4759 }
4760 }
4761 else
4762 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4763
245f6de1
JJ
4764 for (int j = 0; j < 2; ++j)
4765 if (load_seq[j])
4766 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
f663d9ad
KT
4767
4768 return true;
4769}
4770
4771/* Process the merged_store_group objects created in the coalescing phase.
4772 The stores are all against the base object BASE.
4773 Try to output the widened stores and delete the original statements if
4774 successful. Return true iff any changes were made. */
4775
4776bool
b5926e23 4777imm_store_chain_info::output_merged_stores ()
f663d9ad
KT
4778{
4779 unsigned int i;
4780 merged_store_group *merged_store;
4781 bool ret = false;
4782 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4783 {
a95b474a
ML
4784 if (dbg_cnt (store_merging)
4785 && output_merged_store (merged_store))
f663d9ad
KT
4786 {
4787 unsigned int j;
4788 store_immediate_info *store;
4789 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4790 {
4791 gimple *stmt = store->stmt;
4792 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5384a802
JJ
4793 /* Don't remove clobbers, they are still useful even if
4794 everything is overwritten afterwards. */
4795 if (gimple_clobber_p (stmt))
4796 continue;
f663d9ad 4797 gsi_remove (&gsi, true);
629387a6
EB
4798 if (store->lp_nr)
4799 remove_stmt_from_eh_lp (stmt);
f663d9ad
KT
4800 if (stmt != merged_store->last_stmt)
4801 {
4802 unlink_stmt_vdef (stmt);
4803 release_defs (stmt);
4804 }
4805 }
4806 ret = true;
4807 }
4808 }
4809 if (ret && dump_file)
4810 fprintf (dump_file, "Merging successful!\n");
4811
4812 return ret;
4813}
4814
4815/* Coalesce the store_immediate_info objects recorded against the base object
4816 BASE in the first phase and output them.
4817 Delete the allocated structures.
4818 Return true if any changes were made. */
4819
4820bool
b5926e23 4821imm_store_chain_info::terminate_and_process_chain ()
f663d9ad 4822{
95d94b52
RB
4823 if (dump_file && (dump_flags & TDF_DETAILS))
4824 fprintf (dump_file, "Terminating chain with %u stores\n",
4825 m_store_info.length ());
f663d9ad
KT
4826 /* Process store chain. */
4827 bool ret = false;
4828 if (m_store_info.length () > 1)
4829 {
4830 ret = coalesce_immediate_stores ();
4831 if (ret)
b5926e23 4832 ret = output_merged_stores ();
f663d9ad
KT
4833 }
4834
4835 /* Delete all the entries we allocated ourselves. */
4836 store_immediate_info *info;
4837 unsigned int i;
4838 FOR_EACH_VEC_ELT (m_store_info, i, info)
4839 delete info;
4840
4841 merged_store_group *merged_info;
4842 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4843 delete merged_info;
4844
4845 return ret;
4846}
4847
4848/* Return true iff LHS is a destination potentially interesting for
4849 store merging. In practice these are the codes that get_inner_reference
4850 can process. */
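/* Illustrative note (added for exposition; 's', 'a' and 'p' are made-up
   names): destinations such as s.f = ..., a[i] = ..., *p = ... or a
   bare DECL are accepted below, since COMPONENT_REF, ARRAY_REF, MEM_REF
   etc. are all handled by get_inner_reference, whereas anything else is
   rejected.  */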
4851
4852static bool
4853lhs_valid_for_store_merging_p (tree lhs)
4854{
629387a6 4855 if (DECL_P (lhs))
f663d9ad
KT
4856 return true;
4857
629387a6
EB
4858 switch (TREE_CODE (lhs))
4859 {
4860 case ARRAY_REF:
4861 case ARRAY_RANGE_REF:
4862 case BIT_FIELD_REF:
4863 case COMPONENT_REF:
4864 case MEM_REF:
e362a897 4865 case VIEW_CONVERT_EXPR:
629387a6
EB
4866 return true;
4867 default:
4868 return false;
4869 }
f663d9ad
KT
4870}
4871
4872/* Return true if the tree RHS is a constant we want to consider
4873 during store merging. In practice accept all codes that
4874 native_encode_expr accepts. */
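/* Hypothetical examples of right-hand sides that qualify:

     x.i = 42;            <- INTEGER_CST
     x.d = 1.5;           <- REAL_CST, byte-encodable via native_encode_expr
     *p = (struct S) {};  <- empty CONSTRUCTOR special case below

   An RHS whose mode size is not a compile-time constant (e.g. a scalable
   vector) fails the is_constant check and is rejected.  */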
4875
4876static bool
4877rhs_valid_for_store_merging_p (tree rhs)
4878{
cf098191 4879 unsigned HOST_WIDE_INT size;
3afd514b 4880 if (TREE_CODE (rhs) == CONSTRUCTOR
3afd514b
JJ
4881 && CONSTRUCTOR_NELTS (rhs) == 0
4882 && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4883 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4884 return true;
cf098191
RS
4885 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4886 && native_encode_expr (rhs, NULL, size) != 0);
f663d9ad
KT
4887}
4888
629387a6
EB
4889/* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4890 and return true on success or false on failure. */
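/* A worked example (assuming 8-bit units): with BYTE_OFF == 2 and
   *PBITPOS == 5 on entry, the byte offset is converted to bits and added,
   giving *PBITPOS == 2 * BITS_PER_UNIT + 5 == 21; when a bit region is
   present, its start and end are shifted by the same amount.  */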
4891
4892static bool
4893adjust_bit_pos (poly_offset_int byte_off,
4894 poly_int64 *pbitpos,
4895 poly_uint64 *pbitregion_start,
4896 poly_uint64 *pbitregion_end)
4897{
4898 poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4899 bit_off += *pbitpos;
4900
4901 if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4902 {
4903 if (maybe_ne (*pbitregion_end, 0U))
4904 {
4905 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4906 bit_off += *pbitregion_start;
4907 if (bit_off.to_uhwi (pbitregion_start))
4908 {
4909 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4910 bit_off += *pbitregion_end;
4911 if (!bit_off.to_uhwi (pbitregion_end))
4912 *pbitregion_end = 0;
4913 }
4914 else
4915 *pbitregion_end = 0;
4916 }
4917 return true;
4918 }
4919 else
4920 return false;
4921}
4922
245f6de1
JJ
4923/* If MEM is a memory reference usable for store merging (either as
4924 store destination or for loads), return the non-NULL base_addr
4925 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
 4926   Otherwise return NULL; *PBITSIZE should still be valid even in that
4927 case. */
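/* As an illustration with hypothetical values: for a 4-byte store to
   MEM[(int *)p + 4B], the MEM_REF case below canonicalizes the base to p
   and folds the byte offset into the bit position, yielding *PBITPOS == 32,
   *PBITSIZE == 32 and a byte-aligned bit region of [32, 64).  */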
4928
4929static tree
8a91d545
RS
4930mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4931 poly_uint64 *pbitpos,
4932 poly_uint64 *pbitregion_start,
4933 poly_uint64 *pbitregion_end)
245f6de1 4934{
8a91d545
RS
4935 poly_int64 bitsize, bitpos;
4936 poly_uint64 bitregion_start = 0, bitregion_end = 0;
245f6de1
JJ
4937 machine_mode mode;
4938 int unsignedp = 0, reversep = 0, volatilep = 0;
4939 tree offset;
4940 tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4941 &unsignedp, &reversep, &volatilep);
4942 *pbitsize = bitsize;
8a91d545 4943 if (known_eq (bitsize, 0))
245f6de1
JJ
4944 return NULL_TREE;
4945
4946 if (TREE_CODE (mem) == COMPONENT_REF
4947 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4948 {
4949 get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
8a91d545
RS
4950 if (maybe_ne (bitregion_end, 0U))
4951 bitregion_end += 1;
245f6de1
JJ
4952 }
4953
4954 if (reversep)
4955 return NULL_TREE;
4956
4957 /* We do not want to rewrite TARGET_MEM_REFs. */
4958 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4959 return NULL_TREE;
4960 /* In some cases get_inner_reference may return a
4961 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4962 canonicalize the base_addr to MEM_REF [ptr] and take
4963 byteoffset into account in the bitpos. This occurs in
4964 PR 23684 and this way we can catch more chains. */
4965 else if (TREE_CODE (base_addr) == MEM_REF)
4966 {
629387a6
EB
4967 if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4968 &bitregion_start, &bitregion_end))
245f6de1
JJ
4969 return NULL_TREE;
4970 base_addr = TREE_OPERAND (base_addr, 0);
4971 }
4972 /* get_inner_reference returns the base object, get at its
4973 address now. */
4974 else
4975 {
8a91d545 4976 if (maybe_lt (bitpos, 0))
245f6de1
JJ
4977 return NULL_TREE;
4978 base_addr = build_fold_addr_expr (base_addr);
4979 }
4980
629387a6 4981 if (offset)
245f6de1
JJ
4982 {
4983 /* If the access is variable offset then a base decl has to be
4984 address-taken to be able to emit pointer-based stores to it.
4985 ??? We might be able to get away with re-using the original
4986 base up to the first variable part and then wrapping that inside
4987 a BIT_FIELD_REF. */
4988 tree base = get_base_address (base_addr);
629387a6 4989 if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
245f6de1
JJ
4990 return NULL_TREE;
4991
629387a6
EB
4992 /* Similarly to above for the base, remove constant from the offset. */
4993 if (TREE_CODE (offset) == PLUS_EXPR
4994 && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4995 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
4996 &bitpos, &bitregion_start, &bitregion_end))
4997 offset = TREE_OPERAND (offset, 0);
4998
245f6de1
JJ
4999 base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
5000 base_addr, offset);
5001 }
5002
629387a6
EB
5003 if (known_eq (bitregion_end, 0U))
5004 {
5005 bitregion_start = round_down_to_byte_boundary (bitpos);
5006 bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
5007 }
5008
245f6de1
JJ
5009 *pbitsize = bitsize;
5010 *pbitpos = bitpos;
5011 *pbitregion_start = bitregion_start;
5012 *pbitregion_end = bitregion_end;
5013 return base_addr;
5014}
5015
5016/* Return true if STMT is a load that can be used for store merging.
5017 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
5018 BITREGION_END are properties of the corresponding store. */
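/* A hypothetical GIMPLE fragment recognized here as a usable load:

     _1 = MEM[q + 1B];      <- fills op->base_addr, op->bitpos, ...
     MEM[p + 1B] = _1;      <- the store whose operand is being analyzed

   The recursion below additionally accepts  _2 = ~_1;  on top of such a
   load, recording it by flipping op->bit_not_p.  */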
5019
5020static bool
5021handled_load (gimple *stmt, store_operand_info *op,
8a91d545
RS
5022 poly_uint64 bitsize, poly_uint64 bitpos,
5023 poly_uint64 bitregion_start, poly_uint64 bitregion_end)
245f6de1 5024{
383ac8dc 5025 if (!is_gimple_assign (stmt))
245f6de1 5026 return false;
383ac8dc
JJ
5027 if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
5028 {
5029 tree rhs1 = gimple_assign_rhs1 (stmt);
5030 if (TREE_CODE (rhs1) == SSA_NAME
383ac8dc
JJ
5031 && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
5032 bitregion_start, bitregion_end))
5033 {
d60edaba
JJ
5034 /* Don't allow _1 = load; _2 = ~1; _3 = ~_2; which should have
5035 been optimized earlier, but if allowed here, would confuse the
5036 multiple uses counting. */
5037 if (op->bit_not_p)
5038 return false;
383ac8dc
JJ
5039 op->bit_not_p = !op->bit_not_p;
5040 return true;
5041 }
5042 return false;
5043 }
5044 if (gimple_vuse (stmt)
5045 && gimple_assign_load_p (stmt)
36bbc05d 5046 && !stmt_can_throw_internal (cfun, stmt)
245f6de1
JJ
5047 && !gimple_has_volatile_ops (stmt))
5048 {
5049 tree mem = gimple_assign_rhs1 (stmt);
5050 op->base_addr
5051 = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
5052 &op->bitregion_start,
5053 &op->bitregion_end);
5054 if (op->base_addr != NULL_TREE
8a91d545
RS
5055 && known_eq (op->bitsize, bitsize)
5056 && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
5057 && known_ge (op->bitpos - op->bitregion_start,
5058 bitpos - bitregion_start)
5059 && known_ge (op->bitregion_end - op->bitpos,
5060 bitregion_end - bitpos))
245f6de1
JJ
5061 {
5062 op->stmt = stmt;
5063 op->val = mem;
383ac8dc 5064 op->bit_not_p = false;
245f6de1
JJ
5065 return true;
5066 }
5067 }
5068 return false;
5069}
5070
629387a6
EB
5071/* Return the index number of the landing pad for STMT, if any. */
5072
5073static int
5074lp_nr_for_store (gimple *stmt)
5075{
5076 if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
5077 return 0;
5078
5079 if (!stmt_could_throw_p (cfun, stmt))
5080 return 0;
5081
5082 return lookup_stmt_eh_lp (stmt);
5083}
5084
245f6de1 5085/* Record the store STMT for store merging optimization if it can be
629387a6 5086 optimized. Return true if any changes were made. */
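/* An illustrative (non-exhaustive) mapping from source statements to what
   gets recorded, using hypothetical C on the left:

     p->a = 5;            ->  rhs_code INTEGER_CST,  ops[0].val = 5
     p->a = q->a;         ->  rhs_code MEM_REF,      ops[0] describes the load
     p->a = q->a ^ 0x55;  ->  rhs_code BIT_XOR_EXPR, ops[0] load, ops[1].val = 0x55
     p->bf = x;  (bit-field, not byte-aligned)  ->  rhs_code BIT_INSERT_EXPR  */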
245f6de1 5087
629387a6 5088bool
245f6de1
JJ
5089pass_store_merging::process_store (gimple *stmt)
5090{
5091 tree lhs = gimple_assign_lhs (stmt);
5092 tree rhs = gimple_assign_rhs1 (stmt);
2c832ffe
SSF
5093 poly_uint64 bitsize, bitpos = 0;
5094 poly_uint64 bitregion_start = 0, bitregion_end = 0;
245f6de1
JJ
5095 tree base_addr
5096 = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
5097 &bitregion_start, &bitregion_end);
8a91d545 5098 if (known_eq (bitsize, 0U))
629387a6 5099 return false;
245f6de1
JJ
5100
5101 bool invalid = (base_addr == NULL_TREE
8a91d545
RS
5102 || (maybe_gt (bitsize,
5103 (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
3afd514b
JJ
5104 && TREE_CODE (rhs) != INTEGER_CST
5105 && (TREE_CODE (rhs) != CONSTRUCTOR
5106 || CONSTRUCTOR_NELTS (rhs) != 0)));
245f6de1 5107 enum tree_code rhs_code = ERROR_MARK;
d60edaba 5108 bool bit_not_p = false;
4b84d9b8
JJ
5109 struct symbolic_number n;
5110 gimple *ins_stmt = NULL;
245f6de1
JJ
5111 store_operand_info ops[2];
5112 if (invalid)
5113 ;
e362a897
EB
5114 else if (TREE_CODE (rhs) == STRING_CST)
5115 {
5116 rhs_code = STRING_CST;
5117 ops[0].val = rhs;
5118 }
245f6de1
JJ
5119 else if (rhs_valid_for_store_merging_p (rhs))
5120 {
5121 rhs_code = INTEGER_CST;
5122 ops[0].val = rhs;
5123 }
e362a897 5124 else if (TREE_CODE (rhs) == SSA_NAME)
245f6de1
JJ
5125 {
5126 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
5127 if (!is_gimple_assign (def_stmt))
5128 invalid = true;
5129 else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
5130 bitregion_start, bitregion_end))
5131 rhs_code = MEM_REF;
d60edaba
JJ
5132 else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
5133 {
5134 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5135 if (TREE_CODE (rhs1) == SSA_NAME
5136 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
5137 {
5138 bit_not_p = true;
5139 def_stmt = SSA_NAME_DEF_STMT (rhs1);
5140 }
5141 }
c94c3532 5142
d60edaba 5143 if (rhs_code == ERROR_MARK && !invalid)
245f6de1
JJ
5144 switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
5145 {
5146 case BIT_AND_EXPR:
5147 case BIT_IOR_EXPR:
5148 case BIT_XOR_EXPR:
5149 tree rhs1, rhs2;
5150 rhs1 = gimple_assign_rhs1 (def_stmt);
5151 rhs2 = gimple_assign_rhs2 (def_stmt);
5152 invalid = true;
d7a9512e 5153 if (TREE_CODE (rhs1) != SSA_NAME)
245f6de1
JJ
5154 break;
5155 def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
5156 if (!is_gimple_assign (def_stmt1)
5157 || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
5158 bitregion_start, bitregion_end))
5159 break;
5160 if (rhs_valid_for_store_merging_p (rhs2))
5161 ops[1].val = rhs2;
d7a9512e 5162 else if (TREE_CODE (rhs2) != SSA_NAME)
245f6de1
JJ
5163 break;
5164 else
5165 {
5166 def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
5167 if (!is_gimple_assign (def_stmt2))
5168 break;
5169 else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
5170 bitregion_start, bitregion_end))
5171 break;
5172 }
5173 invalid = false;
5174 break;
5175 default:
5176 invalid = true;
5177 break;
5178 }
c94c3532 5179
8a91d545
RS
5180 unsigned HOST_WIDE_INT const_bitsize;
5181 if (bitsize.is_constant (&const_bitsize)
c94c3532 5182 && (const_bitsize % BITS_PER_UNIT) == 0
8a91d545 5183 && const_bitsize <= 64
c94c3532 5184 && multiple_p (bitpos, BITS_PER_UNIT))
4b84d9b8
JJ
5185 {
5186 ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
5187 if (ins_stmt)
5188 {
5189 uint64_t nn = n.n;
5190 for (unsigned HOST_WIDE_INT i = 0;
8a91d545
RS
5191 i < const_bitsize;
5192 i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
4b84d9b8
JJ
5193 if ((nn & MARKER_MASK) == 0
5194 || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
5195 {
5196 ins_stmt = NULL;
5197 break;
5198 }
5199 if (ins_stmt)
5200 {
5201 if (invalid)
5202 {
5203 rhs_code = LROTATE_EXPR;
5204 ops[0].base_addr = NULL_TREE;
5205 ops[1].base_addr = NULL_TREE;
5206 }
5207 invalid = false;
5208 }
5209 }
5210 }
c94c3532
EB
5211
5212 if (invalid
5213 && bitsize.is_constant (&const_bitsize)
5214 && ((const_bitsize % BITS_PER_UNIT) != 0
5215 || !multiple_p (bitpos, BITS_PER_UNIT))
ed01d707 5216 && const_bitsize <= MAX_FIXED_MODE_SIZE)
c94c3532 5217 {
c14add82 5218 /* Bypass a conversion to the bit-field type. */
31a5d8c5
EB
5219 if (!bit_not_p
5220 && is_gimple_assign (def_stmt)
5221 && CONVERT_EXPR_CODE_P (rhs_code))
c94c3532
EB
5222 {
5223 tree rhs1 = gimple_assign_rhs1 (def_stmt);
5224 if (TREE_CODE (rhs1) == SSA_NAME
c14add82 5225 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
c94c3532
EB
5226 rhs = rhs1;
5227 }
5228 rhs_code = BIT_INSERT_EXPR;
31a5d8c5 5229 bit_not_p = false;
c94c3532
EB
5230 ops[0].val = rhs;
5231 ops[0].base_addr = NULL_TREE;
5232 ops[1].base_addr = NULL_TREE;
5233 invalid = false;
5234 }
245f6de1 5235 }
e362a897
EB
5236 else
5237 invalid = true;
245f6de1 5238
8a91d545
RS
5239 unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
5240 unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
5241 if (invalid
5242 || !bitsize.is_constant (&const_bitsize)
5243 || !bitpos.is_constant (&const_bitpos)
5244 || !bitregion_start.is_constant (&const_bitregion_start)
5245 || !bitregion_end.is_constant (&const_bitregion_end))
629387a6 5246 return terminate_all_aliasing_chains (NULL, stmt);
245f6de1 5247
4b84d9b8
JJ
5248 if (!ins_stmt)
5249 memset (&n, 0, sizeof (n));
5250
99b1c316 5251 class imm_store_chain_info **chain_info = NULL;
629387a6 5252 bool ret = false;
383ac8dc
JJ
5253 if (base_addr)
5254 chain_info = m_stores.get (base_addr);
5255
245f6de1
JJ
5256 store_immediate_info *info;
5257 if (chain_info)
5258 {
5259 unsigned int ord = (*chain_info)->m_store_info.length ();
8a91d545
RS
5260 info = new store_immediate_info (const_bitsize, const_bitpos,
5261 const_bitregion_start,
5262 const_bitregion_end,
5263 stmt, ord, rhs_code, n, ins_stmt,
629387a6
EB
5264 bit_not_p, lp_nr_for_store (stmt),
5265 ops[0], ops[1]);
245f6de1
JJ
5266 if (dump_file && (dump_flags & TDF_DETAILS))
5267 {
5268 fprintf (dump_file, "Recording immediate store from stmt:\n");
5269 print_gimple_stmt (dump_file, stmt, 0);
5270 }
5271 (*chain_info)->m_store_info.safe_push (info);
95d94b52 5272 m_n_stores++;
629387a6 5273 ret |= terminate_all_aliasing_chains (chain_info, stmt);
245f6de1
JJ
5274 /* If we reach the limit of stores to merge in a chain terminate and
5275 process the chain now. */
5276 if ((*chain_info)->m_store_info.length ()
028d4092 5277 == (unsigned int) param_max_stores_to_merge)
245f6de1
JJ
5278 {
5279 if (dump_file && (dump_flags & TDF_DETAILS))
5280 fprintf (dump_file,
5281 "Reached maximum number of statements to merge:\n");
629387a6 5282 ret |= terminate_and_process_chain (*chain_info);
245f6de1 5283 }
245f6de1 5284 }
95d94b52
RB
5285 else
5286 {
5287 /* Store aliases any existing chain? */
5288 ret |= terminate_all_aliasing_chains (NULL, stmt);
245f6de1 5289
95d94b52
RB
5290 /* Start a new chain. */
5291 class imm_store_chain_info *new_chain
5292 = new imm_store_chain_info (m_stores_head, base_addr);
5293 info = new store_immediate_info (const_bitsize, const_bitpos,
5294 const_bitregion_start,
5295 const_bitregion_end,
5296 stmt, 0, rhs_code, n, ins_stmt,
5297 bit_not_p, lp_nr_for_store (stmt),
5298 ops[0], ops[1]);
5299 new_chain->m_store_info.safe_push (info);
5300 m_n_stores++;
5301 m_stores.put (base_addr, new_chain);
5302 m_n_chains++;
5303 if (dump_file && (dump_flags & TDF_DETAILS))
5304 {
5305 fprintf (dump_file, "Starting active chain number %u with statement:\n",
5306 m_n_chains);
5307 print_gimple_stmt (dump_file, stmt, 0);
5308 fprintf (dump_file, "The base object is:\n");
5309 print_generic_expr (dump_file, base_addr);
5310 fprintf (dump_file, "\n");
5311 }
5312 }
5313
5314 /* Prune oldest chains so that after adding the chain or store above
5315 we're again within the limits set by the params. */
5316 if (m_n_chains > (unsigned)param_max_store_chains_to_track
5317 || m_n_stores > (unsigned)param_max_stores_to_track)
245f6de1 5318 {
95d94b52
RB
5319 if (dump_file && (dump_flags & TDF_DETAILS))
5320 fprintf (dump_file, "Too many chains (%u > %d) or stores (%u > %d), "
5321 "terminating oldest chain(s).\n", m_n_chains,
5322 param_max_store_chains_to_track, m_n_stores,
5323 param_max_stores_to_track);
5324 imm_store_chain_info **e = &m_stores_head;
5325 unsigned idx = 0;
5326 unsigned n_stores = 0;
5327 while (*e)
5328 {
5329 if (idx >= (unsigned)param_max_store_chains_to_track
5330 || (n_stores + (*e)->m_store_info.length ()
5331 > (unsigned)param_max_stores_to_track))
8a8eee6b 5332 ret |= terminate_and_process_chain (*e);
95d94b52
RB
5333 else
5334 {
5335 n_stores += (*e)->m_store_info.length ();
5336 e = &(*e)->next;
5337 ++idx;
5338 }
5339 }
245f6de1 5340 }
95d94b52 5341
629387a6
EB
5342 return ret;
5343}
5344
5345/* Return true if STMT is a store valid for store merging. */
5346
5347static bool
5348store_valid_for_store_merging_p (gimple *stmt)
5349{
5350 return gimple_assign_single_p (stmt)
5351 && gimple_vdef (stmt)
5352 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
5384a802 5353 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
629387a6
EB
5354}
5355
5356enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
5357
5358/* Return the status of basic block BB wrt store merging. */
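/* A hypothetical scenario for BB_EXTENDED_VALID: with -fnon-call-exceptions
   inside an EH region, each store below may throw and so ends its block:

     p->a = 1;   <- block B1, last statement, falls through to B2
     p->b = 2;   <- block B2

   B1 is then BB_EXTENDED_VALID, allowing the chain opened in B1 to be
   extended into B2 instead of being terminated at the block boundary.  */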
5359
5360static enum basic_block_status
5361get_status_for_store_merging (basic_block bb)
5362{
5363 unsigned int num_statements = 0;
a7553ad6 5364 unsigned int num_constructors = 0;
629387a6
EB
5365 gimple_stmt_iterator gsi;
5366 edge e;
5367
5368 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
5369 {
5370 gimple *stmt = gsi_stmt (gsi);
5371
5372 if (is_gimple_debug (stmt))
5373 continue;
5374
5375 if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
5376 break;
a7553ad6
JJ
5377
5378 if (is_gimple_assign (stmt)
5379 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR)
5380 {
5381 tree rhs = gimple_assign_rhs1 (stmt);
5382 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
5383 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
5384 && gimple_assign_lhs (stmt) != NULL_TREE)
5385 {
5386 HOST_WIDE_INT sz
5387 = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
5388 if (sz == 16 || sz == 32 || sz == 64)
5389 {
5390 num_constructors = 1;
5391 break;
5392 }
5393 }
5394 }
629387a6
EB
5395 }
5396
a7553ad6 5397 if (num_statements == 0 && num_constructors == 0)
629387a6
EB
5398 return BB_INVALID;
5399
5400 if (cfun->can_throw_non_call_exceptions && cfun->eh
5401 && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
5402 && (e = find_fallthru_edge (bb->succs))
5403 && e->dest == bb->next_bb)
5404 return BB_EXTENDED_VALID;
5405
a7553ad6 5406 return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
245f6de1
JJ
5407}
5408
f663d9ad 5409/* Entry point for the pass. Go over each basic block recording chains of
245f6de1
JJ
5410 immediate stores. Upon encountering a terminating statement (as defined
5411 by stmt_terminates_chain_p) process the recorded stores and emit the widened
5412 variants. */
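/* A bird's-eye sketch of the per-statement dispatch in the loop below:

     volatile access (not a clobber)  -> terminate_and_process_all_chains
     integral vector CONSTRUCTOR      -> maybe_optimize_vector_constructor
     mergeable store                  -> process_store (opens/extends a chain)
     any other statement              -> terminate_all_aliasing_chains

   Chains still open when a non-extended block ends are flushed before the
   next block is processed.  */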
f663d9ad
KT
5413
5414unsigned int
5415pass_store_merging::execute (function *fun)
5416{
5417 basic_block bb;
5418 hash_set<gimple *> orig_stmts;
629387a6
EB
5419 bool changed = false, open_chains = false;
5420
5421 /* If the function can throw and catch non-call exceptions, we'll be trying
5422 to merge stores across different basic blocks so we need to first unsplit
5423 the EH edges in order to streamline the CFG of the function. */
5424 if (cfun->can_throw_non_call_exceptions && cfun->eh)
5425 unsplit_eh_edges ();
f663d9ad 5426
4b84d9b8
JJ
5427 calculate_dominance_info (CDI_DOMINATORS);
5428
f663d9ad
KT
5429 FOR_EACH_BB_FN (bb, fun)
5430 {
629387a6 5431 const basic_block_status bb_status = get_status_for_store_merging (bb);
f663d9ad 5432 gimple_stmt_iterator gsi;
f663d9ad 5433
629387a6
EB
5434 if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
5435 {
5436 changed |= terminate_and_process_all_chains ();
5437 open_chains = false;
f663d9ad
KT
5438 }
5439
629387a6 5440 if (bb_status == BB_INVALID)
f663d9ad
KT
5441 continue;
5442
5443 if (dump_file && (dump_flags & TDF_DETAILS))
5444 fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
5445
a7553ad6 5446 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
f663d9ad
KT
5447 {
5448 gimple *stmt = gsi_stmt (gsi);
a7553ad6 5449 gsi_next (&gsi);
f663d9ad 5450
50b6d676
AO
5451 if (is_gimple_debug (stmt))
5452 continue;
5453
5384a802 5454 if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
f663d9ad
KT
5455 {
5456 /* Terminate all chains. */
5457 if (dump_file && (dump_flags & TDF_DETAILS))
5458 fprintf (dump_file, "Volatile access terminates "
5459 "all chains\n");
629387a6
EB
5460 changed |= terminate_and_process_all_chains ();
5461 open_chains = false;
f663d9ad
KT
5462 continue;
5463 }
5464
a7553ad6
JJ
5465 if (is_gimple_assign (stmt)
5466 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
5467 && maybe_optimize_vector_constructor (stmt))
5468 continue;
5469
629387a6
EB
5470 if (store_valid_for_store_merging_p (stmt))
5471 changed |= process_store (stmt);
245f6de1 5472 else
629387a6
EB
5473 changed |= terminate_all_aliasing_chains (NULL, stmt);
5474 }
5475
5476 if (bb_status == BB_EXTENDED_VALID)
5477 open_chains = true;
5478 else
5479 {
5480 changed |= terminate_and_process_all_chains ();
5481 open_chains = false;
f663d9ad 5482 }
f663d9ad 5483 }
629387a6
EB
5484
5485 if (open_chains)
5486 changed |= terminate_and_process_all_chains ();
5487
5488 /* If the function can throw and catch non-call exceptions and something
5489 changed during the pass, then the CFG has (very likely) changed too. */
5490 if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5491 {
5492 free_dominance_info (CDI_DOMINATORS);
5493 return TODO_cleanup_cfg;
5494 }
5495
f663d9ad
KT
5496 return 0;
5497}
5498
5499} // anon namespace
5500
5501/* Construct and return a store merging pass object. */
5502
5503gimple_opt_pass *
5504make_pass_store_merging (gcc::context *ctxt)
5505{
5506 return new pass_store_merging (ctxt);
5507}
c22d8787
KT
5508
5509#if CHECKING_P
5510
5511namespace selftest {
5512
5513/* Selftests for store merging helpers. */
5514
 5515/* Assert that all elements of the byte arrays X and Y, both of length N,
5516 are equal. */
5517
5518static void
5519verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5520{
5521 for (unsigned int i = 0; i < n; i++)
5522 {
5523 if (x[i] != y[i])
5524 {
5525 fprintf (stderr, "Arrays do not match. X:\n");
5526 dump_char_array (stderr, x, n);
5527 fprintf (stderr, "Y:\n");
5528 dump_char_array (stderr, y, n);
5529 }
5530 ASSERT_EQ (x[i], y[i]);
5531 }
5532}
5533
8aba425f 5534/* Test that shift_bytes_in_array_left carries bits across
c22d8787
KT
5535 bytes correctly. */
5536
5537static void
8aba425f 5538verify_shift_bytes_in_array_left (void)
c22d8787
KT
5539{
5540 /* byte 1 | byte 0
5541 00011111 | 11100000. */
5542 unsigned char orig[2] = { 0xe0, 0x1f };
5543 unsigned char in[2];
5544 memcpy (in, orig, sizeof orig);
5545
5546 unsigned char expected[2] = { 0x80, 0x7f };
8aba425f 5547 shift_bytes_in_array_left (in, sizeof (in), 2);
c22d8787
KT
5548 verify_array_eq (in, expected, sizeof (in));
5549
5550 memcpy (in, orig, sizeof orig);
5551 memcpy (expected, orig, sizeof orig);
5552 /* Check that shifting by zero doesn't change anything. */
8aba425f 5553 shift_bytes_in_array_left (in, sizeof (in), 0);
c22d8787
KT
5554 verify_array_eq (in, expected, sizeof (in));
5555
5556}
5557
 5558/* Test that shift_bytes_in_array_right carries bits across
5559 bytes correctly. */
5560
5561static void
5562verify_shift_bytes_in_array_right (void)
5563{
5564 /* byte 1 | byte 0
5565 00011111 | 11100000. */
5566 unsigned char orig[2] = { 0x1f, 0xe0};
5567 unsigned char in[2];
5568 memcpy (in, orig, sizeof orig);
5569 unsigned char expected[2] = { 0x07, 0xf8};
5570 shift_bytes_in_array_right (in, sizeof (in), 2);
5571 verify_array_eq (in, expected, sizeof (in));
5572
5573 memcpy (in, orig, sizeof orig);
5574 memcpy (expected, orig, sizeof orig);
5575 /* Check that shifting by zero doesn't change anything. */
5576 shift_bytes_in_array_right (in, sizeof (in), 0);
5577 verify_array_eq (in, expected, sizeof (in));
5578}
5579
 5580/* Test that clear_bit_region clears exactly the bits asked for and
5581 nothing more. */
5582
5583static void
5584verify_clear_bit_region (void)
5585{
5586 /* Start with all bits set and test clearing various patterns in them. */
5587 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5588 unsigned char in[3];
5589 unsigned char expected[3];
5590 memcpy (in, orig, sizeof in);
5591
5592 /* Check zeroing out all the bits. */
5593 clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5594 expected[0] = expected[1] = expected[2] = 0;
5595 verify_array_eq (in, expected, sizeof in);
5596
5597 memcpy (in, orig, sizeof in);
5598 /* Leave the first and last bits intact. */
5599 clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5600 expected[0] = 0x1;
5601 expected[1] = 0;
5602 expected[2] = 0x80;
5603 verify_array_eq (in, expected, sizeof in);
5604}
5605
5384a802 5606/* Test that clear_bit_region_be clears exactly the bits asked for and
c22d8787
KT
5607 nothing more. */
5608
5609static void
5610verify_clear_bit_region_be (void)
5611{
5612 /* Start with all bits set and test clearing various patterns in them. */
5613 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5614 unsigned char in[3];
5615 unsigned char expected[3];
5616 memcpy (in, orig, sizeof in);
5617
5618 /* Check zeroing out all the bits. */
5619 clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5620 expected[0] = expected[1] = expected[2] = 0;
5621 verify_array_eq (in, expected, sizeof in);
5622
5623 memcpy (in, orig, sizeof in);
5624 /* Leave the first and last bits intact. */
5625 clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5626 expected[0] = 0x80;
5627 expected[1] = 0;
5628 expected[2] = 0x1;
5629 verify_array_eq (in, expected, sizeof in);
5630}
5631
5632
5633/* Run all of the selftests within this file. */
5634
5635void
5636store_merging_c_tests (void)
5637{
8aba425f 5638 verify_shift_bytes_in_array_left ();
c22d8787
KT
5639 verify_shift_bytes_in_array_right ();
5640 verify_clear_bit_region ();
5641 verify_clear_bit_region_be ();
5642}
5643
5644} // namespace selftest
5645#endif /* CHECKING_P. */