[thirdparty/gcc.git] / gcc / gimple-ssa-store-merging.c
 1/* GIMPLE store merging and byte swapping passes.
 2 Copyright (C) 2009-2020 Free Software Foundation, Inc.
3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21/* The purpose of the store merging pass is to combine multiple memory stores
22 of constant values, values loaded from memory, bitwise operations on those,
23 or bit-field values, to consecutive locations, into fewer wider stores.
24
 25 For example, if we have a sequence performing four byte stores to
26 consecutive memory locations:
27 [p ] := imm1;
28 [p + 1B] := imm2;
29 [p + 2B] := imm3;
30 [p + 3B] := imm4;
31 we can transform this into a single 4-byte store if the target supports it:
 32 [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
 33
34 Or:
35 [p ] := [q ];
36 [p + 1B] := [q + 1B];
37 [p + 2B] := [q + 2B];
38 [p + 3B] := [q + 3B];
 39 if there is no overlap, this can be transformed into a single 4-byte
 40 load followed by a single 4-byte store.
41
42 Or:
43 [p ] := [q ] ^ imm1;
44 [p + 1B] := [q + 1B] ^ imm2;
45 [p + 2B] := [q + 2B] ^ imm3;
46 [p + 3B] := [q + 3B] ^ imm4;
 47 if there is no overlap, this can be transformed into a single 4-byte
 48 load, XORed with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
49
50 Or:
51 [p:1 ] := imm;
52 [p:31] := val & 0x7FFFFFFF;
53 we can transform this into a single 4-byte store if the target supports it:
54 [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
55
56 The algorithm is applied to each basic block in three phases:
57
58 1) Scan through the basic block and record assignments to destinations
59 that can be expressed as a store to memory of a certain size at a certain
60 bit offset from base expressions we can handle. For bit-fields we also
61 record the surrounding bit region, i.e. bits that could be stored in
62 a read-modify-write operation when storing the bit-field. Record store
63 chains to different bases in a hash_map (m_stores) and make sure to
 64 terminate such chains when appropriate (for example when the stored
 65 values get used subsequently).
66 These stores can be a result of structure element initializers, array stores
67 etc. A store_immediate_info object is recorded for every such store.
68 Record as many such assignments to a single base as possible until a
69 statement that interferes with the store sequence is encountered.
 70 Each store has up to 2 operands, which can be either a constant, a memory
71 load or an SSA name, from which the value to be stored can be computed.
72 At most one of the operands can be a constant. The operands are recorded
 73 in the store_operand_info struct.
 74
 75 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
 76 store_immediate_info objects) and coalesce contiguous stores into
 77 merged_store_group objects. For bit-field stores, we don't need to
78 require the stores to be contiguous, just their surrounding bit regions
79 have to be contiguous. If the expression being stored is different
 80 between adjacent stores, such as one store storing a constant and the
 81 following one storing a value loaded from memory, or if the loaded memory
82 objects are not adjacent, a new merged_store_group is created as well.
83
84 For example, given the stores:
85 [p ] := 0;
86 [p + 1B] := 1;
87 [p + 3B] := 0;
88 [p + 4B] := 1;
89 [p + 5B] := 0;
90 [p + 6B] := 0;
91 This phase would produce two merged_store_group objects, one recording the
92 two bytes stored in the memory region [p : p + 1] and another
93 recording the four bytes stored in the memory region [p + 3 : p + 6].
94
95 3) The merged_store_group objects produced in phase 2) are processed
96 to generate the sequence of wider stores that set the contiguous memory
97 regions to the sequence of bytes that correspond to it. This may emit
98 multiple stores per store group to handle contiguous stores that are not
99 of a size that is a power of 2. For example it can try to emit a 40-bit
100 store as a 32-bit store followed by an 8-bit store.
101 We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
102 or TARGET_SLOW_UNALIGNED_ACCESS settings.
103
104 Note on endianness and example:
105 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106 [p ] := 0x1234;
107 [p + 2B] := 0x5678;
108 [p + 4B] := 0xab;
109 [p + 5B] := 0xcd;
110
111 The memory layout for little-endian (LE) and big-endian (BE) must be:
112 p |LE|BE|
113 ---------
114 0 |34|12|
115 1 |12|34|
116 2 |78|56|
117 3 |56|78|
118 4 |ab|ab|
119 5 |cd|cd|
120
121 To merge these into a single 48-bit merged value 'val' in phase 2)
122 on little-endian we insert stores to higher (consecutive) bitpositions
123 into the most significant bits of the merged value.
124 The final merged value would be: 0xcdab56781234
125
126 For big-endian we insert stores to higher bitpositions into the least
127 significant bits of the merged value.
128 The final merged value would be: 0x12345678abcd
129
130 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131 followed by a 16-bit store. Again, we must consider endianness when
132 breaking down the 48-bit value 'val' computed above.
133 For little endian we emit:
134 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
136
137 Whereas for big-endian we emit:
138 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
140
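As a concrete illustration of the transformation described above, here is a minimal, hypothetical C test case (not part of this file) of the kind the store merging pass targets: four adjacent byte stores that can become one 4-byte store on targets that allow it.

/* Hypothetical example, for illustration only: four adjacent 1-byte
   constant stores that store merging can combine into one 4-byte store.  */

struct s { unsigned char a, b, c, d; };

void
init (struct s *p)
{
  p->a = 1;	/* [p    ] := 1 */
  p->b = 2;	/* [p + 1] := 2 */
  p->c = 3;	/* [p + 2] := 3 */
  p->d = 4;	/* [p + 3] := 4 */
  /* Expected to become a single 4-byte store of 0x04030201 on a
     little-endian target, or 0x01020304 on a big-endian one.  */
}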
141#include "config.h"
142#include "system.h"
143#include "coretypes.h"
144#include "backend.h"
145#include "tree.h"
146#include "gimple.h"
147#include "builtins.h"
148#include "fold-const.h"
149#include "tree-pass.h"
150#include "ssa.h"
151#include "gimple-pretty-print.h"
152#include "alias.h"
153#include "fold-const.h"
154#include "print-tree.h"
155#include "tree-hash-traits.h"
156#include "gimple-iterator.h"
157#include "gimplify.h"
 158#include "gimple-fold.h"
159#include "stor-layout.h"
160#include "timevar.h"
161#include "cfganal.h"
162#include "cfgcleanup.h"
 163#include "tree-cfg.h"
 164#include "except.h"
165#include "tree-eh.h"
166#include "target.h"
 167#include "gimplify-me.h"
168#include "rtl.h"
169#include "expr.h" /* For get_bit_range. */
 170#include "optabs-tree.h"
 171#include "dbgcnt.h"
 172#include "selftest.h"
173
174/* The maximum size (in bits) of the stores this pass should generate. */
175#define MAX_STORE_BITSIZE (BITS_PER_WORD)
176#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
177
178/* Limit to bound the number of aliasing checks for loads with the same
179 vuse as the corresponding store. */
180#define MAX_STORE_ALIAS_CHECKS 64
181
182namespace {
183
 184struct bswap_stat
185{
186 /* Number of hand-written 16-bit nop / bswaps found. */
187 int found_16bit;
188
189 /* Number of hand-written 32-bit nop / bswaps found. */
190 int found_32bit;
191
192 /* Number of hand-written 64-bit nop / bswaps found. */
193 int found_64bit;
194} nop_stats, bswap_stats;
195
196/* A symbolic number structure is used to detect byte permutation and selection
197 patterns of a source. To achieve that, its field N contains an artificial
 198 number consisting of BITS_PER_MARKER sized markers tracking where each
 199 byte comes from in the source:
 200
 201 0 - target byte has the value 0
 202 FF - target byte has an unknown value (e.g. due to sign extension)
203 1..size - marker value is the byte index in the source (0 for lsb).
204
205 To detect permutations on memory sources (arrays and structures), a symbolic
206 number is also associated:
207 - a base address BASE_ADDR and an OFFSET giving the address of the source;
208 - a range which gives the difference between the highest and lowest accessed
209 memory location to make such a symbolic number;
210 - the address SRC of the source element of lowest address as a convenience
211 to easily get BASE_ADDR + offset + lowest bytepos;
212 - number of expressions N_OPS bitwise ored together to represent
213 approximate cost of the computation.
214
215 Note 1: the range is different from size as size reflects the size of the
216 type of the current expression. For instance, for an array char a[],
217 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
218 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
219 time a range of 1.
220
221 Note 2: for non-memory sources, range holds the same value as size.
222
223 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
224
225struct symbolic_number {
226 uint64_t n;
227 tree type;
228 tree base_addr;
229 tree offset;
 230 poly_int64_pod bytepos;
231 tree src;
232 tree alias_set;
233 tree vuse;
234 unsigned HOST_WIDE_INT range;
235 int n_ops;
236};
237
238#define BITS_PER_MARKER 8
239#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
240#define MARKER_BYTE_UNKNOWN MARKER_MASK
241#define HEAD_MARKER(n, size) \
242 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
243
244/* The number which the find_bswap_or_nop_1 result should match in
245 order to have a nop. The number is masked according to the size of
246 the symbolic number before using it. */
247#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248 (uint64_t)0x08070605 << 32 | 0x04030201)
249
250/* The number which the find_bswap_or_nop_1 result should match in
251 order to have a byte swap. The number is masked according to the
252 size of the symbolic number before using it. */
253#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254 (uint64_t)0x01020304 << 32 | 0x05060708)
255
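For reference, with an 8-bit BITS_PER_MARKER the two constants above expand to 0x0807060504030201 (CMPNOP) and 0x0102030405060708 (CMPXCHG). A standalone sanity check of that expansion (illustration only, not GCC code):

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  const uint64_t cmpnop = (uint64_t) 0x08070605 << 32 | 0x04030201;
  const uint64_t cmpxchg = (uint64_t) 0x01020304 << 32 | 0x05060708;

  /* Marker k sits in byte k-1, so an untouched 8-byte value reads
     08 07 06 05 04 03 02 01 and a fully byte-swapped one reads the
     reverse.  */
  assert (cmpnop == 0x0807060504030201ULL);
  assert (cmpxchg == 0x0102030405060708ULL);
  return 0;
}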
256/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257 number N. Return false if the requested operation is not permitted
258 on a symbolic number. */
259
260inline bool
261do_shift_rotate (enum tree_code code,
262 struct symbolic_number *n,
263 int count)
264{
265 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
266 unsigned head_marker;
267
268 if (count < 0
269 || count >= TYPE_PRECISION (n->type)
270 || count % BITS_PER_UNIT != 0)
271 return false;
272 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
273
274 /* Zero out the extra bits of N in order to avoid them being shifted
275 into the significant bits. */
276 if (size < 64 / BITS_PER_MARKER)
277 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
278
279 switch (code)
280 {
281 case LSHIFT_EXPR:
282 n->n <<= count;
283 break;
284 case RSHIFT_EXPR:
285 head_marker = HEAD_MARKER (n->n, size);
286 n->n >>= count;
287 /* Arithmetic shift of signed type: result is dependent on the value. */
288 if (!TYPE_UNSIGNED (n->type) && head_marker)
289 for (i = 0; i < count / BITS_PER_MARKER; i++)
290 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
291 << ((size - 1 - i) * BITS_PER_MARKER);
292 break;
293 case LROTATE_EXPR:
294 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
295 break;
296 case RROTATE_EXPR:
297 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
298 break;
299 default:
300 return false;
301 }
302 /* Zero unused bits for size. */
303 if (size < 64 / BITS_PER_MARKER)
304 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
305 return true;
306}
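A standalone sketch (not GCC code, assuming 8-bit bytes) of the effect do_shift_rotate has on the markers for a logical right shift of a 4-byte symbolic number:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  const int size = 4;			/* bytes in the value */
  const int bits_per_marker = 8;
  uint64_t n = 0x04030201;		/* markers of an untouched 4-byte source */

  /* A right shift by 8 bits becomes a shift by one whole marker; the
     vacated top marker is left as zero because the type is unsigned.  */
  int count = (8 / 8) * bits_per_marker;
  n >>= count;
  n &= ((uint64_t) 1 << (size * bits_per_marker)) - 1;

  assert (n == 0x00040302);
  return 0;
}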
307
308/* Perform sanity checking for the symbolic number N and the gimple
309 statement STMT. */
310
311inline bool
312verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
313{
314 tree lhs_type;
315
316 lhs_type = gimple_expr_type (stmt);
317
318 if (TREE_CODE (lhs_type) != INTEGER_TYPE)
319 return false;
320
321 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
322 return false;
323
324 return true;
325}
326
327/* Initialize the symbolic number N for the bswap pass from the base element
328 SRC manipulated by the bitwise OR expression. */
329
330bool
331init_symbolic_number (struct symbolic_number *n, tree src)
332{
333 int size;
334
335 if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
336 return false;
337
338 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
339 n->src = src;
340
341 /* Set up the symbolic number N by setting each byte to a value between 1 and
342 the byte size of rhs1. The highest order byte is set to n->size and the
343 lowest order byte to 1. */
344 n->type = TREE_TYPE (src);
345 size = TYPE_PRECISION (n->type);
346 if (size % BITS_PER_UNIT != 0)
347 return false;
348 size /= BITS_PER_UNIT;
349 if (size > 64 / BITS_PER_MARKER)
350 return false;
351 n->range = size;
352 n->n = CMPNOP;
353 n->n_ops = 1;
354
355 if (size < 64 / BITS_PER_MARKER)
356 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
357
358 return true;
359}
360
 361/* Check if STMT might be a byte swap or a nop from a memory source and return
362 the answer. If so, REF is that memory source and the base of the memory area
363 accessed and the offset of the access from that base are recorded in N. */
364
365bool
366find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
367{
368 /* Leaf node is an array or component ref. Memorize its base and
369 offset from base to compare to other such leaf node. */
 370 poly_int64 bitsize, bitpos, bytepos;
371 machine_mode mode;
372 int unsignedp, reversep, volatilep;
373 tree offset, base_addr;
374
375 /* Not prepared to handle PDP endian. */
376 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
377 return false;
378
379 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
380 return false;
381
382 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
383 &unsignedp, &reversep, &volatilep);
384
385 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
386 /* Do not rewrite TARGET_MEM_REF. */
387 return false;
388 else if (TREE_CODE (base_addr) == MEM_REF)
 389 {
 390 poly_offset_int bit_offset = 0;
391 tree off = TREE_OPERAND (base_addr, 1);
392
393 if (!integer_zerop (off))
394 {
395 poly_offset_int boff = mem_ref_offset (base_addr);
396 boff <<= LOG2_BITS_PER_UNIT;
397 bit_offset += boff;
398 }
399
400 base_addr = TREE_OPERAND (base_addr, 0);
401
402 /* Avoid returning a negative bitpos as this may wreak havoc later. */
 403 if (maybe_lt (bit_offset, 0))
 404 {
405 tree byte_offset = wide_int_to_tree
406 (sizetype, bits_to_bytes_round_down (bit_offset));
407 bit_offset = num_trailing_bits (bit_offset);
 408 if (offset)
 409 offset = size_binop (PLUS_EXPR, offset, byte_offset);
 410 else
 411 offset = byte_offset;
412 }
413
 414 bitpos += bit_offset.force_shwi ();
 415 }
416 else
417 base_addr = build_fold_addr_expr (base_addr);
 418
 419 if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
 420 return false;
 421 if (!multiple_p (bitsize, BITS_PER_UNIT))
422 return false;
423 if (reversep)
424 return false;
425
426 if (!init_symbolic_number (n, ref))
427 return false;
428 n->base_addr = base_addr;
429 n->offset = offset;
 430 n->bytepos = bytepos;
431 n->alias_set = reference_alias_ptr_type (ref);
432 n->vuse = gimple_vuse (stmt);
433 return true;
434}
435
436/* Compute the symbolic number N representing the result of a bitwise OR on 2
 437 symbolic numbers N1 and N2 whose source statements are respectively
438 SOURCE_STMT1 and SOURCE_STMT2. */
439
440gimple *
441perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
442 gimple *source_stmt2, struct symbolic_number *n2,
443 struct symbolic_number *n)
444{
445 int i, size;
446 uint64_t mask;
447 gimple *source_stmt;
448 struct symbolic_number *n_start;
449
450 tree rhs1 = gimple_assign_rhs1 (source_stmt1);
451 if (TREE_CODE (rhs1) == BIT_FIELD_REF
452 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
453 rhs1 = TREE_OPERAND (rhs1, 0);
454 tree rhs2 = gimple_assign_rhs1 (source_stmt2);
455 if (TREE_CODE (rhs2) == BIT_FIELD_REF
456 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
457 rhs2 = TREE_OPERAND (rhs2, 0);
458
459 /* Sources are different, cancel bswap if they are not memory location with
460 the same base (array, structure, ...). */
461 if (rhs1 != rhs2)
462 {
463 uint64_t inc;
 464 HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
465 struct symbolic_number *toinc_n_ptr, *n_end;
466 basic_block bb1, bb2;
467
468 if (!n1->base_addr || !n2->base_addr
469 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
470 return NULL;
471
472 if (!n1->offset != !n2->offset
473 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
474 return NULL;
475
476 start1 = 0;
477 if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
478 return NULL;
479
480 if (start1 < start2)
481 {
482 n_start = n1;
 483 start_sub = start2 - start1;
484 }
485 else
486 {
487 n_start = n2;
 488 start_sub = start1 - start2;
489 }
490
491 bb1 = gimple_bb (source_stmt1);
492 bb2 = gimple_bb (source_stmt2);
493 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
494 source_stmt = source_stmt1;
495 else
496 source_stmt = source_stmt2;
497
498 /* Find the highest address at which a load is performed and
499 compute related info. */
500 end1 = start1 + (n1->range - 1);
501 end2 = start2 + (n2->range - 1);
502 if (end1 < end2)
503 {
504 end = end2;
505 end_sub = end2 - end1;
506 }
507 else
508 {
509 end = end1;
510 end_sub = end1 - end2;
511 }
512 n_end = (end2 > end1) ? n2 : n1;
513
514 /* Find symbolic number whose lsb is the most significant. */
515 if (BYTES_BIG_ENDIAN)
516 toinc_n_ptr = (n_end == n1) ? n2 : n1;
517 else
518 toinc_n_ptr = (n_start == n1) ? n2 : n1;
519
 520 n->range = end - MIN (start1, start2) + 1;
521
522 /* Check that the range of memory covered can be represented by
523 a symbolic number. */
524 if (n->range > 64 / BITS_PER_MARKER)
525 return NULL;
526
527 /* Reinterpret byte marks in symbolic number holding the value of
528 bigger weight according to target endianness. */
529 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
530 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
531 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
532 {
533 unsigned marker
534 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
535 if (marker && marker != MARKER_BYTE_UNKNOWN)
536 toinc_n_ptr->n += inc;
537 }
538 }
539 else
540 {
541 n->range = n1->range;
542 n_start = n1;
543 source_stmt = source_stmt1;
544 }
545
546 if (!n1->alias_set
547 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
548 n->alias_set = n1->alias_set;
549 else
550 n->alias_set = ptr_type_node;
551 n->vuse = n_start->vuse;
552 n->base_addr = n_start->base_addr;
553 n->offset = n_start->offset;
554 n->src = n_start->src;
555 n->bytepos = n_start->bytepos;
556 n->type = n_start->type;
557 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
558
559 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
560 {
561 uint64_t masked1, masked2;
562
563 masked1 = n1->n & mask;
564 masked2 = n2->n & mask;
565 if (masked1 && masked2 && masked1 != masked2)
566 return NULL;
567 }
568 n->n = n1->n | n2->n;
569 n->n_ops = n1->n_ops + n2->n_ops;
570
571 return source_stmt;
572}
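As a small illustration (not part of this file) of what this merge models: ORing two 1-byte loads from adjacent addresses yields one symbolic number whose range covers both bytes.

/* Hypothetical example, for illustration only.  */
#include <stdint.h>

uint16_t
load_le16 (const unsigned char *p)
{
  /* perform_symbolic_merge combines the two single-byte symbolic numbers
     into one with range 2 and markers 0x0201, so on a little-endian target
     the whole expression is recognized as a plain 16-bit load.  */
  return (uint16_t) (p[0] | (p[1] << 8));
}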
573
574/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
575 the operation given by the rhs of STMT on the result. If the operation
576 could successfully be executed the function returns a gimple stmt whose
 577 rhs's first tree is the expression of the source operand, and NULL
578 otherwise. */
579
580gimple *
581find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
582{
583 enum tree_code code;
584 tree rhs1, rhs2 = NULL;
585 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
586 enum gimple_rhs_class rhs_class;
587
588 if (!limit || !is_gimple_assign (stmt))
589 return NULL;
590
591 rhs1 = gimple_assign_rhs1 (stmt);
592
593 if (find_bswap_or_nop_load (stmt, rhs1, n))
594 return stmt;
595
596 /* Handle BIT_FIELD_REF. */
597 if (TREE_CODE (rhs1) == BIT_FIELD_REF
598 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
599 {
600 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
601 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
602 if (bitpos % BITS_PER_UNIT == 0
603 && bitsize % BITS_PER_UNIT == 0
604 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
605 {
606 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
607 if (BYTES_BIG_ENDIAN)
608 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
609
610 /* Shift. */
611 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
612 return NULL;
613
614 /* Mask. */
615 uint64_t mask = 0;
616 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
617 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
618 i++, tmp <<= BITS_PER_UNIT)
619 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
620 n->n &= mask;
621
622 /* Convert. */
623 n->type = TREE_TYPE (rhs1);
624 if (!n->base_addr)
625 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
626
627 return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
628 }
629
630 return NULL;
631 }
632
633 if (TREE_CODE (rhs1) != SSA_NAME)
634 return NULL;
635
636 code = gimple_assign_rhs_code (stmt);
637 rhs_class = gimple_assign_rhs_class (stmt);
638 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
639
640 if (rhs_class == GIMPLE_BINARY_RHS)
641 rhs2 = gimple_assign_rhs2 (stmt);
642
643 /* Handle unary rhs and binary rhs with integer constants as second
644 operand. */
645
646 if (rhs_class == GIMPLE_UNARY_RHS
647 || (rhs_class == GIMPLE_BINARY_RHS
648 && TREE_CODE (rhs2) == INTEGER_CST))
649 {
650 if (code != BIT_AND_EXPR
651 && code != LSHIFT_EXPR
652 && code != RSHIFT_EXPR
653 && code != LROTATE_EXPR
654 && code != RROTATE_EXPR
655 && !CONVERT_EXPR_CODE_P (code))
656 return NULL;
657
658 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
659
660 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
661 we have to initialize the symbolic number. */
662 if (!source_stmt1)
663 {
664 if (gimple_assign_load_p (stmt)
665 || !init_symbolic_number (n, rhs1))
666 return NULL;
667 source_stmt1 = stmt;
668 }
669
670 switch (code)
671 {
672 case BIT_AND_EXPR:
673 {
674 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
675 uint64_t val = int_cst_value (rhs2), mask = 0;
676 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
677
678 /* Only constants masking full bytes are allowed. */
679 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
680 if ((val & tmp) != 0 && (val & tmp) != tmp)
681 return NULL;
682 else if (val & tmp)
683 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
684
685 n->n &= mask;
686 }
687 break;
688 case LSHIFT_EXPR:
689 case RSHIFT_EXPR:
690 case LROTATE_EXPR:
691 case RROTATE_EXPR:
692 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
693 return NULL;
694 break;
695 CASE_CONVERT:
696 {
697 int i, type_size, old_type_size;
698 tree type;
699
700 type = gimple_expr_type (stmt);
701 type_size = TYPE_PRECISION (type);
702 if (type_size % BITS_PER_UNIT != 0)
703 return NULL;
704 type_size /= BITS_PER_UNIT;
705 if (type_size > 64 / BITS_PER_MARKER)
706 return NULL;
707
708 /* Sign extension: result is dependent on the value. */
709 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
710 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
711 && HEAD_MARKER (n->n, old_type_size))
712 for (i = 0; i < type_size - old_type_size; i++)
713 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
714 << ((type_size - 1 - i) * BITS_PER_MARKER);
715
716 if (type_size < 64 / BITS_PER_MARKER)
717 {
718 /* If STMT casts to a smaller type mask out the bits not
719 belonging to the target type. */
720 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
721 }
722 n->type = type;
723 if (!n->base_addr)
724 n->range = type_size;
725 }
726 break;
727 default:
728 return NULL;
729 };
730 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
731 }
732
733 /* Handle binary rhs. */
734
735 if (rhs_class == GIMPLE_BINARY_RHS)
736 {
737 struct symbolic_number n1, n2;
738 gimple *source_stmt, *source_stmt2;
739
740 if (code != BIT_IOR_EXPR)
741 return NULL;
742
743 if (TREE_CODE (rhs2) != SSA_NAME)
744 return NULL;
745
746 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
747
748 switch (code)
749 {
750 case BIT_IOR_EXPR:
751 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
752
753 if (!source_stmt1)
754 return NULL;
755
756 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
757
758 if (!source_stmt2)
759 return NULL;
760
761 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
762 return NULL;
763
 764 if (n1.vuse != n2.vuse)
765 return NULL;
766
767 source_stmt
768 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
769
770 if (!source_stmt)
771 return NULL;
772
773 if (!verify_symbolic_number_p (n, stmt))
774 return NULL;
775
776 break;
777 default:
778 return NULL;
779 }
780 return source_stmt;
781 }
782 return NULL;
783}
784
785/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
786 *CMPXCHG, *CMPNOP and adjust *N. */
 787
788void
789find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
790 uint64_t *cmpnop)
791{
792 unsigned rsize;
793 uint64_t tmpn, mask;
 794
795 /* The number which the find_bswap_or_nop_1 result should match in order
796 to have a full byte swap. The number is shifted to the right
797 according to the size of the symbolic number before using it. */
798 *cmpxchg = CMPXCHG;
799 *cmpnop = CMPNOP;
800
801 /* Find real size of result (highest non-zero byte). */
802 if (n->base_addr)
803 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
804 else
805 rsize = n->range;
806
807 /* Zero out the bits corresponding to untouched bytes in original gimple
808 expression. */
809 if (n->range < (int) sizeof (int64_t))
810 {
811 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
812 *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
813 *cmpnop &= mask;
814 }
815
816 /* Zero out the bits corresponding to unused bytes in the result of the
817 gimple expression. */
818 if (rsize < n->range)
819 {
820 if (BYTES_BIG_ENDIAN)
821 {
822 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
823 *cmpxchg &= mask;
824 *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
825 }
826 else
827 {
828 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
829 *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
830 *cmpnop &= mask;
831 }
832 n->range = rsize;
833 }
834
835 n->range *= BITS_PER_UNIT;
836}
837
838/* Check if STMT completes a bswap implementation or a read in a given
839 endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
840 accordingly. It also sets N to represent the kind of operations
841 performed: size of the resulting expression and whether it works on
 842 a memory source, and if so alias-set and vuse. Finally, the
843 function returns a stmt whose rhs's first tree is the source
844 expression. */
845
846gimple *
847find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
848{
849 /* The last parameter determines the depth search limit. It usually
850 correlates directly to the number n of bytes to be touched. We
 851 increase that number by 2 * (log2(n) + 1) here in order to also
852 cover signed -> unsigned conversions of the src operand as can be seen
853 in libgcc, and for initial shift/and operation of the src operand. */
854 int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
 855 limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
856 gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
857
858 if (!ins_stmt)
859 return NULL;
860
861 uint64_t cmpxchg, cmpnop;
862 find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
863
 864 /* A complete byte swap should make the symbolic number start with
865 the largest digit in the highest order byte. Unchanged symbolic
866 number indicates a read with same endianness as target architecture. */
867 if (n->n == cmpnop)
868 *bswap = false;
869 else if (n->n == cmpxchg)
870 *bswap = true;
871 else
872 return NULL;
873
874 /* Useless bit manipulation performed by code. */
875 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
876 return NULL;
877
878 return ins_stmt;
879}
880
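For illustration, a hand-written 32-bit byte swap of the kind find_bswap_or_nop is meant to recognize (a hypothetical test case, not part of GCC); when the target provides a bswap pattern, the shift/mask/OR sequence can be replaced by a single __builtin_bswap32 call.

#include <stdint.h>

uint32_t
swap32 (uint32_t x)
{
  /* The merged symbolic number for this expression is 0x01020304, which
     matches CMPXCHG, so *bswap is set to true.  */
  return ((x & 0x000000ffU) << 24)
	 | ((x & 0x0000ff00U) << 8)
	 | ((x & 0x00ff0000U) >> 8)
	 | ((x & 0xff000000U) >> 24);
}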
881const pass_data pass_data_optimize_bswap =
882{
883 GIMPLE_PASS, /* type */
884 "bswap", /* name */
885 OPTGROUP_NONE, /* optinfo_flags */
886 TV_NONE, /* tv_id */
887 PROP_ssa, /* properties_required */
888 0, /* properties_provided */
889 0, /* properties_destroyed */
890 0, /* todo_flags_start */
891 0, /* todo_flags_finish */
892};
893
894class pass_optimize_bswap : public gimple_opt_pass
895{
896public:
897 pass_optimize_bswap (gcc::context *ctxt)
898 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
899 {}
900
901 /* opt_pass methods: */
902 virtual bool gate (function *)
903 {
904 return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
905 }
906
907 virtual unsigned int execute (function *);
908
909}; // class pass_optimize_bswap
910
911/* Perform the bswap optimization: replace the expression computed in the rhs
912 of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
913 bswap, load or load + bswap expression.
 914 Which of these alternatives replaces the rhs is given by N->base_addr (non
 915 null if a load is needed) and BSWAP. The type, VUSE and alias-set of the
 916 load to perform are also given in N while the builtin bswap invocation is given
 917 in FNDECL. Finally, if a load is involved, INS_STMT refers to one of the
918 load statements involved to construct the rhs in gsi_stmt (GSI) and
919 N->range gives the size of the rhs expression for maintaining some
920 statistics.
 921
 922 Note that if the replacement involves a load and if gsi_stmt (GSI) is
 923 non-NULL, that stmt is moved just after INS_STMT to do the load with the
 924 same VUSE, which can lead to gsi_stmt (GSI) changing basic block. */
 925
926tree
927bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
928 tree bswap_type, tree load_type, struct symbolic_number *n,
929 bool bswap)
930{
 931 tree src, tmp, tgt = NULL_TREE;
932 gimple *bswap_stmt;
933
 934 gimple *cur_stmt = gsi_stmt (gsi);
 935 src = n->src;
936 if (cur_stmt)
937 tgt = gimple_assign_lhs (cur_stmt);
938
939 /* Need to load the value from memory first. */
940 if (n->base_addr)
941 {
942 gimple_stmt_iterator gsi_ins = gsi;
943 if (ins_stmt)
944 gsi_ins = gsi_for_stmt (ins_stmt);
945 tree addr_expr, addr_tmp, val_expr, val_tmp;
946 tree load_offset_ptr, aligned_load_type;
947 gimple *load_stmt;
948 unsigned align = get_object_alignment (src);
 949 poly_int64 load_offset = 0;
 950
951 if (cur_stmt)
952 {
953 basic_block ins_bb = gimple_bb (ins_stmt);
954 basic_block cur_bb = gimple_bb (cur_stmt);
955 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
956 return NULL_TREE;
957
958 /* Move cur_stmt just before one of the load of the original
959 to ensure it has the same VUSE. See PR61517 for what could
960 go wrong. */
961 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
962 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
963 gsi_move_before (&gsi, &gsi_ins);
964 gsi = gsi_for_stmt (cur_stmt);
965 }
966 else
967 gsi = gsi_ins;
968
969 /* Compute address to load from and cast according to the size
970 of the load. */
 971 addr_expr = build_fold_addr_expr (src);
 972 if (is_gimple_mem_ref_addr (addr_expr))
 973 addr_tmp = unshare_expr (addr_expr);
974 else
975 {
976 addr_tmp = unshare_expr (n->base_addr);
977 if (!is_gimple_mem_ref_addr (addr_tmp))
978 addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
979 is_gimple_mem_ref_addr,
980 NULL_TREE, true,
981 GSI_SAME_STMT);
982 load_offset = n->bytepos;
983 if (n->offset)
984 {
985 tree off
986 = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
987 true, NULL_TREE, true,
988 GSI_SAME_STMT);
989 gimple *stmt
990 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
991 POINTER_PLUS_EXPR, addr_tmp, off);
992 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
993 addr_tmp = gimple_assign_lhs (stmt);
994 }
995 }
996
997 /* Perform the load. */
998 aligned_load_type = load_type;
999 if (align < TYPE_ALIGN (load_type))
1000 aligned_load_type = build_aligned_type (load_type, align);
1001 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1002 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1003 load_offset_ptr);
1004
1005 if (!bswap)
1006 {
1007 if (n->range == 16)
1008 nop_stats.found_16bit++;
1009 else if (n->range == 32)
1010 nop_stats.found_32bit++;
1011 else
1012 {
1013 gcc_assert (n->range == 64);
1014 nop_stats.found_64bit++;
1015 }
1016
1017 /* Convert the result of load if necessary. */
 1018 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
1019 {
1020 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1021 "load_dst");
1022 load_stmt = gimple_build_assign (val_tmp, val_expr);
1023 gimple_set_vuse (load_stmt, n->vuse);
1024 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1025 gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
 1026 update_stmt (cur_stmt);
 1027 }
 1028 else if (cur_stmt)
1029 {
1030 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1031 gimple_set_vuse (cur_stmt, n->vuse);
1032 update_stmt (cur_stmt);
1033 }
1034 else
1035 {
1036 tgt = make_ssa_name (load_type);
1037 cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1038 gimple_set_vuse (cur_stmt, n->vuse);
1039 gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
 1040 }
1041
1042 if (dump_file)
1043 {
1044 fprintf (dump_file,
1045 "%d bit load in target endianness found at: ",
1046 (int) n->range);
1047 print_gimple_stmt (dump_file, cur_stmt, 0);
1048 }
 1049 return tgt;
1050 }
1051 else
1052 {
1053 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1054 load_stmt = gimple_build_assign (val_tmp, val_expr);
1055 gimple_set_vuse (load_stmt, n->vuse);
1056 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1057 }
1058 src = val_tmp;
1059 }
1060 else if (!bswap)
1061 {
1062 gimple *g = NULL;
1063 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
1064 {
1065 if (!is_gimple_val (src))
 1066 return NULL_TREE;
1067 g = gimple_build_assign (tgt, NOP_EXPR, src);
1068 }
 1069 else if (cur_stmt)
 1070 g = gimple_build_assign (tgt, src);
1071 else
1072 tgt = src;
1073 if (n->range == 16)
1074 nop_stats.found_16bit++;
1075 else if (n->range == 32)
1076 nop_stats.found_32bit++;
1077 else
1078 {
1079 gcc_assert (n->range == 64);
1080 nop_stats.found_64bit++;
1081 }
1082 if (dump_file)
1083 {
1084 fprintf (dump_file,
1085 "%d bit reshuffle in target endianness found at: ",
1086 (int) n->range);
1087 if (cur_stmt)
1088 print_gimple_stmt (dump_file, cur_stmt, 0);
1089 else
1090 {
 1091 print_generic_expr (dump_file, tgt, TDF_NONE);
1092 fprintf (dump_file, "\n");
1093 }
 1094 }
1095 if (cur_stmt)
1096 gsi_replace (&gsi, g, true);
1097 return tgt;
1098 }
1099 else if (TREE_CODE (src) == BIT_FIELD_REF)
1100 src = TREE_OPERAND (src, 0);
1101
1102 if (n->range == 16)
1103 bswap_stats.found_16bit++;
1104 else if (n->range == 32)
1105 bswap_stats.found_32bit++;
1106 else
1107 {
1108 gcc_assert (n->range == 64);
1109 bswap_stats.found_64bit++;
1110 }
1111
1112 tmp = src;
1113
1114 /* Convert the src expression if necessary. */
1115 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1116 {
1117 gimple *convert_stmt;
1118
1119 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1120 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1121 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1122 }
1123
1124 /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values
1125 are considered as rotation of 2N bit values by N bits is generally not
1126 equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
1127 gives 0x03040102 while a bswap for that value is 0x04030201. */
1128 if (bswap && n->range == 16)
1129 {
1130 tree count = build_int_cst (NULL, BITS_PER_UNIT);
1131 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1132 bswap_stmt = gimple_build_assign (NULL, src);
1133 }
1134 else
1135 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1136
1137 if (tgt == NULL_TREE)
1138 tgt = make_ssa_name (bswap_type);
1139 tmp = tgt;
1140
1141 /* Convert the result if necessary. */
1142 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1143 {
1144 gimple *convert_stmt;
1145
1146 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1147 convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
1148 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1149 }
1150
1151 gimple_set_lhs (bswap_stmt, tmp);
1152
1153 if (dump_file)
1154 {
1155 fprintf (dump_file, "%d bit bswap implementation found at: ",
1156 (int) n->range);
1157 if (cur_stmt)
1158 print_gimple_stmt (dump_file, cur_stmt, 0);
1159 else
1160 {
 1161 print_generic_expr (dump_file, tgt, TDF_NONE);
1162 fprintf (dump_file, "\n");
1163 }
1164 }
1165
1166 if (cur_stmt)
1167 {
1168 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1169 gsi_remove (&gsi, true);
1170 }
1171 else
1172 gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1173 return tgt;
1174}
1175
 1176/* Find manual byte swap implementations as well as loads in a given
 1177 endianness. Byte swaps are turned into a bswap builtin invocation
 1178 while endian loads are converted to a bswap builtin invocation or a
 1179 simple load according to the target endianness. */
1180
1181unsigned int
1182pass_optimize_bswap::execute (function *fun)
1183{
1184 basic_block bb;
1185 bool bswap32_p, bswap64_p;
1186 bool changed = false;
1187 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1188
1189 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1190 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1191 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1192 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1193 || (bswap32_p && word_mode == SImode)));
1194
1195 /* Determine the argument type of the builtins. The code later on
1196 assumes that the return and argument type are the same. */
1197 if (bswap32_p)
1198 {
1199 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1200 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1201 }
1202
1203 if (bswap64_p)
1204 {
1205 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1206 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1207 }
1208
1209 memset (&nop_stats, 0, sizeof (nop_stats));
1210 memset (&bswap_stats, 0, sizeof (bswap_stats));
1211 calculate_dominance_info (CDI_DOMINATORS);
1212
1213 FOR_EACH_BB_FN (bb, fun)
1214 {
1215 gimple_stmt_iterator gsi;
1216
1217 /* We do a reverse scan for bswap patterns to make sure we get the
1218 widest match. As bswap pattern matching doesn't handle previously
1219 inserted smaller bswap replacements as sub-patterns, the wider
1220 variant wouldn't be detected. */
1221 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1222 {
1223 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1224 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1225 enum tree_code code;
1226 struct symbolic_number n;
1227 bool bswap;
1228
1229 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1230 might be moved to a different basic block by bswap_replace and gsi
 1231 must not point to it if that's the case. Moving the gsi_prev
 1232 there makes sure that gsi points to the statement previous to
1233 cur_stmt while still making sure that all statements are
1234 considered in this basic block. */
1235 gsi_prev (&gsi);
1236
1237 if (!is_gimple_assign (cur_stmt))
1238 continue;
1239
1240 code = gimple_assign_rhs_code (cur_stmt);
1241 switch (code)
1242 {
1243 case LROTATE_EXPR:
1244 case RROTATE_EXPR:
1245 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1246 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1247 % BITS_PER_UNIT)
1248 continue;
1249 /* Fall through. */
1250 case BIT_IOR_EXPR:
1251 break;
1252 default:
1253 continue;
1254 }
1255
1256 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
1257
1258 if (!ins_stmt)
1259 continue;
1260
1261 switch (n.range)
1262 {
1263 case 16:
1264 /* Already in canonical form, nothing to do. */
1265 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1266 continue;
1267 load_type = bswap_type = uint16_type_node;
1268 break;
1269 case 32:
1270 load_type = uint32_type_node;
1271 if (bswap32_p)
1272 {
1273 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1274 bswap_type = bswap32_type;
1275 }
1276 break;
1277 case 64:
1278 load_type = uint64_type_node;
1279 if (bswap64_p)
1280 {
1281 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1282 bswap_type = bswap64_type;
1283 }
1284 break;
1285 default:
1286 continue;
1287 }
1288
1289 if (bswap && !fndecl && n.range != 16)
1290 continue;
1291
1292 if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1293 bswap_type, load_type, &n, bswap))
1294 changed = true;
1295 }
1296 }
1297
1298 statistics_counter_event (fun, "16-bit nop implementations found",
1299 nop_stats.found_16bit);
1300 statistics_counter_event (fun, "32-bit nop implementations found",
1301 nop_stats.found_32bit);
1302 statistics_counter_event (fun, "64-bit nop implementations found",
1303 nop_stats.found_64bit);
1304 statistics_counter_event (fun, "16-bit bswap implementations found",
1305 bswap_stats.found_16bit);
1306 statistics_counter_event (fun, "32-bit bswap implementations found",
1307 bswap_stats.found_32bit);
1308 statistics_counter_event (fun, "64-bit bswap implementations found",
1309 bswap_stats.found_64bit);
1310
1311 return (changed ? TODO_update_ssa : 0);
1312}
1313
1314} // anon namespace
1315
1316gimple_opt_pass *
1317make_pass_optimize_bswap (gcc::context *ctxt)
1318{
1319 return new pass_optimize_bswap (ctxt);
1320}
1321
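As a usage note (this relies on the standard GCC dump machinery rather than anything defined in this file): the pass above is registered under the name "bswap", so compiling a test case such as the swap32 example earlier with something like

  gcc -O2 -fdump-tree-bswap-details test.c

should produce a tree dump in which bswap_replace's "32 bit bswap implementation found at:" message and the replacement __builtin_bswap32 call are visible.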
1322namespace {
1323
 1324/* Struct recording one operand for the store, which is either a constant,
1325 then VAL represents the constant and all the other fields are zero, or
1326 a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1327 and the other fields also reflect the memory load, or an SSA name, then
 1328 VAL represents the SSA name and all the other fields are zero. */
 1329
 1330class store_operand_info
 1331{
 1332public:
1333 tree val;
1334 tree base_addr;
1335 poly_uint64 bitsize;
1336 poly_uint64 bitpos;
1337 poly_uint64 bitregion_start;
1338 poly_uint64 bitregion_end;
 1339 gimple *stmt;
 1340 bool bit_not_p;
1341 store_operand_info ();
1342};
1343
1344store_operand_info::store_operand_info ()
1345 : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
 1346 bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
1347{
1348}
1349
1350/* Struct recording the information about a single store of an immediate
1351 to memory. These are created in the first phase and coalesced into
1352 merged_store_group objects in the second phase. */
1353
 1354class store_immediate_info
 1355{
 1356public:
1357 unsigned HOST_WIDE_INT bitsize;
1358 unsigned HOST_WIDE_INT bitpos;
1359 unsigned HOST_WIDE_INT bitregion_start;
1360 /* This is one past the last bit of the bit region. */
1361 unsigned HOST_WIDE_INT bitregion_end;
1362 gimple *stmt;
1363 unsigned int order;
1364 /* INTEGER_CST for constant stores, MEM_REF for memory copy,
1365 BIT_*_EXPR for logical bitwise operation, BIT_INSERT_EXPR
1366 for bit insertion.
1367 LROTATE_EXPR if it can be only bswap optimized and
1368 ops are not really meaningful.
1369 NOP_EXPR if bswap optimization detected identity, ops
1370 are not meaningful. */
 1371 enum tree_code rhs_code;
1372 /* Two fields for bswap optimization purposes. */
1373 struct symbolic_number n;
1374 gimple *ins_stmt;
 1375 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
 1376 bool bit_not_p;
1377 /* True if ops have been swapped and thus ops[1] represents
1378 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1379 bool ops_swapped_p;
1380 /* The index number of the landing pad, or 0 if there is none. */
1381 int lp_nr;
1382 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1383 just the first one. */
1384 store_operand_info ops[2];
 1385 store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
 1386 unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
 1387 gimple *, unsigned int, enum tree_code,
 1388 struct symbolic_number &, gimple *, bool, int,
1389 const store_operand_info &,
1390 const store_operand_info &);
1391};
1392
1393store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
 1394 unsigned HOST_WIDE_INT bp,
1395 unsigned HOST_WIDE_INT brs,
1396 unsigned HOST_WIDE_INT bre,
 1397 gimple *st,
1398 unsigned int ord,
1399 enum tree_code rhscode,
1400 struct symbolic_number &nr,
1401 gimple *ins_stmtp,
 1402 bool bitnotp,
 1403 int nr2,
1404 const store_operand_info &op0r,
1405 const store_operand_info &op1r)
 1406 : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
 1407 stmt (st), order (ord), rhs_code (rhscode), n (nr),
1408 ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
1409 lp_nr (nr2)
1410#if __cplusplus >= 201103L
1411 , ops { op0r, op1r }
1412{
1413}
1414#else
 1415{
1416 ops[0] = op0r;
1417 ops[1] = op1r;
 1418}
 1419#endif
1420
1421/* Struct representing a group of stores to contiguous memory locations.
1422 These are produced by the second phase (coalescing) and consumed in the
1423 third phase that outputs the widened stores. */
1424
 1425class merged_store_group
 1426{
 1427public:
1428 unsigned HOST_WIDE_INT start;
1429 unsigned HOST_WIDE_INT width;
1430 unsigned HOST_WIDE_INT bitregion_start;
1431 unsigned HOST_WIDE_INT bitregion_end;
1432 /* The size of the allocated memory for val and mask. */
 1433 unsigned HOST_WIDE_INT buf_size;
 1434 unsigned HOST_WIDE_INT align_base;
 1435 poly_uint64 load_align_base[2];
1436
1437 unsigned int align;
 1438 unsigned int load_align[2];
1439 unsigned int first_order;
1440 unsigned int last_order;
 1441 bool bit_insertion;
1442 bool only_constants;
1443 unsigned int first_nonmergeable_order;
 1444 int lp_nr;
 1445
 1446 auto_vec<store_immediate_info *> stores;
1447 /* We record the first and last original statements in the sequence because
1448 we'll need their vuse/vdef and replacement position. It's easier to keep
1449 track of them separately as 'stores' is reordered by apply_stores. */
1450 gimple *last_stmt;
1451 gimple *first_stmt;
1452 unsigned char *val;
 1453 unsigned char *mask;
1454
1455 merged_store_group (store_immediate_info *);
1456 ~merged_store_group ();
 1457 bool can_be_merged_into (store_immediate_info *);
1458 void merge_into (store_immediate_info *);
1459 void merge_overlapping (store_immediate_info *);
1460 bool apply_stores ();
1461private:
1462 void do_merge (store_immediate_info *);
1463};
1464
1465/* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1466
1467static void
1468dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1469{
1470 if (!fd)
1471 return;
1472
1473 for (unsigned int i = 0; i < len; i++)
 1474 fprintf (fd, "%02x ", ptr[i]);
1475 fprintf (fd, "\n");
1476}
1477
1478/* Clear out LEN bits starting from bit START in the byte array
1479 PTR. This clears the bits to the *right* from START.
1480 START must be within [0, BITS_PER_UNIT) and counts starting from
1481 the least significant bit. */
1482
1483static void
1484clear_bit_region_be (unsigned char *ptr, unsigned int start,
1485 unsigned int len)
1486{
1487 if (len == 0)
1488 return;
1489 /* Clear len bits to the right of start. */
1490 else if (len <= start + 1)
1491 {
1492 unsigned char mask = (~(~0U << len));
1493 mask = mask << (start + 1U - len);
1494 ptr[0] &= ~mask;
1495 }
1496 else if (start != BITS_PER_UNIT - 1)
1497 {
1498 clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1499 clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1500 len - (start % BITS_PER_UNIT) - 1);
1501 }
1502 else if (start == BITS_PER_UNIT - 1
1503 && len > BITS_PER_UNIT)
1504 {
1505 unsigned int nbytes = len / BITS_PER_UNIT;
 1506 memset (ptr, 0, nbytes);
1507 if (len % BITS_PER_UNIT != 0)
1508 clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1509 len % BITS_PER_UNIT);
1510 }
1511 else
1512 gcc_unreachable ();
1513}
1514
1515/* In the byte array PTR clear the bit region starting at bit
 1516 START that is LEN bits wide.
1517 For regions spanning multiple bytes do this recursively until we reach
1518 zero LEN or a region contained within a single byte. */
1519
1520static void
1521clear_bit_region (unsigned char *ptr, unsigned int start,
1522 unsigned int len)
1523{
1524 /* Degenerate base case. */
1525 if (len == 0)
1526 return;
1527 else if (start >= BITS_PER_UNIT)
1528 clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1529 /* Second base case. */
1530 else if ((start + len) <= BITS_PER_UNIT)
1531 {
 1532 unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1533 mask >>= BITS_PER_UNIT - (start + len);
1534
1535 ptr[0] &= ~mask;
1536
1537 return;
1538 }
1539 /* Clear most significant bits in a byte and proceed with the next byte. */
1540 else if (start != 0)
1541 {
1542 clear_bit_region (ptr, start, BITS_PER_UNIT - start);
 1543 clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1544 }
1545 /* Whole bytes need to be cleared. */
1546 else if (start == 0 && len > BITS_PER_UNIT)
1547 {
1548 unsigned int nbytes = len / BITS_PER_UNIT;
1549 /* We could recurse on each byte but we clear whole bytes, so a simple
1550 memset will do. */
 1551 memset (ptr, '\0', nbytes);
1552 /* Clear the remaining sub-byte region if there is one. */
1553 if (len % BITS_PER_UNIT != 0)
1554 clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1555 }
1556 else
1557 gcc_unreachable ();
1558}
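A standalone, simplified re-implementation (illustration only; the real helper is the static clear_bit_region above, and the whole-byte memset branch is omitted here) that checks the behaviour for a region crossing a byte boundary, assuming 8-bit bytes:

#include <assert.h>
#include <string.h>

static void
clear_bits (unsigned char *ptr, unsigned int start, unsigned int len)
{
  if (len == 0)
    return;
  if (start >= 8)
    clear_bits (ptr + 1, start - 8, len);
  else if (start + len <= 8)
    {
      /* Region contained in a single byte: build the mask and clear it.  */
      unsigned char mask = (~0U) << (unsigned char) (8 - len);
      mask >>= 8 - (start + len);
      ptr[0] &= ~mask;
    }
  else
    {
      /* Clear the high bits of this byte, then continue in the next one.  */
      clear_bits (ptr, start, 8 - start);
      clear_bits (ptr + 1, 0, len - (8 - start));
    }
}

int
main (void)
{
  unsigned char buf[2];
  memset (buf, 0xff, sizeof buf);
  clear_bits (buf, 6, 4);	/* clears bits 6-7 of buf[0], bits 0-1 of buf[1] */
  assert (buf[0] == 0x3f && buf[1] == 0xfc);
  return 0;
}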
1559
1560/* Write BITLEN bits of EXPR to the byte array PTR at
1561 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1562 Return true if the operation succeeded. */
1563
1564static bool
1565encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
 1566 unsigned int total_bytes)
1567{
1568 unsigned int first_byte = bitpos / BITS_PER_UNIT;
1569 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1570 || (bitpos % BITS_PER_UNIT)
 1571 || !int_mode_for_size (bitlen, 0).exists ());
1572 bool empty_ctor_p
1573 = (TREE_CODE (expr) == CONSTRUCTOR
1574 && CONSTRUCTOR_NELTS (expr) == 0
1575 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1576 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
1577
1578 if (!sub_byte_op_p)
1579 {
1580 if (first_byte >= total_bytes)
1581 return false;
1582 total_bytes -= first_byte;
1583 if (empty_ctor_p)
1584 {
1585 unsigned HOST_WIDE_INT rhs_bytes
1586 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1587 if (rhs_bytes > total_bytes)
1588 return false;
1589 memset (ptr + first_byte, '\0', rhs_bytes);
1590 return true;
1591 }
1592 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1593 }
1594
1595 /* LITTLE-ENDIAN
1596 We are writing a non byte-sized quantity or at a position that is not
1597 at a byte boundary.
1598 |--------|--------|--------| ptr + first_byte
1599 ^ ^
1600 xxx xxxxxxxx xxx< bp>
1601 |______EXPR____|
1602
 1603 First native_encode_expr EXPR into a temporary buffer and shift each
1604 byte in the buffer by 'bp' (carrying the bits over as necessary).
1605 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1606 <------bitlen---->< bp>
1607 Then we clear the destination bits:
1608 |---00000|00000000|000-----| ptr + first_byte
1609 <-------bitlen--->< bp>
1610
1611 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1612 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1613
1614 BIG-ENDIAN
1615 We are writing a non byte-sized quantity or at a position that is not
1616 at a byte boundary.
1617 ptr + first_byte |--------|--------|--------|
1618 ^ ^
1619 <bp >xxx xxxxxxxx xxx
1620 |_____EXPR_____|
1621
 1622 First native_encode_expr EXPR into a temporary buffer and shift each
1623 byte in the buffer to the right by (carrying the bits over as necessary).
1624 We shift by as much as needed to align the most significant bit of EXPR
1625 with bitpos:
1626 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1627 <---bitlen----> <bp ><-----bitlen----->
1628 Then we clear the destination bits:
1629 ptr + first_byte |-----000||00000000||00000---|
1630 <bp ><-------bitlen----->
1631
1632 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1633 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1634 The awkwardness comes from the fact that bitpos is counted from the
1635 most significant bit of a byte. */
1636
1637 /* We must be dealing with fixed-size data at this point, since the
1638 total size is also fixed. */
1639 unsigned int byte_size;
1640 if (empty_ctor_p)
1641 {
1642 unsigned HOST_WIDE_INT rhs_bytes
1643 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1644 if (rhs_bytes > total_bytes)
1645 return false;
1646 byte_size = rhs_bytes;
1647 }
1648 else
1649 {
1650 fixed_size_mode mode
1651 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1652 byte_size = GET_MODE_SIZE (mode);
1653 }
 1654 /* Allocate an extra byte so that we have space to shift into. */
 1655 byte_size++;
 1656 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
 1657 memset (tmpbuf, '\0', byte_size);
 1658 /* The store detection code should only have allowed constants that are
1659 accepted by native_encode_expr or empty ctors. */
1660 if (!empty_ctor_p
1661 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
f663d9ad
KT
1662 gcc_unreachable ();
1663
1664 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1665 bytes to write. This means it can write more than
1666 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1667 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1668 bitlen and zero out the bits that are not relevant as well (that may
1669 contain a sign bit due to sign-extension). */
1670 unsigned int padding
1671 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
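/* Continuing the example above: with bitlen == 40 and an 8-byte mode,
   byte_size is 9 at this point (8 plus the extra shift byte), so
   padding == 9 - 5 - 1 == 3.  */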
ad1de652
JJ
1672 /* On big-endian the padding is at the 'front' so just skip the initial
1673 bytes. */
1674 if (BYTES_BIG_ENDIAN)
1675 tmpbuf += padding;
1676
1677 byte_size -= padding;
1678
1679 if (bitlen % BITS_PER_UNIT != 0)
f663d9ad 1680 {
4b2c06f4 1681 if (BYTES_BIG_ENDIAN)
ad1de652
JJ
1682 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1683 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1684 else
1685 clear_bit_region (tmpbuf, bitlen,
1686 byte_size * BITS_PER_UNIT - bitlen);
f663d9ad 1687 }
ad1de652
JJ
1688 /* Left shifting relies on the last byte being clear if bitlen is
1689 a multiple of BITS_PER_UNIT, which might not be the case if
1690 there are padding bytes. */
1691 else if (!BYTES_BIG_ENDIAN)
1692 tmpbuf[byte_size - 1] = '\0';
f663d9ad
KT
1693
1694 /* Clear the bit region in PTR where the bits from TMPBUF will be
46a61395 1695 inserted. */
f663d9ad
KT
1696 if (BYTES_BIG_ENDIAN)
1697 clear_bit_region_be (ptr + first_byte,
1698 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1699 else
1700 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1701
1702 int shift_amnt;
1703 int bitlen_mod = bitlen % BITS_PER_UNIT;
1704 int bitpos_mod = bitpos % BITS_PER_UNIT;
1705
1706 bool skip_byte = false;
1707 if (BYTES_BIG_ENDIAN)
1708 {
1709 /* BITPOS and BITLEN are exactly aligned and no shifting
1710 is necessary. */
1711 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1712 || (bitpos_mod == 0 && bitlen_mod == 0))
1713 shift_amnt = 0;
1714 /* |. . . . . . . .|
1715 <bp > <blen >.
1716 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1717 of the value until it aligns with 'bp' in the next byte over. */
1718 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1719 {
1720 shift_amnt = bitlen_mod + bitpos_mod;
1721 skip_byte = bitlen_mod != 0;
1722 }
1723 /* |. . . . . . . .|
1724 <----bp--->
1725 <---blen---->.
1726 Shift the value right within the same byte so it aligns with 'bp'. */
1727 else
1728 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1729 }
1730 else
1731 shift_amnt = bitpos % BITS_PER_UNIT;
1732
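/* For instance (hypothetical numbers, BITS_PER_UNIT == 8), in the
   big-endian case above: bitpos_mod == 3 and bitlen_mod == 5 sum to 8, so
   shift_amnt is 0; bitpos_mod == 2 and bitlen_mod == 3 give shift_amnt == 5
   with skip_byte set; bitpos_mod == 6 and bitlen_mod == 6 give
   shift_amnt == 4 within the same byte.  */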
1733 /* Create the shifted version of EXPR. */
1734 if (!BYTES_BIG_ENDIAN)
46a61395 1735 {
8aba425f 1736 shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
46a61395
JJ
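/* If no shift was needed, nothing was carried into the extra byte
   allocated above, so it can be dropped from the final OR loop.  */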
1737 if (shift_amnt == 0)
1738 byte_size--;
1739 }
f663d9ad
KT
1740 else
1741 {
1742 gcc_assert (BYTES_BIG_ENDIAN);
1743 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1744 /* If shifting right forced us to move into the next byte, skip the now
1745 empty byte. */
1746 if (skip_byte)
1747 {
1748 tmpbuf++;
1749 byte_size--;
1750 }
1751 }
1752
1753 /* Insert the bits from TMPBUF. */
1754 for (unsigned int i = 0; i < byte_size; i++)
1755 ptr[first_byte + i] |= tmpbuf[i];
1756
1757 return true;
1758}
1759
1760/* Sorting function for store_immediate_info objects.
1761 Sorts them by bitposition. */
1762
1763static int
1764sort_by_bitpos (const void *x, const void *y)
1765{
1766 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1767 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1768
109cca3b 1769 if ((*tmp)->bitpos < (*tmp2)->bitpos)
f663d9ad
KT
1770 return -1;
1771 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
1772 return 1;
109cca3b 1773 else
0f0027d1
KT
1774 /* If they are the same, fall back to the order, which is guaranteed to
1775 be different. */
1776 return (*tmp)->order - (*tmp2)->order;
f663d9ad
KT
1777}
1778
1779/* Sorting function for store_immediate_info objects.
1780 Sorts them by the order field. */
1781
1782static int
1783sort_by_order (const void *x, const void *y)
1784{
1785 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1786 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1787
1788 if ((*tmp)->order < (*tmp2)->order)
1789 return -1;
1790 else if ((*tmp)->order > (*tmp2)->order)
1791 return 1;
1792
1793 gcc_unreachable ();
1794}
1795
1796/* Initialize a merged_store_group object from a store_immediate_info
1797 object. */
1798
1799merged_store_group::merged_store_group (store_immediate_info *info)
1800{
1801 start = info->bitpos;
1802 width = info->bitsize;
a62b3dc5
JJ
1803 bitregion_start = info->bitregion_start;
1804 bitregion_end = info->bitregion_end;
f663d9ad
KT
1805 /* VAL has memory allocated for it in apply_stores once the group
1806 width has been finalized. */
1807 val = NULL;
a62b3dc5 1808 mask = NULL;
c94c3532 1809 bit_insertion = false;
18e0c3d1
JJ
1810 only_constants = info->rhs_code == INTEGER_CST;
1811 first_nonmergeable_order = ~0U;
629387a6 1812 lp_nr = info->lp_nr;
a62b3dc5
JJ
1813 unsigned HOST_WIDE_INT align_bitpos = 0;
1814 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1815 &align, &align_bitpos);
1816 align_base = start - align_bitpos;
245f6de1
JJ
1817 for (int i = 0; i < 2; ++i)
1818 {
1819 store_operand_info &op = info->ops[i];
1820 if (op.base_addr == NULL_TREE)
1821 {
1822 load_align[i] = 0;
1823 load_align_base[i] = 0;
1824 }
1825 else
1826 {
1827 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
1828 load_align_base[i] = op.bitpos - align_bitpos;
1829 }
1830 }
f663d9ad
KT
1831 stores.create (1);
1832 stores.safe_push (info);
1833 last_stmt = info->stmt;
1834 last_order = info->order;
1835 first_stmt = last_stmt;
1836 first_order = last_order;
1837 buf_size = 0;
1838}
1839
1840merged_store_group::~merged_store_group ()
1841{
1842 if (val)
1843 XDELETEVEC (val);
1844}
1845
7f5a3982
EB
1846/* Return true if the store described by INFO can be merged into the group. */
1847
1848bool
1849merged_store_group::can_be_merged_into (store_immediate_info *info)
1850{
1851 /* Do not merge bswap patterns. */
1852 if (info->rhs_code == LROTATE_EXPR)
1853 return false;
1854
629387a6
EB
1855 if (info->lp_nr != lp_nr)
1856 return false;
1857
7f5a3982
EB
1858 /* The canonical case. */
1859 if (info->rhs_code == stores[0]->rhs_code)
1860 return true;
1861
1862 /* BIT_INSERT_EXPR is compatible with INTEGER_CST. */
1863 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
1864 return true;
1865
1866 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
1867 return true;
1868
1869 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
1870 if (info->rhs_code == MEM_REF
1871 && (stores[0]->rhs_code == INTEGER_CST
1872 || stores[0]->rhs_code == BIT_INSERT_EXPR)
1873 && info->bitregion_start == stores[0]->bitregion_start
1874 && info->bitregion_end == stores[0]->bitregion_end)
1875 return true;
1876
1877 if (stores[0]->rhs_code == MEM_REF
1878 && (info->rhs_code == INTEGER_CST
1879 || info->rhs_code == BIT_INSERT_EXPR)
1880 && info->bitregion_start == stores[0]->bitregion_start
1881 && info->bitregion_end == stores[0]->bitregion_end)
1882 return true;
1883
1884 return false;
1885}
1886
a62b3dc5
JJ
1887/* Helper method for merge_into and merge_overlapping to do
1888 the common part. */
7f5a3982 1889
f663d9ad 1890void
a62b3dc5 1891merged_store_group::do_merge (store_immediate_info *info)
f663d9ad 1892{
a62b3dc5
JJ
1893 bitregion_start = MIN (bitregion_start, info->bitregion_start);
1894 bitregion_end = MAX (bitregion_end, info->bitregion_end);
1895
1896 unsigned int this_align;
1897 unsigned HOST_WIDE_INT align_bitpos = 0;
1898 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1899 &this_align, &align_bitpos);
1900 if (this_align > align)
1901 {
1902 align = this_align;
1903 align_base = info->bitpos - align_bitpos;
1904 }
245f6de1
JJ
1905 for (int i = 0; i < 2; ++i)
1906 {
1907 store_operand_info &op = info->ops[i];
1908 if (!op.base_addr)
1909 continue;
1910
1911 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
1912 if (this_align > load_align[i])
1913 {
1914 load_align[i] = this_align;
1915 load_align_base[i] = op.bitpos - align_bitpos;
1916 }
1917 }
f663d9ad 1918
f663d9ad
KT
1919 gimple *stmt = info->stmt;
1920 stores.safe_push (info);
1921 if (info->order > last_order)
1922 {
1923 last_order = info->order;
1924 last_stmt = stmt;
1925 }
1926 else if (info->order < first_order)
1927 {
1928 first_order = info->order;
1929 first_stmt = stmt;
1930 }
18e0c3d1
JJ
1931 if (info->rhs_code != INTEGER_CST)
1932 only_constants = false;
f663d9ad
KT
1933}
1934
a62b3dc5
JJ
1935/* Merge a store recorded by INFO into this merged store.
1936 The store is not overlapping with the existing recorded
1937 stores. */
1938
1939void
1940merged_store_group::merge_into (store_immediate_info *info)
1941{
a62b3dc5
JJ
1942 /* Make sure we're inserting in the position we think we're inserting. */
1943 gcc_assert (info->bitpos >= start + width
1944 && info->bitregion_start <= bitregion_end);
1945
c5679c37 1946 width = info->bitpos + info->bitsize - start;
a62b3dc5
JJ
1947 do_merge (info);
1948}
1949
f663d9ad
KT
1950/* Merge a store described by INFO into this merged store.
1951 INFO overlaps in some way with the current store (i.e. it's not contiguous,
1952 which is handled by merged_store_group::merge_into). */
1953
1954void
1955merged_store_group::merge_overlapping (store_immediate_info *info)
1956{
f663d9ad 1957 /* If the store extends the size of the group, extend the width. */
a62b3dc5 1958 if (info->bitpos + info->bitsize > start + width)
c5679c37 1959 width = info->bitpos + info->bitsize - start;
f663d9ad 1960
a62b3dc5 1961 do_merge (info);
f663d9ad
KT
1962}
1963
1964/* Go through all the recorded stores in this group in program order and
1965 apply their values to the VAL byte array to create the final merged
1966 value. Return true if the operation succeeded. */
1967
1968bool
1969merged_store_group::apply_stores ()
1970{
a62b3dc5
JJ
1971 /* Make sure we have more than one store in the group; otherwise we cannot
1972 merge anything. */
1973 if (bitregion_start % BITS_PER_UNIT != 0
1974 || bitregion_end % BITS_PER_UNIT != 0
f663d9ad
KT
1975 || stores.length () == 1)
1976 return false;
1977
1978 stores.qsort (sort_by_order);
a62b3dc5 1979 store_immediate_info *info;
f663d9ad 1980 unsigned int i;
c94c3532
EB
1981 /* Create a power-of-2-sized buffer for native_encode_expr. */
1982 buf_size = 1 << ceil_log2 ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
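/* For instance (hypothetical numbers), a 48-bit bit region needs 6 bytes
   and therefore gets an 8-byte buffer.  */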
a62b3dc5
JJ
1983 val = XNEWVEC (unsigned char, 2 * buf_size);
1984 mask = val + buf_size;
1985 memset (val, 0, buf_size);
1986 memset (mask, ~0U, buf_size);
f663d9ad
KT
1987
1988 FOR_EACH_VEC_ELT (stores, i, info)
1989 {
a62b3dc5 1990 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
c94c3532 1991 tree cst;
245f6de1
JJ
1992 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
1993 cst = info->ops[0].val;
1994 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
1995 cst = info->ops[1].val;
c94c3532
EB
1996 else
1997 cst = NULL_TREE;
245f6de1
JJ
1998 bool ret = true;
1999 if (cst)
c94c3532
EB
2000 {
2001 if (info->rhs_code == BIT_INSERT_EXPR)
2002 bit_insertion = true;
2003 else
2004 ret = encode_tree_to_bitpos (cst, val, info->bitsize,
2005 pos_in_buffer, buf_size);
2006 }
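/* Clear in MASK the bits this store writes; bits that are still set once
   all stores have been processed are positions no recorded store touches
   and must be preserved from the original memory contents.  */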
2007 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2008 if (BYTES_BIG_ENDIAN)
2009 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2010 - (pos_in_buffer % BITS_PER_UNIT)),
2011 info->bitsize);
2012 else
2013 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
245f6de1 2014 if (cst && dump_file && (dump_flags & TDF_DETAILS))
f663d9ad
KT
2015 {
2016 if (ret)
2017 {
c94c3532 2018 fputs ("After writing ", dump_file);
4af78ef8 2019 print_generic_expr (dump_file, cst, TDF_NONE);
f663d9ad 2020 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
c94c3532
EB
2021 " at position %d\n", info->bitsize, pos_in_buffer);
2022 fputs (" the merged value contains ", dump_file);
f663d9ad 2023 dump_char_array (dump_file, val, buf_size);
c94c3532
EB
2024 fputs (" the merged mask contains ", dump_file);
2025 dump_char_array (dump_file, mask, buf_size);
2026 if (bit_insertion)
2027 fputs (" bit insertion is required\n", dump_file);
f663d9ad
KT
2028 }
2029 else
2030 fprintf (dump_file, "Failed to merge stores\n");
4b84d9b8 2031 }
f663d9ad
KT
2032 if (!ret)
2033 return false;
2034 }
4b84d9b8 2035 stores.qsort (sort_by_bitpos);
f663d9ad
KT
2036 return true;
2037}
2038
2039/* Structure describing the store chain. */
2040
6c1dae73 2041class imm_store_chain_info
f663d9ad 2042{
6c1dae73 2043public:
50b6d676
AO
2044 /* Doubly-linked list that imposes an order on chain processing.
2045 PNXP (prev's next pointer) points to the head of a list, or to
2046 the next field in the previous chain in the list.
2047 See pass_store_merging::m_stores_head for more rationale. */
2048 imm_store_chain_info *next, **pnxp;
b5926e23 2049 tree base_addr;
a62b3dc5 2050 auto_vec<store_immediate_info *> m_store_info;
f663d9ad
KT
2051 auto_vec<merged_store_group *> m_merged_store_groups;
2052
50b6d676
AO
2053 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2054 : next (inspt), pnxp (&inspt), base_addr (b_a)
2055 {
2056 inspt = this;
2057 if (next)
2058 {
2059 gcc_checking_assert (pnxp == next->pnxp);
2060 next->pnxp = &next;
2061 }
2062 }
2063 ~imm_store_chain_info ()
2064 {
2065 *pnxp = next;
2066 if (next)
2067 {
2068 gcc_checking_assert (&next == next->pnxp);
2069 next->pnxp = pnxp;
2070 }
2071 }
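/* The constructor above splices this chain at the head of the list that
   INSPT points to (see pass_store_merging::m_stores_head); the destructor
   unlinks it again, so chains can be created and destroyed in any order
   while keeping that list walkable.  */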
b5926e23 2072 bool terminate_and_process_chain ();
4b84d9b8 2073 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
f663d9ad 2074 bool coalesce_immediate_stores ();
b5926e23
RB
2075 bool output_merged_store (merged_store_group *);
2076 bool output_merged_stores ();
f663d9ad
KT
2077};
2078
2079const pass_data pass_data_tree_store_merging = {
2080 GIMPLE_PASS, /* type */
2081 "store-merging", /* name */
2082 OPTGROUP_NONE, /* optinfo_flags */
2083 TV_GIMPLE_STORE_MERGING, /* tv_id */
2084 PROP_ssa, /* properties_required */
2085 0, /* properties_provided */
2086 0, /* properties_destroyed */
2087 0, /* todo_flags_start */
2088 TODO_update_ssa, /* todo_flags_finish */
2089};
2090
2091class pass_store_merging : public gimple_opt_pass
2092{
2093public:
2094 pass_store_merging (gcc::context *ctxt)
faec5f24 2095 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
f663d9ad
KT
2096 {
2097 }
2098
c94c3532
EB
2099 /* Pass not supported for PDP-endian, nor for insane hosts or
2100 target character sizes where native_{encode,interpret}_expr
a62b3dc5 2101 doesn't work properly. */
f663d9ad
KT
2102 virtual bool
2103 gate (function *)
2104 {
a62b3dc5 2105 return flag_store_merging
c94c3532 2106 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
a62b3dc5
JJ
2107 && CHAR_BIT == 8
2108 && BITS_PER_UNIT == 8;
f663d9ad
KT
2109 }
2110
2111 virtual unsigned int execute (function *);
2112
2113private:
99b1c316 2114 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
f663d9ad 2115
50b6d676
AO
2116 /* Form a doubly-linked stack of the elements of m_stores, so that
2117 we can iterate over them in a predictable way. Using this order
2118 avoids extraneous differences in the compiler output just because
2119 of tree pointer variations (e.g. different chains end up in
2120 different positions of m_stores, so they are handled in different
2121 orders, so they allocate or release SSA names in different
2122 orders, and when they get reused, subsequent passes end up
2123 getting different SSA names, which may ultimately change
2124 decisions when going out of SSA). */
2125 imm_store_chain_info *m_stores_head;
2126
629387a6
EB
2127 bool process_store (gimple *);
2128 bool terminate_and_process_chain (imm_store_chain_info *);
383ac8dc 2129 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
629387a6 2130 bool terminate_and_process_all_chains ();
f663d9ad
KT
2131}; // class pass_store_merging
2132
2133/* Terminate and process all recorded chains. Return true if any changes
2134 were made. */
2135
2136bool
2137pass_store_merging::terminate_and_process_all_chains ()
2138{
f663d9ad 2139 bool ret = false;
50b6d676 2140 while (m_stores_head)
629387a6 2141 ret |= terminate_and_process_chain (m_stores_head);
b119c055 2142 gcc_assert (m_stores.is_empty ());
f663d9ad
KT
2143 return ret;
2144}
2145
383ac8dc
JJ
2146/* Terminate all chains that are affected by the statement STMT.
2147 CHAIN_INFO is the chain we should ignore from the checks if
629387a6 2148 non-NULL. Return true if any changes were made. */
f663d9ad
KT
2149
2150bool
20770eb8 2151pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
b5926e23 2152 **chain_info,
f663d9ad
KT
2153 gimple *stmt)
2154{
2155 bool ret = false;
2156
2157 /* If the statement doesn't touch memory it can't alias. */
2158 if (!gimple_vuse (stmt))
2159 return false;
2160
9e875fd8 2161 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
6b412bf6
RB
2162 ao_ref store_lhs_ref;
2163 ao_ref_init (&store_lhs_ref, store_lhs);
383ac8dc 2164 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
f663d9ad 2165 {
383ac8dc
JJ
2166 next = cur->next;
2167
2168 /* We already checked all the stores in chain_info and terminated the
2169 chain if necessary. Skip it here. */
2170 if (chain_info && *chain_info == cur)
2171 continue;
2172
245f6de1
JJ
2173 store_immediate_info *info;
2174 unsigned int i;
383ac8dc 2175 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
f663d9ad 2176 {
9e875fd8 2177 tree lhs = gimple_assign_lhs (info->stmt);
6b412bf6
RB
2178 ao_ref lhs_ref;
2179 ao_ref_init (&lhs_ref, lhs);
2180 if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2181 || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2182 || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2183 &lhs_ref, false)))
f663d9ad 2184 {
245f6de1 2185 if (dump_file && (dump_flags & TDF_DETAILS))
f663d9ad 2186 {
245f6de1
JJ
2187 fprintf (dump_file, "stmt causes chain termination:\n");
2188 print_gimple_stmt (dump_file, stmt, 0);
f663d9ad 2189 }
629387a6 2190 ret |= terminate_and_process_chain (cur);
245f6de1 2191 break;
f663d9ad
KT
2192 }
2193 }
2194 }
2195
f663d9ad
KT
2196 return ret;
2197}
2198
2199/* Helper function. Terminate the recorded chain storing to base object
2200 BASE. Return true if the merging and output was successful. The m_stores
2201 entry is removed after the processing in any case. */
2202
2203bool
629387a6 2204pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
f663d9ad 2205{
b5926e23
RB
2206 bool ret = chain_info->terminate_and_process_chain ();
2207 m_stores.remove (chain_info->base_addr);
2208 delete chain_info;
f663d9ad
KT
2209 return ret;
2210}
2211
245f6de1 2212/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
629387a6
EB
2213 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2214 be able to sink the load of REF across stores between FIRST and LAST, up
2215 to right before LAST. */
245f6de1
JJ
2216
2217bool
2218stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2219{
2220 ao_ref r;
2221 ao_ref_init (&r, ref);
2222 unsigned int count = 0;
2223 tree vop = gimple_vdef (last);
2224 gimple *stmt;
2225
629387a6
EB
2226 /* Return true conservatively if the basic blocks are different. */
2227 if (gimple_bb (first) != gimple_bb (last))
2228 return true;
2229
245f6de1
JJ
2230 do
2231 {
2232 stmt = SSA_NAME_DEF_STMT (vop);
2233 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2234 return true;
4b84d9b8
JJ
2235 if (gimple_store_p (stmt)
2236 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2237 return true;
245f6de1
JJ
2238 /* Avoid quadratic compile time by bounding the number of checks
2239 we perform. */
2240 if (++count > MAX_STORE_ALIAS_CHECKS)
2241 return true;
2242 vop = gimple_vuse (stmt);
2243 }
2244 while (stmt != first);
629387a6 2245
245f6de1
JJ
2246 return false;
2247}
2248
2249/* Return true if INFO->ops[IDX] is mergeable with the
2250 corresponding loads already in MERGED_STORE group.
2251 BASE_ADDR is the base address of the whole store group. */
2252
2253bool
2254compatible_load_p (merged_store_group *merged_store,
2255 store_immediate_info *info,
2256 tree base_addr, int idx)
2257{
2258 store_immediate_info *infof = merged_store->stores[0];
2259 if (!info->ops[idx].base_addr
8a91d545
RS
2260 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2261 info->bitpos - infof->bitpos)
245f6de1
JJ
2262 || !operand_equal_p (info->ops[idx].base_addr,
2263 infof->ops[idx].base_addr, 0))
2264 return false;
2265
2266 store_immediate_info *infol = merged_store->stores.last ();
2267 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2268 /* In this case all vuses should be the same, e.g.
2269 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2270 or
2271 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2272 and we can emit the coalesced load next to any of those loads. */
2273 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2274 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2275 return true;
2276
2277 /* Otherwise, at least for now require that the load has the same
2278 vuse as the store. See following examples. */
2279 if (gimple_vuse (info->stmt) != load_vuse)
2280 return false;
2281
2282 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2283 || (infof != infol
2284 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2285 return false;
2286
2287 /* If the load is from the same location as the store, already
2288 the construction of the immediate chain info guarantees no intervening
2289 stores, so no further checks are needed. Example:
2290 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
8a91d545 2291 if (known_eq (info->ops[idx].bitpos, info->bitpos)
245f6de1
JJ
2292 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2293 return true;
2294
2295 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2296 of the stores in the group, or any other stores in between those.
2297 Previous calls to compatible_load_p ensured that for all the
2298 merged_store->stores IDX loads, no stmts starting with
2299 merged_store->first_stmt and ending right before merged_store->last_stmt
2300 clobbers those loads. */
2301 gimple *first = merged_store->first_stmt;
2302 gimple *last = merged_store->last_stmt;
2303 unsigned int i;
2304 store_immediate_info *infoc;
2305 /* The stores are sorted by increasing store bitpos, so if the info->stmt
2306 store comes before the first load so far, we'll be changing
2307 merged_store->first_stmt. In that case we need to give up if
2308 any of the earlier processed loads could be clobbered by the stmts in
2309 the new range. */
2310 if (info->order < merged_store->first_order)
2311 {
2312 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2313 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2314 return false;
2315 first = info->stmt;
2316 }
2317 /* Similarly, we could change merged_store->last_stmt, so ensure
2318 in that case no stmts in the new range clobber any of the earlier
2319 processed loads. */
2320 else if (info->order > merged_store->last_order)
2321 {
2322 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2323 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2324 return false;
2325 last = info->stmt;
2326 }
2327 /* And finally, we'd be adding a new load to the set; ensure it isn't
2328 clobbered in the new range. */
2329 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2330 return false;
2331
2332 /* Otherwise, we are looking for:
2333 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2334 or
2335 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2336 return true;
2337}
2338
4b84d9b8
JJ
2339/* Add all refs loaded to compute VAL to REFS vector. */
2340
2341void
2342gather_bswap_load_refs (vec<tree> *refs, tree val)
2343{
2344 if (TREE_CODE (val) != SSA_NAME)
2345 return;
2346
2347 gimple *stmt = SSA_NAME_DEF_STMT (val);
2348 if (!is_gimple_assign (stmt))
2349 return;
2350
2351 if (gimple_assign_load_p (stmt))
2352 {
2353 refs->safe_push (gimple_assign_rhs1 (stmt));
2354 return;
2355 }
2356
2357 switch (gimple_assign_rhs_class (stmt))
2358 {
2359 case GIMPLE_BINARY_RHS:
2360 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2361 /* FALLTHRU */
2362 case GIMPLE_UNARY_RHS:
2363 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2364 break;
2365 default:
2366 gcc_unreachable ();
2367 }
2368}
2369
c5679c37
JJ
2370/* Check if there are any stores in M_STORE_INFO after index I
2371 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2372 a potential group ending at END and that have their order
4d213bf6
JJ
2373 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2374 all the stores already merged and the one under consideration
2375 have rhs_code of INTEGER_CST. Return true if there are no such stores.
c5679c37
JJ
2376 Consider:
2377 MEM[(long long int *)p_28] = 0;
2378 MEM[(long long int *)p_28 + 8B] = 0;
2379 MEM[(long long int *)p_28 + 16B] = 0;
2380 MEM[(long long int *)p_28 + 24B] = 0;
2381 _129 = (int) _130;
2382 MEM[(int *)p_28 + 8B] = _129;
2383 MEM[(int *)p_28].a = -1;
2384 We already have
2385 MEM[(long long int *)p_28] = 0;
2386 MEM[(int *)p_28].a = -1;
2387 stmts in the current group and need to consider if it is safe to
2388 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2389 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2390 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2391 into the group and merging of those 3 stores is successful, merged
2392 stmts will be emitted at the latest store from that group, i.e.
2393 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2394 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2395 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2396 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2397 into the group. That way it will be its own store group and will
4d213bf6 2398 not be touched. If ALL_INTEGER_CST_P and there are overlapping
c5679c37
JJ
2399 INTEGER_CST stores, those are mergeable using merge_overlapping,
2400 so don't return false for those. */
2401
2402static bool
2403check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
4d213bf6 2404 bool all_integer_cst_p, unsigned int last_order,
c5679c37
JJ
2405 unsigned HOST_WIDE_INT end)
2406{
2407 unsigned int len = m_store_info.length ();
2408 for (++i; i < len; ++i)
2409 {
2410 store_immediate_info *info = m_store_info[i];
2411 if (info->bitpos >= end)
2412 break;
2413 if (info->order < last_order
4d213bf6 2414 && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
c5679c37
JJ
2415 return false;
2416 }
2417 return true;
2418}
2419
4b84d9b8
JJ
2420/* Return true if m_store_info[first] and at least one following store
2421 form a group that stores a try_size-bit value which is byte swapped
2422 from a memory load or some value, or is the identity of some value.
2423 This uses the bswap pass APIs. */
2424
2425bool
2426imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2427 unsigned int first,
2428 unsigned int try_size)
2429{
2430 unsigned int len = m_store_info.length (), last = first;
2431 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2432 if (width >= try_size)
2433 return false;
2434 for (unsigned int i = first + 1; i < len; ++i)
2435 {
2436 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2437 || m_store_info[i]->ins_stmt == NULL)
2438 return false;
2439 width += m_store_info[i]->bitsize;
2440 if (width >= try_size)
2441 {
2442 last = i;
2443 break;
2444 }
2445 }
2446 if (width != try_size)
2447 return false;
2448
2449 bool allow_unaligned
028d4092 2450 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
4b84d9b8
JJ
2451 /* Punt if the combined store would not be aligned and we need alignment. */
2452 if (!allow_unaligned)
2453 {
2454 unsigned int align = merged_store->align;
2455 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2456 for (unsigned int i = first + 1; i <= last; ++i)
2457 {
2458 unsigned int this_align;
2459 unsigned HOST_WIDE_INT align_bitpos = 0;
2460 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2461 &this_align, &align_bitpos);
2462 if (this_align > align)
2463 {
2464 align = this_align;
2465 align_base = m_store_info[i]->bitpos - align_bitpos;
2466 }
2467 }
2468 unsigned HOST_WIDE_INT align_bitpos
2469 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2470 if (align_bitpos)
2471 align = least_bit_hwi (align_bitpos);
2472 if (align < try_size)
2473 return false;
2474 }
2475
2476 tree type;
2477 switch (try_size)
2478 {
2479 case 16: type = uint16_type_node; break;
2480 case 32: type = uint32_type_node; break;
2481 case 64: type = uint64_type_node; break;
2482 default: gcc_unreachable ();
2483 }
2484 struct symbolic_number n;
2485 gimple *ins_stmt = NULL;
2486 int vuse_store = -1;
2487 unsigned int first_order = merged_store->first_order;
2488 unsigned int last_order = merged_store->last_order;
2489 gimple *first_stmt = merged_store->first_stmt;
2490 gimple *last_stmt = merged_store->last_stmt;
c5679c37 2491 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
4b84d9b8
JJ
2492 store_immediate_info *infof = m_store_info[first];
2493
2494 for (unsigned int i = first; i <= last; ++i)
2495 {
2496 store_immediate_info *info = m_store_info[i];
2497 struct symbolic_number this_n = info->n;
2498 this_n.type = type;
2499 if (!this_n.base_addr)
2500 this_n.range = try_size / BITS_PER_UNIT;
30fa8e9c
JJ
2501 else
2502 /* Update vuse in case it has changed by output_merged_stores. */
2503 this_n.vuse = gimple_vuse (info->ins_stmt);
4b84d9b8
JJ
2504 unsigned int bitpos = info->bitpos - infof->bitpos;
2505 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2506 BYTES_BIG_ENDIAN
2507 ? try_size - info->bitsize - bitpos
2508 : bitpos))
2509 return false;
aa11164a 2510 if (this_n.base_addr && vuse_store)
4b84d9b8
JJ
2511 {
2512 unsigned int j;
2513 for (j = first; j <= last; ++j)
2514 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2515 break;
2516 if (j > last)
2517 {
2518 if (vuse_store == 1)
2519 return false;
2520 vuse_store = 0;
2521 }
2522 }
2523 if (i == first)
2524 {
2525 n = this_n;
2526 ins_stmt = info->ins_stmt;
2527 }
2528 else
2529 {
c5679c37 2530 if (n.base_addr && n.vuse != this_n.vuse)
4b84d9b8 2531 {
c5679c37
JJ
2532 if (vuse_store == 0)
2533 return false;
2534 vuse_store = 1;
4b84d9b8 2535 }
c5679c37
JJ
2536 if (info->order > last_order)
2537 {
2538 last_order = info->order;
2539 last_stmt = info->stmt;
2540 }
2541 else if (info->order < first_order)
2542 {
2543 first_order = info->order;
2544 first_stmt = info->stmt;
2545 }
2546 end = MAX (end, info->bitpos + info->bitsize);
4b84d9b8
JJ
2547
2548 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2549 &this_n, &n);
2550 if (ins_stmt == NULL)
2551 return false;
2552 }
2553 }
2554
2555 uint64_t cmpxchg, cmpnop;
2556 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
2557
2558 /* A complete byte swap should make the symbolic number start with
2559 the largest digit in the highest order byte. Unchanged symbolic
2560 number indicates a read with the same endianness as the target architecture. */
2561 if (n.n != cmpnop && n.n != cmpxchg)
2562 return false;
2563
2564 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2565 return false;
2566
4d213bf6 2567 if (!check_no_overlap (m_store_info, last, false, last_order, end))
c5679c37
JJ
2568 return false;
2569
4b84d9b8
JJ
2570 /* Don't handle memory copy this way if normal non-bswap processing
2571 would handle it too. */
2572 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2573 {
2574 unsigned int i;
2575 for (i = first; i <= last; ++i)
2576 if (m_store_info[i]->rhs_code != MEM_REF)
2577 break;
2578 if (i == last + 1)
2579 return false;
2580 }
2581
2582 if (n.n == cmpxchg)
2583 switch (try_size)
2584 {
2585 case 16:
2586 /* Will emit LROTATE_EXPR. */
2587 break;
2588 case 32:
2589 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2590 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2591 break;
2592 return false;
2593 case 64:
2594 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2595 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2596 break;
2597 return false;
2598 default:
2599 gcc_unreachable ();
2600 }
2601
2602 if (!allow_unaligned && n.base_addr)
2603 {
2604 unsigned int align = get_object_alignment (n.src);
2605 if (align < try_size)
2606 return false;
2607 }
2608
2609 /* If each load has the vuse of the corresponding store, we need to verify
2610 that the loads can be sunk right before the last store. */
2611 if (vuse_store == 1)
2612 {
2613 auto_vec<tree, 64> refs;
2614 for (unsigned int i = first; i <= last; ++i)
2615 gather_bswap_load_refs (&refs,
2616 gimple_assign_rhs1 (m_store_info[i]->stmt));
2617
2618 unsigned int i;
2619 tree ref;
2620 FOR_EACH_VEC_ELT (refs, i, ref)
2621 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2622 return false;
2623 n.vuse = NULL_TREE;
2624 }
2625
2626 infof->n = n;
2627 infof->ins_stmt = ins_stmt;
2628 for (unsigned int i = first; i <= last; ++i)
2629 {
2630 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2631 m_store_info[i]->ops[0].base_addr = NULL_TREE;
2632 m_store_info[i]->ops[1].base_addr = NULL_TREE;
2633 if (i != first)
2634 merged_store->merge_into (m_store_info[i]);
2635 }
2636
2637 return true;
2638}
2639
f663d9ad
KT
2640/* Go through the candidate stores recorded in m_store_info and merge them
2641 into merged_store_group objects recorded into m_merged_store_groups
2642 representing the widened stores. Return true if coalescing was successful
2643 and the number of widened stores is fewer than the original number
2644 of stores. */
2645
2646bool
2647imm_store_chain_info::coalesce_immediate_stores ()
2648{
2649 /* Anything less can't be processed. */
2650 if (m_store_info.length () < 2)
2651 return false;
2652
2653 if (dump_file && (dump_flags & TDF_DETAILS))
c94c3532 2654 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
f663d9ad
KT
2655 m_store_info.length ());
2656
2657 store_immediate_info *info;
4b84d9b8 2658 unsigned int i, ignore = 0;
f663d9ad
KT
2659
2660 /* Order the stores by the bitposition they write to. */
2661 m_store_info.qsort (sort_by_bitpos);
2662
2663 info = m_store_info[0];
2664 merged_store_group *merged_store = new merged_store_group (info);
c94c3532
EB
2665 if (dump_file && (dump_flags & TDF_DETAILS))
2666 fputs ("New store group\n", dump_file);
f663d9ad
KT
2667
2668 FOR_EACH_VEC_ELT (m_store_info, i, info)
2669 {
3afd514b
JJ
2670 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
2671
4b84d9b8 2672 if (i <= ignore)
c94c3532 2673 goto done;
f663d9ad 2674
4b84d9b8
JJ
2675 /* First try to handle group of stores like:
2676 p[0] = data >> 24;
2677 p[1] = data >> 16;
2678 p[2] = data >> 8;
2679 p[3] = data;
2680 using the bswap framework. */
2681 if (info->bitpos == merged_store->start + merged_store->width
2682 && merged_store->stores.length () == 1
2683 && merged_store->stores[0]->ins_stmt != NULL
2684 && info->ins_stmt != NULL)
2685 {
2686 unsigned int try_size;
2687 for (try_size = 64; try_size >= 16; try_size >>= 1)
2688 if (try_coalesce_bswap (merged_store, i - 1, try_size))
2689 break;
2690
2691 if (try_size >= 16)
2692 {
2693 ignore = i + merged_store->stores.length () - 1;
2694 m_merged_store_groups.safe_push (merged_store);
2695 if (ignore < m_store_info.length ())
2696 merged_store = new merged_store_group (m_store_info[ignore]);
2697 else
2698 merged_store = NULL;
c94c3532 2699 goto done;
4b84d9b8
JJ
2700 }
2701 }
2702
3afd514b
JJ
2703 new_bitregion_start
2704 = MIN (merged_store->bitregion_start, info->bitregion_start);
2705 new_bitregion_end
2706 = MAX (merged_store->bitregion_end, info->bitregion_end);
2707
2708 if (info->order >= merged_store->first_nonmergeable_order
2709 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
028d4092 2710 > (unsigned) param_store_merging_max_size))
18e0c3d1
JJ
2711 ;
2712
f663d9ad
KT
2713 /* |---store 1---|
2714 |---store 2---|
4b84d9b8 2715 Overlapping stores. */
18e0c3d1 2716 else if (IN_RANGE (info->bitpos, merged_store->start,
4d213bf6
JJ
2717 merged_store->start + merged_store->width - 1)
2718 /* |---store 1---||---store 2---|
2719 Handle also the consecutive INTEGER_CST stores case here,
2720 as we have here the code to deal with overlaps. */
2721 || (info->bitregion_start <= merged_store->bitregion_end
2722 && info->rhs_code == INTEGER_CST
2723 && merged_store->only_constants
2724 && merged_store->can_be_merged_into (info)))
f663d9ad 2725 {
245f6de1 2726 /* Only allow overlapping stores of constants. */
629387a6
EB
2727 if (info->rhs_code == INTEGER_CST
2728 && merged_store->only_constants
2729 && info->lp_nr == merged_store->lp_nr)
245f6de1 2730 {
6cd4c66e
JJ
2731 unsigned int last_order
2732 = MAX (merged_store->last_order, info->order);
2733 unsigned HOST_WIDE_INT end
2734 = MAX (merged_store->start + merged_store->width,
2735 info->bitpos + info->bitsize);
4d213bf6 2736 if (check_no_overlap (m_store_info, i, true, last_order, end))
6cd4c66e
JJ
2737 {
2738 /* check_no_overlap call above made sure there are no
2739 overlapping stores with non-INTEGER_CST rhs_code
2740 in between the first and last of the stores we've
2741 just merged. If there are any INTEGER_CST rhs_code
2742 stores in between, we need to merge_overlapping them
2743 even if in the sort_by_bitpos order there are other
2744 overlapping stores in between. Keep those stores as is.
2745 Example:
2746 MEM[(int *)p_28] = 0;
2747 MEM[(char *)p_28 + 3B] = 1;
2748 MEM[(char *)p_28 + 1B] = 2;
2749 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
2750 We can't merge the zero store with the store of two and
2751 not merge anything else, because the store of one is
2752 in the original order in between those two, but in
2753 sort_by_bitpos order it comes after the last store that
2754 we can't merge with them. We can merge the first 3 stores
2755 and keep the last store as is though. */
18e0c3d1
JJ
2756 unsigned int len = m_store_info.length ();
2757 unsigned int try_order = last_order;
2758 unsigned int first_nonmergeable_order;
2759 unsigned int k;
2760 bool last_iter = false;
2761 int attempts = 0;
2762 do
6cd4c66e 2763 {
18e0c3d1
JJ
2764 unsigned int max_order = 0;
2765 unsigned first_nonmergeable_int_order = ~0U;
2766 unsigned HOST_WIDE_INT this_end = end;
2767 k = i;
2768 first_nonmergeable_order = ~0U;
2769 for (unsigned int j = i + 1; j < len; ++j)
6cd4c66e 2770 {
18e0c3d1
JJ
2771 store_immediate_info *info2 = m_store_info[j];
2772 if (info2->bitpos >= this_end)
2773 break;
2774 if (info2->order < try_order)
6cd4c66e 2775 {
4119cd69
JJ
2776 if (info2->rhs_code != INTEGER_CST
2777 || info2->lp_nr != merged_store->lp_nr)
18e0c3d1
JJ
2778 {
2779 /* Normally check_no_overlap makes sure this
2780 doesn't happen, but if end grows below,
2781 then we need to process more stores than
2782 check_no_overlap verified. Example:
2783 MEM[(int *)p_5] = 0;
2784 MEM[(short *)p_5 + 3B] = 1;
2785 MEM[(char *)p_5 + 4B] = _9;
2786 MEM[(char *)p_5 + 2B] = 2; */
2787 k = 0;
2788 break;
2789 }
2790 k = j;
2791 this_end = MAX (this_end,
2792 info2->bitpos + info2->bitsize);
6cd4c66e 2793 }
18e0c3d1 2794 else if (info2->rhs_code == INTEGER_CST
4119cd69 2795 && info2->lp_nr == merged_store->lp_nr
18e0c3d1
JJ
2796 && !last_iter)
2797 {
2798 max_order = MAX (max_order, info2->order + 1);
2799 first_nonmergeable_int_order
2800 = MIN (first_nonmergeable_int_order,
2801 info2->order);
2802 }
2803 else
2804 first_nonmergeable_order
2805 = MIN (first_nonmergeable_order, info2->order);
6cd4c66e 2806 }
18e0c3d1
JJ
2807 if (k == 0)
2808 {
2809 if (last_order == try_order)
2810 break;
2811 /* If this failed, but only because we grew
2812 try_order, retry with the last working one,
2813 so that we merge at least something. */
2814 try_order = last_order;
2815 last_iter = true;
2816 continue;
2817 }
2818 last_order = try_order;
2819 /* Retry with a larger try_order to see if we could
2820 merge some further INTEGER_CST stores. */
2821 if (max_order
2822 && (first_nonmergeable_int_order
2823 < first_nonmergeable_order))
2824 {
2825 try_order = MIN (max_order,
2826 first_nonmergeable_order);
2827 try_order
2828 = MIN (try_order,
2829 merged_store->first_nonmergeable_order);
2830 if (try_order > last_order && ++attempts < 16)
2831 continue;
2832 }
2833 first_nonmergeable_order
2834 = MIN (first_nonmergeable_order,
2835 first_nonmergeable_int_order);
2836 end = this_end;
2837 break;
6cd4c66e 2838 }
18e0c3d1 2839 while (1);
6cd4c66e
JJ
2840
2841 if (k != 0)
2842 {
2843 merged_store->merge_overlapping (info);
2844
18e0c3d1
JJ
2845 merged_store->first_nonmergeable_order
2846 = MIN (merged_store->first_nonmergeable_order,
2847 first_nonmergeable_order);
2848
6cd4c66e
JJ
2849 for (unsigned int j = i + 1; j <= k; j++)
2850 {
2851 store_immediate_info *info2 = m_store_info[j];
2852 gcc_assert (info2->bitpos < end);
2853 if (info2->order < last_order)
2854 {
2855 gcc_assert (info2->rhs_code == INTEGER_CST);
18e0c3d1
JJ
2856 if (info != info2)
2857 merged_store->merge_overlapping (info2);
6cd4c66e
JJ
2858 }
2859 /* Other stores are kept and not merged in any
2860 way. */
2861 }
2862 ignore = k;
2863 goto done;
2864 }
2865 }
245f6de1 2866 }
f663d9ad 2867 }
245f6de1
JJ
2868 /* |---store 1---||---store 2---|
2869 This store is consecutive to the previous one.
2870 Merge it into the current store group. There can be gaps in between
2871 the stores, but there can't be gaps in between bitregions. */
c94c3532 2872 else if (info->bitregion_start <= merged_store->bitregion_end
7f5a3982 2873 && merged_store->can_be_merged_into (info))
f663d9ad 2874 {
245f6de1
JJ
2875 store_immediate_info *infof = merged_store->stores[0];
2876
2877 /* All the rhs_code ops that take 2 operands are commutative, so
2878 swap the operands if it could make the operands compatible. */
2879 if (infof->ops[0].base_addr
2880 && infof->ops[1].base_addr
2881 && info->ops[0].base_addr
2882 && info->ops[1].base_addr
8a91d545
RS
2883 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
2884 info->bitpos - infof->bitpos)
245f6de1
JJ
2885 && operand_equal_p (info->ops[1].base_addr,
2886 infof->ops[0].base_addr, 0))
127ef369
JJ
2887 {
2888 std::swap (info->ops[0], info->ops[1]);
2889 info->ops_swapped_p = true;
2890 }
4d213bf6 2891 if (check_no_overlap (m_store_info, i, false,
a7fe6482
JJ
2892 MAX (merged_store->last_order, info->order),
2893 MAX (merged_store->start + merged_store->width,
7f5a3982 2894 info->bitpos + info->bitsize)))
245f6de1 2895 {
7f5a3982
EB
2896 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
2897 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
2898 {
2899 info->rhs_code = BIT_INSERT_EXPR;
2900 info->ops[0].val = gimple_assign_rhs1 (info->stmt);
2901 info->ops[0].base_addr = NULL_TREE;
2902 }
2903 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
2904 {
2905 store_immediate_info *infoj;
2906 unsigned int j;
2907 FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
2908 {
2909 infoj->rhs_code = BIT_INSERT_EXPR;
2910 infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
2911 infoj->ops[0].base_addr = NULL_TREE;
2912 }
2913 }
2914 if ((infof->ops[0].base_addr
2915 ? compatible_load_p (merged_store, info, base_addr, 0)
2916 : !info->ops[0].base_addr)
2917 && (infof->ops[1].base_addr
2918 ? compatible_load_p (merged_store, info, base_addr, 1)
2919 : !info->ops[1].base_addr))
2920 {
2921 merged_store->merge_into (info);
2922 goto done;
2923 }
245f6de1
JJ
2924 }
2925 }
f663d9ad 2926
245f6de1
JJ
2927 /* |---store 1---| <gap> |---store 2---|.
2928 Gap between stores or the rhs not compatible. Start a new group. */
f663d9ad 2929
245f6de1
JJ
2930 /* Try to apply all the stores recorded for the group to determine
2931 the bitpattern they write and discard it if that fails.
2932 This will also reject single-store groups. */
c94c3532 2933 if (merged_store->apply_stores ())
245f6de1 2934 m_merged_store_groups.safe_push (merged_store);
c94c3532
EB
2935 else
2936 delete merged_store;
f663d9ad 2937
245f6de1 2938 merged_store = new merged_store_group (info);
c94c3532
EB
2939 if (dump_file && (dump_flags & TDF_DETAILS))
2940 fputs ("New store group\n", dump_file);
2941
2942 done:
2943 if (dump_file && (dump_flags & TDF_DETAILS))
2944 {
2945 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
2946 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
2947 i, info->bitsize, info->bitpos);
2948 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
2949 fputc ('\n', dump_file);
2950 }
f663d9ad
KT
2951 }
2952
a62b3dc5 2953 /* Record or discard the last store group. */
4b84d9b8
JJ
2954 if (merged_store)
2955 {
c94c3532 2956 if (merged_store->apply_stores ())
4b84d9b8 2957 m_merged_store_groups.safe_push (merged_store);
c94c3532
EB
2958 else
2959 delete merged_store;
4b84d9b8 2960 }
f663d9ad
KT
2961
2962 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
c94c3532 2963
f663d9ad
KT
2964 bool success
2965 = !m_merged_store_groups.is_empty ()
2966 && m_merged_store_groups.length () < m_store_info.length ();
2967
2968 if (success && dump_file)
c94c3532 2969 fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
a62b3dc5 2970 m_merged_store_groups.length ());
f663d9ad
KT
2971
2972 return success;
2973}
2974
245f6de1
JJ
2975/* Return the type to use for the merged stores or loads described by STMTS.
2976 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
2977 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
2978 of the MEM_REFs if any. */
f663d9ad
KT
2979
2980static tree
245f6de1
JJ
2981get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
2982 unsigned short *cliquep, unsigned short *basep)
f663d9ad
KT
2983{
2984 gimple *stmt;
2985 unsigned int i;
245f6de1
JJ
2986 tree type = NULL_TREE;
2987 tree ret = NULL_TREE;
2988 *cliquep = 0;
2989 *basep = 0;
f663d9ad
KT
2990
2991 FOR_EACH_VEC_ELT (stmts, i, stmt)
2992 {
245f6de1
JJ
2993 tree ref = is_load ? gimple_assign_rhs1 (stmt)
2994 : gimple_assign_lhs (stmt);
2995 tree type1 = reference_alias_ptr_type (ref);
2996 tree base = get_base_address (ref);
f663d9ad 2997
245f6de1
JJ
2998 if (i == 0)
2999 {
3000 if (TREE_CODE (base) == MEM_REF)
3001 {
3002 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3003 *basep = MR_DEPENDENCE_BASE (base);
3004 }
3005 ret = type = type1;
3006 continue;
3007 }
f663d9ad 3008 if (!alias_ptr_types_compatible_p (type, type1))
245f6de1
JJ
3009 ret = ptr_type_node;
3010 if (TREE_CODE (base) != MEM_REF
3011 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3012 || *basep != MR_DEPENDENCE_BASE (base))
3013 {
3014 *cliquep = 0;
3015 *basep = 0;
3016 }
f663d9ad 3017 }
245f6de1 3018 return ret;
f663d9ad
KT
3019}
3020
3021/* Return the location_t information we can find among the statements
3022 in STMTS. */
3023
3024static location_t
245f6de1 3025get_location_for_stmts (vec<gimple *> &stmts)
f663d9ad
KT
3026{
3027 gimple *stmt;
3028 unsigned int i;
3029
3030 FOR_EACH_VEC_ELT (stmts, i, stmt)
3031 if (gimple_has_location (stmt))
3032 return gimple_location (stmt);
3033
3034 return UNKNOWN_LOCATION;
3035}
3036
3037/* Used to describe a store resulting from splitting a wide store into smaller
3038 regularly-sized stores in split_group. */
3039
6c1dae73 3040class split_store
f663d9ad 3041{
6c1dae73 3042public:
f663d9ad
KT
3043 unsigned HOST_WIDE_INT bytepos;
3044 unsigned HOST_WIDE_INT size;
3045 unsigned HOST_WIDE_INT align;
245f6de1 3046 auto_vec<store_immediate_info *> orig_stores;
a62b3dc5
JJ
3047 /* True if there is a single orig stmt covering the whole split store. */
3048 bool orig;
f663d9ad
KT
3049 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3050 unsigned HOST_WIDE_INT);
3051};
3052
3053/* Simple constructor. */
3054
3055split_store::split_store (unsigned HOST_WIDE_INT bp,
3056 unsigned HOST_WIDE_INT sz,
3057 unsigned HOST_WIDE_INT al)
a62b3dc5 3058 : bytepos (bp), size (sz), align (al), orig (false)
f663d9ad 3059{
245f6de1 3060 orig_stores.create (0);
f663d9ad
KT
3061}
3062
245f6de1
JJ
3063/* Record all stores in GROUP that write to the region that starts at BITPOS and
3064 is of size BITSIZE. Record infos for such statements in STORES if
3065 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
5384a802
JJ
3066 if there is exactly one original store in the range (in that case ignore
3067 clobber stmts, unless there are only clobber stmts). */
f663d9ad 3068
a62b3dc5 3069static store_immediate_info *
99b1c316 3070find_constituent_stores (class merged_store_group *group,
245f6de1
JJ
3071 vec<store_immediate_info *> *stores,
3072 unsigned int *first,
3073 unsigned HOST_WIDE_INT bitpos,
3074 unsigned HOST_WIDE_INT bitsize)
f663d9ad 3075{
a62b3dc5 3076 store_immediate_info *info, *ret = NULL;
f663d9ad 3077 unsigned int i;
a62b3dc5
JJ
3078 bool second = false;
3079 bool update_first = true;
f663d9ad 3080 unsigned HOST_WIDE_INT end = bitpos + bitsize;
a62b3dc5 3081 for (i = *first; group->stores.iterate (i, &info); ++i)
f663d9ad
KT
3082 {
3083 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3084 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
a62b3dc5
JJ
3085 if (stmt_end <= bitpos)
3086 {
3087 /* BITPOS passed to this function never decreases from within the
3088 same split_group call, so optimize and don't scan info records
3089 which are known to end before or at BITPOS next time.
3090 Only do it if all stores before this one also pass this. */
3091 if (update_first)
3092 *first = i + 1;
3093 continue;
3094 }
3095 else
3096 update_first = false;
3097
f663d9ad 3098 /* The stores in GROUP are ordered by bitposition so if we're past
a62b3dc5
JJ
3099 the region for this group return early. */
3100 if (stmt_start >= end)
3101 return ret;
3102
5384a802
JJ
3103 if (gimple_clobber_p (info->stmt))
3104 {
3105 if (stores)
3106 stores->safe_push (info);
3107 if (ret == NULL)
3108 ret = info;
3109 continue;
3110 }
245f6de1 3111 if (stores)
a62b3dc5 3112 {
245f6de1 3113 stores->safe_push (info);
5384a802 3114 if (ret && !gimple_clobber_p (ret->stmt))
a62b3dc5
JJ
3115 {
3116 ret = NULL;
3117 second = true;
3118 }
3119 }
5384a802 3120 else if (ret && !gimple_clobber_p (ret->stmt))
a62b3dc5
JJ
3121 return NULL;
3122 if (!second)
3123 ret = info;
f663d9ad 3124 }
a62b3dc5 3125 return ret;
f663d9ad
KT
3126}
3127
d7a9512e
JJ
3128/* Return how many SSA_NAMEs used to compute the value to store in the INFO
3129 store have multiple uses. If any SSA_NAME has multiple uses, also
3130 count statements needed to compute it. */
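/* As an illustration (hypothetical GIMPLE): for a plain copy
     _1 = s.a;  t.b = _1;
   where _1 has a second use outside this store, the MEM_REF case below
   returns 1, because the load statement has to be kept anyway.  */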
3131
3132static unsigned
3133count_multiple_uses (store_immediate_info *info)
3134{
3135 gimple *stmt = info->stmt;
3136 unsigned ret = 0;
3137 switch (info->rhs_code)
3138 {
3139 case INTEGER_CST:
3140 return 0;
3141 case BIT_AND_EXPR:
3142 case BIT_IOR_EXPR:
3143 case BIT_XOR_EXPR:
d60edaba
JJ
3144 if (info->bit_not_p)
3145 {
3146 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3147 ret = 1; /* Fall through below to return
3148 the BIT_NOT_EXPR stmt and then
3149 BIT_{AND,IOR,XOR}_EXPR and anything it
3150 uses. */
3151 else
3152 /* After this, stmt is the BIT_NOT_EXPR. */
3153 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3154 }
d7a9512e
JJ
3155 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3156 {
3157 ret += 1 + info->ops[0].bit_not_p;
3158 if (info->ops[1].base_addr)
3159 ret += 1 + info->ops[1].bit_not_p;
3160 return ret + 1;
3161 }
3162 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3163 /* stmt is now the BIT_*_EXPR. */
3164 if (!has_single_use (gimple_assign_rhs1 (stmt)))
127ef369
JJ
3165 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3166 else if (info->ops[info->ops_swapped_p].bit_not_p)
d7a9512e
JJ
3167 {
3168 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3169 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3170 ++ret;
3171 }
3172 if (info->ops[1].base_addr == NULL_TREE)
127ef369
JJ
3173 {
3174 gcc_checking_assert (!info->ops_swapped_p);
3175 return ret;
3176 }
d7a9512e 3177 if (!has_single_use (gimple_assign_rhs2 (stmt)))
127ef369
JJ
3178 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3179 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
d7a9512e
JJ
3180 {
3181 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3182 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3183 ++ret;
3184 }
3185 return ret;
3186 case MEM_REF:
3187 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3188 return 1 + info->ops[0].bit_not_p;
3189 else if (info->ops[0].bit_not_p)
3190 {
3191 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3192 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3193 return 1;
3194 }
3195 return 0;
c94c3532
EB
3196 case BIT_INSERT_EXPR:
3197 return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
d7a9512e
JJ
3198 default:
3199 gcc_unreachable ();
3200 }
3201}
3202
f663d9ad 3203/* Split a merged store described by GROUP by populating the SPLIT_STORES
a62b3dc5
JJ
3204 vector (if non-NULL) with split_store structs describing the byte offset
3205 (from the base), the bit size and alignment of each store as well as the
3206 original statements involved in each such split group.
f663d9ad
KT
3207 This is to separate the splitting strategy from the statement
3208 building/emission/linking done in output_merged_store.
a62b3dc5 3209 Return number of new stores.
245f6de1
JJ
3210 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3211 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3afd514b
JJ
3212 BZERO_FIRST may be true only when the first store covers the whole group
3213 and clears it; if BZERO_FIRST is true, keep that first store in the set
3214 unmodified and emit further stores for the overrides only.
a62b3dc5
JJ
3215 If SPLIT_STORES is NULL, it is just a dry run to count number of
3216 new stores. */
f663d9ad 3217
a62b3dc5 3218static unsigned int
245f6de1 3219split_group (merged_store_group *group, bool allow_unaligned_store,
3afd514b 3220 bool allow_unaligned_load, bool bzero_first,
99b1c316 3221 vec<split_store *> *split_stores,
d7a9512e
JJ
3222 unsigned *total_orig,
3223 unsigned *total_new)
f663d9ad 3224{
a62b3dc5
JJ
3225 unsigned HOST_WIDE_INT pos = group->bitregion_start;
3226 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
f663d9ad 3227 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
a62b3dc5
JJ
3228 unsigned HOST_WIDE_INT group_align = group->align;
3229 unsigned HOST_WIDE_INT align_base = group->align_base;
245f6de1 3230 unsigned HOST_WIDE_INT group_load_align = group_align;
d7a9512e 3231 bool any_orig = false;
f663d9ad 3232
f663d9ad
KT
3233 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3234
4b84d9b8
JJ
3235 if (group->stores[0]->rhs_code == LROTATE_EXPR
3236 || group->stores[0]->rhs_code == NOP_EXPR)
3237 {
3afd514b 3238 gcc_assert (!bzero_first);
4b84d9b8
JJ
3239 /* For bswap framework using sets of stores, all the checking
3240 has been done earlier in try_coalesce_bswap and needs to be
3241 emitted as a single store. */
3242 if (total_orig)
3243 {
3244 /* Avoid the old/new stmt count heuristics. It should
3245 always be beneficial. */
3246 total_new[0] = 1;
3247 total_orig[0] = 2;
3248 }
3249
3250 if (split_stores)
3251 {
3252 unsigned HOST_WIDE_INT align_bitpos
3253 = (group->start - align_base) & (group_align - 1);
3254 unsigned HOST_WIDE_INT align = group_align;
3255 if (align_bitpos)
3256 align = least_bit_hwi (align_bitpos);
3257 bytepos = group->start / BITS_PER_UNIT;
99b1c316 3258 split_store *store
4b84d9b8
JJ
3259 = new split_store (bytepos, group->width, align);
3260 unsigned int first = 0;
3261 find_constituent_stores (group, &store->orig_stores,
3262 &first, group->start, group->width);
3263 split_stores->safe_push (store);
3264 }
3265
3266 return 1;
3267 }
3268
a62b3dc5 3269 unsigned int ret = 0, first = 0;
f663d9ad 3270 unsigned HOST_WIDE_INT try_pos = bytepos;
f663d9ad 3271
d7a9512e
JJ
3272 if (total_orig)
3273 {
3274 unsigned int i;
3275 store_immediate_info *info = group->stores[0];
3276
3277 total_new[0] = 0;
3278 total_orig[0] = 1; /* The orig store. */
3279 info = group->stores[0];
3280 if (info->ops[0].base_addr)
a6fbd154 3281 total_orig[0]++;
d7a9512e 3282 if (info->ops[1].base_addr)
a6fbd154 3283 total_orig[0]++;
d7a9512e
JJ
3284 switch (info->rhs_code)
3285 {
3286 case BIT_AND_EXPR:
3287 case BIT_IOR_EXPR:
3288 case BIT_XOR_EXPR:
3289 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3290 break;
3291 default:
3292 break;
3293 }
3294 total_orig[0] *= group->stores.length ();
3295
3296 FOR_EACH_VEC_ELT (group->stores, i, info)
a6fbd154
JJ
3297 {
3298 total_new[0] += count_multiple_uses (info);
3299 total_orig[0] += (info->bit_not_p
3300 + info->ops[0].bit_not_p
3301 + info->ops[1].bit_not_p);
3302 }
d7a9512e
JJ
3303 }
3304
245f6de1
JJ
3305 if (!allow_unaligned_load)
3306 for (int i = 0; i < 2; ++i)
3307 if (group->load_align[i])
3308 group_load_align = MIN (group_load_align, group->load_align[i]);
3309
3afd514b
JJ
3310 if (bzero_first)
3311 {
5384a802
JJ
3312 store_immediate_info *gstore;
3313 FOR_EACH_VEC_ELT (group->stores, first, gstore)
3314 if (!gimple_clobber_p (gstore->stmt))
3315 break;
3316 ++first;
3afd514b
JJ
3317 ret = 1;
3318 if (split_stores)
3319 {
99b1c316 3320 split_store *store
5384a802
JJ
3321 = new split_store (bytepos, gstore->bitsize, align_base);
3322 store->orig_stores.safe_push (gstore);
3afd514b
JJ
3323 store->orig = true;
3324 any_orig = true;
3325 split_stores->safe_push (store);
3326 }
3327 }
3328
f663d9ad
KT
3329 while (size > 0)
3330 {
245f6de1 3331 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3afd514b
JJ
3332 && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3333 || (bzero_first && group->val[try_pos - bytepos] == 0)))
a62b3dc5
JJ
3334 {
3335 /* Skip padding bytes. */
3336 ++try_pos;
3337 size -= BITS_PER_UNIT;
3338 continue;
3339 }
3340
f663d9ad 3341 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
a62b3dc5
JJ
3342 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3343 unsigned HOST_WIDE_INT align_bitpos
3344 = (try_bitpos - align_base) & (group_align - 1);
3345 unsigned HOST_WIDE_INT align = group_align;
5384a802 3346 bool found_orig = false;
a62b3dc5
JJ
3347 if (align_bitpos)
3348 align = least_bit_hwi (align_bitpos);
245f6de1 3349 if (!allow_unaligned_store)
a62b3dc5 3350 try_size = MIN (try_size, align);
245f6de1
JJ
3351 if (!allow_unaligned_load)
3352 {
3353 /* If we can't do or don't want to do unaligned stores
3354 as well as loads, we need to take the loads into account
3355 as well. */
3356 unsigned HOST_WIDE_INT load_align = group_load_align;
3357 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3358 if (align_bitpos)
3359 load_align = least_bit_hwi (align_bitpos);
3360 for (int i = 0; i < 2; ++i)
3361 if (group->load_align[i])
3362 {
8a91d545
RS
3363 align_bitpos
3364 = known_alignment (try_bitpos
3365 - group->stores[0]->bitpos
3366 + group->stores[0]->ops[i].bitpos
3367 - group->load_align_base[i]);
3368 if (align_bitpos & (group_load_align - 1))
245f6de1
JJ
3369 {
3370 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3371 load_align = MIN (load_align, a);
3372 }
3373 }
3374 try_size = MIN (try_size, load_align);
3375 }
a62b3dc5 3376 store_immediate_info *info
245f6de1 3377 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
5384a802 3378 if (info && !gimple_clobber_p (info->stmt))
a62b3dc5
JJ
3379 {
3380 /* If there is just one original statement for the range, see if
3381 we can just reuse the original store, which could be even larger
3382 than try_size. */
3383 unsigned HOST_WIDE_INT stmt_end
3384 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
245f6de1
JJ
3385 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3386 stmt_end - try_bitpos);
a62b3dc5
JJ
3387 if (info && info->bitpos >= try_bitpos)
3388 {
5384a802
JJ
3389 store_immediate_info *info2 = NULL;
3390 unsigned int first_copy = first;
3391 if (info->bitpos > try_bitpos
3392 && stmt_end - try_bitpos <= try_size)
3393 {
3394 info2 = find_constituent_stores (group, NULL, &first_copy,
3395 try_bitpos,
3396 info->bitpos - try_bitpos);
3397 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3398 }
3399 if (info2 == NULL && stmt_end - try_bitpos < try_size)
3400 {
3401 info2 = find_constituent_stores (group, NULL, &first_copy,
3402 stmt_end,
3403 (try_bitpos + try_size)
3404 - stmt_end);
3405 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3406 }
3407 if (info2 == NULL)
3408 {
3409 try_size = stmt_end - try_bitpos;
3410 found_orig = true;
3411 goto found;
3412 }
a62b3dc5
JJ
3413 }
3414 }
f663d9ad 3415
a62b3dc5
JJ
3416 /* Approximate store bitsize for the case when there are no padding
3417 bits. */
3418 while (try_size > size)
3419 try_size /= 2;
3420 /* Now look for whole padding bytes at the end of that bitsize. */
3421 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3422 if (group->mask[try_pos - bytepos + nonmasked - 1]
3afd514b
JJ
3423 != (unsigned char) ~0U
3424 && (!bzero_first
3425 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
a62b3dc5 3426 break;
5384a802 3427 if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
a62b3dc5
JJ
3428 {
3429 /* If the entire try_size range is padding, skip it. */
3430 try_pos += try_size / BITS_PER_UNIT;
3431 size -= try_size;
3432 continue;
3433 }
3434 /* Otherwise try to decrease try_size if the second half, the last
3435 3 quarters etc. are padding. */
3436 nonmasked *= BITS_PER_UNIT;
3437 while (nonmasked <= try_size / 2)
3438 try_size /= 2;
245f6de1 3439 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
a62b3dc5
JJ
3440 {
3441 /* Now look for whole padding bytes at the start of that bitsize. */
3442 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3443 for (masked = 0; masked < try_bytesize; ++masked)
3afd514b
JJ
3444 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3445 && (!bzero_first
3446 || group->val[try_pos - bytepos + masked] != 0))
a62b3dc5
JJ
3447 break;
3448 masked *= BITS_PER_UNIT;
3449 gcc_assert (masked < try_size);
3450 if (masked >= try_size / 2)
3451 {
3452 while (masked >= try_size / 2)
3453 {
3454 try_size /= 2;
3455 try_pos += try_size / BITS_PER_UNIT;
3456 size -= try_size;
3457 masked -= try_size;
3458 }
3459 /* Need to recompute the alignment, so just retry at the new
3460 position. */
3461 continue;
3462 }
3463 }
3464
3465 found:
3466 ++ret;
f663d9ad 3467
a62b3dc5
JJ
3468 if (split_stores)
3469 {
99b1c316 3470 split_store *store
a62b3dc5 3471 = new split_store (try_pos, try_size, align);
245f6de1
JJ
3472 info = find_constituent_stores (group, &store->orig_stores,
3473 &first, try_bitpos, try_size);
a62b3dc5 3474 if (info
5384a802 3475 && !gimple_clobber_p (info->stmt)
a62b3dc5 3476 && info->bitpos >= try_bitpos
5384a802
JJ
3477 && info->bitpos + info->bitsize <= try_bitpos + try_size
3478 && (store->orig_stores.length () == 1
3479 || found_orig
3480 || (info->bitpos == try_bitpos
3481 && (info->bitpos + info->bitsize
3482 == try_bitpos + try_size))))
d7a9512e
JJ
3483 {
3484 store->orig = true;
3485 any_orig = true;
3486 }
a62b3dc5
JJ
3487 split_stores->safe_push (store);
3488 }
3489
3490 try_pos += try_size / BITS_PER_UNIT;
f663d9ad 3491 size -= try_size;
f663d9ad 3492 }
a62b3dc5 3493
d7a9512e
JJ
3494 if (total_orig)
3495 {
a6fbd154 3496 unsigned int i;
99b1c316 3497 split_store *store;
d7a9512e
JJ
3498 /* If we are reusing some original stores and any of the
3499 original SSA_NAMEs had multiple uses, we need to subtract
3500 those now before we add the new ones. */
3501 if (total_new[0] && any_orig)
3502 {
d7a9512e
JJ
3503 FOR_EACH_VEC_ELT (*split_stores, i, store)
3504 if (store->orig)
3505 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3506 }
3507 total_new[0] += ret; /* The new store. */
3508 store_immediate_info *info = group->stores[0];
3509 if (info->ops[0].base_addr)
a6fbd154 3510 total_new[0] += ret;
d7a9512e 3511 if (info->ops[1].base_addr)
a6fbd154 3512 total_new[0] += ret;
d7a9512e
JJ
3513 switch (info->rhs_code)
3514 {
3515 case BIT_AND_EXPR:
3516 case BIT_IOR_EXPR:
3517 case BIT_XOR_EXPR:
3518 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
3519 break;
3520 default:
3521 break;
3522 }
a6fbd154
JJ
3523 FOR_EACH_VEC_ELT (*split_stores, i, store)
3524 {
3525 unsigned int j;
3526 bool bit_not_p[3] = { false, false, false };
3527 /* If all orig_stores have a certain bit_not_p flag set, then
3528 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3529 If only some orig_stores have that flag set, then
3530 we'd use a BIT_XOR_EXPR with a mask and need to account for
3531 it. */
3532 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3533 {
3534 if (info->ops[0].bit_not_p)
3535 bit_not_p[0] = true;
3536 if (info->ops[1].bit_not_p)
3537 bit_not_p[1] = true;
3538 if (info->bit_not_p)
3539 bit_not_p[2] = true;
3540 }
3541 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3542 }
3543
d7a9512e
JJ
3544 }
3545
a62b3dc5 3546 return ret;
f663d9ad
KT
3547}
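
/* Illustrative sketch, not part of this pass, of a source-level case the
   splitting loop above has to handle; the function f is hypothetical.

     void
     f (unsigned char *p)
     {
       p[0] = 1;
       p[1] = 2;
       p[2] = 3;
       p[3] = 4;
     }

   Only byte alignment is known for *p, so with ALLOW_UNALIGNED_STORE true
   split_group would record a single 4-byte split_store at byte offset 0,
   whereas with it false the region would be split into four 1-byte stores,
   which the caller then rejects as unprofitable.  */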
3548
a6fbd154
JJ
3549/* Return the operation through which the operand IDX (if < 2) or
3550 the result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3551 is done; if BIT_NOT_EXPR, all bits are inverted; if BIT_XOR_EXPR,
3552 the bits should be xored with MASK. */
3553
3554static enum tree_code
3555invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3556{
3557 unsigned int i;
3558 store_immediate_info *info;
3559 unsigned int cnt = 0;
e215422f 3560 bool any_paddings = false;
a6fbd154
JJ
3561 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3562 {
3563 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3564 if (bit_not_p)
e215422f
JJ
3565 {
3566 ++cnt;
3567 tree lhs = gimple_assign_lhs (info->stmt);
3568 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3569 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3570 any_paddings = true;
3571 }
a6fbd154
JJ
3572 }
3573 mask = NULL_TREE;
3574 if (cnt == 0)
3575 return NOP_EXPR;
e215422f 3576 if (cnt == split_store->orig_stores.length () && !any_paddings)
a6fbd154
JJ
3577 return BIT_NOT_EXPR;
3578
3579 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3580 unsigned buf_size = split_store->size / BITS_PER_UNIT;
3581 unsigned char *buf
3582 = XALLOCAVEC (unsigned char, buf_size);
3583 memset (buf, ~0U, buf_size);
3584 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3585 {
3586 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3587 if (!bit_not_p)
3588 continue;
3589 /* Clear regions with bit_not_p and invert afterwards, rather than
3590 clear regions with !bit_not_p, so that gaps in between stores aren't
3591 set in the mask. */
3592 unsigned HOST_WIDE_INT bitsize = info->bitsize;
e215422f 3593 unsigned HOST_WIDE_INT prec = bitsize;
a6fbd154 3594 unsigned int pos_in_buffer = 0;
e215422f
JJ
3595 if (any_paddings)
3596 {
3597 tree lhs = gimple_assign_lhs (info->stmt);
3598 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3599 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3600 prec = TYPE_PRECISION (TREE_TYPE (lhs));
3601 }
a6fbd154
JJ
3602 if (info->bitpos < try_bitpos)
3603 {
3604 gcc_assert (info->bitpos + bitsize > try_bitpos);
e215422f
JJ
3605 if (!BYTES_BIG_ENDIAN)
3606 {
3607 if (prec <= try_bitpos - info->bitpos)
3608 continue;
3609 prec -= try_bitpos - info->bitpos;
3610 }
3611 bitsize -= try_bitpos - info->bitpos;
3612 if (BYTES_BIG_ENDIAN && prec > bitsize)
3613 prec = bitsize;
a6fbd154
JJ
3614 }
3615 else
3616 pos_in_buffer = info->bitpos - try_bitpos;
e215422f
JJ
3617 if (prec < bitsize)
3618 {
3619 /* If this is a bool inversion, invert just the least significant
3620 prec bits rather than all bits of it. */
3621 if (BYTES_BIG_ENDIAN)
3622 {
3623 pos_in_buffer += bitsize - prec;
3624 if (pos_in_buffer >= split_store->size)
3625 continue;
3626 }
3627 bitsize = prec;
3628 }
a6fbd154
JJ
3629 if (pos_in_buffer + bitsize > split_store->size)
3630 bitsize = split_store->size - pos_in_buffer;
3631 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3632 if (BYTES_BIG_ENDIAN)
3633 clear_bit_region_be (p, (BITS_PER_UNIT - 1
3634 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
3635 else
3636 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
3637 }
3638 for (unsigned int i = 0; i < buf_size; ++i)
3639 buf[i] = ~buf[i];
3640 mask = native_interpret_expr (int_type, buf, buf_size);
3641 return BIT_XOR_EXPR;
3642}
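
/* A minimal sketch, outside the scope of this file, of the identity that
   invert_op relies on: inverting only selected bits of a wider value is
   the same as XORing it with a mask that is all-ones exactly in those
   bits.  The helper below is hypothetical and illustrates the
   BIT_XOR_EXPR case for a 2-byte split store where only the upper byte
   has bit_not_p set.

     #include <stdint.h>

     static uint16_t
     selective_not (uint16_t val)
     {
       const uint16_t mask = 0xff00;  // bytes with bit_not_p set
       return val ^ mask;             // == (~val & mask) | (val & ~mask)
     }

   invert_op builds such a mask per split store (honoring endianness and
   bit-field precision) and returns BIT_XOR_EXPR, or BIT_NOT_EXPR when
   every constituent store is inverted and there are no padding bits.  */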
3643
f663d9ad
KT
3644/* Given a merged store group GROUP, output the widened version of it.
3645 The store chain is against the base object BASE.
3646 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3647 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3648 Make sure that the number of statements output is less than the number of
3649 original statements. If a better sequence is possible, emit it and
3650 return true. */
3651
3652bool
b5926e23 3653imm_store_chain_info::output_merged_store (merged_store_group *group)
f663d9ad 3654{
dd172744
RB
3655 split_store *split_store;
3656 unsigned int i;
a62b3dc5
JJ
3657 unsigned HOST_WIDE_INT start_byte_pos
3658 = group->bitregion_start / BITS_PER_UNIT;
f663d9ad
KT
3659
3660 unsigned int orig_num_stmts = group->stores.length ();
3661 if (orig_num_stmts < 2)
3662 return false;
3663
99b1c316 3664 auto_vec<class split_store *, 32> split_stores;
245f6de1 3665 bool allow_unaligned_store
028d4092 3666 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
245f6de1 3667 bool allow_unaligned_load = allow_unaligned_store;
3afd514b 3668 bool bzero_first = false;
5384a802
JJ
3669 store_immediate_info *store;
3670 unsigned int num_clobber_stmts = 0;
3671 if (group->stores[0]->rhs_code == INTEGER_CST)
3672 {
3673 FOR_EACH_VEC_ELT (group->stores, i, store)
3674 if (gimple_clobber_p (store->stmt))
3675 num_clobber_stmts++;
3676 else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
3677 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
3678 && group->start == store->bitpos
3679 && group->width == store->bitsize
3680 && (group->start % BITS_PER_UNIT) == 0
3681 && (group->width % BITS_PER_UNIT) == 0)
3682 {
3683 bzero_first = true;
3684 break;
3685 }
3686 else
3687 break;
3688 FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
3689 if (gimple_clobber_p (store->stmt))
3690 num_clobber_stmts++;
3691 if (num_clobber_stmts == orig_num_stmts)
3692 return false;
3693 orig_num_stmts -= num_clobber_stmts;
3694 }
3afd514b 3695 if (allow_unaligned_store || bzero_first)
a62b3dc5
JJ
3696 {
3697 /* If unaligned stores are allowed, see how many stores we'd emit
3698 for unaligned and how many stores we'd emit for aligned stores.
3afd514b
JJ
3699 Only use unaligned stores if it allows fewer stores than aligned.
3700 Similarly, if the whole region is cleared first, prefer expanding
3701 it together rather than expanding the clear first followed by
3702 further merged stores. */
3703 unsigned cnt[4] = { ~0, ~0, ~0, ~0 };
3704 int pass_min = 0;
3705 for (int pass = 0; pass < 4; ++pass)
3706 {
3707 if (!allow_unaligned_store && (pass & 1) != 0)
3708 continue;
3709 if (!bzero_first && (pass & 2) != 0)
3710 continue;
3711 cnt[pass] = split_group (group, (pass & 1) != 0,
3712 allow_unaligned_load, (pass & 2) != 0,
3713 NULL, NULL, NULL);
3714 if (cnt[pass] < cnt[pass_min])
3715 pass_min = pass;
3716 }
3717 if ((pass_min & 1) == 0)
245f6de1 3718 allow_unaligned_store = false;
3afd514b
JJ
3719 if ((pass_min & 2) == 0)
3720 bzero_first = false;
a62b3dc5 3721 }
d7a9512e 3722 unsigned total_orig, total_new;
3afd514b 3723 split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
d7a9512e 3724 &split_stores, &total_orig, &total_new);
a62b3dc5 3725
5384a802
JJ
3726 /* Determine if there is a clobber covering the whole group at the start,
3727 followed by proposed split stores that cover the whole group. In that
3728 case, prefer the transformation even if
3729 split_stores.length () == orig_num_stmts. */
3730 bool clobber_first = false;
3731 if (num_clobber_stmts
3732 && gimple_clobber_p (group->stores[0]->stmt)
3733 && group->start == group->stores[0]->bitpos
3734 && group->width == group->stores[0]->bitsize
3735 && (group->start % BITS_PER_UNIT) == 0
3736 && (group->width % BITS_PER_UNIT) == 0)
3737 {
3738 clobber_first = true;
3739 unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
3740 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3741 if (split_store->bytepos != pos)
3742 {
3743 clobber_first = false;
3744 break;
3745 }
3746 else
3747 pos += split_store->size / BITS_PER_UNIT;
3748 if (pos != (group->start + group->width) / BITS_PER_UNIT)
3749 clobber_first = false;
3750 }
3751
3752 if (split_stores.length () >= orig_num_stmts + clobber_first)
a62b3dc5 3753 {
5384a802 3754
a62b3dc5
JJ
3755 /* We didn't manage to reduce the number of statements. Bail out. */
3756 if (dump_file && (dump_flags & TDF_DETAILS))
d7a9512e
JJ
3757 fprintf (dump_file, "Exceeded original number of stmts (%u)."
3758 " Not profitable to emit new sequence.\n",
3759 orig_num_stmts);
dd172744
RB
3760 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3761 delete split_store;
a62b3dc5
JJ
3762 return false;
3763 }
d7a9512e
JJ
3764 if (total_orig <= total_new)
3765 {
3766 /* If number of estimated new statements is above estimated original
3767 statements, bail out too. */
3768 if (dump_file && (dump_flags & TDF_DETAILS))
3769 fprintf (dump_file, "Estimated number of original stmts (%u)"
3770 " not larger than estimated number of new"
3771 " stmts (%u).\n",
3772 total_orig, total_new);
dd172744
RB
3773 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3774 delete split_store;
4b84d9b8 3775 return false;
d7a9512e 3776 }
5384a802
JJ
3777 if (group->stores[0]->rhs_code == INTEGER_CST)
3778 {
3779 bool all_orig = true;
3780 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3781 if (!split_store->orig)
3782 {
3783 all_orig = false;
3784 break;
3785 }
3786 if (all_orig)
3787 {
3788 unsigned int cnt = split_stores.length ();
3789 store_immediate_info *store;
3790 FOR_EACH_VEC_ELT (group->stores, i, store)
3791 if (gimple_clobber_p (store->stmt))
3792 ++cnt;
3793 /* Punt if we wouldn't make any real changes, i.e. we would keep
3794 all orig stmts + all clobbers. */
3795 if (cnt == group->stores.length ())
3796 {
3797 if (dump_file && (dump_flags & TDF_DETAILS))
3798 fprintf (dump_file, "Exceeded original number of stmts (%u)."
3799 " Not profitable to emit new sequence.\n",
3800 orig_num_stmts);
3801 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3802 delete split_store;
3803 return false;
3804 }
3805 }
3806 }
f663d9ad
KT
3807
3808 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
3809 gimple_seq seq = NULL;
f663d9ad
KT
3810 tree last_vdef, new_vuse;
3811 last_vdef = gimple_vdef (group->last_stmt);
3812 new_vuse = gimple_vuse (group->last_stmt);
4b84d9b8
JJ
3813 tree bswap_res = NULL_TREE;
3814
5384a802
JJ
3815 /* Clobbers are not removed. */
3816 if (gimple_clobber_p (group->last_stmt))
3817 {
3818 new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
3819 gimple_set_vdef (group->last_stmt, new_vuse);
3820 }
3821
4b84d9b8
JJ
3822 if (group->stores[0]->rhs_code == LROTATE_EXPR
3823 || group->stores[0]->rhs_code == NOP_EXPR)
3824 {
3825 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
3826 gimple *ins_stmt = group->stores[0]->ins_stmt;
3827 struct symbolic_number *n = &group->stores[0]->n;
3828 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
3829
3830 switch (n->range)
3831 {
3832 case 16:
3833 load_type = bswap_type = uint16_type_node;
3834 break;
3835 case 32:
3836 load_type = uint32_type_node;
3837 if (bswap)
3838 {
3839 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
3840 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3841 }
3842 break;
3843 case 64:
3844 load_type = uint64_type_node;
3845 if (bswap)
3846 {
3847 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
3848 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3849 }
3850 break;
3851 default:
3852 gcc_unreachable ();
3853 }
3854
3855 /* If each load has the vuse of the corresponding store,
3856 we've checked the aliasing already in try_coalesce_bswap and
3857 we want to sink the needed load into seq. So we need to use
3858 new_vuse on the load. */
30fa8e9c 3859 if (n->base_addr)
4b84d9b8 3860 {
30fa8e9c
JJ
3861 if (n->vuse == NULL)
3862 {
3863 n->vuse = new_vuse;
3864 ins_stmt = NULL;
3865 }
3866 else
3867 /* Update vuse in case it has changed by output_merged_stores. */
3868 n->vuse = gimple_vuse (ins_stmt);
4b84d9b8
JJ
3869 }
3870 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
3871 bswap_type, load_type, n, bswap);
3872 gcc_assert (bswap_res);
3873 }
f663d9ad
KT
3874
3875 gimple *stmt = NULL;
245f6de1 3876 auto_vec<gimple *, 32> orig_stmts;
4b84d9b8
JJ
3877 gimple_seq this_seq;
3878 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
aa55dc0c 3879 is_gimple_mem_ref_addr, NULL_TREE);
4b84d9b8 3880 gimple_seq_add_seq_without_update (&seq, this_seq);
245f6de1
JJ
3881
3882 tree load_addr[2] = { NULL_TREE, NULL_TREE };
3883 gimple_seq load_seq[2] = { NULL, NULL };
3884 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
3885 for (int j = 0; j < 2; ++j)
3886 {
3887 store_operand_info &op = group->stores[0]->ops[j];
3888 if (op.base_addr == NULL_TREE)
3889 continue;
3890
3891 store_immediate_info *infol = group->stores.last ();
3892 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
3893 {
97031af7
JJ
3894 /* We can't pick the location randomly; while we've verified
3895 all the loads have the same vuse, they can still be in different
3896 basic blocks and we need to pick the one from the last bb:
3897 int x = q[0];
3898 if (x == N) return;
3899 int y = q[1];
3900 p[0] = x;
3901 p[1] = y;
3902 otherwise if we put the wider load at the q[0] load, we might
3903 segfault if q[1] is not mapped. */
3904 basic_block bb = gimple_bb (op.stmt);
3905 gimple *ostmt = op.stmt;
3906 store_immediate_info *info;
3907 FOR_EACH_VEC_ELT (group->stores, i, info)
3908 {
3909 gimple *tstmt = info->ops[j].stmt;
3910 basic_block tbb = gimple_bb (tstmt);
3911 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
3912 {
3913 ostmt = tstmt;
3914 bb = tbb;
3915 }
3916 }
3917 load_gsi[j] = gsi_for_stmt (ostmt);
245f6de1
JJ
3918 load_addr[j]
3919 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3920 &load_seq[j], is_gimple_mem_ref_addr,
3921 NULL_TREE);
3922 }
3923 else if (operand_equal_p (base_addr, op.base_addr, 0))
3924 load_addr[j] = addr;
3925 else
3e2927a1 3926 {
3e2927a1
JJ
3927 load_addr[j]
3928 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3929 &this_seq, is_gimple_mem_ref_addr,
3930 NULL_TREE);
3931 gimple_seq_add_seq_without_update (&seq, this_seq);
3932 }
245f6de1
JJ
3933 }
3934
f663d9ad
KT
3935 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3936 {
3937 unsigned HOST_WIDE_INT try_size = split_store->size;
3938 unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
c94c3532 3939 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
f663d9ad 3940 unsigned HOST_WIDE_INT align = split_store->align;
a62b3dc5
JJ
3941 tree dest, src;
3942 location_t loc;
3943 if (split_store->orig)
3944 {
5384a802
JJ
3945 /* If there is just a single non-clobber constituent store
3946 which covers the whole area, just reuse the lhs and rhs. */
3947 gimple *orig_stmt = NULL;
3948 store_immediate_info *store;
3949 unsigned int j;
3950 FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
3951 if (!gimple_clobber_p (store->stmt))
3952 {
3953 orig_stmt = store->stmt;
3954 break;
3955 }
245f6de1
JJ
3956 dest = gimple_assign_lhs (orig_stmt);
3957 src = gimple_assign_rhs1 (orig_stmt);
3958 loc = gimple_location (orig_stmt);
a62b3dc5
JJ
3959 }
3960 else
3961 {
245f6de1
JJ
3962 store_immediate_info *info;
3963 unsigned short clique, base;
3964 unsigned int k;
3965 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3966 orig_stmts.safe_push (info->stmt);
a62b3dc5 3967 tree offset_type
245f6de1
JJ
3968 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
3969 loc = get_location_for_stmts (orig_stmts);
3970 orig_stmts.truncate (0);
a62b3dc5
JJ
3971
3972 tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
3973 int_type = build_aligned_type (int_type, align);
3974 dest = fold_build2 (MEM_REF, int_type, addr,
3975 build_int_cst (offset_type, try_pos));
245f6de1
JJ
3976 if (TREE_CODE (dest) == MEM_REF)
3977 {
3978 MR_DEPENDENCE_CLIQUE (dest) = clique;
3979 MR_DEPENDENCE_BASE (dest) = base;
3980 }
3981
c94c3532
EB
3982 tree mask;
3983 if (bswap_res)
3984 mask = integer_zero_node;
3985 else
4b84d9b8
JJ
3986 mask = native_interpret_expr (int_type,
3987 group->mask + try_pos
3988 - start_byte_pos,
3989 group->buf_size);
245f6de1
JJ
3990
3991 tree ops[2];
3992 for (int j = 0;
3993 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
3994 ++j)
3995 {
3996 store_operand_info &op = split_store->orig_stores[0]->ops[j];
4b84d9b8
JJ
3997 if (bswap_res)
3998 ops[j] = bswap_res;
3999 else if (op.base_addr)
245f6de1
JJ
4000 {
4001 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4002 orig_stmts.safe_push (info->ops[j].stmt);
4003
4004 offset_type = get_alias_type_for_stmts (orig_stmts, true,
4005 &clique, &base);
4006 location_t load_loc = get_location_for_stmts (orig_stmts);
4007 orig_stmts.truncate (0);
4008
4009 unsigned HOST_WIDE_INT load_align = group->load_align[j];
4010 unsigned HOST_WIDE_INT align_bitpos
c94c3532 4011 = known_alignment (try_bitpos
8a91d545
RS
4012 - split_store->orig_stores[0]->bitpos
4013 + op.bitpos);
4014 if (align_bitpos & (load_align - 1))
245f6de1
JJ
4015 load_align = least_bit_hwi (align_bitpos);
4016
4017 tree load_int_type
4018 = build_nonstandard_integer_type (try_size, UNSIGNED);
4019 load_int_type
4020 = build_aligned_type (load_int_type, load_align);
4021
8a91d545 4022 poly_uint64 load_pos
c94c3532 4023 = exact_div (try_bitpos
8a91d545
RS
4024 - split_store->orig_stores[0]->bitpos
4025 + op.bitpos,
4026 BITS_PER_UNIT);
245f6de1
JJ
4027 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4028 build_int_cst (offset_type, load_pos));
4029 if (TREE_CODE (ops[j]) == MEM_REF)
4030 {
4031 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4032 MR_DEPENDENCE_BASE (ops[j]) = base;
4033 }
4034 if (!integer_zerop (mask))
4035 /* The load might load some bits (that will be masked off
4036 later on) uninitialized; avoid -W*uninitialized
4037 warnings in that case. */
4038 TREE_NO_WARNING (ops[j]) = 1;
4039
4040 stmt = gimple_build_assign (make_ssa_name (int_type),
4041 ops[j]);
4042 gimple_set_location (stmt, load_loc);
4043 if (gsi_bb (load_gsi[j]))
4044 {
4045 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4046 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4047 }
4048 else
4049 {
4050 gimple_set_vuse (stmt, new_vuse);
4051 gimple_seq_add_stmt_without_update (&seq, stmt);
4052 }
4053 ops[j] = gimple_assign_lhs (stmt);
a6fbd154
JJ
4054 tree xor_mask;
4055 enum tree_code inv_op
4056 = invert_op (split_store, j, int_type, xor_mask);
4057 if (inv_op != NOP_EXPR)
383ac8dc
JJ
4058 {
4059 stmt = gimple_build_assign (make_ssa_name (int_type),
a6fbd154 4060 inv_op, ops[j], xor_mask);
383ac8dc
JJ
4061 gimple_set_location (stmt, load_loc);
4062 ops[j] = gimple_assign_lhs (stmt);
4063
4064 if (gsi_bb (load_gsi[j]))
4065 gimple_seq_add_stmt_without_update (&load_seq[j],
4066 stmt);
4067 else
4068 gimple_seq_add_stmt_without_update (&seq, stmt);
4069 }
245f6de1
JJ
4070 }
4071 else
4072 ops[j] = native_interpret_expr (int_type,
4073 group->val + try_pos
4074 - start_byte_pos,
4075 group->buf_size);
4076 }
4077
4078 switch (split_store->orig_stores[0]->rhs_code)
4079 {
4080 case BIT_AND_EXPR:
4081 case BIT_IOR_EXPR:
4082 case BIT_XOR_EXPR:
4083 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4084 {
4085 tree rhs1 = gimple_assign_rhs1 (info->stmt);
4086 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4087 }
4088 location_t bit_loc;
4089 bit_loc = get_location_for_stmts (orig_stmts);
4090 orig_stmts.truncate (0);
4091
4092 stmt
4093 = gimple_build_assign (make_ssa_name (int_type),
4094 split_store->orig_stores[0]->rhs_code,
4095 ops[0], ops[1]);
4096 gimple_set_location (stmt, bit_loc);
4097 /* If there is just one load and there is a separate
4098 load_seq[0], emit the bitwise op right after it. */
4099 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4100 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4101 /* Otherwise, if at least one load is in seq, we need to
4102 emit the bitwise op right before the store. If there
4103 are two loads and they are emitted somewhere else, it would
4104 be better to emit the bitwise op as early as possible;
4105 we don't track where that would be possible right now
4106 though. */
4107 else
4108 gimple_seq_add_stmt_without_update (&seq, stmt);
4109 src = gimple_assign_lhs (stmt);
a6fbd154
JJ
4110 tree xor_mask;
4111 enum tree_code inv_op;
4112 inv_op = invert_op (split_store, 2, int_type, xor_mask);
4113 if (inv_op != NOP_EXPR)
d60edaba
JJ
4114 {
4115 stmt = gimple_build_assign (make_ssa_name (int_type),
a6fbd154 4116 inv_op, src, xor_mask);
d60edaba
JJ
4117 gimple_set_location (stmt, bit_loc);
4118 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4119 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4120 else
4121 gimple_seq_add_stmt_without_update (&seq, stmt);
4122 src = gimple_assign_lhs (stmt);
4123 }
245f6de1 4124 break;
4b84d9b8
JJ
4125 case LROTATE_EXPR:
4126 case NOP_EXPR:
4127 src = ops[0];
4128 if (!is_gimple_val (src))
4129 {
4130 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4131 src);
4132 gimple_seq_add_stmt_without_update (&seq, stmt);
4133 src = gimple_assign_lhs (stmt);
4134 }
4135 if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
4136 {
4137 stmt = gimple_build_assign (make_ssa_name (int_type),
4138 NOP_EXPR, src);
4139 gimple_seq_add_stmt_without_update (&seq, stmt);
4140 src = gimple_assign_lhs (stmt);
4141 }
be52ac73
JJ
4142 inv_op = invert_op (split_store, 2, int_type, xor_mask);
4143 if (inv_op != NOP_EXPR)
4144 {
4145 stmt = gimple_build_assign (make_ssa_name (int_type),
4146 inv_op, src, xor_mask);
4147 gimple_set_location (stmt, loc);
4148 gimple_seq_add_stmt_without_update (&seq, stmt);
4149 src = gimple_assign_lhs (stmt);
4150 }
4b84d9b8 4151 break;
245f6de1
JJ
4152 default:
4153 src = ops[0];
4154 break;
4155 }
4156
c94c3532
EB
4157 /* If bit insertion is required, we use the source as an accumulator
4158 into which the successive bit-field values are manually inserted.
4159 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4160 if (group->bit_insertion)
4161 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4162 if (info->rhs_code == BIT_INSERT_EXPR
4163 && info->bitpos < try_bitpos + try_size
4164 && info->bitpos + info->bitsize > try_bitpos)
4165 {
4166 /* Mask, truncate, convert to final type, shift and ior into
4167 the accumulator. Note that every step can be a no-op. */
4168 const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4169 const HOST_WIDE_INT end_gap
4170 = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4171 tree tem = info->ops[0].val;
c14add82
EB
4172 if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4173 {
4174 tree bitfield_type
4175 = build_nonstandard_integer_type (info->bitsize,
4176 UNSIGNED);
4177 tem = gimple_convert (&seq, loc, bitfield_type, tem);
4178 }
4179 else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
c94c3532
EB
4180 {
4181 const unsigned HOST_WIDE_INT imask
4182 = (HOST_WIDE_INT_1U << info->bitsize) - 1;
4183 tem = gimple_build (&seq, loc,
4184 BIT_AND_EXPR, TREE_TYPE (tem), tem,
4185 build_int_cst (TREE_TYPE (tem),
4186 imask));
4187 }
4188 const HOST_WIDE_INT shift
4189 = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4190 if (shift < 0)
4191 tem = gimple_build (&seq, loc,
4192 RSHIFT_EXPR, TREE_TYPE (tem), tem,
4193 build_int_cst (NULL_TREE, -shift));
4194 tem = gimple_convert (&seq, loc, int_type, tem);
4195 if (shift > 0)
4196 tem = gimple_build (&seq, loc,
4197 LSHIFT_EXPR, int_type, tem,
4198 build_int_cst (NULL_TREE, shift));
4199 src = gimple_build (&seq, loc,
4200 BIT_IOR_EXPR, int_type, tem, src);
4201 }
4202
a62b3dc5
JJ
4203 if (!integer_zerop (mask))
4204 {
4205 tree tem = make_ssa_name (int_type);
4206 tree load_src = unshare_expr (dest);
4207 /* The load might load some or all bits uninitialized;
4208 avoid -W*uninitialized warnings in that case.
4209 As an optimization, it would be nice, if all the bits are
4210 provably uninitialized (no stores at all yet or the previous
4211 store was a CLOBBER), to optimize away the load and replace
4212 it e.g. with 0. */
4213 TREE_NO_WARNING (load_src) = 1;
4214 stmt = gimple_build_assign (tem, load_src);
4215 gimple_set_location (stmt, loc);
4216 gimple_set_vuse (stmt, new_vuse);
4217 gimple_seq_add_stmt_without_update (&seq, stmt);
4218
4219 /* FIXME: If there is a single chunk of zero bits in mask,
4220 perhaps use BIT_INSERT_EXPR instead? */
4221 stmt = gimple_build_assign (make_ssa_name (int_type),
4222 BIT_AND_EXPR, tem, mask);
4223 gimple_set_location (stmt, loc);
4224 gimple_seq_add_stmt_without_update (&seq, stmt);
4225 tem = gimple_assign_lhs (stmt);
4226
245f6de1
JJ
4227 if (TREE_CODE (src) == INTEGER_CST)
4228 src = wide_int_to_tree (int_type,
4229 wi::bit_and_not (wi::to_wide (src),
4230 wi::to_wide (mask)));
4231 else
4232 {
4233 tree nmask
4234 = wide_int_to_tree (int_type,
4235 wi::bit_not (wi::to_wide (mask)));
4236 stmt = gimple_build_assign (make_ssa_name (int_type),
4237 BIT_AND_EXPR, src, nmask);
4238 gimple_set_location (stmt, loc);
4239 gimple_seq_add_stmt_without_update (&seq, stmt);
4240 src = gimple_assign_lhs (stmt);
4241 }
a62b3dc5
JJ
4242 stmt = gimple_build_assign (make_ssa_name (int_type),
4243 BIT_IOR_EXPR, tem, src);
4244 gimple_set_location (stmt, loc);
4245 gimple_seq_add_stmt_without_update (&seq, stmt);
4246 src = gimple_assign_lhs (stmt);
4247 }
4248 }
f663d9ad
KT
4249
4250 stmt = gimple_build_assign (dest, src);
4251 gimple_set_location (stmt, loc);
4252 gimple_set_vuse (stmt, new_vuse);
4253 gimple_seq_add_stmt_without_update (&seq, stmt);
4254
629387a6
EB
4255 if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4256 add_stmt_to_eh_lp (stmt, group->lp_nr);
4257
f663d9ad
KT
4258 tree new_vdef;
4259 if (i < split_stores.length () - 1)
a62b3dc5 4260 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
f663d9ad
KT
4261 else
4262 new_vdef = last_vdef;
4263
4264 gimple_set_vdef (stmt, new_vdef);
4265 SSA_NAME_DEF_STMT (new_vdef) = stmt;
4266 new_vuse = new_vdef;
4267 }
4268
4269 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4270 delete split_store;
4271
f663d9ad
KT
4272 gcc_assert (seq);
4273 if (dump_file)
4274 {
4275 fprintf (dump_file,
c94c3532 4276 "New sequence of %u stores to replace old one of %u stores\n",
a62b3dc5 4277 split_stores.length (), orig_num_stmts);
f663d9ad
KT
4278 if (dump_flags & TDF_DETAILS)
4279 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4280 }
629387a6 4281
5384a802
JJ
4282 if (gimple_clobber_p (group->last_stmt))
4283 update_stmt (group->last_stmt);
4284
629387a6
EB
4285 if (group->lp_nr > 0)
4286 {
4287 /* We're going to insert a sequence of (potentially) throwing stores
4288 into an active EH region. This means that we're going to create
4289 new basic blocks with EH edges pointing to the post landing pad
4290 and, therefore, to have to update its PHI nodes, if any. For the
4291 virtual PHI node, we're going to use the VDEFs created above, but
4292 for the other nodes, we need to record the original reaching defs. */
4293 eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4294 basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4295 basic_block last_bb = gimple_bb (group->last_stmt);
4296 edge last_edge = find_edge (last_bb, lp_bb);
4297 auto_vec<tree, 16> last_defs;
4298 gphi_iterator gpi;
4299 for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4300 {
4301 gphi *phi = gpi.phi ();
4302 tree last_def;
4303 if (virtual_operand_p (gimple_phi_result (phi)))
4304 last_def = NULL_TREE;
4305 else
4306 last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4307 last_defs.safe_push (last_def);
4308 }
4309
4310 /* Do the insertion. Then, if new basic blocks have been created in the
4311 process, rewind the chain of VDEFs created above to walk the new basic
4312 blocks and update the corresponding arguments of the PHI nodes. */
4313 update_modified_stmts (seq);
4314 if (gimple_find_sub_bbs (seq, &last_gsi))
4315 while (last_vdef != gimple_vuse (group->last_stmt))
4316 {
4317 gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4318 if (stmt_could_throw_p (cfun, stmt))
4319 {
4320 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4321 unsigned int i;
4322 for (gpi = gsi_start_phis (lp_bb), i = 0;
4323 !gsi_end_p (gpi);
4324 gsi_next (&gpi), i++)
4325 {
4326 gphi *phi = gpi.phi ();
4327 tree new_def;
4328 if (virtual_operand_p (gimple_phi_result (phi)))
4329 new_def = last_vdef;
4330 else
4331 new_def = last_defs[i];
4332 add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4333 }
4334 }
4335 last_vdef = gimple_vuse (stmt);
4336 }
4337 }
4338 else
4339 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4340
245f6de1
JJ
4341 for (int j = 0; j < 2; ++j)
4342 if (load_seq[j])
4343 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
f663d9ad
KT
4344
4345 return true;
4346}
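
/* Illustrative sketch (hypothetical source, for exposition only) of the
   end-to-end rewrite performed above, on a little-endian target that
   permits unaligned accesses:

     void
     f (unsigned char *p, const unsigned char *q)
     {
       p[0] = q[0] ^ 1;
       p[1] = q[1] ^ 2;
       p[2] = q[2] ^ 3;
       p[3] = q[3] ^ 4;
     }

   would be emitted as roughly

     _1 = MEM <unsigned int> [q];
     _2 = _1 ^ 0x04030201;
     MEM <unsigned int> [p] = _2;

   i.e. one 4-byte load, one BIT_XOR_EXPR and one 4-byte store instead of
   the original twelve statements.  */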
4347
4348/* Process the merged_store_group objects created in the coalescing phase.
4349 The stores are all against the base object BASE.
4350 Try to output the widened stores and delete the original statements if
4351 successful. Return true iff any changes were made. */
4352
4353bool
b5926e23 4354imm_store_chain_info::output_merged_stores ()
f663d9ad
KT
4355{
4356 unsigned int i;
4357 merged_store_group *merged_store;
4358 bool ret = false;
4359 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4360 {
a95b474a
ML
4361 if (dbg_cnt (store_merging)
4362 && output_merged_store (merged_store))
f663d9ad
KT
4363 {
4364 unsigned int j;
4365 store_immediate_info *store;
4366 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4367 {
4368 gimple *stmt = store->stmt;
4369 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5384a802
JJ
4370 /* Don't remove clobbers; they are still useful even if
4371 everything is overwritten afterwards. */
4372 if (gimple_clobber_p (stmt))
4373 continue;
f663d9ad 4374 gsi_remove (&gsi, true);
629387a6
EB
4375 if (store->lp_nr)
4376 remove_stmt_from_eh_lp (stmt);
f663d9ad
KT
4377 if (stmt != merged_store->last_stmt)
4378 {
4379 unlink_stmt_vdef (stmt);
4380 release_defs (stmt);
4381 }
4382 }
4383 ret = true;
4384 }
4385 }
4386 if (ret && dump_file)
4387 fprintf (dump_file, "Merging successful!\n");
4388
4389 return ret;
4390}
4391
4392/* Coalesce the store_immediate_info objects recorded against the base object
4393 BASE in the first phase and output them.
4394 Delete the allocated structures.
4395 Return true if any changes were made. */
4396
4397bool
b5926e23 4398imm_store_chain_info::terminate_and_process_chain ()
f663d9ad
KT
4399{
4400 /* Process store chain. */
4401 bool ret = false;
4402 if (m_store_info.length () > 1)
4403 {
4404 ret = coalesce_immediate_stores ();
4405 if (ret)
b5926e23 4406 ret = output_merged_stores ();
f663d9ad
KT
4407 }
4408
4409 /* Delete all the entries we allocated ourselves. */
4410 store_immediate_info *info;
4411 unsigned int i;
4412 FOR_EACH_VEC_ELT (m_store_info, i, info)
4413 delete info;
4414
4415 merged_store_group *merged_info;
4416 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4417 delete merged_info;
4418
4419 return ret;
4420}
4421
4422/* Return true iff LHS is a destination potentially interesting for
4423 store merging. In practice these are the codes that get_inner_reference
4424 can process. */
4425
4426static bool
4427lhs_valid_for_store_merging_p (tree lhs)
4428{
629387a6 4429 if (DECL_P (lhs))
f663d9ad
KT
4430 return true;
4431
629387a6
EB
4432 switch (TREE_CODE (lhs))
4433 {
4434 case ARRAY_REF:
4435 case ARRAY_RANGE_REF:
4436 case BIT_FIELD_REF:
4437 case COMPONENT_REF:
4438 case MEM_REF:
4439 return true;
4440 default:
4441 return false;
4442 }
4443
4444 gcc_unreachable ();
f663d9ad
KT
4445}
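
/* Hypothetical examples, for exposition only, of destinations accepted
   above; every left-hand side below is a DECL or one of the listed
   reference codes once gimplified.

     struct T { int x; unsigned y : 3; };
     static int a[2];

     void
     g (struct T *t, int *p)
     {
       t->x = 0;   // COMPONENT_REF
       t->y = 1;   // COMPONENT_REF of a bit-field
       a[0] = 2;   // ARRAY_REF
       *p = 3;     // MEM_REF
     }
  */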
4446
4447/* Return true if the tree RHS is a constant we want to consider
4448 during store merging. In practice accept all codes that
4449 native_encode_expr accepts. */
4450
4451static bool
4452rhs_valid_for_store_merging_p (tree rhs)
4453{
cf098191 4454 unsigned HOST_WIDE_INT size;
3afd514b 4455 if (TREE_CODE (rhs) == CONSTRUCTOR
3afd514b
JJ
4456 && CONSTRUCTOR_NELTS (rhs) == 0
4457 && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4458 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4459 return true;
cf098191
RS
4460 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4461 && native_encode_expr (rhs, NULL, size) != 0);
f663d9ad
KT
4462}
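
/* Hypothetical example, for exposition only, of the two kinds of
   right-hand sides accepted above: an empty CONSTRUCTOR of known size
   and constants that native_encode_expr can serialize.

     struct U { char buf[8]; };

     void
     h (struct U *u)
     {
       *u = (struct U) {};   // empty CONSTRUCTOR, treated as clearing *u
       u->buf[0] = 0x7f;     // INTEGER_CST
     }
  */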
4463
629387a6
EB
4464/* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4465 and return true on success or false on failure. */
4466
4467static bool
4468adjust_bit_pos (poly_offset_int byte_off,
4469 poly_int64 *pbitpos,
4470 poly_uint64 *pbitregion_start,
4471 poly_uint64 *pbitregion_end)
4472{
4473 poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4474 bit_off += *pbitpos;
4475
4476 if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4477 {
4478 if (maybe_ne (*pbitregion_end, 0U))
4479 {
4480 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4481 bit_off += *pbitregion_start;
4482 if (bit_off.to_uhwi (pbitregion_start))
4483 {
4484 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4485 bit_off += *pbitregion_end;
4486 if (!bit_off.to_uhwi (pbitregion_end))
4487 *pbitregion_end = 0;
4488 }
4489 else
4490 *pbitregion_end = 0;
4491 }
4492 return true;
4493 }
4494 else
4495 return false;
4496}
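
/* Worked example with hypothetical numbers: for BYTE_OFF == 4 and an
   incoming *PBITPOS of 8, the function above produces
   *PBITPOS = 4 * BITS_PER_UNIT + 8 = 40 and shifts any recorded bit
   region by the same 32 bits.  */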
4497
245f6de1
JJ
4498/* If MEM is a memory reference usable for store merging (either as
4499 store destination or for loads), return the non-NULL base_addr
4500 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4501 Otherwise return NULL; *PBITSIZE should still be valid even in that
4502 case. */
4503
4504static tree
8a91d545
RS
4505mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4506 poly_uint64 *pbitpos,
4507 poly_uint64 *pbitregion_start,
4508 poly_uint64 *pbitregion_end)
245f6de1 4509{
8a91d545
RS
4510 poly_int64 bitsize, bitpos;
4511 poly_uint64 bitregion_start = 0, bitregion_end = 0;
245f6de1
JJ
4512 machine_mode mode;
4513 int unsignedp = 0, reversep = 0, volatilep = 0;
4514 tree offset;
4515 tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4516 &unsignedp, &reversep, &volatilep);
4517 *pbitsize = bitsize;
8a91d545 4518 if (known_eq (bitsize, 0))
245f6de1
JJ
4519 return NULL_TREE;
4520
4521 if (TREE_CODE (mem) == COMPONENT_REF
4522 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4523 {
4524 get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
8a91d545
RS
4525 if (maybe_ne (bitregion_end, 0U))
4526 bitregion_end += 1;
245f6de1
JJ
4527 }
4528
4529 if (reversep)
4530 return NULL_TREE;
4531
4532 /* We do not want to rewrite TARGET_MEM_REFs. */
4533 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4534 return NULL_TREE;
4535 /* In some cases get_inner_reference may return a
4536 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4537 canonicalize the base_addr to MEM_REF [ptr] and take
4538 byteoffset into account in the bitpos. This occurs in
4539 PR 23684 and this way we can catch more chains. */
4540 else if (TREE_CODE (base_addr) == MEM_REF)
4541 {
629387a6
EB
4542 if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4543 &bitregion_start, &bitregion_end))
245f6de1
JJ
4544 return NULL_TREE;
4545 base_addr = TREE_OPERAND (base_addr, 0);
4546 }
4547 /* get_inner_reference returns the base object; get at its
4548 address now. */
4549 else
4550 {
8a91d545 4551 if (maybe_lt (bitpos, 0))
245f6de1
JJ
4552 return NULL_TREE;
4553 base_addr = build_fold_addr_expr (base_addr);
4554 }
4555
629387a6 4556 if (offset)
245f6de1
JJ
4557 {
4558 /* If the access has a variable offset, then a base decl has to be
4559 address-taken to be able to emit pointer-based stores to it.
4560 ??? We might be able to get away with re-using the original
4561 base up to the first variable part and then wrapping that inside
4562 a BIT_FIELD_REF. */
4563 tree base = get_base_address (base_addr);
629387a6 4564 if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
245f6de1
JJ
4565 return NULL_TREE;
4566
629387a6
EB
4567 /* Similarly to above for the base, remove the constant from the offset. */
4568 if (TREE_CODE (offset) == PLUS_EXPR
4569 && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4570 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
4571 &bitpos, &bitregion_start, &bitregion_end))
4572 offset = TREE_OPERAND (offset, 0);
4573
245f6de1
JJ
4574 base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
4575 base_addr, offset);
4576 }
4577
629387a6
EB
4578 if (known_eq (bitregion_end, 0U))
4579 {
4580 bitregion_start = round_down_to_byte_boundary (bitpos);
4581 bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
4582 }
4583
245f6de1
JJ
4584 *pbitsize = bitsize;
4585 *pbitpos = bitpos;
4586 *pbitregion_start = bitregion_start;
4587 *pbitregion_end = bitregion_end;
4588 return base_addr;
4589}
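
/* Hypothetical example, for exposition only, of the MEM_REF
   canonicalization above: both stores below end up with the same
   base_addr (the pointer P), which lets them join one chain.

     void
     f (int *p)
     {
       p[0] = 0;   // MEM_REF [p], bitpos 0
       p[1] = 0;   // MEM_REF [p + 4B]; canonicalized to base p with
                   // bitpos 32, assuming a 4-byte int
     }
  */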
4590
4591/* Return true if STMT is a load that can be used for store merging.
4592 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
4593 BITREGION_END are properties of the corresponding store. */
4594
4595static bool
4596handled_load (gimple *stmt, store_operand_info *op,
8a91d545
RS
4597 poly_uint64 bitsize, poly_uint64 bitpos,
4598 poly_uint64 bitregion_start, poly_uint64 bitregion_end)
245f6de1 4599{
383ac8dc 4600 if (!is_gimple_assign (stmt))
245f6de1 4601 return false;
383ac8dc
JJ
4602 if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
4603 {
4604 tree rhs1 = gimple_assign_rhs1 (stmt);
4605 if (TREE_CODE (rhs1) == SSA_NAME
383ac8dc
JJ
4606 && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
4607 bitregion_start, bitregion_end))
4608 {
d60edaba
JJ
4609 /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
4610 been optimized earlier, but if allowed here, would confuse the
4611 multiple uses counting. */
4612 if (op->bit_not_p)
4613 return false;
383ac8dc
JJ
4614 op->bit_not_p = !op->bit_not_p;
4615 return true;
4616 }
4617 return false;
4618 }
4619 if (gimple_vuse (stmt)
4620 && gimple_assign_load_p (stmt)
36bbc05d 4621 && !stmt_can_throw_internal (cfun, stmt)
245f6de1
JJ
4622 && !gimple_has_volatile_ops (stmt))
4623 {
4624 tree mem = gimple_assign_rhs1 (stmt);
4625 op->base_addr
4626 = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
4627 &op->bitregion_start,
4628 &op->bitregion_end);
4629 if (op->base_addr != NULL_TREE
8a91d545
RS
4630 && known_eq (op->bitsize, bitsize)
4631 && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
4632 && known_ge (op->bitpos - op->bitregion_start,
4633 bitpos - bitregion_start)
4634 && known_ge (op->bitregion_end - op->bitpos,
4635 bitregion_end - bitpos))
245f6de1
JJ
4636 {
4637 op->stmt = stmt;
4638 op->val = mem;
383ac8dc 4639 op->bit_not_p = false;
245f6de1
JJ
4640 return true;
4641 }
4642 }
4643 return false;
4644}
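
/* Hypothetical example, for exposition only, of loads recognized by
   handled_load above, including the BIT_NOT_EXPR wrapper that sets
   op->bit_not_p:

     void
     f (unsigned char *p, const unsigned char *q)
     {
       p[0] = (unsigned char) ~q[0];
       p[1] = (unsigned char) ~q[1];
     }

   Each stored value is a BIT_NOT_EXPR of a byte load from Q, so the
   group can later be emitted as one wider load, a single BIT_NOT_EXPR
   and one wider store, subject to the usual alignment and
   profitability checks.  */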
4645
629387a6
EB
4646/* Return the index number of the landing pad for STMT, if any. */
4647
4648static int
4649lp_nr_for_store (gimple *stmt)
4650{
4651 if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
4652 return 0;
4653
4654 if (!stmt_could_throw_p (cfun, stmt))
4655 return 0;
4656
4657 return lookup_stmt_eh_lp (stmt);
4658}
4659
245f6de1 4660/* Record the store STMT for store merging optimization if it can be
629387a6 4661 optimized. Return true if any changes were made. */
245f6de1 4662
629387a6 4663bool
245f6de1
JJ
4664pass_store_merging::process_store (gimple *stmt)
4665{
4666 tree lhs = gimple_assign_lhs (stmt);
4667 tree rhs = gimple_assign_rhs1 (stmt);
8a91d545
RS
4668 poly_uint64 bitsize, bitpos;
4669 poly_uint64 bitregion_start, bitregion_end;
245f6de1
JJ
4670 tree base_addr
4671 = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
4672 &bitregion_start, &bitregion_end);
8a91d545 4673 if (known_eq (bitsize, 0U))
629387a6 4674 return false;
245f6de1
JJ
4675
4676 bool invalid = (base_addr == NULL_TREE
8a91d545
RS
4677 || (maybe_gt (bitsize,
4678 (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
3afd514b
JJ
4679 && TREE_CODE (rhs) != INTEGER_CST
4680 && (TREE_CODE (rhs) != CONSTRUCTOR
4681 || CONSTRUCTOR_NELTS (rhs) != 0)));
245f6de1 4682 enum tree_code rhs_code = ERROR_MARK;
d60edaba 4683 bool bit_not_p = false;
4b84d9b8
JJ
4684 struct symbolic_number n;
4685 gimple *ins_stmt = NULL;
245f6de1
JJ
4686 store_operand_info ops[2];
4687 if (invalid)
4688 ;
4689 else if (rhs_valid_for_store_merging_p (rhs))
4690 {
4691 rhs_code = INTEGER_CST;
4692 ops[0].val = rhs;
4693 }
d7a9512e 4694 else if (TREE_CODE (rhs) != SSA_NAME)
245f6de1
JJ
4695 invalid = true;
4696 else
4697 {
4698 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
4699 if (!is_gimple_assign (def_stmt))
4700 invalid = true;
4701 else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
4702 bitregion_start, bitregion_end))
4703 rhs_code = MEM_REF;
d60edaba
JJ
4704 else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
4705 {
4706 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4707 if (TREE_CODE (rhs1) == SSA_NAME
4708 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
4709 {
4710 bit_not_p = true;
4711 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4712 }
4713 }
c94c3532 4714
d60edaba 4715 if (rhs_code == ERROR_MARK && !invalid)
245f6de1
JJ
4716 switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
4717 {
4718 case BIT_AND_EXPR:
4719 case BIT_IOR_EXPR:
4720 case BIT_XOR_EXPR:
4721 tree rhs1, rhs2;
4722 rhs1 = gimple_assign_rhs1 (def_stmt);
4723 rhs2 = gimple_assign_rhs2 (def_stmt);
4724 invalid = true;
d7a9512e 4725 if (TREE_CODE (rhs1) != SSA_NAME)
245f6de1
JJ
4726 break;
4727 def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
4728 if (!is_gimple_assign (def_stmt1)
4729 || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
4730 bitregion_start, bitregion_end))
4731 break;
4732 if (rhs_valid_for_store_merging_p (rhs2))
4733 ops[1].val = rhs2;
d7a9512e 4734 else if (TREE_CODE (rhs2) != SSA_NAME)
245f6de1
JJ
4735 break;
4736 else
4737 {
4738 def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
4739 if (!is_gimple_assign (def_stmt2))
4740 break;
4741 else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
4742 bitregion_start, bitregion_end))
4743 break;
4744 }
4745 invalid = false;
4746 break;
4747 default:
4748 invalid = true;
4749 break;
4750 }
c94c3532 4751
8a91d545
RS
4752 unsigned HOST_WIDE_INT const_bitsize;
4753 if (bitsize.is_constant (&const_bitsize)
c94c3532 4754 && (const_bitsize % BITS_PER_UNIT) == 0
8a91d545 4755 && const_bitsize <= 64
c94c3532 4756 && multiple_p (bitpos, BITS_PER_UNIT))
4b84d9b8
JJ
4757 {
4758 ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
4759 if (ins_stmt)
4760 {
4761 uint64_t nn = n.n;
4762 for (unsigned HOST_WIDE_INT i = 0;
8a91d545
RS
4763 i < const_bitsize;
4764 i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
4b84d9b8
JJ
4765 if ((nn & MARKER_MASK) == 0
4766 || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
4767 {
4768 ins_stmt = NULL;
4769 break;
4770 }
4771 if (ins_stmt)
4772 {
4773 if (invalid)
4774 {
4775 rhs_code = LROTATE_EXPR;
4776 ops[0].base_addr = NULL_TREE;
4777 ops[1].base_addr = NULL_TREE;
4778 }
4779 invalid = false;
4780 }
4781 }
4782 }
c94c3532
EB
4783
4784 if (invalid
4785 && bitsize.is_constant (&const_bitsize)
4786 && ((const_bitsize % BITS_PER_UNIT) != 0
4787 || !multiple_p (bitpos, BITS_PER_UNIT))
4788 && const_bitsize <= 64)
4789 {
c14add82 4790 /* Bypass a conversion to the bit-field type. */
31a5d8c5
EB
4791 if (!bit_not_p
4792 && is_gimple_assign (def_stmt)
4793 && CONVERT_EXPR_CODE_P (rhs_code))
c94c3532
EB
4794 {
4795 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4796 if (TREE_CODE (rhs1) == SSA_NAME
c14add82 4797 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
c94c3532
EB
4798 rhs = rhs1;
4799 }
4800 rhs_code = BIT_INSERT_EXPR;
31a5d8c5 4801 bit_not_p = false;
c94c3532
EB
4802 ops[0].val = rhs;
4803 ops[0].base_addr = NULL_TREE;
4804 ops[1].base_addr = NULL_TREE;
4805 invalid = false;
4806 }
245f6de1
JJ
4807 }
4808
8a91d545
RS
4809 unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
4810 unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
4811 if (invalid
4812 || !bitsize.is_constant (&const_bitsize)
4813 || !bitpos.is_constant (&const_bitpos)
4814 || !bitregion_start.is_constant (&const_bitregion_start)
4815 || !bitregion_end.is_constant (&const_bitregion_end))
629387a6 4816 return terminate_all_aliasing_chains (NULL, stmt);
245f6de1 4817
4b84d9b8
JJ
4818 if (!ins_stmt)
4819 memset (&n, 0, sizeof (n));
4820
99b1c316 4821 class imm_store_chain_info **chain_info = NULL;
629387a6 4822 bool ret = false;
383ac8dc
JJ
4823 if (base_addr)
4824 chain_info = m_stores.get (base_addr);
4825
245f6de1
JJ
4826 store_immediate_info *info;
4827 if (chain_info)
4828 {
4829 unsigned int ord = (*chain_info)->m_store_info.length ();
8a91d545
RS
4830 info = new store_immediate_info (const_bitsize, const_bitpos,
4831 const_bitregion_start,
4832 const_bitregion_end,
4833 stmt, ord, rhs_code, n, ins_stmt,
629387a6
EB
4834 bit_not_p, lp_nr_for_store (stmt),
4835 ops[0], ops[1]);
245f6de1
JJ
4836 if (dump_file && (dump_flags & TDF_DETAILS))
4837 {
4838 fprintf (dump_file, "Recording immediate store from stmt:\n");
4839 print_gimple_stmt (dump_file, stmt, 0);
4840 }
4841 (*chain_info)->m_store_info.safe_push (info);
629387a6 4842 ret |= terminate_all_aliasing_chains (chain_info, stmt);
245f6de1
JJ
4843 /* If we reach the limit of stores to merge in a chain terminate and
4844 process the chain now. */
4845 if ((*chain_info)->m_store_info.length ()
028d4092 4846 == (unsigned int) param_max_stores_to_merge)
245f6de1
JJ
4847 {
4848 if (dump_file && (dump_flags & TDF_DETAILS))
4849 fprintf (dump_file,
4850 "Reached maximum number of statements to merge:\n");
629387a6 4851 ret |= terminate_and_process_chain (*chain_info);
245f6de1 4852 }
629387a6 4853 return ret;
245f6de1
JJ
4854 }
4855
4856 /* Store aliases any existing chain? */
629387a6 4857 ret |= terminate_all_aliasing_chains (NULL, stmt);
245f6de1 4858 /* Start a new chain. */
99b1c316 4859 class imm_store_chain_info *new_chain
245f6de1 4860 = new imm_store_chain_info (m_stores_head, base_addr);
8a91d545
RS
4861 info = new store_immediate_info (const_bitsize, const_bitpos,
4862 const_bitregion_start,
4863 const_bitregion_end,
4864 stmt, 0, rhs_code, n, ins_stmt,
629387a6
EB
4865 bit_not_p, lp_nr_for_store (stmt),
4866 ops[0], ops[1]);
245f6de1
JJ
4867 new_chain->m_store_info.safe_push (info);
4868 m_stores.put (base_addr, new_chain);
4869 if (dump_file && (dump_flags & TDF_DETAILS))
4870 {
4871 fprintf (dump_file, "Starting new chain with statement:\n");
4872 print_gimple_stmt (dump_file, stmt, 0);
4873 fprintf (dump_file, "The base object is:\n");
4874 print_generic_expr (dump_file, base_addr);
4875 fprintf (dump_file, "\n");
4876 }
629387a6
EB
4877 return ret;
4878}
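
/* Hypothetical example, for exposition only, of how chains recorded by
   process_store above get terminated: a possibly aliasing store through
   another base ends the chain rooted at P before the third statement
   can join it.

     void
     f (int *p, int *q)
     {
       p[0] = 1;   // starts a chain with base p
       *q = 0;     // may alias p[0]; the chain for p is terminated
                   // and processed here
       p[1] = 2;   // starts a new chain
     }

   The same termination happens once a chain reaches
   param_max_stores_to_merge statements.  */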
4879
4880/* Return true if STMT is a store valid for store merging. */
4881
4882static bool
4883store_valid_for_store_merging_p (gimple *stmt)
4884{
4885 return gimple_assign_single_p (stmt)
4886 && gimple_vdef (stmt)
4887 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
5384a802 4888 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
629387a6
EB
4889}
4890
4891enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
4892
4893/* Return the status of basic block BB wrt store merging. */
4894
4895static enum basic_block_status
4896get_status_for_store_merging (basic_block bb)
4897{
4898 unsigned int num_statements = 0;
4899 gimple_stmt_iterator gsi;
4900 edge e;
4901
4902 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4903 {
4904 gimple *stmt = gsi_stmt (gsi);
4905
4906 if (is_gimple_debug (stmt))
4907 continue;
4908
4909 if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
4910 break;
4911 }
4912
4913 if (num_statements == 0)
4914 return BB_INVALID;
4915
4916 if (cfun->can_throw_non_call_exceptions && cfun->eh
4917 && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
4918 && (e = find_fallthru_edge (bb->succs))
4919 && e->dest == bb->next_bb)
4920 return BB_EXTENDED_VALID;
4921
4922 return num_statements >= 2 ? BB_VALID : BB_INVALID;
245f6de1
JJ
4923}
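
/* Rough sketch (hypothetical source, for exposition only) of the
   BB_EXTENDED_VALID case above: with -fnon-call-exceptions each
   potentially trapping store ends its basic block, yet the chain may be
   continued in the fallthrough successor.

     void
     f (int *p)
     {
       p[0] = 1;   // may throw, ends its block, falls through
       p[1] = 2;   // single-predecessor successor: same chain
     }
  */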
4924
f663d9ad 4925/* Entry point for the pass. Go over each basic block recording chains of
245f6de1
JJ
4926 immediate stores. Upon encountering a terminating statement (as defined
4927 by stmt_terminates_chain_p) process the recorded stores and emit the widened
4928 variants. */
f663d9ad
KT
4929
4930unsigned int
4931pass_store_merging::execute (function *fun)
4932{
4933 basic_block bb;
4934 hash_set<gimple *> orig_stmts;
629387a6
EB
4935 bool changed = false, open_chains = false;
4936
4937 /* If the function can throw and catch non-call exceptions, we'll be trying
4938     to merge stores across different basic blocks, so we need to first unsplit
4939 the EH edges in order to streamline the CFG of the function. */
4940 if (cfun->can_throw_non_call_exceptions && cfun->eh)
4941 unsplit_eh_edges ();
f663d9ad 4942
4943 calculate_dominance_info (CDI_DOMINATORS);
4944
4945 FOR_EACH_BB_FN (bb, fun)
4946 {
629387a6 4947 const basic_block_status bb_status = get_status_for_store_merging (bb);
f663d9ad 4948 gimple_stmt_iterator gsi;
f663d9ad 4949
4950 if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
4951 {
4952 changed |= terminate_and_process_all_chains ();
4953 open_chains = false;
4954 }
4955
629387a6 4956 if (bb_status == BB_INVALID)
4957 continue;
4958
4959 if (dump_file && (dump_flags & TDF_DETAILS))
4960 fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
4961
4962 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4963 {
4964 gimple *stmt = gsi_stmt (gsi);
4965
4966 if (is_gimple_debug (stmt))
4967 continue;
4968
5384a802 4969 if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
4970 {
4971 /* Terminate all chains. */
4972 if (dump_file && (dump_flags & TDF_DETAILS))
4973 fprintf (dump_file, "Volatile access terminates "
4974 "all chains\n");
4975 changed |= terminate_and_process_all_chains ();
4976 open_chains = false;
4977 continue;
4978 }
4979
4980 if (store_valid_for_store_merging_p (stmt))
4981 changed |= process_store (stmt);
245f6de1 4982 else
4983 changed |= terminate_all_aliasing_chains (NULL, stmt);
4984 }
4985
4986 if (bb_status == BB_EXTENDED_VALID)
4987 open_chains = true;
4988 else
4989 {
4990 changed |= terminate_and_process_all_chains ();
4991 open_chains = false;
f663d9ad 4992 }
f663d9ad 4993 }
4994
4995 if (open_chains)
4996 changed |= terminate_and_process_all_chains ();
4997
4998 /* If the function can throw and catch non-call exceptions and something
4999 changed during the pass, then the CFG has (very likely) changed too. */
5000 if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
5001 {
5002 free_dominance_info (CDI_DOMINATORS);
5003 return TODO_cleanup_cfg;
5004 }
5005
5006 return 0;
5007}
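
/* A usage sketch (not part of the pass itself): the pass is gated by
   -fstore-merging / -fno-store-merging, and the notes printed above
   ("Processing basic block <N>:", "Starting new chain with statement:",
   ...) can be obtained with the detailed dump flag, e.g.

     gcc -O2 -fdump-tree-store-merging-details test.c

   which writes them into the corresponding .store-merging dump file.  */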
5008
5009} // anon namespace
5010
5011/* Construct and return a store merging pass object. */
5012
5013gimple_opt_pass *
5014make_pass_store_merging (gcc::context *ctxt)
5015{
5016 return new pass_store_merging (ctxt);
5017}
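
/* Note: the factory above only forwards the compiler context; the pass
   itself is scheduled from passes.def, where pass_store_merging appears
   among the GIMPLE optimization passes.  */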
5018
5019#if CHECKING_P
5020
5021namespace selftest {
5022
5023/* Selftests for store merging helpers. */
5024
5025/* Assert that all elements of the byte arrays X and Y, both of length N,
5026   are equal.  */
5027
5028static void
5029verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
5030{
5031 for (unsigned int i = 0; i < n; i++)
5032 {
5033 if (x[i] != y[i])
5034 {
5035 fprintf (stderr, "Arrays do not match. X:\n");
5036 dump_char_array (stderr, x, n);
5037 fprintf (stderr, "Y:\n");
5038 dump_char_array (stderr, y, n);
5039 }
5040 ASSERT_EQ (x[i], y[i]);
5041 }
5042}
5043
8aba425f 5044/* Test that shift_bytes_in_array_left carries bits across byte
 5045   boundaries correctly.  */
5046
5047static void
8aba425f 5048verify_shift_bytes_in_array_left (void)
5049{
5050 /* byte 1 | byte 0
5051 00011111 | 11100000. */
5052 unsigned char orig[2] = { 0xe0, 0x1f };
5053 unsigned char in[2];
5054 memcpy (in, orig, sizeof orig);
5055
5056 unsigned char expected[2] = { 0x80, 0x7f };
8aba425f 5057 shift_bytes_in_array_left (in, sizeof (in), 2);
5058 verify_array_eq (in, expected, sizeof (in));
5059
5060 memcpy (in, orig, sizeof orig);
5061 memcpy (expected, orig, sizeof orig);
5062 /* Check that shifting by zero doesn't change anything. */
8aba425f 5063 shift_bytes_in_array_left (in, sizeof (in), 0);
5064 verify_array_eq (in, expected, sizeof (in));
5065
5066}
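
/* A worked example of the first check above: viewing in[] as the 16-bit
   value in[1]:in[0] = 00011111 11100000 (0x1fe0), shifting left by 2 gives
   01111111 10000000 (0x7f80), i.e. in[0] == 0x80 and in[1] == 0x7f; the two
   bits shifted out of in[0] are carried into the low bits of in[1].  */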
5067
5068/* Test that shift_bytes_in_array_right carries bits across byte
5069   boundaries correctly.  */
5070
5071static void
5072verify_shift_bytes_in_array_right (void)
5073{
5074   /* byte 0   | byte 1
5075 00011111 | 11100000. */
5076 unsigned char orig[2] = { 0x1f, 0xe0};
5077 unsigned char in[2];
5078 memcpy (in, orig, sizeof orig);
5079 unsigned char expected[2] = { 0x07, 0xf8};
5080 shift_bytes_in_array_right (in, sizeof (in), 2);
5081 verify_array_eq (in, expected, sizeof (in));
5082
5083 memcpy (in, orig, sizeof orig);
5084 memcpy (expected, orig, sizeof orig);
5085 /* Check that shifting by zero doesn't change anything. */
5086 shift_bytes_in_array_right (in, sizeof (in), 0);
5087 verify_array_eq (in, expected, sizeof (in));
5088}
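
/* A worked example of the first check above: shift_bytes_in_array_right
   treats in[0] as the most significant byte, so in[] stands for
   00011111 11100000 (0x1fe0); shifting right by 2 gives 00000111 11111000
   (0x07f8), i.e. in[0] == 0x07 and in[1] == 0xf8, the two bits shifted out
   of in[0] being carried into the high bits of in[1].  */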
5089
5090/* Test that clear_bit_region clears exactly the bits asked for and
5091   nothing more.  */
5092
5093static void
5094verify_clear_bit_region (void)
5095{
5096 /* Start with all bits set and test clearing various patterns in them. */
5097 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5098 unsigned char in[3];
5099 unsigned char expected[3];
5100 memcpy (in, orig, sizeof in);
5101
5102 /* Check zeroing out all the bits. */
5103 clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5104 expected[0] = expected[1] = expected[2] = 0;
5105 verify_array_eq (in, expected, sizeof in);
5106
5107 memcpy (in, orig, sizeof in);
5108 /* Leave the first and last bits intact. */
5109 clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5110 expected[0] = 0x1;
5111 expected[1] = 0;
5112 expected[2] = 0x80;
5113 verify_array_eq (in, expected, sizeof in);
5114}
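
/* A worked example of the second check above (assuming BITS_PER_UNIT == 8):
   clearing 22 bits starting at bit 1 of the 24-bit all-ones buffer leaves
   only bit 0 of in[0] and bit 7 of in[2] set, hence the expected
   { 0x01, 0x00, 0x80 }.  */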
5115
5384a802 5116/* Test that clear_bit_region_be clears exactly the bits asked for and
 5117   nothing more.  */
5118
5119static void
5120verify_clear_bit_region_be (void)
5121{
5122 /* Start with all bits set and test clearing various patterns in them. */
5123 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5124 unsigned char in[3];
5125 unsigned char expected[3];
5126 memcpy (in, orig, sizeof in);
5127
5128 /* Check zeroing out all the bits. */
5129 clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5130 expected[0] = expected[1] = expected[2] = 0;
5131 verify_array_eq (in, expected, sizeof in);
5132
5133 memcpy (in, orig, sizeof in);
5134 /* Leave the first and last bits intact. */
5135 clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5136 expected[0] = 0x80;
5137 expected[1] = 0;
5138 expected[2] = 0x1;
5139 verify_array_eq (in, expected, sizeof in);
5140}
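
/* A worked example of the second check above: in the big-endian numbering
   used by clear_bit_region_be, bit BITS_PER_UNIT - 1 is the most significant
   bit of in[0], so clearing 22 bits starting at bit 6 leaves only the most
   significant bit of in[0] and the least significant bit of in[2] set,
   hence the expected { 0x80, 0x00, 0x01 }.  */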
5141
5142
5143/* Run all of the selftests within this file. */
5144
5145void
5146store_merging_c_tests (void)
5147{
8aba425f 5148 verify_shift_bytes_in_array_left ();
5149 verify_shift_bytes_in_array_right ();
5150 verify_clear_bit_region ();
5151 verify_clear_bit_region_be ();
5152}
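
/* These selftests are compiled in only when CHECKING_P is set and are run
   by GCC's own self-test harness (e.g. the "make selftest" target of a
   checking-enabled build), not during normal compilation.  */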
5153
5154} // namespace selftest
5155#endif /* CHECKING_P. */