/* GIMPLE store merging and byte swapping passes.
   Copyright (C) 2009-2020 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* The purpose of the store merging pass is to combine multiple memory stores
   of constant values, values loaded from memory, bitwise operations on those,
   or bit-field values, to consecutive locations, into fewer wider stores.

   For example, if we have a sequence performing four byte stores to
   consecutive memory locations:
   [p     ] := imm1;
   [p + 1B] := imm2;
   [p + 2B] := imm3;
   [p + 3B] := imm4;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.

   Or:
   [p     ] := [q     ];
   [p + 1B] := [q + 1B];
   [p + 2B] := [q + 2B];
   [p + 3B] := [q + 3B];
   if there is no overlap can be transformed into a single 4-byte
   load followed by single 4-byte store.

   Or:
   [p     ] := [q     ] ^ imm1;
   [p + 1B] := [q + 1B] ^ imm2;
   [p + 2B] := [q + 2B] ^ imm3;
   [p + 3B] := [q + 3B] ^ imm4;
   if there is no overlap can be transformed into a single 4-byte
   load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.

   Or:
   [p:1 ] := imm;
   [p:31] := val & 0x7FFFFFFF;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.

   The algorithm is applied to each basic block in three phases:

   1) Scan through the basic block and record assignments to destinations
   that can be expressed as a store to memory of a certain size at a certain
   bit offset from base expressions we can handle.  For bit-fields we also
   record the surrounding bit region, i.e. bits that could be stored in
   a read-modify-write operation when storing the bit-field.  Record store
   chains to different bases in a hash_map (m_stores) and make sure to
   terminate such chains when appropriate (for example when the stored
   values get used subsequently).
   These stores can be a result of structure element initializers, array stores
   etc.  A store_immediate_info object is recorded for every such store.
   Record as many such assignments to a single base as possible until a
   statement that interferes with the store sequence is encountered.
   Each store has up to 2 operands, which can be either a constant, a memory
   load or an SSA name, from which the value to be stored can be computed.
   At most one of the operands can be a constant.  The operands are recorded
   in store_operand_info struct.

   2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
   store_immediate_info objects) and coalesce contiguous stores into
   merged_store_group objects.  For bit-field stores, we don't need to
   require the stores to be contiguous, just their surrounding bit regions
   have to be contiguous.  If the expression being stored is different
   between adjacent stores, such as one store storing a constant and
   following storing a value loaded from memory, or if the loaded memory
   objects are not adjacent, a new merged_store_group is created as well.

   For example, given the stores:
   [p     ] := 0;
   [p + 1B] := 1;
   [p + 3B] := 0;
   [p + 4B] := 1;
   [p + 5B] := 0;
   [p + 6B] := 0;
   This phase would produce two merged_store_group objects, one recording the
   two bytes stored in the memory region [p : p + 1] and another
   recording the four bytes stored in the memory region [p + 3 : p + 6].

   3) The merged_store_group objects produced in phase 2) are processed
   to generate the sequence of wider stores that set the contiguous memory
   regions to the sequence of bytes that correspond to it.  This may emit
   multiple stores per store group to handle contiguous stores that are not
   of a size that is a power of 2.  For example it can try to emit a 40-bit
   store as a 32-bit store followed by an 8-bit store.
   We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
   or TARGET_SLOW_UNALIGNED_ACCESS settings.

   Note on endianness and example:
   Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
   [p     ] := 0x1234;
   [p + 2B] := 0x5678;
   [p + 4B] := 0xab;
   [p + 5B] := 0xcd;

   The memory layout for little-endian (LE) and big-endian (BE) must be:
   p |LE|BE|
   ---------
   0 |34|12|
   1 |12|34|
   2 |78|56|
   3 |56|78|
   4 |ab|ab|
   5 |cd|cd|

   To merge these into a single 48-bit merged value 'val' in phase 2)
   on little-endian we insert stores to higher (consecutive) bitpositions
   into the most significant bits of the merged value.
   The final merged value would be: 0xcdab56781234

   For big-endian we insert stores to higher bitpositions into the least
   significant bits of the merged value.
   The final merged value would be: 0x12345678abcd

   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
   followed by a 16-bit store.  Again, we must consider endianness when
   breaking down the 48-bit value 'val' computed above.
   For little endian we emit:
   [p]      (32-bit) := 0x56781234; //    val & 0x0000ffffffff;
   [p + 4B] (16-bit) := 0xcdab;     //    (val & 0xffff00000000) >> 32;

   Whereas for big-endian we emit:
   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
   [p + 4B] (16-bit) := 0xabcd;     //  val & 0x00000000ffff;  */

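/* As a concrete, purely illustrative C-level example of what phase 1 looks
   for (assuming a little-endian target and a hypothetical struct S with four
   adjacent unsigned char fields a, b, c and d), the source

     void
     init (struct S *p)
     {
       p->a = 1;
       p->b = 2;
       p->c = 3;
       p->d = 4;
     }

   produces four adjacent byte stores in GIMPLE, which phase 2 coalesces into
   one merged_store_group and phase 3 can emit as a single 32-bit store of
   0x04030201 (bytes concatenated according to endianness).  */
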
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "builtins.h"
#include "fold-const.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "print-tree.h"
#include "tree-hash-traits.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "stor-layout.h"
#include "timevar.h"
#include "cfganal.h"
#include "cfgcleanup.h"
#include "tree-cfg.h"
#include "except.h"
#include "tree-eh.h"
#include "target.h"
#include "gimplify-me.h"
#include "rtl.h"
#include "expr.h"	/* For get_bit_range.  */
#include "optabs-tree.h"
#include "dbgcnt.h"
#include "selftest.h"

/* The maximum size (in bits) of the stores this pass should generate.  */
#define MAX_STORE_BITSIZE (BITS_PER_WORD)
#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)

/* Limit to bound the number of aliasing checks for loads with the same
   vuse as the corresponding store.  */
#define MAX_STORE_ALIAS_CHECKS 64

namespace {

struct bswap_stat
{
  /* Number of hand-written 16-bit nop / bswaps found.  */
  int found_16bit;

  /* Number of hand-written 32-bit nop / bswaps found.  */
  int found_32bit;

  /* Number of hand-written 64-bit nop / bswaps found.  */
  int found_64bit;
} nop_stats, bswap_stats;

/* A symbolic number structure is used to detect byte permutation and selection
   patterns of a source.  To achieve that, its field N contains an artificial
   number consisting of BITS_PER_MARKER sized markers tracking where each
   byte comes from in the source:

   0	   - target byte has the value 0
   FF	   - target byte has an unknown value (eg. due to sign extension)
   1..size - marker value is the byte index in the source (0 for lsb).

   To detect permutations on memory sources (arrays and structures), a symbolic
   number is also associated:
   - a base address BASE_ADDR and an OFFSET giving the address of the source;
   - a range which gives the difference between the highest and lowest accessed
     memory location to make such a symbolic number;
   - the address SRC of the source element of lowest address as a convenience
     to easily get BASE_ADDR + offset + lowest bytepos;
   - number of expressions N_OPS bitwise ored together to represent
     approximate cost of the computation.

   Note 1: the range is different from size as size reflects the size of the
   type of the current expression.  For instance, for an array char a[],
   (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
   (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
   time a range of 1.

   Note 2: for non-memory sources, range holds the same value as size.

   Note 3: SRC points to the SSA_NAME in case of non-memory source.  */

struct symbolic_number {
  uint64_t n;
  tree type;
  tree base_addr;
  tree offset;
  poly_int64_pod bytepos;
  tree src;
  tree alias_set;
  tree vuse;
  unsigned HOST_WIDE_INT range;
  int n_ops;
};

#define BITS_PER_MARKER 8
#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
#define MARKER_BYTE_UNKNOWN MARKER_MASK
#define HEAD_MARKER(n, size) \
  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a nop.  The number is masked according to the size of
   the symbolic number before using it.  */
#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x08070605 << 32 | 0x04030201)

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a byte swap.  The number is masked according to the
   size of the symbolic number before using it.  */
#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x01020304 << 32 | 0x05060708)

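/* Illustrative note (not used by the code): for a 32-bit symbolic number the
   macros above are narrowed down to their low four markers by
   find_bswap_or_nop_finalize, so a value read in native byte order matches
   0x04030201 (CMPNOP) while a fully byte-reversed value matches 0x01020304
   (CMPXCHG).  */
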
/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
   number N.  Return false if the requested operation is not permitted
   on a symbolic number.  */

inline bool
do_shift_rotate (enum tree_code code,
                 struct symbolic_number *n,
                 int count)
{
  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
  unsigned head_marker;

  if (count < 0
      || count >= TYPE_PRECISION (n->type)
      || count % BITS_PER_UNIT != 0)
    return false;
  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;

  /* Zero out the extra bits of N in order to avoid them being shifted
     into the significant bits.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  switch (code)
    {
    case LSHIFT_EXPR:
      n->n <<= count;
      break;
    case RSHIFT_EXPR:
      head_marker = HEAD_MARKER (n->n, size);
      n->n >>= count;
      /* Arithmetic shift of signed type: result is dependent on the value.  */
      if (!TYPE_UNSIGNED (n->type) && head_marker)
        for (i = 0; i < count / BITS_PER_MARKER; i++)
          n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
                  << ((size - 1 - i) * BITS_PER_MARKER);
      break;
    case LROTATE_EXPR:
      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
      break;
    case RROTATE_EXPR:
      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
      break;
    default:
      return false;
    }
  /* Zero unused bits for size.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
  return true;
}

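/* Worked example (illustrative): for a 32-bit unsigned type with
   n->n == 0x04030201, an LSHIFT_EXPR by 8 bits is a shift by one marker;
   after masking back to four markers the result is 0x03020100, i.e. the low
   byte is now known to be zero and the former top marker is discarded.  */
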
/* Perform sanity checking for the symbolic number N and the gimple
   statement STMT.  */

inline bool
verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
{
  tree lhs_type;

  lhs_type = gimple_expr_type (stmt);

  if (TREE_CODE (lhs_type) != INTEGER_TYPE)
    return false;

  if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
    return false;

  return true;
}

/* Initialize the symbolic number N for the bswap pass from the base element
   SRC manipulated by the bitwise OR expression.  */

bool
init_symbolic_number (struct symbolic_number *n, tree src)
{
  int size;

  if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
    return false;

  n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
  n->src = src;

  /* Set up the symbolic number N by setting each byte to a value between 1 and
     the byte size of rhs1.  The highest order byte is set to n->size and the
     lowest order byte to 1.  */
  n->type = TREE_TYPE (src);
  size = TYPE_PRECISION (n->type);
  if (size % BITS_PER_UNIT != 0)
    return false;
  size /= BITS_PER_UNIT;
  if (size > 64 / BITS_PER_MARKER)
    return false;
  n->range = size;
  n->n = CMPNOP;
  n->n_ops = 1;

  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  return true;
}

/* Check if STMT might be a byte swap or a nop from a memory source and return
   the answer.  If so, REF is that memory source and the base of the memory
   area accessed and the offset of the access from that base are recorded
   in N.  */

bool
find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
{
  /* Leaf node is an array or component ref.  Memorize its base and
     offset from base to compare to other such leaf node.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset, base_addr;

  /* Not prepared to handle PDP endian.  */
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return false;

  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
    return false;

  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
                                   &unsignedp, &reversep, &volatilep);

  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
    /* Do not rewrite TARGET_MEM_REF.  */
    return false;
  else if (TREE_CODE (base_addr) == MEM_REF)
    {
      poly_offset_int bit_offset = 0;
      tree off = TREE_OPERAND (base_addr, 1);

      if (!integer_zerop (off))
        {
          poly_offset_int boff = mem_ref_offset (base_addr);
          boff <<= LOG2_BITS_PER_UNIT;
          bit_offset += boff;
        }

      base_addr = TREE_OPERAND (base_addr, 0);

      /* Avoid returning a negative bitpos as this may wreak havoc later.  */
      if (maybe_lt (bit_offset, 0))
        {
          tree byte_offset = wide_int_to_tree
            (sizetype, bits_to_bytes_round_down (bit_offset));
          bit_offset = num_trailing_bits (bit_offset);
          if (offset)
            offset = size_binop (PLUS_EXPR, offset, byte_offset);
          else
            offset = byte_offset;
        }

      bitpos += bit_offset.force_shwi ();
    }
  else
    base_addr = build_fold_addr_expr (base_addr);

  if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return false;
  if (!multiple_p (bitsize, BITS_PER_UNIT))
    return false;
  if (reversep)
    return false;

  if (!init_symbolic_number (n, ref))
    return false;
  n->base_addr = base_addr;
  n->offset = offset;
  n->bytepos = bytepos;
  n->alias_set = reference_alias_ptr_type (ref);
  n->vuse = gimple_vuse (stmt);
  return true;
}

/* Compute the symbolic number N representing the result of a bitwise OR on
   two symbolic numbers N1 and N2 whose source statements are respectively
   SOURCE_STMT1 and SOURCE_STMT2.  */

gimple *
perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
                        gimple *source_stmt2, struct symbolic_number *n2,
                        struct symbolic_number *n)
{
  int i, size;
  uint64_t mask;
  gimple *source_stmt;
  struct symbolic_number *n_start;

  tree rhs1 = gimple_assign_rhs1 (source_stmt1);
  if (TREE_CODE (rhs1) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
    rhs1 = TREE_OPERAND (rhs1, 0);
  tree rhs2 = gimple_assign_rhs1 (source_stmt2);
  if (TREE_CODE (rhs2) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
    rhs2 = TREE_OPERAND (rhs2, 0);

  /* Sources are different, cancel bswap if they are not memory locations with
     the same base (array, structure, ...).  */
  if (rhs1 != rhs2)
    {
      uint64_t inc;
      HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
      struct symbolic_number *toinc_n_ptr, *n_end;
      basic_block bb1, bb2;

      if (!n1->base_addr || !n2->base_addr
          || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
        return NULL;

      if (!n1->offset != !n2->offset
          || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
        return NULL;

      start1 = 0;
      if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
        return NULL;

      if (start1 < start2)
        {
          n_start = n1;
          start_sub = start2 - start1;
        }
      else
        {
          n_start = n2;
          start_sub = start1 - start2;
        }

      bb1 = gimple_bb (source_stmt1);
      bb2 = gimple_bb (source_stmt2);
      if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
        source_stmt = source_stmt1;
      else
        source_stmt = source_stmt2;

      /* Find the highest address at which a load is performed and
         compute related info.  */
      end1 = start1 + (n1->range - 1);
      end2 = start2 + (n2->range - 1);
      if (end1 < end2)
        {
          end = end2;
          end_sub = end2 - end1;
        }
      else
        {
          end = end1;
          end_sub = end1 - end2;
        }
      n_end = (end2 > end1) ? n2 : n1;

      /* Find symbolic number whose lsb is the most significant.  */
      if (BYTES_BIG_ENDIAN)
        toinc_n_ptr = (n_end == n1) ? n2 : n1;
      else
        toinc_n_ptr = (n_start == n1) ? n2 : n1;

      n->range = end - MIN (start1, start2) + 1;

      /* Check that the range of memory covered can be represented by
         a symbolic number.  */
      if (n->range > 64 / BITS_PER_MARKER)
        return NULL;

      /* Reinterpret byte marks in symbolic number holding the value of
         bigger weight according to target endianness.  */
      inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
      size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
      for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
        {
          unsigned marker
            = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
          if (marker && marker != MARKER_BYTE_UNKNOWN)
            toinc_n_ptr->n += inc;
        }
    }
  else
    {
      n->range = n1->range;
      n_start = n1;
      source_stmt = source_stmt1;
    }

  if (!n1->alias_set
      || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
    n->alias_set = n1->alias_set;
  else
    n->alias_set = ptr_type_node;
  n->vuse = n_start->vuse;
  n->base_addr = n_start->base_addr;
  n->offset = n_start->offset;
  n->src = n_start->src;
  n->bytepos = n_start->bytepos;
  n->type = n_start->type;
  size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

  for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
    {
      uint64_t masked1, masked2;

      masked1 = n1->n & mask;
      masked2 = n2->n & mask;
      if (masked1 && masked2 && masked1 != masked2)
        return NULL;
    }
  n->n = n1->n | n2->n;
  n->n_ops = n1->n_ops + n2->n_ops;

  return source_stmt;
}

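/* Illustrative example: on a little-endian target, merging the symbolic
   numbers for a[0] (markers 0x01, bytepos 0) and a[1] << 8 (markers 0x0100,
   bytepos 1) relabels the markers of the operand at the higher address and
   yields markers 0x0201 over a 2-byte range, i.e. a native-endian 16-bit
   read of a[0..1].  */
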
/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   could successfully be executed the function returns a gimple stmt whose
   rhs's first tree is the expression of the source operand and NULL
   otherwise.  */

gimple *
find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
{
  enum tree_code code;
  tree rhs1, rhs2 = NULL;
  gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
  enum gimple_rhs_class rhs_class;

  if (!limit || !is_gimple_assign (stmt))
    return NULL;

  rhs1 = gimple_assign_rhs1 (stmt);

  if (find_bswap_or_nop_load (stmt, rhs1, n))
    return stmt;

  /* Handle BIT_FIELD_REF.  */
  if (TREE_CODE (rhs1) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
    {
      unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
      unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
      if (bitpos % BITS_PER_UNIT == 0
          && bitsize % BITS_PER_UNIT == 0
          && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
        {
          /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
          if (BYTES_BIG_ENDIAN)
            bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;

          /* Shift.  */
          if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
            return NULL;

          /* Mask.  */
          uint64_t mask = 0;
          uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
          for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
               i++, tmp <<= BITS_PER_UNIT)
            mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
          n->n &= mask;

          /* Convert.  */
          n->type = TREE_TYPE (rhs1);
          if (!n->base_addr)
            n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

          return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
        }

      return NULL;
    }

  if (TREE_CODE (rhs1) != SSA_NAME)
    return NULL;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = gimple_assign_rhs_class (stmt);
  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);

  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  /* Handle unary rhs and binary rhs with integer constants as second
     operand.  */

  if (rhs_class == GIMPLE_UNARY_RHS
      || (rhs_class == GIMPLE_BINARY_RHS
          && TREE_CODE (rhs2) == INTEGER_CST))
    {
      if (code != BIT_AND_EXPR
          && code != LSHIFT_EXPR
          && code != RSHIFT_EXPR
          && code != LROTATE_EXPR
          && code != RROTATE_EXPR
          && !CONVERT_EXPR_CODE_P (code))
        return NULL;

      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);

      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
         we have to initialize the symbolic number.  */
      if (!source_stmt1)
        {
          if (gimple_assign_load_p (stmt)
              || !init_symbolic_number (n, rhs1))
            return NULL;
          source_stmt1 = stmt;
        }

      switch (code)
        {
        case BIT_AND_EXPR:
          {
            int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
            uint64_t val = int_cst_value (rhs2), mask = 0;
            uint64_t tmp = (1 << BITS_PER_UNIT) - 1;

            /* Only constants masking full bytes are allowed.  */
            for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
              if ((val & tmp) != 0 && (val & tmp) != tmp)
                return NULL;
              else if (val & tmp)
                mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);

            n->n &= mask;
          }
          break;
        case LSHIFT_EXPR:
        case RSHIFT_EXPR:
        case LROTATE_EXPR:
        case RROTATE_EXPR:
          if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
            return NULL;
          break;
        CASE_CONVERT:
          {
            int i, type_size, old_type_size;
            tree type;

            type = gimple_expr_type (stmt);
            type_size = TYPE_PRECISION (type);
            if (type_size % BITS_PER_UNIT != 0)
              return NULL;
            type_size /= BITS_PER_UNIT;
            if (type_size > 64 / BITS_PER_MARKER)
              return NULL;

            /* Sign extension: result is dependent on the value.  */
            old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
            if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
                && HEAD_MARKER (n->n, old_type_size))
              for (i = 0; i < type_size - old_type_size; i++)
                n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
                        << ((type_size - 1 - i) * BITS_PER_MARKER);

            if (type_size < 64 / BITS_PER_MARKER)
              {
                /* If STMT casts to a smaller type mask out the bits not
                   belonging to the target type.  */
                n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
              }
            n->type = type;
            if (!n->base_addr)
              n->range = type_size;
          }
          break;
        default:
          return NULL;
        };
      return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
    }

  /* Handle binary rhs.  */

  if (rhs_class == GIMPLE_BINARY_RHS)
    {
      struct symbolic_number n1, n2;
      gimple *source_stmt, *source_stmt2;

      if (code != BIT_IOR_EXPR)
        return NULL;

      if (TREE_CODE (rhs2) != SSA_NAME)
        return NULL;

      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);

      switch (code)
        {
        case BIT_IOR_EXPR:
          source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);

          if (!source_stmt1)
            return NULL;

          source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);

          if (!source_stmt2)
            return NULL;

          if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
            return NULL;

          if (n1.vuse != n2.vuse)
            return NULL;

          source_stmt
            = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);

          if (!source_stmt)
            return NULL;

          if (!verify_symbolic_number_p (n, stmt))
            return NULL;

          break;
        default:
          return NULL;
        }
      return source_stmt;
    }
  return NULL;
}

/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
   *CMPXCHG, *CMPNOP and adjust *N.  */

void
find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
                            uint64_t *cmpnop)
{
  unsigned rsize;
  uint64_t tmpn, mask;

  /* The number which the find_bswap_or_nop_1 result should match in order
     to have a full byte swap.  The number is shifted to the right
     according to the size of the symbolic number before using it.  */
  *cmpxchg = CMPXCHG;
  *cmpnop = CMPNOP;

  /* Find real size of result (highest non-zero byte).  */
  if (n->base_addr)
    for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
  else
    rsize = n->range;

  /* Zero out the bits corresponding to untouched bytes in original gimple
     expression.  */
  if (n->range < (int) sizeof (int64_t))
    {
      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
      *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
      *cmpnop &= mask;
    }

  /* Zero out the bits corresponding to unused bytes in the result of the
     gimple expression.  */
  if (rsize < n->range)
    {
      if (BYTES_BIG_ENDIAN)
        {
          mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
          *cmpxchg &= mask;
          *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
        }
      else
        {
          mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
          *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
          *cmpnop &= mask;
        }
      n->range = rsize;
    }

  n->range *= BITS_PER_UNIT;
}

/* Check if STMT completes a bswap implementation or a read in a given
   endianness consisting of ORs, SHIFTs and ANDs and set *BSWAP
   accordingly.  It also sets N to represent the kind of operations
   performed: size of the resulting expression and whether it works on
   a memory source, and if so alias-set and vuse.  Finally, the
   function returns a stmt whose rhs's first tree is the source
   expression.  */

gimple *
find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
{
  /* The last parameter determines the depth search limit.  It usually
     correlates directly to the number n of bytes to be touched.  We
     increase that number by 2 * (log2(n) + 1) here in order to also
     cover signed -> unsigned conversions of the src operand as can be seen
     in libgcc, and for initial shift/and operation of the src operand.  */
  int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
  limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
  gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);

  if (!ins_stmt)
    return NULL;

  uint64_t cmpxchg, cmpnop;
  find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);

  /* A complete byte swap should make the symbolic number start with
     the largest digit in the highest order byte.  Unchanged symbolic
     number indicates a read with same endianness as target architecture.  */
  if (n->n == cmpnop)
    *bswap = false;
  else if (n->n == cmpxchg)
    *bswap = true;
  else
    return NULL;

  /* Useless bit manipulation performed by code.  */
  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
    return NULL;

  return ins_stmt;
}

const pass_data pass_data_optimize_bswap =
{
  GIMPLE_PASS, /* type */
  "bswap", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_optimize_bswap : public gimple_opt_pass
{
public:
  pass_optimize_bswap (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
    }

  virtual unsigned int execute (function *);

}; // class pass_optimize_bswap

/* Perform the bswap optimization: replace the expression computed in the rhs
   of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
   bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr (non
   null if a load is needed) and BSWAP.  The type, VUSE and alias-set of the
   load to perform are also given in N while the builtin bswap to invoke is
   given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one of
   the load statements involved to construct the rhs in gsi_stmt (GSI) and
   N->range gives the size of the rhs expression for maintaining some
   statistics.

   Note that if the replacement involves a load and if gsi_stmt (GSI) is
   non-NULL, that stmt is moved just after INS_STMT to do the load with the
   same VUSE which can lead to gsi_stmt (GSI) changing basic block.  */

tree
bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
               tree bswap_type, tree load_type, struct symbolic_number *n,
               bool bswap)
{
  tree src, tmp, tgt = NULL_TREE;
  gimple *bswap_stmt;

  gimple *cur_stmt = gsi_stmt (gsi);
  src = n->src;
  if (cur_stmt)
    tgt = gimple_assign_lhs (cur_stmt);

  /* Need to load the value from memory first.  */
  if (n->base_addr)
    {
      gimple_stmt_iterator gsi_ins = gsi;
      if (ins_stmt)
        gsi_ins = gsi_for_stmt (ins_stmt);
      tree addr_expr, addr_tmp, val_expr, val_tmp;
      tree load_offset_ptr, aligned_load_type;
      gimple *load_stmt;
      unsigned align = get_object_alignment (src);
      poly_int64 load_offset = 0;

      if (cur_stmt)
        {
          basic_block ins_bb = gimple_bb (ins_stmt);
          basic_block cur_bb = gimple_bb (cur_stmt);
          if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
            return NULL_TREE;

          /* Move cur_stmt just before one of the loads of the original
             to ensure it has the same VUSE.  See PR61517 for what could
             go wrong.  */
          if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
            reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
          gsi_move_before (&gsi, &gsi_ins);
          gsi = gsi_for_stmt (cur_stmt);
        }
      else
        gsi = gsi_ins;

      /* Compute address to load from and cast according to the size
         of the load.  */
      addr_expr = build_fold_addr_expr (src);
      if (is_gimple_mem_ref_addr (addr_expr))
        addr_tmp = unshare_expr (addr_expr);
      else
        {
          addr_tmp = unshare_expr (n->base_addr);
          if (!is_gimple_mem_ref_addr (addr_tmp))
            addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
                                                   is_gimple_mem_ref_addr,
                                                   NULL_TREE, true,
                                                   GSI_SAME_STMT);
          load_offset = n->bytepos;
          if (n->offset)
            {
              tree off
                = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
                                            true, NULL_TREE, true,
                                            GSI_SAME_STMT);
              gimple *stmt
                = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
                                       POINTER_PLUS_EXPR, addr_tmp, off);
              gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
              addr_tmp = gimple_assign_lhs (stmt);
            }
        }

      /* Perform the load.  */
      aligned_load_type = load_type;
      if (align < TYPE_ALIGN (load_type))
        aligned_load_type = build_aligned_type (load_type, align);
      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
                              load_offset_ptr);

      if (!bswap)
        {
          if (n->range == 16)
            nop_stats.found_16bit++;
          else if (n->range == 32)
            nop_stats.found_32bit++;
          else
            {
              gcc_assert (n->range == 64);
              nop_stats.found_64bit++;
            }

          /* Convert the result of load if necessary.  */
          if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
            {
              val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
                                            "load_dst");
              load_stmt = gimple_build_assign (val_tmp, val_expr);
              gimple_set_vuse (load_stmt, n->vuse);
              gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
              gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
              update_stmt (cur_stmt);
            }
          else if (cur_stmt)
            {
              gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
              gimple_set_vuse (cur_stmt, n->vuse);
              update_stmt (cur_stmt);
            }
          else
            {
              tgt = make_ssa_name (load_type);
              cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
              gimple_set_vuse (cur_stmt, n->vuse);
              gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
            }

          if (dump_file)
            {
              fprintf (dump_file,
                       "%d bit load in target endianness found at: ",
                       (int) n->range);
              print_gimple_stmt (dump_file, cur_stmt, 0);
            }
          return tgt;
        }
      else
        {
          val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
          load_stmt = gimple_build_assign (val_tmp, val_expr);
          gimple_set_vuse (load_stmt, n->vuse);
          gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
        }
      src = val_tmp;
    }
  else if (!bswap)
    {
      gimple *g = NULL;
      if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
        {
          if (!is_gimple_val (src))
            return NULL_TREE;
          g = gimple_build_assign (tgt, NOP_EXPR, src);
        }
      else if (cur_stmt)
        g = gimple_build_assign (tgt, src);
      else
        tgt = src;
      if (n->range == 16)
        nop_stats.found_16bit++;
      else if (n->range == 32)
        nop_stats.found_32bit++;
      else
        {
          gcc_assert (n->range == 64);
          nop_stats.found_64bit++;
        }
      if (dump_file)
        {
          fprintf (dump_file,
                   "%d bit reshuffle in target endianness found at: ",
                   (int) n->range);
          if (cur_stmt)
            print_gimple_stmt (dump_file, cur_stmt, 0);
          else
            {
              print_generic_expr (dump_file, tgt, TDF_NONE);
              fprintf (dump_file, "\n");
            }
        }
      if (cur_stmt)
        gsi_replace (&gsi, g, true);
      return tgt;
    }
  else if (TREE_CODE (src) == BIT_FIELD_REF)
    src = TREE_OPERAND (src, 0);

  if (n->range == 16)
    bswap_stats.found_16bit++;
  else if (n->range == 32)
    bswap_stats.found_32bit++;
  else
    {
      gcc_assert (n->range == 64);
      bswap_stats.found_64bit++;
    }

  tmp = src;

  /* Convert the src expression if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  /* Canonical form for 16 bit bswap is a rotate expression.  Only 16bit values
     are considered, as rotation of 2N bit values by N bits is generally not
     equivalent to a bswap.  Consider for instance 0x01020304 r>> 16 which
     gives 0x03040102 while a bswap for that value is 0x04030201.  */
  if (bswap && n->range == 16)
    {
      tree count = build_int_cst (NULL, BITS_PER_UNIT);
      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
      bswap_stmt = gimple_build_assign (NULL, src);
    }
  else
    bswap_stmt = gimple_build_call (fndecl, 1, tmp);

  if (tgt == NULL_TREE)
    tgt = make_ssa_name (bswap_type);
  tmp = tgt;

  /* Convert the result if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
      convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  gimple_set_lhs (bswap_stmt, tmp);

  if (dump_file)
    {
      fprintf (dump_file, "%d bit bswap implementation found at: ",
               (int) n->range);
      if (cur_stmt)
        print_gimple_stmt (dump_file, cur_stmt, 0);
      else
        {
          print_generic_expr (dump_file, tgt, TDF_NONE);
          fprintf (dump_file, "\n");
        }
    }

  if (cur_stmt)
    {
      gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
      gsi_remove (&gsi, true);
    }
  else
    gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
  return tgt;
}

/* Find manual byte swap implementations as well as loads in a given
   endianness.  Byte swaps are turned into a bswap builtin invocation
   while endian loads are converted to bswap builtin invocation or
   simple load according to the target endianness.  */
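
/* For instance (an illustrative sketch of the kind of source this pass
   recognizes, assuming a little-endian target and 8-bit bytes):

     uint32_t
     read_be32 (const unsigned char *p)
     {
       return ((uint32_t) p[0] << 24) | ((uint32_t) p[1] << 16)
              | ((uint32_t) p[2] << 8) | (uint32_t) p[3];
     }

   is detected as a 32-bit load followed by a byte swap and can be rewritten
   to use __builtin_bswap32, while the analogous native-endian read becomes a
   plain 32-bit load.  */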

unsigned int
pass_optimize_bswap::execute (function *fun)
{
  basic_block bb;
  bool bswap32_p, bswap64_p;
  bool changed = false;
  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;

  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
               && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
               && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
                   || (bswap32_p && word_mode == SImode)));

  /* Determine the argument type of the builtins.  The code later on
     assumes that the return and argument type are the same.  */
  if (bswap32_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  if (bswap64_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  memset (&nop_stats, 0, sizeof (nop_stats));
  memset (&bswap_stats, 0, sizeof (bswap_stats));
  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      /* We do a reverse scan for bswap patterns to make sure we get the
         widest match.  As bswap pattern matching doesn't handle previously
         inserted smaller bswap replacements as sub-patterns, the wider
         variant wouldn't be detected.  */
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
        {
          gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
          tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
          enum tree_code code;
          struct symbolic_number n;
          bool bswap;

          /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
             might be moved to a different basic block by bswap_replace and gsi
             must not point to it if that's the case.  Moving the gsi_prev
             there makes sure that gsi points to the statement previous to
             cur_stmt while still making sure that all statements are
             considered in this basic block.  */
          gsi_prev (&gsi);

          if (!is_gimple_assign (cur_stmt))
            continue;

          code = gimple_assign_rhs_code (cur_stmt);
          switch (code)
            {
            case LROTATE_EXPR:
            case RROTATE_EXPR:
              if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
                  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
                     % BITS_PER_UNIT)
                continue;
              /* Fall through.  */
            case BIT_IOR_EXPR:
              break;
            default:
              continue;
            }

          ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);

          if (!ins_stmt)
            continue;

          switch (n.range)
            {
            case 16:
              /* Already in canonical form, nothing to do.  */
              if (code == LROTATE_EXPR || code == RROTATE_EXPR)
                continue;
              load_type = bswap_type = uint16_type_node;
              break;
            case 32:
              load_type = uint32_type_node;
              if (bswap32_p)
                {
                  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
                  bswap_type = bswap32_type;
                }
              break;
            case 64:
              load_type = uint64_type_node;
              if (bswap64_p)
                {
                  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
                  bswap_type = bswap64_type;
                }
              break;
            default:
              continue;
            }

          if (bswap && !fndecl && n.range != 16)
            continue;

          if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
                             bswap_type, load_type, &n, bswap))
            changed = true;
        }
    }

  statistics_counter_event (fun, "16-bit nop implementations found",
                            nop_stats.found_16bit);
  statistics_counter_event (fun, "32-bit nop implementations found",
                            nop_stats.found_32bit);
  statistics_counter_event (fun, "64-bit nop implementations found",
                            nop_stats.found_64bit);
  statistics_counter_event (fun, "16-bit bswap implementations found",
                            bswap_stats.found_16bit);
  statistics_counter_event (fun, "32-bit bswap implementations found",
                            bswap_stats.found_32bit);
  statistics_counter_event (fun, "64-bit bswap implementations found",
                            bswap_stats.found_64bit);

  return (changed ? TODO_update_ssa : 0);
}

} // anon namespace

gimple_opt_pass *
make_pass_optimize_bswap (gcc::context *ctxt)
{
  return new pass_optimize_bswap (ctxt);
}

namespace {

/* Struct recording one operand for the store, which is either a constant,
   then VAL represents the constant and all the other fields are zero, or
   a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
   and the other fields also reflect the memory load, or an SSA name, then
   VAL represents the SSA name and all the other fields are zero.  */

class store_operand_info
{
public:
  tree val;
  tree base_addr;
  poly_uint64 bitsize;
  poly_uint64 bitpos;
  poly_uint64 bitregion_start;
  poly_uint64 bitregion_end;
  gimple *stmt;
  bool bit_not_p;
  store_operand_info ();
};

store_operand_info::store_operand_info ()
  : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
    bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
{
}

/* Struct recording the information about a single store of an immediate
   to memory.  These are created in the first phase and coalesced into
   merged_store_group objects in the second phase.  */

class store_immediate_info
{
public:
  unsigned HOST_WIDE_INT bitsize;
  unsigned HOST_WIDE_INT bitpos;
  unsigned HOST_WIDE_INT bitregion_start;
  /* This is one past the last bit of the bit region.  */
  unsigned HOST_WIDE_INT bitregion_end;
  gimple *stmt;
  unsigned int order;
  /* INTEGER_CST for constant stores, MEM_REF for memory copy,
     BIT_*_EXPR for logical bitwise operation, BIT_INSERT_EXPR
     for bit insertion.
     LROTATE_EXPR if it can only be bswap optimized and
     ops are not really meaningful.
     NOP_EXPR if bswap optimization detected identity, ops
     are not meaningful.  */
  enum tree_code rhs_code;
  /* Two fields for bswap optimization purposes.  */
  struct symbolic_number n;
  gimple *ins_stmt;
  /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing.  */
  bool bit_not_p;
  /* True if ops have been swapped and thus ops[1] represents
     rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2.  */
  bool ops_swapped_p;
  /* The index number of the landing pad, or 0 if there is none.  */
  int lp_nr;
  /* Operands.  For BIT_*_EXPR rhs_code both operands are used, otherwise
     just the first one.  */
  store_operand_info ops[2];
  store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
			unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
			gimple *, unsigned int, enum tree_code,
			struct symbolic_number &, gimple *, bool, int,
			const store_operand_info &,
			const store_operand_info &);
};

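/* Illustrative example (a sketch, not exhaustive): for a recorded byte store
   such as [p + 2B] := [q] ^ 0xff, bitsize is 8, bitpos is the bit offset of
   the destination from its base expression, rhs_code is BIT_XOR_EXPR,
   ops[0] describes the load from [q] and ops[1] the constant 0xff.  */
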
store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
					    unsigned HOST_WIDE_INT bp,
					    unsigned HOST_WIDE_INT brs,
					    unsigned HOST_WIDE_INT bre,
					    gimple *st,
					    unsigned int ord,
					    enum tree_code rhscode,
					    struct symbolic_number &nr,
					    gimple *ins_stmtp,
					    bool bitnotp,
					    int nr2,
					    const store_operand_info &op0r,
					    const store_operand_info &op1r)
  : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
    stmt (st), order (ord), rhs_code (rhscode), n (nr),
    ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
    lp_nr (nr2)
#if __cplusplus >= 201103L
  , ops { op0r, op1r }
{
}
#else
{
  ops[0] = op0r;
  ops[1] = op1r;
}
#endif

/* Struct representing a group of stores to contiguous memory locations.
   These are produced by the second phase (coalescing) and consumed in the
   third phase that outputs the widened stores.  */

class merged_store_group
{
public:
  unsigned HOST_WIDE_INT start;
  unsigned HOST_WIDE_INT width;
  unsigned HOST_WIDE_INT bitregion_start;
  unsigned HOST_WIDE_INT bitregion_end;
  /* The size of the allocated memory for val and mask.  */
  unsigned HOST_WIDE_INT buf_size;
  unsigned HOST_WIDE_INT align_base;
  poly_uint64 load_align_base[2];

  unsigned int align;
  unsigned int load_align[2];
  unsigned int first_order;
  unsigned int last_order;
  bool bit_insertion;
  bool only_constants;
  unsigned int first_nonmergeable_order;
  int lp_nr;

  auto_vec<store_immediate_info *> stores;
  /* We record the first and last original statements in the sequence because
     we'll need their vuse/vdef and replacement position.  It's easier to keep
     track of them separately as 'stores' is reordered by apply_stores.  */
  gimple *last_stmt;
  gimple *first_stmt;
  unsigned char *val;
  unsigned char *mask;

  merged_store_group (store_immediate_info *);
  ~merged_store_group ();
  bool can_be_merged_into (store_immediate_info *);
  void merge_into (store_immediate_info *);
  void merge_overlapping (store_immediate_info *);
  bool apply_stores ();
private:
  void do_merge (store_immediate_info *);
};

/* Debug helper.  Dump LEN elements of byte array PTR to FD in hex.  */

static void
dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
{
  if (!fd)
    return;

  for (unsigned int i = 0; i < len; i++)
    fprintf (fd, "%02x ", ptr[i]);
  fprintf (fd, "\n");
}

/* Clear out LEN bits starting from bit START in the byte array
   PTR.  This clears the bits to the *right* from START.
   START must be within [0, BITS_PER_UNIT) and counts starting from
   the least significant bit.  */

static void
clear_bit_region_be (unsigned char *ptr, unsigned int start,
		     unsigned int len)
{
  if (len == 0)
    return;
  /* Clear len bits to the right of start.  */
  else if (len <= start + 1)
    {
      unsigned char mask = (~(~0U << len));
      mask = mask << (start + 1U - len);
      ptr[0] &= ~mask;
    }
  else if (start != BITS_PER_UNIT - 1)
    {
      clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
      clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
			   len - (start % BITS_PER_UNIT) - 1);
    }
  else if (start == BITS_PER_UNIT - 1
	   && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      memset (ptr, 0, nbytes);
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
			     len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}

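/* Worked example (illustrative): clear_bit_region_be (ptr, 5, 3) clears bits
   5, 4 and 3 of ptr[0], counting bit 0 as the least significant bit of the
   byte.  */
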
/* In the byte array PTR clear the bit region starting at bit
   START that is LEN bits wide.
   For regions spanning multiple bytes do this recursively until we reach
   zero LEN or a region contained within a single byte.  */

static void
clear_bit_region (unsigned char *ptr, unsigned int start,
		  unsigned int len)
{
  /* Degenerate base case.  */
  if (len == 0)
    return;
  else if (start >= BITS_PER_UNIT)
    clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
  /* Second base case.  */
  else if ((start + len) <= BITS_PER_UNIT)
    {
      unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
      mask >>= BITS_PER_UNIT - (start + len);

      ptr[0] &= ~mask;

      return;
    }
  /* Clear most significant bits in a byte and proceed with the next byte.  */
  else if (start != 0)
    {
      clear_bit_region (ptr, start, BITS_PER_UNIT - start);
      clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
    }
  /* Whole bytes need to be cleared.  */
  else if (start == 0 && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      /* We could recurse on each byte but we clear whole bytes, so a simple
	 memset will do.  */
      memset (ptr, '\0', nbytes);
      /* Clear the remaining sub-byte region if there is one.  */
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}

1560/* Write BITLEN bits of EXPR to the byte array PTR at
1561 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1562 Return true if the operation succeeded. */
1563
1564static bool
1565encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
46a61395 1566 unsigned int total_bytes)
f663d9ad
KT
1567{
1568 unsigned int first_byte = bitpos / BITS_PER_UNIT;
ad1de652
JJ
1569 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1570 || (bitpos % BITS_PER_UNIT)
f4b31647 1571 || !int_mode_for_size (bitlen, 0).exists ());
3afd514b
JJ
1572 bool empty_ctor_p
1573 = (TREE_CODE (expr) == CONSTRUCTOR
1574 && CONSTRUCTOR_NELTS (expr) == 0
1575 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1576 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
f663d9ad
KT
1577
1578 if (!sub_byte_op_p)
3afd514b
JJ
1579 {
1580 if (first_byte >= total_bytes)
1581 return false;
1582 total_bytes -= first_byte;
1583 if (empty_ctor_p)
1584 {
1585 unsigned HOST_WIDE_INT rhs_bytes
1586 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1587 if (rhs_bytes > total_bytes)
1588 return false;
1589 memset (ptr + first_byte, '\0', rhs_bytes);
1590 return true;
1591 }
1592 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1593 }
f663d9ad
KT
1594
1595 /* LITTLE-ENDIAN
1596 We are writing a non byte-sized quantity or at a position that is not
1597 at a byte boundary.
1598 |--------|--------|--------| ptr + first_byte
1599 ^ ^
1600 xxx xxxxxxxx xxx< bp>
1601 |______EXPR____|
1602
46a61395 1603 First native_encode_expr EXPR into a temporary buffer and shift each
f663d9ad
KT
1604 byte in the buffer by 'bp' (carrying the bits over as necessary).
1605 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1606 <------bitlen---->< bp>
1607 Then we clear the destination bits:
1608 |---00000|00000000|000-----| ptr + first_byte
1609 <-------bitlen--->< bp>
1610
1611 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1612 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1613
1614 BIG-ENDIAN
1615 We are writing a non byte-sized quantity or at a position that is not
1616 at a byte boundary.
1617 ptr + first_byte |--------|--------|--------|
1618 ^ ^
1619 <bp >xxx xxxxxxxx xxx
1620 |_____EXPR_____|
1621
46a61395 1622 First native_encode_expr EXPR into a temporary buffer and shift each
f663d9ad
KT
1623 byte in the buffer to the right by (carrying the bits over as necessary).
1624 We shift by as much as needed to align the most significant bit of EXPR
1625 with bitpos:
1626 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1627 <---bitlen----> <bp ><-----bitlen----->
1628 Then we clear the destination bits:
1629 ptr + first_byte |-----000||00000000||00000---|
1630 <bp ><-------bitlen----->
1631
1632 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1633 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1634 The awkwardness comes from the fact that bitpos is counted from the
1635 most significant bit of a byte. */
1636
ef1d3b57
RS
1637 /* We must be dealing with fixed-size data at this point, since the
1638 total size is also fixed. */
3afd514b
JJ
1639 unsigned int byte_size;
1640 if (empty_ctor_p)
1641 {
1642 unsigned HOST_WIDE_INT rhs_bytes
1643 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1644 if (rhs_bytes > total_bytes)
1645 return false;
1646 byte_size = rhs_bytes;
1647 }
1648 else
1649 {
1650 fixed_size_mode mode
1651 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1652 byte_size = GET_MODE_SIZE (mode);
1653 }
f663d9ad 1654 /* Allocate an extra byte so that we have space to shift into. */
3afd514b 1655 byte_size++;
f663d9ad 1656 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
46a61395 1657 memset (tmpbuf, '\0', byte_size);
f663d9ad 1658 /* The store detection code should only have allowed constants that are
3afd514b
JJ
1659 accepted by native_encode_expr or empty ctors. */
1660 if (!empty_ctor_p
1661 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
f663d9ad
KT
1662 gcc_unreachable ();
1663
1664 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1665 bytes to write. This means it can write more than
1666 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1667 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1668 bitlen and zero out the bits that are not relevant as well (that may
1669 contain a sign bit due to sign-extension). */
1670 unsigned int padding
1671 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
ad1de652
JJ
1672 /* On big-endian the padding is at the 'front' so just skip the initial
1673 bytes. */
1674 if (BYTES_BIG_ENDIAN)
1675 tmpbuf += padding;
1676
1677 byte_size -= padding;
1678
1679 if (bitlen % BITS_PER_UNIT != 0)
f663d9ad 1680 {
4b2c06f4 1681 if (BYTES_BIG_ENDIAN)
ad1de652
JJ
1682 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1683 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1684 else
1685 clear_bit_region (tmpbuf, bitlen,
1686 byte_size * BITS_PER_UNIT - bitlen);
f663d9ad 1687 }
ad1de652
JJ
1688 /* Left shifting relies on the last byte being clear if bitlen is
1689 a multiple of BITS_PER_UNIT, which might not be clear if
1690 there are padding bytes. */
1691 else if (!BYTES_BIG_ENDIAN)
1692 tmpbuf[byte_size - 1] = '\0';
f663d9ad
KT
1693
 1694 /* Clear the bit region in PTR into which the bits from TMPBUF will be
46a61395 1695 inserted. */
f663d9ad
KT
1696 if (BYTES_BIG_ENDIAN)
1697 clear_bit_region_be (ptr + first_byte,
1698 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1699 else
1700 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1701
1702 int shift_amnt;
1703 int bitlen_mod = bitlen % BITS_PER_UNIT;
1704 int bitpos_mod = bitpos % BITS_PER_UNIT;
1705
1706 bool skip_byte = false;
1707 if (BYTES_BIG_ENDIAN)
1708 {
1709 /* BITPOS and BITLEN are exactly aligned and no shifting
1710 is necessary. */
1711 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1712 || (bitpos_mod == 0 && bitlen_mod == 0))
1713 shift_amnt = 0;
1714 /* |. . . . . . . .|
1715 <bp > <blen >.
1716 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1717 of the value until it aligns with 'bp' in the next byte over. */
1718 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1719 {
1720 shift_amnt = bitlen_mod + bitpos_mod;
1721 skip_byte = bitlen_mod != 0;
1722 }
1723 /* |. . . . . . . .|
1724 <----bp--->
1725 <---blen---->.
1726 Shift the value right within the same byte so it aligns with 'bp'. */
1727 else
1728 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1729 }
1730 else
1731 shift_amnt = bitpos % BITS_PER_UNIT;
1732
1733 /* Create the shifted version of EXPR. */
1734 if (!BYTES_BIG_ENDIAN)
46a61395 1735 {
8aba425f 1736 shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
46a61395
JJ
1737 if (shift_amnt == 0)
1738 byte_size--;
1739 }
f663d9ad
KT
1740 else
1741 {
1742 gcc_assert (BYTES_BIG_ENDIAN);
1743 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1744 /* If shifting right forced us to move into the next byte skip the now
1745 empty byte. */
1746 if (skip_byte)
1747 {
1748 tmpbuf++;
1749 byte_size--;
1750 }
1751 }
1752
1753 /* Insert the bits from TMPBUF. */
1754 for (unsigned int i = 0; i < byte_size; i++)
1755 ptr[first_byte + i] |= tmpbuf[i];
1756
1757 return true;
1758}
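
/* Illustrative sketch, not part of the pass: the clear-shift-OR scheme
   described above for a little-endian sub-byte insertion, done with plain
   integer arithmetic on a 2-byte destination.  All names and values below
   are invented for the example.  */
#include <stdio.h>

int
main (void)
{
  unsigned char dest[2] = { 0xff, 0xff };  /* bytes at ptr + first_byte */
  unsigned int bitpos = 3, bitlen = 5;     /* insert 5 bits at bit 3 */
  unsigned char value = 0x15;              /* 0b10101 */

  /* Clear the destination bit region, as clear_bit_region does.  */
  unsigned int d = dest[0] | (dest[1] << 8);
  d &= ~(((1u << bitlen) - 1) << bitpos);

  /* Shift the value left by bitpos and OR it in, mirroring
     shift_bytes_in_array_left followed by the final OR loop.  */
  d |= (unsigned int) value << bitpos;

  dest[0] = d & 0xff;
  dest[1] = (d >> 8) & 0xff;
  printf ("%02x %02x\n", dest[0], dest[1]);  /* prints: af ff */
  return 0;
}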
1759
1760/* Sorting function for store_immediate_info objects.
1761 Sorts them by bitposition. */
1762
1763static int
1764sort_by_bitpos (const void *x, const void *y)
1765{
1766 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1767 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1768
109cca3b 1769 if ((*tmp)->bitpos < (*tmp2)->bitpos)
f663d9ad
KT
1770 return -1;
1771 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
1772 return 1;
109cca3b 1773 else
0f0027d1
KT
 1774 /* If the bit positions are the same, use the order, which is
 1775 guaranteed to be different. */
1776 return (*tmp)->order - (*tmp2)->order;
f663d9ad
KT
1777}
1778
1779/* Sorting function for store_immediate_info objects.
1780 Sorts them by the order field. */
1781
1782static int
1783sort_by_order (const void *x, const void *y)
1784{
1785 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1786 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1787
1788 if ((*tmp)->order < (*tmp2)->order)
1789 return -1;
1790 else if ((*tmp)->order > (*tmp2)->order)
1791 return 1;
1792
1793 gcc_unreachable ();
1794}
1795
1796/* Initialize a merged_store_group object from a store_immediate_info
1797 object. */
1798
1799merged_store_group::merged_store_group (store_immediate_info *info)
1800{
1801 start = info->bitpos;
1802 width = info->bitsize;
a62b3dc5
JJ
1803 bitregion_start = info->bitregion_start;
1804 bitregion_end = info->bitregion_end;
f663d9ad
KT
1805 /* VAL has memory allocated for it in apply_stores once the group
1806 width has been finalized. */
1807 val = NULL;
a62b3dc5 1808 mask = NULL;
c94c3532 1809 bit_insertion = false;
18e0c3d1
JJ
1810 only_constants = info->rhs_code == INTEGER_CST;
1811 first_nonmergeable_order = ~0U;
629387a6 1812 lp_nr = info->lp_nr;
a62b3dc5
JJ
1813 unsigned HOST_WIDE_INT align_bitpos = 0;
1814 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1815 &align, &align_bitpos);
1816 align_base = start - align_bitpos;
245f6de1
JJ
1817 for (int i = 0; i < 2; ++i)
1818 {
1819 store_operand_info &op = info->ops[i];
1820 if (op.base_addr == NULL_TREE)
1821 {
1822 load_align[i] = 0;
1823 load_align_base[i] = 0;
1824 }
1825 else
1826 {
1827 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
1828 load_align_base[i] = op.bitpos - align_bitpos;
1829 }
1830 }
f663d9ad
KT
1831 stores.create (1);
1832 stores.safe_push (info);
1833 last_stmt = info->stmt;
1834 last_order = info->order;
1835 first_stmt = last_stmt;
1836 first_order = last_order;
1837 buf_size = 0;
1838}
1839
1840merged_store_group::~merged_store_group ()
1841{
1842 if (val)
1843 XDELETEVEC (val);
1844}
1845
7f5a3982
EB
1846/* Return true if the store described by INFO can be merged into the group. */
1847
1848bool
1849merged_store_group::can_be_merged_into (store_immediate_info *info)
1850{
1851 /* Do not merge bswap patterns. */
1852 if (info->rhs_code == LROTATE_EXPR)
1853 return false;
1854
629387a6
EB
1855 if (info->lp_nr != lp_nr)
1856 return false;
1857
7f5a3982
EB
1858 /* The canonical case. */
1859 if (info->rhs_code == stores[0]->rhs_code)
1860 return true;
1861
1862 /* BIT_INSERT_EXPR is compatible with INTEGER_CST. */
1863 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
1864 return true;
1865
1866 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
1867 return true;
1868
1869 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
1870 if (info->rhs_code == MEM_REF
1871 && (stores[0]->rhs_code == INTEGER_CST
1872 || stores[0]->rhs_code == BIT_INSERT_EXPR)
1873 && info->bitregion_start == stores[0]->bitregion_start
1874 && info->bitregion_end == stores[0]->bitregion_end)
1875 return true;
1876
1877 if (stores[0]->rhs_code == MEM_REF
1878 && (info->rhs_code == INTEGER_CST
1879 || info->rhs_code == BIT_INSERT_EXPR)
1880 && info->bitregion_start == stores[0]->bitregion_start
1881 && info->bitregion_end == stores[0]->bitregion_end)
1882 return true;
1883
1884 return false;
1885}
1886
a62b3dc5
JJ
1887/* Helper method for merge_into and merge_overlapping to do
1888 the common part. */
7f5a3982 1889
f663d9ad 1890void
a62b3dc5 1891merged_store_group::do_merge (store_immediate_info *info)
f663d9ad 1892{
a62b3dc5
JJ
1893 bitregion_start = MIN (bitregion_start, info->bitregion_start);
1894 bitregion_end = MAX (bitregion_end, info->bitregion_end);
1895
1896 unsigned int this_align;
1897 unsigned HOST_WIDE_INT align_bitpos = 0;
1898 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1899 &this_align, &align_bitpos);
1900 if (this_align > align)
1901 {
1902 align = this_align;
1903 align_base = info->bitpos - align_bitpos;
1904 }
245f6de1
JJ
1905 for (int i = 0; i < 2; ++i)
1906 {
1907 store_operand_info &op = info->ops[i];
1908 if (!op.base_addr)
1909 continue;
1910
1911 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
1912 if (this_align > load_align[i])
1913 {
1914 load_align[i] = this_align;
1915 load_align_base[i] = op.bitpos - align_bitpos;
1916 }
1917 }
f663d9ad 1918
f663d9ad
KT
1919 gimple *stmt = info->stmt;
1920 stores.safe_push (info);
1921 if (info->order > last_order)
1922 {
1923 last_order = info->order;
1924 last_stmt = stmt;
1925 }
1926 else if (info->order < first_order)
1927 {
1928 first_order = info->order;
1929 first_stmt = stmt;
1930 }
18e0c3d1
JJ
1931 if (info->rhs_code != INTEGER_CST)
1932 only_constants = false;
f663d9ad
KT
1933}
1934
a62b3dc5
JJ
1935/* Merge a store recorded by INFO into this merged store.
1936 The store is not overlapping with the existing recorded
1937 stores. */
1938
1939void
1940merged_store_group::merge_into (store_immediate_info *info)
1941{
a62b3dc5
JJ
1942 /* Make sure we're inserting in the position we think we're inserting. */
1943 gcc_assert (info->bitpos >= start + width
1944 && info->bitregion_start <= bitregion_end);
1945
c5679c37 1946 width = info->bitpos + info->bitsize - start;
a62b3dc5
JJ
1947 do_merge (info);
1948}
1949
f663d9ad
KT
1950/* Merge a store described by INFO into this merged store.
1951 INFO overlaps in some way with the current store (i.e. it's not contiguous
1952 which is handled by merged_store_group::merge_into). */
1953
1954void
1955merged_store_group::merge_overlapping (store_immediate_info *info)
1956{
f663d9ad 1957 /* If the store extends the size of the group, extend the width. */
a62b3dc5 1958 if (info->bitpos + info->bitsize > start + width)
c5679c37 1959 width = info->bitpos + info->bitsize - start;
f663d9ad 1960
a62b3dc5 1961 do_merge (info);
f663d9ad
KT
1962}
1963
1964/* Go through all the recorded stores in this group in program order and
1965 apply their values to the VAL byte array to create the final merged
1966 value. Return true if the operation succeeded. */
1967
1968bool
1969merged_store_group::apply_stores ()
1970{
a62b3dc5
JJ
1971 /* Make sure we have more than one store in the group, otherwise we cannot
1972 merge anything. */
1973 if (bitregion_start % BITS_PER_UNIT != 0
1974 || bitregion_end % BITS_PER_UNIT != 0
f663d9ad
KT
1975 || stores.length () == 1)
1976 return false;
1977
1978 stores.qsort (sort_by_order);
a62b3dc5 1979 store_immediate_info *info;
f663d9ad 1980 unsigned int i;
c94c3532
EB
1981 /* Create a power-of-2-sized buffer for native_encode_expr. */
1982 buf_size = 1 << ceil_log2 ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
a62b3dc5
JJ
1983 val = XNEWVEC (unsigned char, 2 * buf_size);
1984 mask = val + buf_size;
1985 memset (val, 0, buf_size);
1986 memset (mask, ~0U, buf_size);
f663d9ad
KT
1987
1988 FOR_EACH_VEC_ELT (stores, i, info)
1989 {
a62b3dc5 1990 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
c94c3532 1991 tree cst;
245f6de1
JJ
1992 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
1993 cst = info->ops[0].val;
1994 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
1995 cst = info->ops[1].val;
c94c3532
EB
1996 else
1997 cst = NULL_TREE;
245f6de1
JJ
1998 bool ret = true;
1999 if (cst)
c94c3532
EB
2000 {
2001 if (info->rhs_code == BIT_INSERT_EXPR)
2002 bit_insertion = true;
2003 else
2004 ret = encode_tree_to_bitpos (cst, val, info->bitsize,
2005 pos_in_buffer, buf_size);
2006 }
2007 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2008 if (BYTES_BIG_ENDIAN)
2009 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2010 - (pos_in_buffer % BITS_PER_UNIT)),
2011 info->bitsize);
2012 else
2013 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
245f6de1 2014 if (cst && dump_file && (dump_flags & TDF_DETAILS))
f663d9ad
KT
2015 {
2016 if (ret)
2017 {
c94c3532 2018 fputs ("After writing ", dump_file);
4af78ef8 2019 print_generic_expr (dump_file, cst, TDF_NONE);
f663d9ad 2020 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
c94c3532
EB
2021 " at position %d\n", info->bitsize, pos_in_buffer);
2022 fputs (" the merged value contains ", dump_file);
f663d9ad 2023 dump_char_array (dump_file, val, buf_size);
c94c3532
EB
2024 fputs (" the merged mask contains ", dump_file);
2025 dump_char_array (dump_file, mask, buf_size);
2026 if (bit_insertion)
2027 fputs (" bit insertion is required\n", dump_file);
f663d9ad
KT
2028 }
2029 else
2030 fprintf (dump_file, "Failed to merge stores\n");
4b84d9b8 2031 }
f663d9ad
KT
2032 if (!ret)
2033 return false;
2034 }
4b84d9b8 2035 stores.qsort (sort_by_bitpos);
f663d9ad
KT
2036 return true;
2037}
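
/* Illustrative sketch, not part of the pass: the shape of VAL and MASK
   after apply_stores for two 1-byte constant stores at byte offsets 0 and
   2 of a 4-byte bit region.  Bytes left as 0xff in MASK were not written
   by any store in the group.  The function name below is invented.  */
#include <string.h>

static void
apply_stores_sketch (unsigned char val[4], unsigned char mask[4])
{
  memset (val, 0, 4);       /* val starts as all zeros */
  memset (mask, 0xff, 4);   /* mask starts as all ones */

  val[0] = 0x01;  mask[0] = 0;  /* store #1: byte 0 := 1 */
  val[2] = 0x02;  mask[2] = 0;  /* store #2: byte 2 := 2 */
}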
2038
2039/* Structure describing the store chain. */
2040
6c1dae73 2041class imm_store_chain_info
f663d9ad 2042{
6c1dae73 2043public:
50b6d676
AO
2044 /* Doubly-linked list that imposes an order on chain processing.
2045 PNXP (prev's next pointer) points to the head of a list, or to
2046 the next field in the previous chain in the list.
2047 See pass_store_merging::m_stores_head for more rationale. */
2048 imm_store_chain_info *next, **pnxp;
b5926e23 2049 tree base_addr;
a62b3dc5 2050 auto_vec<store_immediate_info *> m_store_info;
f663d9ad
KT
2051 auto_vec<merged_store_group *> m_merged_store_groups;
2052
50b6d676
AO
2053 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2054 : next (inspt), pnxp (&inspt), base_addr (b_a)
2055 {
2056 inspt = this;
2057 if (next)
2058 {
2059 gcc_checking_assert (pnxp == next->pnxp);
2060 next->pnxp = &next;
2061 }
2062 }
2063 ~imm_store_chain_info ()
2064 {
2065 *pnxp = next;
2066 if (next)
2067 {
2068 gcc_checking_assert (&next == next->pnxp);
2069 next->pnxp = pnxp;
2070 }
2071 }
b5926e23 2072 bool terminate_and_process_chain ();
4b84d9b8 2073 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
f663d9ad 2074 bool coalesce_immediate_stores ();
b5926e23
RB
2075 bool output_merged_store (merged_store_group *);
2076 bool output_merged_stores ();
f663d9ad
KT
2077};
2078
2079const pass_data pass_data_tree_store_merging = {
2080 GIMPLE_PASS, /* type */
2081 "store-merging", /* name */
2082 OPTGROUP_NONE, /* optinfo_flags */
2083 TV_GIMPLE_STORE_MERGING, /* tv_id */
2084 PROP_ssa, /* properties_required */
2085 0, /* properties_provided */
2086 0, /* properties_destroyed */
2087 0, /* todo_flags_start */
2088 TODO_update_ssa, /* todo_flags_finish */
2089};
2090
2091class pass_store_merging : public gimple_opt_pass
2092{
2093public:
2094 pass_store_merging (gcc::context *ctxt)
faec5f24 2095 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
f663d9ad
KT
2096 {
2097 }
2098
c94c3532
EB
2099 /* Pass not supported for PDP-endian, nor for insane hosts or
2100 target character sizes where native_{encode,interpret}_expr
a62b3dc5 2101 doesn't work properly. */
f663d9ad
KT
2102 virtual bool
2103 gate (function *)
2104 {
a62b3dc5 2105 return flag_store_merging
c94c3532 2106 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
a62b3dc5
JJ
2107 && CHAR_BIT == 8
2108 && BITS_PER_UNIT == 8;
f663d9ad
KT
2109 }
2110
2111 virtual unsigned int execute (function *);
2112
2113private:
99b1c316 2114 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
f663d9ad 2115
50b6d676
AO
2116 /* Form a doubly-linked stack of the elements of m_stores, so that
2117 we can iterate over them in a predictable way. Using this order
2118 avoids extraneous differences in the compiler output just because
2119 of tree pointer variations (e.g. different chains end up in
2120 different positions of m_stores, so they are handled in different
2121 orders, so they allocate or release SSA names in different
2122 orders, and when they get reused, subsequent passes end up
2123 getting different SSA names, which may ultimately change
2124 decisions when going out of SSA). */
2125 imm_store_chain_info *m_stores_head;
2126
629387a6
EB
2127 bool process_store (gimple *);
2128 bool terminate_and_process_chain (imm_store_chain_info *);
383ac8dc 2129 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
629387a6 2130 bool terminate_and_process_all_chains ();
f663d9ad
KT
2131}; // class pass_store_merging
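
/* Illustrative source-level example, not part of the pass: four adjacent
   byte-sized constant stores of the kind this pass coalesces into a single
   wider store when store merging is enabled (e.g. at -O2).  Assuming the
   usual -fdump-tree-<pass-name> convention, the result can be inspected
   with -fdump-tree-store-merging, matching the pass name registered in
   pass_data_tree_store_merging above.  The type and function names are
   invented.  */
struct quad { unsigned char a, b, c, d; };

void
init_quad (struct quad *q)
{
  q->a = 1;
  q->b = 2;
  q->c = 3;
  q->d = 4;  /* typically becomes one 32-bit store */
}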
2132
2133/* Terminate and process all recorded chains. Return true if any changes
2134 were made. */
2135
2136bool
2137pass_store_merging::terminate_and_process_all_chains ()
2138{
f663d9ad 2139 bool ret = false;
50b6d676 2140 while (m_stores_head)
629387a6 2141 ret |= terminate_and_process_chain (m_stores_head);
b119c055 2142 gcc_assert (m_stores.is_empty ());
f663d9ad
KT
2143 return ret;
2144}
2145
383ac8dc
JJ
2146/* Terminate all chains that are affected by the statement STMT.
2147 CHAIN_INFO is the chain we should ignore from the checks if
629387a6 2148 non-NULL. Return true if any changes were made. */
f663d9ad
KT
2149
2150bool
20770eb8 2151pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
b5926e23 2152 **chain_info,
f663d9ad
KT
2153 gimple *stmt)
2154{
2155 bool ret = false;
2156
2157 /* If the statement doesn't touch memory it can't alias. */
2158 if (!gimple_vuse (stmt))
2159 return false;
2160
9e875fd8 2161 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
6b412bf6
RB
2162 ao_ref store_lhs_ref;
2163 ao_ref_init (&store_lhs_ref, store_lhs);
383ac8dc 2164 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
f663d9ad 2165 {
383ac8dc
JJ
2166 next = cur->next;
2167
2168 /* We already checked all the stores in chain_info and terminated the
2169 chain if necessary. Skip it here. */
2170 if (chain_info && *chain_info == cur)
2171 continue;
2172
245f6de1
JJ
2173 store_immediate_info *info;
2174 unsigned int i;
383ac8dc 2175 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
f663d9ad 2176 {
9e875fd8 2177 tree lhs = gimple_assign_lhs (info->stmt);
6b412bf6
RB
2178 ao_ref lhs_ref;
2179 ao_ref_init (&lhs_ref, lhs);
2180 if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2181 || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2182 || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2183 &lhs_ref, false)))
f663d9ad 2184 {
245f6de1 2185 if (dump_file && (dump_flags & TDF_DETAILS))
f663d9ad 2186 {
245f6de1
JJ
2187 fprintf (dump_file, "stmt causes chain termination:\n");
2188 print_gimple_stmt (dump_file, stmt, 0);
f663d9ad 2189 }
629387a6 2190 ret |= terminate_and_process_chain (cur);
245f6de1 2191 break;
f663d9ad
KT
2192 }
2193 }
2194 }
2195
f663d9ad
KT
2196 return ret;
2197}
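
/* Illustrative sketch, not part of the pass: a statement in the middle of
   a store sequence that may read an already-recorded store, which is the
   situation the aliasing walk above terminates a chain for.  All names are
   invented.  */
struct two { int a, b; };

void
interleaved_read (struct two *p, int *q, int *out)
{
  p->a = 1;
  *out = *q;  /* *q may alias p->a, so the chain recording p->a = 1
		 has to be terminated before p->b = 2 is processed.  */
  p->b = 2;
}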
2198
2199/* Helper function. Terminate the recorded chain storing to base object
2200 BASE. Return true if the merging and output was successful. The m_stores
2201 entry is removed after the processing in any case. */
2202
2203bool
629387a6 2204pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
f663d9ad 2205{
b5926e23
RB
2206 bool ret = chain_info->terminate_and_process_chain ();
2207 m_stores.remove (chain_info->base_addr);
2208 delete chain_info;
f663d9ad
KT
2209 return ret;
2210}
2211
245f6de1 2212/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
629387a6
EB
2213 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2214 be able to sink load of REF across stores between FIRST and LAST, up
2215 to right before LAST. */
245f6de1
JJ
2216
2217bool
2218stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2219{
2220 ao_ref r;
2221 ao_ref_init (&r, ref);
2222 unsigned int count = 0;
2223 tree vop = gimple_vdef (last);
2224 gimple *stmt;
2225
629387a6
EB
2226 /* Return true conservatively if the basic blocks are different. */
2227 if (gimple_bb (first) != gimple_bb (last))
2228 return true;
2229
245f6de1
JJ
2230 do
2231 {
2232 stmt = SSA_NAME_DEF_STMT (vop);
2233 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2234 return true;
4b84d9b8
JJ
2235 if (gimple_store_p (stmt)
2236 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2237 return true;
245f6de1
JJ
2238 /* Avoid quadratic compile time by bounding the number of checks
2239 we perform. */
2240 if (++count > MAX_STORE_ALIAS_CHECKS)
2241 return true;
2242 vop = gimple_vuse (stmt);
2243 }
2244 while (stmt != first);
629387a6 2245
245f6de1
JJ
2246 return false;
2247}
2248
2249/* Return true if INFO->ops[IDX] is mergeable with the
2250 corresponding loads already in MERGED_STORE group.
2251 BASE_ADDR is the base address of the whole store group. */
2252
2253bool
2254compatible_load_p (merged_store_group *merged_store,
2255 store_immediate_info *info,
2256 tree base_addr, int idx)
2257{
2258 store_immediate_info *infof = merged_store->stores[0];
2259 if (!info->ops[idx].base_addr
8a91d545
RS
2260 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2261 info->bitpos - infof->bitpos)
245f6de1
JJ
2262 || !operand_equal_p (info->ops[idx].base_addr,
2263 infof->ops[idx].base_addr, 0))
2264 return false;
2265
2266 store_immediate_info *infol = merged_store->stores.last ();
2267 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2268 /* In this case all vuses should be the same, e.g.
2269 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2270 or
2271 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2272 and we can emit the coalesced load next to any of those loads. */
2273 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2274 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2275 return true;
2276
2277 /* Otherwise, at least for now require that the load has the same
2278 vuse as the store. See following examples. */
2279 if (gimple_vuse (info->stmt) != load_vuse)
2280 return false;
2281
2282 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2283 || (infof != infol
2284 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2285 return false;
2286
2287 /* If the load is from the same location as the store, already
2288 the construction of the immediate chain info guarantees no intervening
2289 stores, so no further checks are needed. Example:
2290 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
8a91d545 2291 if (known_eq (info->ops[idx].bitpos, info->bitpos)
245f6de1
JJ
2292 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2293 return true;
2294
2295 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2296 of the stores in the group, or any other stores in between those.
2297 Previous calls to compatible_load_p ensured that for all the
2298 merged_store->stores IDX loads, no stmts starting with
2299 merged_store->first_stmt and ending right before merged_store->last_stmt
2300 clobbers those loads. */
2301 gimple *first = merged_store->first_stmt;
2302 gimple *last = merged_store->last_stmt;
2303 unsigned int i;
2304 store_immediate_info *infoc;
2305 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2306 comes before the so far first load, we'll be changing
2307 merged_store->first_stmt. In that case we need to give up if
 2308 any of the earlier processed loads are clobbered by the stmts in the new
2309 range. */
2310 if (info->order < merged_store->first_order)
2311 {
2312 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2313 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2314 return false;
2315 first = info->stmt;
2316 }
2317 /* Similarly, we could change merged_store->last_stmt, so ensure
2318 in that case no stmts in the new range clobber any of the earlier
2319 processed loads. */
2320 else if (info->order > merged_store->last_order)
2321 {
2322 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2323 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2324 return false;
2325 last = info->stmt;
2326 }
2327 /* And finally, we'd be adding a new load to the set, ensure it isn't
2328 clobbered in the new range. */
2329 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2330 return false;
2331
2332 /* Otherwise, we are looking for:
2333 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2334 or
2335 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2336 return true;
2337}
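
/* Illustrative sketch, not part of the pass: a field-by-field copy whose
   GIMPLE has the _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; shape quoted in
   the comments above, i.e. stores whose right-hand sides are compatible
   loads that can be coalesced into one wide load feeding one wide store.
   The type and function names are invented.  */
struct four { unsigned char a, b, c, d; };

void
copy_four (struct four *t, const struct four *s)
{
  t->a = s->a;
  t->b = s->b;
  t->c = s->c;
  t->d = s->d;
}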
2338
4b84d9b8
JJ
2339/* Add all refs loaded to compute VAL to REFS vector. */
2340
2341void
2342gather_bswap_load_refs (vec<tree> *refs, tree val)
2343{
2344 if (TREE_CODE (val) != SSA_NAME)
2345 return;
2346
2347 gimple *stmt = SSA_NAME_DEF_STMT (val);
2348 if (!is_gimple_assign (stmt))
2349 return;
2350
2351 if (gimple_assign_load_p (stmt))
2352 {
2353 refs->safe_push (gimple_assign_rhs1 (stmt));
2354 return;
2355 }
2356
2357 switch (gimple_assign_rhs_class (stmt))
2358 {
2359 case GIMPLE_BINARY_RHS:
2360 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2361 /* FALLTHRU */
2362 case GIMPLE_UNARY_RHS:
2363 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2364 break;
2365 default:
2366 gcc_unreachable ();
2367 }
2368}
2369
c5679c37
JJ
2370/* Check if there are any stores in M_STORE_INFO after index I
2371 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
 2372 a potential group ending with END and have their order
4d213bf6
JJ
2373 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2374 all the stores already merged and the one under consideration
2375 have rhs_code of INTEGER_CST. Return true if there are no such stores.
c5679c37
JJ
2376 Consider:
2377 MEM[(long long int *)p_28] = 0;
2378 MEM[(long long int *)p_28 + 8B] = 0;
2379 MEM[(long long int *)p_28 + 16B] = 0;
2380 MEM[(long long int *)p_28 + 24B] = 0;
2381 _129 = (int) _130;
2382 MEM[(int *)p_28 + 8B] = _129;
2383 MEM[(int *)p_28].a = -1;
2384 We already have
2385 MEM[(long long int *)p_28] = 0;
2386 MEM[(int *)p_28].a = -1;
2387 stmts in the current group and need to consider if it is safe to
2388 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2389 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2390 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2391 into the group and merging of those 3 stores is successful, merged
2392 stmts will be emitted at the latest store from that group, i.e.
2393 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2394 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2395 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2396 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2397 into the group. That way it will be its own store group and will
4d213bf6 2398 not be touched. If ALL_INTEGER_CST_P and there are overlapping
c5679c37
JJ
2399 INTEGER_CST stores, those are mergeable using merge_overlapping,
2400 so don't return false for those. */
2401
2402static bool
2403check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
4d213bf6 2404 bool all_integer_cst_p, unsigned int last_order,
c5679c37
JJ
2405 unsigned HOST_WIDE_INT end)
2406{
2407 unsigned int len = m_store_info.length ();
2408 for (++i; i < len; ++i)
2409 {
2410 store_immediate_info *info = m_store_info[i];
2411 if (info->bitpos >= end)
2412 break;
2413 if (info->order < last_order
4d213bf6 2414 && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
c5679c37
JJ
2415 return false;
2416 }
2417 return true;
2418}
2419
4b84d9b8
JJ
2420/* Return true if m_store_info[first] and at least one following store
2421 form a group which store try_size bitsize value which is byte swapped
2422 from a memory load or some value, or identity from some value.
2423 This uses the bswap pass APIs. */
2424
2425bool
2426imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2427 unsigned int first,
2428 unsigned int try_size)
2429{
2430 unsigned int len = m_store_info.length (), last = first;
2431 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2432 if (width >= try_size)
2433 return false;
2434 for (unsigned int i = first + 1; i < len; ++i)
2435 {
2436 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2437 || m_store_info[i]->ins_stmt == NULL)
2438 return false;
2439 width += m_store_info[i]->bitsize;
2440 if (width >= try_size)
2441 {
2442 last = i;
2443 break;
2444 }
2445 }
2446 if (width != try_size)
2447 return false;
2448
2449 bool allow_unaligned
028d4092 2450 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
4b84d9b8
JJ
2451 /* Punt if the combined store would not be aligned and we need alignment. */
2452 if (!allow_unaligned)
2453 {
2454 unsigned int align = merged_store->align;
2455 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2456 for (unsigned int i = first + 1; i <= last; ++i)
2457 {
2458 unsigned int this_align;
2459 unsigned HOST_WIDE_INT align_bitpos = 0;
2460 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2461 &this_align, &align_bitpos);
2462 if (this_align > align)
2463 {
2464 align = this_align;
2465 align_base = m_store_info[i]->bitpos - align_bitpos;
2466 }
2467 }
2468 unsigned HOST_WIDE_INT align_bitpos
2469 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2470 if (align_bitpos)
2471 align = least_bit_hwi (align_bitpos);
2472 if (align < try_size)
2473 return false;
2474 }
2475
2476 tree type;
2477 switch (try_size)
2478 {
2479 case 16: type = uint16_type_node; break;
2480 case 32: type = uint32_type_node; break;
2481 case 64: type = uint64_type_node; break;
2482 default: gcc_unreachable ();
2483 }
2484 struct symbolic_number n;
2485 gimple *ins_stmt = NULL;
2486 int vuse_store = -1;
2487 unsigned int first_order = merged_store->first_order;
2488 unsigned int last_order = merged_store->last_order;
2489 gimple *first_stmt = merged_store->first_stmt;
2490 gimple *last_stmt = merged_store->last_stmt;
c5679c37 2491 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
4b84d9b8
JJ
2492 store_immediate_info *infof = m_store_info[first];
2493
2494 for (unsigned int i = first; i <= last; ++i)
2495 {
2496 store_immediate_info *info = m_store_info[i];
2497 struct symbolic_number this_n = info->n;
2498 this_n.type = type;
2499 if (!this_n.base_addr)
2500 this_n.range = try_size / BITS_PER_UNIT;
30fa8e9c
JJ
2501 else
2502 /* Update vuse in case it has changed by output_merged_stores. */
2503 this_n.vuse = gimple_vuse (info->ins_stmt);
4b84d9b8
JJ
2504 unsigned int bitpos = info->bitpos - infof->bitpos;
2505 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2506 BYTES_BIG_ENDIAN
2507 ? try_size - info->bitsize - bitpos
2508 : bitpos))
2509 return false;
aa11164a 2510 if (this_n.base_addr && vuse_store)
4b84d9b8
JJ
2511 {
2512 unsigned int j;
2513 for (j = first; j <= last; ++j)
2514 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2515 break;
2516 if (j > last)
2517 {
2518 if (vuse_store == 1)
2519 return false;
2520 vuse_store = 0;
2521 }
2522 }
2523 if (i == first)
2524 {
2525 n = this_n;
2526 ins_stmt = info->ins_stmt;
2527 }
2528 else
2529 {
c5679c37 2530 if (n.base_addr && n.vuse != this_n.vuse)
4b84d9b8 2531 {
c5679c37
JJ
2532 if (vuse_store == 0)
2533 return false;
2534 vuse_store = 1;
4b84d9b8 2535 }
c5679c37
JJ
2536 if (info->order > last_order)
2537 {
2538 last_order = info->order;
2539 last_stmt = info->stmt;
2540 }
2541 else if (info->order < first_order)
2542 {
2543 first_order = info->order;
2544 first_stmt = info->stmt;
2545 }
2546 end = MAX (end, info->bitpos + info->bitsize);
4b84d9b8
JJ
2547
2548 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2549 &this_n, &n);
2550 if (ins_stmt == NULL)
2551 return false;
2552 }
2553 }
2554
2555 uint64_t cmpxchg, cmpnop;
2556 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
2557
 2558 /* A complete byte swap should make the symbolic number start with
2559 the largest digit in the highest order byte. Unchanged symbolic
2560 number indicates a read with same endianness as target architecture. */
2561 if (n.n != cmpnop && n.n != cmpxchg)
2562 return false;
2563
2564 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2565 return false;
2566
4d213bf6 2567 if (!check_no_overlap (m_store_info, last, false, last_order, end))
c5679c37
JJ
2568 return false;
2569
4b84d9b8
JJ
2570 /* Don't handle memory copy this way if normal non-bswap processing
2571 would handle it too. */
2572 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2573 {
2574 unsigned int i;
2575 for (i = first; i <= last; ++i)
2576 if (m_store_info[i]->rhs_code != MEM_REF)
2577 break;
2578 if (i == last + 1)
2579 return false;
2580 }
2581
2582 if (n.n == cmpxchg)
2583 switch (try_size)
2584 {
2585 case 16:
2586 /* Will emit LROTATE_EXPR. */
2587 break;
2588 case 32:
2589 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2590 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2591 break;
2592 return false;
2593 case 64:
2594 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2595 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2596 break;
2597 return false;
2598 default:
2599 gcc_unreachable ();
2600 }
2601
2602 if (!allow_unaligned && n.base_addr)
2603 {
2604 unsigned int align = get_object_alignment (n.src);
2605 if (align < try_size)
2606 return false;
2607 }
2608
2609 /* If each load has vuse of the corresponding store, need to verify
2610 the loads can be sunk right before the last store. */
2611 if (vuse_store == 1)
2612 {
2613 auto_vec<tree, 64> refs;
2614 for (unsigned int i = first; i <= last; ++i)
2615 gather_bswap_load_refs (&refs,
2616 gimple_assign_rhs1 (m_store_info[i]->stmt));
2617
2618 unsigned int i;
2619 tree ref;
2620 FOR_EACH_VEC_ELT (refs, i, ref)
2621 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2622 return false;
2623 n.vuse = NULL_TREE;
2624 }
2625
2626 infof->n = n;
2627 infof->ins_stmt = ins_stmt;
2628 for (unsigned int i = first; i <= last; ++i)
2629 {
2630 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2631 m_store_info[i]->ops[0].base_addr = NULL_TREE;
2632 m_store_info[i]->ops[1].base_addr = NULL_TREE;
2633 if (i != first)
2634 merged_store->merge_into (m_store_info[i]);
2635 }
2636
2637 return true;
2638}
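
/* Illustrative sketch, not part of the pass: the kind of store group the
   function above tries to recognize via the bswap framework.  p[0]
   receives the most significant byte, so on a big-endian target the group
   is an identity and can become a single 32-bit store, while on a
   little-endian target it can become a byte-swapped 32-bit store (when
   __builtin_bswap32 is available).  The function name is invented.  */
void
store_msb_first (unsigned char *p, unsigned int data)
{
  p[0] = data >> 24;
  p[1] = data >> 16;
  p[2] = data >> 8;
  p[3] = data;
}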
2639
f663d9ad
KT
2640/* Go through the candidate stores recorded in m_store_info and merge them
2641 into merged_store_group objects recorded into m_merged_store_groups
2642 representing the widened stores. Return true if coalescing was successful
2643 and the number of widened stores is fewer than the original number
2644 of stores. */
2645
2646bool
2647imm_store_chain_info::coalesce_immediate_stores ()
2648{
2649 /* Anything less can't be processed. */
2650 if (m_store_info.length () < 2)
2651 return false;
2652
2653 if (dump_file && (dump_flags & TDF_DETAILS))
c94c3532 2654 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
f663d9ad
KT
2655 m_store_info.length ());
2656
2657 store_immediate_info *info;
4b84d9b8 2658 unsigned int i, ignore = 0;
f663d9ad
KT
2659
2660 /* Order the stores by the bitposition they write to. */
2661 m_store_info.qsort (sort_by_bitpos);
2662
2663 info = m_store_info[0];
2664 merged_store_group *merged_store = new merged_store_group (info);
c94c3532
EB
2665 if (dump_file && (dump_flags & TDF_DETAILS))
2666 fputs ("New store group\n", dump_file);
f663d9ad
KT
2667
2668 FOR_EACH_VEC_ELT (m_store_info, i, info)
2669 {
3afd514b
JJ
2670 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
2671
4b84d9b8 2672 if (i <= ignore)
c94c3532 2673 goto done;
f663d9ad 2674
4b84d9b8
JJ
2675 /* First try to handle group of stores like:
2676 p[0] = data >> 24;
2677 p[1] = data >> 16;
2678 p[2] = data >> 8;
2679 p[3] = data;
2680 using the bswap framework. */
2681 if (info->bitpos == merged_store->start + merged_store->width
2682 && merged_store->stores.length () == 1
2683 && merged_store->stores[0]->ins_stmt != NULL
2684 && info->ins_stmt != NULL)
2685 {
2686 unsigned int try_size;
2687 for (try_size = 64; try_size >= 16; try_size >>= 1)
2688 if (try_coalesce_bswap (merged_store, i - 1, try_size))
2689 break;
2690
2691 if (try_size >= 16)
2692 {
2693 ignore = i + merged_store->stores.length () - 1;
2694 m_merged_store_groups.safe_push (merged_store);
2695 if (ignore < m_store_info.length ())
2696 merged_store = new merged_store_group (m_store_info[ignore]);
2697 else
2698 merged_store = NULL;
c94c3532 2699 goto done;
4b84d9b8
JJ
2700 }
2701 }
2702
3afd514b
JJ
2703 new_bitregion_start
2704 = MIN (merged_store->bitregion_start, info->bitregion_start);
2705 new_bitregion_end
2706 = MAX (merged_store->bitregion_end, info->bitregion_end);
2707
2708 if (info->order >= merged_store->first_nonmergeable_order
2709 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
028d4092 2710 > (unsigned) param_store_merging_max_size))
18e0c3d1
JJ
2711 ;
2712
f663d9ad
KT
2713 /* |---store 1---|
2714 |---store 2---|
4b84d9b8 2715 Overlapping stores. */
18e0c3d1 2716 else if (IN_RANGE (info->bitpos, merged_store->start,
4d213bf6
JJ
2717 merged_store->start + merged_store->width - 1)
2718 /* |---store 1---||---store 2---|
2719 Handle also the consecutive INTEGER_CST stores case here,
2720 as we have here the code to deal with overlaps. */
2721 || (info->bitregion_start <= merged_store->bitregion_end
2722 && info->rhs_code == INTEGER_CST
2723 && merged_store->only_constants
2724 && merged_store->can_be_merged_into (info)))
f663d9ad 2725 {
245f6de1 2726 /* Only allow overlapping stores of constants. */
629387a6
EB
2727 if (info->rhs_code == INTEGER_CST
2728 && merged_store->only_constants
2729 && info->lp_nr == merged_store->lp_nr)
245f6de1 2730 {
6cd4c66e
JJ
2731 unsigned int last_order
2732 = MAX (merged_store->last_order, info->order);
2733 unsigned HOST_WIDE_INT end
2734 = MAX (merged_store->start + merged_store->width,
2735 info->bitpos + info->bitsize);
4d213bf6 2736 if (check_no_overlap (m_store_info, i, true, last_order, end))
6cd4c66e
JJ
2737 {
2738 /* check_no_overlap call above made sure there are no
2739 overlapping stores with non-INTEGER_CST rhs_code
2740 in between the first and last of the stores we've
2741 just merged. If there are any INTEGER_CST rhs_code
2742 stores in between, we need to merge_overlapping them
2743 even if in the sort_by_bitpos order there are other
2744 overlapping stores in between. Keep those stores as is.
2745 Example:
2746 MEM[(int *)p_28] = 0;
2747 MEM[(char *)p_28 + 3B] = 1;
2748 MEM[(char *)p_28 + 1B] = 2;
2749 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
2750 We can't merge the zero store with the store of two and
2751 not merge anything else, because the store of one is
2752 in the original order in between those two, but in
2753 store_by_bitpos order it comes after the last store that
2754 we can't merge with them. We can merge the first 3 stores
2755 and keep the last store as is though. */
18e0c3d1
JJ
2756 unsigned int len = m_store_info.length ();
2757 unsigned int try_order = last_order;
2758 unsigned int first_nonmergeable_order;
2759 unsigned int k;
2760 bool last_iter = false;
2761 int attempts = 0;
2762 do
6cd4c66e 2763 {
18e0c3d1
JJ
2764 unsigned int max_order = 0;
2765 unsigned first_nonmergeable_int_order = ~0U;
2766 unsigned HOST_WIDE_INT this_end = end;
2767 k = i;
2768 first_nonmergeable_order = ~0U;
2769 for (unsigned int j = i + 1; j < len; ++j)
6cd4c66e 2770 {
18e0c3d1
JJ
2771 store_immediate_info *info2 = m_store_info[j];
2772 if (info2->bitpos >= this_end)
2773 break;
2774 if (info2->order < try_order)
6cd4c66e 2775 {
18e0c3d1
JJ
2776 if (info2->rhs_code != INTEGER_CST)
2777 {
2778 /* Normally check_no_overlap makes sure this
2779 doesn't happen, but if end grows below,
2780 then we need to process more stores than
2781 check_no_overlap verified. Example:
2782 MEM[(int *)p_5] = 0;
2783 MEM[(short *)p_5 + 3B] = 1;
2784 MEM[(char *)p_5 + 4B] = _9;
2785 MEM[(char *)p_5 + 2B] = 2; */
2786 k = 0;
2787 break;
2788 }
2789 k = j;
2790 this_end = MAX (this_end,
2791 info2->bitpos + info2->bitsize);
6cd4c66e 2792 }
18e0c3d1
JJ
2793 else if (info2->rhs_code == INTEGER_CST
2794 && !last_iter)
2795 {
2796 max_order = MAX (max_order, info2->order + 1);
2797 first_nonmergeable_int_order
2798 = MIN (first_nonmergeable_int_order,
2799 info2->order);
2800 }
2801 else
2802 first_nonmergeable_order
2803 = MIN (first_nonmergeable_order, info2->order);
6cd4c66e 2804 }
18e0c3d1
JJ
2805 if (k == 0)
2806 {
2807 if (last_order == try_order)
2808 break;
2809 /* If this failed, but only because we grew
2810 try_order, retry with the last working one,
2811 so that we merge at least something. */
2812 try_order = last_order;
2813 last_iter = true;
2814 continue;
2815 }
2816 last_order = try_order;
2817 /* Retry with a larger try_order to see if we could
2818 merge some further INTEGER_CST stores. */
2819 if (max_order
2820 && (first_nonmergeable_int_order
2821 < first_nonmergeable_order))
2822 {
2823 try_order = MIN (max_order,
2824 first_nonmergeable_order);
2825 try_order
2826 = MIN (try_order,
2827 merged_store->first_nonmergeable_order);
2828 if (try_order > last_order && ++attempts < 16)
2829 continue;
2830 }
2831 first_nonmergeable_order
2832 = MIN (first_nonmergeable_order,
2833 first_nonmergeable_int_order);
2834 end = this_end;
2835 break;
6cd4c66e 2836 }
18e0c3d1 2837 while (1);
6cd4c66e
JJ
2838
2839 if (k != 0)
2840 {
2841 merged_store->merge_overlapping (info);
2842
18e0c3d1
JJ
2843 merged_store->first_nonmergeable_order
2844 = MIN (merged_store->first_nonmergeable_order,
2845 first_nonmergeable_order);
2846
6cd4c66e
JJ
2847 for (unsigned int j = i + 1; j <= k; j++)
2848 {
2849 store_immediate_info *info2 = m_store_info[j];
2850 gcc_assert (info2->bitpos < end);
2851 if (info2->order < last_order)
2852 {
2853 gcc_assert (info2->rhs_code == INTEGER_CST);
18e0c3d1
JJ
2854 if (info != info2)
2855 merged_store->merge_overlapping (info2);
6cd4c66e
JJ
2856 }
2857 /* Other stores are kept and not merged in any
2858 way. */
2859 }
2860 ignore = k;
2861 goto done;
2862 }
2863 }
245f6de1 2864 }
f663d9ad 2865 }
245f6de1
JJ
2866 /* |---store 1---||---store 2---|
2867 This store is consecutive to the previous one.
2868 Merge it into the current store group. There can be gaps in between
2869 the stores, but there can't be gaps in between bitregions. */
c94c3532 2870 else if (info->bitregion_start <= merged_store->bitregion_end
7f5a3982 2871 && merged_store->can_be_merged_into (info))
f663d9ad 2872 {
245f6de1
JJ
2873 store_immediate_info *infof = merged_store->stores[0];
2874
2875 /* All the rhs_code ops that take 2 operands are commutative,
2876 swap the operands if it could make the operands compatible. */
2877 if (infof->ops[0].base_addr
2878 && infof->ops[1].base_addr
2879 && info->ops[0].base_addr
2880 && info->ops[1].base_addr
8a91d545
RS
2881 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
2882 info->bitpos - infof->bitpos)
245f6de1
JJ
2883 && operand_equal_p (info->ops[1].base_addr,
2884 infof->ops[0].base_addr, 0))
127ef369
JJ
2885 {
2886 std::swap (info->ops[0], info->ops[1]);
2887 info->ops_swapped_p = true;
2888 }
4d213bf6 2889 if (check_no_overlap (m_store_info, i, false,
a7fe6482
JJ
2890 MAX (merged_store->last_order, info->order),
2891 MAX (merged_store->start + merged_store->width,
7f5a3982 2892 info->bitpos + info->bitsize)))
245f6de1 2893 {
7f5a3982
EB
2894 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
2895 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
2896 {
2897 info->rhs_code = BIT_INSERT_EXPR;
2898 info->ops[0].val = gimple_assign_rhs1 (info->stmt);
2899 info->ops[0].base_addr = NULL_TREE;
2900 }
2901 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
2902 {
2903 store_immediate_info *infoj;
2904 unsigned int j;
2905 FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
2906 {
2907 infoj->rhs_code = BIT_INSERT_EXPR;
2908 infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
2909 infoj->ops[0].base_addr = NULL_TREE;
2910 }
2911 }
2912 if ((infof->ops[0].base_addr
2913 ? compatible_load_p (merged_store, info, base_addr, 0)
2914 : !info->ops[0].base_addr)
2915 && (infof->ops[1].base_addr
2916 ? compatible_load_p (merged_store, info, base_addr, 1)
2917 : !info->ops[1].base_addr))
2918 {
2919 merged_store->merge_into (info);
2920 goto done;
2921 }
245f6de1
JJ
2922 }
2923 }
f663d9ad 2924
245f6de1
JJ
2925 /* |---store 1---| <gap> |---store 2---|.
2926 Gap between stores or the rhs not compatible. Start a new group. */
f663d9ad 2927
245f6de1
JJ
2928 /* Try to apply all the stores recorded for the group to determine
2929 the bitpattern they write and discard it if that fails.
2930 This will also reject single-store groups. */
c94c3532 2931 if (merged_store->apply_stores ())
245f6de1 2932 m_merged_store_groups.safe_push (merged_store);
c94c3532
EB
2933 else
2934 delete merged_store;
f663d9ad 2935
245f6de1 2936 merged_store = new merged_store_group (info);
c94c3532
EB
2937 if (dump_file && (dump_flags & TDF_DETAILS))
2938 fputs ("New store group\n", dump_file);
2939
2940 done:
2941 if (dump_file && (dump_flags & TDF_DETAILS))
2942 {
2943 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
2944 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
2945 i, info->bitsize, info->bitpos);
2946 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
2947 fputc ('\n', dump_file);
2948 }
f663d9ad
KT
2949 }
2950
a62b3dc5 2951 /* Record or discard the last store group. */
4b84d9b8
JJ
2952 if (merged_store)
2953 {
c94c3532 2954 if (merged_store->apply_stores ())
4b84d9b8 2955 m_merged_store_groups.safe_push (merged_store);
c94c3532
EB
2956 else
2957 delete merged_store;
4b84d9b8 2958 }
f663d9ad
KT
2959
2960 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
c94c3532 2961
f663d9ad
KT
2962 bool success
2963 = !m_merged_store_groups.is_empty ()
2964 && m_merged_store_groups.length () < m_store_info.length ();
2965
2966 if (success && dump_file)
c94c3532 2967 fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
a62b3dc5 2968 m_merged_store_groups.length ());
f663d9ad
KT
2969
2970 return success;
2971}
2972
245f6de1
JJ
2973/* Return the type to use for the merged stores or loads described by STMTS.
2974 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
2975 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
2976 of the MEM_REFs if any. */
f663d9ad
KT
2977
2978static tree
245f6de1
JJ
2979get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
2980 unsigned short *cliquep, unsigned short *basep)
f663d9ad
KT
2981{
2982 gimple *stmt;
2983 unsigned int i;
245f6de1
JJ
2984 tree type = NULL_TREE;
2985 tree ret = NULL_TREE;
2986 *cliquep = 0;
2987 *basep = 0;
f663d9ad
KT
2988
2989 FOR_EACH_VEC_ELT (stmts, i, stmt)
2990 {
245f6de1
JJ
2991 tree ref = is_load ? gimple_assign_rhs1 (stmt)
2992 : gimple_assign_lhs (stmt);
2993 tree type1 = reference_alias_ptr_type (ref);
2994 tree base = get_base_address (ref);
f663d9ad 2995
245f6de1
JJ
2996 if (i == 0)
2997 {
2998 if (TREE_CODE (base) == MEM_REF)
2999 {
3000 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3001 *basep = MR_DEPENDENCE_BASE (base);
3002 }
3003 ret = type = type1;
3004 continue;
3005 }
f663d9ad 3006 if (!alias_ptr_types_compatible_p (type, type1))
245f6de1
JJ
3007 ret = ptr_type_node;
3008 if (TREE_CODE (base) != MEM_REF
3009 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3010 || *basep != MR_DEPENDENCE_BASE (base))
3011 {
3012 *cliquep = 0;
3013 *basep = 0;
3014 }
f663d9ad 3015 }
245f6de1 3016 return ret;
f663d9ad
KT
3017}
3018
3019/* Return the location_t information we can find among the statements
3020 in STMTS. */
3021
3022static location_t
245f6de1 3023get_location_for_stmts (vec<gimple *> &stmts)
f663d9ad
KT
3024{
3025 gimple *stmt;
3026 unsigned int i;
3027
3028 FOR_EACH_VEC_ELT (stmts, i, stmt)
3029 if (gimple_has_location (stmt))
3030 return gimple_location (stmt);
3031
3032 return UNKNOWN_LOCATION;
3033}
3034
 3035/* Used to describe a store resulting from splitting a wide store into smaller
3036 regularly-sized stores in split_group. */
3037
6c1dae73 3038class split_store
f663d9ad 3039{
6c1dae73 3040public:
f663d9ad
KT
3041 unsigned HOST_WIDE_INT bytepos;
3042 unsigned HOST_WIDE_INT size;
3043 unsigned HOST_WIDE_INT align;
245f6de1 3044 auto_vec<store_immediate_info *> orig_stores;
a62b3dc5
JJ
3045 /* True if there is a single orig stmt covering the whole split store. */
3046 bool orig;
f663d9ad
KT
3047 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3048 unsigned HOST_WIDE_INT);
3049};
3050
3051/* Simple constructor. */
3052
3053split_store::split_store (unsigned HOST_WIDE_INT bp,
3054 unsigned HOST_WIDE_INT sz,
3055 unsigned HOST_WIDE_INT al)
a62b3dc5 3056 : bytepos (bp), size (sz), align (al), orig (false)
f663d9ad 3057{
245f6de1 3058 orig_stores.create (0);
f663d9ad
KT
3059}
3060
245f6de1
JJ
 3061/* Record all stores in GROUP that write to the region starting at BITPOS
 3062 and of size BITSIZE. Record infos for such statements in STORES if
3063 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
5384a802
JJ
3064 if there is exactly one original store in the range (in that case ignore
3065 clobber stmts, unless there are only clobber stmts). */
f663d9ad 3066
a62b3dc5 3067static store_immediate_info *
99b1c316 3068find_constituent_stores (class merged_store_group *group,
245f6de1
JJ
3069 vec<store_immediate_info *> *stores,
3070 unsigned int *first,
3071 unsigned HOST_WIDE_INT bitpos,
3072 unsigned HOST_WIDE_INT bitsize)
f663d9ad 3073{
a62b3dc5 3074 store_immediate_info *info, *ret = NULL;
f663d9ad 3075 unsigned int i;
a62b3dc5
JJ
3076 bool second = false;
3077 bool update_first = true;
f663d9ad 3078 unsigned HOST_WIDE_INT end = bitpos + bitsize;
a62b3dc5 3079 for (i = *first; group->stores.iterate (i, &info); ++i)
f663d9ad
KT
3080 {
3081 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3082 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
a62b3dc5
JJ
3083 if (stmt_end <= bitpos)
3084 {
3085 /* BITPOS passed to this function never decreases from within the
3086 same split_group call, so optimize and don't scan info records
3087 which are known to end before or at BITPOS next time.
3088 Only do it if all stores before this one also pass this. */
3089 if (update_first)
3090 *first = i + 1;
3091 continue;
3092 }
3093 else
3094 update_first = false;
3095
f663d9ad 3096 /* The stores in GROUP are ordered by bitposition so if we're past
a62b3dc5
JJ
3097 the region for this group return early. */
3098 if (stmt_start >= end)
3099 return ret;
3100
5384a802
JJ
3101 if (gimple_clobber_p (info->stmt))
3102 {
3103 if (stores)
3104 stores->safe_push (info);
3105 if (ret == NULL)
3106 ret = info;
3107 continue;
3108 }
245f6de1 3109 if (stores)
a62b3dc5 3110 {
245f6de1 3111 stores->safe_push (info);
5384a802 3112 if (ret && !gimple_clobber_p (ret->stmt))
a62b3dc5
JJ
3113 {
3114 ret = NULL;
3115 second = true;
3116 }
3117 }
5384a802 3118 else if (ret && !gimple_clobber_p (ret->stmt))
a62b3dc5
JJ
3119 return NULL;
3120 if (!second)
3121 ret = info;
f663d9ad 3122 }
a62b3dc5 3123 return ret;
f663d9ad
KT
3124}
3125
d7a9512e
JJ
 3126/* Return how many SSA_NAMEs used to compute the value stored by INFO
 3127 have multiple uses. If any SSA_NAME has multiple uses, also
3128 count statements needed to compute it. */
3129
3130static unsigned
3131count_multiple_uses (store_immediate_info *info)
3132{
3133 gimple *stmt = info->stmt;
3134 unsigned ret = 0;
3135 switch (info->rhs_code)
3136 {
3137 case INTEGER_CST:
3138 return 0;
3139 case BIT_AND_EXPR:
3140 case BIT_IOR_EXPR:
3141 case BIT_XOR_EXPR:
d60edaba
JJ
3142 if (info->bit_not_p)
3143 {
3144 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3145 ret = 1; /* Fall through below to return
3146 the BIT_NOT_EXPR stmt and then
3147 BIT_{AND,IOR,XOR}_EXPR and anything it
3148 uses. */
3149 else
3150 /* stmt is after this the BIT_NOT_EXPR. */
3151 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3152 }
d7a9512e
JJ
3153 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3154 {
3155 ret += 1 + info->ops[0].bit_not_p;
3156 if (info->ops[1].base_addr)
3157 ret += 1 + info->ops[1].bit_not_p;
3158 return ret + 1;
3159 }
3160 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3161 /* stmt is now the BIT_*_EXPR. */
3162 if (!has_single_use (gimple_assign_rhs1 (stmt)))
127ef369
JJ
3163 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3164 else if (info->ops[info->ops_swapped_p].bit_not_p)
d7a9512e
JJ
3165 {
3166 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3167 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3168 ++ret;
3169 }
3170 if (info->ops[1].base_addr == NULL_TREE)
127ef369
JJ
3171 {
3172 gcc_checking_assert (!info->ops_swapped_p);
3173 return ret;
3174 }
d7a9512e 3175 if (!has_single_use (gimple_assign_rhs2 (stmt)))
127ef369
JJ
3176 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3177 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
d7a9512e
JJ
3178 {
3179 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3180 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3181 ++ret;
3182 }
3183 return ret;
3184 case MEM_REF:
3185 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3186 return 1 + info->ops[0].bit_not_p;
3187 else if (info->ops[0].bit_not_p)
3188 {
3189 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3190 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3191 return 1;
3192 }
3193 return 0;
c94c3532
EB
3194 case BIT_INSERT_EXPR:
3195 return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
d7a9512e
JJ
3196 default:
3197 gcc_unreachable ();
3198 }
3199}
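
/* Illustrative sketch, not part of the pass: the value feeding the first
   store also has a second use, so the statement computing it cannot simply
   disappear when the stores are merged; count_multiple_uses feeds this
   into the old-vs-new statement count heuristic used by split_group.
   All names are invented.  */
struct two_uc { unsigned char a, b; };

void
store_with_extra_use (struct two_uc *d, unsigned char x, unsigned char *keep)
{
  unsigned char t = x & 0x7f;
  d->a = t;     /* store candidate */
  *keep = t;    /* second use of t */
  d->b = 0;
}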
3200
f663d9ad 3201/* Split a merged store described by GROUP by populating the SPLIT_STORES
a62b3dc5
JJ
3202 vector (if non-NULL) with split_store structs describing the byte offset
3203 (from the base), the bit size and alignment of each store as well as the
3204 original statements involved in each such split group.
f663d9ad
KT
3205 This is to separate the splitting strategy from the statement
3206 building/emission/linking done in output_merged_store.
a62b3dc5 3207 Return number of new stores.
245f6de1
JJ
3208 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3209 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3afd514b
JJ
3210 BZERO_FIRST may be true only when the first store covers the whole group
3211 and clears it; if BZERO_FIRST is true, keep that first store in the set
3212 unmodified and emit further stores for the overrides only.
a62b3dc5
JJ
3213 If SPLIT_STORES is NULL, it is just a dry run to count number of
3214 new stores. */
f663d9ad 3215
a62b3dc5 3216static unsigned int
245f6de1 3217split_group (merged_store_group *group, bool allow_unaligned_store,
3afd514b 3218 bool allow_unaligned_load, bool bzero_first,
99b1c316 3219 vec<split_store *> *split_stores,
d7a9512e
JJ
3220 unsigned *total_orig,
3221 unsigned *total_new)
f663d9ad 3222{
a62b3dc5
JJ
3223 unsigned HOST_WIDE_INT pos = group->bitregion_start;
3224 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
f663d9ad 3225 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
a62b3dc5
JJ
3226 unsigned HOST_WIDE_INT group_align = group->align;
3227 unsigned HOST_WIDE_INT align_base = group->align_base;
245f6de1 3228 unsigned HOST_WIDE_INT group_load_align = group_align;
d7a9512e 3229 bool any_orig = false;
f663d9ad 3230
f663d9ad
KT
3231 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3232
4b84d9b8
JJ
3233 if (group->stores[0]->rhs_code == LROTATE_EXPR
3234 || group->stores[0]->rhs_code == NOP_EXPR)
3235 {
3afd514b 3236 gcc_assert (!bzero_first);
4b84d9b8
JJ
3237 /* For bswap framework using sets of stores, all the checking
3238 has been done earlier in try_coalesce_bswap and needs to be
3239 emitted as a single store. */
3240 if (total_orig)
3241 {
3242 /* Avoid the old/new stmt count heuristics. It should be
3243 always beneficial. */
3244 total_new[0] = 1;
3245 total_orig[0] = 2;
3246 }
3247
3248 if (split_stores)
3249 {
3250 unsigned HOST_WIDE_INT align_bitpos
3251 = (group->start - align_base) & (group_align - 1);
3252 unsigned HOST_WIDE_INT align = group_align;
3253 if (align_bitpos)
3254 align = least_bit_hwi (align_bitpos);
3255 bytepos = group->start / BITS_PER_UNIT;
99b1c316 3256 split_store *store
4b84d9b8
JJ
3257 = new split_store (bytepos, group->width, align);
3258 unsigned int first = 0;
3259 find_constituent_stores (group, &store->orig_stores,
3260 &first, group->start, group->width);
3261 split_stores->safe_push (store);
3262 }
3263
3264 return 1;
3265 }
3266
a62b3dc5 3267 unsigned int ret = 0, first = 0;
f663d9ad 3268 unsigned HOST_WIDE_INT try_pos = bytepos;
f663d9ad 3269
d7a9512e
JJ
3270 if (total_orig)
3271 {
3272 unsigned int i;
3273 store_immediate_info *info = group->stores[0];
3274
3275 total_new[0] = 0;
3276 total_orig[0] = 1; /* The orig store. */
3277 info = group->stores[0];
3278 if (info->ops[0].base_addr)
a6fbd154 3279 total_orig[0]++;
d7a9512e 3280 if (info->ops[1].base_addr)
a6fbd154 3281 total_orig[0]++;
d7a9512e
JJ
3282 switch (info->rhs_code)
3283 {
3284 case BIT_AND_EXPR:
3285 case BIT_IOR_EXPR:
3286 case BIT_XOR_EXPR:
3287 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3288 break;
3289 default:
3290 break;
3291 }
3292 total_orig[0] *= group->stores.length ();
3293
3294 FOR_EACH_VEC_ELT (group->stores, i, info)
a6fbd154
JJ
3295 {
3296 total_new[0] += count_multiple_uses (info);
3297 total_orig[0] += (info->bit_not_p
3298 + info->ops[0].bit_not_p
3299 + info->ops[1].bit_not_p);
3300 }
d7a9512e
JJ
3301 }
3302
245f6de1
JJ
3303 if (!allow_unaligned_load)
3304 for (int i = 0; i < 2; ++i)
3305 if (group->load_align[i])
3306 group_load_align = MIN (group_load_align, group->load_align[i]);
3307
3afd514b
JJ
3308 if (bzero_first)
3309 {
5384a802
JJ
3310 store_immediate_info *gstore;
3311 FOR_EACH_VEC_ELT (group->stores, first, gstore)
3312 if (!gimple_clobber_p (gstore->stmt))
3313 break;
3314 ++first;
3afd514b
JJ
3315 ret = 1;
3316 if (split_stores)
3317 {
99b1c316 3318 split_store *store
5384a802
JJ
3319 = new split_store (bytepos, gstore->bitsize, align_base);
3320 store->orig_stores.safe_push (gstore);
3afd514b
JJ
3321 store->orig = true;
3322 any_orig = true;
3323 split_stores->safe_push (store);
3324 }
3325 }
3326
f663d9ad
KT
3327 while (size > 0)
3328 {
245f6de1 3329 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3afd514b
JJ
3330 && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3331 || (bzero_first && group->val[try_pos - bytepos] == 0)))
a62b3dc5
JJ
3332 {
3333 /* Skip padding bytes. */
3334 ++try_pos;
3335 size -= BITS_PER_UNIT;
3336 continue;
3337 }
3338
f663d9ad 3339 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
a62b3dc5
JJ
3340 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3341 unsigned HOST_WIDE_INT align_bitpos
3342 = (try_bitpos - align_base) & (group_align - 1);
3343 unsigned HOST_WIDE_INT align = group_align;
5384a802 3344 bool found_orig = false;
a62b3dc5
JJ
3345 if (align_bitpos)
3346 align = least_bit_hwi (align_bitpos);
245f6de1 3347 if (!allow_unaligned_store)
a62b3dc5 3348 try_size = MIN (try_size, align);
245f6de1
JJ
3349 if (!allow_unaligned_load)
3350 {
3351 /* If we can't do or don't want to do unaligned stores
3352 as well as loads, we need to take the loads into account
3353 as well. */
3354 unsigned HOST_WIDE_INT load_align = group_load_align;
3355 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3356 if (align_bitpos)
3357 load_align = least_bit_hwi (align_bitpos);
3358 for (int i = 0; i < 2; ++i)
3359 if (group->load_align[i])
3360 {
8a91d545
RS
3361 align_bitpos
3362 = known_alignment (try_bitpos
3363 - group->stores[0]->bitpos
3364 + group->stores[0]->ops[i].bitpos
3365 - group->load_align_base[i]);
3366 if (align_bitpos & (group_load_align - 1))
245f6de1
JJ
3367 {
3368 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3369 load_align = MIN (load_align, a);
3370 }
3371 }
3372 try_size = MIN (try_size, load_align);
3373 }
a62b3dc5 3374 store_immediate_info *info
245f6de1 3375 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
5384a802 3376 if (info && !gimple_clobber_p (info->stmt))
a62b3dc5
JJ
3377 {
3378 /* If there is just one original statement for the range, see if
3379 we can just reuse the original store which could be even larger
3380 than try_size. */
3381 unsigned HOST_WIDE_INT stmt_end
3382 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
245f6de1
JJ
3383 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3384 stmt_end - try_bitpos);
a62b3dc5
JJ
3385 if (info && info->bitpos >= try_bitpos)
3386 {
5384a802
JJ
3387 store_immediate_info *info2 = NULL;
3388 unsigned int first_copy = first;
3389 if (info->bitpos > try_bitpos
3390 && stmt_end - try_bitpos <= try_size)
3391 {
3392 info2 = find_constituent_stores (group, NULL, &first_copy,
3393 try_bitpos,
3394 info->bitpos - try_bitpos);
3395 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3396 }
3397 if (info2 == NULL && stmt_end - try_bitpos < try_size)
3398 {
3399 info2 = find_constituent_stores (group, NULL, &first_copy,
3400 stmt_end,
3401 (try_bitpos + try_size)
3402 - stmt_end);
3403 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3404 }
3405 if (info2 == NULL)
3406 {
3407 try_size = stmt_end - try_bitpos;
3408 found_orig = true;
3409 goto found;
3410 }
a62b3dc5
JJ
3411 }
3412 }
f663d9ad 3413
a62b3dc5
JJ
3414 /* Approximate store bitsize for the case when there are no padding
3415 bits. */
3416 while (try_size > size)
3417 try_size /= 2;
3418 /* Now look for whole padding bytes at the end of that bitsize. */
3419 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3420 if (group->mask[try_pos - bytepos + nonmasked - 1]
3afd514b
JJ
3421 != (unsigned char) ~0U
3422 && (!bzero_first
3423 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
a62b3dc5 3424 break;
5384a802 3425 if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
a62b3dc5
JJ
3426 {
3427 /* If entire try_size range is padding, skip it. */
3428 try_pos += try_size / BITS_PER_UNIT;
3429 size -= try_size;
3430 continue;
3431 }
3432 /* Otherwise try to decrease try_size if the second half, the last 3
3433 quarters, etc. are padding. */
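/* Worked example, for illustration only: with try_size == 32 bits and just
   the first byte actually stored (nonmasked == 8 bits after the
   multiplication below), the loop halves try_size while
   nonmasked <= try_size / 2, i.e. 32 -> 16 -> 8, so only an 8-bit store is
   attempted here and the padding is left to the following iterations.  */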
3434 nonmasked *= BITS_PER_UNIT;
3435 while (nonmasked <= try_size / 2)
3436 try_size /= 2;
245f6de1 3437 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
a62b3dc5
JJ
3438 {
3439 /* Now look for whole padding bytes at the start of that bitsize. */
3440 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3441 for (masked = 0; masked < try_bytesize; ++masked)
3afd514b
JJ
3442 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3443 && (!bzero_first
3444 || group->val[try_pos - bytepos + masked] != 0))
a62b3dc5
JJ
3445 break;
3446 masked *= BITS_PER_UNIT;
3447 gcc_assert (masked < try_size);
3448 if (masked >= try_size / 2)
3449 {
3450 while (masked >= try_size / 2)
3451 {
3452 try_size /= 2;
3453 try_pos += try_size / BITS_PER_UNIT;
3454 size -= try_size;
3455 masked -= try_size;
3456 }
3457 /* Need to recompute the alignment, so just retry at the new
3458 position. */
3459 continue;
3460 }
3461 }
3462
3463 found:
3464 ++ret;
f663d9ad 3465
a62b3dc5
JJ
3466 if (split_stores)
3467 {
99b1c316 3468 split_store *store
a62b3dc5 3469 = new split_store (try_pos, try_size, align);
245f6de1
JJ
3470 info = find_constituent_stores (group, &store->orig_stores,
3471 &first, try_bitpos, try_size);
a62b3dc5 3472 if (info
5384a802 3473 && !gimple_clobber_p (info->stmt)
a62b3dc5 3474 && info->bitpos >= try_bitpos
5384a802
JJ
3475 && info->bitpos + info->bitsize <= try_bitpos + try_size
3476 && (store->orig_stores.length () == 1
3477 || found_orig
3478 || (info->bitpos == try_bitpos
3479 && (info->bitpos + info->bitsize
3480 == try_bitpos + try_size))))
d7a9512e
JJ
3481 {
3482 store->orig = true;
3483 any_orig = true;
3484 }
a62b3dc5
JJ
3485 split_stores->safe_push (store);
3486 }
3487
3488 try_pos += try_size / BITS_PER_UNIT;
f663d9ad 3489 size -= try_size;
f663d9ad 3490 }
a62b3dc5 3491
d7a9512e
JJ
3492 if (total_orig)
3493 {
a6fbd154 3494 unsigned int i;
99b1c316 3495 split_store *store;
d7a9512e
JJ
3496 /* If we are reusing some original stores and any of the
3497 original SSA_NAMEs had multiple uses, we need to subtract
3498 those now before we add the new ones. */
3499 if (total_new[0] && any_orig)
3500 {
d7a9512e
JJ
3501 FOR_EACH_VEC_ELT (*split_stores, i, store)
3502 if (store->orig)
3503 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3504 }
3505 total_new[0] += ret; /* The new store. */
3506 store_immediate_info *info = group->stores[0];
3507 if (info->ops[0].base_addr)
a6fbd154 3508 total_new[0] += ret;
d7a9512e 3509 if (info->ops[1].base_addr)
a6fbd154 3510 total_new[0] += ret;
d7a9512e
JJ
3511 switch (info->rhs_code)
3512 {
3513 case BIT_AND_EXPR:
3514 case BIT_IOR_EXPR:
3515 case BIT_XOR_EXPR:
3516 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
3517 break;
3518 default:
3519 break;
3520 }
a6fbd154
JJ
3521 FOR_EACH_VEC_ELT (*split_stores, i, store)
3522 {
3523 unsigned int j;
3524 bool bit_not_p[3] = { false, false, false };
3525 /* If all orig_stores have certain bit_not_p set, then
3526 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3527 If some orig_stores have certain bit_not_p set, then
3528 we'd use a BIT_XOR_EXPR with a mask and need to account for
3529 it. */
3530 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3531 {
3532 if (info->ops[0].bit_not_p)
3533 bit_not_p[0] = true;
3534 if (info->ops[1].bit_not_p)
3535 bit_not_p[1] = true;
3536 if (info->bit_not_p)
3537 bit_not_p[2] = true;
3538 }
3539 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3540 }
3541
d7a9512e
JJ
3542 }
3543
a62b3dc5 3544 return ret;
f663d9ad
KT
3545}
3546
a6fbd154
JJ
3547/* Return the operation through which the operand IDX (if < 2) or
3548 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3549 is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3550 the bits should be xored with mask. */
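/* For illustration (not from the original sources): if every constituent
   store of SPLIT_STORE has the relevant bit_not_p set and none of them is a
   narrower-than-bitsize bool/bit-field store, a single BIT_NOT_EXPR of the
   wide value suffices.  If only some of them are inverted, say the bytes at
   offsets 0-1 of a 4-byte split but not bytes 2-3, BIT_XOR_EXPR is returned
   instead with a mask such as 0x0000ffff (little-endian), so that only the
   inverted parts are flipped.  */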
3551
3552static enum tree_code
3553invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3554{
3555 unsigned int i;
3556 store_immediate_info *info;
3557 unsigned int cnt = 0;
e215422f 3558 bool any_paddings = false;
a6fbd154
JJ
3559 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3560 {
3561 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3562 if (bit_not_p)
e215422f
JJ
3563 {
3564 ++cnt;
3565 tree lhs = gimple_assign_lhs (info->stmt);
3566 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3567 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3568 any_paddings = true;
3569 }
a6fbd154
JJ
3570 }
3571 mask = NULL_TREE;
3572 if (cnt == 0)
3573 return NOP_EXPR;
e215422f 3574 if (cnt == split_store->orig_stores.length () && !any_paddings)
a6fbd154
JJ
3575 return BIT_NOT_EXPR;
3576
3577 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3578 unsigned buf_size = split_store->size / BITS_PER_UNIT;
3579 unsigned char *buf
3580 = XALLOCAVEC (unsigned char, buf_size);
3581 memset (buf, ~0U, buf_size);
3582 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3583 {
3584 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3585 if (!bit_not_p)
3586 continue;
3587 /* Clear regions with bit_not_p and invert afterwards, rather than
3588 clear regions with !bit_not_p, so that gaps in between stores aren't
3589 set in the mask. */
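/* For example (illustrative): with bit_not_p stores covering bytes 0 and 2
   of a 4-byte buffer and a gap at byte 1, the buffer after the clearing
   below is { 0x00, 0xff, 0x00, 0xff } and after the final inversion
   { 0xff, 0x00, 0xff, 0x00 }, so the untouched gap byte is not xored.  */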
3590 unsigned HOST_WIDE_INT bitsize = info->bitsize;
e215422f 3591 unsigned HOST_WIDE_INT prec = bitsize;
a6fbd154 3592 unsigned int pos_in_buffer = 0;
e215422f
JJ
3593 if (any_paddings)
3594 {
3595 tree lhs = gimple_assign_lhs (info->stmt);
3596 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3597 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3598 prec = TYPE_PRECISION (TREE_TYPE (lhs));
3599 }
a6fbd154
JJ
3600 if (info->bitpos < try_bitpos)
3601 {
3602 gcc_assert (info->bitpos + bitsize > try_bitpos);
e215422f
JJ
3603 if (!BYTES_BIG_ENDIAN)
3604 {
3605 if (prec <= try_bitpos - info->bitpos)
3606 continue;
3607 prec -= try_bitpos - info->bitpos;
3608 }
3609 bitsize -= try_bitpos - info->bitpos;
3610 if (BYTES_BIG_ENDIAN && prec > bitsize)
3611 prec = bitsize;
a6fbd154
JJ
3612 }
3613 else
3614 pos_in_buffer = info->bitpos - try_bitpos;
e215422f
JJ
3615 if (prec < bitsize)
3616 {
3617 /* If this is a bool inversion, invert just the least significant
3618 prec bits rather than all bits of it. */
3619 if (BYTES_BIG_ENDIAN)
3620 {
3621 pos_in_buffer += bitsize - prec;
3622 if (pos_in_buffer >= split_store->size)
3623 continue;
3624 }
3625 bitsize = prec;
3626 }
a6fbd154
JJ
3627 if (pos_in_buffer + bitsize > split_store->size)
3628 bitsize = split_store->size - pos_in_buffer;
3629 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3630 if (BYTES_BIG_ENDIAN)
3631 clear_bit_region_be (p, (BITS_PER_UNIT - 1
3632 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
3633 else
3634 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
3635 }
3636 for (unsigned int i = 0; i < buf_size; ++i)
3637 buf[i] = ~buf[i];
3638 mask = native_interpret_expr (int_type, buf, buf_size);
3639 return BIT_XOR_EXPR;
3640}
3641
f663d9ad
KT
3642/* Given a merged store group GROUP output the widened version of it.
3643 The store chain is against the base object BASE.
3644 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3645 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3646 Make sure that the number of statements output is less than the number of
3647 original statements. If a better sequence is possible emit it and
3648 return true. */
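/* A possible shape of the emitted replacement, purely for illustration
   (the names, type and offset are made up): for each split store the
   function builds a MEM_REF destination of an aligned integer type and a
   final assignment such as
     MEM <unsigned int> [(T *)&s] = _5;
   possibly preceded by a masked load of the destination and BIT_AND/BIT_IOR
   statements when only part of the region is rewritten.  */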
3649
3650bool
b5926e23 3651imm_store_chain_info::output_merged_store (merged_store_group *group)
f663d9ad 3652{
dd172744
RB
3653 split_store *split_store;
3654 unsigned int i;
a62b3dc5
JJ
3655 unsigned HOST_WIDE_INT start_byte_pos
3656 = group->bitregion_start / BITS_PER_UNIT;
f663d9ad
KT
3657
3658 unsigned int orig_num_stmts = group->stores.length ();
3659 if (orig_num_stmts < 2)
3660 return false;
3661
99b1c316 3662 auto_vec<class split_store *, 32> split_stores;
245f6de1 3663 bool allow_unaligned_store
028d4092 3664 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
245f6de1 3665 bool allow_unaligned_load = allow_unaligned_store;
3afd514b 3666 bool bzero_first = false;
5384a802
JJ
3667 store_immediate_info *store;
3668 unsigned int num_clobber_stmts = 0;
3669 if (group->stores[0]->rhs_code == INTEGER_CST)
3670 {
3671 FOR_EACH_VEC_ELT (group->stores, i, store)
3672 if (gimple_clobber_p (store->stmt))
3673 num_clobber_stmts++;
3674 else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
3675 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
3676 && group->start == store->bitpos
3677 && group->width == store->bitsize
3678 && (group->start % BITS_PER_UNIT) == 0
3679 && (group->width % BITS_PER_UNIT) == 0)
3680 {
3681 bzero_first = true;
3682 break;
3683 }
3684 else
3685 break;
3686 FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
3687 if (gimple_clobber_p (store->stmt))
3688 num_clobber_stmts++;
3689 if (num_clobber_stmts == orig_num_stmts)
3690 return false;
3691 orig_num_stmts -= num_clobber_stmts;
3692 }
3afd514b 3693 if (allow_unaligned_store || bzero_first)
a62b3dc5
JJ
3694 {
3695 /* If unaligned stores are allowed, see how many stores we'd emit
3696 for unaligned and how many stores we'd emit for aligned stores.
3afd514b
JJ
3697 Only use unaligned stores if they result in fewer stores than aligned ones.
3698 Similarly, if the first store clears the whole region, prefer expanding
3699 it together with the later stores rather than emitting the clear first
3700 followed by the merged further stores. */
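/* In other words (illustrative summary): pass 0 below counts aligned stores
   without keeping the clearing store, pass 1 unaligned stores, pass 2
   aligned stores keeping the first clearing store, pass 3 both; the
   cheapest variant then decides allow_unaligned_store and bzero_first.  */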
3701 unsigned cnt[4] = { ~0, ~0, ~0, ~0 };
3702 int pass_min = 0;
3703 for (int pass = 0; pass < 4; ++pass)
3704 {
3705 if (!allow_unaligned_store && (pass & 1) != 0)
3706 continue;
3707 if (!bzero_first && (pass & 2) != 0)
3708 continue;
3709 cnt[pass] = split_group (group, (pass & 1) != 0,
3710 allow_unaligned_load, (pass & 2) != 0,
3711 NULL, NULL, NULL);
3712 if (cnt[pass] < cnt[pass_min])
3713 pass_min = pass;
3714 }
3715 if ((pass_min & 1) == 0)
245f6de1 3716 allow_unaligned_store = false;
3afd514b
JJ
3717 if ((pass_min & 2) == 0)
3718 bzero_first = false;
a62b3dc5 3719 }
d7a9512e 3720 unsigned total_orig, total_new;
3afd514b 3721 split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
d7a9512e 3722 &split_stores, &total_orig, &total_new);
a62b3dc5 3723
5384a802
JJ
3724 /* Determine if there is a clobber covering the whole group at the start,
3725 followed by proposed split stores that cover the whole group. In that
3726 case, prefer the transformation even if
3727 split_stores.length () == orig_num_stmts. */
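/* For instance (illustrative): a clobber of the whole of `s' followed by
   stores to each of its members yields proposed split stores that exactly
   re-cover `s'; the rewrite is then accepted even when the number of split
   stores equals the number of original non-clobber stores.  */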
3728 bool clobber_first = false;
3729 if (num_clobber_stmts
3730 && gimple_clobber_p (group->stores[0]->stmt)
3731 && group->start == group->stores[0]->bitpos
3732 && group->width == group->stores[0]->bitsize
3733 && (group->start % BITS_PER_UNIT) == 0
3734 && (group->width % BITS_PER_UNIT) == 0)
3735 {
3736 clobber_first = true;
3737 unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
3738 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3739 if (split_store->bytepos != pos)
3740 {
3741 clobber_first = false;
3742 break;
3743 }
3744 else
3745 pos += split_store->size / BITS_PER_UNIT;
3746 if (pos != (group->start + group->width) / BITS_PER_UNIT)
3747 clobber_first = false;
3748 }
3749
3750 if (split_stores.length () >= orig_num_stmts + clobber_first)
a62b3dc5 3751 {
5384a802 3752
a62b3dc5
JJ
3753 /* We didn't manage to reduce the number of statements. Bail out. */
3754 if (dump_file && (dump_flags & TDF_DETAILS))
d7a9512e
JJ
3755 fprintf (dump_file, "Exceeded original number of stmts (%u)."
3756 " Not profitable to emit new sequence.\n",
3757 orig_num_stmts);
dd172744
RB
3758 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3759 delete split_store;
a62b3dc5
JJ
3760 return false;
3761 }
d7a9512e
JJ
3762 if (total_orig <= total_new)
3763 {
3764 /* If the estimated number of new statements is not below the estimated
3765 number of original statements, bail out too. */
3766 if (dump_file && (dump_flags & TDF_DETAILS))
3767 fprintf (dump_file, "Estimated number of original stmts (%u)"
3768 " not larger than estimated number of new"
3769 " stmts (%u).\n",
3770 total_orig, total_new);
dd172744
RB
3771 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3772 delete split_store;
4b84d9b8 3773 return false;
d7a9512e 3774 }
5384a802
JJ
3775 if (group->stores[0]->rhs_code == INTEGER_CST)
3776 {
3777 bool all_orig = true;
3778 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3779 if (!split_store->orig)
3780 {
3781 all_orig = false;
3782 break;
3783 }
3784 if (all_orig)
3785 {
3786 unsigned int cnt = split_stores.length ();
3787 store_immediate_info *store;
3788 FOR_EACH_VEC_ELT (group->stores, i, store)
3789 if (gimple_clobber_p (store->stmt))
3790 ++cnt;
3791 /* Punt if we wouldn't make any real changes, i.e. keep all
3792 orig stmts + all clobbers. */
3793 if (cnt == group->stores.length ())
3794 {
3795 if (dump_file && (dump_flags & TDF_DETAILS))
3796 fprintf (dump_file, "Exceeded original number of stmts (%u)."
3797 " Not profitable to emit new sequence.\n",
3798 orig_num_stmts);
3799 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3800 delete split_store;
3801 return false;
3802 }
3803 }
3804 }
f663d9ad
KT
3805
3806 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
3807 gimple_seq seq = NULL;
f663d9ad
KT
3808 tree last_vdef, new_vuse;
3809 last_vdef = gimple_vdef (group->last_stmt);
3810 new_vuse = gimple_vuse (group->last_stmt);
4b84d9b8
JJ
3811 tree bswap_res = NULL_TREE;
3812
5384a802
JJ
3813 /* Clobbers are not removed. */
3814 if (gimple_clobber_p (group->last_stmt))
3815 {
3816 new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
3817 gimple_set_vdef (group->last_stmt, new_vuse);
3818 }
3819
4b84d9b8
JJ
3820 if (group->stores[0]->rhs_code == LROTATE_EXPR
3821 || group->stores[0]->rhs_code == NOP_EXPR)
3822 {
3823 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
3824 gimple *ins_stmt = group->stores[0]->ins_stmt;
3825 struct symbolic_number *n = &group->stores[0]->n;
3826 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
3827
3828 switch (n->range)
3829 {
3830 case 16:
3831 load_type = bswap_type = uint16_type_node;
3832 break;
3833 case 32:
3834 load_type = uint32_type_node;
3835 if (bswap)
3836 {
3837 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
3838 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3839 }
3840 break;
3841 case 64:
3842 load_type = uint64_type_node;
3843 if (bswap)
3844 {
3845 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
3846 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3847 }
3848 break;
3849 default:
3850 gcc_unreachable ();
3851 }
3852
3853 /* If each load has the vuse of the corresponding store,
3854 we've checked the aliasing already in try_coalesce_bswap and
3855 we want to sink the needed load into seq. So we need to use new_vuse
3856 on the load. */
30fa8e9c 3857 if (n->base_addr)
4b84d9b8 3858 {
30fa8e9c
JJ
3859 if (n->vuse == NULL)
3860 {
3861 n->vuse = new_vuse;
3862 ins_stmt = NULL;
3863 }
3864 else
3865 /* Update vuse in case it has been changed by output_merged_stores. */
3866 n->vuse = gimple_vuse (ins_stmt);
4b84d9b8
JJ
3867 }
3868 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
3869 bswap_type, load_type, n, bswap);
3870 gcc_assert (bswap_res);
3871 }
f663d9ad
KT
3872
3873 gimple *stmt = NULL;
245f6de1 3874 auto_vec<gimple *, 32> orig_stmts;
4b84d9b8
JJ
3875 gimple_seq this_seq;
3876 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
aa55dc0c 3877 is_gimple_mem_ref_addr, NULL_TREE);
4b84d9b8 3878 gimple_seq_add_seq_without_update (&seq, this_seq);
245f6de1
JJ
3879
3880 tree load_addr[2] = { NULL_TREE, NULL_TREE };
3881 gimple_seq load_seq[2] = { NULL, NULL };
3882 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
3883 for (int j = 0; j < 2; ++j)
3884 {
3885 store_operand_info &op = group->stores[0]->ops[j];
3886 if (op.base_addr == NULL_TREE)
3887 continue;
3888
3889 store_immediate_info *infol = group->stores.last ();
3890 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
3891 {
97031af7
JJ
3892 /* We can't pick the location randomly; while we've verified
3893 all the loads have the same vuse, they can still be in different
3894 basic blocks and we need to pick the one from the last bb:
3895 int x = q[0];
3896 if (x == N) return;
3897 int y = q[1];
3898 p[0] = x;
3899 p[1] = y;
3900 otherwise if we put the wider load at the q[0] load, we might
3901 segfault if q[1] is not mapped. */
3902 basic_block bb = gimple_bb (op.stmt);
3903 gimple *ostmt = op.stmt;
3904 store_immediate_info *info;
3905 FOR_EACH_VEC_ELT (group->stores, i, info)
3906 {
3907 gimple *tstmt = info->ops[j].stmt;
3908 basic_block tbb = gimple_bb (tstmt);
3909 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
3910 {
3911 ostmt = tstmt;
3912 bb = tbb;
3913 }
3914 }
3915 load_gsi[j] = gsi_for_stmt (ostmt);
245f6de1
JJ
3916 load_addr[j]
3917 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3918 &load_seq[j], is_gimple_mem_ref_addr,
3919 NULL_TREE);
3920 }
3921 else if (operand_equal_p (base_addr, op.base_addr, 0))
3922 load_addr[j] = addr;
3923 else
3e2927a1 3924 {
3e2927a1
JJ
3925 load_addr[j]
3926 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3927 &this_seq, is_gimple_mem_ref_addr,
3928 NULL_TREE);
3929 gimple_seq_add_seq_without_update (&seq, this_seq);
3930 }
245f6de1
JJ
3931 }
3932
f663d9ad
KT
3933 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3934 {
3935 unsigned HOST_WIDE_INT try_size = split_store->size;
3936 unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
c94c3532 3937 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
f663d9ad 3938 unsigned HOST_WIDE_INT align = split_store->align;
a62b3dc5
JJ
3939 tree dest, src;
3940 location_t loc;
3941 if (split_store->orig)
3942 {
5384a802
JJ
3943 /* If there is just a single non-clobber constituent store
3944 which covers the whole area, just reuse the lhs and rhs. */
3945 gimple *orig_stmt = NULL;
3946 store_immediate_info *store;
3947 unsigned int j;
3948 FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
3949 if (!gimple_clobber_p (store->stmt))
3950 {
3951 orig_stmt = store->stmt;
3952 break;
3953 }
245f6de1
JJ
3954 dest = gimple_assign_lhs (orig_stmt);
3955 src = gimple_assign_rhs1 (orig_stmt);
3956 loc = gimple_location (orig_stmt);
a62b3dc5
JJ
3957 }
3958 else
3959 {
245f6de1
JJ
3960 store_immediate_info *info;
3961 unsigned short clique, base;
3962 unsigned int k;
3963 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3964 orig_stmts.safe_push (info->stmt);
a62b3dc5 3965 tree offset_type
245f6de1
JJ
3966 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
3967 loc = get_location_for_stmts (orig_stmts);
3968 orig_stmts.truncate (0);
a62b3dc5
JJ
3969
3970 tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
3971 int_type = build_aligned_type (int_type, align);
3972 dest = fold_build2 (MEM_REF, int_type, addr,
3973 build_int_cst (offset_type, try_pos));
245f6de1
JJ
3974 if (TREE_CODE (dest) == MEM_REF)
3975 {
3976 MR_DEPENDENCE_CLIQUE (dest) = clique;
3977 MR_DEPENDENCE_BASE (dest) = base;
3978 }
3979
c94c3532
EB
3980 tree mask;
3981 if (bswap_res)
3982 mask = integer_zero_node;
3983 else
4b84d9b8
JJ
3984 mask = native_interpret_expr (int_type,
3985 group->mask + try_pos
3986 - start_byte_pos,
3987 group->buf_size);
245f6de1
JJ
3988
3989 tree ops[2];
3990 for (int j = 0;
3991 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
3992 ++j)
3993 {
3994 store_operand_info &op = split_store->orig_stores[0]->ops[j];
4b84d9b8
JJ
3995 if (bswap_res)
3996 ops[j] = bswap_res;
3997 else if (op.base_addr)
245f6de1
JJ
3998 {
3999 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4000 orig_stmts.safe_push (info->ops[j].stmt);
4001
4002 offset_type = get_alias_type_for_stmts (orig_stmts, true,
4003 &clique, &base);
4004 location_t load_loc = get_location_for_stmts (orig_stmts);
4005 orig_stmts.truncate (0);
4006
4007 unsigned HOST_WIDE_INT load_align = group->load_align[j];
4008 unsigned HOST_WIDE_INT align_bitpos
c94c3532 4009 = known_alignment (try_bitpos
8a91d545
RS
4010 - split_store->orig_stores[0]->bitpos
4011 + op.bitpos);
4012 if (align_bitpos & (load_align - 1))
245f6de1
JJ
4013 load_align = least_bit_hwi (align_bitpos);
4014
4015 tree load_int_type
4016 = build_nonstandard_integer_type (try_size, UNSIGNED);
4017 load_int_type
4018 = build_aligned_type (load_int_type, load_align);
4019
8a91d545 4020 poly_uint64 load_pos
c94c3532 4021 = exact_div (try_bitpos
8a91d545
RS
4022 - split_store->orig_stores[0]->bitpos
4023 + op.bitpos,
4024 BITS_PER_UNIT);
245f6de1
JJ
4025 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4026 build_int_cst (offset_type, load_pos));
4027 if (TREE_CODE (ops[j]) == MEM_REF)
4028 {
4029 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4030 MR_DEPENDENCE_BASE (ops[j]) = base;
4031 }
4032 if (!integer_zerop (mask))
4033 /* The load might load some bits (that will be masked off
4034 later on) uninitialized, avoid -W*uninitialized
4035 warnings in that case. */
4036 TREE_NO_WARNING (ops[j]) = 1;
4037
4038 stmt = gimple_build_assign (make_ssa_name (int_type),
4039 ops[j]);
4040 gimple_set_location (stmt, load_loc);
4041 if (gsi_bb (load_gsi[j]))
4042 {
4043 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4044 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4045 }
4046 else
4047 {
4048 gimple_set_vuse (stmt, new_vuse);
4049 gimple_seq_add_stmt_without_update (&seq, stmt);
4050 }
4051 ops[j] = gimple_assign_lhs (stmt);
a6fbd154
JJ
4052 tree xor_mask;
4053 enum tree_code inv_op
4054 = invert_op (split_store, j, int_type, xor_mask);
4055 if (inv_op != NOP_EXPR)
383ac8dc
JJ
4056 {
4057 stmt = gimple_build_assign (make_ssa_name (int_type),
a6fbd154 4058 inv_op, ops[j], xor_mask);
383ac8dc
JJ
4059 gimple_set_location (stmt, load_loc);
4060 ops[j] = gimple_assign_lhs (stmt);
4061
4062 if (gsi_bb (load_gsi[j]))
4063 gimple_seq_add_stmt_without_update (&load_seq[j],
4064 stmt);
4065 else
4066 gimple_seq_add_stmt_without_update (&seq, stmt);
4067 }
245f6de1
JJ
4068 }
4069 else
4070 ops[j] = native_interpret_expr (int_type,
4071 group->val + try_pos
4072 - start_byte_pos,
4073 group->buf_size);
4074 }
4075
4076 switch (split_store->orig_stores[0]->rhs_code)
4077 {
4078 case BIT_AND_EXPR:
4079 case BIT_IOR_EXPR:
4080 case BIT_XOR_EXPR:
4081 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4082 {
4083 tree rhs1 = gimple_assign_rhs1 (info->stmt);
4084 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4085 }
4086 location_t bit_loc;
4087 bit_loc = get_location_for_stmts (orig_stmts);
4088 orig_stmts.truncate (0);
4089
4090 stmt
4091 = gimple_build_assign (make_ssa_name (int_type),
4092 split_store->orig_stores[0]->rhs_code,
4093 ops[0], ops[1]);
4094 gimple_set_location (stmt, bit_loc);
4095 /* If there is just one load and there is a separate
4096 load_seq[0], emit the bitwise op right after it. */
4097 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4098 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4099 /* Otherwise, if at least one load is in seq, we need to
4100 emit the bitwise op right before the store. If there
4101 are two loads and they are emitted somewhere else, it would
4102 be better to emit the bitwise op as early as possible;
4103 we don't track where that would be possible right now
4104 though. */
4105 else
4106 gimple_seq_add_stmt_without_update (&seq, stmt);
4107 src = gimple_assign_lhs (stmt);
a6fbd154
JJ
4108 tree xor_mask;
4109 enum tree_code inv_op;
4110 inv_op = invert_op (split_store, 2, int_type, xor_mask);
4111 if (inv_op != NOP_EXPR)
d60edaba
JJ
4112 {
4113 stmt = gimple_build_assign (make_ssa_name (int_type),
a6fbd154 4114 inv_op, src, xor_mask);
d60edaba
JJ
4115 gimple_set_location (stmt, bit_loc);
4116 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4117 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4118 else
4119 gimple_seq_add_stmt_without_update (&seq, stmt);
4120 src = gimple_assign_lhs (stmt);
4121 }
245f6de1 4122 break;
4b84d9b8
JJ
4123 case LROTATE_EXPR:
4124 case NOP_EXPR:
4125 src = ops[0];
4126 if (!is_gimple_val (src))
4127 {
4128 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4129 src);
4130 gimple_seq_add_stmt_without_update (&seq, stmt);
4131 src = gimple_assign_lhs (stmt);
4132 }
4133 if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
4134 {
4135 stmt = gimple_build_assign (make_ssa_name (int_type),
4136 NOP_EXPR, src);
4137 gimple_seq_add_stmt_without_update (&seq, stmt);
4138 src = gimple_assign_lhs (stmt);
4139 }
be52ac73
JJ
4140 inv_op = invert_op (split_store, 2, int_type, xor_mask);
4141 if (inv_op != NOP_EXPR)
4142 {
4143 stmt = gimple_build_assign (make_ssa_name (int_type),
4144 inv_op, src, xor_mask);
4145 gimple_set_location (stmt, loc);
4146 gimple_seq_add_stmt_without_update (&seq, stmt);
4147 src = gimple_assign_lhs (stmt);
4148 }
4b84d9b8 4149 break;
245f6de1
JJ
4150 default:
4151 src = ops[0];
4152 break;
4153 }
4154
c94c3532
EB
4155 /* If bit insertion is required, we use the source as an accumulator
4156 into which the successive bit-field values are manually inserted.
4157 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
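/* Worked example on a little-endian target, for illustration only:
   inserting a 3-bit field value V at bit offset 5 of a 16-bit split store
   amounts to
     tem = V & 0x7;  tem = (uint16) tem;  tem = tem << 5;  src = src | tem;
   where each step is skipped when it is a no-op.  */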
4158 if (group->bit_insertion)
4159 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4160 if (info->rhs_code == BIT_INSERT_EXPR
4161 && info->bitpos < try_bitpos + try_size
4162 && info->bitpos + info->bitsize > try_bitpos)
4163 {
4164 /* Mask, truncate, convert to final type, shift and ior into
4165 the accumulator. Note that every step can be a no-op. */
4166 const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4167 const HOST_WIDE_INT end_gap
4168 = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4169 tree tem = info->ops[0].val;
c14add82
EB
4170 if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4171 {
4172 tree bitfield_type
4173 = build_nonstandard_integer_type (info->bitsize,
4174 UNSIGNED);
4175 tem = gimple_convert (&seq, loc, bitfield_type, tem);
4176 }
4177 else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
c94c3532
EB
4178 {
4179 const unsigned HOST_WIDE_INT imask
4180 = (HOST_WIDE_INT_1U << info->bitsize) - 1;
4181 tem = gimple_build (&seq, loc,
4182 BIT_AND_EXPR, TREE_TYPE (tem), tem,
4183 build_int_cst (TREE_TYPE (tem),
4184 imask));
4185 }
4186 const HOST_WIDE_INT shift
4187 = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4188 if (shift < 0)
4189 tem = gimple_build (&seq, loc,
4190 RSHIFT_EXPR, TREE_TYPE (tem), tem,
4191 build_int_cst (NULL_TREE, -shift));
4192 tem = gimple_convert (&seq, loc, int_type, tem);
4193 if (shift > 0)
4194 tem = gimple_build (&seq, loc,
4195 LSHIFT_EXPR, int_type, tem,
4196 build_int_cst (NULL_TREE, shift));
4197 src = gimple_build (&seq, loc,
4198 BIT_IOR_EXPR, int_type, tem, src);
4199 }
4200
a62b3dc5
JJ
4201 if (!integer_zerop (mask))
4202 {
4203 tree tem = make_ssa_name (int_type);
4204 tree load_src = unshare_expr (dest);
4205 /* The load might load some or all bits uninitialized,
4206 avoid -W*uninitialized warnings in that case.
4207 As an optimization, if all the bits are provably
4208 uninitialized (no stores at all yet, or the previous
4209 store was a CLOBBER), it would be nice to optimize away
4210 the load and replace it e.g. with 0. */
4211 TREE_NO_WARNING (load_src) = 1;
4212 stmt = gimple_build_assign (tem, load_src);
4213 gimple_set_location (stmt, loc);
4214 gimple_set_vuse (stmt, new_vuse);
4215 gimple_seq_add_stmt_without_update (&seq, stmt);
4216
4217 /* FIXME: If there is a single chunk of zero bits in mask,
4218 perhaps use BIT_INSERT_EXPR instead? */
4219 stmt = gimple_build_assign (make_ssa_name (int_type),
4220 BIT_AND_EXPR, tem, mask);
4221 gimple_set_location (stmt, loc);
4222 gimple_seq_add_stmt_without_update (&seq, stmt);
4223 tem = gimple_assign_lhs (stmt);
4224
245f6de1
JJ
4225 if (TREE_CODE (src) == INTEGER_CST)
4226 src = wide_int_to_tree (int_type,
4227 wi::bit_and_not (wi::to_wide (src),
4228 wi::to_wide (mask)));
4229 else
4230 {
4231 tree nmask
4232 = wide_int_to_tree (int_type,
4233 wi::bit_not (wi::to_wide (mask)));
4234 stmt = gimple_build_assign (make_ssa_name (int_type),
4235 BIT_AND_EXPR, src, nmask);
4236 gimple_set_location (stmt, loc);
4237 gimple_seq_add_stmt_without_update (&seq, stmt);
4238 src = gimple_assign_lhs (stmt);
4239 }
a62b3dc5
JJ
4240 stmt = gimple_build_assign (make_ssa_name (int_type),
4241 BIT_IOR_EXPR, tem, src);
4242 gimple_set_location (stmt, loc);
4243 gimple_seq_add_stmt_without_update (&seq, stmt);
4244 src = gimple_assign_lhs (stmt);
4245 }
4246 }
f663d9ad
KT
4247
4248 stmt = gimple_build_assign (dest, src);
4249 gimple_set_location (stmt, loc);
4250 gimple_set_vuse (stmt, new_vuse);
4251 gimple_seq_add_stmt_without_update (&seq, stmt);
4252
629387a6
EB
4253 if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4254 add_stmt_to_eh_lp (stmt, group->lp_nr);
4255
f663d9ad
KT
4256 tree new_vdef;
4257 if (i < split_stores.length () - 1)
a62b3dc5 4258 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
f663d9ad
KT
4259 else
4260 new_vdef = last_vdef;
4261
4262 gimple_set_vdef (stmt, new_vdef);
4263 SSA_NAME_DEF_STMT (new_vdef) = stmt;
4264 new_vuse = new_vdef;
4265 }
4266
4267 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4268 delete split_store;
4269
f663d9ad
KT
4270 gcc_assert (seq);
4271 if (dump_file)
4272 {
4273 fprintf (dump_file,
c94c3532 4274 "New sequence of %u stores to replace old one of %u stores\n",
a62b3dc5 4275 split_stores.length (), orig_num_stmts);
f663d9ad
KT
4276 if (dump_flags & TDF_DETAILS)
4277 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4278 }
629387a6 4279
5384a802
JJ
4280 if (gimple_clobber_p (group->last_stmt))
4281 update_stmt (group->last_stmt);
4282
629387a6
EB
4283 if (group->lp_nr > 0)
4284 {
4285 /* We're going to insert a sequence of (potentially) throwing stores
4286 into an active EH region. This means that we're going to create
4287 new basic blocks with EH edges pointing to the post landing pad
4288 and, therefore, to have to update its PHI nodes, if any. For the
4289 virtual PHI node, we're going to use the VDEFs created above, but
4290 for the other nodes, we need to record the original reaching defs. */
4291 eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4292 basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4293 basic_block last_bb = gimple_bb (group->last_stmt);
4294 edge last_edge = find_edge (last_bb, lp_bb);
4295 auto_vec<tree, 16> last_defs;
4296 gphi_iterator gpi;
4297 for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4298 {
4299 gphi *phi = gpi.phi ();
4300 tree last_def;
4301 if (virtual_operand_p (gimple_phi_result (phi)))
4302 last_def = NULL_TREE;
4303 else
4304 last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4305 last_defs.safe_push (last_def);
4306 }
4307
4308 /* Do the insertion. Then, if new basic blocks have been created in the
4309 process, rewind the chain of VDEFs created above to walk the new basic
4310 blocks and update the corresponding arguments of the PHI nodes. */
4311 update_modified_stmts (seq);
4312 if (gimple_find_sub_bbs (seq, &last_gsi))
4313 while (last_vdef != gimple_vuse (group->last_stmt))
4314 {
4315 gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4316 if (stmt_could_throw_p (cfun, stmt))
4317 {
4318 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4319 unsigned int i;
4320 for (gpi = gsi_start_phis (lp_bb), i = 0;
4321 !gsi_end_p (gpi);
4322 gsi_next (&gpi), i++)
4323 {
4324 gphi *phi = gpi.phi ();
4325 tree new_def;
4326 if (virtual_operand_p (gimple_phi_result (phi)))
4327 new_def = last_vdef;
4328 else
4329 new_def = last_defs[i];
4330 add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4331 }
4332 }
4333 last_vdef = gimple_vuse (stmt);
4334 }
4335 }
4336 else
4337 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4338
245f6de1
JJ
4339 for (int j = 0; j < 2; ++j)
4340 if (load_seq[j])
4341 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
f663d9ad
KT
4342
4343 return true;
4344}
4345
4346/* Process the merged_store_group objects created in the coalescing phase.
4347 The stores are all against the base object BASE.
4348 Try to output the widened stores and delete the original statements if
4349 successful. Return true iff any changes were made. */
4350
4351bool
b5926e23 4352imm_store_chain_info::output_merged_stores ()
f663d9ad
KT
4353{
4354 unsigned int i;
4355 merged_store_group *merged_store;
4356 bool ret = false;
4357 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4358 {
a95b474a
ML
4359 if (dbg_cnt (store_merging)
4360 && output_merged_store (merged_store))
f663d9ad
KT
4361 {
4362 unsigned int j;
4363 store_immediate_info *store;
4364 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4365 {
4366 gimple *stmt = store->stmt;
4367 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5384a802
JJ
4368 /* Don't remove clobbers, they are still useful even if
4369 everything is overwritten afterwards. */
4370 if (gimple_clobber_p (stmt))
4371 continue;
f663d9ad 4372 gsi_remove (&gsi, true);
629387a6
EB
4373 if (store->lp_nr)
4374 remove_stmt_from_eh_lp (stmt);
f663d9ad
KT
4375 if (stmt != merged_store->last_stmt)
4376 {
4377 unlink_stmt_vdef (stmt);
4378 release_defs (stmt);
4379 }
4380 }
4381 ret = true;
4382 }
4383 }
4384 if (ret && dump_file)
4385 fprintf (dump_file, "Merging successful!\n");
4386
4387 return ret;
4388}
4389
4390/* Coalesce the store_immediate_info objects recorded against the base object
4391 BASE in the first phase and output them.
4392 Delete the allocated structures.
4393 Return true if any changes were made. */
4394
4395bool
b5926e23 4396imm_store_chain_info::terminate_and_process_chain ()
f663d9ad
KT
4397{
4398 /* Process store chain. */
4399 bool ret = false;
4400 if (m_store_info.length () > 1)
4401 {
4402 ret = coalesce_immediate_stores ();
4403 if (ret)
b5926e23 4404 ret = output_merged_stores ();
f663d9ad
KT
4405 }
4406
4407 /* Delete all the entries we allocated ourselves. */
4408 store_immediate_info *info;
4409 unsigned int i;
4410 FOR_EACH_VEC_ELT (m_store_info, i, info)
4411 delete info;
4412
4413 merged_store_group *merged_info;
4414 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4415 delete merged_info;
4416
4417 return ret;
4418}
4419
4420/* Return true iff LHS is a destination potentially interesting for
4421 store merging. In practice these are the codes that get_inner_reference
4422 can process. */
4423
4424static bool
4425lhs_valid_for_store_merging_p (tree lhs)
4426{
629387a6 4427 if (DECL_P (lhs))
f663d9ad
KT
4428 return true;
4429
629387a6
EB
4430 switch (TREE_CODE (lhs))
4431 {
4432 case ARRAY_REF:
4433 case ARRAY_RANGE_REF:
4434 case BIT_FIELD_REF:
4435 case COMPONENT_REF:
4436 case MEM_REF:
4437 return true;
4438 default:
4439 return false;
4440 }
4441
4442 gcc_unreachable ();
f663d9ad
KT
4443}
4444
4445/* Return true if the tree RHS is a constant we want to consider
4446 during store merging. In practice accept all codes that
4447 native_encode_expr accepts. */
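/* E.g. (illustrative) `s.a = 42;' (INTEGER_CST), `s.d = 1.0;' (REAL_CST)
   and the empty CONSTRUCTOR of `s = (struct S){};', which gets a special
   case below, all qualify; a non-constant SSA_NAME value does not.  */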
4448
4449static bool
4450rhs_valid_for_store_merging_p (tree rhs)
4451{
cf098191 4452 unsigned HOST_WIDE_INT size;
3afd514b 4453 if (TREE_CODE (rhs) == CONSTRUCTOR
3afd514b
JJ
4454 && CONSTRUCTOR_NELTS (rhs) == 0
4455 && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4456 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4457 return true;
cf098191
RS
4458 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4459 && native_encode_expr (rhs, NULL, size) != 0);
f663d9ad
KT
4460}
4461
629387a6
EB
4462/* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4463 and return true on success or false on failure. */
4464
4465static bool
4466adjust_bit_pos (poly_offset_int byte_off,
4467 poly_int64 *pbitpos,
4468 poly_uint64 *pbitregion_start,
4469 poly_uint64 *pbitregion_end)
4470{
4471 poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4472 bit_off += *pbitpos;
4473
4474 if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4475 {
4476 if (maybe_ne (*pbitregion_end, 0U))
4477 {
4478 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4479 bit_off += *pbitregion_start;
4480 if (bit_off.to_uhwi (pbitregion_start))
4481 {
4482 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4483 bit_off += *pbitregion_end;
4484 if (!bit_off.to_uhwi (pbitregion_end))
4485 *pbitregion_end = 0;
4486 }
4487 else
4488 *pbitregion_end = 0;
4489 }
4490 return true;
4491 }
4492 else
4493 return false;
4494}
4495
245f6de1
JJ
4496/* If MEM is a memory reference usable for store merging (either as
4497 store destination or for loads), return the non-NULL base_addr
4498 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
4499 Otherwise return NULL; *PBITSIZE should still be valid even in that
4500 case. */
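/* As an illustrative example: if get_inner_reference returns
   MEM_REF [ptr + 4] with a bit position of 8, the canonicalization below
   returns `ptr' as the base and sets *PBITPOS to 4 * BITS_PER_UNIT + 8,
   so stores through differently-folded references to the same base can
   land in the same chain.  */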
4501
4502static tree
8a91d545
RS
4503mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4504 poly_uint64 *pbitpos,
4505 poly_uint64 *pbitregion_start,
4506 poly_uint64 *pbitregion_end)
245f6de1 4507{
8a91d545
RS
4508 poly_int64 bitsize, bitpos;
4509 poly_uint64 bitregion_start = 0, bitregion_end = 0;
245f6de1
JJ
4510 machine_mode mode;
4511 int unsignedp = 0, reversep = 0, volatilep = 0;
4512 tree offset;
4513 tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4514 &unsignedp, &reversep, &volatilep);
4515 *pbitsize = bitsize;
8a91d545 4516 if (known_eq (bitsize, 0))
245f6de1
JJ
4517 return NULL_TREE;
4518
4519 if (TREE_CODE (mem) == COMPONENT_REF
4520 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4521 {
4522 get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
8a91d545
RS
4523 if (maybe_ne (bitregion_end, 0U))
4524 bitregion_end += 1;
245f6de1
JJ
4525 }
4526
4527 if (reversep)
4528 return NULL_TREE;
4529
4530 /* We do not want to rewrite TARGET_MEM_REFs. */
4531 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4532 return NULL_TREE;
4533 /* In some cases get_inner_reference may return a
4534 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4535 canonicalize the base_addr to MEM_REF [ptr] and take
4536 byteoffset into account in the bitpos. This occurs in
4537 PR 23684 and this way we can catch more chains. */
4538 else if (TREE_CODE (base_addr) == MEM_REF)
4539 {
629387a6
EB
4540 if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4541 &bitregion_start, &bitregion_end))
245f6de1
JJ
4542 return NULL_TREE;
4543 base_addr = TREE_OPERAND (base_addr, 0);
4544 }
4545 /* get_inner_reference returns the base object, get at its
4546 address now. */
4547 else
4548 {
8a91d545 4549 if (maybe_lt (bitpos, 0))
245f6de1
JJ
4550 return NULL_TREE;
4551 base_addr = build_fold_addr_expr (base_addr);
4552 }
4553
629387a6 4554 if (offset)
245f6de1
JJ
4555 {
4556 /* If the access has a variable offset then a base decl has to be
4557 address-taken to be able to emit pointer-based stores to it.
4558 ??? We might be able to get away with re-using the original
4559 base up to the first variable part and then wrapping that inside
4560 a BIT_FIELD_REF. */
4561 tree base = get_base_address (base_addr);
629387a6 4562 if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
245f6de1
JJ
4563 return NULL_TREE;
4564
629387a6
EB
4565 /* Similarly to above for the base, remove constant from the offset. */
4566 if (TREE_CODE (offset) == PLUS_EXPR
4567 && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4568 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
4569 &bitpos, &bitregion_start, &bitregion_end))
4570 offset = TREE_OPERAND (offset, 0);
4571
245f6de1
JJ
4572 base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
4573 base_addr, offset);
4574 }
4575
629387a6
EB
4576 if (known_eq (bitregion_end, 0U))
4577 {
4578 bitregion_start = round_down_to_byte_boundary (bitpos);
4579 bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
4580 }
4581
245f6de1
JJ
4582 *pbitsize = bitsize;
4583 *pbitpos = bitpos;
4584 *pbitregion_start = bitregion_start;
4585 *pbitregion_end = bitregion_end;
4586 return base_addr;
4587}
4588
4589/* Return true if STMT is a load that can be used for store merging.
4590 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
4591 BITREGION_END are properties of the corresponding store. */
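/* Illustrative examples: for the store `p[1] = _2' with `_2 = q[1]', the
   load q[1] is recorded in *OP; with `_1 = q[1]; _2 = ~_1', the same load
   is recorded and OP->bit_not_p is set.  A load of a different size or one
   reaching outside the store's bit region is rejected.  */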
4592
4593static bool
4594handled_load (gimple *stmt, store_operand_info *op,
8a91d545
RS
4595 poly_uint64 bitsize, poly_uint64 bitpos,
4596 poly_uint64 bitregion_start, poly_uint64 bitregion_end)
245f6de1 4597{
383ac8dc 4598 if (!is_gimple_assign (stmt))
245f6de1 4599 return false;
383ac8dc
JJ
4600 if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
4601 {
4602 tree rhs1 = gimple_assign_rhs1 (stmt);
4603 if (TREE_CODE (rhs1) == SSA_NAME
383ac8dc
JJ
4604 && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
4605 bitregion_start, bitregion_end))
4606 {
d60edaba
JJ
4607 /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
4608 been optimized earlier, but if allowed here, would confuse the
4609 multiple uses counting. */
4610 if (op->bit_not_p)
4611 return false;
383ac8dc
JJ
4612 op->bit_not_p = !op->bit_not_p;
4613 return true;
4614 }
4615 return false;
4616 }
4617 if (gimple_vuse (stmt)
4618 && gimple_assign_load_p (stmt)
36bbc05d 4619 && !stmt_can_throw_internal (cfun, stmt)
245f6de1
JJ
4620 && !gimple_has_volatile_ops (stmt))
4621 {
4622 tree mem = gimple_assign_rhs1 (stmt);
4623 op->base_addr
4624 = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
4625 &op->bitregion_start,
4626 &op->bitregion_end);
4627 if (op->base_addr != NULL_TREE
8a91d545
RS
4628 && known_eq (op->bitsize, bitsize)
4629 && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
4630 && known_ge (op->bitpos - op->bitregion_start,
4631 bitpos - bitregion_start)
4632 && known_ge (op->bitregion_end - op->bitpos,
4633 bitregion_end - bitpos))
245f6de1
JJ
4634 {
4635 op->stmt = stmt;
4636 op->val = mem;
383ac8dc 4637 op->bit_not_p = false;
245f6de1
JJ
4638 return true;
4639 }
4640 }
4641 return false;
4642}
4643
629387a6
EB
4644/* Return the index number of the landing pad for STMT, if any. */
4645
4646static int
4647lp_nr_for_store (gimple *stmt)
4648{
4649 if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
4650 return 0;
4651
4652 if (!stmt_could_throw_p (cfun, stmt))
4653 return 0;
4654
4655 return lookup_stmt_eh_lp (stmt);
4656}
4657
245f6de1 4658/* Record the store STMT for store merging optimization if it can be
629387a6 4659 optimized. Return true if any changes were made. */
245f6de1 4660
629387a6 4661bool
245f6de1
JJ
4662pass_store_merging::process_store (gimple *stmt)
4663{
4664 tree lhs = gimple_assign_lhs (stmt);
4665 tree rhs = gimple_assign_rhs1 (stmt);
8a91d545
RS
4666 poly_uint64 bitsize, bitpos;
4667 poly_uint64 bitregion_start, bitregion_end;
245f6de1
JJ
4668 tree base_addr
4669 = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
4670 &bitregion_start, &bitregion_end);
8a91d545 4671 if (known_eq (bitsize, 0U))
629387a6 4672 return false;
245f6de1
JJ
4673
4674 bool invalid = (base_addr == NULL_TREE
8a91d545
RS
4675 || (maybe_gt (bitsize,
4676 (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
3afd514b
JJ
4677 && TREE_CODE (rhs) != INTEGER_CST
4678 && (TREE_CODE (rhs) != CONSTRUCTOR
4679 || CONSTRUCTOR_NELTS (rhs) != 0)));
245f6de1 4680 enum tree_code rhs_code = ERROR_MARK;
d60edaba 4681 bool bit_not_p = false;
4b84d9b8
JJ
4682 struct symbolic_number n;
4683 gimple *ins_stmt = NULL;
245f6de1
JJ
4684 store_operand_info ops[2];
4685 if (invalid)
4686 ;
4687 else if (rhs_valid_for_store_merging_p (rhs))
4688 {
4689 rhs_code = INTEGER_CST;
4690 ops[0].val = rhs;
4691 }
d7a9512e 4692 else if (TREE_CODE (rhs) != SSA_NAME)
245f6de1
JJ
4693 invalid = true;
4694 else
4695 {
4696 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
4697 if (!is_gimple_assign (def_stmt))
4698 invalid = true;
4699 else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
4700 bitregion_start, bitregion_end))
4701 rhs_code = MEM_REF;
d60edaba
JJ
4702 else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
4703 {
4704 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4705 if (TREE_CODE (rhs1) == SSA_NAME
4706 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
4707 {
4708 bit_not_p = true;
4709 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4710 }
4711 }
c94c3532 4712
d60edaba 4713 if (rhs_code == ERROR_MARK && !invalid)
245f6de1
JJ
4714 switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
4715 {
4716 case BIT_AND_EXPR:
4717 case BIT_IOR_EXPR:
4718 case BIT_XOR_EXPR:
4719 tree rhs1, rhs2;
4720 rhs1 = gimple_assign_rhs1 (def_stmt);
4721 rhs2 = gimple_assign_rhs2 (def_stmt);
4722 invalid = true;
d7a9512e 4723 if (TREE_CODE (rhs1) != SSA_NAME)
245f6de1
JJ
4724 break;
4725 def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
4726 if (!is_gimple_assign (def_stmt1)
4727 || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
4728 bitregion_start, bitregion_end))
4729 break;
4730 if (rhs_valid_for_store_merging_p (rhs2))
4731 ops[1].val = rhs2;
d7a9512e 4732 else if (TREE_CODE (rhs2) != SSA_NAME)
245f6de1
JJ
4733 break;
4734 else
4735 {
4736 def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
4737 if (!is_gimple_assign (def_stmt2))
4738 break;
4739 else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
4740 bitregion_start, bitregion_end))
4741 break;
4742 }
4743 invalid = false;
4744 break;
4745 default:
4746 invalid = true;
4747 break;
4748 }
c94c3532 4749
8a91d545
RS
4750 unsigned HOST_WIDE_INT const_bitsize;
4751 if (bitsize.is_constant (&const_bitsize)
c94c3532 4752 && (const_bitsize % BITS_PER_UNIT) == 0
8a91d545 4753 && const_bitsize <= 64
c94c3532 4754 && multiple_p (bitpos, BITS_PER_UNIT))
4b84d9b8
JJ
4755 {
4756 ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
4757 if (ins_stmt)
4758 {
4759 uint64_t nn = n.n;
4760 for (unsigned HOST_WIDE_INT i = 0;
8a91d545
RS
4761 i < const_bitsize;
4762 i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
4b84d9b8
JJ
4763 if ((nn & MARKER_MASK) == 0
4764 || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
4765 {
4766 ins_stmt = NULL;
4767 break;
4768 }
4769 if (ins_stmt)
4770 {
4771 if (invalid)
4772 {
4773 rhs_code = LROTATE_EXPR;
4774 ops[0].base_addr = NULL_TREE;
4775 ops[1].base_addr = NULL_TREE;
4776 }
4777 invalid = false;
4778 }
4779 }
4780 }
c94c3532
EB
4781
4782 if (invalid
4783 && bitsize.is_constant (&const_bitsize)
4784 && ((const_bitsize % BITS_PER_UNIT) != 0
4785 || !multiple_p (bitpos, BITS_PER_UNIT))
4786 && const_bitsize <= 64)
4787 {
c14add82 4788 /* Bypass a conversion to the bit-field type. */
31a5d8c5
EB
4789 if (!bit_not_p
4790 && is_gimple_assign (def_stmt)
4791 && CONVERT_EXPR_CODE_P (rhs_code))
c94c3532
EB
4792 {
4793 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4794 if (TREE_CODE (rhs1) == SSA_NAME
c14add82 4795 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
c94c3532
EB
4796 rhs = rhs1;
4797 }
4798 rhs_code = BIT_INSERT_EXPR;
31a5d8c5 4799 bit_not_p = false;
c94c3532
EB
4800 ops[0].val = rhs;
4801 ops[0].base_addr = NULL_TREE;
4802 ops[1].base_addr = NULL_TREE;
4803 invalid = false;
4804 }
245f6de1
JJ
4805 }
4806
8a91d545
RS
4807 unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
4808 unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
4809 if (invalid
4810 || !bitsize.is_constant (&const_bitsize)
4811 || !bitpos.is_constant (&const_bitpos)
4812 || !bitregion_start.is_constant (&const_bitregion_start)
4813 || !bitregion_end.is_constant (&const_bitregion_end))
629387a6 4814 return terminate_all_aliasing_chains (NULL, stmt);
245f6de1 4815
4b84d9b8
JJ
4816 if (!ins_stmt)
4817 memset (&n, 0, sizeof (n));
4818
99b1c316 4819 class imm_store_chain_info **chain_info = NULL;
629387a6 4820 bool ret = false;
383ac8dc
JJ
4821 if (base_addr)
4822 chain_info = m_stores.get (base_addr);
4823
245f6de1
JJ
4824 store_immediate_info *info;
4825 if (chain_info)
4826 {
4827 unsigned int ord = (*chain_info)->m_store_info.length ();
8a91d545
RS
4828 info = new store_immediate_info (const_bitsize, const_bitpos,
4829 const_bitregion_start,
4830 const_bitregion_end,
4831 stmt, ord, rhs_code, n, ins_stmt,
629387a6
EB
4832 bit_not_p, lp_nr_for_store (stmt),
4833 ops[0], ops[1]);
245f6de1
JJ
4834 if (dump_file && (dump_flags & TDF_DETAILS))
4835 {
4836 fprintf (dump_file, "Recording immediate store from stmt:\n");
4837 print_gimple_stmt (dump_file, stmt, 0);
4838 }
4839 (*chain_info)->m_store_info.safe_push (info);
629387a6 4840 ret |= terminate_all_aliasing_chains (chain_info, stmt);
245f6de1
JJ
4841 /* If we reach the limit of stores to merge in a chain terminate and
4842 process the chain now. */
4843 if ((*chain_info)->m_store_info.length ()
028d4092 4844 == (unsigned int) param_max_stores_to_merge)
245f6de1
JJ
4845 {
4846 if (dump_file && (dump_flags & TDF_DETAILS))
4847 fprintf (dump_file,
4848 "Reached maximum number of statements to merge:\n");
629387a6 4849 ret |= terminate_and_process_chain (*chain_info);
245f6de1 4850 }
629387a6 4851 return ret;
245f6de1
JJ
4852 }
4853
4854 /* Store aliases any existing chain? */
629387a6 4855 ret |= terminate_all_aliasing_chains (NULL, stmt);
245f6de1 4856 /* Start a new chain. */
99b1c316 4857 class imm_store_chain_info *new_chain
245f6de1 4858 = new imm_store_chain_info (m_stores_head, base_addr);
8a91d545
RS
4859 info = new store_immediate_info (const_bitsize, const_bitpos,
4860 const_bitregion_start,
4861 const_bitregion_end,
4862 stmt, 0, rhs_code, n, ins_stmt,
629387a6
EB
4863 bit_not_p, lp_nr_for_store (stmt),
4864 ops[0], ops[1]);
245f6de1
JJ
4865 new_chain->m_store_info.safe_push (info);
4866 m_stores.put (base_addr, new_chain);
4867 if (dump_file && (dump_flags & TDF_DETAILS))
4868 {
4869 fprintf (dump_file, "Starting new chain with statement:\n");
4870 print_gimple_stmt (dump_file, stmt, 0);
4871 fprintf (dump_file, "The base object is:\n");
4872 print_generic_expr (dump_file, base_addr);
4873 fprintf (dump_file, "\n");
4874 }
629387a6
EB
4875 return ret;
4876}
4877
4878/* Return true if STMT is a store valid for store merging. */
4879
4880static bool
4881store_valid_for_store_merging_p (gimple *stmt)
4882{
4883 return gimple_assign_single_p (stmt)
4884 && gimple_vdef (stmt)
4885 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
5384a802 4886 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
629387a6
EB
4887}
4888
4889enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
4890
4891/* Return the status of basic block BB wrt store merging. */
4892
4893static enum basic_block_status
4894get_status_for_store_merging (basic_block bb)
4895{
4896 unsigned int num_statements = 0;
4897 gimple_stmt_iterator gsi;
4898 edge e;
4899
4900 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4901 {
4902 gimple *stmt = gsi_stmt (gsi);
4903
4904 if (is_gimple_debug (stmt))
4905 continue;
4906
4907 if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
4908 break;
4909 }
4910
4911 if (num_statements == 0)
4912 return BB_INVALID;
4913
4914 if (cfun->can_throw_non_call_exceptions && cfun->eh
4915 && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
4916 && (e = find_fallthru_edge (bb->succs))
4917 && e->dest == bb->next_bb)
4918 return BB_EXTENDED_VALID;
4919
4920 return num_statements >= 2 ? BB_VALID : BB_INVALID;
245f6de1
JJ
4921}

/* Entry point for the pass.  Go over each basic block recording chains of
   immediate stores.  Upon encountering a terminating statement (as defined
   by stmt_terminates_chain_p) process the recorded stores and emit the
   widened variants.  */

unsigned int
pass_store_merging::execute (function *fun)
{
  basic_block bb;
  hash_set<gimple *> orig_stmts;
  bool changed = false, open_chains = false;

  /* If the function can throw and catch non-call exceptions, we'll be trying
     to merge stores across different basic blocks so we need to first unsplit
     the EH edges in order to streamline the CFG of the function.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh)
    unsplit_eh_edges ();

  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      const basic_block_status bb_status = get_status_for_store_merging (bb);
      gimple_stmt_iterator gsi;

      if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
        {
          changed |= terminate_and_process_all_chains ();
          open_chains = false;
        }

      if (bb_status == BB_INVALID)
        continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);

          if (is_gimple_debug (stmt))
            continue;

          if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
            {
              /* Terminate all chains.  */
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "Volatile access terminates "
                                    "all chains\n");
              changed |= terminate_and_process_all_chains ();
              open_chains = false;
              continue;
            }

          if (store_valid_for_store_merging_p (stmt))
            changed |= process_store (stmt);
          else
            changed |= terminate_all_aliasing_chains (NULL, stmt);
        }

      if (bb_status == BB_EXTENDED_VALID)
        open_chains = true;
      else
        {
          changed |= terminate_and_process_all_chains ();
          open_chains = false;
        }
    }

  if (open_chains)
    changed |= terminate_and_process_all_chains ();

  /* If the function can throw and catch non-call exceptions and something
     changed during the pass, then the CFG has (very likely) changed too.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
    {
      free_dominance_info (CDI_DOMINATORS);
      return TODO_cleanup_cfg;
    }

  return 0;
}
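
/* Usage sketch, not derived from the code above: the transformation is
   controlled by -fstore-merging and its decisions can be inspected in the
   tree dump, e.g.

     gcc -O2 -fdump-tree-store-merging-details test.c

   The chain-length cut-off used in process_store corresponds to the
   --param=max-stores-to-merge knob.  */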

} // anon namespace

/* Construct and return a store merging pass object.  */

gimple_opt_pass *
make_pass_store_merging (gcc::context *ctxt)
{
  return new pass_store_merging (ctxt);
}

#if CHECKING_P

namespace selftest {

/* Selftests for store merging helpers.  */

/* Assert that all elements of the byte arrays X and Y, both of length N,
   are equal.  */

static void
verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
{
  for (unsigned int i = 0; i < n; i++)
    {
      if (x[i] != y[i])
        {
          fprintf (stderr, "Arrays do not match.  X:\n");
          dump_char_array (stderr, x, n);
          fprintf (stderr, "Y:\n");
          dump_char_array (stderr, y, n);
        }
      ASSERT_EQ (x[i], y[i]);
    }
}

/* Test that shift_bytes_in_array_left carries bits across byte boundaries
   correctly.  */

static void
verify_shift_bytes_in_array_left (void)
{
  /* byte 1   | byte 0
     00011111 | 11100000.  */
  unsigned char orig[2] = { 0xe0, 0x1f };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);

  unsigned char expected[2] = { 0x80, 0x7f };
  shift_bytes_in_array_left (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_left (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}
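
/* Worked example for the first check above, for illustration only:
   byte 0 is the least significant, so shifting { 0xe0, 0x1f } left by 2 gives
     byte 0: 0xe0 << 2 = 0x80 with a carry of 0x03, and
     byte 1: (0x1f << 2) | 0x03 = 0x7c | 0x03 = 0x7f,
   i.e. the expected { 0x80, 0x7f }.  */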

/* Test that shift_bytes_in_array_right carries bits across byte boundaries
   correctly.  */

static void
verify_shift_bytes_in_array_right (void)
{
  /* byte 1   | byte 0
     11100000 | 00011111.  */
  unsigned char orig[2] = { 0x1f, 0xe0 };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);
  unsigned char expected[2] = { 0x07, 0xf8 };
  shift_bytes_in_array_right (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_right (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}
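
/* Worked example for the first check above, for illustration only: the
   right-shift variant carries bits towards higher-numbered bytes, so
   shifting { 0x1f, 0xe0 } right by 2 gives
     byte 0: 0x1f >> 2 = 0x07 (the low bits 0x03 are shifted out), and
     byte 1: (0xe0 >> 2) | (0x03 << 6) = 0x38 | 0xc0 = 0xf8,
   i.e. the expected { 0x07, 0xf8 }.  */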

/* Test that clear_bit_region clears exactly the bits asked for and
   nothing more.  */

static void
verify_clear_bit_region (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x1;
  expected[1] = 0;
  expected[2] = 0x80;
  verify_array_eq (in, expected, sizeof in);
}
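
/* Worked example for the second check above, for illustration only: with
   bit 0 being the least significant bit of byte 0, clearing
   3 * BITS_PER_UNIT - 2 (i.e. 22) bits starting at bit 1 leaves only bit 0
   of byte 0 (0x01) and bit 23, the most significant bit of byte 2 (0x80),
   which is exactly the expected { 0x01, 0x00, 0x80 }.  */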

/* Test that clear_bit_region_be clears exactly the bits asked for and
   nothing more.  */

static void
verify_clear_bit_region_be (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x80;
  expected[1] = 0;
  expected[2] = 0x1;
  verify_array_eq (in, expected, sizeof in);
}
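
/* Worked example for the second check above, for illustration only: as
   exercised here, the big-endian variant treats bit BITS_PER_UNIT - 1 (7) as
   the most significant bit of byte 0, with the cleared region extending
   towards less significant bits and on into the following bytes.  Clearing
   22 bits starting at bit 6 therefore leaves only the most significant bit
   of byte 0 (0x80) and the least significant bit of byte 2 (0x01), matching
   the expected { 0x80, 0x00, 0x01 }.  */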

/* Run all of the selftests within this file.  */

void
store_merging_c_tests (void)
{
  verify_shift_bytes_in_array_left ();
  verify_shift_bytes_in_array_right ();
  verify_clear_bit_region ();
  verify_clear_bit_region_be ();
}

} // namespace selftest
#endif /* CHECKING_P.  */