/* GIMPLE store merging and byte swapping passes.
   Copyright (C) 2009-2021 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* The purpose of the store merging pass is to combine multiple memory stores
   of constant values, values loaded from memory, bitwise operations on those,
   or bit-field values, to consecutive locations, into fewer wider stores.

   For example, if we have a sequence performing four byte stores to
   consecutive memory locations:
   [p     ] := imm1;
   [p + 1B] := imm2;
   [p + 2B] := imm3;
   [p + 3B] := imm4;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.

   Or:
   [p     ] := [q     ];
   [p + 1B] := [q + 1B];
   [p + 2B] := [q + 2B];
   [p + 3B] := [q + 3B];
   which, if there is no overlap, can be transformed into a single 4-byte
   load followed by a single 4-byte store.

   Or:
   [p     ] := [q     ] ^ imm1;
   [p + 1B] := [q + 1B] ^ imm2;
   [p + 2B] := [q + 2B] ^ imm3;
   [p + 3B] := [q + 3B] ^ imm4;
   which, if there is no overlap, can be transformed into a single 4-byte
   load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.

   Or:
   [p:1 ] := imm;
   [p:31] := val & 0x7FFFFFFF;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.

   The algorithm is applied to each basic block in three phases:

   1) Scan through the basic block and record assignments to destinations
   that can be expressed as a store to memory of a certain size at a certain
   bit offset from base expressions we can handle.  For bit-fields we also
   record the surrounding bit region, i.e. bits that could be stored in
   a read-modify-write operation when storing the bit-field.  Record store
   chains to different bases in a hash_map (m_stores) and make sure to
   terminate such chains when appropriate (for example when the stored
   values get used subsequently).
   These stores can be a result of structure element initializers, array stores
   etc.  A store_immediate_info object is recorded for every such store.
   Record as many such assignments to a single base as possible until a
   statement that interferes with the store sequence is encountered.
   Each store has up to 2 operands, which can be either a constant, a memory
   load or an SSA name, from which the value to be stored can be computed.
   At most one of the operands can be a constant.  The operands are recorded
   in a store_operand_info struct.

   2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
   store_immediate_info objects) and coalesce contiguous stores into
   merged_store_group objects.  For bit-field stores, we don't need to
   require the stores to be contiguous, just their surrounding bit regions
   have to be contiguous.  If the expression being stored is different
   between adjacent stores, such as one store storing a constant and the
   following one storing a value loaded from memory, or if the loaded memory
   objects are not adjacent, a new merged_store_group is created as well.

   For example, given the stores:
   [p     ] := 0;
   [p + 1B] := 1;
   [p + 3B] := 0;
   [p + 4B] := 1;
   [p + 5B] := 0;
   [p + 6B] := 0;
   this phase would produce two merged_store_group objects, one recording the
   two bytes stored in the memory region [p : p + 1] and another
   recording the four bytes stored in the memory region [p + 3 : p + 6].

   3) The merged_store_group objects produced in phase 2) are processed
   to generate the sequence of wider stores that set the contiguous memory
   regions to the sequence of bytes that correspond to them.  This may emit
   multiple stores per store group to handle contiguous stores that are not
   of a size that is a power of 2.  For example it can try to emit a 40-bit
   store as a 32-bit store followed by an 8-bit store.
   We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
   or TARGET_SLOW_UNALIGNED_ACCESS settings.

   Note on endianness and example:
   Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
   [p     ] := 0x1234;
   [p + 2B] := 0x5678;
   [p + 4B] := 0xab;
   [p + 5B] := 0xcd;

   The memory layout for little-endian (LE) and big-endian (BE) must be:
   p |LE|BE|
   ---------
   0 |34|12|
   1 |12|34|
   2 |78|56|
   3 |56|78|
   4 |ab|ab|
   5 |cd|cd|

   To merge these into a single 48-bit merged value 'val' in phase 2)
   on little-endian we insert stores to higher (consecutive) bit positions
   into the most significant bits of the merged value.
   The final merged value would be: 0xcdab56781234

   For big-endian we insert stores to higher bit positions into the least
   significant bits of the merged value.
   The final merged value would be: 0x12345678abcd

   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
   followed by a 16-bit store.  Again, we must consider endianness when
   breaking down the 48-bit value 'val' computed above.
   For little-endian we emit:
   [p]      (32-bit) := 0x56781234; // val & 0x0000ffffffff;
   [p + 4B] (16-bit) := 0xcdab;     // (val & 0xffff00000000) >> 32;

   Whereas for big-endian we emit:
   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
   [p + 4B] (16-bit) := 0xabcd;     // val & 0x00000000ffff;  */
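
/* Editor's illustration, not part of the pass: a hypothetical function of
   the shape described above.  Phase 1) records the four single-byte stores
   as one chain and phase 3) typically emits them as a single 32-bit store
   of the concatenated constant on targets where such an access is cheap;
   the exact outcome depends on endianness and on STRICT_ALIGNMENT /
   TARGET_SLOW_UNALIGNED_ACCESS.  Kept under #if 0 so it is not compiled.  */
#if 0
void
store_merging_example (unsigned char *p)
{
  p[0] = 0x01;  /* [p     ] := imm1;  */
  p[1] = 0x02;  /* [p + 1B] := imm2;  */
  p[2] = 0x03;  /* [p + 2B] := imm3;  */
  p[3] = 0x04;  /* [p + 3B] := imm4;  */
}
#endif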

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "builtins.h"
#include "fold-const.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "print-tree.h"
#include "tree-hash-traits.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "stor-layout.h"
#include "timevar.h"
#include "cfganal.h"
#include "cfgcleanup.h"
#include "tree-cfg.h"
#include "except.h"
#include "tree-eh.h"
#include "target.h"
#include "gimplify-me.h"
#include "rtl.h"
#include "expr.h"	/* For get_bit_range.  */
#include "optabs-tree.h"
#include "dbgcnt.h"
#include "selftest.h"

/* The maximum size (in bits) of the stores this pass should generate.  */
#define MAX_STORE_BITSIZE (BITS_PER_WORD)
#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)

/* Limit to bound the number of aliasing checks for loads with the same
   vuse as the corresponding store.  */
#define MAX_STORE_ALIAS_CHECKS 64

f663d9ad
KT
182namespace {
183
bebadeca 184struct bswap_stat
dffec8eb
JJ
185{
186 /* Number of hand-written 16-bit nop / bswaps found. */
187 int found_16bit;
188
189 /* Number of hand-written 32-bit nop / bswaps found. */
190 int found_32bit;
191
192 /* Number of hand-written 64-bit nop / bswaps found. */
193 int found_64bit;
194} nop_stats, bswap_stats;
195
/* A symbolic number structure is used to detect byte permutation and selection
   patterns of a source.  To achieve that, its field N contains an artificial
   number consisting of BITS_PER_MARKER sized markers tracking where each
   byte comes from in the source:

   0	   - target byte has the value 0
   FF	   - target byte has an unknown value (e.g. due to sign extension)
   1..size - marker value is the byte index in the source (0 for lsb).

   To detect permutations on memory sources (arrays and structures), a symbolic
   number also records:
   - a base address BASE_ADDR and an OFFSET giving the address of the source;
   - a range which gives the difference between the highest and lowest accessed
     memory location to make such a symbolic number;
   - the address SRC of the source element of lowest address as a convenience
     to easily get BASE_ADDR + offset + lowest bytepos;
   - the number of expressions N_OPS bitwise ored together to represent
     the approximate cost of the computation.

   Note 1: the range is different from size as size reflects the size of the
   type of the current expression.  For instance, for an array char a[],
   (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
   (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
   time a range of 1.

   Note 2: for non-memory sources, range holds the same value as size.

   Note 3: SRC points to the SSA_NAME in case of a non-memory source.  */
224
struct symbolic_number {
  uint64_t n;
  tree type;
  tree base_addr;
  tree offset;
  poly_int64_pod bytepos;
  tree src;
  tree alias_set;
  tree vuse;
  unsigned HOST_WIDE_INT range;
  int n_ops;
};

#define BITS_PER_MARKER 8
#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
#define MARKER_BYTE_UNKNOWN MARKER_MASK
#define HEAD_MARKER(n, size) \
  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
243
244/* The number which the find_bswap_or_nop_1 result should match in
245 order to have a nop. The number is masked according to the size of
246 the symbolic number before using it. */
247#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
248 (uint64_t)0x08070605 << 32 | 0x04030201)
249
250/* The number which the find_bswap_or_nop_1 result should match in
251 order to have a byte swap. The number is masked according to the
252 size of the symbolic number before using it. */
253#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
254 (uint64_t)0x01020304 << 32 | 0x05060708)
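
/* Editor's illustration, not part of the pass: a standalone host program
   (only <stdint.h> and <stdio.h> assumed) showing what the marker encoding
   above looks like.  For a 4-byte source the initial symbolic number is
   CMPNOP masked to four markers, 0x04030201; shifting the source right by
   8 bits shifts the markers right by one BITS_PER_MARKER slot, and a full
   byte swap permutes them into 0x01020304, the low 4 markers of CMPXCHG.
   Kept under #if 0 so it is not compiled as part of GCC.  */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint64_t n = UINT64_C (0x04030201);        /* markers of a 32-bit source  */
  uint64_t shifted = n >> 8;                 /* models "x >> 8": 0x00040302  */
  uint64_t swapped = UINT64_C (0x01020304);  /* markers after a full bswap  */
  printf ("%#llx %#llx\n", (unsigned long long) shifted,
          (unsigned long long) swapped);
  return 0;
}
#endif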
255
256/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
257 number N. Return false if the requested operation is not permitted
258 on a symbolic number. */
259
260inline bool
261do_shift_rotate (enum tree_code code,
262 struct symbolic_number *n,
263 int count)
264{
265 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
266 unsigned head_marker;
267
444cda74
JJ
268 if (count < 0
269 || count >= TYPE_PRECISION (n->type)
270 || count % BITS_PER_UNIT != 0)
dffec8eb
JJ
271 return false;
272 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
273
274 /* Zero out the extra bits of N in order to avoid them being shifted
275 into the significant bits. */
276 if (size < 64 / BITS_PER_MARKER)
277 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
278
279 switch (code)
280 {
281 case LSHIFT_EXPR:
282 n->n <<= count;
283 break;
284 case RSHIFT_EXPR:
285 head_marker = HEAD_MARKER (n->n, size);
286 n->n >>= count;
287 /* Arithmetic shift of signed type: result is dependent on the value. */
288 if (!TYPE_UNSIGNED (n->type) && head_marker)
289 for (i = 0; i < count / BITS_PER_MARKER; i++)
290 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
291 << ((size - 1 - i) * BITS_PER_MARKER);
292 break;
293 case LROTATE_EXPR:
294 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
295 break;
296 case RROTATE_EXPR:
297 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
298 break;
299 default:
300 return false;
301 }
302 /* Zero unused bits for size. */
303 if (size < 64 / BITS_PER_MARKER)
304 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
305 return true;
306}
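
/* Editor's worked example for do_shift_rotate (illustrative only): for a
   signed 4-byte type with n->n == 0x04030201, RSHIFT_EXPR by 16 bits first
   gives 0x00000403, and because the head marker was set the two vacated
   marker slots are then filled with MARKER_BYTE_UNKNOWN, yielding
   0xffff0403: with an arithmetic shift the upper bytes become
   value-dependent.  */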
307
308/* Perform sanity checking for the symbolic number N and the gimple
309 statement STMT. */
310
311inline bool
312verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
313{
314 tree lhs_type;
315
650c70a9 316 lhs_type = TREE_TYPE (gimple_get_lhs (stmt));
dffec8eb 317
5ea39b24
JJ
318 if (TREE_CODE (lhs_type) != INTEGER_TYPE
319 && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
dffec8eb
JJ
320 return false;
321
322 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
323 return false;
324
325 return true;
326}
327
328/* Initialize the symbolic number N for the bswap pass from the base element
329 SRC manipulated by the bitwise OR expression. */
330
331bool
332init_symbolic_number (struct symbolic_number *n, tree src)
333{
334 int size;
335
5b9a65ec 336 if (!INTEGRAL_TYPE_P (TREE_TYPE (src)) && !POINTER_TYPE_P (TREE_TYPE (src)))
dffec8eb
JJ
337 return false;
338
339 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
340 n->src = src;
341
342 /* Set up the symbolic number N by setting each byte to a value between 1 and
343 the byte size of rhs1. The highest order byte is set to n->size and the
344 lowest order byte to 1. */
345 n->type = TREE_TYPE (src);
346 size = TYPE_PRECISION (n->type);
347 if (size % BITS_PER_UNIT != 0)
348 return false;
349 size /= BITS_PER_UNIT;
350 if (size > 64 / BITS_PER_MARKER)
351 return false;
352 n->range = size;
353 n->n = CMPNOP;
354 n->n_ops = 1;
355
356 if (size < 64 / BITS_PER_MARKER)
357 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
358
359 return true;
360}
361
/* Check whether STMT might be a byte swap or a nop from a memory source and
   return the answer.  If so, REF is that memory source; the base of the memory
   area accessed and the offset of the access from that base are recorded
   in N.  */
365
366bool
367find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
368{
369 /* Leaf node is an array or component ref. Memorize its base and
370 offset from base to compare to other such leaf node. */
f37fac2b 371 poly_int64 bitsize, bitpos, bytepos;
dffec8eb
JJ
372 machine_mode mode;
373 int unsignedp, reversep, volatilep;
374 tree offset, base_addr;
375
376 /* Not prepared to handle PDP endian. */
377 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
378 return false;
379
380 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
381 return false;
382
383 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
384 &unsignedp, &reversep, &volatilep);
385
4b84d9b8
JJ
386 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
387 /* Do not rewrite TARGET_MEM_REF. */
388 return false;
389 else if (TREE_CODE (base_addr) == MEM_REF)
dffec8eb 390 {
3fed2ce9 391 poly_offset_int bit_offset = 0;
dffec8eb
JJ
392 tree off = TREE_OPERAND (base_addr, 1);
393
394 if (!integer_zerop (off))
395 {
3fed2ce9
RS
396 poly_offset_int boff = mem_ref_offset (base_addr);
397 boff <<= LOG2_BITS_PER_UNIT;
dffec8eb
JJ
398 bit_offset += boff;
399 }
400
401 base_addr = TREE_OPERAND (base_addr, 0);
402
403 /* Avoid returning a negative bitpos as this may wreak havoc later. */
3fed2ce9 404 if (maybe_lt (bit_offset, 0))
dffec8eb 405 {
3fed2ce9
RS
406 tree byte_offset = wide_int_to_tree
407 (sizetype, bits_to_bytes_round_down (bit_offset));
408 bit_offset = num_trailing_bits (bit_offset);
dffec8eb 409 if (offset)
3fed2ce9 410 offset = size_binop (PLUS_EXPR, offset, byte_offset);
dffec8eb 411 else
3fed2ce9 412 offset = byte_offset;
dffec8eb
JJ
413 }
414
3fed2ce9 415 bitpos += bit_offset.force_shwi ();
dffec8eb 416 }
4b84d9b8
JJ
417 else
418 base_addr = build_fold_addr_expr (base_addr);
dffec8eb 419
f37fac2b 420 if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
dffec8eb 421 return false;
f37fac2b 422 if (!multiple_p (bitsize, BITS_PER_UNIT))
dffec8eb
JJ
423 return false;
424 if (reversep)
425 return false;
426
427 if (!init_symbolic_number (n, ref))
428 return false;
429 n->base_addr = base_addr;
430 n->offset = offset;
f37fac2b 431 n->bytepos = bytepos;
dffec8eb
JJ
432 n->alias_set = reference_alias_ptr_type (ref);
433 n->vuse = gimple_vuse (stmt);
434 return true;
435}
436
/* Compute the symbolic number N representing the result of a bitwise OR of
   the two symbolic numbers N1 and N2 whose source statements are respectively
   SOURCE_STMT1 and SOURCE_STMT2.  */
440
441gimple *
442perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
443 gimple *source_stmt2, struct symbolic_number *n2,
444 struct symbolic_number *n)
445{
446 int i, size;
447 uint64_t mask;
448 gimple *source_stmt;
449 struct symbolic_number *n_start;
450
451 tree rhs1 = gimple_assign_rhs1 (source_stmt1);
452 if (TREE_CODE (rhs1) == BIT_FIELD_REF
453 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
454 rhs1 = TREE_OPERAND (rhs1, 0);
455 tree rhs2 = gimple_assign_rhs1 (source_stmt2);
456 if (TREE_CODE (rhs2) == BIT_FIELD_REF
457 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
458 rhs2 = TREE_OPERAND (rhs2, 0);
459
  /* Sources are different, cancel bswap if they are not memory locations with
     the same base (array, structure, ...).  */
462 if (rhs1 != rhs2)
463 {
464 uint64_t inc;
4a022c70 465 HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
dffec8eb
JJ
466 struct symbolic_number *toinc_n_ptr, *n_end;
467 basic_block bb1, bb2;
468
469 if (!n1->base_addr || !n2->base_addr
470 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
471 return NULL;
472
473 if (!n1->offset != !n2->offset
474 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
475 return NULL;
476
4a022c70
RS
477 start1 = 0;
478 if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
479 return NULL;
480
481 if (start1 < start2)
dffec8eb
JJ
482 {
483 n_start = n1;
4a022c70 484 start_sub = start2 - start1;
dffec8eb
JJ
485 }
486 else
487 {
488 n_start = n2;
4a022c70 489 start_sub = start1 - start2;
dffec8eb
JJ
490 }
491
492 bb1 = gimple_bb (source_stmt1);
493 bb2 = gimple_bb (source_stmt2);
494 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
495 source_stmt = source_stmt1;
496 else
497 source_stmt = source_stmt2;
498
499 /* Find the highest address at which a load is performed and
500 compute related info. */
4a022c70
RS
501 end1 = start1 + (n1->range - 1);
502 end2 = start2 + (n2->range - 1);
dffec8eb
JJ
503 if (end1 < end2)
504 {
505 end = end2;
506 end_sub = end2 - end1;
507 }
508 else
509 {
510 end = end1;
511 end_sub = end1 - end2;
512 }
513 n_end = (end2 > end1) ? n2 : n1;
514
515 /* Find symbolic number whose lsb is the most significant. */
516 if (BYTES_BIG_ENDIAN)
517 toinc_n_ptr = (n_end == n1) ? n2 : n1;
518 else
519 toinc_n_ptr = (n_start == n1) ? n2 : n1;
520
4a022c70 521 n->range = end - MIN (start1, start2) + 1;
dffec8eb
JJ
522
523 /* Check that the range of memory covered can be represented by
524 a symbolic number. */
525 if (n->range > 64 / BITS_PER_MARKER)
526 return NULL;
527
528 /* Reinterpret byte marks in symbolic number holding the value of
529 bigger weight according to target endianness. */
530 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
531 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
532 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
533 {
534 unsigned marker
535 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
536 if (marker && marker != MARKER_BYTE_UNKNOWN)
537 toinc_n_ptr->n += inc;
538 }
539 }
540 else
541 {
542 n->range = n1->range;
543 n_start = n1;
544 source_stmt = source_stmt1;
545 }
546
547 if (!n1->alias_set
548 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
549 n->alias_set = n1->alias_set;
550 else
551 n->alias_set = ptr_type_node;
552 n->vuse = n_start->vuse;
553 n->base_addr = n_start->base_addr;
554 n->offset = n_start->offset;
555 n->src = n_start->src;
556 n->bytepos = n_start->bytepos;
557 n->type = n_start->type;
558 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
559
560 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
561 {
562 uint64_t masked1, masked2;
563
564 masked1 = n1->n & mask;
565 masked2 = n2->n & mask;
566 if (masked1 && masked2 && masked1 != masked2)
567 return NULL;
568 }
569 n->n = n1->n | n2->n;
570 n->n_ops = n1->n_ops + n2->n_ops;
571
572 return source_stmt;
573}
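
/* Editor's illustration, not part of the pass: a byte-wise little-endian
   read of the kind the symbolic-number merging above handles.  Each load
   contributes one marker; the merged number matches CMPNOP on little-endian
   targets (so the whole expression is just a 32-bit load) and CMPXCHG on
   big-endian ones (a 32-bit load followed by a byte swap).  Kept under
   #if 0 so it is not compiled.  */
#if 0
static unsigned int
read_le32 (const unsigned char *p)
{
  return (unsigned int) p[0]
         | ((unsigned int) p[1] << 8)
         | ((unsigned int) p[2] << 16)
         | ((unsigned int) p[3] << 24);
}
#endif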
574
/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   can successfully be executed, the function returns a gimple stmt whose
   first tree on the rhs is the expression of the source operand, and NULL
   otherwise.  */
580
581gimple *
582find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
583{
584 enum tree_code code;
585 tree rhs1, rhs2 = NULL;
586 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
587 enum gimple_rhs_class rhs_class;
588
589 if (!limit || !is_gimple_assign (stmt))
590 return NULL;
591
592 rhs1 = gimple_assign_rhs1 (stmt);
593
594 if (find_bswap_or_nop_load (stmt, rhs1, n))
595 return stmt;
596
597 /* Handle BIT_FIELD_REF. */
598 if (TREE_CODE (rhs1) == BIT_FIELD_REF
599 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
600 {
35cf3c55
KZ
601 if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
602 || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
603 return NULL;
604
dffec8eb
JJ
605 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
606 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
607 if (bitpos % BITS_PER_UNIT == 0
608 && bitsize % BITS_PER_UNIT == 0
609 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
610 {
611 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
612 if (BYTES_BIG_ENDIAN)
613 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
614
615 /* Shift. */
616 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
617 return NULL;
618
619 /* Mask. */
620 uint64_t mask = 0;
621 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
622 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
623 i++, tmp <<= BITS_PER_UNIT)
624 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
625 n->n &= mask;
626
627 /* Convert. */
628 n->type = TREE_TYPE (rhs1);
629 if (!n->base_addr)
630 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
631
632 return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
633 }
634
635 return NULL;
636 }
637
638 if (TREE_CODE (rhs1) != SSA_NAME)
639 return NULL;
640
641 code = gimple_assign_rhs_code (stmt);
642 rhs_class = gimple_assign_rhs_class (stmt);
643 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
644
645 if (rhs_class == GIMPLE_BINARY_RHS)
646 rhs2 = gimple_assign_rhs2 (stmt);
647
648 /* Handle unary rhs and binary rhs with integer constants as second
649 operand. */
650
651 if (rhs_class == GIMPLE_UNARY_RHS
652 || (rhs_class == GIMPLE_BINARY_RHS
653 && TREE_CODE (rhs2) == INTEGER_CST))
654 {
655 if (code != BIT_AND_EXPR
656 && code != LSHIFT_EXPR
657 && code != RSHIFT_EXPR
658 && code != LROTATE_EXPR
659 && code != RROTATE_EXPR
660 && !CONVERT_EXPR_CODE_P (code))
661 return NULL;
662
663 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
664
665 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
666 we have to initialize the symbolic number. */
667 if (!source_stmt1)
668 {
669 if (gimple_assign_load_p (stmt)
670 || !init_symbolic_number (n, rhs1))
671 return NULL;
672 source_stmt1 = stmt;
673 }
674
675 switch (code)
676 {
677 case BIT_AND_EXPR:
678 {
679 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
680 uint64_t val = int_cst_value (rhs2), mask = 0;
681 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
682
683 /* Only constants masking full bytes are allowed. */
684 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
685 if ((val & tmp) != 0 && (val & tmp) != tmp)
686 return NULL;
687 else if (val & tmp)
688 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
689
690 n->n &= mask;
691 }
692 break;
693 case LSHIFT_EXPR:
694 case RSHIFT_EXPR:
695 case LROTATE_EXPR:
696 case RROTATE_EXPR:
697 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
698 return NULL;
699 break;
700 CASE_CONVERT:
701 {
702 int i, type_size, old_type_size;
703 tree type;
704
650c70a9 705 type = TREE_TYPE (gimple_assign_lhs (stmt));
dffec8eb
JJ
706 type_size = TYPE_PRECISION (type);
707 if (type_size % BITS_PER_UNIT != 0)
708 return NULL;
709 type_size /= BITS_PER_UNIT;
710 if (type_size > 64 / BITS_PER_MARKER)
711 return NULL;
712
713 /* Sign extension: result is dependent on the value. */
714 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
715 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
716 && HEAD_MARKER (n->n, old_type_size))
717 for (i = 0; i < type_size - old_type_size; i++)
718 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
719 << ((type_size - 1 - i) * BITS_PER_MARKER);
720
721 if (type_size < 64 / BITS_PER_MARKER)
722 {
723 /* If STMT casts to a smaller type mask out the bits not
724 belonging to the target type. */
725 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
726 }
727 n->type = type;
728 if (!n->base_addr)
729 n->range = type_size;
730 }
731 break;
732 default:
733 return NULL;
734 };
735 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
736 }
737
738 /* Handle binary rhs. */
739
740 if (rhs_class == GIMPLE_BINARY_RHS)
741 {
742 struct symbolic_number n1, n2;
743 gimple *source_stmt, *source_stmt2;
744
745 if (code != BIT_IOR_EXPR)
746 return NULL;
747
748 if (TREE_CODE (rhs2) != SSA_NAME)
749 return NULL;
750
751 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
752
753 switch (code)
754 {
755 case BIT_IOR_EXPR:
756 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
757
758 if (!source_stmt1)
759 return NULL;
760
761 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
762
763 if (!source_stmt2)
764 return NULL;
765
766 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
767 return NULL;
768
4b84d9b8 769 if (n1.vuse != n2.vuse)
dffec8eb
JJ
770 return NULL;
771
772 source_stmt
773 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
774
775 if (!source_stmt)
776 return NULL;
777
778 if (!verify_symbolic_number_p (n, stmt))
779 return NULL;
780
781 break;
782 default:
783 return NULL;
784 }
785 return source_stmt;
786 }
787 return NULL;
788}
789
4b84d9b8
JJ
790/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
791 *CMPXCHG, *CMPNOP and adjust *N. */
dffec8eb 792
4b84d9b8
JJ
793void
794find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
b320edc0 795 uint64_t *cmpnop, bool *cast64_to_32)
dffec8eb
JJ
796{
797 unsigned rsize;
798 uint64_t tmpn, mask;
dffec8eb 799
4b84d9b8
JJ
800 /* The number which the find_bswap_or_nop_1 result should match in order
801 to have a full byte swap. The number is shifted to the right
802 according to the size of the symbolic number before using it. */
803 *cmpxchg = CMPXCHG;
804 *cmpnop = CMPNOP;
b320edc0 805 *cast64_to_32 = false;
dffec8eb
JJ
806
807 /* Find real size of result (highest non-zero byte). */
808 if (n->base_addr)
809 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
810 else
811 rsize = n->range;
812
813 /* Zero out the bits corresponding to untouched bytes in original gimple
814 expression. */
815 if (n->range < (int) sizeof (int64_t))
816 {
817 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
b320edc0
JJ
818 if (n->base_addr == NULL
819 && n->range == 4
820 && int_size_in_bytes (TREE_TYPE (n->src)) == 8)
821 {
822 /* If all bytes in n->n are either 0 or in [5..8] range, this
823 might be a candidate for (unsigned) __builtin_bswap64 (src).
824 It is not worth it for (unsigned short) __builtin_bswap64 (src)
825 or (unsigned short) __builtin_bswap32 (src). */
826 *cast64_to_32 = true;
827 for (tmpn = n->n; tmpn; tmpn >>= BITS_PER_MARKER)
828 if ((tmpn & MARKER_MASK)
829 && ((tmpn & MARKER_MASK) <= 4 || (tmpn & MARKER_MASK) > 8))
830 {
831 *cast64_to_32 = false;
832 break;
833 }
834 }
835 if (*cast64_to_32)
836 *cmpxchg &= mask;
837 else
838 *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
4b84d9b8 839 *cmpnop &= mask;
dffec8eb
JJ
840 }
841
842 /* Zero out the bits corresponding to unused bytes in the result of the
843 gimple expression. */
844 if (rsize < n->range)
845 {
846 if (BYTES_BIG_ENDIAN)
847 {
848 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
4b84d9b8
JJ
849 *cmpxchg &= mask;
850 *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
dffec8eb
JJ
851 }
852 else
853 {
854 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
4b84d9b8
JJ
855 *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
856 *cmpnop &= mask;
dffec8eb
JJ
857 }
858 n->range = rsize;
859 }
860
b320edc0
JJ
861 if (*cast64_to_32)
862 n->range = 8;
4b84d9b8
JJ
863 n->range *= BITS_PER_UNIT;
864}
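
/* Editor's worked example for find_bswap_or_nop_finalize (illustrative
   only): for a 2-byte non-memory source n->range is 2, so *cmpnop is
   masked down to 0x0201 and *cmpxchg is shifted down to 0x0102.  A
   symbolic number equal to the former is an identity, one equal to the
   latter is a 16-bit byte swap; n->range is then converted to bits (16).  */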
865
/* Check whether STMT completes a bswap implementation or a read in a given
   endianness consisting of ORs, SHIFTs and ANDs and set *BSWAP
   accordingly.  It also sets N to represent the kind of operations
   performed: the size of the resulting expression and whether it works on
   a memory source, and if so the alias set and vuse.  Finally, the
   function returns a stmt whose first tree on the rhs is the source
   expression.  */
873
874gimple *
b320edc0
JJ
875find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap,
876 bool *cast64_to_32, uint64_t *mask)
4b84d9b8 877{
650c70a9 878 tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
7f0ce82a
KT
879 if (!tree_fits_uhwi_p (type_size))
880 return NULL;
881
4b84d9b8
JJ
882 /* The last parameter determines the depth search limit. It usually
883 correlates directly to the number n of bytes to be touched. We
0f507a36 884 increase that number by 2 * (log2(n) + 1) here in order to also
4b84d9b8
JJ
885 cover signed -> unsigned conversions of the src operand as can be seen
886 in libgcc, and for initial shift/and operation of the src operand. */
7f0ce82a 887 int limit = tree_to_uhwi (type_size);
0f507a36 888 limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
4b84d9b8
JJ
889 gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
890
891 if (!ins_stmt)
cd676dfa
JJ
892 {
893 if (gimple_assign_rhs_code (stmt) != CONSTRUCTOR
894 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
895 return NULL;
896 unsigned HOST_WIDE_INT sz = tree_to_uhwi (type_size) * BITS_PER_UNIT;
897 if (sz != 16 && sz != 32 && sz != 64)
898 return NULL;
899 tree rhs = gimple_assign_rhs1 (stmt);
9032d2b2
JJ
900 if (CONSTRUCTOR_NELTS (rhs) == 0)
901 return NULL;
cd676dfa
JJ
902 tree eltype = TREE_TYPE (TREE_TYPE (rhs));
903 unsigned HOST_WIDE_INT eltsz
904 = int_size_in_bytes (eltype) * BITS_PER_UNIT;
905 if (TYPE_PRECISION (eltype) != eltsz)
906 return NULL;
907 constructor_elt *elt;
908 unsigned int i;
909 tree type = build_nonstandard_integer_type (sz, 1);
910 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
911 {
912 if (TREE_CODE (elt->value) != SSA_NAME
913 || !INTEGRAL_TYPE_P (TREE_TYPE (elt->value)))
914 return NULL;
915 struct symbolic_number n1;
916 gimple *source_stmt
917 = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt->value), &n1,
918 limit - 1);
919
920 if (!source_stmt)
921 return NULL;
922
923 n1.type = type;
924 if (!n1.base_addr)
925 n1.range = sz / BITS_PER_UNIT;
926
927 if (i == 0)
928 {
929 ins_stmt = source_stmt;
930 *n = n1;
931 }
932 else
933 {
934 if (n->vuse != n1.vuse)
935 return NULL;
936
937 struct symbolic_number n0 = *n;
938
939 if (!BYTES_BIG_ENDIAN)
940 {
941 if (!do_shift_rotate (LSHIFT_EXPR, &n1, i * eltsz))
942 return NULL;
943 }
944 else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
945 return NULL;
946 ins_stmt
947 = perform_symbolic_merge (ins_stmt, &n0, source_stmt, &n1, n);
948
949 if (!ins_stmt)
950 return NULL;
951 }
952 }
953 }
4b84d9b8
JJ
954
955 uint64_t cmpxchg, cmpnop;
b320edc0 956 find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop, cast64_to_32);
4b84d9b8 957
dffec8eb
JJ
  /* A complete byte swap should make the symbolic number start with
     the largest digit in the highest order byte.  An unchanged symbolic
     number indicates a read with the same endianness as the target
     architecture.  */
b320edc0 961 *mask = ~(uint64_t) 0;
dffec8eb
JJ
962 if (n->n == cmpnop)
963 *bswap = false;
964 else if (n->n == cmpxchg)
965 *bswap = true;
966 else
b320edc0
JJ
967 {
968 int set = 0;
969 for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER)
970 if ((n->n & msk) == 0)
971 *mask &= ~msk;
972 else if ((n->n & msk) == (cmpxchg & msk))
973 set++;
974 else
975 return NULL;
976 if (set < 2)
977 return NULL;
978 *bswap = true;
979 }
dffec8eb
JJ
980
981 /* Useless bit manipulation performed by code. */
982 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
983 return NULL;
984
dffec8eb
JJ
985 return ins_stmt;
986}
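
/* Editor's illustration, not part of the pass: a hand-written 32-bit byte
   swap of the shape find_bswap_or_nop recognizes.  When the target provides
   a bswap optab handler, the whole expression is typically replaced by a
   single __builtin_bswap32 call in bswap_replace below.  Kept under #if 0
   so it is not compiled.  */
#if 0
static unsigned int
manual_bswap32 (unsigned int x)
{
  return ((x & 0x000000ffU) << 24)
         | ((x & 0x0000ff00U) << 8)
         | ((x & 0x00ff0000U) >> 8)
         | ((x & 0xff000000U) >> 24);
}
#endif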
987
988const pass_data pass_data_optimize_bswap =
989{
990 GIMPLE_PASS, /* type */
991 "bswap", /* name */
992 OPTGROUP_NONE, /* optinfo_flags */
993 TV_NONE, /* tv_id */
994 PROP_ssa, /* properties_required */
995 0, /* properties_provided */
996 0, /* properties_destroyed */
997 0, /* todo_flags_start */
998 0, /* todo_flags_finish */
999};
1000
1001class pass_optimize_bswap : public gimple_opt_pass
1002{
1003public:
1004 pass_optimize_bswap (gcc::context *ctxt)
1005 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
1006 {}
1007
1008 /* opt_pass methods: */
1009 virtual bool gate (function *)
1010 {
1011 return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
1012 }
1013
1014 virtual unsigned int execute (function *);
1015
1016}; // class pass_optimize_bswap
1017
d02a8b63
JJ
1018/* Helper function for bswap_replace. Build VIEW_CONVERT_EXPR from
1019 VAL to TYPE. If VAL has different type size, emit a NOP_EXPR cast
1020 first. */
1021
1022static tree
45ff1251
JJ
1023bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val,
1024 bool before)
d02a8b63 1025{
a4001578
JJ
1026 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val))
1027 || POINTER_TYPE_P (TREE_TYPE (val)));
d02a8b63
JJ
1028 if (TYPE_SIZE (type) != TYPE_SIZE (TREE_TYPE (val)))
1029 {
1030 HOST_WIDE_INT prec = TREE_INT_CST_LOW (TYPE_SIZE (type));
a4001578
JJ
1031 if (POINTER_TYPE_P (TREE_TYPE (val)))
1032 {
1033 gimple *g
1034 = gimple_build_assign (make_ssa_name (pointer_sized_int_node),
1035 NOP_EXPR, val);
45ff1251
JJ
1036 if (before)
1037 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1038 else
1039 gsi_insert_after (gsi, g, GSI_NEW_STMT);
a4001578
JJ
1040 val = gimple_assign_lhs (g);
1041 }
d02a8b63
JJ
1042 tree itype = build_nonstandard_integer_type (prec, 1);
1043 gimple *g = gimple_build_assign (make_ssa_name (itype), NOP_EXPR, val);
45ff1251
JJ
1044 if (before)
1045 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1046 else
1047 gsi_insert_after (gsi, g, GSI_NEW_STMT);
d02a8b63
JJ
1048 val = gimple_assign_lhs (g);
1049 }
1050 return build1 (VIEW_CONVERT_EXPR, type, val);
1051}
1052
/* Perform the bswap optimization: replace the expression computed in the rhs
   of gsi_stmt (GSI) (or, if that is NULL, add instead of replace) by an
   equivalent bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr
   (non-null if a load is needed) and BSWAP.  The type, VUSE and alias set of
   the load to perform are also given in N, while the bswap builtin to invoke
   is given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one
   of the load statements involved in constructing the rhs in gsi_stmt (GSI),
   and N->range gives the size of the rhs expression for maintaining some
   statistics.

   Note that if the replacement involves a load and gsi_stmt (GSI) is
   non-NULL, that stmt is moved just after INS_STMT to do the load with the
   same VUSE, which can lead to gsi_stmt (GSI) changing basic block.  */
dffec8eb 1067
4b84d9b8
JJ
1068tree
1069bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
dffec8eb 1070 tree bswap_type, tree load_type, struct symbolic_number *n,
b320edc0 1071 bool bswap, uint64_t mask)
dffec8eb 1072{
4b84d9b8 1073 tree src, tmp, tgt = NULL_TREE;
b320edc0 1074 gimple *bswap_stmt, *mask_stmt = NULL;
cd676dfa 1075 tree_code conv_code = NOP_EXPR;
dffec8eb 1076
4b84d9b8 1077 gimple *cur_stmt = gsi_stmt (gsi);
dffec8eb 1078 src = n->src;
4b84d9b8 1079 if (cur_stmt)
cd676dfa
JJ
1080 {
1081 tgt = gimple_assign_lhs (cur_stmt);
1082 if (gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR
1083 && tgt
1084 && VECTOR_TYPE_P (TREE_TYPE (tgt)))
1085 conv_code = VIEW_CONVERT_EXPR;
1086 }
dffec8eb
JJ
1087
1088 /* Need to load the value from memory first. */
1089 if (n->base_addr)
1090 {
4b84d9b8
JJ
1091 gimple_stmt_iterator gsi_ins = gsi;
1092 if (ins_stmt)
1093 gsi_ins = gsi_for_stmt (ins_stmt);
dffec8eb
JJ
1094 tree addr_expr, addr_tmp, val_expr, val_tmp;
1095 tree load_offset_ptr, aligned_load_type;
4b84d9b8
JJ
1096 gimple *load_stmt;
1097 unsigned align = get_object_alignment (src);
4a022c70 1098 poly_int64 load_offset = 0;
dffec8eb 1099
4b84d9b8
JJ
1100 if (cur_stmt)
1101 {
1102 basic_block ins_bb = gimple_bb (ins_stmt);
1103 basic_block cur_bb = gimple_bb (cur_stmt);
1104 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
1105 return NULL_TREE;
1106
          /* Move cur_stmt just before one of the loads of the original
             to ensure it has the same VUSE.  See PR61517 for what could
             go wrong.  */
1110 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
1111 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
1112 gsi_move_before (&gsi, &gsi_ins);
1113 gsi = gsi_for_stmt (cur_stmt);
1114 }
1115 else
1116 gsi = gsi_ins;
dffec8eb
JJ
1117
1118 /* Compute address to load from and cast according to the size
1119 of the load. */
4b84d9b8 1120 addr_expr = build_fold_addr_expr (src);
dffec8eb 1121 if (is_gimple_mem_ref_addr (addr_expr))
4b84d9b8 1122 addr_tmp = unshare_expr (addr_expr);
dffec8eb
JJ
1123 else
1124 {
4b84d9b8
JJ
1125 addr_tmp = unshare_expr (n->base_addr);
1126 if (!is_gimple_mem_ref_addr (addr_tmp))
1127 addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
1128 is_gimple_mem_ref_addr,
1129 NULL_TREE, true,
1130 GSI_SAME_STMT);
1131 load_offset = n->bytepos;
1132 if (n->offset)
1133 {
1134 tree off
1135 = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
1136 true, NULL_TREE, true,
1137 GSI_SAME_STMT);
1138 gimple *stmt
1139 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
1140 POINTER_PLUS_EXPR, addr_tmp, off);
1141 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1142 addr_tmp = gimple_assign_lhs (stmt);
1143 }
dffec8eb
JJ
1144 }
1145
1146 /* Perform the load. */
1147 aligned_load_type = load_type;
1148 if (align < TYPE_ALIGN (load_type))
1149 aligned_load_type = build_aligned_type (load_type, align);
1150 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1151 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1152 load_offset_ptr);
1153
1154 if (!bswap)
1155 {
1156 if (n->range == 16)
1157 nop_stats.found_16bit++;
1158 else if (n->range == 32)
1159 nop_stats.found_32bit++;
1160 else
1161 {
1162 gcc_assert (n->range == 64);
1163 nop_stats.found_64bit++;
1164 }
1165
1166 /* Convert the result of load if necessary. */
4b84d9b8 1167 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
dffec8eb
JJ
1168 {
1169 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1170 "load_dst");
1171 load_stmt = gimple_build_assign (val_tmp, val_expr);
1172 gimple_set_vuse (load_stmt, n->vuse);
1173 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
cd676dfa 1174 if (conv_code == VIEW_CONVERT_EXPR)
45ff1251
JJ
1175 val_tmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), val_tmp,
1176 true);
cd676dfa 1177 gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
4b84d9b8 1178 update_stmt (cur_stmt);
dffec8eb 1179 }
4b84d9b8 1180 else if (cur_stmt)
dffec8eb
JJ
1181 {
1182 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1183 gimple_set_vuse (cur_stmt, n->vuse);
4b84d9b8
JJ
1184 update_stmt (cur_stmt);
1185 }
1186 else
1187 {
1188 tgt = make_ssa_name (load_type);
1189 cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1190 gimple_set_vuse (cur_stmt, n->vuse);
1191 gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
dffec8eb 1192 }
dffec8eb
JJ
1193
1194 if (dump_file)
1195 {
1196 fprintf (dump_file,
1197 "%d bit load in target endianness found at: ",
1198 (int) n->range);
1199 print_gimple_stmt (dump_file, cur_stmt, 0);
1200 }
4b84d9b8 1201 return tgt;
dffec8eb
JJ
1202 }
1203 else
1204 {
1205 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1206 load_stmt = gimple_build_assign (val_tmp, val_expr);
1207 gimple_set_vuse (load_stmt, n->vuse);
1208 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1209 }
1210 src = val_tmp;
1211 }
1212 else if (!bswap)
1213 {
4b84d9b8
JJ
1214 gimple *g = NULL;
1215 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
dffec8eb
JJ
1216 {
1217 if (!is_gimple_val (src))
4b84d9b8 1218 return NULL_TREE;
cd676dfa 1219 if (conv_code == VIEW_CONVERT_EXPR)
45ff1251 1220 src = bswap_view_convert (&gsi, TREE_TYPE (tgt), src, true);
cd676dfa 1221 g = gimple_build_assign (tgt, conv_code, src);
dffec8eb 1222 }
4b84d9b8 1223 else if (cur_stmt)
dffec8eb 1224 g = gimple_build_assign (tgt, src);
4b84d9b8
JJ
1225 else
1226 tgt = src;
dffec8eb
JJ
1227 if (n->range == 16)
1228 nop_stats.found_16bit++;
1229 else if (n->range == 32)
1230 nop_stats.found_32bit++;
1231 else
1232 {
1233 gcc_assert (n->range == 64);
1234 nop_stats.found_64bit++;
1235 }
1236 if (dump_file)
1237 {
1238 fprintf (dump_file,
1239 "%d bit reshuffle in target endianness found at: ",
1240 (int) n->range);
4b84d9b8
JJ
1241 if (cur_stmt)
1242 print_gimple_stmt (dump_file, cur_stmt, 0);
1243 else
1244 {
4af78ef8 1245 print_generic_expr (dump_file, tgt, TDF_NONE);
4b84d9b8
JJ
1246 fprintf (dump_file, "\n");
1247 }
dffec8eb 1248 }
4b84d9b8
JJ
1249 if (cur_stmt)
1250 gsi_replace (&gsi, g, true);
1251 return tgt;
dffec8eb
JJ
1252 }
1253 else if (TREE_CODE (src) == BIT_FIELD_REF)
1254 src = TREE_OPERAND (src, 0);
1255
1256 if (n->range == 16)
1257 bswap_stats.found_16bit++;
1258 else if (n->range == 32)
1259 bswap_stats.found_32bit++;
1260 else
1261 {
1262 gcc_assert (n->range == 64);
1263 bswap_stats.found_64bit++;
1264 }
1265
1266 tmp = src;
1267
1268 /* Convert the src expression if necessary. */
1269 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1270 {
1271 gimple *convert_stmt;
1272
1273 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1274 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1275 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1276 }
1277
  /* Canonical form for a 16-bit bswap is a rotate expression.  Only 16-bit
     values are considered, as rotation of 2N-bit values by N bits is generally
     not equivalent to a bswap.  Consider for instance 0x01020304 r>> 16, which
     gives 0x03040102 while a bswap for that value is 0x04030201.  */
1282 if (bswap && n->range == 16)
1283 {
1284 tree count = build_int_cst (NULL, BITS_PER_UNIT);
1285 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1286 bswap_stmt = gimple_build_assign (NULL, src);
1287 }
1288 else
1289 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1290
4b84d9b8
JJ
1291 if (tgt == NULL_TREE)
1292 tgt = make_ssa_name (bswap_type);
dffec8eb
JJ
1293 tmp = tgt;
1294
b320edc0
JJ
1295 if (mask != ~(uint64_t) 0)
1296 {
1297 tree m = build_int_cst (bswap_type, mask);
1298 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1299 gimple_set_lhs (bswap_stmt, tmp);
1300 mask_stmt = gimple_build_assign (tgt, BIT_AND_EXPR, tmp, m);
1301 tmp = tgt;
1302 }
1303
dffec8eb
JJ
1304 /* Convert the result if necessary. */
1305 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1306 {
dffec8eb 1307 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
cd676dfa 1308 tree atmp = tmp;
45ff1251 1309 gimple_stmt_iterator gsi2 = gsi;
cd676dfa 1310 if (conv_code == VIEW_CONVERT_EXPR)
45ff1251
JJ
1311 atmp = bswap_view_convert (&gsi2, TREE_TYPE (tgt), tmp, false);
1312 gimple *convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
1313 gsi_insert_after (&gsi2, convert_stmt, GSI_SAME_STMT);
dffec8eb
JJ
1314 }
1315
b320edc0 1316 gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp);
dffec8eb
JJ
1317
1318 if (dump_file)
1319 {
1320 fprintf (dump_file, "%d bit bswap implementation found at: ",
1321 (int) n->range);
4b84d9b8
JJ
1322 if (cur_stmt)
1323 print_gimple_stmt (dump_file, cur_stmt, 0);
1324 else
1325 {
4af78ef8 1326 print_generic_expr (dump_file, tgt, TDF_NONE);
4b84d9b8
JJ
1327 fprintf (dump_file, "\n");
1328 }
dffec8eb
JJ
1329 }
1330
4b84d9b8
JJ
1331 if (cur_stmt)
1332 {
b320edc0
JJ
1333 if (mask_stmt)
1334 gsi_insert_after (&gsi, mask_stmt, GSI_SAME_STMT);
4b84d9b8
JJ
1335 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1336 gsi_remove (&gsi, true);
1337 }
1338 else
b320edc0
JJ
1339 {
1340 gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1341 if (mask_stmt)
1342 gsi_insert_before (&gsi, mask_stmt, GSI_SAME_STMT);
1343 }
4b84d9b8 1344 return tgt;
dffec8eb
JJ
1345}
1346
a7553ad6
JJ
1347/* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
1348 using bswap optimizations. CDI_DOMINATORS need to be
1349 computed on entry. Return true if it has been optimized and
1350 TODO_update_ssa is needed. */
1351
1352static bool
1353maybe_optimize_vector_constructor (gimple *cur_stmt)
1354{
1355 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1356 struct symbolic_number n;
1357 bool bswap;
1358
1359 gcc_assert (is_gimple_assign (cur_stmt)
1360 && gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR);
1361
1362 tree rhs = gimple_assign_rhs1 (cur_stmt);
1363 if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
1364 || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
1365 || gimple_assign_lhs (cur_stmt) == NULL_TREE)
1366 return false;
1367
1368 HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
1369 switch (sz)
1370 {
1371 case 16:
1372 load_type = bswap_type = uint16_type_node;
1373 break;
1374 case 32:
1375 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1376 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
1377 {
1378 load_type = uint32_type_node;
1379 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1380 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1381 }
1382 else
1383 return false;
1384 break;
1385 case 64:
1386 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1387 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1388 || (word_mode == SImode
1389 && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1390 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
1391 {
1392 load_type = uint64_type_node;
1393 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1394 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1395 }
1396 else
1397 return false;
1398 break;
1399 default:
1400 return false;
1401 }
1402
b320edc0
JJ
1403 bool cast64_to_32;
1404 uint64_t mask;
1405 gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1406 &cast64_to_32, &mask);
1407 if (!ins_stmt
1408 || n.range != (unsigned HOST_WIDE_INT) sz
1409 || cast64_to_32
1410 || mask != ~(uint64_t) 0)
a7553ad6
JJ
1411 return false;
1412
1413 if (bswap && !fndecl && n.range != 16)
1414 return false;
1415
1416 memset (&nop_stats, 0, sizeof (nop_stats));
1417 memset (&bswap_stats, 0, sizeof (bswap_stats));
1418 return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
b320edc0 1419 bswap_type, load_type, &n, bswap, mask) != NULL_TREE;
a7553ad6
JJ
1420}
1421
dffec8eb
JJ
/* Find manual byte swap implementations as well as loads in a given
   endianness.  Byte swaps are turned into a bswap builtin invocation,
   while endian loads are converted to a bswap builtin invocation or a
   simple load according to the target endianness.  */
1426
1427unsigned int
1428pass_optimize_bswap::execute (function *fun)
1429{
1430 basic_block bb;
1431 bool bswap32_p, bswap64_p;
1432 bool changed = false;
1433 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1434
1435 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1436 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1437 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1438 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1439 || (bswap32_p && word_mode == SImode)));
1440
1441 /* Determine the argument type of the builtins. The code later on
1442 assumes that the return and argument type are the same. */
1443 if (bswap32_p)
1444 {
1445 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1446 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1447 }
1448
1449 if (bswap64_p)
1450 {
1451 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1452 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1453 }
1454
1455 memset (&nop_stats, 0, sizeof (nop_stats));
1456 memset (&bswap_stats, 0, sizeof (bswap_stats));
1457 calculate_dominance_info (CDI_DOMINATORS);
1458
1459 FOR_EACH_BB_FN (bb, fun)
1460 {
1461 gimple_stmt_iterator gsi;
1462
1463 /* We do a reverse scan for bswap patterns to make sure we get the
1464 widest match. As bswap pattern matching doesn't handle previously
1465 inserted smaller bswap replacements as sub-patterns, the wider
1466 variant wouldn't be detected. */
1467 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1468 {
1469 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1470 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1471 enum tree_code code;
1472 struct symbolic_number n;
b320edc0
JJ
1473 bool bswap, cast64_to_32;
1474 uint64_t mask;
dffec8eb
JJ
1475
          /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
             might be moved to a different basic block by bswap_replace and gsi
             must not point to it if that's the case.  Doing the gsi_prev
             here makes sure that gsi points to the statement previous to
             cur_stmt while still making sure that all statements are
             considered in this basic block.  */
1482 gsi_prev (&gsi);
1483
1484 if (!is_gimple_assign (cur_stmt))
1485 continue;
1486
1487 code = gimple_assign_rhs_code (cur_stmt);
1488 switch (code)
1489 {
1490 case LROTATE_EXPR:
1491 case RROTATE_EXPR:
1492 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1493 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1494 % BITS_PER_UNIT)
1495 continue;
1496 /* Fall through. */
1497 case BIT_IOR_EXPR:
1498 break;
cd676dfa
JJ
1499 case CONSTRUCTOR:
1500 {
1501 tree rhs = gimple_assign_rhs1 (cur_stmt);
1502 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1503 && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs))))
1504 break;
1505 }
1506 continue;
dffec8eb
JJ
1507 default:
1508 continue;
1509 }
1510
b320edc0
JJ
1511 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1512 &cast64_to_32, &mask);
dffec8eb
JJ
1513
1514 if (!ins_stmt)
1515 continue;
1516
1517 switch (n.range)
1518 {
1519 case 16:
1520 /* Already in canonical form, nothing to do. */
1521 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1522 continue;
1523 load_type = bswap_type = uint16_type_node;
1524 break;
1525 case 32:
1526 load_type = uint32_type_node;
1527 if (bswap32_p)
1528 {
1529 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1530 bswap_type = bswap32_type;
1531 }
1532 break;
1533 case 64:
1534 load_type = uint64_type_node;
1535 if (bswap64_p)
1536 {
1537 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1538 bswap_type = bswap64_type;
1539 }
1540 break;
1541 default:
1542 continue;
1543 }
1544
1545 if (bswap && !fndecl && n.range != 16)
1546 continue;
1547
4b84d9b8 1548 if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
b320edc0 1549 bswap_type, load_type, &n, bswap, mask))
dffec8eb
JJ
1550 changed = true;
1551 }
1552 }
1553
1554 statistics_counter_event (fun, "16-bit nop implementations found",
1555 nop_stats.found_16bit);
1556 statistics_counter_event (fun, "32-bit nop implementations found",
1557 nop_stats.found_32bit);
1558 statistics_counter_event (fun, "64-bit nop implementations found",
1559 nop_stats.found_64bit);
1560 statistics_counter_event (fun, "16-bit bswap implementations found",
1561 bswap_stats.found_16bit);
1562 statistics_counter_event (fun, "32-bit bswap implementations found",
1563 bswap_stats.found_32bit);
1564 statistics_counter_event (fun, "64-bit bswap implementations found",
1565 bswap_stats.found_64bit);
1566
1567 return (changed ? TODO_update_ssa : 0);
1568}
1569
1570} // anon namespace
1571
1572gimple_opt_pass *
1573make_pass_optimize_bswap (gcc::context *ctxt)
1574{
1575 return new pass_optimize_bswap (ctxt);
1576}
1577
1578namespace {
1579
/* Struct recording one operand for the store, which is either a constant,
   then VAL represents the constant and all the other fields are zero, or
   a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
   and the other fields also reflect the memory load, or an SSA name, then
   VAL represents the SSA name and all the other fields are zero.  */
245f6de1 1585
6c1dae73 1586class store_operand_info
245f6de1 1587{
6c1dae73 1588public:
245f6de1
JJ
1589 tree val;
1590 tree base_addr;
8a91d545
RS
1591 poly_uint64 bitsize;
1592 poly_uint64 bitpos;
1593 poly_uint64 bitregion_start;
1594 poly_uint64 bitregion_end;
245f6de1 1595 gimple *stmt;
383ac8dc 1596 bool bit_not_p;
245f6de1
JJ
1597 store_operand_info ();
1598};
1599
1600store_operand_info::store_operand_info ()
1601 : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
383ac8dc 1602 bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
245f6de1
JJ
1603{
1604}
1605
f663d9ad
KT
1606/* Struct recording the information about a single store of an immediate
1607 to memory. These are created in the first phase and coalesced into
1608 merged_store_group objects in the second phase. */
1609
6c1dae73 1610class store_immediate_info
f663d9ad 1611{
6c1dae73 1612public:
f663d9ad
KT
1613 unsigned HOST_WIDE_INT bitsize;
1614 unsigned HOST_WIDE_INT bitpos;
a62b3dc5
JJ
1615 unsigned HOST_WIDE_INT bitregion_start;
1616 /* This is one past the last bit of the bit region. */
1617 unsigned HOST_WIDE_INT bitregion_end;
f663d9ad
KT
1618 gimple *stmt;
1619 unsigned int order;
e362a897
EB
1620 /* INTEGER_CST for constant store, STRING_CST for string store,
1621 MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
1622 BIT_INSERT_EXPR for bit insertion.
4b84d9b8
JJ
1623 LROTATE_EXPR if it can be only bswap optimized and
1624 ops are not really meaningful.
1625 NOP_EXPR if bswap optimization detected identity, ops
1626 are not meaningful. */
245f6de1 1627 enum tree_code rhs_code;
4b84d9b8
JJ
1628 /* Two fields for bswap optimization purposes. */
1629 struct symbolic_number n;
1630 gimple *ins_stmt;
127ef369 1631 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
d60edaba 1632 bool bit_not_p;
127ef369
JJ
1633 /* True if ops have been swapped and thus ops[1] represents
1634 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1635 bool ops_swapped_p;
629387a6
EB
1636 /* The index number of the landing pad, or 0 if there is none. */
1637 int lp_nr;
245f6de1
JJ
1638 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1639 just the first one. */
1640 store_operand_info ops[2];
b5926e23 1641 store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
a62b3dc5 1642 unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
4b84d9b8 1643 gimple *, unsigned int, enum tree_code,
629387a6 1644 struct symbolic_number &, gimple *, bool, int,
245f6de1
JJ
1645 const store_operand_info &,
1646 const store_operand_info &);
f663d9ad
KT
1647};
1648
1649store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
b5926e23 1650 unsigned HOST_WIDE_INT bp,
a62b3dc5
JJ
1651 unsigned HOST_WIDE_INT brs,
1652 unsigned HOST_WIDE_INT bre,
b5926e23 1653 gimple *st,
245f6de1
JJ
1654 unsigned int ord,
1655 enum tree_code rhscode,
4b84d9b8
JJ
1656 struct symbolic_number &nr,
1657 gimple *ins_stmtp,
d60edaba 1658 bool bitnotp,
629387a6 1659 int nr2,
245f6de1
JJ
1660 const store_operand_info &op0r,
1661 const store_operand_info &op1r)
a62b3dc5 1662 : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
4b84d9b8 1663 stmt (st), order (ord), rhs_code (rhscode), n (nr),
629387a6 1664 ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
4bc6fb21 1665 lp_nr (nr2), ops { op0r, op1r }
245f6de1
JJ
1666{
1667}
f663d9ad
KT
1668
1669/* Struct representing a group of stores to contiguous memory locations.
1670 These are produced by the second phase (coalescing) and consumed in the
1671 third phase that outputs the widened stores. */
1672
6c1dae73 1673class merged_store_group
f663d9ad 1674{
6c1dae73 1675public:
f663d9ad
KT
1676 unsigned HOST_WIDE_INT start;
1677 unsigned HOST_WIDE_INT width;
a62b3dc5
JJ
1678 unsigned HOST_WIDE_INT bitregion_start;
1679 unsigned HOST_WIDE_INT bitregion_end;
1680 /* The size of the allocated memory for val and mask. */
f663d9ad 1681 unsigned HOST_WIDE_INT buf_size;
a62b3dc5 1682 unsigned HOST_WIDE_INT align_base;
8a91d545 1683 poly_uint64 load_align_base[2];
f663d9ad
KT
1684
1685 unsigned int align;
245f6de1 1686 unsigned int load_align[2];
f663d9ad
KT
1687 unsigned int first_order;
1688 unsigned int last_order;
7f5a3982 1689 bool bit_insertion;
e362a897 1690 bool string_concatenation;
18e0c3d1 1691 bool only_constants;
1b3c9813 1692 bool consecutive;
18e0c3d1 1693 unsigned int first_nonmergeable_order;
629387a6 1694 int lp_nr;
f663d9ad 1695
a62b3dc5 1696 auto_vec<store_immediate_info *> stores;
f663d9ad
KT
1697 /* We record the first and last original statements in the sequence because
1698 we'll need their vuse/vdef and replacement position. It's easier to keep
1699 track of them separately as 'stores' is reordered by apply_stores. */
1700 gimple *last_stmt;
1701 gimple *first_stmt;
1702 unsigned char *val;
a62b3dc5 1703 unsigned char *mask;
f663d9ad
KT
1704
1705 merged_store_group (store_immediate_info *);
1706 ~merged_store_group ();
7f5a3982 1707 bool can_be_merged_into (store_immediate_info *);
f663d9ad
KT
1708 void merge_into (store_immediate_info *);
1709 void merge_overlapping (store_immediate_info *);
1710 bool apply_stores ();
a62b3dc5
JJ
1711private:
1712 void do_merge (store_immediate_info *);
f663d9ad
KT
1713};
1714
1715/* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1716
1717static void
1718dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1719{
1720 if (!fd)
1721 return;
1722
1723 for (unsigned int i = 0; i < len; i++)
c94c3532 1724 fprintf (fd, "%02x ", ptr[i]);
f663d9ad
KT
1725 fprintf (fd, "\n");
1726}
1727
f663d9ad
KT
1728/* Clear out LEN bits starting from bit START in the byte array
1729 PTR. This clears the bits to the *right* from START.
1730 START must be within [0, BITS_PER_UNIT) and counts starting from
1731 the least significant bit. */
1732
1733static void
1734clear_bit_region_be (unsigned char *ptr, unsigned int start,
1735 unsigned int len)
1736{
1737 if (len == 0)
1738 return;
1739 /* Clear len bits to the right of start. */
1740 else if (len <= start + 1)
1741 {
1742 unsigned char mask = (~(~0U << len));
1743 mask = mask << (start + 1U - len);
1744 ptr[0] &= ~mask;
1745 }
1746 else if (start != BITS_PER_UNIT - 1)
1747 {
1748 clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1749 clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1750 len - (start % BITS_PER_UNIT) - 1);
1751 }
1752 else if (start == BITS_PER_UNIT - 1
1753 && len > BITS_PER_UNIT)
1754 {
1755 unsigned int nbytes = len / BITS_PER_UNIT;
a62b3dc5 1756 memset (ptr, 0, nbytes);
f663d9ad
KT
1757 if (len % BITS_PER_UNIT != 0)
1758 clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1759 len % BITS_PER_UNIT);
1760 }
1761 else
1762 gcc_unreachable ();
1763}
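
/* Illustrative sketch (not part of the original pass), assuming
   BITS_PER_UNIT == 8: clearing 3 bits to the right of bit 5 in a single
   all-ones byte

     unsigned char b = 0xff;
     clear_bit_region_be (&b, 5, 3);

   builds the mask 0b00111000, so b ends up as 0xc7: bits 5..3 are
   cleared while bits 7..6 and 2..0 are untouched.  */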
1764
1765/* In the byte array PTR clear the bit region that starts at bit
1766 START and is LEN bits wide.
1767 For regions spanning multiple bytes do this recursively until we reach
1768 zero LEN or a region contained within a single byte. */
1769
1770static void
1771clear_bit_region (unsigned char *ptr, unsigned int start,
1772 unsigned int len)
1773{
1774 /* Degenerate base case. */
1775 if (len == 0)
1776 return;
1777 else if (start >= BITS_PER_UNIT)
1778 clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1779 /* Second base case. */
1780 else if ((start + len) <= BITS_PER_UNIT)
1781 {
46a61395 1782 unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
f663d9ad
KT
1783 mask >>= BITS_PER_UNIT - (start + len);
1784
1785 ptr[0] &= ~mask;
1786
1787 return;
1788 }
1789 /* Clear most significant bits in a byte and proceed with the next byte. */
1790 else if (start != 0)
1791 {
1792 clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1f069ef5 1793 clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
f663d9ad
KT
1794 }
1795 /* Whole bytes need to be cleared. */
1796 else if (start == 0 && len > BITS_PER_UNIT)
1797 {
1798 unsigned int nbytes = len / BITS_PER_UNIT;
a848c710
KT
1799 /* We could recurse on each byte but we clear whole bytes, so a simple
1800 memset will do. */
46a61395 1801 memset (ptr, '\0', nbytes);
f663d9ad
KT
1802 /* Clear the remaining sub-byte region if there is one. */
1803 if (len % BITS_PER_UNIT != 0)
1804 clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1805 }
1806 else
1807 gcc_unreachable ();
1808}
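
/* Illustrative sketch (not part of the original pass), assuming
   BITS_PER_UNIT == 8: a region spanning two bytes is split by the
   recursion, e.g.

     unsigned char buf[2] = { 0xff, 0xff };
     clear_bit_region (buf, 3, 7);

   first clears bits 3..7 of buf[0] (giving 0x07) and then recurses to
   clear bits 0..1 of buf[1] (giving 0xfc).  */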
1809
1810/* Write BITLEN bits of EXPR to the byte array PTR at
1811 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1812 Return true if the operation succeeded. */
1813
1814static bool
1815encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
46a61395 1816 unsigned int total_bytes)
f663d9ad
KT
1817{
1818 unsigned int first_byte = bitpos / BITS_PER_UNIT;
ad1de652
JJ
1819 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1820 || (bitpos % BITS_PER_UNIT)
f4b31647 1821 || !int_mode_for_size (bitlen, 0).exists ());
3afd514b
JJ
1822 bool empty_ctor_p
1823 = (TREE_CODE (expr) == CONSTRUCTOR
1824 && CONSTRUCTOR_NELTS (expr) == 0
1825 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1826 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
f663d9ad
KT
1827
1828 if (!sub_byte_op_p)
3afd514b
JJ
1829 {
1830 if (first_byte >= total_bytes)
1831 return false;
1832 total_bytes -= first_byte;
1833 if (empty_ctor_p)
1834 {
1835 unsigned HOST_WIDE_INT rhs_bytes
1836 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1837 if (rhs_bytes > total_bytes)
1838 return false;
1839 memset (ptr + first_byte, '\0', rhs_bytes);
1840 return true;
1841 }
1842 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1843 }
f663d9ad
KT
1844
1845 /* LITTLE-ENDIAN
1846 We are writing a non byte-sized quantity or at a position that is not
1847 at a byte boundary.
1848 |--------|--------|--------| ptr + first_byte
1849 ^ ^
1850 xxx xxxxxxxx xxx< bp>
1851 |______EXPR____|
1852
46a61395 1853 First native_encode_expr EXPR into a temporary buffer and shift each
f663d9ad
KT
1854 byte in the buffer by 'bp' (carrying the bits over as necessary).
1855 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1856 <------bitlen---->< bp>
1857 Then we clear the destination bits:
1858 |---00000|00000000|000-----| ptr + first_byte
1859 <-------bitlen--->< bp>
1860
1861 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1862 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1863
1864 BIG-ENDIAN
1865 We are writing a non byte-sized quantity or at a position that is not
1866 at a byte boundary.
1867 ptr + first_byte |--------|--------|--------|
1868 ^ ^
1869 <bp >xxx xxxxxxxx xxx
1870 |_____EXPR_____|
1871
46a61395 1872 First native_encode_expr EXPR into a temporary buffer and shift each
f663d9ad
KT
 1873 byte in the buffer to the right (carrying the bits over as necessary).
1874 We shift by as much as needed to align the most significant bit of EXPR
1875 with bitpos:
1876 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1877 <---bitlen----> <bp ><-----bitlen----->
1878 Then we clear the destination bits:
1879 ptr + first_byte |-----000||00000000||00000---|
1880 <bp ><-------bitlen----->
1881
1882 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1883 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1884 The awkwardness comes from the fact that bitpos is counted from the
1885 most significant bit of a byte. */
1886
ef1d3b57
RS
1887 /* We must be dealing with fixed-size data at this point, since the
1888 total size is also fixed. */
3afd514b
JJ
1889 unsigned int byte_size;
1890 if (empty_ctor_p)
1891 {
1892 unsigned HOST_WIDE_INT rhs_bytes
1893 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1894 if (rhs_bytes > total_bytes)
1895 return false;
1896 byte_size = rhs_bytes;
1897 }
1898 else
1899 {
1900 fixed_size_mode mode
1901 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
e362a897
EB
1902 byte_size
1903 = mode == BLKmode
1904 ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
1905 : GET_MODE_SIZE (mode);
3afd514b 1906 }
f663d9ad 1907 /* Allocate an extra byte so that we have space to shift into. */
3afd514b 1908 byte_size++;
f663d9ad 1909 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
46a61395 1910 memset (tmpbuf, '\0', byte_size);
f663d9ad 1911 /* The store detection code should only have allowed constants that are
3afd514b
JJ
1912 accepted by native_encode_expr or empty ctors. */
1913 if (!empty_ctor_p
1914 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
f663d9ad
KT
1915 gcc_unreachable ();
1916
1917 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1918 bytes to write. This means it can write more than
1919 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1920 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1921 bitlen and zero out the bits that are not relevant as well (that may
1922 contain a sign bit due to sign-extension). */
1923 unsigned int padding
1924 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
ad1de652
JJ
1925 /* On big-endian the padding is at the 'front' so just skip the initial
1926 bytes. */
1927 if (BYTES_BIG_ENDIAN)
1928 tmpbuf += padding;
1929
1930 byte_size -= padding;
1931
1932 if (bitlen % BITS_PER_UNIT != 0)
f663d9ad 1933 {
4b2c06f4 1934 if (BYTES_BIG_ENDIAN)
ad1de652
JJ
1935 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1936 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1937 else
1938 clear_bit_region (tmpbuf, bitlen,
1939 byte_size * BITS_PER_UNIT - bitlen);
f663d9ad 1940 }
ad1de652
JJ
1941 /* Left shifting relies on the last byte being clear if bitlen is
1942 a multiple of BITS_PER_UNIT, which might not be clear if
1943 there are padding bytes. */
1944 else if (!BYTES_BIG_ENDIAN)
1945 tmpbuf[byte_size - 1] = '\0';
f663d9ad
KT
1946
1947 /* Clear the bit region in PTR where the bits from TMPBUF will be
46a61395 1948 inserted into. */
f663d9ad
KT
1949 if (BYTES_BIG_ENDIAN)
1950 clear_bit_region_be (ptr + first_byte,
1951 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1952 else
1953 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1954
1955 int shift_amnt;
1956 int bitlen_mod = bitlen % BITS_PER_UNIT;
1957 int bitpos_mod = bitpos % BITS_PER_UNIT;
1958
1959 bool skip_byte = false;
1960 if (BYTES_BIG_ENDIAN)
1961 {
1962 /* BITPOS and BITLEN are exactly aligned and no shifting
1963 is necessary. */
1964 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1965 || (bitpos_mod == 0 && bitlen_mod == 0))
1966 shift_amnt = 0;
1967 /* |. . . . . . . .|
1968 <bp > <blen >.
1969 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1970 of the value until it aligns with 'bp' in the next byte over. */
1971 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1972 {
1973 shift_amnt = bitlen_mod + bitpos_mod;
1974 skip_byte = bitlen_mod != 0;
1975 }
1976 /* |. . . . . . . .|
1977 <----bp--->
1978 <---blen---->.
1979 Shift the value right within the same byte so it aligns with 'bp'. */
1980 else
1981 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1982 }
1983 else
1984 shift_amnt = bitpos % BITS_PER_UNIT;
1985
1986 /* Create the shifted version of EXPR. */
1987 if (!BYTES_BIG_ENDIAN)
46a61395 1988 {
8aba425f 1989 shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
46a61395
JJ
1990 if (shift_amnt == 0)
1991 byte_size--;
1992 }
f663d9ad
KT
1993 else
1994 {
1995 gcc_assert (BYTES_BIG_ENDIAN);
1996 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1997 /* If shifting right forced us to move into the next byte skip the now
1998 empty byte. */
1999 if (skip_byte)
2000 {
2001 tmpbuf++;
2002 byte_size--;
2003 }
2004 }
2005
2006 /* Insert the bits from TMPBUF. */
2007 for (unsigned int i = 0; i < byte_size; i++)
2008 ptr[first_byte + i] |= tmpbuf[i];
2009
2010 return true;
2011}
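
/* Illustrative worked example (not part of the original pass), assuming a
   little-endian target with BITS_PER_UNIT == 8 and an INTEGER_CST with
   value 0xa written as a 4-bit quantity at bit position 2 of a zeroed
   destination byte: native_encode_expr fills the temporary buffer with
   { 0x0a, 0x00 }, the bits above the low 4 are cleared (already zero
   here), the buffer is shifted left by bitpos % 8 == 2 to { 0x28, 0x00 },
   bits 2..5 of the destination are cleared, and the OR at the end leaves
   the destination byte equal to 0x28.  */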
2012
2013/* Sorting function for store_immediate_info objects.
2014 Sorts them by bitposition. */
2015
2016static int
2017sort_by_bitpos (const void *x, const void *y)
2018{
2019 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2020 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2021
109cca3b 2022 if ((*tmp)->bitpos < (*tmp2)->bitpos)
f663d9ad
KT
2023 return -1;
2024 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
2025 return 1;
109cca3b 2026 else
0f0027d1
KT
 2027 /* If they are the same, fall back to the order, which is guaranteed
 2028 to be different. */
2029 return (*tmp)->order - (*tmp2)->order;
f663d9ad
KT
2030}
2031
2032/* Sorting function for store_immediate_info objects.
2033 Sorts them by the order field. */
2034
2035static int
2036sort_by_order (const void *x, const void *y)
2037{
2038 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2039 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2040
2041 if ((*tmp)->order < (*tmp2)->order)
2042 return -1;
2043 else if ((*tmp)->order > (*tmp2)->order)
2044 return 1;
2045
2046 gcc_unreachable ();
2047}
2048
2049/* Initialize a merged_store_group object from a store_immediate_info
2050 object. */
2051
2052merged_store_group::merged_store_group (store_immediate_info *info)
2053{
2054 start = info->bitpos;
2055 width = info->bitsize;
a62b3dc5
JJ
2056 bitregion_start = info->bitregion_start;
2057 bitregion_end = info->bitregion_end;
f663d9ad
KT
2058 /* VAL has memory allocated for it in apply_stores once the group
2059 width has been finalized. */
2060 val = NULL;
a62b3dc5 2061 mask = NULL;
e362a897
EB
2062 bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
2063 string_concatenation = info->rhs_code == STRING_CST;
18e0c3d1 2064 only_constants = info->rhs_code == INTEGER_CST;
1b3c9813 2065 consecutive = true;
18e0c3d1 2066 first_nonmergeable_order = ~0U;
629387a6 2067 lp_nr = info->lp_nr;
a62b3dc5
JJ
2068 unsigned HOST_WIDE_INT align_bitpos = 0;
2069 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2070 &align, &align_bitpos);
2071 align_base = start - align_bitpos;
245f6de1
JJ
2072 for (int i = 0; i < 2; ++i)
2073 {
2074 store_operand_info &op = info->ops[i];
2075 if (op.base_addr == NULL_TREE)
2076 {
2077 load_align[i] = 0;
2078 load_align_base[i] = 0;
2079 }
2080 else
2081 {
2082 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
2083 load_align_base[i] = op.bitpos - align_bitpos;
2084 }
2085 }
f663d9ad
KT
2086 stores.create (1);
2087 stores.safe_push (info);
2088 last_stmt = info->stmt;
2089 last_order = info->order;
2090 first_stmt = last_stmt;
2091 first_order = last_order;
2092 buf_size = 0;
2093}
2094
2095merged_store_group::~merged_store_group ()
2096{
2097 if (val)
2098 XDELETEVEC (val);
2099}
2100
7f5a3982
EB
2101/* Return true if the store described by INFO can be merged into the group. */
2102
2103bool
2104merged_store_group::can_be_merged_into (store_immediate_info *info)
2105{
2106 /* Do not merge bswap patterns. */
2107 if (info->rhs_code == LROTATE_EXPR)
2108 return false;
2109
629387a6
EB
2110 if (info->lp_nr != lp_nr)
2111 return false;
2112
7f5a3982
EB
2113 /* The canonical case. */
2114 if (info->rhs_code == stores[0]->rhs_code)
2115 return true;
2116
e362a897 2117 /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST. */
7f5a3982 2118 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
e362a897 2119 return !string_concatenation;
7f5a3982
EB
2120
2121 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
e362a897 2122 return !string_concatenation;
7f5a3982 2123
ed01d707
EB
2124 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
2125 only for small regions since this can generate a lot of instructions. */
7f5a3982
EB
2126 if (info->rhs_code == MEM_REF
2127 && (stores[0]->rhs_code == INTEGER_CST
2128 || stores[0]->rhs_code == BIT_INSERT_EXPR)
2129 && info->bitregion_start == stores[0]->bitregion_start
ed01d707 2130 && info->bitregion_end == stores[0]->bitregion_end
2815558a 2131 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
e362a897 2132 return !string_concatenation;
7f5a3982
EB
2133
2134 if (stores[0]->rhs_code == MEM_REF
2135 && (info->rhs_code == INTEGER_CST
2136 || info->rhs_code == BIT_INSERT_EXPR)
2137 && info->bitregion_start == stores[0]->bitregion_start
ed01d707 2138 && info->bitregion_end == stores[0]->bitregion_end
2815558a 2139 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
e362a897
EB
2140 return !string_concatenation;
2141
2142 /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR. */
2143 if (info->rhs_code == STRING_CST
2144 && stores[0]->rhs_code == INTEGER_CST
2145 && stores[0]->bitsize == CHAR_BIT)
2146 return !bit_insertion;
2147
2148 if (stores[0]->rhs_code == STRING_CST
2149 && info->rhs_code == INTEGER_CST
2150 && info->bitsize == CHAR_BIT)
2151 return !bit_insertion;
7f5a3982
EB
2152
2153 return false;
2154}
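
/* Informal summary of the rules above (a sketch, not part of the original
   pass): stores with the same rhs_code always merge, except that bswap
   LROTATE_EXPR groups and stores with differing landing pads never do;
   INTEGER_CST and BIT_INSERT_EXPR mix unless the group already uses string
   concatenation; MEM_REF mixes with those only when the bit regions are
   identical and no wider than MAX_FIXED_MODE_SIZE, again barring string
   concatenation; STRING_CST mixes with single-character INTEGER_CST stores
   unless bit insertion is already required.  */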
2155
a62b3dc5
JJ
2156/* Helper method for merge_into and merge_overlapping to do
2157 the common part. */
7f5a3982 2158
f663d9ad 2159void
a62b3dc5 2160merged_store_group::do_merge (store_immediate_info *info)
f663d9ad 2161{
a62b3dc5
JJ
2162 bitregion_start = MIN (bitregion_start, info->bitregion_start);
2163 bitregion_end = MAX (bitregion_end, info->bitregion_end);
2164
2165 unsigned int this_align;
2166 unsigned HOST_WIDE_INT align_bitpos = 0;
2167 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2168 &this_align, &align_bitpos);
2169 if (this_align > align)
2170 {
2171 align = this_align;
2172 align_base = info->bitpos - align_bitpos;
2173 }
245f6de1
JJ
2174 for (int i = 0; i < 2; ++i)
2175 {
2176 store_operand_info &op = info->ops[i];
2177 if (!op.base_addr)
2178 continue;
2179
2180 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
2181 if (this_align > load_align[i])
2182 {
2183 load_align[i] = this_align;
2184 load_align_base[i] = op.bitpos - align_bitpos;
2185 }
2186 }
f663d9ad 2187
f663d9ad
KT
2188 gimple *stmt = info->stmt;
2189 stores.safe_push (info);
2190 if (info->order > last_order)
2191 {
2192 last_order = info->order;
2193 last_stmt = stmt;
2194 }
2195 else if (info->order < first_order)
2196 {
2197 first_order = info->order;
2198 first_stmt = stmt;
2199 }
e362a897 2200
1b3c9813
EB
2201 if (info->bitpos != start + width)
2202 consecutive = false;
2203
e362a897
EB
2204 /* We need to use extraction if there is any bit-field. */
2205 if (info->rhs_code == BIT_INSERT_EXPR)
2206 {
2207 bit_insertion = true;
2208 gcc_assert (!string_concatenation);
2209 }
2210
1b3c9813 2211 /* We want to use concatenation if there is any string. */
e362a897
EB
2212 if (info->rhs_code == STRING_CST)
2213 {
2214 string_concatenation = true;
2215 gcc_assert (!bit_insertion);
2216 }
2217
1b3c9813
EB
2218 /* But we cannot use it if we don't have consecutive stores. */
2219 if (!consecutive)
2220 string_concatenation = false;
2221
18e0c3d1
JJ
2222 if (info->rhs_code != INTEGER_CST)
2223 only_constants = false;
f663d9ad
KT
2224}
2225
a62b3dc5
JJ
2226/* Merge a store recorded by INFO into this merged store.
2227 The store is not overlapping with the existing recorded
2228 stores. */
2229
2230void
2231merged_store_group::merge_into (store_immediate_info *info)
2232{
1b3c9813
EB
2233 do_merge (info);
2234
a62b3dc5
JJ
2235 /* Make sure we're inserting in the position we think we're inserting. */
2236 gcc_assert (info->bitpos >= start + width
2237 && info->bitregion_start <= bitregion_end);
2238
c5679c37 2239 width = info->bitpos + info->bitsize - start;
a62b3dc5
JJ
2240}
2241
f663d9ad
KT
2242/* Merge a store described by INFO into this merged store.
2243 INFO overlaps in some way with the current store (i.e. it's not contiguous
2244 which is handled by merged_store_group::merge_into). */
2245
2246void
2247merged_store_group::merge_overlapping (store_immediate_info *info)
2248{
1b3c9813
EB
2249 do_merge (info);
2250
f663d9ad 2251 /* If the store extends the size of the group, extend the width. */
a62b3dc5 2252 if (info->bitpos + info->bitsize > start + width)
c5679c37 2253 width = info->bitpos + info->bitsize - start;
f663d9ad
KT
2254}
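
/* Illustrative arithmetic (not part of the original pass): a group with
   start == 0 and width == 16 that absorbs an overlapping store with
   bitpos == 8 and bitsize == 16 grows to width == 8 + 16 - 0 == 24,
   while a store fully contained in the group leaves the width
   unchanged.  */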
2255
2256/* Go through all the recorded stores in this group in program order and
2257 apply their values to the VAL byte array to create the final merged
2258 value. Return true if the operation succeeded. */
2259
2260bool
2261merged_store_group::apply_stores ()
2262{
e362a897
EB
2263 store_immediate_info *info;
2264 unsigned int i;
2265
a62b3dc5
JJ
2266 /* Make sure we have more than one store in the group, otherwise we cannot
2267 merge anything. */
2268 if (bitregion_start % BITS_PER_UNIT != 0
2269 || bitregion_end % BITS_PER_UNIT != 0
f663d9ad
KT
2270 || stores.length () == 1)
2271 return false;
2272
e362a897
EB
2273 buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;
2274
2275 /* Really do string concatenation for large strings only. */
2276 if (buf_size <= MOVE_MAX)
2277 string_concatenation = false;
2278
c94c3532 2279 /* Create a power-of-2-sized buffer for native_encode_expr. */
e362a897
EB
2280 if (!string_concatenation)
2281 buf_size = 1 << ceil_log2 (buf_size);
2282
a62b3dc5
JJ
2283 val = XNEWVEC (unsigned char, 2 * buf_size);
2284 mask = val + buf_size;
2285 memset (val, 0, buf_size);
2286 memset (mask, ~0U, buf_size);
f663d9ad 2287
e362a897
EB
2288 stores.qsort (sort_by_order);
2289
f663d9ad
KT
2290 FOR_EACH_VEC_ELT (stores, i, info)
2291 {
a62b3dc5 2292 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
c94c3532 2293 tree cst;
245f6de1
JJ
2294 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2295 cst = info->ops[0].val;
2296 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2297 cst = info->ops[1].val;
c94c3532
EB
2298 else
2299 cst = NULL_TREE;
245f6de1 2300 bool ret = true;
e362a897
EB
2301 if (cst && info->rhs_code != BIT_INSERT_EXPR)
2302 ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
2303 buf_size);
c94c3532
EB
2304 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2305 if (BYTES_BIG_ENDIAN)
2306 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2307 - (pos_in_buffer % BITS_PER_UNIT)),
2308 info->bitsize);
2309 else
2310 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
245f6de1 2311 if (cst && dump_file && (dump_flags & TDF_DETAILS))
f663d9ad
KT
2312 {
2313 if (ret)
2314 {
c94c3532 2315 fputs ("After writing ", dump_file);
4af78ef8 2316 print_generic_expr (dump_file, cst, TDF_NONE);
f663d9ad 2317 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
c94c3532
EB
2318 " at position %d\n", info->bitsize, pos_in_buffer);
2319 fputs (" the merged value contains ", dump_file);
f663d9ad 2320 dump_char_array (dump_file, val, buf_size);
c94c3532
EB
2321 fputs (" the merged mask contains ", dump_file);
2322 dump_char_array (dump_file, mask, buf_size);
2323 if (bit_insertion)
2324 fputs (" bit insertion is required\n", dump_file);
e362a897
EB
2325 if (string_concatenation)
2326 fputs (" string concatenation is required\n", dump_file);
f663d9ad
KT
2327 }
2328 else
2329 fprintf (dump_file, "Failed to merge stores\n");
4b84d9b8 2330 }
f663d9ad
KT
2331 if (!ret)
2332 return false;
2333 }
4b84d9b8 2334 stores.qsort (sort_by_bitpos);
f663d9ad
KT
2335 return true;
2336}
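
/* Illustrative sketch (not part of the original pass): for a group of two
   adjacent constant byte stores p[0] = 0x12; p[1] = 0x34; with bit region
   [0, 16), apply_stores computes buf_size == 2 (string concatenation is
   not used for such a small buffer) and produces val == { 0x12, 0x34 }
   and mask == { 0x00, 0x00 }; any mask bits left set would mark bits not
   written by any store in the group.  */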
2337
2338/* Structure describing the store chain. */
2339
6c1dae73 2340class imm_store_chain_info
f663d9ad 2341{
6c1dae73 2342public:
50b6d676
AO
2343 /* Doubly-linked list that imposes an order on chain processing.
2344 PNXP (prev's next pointer) points to the head of a list, or to
2345 the next field in the previous chain in the list.
2346 See pass_store_merging::m_stores_head for more rationale. */
2347 imm_store_chain_info *next, **pnxp;
b5926e23 2348 tree base_addr;
a62b3dc5 2349 auto_vec<store_immediate_info *> m_store_info;
f663d9ad
KT
2350 auto_vec<merged_store_group *> m_merged_store_groups;
2351
50b6d676
AO
2352 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2353 : next (inspt), pnxp (&inspt), base_addr (b_a)
2354 {
2355 inspt = this;
2356 if (next)
2357 {
2358 gcc_checking_assert (pnxp == next->pnxp);
2359 next->pnxp = &next;
2360 }
2361 }
2362 ~imm_store_chain_info ()
2363 {
2364 *pnxp = next;
2365 if (next)
2366 {
2367 gcc_checking_assert (&next == next->pnxp);
2368 next->pnxp = pnxp;
2369 }
2370 }
b5926e23 2371 bool terminate_and_process_chain ();
bd909071
JJ
2372 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2373 unsigned int);
f663d9ad 2374 bool coalesce_immediate_stores ();
b5926e23
RB
2375 bool output_merged_store (merged_store_group *);
2376 bool output_merged_stores ();
f663d9ad
KT
2377};
2378
2379const pass_data pass_data_tree_store_merging = {
2380 GIMPLE_PASS, /* type */
2381 "store-merging", /* name */
2382 OPTGROUP_NONE, /* optinfo_flags */
2383 TV_GIMPLE_STORE_MERGING, /* tv_id */
2384 PROP_ssa, /* properties_required */
2385 0, /* properties_provided */
2386 0, /* properties_destroyed */
2387 0, /* todo_flags_start */
2388 TODO_update_ssa, /* todo_flags_finish */
2389};
2390
2391class pass_store_merging : public gimple_opt_pass
2392{
2393public:
2394 pass_store_merging (gcc::context *ctxt)
95d94b52
RB
2395 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head (),
2396 m_n_chains (0), m_n_stores (0)
f663d9ad
KT
2397 {
2398 }
2399
c94c3532
EB
2400 /* Pass not supported for PDP-endian, nor for insane hosts or
2401 target character sizes where native_{encode,interpret}_expr
a62b3dc5 2402 doesn't work properly. */
f663d9ad
KT
2403 virtual bool
2404 gate (function *)
2405 {
a62b3dc5 2406 return flag_store_merging
c94c3532 2407 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
a62b3dc5
JJ
2408 && CHAR_BIT == 8
2409 && BITS_PER_UNIT == 8;
f663d9ad
KT
2410 }
2411
2412 virtual unsigned int execute (function *);
2413
2414private:
99b1c316 2415 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
f663d9ad 2416
50b6d676
AO
2417 /* Form a doubly-linked stack of the elements of m_stores, so that
2418 we can iterate over them in a predictable way. Using this order
2419 avoids extraneous differences in the compiler output just because
2420 of tree pointer variations (e.g. different chains end up in
2421 different positions of m_stores, so they are handled in different
2422 orders, so they allocate or release SSA names in different
2423 orders, and when they get reused, subsequent passes end up
2424 getting different SSA names, which may ultimately change
2425 decisions when going out of SSA). */
2426 imm_store_chain_info *m_stores_head;
2427
95d94b52
RB
2428 /* The number of store chains currently tracked. */
2429 unsigned m_n_chains;
2430 /* The number of stores currently tracked. */
2431 unsigned m_n_stores;
2432
629387a6
EB
2433 bool process_store (gimple *);
2434 bool terminate_and_process_chain (imm_store_chain_info *);
383ac8dc 2435 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
629387a6 2436 bool terminate_and_process_all_chains ();
f663d9ad
KT
2437}; // class pass_store_merging
2438
2439/* Terminate and process all recorded chains. Return true if any changes
2440 were made. */
2441
2442bool
2443pass_store_merging::terminate_and_process_all_chains ()
2444{
f663d9ad 2445 bool ret = false;
50b6d676 2446 while (m_stores_head)
629387a6 2447 ret |= terminate_and_process_chain (m_stores_head);
b119c055 2448 gcc_assert (m_stores.is_empty ());
f663d9ad
KT
2449 return ret;
2450}
2451
383ac8dc
JJ
2452/* Terminate all chains that are affected by the statement STMT.
2453 CHAIN_INFO is the chain we should ignore from the checks if
629387a6 2454 non-NULL. Return true if any changes were made. */
f663d9ad
KT
2455
2456bool
20770eb8 2457pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
b5926e23 2458 **chain_info,
f663d9ad
KT
2459 gimple *stmt)
2460{
2461 bool ret = false;
2462
2463 /* If the statement doesn't touch memory it can't alias. */
2464 if (!gimple_vuse (stmt))
2465 return false;
2466
9e875fd8 2467 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
6b412bf6
RB
2468 ao_ref store_lhs_ref;
2469 ao_ref_init (&store_lhs_ref, store_lhs);
383ac8dc 2470 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
f663d9ad 2471 {
383ac8dc
JJ
2472 next = cur->next;
2473
2474 /* We already checked all the stores in chain_info and terminated the
2475 chain if necessary. Skip it here. */
2476 if (chain_info && *chain_info == cur)
2477 continue;
2478
245f6de1
JJ
2479 store_immediate_info *info;
2480 unsigned int i;
383ac8dc 2481 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
f663d9ad 2482 {
9e875fd8 2483 tree lhs = gimple_assign_lhs (info->stmt);
6b412bf6
RB
2484 ao_ref lhs_ref;
2485 ao_ref_init (&lhs_ref, lhs);
2486 if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2487 || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2488 || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2489 &lhs_ref, false)))
f663d9ad 2490 {
245f6de1 2491 if (dump_file && (dump_flags & TDF_DETAILS))
f663d9ad 2492 {
245f6de1
JJ
2493 fprintf (dump_file, "stmt causes chain termination:\n");
2494 print_gimple_stmt (dump_file, stmt, 0);
f663d9ad 2495 }
629387a6 2496 ret |= terminate_and_process_chain (cur);
245f6de1 2497 break;
f663d9ad
KT
2498 }
2499 }
2500 }
2501
f663d9ad
KT
2502 return ret;
2503}
2504
2505/* Helper function. Terminate the recorded chain CHAIN_INFO, which stores
2506 to its base object. Return true if the merging and output was successful.
2507 The m_stores entry is removed after the processing in any case. */
2508
2509bool
629387a6 2510pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
f663d9ad 2511{
95d94b52
RB
2512 m_n_stores -= chain_info->m_store_info.length ();
2513 m_n_chains--;
b5926e23
RB
2514 bool ret = chain_info->terminate_and_process_chain ();
2515 m_stores.remove (chain_info->base_addr);
2516 delete chain_info;
f663d9ad
KT
2517 return ret;
2518}
2519
245f6de1 2520/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
629387a6
EB
2521 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2522 be able to sink load of REF across stores between FIRST and LAST, up
2523 to right before LAST. */
245f6de1
JJ
2524
2525bool
2526stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2527{
2528 ao_ref r;
2529 ao_ref_init (&r, ref);
2530 unsigned int count = 0;
2531 tree vop = gimple_vdef (last);
2532 gimple *stmt;
2533
629387a6
EB
2534 /* Return true conservatively if the basic blocks are different. */
2535 if (gimple_bb (first) != gimple_bb (last))
2536 return true;
2537
245f6de1
JJ
2538 do
2539 {
2540 stmt = SSA_NAME_DEF_STMT (vop);
2541 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2542 return true;
4b84d9b8
JJ
2543 if (gimple_store_p (stmt)
2544 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2545 return true;
245f6de1
JJ
2546 /* Avoid quadratic compile time by bounding the number of checks
2547 we perform. */
2548 if (++count > MAX_STORE_ALIAS_CHECKS)
2549 return true;
2550 vop = gimple_vuse (stmt);
2551 }
2552 while (stmt != first);
629387a6 2553
245f6de1
JJ
2554 return false;
2555}
2556
2557/* Return true if INFO->ops[IDX] is mergeable with the
2558 corresponding loads already in MERGED_STORE group.
2559 BASE_ADDR is the base address of the whole store group. */
2560
2561bool
2562compatible_load_p (merged_store_group *merged_store,
2563 store_immediate_info *info,
2564 tree base_addr, int idx)
2565{
2566 store_immediate_info *infof = merged_store->stores[0];
2567 if (!info->ops[idx].base_addr
8a91d545
RS
2568 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2569 info->bitpos - infof->bitpos)
245f6de1
JJ
2570 || !operand_equal_p (info->ops[idx].base_addr,
2571 infof->ops[idx].base_addr, 0))
2572 return false;
2573
2574 store_immediate_info *infol = merged_store->stores.last ();
2575 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2576 /* In this case all vuses should be the same, e.g.
2577 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2578 or
2579 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2580 and we can emit the coalesced load next to any of those loads. */
2581 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2582 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2583 return true;
2584
2585 /* Otherwise, at least for now require that the load has the same
2586 vuse as the store. See following examples. */
2587 if (gimple_vuse (info->stmt) != load_vuse)
2588 return false;
2589
2590 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2591 || (infof != infol
2592 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2593 return false;
2594
2595 /* If the load is from the same location as the store, already
2596 the construction of the immediate chain info guarantees no intervening
2597 stores, so no further checks are needed. Example:
2598 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
8a91d545 2599 if (known_eq (info->ops[idx].bitpos, info->bitpos)
245f6de1
JJ
2600 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2601 return true;
2602
2603 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2604 of the stores in the group, or any other stores in between those.
2605 Previous calls to compatible_load_p ensured that for all the
2606 merged_store->stores IDX loads, no stmts starting with
2607 merged_store->first_stmt and ending right before merged_store->last_stmt
 2608 clobber those loads. */
2609 gimple *first = merged_store->first_stmt;
2610 gimple *last = merged_store->last_stmt;
245f6de1
JJ
 2611 /* The stores are sorted by increasing store bitpos, so if the info->stmt
 2612 store comes before the first load seen so far, we'll be changing
 2613 merged_store->first_stmt. In that case we need to give up if
 2614 any of the earlier processed loads could be clobbered by the stmts in
 2615 the new range. */
2616 if (info->order < merged_store->first_order)
2617 {
3f207ab3 2618 for (store_immediate_info *infoc : merged_store->stores)
245f6de1
JJ
2619 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2620 return false;
2621 first = info->stmt;
2622 }
2623 /* Similarly, we could change merged_store->last_stmt, so ensure
2624 in that case no stmts in the new range clobber any of the earlier
2625 processed loads. */
2626 else if (info->order > merged_store->last_order)
2627 {
3f207ab3 2628 for (store_immediate_info *infoc : merged_store->stores)
245f6de1
JJ
2629 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2630 return false;
2631 last = info->stmt;
2632 }
2633 /* And finally, we'd be adding a new load to the set, ensure it isn't
2634 clobbered in the new range. */
2635 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2636 return false;
2637
2638 /* Otherwise, we are looking for:
2639 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2640 or
2641 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2642 return true;
2643}
2644
4b84d9b8
JJ
2645/* Add all refs loaded to compute VAL to REFS vector. */
2646
2647void
2648gather_bswap_load_refs (vec<tree> *refs, tree val)
2649{
2650 if (TREE_CODE (val) != SSA_NAME)
2651 return;
2652
2653 gimple *stmt = SSA_NAME_DEF_STMT (val);
2654 if (!is_gimple_assign (stmt))
2655 return;
2656
2657 if (gimple_assign_load_p (stmt))
2658 {
2659 refs->safe_push (gimple_assign_rhs1 (stmt));
2660 return;
2661 }
2662
2663 switch (gimple_assign_rhs_class (stmt))
2664 {
2665 case GIMPLE_BINARY_RHS:
2666 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2667 /* FALLTHRU */
2668 case GIMPLE_UNARY_RHS:
2669 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2670 break;
2671 default:
2672 gcc_unreachable ();
2673 }
2674}
2675
c5679c37
JJ
2676/* Check if there are any stores in M_STORE_INFO after index I
2677 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2678 a potential group ending with END that have their order
4d213bf6
JJ
2679 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2680 all the stores already merged and the one under consideration
2681 have rhs_code of INTEGER_CST. Return true if there are no such stores.
c5679c37
JJ
2682 Consider:
2683 MEM[(long long int *)p_28] = 0;
2684 MEM[(long long int *)p_28 + 8B] = 0;
2685 MEM[(long long int *)p_28 + 16B] = 0;
2686 MEM[(long long int *)p_28 + 24B] = 0;
2687 _129 = (int) _130;
2688 MEM[(int *)p_28 + 8B] = _129;
2689 MEM[(int *)p_28].a = -1;
2690 We already have
2691 MEM[(long long int *)p_28] = 0;
2692 MEM[(int *)p_28].a = -1;
2693 stmts in the current group and need to consider if it is safe to
2694 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2695 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2696 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2697 into the group and merging of those 3 stores is successful, merged
2698 stmts will be emitted at the latest store from that group, i.e.
2699 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2700 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2701 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2702 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2703 into the group. That way it will be its own store group and will
4d213bf6 2704 not be touched. If ALL_INTEGER_CST_P and there are overlapping
c5679c37 2705 INTEGER_CST stores, those are mergeable using merge_overlapping,
bd909071
JJ
2706 so don't return false for those.
2707
2708 Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2709 (exclusive), whether they don't overlap the bitrange START to END
2710 and have order in between FIRST_ORDER and LAST_ORDER. This is to
2711 prevent merging in cases like:
2712 MEM <char[12]> [&b + 8B] = {};
2713 MEM[(short *) &b] = 5;
2714 _5 = *x_4(D);
2715 MEM <long long unsigned int> [&b + 2B] = _5;
2716 MEM[(char *)&b + 16B] = 88;
2717 MEM[(int *)&b + 20B] = 1;
2718 The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2719 be merged with it, because the = _5 store overlaps these and is in between
2720 them in sort_by_order ordering. If it was merged, the merged store would
2721 go after the = _5 store and thus change behavior. */
c5679c37
JJ
2722
2723static bool
00dcc88a
MS
2724check_no_overlap (const vec<store_immediate_info *> &m_store_info,
2725 unsigned int i,
bd909071
JJ
2726 bool all_integer_cst_p, unsigned int first_order,
2727 unsigned int last_order, unsigned HOST_WIDE_INT start,
2728 unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2729 unsigned end_earlier)
c5679c37
JJ
2730{
2731 unsigned int len = m_store_info.length ();
bd909071
JJ
2732 for (unsigned int j = first_earlier; j < end_earlier; j++)
2733 {
2734 store_immediate_info *info = m_store_info[j];
2735 if (info->order > first_order
2736 && info->order < last_order
2737 && info->bitpos + info->bitsize > start)
2738 return false;
2739 }
c5679c37
JJ
2740 for (++i; i < len; ++i)
2741 {
2742 store_immediate_info *info = m_store_info[i];
2743 if (info->bitpos >= end)
2744 break;
2745 if (info->order < last_order
4d213bf6 2746 && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
c5679c37
JJ
2747 return false;
2748 }
2749 return true;
2750}
2751
4b84d9b8
JJ
2752/* Return true if m_store_info[first] and at least one following store
 2753 form a group that stores a try_size-bit value which is byte swapped
 2754 from a memory load or some other value, or is the identity of some value.
2755 This uses the bswap pass APIs. */
2756
2757bool
2758imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2759 unsigned int first,
bd909071
JJ
2760 unsigned int try_size,
2761 unsigned int first_earlier)
4b84d9b8
JJ
2762{
2763 unsigned int len = m_store_info.length (), last = first;
2764 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2765 if (width >= try_size)
2766 return false;
2767 for (unsigned int i = first + 1; i < len; ++i)
2768 {
2769 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
cb76fcd7 2770 || m_store_info[i]->lp_nr != merged_store->lp_nr
4b84d9b8
JJ
2771 || m_store_info[i]->ins_stmt == NULL)
2772 return false;
2773 width += m_store_info[i]->bitsize;
2774 if (width >= try_size)
2775 {
2776 last = i;
2777 break;
2778 }
2779 }
2780 if (width != try_size)
2781 return false;
2782
2783 bool allow_unaligned
028d4092 2784 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
4b84d9b8
JJ
2785 /* Punt if the combined store would not be aligned and we need alignment. */
2786 if (!allow_unaligned)
2787 {
2788 unsigned int align = merged_store->align;
2789 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2790 for (unsigned int i = first + 1; i <= last; ++i)
2791 {
2792 unsigned int this_align;
2793 unsigned HOST_WIDE_INT align_bitpos = 0;
2794 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2795 &this_align, &align_bitpos);
2796 if (this_align > align)
2797 {
2798 align = this_align;
2799 align_base = m_store_info[i]->bitpos - align_bitpos;
2800 }
2801 }
2802 unsigned HOST_WIDE_INT align_bitpos
2803 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2804 if (align_bitpos)
2805 align = least_bit_hwi (align_bitpos);
2806 if (align < try_size)
2807 return false;
2808 }
2809
2810 tree type;
2811 switch (try_size)
2812 {
2813 case 16: type = uint16_type_node; break;
2814 case 32: type = uint32_type_node; break;
2815 case 64: type = uint64_type_node; break;
2816 default: gcc_unreachable ();
2817 }
2818 struct symbolic_number n;
2819 gimple *ins_stmt = NULL;
2820 int vuse_store = -1;
2821 unsigned int first_order = merged_store->first_order;
2822 unsigned int last_order = merged_store->last_order;
2823 gimple *first_stmt = merged_store->first_stmt;
2824 gimple *last_stmt = merged_store->last_stmt;
c5679c37 2825 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
4b84d9b8
JJ
2826 store_immediate_info *infof = m_store_info[first];
2827
2828 for (unsigned int i = first; i <= last; ++i)
2829 {
2830 store_immediate_info *info = m_store_info[i];
2831 struct symbolic_number this_n = info->n;
2832 this_n.type = type;
2833 if (!this_n.base_addr)
2834 this_n.range = try_size / BITS_PER_UNIT;
30fa8e9c
JJ
2835 else
2836 /* Update vuse in case it has changed by output_merged_stores. */
2837 this_n.vuse = gimple_vuse (info->ins_stmt);
4b84d9b8
JJ
2838 unsigned int bitpos = info->bitpos - infof->bitpos;
2839 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2840 BYTES_BIG_ENDIAN
2841 ? try_size - info->bitsize - bitpos
2842 : bitpos))
2843 return false;
aa11164a 2844 if (this_n.base_addr && vuse_store)
4b84d9b8
JJ
2845 {
2846 unsigned int j;
2847 for (j = first; j <= last; ++j)
2848 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2849 break;
2850 if (j > last)
2851 {
2852 if (vuse_store == 1)
2853 return false;
2854 vuse_store = 0;
2855 }
2856 }
2857 if (i == first)
2858 {
2859 n = this_n;
2860 ins_stmt = info->ins_stmt;
2861 }
2862 else
2863 {
c5679c37 2864 if (n.base_addr && n.vuse != this_n.vuse)
4b84d9b8 2865 {
c5679c37
JJ
2866 if (vuse_store == 0)
2867 return false;
2868 vuse_store = 1;
4b84d9b8 2869 }
c5679c37
JJ
2870 if (info->order > last_order)
2871 {
2872 last_order = info->order;
2873 last_stmt = info->stmt;
2874 }
2875 else if (info->order < first_order)
2876 {
2877 first_order = info->order;
2878 first_stmt = info->stmt;
2879 }
2880 end = MAX (end, info->bitpos + info->bitsize);
4b84d9b8
JJ
2881
2882 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2883 &this_n, &n);
2884 if (ins_stmt == NULL)
2885 return false;
2886 }
2887 }
2888
2889 uint64_t cmpxchg, cmpnop;
b320edc0
JJ
2890 bool cast64_to_32;
2891 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop, &cast64_to_32);
4b84d9b8
JJ
2892
 2893 /* A complete byte swap should make the symbolic number start with
 2894 the largest digit in the highest order byte. An unchanged symbolic
 2895 number indicates a read with the same endianness as the target architecture. */
2896 if (n.n != cmpnop && n.n != cmpxchg)
2897 return false;
2898
b320edc0
JJ
2899 /* For now. */
2900 if (cast64_to_32)
2901 return false;
2902
4b84d9b8
JJ
2903 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2904 return false;
2905
bd909071
JJ
2906 if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
2907 merged_store->start, end, first_earlier, first))
c5679c37
JJ
2908 return false;
2909
4b84d9b8
JJ
2910 /* Don't handle memory copy this way if normal non-bswap processing
2911 would handle it too. */
2912 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2913 {
2914 unsigned int i;
2915 for (i = first; i <= last; ++i)
2916 if (m_store_info[i]->rhs_code != MEM_REF)
2917 break;
2918 if (i == last + 1)
2919 return false;
2920 }
2921
2922 if (n.n == cmpxchg)
2923 switch (try_size)
2924 {
2925 case 16:
2926 /* Will emit LROTATE_EXPR. */
2927 break;
2928 case 32:
2929 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2930 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2931 break;
2932 return false;
2933 case 64:
2934 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2935 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2936 break;
2937 return false;
2938 default:
2939 gcc_unreachable ();
2940 }
2941
2942 if (!allow_unaligned && n.base_addr)
2943 {
2944 unsigned int align = get_object_alignment (n.src);
2945 if (align < try_size)
2946 return false;
2947 }
2948
2949 /* If each load has vuse of the corresponding store, need to verify
2950 the loads can be sunk right before the last store. */
2951 if (vuse_store == 1)
2952 {
2953 auto_vec<tree, 64> refs;
2954 for (unsigned int i = first; i <= last; ++i)
2955 gather_bswap_load_refs (&refs,
2956 gimple_assign_rhs1 (m_store_info[i]->stmt));
2957
3f207ab3 2958 for (tree ref : refs)
4b84d9b8
JJ
2959 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2960 return false;
2961 n.vuse = NULL_TREE;
2962 }
2963
2964 infof->n = n;
2965 infof->ins_stmt = ins_stmt;
2966 for (unsigned int i = first; i <= last; ++i)
2967 {
2968 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2969 m_store_info[i]->ops[0].base_addr = NULL_TREE;
2970 m_store_info[i]->ops[1].base_addr = NULL_TREE;
2971 if (i != first)
2972 merged_store->merge_into (m_store_info[i]);
2973 }
2974
2975 return true;
2976}
2977
f663d9ad
KT
2978/* Go through the candidate stores recorded in m_store_info and merge them
2979 into merged_store_group objects recorded into m_merged_store_groups
2980 representing the widened stores. Return true if coalescing was successful
2981 and the number of widened stores is fewer than the original number
2982 of stores. */
2983
2984bool
2985imm_store_chain_info::coalesce_immediate_stores ()
2986{
2987 /* Anything less can't be processed. */
2988 if (m_store_info.length () < 2)
2989 return false;
2990
2991 if (dump_file && (dump_flags & TDF_DETAILS))
c94c3532 2992 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
f663d9ad
KT
2993 m_store_info.length ());
2994
2995 store_immediate_info *info;
4b84d9b8 2996 unsigned int i, ignore = 0;
bd909071
JJ
2997 unsigned int first_earlier = 0;
2998 unsigned int end_earlier = 0;
f663d9ad
KT
2999
3000 /* Order the stores by the bitposition they write to. */
3001 m_store_info.qsort (sort_by_bitpos);
3002
3003 info = m_store_info[0];
3004 merged_store_group *merged_store = new merged_store_group (info);
c94c3532
EB
3005 if (dump_file && (dump_flags & TDF_DETAILS))
3006 fputs ("New store group\n", dump_file);
f663d9ad
KT
3007
3008 FOR_EACH_VEC_ELT (m_store_info, i, info)
3009 {
3afd514b
JJ
3010 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
3011
4b84d9b8 3012 if (i <= ignore)
c94c3532 3013 goto done;
f663d9ad 3014
bd909071
JJ
3015 while (first_earlier < end_earlier
3016 && (m_store_info[first_earlier]->bitpos
3017 + m_store_info[first_earlier]->bitsize
3018 <= merged_store->start))
3019 first_earlier++;
3020
4b84d9b8
JJ
3021 /* First try to handle group of stores like:
3022 p[0] = data >> 24;
3023 p[1] = data >> 16;
3024 p[2] = data >> 8;
3025 p[3] = data;
3026 using the bswap framework. */
3027 if (info->bitpos == merged_store->start + merged_store->width
3028 && merged_store->stores.length () == 1
3029 && merged_store->stores[0]->ins_stmt != NULL
cb76fcd7 3030 && info->lp_nr == merged_store->lp_nr
4b84d9b8
JJ
3031 && info->ins_stmt != NULL)
3032 {
3033 unsigned int try_size;
3034 for (try_size = 64; try_size >= 16; try_size >>= 1)
bd909071
JJ
3035 if (try_coalesce_bswap (merged_store, i - 1, try_size,
3036 first_earlier))
4b84d9b8
JJ
3037 break;
3038
3039 if (try_size >= 16)
3040 {
3041 ignore = i + merged_store->stores.length () - 1;
3042 m_merged_store_groups.safe_push (merged_store);
3043 if (ignore < m_store_info.length ())
bd909071
JJ
3044 {
3045 merged_store = new merged_store_group (m_store_info[ignore]);
3046 end_earlier = ignore;
3047 }
4b84d9b8
JJ
3048 else
3049 merged_store = NULL;
c94c3532 3050 goto done;
4b84d9b8
JJ
3051 }
3052 }
3053
3afd514b
JJ
3054 new_bitregion_start
3055 = MIN (merged_store->bitregion_start, info->bitregion_start);
3056 new_bitregion_end
3057 = MAX (merged_store->bitregion_end, info->bitregion_end);
3058
3059 if (info->order >= merged_store->first_nonmergeable_order
3060 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
028d4092 3061 > (unsigned) param_store_merging_max_size))
18e0c3d1
JJ
3062 ;
3063
f663d9ad
KT
3064 /* |---store 1---|
3065 |---store 2---|
4b84d9b8 3066 Overlapping stores. */
18e0c3d1 3067 else if (IN_RANGE (info->bitpos, merged_store->start,
4d213bf6
JJ
3068 merged_store->start + merged_store->width - 1)
3069 /* |---store 1---||---store 2---|
3070 Handle also the consecutive INTEGER_CST stores case here,
3071 as we have here the code to deal with overlaps. */
3072 || (info->bitregion_start <= merged_store->bitregion_end
3073 && info->rhs_code == INTEGER_CST
3074 && merged_store->only_constants
3075 && merged_store->can_be_merged_into (info)))
f663d9ad 3076 {
245f6de1 3077 /* Only allow overlapping stores of constants. */
629387a6
EB
3078 if (info->rhs_code == INTEGER_CST
3079 && merged_store->only_constants
3080 && info->lp_nr == merged_store->lp_nr)
245f6de1 3081 {
bd909071
JJ
3082 unsigned int first_order
3083 = MIN (merged_store->first_order, info->order);
6cd4c66e
JJ
3084 unsigned int last_order
3085 = MAX (merged_store->last_order, info->order);
3086 unsigned HOST_WIDE_INT end
3087 = MAX (merged_store->start + merged_store->width,
3088 info->bitpos + info->bitsize);
bd909071
JJ
3089 if (check_no_overlap (m_store_info, i, true, first_order,
3090 last_order, merged_store->start, end,
3091 first_earlier, end_earlier))
6cd4c66e
JJ
3092 {
3093 /* check_no_overlap call above made sure there are no
3094 overlapping stores with non-INTEGER_CST rhs_code
3095 in between the first and last of the stores we've
3096 just merged. If there are any INTEGER_CST rhs_code
3097 stores in between, we need to merge_overlapping them
3098 even if in the sort_by_bitpos order there are other
3099 overlapping stores in between. Keep those stores as is.
3100 Example:
3101 MEM[(int *)p_28] = 0;
3102 MEM[(char *)p_28 + 3B] = 1;
3103 MEM[(char *)p_28 + 1B] = 2;
3104 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
3105 We can't merge the zero store with the store of two and
3106 not merge anything else, because the store of one is
3107 in the original order in between those two, but in
3108 store_by_bitpos order it comes after the last store that
3109 we can't merge with them. We can merge the first 3 stores
3110 and keep the last store as is though. */
18e0c3d1
JJ
3111 unsigned int len = m_store_info.length ();
3112 unsigned int try_order = last_order;
3113 unsigned int first_nonmergeable_order;
3114 unsigned int k;
3115 bool last_iter = false;
3116 int attempts = 0;
3117 do
6cd4c66e 3118 {
18e0c3d1 3119 unsigned int max_order = 0;
bd909071 3120 unsigned int min_order = first_order;
18e0c3d1
JJ
3121 unsigned first_nonmergeable_int_order = ~0U;
3122 unsigned HOST_WIDE_INT this_end = end;
3123 k = i;
3124 first_nonmergeable_order = ~0U;
3125 for (unsigned int j = i + 1; j < len; ++j)
6cd4c66e 3126 {
18e0c3d1
JJ
3127 store_immediate_info *info2 = m_store_info[j];
3128 if (info2->bitpos >= this_end)
3129 break;
3130 if (info2->order < try_order)
6cd4c66e 3131 {
4119cd69
JJ
3132 if (info2->rhs_code != INTEGER_CST
3133 || info2->lp_nr != merged_store->lp_nr)
18e0c3d1
JJ
3134 {
3135 /* Normally check_no_overlap makes sure this
3136 doesn't happen, but if end grows below,
3137 then we need to process more stores than
3138 check_no_overlap verified. Example:
3139 MEM[(int *)p_5] = 0;
3140 MEM[(short *)p_5 + 3B] = 1;
3141 MEM[(char *)p_5 + 4B] = _9;
3142 MEM[(char *)p_5 + 2B] = 2; */
3143 k = 0;
3144 break;
3145 }
3146 k = j;
bd909071 3147 min_order = MIN (min_order, info2->order);
18e0c3d1
JJ
3148 this_end = MAX (this_end,
3149 info2->bitpos + info2->bitsize);
6cd4c66e 3150 }
18e0c3d1 3151 else if (info2->rhs_code == INTEGER_CST
4119cd69 3152 && info2->lp_nr == merged_store->lp_nr
18e0c3d1
JJ
3153 && !last_iter)
3154 {
3155 max_order = MAX (max_order, info2->order + 1);
3156 first_nonmergeable_int_order
3157 = MIN (first_nonmergeable_int_order,
3158 info2->order);
3159 }
3160 else
3161 first_nonmergeable_order
3162 = MIN (first_nonmergeable_order, info2->order);
6cd4c66e 3163 }
bd909071
JJ
3164 if (k > i
3165 && !check_no_overlap (m_store_info, len - 1, true,
3166 min_order, try_order,
3167 merged_store->start, this_end,
3168 first_earlier, end_earlier))
3169 k = 0;
18e0c3d1
JJ
3170 if (k == 0)
3171 {
3172 if (last_order == try_order)
3173 break;
3174 /* If this failed, but only because we grew
3175 try_order, retry with the last working one,
3176 so that we merge at least something. */
3177 try_order = last_order;
3178 last_iter = true;
3179 continue;
3180 }
3181 last_order = try_order;
3182 /* Retry with a larger try_order to see if we could
3183 merge some further INTEGER_CST stores. */
3184 if (max_order
3185 && (first_nonmergeable_int_order
3186 < first_nonmergeable_order))
3187 {
3188 try_order = MIN (max_order,
3189 first_nonmergeable_order);
3190 try_order
3191 = MIN (try_order,
3192 merged_store->first_nonmergeable_order);
3193 if (try_order > last_order && ++attempts < 16)
3194 continue;
3195 }
3196 first_nonmergeable_order
3197 = MIN (first_nonmergeable_order,
3198 first_nonmergeable_int_order);
3199 end = this_end;
3200 break;
6cd4c66e 3201 }
18e0c3d1 3202 while (1);
6cd4c66e
JJ
3203
3204 if (k != 0)
3205 {
3206 merged_store->merge_overlapping (info);
3207
18e0c3d1
JJ
3208 merged_store->first_nonmergeable_order
3209 = MIN (merged_store->first_nonmergeable_order,
3210 first_nonmergeable_order);
3211
6cd4c66e
JJ
3212 for (unsigned int j = i + 1; j <= k; j++)
3213 {
3214 store_immediate_info *info2 = m_store_info[j];
3215 gcc_assert (info2->bitpos < end);
3216 if (info2->order < last_order)
3217 {
3218 gcc_assert (info2->rhs_code == INTEGER_CST);
18e0c3d1
JJ
3219 if (info != info2)
3220 merged_store->merge_overlapping (info2);
6cd4c66e
JJ
3221 }
3222 /* Other stores are kept and not merged in any
3223 way. */
3224 }
3225 ignore = k;
3226 goto done;
3227 }
3228 }
245f6de1 3229 }
f663d9ad 3230 }
245f6de1
JJ
3231 /* |---store 1---||---store 2---|
3232 This store is consecutive to the previous one.
3233 Merge it into the current store group. There can be gaps in between
3234 the stores, but there can't be gaps in between bitregions. */
c94c3532 3235 else if (info->bitregion_start <= merged_store->bitregion_end
7f5a3982 3236 && merged_store->can_be_merged_into (info))
f663d9ad 3237 {
245f6de1
JJ
3238 store_immediate_info *infof = merged_store->stores[0];
3239
3240 /* All the rhs_code ops that take 2 operands are commutative,
3241 swap the operands if it could make the operands compatible. */
3242 if (infof->ops[0].base_addr
3243 && infof->ops[1].base_addr
3244 && info->ops[0].base_addr
3245 && info->ops[1].base_addr
8a91d545
RS
3246 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
3247 info->bitpos - infof->bitpos)
245f6de1
JJ
3248 && operand_equal_p (info->ops[1].base_addr,
3249 infof->ops[0].base_addr, 0))
127ef369
JJ
3250 {
3251 std::swap (info->ops[0], info->ops[1]);
3252 info->ops_swapped_p = true;
3253 }
4d213bf6 3254 if (check_no_overlap (m_store_info, i, false,
bd909071 3255 MIN (merged_store->first_order, info->order),
a7fe6482 3256 MAX (merged_store->last_order, info->order),
bd909071 3257 merged_store->start,
a7fe6482 3258 MAX (merged_store->start + merged_store->width,
bd909071
JJ
3259 info->bitpos + info->bitsize),
3260 first_earlier, end_earlier))
245f6de1 3261 {
7f5a3982
EB
3262 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
3263 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
3264 {
3265 info->rhs_code = BIT_INSERT_EXPR;
3266 info->ops[0].val = gimple_assign_rhs1 (info->stmt);
3267 info->ops[0].base_addr = NULL_TREE;
3268 }
3269 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
3270 {
3f207ab3 3271 for (store_immediate_info *infoj : merged_store->stores)
7f5a3982
EB
3272 {
3273 infoj->rhs_code = BIT_INSERT_EXPR;
3274 infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
3275 infoj->ops[0].base_addr = NULL_TREE;
3276 }
e362a897 3277 merged_store->bit_insertion = true;
7f5a3982
EB
3278 }
3279 if ((infof->ops[0].base_addr
3280 ? compatible_load_p (merged_store, info, base_addr, 0)
3281 : !info->ops[0].base_addr)
3282 && (infof->ops[1].base_addr
3283 ? compatible_load_p (merged_store, info, base_addr, 1)
3284 : !info->ops[1].base_addr))
3285 {
3286 merged_store->merge_into (info);
3287 goto done;
3288 }
245f6de1
JJ
3289 }
3290 }
f663d9ad 3291
245f6de1
JJ
3292 /* |---store 1---| <gap> |---store 2---|.
3293 Gap between stores or the rhs not compatible. Start a new group. */
f663d9ad 3294
245f6de1
JJ
3295 /* Try to apply all the stores recorded for the group to determine
3296 the bitpattern they write and discard it if that fails.
3297 This will also reject single-store groups. */
c94c3532 3298 if (merged_store->apply_stores ())
245f6de1 3299 m_merged_store_groups.safe_push (merged_store);
c94c3532
EB
3300 else
3301 delete merged_store;
f663d9ad 3302
245f6de1 3303 merged_store = new merged_store_group (info);
bd909071 3304 end_earlier = i;
c94c3532
EB
3305 if (dump_file && (dump_flags & TDF_DETAILS))
3306 fputs ("New store group\n", dump_file);
3307
3308 done:
3309 if (dump_file && (dump_flags & TDF_DETAILS))
3310 {
3311 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
3312 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
3313 i, info->bitsize, info->bitpos);
3314 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
3315 fputc ('\n', dump_file);
3316 }
f663d9ad
KT
3317 }
3318
a62b3dc5 3319 /* Record or discard the last store group. */
4b84d9b8
JJ
3320 if (merged_store)
3321 {
c94c3532 3322 if (merged_store->apply_stores ())
4b84d9b8 3323 m_merged_store_groups.safe_push (merged_store);
c94c3532
EB
3324 else
3325 delete merged_store;
4b84d9b8 3326 }
f663d9ad
KT
3327
3328 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
c94c3532 3329
f663d9ad
KT
3330 bool success
3331 = !m_merged_store_groups.is_empty ()
3332 && m_merged_store_groups.length () < m_store_info.length ();
3333
3334 if (success && dump_file)
c94c3532 3335 fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
a62b3dc5 3336 m_merged_store_groups.length ());
f663d9ad
KT
3337
3338 return success;
3339}
3340
245f6de1
JJ
3341/* Return the type to use for the merged stores or loads described by STMTS.
3342 This is needed to get the alias sets right. If IS_LOAD, look at the rhs
3343 of each statement, otherwise at the lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3344 of the MEM_REFs if any. */
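/* For illustration (made-up types, not from the surrounding sources): if all
   the statements in STMTS access memory through, say, an "int *" based
   reference, that common alias pointer type is returned; if one of them uses
   an incompatible alias pointer type, ptr_type_node is returned instead as
   the conservative choice for the merged access.  Likewise, the dependence
   clique/base information is only kept when it is identical across all the
   MEM_REFs.  */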
f663d9ad
KT
3345
3346static tree
245f6de1
JJ
3347get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3348 unsigned short *cliquep, unsigned short *basep)
f663d9ad
KT
3349{
3350 gimple *stmt;
3351 unsigned int i;
245f6de1
JJ
3352 tree type = NULL_TREE;
3353 tree ret = NULL_TREE;
3354 *cliquep = 0;
3355 *basep = 0;
f663d9ad
KT
3356
3357 FOR_EACH_VEC_ELT (stmts, i, stmt)
3358 {
245f6de1
JJ
3359 tree ref = is_load ? gimple_assign_rhs1 (stmt)
3360 : gimple_assign_lhs (stmt);
3361 tree type1 = reference_alias_ptr_type (ref);
3362 tree base = get_base_address (ref);
f663d9ad 3363
245f6de1
JJ
3364 if (i == 0)
3365 {
3366 if (TREE_CODE (base) == MEM_REF)
3367 {
3368 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3369 *basep = MR_DEPENDENCE_BASE (base);
3370 }
3371 ret = type = type1;
3372 continue;
3373 }
f663d9ad 3374 if (!alias_ptr_types_compatible_p (type, type1))
245f6de1
JJ
3375 ret = ptr_type_node;
3376 if (TREE_CODE (base) != MEM_REF
3377 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3378 || *basep != MR_DEPENDENCE_BASE (base))
3379 {
3380 *cliquep = 0;
3381 *basep = 0;
3382 }
f663d9ad 3383 }
245f6de1 3384 return ret;
f663d9ad
KT
3385}
3386
3387/* Return the first location_t we can find among the statements
3388 in STMTS, or UNKNOWN_LOCATION if none of them has one. */
3389
3390static location_t
245f6de1 3391get_location_for_stmts (vec<gimple *> &stmts)
f663d9ad 3392{
3f207ab3 3393 for (gimple *stmt : stmts)
f663d9ad
KT
3394 if (gimple_has_location (stmt))
3395 return gimple_location (stmt);
3396
3397 return UNKNOWN_LOCATION;
3398}
3399
3400/* Used to describe a store resulting from splitting a wide store into smaller
3401 regularly-sized stores in split_group. */
3402
6c1dae73 3403class split_store
f663d9ad 3404{
6c1dae73 3405public:
f663d9ad
KT
3406 unsigned HOST_WIDE_INT bytepos;
3407 unsigned HOST_WIDE_INT size;
3408 unsigned HOST_WIDE_INT align;
245f6de1 3409 auto_vec<store_immediate_info *> orig_stores;
a62b3dc5
JJ
3410 /* True if there is a single orig stmt covering the whole split store. */
3411 bool orig;
f663d9ad
KT
3412 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3413 unsigned HOST_WIDE_INT);
3414};
3415
3416/* Simple constructor. */
3417
3418split_store::split_store (unsigned HOST_WIDE_INT bp,
3419 unsigned HOST_WIDE_INT sz,
3420 unsigned HOST_WIDE_INT al)
a62b3dc5 3421 : bytepos (bp), size (sz), align (al), orig (false)
f663d9ad 3422{
245f6de1 3423 orig_stores.create (0);
f663d9ad
KT
3424}
3425
245f6de1
JJ
3426/* Record all stores in GROUP that write to the region that starts at BITPOS
3427 and is BITSIZE bits wide. Record infos for such statements in STORES if
3428 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
5384a802
JJ
3429 if there is exactly one original store in the range (in that case ignore
3430 clobber stmts, unless there are only clobber stmts). */
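/* For illustration (made-up positions): if GROUP holds stores covering bits
   [0, 16), [16, 24) and [32, 64), then a query with BITPOS 0 and BITSIZE 32
   records the first two stores (when STORES is non-NULL) and returns NULL,
   since more than one store falls in the range, while a query with BITPOS 32
   and BITSIZE 32 returns the third store, as it is the only one overlapping
   that range.  */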
f663d9ad 3431
a62b3dc5 3432static store_immediate_info *
99b1c316 3433find_constituent_stores (class merged_store_group *group,
245f6de1
JJ
3434 vec<store_immediate_info *> *stores,
3435 unsigned int *first,
3436 unsigned HOST_WIDE_INT bitpos,
3437 unsigned HOST_WIDE_INT bitsize)
f663d9ad 3438{
a62b3dc5 3439 store_immediate_info *info, *ret = NULL;
f663d9ad 3440 unsigned int i;
a62b3dc5
JJ
3441 bool second = false;
3442 bool update_first = true;
f663d9ad 3443 unsigned HOST_WIDE_INT end = bitpos + bitsize;
a62b3dc5 3444 for (i = *first; group->stores.iterate (i, &info); ++i)
f663d9ad
KT
3445 {
3446 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3447 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
a62b3dc5
JJ
3448 if (stmt_end <= bitpos)
3449 {
3450 /* BITPOS passed to this function never decreases from within the
3451 same split_group call, so optimize and don't scan info records
3452 which are known to end before or at BITPOS next time.
3453 Only do it if all stores before this one also pass this. */
3454 if (update_first)
3455 *first = i + 1;
3456 continue;
3457 }
3458 else
3459 update_first = false;
3460
f663d9ad 3461 /* The stores in GROUP are ordered by bitposition so if we're past
a62b3dc5
JJ
3462 the region for this group return early. */
3463 if (stmt_start >= end)
3464 return ret;
3465
5384a802
JJ
3466 if (gimple_clobber_p (info->stmt))
3467 {
3468 if (stores)
3469 stores->safe_push (info);
3470 if (ret == NULL)
3471 ret = info;
3472 continue;
3473 }
245f6de1 3474 if (stores)
a62b3dc5 3475 {
245f6de1 3476 stores->safe_push (info);
5384a802 3477 if (ret && !gimple_clobber_p (ret->stmt))
a62b3dc5
JJ
3478 {
3479 ret = NULL;
3480 second = true;
3481 }
3482 }
5384a802 3483 else if (ret && !gimple_clobber_p (ret->stmt))
a62b3dc5
JJ
3484 return NULL;
3485 if (!second)
3486 ret = info;
f663d9ad 3487 }
a62b3dc5 3488 return ret;
f663d9ad
KT
3489}
3490
d7a9512e
JJ
3491/* Return how many SSA_NAMEs used to compute the value to store in the INFO
3492 store have multiple uses. If any SSA_NAME has multiple uses, also
3493 count statements needed to compute it. */
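/* A made-up example: for a store whose value is computed as
     _1 = *q_5;
     _2 = _1 ^ 12;
     *p_7 = _2;
   where _2 also has a use outside the store chain, the rhs_code is
   BIT_XOR_EXPR and both the BIT_XOR_EXPR statement and the load feeding it
   have to be kept around, so 2 is returned.  */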
3494
3495static unsigned
3496count_multiple_uses (store_immediate_info *info)
3497{
3498 gimple *stmt = info->stmt;
3499 unsigned ret = 0;
3500 switch (info->rhs_code)
3501 {
3502 case INTEGER_CST:
e362a897 3503 case STRING_CST:
d7a9512e
JJ
3504 return 0;
3505 case BIT_AND_EXPR:
3506 case BIT_IOR_EXPR:
3507 case BIT_XOR_EXPR:
d60edaba
JJ
3508 if (info->bit_not_p)
3509 {
3510 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3511 ret = 1; /* Fall through below to return
3512 the BIT_NOT_EXPR stmt and then
3513 BIT_{AND,IOR,XOR}_EXPR and anything it
3514 uses. */
3515 else
3516 /* After this, stmt is the BIT_NOT_EXPR. */
3517 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3518 }
d7a9512e
JJ
3519 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3520 {
3521 ret += 1 + info->ops[0].bit_not_p;
3522 if (info->ops[1].base_addr)
3523 ret += 1 + info->ops[1].bit_not_p;
3524 return ret + 1;
3525 }
3526 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3527 /* stmt is now the BIT_*_EXPR. */
3528 if (!has_single_use (gimple_assign_rhs1 (stmt)))
127ef369
JJ
3529 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3530 else if (info->ops[info->ops_swapped_p].bit_not_p)
d7a9512e
JJ
3531 {
3532 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3533 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3534 ++ret;
3535 }
3536 if (info->ops[1].base_addr == NULL_TREE)
127ef369
JJ
3537 {
3538 gcc_checking_assert (!info->ops_swapped_p);
3539 return ret;
3540 }
d7a9512e 3541 if (!has_single_use (gimple_assign_rhs2 (stmt)))
127ef369
JJ
3542 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3543 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
d7a9512e
JJ
3544 {
3545 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3546 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3547 ++ret;
3548 }
3549 return ret;
3550 case MEM_REF:
3551 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3552 return 1 + info->ops[0].bit_not_p;
3553 else if (info->ops[0].bit_not_p)
3554 {
3555 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3556 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3557 return 1;
3558 }
3559 return 0;
c94c3532
EB
3560 case BIT_INSERT_EXPR:
3561 return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
d7a9512e
JJ
3562 default:
3563 gcc_unreachable ();
3564 }
3565}
3566
f663d9ad 3567/* Split a merged store described by GROUP by populating the SPLIT_STORES
a62b3dc5
JJ
3568 vector (if non-NULL) with split_store structs describing the byte offset
3569 (from the base), the bit size and alignment of each store as well as the
3570 original statements involved in each such split group.
f663d9ad
KT
3571 This is to separate the splitting strategy from the statement
3572 building/emission/linking done in output_merged_store.
a62b3dc5 3573 Return the number of new stores.
245f6de1
JJ
3574 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3575 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
3afd514b
JJ
3576 BZERO_FIRST may be true only when the first store covers the whole group
3577 and clears it; if BZERO_FIRST is true, keep that first store in the set
3578 unmodified and emit further stores for the overrides only.
a62b3dc5
JJ
3579 If SPLIT_STORES is NULL, it is just a dry run to count the number of
3580 new stores. */
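/* A rough made-up example: assume a group whose bit region covers bytes
   [0, 8) of a 4-byte aligned base, with every byte actually written, and
   assume MAX_STORE_BITSIZE is 64.  With ALLOW_UNALIGNED_STORE false the
   alignment caps try_size at 32 bits, so two 4-byte split_store entries are
   produced (bytepos 0 and bytepos 4); with ALLOW_UNALIGNED_STORE true a
   single 8-byte split_store would be tried instead.  */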
f663d9ad 3581
a62b3dc5 3582static unsigned int
245f6de1 3583split_group (merged_store_group *group, bool allow_unaligned_store,
3afd514b 3584 bool allow_unaligned_load, bool bzero_first,
99b1c316 3585 vec<split_store *> *split_stores,
d7a9512e
JJ
3586 unsigned *total_orig,
3587 unsigned *total_new)
f663d9ad 3588{
a62b3dc5
JJ
3589 unsigned HOST_WIDE_INT pos = group->bitregion_start;
3590 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
f663d9ad 3591 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
a62b3dc5
JJ
3592 unsigned HOST_WIDE_INT group_align = group->align;
3593 unsigned HOST_WIDE_INT align_base = group->align_base;
245f6de1 3594 unsigned HOST_WIDE_INT group_load_align = group_align;
d7a9512e 3595 bool any_orig = false;
f663d9ad 3596
f663d9ad
KT
3597 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3598
e362a897
EB
3599 /* For the bswap framework using sets of stores, all the checking has been done
3600 earlier in try_coalesce_bswap and the result always needs to be emitted
3601 as a single store. Likewise for string concatenation. */
4b84d9b8 3602 if (group->stores[0]->rhs_code == LROTATE_EXPR
e362a897
EB
3603 || group->stores[0]->rhs_code == NOP_EXPR
3604 || group->string_concatenation)
4b84d9b8 3605 {
3afd514b 3606 gcc_assert (!bzero_first);
4b84d9b8
JJ
3607 if (total_orig)
3608 {
3609 /* Avoid the old/new stmt count heuristics. It should always
3610 be beneficial. */
3611 total_new[0] = 1;
3612 total_orig[0] = 2;
3613 }
3614
3615 if (split_stores)
3616 {
3617 unsigned HOST_WIDE_INT align_bitpos
3618 = (group->start - align_base) & (group_align - 1);
3619 unsigned HOST_WIDE_INT align = group_align;
3620 if (align_bitpos)
3621 align = least_bit_hwi (align_bitpos);
3622 bytepos = group->start / BITS_PER_UNIT;
99b1c316 3623 split_store *store
4b84d9b8
JJ
3624 = new split_store (bytepos, group->width, align);
3625 unsigned int first = 0;
3626 find_constituent_stores (group, &store->orig_stores,
3627 &first, group->start, group->width);
3628 split_stores->safe_push (store);
3629 }
3630
3631 return 1;
3632 }
3633
a62b3dc5 3634 unsigned int ret = 0, first = 0;
f663d9ad 3635 unsigned HOST_WIDE_INT try_pos = bytepos;
f663d9ad 3636
d7a9512e
JJ
3637 if (total_orig)
3638 {
3639 unsigned int i;
3640 store_immediate_info *info = group->stores[0];
3641
3642 total_new[0] = 0;
3643 total_orig[0] = 1; /* The orig store. */
3644 info = group->stores[0];
3645 if (info->ops[0].base_addr)
a6fbd154 3646 total_orig[0]++;
d7a9512e 3647 if (info->ops[1].base_addr)
a6fbd154 3648 total_orig[0]++;
d7a9512e
JJ
3649 switch (info->rhs_code)
3650 {
3651 case BIT_AND_EXPR:
3652 case BIT_IOR_EXPR:
3653 case BIT_XOR_EXPR:
3654 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3655 break;
3656 default:
3657 break;
3658 }
3659 total_orig[0] *= group->stores.length ();
3660
3661 FOR_EACH_VEC_ELT (group->stores, i, info)
a6fbd154
JJ
3662 {
3663 total_new[0] += count_multiple_uses (info);
3664 total_orig[0] += (info->bit_not_p
3665 + info->ops[0].bit_not_p
3666 + info->ops[1].bit_not_p);
3667 }
d7a9512e
JJ
3668 }
3669
245f6de1
JJ
3670 if (!allow_unaligned_load)
3671 for (int i = 0; i < 2; ++i)
3672 if (group->load_align[i])
3673 group_load_align = MIN (group_load_align, group->load_align[i]);
3674
3afd514b
JJ
3675 if (bzero_first)
3676 {
5384a802
JJ
3677 store_immediate_info *gstore;
3678 FOR_EACH_VEC_ELT (group->stores, first, gstore)
3679 if (!gimple_clobber_p (gstore->stmt))
3680 break;
3681 ++first;
3afd514b
JJ
3682 ret = 1;
3683 if (split_stores)
3684 {
99b1c316 3685 split_store *store
5384a802
JJ
3686 = new split_store (bytepos, gstore->bitsize, align_base);
3687 store->orig_stores.safe_push (gstore);
3afd514b
JJ
3688 store->orig = true;
3689 any_orig = true;
3690 split_stores->safe_push (store);
3691 }
3692 }
3693
f663d9ad
KT
3694 while (size > 0)
3695 {
245f6de1 3696 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
3afd514b
JJ
3697 && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3698 || (bzero_first && group->val[try_pos - bytepos] == 0)))
a62b3dc5
JJ
3699 {
3700 /* Skip padding bytes. */
3701 ++try_pos;
3702 size -= BITS_PER_UNIT;
3703 continue;
3704 }
3705
f663d9ad 3706 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
a62b3dc5
JJ
3707 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3708 unsigned HOST_WIDE_INT align_bitpos
3709 = (try_bitpos - align_base) & (group_align - 1);
3710 unsigned HOST_WIDE_INT align = group_align;
5384a802 3711 bool found_orig = false;
a62b3dc5
JJ
3712 if (align_bitpos)
3713 align = least_bit_hwi (align_bitpos);
245f6de1 3714 if (!allow_unaligned_store)
a62b3dc5 3715 try_size = MIN (try_size, align);
245f6de1
JJ
3716 if (!allow_unaligned_load)
3717 {
3718 /* If we can't do or don't want to do unaligned stores
3719 as well as loads, we need to take the loads into account
3720 as well. */
3721 unsigned HOST_WIDE_INT load_align = group_load_align;
3722 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3723 if (align_bitpos)
3724 load_align = least_bit_hwi (align_bitpos);
3725 for (int i = 0; i < 2; ++i)
3726 if (group->load_align[i])
3727 {
8a91d545
RS
3728 align_bitpos
3729 = known_alignment (try_bitpos
3730 - group->stores[0]->bitpos
3731 + group->stores[0]->ops[i].bitpos
3732 - group->load_align_base[i]);
3733 if (align_bitpos & (group_load_align - 1))
245f6de1
JJ
3734 {
3735 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3736 load_align = MIN (load_align, a);
3737 }
3738 }
3739 try_size = MIN (try_size, load_align);
3740 }
a62b3dc5 3741 store_immediate_info *info
245f6de1 3742 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
5384a802 3743 if (info && !gimple_clobber_p (info->stmt))
a62b3dc5
JJ
3744 {
3745 /* If there is just one original statement for the range, see if
3746 we can just reuse the original store which could be even larger
3747 than try_size. */
3748 unsigned HOST_WIDE_INT stmt_end
3749 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
245f6de1
JJ
3750 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3751 stmt_end - try_bitpos);
a62b3dc5
JJ
3752 if (info && info->bitpos >= try_bitpos)
3753 {
5384a802
JJ
3754 store_immediate_info *info2 = NULL;
3755 unsigned int first_copy = first;
3756 if (info->bitpos > try_bitpos
3757 && stmt_end - try_bitpos <= try_size)
3758 {
3759 info2 = find_constituent_stores (group, NULL, &first_copy,
3760 try_bitpos,
3761 info->bitpos - try_bitpos);
3762 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3763 }
3764 if (info2 == NULL && stmt_end - try_bitpos < try_size)
3765 {
3766 info2 = find_constituent_stores (group, NULL, &first_copy,
3767 stmt_end,
3768 (try_bitpos + try_size)
3769 - stmt_end);
3770 gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
3771 }
3772 if (info2 == NULL)
3773 {
3774 try_size = stmt_end - try_bitpos;
3775 found_orig = true;
3776 goto found;
3777 }
a62b3dc5
JJ
3778 }
3779 }
f663d9ad 3780
a62b3dc5
JJ
3781 /* Approximate store bitsize for the case when there are no padding
3782 bits. */
3783 while (try_size > size)
3784 try_size /= 2;
3785 /* Now look for whole padding bytes at the end of that bitsize. */
3786 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3787 if (group->mask[try_pos - bytepos + nonmasked - 1]
3afd514b
JJ
3788 != (unsigned char) ~0U
3789 && (!bzero_first
3790 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
a62b3dc5 3791 break;
5384a802 3792 if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
a62b3dc5
JJ
3793 {
3794 /* If entire try_size range is padding, skip it. */
3795 try_pos += try_size / BITS_PER_UNIT;
3796 size -= try_size;
3797 continue;
3798 }
3799 /* Otherwise try to decrease try_size if second half, last 3 quarters
3800 etc. are padding. */
3801 nonmasked *= BITS_PER_UNIT;
3802 while (nonmasked <= try_size / 2)
3803 try_size /= 2;
245f6de1 3804 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
a62b3dc5
JJ
3805 {
3806 /* Now look for whole padding bytes at the start of that bitsize. */
3807 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3808 for (masked = 0; masked < try_bytesize; ++masked)
3afd514b
JJ
3809 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3810 && (!bzero_first
3811 || group->val[try_pos - bytepos + masked] != 0))
a62b3dc5
JJ
3812 break;
3813 masked *= BITS_PER_UNIT;
3814 gcc_assert (masked < try_size);
3815 if (masked >= try_size / 2)
3816 {
3817 while (masked >= try_size / 2)
3818 {
3819 try_size /= 2;
3820 try_pos += try_size / BITS_PER_UNIT;
3821 size -= try_size;
3822 masked -= try_size;
3823 }
3824 /* Need to recompute the alignment, so just retry at the new
3825 position. */
3826 continue;
3827 }
3828 }
3829
3830 found:
3831 ++ret;
f663d9ad 3832
a62b3dc5
JJ
3833 if (split_stores)
3834 {
99b1c316 3835 split_store *store
a62b3dc5 3836 = new split_store (try_pos, try_size, align);
245f6de1
JJ
3837 info = find_constituent_stores (group, &store->orig_stores,
3838 &first, try_bitpos, try_size);
a62b3dc5 3839 if (info
5384a802 3840 && !gimple_clobber_p (info->stmt)
a62b3dc5 3841 && info->bitpos >= try_bitpos
5384a802
JJ
3842 && info->bitpos + info->bitsize <= try_bitpos + try_size
3843 && (store->orig_stores.length () == 1
3844 || found_orig
3845 || (info->bitpos == try_bitpos
3846 && (info->bitpos + info->bitsize
3847 == try_bitpos + try_size))))
d7a9512e
JJ
3848 {
3849 store->orig = true;
3850 any_orig = true;
3851 }
a62b3dc5
JJ
3852 split_stores->safe_push (store);
3853 }
3854
3855 try_pos += try_size / BITS_PER_UNIT;
f663d9ad 3856 size -= try_size;
f663d9ad 3857 }
a62b3dc5 3858
d7a9512e
JJ
3859 if (total_orig)
3860 {
a6fbd154 3861 unsigned int i;
99b1c316 3862 split_store *store;
d7a9512e
JJ
3863 /* If we are reusing some original stores and any of the
3864 original SSA_NAMEs had multiple uses, we need to subtract
3865 those now before we add the new ones. */
3866 if (total_new[0] && any_orig)
3867 {
d7a9512e
JJ
3868 FOR_EACH_VEC_ELT (*split_stores, i, store)
3869 if (store->orig)
3870 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3871 }
3872 total_new[0] += ret; /* The new store. */
3873 store_immediate_info *info = group->stores[0];
3874 if (info->ops[0].base_addr)
a6fbd154 3875 total_new[0] += ret;
d7a9512e 3876 if (info->ops[1].base_addr)
a6fbd154 3877 total_new[0] += ret;
d7a9512e
JJ
3878 switch (info->rhs_code)
3879 {
3880 case BIT_AND_EXPR:
3881 case BIT_IOR_EXPR:
3882 case BIT_XOR_EXPR:
3883 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
3884 break;
3885 default:
3886 break;
3887 }
a6fbd154
JJ
3888 FOR_EACH_VEC_ELT (*split_stores, i, store)
3889 {
3890 unsigned int j;
3891 bool bit_not_p[3] = { false, false, false };
3892 /* If all orig_stores have a given bit_not_p flag set, then
3893 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3894 If only some orig_stores have it set, then we'd use
3895 a BIT_XOR_EXPR with a mask and need to account for
3896 it. */
3897 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3898 {
3899 if (info->ops[0].bit_not_p)
3900 bit_not_p[0] = true;
3901 if (info->ops[1].bit_not_p)
3902 bit_not_p[1] = true;
3903 if (info->bit_not_p)
3904 bit_not_p[2] = true;
3905 }
3906 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3907 }
3908
d7a9512e
JJ
3909 }
3910
a62b3dc5 3911 return ret;
f663d9ad
KT
3912}
3913
a6fbd154
JJ
3914/* Return the operation through which the operand IDX (if < 2) or
3915 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3916 is done; if BIT_NOT_EXPR, all bits are inverted; if BIT_XOR_EXPR,
3917 the bits should be xored with MASK. */
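/* For illustration: if a split store combines two 1-byte original stores and
   both have the relevant bit_not_p flag set (and neither value has padding
   bits), BIT_NOT_EXPR is returned and the whole combined value is inverted
   at once.  If only one of them has the flag set, BIT_XOR_EXPR is returned
   and MASK gets all bits of that store's byte set, so that only those bits
   are inverted.  */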
3918
3919static enum tree_code
3920invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3921{
3922 unsigned int i;
3923 store_immediate_info *info;
3924 unsigned int cnt = 0;
e215422f 3925 bool any_paddings = false;
a6fbd154
JJ
3926 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3927 {
3928 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3929 if (bit_not_p)
e215422f
JJ
3930 {
3931 ++cnt;
3932 tree lhs = gimple_assign_lhs (info->stmt);
3933 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3934 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3935 any_paddings = true;
3936 }
a6fbd154
JJ
3937 }
3938 mask = NULL_TREE;
3939 if (cnt == 0)
3940 return NOP_EXPR;
e215422f 3941 if (cnt == split_store->orig_stores.length () && !any_paddings)
a6fbd154
JJ
3942 return BIT_NOT_EXPR;
3943
3944 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3945 unsigned buf_size = split_store->size / BITS_PER_UNIT;
3946 unsigned char *buf
3947 = XALLOCAVEC (unsigned char, buf_size);
3948 memset (buf, ~0U, buf_size);
3949 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3950 {
3951 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3952 if (!bit_not_p)
3953 continue;
3954 /* Clear regions with bit_not_p and invert afterwards, rather than
3955 clear regions with !bit_not_p, so that gaps in between stores aren't
3956 set in the mask. */
3957 unsigned HOST_WIDE_INT bitsize = info->bitsize;
e215422f 3958 unsigned HOST_WIDE_INT prec = bitsize;
a6fbd154 3959 unsigned int pos_in_buffer = 0;
e215422f
JJ
3960 if (any_paddings)
3961 {
3962 tree lhs = gimple_assign_lhs (info->stmt);
3963 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3964 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3965 prec = TYPE_PRECISION (TREE_TYPE (lhs));
3966 }
a6fbd154
JJ
3967 if (info->bitpos < try_bitpos)
3968 {
3969 gcc_assert (info->bitpos + bitsize > try_bitpos);
e215422f
JJ
3970 if (!BYTES_BIG_ENDIAN)
3971 {
3972 if (prec <= try_bitpos - info->bitpos)
3973 continue;
3974 prec -= try_bitpos - info->bitpos;
3975 }
3976 bitsize -= try_bitpos - info->bitpos;
3977 if (BYTES_BIG_ENDIAN && prec > bitsize)
3978 prec = bitsize;
a6fbd154
JJ
3979 }
3980 else
3981 pos_in_buffer = info->bitpos - try_bitpos;
e215422f
JJ
3982 if (prec < bitsize)
3983 {
3984 /* If this is a bool inversion, invert just the least significant
3985 prec bits rather than all bits of it. */
3986 if (BYTES_BIG_ENDIAN)
3987 {
3988 pos_in_buffer += bitsize - prec;
3989 if (pos_in_buffer >= split_store->size)
3990 continue;
3991 }
3992 bitsize = prec;
3993 }
a6fbd154
JJ
3994 if (pos_in_buffer + bitsize > split_store->size)
3995 bitsize = split_store->size - pos_in_buffer;
3996 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3997 if (BYTES_BIG_ENDIAN)
3998 clear_bit_region_be (p, (BITS_PER_UNIT - 1
3999 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
4000 else
4001 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
4002 }
4003 for (unsigned int i = 0; i < buf_size; ++i)
4004 buf[i] = ~buf[i];
4005 mask = native_interpret_expr (int_type, buf, buf_size);
4006 return BIT_XOR_EXPR;
4007}
4008
f663d9ad
KT
4009/* Given a merged store group GROUP, output the widened version of it.
4010 The store chain is against the base object BASE.
4011 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
4012 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
4013 Make sure that the number of statements output is less than the number of
4014 original statements. If a better sequence is possible emit it and
4015 return true. */
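/* A rough sketch of what gets emitted for one split store of N bits whose
   mask is not all zeros (i.e. some destination bits must be preserved),
   with p and uintN_t standing in for the real destination and type:
     _1 = *(uintN_t *)p;
     _2 = _1 & mask;
     _3 = src & ~mask;
     _4 = _2 | _3;
     *(uintN_t *)p = _4;
   where mask has ones exactly in the bits to be preserved.  If the mask is
   zero, the load and the bitwise masking are omitted and the combined value
   is stored directly.  */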
4016
4017bool
b5926e23 4018imm_store_chain_info::output_merged_store (merged_store_group *group)
f663d9ad 4019{
e362a897 4020 const unsigned HOST_WIDE_INT start_byte_pos
a62b3dc5 4021 = group->bitregion_start / BITS_PER_UNIT;
f663d9ad
KT
4022 unsigned int orig_num_stmts = group->stores.length ();
4023 if (orig_num_stmts < 2)
4024 return false;
4025
245f6de1 4026 bool allow_unaligned_store
028d4092 4027 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
245f6de1 4028 bool allow_unaligned_load = allow_unaligned_store;
3afd514b 4029 bool bzero_first = false;
5384a802
JJ
4030 store_immediate_info *store;
4031 unsigned int num_clobber_stmts = 0;
4032 if (group->stores[0]->rhs_code == INTEGER_CST)
4033 {
e362a897 4034 unsigned int i;
5384a802
JJ
4035 FOR_EACH_VEC_ELT (group->stores, i, store)
4036 if (gimple_clobber_p (store->stmt))
4037 num_clobber_stmts++;
4038 else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
4039 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
4040 && group->start == store->bitpos
4041 && group->width == store->bitsize
4042 && (group->start % BITS_PER_UNIT) == 0
4043 && (group->width % BITS_PER_UNIT) == 0)
4044 {
4045 bzero_first = true;
4046 break;
4047 }
4048 else
4049 break;
4050 FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
4051 if (gimple_clobber_p (store->stmt))
4052 num_clobber_stmts++;
4053 if (num_clobber_stmts == orig_num_stmts)
4054 return false;
4055 orig_num_stmts -= num_clobber_stmts;
4056 }
3afd514b 4057 if (allow_unaligned_store || bzero_first)
a62b3dc5
JJ
4058 {
4059 /* If unaligned stores are allowed, see how many stores we'd emit
4060 for unaligned and how many stores we'd emit for aligned stores.
3afd514b
JJ
4061 Only use unaligned stores if that results in fewer stores than aligned ones.
4062 Similarly, if the first store clears the whole region, compare expanding
4063 it together with the following stores against keeping that clear first
4064 and emitting merged stores only for the overrides, and pick the cheaper one. */
21f65995 4065 unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
3afd514b
JJ
4066 int pass_min = 0;
4067 for (int pass = 0; pass < 4; ++pass)
4068 {
4069 if (!allow_unaligned_store && (pass & 1) != 0)
4070 continue;
4071 if (!bzero_first && (pass & 2) != 0)
4072 continue;
4073 cnt[pass] = split_group (group, (pass & 1) != 0,
4074 allow_unaligned_load, (pass & 2) != 0,
4075 NULL, NULL, NULL);
4076 if (cnt[pass] < cnt[pass_min])
4077 pass_min = pass;
4078 }
4079 if ((pass_min & 1) == 0)
245f6de1 4080 allow_unaligned_store = false;
3afd514b
JJ
4081 if ((pass_min & 2) == 0)
4082 bzero_first = false;
a62b3dc5 4083 }
e362a897
EB
4084
4085 auto_vec<class split_store *, 32> split_stores;
4086 split_store *split_store;
4087 unsigned total_orig, total_new, i;
3afd514b 4088 split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
d7a9512e 4089 &split_stores, &total_orig, &total_new);
a62b3dc5 4090
5384a802
JJ
4091 /* Determine if there is a clobber covering the whole group at the start,
4092 followed by proposed split stores that cover the whole group. In that
4093 case, prefer the transformation even if
4094 split_stores.length () == orig_num_stmts. */
4095 bool clobber_first = false;
4096 if (num_clobber_stmts
4097 && gimple_clobber_p (group->stores[0]->stmt)
4098 && group->start == group->stores[0]->bitpos
4099 && group->width == group->stores[0]->bitsize
4100 && (group->start % BITS_PER_UNIT) == 0
4101 && (group->width % BITS_PER_UNIT) == 0)
4102 {
4103 clobber_first = true;
4104 unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
4105 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4106 if (split_store->bytepos != pos)
4107 {
4108 clobber_first = false;
4109 break;
4110 }
4111 else
4112 pos += split_store->size / BITS_PER_UNIT;
4113 if (pos != (group->start + group->width) / BITS_PER_UNIT)
4114 clobber_first = false;
4115 }
4116
4117 if (split_stores.length () >= orig_num_stmts + clobber_first)
a62b3dc5 4118 {
5384a802 4119
a62b3dc5
JJ
4120 /* We didn't manage to reduce the number of statements. Bail out. */
4121 if (dump_file && (dump_flags & TDF_DETAILS))
d7a9512e
JJ
4122 fprintf (dump_file, "Exceeded original number of stmts (%u)."
4123 " Not profitable to emit new sequence.\n",
4124 orig_num_stmts);
dd172744
RB
4125 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4126 delete split_store;
a62b3dc5
JJ
4127 return false;
4128 }
d7a9512e
JJ
4129 if (total_orig <= total_new)
4130 {
4131 /* If number of estimated new statements is above estimated original
4132 statements, bail out too. */
4133 if (dump_file && (dump_flags & TDF_DETAILS))
4134 fprintf (dump_file, "Estimated number of original stmts (%u)"
4135 " not larger than estimated number of new"
4136 " stmts (%u).\n",
4137 total_orig, total_new);
dd172744
RB
4138 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4139 delete split_store;
4b84d9b8 4140 return false;
d7a9512e 4141 }
5384a802
JJ
4142 if (group->stores[0]->rhs_code == INTEGER_CST)
4143 {
4144 bool all_orig = true;
4145 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4146 if (!split_store->orig)
4147 {
4148 all_orig = false;
4149 break;
4150 }
4151 if (all_orig)
4152 {
4153 unsigned int cnt = split_stores.length ();
4154 store_immediate_info *store;
4155 FOR_EACH_VEC_ELT (group->stores, i, store)
4156 if (gimple_clobber_p (store->stmt))
4157 ++cnt;
4158 /* Punt if we wouldn't make any real changes, i.e. keep all
4159 orig stmts + all clobbers. */
4160 if (cnt == group->stores.length ())
4161 {
4162 if (dump_file && (dump_flags & TDF_DETAILS))
4163 fprintf (dump_file, "Exceeded original number of stmts (%u)."
4164 " Not profitable to emit new sequence.\n",
4165 orig_num_stmts);
4166 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4167 delete split_store;
4168 return false;
4169 }
4170 }
4171 }
f663d9ad
KT
4172
4173 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
4174 gimple_seq seq = NULL;
f663d9ad
KT
4175 tree last_vdef, new_vuse;
4176 last_vdef = gimple_vdef (group->last_stmt);
4177 new_vuse = gimple_vuse (group->last_stmt);
4b84d9b8
JJ
4178 tree bswap_res = NULL_TREE;
4179
5384a802
JJ
4180 /* Clobbers are not removed. */
4181 if (gimple_clobber_p (group->last_stmt))
4182 {
4183 new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
4184 gimple_set_vdef (group->last_stmt, new_vuse);
4185 }
4186
4b84d9b8
JJ
4187 if (group->stores[0]->rhs_code == LROTATE_EXPR
4188 || group->stores[0]->rhs_code == NOP_EXPR)
4189 {
4190 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
4191 gimple *ins_stmt = group->stores[0]->ins_stmt;
4192 struct symbolic_number *n = &group->stores[0]->n;
4193 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
4194
4195 switch (n->range)
4196 {
4197 case 16:
4198 load_type = bswap_type = uint16_type_node;
4199 break;
4200 case 32:
4201 load_type = uint32_type_node;
4202 if (bswap)
4203 {
4204 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
4205 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4206 }
4207 break;
4208 case 64:
4209 load_type = uint64_type_node;
4210 if (bswap)
4211 {
4212 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
4213 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
4214 }
4215 break;
4216 default:
4217 gcc_unreachable ();
4218 }
4219
4220 /* If each load has the vuse of the corresponding store,
4221 we've checked the aliasing already in try_coalesce_bswap and
4222 we want to sink the needed load into seq. So we need to use new_vuse
4223 on the load. */
30fa8e9c 4224 if (n->base_addr)
4b84d9b8 4225 {
30fa8e9c
JJ
4226 if (n->vuse == NULL)
4227 {
4228 n->vuse = new_vuse;
4229 ins_stmt = NULL;
4230 }
4231 else
4232 /* Update vuse in case it has been changed by output_merged_stores. */
4233 n->vuse = gimple_vuse (ins_stmt);
4b84d9b8
JJ
4234 }
4235 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
b320edc0
JJ
4236 bswap_type, load_type, n, bswap,
4237 ~(uint64_t) 0);
4b84d9b8
JJ
4238 gcc_assert (bswap_res);
4239 }
f663d9ad
KT
4240
4241 gimple *stmt = NULL;
245f6de1 4242 auto_vec<gimple *, 32> orig_stmts;
4b84d9b8
JJ
4243 gimple_seq this_seq;
4244 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
aa55dc0c 4245 is_gimple_mem_ref_addr, NULL_TREE);
4b84d9b8 4246 gimple_seq_add_seq_without_update (&seq, this_seq);
245f6de1
JJ
4247
4248 tree load_addr[2] = { NULL_TREE, NULL_TREE };
4249 gimple_seq load_seq[2] = { NULL, NULL };
4250 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
4251 for (int j = 0; j < 2; ++j)
4252 {
4253 store_operand_info &op = group->stores[0]->ops[j];
4254 if (op.base_addr == NULL_TREE)
4255 continue;
4256
4257 store_immediate_info *infol = group->stores.last ();
4258 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
4259 {
97031af7
JJ
4260 /* We can't pick the location randomly; while we've verified
4261 all the loads have the same vuse, they can still be in different
4262 basic blocks and we need to pick the one from the last bb:
4263 int x = q[0];
4264 if (x == N) return;
4265 int y = q[1];
4266 p[0] = x;
4267 p[1] = y;
4268 otherwise if we put the wider load at the q[0] load, we might
4269 segfault if q[1] is not mapped. */
4270 basic_block bb = gimple_bb (op.stmt);
4271 gimple *ostmt = op.stmt;
4272 store_immediate_info *info;
4273 FOR_EACH_VEC_ELT (group->stores, i, info)
4274 {
4275 gimple *tstmt = info->ops[j].stmt;
4276 basic_block tbb = gimple_bb (tstmt);
4277 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
4278 {
4279 ostmt = tstmt;
4280 bb = tbb;
4281 }
4282 }
4283 load_gsi[j] = gsi_for_stmt (ostmt);
245f6de1
JJ
4284 load_addr[j]
4285 = force_gimple_operand_1 (unshare_expr (op.base_addr),
4286 &load_seq[j], is_gimple_mem_ref_addr,
4287 NULL_TREE);
4288 }
4289 else if (operand_equal_p (base_addr, op.base_addr, 0))
4290 load_addr[j] = addr;
4291 else
3e2927a1 4292 {
3e2927a1
JJ
4293 load_addr[j]
4294 = force_gimple_operand_1 (unshare_expr (op.base_addr),
4295 &this_seq, is_gimple_mem_ref_addr,
4296 NULL_TREE);
4297 gimple_seq_add_seq_without_update (&seq, this_seq);
4298 }
245f6de1
JJ
4299 }
4300
f663d9ad
KT
4301 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4302 {
e362a897
EB
4303 const unsigned HOST_WIDE_INT try_size = split_store->size;
4304 const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
4305 const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
4306 const unsigned HOST_WIDE_INT try_align = split_store->align;
4307 const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
a62b3dc5
JJ
4308 tree dest, src;
4309 location_t loc;
e362a897 4310
a62b3dc5
JJ
4311 if (split_store->orig)
4312 {
5384a802
JJ
4313 /* If there is just a single non-clobber constituent store
4314 which covers the whole area, just reuse the lhs and rhs. */
4315 gimple *orig_stmt = NULL;
4316 store_immediate_info *store;
4317 unsigned int j;
4318 FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
4319 if (!gimple_clobber_p (store->stmt))
4320 {
4321 orig_stmt = store->stmt;
4322 break;
4323 }
245f6de1
JJ
4324 dest = gimple_assign_lhs (orig_stmt);
4325 src = gimple_assign_rhs1 (orig_stmt);
4326 loc = gimple_location (orig_stmt);
a62b3dc5
JJ
4327 }
4328 else
4329 {
245f6de1
JJ
4330 store_immediate_info *info;
4331 unsigned short clique, base;
4332 unsigned int k;
4333 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4334 orig_stmts.safe_push (info->stmt);
a62b3dc5 4335 tree offset_type
245f6de1 4336 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
e362a897 4337 tree dest_type;
245f6de1
JJ
4338 loc = get_location_for_stmts (orig_stmts);
4339 orig_stmts.truncate (0);
a62b3dc5 4340
e362a897
EB
4341 if (group->string_concatenation)
4342 dest_type
4343 = build_array_type_nelts (char_type_node,
4344 try_size / BITS_PER_UNIT);
4345 else
4346 {
4347 dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
4348 dest_type = build_aligned_type (dest_type, try_align);
4349 }
4350 dest = fold_build2 (MEM_REF, dest_type, addr,
a62b3dc5 4351 build_int_cst (offset_type, try_pos));
245f6de1
JJ
4352 if (TREE_CODE (dest) == MEM_REF)
4353 {
4354 MR_DEPENDENCE_CLIQUE (dest) = clique;
4355 MR_DEPENDENCE_BASE (dest) = base;
4356 }
4357
c94c3532 4358 tree mask;
e362a897 4359 if (bswap_res || group->string_concatenation)
c94c3532
EB
4360 mask = integer_zero_node;
4361 else
e362a897
EB
4362 mask = native_interpret_expr (dest_type,
4363 group->mask + try_offset,
4b84d9b8 4364 group->buf_size);
245f6de1
JJ
4365
4366 tree ops[2];
4367 for (int j = 0;
4368 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
4369 ++j)
4370 {
4371 store_operand_info &op = split_store->orig_stores[0]->ops[j];
4b84d9b8
JJ
4372 if (bswap_res)
4373 ops[j] = bswap_res;
e362a897
EB
4374 else if (group->string_concatenation)
4375 {
4376 ops[j] = build_string (try_size / BITS_PER_UNIT,
4377 (const char *) group->val + try_offset);
4378 TREE_TYPE (ops[j]) = dest_type;
4379 }
4b84d9b8 4380 else if (op.base_addr)
245f6de1
JJ
4381 {
4382 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4383 orig_stmts.safe_push (info->ops[j].stmt);
4384
4385 offset_type = get_alias_type_for_stmts (orig_stmts, true,
4386 &clique, &base);
4387 location_t load_loc = get_location_for_stmts (orig_stmts);
4388 orig_stmts.truncate (0);
4389
4390 unsigned HOST_WIDE_INT load_align = group->load_align[j];
4391 unsigned HOST_WIDE_INT align_bitpos
c94c3532 4392 = known_alignment (try_bitpos
8a91d545
RS
4393 - split_store->orig_stores[0]->bitpos
4394 + op.bitpos);
4395 if (align_bitpos & (load_align - 1))
245f6de1
JJ
4396 load_align = least_bit_hwi (align_bitpos);
4397
4398 tree load_int_type
4399 = build_nonstandard_integer_type (try_size, UNSIGNED);
4400 load_int_type
4401 = build_aligned_type (load_int_type, load_align);
4402
8a91d545 4403 poly_uint64 load_pos
c94c3532 4404 = exact_div (try_bitpos
8a91d545
RS
4405 - split_store->orig_stores[0]->bitpos
4406 + op.bitpos,
4407 BITS_PER_UNIT);
245f6de1
JJ
4408 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
4409 build_int_cst (offset_type, load_pos));
4410 if (TREE_CODE (ops[j]) == MEM_REF)
4411 {
4412 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
4413 MR_DEPENDENCE_BASE (ops[j]) = base;
4414 }
4415 if (!integer_zerop (mask))
e9e2bad7
MS
4416 {
4417 /* The load might load some bits (that will be masked
4418 off later on) uninitialized; avoid -W*uninitialized
4419 warnings in that case. */
4420 suppress_warning (ops[j], OPT_Wuninitialized);
4421 }
245f6de1 4422
e362a897 4423 stmt = gimple_build_assign (make_ssa_name (dest_type), ops[j]);
245f6de1
JJ
4424 gimple_set_location (stmt, load_loc);
4425 if (gsi_bb (load_gsi[j]))
4426 {
4427 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
4428 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
4429 }
4430 else
4431 {
4432 gimple_set_vuse (stmt, new_vuse);
4433 gimple_seq_add_stmt_without_update (&seq, stmt);
4434 }
4435 ops[j] = gimple_assign_lhs (stmt);
a6fbd154
JJ
4436 tree xor_mask;
4437 enum tree_code inv_op
e362a897 4438 = invert_op (split_store, j, dest_type, xor_mask);
a6fbd154 4439 if (inv_op != NOP_EXPR)
383ac8dc 4440 {
e362a897 4441 stmt = gimple_build_assign (make_ssa_name (dest_type),
a6fbd154 4442 inv_op, ops[j], xor_mask);
383ac8dc
JJ
4443 gimple_set_location (stmt, load_loc);
4444 ops[j] = gimple_assign_lhs (stmt);
4445
4446 if (gsi_bb (load_gsi[j]))
4447 gimple_seq_add_stmt_without_update (&load_seq[j],
4448 stmt);
4449 else
4450 gimple_seq_add_stmt_without_update (&seq, stmt);
4451 }
245f6de1
JJ
4452 }
4453 else
e362a897
EB
4454 ops[j] = native_interpret_expr (dest_type,
4455 group->val + try_offset,
245f6de1
JJ
4456 group->buf_size);
4457 }
4458
4459 switch (split_store->orig_stores[0]->rhs_code)
4460 {
4461 case BIT_AND_EXPR:
4462 case BIT_IOR_EXPR:
4463 case BIT_XOR_EXPR:
4464 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4465 {
4466 tree rhs1 = gimple_assign_rhs1 (info->stmt);
4467 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4468 }
4469 location_t bit_loc;
4470 bit_loc = get_location_for_stmts (orig_stmts);
4471 orig_stmts.truncate (0);
4472
4473 stmt
e362a897 4474 = gimple_build_assign (make_ssa_name (dest_type),
245f6de1
JJ
4475 split_store->orig_stores[0]->rhs_code,
4476 ops[0], ops[1]);
4477 gimple_set_location (stmt, bit_loc);
4478 /* If there is just one load and there is a separate
4479 load_seq[0], emit the bitwise op right after it. */
4480 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4481 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4482 /* Otherwise, if at least one load is in seq, we need to
4483 emit the bitwise op right before the store. If there
4484 are two loads and they are emitted somewhere else, it would
4485 be better to emit the bitwise op as early as possible;
4486 we don't track where that would be possible right now
4487 though. */
4488 else
4489 gimple_seq_add_stmt_without_update (&seq, stmt);
4490 src = gimple_assign_lhs (stmt);
a6fbd154
JJ
4491 tree xor_mask;
4492 enum tree_code inv_op;
e362a897 4493 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
a6fbd154 4494 if (inv_op != NOP_EXPR)
d60edaba 4495 {
e362a897 4496 stmt = gimple_build_assign (make_ssa_name (dest_type),
a6fbd154 4497 inv_op, src, xor_mask);
d60edaba
JJ
4498 gimple_set_location (stmt, bit_loc);
4499 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4500 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4501 else
4502 gimple_seq_add_stmt_without_update (&seq, stmt);
4503 src = gimple_assign_lhs (stmt);
4504 }
245f6de1 4505 break;
4b84d9b8
JJ
4506 case LROTATE_EXPR:
4507 case NOP_EXPR:
4508 src = ops[0];
4509 if (!is_gimple_val (src))
4510 {
4511 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4512 src);
4513 gimple_seq_add_stmt_without_update (&seq, stmt);
4514 src = gimple_assign_lhs (stmt);
4515 }
e362a897 4516 if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
4b84d9b8 4517 {
e362a897 4518 stmt = gimple_build_assign (make_ssa_name (dest_type),
4b84d9b8
JJ
4519 NOP_EXPR, src);
4520 gimple_seq_add_stmt_without_update (&seq, stmt);
4521 src = gimple_assign_lhs (stmt);
4522 }
e362a897 4523 inv_op = invert_op (split_store, 2, dest_type, xor_mask);
be52ac73
JJ
4524 if (inv_op != NOP_EXPR)
4525 {
e362a897 4526 stmt = gimple_build_assign (make_ssa_name (dest_type),
be52ac73
JJ
4527 inv_op, src, xor_mask);
4528 gimple_set_location (stmt, loc);
4529 gimple_seq_add_stmt_without_update (&seq, stmt);
4530 src = gimple_assign_lhs (stmt);
4531 }
4b84d9b8 4532 break;
245f6de1
JJ
4533 default:
4534 src = ops[0];
4535 break;
4536 }
4537
c94c3532
EB
4538 /* If bit insertion is required, we use the source as an accumulator
4539 into which the successive bit-field values are manually inserted.
4540 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4541 if (group->bit_insertion)
4542 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4543 if (info->rhs_code == BIT_INSERT_EXPR
4544 && info->bitpos < try_bitpos + try_size
4545 && info->bitpos + info->bitsize > try_bitpos)
4546 {
4547 /* Mask, truncate, convert to final type, shift and ior into
4548 the accumulator. Note that every step can be a no-op. */
4549 const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4550 const HOST_WIDE_INT end_gap
4551 = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4552 tree tem = info->ops[0].val;
ed01d707
EB
4553 if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
4554 {
4555 const unsigned HOST_WIDE_INT size
4556 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
4557 tree integer_type
4558 = build_nonstandard_integer_type (size, UNSIGNED);
4559 tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
4560 integer_type, tem);
4561 }
c14add82
EB
4562 if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4563 {
4564 tree bitfield_type
4565 = build_nonstandard_integer_type (info->bitsize,
4566 UNSIGNED);
4567 tem = gimple_convert (&seq, loc, bitfield_type, tem);
4568 }
4569 else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
c94c3532
EB
4570 {
4571 const unsigned HOST_WIDE_INT imask
4572 = (HOST_WIDE_INT_1U << info->bitsize) - 1;
4573 tem = gimple_build (&seq, loc,
4574 BIT_AND_EXPR, TREE_TYPE (tem), tem,
4575 build_int_cst (TREE_TYPE (tem),
4576 imask));
4577 }
4578 const HOST_WIDE_INT shift
4579 = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4580 if (shift < 0)
4581 tem = gimple_build (&seq, loc,
4582 RSHIFT_EXPR, TREE_TYPE (tem), tem,
4583 build_int_cst (NULL_TREE, -shift));
e362a897 4584 tem = gimple_convert (&seq, loc, dest_type, tem);
c94c3532
EB
4585 if (shift > 0)
4586 tem = gimple_build (&seq, loc,
e362a897 4587 LSHIFT_EXPR, dest_type, tem,
c94c3532
EB
4588 build_int_cst (NULL_TREE, shift));
4589 src = gimple_build (&seq, loc,
e362a897 4590 BIT_IOR_EXPR, dest_type, tem, src);
c94c3532
EB
4591 }
4592
a62b3dc5
JJ
4593 if (!integer_zerop (mask))
4594 {
e362a897 4595 tree tem = make_ssa_name (dest_type);
a62b3dc5
JJ
4596 tree load_src = unshare_expr (dest);
4597 /* The load might load some or all bits uninitialized;
4598 avoid -W*uninitialized warnings in that case.
4599 As an optimization, if all the bits are provably
4600 uninitialized (no stores at all yet, or the previous
4601 store was a CLOBBER), it would be nice to optimize away
4602 the load and replace it e.g. with 0. */
e9e2bad7 4603 suppress_warning (load_src, OPT_Wuninitialized);
a62b3dc5
JJ
4604 stmt = gimple_build_assign (tem, load_src);
4605 gimple_set_location (stmt, loc);
4606 gimple_set_vuse (stmt, new_vuse);
4607 gimple_seq_add_stmt_without_update (&seq, stmt);
4608
4609 /* FIXME: If there is a single chunk of zero bits in mask,
4610 perhaps use BIT_INSERT_EXPR instead? */
e362a897 4611 stmt = gimple_build_assign (make_ssa_name (dest_type),
a62b3dc5
JJ
4612 BIT_AND_EXPR, tem, mask);
4613 gimple_set_location (stmt, loc);
4614 gimple_seq_add_stmt_without_update (&seq, stmt);
4615 tem = gimple_assign_lhs (stmt);
4616
245f6de1 4617 if (TREE_CODE (src) == INTEGER_CST)
e362a897 4618 src = wide_int_to_tree (dest_type,
245f6de1
JJ
4619 wi::bit_and_not (wi::to_wide (src),
4620 wi::to_wide (mask)));
4621 else
4622 {
4623 tree nmask
e362a897 4624 = wide_int_to_tree (dest_type,
245f6de1 4625 wi::bit_not (wi::to_wide (mask)));
e362a897 4626 stmt = gimple_build_assign (make_ssa_name (dest_type),
245f6de1
JJ
4627 BIT_AND_EXPR, src, nmask);
4628 gimple_set_location (stmt, loc);
4629 gimple_seq_add_stmt_without_update (&seq, stmt);
4630 src = gimple_assign_lhs (stmt);
4631 }
e362a897 4632 stmt = gimple_build_assign (make_ssa_name (dest_type),
a62b3dc5
JJ
4633 BIT_IOR_EXPR, tem, src);
4634 gimple_set_location (stmt, loc);
4635 gimple_seq_add_stmt_without_update (&seq, stmt);
4636 src = gimple_assign_lhs (stmt);
4637 }
4638 }
f663d9ad
KT
4639
4640 stmt = gimple_build_assign (dest, src);
4641 gimple_set_location (stmt, loc);
4642 gimple_set_vuse (stmt, new_vuse);
4643 gimple_seq_add_stmt_without_update (&seq, stmt);
4644
629387a6
EB
4645 if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4646 add_stmt_to_eh_lp (stmt, group->lp_nr);
4647
f663d9ad
KT
4648 tree new_vdef;
4649 if (i < split_stores.length () - 1)
a62b3dc5 4650 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
f663d9ad
KT
4651 else
4652 new_vdef = last_vdef;
4653
4654 gimple_set_vdef (stmt, new_vdef);
4655 SSA_NAME_DEF_STMT (new_vdef) = stmt;
4656 new_vuse = new_vdef;
4657 }
4658
4659 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4660 delete split_store;
4661
f663d9ad
KT
4662 gcc_assert (seq);
4663 if (dump_file)
4664 {
4665 fprintf (dump_file,
c94c3532 4666 "New sequence of %u stores to replace old one of %u stores\n",
a62b3dc5 4667 split_stores.length (), orig_num_stmts);
f663d9ad
KT
4668 if (dump_flags & TDF_DETAILS)
4669 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4670 }
629387a6 4671
5384a802
JJ
4672 if (gimple_clobber_p (group->last_stmt))
4673 update_stmt (group->last_stmt);
4674
629387a6
EB
4675 if (group->lp_nr > 0)
4676 {
4677 /* We're going to insert a sequence of (potentially) throwing stores
4678 into an active EH region. This means that we're going to create
4679 new basic blocks with EH edges pointing to the post landing pad
4680 and, therefore, to have to update its PHI nodes, if any. For the
4681 virtual PHI node, we're going to use the VDEFs created above, but
4682 for the other nodes, we need to record the original reaching defs. */
4683 eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4684 basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4685 basic_block last_bb = gimple_bb (group->last_stmt);
4686 edge last_edge = find_edge (last_bb, lp_bb);
4687 auto_vec<tree, 16> last_defs;
4688 gphi_iterator gpi;
4689 for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4690 {
4691 gphi *phi = gpi.phi ();
4692 tree last_def;
4693 if (virtual_operand_p (gimple_phi_result (phi)))
4694 last_def = NULL_TREE;
4695 else
4696 last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4697 last_defs.safe_push (last_def);
4698 }
4699
4700 /* Do the insertion. Then, if new basic blocks have been created in the
4701 process, rewind the chain of VDEFs created above to walk the new basic
4702 blocks and update the corresponding arguments of the PHI nodes. */
4703 update_modified_stmts (seq);
4704 if (gimple_find_sub_bbs (seq, &last_gsi))
4705 while (last_vdef != gimple_vuse (group->last_stmt))
4706 {
4707 gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4708 if (stmt_could_throw_p (cfun, stmt))
4709 {
4710 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4711 unsigned int i;
4712 for (gpi = gsi_start_phis (lp_bb), i = 0;
4713 !gsi_end_p (gpi);
4714 gsi_next (&gpi), i++)
4715 {
4716 gphi *phi = gpi.phi ();
4717 tree new_def;
4718 if (virtual_operand_p (gimple_phi_result (phi)))
4719 new_def = last_vdef;
4720 else
4721 new_def = last_defs[i];
4722 add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4723 }
4724 }
4725 last_vdef = gimple_vuse (stmt);
4726 }
4727 }
4728 else
4729 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4730
245f6de1
JJ
4731 for (int j = 0; j < 2; ++j)
4732 if (load_seq[j])
4733 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
f663d9ad
KT
4734
4735 return true;
4736}
4737
4738/* Process the merged_store_group objects created in the coalescing phase.
4739 The stores are all against the base object BASE.
4740 Try to output the widened stores and delete the original statements if
4741 successful. Return true iff any changes were made. */
4742
4743bool
b5926e23 4744imm_store_chain_info::output_merged_stores ()
f663d9ad
KT
4745{
4746 unsigned int i;
4747 merged_store_group *merged_store;
4748 bool ret = false;
4749 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4750 {
a95b474a
ML
4751 if (dbg_cnt (store_merging)
4752 && output_merged_store (merged_store))
f663d9ad
KT
4753 {
4754 unsigned int j;
4755 store_immediate_info *store;
4756 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4757 {
4758 gimple *stmt = store->stmt;
4759 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
5384a802
JJ
4760 /* Don't remove clobbers; they are still useful even if
4761 everything is overwritten afterwards. */
4762 if (gimple_clobber_p (stmt))
4763 continue;
f663d9ad 4764 gsi_remove (&gsi, true);
629387a6
EB
4765 if (store->lp_nr)
4766 remove_stmt_from_eh_lp (stmt);
f663d9ad
KT
4767 if (stmt != merged_store->last_stmt)
4768 {
4769 unlink_stmt_vdef (stmt);
4770 release_defs (stmt);
4771 }
4772 }
4773 ret = true;
4774 }
4775 }
4776 if (ret && dump_file)
4777 fprintf (dump_file, "Merging successful!\n");
4778
4779 return ret;
4780}
4781
4782/* Coalesce the store_immediate_info objects recorded against the base object
4783 BASE in the first phase and output them.
4784 Delete the allocated structures.
4785 Return true if any changes were made. */
4786
4787bool
b5926e23 4788imm_store_chain_info::terminate_and_process_chain ()
f663d9ad 4789{
95d94b52
RB
4790 if (dump_file && (dump_flags & TDF_DETAILS))
4791 fprintf (dump_file, "Terminating chain with %u stores\n",
4792 m_store_info.length ());
f663d9ad
KT
4793 /* Process store chain. */
4794 bool ret = false;
4795 if (m_store_info.length () > 1)
4796 {
4797 ret = coalesce_immediate_stores ();
4798 if (ret)
b5926e23 4799 ret = output_merged_stores ();
f663d9ad
KT
4800 }
4801
4802 /* Delete all the entries we allocated ourselves. */
4803 store_immediate_info *info;
4804 unsigned int i;
4805 FOR_EACH_VEC_ELT (m_store_info, i, info)
4806 delete info;
4807
4808 merged_store_group *merged_info;
4809 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4810 delete merged_info;
4811
4812 return ret;
4813}
4814
4815/* Return true iff LHS is a destination potentially interesting for
4816 store merging. In practice these are the codes that get_inner_reference
4817 can process. */
4818
4819static bool
4820lhs_valid_for_store_merging_p (tree lhs)
4821{
629387a6 4822 if (DECL_P (lhs))
f663d9ad
KT
4823 return true;
4824
629387a6
EB
4825 switch (TREE_CODE (lhs))
4826 {
4827 case ARRAY_REF:
4828 case ARRAY_RANGE_REF:
4829 case BIT_FIELD_REF:
4830 case COMPONENT_REF:
4831 case MEM_REF:
e362a897 4832 case VIEW_CONVERT_EXPR:
629387a6
EB
4833 return true;
4834 default:
4835 return false;
4836 }
4837
4838 gcc_unreachable ();
f663d9ad
KT
4839}
4840
4841/* Return true if the tree RHS is a constant we want to consider
4842 during store merging. In practice accept all codes that
4843 native_encode_expr accepts. */
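/* For example, integer, real and vector constants of fixed-size types are
   acceptable, and so is an empty CONSTRUCTOR such as the "{}" in
     struct S s = {};
   provided the type's size is a compile-time constant; an SSA_NAME or a
   constant of a variably-sized type is not.  */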
4844
4845static bool
4846rhs_valid_for_store_merging_p (tree rhs)
4847{
cf098191 4848 unsigned HOST_WIDE_INT size;
3afd514b 4849 if (TREE_CODE (rhs) == CONSTRUCTOR
3afd514b
JJ
4850 && CONSTRUCTOR_NELTS (rhs) == 0
4851 && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4852 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4853 return true;
cf098191
RS
4854 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4855 && native_encode_expr (rhs, NULL, size) != 0);
f663d9ad
KT
4856}
4857
629387a6
EB
4858/* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4859 and return true on success or false on failure. */
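/* For example, with BYTE_OFF 2 (and the usual BITS_PER_UNIT of 8),
   *PBITPOS 5 and a bit region of [0, 32), the result is *PBITPOS 21 and a
   bit region of [16, 48): everything is simply shifted by
   2 * BITS_PER_UNIT bits.  Failure happens when the adjusted bit position
   would become negative or does not fit the corresponding type.  */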
4860
4861static bool
4862adjust_bit_pos (poly_offset_int byte_off,
4863 poly_int64 *pbitpos,
4864 poly_uint64 *pbitregion_start,
4865 poly_uint64 *pbitregion_end)
4866{
4867 poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4868 bit_off += *pbitpos;
4869
4870 if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4871 {
4872 if (maybe_ne (*pbitregion_end, 0U))
4873 {
4874 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4875 bit_off += *pbitregion_start;
4876 if (bit_off.to_uhwi (pbitregion_start))
4877 {
4878 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4879 bit_off += *pbitregion_end;
4880 if (!bit_off.to_uhwi (pbitregion_end))
4881 *pbitregion_end = 0;
4882 }
4883 else
4884 *pbitregion_end = 0;
4885 }
4886 return true;
4887 }
4888 else
4889 return false;
4890}
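
/* Worked example with made-up numbers (illustration only): for
   BYTE_OFF == 4, *PBITPOS == 7 and a bit region of [0, 32), the byte
   offset is converted to 4 << LOG2_BITS_PER_UNIT == 32 bits, yielding
   *PBITPOS == 39, *PBITREGION_START == 32 and *PBITREGION_END == 64.
   Failure is reported only when the adjusted bit position would be
   negative or would not fit in a signed HOST_WIDE_INT; an overflowing
   bit region is simply dropped by zeroing *PBITREGION_END.  */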

/* If MEM is a memory reference usable for store merging (either as
   store destination or for loads), return the non-NULL base_addr
   and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
   Otherwise return NULL, though *PBITSIZE is still valid in that
   case.  */

static tree
mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
			     poly_uint64 *pbitpos,
			     poly_uint64 *pbitregion_start,
			     poly_uint64 *pbitregion_end)
{
  poly_int64 bitsize, bitpos;
  poly_uint64 bitregion_start = 0, bitregion_end = 0;
  machine_mode mode;
  int unsignedp = 0, reversep = 0, volatilep = 0;
  tree offset;
  tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
					&unsignedp, &reversep, &volatilep);
  *pbitsize = bitsize;
  if (known_eq (bitsize, 0))
    return NULL_TREE;

  if (TREE_CODE (mem) == COMPONENT_REF
      && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
    {
      get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
      if (maybe_ne (bitregion_end, 0U))
	bitregion_end += 1;
    }

  if (reversep)
    return NULL_TREE;

  /* We do not want to rewrite TARGET_MEM_REFs.  */
  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
    return NULL_TREE;
  /* In some cases get_inner_reference may return a
     MEM_REF [ptr + byteoffset].  For the purposes of this pass
     canonicalize the base_addr to MEM_REF [ptr] and take
     byteoffset into account in the bitpos.  This occurs in
     PR 23684 and this way we can catch more chains.  */
  else if (TREE_CODE (base_addr) == MEM_REF)
    {
      if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
			   &bitregion_start, &bitregion_end))
	return NULL_TREE;
      base_addr = TREE_OPERAND (base_addr, 0);
    }
  /* get_inner_reference returns the base object, get at its
     address now.  */
  else
    {
      if (maybe_lt (bitpos, 0))
	return NULL_TREE;
      base_addr = build_fold_addr_expr (base_addr);
    }

  if (offset)
    {
      /* If the access is variable offset then a base decl has to be
	 address-taken to be able to emit pointer-based stores to it.
	 ??? We might be able to get away with re-using the original
	 base up to the first variable part and then wrapping that inside
	 a BIT_FIELD_REF.  */
      tree base = get_base_address (base_addr);
      if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
	return NULL_TREE;

      /* Similarly to above for the base, remove constant from the offset.  */
      if (TREE_CODE (offset) == PLUS_EXPR
	  && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
	  && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
			     &bitpos, &bitregion_start, &bitregion_end))
	offset = TREE_OPERAND (offset, 0);

      base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
			  base_addr, offset);
    }

  if (known_eq (bitregion_end, 0U))
    {
      bitregion_start = round_down_to_byte_boundary (bitpos);
      bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
    }

  *pbitsize = bitsize;
  *pbitpos = bitpos;
  *pbitregion_start = bitregion_start;
  *pbitregion_end = bitregion_end;
  return base_addr;
}
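
/* Purely illustrative example of the MEM_REF canonicalization above
   (hypothetical GIMPLE fragment): for a store to

     MEM[(int *)p_1 + 4B] = _2;

   get_inner_reference returns the MEM_REF itself as the base, and the
   code above rewrites that to the bare pointer p_1 with *PBITPOS == 32,
   so stores through p_1 at byte offsets 0, 4, 8, ... are all keyed on
   the same base and can join the same chain.  */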

/* Return true if STMT is a load that can be used for store merging.
   In that case fill in *OP.  BITSIZE, BITPOS, BITREGION_START and
   BITREGION_END are properties of the corresponding store.  */

static bool
handled_load (gimple *stmt, store_operand_info *op,
	      poly_uint64 bitsize, poly_uint64 bitpos,
	      poly_uint64 bitregion_start, poly_uint64 bitregion_end)
{
  if (!is_gimple_assign (stmt))
    return false;
  if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      if (TREE_CODE (rhs1) == SSA_NAME
	  && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
			   bitregion_start, bitregion_end))
	{
	  /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
	     been optimized earlier, but if allowed here, would confuse the
	     multiple uses counting.  */
	  if (op->bit_not_p)
	    return false;
	  op->bit_not_p = !op->bit_not_p;
	  return true;
	}
      return false;
    }
  if (gimple_vuse (stmt)
      && gimple_assign_load_p (stmt)
      && !stmt_can_throw_internal (cfun, stmt)
      && !gimple_has_volatile_ops (stmt))
    {
      tree mem = gimple_assign_rhs1 (stmt);
      op->base_addr
	= mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
				       &op->bitregion_start,
				       &op->bitregion_end);
      if (op->base_addr != NULL_TREE
	  && known_eq (op->bitsize, bitsize)
	  && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
	  && known_ge (op->bitpos - op->bitregion_start,
		       bitpos - bitregion_start)
	  && known_ge (op->bitregion_end - op->bitpos,
		       bitregion_end - bitpos))
	{
	  op->stmt = stmt;
	  op->val = mem;
	  op->bit_not_p = false;
	  return true;
	}
    }
  return false;
}
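
/* Hypothetical GIMPLE fragment (for exposition only) that handled_load
   accepts for a store of the same width at a compatible position:

     _1 = *q_5(D);      <-- non-volatile load that cannot throw
     _2 = ~_1;          <-- optional single BIT_NOT_EXPR, sets bit_not_p
     *p_7(D) = _2;      <-- the store whose operand is being analyzed

   Wrapping a second BIT_NOT_EXPR around _2 is rejected above: such a
   double negation should have been folded earlier, and accepting it
   here would confuse the multiple-uses counting.  */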

/* Return the index number of the landing pad for STMT, if any.  */

static int
lp_nr_for_store (gimple *stmt)
{
  if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
    return 0;

  if (!stmt_could_throw_p (cfun, stmt))
    return 0;

  return lookup_stmt_eh_lp (stmt);
}

/* Record the store STMT for store merging optimization if it can be
   optimized.  Return true if any changes were made.  */

bool
pass_store_merging::process_store (gimple *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  poly_uint64 bitsize, bitpos = 0;
  poly_uint64 bitregion_start = 0, bitregion_end = 0;
  tree base_addr
    = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
				   &bitregion_start, &bitregion_end);
  if (known_eq (bitsize, 0U))
    return false;

  bool invalid = (base_addr == NULL_TREE
		  || (maybe_gt (bitsize,
				(unsigned int) MAX_BITSIZE_MODE_ANY_INT)
		      && TREE_CODE (rhs) != INTEGER_CST
		      && (TREE_CODE (rhs) != CONSTRUCTOR
			  || CONSTRUCTOR_NELTS (rhs) != 0)));
  enum tree_code rhs_code = ERROR_MARK;
  bool bit_not_p = false;
  struct symbolic_number n;
  gimple *ins_stmt = NULL;
  store_operand_info ops[2];
  if (invalid)
    ;
  else if (TREE_CODE (rhs) == STRING_CST)
    {
      rhs_code = STRING_CST;
      ops[0].val = rhs;
    }
  else if (rhs_valid_for_store_merging_p (rhs))
    {
      rhs_code = INTEGER_CST;
      ops[0].val = rhs;
    }
  else if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
      if (!is_gimple_assign (def_stmt))
	invalid = true;
      else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
			     bitregion_start, bitregion_end))
	rhs_code = MEM_REF;
      else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
	{
	  tree rhs1 = gimple_assign_rhs1 (def_stmt);
	  if (TREE_CODE (rhs1) == SSA_NAME
	      && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
	    {
	      bit_not_p = true;
	      def_stmt = SSA_NAME_DEF_STMT (rhs1);
	    }
	}

      if (rhs_code == ERROR_MARK && !invalid)
	switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
	  {
	  case BIT_AND_EXPR:
	  case BIT_IOR_EXPR:
	  case BIT_XOR_EXPR:
	    tree rhs1, rhs2;
	    rhs1 = gimple_assign_rhs1 (def_stmt);
	    rhs2 = gimple_assign_rhs2 (def_stmt);
	    invalid = true;
	    if (TREE_CODE (rhs1) != SSA_NAME)
	      break;
	    def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
	    if (!is_gimple_assign (def_stmt1)
		|| !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
				  bitregion_start, bitregion_end))
	      break;
	    if (rhs_valid_for_store_merging_p (rhs2))
	      ops[1].val = rhs2;
	    else if (TREE_CODE (rhs2) != SSA_NAME)
	      break;
	    else
	      {
		def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
		if (!is_gimple_assign (def_stmt2))
		  break;
		else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
					bitregion_start, bitregion_end))
		  break;
	      }
	    invalid = false;
	    break;
	  default:
	    invalid = true;
	    break;
	  }

      unsigned HOST_WIDE_INT const_bitsize;
      if (bitsize.is_constant (&const_bitsize)
	  && (const_bitsize % BITS_PER_UNIT) == 0
	  && const_bitsize <= 64
	  && multiple_p (bitpos, BITS_PER_UNIT))
	{
	  ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
	  if (ins_stmt)
	    {
	      uint64_t nn = n.n;
	      for (unsigned HOST_WIDE_INT i = 0;
		   i < const_bitsize;
		   i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
		if ((nn & MARKER_MASK) == 0
		    || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
		  {
		    ins_stmt = NULL;
		    break;
		  }
	      if (ins_stmt)
		{
		  if (invalid)
		    {
		      rhs_code = LROTATE_EXPR;
		      ops[0].base_addr = NULL_TREE;
		      ops[1].base_addr = NULL_TREE;
		    }
		  invalid = false;
		}
	    }
	}

      if (invalid
	  && bitsize.is_constant (&const_bitsize)
	  && ((const_bitsize % BITS_PER_UNIT) != 0
	      || !multiple_p (bitpos, BITS_PER_UNIT))
	  && const_bitsize <= MAX_FIXED_MODE_SIZE)
	{
	  /* Bypass a conversion to the bit-field type.  */
	  if (!bit_not_p
	      && is_gimple_assign (def_stmt)
	      && CONVERT_EXPR_CODE_P (rhs_code))
	    {
	      tree rhs1 = gimple_assign_rhs1 (def_stmt);
	      if (TREE_CODE (rhs1) == SSA_NAME
		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
		rhs = rhs1;
	    }
	  rhs_code = BIT_INSERT_EXPR;
	  bit_not_p = false;
	  ops[0].val = rhs;
	  ops[0].base_addr = NULL_TREE;
	  ops[1].base_addr = NULL_TREE;
	  invalid = false;
	}
    }
  else
    invalid = true;

  unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
  unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
  if (invalid
      || !bitsize.is_constant (&const_bitsize)
      || !bitpos.is_constant (&const_bitpos)
      || !bitregion_start.is_constant (&const_bitregion_start)
      || !bitregion_end.is_constant (&const_bitregion_end))
    return terminate_all_aliasing_chains (NULL, stmt);

  if (!ins_stmt)
    memset (&n, 0, sizeof (n));

  class imm_store_chain_info **chain_info = NULL;
  bool ret = false;
  if (base_addr)
    chain_info = m_stores.get (base_addr);

  store_immediate_info *info;
  if (chain_info)
    {
      unsigned int ord = (*chain_info)->m_store_info.length ();
      info = new store_immediate_info (const_bitsize, const_bitpos,
				       const_bitregion_start,
				       const_bitregion_end,
				       stmt, ord, rhs_code, n, ins_stmt,
				       bit_not_p, lp_nr_for_store (stmt),
				       ops[0], ops[1]);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Recording immediate store from stmt:\n");
	  print_gimple_stmt (dump_file, stmt, 0);
	}
      (*chain_info)->m_store_info.safe_push (info);
      m_n_stores++;
      ret |= terminate_all_aliasing_chains (chain_info, stmt);
      /* If we reach the limit of stores to merge in a chain terminate and
	 process the chain now.  */
      if ((*chain_info)->m_store_info.length ()
	  == (unsigned int) param_max_stores_to_merge)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Reached maximum number of statements to merge:\n");
	  ret |= terminate_and_process_chain (*chain_info);
	}
    }
  else
    {
      /* Store aliases any existing chain?  */
      ret |= terminate_all_aliasing_chains (NULL, stmt);

      /* Start a new chain.  */
      class imm_store_chain_info *new_chain
	= new imm_store_chain_info (m_stores_head, base_addr);
      info = new store_immediate_info (const_bitsize, const_bitpos,
				       const_bitregion_start,
				       const_bitregion_end,
				       stmt, 0, rhs_code, n, ins_stmt,
				       bit_not_p, lp_nr_for_store (stmt),
				       ops[0], ops[1]);
      new_chain->m_store_info.safe_push (info);
      m_n_stores++;
      m_stores.put (base_addr, new_chain);
      m_n_chains++;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Starting active chain number %u with statement:\n",
		   m_n_chains);
	  print_gimple_stmt (dump_file, stmt, 0);
	  fprintf (dump_file, "The base object is:\n");
	  print_generic_expr (dump_file, base_addr);
	  fprintf (dump_file, "\n");
	}
    }

  /* Prune oldest chains so that after adding the chain or store above
     we're again within the limits set by the params.  */
  if (m_n_chains > (unsigned)param_max_store_chains_to_track
      || m_n_stores > (unsigned)param_max_stores_to_track)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Too many chains (%u > %d) or stores (%u > %d), "
		 "terminating oldest chain(s).\n", m_n_chains,
		 param_max_store_chains_to_track, m_n_stores,
		 param_max_stores_to_track);
      imm_store_chain_info **e = &m_stores_head;
      unsigned idx = 0;
      unsigned n_stores = 0;
      while (*e)
	{
	  if (idx >= (unsigned)param_max_store_chains_to_track
	      || (n_stores + (*e)->m_store_info.length ()
		  > (unsigned)param_max_stores_to_track))
	    ret |= terminate_and_process_chain (*e);
	  else
	    {
	      n_stores += (*e)->m_store_info.length ();
	      e = &(*e)->next;
	      ++idx;
	    }
	}
    }

  return ret;
}
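
/* A hypothetical source fragment (illustration only) whose individual
   byte stores are all given rhs_code LROTATE_EXPR above, because
   find_bswap_or_nop_1 recognizes each stored value as one byte of X:

     void
     store_be32 (unsigned char *p, unsigned int x)
     {
       p[0] = x >> 24;
       p[1] = x >> 16;
       p[2] = x >> 8;
       p[3] = x;
     }

   The later coalescing and output phases can then replace the four
   1-byte stores with a single 4-byte store of X, combined with a byte
   swap where the target byte order requires it.  */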

/* Return true if STMT is a store valid for store merging.  */

static bool
store_valid_for_store_merging_p (gimple *stmt)
{
  return gimple_assign_single_p (stmt)
	 && gimple_vdef (stmt)
	 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
	 && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
}

enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };

/* Return the status of basic block BB wrt store merging.  */

static enum basic_block_status
get_status_for_store_merging (basic_block bb)
{
  unsigned int num_statements = 0;
  unsigned int num_constructors = 0;
  gimple_stmt_iterator gsi;
  edge e;

  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (is_gimple_debug (stmt))
	continue;

      if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
	break;

      if (is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) == CONSTRUCTOR)
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  if (VECTOR_TYPE_P (TREE_TYPE (rhs))
	      && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
	      && gimple_assign_lhs (stmt) != NULL_TREE)
	    {
	      HOST_WIDE_INT sz
		= int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
	      if (sz == 16 || sz == 32 || sz == 64)
		{
		  num_constructors = 1;
		  break;
		}
	    }
	}
    }

  if (num_statements == 0 && num_constructors == 0)
    return BB_INVALID;

  if (cfun->can_throw_non_call_exceptions && cfun->eh
      && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
      && (e = find_fallthru_edge (bb->succs))
      && e->dest == bb->next_bb)
    return BB_EXTENDED_VALID;

  return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
}
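
/* Hypothetical example (illustration only) of the CONSTRUCTOR case
   counted above: a 32-bit vector of integral elements built from
   scalars, e.g.

     typedef unsigned char v4qi __attribute__ ((vector_size (4)));
     v4qi build (unsigned char a, unsigned char b,
		 unsigned char c, unsigned char d)
     {
       return (v4qi) { a, b, c, d };
     }

   A block containing such a constructor is kept for processing even if
   it has fewer than two plain stores, so that
   maybe_optimize_vector_constructor gets a chance to rewrite it.  */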

/* Entry point for the pass.  Go over each basic block recording chains of
   immediate stores.  Upon encountering a terminating statement (such as an
   aliasing or volatile access) process the recorded stores and emit the
   widened variants.  */

unsigned int
pass_store_merging::execute (function *fun)
{
  basic_block bb;
  hash_set<gimple *> orig_stmts;
  bool changed = false, open_chains = false;

  /* If the function can throw and catch non-call exceptions, we'll be trying
     to merge stores across different basic blocks so we need to first unsplit
     the EH edges in order to streamline the CFG of the function.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh)
    unsplit_eh_edges ();

  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      const basic_block_status bb_status = get_status_for_store_merging (bb);
      gimple_stmt_iterator gsi;

      if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
	{
	  changed |= terminate_and_process_all_chains ();
	  open_chains = false;
	}

      if (bb_status == BB_INVALID)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gsi_next (&gsi);

	  if (is_gimple_debug (stmt))
	    continue;

	  if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
	    {
	      /* Terminate all chains.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Volatile access terminates "
				    "all chains\n");
	      changed |= terminate_and_process_all_chains ();
	      open_chains = false;
	      continue;
	    }

	  if (is_gimple_assign (stmt)
	      && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
	      && maybe_optimize_vector_constructor (stmt))
	    continue;

	  if (store_valid_for_store_merging_p (stmt))
	    changed |= process_store (stmt);
	  else
	    changed |= terminate_all_aliasing_chains (NULL, stmt);
	}

      if (bb_status == BB_EXTENDED_VALID)
	open_chains = true;
      else
	{
	  changed |= terminate_and_process_all_chains ();
	  open_chains = false;
	}
    }

  if (open_chains)
    changed |= terminate_and_process_all_chains ();

  /* If the function can throw and catch non-call exceptions and something
     changed during the pass, then the CFG has (very likely) changed too.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
    {
      free_dominance_info (CDI_DOMINATORS);
      return TODO_cleanup_cfg;
    }

  return 0;
}
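
/* Note on the cross-block case above (illustrative): with
   -fnon-call-exceptions, trapping stores such as

     p->a = x;   <-- may throw, so EH lowering ends the block here
     p->b = y;   <-- lands in the fallthru successor

   end up in separate basic blocks.  A block whose last statement is a
   valid store and whose fallthru successor immediately follows it is
   classified BB_EXTENDED_VALID, so open_chains stays set and the two
   stores can still be merged across the block boundary.  */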

} // anon namespace

/* Construct and return a store merging pass object.  */

gimple_opt_pass *
make_pass_store_merging (gcc::context *ctxt)
{
  return new pass_store_merging (ctxt);
}

#if CHECKING_P

namespace selftest {

/* Selftests for store merging helpers.  */

/* Assert that all elements of the byte arrays X and Y, both of length N,
   are equal.  */

static void
verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
{
  for (unsigned int i = 0; i < n; i++)
    {
      if (x[i] != y[i])
	{
	  fprintf (stderr, "Arrays do not match.  X:\n");
	  dump_char_array (stderr, x, n);
	  fprintf (stderr, "Y:\n");
	  dump_char_array (stderr, y, n);
	}
      ASSERT_EQ (x[i], y[i]);
    }
}

/* Test shift_bytes_in_array_left and that it carries bits across between
   bytes correctly.  */

static void
verify_shift_bytes_in_array_left (void)
{
   /* byte 1   | byte 0
      00011111 | 11100000.  */
  unsigned char orig[2] = { 0xe0, 0x1f };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);

  unsigned char expected[2] = { 0x80, 0x7f };
  shift_bytes_in_array_left (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_left (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}

/* Test shift_bytes_in_array_right and that it carries bits across between
   bytes correctly.  */

static void
verify_shift_bytes_in_array_right (void)
{
   /* byte 0   | byte 1
      00011111 | 11100000.  */
  unsigned char orig[2] = { 0x1f, 0xe0 };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);
  unsigned char expected[2] = { 0x07, 0xf8 };
  shift_bytes_in_array_right (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_right (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}

/* Test clear_bit_region that it clears exactly the bits asked and
   nothing more.  */

static void
verify_clear_bit_region (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x1;
  expected[1] = 0;
  expected[2] = 0x80;
  verify_array_eq (in, expected, sizeof in);
}

/* Test clear_bit_region_be that it clears exactly the bits asked and
   nothing more.  */

static void
verify_clear_bit_region_be (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x80;
  expected[1] = 0;
  expected[2] = 0x1;
  verify_array_eq (in, expected, sizeof in);
}


/* Run all of the selftests within this file.  */

void
store_merging_c_tests (void)
{
  verify_shift_bytes_in_array_left ();
  verify_shift_bytes_in_array_right ();
  verify_clear_bit_region ();
  verify_clear_bit_region_be ();
}

} // namespace selftest
#endif /* CHECKING_P.  */