/* GIMPLE store merging and byte swapping passes.
   Copyright (C) 2009-2017 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* The purpose of the store merging pass is to combine multiple memory
   stores of constant values, values loaded from memory or bitwise operations
   on those to consecutive memory locations into fewer wider stores.
   For example, if we have a sequence performing four byte stores to
   consecutive memory locations:
   [p     ] := imm1;
   [p + 1B] := imm2;
   [p + 2B] := imm3;
   [p + 3B] := imm4;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm1:imm2:imm3:imm4  // the immediates concatenated according to endianness.

   Or:
   [p     ] := [q     ];
   [p + 1B] := [q + 1B];
   [p + 2B] := [q + 2B];
   [p + 3B] := [q + 3B];
   if there is no overlap, this can be transformed into a single 4-byte
   load followed by a single 4-byte store.

   Or:
   [p     ] := [q     ] ^ imm1;
   [p + 1B] := [q + 1B] ^ imm2;
   [p + 2B] := [q + 2B] ^ imm3;
   [p + 3B] := [q + 3B] ^ imm4;
   if there is no overlap, this can be transformed into a single 4-byte
   load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.

   The algorithm is applied to each basic block in three phases:

   1) Scan through the basic block recording assignments to
   destinations that can be expressed as a store to memory of a certain size
   at a certain bit offset from expressions we can handle.  For bit-fields
   we also note the surrounding bit region, bits that could be stored in
   a read-modify-write operation when storing the bit-field.  Record store
   chains to different bases in a hash_map (m_stores) and make sure to
   terminate such chains when appropriate (for example when the stored
   values get used subsequently).
   These stores can be a result of structure element initializers, array stores
   etc.  A store_immediate_info object is recorded for every such store.
   Record as many such assignments to a single base as possible until a
   statement that interferes with the store sequence is encountered.
   Each store has up to 2 operands, which can be an immediate constant
   or a memory load, from which the value to be stored can be computed.
   At most one of the operands can be a constant.  The operands are recorded
   in the store_operand_info struct.

   2) Analyze the chain of stores recorded in phase 1) (i.e. the vector of
   store_immediate_info objects) and coalesce contiguous stores into
   merged_store_group objects.  For bit-field stores, we don't need to
   require the stores to be contiguous, just their surrounding bit regions
   have to be contiguous.  If the expression being stored is different
   between adjacent stores, such as one store storing a constant and the
   following one storing a value loaded from memory, or if the loaded memory
   objects are not adjacent, a new merged_store_group is created as well.

   For example, given the stores:
   [p     ] := 0;
   [p + 1B] := 1;
   [p + 3B] := 0;
   [p + 4B] := 1;
   [p + 5B] := 0;
   [p + 6B] := 0;
   This phase would produce two merged_store_group objects, one recording the
   two bytes stored in the memory region [p : p + 1] and another
   recording the four bytes stored in the memory region [p + 3 : p + 6].

   3) The merged_store_group objects produced in phase 2) are processed
   to generate the sequence of wider stores that set the contiguous memory
   regions to the sequence of bytes that correspond to it.  This may emit
   multiple stores per store group to handle contiguous stores that are not
   of a size that is a power of 2.  For example it can try to emit a 40-bit
   store as a 32-bit store followed by an 8-bit store.
   We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT or
   TARGET_SLOW_UNALIGNED_ACCESS rules.

   Note on endianness and example:
   Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
   [p     ] := 0x1234;
   [p + 2B] := 0x5678;
   [p + 4B] := 0xab;
   [p + 5B] := 0xcd;

   The memory layout for little-endian (LE) and big-endian (BE) must be:
   p |LE|BE|
   ---------
   0 |34|12|
   1 |12|34|
   2 |78|56|
   3 |56|78|
   4 |ab|ab|
   5 |cd|cd|

   To merge these into a single 48-bit merged value 'val' in phase 2)
   on little-endian we insert stores to higher (consecutive) bitpositions
   into the most significant bits of the merged value.
   The final merged value would be: 0xcdab56781234

   For big-endian we insert stores to higher bitpositions into the least
   significant bits of the merged value.
   The final merged value would be: 0x12345678abcd

   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
   followed by a 16-bit store.  Again, we must consider endianness when
   breaking down the 48-bit value 'val' computed above.
   For little-endian we emit:
   [p]      (32-bit) := 0x56781234; //    val & 0x0000ffffffff;
   [p + 4B] (16-bit) := 0xcdab;     //   (val & 0xffff00000000) >> 32;

   Whereas for big-endian we emit:
   [p]      (32-bit) := 0x12345678; //   (val & 0xffffffff0000) >> 16;
   [p + 4B] (16-bit) := 0xabcd;     //    val & 0x00000000ffff;  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "builtins.h"
#include "fold-const.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "params.h"
#include "print-tree.h"
#include "tree-hash-traits.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "stor-layout.h"
#include "timevar.h"
#include "tree-cfg.h"
#include "tree-eh.h"
#include "target.h"
#include "gimplify-me.h"
#include "rtl.h"
#include "expr.h"	/* For get_bit_range.  */
#include "optabs-tree.h"
#include "selftest.h"

/* The maximum size (in bits) of the stores this pass should generate.  */
#define MAX_STORE_BITSIZE (BITS_PER_WORD)
#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)

/* Limit to bound the number of aliasing checks for loads with the same
   vuse as the corresponding store.  */
#define MAX_STORE_ALIAS_CHECKS 64

namespace {

struct bswap_stat
{
  /* Number of hand-written 16-bit nop / bswaps found.  */
  int found_16bit;

  /* Number of hand-written 32-bit nop / bswaps found.  */
  int found_32bit;

  /* Number of hand-written 64-bit nop / bswaps found.  */
  int found_64bit;
} nop_stats, bswap_stats;

/* A symbolic number structure is used to detect byte permutation and selection
   patterns of a source.  To achieve that, its field N contains an artificial
   number consisting of BITS_PER_MARKER sized markers tracking where each
   byte comes from in the source:

   0       - target byte has the value 0
   FF      - target byte has an unknown value (e.g. due to sign extension)
   1..size - marker value is the byte index in the source (0 for lsb).

   To detect permutations on memory sources (arrays and structures), a symbolic
   number is also associated with:
   - a base address BASE_ADDR and an OFFSET giving the address of the source;
   - a range which gives the difference between the highest and lowest accessed
     memory location to make such a symbolic number;
   - the address SRC of the source element of lowest address as a convenience
     to easily get BASE_ADDR + offset + lowest bytepos;
   - the number of expressions N_OPS bitwise ored together to represent the
     approximate cost of the computation.

   Note 1: the range is different from size as size reflects the size of the
   type of the current expression.  For instance, for an array char a[],
   (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
   (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
   time a range of 1.

   Note 2: for non-memory sources, range holds the same value as size.

   Note 3: SRC points to the SSA_NAME in case of non-memory source.  */

struct symbolic_number {
  uint64_t n;
  tree type;
  tree base_addr;
  tree offset;
  poly_int64_pod bytepos;
  tree src;
  tree alias_set;
  tree vuse;
  unsigned HOST_WIDE_INT range;
  int n_ops;
};

#define BITS_PER_MARKER 8
#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
#define MARKER_BYTE_UNKNOWN MARKER_MASK
#define HEAD_MARKER(n, size) \
  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a nop.  The number is masked according to the size of
   the symbolic number before using it.  */
#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x08070605 << 32 | 0x04030201)

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a byte swap.  The number is masked according to the
   size of the symbolic number before using it.  */
#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x01020304 << 32 | 0x05060708)

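/* As an illustration, for a 4-byte symbolic number the masked constants are
   CMPNOP == 0x04030201 (each byte of the result comes from the same byte of
   the source, i.e. an identity) and CMPXCHG == 0x01020304 (the byte order is
   fully reversed, i.e. a byte swap).  A sketch of how the markers evolve for
   a hand-written 32-bit byte swap of an SSA value x:

     uint32_t x;                           markers: 0x04030201
     uint32_t a = x >> 24;                 markers: 0x00000004
     uint32_t b = (x >> 8) & 0xff00;       markers: 0x00000300
     uint32_t c = (x << 8) & 0xff0000;     markers: 0x00020000
     uint32_t d = x << 24;                 markers: 0x01000000
     uint32_t r = a | b | c | d;           markers: 0x01020304 == masked CMPXCHG  */
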
/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
   number N.  Return false if the requested operation is not permitted
   on a symbolic number.  */

inline bool
do_shift_rotate (enum tree_code code,
		 struct symbolic_number *n,
		 int count)
{
  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
  unsigned head_marker;

  if (count % BITS_PER_UNIT != 0)
    return false;
  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;

  /* Zero out the extra bits of N in order to avoid them being shifted
     into the significant bits.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  switch (code)
    {
    case LSHIFT_EXPR:
      n->n <<= count;
      break;
    case RSHIFT_EXPR:
      head_marker = HEAD_MARKER (n->n, size);
      n->n >>= count;
      /* Arithmetic shift of signed type: result is dependent on the value.  */
      if (!TYPE_UNSIGNED (n->type) && head_marker)
	for (i = 0; i < count / BITS_PER_MARKER; i++)
	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
		  << ((size - 1 - i) * BITS_PER_MARKER);
      break;
    case LROTATE_EXPR:
      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
      break;
    case RROTATE_EXPR:
      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
      break;
    default:
      return false;
    }
  /* Zero unused bits for size.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
  return true;
}

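/* For example, with a 4-byte unsigned type and N->n == 0x04030201, a
   RSHIFT_EXPR by 8 bits yields markers 0x00040302: the low byte of the
   result comes from source byte 1 and the top byte is known to be zero.
   For a signed type whose head marker is set, the freed top bytes become
   MARKER_BYTE_UNKNOWN (0xff) instead, since an arithmetic shift replicates
   an unknown sign bit.  */
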
/* Perform sanity checking for the symbolic number N and the gimple
   statement STMT.  */

inline bool
verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
{
  tree lhs_type;

  lhs_type = gimple_expr_type (stmt);

  if (TREE_CODE (lhs_type) != INTEGER_TYPE)
    return false;

  if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
    return false;

  return true;
}

/* Initialize the symbolic number N for the bswap pass from the base element
   SRC manipulated by the bitwise OR expression.  */

bool
init_symbolic_number (struct symbolic_number *n, tree src)
{
  int size;

  if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
    return false;

  n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
  n->src = src;

  /* Set up the symbolic number N by setting each byte to a value between 1 and
     the byte size of rhs1.  The highest order byte is set to the size and the
     lowest order byte to 1.  */
  n->type = TREE_TYPE (src);
  size = TYPE_PRECISION (n->type);
  if (size % BITS_PER_UNIT != 0)
    return false;
  size /= BITS_PER_UNIT;
  if (size > 64 / BITS_PER_MARKER)
    return false;
  n->range = size;
  n->n = CMPNOP;
  n->n_ops = 1;

  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  return true;
}

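/* E.g. for a 2-byte unsigned short SRC this sets N->n to 0x0201 and
   N->range to 2; for a 4-byte unsigned int it sets N->n to 0x04030201
   and N->range to 4.  */
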
/* Check if STMT might be a byte swap or a nop from a memory source and return
   the answer.  If so, REF is that memory source and the base of the memory
   area accessed and the offset of the access from that base are recorded
   in N.  */

bool
find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
{
  /* Leaf node is an array or component ref.  Memorize its base and
     offset from base to compare to other such leaf nodes.  */
  HOST_WIDE_INT bitsize, bitpos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset, base_addr;

  /* Not prepared to handle PDP endian.  */
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return false;

  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
    return false;

  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
				   &unsignedp, &reversep, &volatilep);

  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
    /* Do not rewrite TARGET_MEM_REF.  */
    return false;
  else if (TREE_CODE (base_addr) == MEM_REF)
    {
      offset_int bit_offset = 0;
      tree off = TREE_OPERAND (base_addr, 1);

      if (!integer_zerop (off))
	{
	  offset_int boff, coff = mem_ref_offset (base_addr);
	  boff = coff << LOG2_BITS_PER_UNIT;
	  bit_offset += boff;
	}

      base_addr = TREE_OPERAND (base_addr, 0);

      /* Avoid returning a negative bitpos as this may wreak havoc later.  */
      if (wi::neg_p (bit_offset))
	{
	  offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false);
	  offset_int tem = wi::bit_and_not (bit_offset, mask);
	  /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf.
	     Subtract it from BIT_OFFSET and add it (scaled) to OFFSET.  */
	  bit_offset -= tem;
	  tem >>= LOG2_BITS_PER_UNIT;
	  if (offset)
	    offset = size_binop (PLUS_EXPR, offset,
				 wide_int_to_tree (sizetype, tem));
	  else
	    offset = wide_int_to_tree (sizetype, tem);
	}

      bitpos += bit_offset.to_shwi ();
    }
  else
    base_addr = build_fold_addr_expr (base_addr);

  if (bitpos % BITS_PER_UNIT)
    return false;
  if (bitsize % BITS_PER_UNIT)
    return false;
  if (reversep)
    return false;

  if (!init_symbolic_number (n, ref))
    return false;
  n->base_addr = base_addr;
  n->offset = offset;
  n->bytepos = bitpos / BITS_PER_UNIT;
  n->alias_set = reference_alias_ptr_type (ref);
  n->vuse = gimple_vuse (stmt);
  return true;
}

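/* For instance, for a load from a[3] where a is a char array, this records
   &a as N->base_addr, 3 as N->bytepos and initializes the markers for a
   1-byte value; perform_symbolic_merge below can then combine it with loads
   of neighbouring bytes of the same array.  */
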
/* Compute the symbolic number N representing the result of a bitwise OR on 2
   symbolic numbers N1 and N2 whose source statements are respectively
   SOURCE_STMT1 and SOURCE_STMT2.  */

gimple *
perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
			gimple *source_stmt2, struct symbolic_number *n2,
			struct symbolic_number *n)
{
  int i, size;
  uint64_t mask;
  gimple *source_stmt;
  struct symbolic_number *n_start;

  tree rhs1 = gimple_assign_rhs1 (source_stmt1);
  if (TREE_CODE (rhs1) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
    rhs1 = TREE_OPERAND (rhs1, 0);
  tree rhs2 = gimple_assign_rhs1 (source_stmt2);
  if (TREE_CODE (rhs2) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
    rhs2 = TREE_OPERAND (rhs2, 0);

  /* Sources are different, cancel bswap if they are not memory locations
     with the same base (array, structure, ...).  */
  if (rhs1 != rhs2)
    {
      uint64_t inc;
      HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
      struct symbolic_number *toinc_n_ptr, *n_end;
      basic_block bb1, bb2;

      if (!n1->base_addr || !n2->base_addr
	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
	return NULL;

      if (!n1->offset != !n2->offset
	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
	return NULL;

      start1 = 0;
      if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
	return NULL;

      if (start1 < start2)
	{
	  n_start = n1;
	  start_sub = start2 - start1;
	}
      else
	{
	  n_start = n2;
	  start_sub = start1 - start2;
	}

      bb1 = gimple_bb (source_stmt1);
      bb2 = gimple_bb (source_stmt2);
      if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
	source_stmt = source_stmt1;
      else
	source_stmt = source_stmt2;

      /* Find the highest address at which a load is performed and
	 compute related info.  */
      end1 = start1 + (n1->range - 1);
      end2 = start2 + (n2->range - 1);
      if (end1 < end2)
	{
	  end = end2;
	  end_sub = end2 - end1;
	}
      else
	{
	  end = end1;
	  end_sub = end1 - end2;
	}
      n_end = (end2 > end1) ? n2 : n1;

      /* Find symbolic number whose lsb is the most significant.  */
      if (BYTES_BIG_ENDIAN)
	toinc_n_ptr = (n_end == n1) ? n2 : n1;
      else
	toinc_n_ptr = (n_start == n1) ? n2 : n1;

      n->range = end - MIN (start1, start2) + 1;

      /* Check that the range of memory covered can be represented by
	 a symbolic number.  */
      if (n->range > 64 / BITS_PER_MARKER)
	return NULL;

      /* Reinterpret byte marks in symbolic number holding the value of
	 bigger weight according to target endianness.  */
      inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
      size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
      for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
	{
	  unsigned marker
	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
	  if (marker && marker != MARKER_BYTE_UNKNOWN)
	    toinc_n_ptr->n += inc;
	}
    }
  else
    {
      n->range = n1->range;
      n_start = n1;
      source_stmt = source_stmt1;
    }

  if (!n1->alias_set
      || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
    n->alias_set = n1->alias_set;
  else
    n->alias_set = ptr_type_node;
  n->vuse = n_start->vuse;
  n->base_addr = n_start->base_addr;
  n->offset = n_start->offset;
  n->src = n_start->src;
  n->bytepos = n_start->bytepos;
  n->type = n_start->type;
  size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

  for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
    {
      uint64_t masked1, masked2;

      masked1 = n1->n & mask;
      masked2 = n2->n & mask;
      if (masked1 && masked2 && masked1 != masked2)
	return NULL;
    }
  n->n = n1->n | n2->n;
  n->n_ops = n1->n_ops + n2->n_ops;

  return source_stmt;
}

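/* As an example, on little-endian consider merging the symbolic numbers for
   (uint32_t) a[0] (markers 0x00000001, bytepos 0) and (uint32_t) a[1] << 8
   (markers 0x00000100, bytepos 1), where a is an unsigned char array.  The
   markers of the number with the higher start are re-based by the distance
   between the two starts, giving 0x00000200, so the ORed result has markers
   0x00000201 and a range of 2: byte 0 of the value comes from a[0] and
   byte 1 from a[1], i.e. a native-endian 2-byte read.  */
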
/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   could successfully be executed the function returns a gimple stmt whose
   rhs's first tree is the expression of the source operand, and NULL
   otherwise.  */

gimple *
find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
{
  enum tree_code code;
  tree rhs1, rhs2 = NULL;
  gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
  enum gimple_rhs_class rhs_class;

  if (!limit || !is_gimple_assign (stmt))
    return NULL;

  rhs1 = gimple_assign_rhs1 (stmt);

  if (find_bswap_or_nop_load (stmt, rhs1, n))
    return stmt;

  /* Handle BIT_FIELD_REF.  */
  if (TREE_CODE (rhs1) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
    {
      unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
      unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
      if (bitpos % BITS_PER_UNIT == 0
	  && bitsize % BITS_PER_UNIT == 0
	  && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
	{
	  /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
	  if (BYTES_BIG_ENDIAN)
	    bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;

	  /* Shift.  */
	  if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
	    return NULL;

	  /* Mask.  */
	  uint64_t mask = 0;
	  uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
	  for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
	       i++, tmp <<= BITS_PER_UNIT)
	    mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
	  n->n &= mask;

	  /* Convert.  */
	  n->type = TREE_TYPE (rhs1);
	  if (!n->base_addr)
	    n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

	  return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
	}

      return NULL;
    }

  if (TREE_CODE (rhs1) != SSA_NAME)
    return NULL;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = gimple_assign_rhs_class (stmt);
  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);

  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  /* Handle unary rhs and binary rhs with integer constants as second
     operand.  */

  if (rhs_class == GIMPLE_UNARY_RHS
      || (rhs_class == GIMPLE_BINARY_RHS
	  && TREE_CODE (rhs2) == INTEGER_CST))
    {
      if (code != BIT_AND_EXPR
	  && code != LSHIFT_EXPR
	  && code != RSHIFT_EXPR
	  && code != LROTATE_EXPR
	  && code != RROTATE_EXPR
	  && !CONVERT_EXPR_CODE_P (code))
	return NULL;

      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);

      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
	 we have to initialize the symbolic number.  */
      if (!source_stmt1)
	{
	  if (gimple_assign_load_p (stmt)
	      || !init_symbolic_number (n, rhs1))
	    return NULL;
	  source_stmt1 = stmt;
	}

      switch (code)
	{
	case BIT_AND_EXPR:
	  {
	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	    uint64_t val = int_cst_value (rhs2), mask = 0;
	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;

	    /* Only constants masking full bytes are allowed.  */
	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
	      if ((val & tmp) != 0 && (val & tmp) != tmp)
		return NULL;
	      else if (val & tmp)
		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);

	    n->n &= mask;
	  }
	  break;
	case LSHIFT_EXPR:
	case RSHIFT_EXPR:
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
	    return NULL;
	  break;
	CASE_CONVERT:
	  {
	    int i, type_size, old_type_size;
	    tree type;

	    type = gimple_expr_type (stmt);
	    type_size = TYPE_PRECISION (type);
	    if (type_size % BITS_PER_UNIT != 0)
	      return NULL;
	    type_size /= BITS_PER_UNIT;
	    if (type_size > 64 / BITS_PER_MARKER)
	      return NULL;

	    /* Sign extension: result is dependent on the value.  */
	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
		&& HEAD_MARKER (n->n, old_type_size))
	      for (i = 0; i < type_size - old_type_size; i++)
		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
			<< ((type_size - 1 - i) * BITS_PER_MARKER);

	    if (type_size < 64 / BITS_PER_MARKER)
	      {
		/* If STMT casts to a smaller type mask out the bits not
		   belonging to the target type.  */
		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
	      }
	    n->type = type;
	    if (!n->base_addr)
	      n->range = type_size;
	  }
	  break;
	default:
	  return NULL;
	};
      return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
    }

  /* Handle binary rhs.  */

  if (rhs_class == GIMPLE_BINARY_RHS)
    {
      struct symbolic_number n1, n2;
      gimple *source_stmt, *source_stmt2;

      if (code != BIT_IOR_EXPR)
	return NULL;

      if (TREE_CODE (rhs2) != SSA_NAME)
	return NULL;

      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);

      switch (code)
	{
	case BIT_IOR_EXPR:
	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);

	  if (!source_stmt1)
	    return NULL;

	  source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);

	  if (!source_stmt2)
	    return NULL;

	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
	    return NULL;

	  if (n1.vuse != n2.vuse)
	    return NULL;

	  source_stmt
	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);

	  if (!source_stmt)
	    return NULL;

	  if (!verify_symbolic_number_p (n, stmt))
	    return NULL;

	  break;
	default:
	  return NULL;
	}
      return source_stmt;
    }
  return NULL;
}

/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
   *CMPXCHG, *CMPNOP and adjust *N.  */

void
find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
			    uint64_t *cmpnop)
{
  unsigned rsize;
  uint64_t tmpn, mask;

  /* The number which the find_bswap_or_nop_1 result should match in order
     to have a full byte swap.  The number is shifted to the right
     according to the size of the symbolic number before using it.  */
  *cmpxchg = CMPXCHG;
  *cmpnop = CMPNOP;

  /* Find real size of result (highest non-zero byte).  */
  if (n->base_addr)
    for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
  else
    rsize = n->range;

  /* Zero out the bits corresponding to untouched bytes in original gimple
     expression.  */
  if (n->range < (int) sizeof (int64_t))
    {
      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
      *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
      *cmpnop &= mask;
    }

  /* Zero out the bits corresponding to unused bytes in the result of the
     gimple expression.  */
  if (rsize < n->range)
    {
      if (BYTES_BIG_ENDIAN)
	{
	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
	  *cmpxchg &= mask;
	  *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
	}
      else
	{
	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
	  *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
	  *cmpnop &= mask;
	}
      n->range = rsize;
    }

  n->range *= BITS_PER_UNIT;
}

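/* For instance, for a 2-byte symbolic number this leaves *CMPNOP == 0x0201
   and shifts *CMPXCHG down to 0x0102, and N->range becomes 16 (bits); the
   caller then compares N->n against these two values to classify the
   sequence as a native-endian read or as a byte swap.  */
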
/* Check if STMT completes a bswap implementation or a read in a given
   endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
   accordingly.  It also sets N to represent the kind of operations
   performed: size of the resulting expression and whether it works on
   a memory source, and if so alias-set and vuse.  Finally, the
   function returns a stmt whose rhs's first tree is the source
   expression.  */

gimple *
find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
{
  /* The last parameter determines the depth search limit.  It usually
     correlates directly to the number n of bytes to be touched.  We
     increase that number by log2(n) + 1 here in order to also
     cover signed -> unsigned conversions of the src operand as can be seen
     in libgcc, and for initial shift/and operation of the src operand.  */
  int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
  limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
  gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);

  if (!ins_stmt)
    return NULL;

  uint64_t cmpxchg, cmpnop;
  find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);

  /* A complete byte swap should make the symbolic number start with
     the largest digit in the highest order byte.  An unchanged symbolic
     number indicates a read with the same endianness as the target
     architecture.  */
  if (n->n == cmpnop)
    *bswap = false;
  else if (n->n == cmpxchg)
    *bswap = true;
  else
    return NULL;

  /* Useless bit manipulation performed by code.  */
  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
    return NULL;

  return ins_stmt;
}

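/* A typical pattern this recognizes is the classic open-coded 32-bit swap

     uint32_t r = (x >> 24) | ((x >> 8) & 0xff00)
		  | ((x << 8) & 0xff0000) | (x << 24);

   whose markers end up matching CMPXCHG for a 4-byte value, so *BSWAP is
   set, as well as reads assembled from byte loads such as
   a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24), which on a
   little-endian target matches CMPNOP and *BSWAP is cleared.  */
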
const pass_data pass_data_optimize_bswap =
{
  GIMPLE_PASS, /* type */
  "bswap", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_optimize_bswap : public gimple_opt_pass
{
public:
  pass_optimize_bswap (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
    }

  virtual unsigned int execute (function *);

}; // class pass_optimize_bswap

/* Perform the bswap optimization: replace the expression computed in the rhs
   of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
   bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr (non
   null if a load is needed) and BSWAP.  The type, VUSE and alias-set of the
   load to perform are also given in N while the builtin bswap invocation is
   given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one of
   the load statements involved to construct the rhs in gsi_stmt (GSI) and
   N->range gives the size of the rhs expression for maintaining some
   statistics.

   Note that if the replacement involves a load and if gsi_stmt (GSI) is
   non-NULL, that stmt is moved just after INS_STMT to do the load with the
   same VUSE which can lead to gsi_stmt (GSI) changing basic block.  */

tree
bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
	       tree bswap_type, tree load_type, struct symbolic_number *n,
	       bool bswap)
{
  tree src, tmp, tgt = NULL_TREE;
  gimple *bswap_stmt;

  gimple *cur_stmt = gsi_stmt (gsi);
  src = n->src;
  if (cur_stmt)
    tgt = gimple_assign_lhs (cur_stmt);

  /* Need to load the value from memory first.  */
  if (n->base_addr)
    {
      gimple_stmt_iterator gsi_ins = gsi;
      if (ins_stmt)
	gsi_ins = gsi_for_stmt (ins_stmt);
      tree addr_expr, addr_tmp, val_expr, val_tmp;
      tree load_offset_ptr, aligned_load_type;
      gimple *load_stmt;
      unsigned align = get_object_alignment (src);
      poly_int64 load_offset = 0;

      if (cur_stmt)
	{
	  basic_block ins_bb = gimple_bb (ins_stmt);
	  basic_block cur_bb = gimple_bb (cur_stmt);
	  if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
	    return NULL_TREE;

	  /* Move cur_stmt just before one of the loads of the original
	     to ensure it has the same VUSE.  See PR61517 for what could
	     go wrong.  */
	  if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
	    reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
	  gsi_move_before (&gsi, &gsi_ins);
	  gsi = gsi_for_stmt (cur_stmt);
	}
      else
	gsi = gsi_ins;

      /* Compute address to load from and cast according to the size
	 of the load.  */
      addr_expr = build_fold_addr_expr (src);
      if (is_gimple_mem_ref_addr (addr_expr))
	addr_tmp = unshare_expr (addr_expr);
      else
	{
	  addr_tmp = unshare_expr (n->base_addr);
	  if (!is_gimple_mem_ref_addr (addr_tmp))
	    addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
						   is_gimple_mem_ref_addr,
						   NULL_TREE, true,
						   GSI_SAME_STMT);
	  load_offset = n->bytepos;
	  if (n->offset)
	    {
	      tree off
		= force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
					    true, NULL_TREE, true,
					    GSI_SAME_STMT);
	      gimple *stmt
		= gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
				       POINTER_PLUS_EXPR, addr_tmp, off);
	      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	      addr_tmp = gimple_assign_lhs (stmt);
	    }
	}

      /* Perform the load.  */
      aligned_load_type = load_type;
      if (align < TYPE_ALIGN (load_type))
	aligned_load_type = build_aligned_type (load_type, align);
      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
			      load_offset_ptr);

      if (!bswap)
	{
	  if (n->range == 16)
	    nop_stats.found_16bit++;
	  else if (n->range == 32)
	    nop_stats.found_32bit++;
	  else
	    {
	      gcc_assert (n->range == 64);
	      nop_stats.found_64bit++;
	    }

	  /* Convert the result of load if necessary.  */
	  if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
	    {
	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
					    "load_dst");
	      load_stmt = gimple_build_assign (val_tmp, val_expr);
	      gimple_set_vuse (load_stmt, n->vuse);
	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
	      gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
	      update_stmt (cur_stmt);
	    }
	  else if (cur_stmt)
	    {
	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
	      gimple_set_vuse (cur_stmt, n->vuse);
	      update_stmt (cur_stmt);
	    }
	  else
	    {
	      tgt = make_ssa_name (load_type);
	      cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
	      gimple_set_vuse (cur_stmt, n->vuse);
	      gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
	    }

	  if (dump_file)
	    {
	      fprintf (dump_file,
		       "%d bit load in target endianness found at: ",
		       (int) n->range);
	      print_gimple_stmt (dump_file, cur_stmt, 0);
	    }
	  return tgt;
	}
      else
	{
	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
	  load_stmt = gimple_build_assign (val_tmp, val_expr);
	  gimple_set_vuse (load_stmt, n->vuse);
	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
	}
      src = val_tmp;
    }
  else if (!bswap)
    {
      gimple *g = NULL;
      if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
	{
	  if (!is_gimple_val (src))
	    return NULL_TREE;
	  g = gimple_build_assign (tgt, NOP_EXPR, src);
	}
      else if (cur_stmt)
	g = gimple_build_assign (tgt, src);
      else
	tgt = src;
      if (n->range == 16)
	nop_stats.found_16bit++;
      else if (n->range == 32)
	nop_stats.found_32bit++;
      else
	{
	  gcc_assert (n->range == 64);
	  nop_stats.found_64bit++;
	}
      if (dump_file)
	{
	  fprintf (dump_file,
		   "%d bit reshuffle in target endianness found at: ",
		   (int) n->range);
	  if (cur_stmt)
	    print_gimple_stmt (dump_file, cur_stmt, 0);
	  else
	    {
	      print_generic_expr (dump_file, tgt, 0);
	      fprintf (dump_file, "\n");
	    }
	}
      if (cur_stmt)
	gsi_replace (&gsi, g, true);
      return tgt;
    }
  else if (TREE_CODE (src) == BIT_FIELD_REF)
    src = TREE_OPERAND (src, 0);

  if (n->range == 16)
    bswap_stats.found_16bit++;
  else if (n->range == 32)
    bswap_stats.found_32bit++;
  else
    {
      gcc_assert (n->range == 64);
      bswap_stats.found_64bit++;
    }

  tmp = src;

  /* Convert the src expression if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  /* Canonical form for 16-bit bswap is a rotate expression.  Only 16-bit
     values are considered, as rotation of 2N-bit values by N bits is
     generally not equivalent to a bswap.  Consider for instance
     0x01020304 r>> 16 which gives 0x03040102 while a bswap for that value
     is 0x04030201.  */
  if (bswap && n->range == 16)
    {
      tree count = build_int_cst (NULL, BITS_PER_UNIT);
      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
      bswap_stmt = gimple_build_assign (NULL, src);
    }
  else
    bswap_stmt = gimple_build_call (fndecl, 1, tmp);

  if (tgt == NULL_TREE)
    tgt = make_ssa_name (bswap_type);
  tmp = tgt;

  /* Convert the result if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
      convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
      gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  gimple_set_lhs (bswap_stmt, tmp);

  if (dump_file)
    {
      fprintf (dump_file, "%d bit bswap implementation found at: ",
	       (int) n->range);
      if (cur_stmt)
	print_gimple_stmt (dump_file, cur_stmt, 0);
      else
	{
	  print_generic_expr (dump_file, tgt, 0);
	  fprintf (dump_file, "\n");
	}
    }

  if (cur_stmt)
    {
      gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
      gsi_remove (&gsi, true);
    }
  else
    gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
  return tgt;
}

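/* Roughly speaking, for a detected byte-swapped 32-bit memory read this
   emits something like
     load_dst_N = MEM <uint32_t> [...];
     tgt = __builtin_bswap32 (load_dst_N);
   while for a read that already matches the target endianness only the
   single wide load is emitted.  */
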
/* Find manual byte swap implementations as well as loads in a given
   endianness.  Byte swaps are turned into a bswap builtin invocation
   while endian loads are converted to a bswap builtin invocation or a
   simple load according to the target endianness.  */

unsigned int
pass_optimize_bswap::execute (function *fun)
{
  basic_block bb;
  bool bswap32_p, bswap64_p;
  bool changed = false;
  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;

  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
		   || (bswap32_p && word_mode == SImode)));

  /* Determine the argument type of the builtins.  The code later on
     assumes that the return and argument type are the same.  */
  if (bswap32_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  if (bswap64_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  memset (&nop_stats, 0, sizeof (nop_stats));
  memset (&bswap_stats, 0, sizeof (bswap_stats));
  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      /* We do a reverse scan for bswap patterns to make sure we get the
	 widest match.  As bswap pattern matching doesn't handle previously
	 inserted smaller bswap replacements as sub-patterns, the wider
	 variant wouldn't be detected.  */
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
	{
	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
	  enum tree_code code;
	  struct symbolic_number n;
	  bool bswap;

	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
	     might be moved to a different basic block by bswap_replace and gsi
	     must not point to it if that is the case.  Doing the gsi_prev
	     here makes sure that gsi points to the statement previous to
	     cur_stmt while still making sure that all statements are
	     considered in this basic block.  */
	  gsi_prev (&gsi);

	  if (!is_gimple_assign (cur_stmt))
	    continue;

	  code = gimple_assign_rhs_code (cur_stmt);
	  switch (code)
	    {
	    case LROTATE_EXPR:
	    case RROTATE_EXPR:
	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
		     % BITS_PER_UNIT)
		continue;
	      /* Fall through.  */
	    case BIT_IOR_EXPR:
	      break;
	    default:
	      continue;
	    }

	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);

	  if (!ins_stmt)
	    continue;

	  switch (n.range)
	    {
	    case 16:
	      /* Already in canonical form, nothing to do.  */
	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
		continue;
	      load_type = bswap_type = uint16_type_node;
	      break;
	    case 32:
	      load_type = uint32_type_node;
	      if (bswap32_p)
		{
		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
		  bswap_type = bswap32_type;
		}
	      break;
	    case 64:
	      load_type = uint64_type_node;
	      if (bswap64_p)
		{
		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
		  bswap_type = bswap64_type;
		}
	      break;
	    default:
	      continue;
	    }

	  if (bswap && !fndecl && n.range != 16)
	    continue;

	  if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
			     bswap_type, load_type, &n, bswap))
	    changed = true;
	}
    }

  statistics_counter_event (fun, "16-bit nop implementations found",
			    nop_stats.found_16bit);
  statistics_counter_event (fun, "32-bit nop implementations found",
			    nop_stats.found_32bit);
  statistics_counter_event (fun, "64-bit nop implementations found",
			    nop_stats.found_64bit);
  statistics_counter_event (fun, "16-bit bswap implementations found",
			    bswap_stats.found_16bit);
  statistics_counter_event (fun, "32-bit bswap implementations found",
			    bswap_stats.found_32bit);
  statistics_counter_event (fun, "64-bit bswap implementations found",
			    bswap_stats.found_64bit);

  return (changed ? TODO_update_ssa : 0);
}

} // anon namespace

gimple_opt_pass *
make_pass_optimize_bswap (gcc::context *ctxt)
{
  return new pass_optimize_bswap (ctxt);
}

namespace {

/* Struct recording one operand for the store, which is either a constant,
   in which case VAL represents the constant and all the other fields are
   zero, or a memory load, in which case VAL represents the reference,
   BASE_ADDR is non-NULL and the other fields also reflect the memory load.  */

struct store_operand_info
{
  tree val;
  tree base_addr;
  unsigned HOST_WIDE_INT bitsize;
  unsigned HOST_WIDE_INT bitpos;
  unsigned HOST_WIDE_INT bitregion_start;
  unsigned HOST_WIDE_INT bitregion_end;
  gimple *stmt;
  bool bit_not_p;
  store_operand_info ();
};

store_operand_info::store_operand_info ()
  : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
    bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
{
}

/* Struct recording the information about a single store of an immediate
   to memory.  These are created in the first phase and coalesced into
   merged_store_group objects in the second phase.  */

struct store_immediate_info
{
  unsigned HOST_WIDE_INT bitsize;
  unsigned HOST_WIDE_INT bitpos;
  unsigned HOST_WIDE_INT bitregion_start;
  /* This is one past the last bit of the bit region.  */
  unsigned HOST_WIDE_INT bitregion_end;
  gimple *stmt;
  unsigned int order;
  /* INTEGER_CST for constant stores, MEM_REF for memory copy or
     BIT_*_EXPR for logical bitwise operation.
     LROTATE_EXPR if it can only be bswap optimized and
     ops are not really meaningful.
     NOP_EXPR if bswap optimization detected identity, ops
     are not meaningful.  */
  enum tree_code rhs_code;
  /* Two fields for bswap optimization purposes.  */
  struct symbolic_number n;
  gimple *ins_stmt;
  /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing.  */
  bool bit_not_p;
  /* True if ops have been swapped and thus ops[1] represents
     rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2.  */
  bool ops_swapped_p;
  /* Operands.  For BIT_*_EXPR rhs_code both operands are used, otherwise
     just the first one.  */
  store_operand_info ops[2];
  store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
			unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
			gimple *, unsigned int, enum tree_code,
			struct symbolic_number &, gimple *, bool,
			const store_operand_info &,
			const store_operand_info &);
};

store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
					    unsigned HOST_WIDE_INT bp,
					    unsigned HOST_WIDE_INT brs,
					    unsigned HOST_WIDE_INT bre,
					    gimple *st,
					    unsigned int ord,
					    enum tree_code rhscode,
					    struct symbolic_number &nr,
					    gimple *ins_stmtp,
					    bool bitnotp,
					    const store_operand_info &op0r,
					    const store_operand_info &op1r)
  : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
    stmt (st), order (ord), rhs_code (rhscode), n (nr),
    ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false)
#if __cplusplus >= 201103L
  , ops { op0r, op1r }
{
}
#else
{
  ops[0] = op0r;
  ops[1] = op1r;
}
#endif

/* Struct representing a group of stores to contiguous memory locations.
   These are produced by the second phase (coalescing) and consumed in the
   third phase that outputs the widened stores.  */

struct merged_store_group
{
  unsigned HOST_WIDE_INT start;
  unsigned HOST_WIDE_INT width;
  unsigned HOST_WIDE_INT bitregion_start;
  unsigned HOST_WIDE_INT bitregion_end;
  /* The size of the allocated memory for val and mask.  */
  unsigned HOST_WIDE_INT buf_size;
  unsigned HOST_WIDE_INT align_base;
  unsigned HOST_WIDE_INT load_align_base[2];

  unsigned int align;
  unsigned int load_align[2];
  unsigned int first_order;
  unsigned int last_order;

  auto_vec<store_immediate_info *> stores;
  /* We record the first and last original statements in the sequence because
     we'll need their vuse/vdef and replacement position.  It's easier to keep
     track of them separately as 'stores' is reordered by apply_stores.  */
  gimple *last_stmt;
  gimple *first_stmt;
  unsigned char *val;
  unsigned char *mask;

  merged_store_group (store_immediate_info *);
  ~merged_store_group ();
  void merge_into (store_immediate_info *);
  void merge_overlapping (store_immediate_info *);
  bool apply_stores ();
private:
  void do_merge (store_immediate_info *);
};

/* Debug helper.  Dump LEN elements of byte array PTR to FD in hex.  */

static void
dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
{
  if (!fd)
    return;

  for (unsigned int i = 0; i < len; i++)
    fprintf (fd, "%x ", ptr[i]);
  fprintf (fd, "\n");
}

/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
   bits between adjacent elements.  AMNT should be within
   [0, BITS_PER_UNIT).
   Example, AMNT = 2:
   00011111|11100000 << 2 = 01111111|10000000
   PTR[1]  | PTR[0]         PTR[1]  | PTR[0].  */

static void
shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
{
  if (amnt == 0)
    return;

  unsigned char carry_over = 0U;
  unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
  unsigned char clear_mask = (~0U) << amnt;

  for (unsigned int i = 0; i < sz; i++)
    {
      unsigned prev_carry_over = carry_over;
      carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);

      ptr[i] <<= amnt;
      if (i != 0)
	{
	  ptr[i] &= clear_mask;
	  ptr[i] |= prev_carry_over;
	}
    }
}

/* Like shift_bytes_in_array but for big-endian.
   Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
   bits between adjacent elements.  AMNT should be within
   [0, BITS_PER_UNIT).
   Example, AMNT = 2:
   00011111|11100000 >> 2 = 00000111|11111000
   PTR[0]  | PTR[1]         PTR[0]  | PTR[1].  */

static void
shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
			    unsigned int amnt)
{
  if (amnt == 0)
    return;

  unsigned char carry_over = 0U;
  unsigned char carry_mask = ~(~0U << amnt);

  for (unsigned int i = 0; i < sz; i++)
    {
      unsigned prev_carry_over = carry_over;
      carry_over = ptr[i] & carry_mask;

      carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
      ptr[i] >>= amnt;
      ptr[i] |= prev_carry_over;
    }
}

/* Clear out LEN bits starting from bit START in the byte array
   PTR.  This clears the bits to the *right* from START.
   START must be within [0, BITS_PER_UNIT) and counts starting from
   the least significant bit.  */

static void
clear_bit_region_be (unsigned char *ptr, unsigned int start,
		     unsigned int len)
{
  if (len == 0)
    return;
  /* Clear len bits to the right of start.  */
  else if (len <= start + 1)
    {
      unsigned char mask = (~(~0U << len));
      mask = mask << (start + 1U - len);
      ptr[0] &= ~mask;
    }
  else if (start != BITS_PER_UNIT - 1)
    {
      clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
      clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
			   len - (start % BITS_PER_UNIT) - 1);
    }
  else if (start == BITS_PER_UNIT - 1
	   && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      memset (ptr, 0, nbytes);
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
			     len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}

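/* For example, with BITS_PER_UNIT == 8, clear_bit_region_be (ptr, 5, 3)
   clears bits 5, 4 and 3 of ptr[0] (mask 0x38), while
   clear_bit_region_be (ptr, 7, 12) zeroes all of ptr[0] and the top four
   bits of ptr[1].  */
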
/* In the byte array PTR clear the bit region starting at bit
   START that is LEN bits wide.
   For regions spanning multiple bytes do this recursively until we reach
   zero LEN or a region contained within a single byte.  */

static void
clear_bit_region (unsigned char *ptr, unsigned int start,
		  unsigned int len)
{
  /* Degenerate base case.  */
  if (len == 0)
    return;
  else if (start >= BITS_PER_UNIT)
    clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
  /* Second base case.  */
  else if ((start + len) <= BITS_PER_UNIT)
    {
      unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
      mask >>= BITS_PER_UNIT - (start + len);

      ptr[0] &= ~mask;

      return;
    }
  /* Clear most significant bits in a byte and proceed with the next byte.  */
  else if (start != 0)
    {
      clear_bit_region (ptr, start, BITS_PER_UNIT - start);
      clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
    }
  /* Whole bytes need to be cleared.  */
  else if (start == 0 && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      /* We could recurse on each byte but we clear whole bytes, so a simple
	 memset will do.  */
      memset (ptr, '\0', nbytes);
      /* Clear the remaining sub-byte region if there is one.  */
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}

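/* Unlike clear_bit_region_be, START here counts upwards from the least
   significant bit of ptr[0].  E.g. clear_bit_region (ptr, 6, 4) clears
   bits 6 and 7 of ptr[0] and bits 0 and 1 of ptr[1].  */
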
1597/* Write BITLEN bits of EXPR to the byte array PTR at
1598 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1599 Return true if the operation succeeded. */
1600
1601static bool
1602encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
46a61395 1603 unsigned int total_bytes)
f663d9ad
KT
1604{
1605 unsigned int first_byte = bitpos / BITS_PER_UNIT;
1606 tree tmp_int = expr;
ad1de652
JJ
1607 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1608 || (bitpos % BITS_PER_UNIT)
f4b31647 1609 || !int_mode_for_size (bitlen, 0).exists ());
f663d9ad
KT
1610
1611 if (!sub_byte_op_p)
2f391428 1612 return native_encode_expr (tmp_int, ptr + first_byte, total_bytes) != 0;
f663d9ad
KT
1613
1614 /* LITTLE-ENDIAN
1615 We are writing a non byte-sized quantity or at a position that is not
1616 at a byte boundary.
1617 |--------|--------|--------| ptr + first_byte
1618 ^ ^
1619 xxx xxxxxxxx xxx< bp>
1620 |______EXPR____|
1621
46a61395 1622 First native_encode_expr EXPR into a temporary buffer and shift each
f663d9ad
KT
1623 byte in the buffer by 'bp' (carrying the bits over as necessary).
1624 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1625 <------bitlen---->< bp>
1626 Then we clear the destination bits:
1627 |---00000|00000000|000-----| ptr + first_byte
1628 <-------bitlen--->< bp>
1629
1630 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1631 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1632
1633 BIG-ENDIAN
1634 We are writing a non byte-sized quantity or at a position that is not
1635 at a byte boundary.
1636 ptr + first_byte |--------|--------|--------|
1637 ^ ^
1638 <bp >xxx xxxxxxxx xxx
1639 |_____EXPR_____|
1640
46a61395 1641 First native_encode_expr EXPR into a temporary buffer and shift each
f663d9ad
KT
1642 byte in the buffer to the right by (carrying the bits over as necessary).
1643 We shift by as much as needed to align the most significant bit of EXPR
1644 with bitpos:
1645 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1646 <---bitlen----> <bp ><-----bitlen----->
1647 Then we clear the destination bits:
1648 ptr + first_byte |-----000||00000000||00000---|
1649 <bp ><-------bitlen----->
1650
1651 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1652 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1653 The awkwardness comes from the fact that bitpos is counted from the
1654 most significant bit of a byte. */
1655
ef1d3b57
RS
1656 /* We must be dealing with fixed-size data at this point, since the
1657 total size is also fixed. */
1658 fixed_size_mode mode = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
f663d9ad 1659 /* Allocate an extra byte so that we have space to shift into. */
ef1d3b57 1660 unsigned int byte_size = GET_MODE_SIZE (mode) + 1;
f663d9ad 1661 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
46a61395 1662 memset (tmpbuf, '\0', byte_size);
f663d9ad
KT
1663 /* The store detection code should only have allowed constants that are
1664 accepted by native_encode_expr. */
2f391428 1665 if (native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
f663d9ad
KT
1666 gcc_unreachable ();
1667
1668 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1669 bytes to write. This means it can write more than
1670 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1671 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1672 bitlen and zero out the bits that are not relevant as well (that may
1673 contain a sign bit due to sign-extension). */
1674 unsigned int padding
1675 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
ad1de652
JJ
1676 /* On big-endian the padding is at the 'front' so just skip the initial
1677 bytes. */
1678 if (BYTES_BIG_ENDIAN)
1679 tmpbuf += padding;
1680
1681 byte_size -= padding;
1682
1683 if (bitlen % BITS_PER_UNIT != 0)
f663d9ad 1684 {
4b2c06f4 1685 if (BYTES_BIG_ENDIAN)
ad1de652
JJ
1686 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1687 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1688 else
1689 clear_bit_region (tmpbuf, bitlen,
1690 byte_size * BITS_PER_UNIT - bitlen);
f663d9ad 1691 }
ad1de652
JJ
1692 /* Left shifting relies on the last byte being clear if bitlen is
1693 a multiple of BITS_PER_UNIT, which might not be clear if
1694 there are padding bytes. */
1695 else if (!BYTES_BIG_ENDIAN)
1696 tmpbuf[byte_size - 1] = '\0';
f663d9ad
KT
1697
1698 /* Clear the bit region in PTR where the bits from TMPBUF will be
46a61395 1699 inserted. */
f663d9ad
KT
1700 if (BYTES_BIG_ENDIAN)
1701 clear_bit_region_be (ptr + first_byte,
1702 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1703 else
1704 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1705
1706 int shift_amnt;
1707 int bitlen_mod = bitlen % BITS_PER_UNIT;
1708 int bitpos_mod = bitpos % BITS_PER_UNIT;
1709
1710 bool skip_byte = false;
1711 if (BYTES_BIG_ENDIAN)
1712 {
1713 /* BITPOS and BITLEN are exactly aligned and no shifting
1714 is necessary. */
1715 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1716 || (bitpos_mod == 0 && bitlen_mod == 0))
1717 shift_amnt = 0;
1718 /* |. . . . . . . .|
1719 <bp > <blen >.
1720 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1721 of the value until it aligns with 'bp' in the next byte over. */
1722 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1723 {
1724 shift_amnt = bitlen_mod + bitpos_mod;
1725 skip_byte = bitlen_mod != 0;
1726 }
1727 /* |. . . . . . . .|
1728 <----bp--->
1729 <---blen---->.
1730 Shift the value right within the same byte so it aligns with 'bp'. */
1731 else
1732 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1733 }
1734 else
1735 shift_amnt = bitpos % BITS_PER_UNIT;
1736
1737 /* Create the shifted version of EXPR. */
1738 if (!BYTES_BIG_ENDIAN)
46a61395
JJ
1739 {
1740 shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
1741 if (shift_amnt == 0)
1742 byte_size--;
1743 }
f663d9ad
KT
1744 else
1745 {
1746 gcc_assert (BYTES_BIG_ENDIAN);
1747 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1748 /* If shifting right forced us to move into the next byte skip the now
1749 empty byte. */
1750 if (skip_byte)
1751 {
1752 tmpbuf++;
1753 byte_size--;
1754 }
1755 }
1756
1757 /* Insert the bits from TMPBUF. */
1758 for (unsigned int i = 0; i < byte_size; i++)
1759 ptr[first_byte + i] |= tmpbuf[i];
1760
1761 return true;
1762}
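/* Illustrative sketch only (values chosen for exposition, not taken from
   the pass): on a little-endian target, writing the 5-bit constant 0x16
   (0b10110) of a QImode type at BITPOS == 11 gives first_byte == 1 and a
   shift amount of bitpos % BITS_PER_UNIT == 3.  After native_encode_expr
   and clearing the bits beyond BITLEN the temporary buffer is
   { 0x16, 0x00 }; shifting left by 3 yields { 0xb0, 0x00 }, bits 3..7 of
   ptr[1] are cleared, and the buffer is ORed in, so ptr[1] |= 0xb0 and
   ptr[2] |= 0x00.  */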
1763
1764/* Sorting function for store_immediate_info objects.
1765 Sorts them by bitposition. */
1766
1767static int
1768sort_by_bitpos (const void *x, const void *y)
1769{
1770 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1771 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1772
109cca3b 1773 if ((*tmp)->bitpos < (*tmp2)->bitpos)
f663d9ad
KT
1774 return -1;
1775 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
1776 return 1;
109cca3b 1777 else
0f0027d1
KT
1778 /* If they are the same, use the order, which is guaranteed to
1779 be different. */
1780 return (*tmp)->order - (*tmp2)->order;
f663d9ad
KT
1781}
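/* Editorial note: when two overlapping constant stores share the same
   bitpos, the order tie-break above keeps the comparison total and
   deterministic; apply_stores later re-sorts by order, so the store that
   is later in program order overwrites the earlier one in the merged
   value.  */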
1782
1783/* Sorting function for store_immediate_info objects.
1784 Sorts them by the order field. */
1785
1786static int
1787sort_by_order (const void *x, const void *y)
1788{
1789 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1790 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1791
1792 if ((*tmp)->order < (*tmp2)->order)
1793 return -1;
1794 else if ((*tmp)->order > (*tmp2)->order)
1795 return 1;
1796
1797 gcc_unreachable ();
1798}
1799
1800/* Initialize a merged_store_group object from a store_immediate_info
1801 object. */
1802
1803merged_store_group::merged_store_group (store_immediate_info *info)
1804{
1805 start = info->bitpos;
1806 width = info->bitsize;
a62b3dc5
JJ
1807 bitregion_start = info->bitregion_start;
1808 bitregion_end = info->bitregion_end;
f663d9ad
KT
1809 /* VAL has memory allocated for it in apply_stores once the group
1810 width has been finalized. */
1811 val = NULL;
a62b3dc5
JJ
1812 mask = NULL;
1813 unsigned HOST_WIDE_INT align_bitpos = 0;
1814 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1815 &align, &align_bitpos);
1816 align_base = start - align_bitpos;
245f6de1
JJ
1817 for (int i = 0; i < 2; ++i)
1818 {
1819 store_operand_info &op = info->ops[i];
1820 if (op.base_addr == NULL_TREE)
1821 {
1822 load_align[i] = 0;
1823 load_align_base[i] = 0;
1824 }
1825 else
1826 {
1827 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
1828 load_align_base[i] = op.bitpos - align_bitpos;
1829 }
1830 }
f663d9ad
KT
1831 stores.create (1);
1832 stores.safe_push (info);
1833 last_stmt = info->stmt;
1834 last_order = info->order;
1835 first_stmt = last_stmt;
1836 first_order = last_order;
1837 buf_size = 0;
1838}
1839
1840merged_store_group::~merged_store_group ()
1841{
1842 if (val)
1843 XDELETEVEC (val);
1844}
1845
a62b3dc5
JJ
1846/* Helper method for merge_into and merge_overlapping to do
1847 the common part. */
f663d9ad 1848void
a62b3dc5 1849merged_store_group::do_merge (store_immediate_info *info)
f663d9ad 1850{
a62b3dc5
JJ
1851 bitregion_start = MIN (bitregion_start, info->bitregion_start);
1852 bitregion_end = MAX (bitregion_end, info->bitregion_end);
1853
1854 unsigned int this_align;
1855 unsigned HOST_WIDE_INT align_bitpos = 0;
1856 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1857 &this_align, &align_bitpos);
1858 if (this_align > align)
1859 {
1860 align = this_align;
1861 align_base = info->bitpos - align_bitpos;
1862 }
245f6de1
JJ
1863 for (int i = 0; i < 2; ++i)
1864 {
1865 store_operand_info &op = info->ops[i];
1866 if (!op.base_addr)
1867 continue;
1868
1869 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
1870 if (this_align > load_align[i])
1871 {
1872 load_align[i] = this_align;
1873 load_align_base[i] = op.bitpos - align_bitpos;
1874 }
1875 }
f663d9ad 1876
f663d9ad
KT
1877 gimple *stmt = info->stmt;
1878 stores.safe_push (info);
1879 if (info->order > last_order)
1880 {
1881 last_order = info->order;
1882 last_stmt = stmt;
1883 }
1884 else if (info->order < first_order)
1885 {
1886 first_order = info->order;
1887 first_stmt = stmt;
1888 }
1889}
1890
a62b3dc5
JJ
1891/* Merge a store recorded by INFO into this merged store.
1892 The store is not overlapping with the existing recorded
1893 stores. */
1894
1895void
1896merged_store_group::merge_into (store_immediate_info *info)
1897{
1898 unsigned HOST_WIDE_INT wid = info->bitsize;
1899 /* Make sure we're inserting in the position we think we're inserting. */
1900 gcc_assert (info->bitpos >= start + width
1901 && info->bitregion_start <= bitregion_end);
1902
1903 width += wid;
1904 do_merge (info);
1905}
1906
f663d9ad
KT
1907/* Merge a store described by INFO into this merged store.
1908 INFO overlaps in some way with the current store (i.e. it's not contiguous,
1909 which is the case handled by merged_store_group::merge_into). */
1910
1911void
1912merged_store_group::merge_overlapping (store_immediate_info *info)
1913{
f663d9ad 1914 /* If the store extends the size of the group, extend the width. */
a62b3dc5 1915 if (info->bitpos + info->bitsize > start + width)
f663d9ad
KT
1916 width += info->bitpos + info->bitsize - (start + width);
1917
a62b3dc5 1918 do_merge (info);
f663d9ad
KT
1919}
1920
1921/* Go through all the recorded stores in this group in program order and
1922 apply their values to the VAL byte array to create the final merged
1923 value. Return true if the operation succeeded. */
1924
1925bool
1926merged_store_group::apply_stores ()
1927{
a62b3dc5
JJ
1928 /* Make sure we have more than one store in the group, otherwise we cannot
1929 merge anything. */
1930 if (bitregion_start % BITS_PER_UNIT != 0
1931 || bitregion_end % BITS_PER_UNIT != 0
f663d9ad
KT
1932 || stores.length () == 1)
1933 return false;
1934
1935 stores.qsort (sort_by_order);
a62b3dc5 1936 store_immediate_info *info;
f663d9ad
KT
1937 unsigned int i;
1938 /* Create a buffer of a size that is 2 times the number of bytes we're
1939 storing. That way native_encode_expr can write power-of-2-sized
1940 chunks without overrunning. */
a62b3dc5
JJ
1941 buf_size = 2 * ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
1942 val = XNEWVEC (unsigned char, 2 * buf_size);
1943 mask = val + buf_size;
1944 memset (val, 0, buf_size);
1945 memset (mask, ~0U, buf_size);
f663d9ad
KT
1946
1947 FOR_EACH_VEC_ELT (stores, i, info)
1948 {
a62b3dc5 1949 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
245f6de1
JJ
1950 tree cst = NULL_TREE;
1951 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
1952 cst = info->ops[0].val;
1953 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
1954 cst = info->ops[1].val;
1955 bool ret = true;
1956 if (cst)
1957 ret = encode_tree_to_bitpos (cst, val, info->bitsize,
1958 pos_in_buffer, buf_size);
1959 if (cst && dump_file && (dump_flags & TDF_DETAILS))
f663d9ad
KT
1960 {
1961 if (ret)
1962 {
1963 fprintf (dump_file, "After writing ");
245f6de1 1964 print_generic_expr (dump_file, cst, 0);
f663d9ad 1965 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
4b84d9b8
JJ
1966 " at position %d the merged region contains:\n",
1967 info->bitsize, pos_in_buffer);
f663d9ad
KT
1968 dump_char_array (dump_file, val, buf_size);
1969 }
1970 else
1971 fprintf (dump_file, "Failed to merge stores\n");
4b84d9b8 1972 }
f663d9ad
KT
1973 if (!ret)
1974 return false;
a62b3dc5
JJ
1975 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
1976 if (BYTES_BIG_ENDIAN)
4403d99a
JJ
1977 clear_bit_region_be (m, (BITS_PER_UNIT - 1
1978 - (pos_in_buffer % BITS_PER_UNIT)),
1979 info->bitsize);
a62b3dc5
JJ
1980 else
1981 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
f663d9ad 1982 }
4b84d9b8 1983 stores.qsort (sort_by_bitpos);
f663d9ad
KT
1984 return true;
1985}
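/* For illustration only: with two recorded 16-bit constant stores at bit
   positions 0 and 16 of a 32-bit bit region, say 0x1234 and 0x5678,
   BUF_SIZE is 8 and on a little-endian target the loop above leaves
   val = { 0x34, 0x12, 0x78, 0x56, 0, 0, 0, 0 }.  The mask bytes covered
   by the stores are cleared to 0x00; any byte whose mask stays 0xff is
   later treated as padding by split_group.  */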
1986
1987/* Structure describing the store chain. */
1988
1989struct imm_store_chain_info
1990{
50b6d676
AO
1991 /* Doubly-linked list that imposes an order on chain processing.
1992 PNXP (prev's next pointer) points to the head of a list, or to
1993 the next field in the previous chain in the list.
1994 See pass_store_merging::m_stores_head for more rationale. */
1995 imm_store_chain_info *next, **pnxp;
b5926e23 1996 tree base_addr;
a62b3dc5 1997 auto_vec<store_immediate_info *> m_store_info;
f663d9ad
KT
1998 auto_vec<merged_store_group *> m_merged_store_groups;
1999
50b6d676
AO
2000 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2001 : next (inspt), pnxp (&inspt), base_addr (b_a)
2002 {
2003 inspt = this;
2004 if (next)
2005 {
2006 gcc_checking_assert (pnxp == next->pnxp);
2007 next->pnxp = &next;
2008 }
2009 }
2010 ~imm_store_chain_info ()
2011 {
2012 *pnxp = next;
2013 if (next)
2014 {
2015 gcc_checking_assert (&next == next->pnxp);
2016 next->pnxp = pnxp;
2017 }
2018 }
b5926e23 2019 bool terminate_and_process_chain ();
4b84d9b8 2020 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
f663d9ad 2021 bool coalesce_immediate_stores ();
b5926e23
RB
2022 bool output_merged_store (merged_store_group *);
2023 bool output_merged_stores ();
f663d9ad
KT
2024};
2025
2026const pass_data pass_data_tree_store_merging = {
2027 GIMPLE_PASS, /* type */
2028 "store-merging", /* name */
2029 OPTGROUP_NONE, /* optinfo_flags */
2030 TV_GIMPLE_STORE_MERGING, /* tv_id */
2031 PROP_ssa, /* properties_required */
2032 0, /* properties_provided */
2033 0, /* properties_destroyed */
2034 0, /* todo_flags_start */
2035 TODO_update_ssa, /* todo_flags_finish */
2036};
2037
2038class pass_store_merging : public gimple_opt_pass
2039{
2040public:
2041 pass_store_merging (gcc::context *ctxt)
faec5f24 2042 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
f663d9ad
KT
2043 {
2044 }
2045
a62b3dc5
JJ
2046 /* Pass not supported for PDP-endianness, nor for insane hosts
2047 or target character sizes where native_{encode,interpret}_expr
2048 doesn't work properly. */
f663d9ad
KT
2049 virtual bool
2050 gate (function *)
2051 {
a62b3dc5
JJ
2052 return flag_store_merging
2053 && WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN
2054 && CHAR_BIT == 8
2055 && BITS_PER_UNIT == 8;
f663d9ad
KT
2056 }
2057
2058 virtual unsigned int execute (function *);
2059
2060private:
2061 hash_map<tree_operand_hash, struct imm_store_chain_info *> m_stores;
2062
50b6d676
AO
2063 /* Form a doubly-linked stack of the elements of m_stores, so that
2064 we can iterate over them in a predictable way. Using this order
2065 avoids extraneous differences in the compiler output just because
2066 of tree pointer variations (e.g. different chains end up in
2067 different positions of m_stores, so they are handled in different
2068 orders, so they allocate or release SSA names in different
2069 orders, and when they get reused, subsequent passes end up
2070 getting different SSA names, which may ultimately change
2071 decisions when going out of SSA). */
2072 imm_store_chain_info *m_stores_head;
2073
245f6de1 2074 void process_store (gimple *);
f663d9ad 2075 bool terminate_and_process_all_chains ();
383ac8dc 2076 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
b5926e23 2077 bool terminate_and_release_chain (imm_store_chain_info *);
f663d9ad
KT
2078}; // class pass_store_merging
2079
2080/* Terminate and process all recorded chains. Return true if any changes
2081 were made. */
2082
2083bool
2084pass_store_merging::terminate_and_process_all_chains ()
2085{
f663d9ad 2086 bool ret = false;
50b6d676
AO
2087 while (m_stores_head)
2088 ret |= terminate_and_release_chain (m_stores_head);
2089 gcc_assert (m_stores.elements () == 0);
2090 gcc_assert (m_stores_head == NULL);
f663d9ad
KT
2091
2092 return ret;
2093}
2094
383ac8dc
JJ
2095/* Terminate all chains that are affected by the statement STMT.
2096 CHAIN_INFO is the chain we should ignore from the checks if
2097 non-NULL. */
f663d9ad
KT
2098
2099bool
20770eb8 2100pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
b5926e23 2101 **chain_info,
f663d9ad
KT
2102 gimple *stmt)
2103{
2104 bool ret = false;
2105
2106 /* If the statement doesn't touch memory it can't alias. */
2107 if (!gimple_vuse (stmt))
2108 return false;
2109
9e875fd8 2110 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
383ac8dc 2111 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
f663d9ad 2112 {
383ac8dc
JJ
2113 next = cur->next;
2114
2115 /* We already checked all the stores in chain_info and terminated the
2116 chain if necessary. Skip it here. */
2117 if (chain_info && *chain_info == cur)
2118 continue;
2119
245f6de1
JJ
2120 store_immediate_info *info;
2121 unsigned int i;
383ac8dc 2122 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
f663d9ad 2123 {
9e875fd8
JJ
2124 tree lhs = gimple_assign_lhs (info->stmt);
2125 if (ref_maybe_used_by_stmt_p (stmt, lhs)
2126 || stmt_may_clobber_ref_p (stmt, lhs)
2127 || (store_lhs && refs_output_dependent_p (store_lhs, lhs)))
f663d9ad 2128 {
245f6de1 2129 if (dump_file && (dump_flags & TDF_DETAILS))
f663d9ad 2130 {
245f6de1
JJ
2131 fprintf (dump_file, "stmt causes chain termination:\n");
2132 print_gimple_stmt (dump_file, stmt, 0);
f663d9ad 2133 }
383ac8dc 2134 terminate_and_release_chain (cur);
245f6de1
JJ
2135 ret = true;
2136 break;
f663d9ad
KT
2137 }
2138 }
2139 }
2140
f663d9ad
KT
2141 return ret;
2142}
2143
2144/* Helper function. Terminate the recorded chain storing to base object
2145 BASE. Return true if the merging and output was successful. The m_stores
2146 entry is removed after the processing in any case. */
2147
2148bool
b5926e23 2149pass_store_merging::terminate_and_release_chain (imm_store_chain_info *chain_info)
f663d9ad 2150{
b5926e23
RB
2151 bool ret = chain_info->terminate_and_process_chain ();
2152 m_stores.remove (chain_info->base_addr);
2153 delete chain_info;
f663d9ad
KT
2154 return ret;
2155}
2156
245f6de1
JJ
2157/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2158 may clobber REF. FIRST and LAST must be in the same basic block and
4b84d9b8
JJ
2159 have non-NULL vdef. We want to be able to sink a load of REF across
2160 stores between FIRST and LAST, up to right before LAST. */
245f6de1
JJ
2161
2162bool
2163stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2164{
2165 ao_ref r;
2166 ao_ref_init (&r, ref);
2167 unsigned int count = 0;
2168 tree vop = gimple_vdef (last);
2169 gimple *stmt;
2170
2171 gcc_checking_assert (gimple_bb (first) == gimple_bb (last));
2172 do
2173 {
2174 stmt = SSA_NAME_DEF_STMT (vop);
2175 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2176 return true;
4b84d9b8
JJ
2177 if (gimple_store_p (stmt)
2178 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2179 return true;
245f6de1
JJ
2180 /* Avoid quadratic compile time by bounding the number of checks
2181 we perform. */
2182 if (++count > MAX_STORE_ALIAS_CHECKS)
2183 return true;
2184 vop = gimple_vuse (stmt);
2185 }
2186 while (stmt != first);
2187 return false;
2188}
2189
2190/* Return true if INFO->ops[IDX] is mergeable with the
2191 corresponding loads already in MERGED_STORE group.
2192 BASE_ADDR is the base address of the whole store group. */
2193
2194bool
2195compatible_load_p (merged_store_group *merged_store,
2196 store_immediate_info *info,
2197 tree base_addr, int idx)
2198{
2199 store_immediate_info *infof = merged_store->stores[0];
2200 if (!info->ops[idx].base_addr
2201 || (info->ops[idx].bitpos - infof->ops[idx].bitpos
2202 != info->bitpos - infof->bitpos)
2203 || !operand_equal_p (info->ops[idx].base_addr,
2204 infof->ops[idx].base_addr, 0))
2205 return false;
2206
2207 store_immediate_info *infol = merged_store->stores.last ();
2208 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2209 /* In this case all vuses should be the same, e.g.
2210 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2211 or
2212 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2213 and we can emit the coalesced load next to any of those loads. */
2214 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2215 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2216 return true;
2217
2218 /* Otherwise, at least for now require that the load has the same
2219 vuse as the store. See following examples. */
2220 if (gimple_vuse (info->stmt) != load_vuse)
2221 return false;
2222
2223 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2224 || (infof != infol
2225 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2226 return false;
2227
2228 /* If the load is from the same location as the store, already
2229 the construction of the immediate chain info guarantees no intervening
2230 stores, so no further checks are needed. Example:
2231 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
2232 if (info->ops[idx].bitpos == info->bitpos
2233 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2234 return true;
2235
2236 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2237 of the stores in the group, or any other stores in between those.
2238 Previous calls to compatible_load_p ensured that for all the
2239 merged_store->stores IDX loads, no stmts starting with
2240 merged_store->first_stmt and ending right before merged_store->last_stmt
2241 clobbers those loads. */
2242 gimple *first = merged_store->first_stmt;
2243 gimple *last = merged_store->last_stmt;
2244 unsigned int i;
2245 store_immediate_info *infoc;
2246 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2247 comes before the so far first load, we'll be changing
2248 merged_store->first_stmt. In that case we need to give up if
2249 any of the earlier processed loads could be clobbered by the stmts in
2250 the new range. */
2251 if (info->order < merged_store->first_order)
2252 {
2253 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2254 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2255 return false;
2256 first = info->stmt;
2257 }
2258 /* Similarly, we could change merged_store->last_stmt, so ensure
2259 in that case no stmts in the new range clobber any of the earlier
2260 processed loads. */
2261 else if (info->order > merged_store->last_order)
2262 {
2263 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2264 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2265 return false;
2266 last = info->stmt;
2267 }
2268 /* And finally, we'd be adding a new load to the set, ensure it isn't
2269 clobbered in the new range. */
2270 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2271 return false;
2272
2273 /* Otherwise, we are looking for:
2274 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2275 or
2276 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2277 return true;
2278}
2279
4b84d9b8
JJ
2280/* Add all refs loaded to compute VAL to REFS vector. */
2281
2282void
2283gather_bswap_load_refs (vec<tree> *refs, tree val)
2284{
2285 if (TREE_CODE (val) != SSA_NAME)
2286 return;
2287
2288 gimple *stmt = SSA_NAME_DEF_STMT (val);
2289 if (!is_gimple_assign (stmt))
2290 return;
2291
2292 if (gimple_assign_load_p (stmt))
2293 {
2294 refs->safe_push (gimple_assign_rhs1 (stmt));
2295 return;
2296 }
2297
2298 switch (gimple_assign_rhs_class (stmt))
2299 {
2300 case GIMPLE_BINARY_RHS:
2301 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2302 /* FALLTHRU */
2303 case GIMPLE_UNARY_RHS:
2304 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2305 break;
2306 default:
2307 gcc_unreachable ();
2308 }
2309}
2310
2311/* Return true if m_store_info[first] and at least one following store
2312 form a group which stores a value of bitsize try_size that is byte
2313 swapped from a memory load or some value, or is the identity of some value.
2314 This uses the bswap pass APIs. */
2315
2316bool
2317imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2318 unsigned int first,
2319 unsigned int try_size)
2320{
2321 unsigned int len = m_store_info.length (), last = first;
2322 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2323 if (width >= try_size)
2324 return false;
2325 for (unsigned int i = first + 1; i < len; ++i)
2326 {
2327 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2328 || m_store_info[i]->ins_stmt == NULL)
2329 return false;
2330 width += m_store_info[i]->bitsize;
2331 if (width >= try_size)
2332 {
2333 last = i;
2334 break;
2335 }
2336 }
2337 if (width != try_size)
2338 return false;
2339
2340 bool allow_unaligned
2341 = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
2342 /* Punt if the combined store would not be aligned and we need alignment. */
2343 if (!allow_unaligned)
2344 {
2345 unsigned int align = merged_store->align;
2346 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2347 for (unsigned int i = first + 1; i <= last; ++i)
2348 {
2349 unsigned int this_align;
2350 unsigned HOST_WIDE_INT align_bitpos = 0;
2351 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2352 &this_align, &align_bitpos);
2353 if (this_align > align)
2354 {
2355 align = this_align;
2356 align_base = m_store_info[i]->bitpos - align_bitpos;
2357 }
2358 }
2359 unsigned HOST_WIDE_INT align_bitpos
2360 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2361 if (align_bitpos)
2362 align = least_bit_hwi (align_bitpos);
2363 if (align < try_size)
2364 return false;
2365 }
2366
2367 tree type;
2368 switch (try_size)
2369 {
2370 case 16: type = uint16_type_node; break;
2371 case 32: type = uint32_type_node; break;
2372 case 64: type = uint64_type_node; break;
2373 default: gcc_unreachable ();
2374 }
2375 struct symbolic_number n;
2376 gimple *ins_stmt = NULL;
2377 int vuse_store = -1;
2378 unsigned int first_order = merged_store->first_order;
2379 unsigned int last_order = merged_store->last_order;
2380 gimple *first_stmt = merged_store->first_stmt;
2381 gimple *last_stmt = merged_store->last_stmt;
2382 store_immediate_info *infof = m_store_info[first];
2383
2384 for (unsigned int i = first; i <= last; ++i)
2385 {
2386 store_immediate_info *info = m_store_info[i];
2387 struct symbolic_number this_n = info->n;
2388 this_n.type = type;
2389 if (!this_n.base_addr)
2390 this_n.range = try_size / BITS_PER_UNIT;
30fa8e9c
JJ
2391 else
2392 /* Update vuse in case it has changed by output_merged_stores. */
2393 this_n.vuse = gimple_vuse (info->ins_stmt);
4b84d9b8
JJ
2394 unsigned int bitpos = info->bitpos - infof->bitpos;
2395 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2396 BYTES_BIG_ENDIAN
2397 ? try_size - info->bitsize - bitpos
2398 : bitpos))
2399 return false;
aa11164a 2400 if (this_n.base_addr && vuse_store)
4b84d9b8
JJ
2401 {
2402 unsigned int j;
2403 for (j = first; j <= last; ++j)
2404 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2405 break;
2406 if (j > last)
2407 {
2408 if (vuse_store == 1)
2409 return false;
2410 vuse_store = 0;
2411 }
2412 }
2413 if (i == first)
2414 {
2415 n = this_n;
2416 ins_stmt = info->ins_stmt;
2417 }
2418 else
2419 {
2420 if (n.base_addr)
2421 {
2422 if (n.vuse != this_n.vuse)
2423 {
2424 if (vuse_store == 0)
2425 return false;
2426 vuse_store = 1;
2427 }
2428 if (info->order > last_order)
2429 {
2430 last_order = info->order;
2431 last_stmt = info->stmt;
2432 }
2433 else if (info->order < first_order)
2434 {
2435 first_order = info->order;
2436 first_stmt = info->stmt;
2437 }
2438 }
2439
2440 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2441 &this_n, &n);
2442 if (ins_stmt == NULL)
2443 return false;
2444 }
2445 }
2446
2447 uint64_t cmpxchg, cmpnop;
2448 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
2449
2450 /* A complete byte swap should make the symbolic number start with
2451 the largest digit in the highest order byte. An unchanged symbolic
2452 number indicates a read with the same endianness as the target architecture. */
2453 if (n.n != cmpnop && n.n != cmpxchg)
2454 return false;
2455
2456 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2457 return false;
2458
2459 /* Don't handle memory copy this way if normal non-bswap processing
2460 would handle it too. */
2461 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2462 {
2463 unsigned int i;
2464 for (i = first; i <= last; ++i)
2465 if (m_store_info[i]->rhs_code != MEM_REF)
2466 break;
2467 if (i == last + 1)
2468 return false;
2469 }
2470
2471 if (n.n == cmpxchg)
2472 switch (try_size)
2473 {
2474 case 16:
2475 /* Will emit LROTATE_EXPR. */
2476 break;
2477 case 32:
2478 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2479 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2480 break;
2481 return false;
2482 case 64:
2483 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2484 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2485 break;
2486 return false;
2487 default:
2488 gcc_unreachable ();
2489 }
2490
2491 if (!allow_unaligned && n.base_addr)
2492 {
2493 unsigned int align = get_object_alignment (n.src);
2494 if (align < try_size)
2495 return false;
2496 }
2497
2498 /* If each load has the vuse of the corresponding store, we need to
2499 verify that the loads can be sunk right before the last store. */
2500 if (vuse_store == 1)
2501 {
2502 auto_vec<tree, 64> refs;
2503 for (unsigned int i = first; i <= last; ++i)
2504 gather_bswap_load_refs (&refs,
2505 gimple_assign_rhs1 (m_store_info[i]->stmt));
2506
2507 unsigned int i;
2508 tree ref;
2509 FOR_EACH_VEC_ELT (refs, i, ref)
2510 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2511 return false;
2512 n.vuse = NULL_TREE;
2513 }
2514
2515 infof->n = n;
2516 infof->ins_stmt = ins_stmt;
2517 for (unsigned int i = first; i <= last; ++i)
2518 {
2519 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2520 m_store_info[i]->ops[0].base_addr = NULL_TREE;
2521 m_store_info[i]->ops[1].base_addr = NULL_TREE;
2522 if (i != first)
2523 merged_store->merge_into (m_store_info[i]);
2524 }
2525
2526 return true;
2527}
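/* As a rough illustration (hypothetical source, assuming p and q are
   distinct unsigned char arrays): a chain like
     p[0] = q[3]; p[1] = q[2]; p[2] = q[1]; p[3] = q[0];
   can be recognized here on a little-endian target with try_size == 32
   and n.n == cmpxchg, so that, provided the target has a bswap32 optab,
   it is later emitted as a 4-byte load of q, a __builtin_bswap32 and a
   single 4-byte store.  The straight copy
     p[0] = q[0]; p[1] = q[1]; p[2] = q[2]; p[3] = q[3];
   matches cmpnop with all MEM_REF stores and is rejected above so that
   the normal (non-bswap) merging handles it.  */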
2528
f663d9ad
KT
2529/* Go through the candidate stores recorded in m_store_info and merge them
2530 into merged_store_group objects recorded into m_merged_store_groups
2531 representing the widened stores. Return true if coalescing was successful
2532 and the number of widened stores is fewer than the original number
2533 of stores. */
2534
2535bool
2536imm_store_chain_info::coalesce_immediate_stores ()
2537{
2538 /* Anything less can't be processed. */
2539 if (m_store_info.length () < 2)
2540 return false;
2541
2542 if (dump_file && (dump_flags & TDF_DETAILS))
2543 fprintf (dump_file, "Attempting to coalesce %u stores in chain.\n",
2544 m_store_info.length ());
2545
2546 store_immediate_info *info;
4b84d9b8 2547 unsigned int i, ignore = 0;
f663d9ad
KT
2548
2549 /* Order the stores by the bitposition they write to. */
2550 m_store_info.qsort (sort_by_bitpos);
2551
2552 info = m_store_info[0];
2553 merged_store_group *merged_store = new merged_store_group (info);
2554
2555 FOR_EACH_VEC_ELT (m_store_info, i, info)
2556 {
2557 if (dump_file && (dump_flags & TDF_DETAILS))
2558 {
2559 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
2560 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:\n",
2561 i, info->bitsize, info->bitpos);
ef6cb4c7 2562 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
f663d9ad
KT
2563 fprintf (dump_file, "\n------------\n");
2564 }
2565
4b84d9b8 2566 if (i <= ignore)
f663d9ad
KT
2567 continue;
2568
4b84d9b8
JJ
2569 /* First try to handle group of stores like:
2570 p[0] = data >> 24;
2571 p[1] = data >> 16;
2572 p[2] = data >> 8;
2573 p[3] = data;
2574 using the bswap framework. */
2575 if (info->bitpos == merged_store->start + merged_store->width
2576 && merged_store->stores.length () == 1
2577 && merged_store->stores[0]->ins_stmt != NULL
2578 && info->ins_stmt != NULL)
2579 {
2580 unsigned int try_size;
2581 for (try_size = 64; try_size >= 16; try_size >>= 1)
2582 if (try_coalesce_bswap (merged_store, i - 1, try_size))
2583 break;
2584
2585 if (try_size >= 16)
2586 {
2587 ignore = i + merged_store->stores.length () - 1;
2588 m_merged_store_groups.safe_push (merged_store);
2589 if (ignore < m_store_info.length ())
2590 merged_store = new merged_store_group (m_store_info[ignore]);
2591 else
2592 merged_store = NULL;
2593 continue;
2594 }
2595 }
2596
f663d9ad
KT
2597 /* |---store 1---|
2598 |---store 2---|
4b84d9b8
JJ
2599 Overlapping stores. */
2600 if (IN_RANGE (info->bitpos, merged_store->start,
f663d9ad
KT
2601 merged_store->start + merged_store->width - 1))
2602 {
245f6de1
JJ
2603 /* Only allow overlapping stores of constants. */
2604 if (info->rhs_code == INTEGER_CST
2605 && merged_store->stores[0]->rhs_code == INTEGER_CST)
2606 {
2607 merged_store->merge_overlapping (info);
2608 continue;
2609 }
f663d9ad 2610 }
245f6de1
JJ
2611 /* |---store 1---||---store 2---|
2612 This store is consecutive to the previous one.
2613 Merge it into the current store group. There can be gaps in between
2614 the stores, but there can't be gaps in between bitregions. */
4b84d9b8
JJ
2615 else if (info->rhs_code != LROTATE_EXPR
2616 && info->bitregion_start <= merged_store->bitregion_end
a6fbd154 2617 && info->rhs_code == merged_store->stores[0]->rhs_code)
f663d9ad 2618 {
245f6de1
JJ
2619 store_immediate_info *infof = merged_store->stores[0];
2620
2621 /* All the rhs_code ops that take 2 operands are commutative,
2622 swap the operands if it could make the operands compatible. */
2623 if (infof->ops[0].base_addr
2624 && infof->ops[1].base_addr
2625 && info->ops[0].base_addr
2626 && info->ops[1].base_addr
2627 && (info->ops[1].bitpos - infof->ops[0].bitpos
2628 == info->bitpos - infof->bitpos)
2629 && operand_equal_p (info->ops[1].base_addr,
2630 infof->ops[0].base_addr, 0))
127ef369
JJ
2631 {
2632 std::swap (info->ops[0], info->ops[1]);
2633 info->ops_swapped_p = true;
2634 }
5bfd2f9b
JJ
2635 if ((infof->ops[0].base_addr
2636 ? compatible_load_p (merged_store, info, base_addr, 0)
2637 : !info->ops[0].base_addr)
2638 && (infof->ops[1].base_addr
2639 ? compatible_load_p (merged_store, info, base_addr, 1)
2640 : !info->ops[1].base_addr))
245f6de1
JJ
2641 {
2642 merged_store->merge_into (info);
2643 continue;
2644 }
2645 }
f663d9ad 2646
245f6de1
JJ
2647 /* |---store 1---| <gap> |---store 2---|.
2648 Gap between stores or the rhs not compatible. Start a new group. */
f663d9ad 2649
245f6de1
JJ
2650 /* Try to apply all the stores recorded for the group to determine
2651 the bitpattern they write and discard it if that fails.
2652 This will also reject single-store groups. */
2653 if (!merged_store->apply_stores ())
2654 delete merged_store;
2655 else
2656 m_merged_store_groups.safe_push (merged_store);
f663d9ad 2657
245f6de1 2658 merged_store = new merged_store_group (info);
f663d9ad
KT
2659 }
2660
a62b3dc5 2661 /* Record or discard the last store group. */
4b84d9b8
JJ
2662 if (merged_store)
2663 {
2664 if (!merged_store->apply_stores ())
2665 delete merged_store;
2666 else
2667 m_merged_store_groups.safe_push (merged_store);
2668 }
f663d9ad
KT
2669
2670 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
2671 bool success
2672 = !m_merged_store_groups.is_empty ()
2673 && m_merged_store_groups.length () < m_store_info.length ();
2674
2675 if (success && dump_file)
2676 fprintf (dump_file, "Coalescing successful!\n"
a62b3dc5
JJ
2677 "Merged into %u stores\n",
2678 m_merged_store_groups.length ());
f663d9ad
KT
2679
2680 return success;
2681}
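/* Purely as an illustration of the grouping above (hypothetical example,
   not taken from the pass): for
     struct S { unsigned int a : 12; unsigned int b : 20; } s;
     s.a = 0x123; s.b = 0x45678;
   both bit-field stores share one 32-bit bit region, so the second store
   is merged into the first one's group via merge_into and, if the
   profitability checks in output_merged_store pass, the pair is emitted
   as a single 32-bit store of the combined constant.  */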
2682
245f6de1
JJ
2683/* Return the type to use for the merged stores or loads described by STMTS.
2684 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
2685 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
2686 of the MEM_REFs if any. */
f663d9ad
KT
2687
2688static tree
245f6de1
JJ
2689get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
2690 unsigned short *cliquep, unsigned short *basep)
f663d9ad
KT
2691{
2692 gimple *stmt;
2693 unsigned int i;
245f6de1
JJ
2694 tree type = NULL_TREE;
2695 tree ret = NULL_TREE;
2696 *cliquep = 0;
2697 *basep = 0;
f663d9ad
KT
2698
2699 FOR_EACH_VEC_ELT (stmts, i, stmt)
2700 {
245f6de1
JJ
2701 tree ref = is_load ? gimple_assign_rhs1 (stmt)
2702 : gimple_assign_lhs (stmt);
2703 tree type1 = reference_alias_ptr_type (ref);
2704 tree base = get_base_address (ref);
f663d9ad 2705
245f6de1
JJ
2706 if (i == 0)
2707 {
2708 if (TREE_CODE (base) == MEM_REF)
2709 {
2710 *cliquep = MR_DEPENDENCE_CLIQUE (base);
2711 *basep = MR_DEPENDENCE_BASE (base);
2712 }
2713 ret = type = type1;
2714 continue;
2715 }
f663d9ad 2716 if (!alias_ptr_types_compatible_p (type, type1))
245f6de1
JJ
2717 ret = ptr_type_node;
2718 if (TREE_CODE (base) != MEM_REF
2719 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
2720 || *basep != MR_DEPENDENCE_BASE (base))
2721 {
2722 *cliquep = 0;
2723 *basep = 0;
2724 }
f663d9ad 2725 }
245f6de1 2726 return ret;
f663d9ad
KT
2727}
2728
2729/* Return the location_t information we can find among the statements
2730 in STMTS. */
2731
2732static location_t
245f6de1 2733get_location_for_stmts (vec<gimple *> &stmts)
f663d9ad
KT
2734{
2735 gimple *stmt;
2736 unsigned int i;
2737
2738 FOR_EACH_VEC_ELT (stmts, i, stmt)
2739 if (gimple_has_location (stmt))
2740 return gimple_location (stmt);
2741
2742 return UNKNOWN_LOCATION;
2743}
2744
2745/* Used to describe a store resulting from splitting a wide store into
2746 smaller regularly-sized stores in split_group. */
2747
2748struct split_store
2749{
2750 unsigned HOST_WIDE_INT bytepos;
2751 unsigned HOST_WIDE_INT size;
2752 unsigned HOST_WIDE_INT align;
245f6de1 2753 auto_vec<store_immediate_info *> orig_stores;
a62b3dc5
JJ
2754 /* True if there is a single orig stmt covering the whole split store. */
2755 bool orig;
f663d9ad
KT
2756 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
2757 unsigned HOST_WIDE_INT);
2758};
2759
2760/* Simple constructor. */
2761
2762split_store::split_store (unsigned HOST_WIDE_INT bp,
2763 unsigned HOST_WIDE_INT sz,
2764 unsigned HOST_WIDE_INT al)
a62b3dc5 2765 : bytepos (bp), size (sz), align (al), orig (false)
f663d9ad 2766{
245f6de1 2767 orig_stores.create (0);
f663d9ad
KT
2768}
2769
245f6de1
JJ
2770/* Record all stores in GROUP that write to the region starting at BITPOS and
2771 is of size BITSIZE. Record infos for such statements in STORES if
2772 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
2773 if there is exactly one original store in the range. */
f663d9ad 2774
a62b3dc5 2775static store_immediate_info *
245f6de1
JJ
2776find_constituent_stores (struct merged_store_group *group,
2777 vec<store_immediate_info *> *stores,
2778 unsigned int *first,
2779 unsigned HOST_WIDE_INT bitpos,
2780 unsigned HOST_WIDE_INT bitsize)
f663d9ad 2781{
a62b3dc5 2782 store_immediate_info *info, *ret = NULL;
f663d9ad 2783 unsigned int i;
a62b3dc5
JJ
2784 bool second = false;
2785 bool update_first = true;
f663d9ad 2786 unsigned HOST_WIDE_INT end = bitpos + bitsize;
a62b3dc5 2787 for (i = *first; group->stores.iterate (i, &info); ++i)
f663d9ad
KT
2788 {
2789 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
2790 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
a62b3dc5
JJ
2791 if (stmt_end <= bitpos)
2792 {
2793 /* BITPOS passed to this function never decreases from within the
2794 same split_group call, so optimize and don't scan info records
2795 which are known to end before or at BITPOS next time.
2796 Only do it if all stores before this one also pass this. */
2797 if (update_first)
2798 *first = i + 1;
2799 continue;
2800 }
2801 else
2802 update_first = false;
2803
f663d9ad 2804 /* The stores in GROUP are ordered by bitposition so if we're past
a62b3dc5
JJ
2805 the region for this group return early. */
2806 if (stmt_start >= end)
2807 return ret;
2808
245f6de1 2809 if (stores)
a62b3dc5 2810 {
245f6de1 2811 stores->safe_push (info);
a62b3dc5
JJ
2812 if (ret)
2813 {
2814 ret = NULL;
2815 second = true;
2816 }
2817 }
2818 else if (ret)
2819 return NULL;
2820 if (!second)
2821 ret = info;
f663d9ad 2822 }
a62b3dc5 2823 return ret;
f663d9ad
KT
2824}
2825
d7a9512e
JJ
2826/* Return how many SSA_NAMEs used to compute the value to store in the INFO
2827 store have multiple uses. If any SSA_NAME has multiple uses, also
2828 count statements needed to compute it. */
2829
2830static unsigned
2831count_multiple_uses (store_immediate_info *info)
2832{
2833 gimple *stmt = info->stmt;
2834 unsigned ret = 0;
2835 switch (info->rhs_code)
2836 {
2837 case INTEGER_CST:
2838 return 0;
2839 case BIT_AND_EXPR:
2840 case BIT_IOR_EXPR:
2841 case BIT_XOR_EXPR:
d60edaba
JJ
2842 if (info->bit_not_p)
2843 {
2844 if (!has_single_use (gimple_assign_rhs1 (stmt)))
2845 ret = 1; /* Fall through below to return
2846 the BIT_NOT_EXPR stmt and then
2847 BIT_{AND,IOR,XOR}_EXPR and anything it
2848 uses. */
2849 else
2850 /* stmt is after this the BIT_NOT_EXPR. */
2851 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2852 }
d7a9512e
JJ
2853 if (!has_single_use (gimple_assign_rhs1 (stmt)))
2854 {
2855 ret += 1 + info->ops[0].bit_not_p;
2856 if (info->ops[1].base_addr)
2857 ret += 1 + info->ops[1].bit_not_p;
2858 return ret + 1;
2859 }
2860 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2861 /* stmt is now the BIT_*_EXPR. */
2862 if (!has_single_use (gimple_assign_rhs1 (stmt)))
127ef369
JJ
2863 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
2864 else if (info->ops[info->ops_swapped_p].bit_not_p)
d7a9512e
JJ
2865 {
2866 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2867 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
2868 ++ret;
2869 }
2870 if (info->ops[1].base_addr == NULL_TREE)
127ef369
JJ
2871 {
2872 gcc_checking_assert (!info->ops_swapped_p);
2873 return ret;
2874 }
d7a9512e 2875 if (!has_single_use (gimple_assign_rhs2 (stmt)))
127ef369
JJ
2876 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
2877 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
d7a9512e
JJ
2878 {
2879 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
2880 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
2881 ++ret;
2882 }
2883 return ret;
2884 case MEM_REF:
2885 if (!has_single_use (gimple_assign_rhs1 (stmt)))
2886 return 1 + info->ops[0].bit_not_p;
2887 else if (info->ops[0].bit_not_p)
2888 {
2889 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
2890 if (!has_single_use (gimple_assign_rhs1 (stmt)))
2891 return 1;
2892 }
2893 return 0;
2894 default:
2895 gcc_unreachable ();
2896 }
2897}
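/* A hypothetical example of the accounting above: for a recorded store
     _1 = t.a;  _2 = _1 | 1;  s.a = _2;
   where _1 is also used by an unrelated statement, the walk reaches the
   BIT_IOR_EXPR, sees that _1 does not have a single use and returns 1,
   i.e. the load feeding _1 has to be kept even after the merged sequence
   replaces these statements.  */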
2898
f663d9ad 2899/* Split a merged store described by GROUP by populating the SPLIT_STORES
a62b3dc5
JJ
2900 vector (if non-NULL) with split_store structs describing the byte offset
2901 (from the base), the bit size and alignment of each store as well as the
2902 original statements involved in each such split group.
f663d9ad
KT
2903 This is to separate the splitting strategy from the statement
2904 building/emission/linking done in output_merged_store.
a62b3dc5 2905 Return number of new stores.
245f6de1
JJ
2906 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
2907 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
a62b3dc5
JJ
2908 If SPLIT_STORES is NULL, it is just a dry run to count number of
2909 new stores. */
f663d9ad 2910
a62b3dc5 2911static unsigned int
245f6de1
JJ
2912split_group (merged_store_group *group, bool allow_unaligned_store,
2913 bool allow_unaligned_load,
d7a9512e
JJ
2914 vec<struct split_store *> *split_stores,
2915 unsigned *total_orig,
2916 unsigned *total_new)
f663d9ad 2917{
a62b3dc5
JJ
2918 unsigned HOST_WIDE_INT pos = group->bitregion_start;
2919 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
f663d9ad 2920 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
a62b3dc5
JJ
2921 unsigned HOST_WIDE_INT group_align = group->align;
2922 unsigned HOST_WIDE_INT align_base = group->align_base;
245f6de1 2923 unsigned HOST_WIDE_INT group_load_align = group_align;
d7a9512e 2924 bool any_orig = false;
f663d9ad 2925
f663d9ad
KT
2926 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
2927
4b84d9b8
JJ
2928 if (group->stores[0]->rhs_code == LROTATE_EXPR
2929 || group->stores[0]->rhs_code == NOP_EXPR)
2930 {
2931 /* For bswap framework using sets of stores, all the checking
2932 has been done earlier in try_coalesce_bswap and needs to be
2933 emitted as a single store. */
2934 if (total_orig)
2935 {
2936 /* Avoid the old/new stmt count heuristics. It should be
2937 always beneficial. */
2938 total_new[0] = 1;
2939 total_orig[0] = 2;
2940 }
2941
2942 if (split_stores)
2943 {
2944 unsigned HOST_WIDE_INT align_bitpos
2945 = (group->start - align_base) & (group_align - 1);
2946 unsigned HOST_WIDE_INT align = group_align;
2947 if (align_bitpos)
2948 align = least_bit_hwi (align_bitpos);
2949 bytepos = group->start / BITS_PER_UNIT;
2950 struct split_store *store
2951 = new split_store (bytepos, group->width, align);
2952 unsigned int first = 0;
2953 find_constituent_stores (group, &store->orig_stores,
2954 &first, group->start, group->width);
2955 split_stores->safe_push (store);
2956 }
2957
2958 return 1;
2959 }
2960
a62b3dc5 2961 unsigned int ret = 0, first = 0;
f663d9ad 2962 unsigned HOST_WIDE_INT try_pos = bytepos;
f663d9ad 2963
d7a9512e
JJ
2964 if (total_orig)
2965 {
2966 unsigned int i;
2967 store_immediate_info *info = group->stores[0];
2968
2969 total_new[0] = 0;
2970 total_orig[0] = 1; /* The orig store. */
2971 info = group->stores[0];
2972 if (info->ops[0].base_addr)
a6fbd154 2973 total_orig[0]++;
d7a9512e 2974 if (info->ops[1].base_addr)
a6fbd154 2975 total_orig[0]++;
d7a9512e
JJ
2976 switch (info->rhs_code)
2977 {
2978 case BIT_AND_EXPR:
2979 case BIT_IOR_EXPR:
2980 case BIT_XOR_EXPR:
2981 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
2982 break;
2983 default:
2984 break;
2985 }
2986 total_orig[0] *= group->stores.length ();
2987
2988 FOR_EACH_VEC_ELT (group->stores, i, info)
a6fbd154
JJ
2989 {
2990 total_new[0] += count_multiple_uses (info);
2991 total_orig[0] += (info->bit_not_p
2992 + info->ops[0].bit_not_p
2993 + info->ops[1].bit_not_p);
2994 }
d7a9512e
JJ
2995 }
2996
245f6de1
JJ
2997 if (!allow_unaligned_load)
2998 for (int i = 0; i < 2; ++i)
2999 if (group->load_align[i])
3000 group_load_align = MIN (group_load_align, group->load_align[i]);
3001
f663d9ad
KT
3002 while (size > 0)
3003 {
245f6de1 3004 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
a62b3dc5
JJ
3005 && group->mask[try_pos - bytepos] == (unsigned char) ~0U)
3006 {
3007 /* Skip padding bytes. */
3008 ++try_pos;
3009 size -= BITS_PER_UNIT;
3010 continue;
3011 }
3012
f663d9ad 3013 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
a62b3dc5
JJ
3014 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3015 unsigned HOST_WIDE_INT align_bitpos
3016 = (try_bitpos - align_base) & (group_align - 1);
3017 unsigned HOST_WIDE_INT align = group_align;
3018 if (align_bitpos)
3019 align = least_bit_hwi (align_bitpos);
245f6de1 3020 if (!allow_unaligned_store)
a62b3dc5 3021 try_size = MIN (try_size, align);
245f6de1
JJ
3022 if (!allow_unaligned_load)
3023 {
3024 /* If we can't do or don't want to do unaligned stores
3025 as well as loads, we need to take the loads into account
3026 as well. */
3027 unsigned HOST_WIDE_INT load_align = group_load_align;
3028 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3029 if (align_bitpos)
3030 load_align = least_bit_hwi (align_bitpos);
3031 for (int i = 0; i < 2; ++i)
3032 if (group->load_align[i])
3033 {
3034 align_bitpos = try_bitpos - group->stores[0]->bitpos;
3035 align_bitpos += group->stores[0]->ops[i].bitpos;
3036 align_bitpos -= group->load_align_base[i];
3037 align_bitpos &= (group_load_align - 1);
3038 if (align_bitpos)
3039 {
3040 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3041 load_align = MIN (load_align, a);
3042 }
3043 }
3044 try_size = MIN (try_size, load_align);
3045 }
a62b3dc5 3046 store_immediate_info *info
245f6de1 3047 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
a62b3dc5
JJ
3048 if (info)
3049 {
3050 /* If there is just one original statement for the range, see if
3051 we can just reuse the original store which could be even larger
3052 than try_size. */
3053 unsigned HOST_WIDE_INT stmt_end
3054 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
245f6de1
JJ
3055 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3056 stmt_end - try_bitpos);
a62b3dc5
JJ
3057 if (info && info->bitpos >= try_bitpos)
3058 {
3059 try_size = stmt_end - try_bitpos;
3060 goto found;
3061 }
3062 }
f663d9ad 3063
a62b3dc5
JJ
3064 /* Approximate store bitsize for the case when there are no padding
3065 bits. */
3066 while (try_size > size)
3067 try_size /= 2;
3068 /* Now look for whole padding bytes at the end of that bitsize. */
3069 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3070 if (group->mask[try_pos - bytepos + nonmasked - 1]
3071 != (unsigned char) ~0U)
3072 break;
3073 if (nonmasked == 0)
3074 {
3075 /* If entire try_size range is padding, skip it. */
3076 try_pos += try_size / BITS_PER_UNIT;
3077 size -= try_size;
3078 continue;
3079 }
3080 /* Otherwise try to decrease try_size if second half, last 3 quarters
3081 etc. are padding. */
3082 nonmasked *= BITS_PER_UNIT;
3083 while (nonmasked <= try_size / 2)
3084 try_size /= 2;
245f6de1 3085 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
a62b3dc5
JJ
3086 {
3087 /* Now look for whole padding bytes at the start of that bitsize. */
3088 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3089 for (masked = 0; masked < try_bytesize; ++masked)
3090 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U)
3091 break;
3092 masked *= BITS_PER_UNIT;
3093 gcc_assert (masked < try_size);
3094 if (masked >= try_size / 2)
3095 {
3096 while (masked >= try_size / 2)
3097 {
3098 try_size /= 2;
3099 try_pos += try_size / BITS_PER_UNIT;
3100 size -= try_size;
3101 masked -= try_size;
3102 }
3103 /* Need to recompute the alignment, so just retry at the new
3104 position. */
3105 continue;
3106 }
3107 }
3108
3109 found:
3110 ++ret;
f663d9ad 3111
a62b3dc5
JJ
3112 if (split_stores)
3113 {
3114 struct split_store *store
3115 = new split_store (try_pos, try_size, align);
245f6de1
JJ
3116 info = find_constituent_stores (group, &store->orig_stores,
3117 &first, try_bitpos, try_size);
a62b3dc5
JJ
3118 if (info
3119 && info->bitpos >= try_bitpos
3120 && info->bitpos + info->bitsize <= try_bitpos + try_size)
d7a9512e
JJ
3121 {
3122 store->orig = true;
3123 any_orig = true;
3124 }
a62b3dc5
JJ
3125 split_stores->safe_push (store);
3126 }
3127
3128 try_pos += try_size / BITS_PER_UNIT;
f663d9ad 3129 size -= try_size;
f663d9ad 3130 }
a62b3dc5 3131
d7a9512e
JJ
3132 if (total_orig)
3133 {
a6fbd154
JJ
3134 unsigned int i;
3135 struct split_store *store;
d7a9512e
JJ
3136 /* If we are reusing some original stores and any of the
3137 original SSA_NAMEs had multiple uses, we need to subtract
3138 those now before we add the new ones. */
3139 if (total_new[0] && any_orig)
3140 {
d7a9512e
JJ
3141 FOR_EACH_VEC_ELT (*split_stores, i, store)
3142 if (store->orig)
3143 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3144 }
3145 total_new[0] += ret; /* The new store. */
3146 store_immediate_info *info = group->stores[0];
3147 if (info->ops[0].base_addr)
a6fbd154 3148 total_new[0] += ret;
d7a9512e 3149 if (info->ops[1].base_addr)
a6fbd154 3150 total_new[0] += ret;
d7a9512e
JJ
3151 switch (info->rhs_code)
3152 {
3153 case BIT_AND_EXPR:
3154 case BIT_IOR_EXPR:
3155 case BIT_XOR_EXPR:
3156 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
3157 break;
3158 default:
3159 break;
3160 }
a6fbd154
JJ
3161 FOR_EACH_VEC_ELT (*split_stores, i, store)
3162 {
3163 unsigned int j;
3164 bool bit_not_p[3] = { false, false, false };
3165 /* If all orig_stores have certain bit_not_p set, then
3166 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3167 If some orig_stores have certain bit_not_p set, then
3168 we'd use a BIT_XOR_EXPR with a mask and need to account for
3169 it. */
3170 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3171 {
3172 if (info->ops[0].bit_not_p)
3173 bit_not_p[0] = true;
3174 if (info->ops[1].bit_not_p)
3175 bit_not_p[1] = true;
3176 if (info->bit_not_p)
3177 bit_not_p[2] = true;
3178 }
3179 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3180 }
3181
d7a9512e
JJ
3182 }
3183
a62b3dc5 3184 return ret;
f663d9ad
KT
3185}
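/* A rough example of the splitting loop above (numbers are illustrative
   and depend on MAX_STORE_BITSIZE and the target): for a 7-byte group
   whose start is 4-byte aligned, with ALLOW_UNALIGNED_STORE false, no
   padding bytes in the mask and no single original store covering a
   whole piece, the loop emits a 4-byte, then a 2-byte, then a 1-byte
   split_store, so the dry-run calls (SPLIT_STORES == NULL) made from
   output_merged_store would return 3.  */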
3186
a6fbd154
JJ
3187/* Return the operation through which the operand IDX (if < 2) or
3188 result (IDX == 2) should be inverted. If NOP_EXPR, no inversion
3189 is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
3190 the bits should be xored with mask. */
3191
3192static enum tree_code
3193invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3194{
3195 unsigned int i;
3196 store_immediate_info *info;
3197 unsigned int cnt = 0;
3198 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3199 {
3200 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3201 if (bit_not_p)
3202 ++cnt;
3203 }
3204 mask = NULL_TREE;
3205 if (cnt == 0)
3206 return NOP_EXPR;
3207 if (cnt == split_store->orig_stores.length ())
3208 return BIT_NOT_EXPR;
3209
3210 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3211 unsigned buf_size = split_store->size / BITS_PER_UNIT;
3212 unsigned char *buf
3213 = XALLOCAVEC (unsigned char, buf_size);
3214 memset (buf, ~0U, buf_size);
3215 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3216 {
3217 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3218 if (!bit_not_p)
3219 continue;
3220 /* Clear regions with bit_not_p and invert afterwards, rather than
3221 clear regions with !bit_not_p, so that gaps in between stores aren't
3222 set in the mask. */
3223 unsigned HOST_WIDE_INT bitsize = info->bitsize;
3224 unsigned int pos_in_buffer = 0;
3225 if (info->bitpos < try_bitpos)
3226 {
3227 gcc_assert (info->bitpos + bitsize > try_bitpos);
3228 bitsize -= (try_bitpos - info->bitpos);
3229 }
3230 else
3231 pos_in_buffer = info->bitpos - try_bitpos;
3232 if (pos_in_buffer + bitsize > split_store->size)
3233 bitsize = split_store->size - pos_in_buffer;
3234 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3235 if (BYTES_BIG_ENDIAN)
3236 clear_bit_region_be (p, (BITS_PER_UNIT - 1
3237 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
3238 else
3239 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
3240 }
3241 for (unsigned int i = 0; i < buf_size; ++i)
3242 buf[i] = ~buf[i];
3243 mask = native_interpret_expr (int_type, buf, buf_size);
3244 return BIT_XOR_EXPR;
3245}
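/* For illustration (assumed layout, not from the sources above): if a
   16-bit split store covers two 8-bit original stores and only the first
   one has the relevant bit_not_p set, CNT is 1, so instead of a single
   BIT_NOT_EXPR the function builds a mask whose bytes are 0xff over the
   inverted store and 0x00 over the other one, and the caller xors the
   value with that mask (BIT_XOR_EXPR), inverting only the bits that
   need it.  */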
3246
f663d9ad
KT
3247/* Given a merged store group GROUP, output the widened version of it.
3248 The store chain is against the base object BASE.
3249 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3250 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3251 Make sure that the number of statements output is less than the number of
3252 original statements. If a better sequence is possible emit it and
3253 return true. */
3254
3255bool
b5926e23 3256imm_store_chain_info::output_merged_store (merged_store_group *group)
f663d9ad 3257{
a62b3dc5
JJ
3258 unsigned HOST_WIDE_INT start_byte_pos
3259 = group->bitregion_start / BITS_PER_UNIT;
f663d9ad
KT
3260
3261 unsigned int orig_num_stmts = group->stores.length ();
3262 if (orig_num_stmts < 2)
3263 return false;
3264
a62b3dc5 3265 auto_vec<struct split_store *, 32> split_stores;
f663d9ad 3266 split_stores.create (0);
245f6de1 3267 bool allow_unaligned_store
a62b3dc5 3268 = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
245f6de1
JJ
3269 bool allow_unaligned_load = allow_unaligned_store;
3270 if (allow_unaligned_store)
a62b3dc5
JJ
3271 {
3272 /* If unaligned stores are allowed, see how many stores we'd emit
3273 for unaligned and how many stores we'd emit for aligned stores.
3274 Only use unaligned stores if it allows fewer stores than aligned. */
245f6de1 3275 unsigned aligned_cnt
d7a9512e 3276 = split_group (group, false, allow_unaligned_load, NULL, NULL, NULL);
245f6de1 3277 unsigned unaligned_cnt
d7a9512e 3278 = split_group (group, true, allow_unaligned_load, NULL, NULL, NULL);
a62b3dc5 3279 if (aligned_cnt <= unaligned_cnt)
245f6de1 3280 allow_unaligned_store = false;
a62b3dc5 3281 }
d7a9512e 3282 unsigned total_orig, total_new;
245f6de1 3283 split_group (group, allow_unaligned_store, allow_unaligned_load,
d7a9512e 3284 &split_stores, &total_orig, &total_new);
a62b3dc5
JJ
3285
3286 if (split_stores.length () >= orig_num_stmts)
3287 {
3288 /* We didn't manage to reduce the number of statements. Bail out. */
3289 if (dump_file && (dump_flags & TDF_DETAILS))
d7a9512e
JJ
3290 fprintf (dump_file, "Exceeded original number of stmts (%u)."
3291 " Not profitable to emit new sequence.\n",
3292 orig_num_stmts);
a62b3dc5
JJ
3293 return false;
3294 }
d7a9512e
JJ
3295 if (total_orig <= total_new)
3296 {
3297 /* If number of estimated new statements is above estimated original
3298 statements, bail out too. */
3299 if (dump_file && (dump_flags & TDF_DETAILS))
3300 fprintf (dump_file, "Estimated number of original stmts (%u)"
3301 " not larger than estimated number of new"
3302 " stmts (%u).\n",
3303 total_orig, total_new);
4b84d9b8 3304 return false;
d7a9512e 3305 }
f663d9ad
KT
3306
3307 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
3308 gimple_seq seq = NULL;
f663d9ad
KT
3309 tree last_vdef, new_vuse;
3310 last_vdef = gimple_vdef (group->last_stmt);
3311 new_vuse = gimple_vuse (group->last_stmt);
4b84d9b8
JJ
3312 tree bswap_res = NULL_TREE;
3313
3314 if (group->stores[0]->rhs_code == LROTATE_EXPR
3315 || group->stores[0]->rhs_code == NOP_EXPR)
3316 {
3317 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
3318 gimple *ins_stmt = group->stores[0]->ins_stmt;
3319 struct symbolic_number *n = &group->stores[0]->n;
3320 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
3321
3322 switch (n->range)
3323 {
3324 case 16:
3325 load_type = bswap_type = uint16_type_node;
3326 break;
3327 case 32:
3328 load_type = uint32_type_node;
3329 if (bswap)
3330 {
3331 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
3332 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3333 }
3334 break;
3335 case 64:
3336 load_type = uint64_type_node;
3337 if (bswap)
3338 {
3339 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
3340 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3341 }
3342 break;
3343 default:
3344 gcc_unreachable ();
3345 }
3346
 3347	      /* If each load has the vuse of the corresponding store,
 3348		 we've checked the aliasing already in try_coalesce_bswap and
 3349		 we want to sink the load into seq, so we need to use new_vuse
 3350		 on the load.  */
30fa8e9c 3351 if (n->base_addr)
4b84d9b8 3352 {
30fa8e9c
JJ
3353 if (n->vuse == NULL)
3354 {
3355 n->vuse = new_vuse;
3356 ins_stmt = NULL;
3357 }
3358 else
 3359	    /* Update the vuse in case it has been changed by output_merged_stores.  */
3360 n->vuse = gimple_vuse (ins_stmt);
4b84d9b8
JJ
3361 }
3362 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
3363 bswap_type, load_type, n, bswap);
3364 gcc_assert (bswap_res);
3365 }
f663d9ad
KT
3366
3367 gimple *stmt = NULL;
f663d9ad
KT
3368 split_store *split_store;
3369 unsigned int i;
245f6de1 3370 auto_vec<gimple *, 32> orig_stmts;
4b84d9b8
JJ
3371 gimple_seq this_seq;
3372 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
aa55dc0c 3373 is_gimple_mem_ref_addr, NULL_TREE);
4b84d9b8 3374 gimple_seq_add_seq_without_update (&seq, this_seq);
245f6de1
JJ
3375
3376 tree load_addr[2] = { NULL_TREE, NULL_TREE };
3377 gimple_seq load_seq[2] = { NULL, NULL };
3378 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
3379 for (int j = 0; j < 2; ++j)
3380 {
3381 store_operand_info &op = group->stores[0]->ops[j];
3382 if (op.base_addr == NULL_TREE)
3383 continue;
3384
3385 store_immediate_info *infol = group->stores.last ();
3386 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
3387 {
97031af7
JJ
3388 /* We can't pick the location randomly; while we've verified
 3389	     all the loads have the same vuse, they can still be in different
3390 basic blocks and we need to pick the one from the last bb:
3391 int x = q[0];
3392 if (x == N) return;
3393 int y = q[1];
3394 p[0] = x;
3395 p[1] = y;
3396 otherwise if we put the wider load at the q[0] load, we might
3397 segfault if q[1] is not mapped. */
3398 basic_block bb = gimple_bb (op.stmt);
3399 gimple *ostmt = op.stmt;
3400 store_immediate_info *info;
3401 FOR_EACH_VEC_ELT (group->stores, i, info)
3402 {
3403 gimple *tstmt = info->ops[j].stmt;
3404 basic_block tbb = gimple_bb (tstmt);
3405 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
3406 {
3407 ostmt = tstmt;
3408 bb = tbb;
3409 }
3410 }
3411 load_gsi[j] = gsi_for_stmt (ostmt);
245f6de1
JJ
3412 load_addr[j]
3413 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3414 &load_seq[j], is_gimple_mem_ref_addr,
3415 NULL_TREE);
3416 }
3417 else if (operand_equal_p (base_addr, op.base_addr, 0))
3418 load_addr[j] = addr;
3419 else
3e2927a1 3420 {
3e2927a1
JJ
3421 load_addr[j]
3422 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3423 &this_seq, is_gimple_mem_ref_addr,
3424 NULL_TREE);
3425 gimple_seq_add_seq_without_update (&seq, this_seq);
3426 }
245f6de1
JJ
3427 }
3428
f663d9ad
KT
3429 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3430 {
3431 unsigned HOST_WIDE_INT try_size = split_store->size;
3432 unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
3433 unsigned HOST_WIDE_INT align = split_store->align;
a62b3dc5
JJ
3434 tree dest, src;
3435 location_t loc;
3436 if (split_store->orig)
3437 {
3438 /* If there is just a single constituent store which covers
3439 the whole area, just reuse the lhs and rhs. */
245f6de1
JJ
3440 gimple *orig_stmt = split_store->orig_stores[0]->stmt;
3441 dest = gimple_assign_lhs (orig_stmt);
3442 src = gimple_assign_rhs1 (orig_stmt);
3443 loc = gimple_location (orig_stmt);
a62b3dc5
JJ
3444 }
3445 else
3446 {
245f6de1
JJ
3447 store_immediate_info *info;
3448 unsigned short clique, base;
3449 unsigned int k;
3450 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3451 orig_stmts.safe_push (info->stmt);
a62b3dc5 3452 tree offset_type
245f6de1
JJ
3453 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
3454 loc = get_location_for_stmts (orig_stmts);
3455 orig_stmts.truncate (0);
a62b3dc5
JJ
3456
3457 tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
3458 int_type = build_aligned_type (int_type, align);
3459 dest = fold_build2 (MEM_REF, int_type, addr,
3460 build_int_cst (offset_type, try_pos));
245f6de1
JJ
3461 if (TREE_CODE (dest) == MEM_REF)
3462 {
3463 MR_DEPENDENCE_CLIQUE (dest) = clique;
3464 MR_DEPENDENCE_BASE (dest) = base;
3465 }
3466
4b84d9b8
JJ
3467 tree mask = integer_zero_node;
3468 if (!bswap_res)
3469 mask = native_interpret_expr (int_type,
3470 group->mask + try_pos
3471 - start_byte_pos,
3472 group->buf_size);
245f6de1
JJ
3473
3474 tree ops[2];
3475 for (int j = 0;
3476 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
3477 ++j)
3478 {
3479 store_operand_info &op = split_store->orig_stores[0]->ops[j];
4b84d9b8
JJ
3480 if (bswap_res)
3481 ops[j] = bswap_res;
3482 else if (op.base_addr)
245f6de1
JJ
3483 {
3484 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3485 orig_stmts.safe_push (info->ops[j].stmt);
3486
3487 offset_type = get_alias_type_for_stmts (orig_stmts, true,
3488 &clique, &base);
3489 location_t load_loc = get_location_for_stmts (orig_stmts);
3490 orig_stmts.truncate (0);
3491
3492 unsigned HOST_WIDE_INT load_align = group->load_align[j];
3493 unsigned HOST_WIDE_INT align_bitpos
3494 = (try_pos * BITS_PER_UNIT
3495 - split_store->orig_stores[0]->bitpos
3496 + op.bitpos) & (load_align - 1);
3497 if (align_bitpos)
3498 load_align = least_bit_hwi (align_bitpos);
3499
3500 tree load_int_type
3501 = build_nonstandard_integer_type (try_size, UNSIGNED);
3502 load_int_type
3503 = build_aligned_type (load_int_type, load_align);
3504
3505 unsigned HOST_WIDE_INT load_pos
3506 = (try_pos * BITS_PER_UNIT
3507 - split_store->orig_stores[0]->bitpos
3508 + op.bitpos) / BITS_PER_UNIT;
3509 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
3510 build_int_cst (offset_type, load_pos));
3511 if (TREE_CODE (ops[j]) == MEM_REF)
3512 {
3513 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
3514 MR_DEPENDENCE_BASE (ops[j]) = base;
3515 }
3516 if (!integer_zerop (mask))
3517 /* The load might load some bits (that will be masked off
 3518		     later on) uninitialized; avoid -W*uninitialized
3519 warnings in that case. */
3520 TREE_NO_WARNING (ops[j]) = 1;
3521
3522 stmt = gimple_build_assign (make_ssa_name (int_type),
3523 ops[j]);
3524 gimple_set_location (stmt, load_loc);
3525 if (gsi_bb (load_gsi[j]))
3526 {
3527 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
3528 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
3529 }
3530 else
3531 {
3532 gimple_set_vuse (stmt, new_vuse);
3533 gimple_seq_add_stmt_without_update (&seq, stmt);
3534 }
3535 ops[j] = gimple_assign_lhs (stmt);
a6fbd154
JJ
3536 tree xor_mask;
3537 enum tree_code inv_op
3538 = invert_op (split_store, j, int_type, xor_mask);
3539 if (inv_op != NOP_EXPR)
383ac8dc
JJ
3540 {
3541 stmt = gimple_build_assign (make_ssa_name (int_type),
a6fbd154 3542 inv_op, ops[j], xor_mask);
383ac8dc
JJ
3543 gimple_set_location (stmt, load_loc);
3544 ops[j] = gimple_assign_lhs (stmt);
3545
3546 if (gsi_bb (load_gsi[j]))
3547 gimple_seq_add_stmt_without_update (&load_seq[j],
3548 stmt);
3549 else
3550 gimple_seq_add_stmt_without_update (&seq, stmt);
3551 }
245f6de1
JJ
3552 }
3553 else
3554 ops[j] = native_interpret_expr (int_type,
3555 group->val + try_pos
3556 - start_byte_pos,
3557 group->buf_size);
3558 }
3559
3560 switch (split_store->orig_stores[0]->rhs_code)
3561 {
3562 case BIT_AND_EXPR:
3563 case BIT_IOR_EXPR:
3564 case BIT_XOR_EXPR:
3565 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3566 {
3567 tree rhs1 = gimple_assign_rhs1 (info->stmt);
3568 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
3569 }
3570 location_t bit_loc;
3571 bit_loc = get_location_for_stmts (orig_stmts);
3572 orig_stmts.truncate (0);
3573
3574 stmt
3575 = gimple_build_assign (make_ssa_name (int_type),
3576 split_store->orig_stores[0]->rhs_code,
3577 ops[0], ops[1]);
3578 gimple_set_location (stmt, bit_loc);
3579 /* If there is just one load and there is a separate
3580 load_seq[0], emit the bitwise op right after it. */
3581 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
3582 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
3583 /* Otherwise, if at least one load is in seq, we need to
3584 emit the bitwise op right before the store. If there
 3585	       are two loads and they are emitted somewhere else, it would
3586 be better to emit the bitwise op as early as possible;
3587 we don't track where that would be possible right now
3588 though. */
3589 else
3590 gimple_seq_add_stmt_without_update (&seq, stmt);
3591 src = gimple_assign_lhs (stmt);
a6fbd154
JJ
3592 tree xor_mask;
3593 enum tree_code inv_op;
3594 inv_op = invert_op (split_store, 2, int_type, xor_mask);
3595 if (inv_op != NOP_EXPR)
d60edaba
JJ
3596 {
3597 stmt = gimple_build_assign (make_ssa_name (int_type),
a6fbd154 3598 inv_op, src, xor_mask);
d60edaba
JJ
3599 gimple_set_location (stmt, bit_loc);
3600 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
3601 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
3602 else
3603 gimple_seq_add_stmt_without_update (&seq, stmt);
3604 src = gimple_assign_lhs (stmt);
3605 }
245f6de1 3606 break;
4b84d9b8
JJ
3607 case LROTATE_EXPR:
3608 case NOP_EXPR:
3609 src = ops[0];
3610 if (!is_gimple_val (src))
3611 {
3612 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
3613 src);
3614 gimple_seq_add_stmt_without_update (&seq, stmt);
3615 src = gimple_assign_lhs (stmt);
3616 }
3617 if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
3618 {
3619 stmt = gimple_build_assign (make_ssa_name (int_type),
3620 NOP_EXPR, src);
3621 gimple_seq_add_stmt_without_update (&seq, stmt);
3622 src = gimple_assign_lhs (stmt);
3623 }
3624 break;
245f6de1
JJ
3625 default:
3626 src = ops[0];
3627 break;
3628 }
3629
a62b3dc5
JJ
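	  /* A nonzero MASK means the split store covers bits that must be
	     preserved from memory, so a read-modify-write sequence is
	     emitted below.  Illustrative example (an assumption, not from
	     the source): if only the low nibble of a byte is actually
	     written, MASK is 0xf0; the old contents contribute DEST & 0xf0,
	     the new value contributes SRC & 0x0f, and the two halves are
	     IORed together.  */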
3630 if (!integer_zerop (mask))
3631 {
3632 tree tem = make_ssa_name (int_type);
3633 tree load_src = unshare_expr (dest);
 3634	      /* The load might load some or all bits uninitialized; avoid
 3635		 -W*uninitialized warnings in that case.
 3636		 As an optimization, if all the bits are provably
 3637		 uninitialized (no stores at all yet, or the previous
 3638		 store was a CLOBBER), it would be nice to optimize away
 3639		 the load and replace it e.g. with 0.  */
3640 TREE_NO_WARNING (load_src) = 1;
3641 stmt = gimple_build_assign (tem, load_src);
3642 gimple_set_location (stmt, loc);
3643 gimple_set_vuse (stmt, new_vuse);
3644 gimple_seq_add_stmt_without_update (&seq, stmt);
3645
3646 /* FIXME: If there is a single chunk of zero bits in mask,
3647 perhaps use BIT_INSERT_EXPR instead? */
3648 stmt = gimple_build_assign (make_ssa_name (int_type),
3649 BIT_AND_EXPR, tem, mask);
3650 gimple_set_location (stmt, loc);
3651 gimple_seq_add_stmt_without_update (&seq, stmt);
3652 tem = gimple_assign_lhs (stmt);
3653
245f6de1
JJ
3654 if (TREE_CODE (src) == INTEGER_CST)
3655 src = wide_int_to_tree (int_type,
3656 wi::bit_and_not (wi::to_wide (src),
3657 wi::to_wide (mask)));
3658 else
3659 {
3660 tree nmask
3661 = wide_int_to_tree (int_type,
3662 wi::bit_not (wi::to_wide (mask)));
3663 stmt = gimple_build_assign (make_ssa_name (int_type),
3664 BIT_AND_EXPR, src, nmask);
3665 gimple_set_location (stmt, loc);
3666 gimple_seq_add_stmt_without_update (&seq, stmt);
3667 src = gimple_assign_lhs (stmt);
3668 }
a62b3dc5
JJ
3669 stmt = gimple_build_assign (make_ssa_name (int_type),
3670 BIT_IOR_EXPR, tem, src);
3671 gimple_set_location (stmt, loc);
3672 gimple_seq_add_stmt_without_update (&seq, stmt);
3673 src = gimple_assign_lhs (stmt);
3674 }
3675 }
f663d9ad
KT
3676
3677 stmt = gimple_build_assign (dest, src);
3678 gimple_set_location (stmt, loc);
3679 gimple_set_vuse (stmt, new_vuse);
3680 gimple_seq_add_stmt_without_update (&seq, stmt);
3681
f663d9ad
KT
3682 tree new_vdef;
3683 if (i < split_stores.length () - 1)
a62b3dc5 3684 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
f663d9ad
KT
3685 else
3686 new_vdef = last_vdef;
3687
3688 gimple_set_vdef (stmt, new_vdef);
3689 SSA_NAME_DEF_STMT (new_vdef) = stmt;
3690 new_vuse = new_vdef;
3691 }
3692
3693 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3694 delete split_store;
3695
f663d9ad
KT
3696 gcc_assert (seq);
3697 if (dump_file)
3698 {
3699 fprintf (dump_file,
3700 "New sequence of %u stmts to replace old one of %u stmts\n",
a62b3dc5 3701 split_stores.length (), orig_num_stmts);
f663d9ad
KT
3702 if (dump_flags & TDF_DETAILS)
3703 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
3704 }
3705 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
245f6de1
JJ
3706 for (int j = 0; j < 2; ++j)
3707 if (load_seq[j])
3708 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
f663d9ad
KT
3709
3710 return true;
3711}
3712
3713/* Process the merged_store_group objects created in the coalescing phase.
3714 The stores are all against the base object BASE.
3715 Try to output the widened stores and delete the original statements if
3716 successful. Return true iff any changes were made. */
3717
3718bool
b5926e23 3719imm_store_chain_info::output_merged_stores ()
f663d9ad
KT
3720{
3721 unsigned int i;
3722 merged_store_group *merged_store;
3723 bool ret = false;
3724 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
3725 {
b5926e23 3726 if (output_merged_store (merged_store))
f663d9ad
KT
3727 {
3728 unsigned int j;
3729 store_immediate_info *store;
3730 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
3731 {
3732 gimple *stmt = store->stmt;
3733 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3734 gsi_remove (&gsi, true);
3735 if (stmt != merged_store->last_stmt)
3736 {
3737 unlink_stmt_vdef (stmt);
3738 release_defs (stmt);
3739 }
3740 }
3741 ret = true;
3742 }
3743 }
3744 if (ret && dump_file)
3745 fprintf (dump_file, "Merging successful!\n");
3746
3747 return ret;
3748}
3749
3750/* Coalesce the store_immediate_info objects recorded against the base object
3751 BASE in the first phase and output them.
3752 Delete the allocated structures.
3753 Return true if any changes were made. */
3754
3755bool
b5926e23 3756imm_store_chain_info::terminate_and_process_chain ()
f663d9ad
KT
3757{
3758 /* Process store chain. */
3759 bool ret = false;
3760 if (m_store_info.length () > 1)
3761 {
3762 ret = coalesce_immediate_stores ();
3763 if (ret)
b5926e23 3764 ret = output_merged_stores ();
f663d9ad
KT
3765 }
3766
3767 /* Delete all the entries we allocated ourselves. */
3768 store_immediate_info *info;
3769 unsigned int i;
3770 FOR_EACH_VEC_ELT (m_store_info, i, info)
3771 delete info;
3772
3773 merged_store_group *merged_info;
3774 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
3775 delete merged_info;
3776
3777 return ret;
3778}
3779
3780/* Return true iff LHS is a destination potentially interesting for
3781 store merging. In practice these are the codes that get_inner_reference
3782 can process. */
3783
3784static bool
3785lhs_valid_for_store_merging_p (tree lhs)
3786{
3787 tree_code code = TREE_CODE (lhs);
3788
3789 if (code == ARRAY_REF || code == ARRAY_RANGE_REF || code == MEM_REF
3790 || code == COMPONENT_REF || code == BIT_FIELD_REF)
3791 return true;
3792
3793 return false;
3794}
3795
3796/* Return true if the tree RHS is a constant we want to consider
3797 during store merging. In practice accept all codes that
3798 native_encode_expr accepts. */
3799
3800static bool
3801rhs_valid_for_store_merging_p (tree rhs)
3802{
2f391428
JJ
3803 return native_encode_expr (rhs, NULL,
3804 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs)))) != 0;
f663d9ad
KT
3805}
3806
245f6de1
JJ
3807/* If MEM is a memory reference usable for store merging (either as
3808 store destination or for loads), return the non-NULL base_addr
3809 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
 3810	   Otherwise return NULL_TREE; *PBITSIZE should still be valid even in
 3811	   that case.  */
3812
3813static tree
3814mem_valid_for_store_merging (tree mem, unsigned HOST_WIDE_INT *pbitsize,
3815 unsigned HOST_WIDE_INT *pbitpos,
3816 unsigned HOST_WIDE_INT *pbitregion_start,
3817 unsigned HOST_WIDE_INT *pbitregion_end)
3818{
3819 HOST_WIDE_INT bitsize;
3820 HOST_WIDE_INT bitpos;
3821 unsigned HOST_WIDE_INT bitregion_start = 0;
3822 unsigned HOST_WIDE_INT bitregion_end = 0;
3823 machine_mode mode;
3824 int unsignedp = 0, reversep = 0, volatilep = 0;
3825 tree offset;
3826 tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
3827 &unsignedp, &reversep, &volatilep);
3828 *pbitsize = bitsize;
3829 if (bitsize == 0)
3830 return NULL_TREE;
3831
3832 if (TREE_CODE (mem) == COMPONENT_REF
3833 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
3834 {
3835 get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
3836 if (bitregion_end)
3837 ++bitregion_end;
3838 }
3839
3840 if (reversep)
3841 return NULL_TREE;
3842
3843 /* We do not want to rewrite TARGET_MEM_REFs. */
3844 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
3845 return NULL_TREE;
3846 /* In some cases get_inner_reference may return a
3847 MEM_REF [ptr + byteoffset]. For the purposes of this pass
3848 canonicalize the base_addr to MEM_REF [ptr] and take
3849 byteoffset into account in the bitpos. This occurs in
3850 PR 23684 and this way we can catch more chains. */
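  /* Illustrative example (an assumption, not from the source): for
     MEM_REF [ptr + 2B] with a bit position of 8, the access is treated as
     MEM_REF [ptr] with a bit position of 2 * BITS_PER_UNIT + 8 == 24.  */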
3851 else if (TREE_CODE (base_addr) == MEM_REF)
3852 {
3853 offset_int bit_off, byte_off = mem_ref_offset (base_addr);
3854 bit_off = byte_off << LOG2_BITS_PER_UNIT;
3855 bit_off += bitpos;
3856 if (!wi::neg_p (bit_off) && wi::fits_shwi_p (bit_off))
3857 {
3858 bitpos = bit_off.to_shwi ();
3859 if (bitregion_end)
3860 {
3861 bit_off = byte_off << LOG2_BITS_PER_UNIT;
3862 bit_off += bitregion_start;
3863 if (wi::fits_uhwi_p (bit_off))
3864 {
3865 bitregion_start = bit_off.to_uhwi ();
3866 bit_off = byte_off << LOG2_BITS_PER_UNIT;
3867 bit_off += bitregion_end;
3868 if (wi::fits_uhwi_p (bit_off))
3869 bitregion_end = bit_off.to_uhwi ();
3870 else
3871 bitregion_end = 0;
3872 }
3873 else
3874 bitregion_end = 0;
3875 }
3876 }
3877 else
3878 return NULL_TREE;
3879 base_addr = TREE_OPERAND (base_addr, 0);
3880 }
3881 /* get_inner_reference returns the base object, get at its
3882 address now. */
3883 else
3884 {
3885 if (bitpos < 0)
3886 return NULL_TREE;
3887 base_addr = build_fold_addr_expr (base_addr);
3888 }
3889
3890 if (!bitregion_end)
3891 {
3892 bitregion_start = ROUND_DOWN (bitpos, BITS_PER_UNIT);
3893 bitregion_end = ROUND_UP (bitpos + bitsize, BITS_PER_UNIT);
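      /* E.g. (illustrative) a 5-bit store at bit position 11 gets the
	 byte-aligned bit region [8, 16).  */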
3894 }
3895
3896 if (offset != NULL_TREE)
3897 {
3898 /* If the access is variable offset then a base decl has to be
3899 address-taken to be able to emit pointer-based stores to it.
3900 ??? We might be able to get away with re-using the original
3901 base up to the first variable part and then wrapping that inside
3902 a BIT_FIELD_REF. */
3903 tree base = get_base_address (base_addr);
3904 if (! base
3905 || (DECL_P (base) && ! TREE_ADDRESSABLE (base)))
3906 return NULL_TREE;
3907
3908 base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
3909 base_addr, offset);
3910 }
3911
3912 *pbitsize = bitsize;
3913 *pbitpos = bitpos;
3914 *pbitregion_start = bitregion_start;
3915 *pbitregion_end = bitregion_end;
3916 return base_addr;
3917}
3918
3919/* Return true if STMT is a load that can be used for store merging.
3920 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
3921 BITREGION_END are properties of the corresponding store. */
3922
3923static bool
3924handled_load (gimple *stmt, store_operand_info *op,
3925 unsigned HOST_WIDE_INT bitsize, unsigned HOST_WIDE_INT bitpos,
3926 unsigned HOST_WIDE_INT bitregion_start,
3927 unsigned HOST_WIDE_INT bitregion_end)
3928{
383ac8dc 3929 if (!is_gimple_assign (stmt))
245f6de1 3930 return false;
383ac8dc
JJ
3931 if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
3932 {
3933 tree rhs1 = gimple_assign_rhs1 (stmt);
3934 if (TREE_CODE (rhs1) == SSA_NAME
383ac8dc
JJ
3935 && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
3936 bitregion_start, bitregion_end))
3937 {
d60edaba
JJ
 3938	      /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
3939 been optimized earlier, but if allowed here, would confuse the
3940 multiple uses counting. */
3941 if (op->bit_not_p)
3942 return false;
383ac8dc
JJ
3943 op->bit_not_p = !op->bit_not_p;
3944 return true;
3945 }
3946 return false;
3947 }
3948 if (gimple_vuse (stmt)
3949 && gimple_assign_load_p (stmt)
245f6de1
JJ
3950 && !stmt_can_throw_internal (stmt)
3951 && !gimple_has_volatile_ops (stmt))
3952 {
3953 tree mem = gimple_assign_rhs1 (stmt);
3954 op->base_addr
3955 = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
3956 &op->bitregion_start,
3957 &op->bitregion_end);
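      /* The load is only usable if it has the same bit size as the store,
	 lies at the same position within a byte, and its bit region covers
	 at least as much on either side of the position as the store's bit
	 region does.  */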
3958 if (op->base_addr != NULL_TREE
3959 && op->bitsize == bitsize
3960 && ((op->bitpos - bitpos) % BITS_PER_UNIT) == 0
3961 && op->bitpos - op->bitregion_start >= bitpos - bitregion_start
3962 && op->bitregion_end - op->bitpos >= bitregion_end - bitpos)
3963 {
3964 op->stmt = stmt;
3965 op->val = mem;
383ac8dc 3966 op->bit_not_p = false;
245f6de1
JJ
3967 return true;
3968 }
3969 }
3970 return false;
3971}
3972
3973/* Record the store STMT for store merging optimization if it can be
3974 optimized. */
3975
3976void
3977pass_store_merging::process_store (gimple *stmt)
3978{
3979 tree lhs = gimple_assign_lhs (stmt);
3980 tree rhs = gimple_assign_rhs1 (stmt);
3981 unsigned HOST_WIDE_INT bitsize, bitpos;
3982 unsigned HOST_WIDE_INT bitregion_start;
3983 unsigned HOST_WIDE_INT bitregion_end;
3984 tree base_addr
3985 = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
3986 &bitregion_start, &bitregion_end);
3987 if (bitsize == 0)
3988 return;
3989
3990 bool invalid = (base_addr == NULL_TREE
3991 || ((bitsize > MAX_BITSIZE_MODE_ANY_INT)
3992 && (TREE_CODE (rhs) != INTEGER_CST)));
3993 enum tree_code rhs_code = ERROR_MARK;
d60edaba 3994 bool bit_not_p = false;
4b84d9b8
JJ
3995 struct symbolic_number n;
3996 gimple *ins_stmt = NULL;
245f6de1
JJ
3997 store_operand_info ops[2];
3998 if (invalid)
3999 ;
4000 else if (rhs_valid_for_store_merging_p (rhs))
4001 {
4002 rhs_code = INTEGER_CST;
4003 ops[0].val = rhs;
4004 }
d7a9512e 4005 else if (TREE_CODE (rhs) != SSA_NAME)
245f6de1
JJ
4006 invalid = true;
4007 else
4008 {
4009 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
4010 if (!is_gimple_assign (def_stmt))
4011 invalid = true;
4012 else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
4013 bitregion_start, bitregion_end))
4014 rhs_code = MEM_REF;
d60edaba
JJ
4015 else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
4016 {
4017 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4018 if (TREE_CODE (rhs1) == SSA_NAME
4019 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
4020 {
4021 bit_not_p = true;
4022 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4023 }
4024 }
4025 if (rhs_code == ERROR_MARK && !invalid)
245f6de1
JJ
4026 switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
4027 {
4028 case BIT_AND_EXPR:
4029 case BIT_IOR_EXPR:
4030 case BIT_XOR_EXPR:
4031 tree rhs1, rhs2;
4032 rhs1 = gimple_assign_rhs1 (def_stmt);
4033 rhs2 = gimple_assign_rhs2 (def_stmt);
4034 invalid = true;
d7a9512e 4035 if (TREE_CODE (rhs1) != SSA_NAME)
245f6de1
JJ
4036 break;
4037 def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
4038 if (!is_gimple_assign (def_stmt1)
4039 || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
4040 bitregion_start, bitregion_end))
4041 break;
4042 if (rhs_valid_for_store_merging_p (rhs2))
4043 ops[1].val = rhs2;
d7a9512e 4044 else if (TREE_CODE (rhs2) != SSA_NAME)
245f6de1
JJ
4045 break;
4046 else
4047 {
4048 def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
4049 if (!is_gimple_assign (def_stmt2))
4050 break;
4051 else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
4052 bitregion_start, bitregion_end))
4053 break;
4054 }
4055 invalid = false;
4056 break;
4057 default:
4058 invalid = true;
4059 break;
4060 }
4b84d9b8
JJ
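      /* Independently of the above, see whether the stored value can be
	 described byte by byte by find_bswap_or_nop_1.  If every byte is
	 known, keep the symbolic number so the group can later be
	 recognized as a byte swap or identity, and accept the store (as
	 LROTATE_EXPR) even if it was not handled above.  */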
4061 if ((bitsize % BITS_PER_UNIT) == 0
4062 && (bitpos % BITS_PER_UNIT) == 0
4063 && bitsize <= 64
4064 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
4065 {
4066 ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
4067 if (ins_stmt)
4068 {
4069 uint64_t nn = n.n;
4070 for (unsigned HOST_WIDE_INT i = 0;
4071 i < bitsize; i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
4072 if ((nn & MARKER_MASK) == 0
4073 || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
4074 {
4075 ins_stmt = NULL;
4076 break;
4077 }
4078 if (ins_stmt)
4079 {
4080 if (invalid)
4081 {
4082 rhs_code = LROTATE_EXPR;
4083 ops[0].base_addr = NULL_TREE;
4084 ops[1].base_addr = NULL_TREE;
4085 }
4086 invalid = false;
4087 }
4088 }
4089 }
245f6de1
JJ
4090 }
4091
245f6de1
JJ
4092 if (invalid)
4093 {
383ac8dc 4094 terminate_all_aliasing_chains (NULL, stmt);
245f6de1
JJ
4095 return;
4096 }
4097
4b84d9b8
JJ
4098 if (!ins_stmt)
4099 memset (&n, 0, sizeof (n));
4100
383ac8dc
JJ
4101 struct imm_store_chain_info **chain_info = NULL;
4102 if (base_addr)
4103 chain_info = m_stores.get (base_addr);
4104
245f6de1
JJ
4105 store_immediate_info *info;
4106 if (chain_info)
4107 {
4108 unsigned int ord = (*chain_info)->m_store_info.length ();
4109 info = new store_immediate_info (bitsize, bitpos, bitregion_start,
4110 bitregion_end, stmt, ord, rhs_code,
4b84d9b8 4111 n, ins_stmt,
d60edaba 4112 bit_not_p, ops[0], ops[1]);
245f6de1
JJ
4113 if (dump_file && (dump_flags & TDF_DETAILS))
4114 {
4115 fprintf (dump_file, "Recording immediate store from stmt:\n");
4116 print_gimple_stmt (dump_file, stmt, 0);
4117 }
4118 (*chain_info)->m_store_info.safe_push (info);
383ac8dc 4119 terminate_all_aliasing_chains (chain_info, stmt);
245f6de1
JJ
 4120	      /* If we reach the limit of stores to merge in a chain, terminate and
4121 process the chain now. */
4122 if ((*chain_info)->m_store_info.length ()
4123 == (unsigned int) PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE))
4124 {
4125 if (dump_file && (dump_flags & TDF_DETAILS))
4126 fprintf (dump_file,
4127 "Reached maximum number of statements to merge:\n");
4128 terminate_and_release_chain (*chain_info);
4129 }
4130 return;
4131 }
4132
 4133	  /* Does the store alias any existing chain?  */
383ac8dc 4134 terminate_all_aliasing_chains (NULL, stmt);
245f6de1
JJ
4135 /* Start a new chain. */
4136 struct imm_store_chain_info *new_chain
4137 = new imm_store_chain_info (m_stores_head, base_addr);
4138 info = new store_immediate_info (bitsize, bitpos, bitregion_start,
4139 bitregion_end, stmt, 0, rhs_code,
4b84d9b8 4140 n, ins_stmt,
d60edaba 4141 bit_not_p, ops[0], ops[1]);
245f6de1
JJ
4142 new_chain->m_store_info.safe_push (info);
4143 m_stores.put (base_addr, new_chain);
4144 if (dump_file && (dump_flags & TDF_DETAILS))
4145 {
4146 fprintf (dump_file, "Starting new chain with statement:\n");
4147 print_gimple_stmt (dump_file, stmt, 0);
4148 fprintf (dump_file, "The base object is:\n");
4149 print_generic_expr (dump_file, base_addr);
4150 fprintf (dump_file, "\n");
4151 }
4152}
4153
f663d9ad 4154/* Entry point for the pass. Go over each basic block recording chains of
245f6de1
JJ
4155 immediate stores. Upon encountering a terminating statement (as defined
4156 by stmt_terminates_chain_p) process the recorded stores and emit the widened
4157 variants. */
f663d9ad
KT
4158
4159unsigned int
4160pass_store_merging::execute (function *fun)
4161{
4162 basic_block bb;
4163 hash_set<gimple *> orig_stmts;
4164
4b84d9b8
JJ
4165 calculate_dominance_info (CDI_DOMINATORS);
4166
f663d9ad
KT
4167 FOR_EACH_BB_FN (bb, fun)
4168 {
4169 gimple_stmt_iterator gsi;
4170 unsigned HOST_WIDE_INT num_statements = 0;
 4171	      /* Count the non-debug statements in the basic block; blocks with
 4172		 fewer than two such statements cannot contain a mergeable store
 4173		 sequence, so they are skipped below.  */
4174 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4175 {
4176 if (is_gimple_debug (gsi_stmt (gsi)))
4177 continue;
4178
2f391428 4179 if (++num_statements >= 2)
f663d9ad
KT
4180 break;
4181 }
4182
4183 if (num_statements < 2)
4184 continue;
4185
4186 if (dump_file && (dump_flags & TDF_DETAILS))
4187 fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
4188
4189 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4190 {
4191 gimple *stmt = gsi_stmt (gsi);
4192
50b6d676
AO
4193 if (is_gimple_debug (stmt))
4194 continue;
4195
f663d9ad
KT
4196 if (gimple_has_volatile_ops (stmt))
4197 {
4198 /* Terminate all chains. */
4199 if (dump_file && (dump_flags & TDF_DETAILS))
4200 fprintf (dump_file, "Volatile access terminates "
4201 "all chains\n");
4202 terminate_and_process_all_chains ();
4203 continue;
4204 }
4205
f663d9ad
KT
4206 if (gimple_assign_single_p (stmt) && gimple_vdef (stmt)
4207 && !stmt_can_throw_internal (stmt)
4208 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt)))
245f6de1
JJ
4209 process_store (stmt);
4210 else
4211 terminate_all_aliasing_chains (NULL, stmt);
f663d9ad
KT
4212 }
4213 terminate_and_process_all_chains ();
4214 }
4215 return 0;
4216}
4217
4218} // anon namespace
4219
4220/* Construct and return a store merging pass object. */
4221
4222gimple_opt_pass *
4223make_pass_store_merging (gcc::context *ctxt)
4224{
4225 return new pass_store_merging (ctxt);
4226}
c22d8787
KT
4227
4228#if CHECKING_P
4229
4230namespace selftest {
4231
4232/* Selftests for store merging helpers. */
4233
 4234	/* Assert that all elements of the byte arrays X and Y, both of length N,
4235 are equal. */
4236
4237static void
4238verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
4239{
4240 for (unsigned int i = 0; i < n; i++)
4241 {
4242 if (x[i] != y[i])
4243 {
4244 fprintf (stderr, "Arrays do not match. X:\n");
4245 dump_char_array (stderr, x, n);
4246 fprintf (stderr, "Y:\n");
4247 dump_char_array (stderr, y, n);
4248 }
4249 ASSERT_EQ (x[i], y[i]);
4250 }
4251}
4252
 4253	/* Test that shift_bytes_in_array carries bits across byte
 4254	   boundaries correctly.  */
4255
4256static void
4257verify_shift_bytes_in_array (void)
4258{
4259 /* byte 1 | byte 0
4260 00011111 | 11100000. */
4261 unsigned char orig[2] = { 0xe0, 0x1f };
4262 unsigned char in[2];
4263 memcpy (in, orig, sizeof orig);
4264
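  /* Viewed as a 16-bit little-endian value the input is 0x1fe0, and
     0x1fe0 << 2 == 0x7f80, i.e. bytes { 0x80, 0x7f }.  */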
4265 unsigned char expected[2] = { 0x80, 0x7f };
4266 shift_bytes_in_array (in, sizeof (in), 2);
4267 verify_array_eq (in, expected, sizeof (in));
4268
4269 memcpy (in, orig, sizeof orig);
4270 memcpy (expected, orig, sizeof orig);
4271 /* Check that shifting by zero doesn't change anything. */
4272 shift_bytes_in_array (in, sizeof (in), 0);
4273 verify_array_eq (in, expected, sizeof (in));
4274
4275}
4276
 4277	/* Test that shift_bytes_in_array_right carries bits across byte
 4278	   boundaries correctly.  */
4279
4280static void
4281verify_shift_bytes_in_array_right (void)
4282{
4283 /* byte 1 | byte 0
4284 00011111 | 11100000. */
4285 unsigned char orig[2] = { 0x1f, 0xe0};
4286 unsigned char in[2];
4287 memcpy (in, orig, sizeof orig);
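  /* Viewed as a 16-bit big-endian value the input is 0x1fe0, and
     0x1fe0 >> 2 == 0x07f8, i.e. bytes { 0x07, 0xf8 }.  */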
4288 unsigned char expected[2] = { 0x07, 0xf8};
4289 shift_bytes_in_array_right (in, sizeof (in), 2);
4290 verify_array_eq (in, expected, sizeof (in));
4291
4292 memcpy (in, orig, sizeof orig);
4293 memcpy (expected, orig, sizeof orig);
4294 /* Check that shifting by zero doesn't change anything. */
4295 shift_bytes_in_array_right (in, sizeof (in), 0);
4296 verify_array_eq (in, expected, sizeof (in));
4297}
4298
 4299	/* Test that clear_bit_region clears exactly the bits asked for and
 4300	   nothing more.  */
4301
4302static void
4303verify_clear_bit_region (void)
4304{
4305 /* Start with all bits set and test clearing various patterns in them. */
4306 unsigned char orig[3] = { 0xff, 0xff, 0xff};
4307 unsigned char in[3];
4308 unsigned char expected[3];
4309 memcpy (in, orig, sizeof in);
4310
4311 /* Check zeroing out all the bits. */
4312 clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
4313 expected[0] = expected[1] = expected[2] = 0;
4314 verify_array_eq (in, expected, sizeof in);
4315
4316 memcpy (in, orig, sizeof in);
4317 /* Leave the first and last bits intact. */
4318 clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
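  /* Bit 0 is the least significant bit of in[0] and bit 23 the most
     significant bit of in[2], so clearing bits 1 through 22 leaves only
     those two bits set, as the expected bytes below show.  */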
4319 expected[0] = 0x1;
4320 expected[1] = 0;
4321 expected[2] = 0x80;
4322 verify_array_eq (in, expected, sizeof in);
4323}
4324
 4325	/* Test that clear_bit_region_be clears exactly the bits asked for and
 4326	   nothing more.  */
4327
4328static void
4329verify_clear_bit_region_be (void)
4330{
4331 /* Start with all bits set and test clearing various patterns in them. */
4332 unsigned char orig[3] = { 0xff, 0xff, 0xff};
4333 unsigned char in[3];
4334 unsigned char expected[3];
4335 memcpy (in, orig, sizeof in);
4336
4337 /* Check zeroing out all the bits. */
4338 clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
4339 expected[0] = expected[1] = expected[2] = 0;
4340 verify_array_eq (in, expected, sizeof in);
4341
4342 memcpy (in, orig, sizeof in);
4343 /* Leave the first and last bits intact. */
4344 clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
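  /* In the big-endian variant the start position counts from the most
     significant bit of in[0], so this clears everything except bit 7 of
     in[0] and bit 0 of in[2], as the expected bytes below show.  */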
4345 expected[0] = 0x80;
4346 expected[1] = 0;
4347 expected[2] = 0x1;
4348 verify_array_eq (in, expected, sizeof in);
4349}
4350
4351
4352/* Run all of the selftests within this file. */
4353
4354void
4355store_merging_c_tests (void)
4356{
4357 verify_shift_bytes_in_array ();
4358 verify_shift_bytes_in_array_right ();
4359 verify_clear_bit_region ();
4360 verify_clear_bit_region_be ();
4361}
4362
4363} // namespace selftest
4364#endif /* CHECKING_P. */