4aba7bd3 1/* GIMPLE store merging and byte swapping passes.
fbd26352 2 Copyright (C) 2009-2019 Free Software Foundation, Inc.
3d3e04ac 3 Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
10f0d48d 21/* The purpose of the store merging pass is to combine multiple memory stores
22 of constant values, values loaded from memory, bitwise operations on those,
23 or bit-field values, to consecutive locations, into fewer wider stores.
24
3d3e04ac 25 For example, if we have a sequence performing four byte stores to
26 consecutive memory locations:
27 [p ] := imm1;
28 [p + 1B] := imm2;
29 [p + 2B] := imm3;
30 [p + 3B] := imm4;
31 we can transform this into a single 4-byte store if the target supports it:
10f0d48d 32 [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.
3d3e04ac 33
9991d1d3 34 Or:
35 [p ] := [q ];
36 [p + 1B] := [q + 1B];
37 [p + 2B] := [q + 2B];
38 [p + 3B] := [q + 3B];
 39 if there is no overlap, this can be transformed into a single 4-byte
 40 load followed by a single 4-byte store.
41
42 Or:
43 [p ] := [q ] ^ imm1;
44 [p + 1B] := [q + 1B] ^ imm2;
45 [p + 2B] := [q + 2B] ^ imm3;
46 [p + 3B] := [q + 3B] ^ imm4;
 47 if there is no overlap, this can be transformed into a single 4-byte
48 load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.
49
10f0d48d 50 Or:
51 [p:1 ] := imm;
52 [p:31] := val & 0x7FFFFFFF;
53 we can transform this into a single 4-byte store if the target supports it:
54 [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.
55
3d3e04ac 56 The algorithm is applied to each basic block in three phases:
57
10f0d48d 58 1) Scan through the basic block and record assignments to destinations
59 that can be expressed as a store to memory of a certain size at a certain
60 bit offset from base expressions we can handle. For bit-fields we also
61 record the surrounding bit region, i.e. bits that could be stored in
9991d1d3 62 a read-modify-write operation when storing the bit-field. Record store
63 chains to different bases in a hash_map (m_stores) and make sure to
 64 terminate such chains when appropriate (for example when the stored
65 values get used subsequently).
3d3e04ac 66 These stores can be a result of structure element initializers, array stores
67 etc. A store_immediate_info object is recorded for every such store.
68 Record as many such assignments to a single base as possible until a
69 statement that interferes with the store sequence is encountered.
10f0d48d 70 Each store has up to 2 operands, which can be either a constant, a memory
 71 load or an SSA name, from which the value to be stored can be computed.
9991d1d3 72 At most one of the operands can be a constant. The operands are recorded
 73 in a store_operand_info struct.
3d3e04ac 74
10f0d48d 75 2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
3d3e04ac 76 store_immediate_info objects) and coalesce contiguous stores into
10f0d48d 77 merged_store_group objects. For bit-field stores, we don't need to
9991d1d3 78 require the stores to be contiguous, just their surrounding bit regions
79 have to be contiguous. If the expression being stored is different
 80 between adjacent stores, such as one store storing a constant and the
 81 following one storing a value loaded from memory, or if the loaded memory
82 objects are not adjacent, a new merged_store_group is created as well.
3d3e04ac 83
84 For example, given the stores:
85 [p ] := 0;
86 [p + 1B] := 1;
87 [p + 3B] := 0;
88 [p + 4B] := 1;
89 [p + 5B] := 0;
90 [p + 6B] := 0;
91 This phase would produce two merged_store_group objects, one recording the
92 two bytes stored in the memory region [p : p + 1] and another
93 recording the four bytes stored in the memory region [p + 3 : p + 6].
94
95 3) The merged_store_group objects produced in phase 2) are processed
96 to generate the sequence of wider stores that set the contiguous memory
97 regions to the sequence of bytes that correspond to it. This may emit
98 multiple stores per store group to handle contiguous stores that are not
99 of a size that is a power of 2. For example it can try to emit a 40-bit
100 store as a 32-bit store followed by an 8-bit store.
10f0d48d 101 We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
102 or TARGET_SLOW_UNALIGNED_ACCESS settings.
3d3e04ac 103
104 Note on endianness and example:
105 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
106 [p ] := 0x1234;
107 [p + 2B] := 0x5678;
108 [p + 4B] := 0xab;
109 [p + 5B] := 0xcd;
110
111 The memory layout for little-endian (LE) and big-endian (BE) must be:
112 p |LE|BE|
113 ---------
114 0 |34|12|
115 1 |12|34|
116 2 |78|56|
117 3 |56|78|
118 4 |ab|ab|
119 5 |cd|cd|
120
121 To merge these into a single 48-bit merged value 'val' in phase 2)
122 on little-endian we insert stores to higher (consecutive) bitpositions
123 into the most significant bits of the merged value.
124 The final merged value would be: 0xcdab56781234
125
126 For big-endian we insert stores to higher bitpositions into the least
127 significant bits of the merged value.
128 The final merged value would be: 0x12345678abcd
129
130 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
131 followed by a 16-bit store. Again, we must consider endianness when
132 breaking down the 48-bit value 'val' computed above.
133 For little endian we emit:
134 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
135 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
136
137 Whereas for big-endian we emit:
138 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
139 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
140
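/* Illustrative example (added for exposition; the struct and function below
   are hypothetical and not taken from GCC sources): source code of the kind
   the pass described above is meant to improve.

     struct s { unsigned char a, b, c, d; };

     void
     init (struct s *p)
     {
       p->a = 1;
       p->b = 2;
       p->c = 3;
       p->d = 4;
     }

   On a target that allows it, the four adjacent 1-byte constant stores can
   be merged into a single 4-byte store of the concatenated immediate
   (0x04030201 on a little-endian target).  */
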
141#include "config.h"
142#include "system.h"
143#include "coretypes.h"
144#include "backend.h"
145#include "tree.h"
146#include "gimple.h"
147#include "builtins.h"
148#include "fold-const.h"
149#include "tree-pass.h"
150#include "ssa.h"
151#include "gimple-pretty-print.h"
152#include "alias.h"
153#include "fold-const.h"
154#include "params.h"
155#include "print-tree.h"
156#include "tree-hash-traits.h"
157#include "gimple-iterator.h"
158#include "gimplify.h"
10f0d48d 159#include "gimple-fold.h"
3d3e04ac 160#include "stor-layout.h"
161#include "timevar.h"
393122a0 162#include "cfganal.h"
163#include "cfgcleanup.h"
3d3e04ac 164#include "tree-cfg.h"
393122a0 165#include "except.h"
3d3e04ac 166#include "tree-eh.h"
167#include "target.h"
427223f1 168#include "gimplify-me.h"
902cb3b7 169#include "rtl.h"
170#include "expr.h" /* For get_bit_range. */
4aba7bd3 171#include "optabs-tree.h"
a8786363 172#include "dbgcnt.h"
3d9a2fb3 173#include "selftest.h"
3d3e04ac 174
175/* The maximum size (in bits) of the stores this pass should generate. */
176#define MAX_STORE_BITSIZE (BITS_PER_WORD)
177#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
178
9991d1d3 179/* Limit to bound the number of aliasing checks for loads with the same
180 vuse as the corresponding store. */
181#define MAX_STORE_ALIAS_CHECKS 64
182
3d3e04ac 183namespace {
184
81c8113b 185struct bswap_stat
4aba7bd3 186{
187 /* Number of hand-written 16-bit nop / bswaps found. */
188 int found_16bit;
189
190 /* Number of hand-written 32-bit nop / bswaps found. */
191 int found_32bit;
192
193 /* Number of hand-written 64-bit nop / bswaps found. */
194 int found_64bit;
195} nop_stats, bswap_stats;
196
197/* A symbolic number structure is used to detect byte permutation and selection
198 patterns of a source. To achieve that, its field N contains an artificial
 199 number consisting of BITS_PER_MARKER sized markers tracking where each
 200 byte comes from in the source:
201
202 0 - target byte has the value 0
203 FF - target byte has an unknown value (eg. due to sign extension)
 204 1..size - marker value is the 1-based byte index in the source (1 for lsb).
205
206 To detect permutations on memory sources (arrays and structures), a symbolic
207 number is also associated:
208 - a base address BASE_ADDR and an OFFSET giving the address of the source;
209 - a range which gives the difference between the highest and lowest accessed
210 memory location to make such a symbolic number;
211 - the address SRC of the source element of lowest address as a convenience
212 to easily get BASE_ADDR + offset + lowest bytepos;
213 - number of expressions N_OPS bitwise ored together to represent
214 approximate cost of the computation.
215
216 Note 1: the range is different from size as size reflects the size of the
217 type of the current expression. For instance, for an array char a[],
218 (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
219 (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
220 time a range of 1.
221
222 Note 2: for non-memory sources, range holds the same value as size.
223
224 Note 3: SRC points to the SSA_NAME in case of non-memory source. */
225
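/* Worked example (added for illustration, assuming 8-bit bytes and a
   little-endian target): for "unsigned char *a", the expression
   a[0] | (a[1] << 8) starts from two one-byte symbolic numbers with
   N = 0x01 each; the shift turns the second into N = 0x0100, and merging
   the two memory sources gives N = 0x0201, i.e. source byte 1 in the lsb
   and source byte 2 above it -- a native-endian 16-bit load (a nop).
   On a big-endian target the same expression yields N = 0x0102, the
   byte-swap pattern.  */
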
226struct symbolic_number {
227 uint64_t n;
228 tree type;
229 tree base_addr;
230 tree offset;
08454aa5 231 poly_int64_pod bytepos;
4aba7bd3 232 tree src;
233 tree alias_set;
234 tree vuse;
235 unsigned HOST_WIDE_INT range;
236 int n_ops;
237};
238
239#define BITS_PER_MARKER 8
240#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
241#define MARKER_BYTE_UNKNOWN MARKER_MASK
242#define HEAD_MARKER(n, size) \
243 ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))
244
245/* The number which the find_bswap_or_nop_1 result should match in
246 order to have a nop. The number is masked according to the size of
247 the symbolic number before using it. */
248#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
249 (uint64_t)0x08070605 << 32 | 0x04030201)
250
251/* The number which the find_bswap_or_nop_1 result should match in
252 order to have a byte swap. The number is masked according to the
253 size of the symbolic number before using it. */
254#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
255 (uint64_t)0x01020304 << 32 | 0x05060708)
256
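/* For example (added for illustration), once masked/shifted for a 4-byte
   symbolic number in find_bswap_or_nop_finalize below, these comparison
   values reduce to 0x04030201 (CMPNOP, a native-endian identity) and
   0x01020304 (CMPXCHG, a full byte swap).  */
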
257/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
258 number N. Return false if the requested operation is not permitted
259 on a symbolic number. */
260
261inline bool
262do_shift_rotate (enum tree_code code,
263 struct symbolic_number *n,
264 int count)
265{
266 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
267 unsigned head_marker;
268
fdaa2822 269 if (count < 0
270 || count >= TYPE_PRECISION (n->type)
271 || count % BITS_PER_UNIT != 0)
4aba7bd3 272 return false;
273 count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;
274
275 /* Zero out the extra bits of N in order to avoid them being shifted
276 into the significant bits. */
277 if (size < 64 / BITS_PER_MARKER)
278 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
279
280 switch (code)
281 {
282 case LSHIFT_EXPR:
283 n->n <<= count;
284 break;
285 case RSHIFT_EXPR:
286 head_marker = HEAD_MARKER (n->n, size);
287 n->n >>= count;
288 /* Arithmetic shift of signed type: result is dependent on the value. */
289 if (!TYPE_UNSIGNED (n->type) && head_marker)
290 for (i = 0; i < count / BITS_PER_MARKER; i++)
291 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
292 << ((size - 1 - i) * BITS_PER_MARKER);
293 break;
294 case LROTATE_EXPR:
295 n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
296 break;
297 case RROTATE_EXPR:
298 n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
299 break;
300 default:
301 return false;
302 }
303 /* Zero unused bits for size. */
304 if (size < 64 / BITS_PER_MARKER)
305 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
306 return true;
307}
308
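/* Worked example (added for illustration): for a 4-byte unsigned symbolic
   number with N = 0x04030201, do_shift_rotate (LSHIFT_EXPR, n, 8) gives
   N = 0x03020100 (the new low byte is known to be zero) and
   do_shift_rotate (RSHIFT_EXPR, n, 8) gives N = 0x00040302; for a signed
   type the vacated high byte would instead become 0xff (unknown, due to
   sign extension).  */
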
309/* Perform sanity checking for the symbolic number N and the gimple
310 statement STMT. */
311
312inline bool
313verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
314{
315 tree lhs_type;
316
317 lhs_type = gimple_expr_type (stmt);
318
319 if (TREE_CODE (lhs_type) != INTEGER_TYPE)
320 return false;
321
322 if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
323 return false;
324
325 return true;
326}
327
328/* Initialize the symbolic number N for the bswap pass from the base element
329 SRC manipulated by the bitwise OR expression. */
330
331bool
332init_symbolic_number (struct symbolic_number *n, tree src)
333{
334 int size;
335
336 if (! INTEGRAL_TYPE_P (TREE_TYPE (src)))
337 return false;
338
339 n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
340 n->src = src;
341
342 /* Set up the symbolic number N by setting each byte to a value between 1 and
 343 the byte size of SRC. The highest order byte is set to that size and the
344 lowest order byte to 1. */
345 n->type = TREE_TYPE (src);
346 size = TYPE_PRECISION (n->type);
347 if (size % BITS_PER_UNIT != 0)
348 return false;
349 size /= BITS_PER_UNIT;
350 if (size > 64 / BITS_PER_MARKER)
351 return false;
352 n->range = size;
353 n->n = CMPNOP;
354 n->n_ops = 1;
355
356 if (size < 64 / BITS_PER_MARKER)
357 n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
358
359 return true;
360}
361
 362/* Check if STMT might be a byte swap or a nop from a memory source and return
363 the answer. If so, REF is that memory source and the base of the memory area
364 accessed and the offset of the access from that base are recorded in N. */
365
366bool
367find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
368{
369 /* Leaf node is an array or component ref. Memorize its base and
370 offset from base to compare to other such leaf node. */
81bc0f0f 371 poly_int64 bitsize, bitpos, bytepos;
4aba7bd3 372 machine_mode mode;
373 int unsignedp, reversep, volatilep;
374 tree offset, base_addr;
375
376 /* Not prepared to handle PDP endian. */
377 if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
378 return false;
379
380 if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
381 return false;
382
383 base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
384 &unsignedp, &reversep, &volatilep);
385
509ab8cd 386 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
387 /* Do not rewrite TARGET_MEM_REF. */
388 return false;
389 else if (TREE_CODE (base_addr) == MEM_REF)
4aba7bd3 390 {
9c6deaf0 391 poly_offset_int bit_offset = 0;
4aba7bd3 392 tree off = TREE_OPERAND (base_addr, 1);
393
394 if (!integer_zerop (off))
395 {
9c6deaf0 396 poly_offset_int boff = mem_ref_offset (base_addr);
397 boff <<= LOG2_BITS_PER_UNIT;
4aba7bd3 398 bit_offset += boff;
399 }
400
401 base_addr = TREE_OPERAND (base_addr, 0);
402
403 /* Avoid returning a negative bitpos as this may wreak havoc later. */
9c6deaf0 404 if (maybe_lt (bit_offset, 0))
4aba7bd3 405 {
9c6deaf0 406 tree byte_offset = wide_int_to_tree
407 (sizetype, bits_to_bytes_round_down (bit_offset));
408 bit_offset = num_trailing_bits (bit_offset);
4aba7bd3 409 if (offset)
9c6deaf0 410 offset = size_binop (PLUS_EXPR, offset, byte_offset);
4aba7bd3 411 else
9c6deaf0 412 offset = byte_offset;
4aba7bd3 413 }
414
9c6deaf0 415 bitpos += bit_offset.force_shwi ();
4aba7bd3 416 }
509ab8cd 417 else
418 base_addr = build_fold_addr_expr (base_addr);
4aba7bd3 419
81bc0f0f 420 if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
4aba7bd3 421 return false;
81bc0f0f 422 if (!multiple_p (bitsize, BITS_PER_UNIT))
4aba7bd3 423 return false;
424 if (reversep)
425 return false;
426
427 if (!init_symbolic_number (n, ref))
428 return false;
429 n->base_addr = base_addr;
430 n->offset = offset;
81bc0f0f 431 n->bytepos = bytepos;
4aba7bd3 432 n->alias_set = reference_alias_ptr_type (ref);
433 n->vuse = gimple_vuse (stmt);
434 return true;
435}
436
 437/* Compute the symbolic number N representing the result of a bitwise OR on the
 438 two symbolic numbers N1 and N2 whose source statements are respectively
439 SOURCE_STMT1 and SOURCE_STMT2. */
440
441gimple *
442perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
443 gimple *source_stmt2, struct symbolic_number *n2,
444 struct symbolic_number *n)
445{
446 int i, size;
447 uint64_t mask;
448 gimple *source_stmt;
449 struct symbolic_number *n_start;
450
451 tree rhs1 = gimple_assign_rhs1 (source_stmt1);
452 if (TREE_CODE (rhs1) == BIT_FIELD_REF
453 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
454 rhs1 = TREE_OPERAND (rhs1, 0);
455 tree rhs2 = gimple_assign_rhs1 (source_stmt2);
456 if (TREE_CODE (rhs2) == BIT_FIELD_REF
457 && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
458 rhs2 = TREE_OPERAND (rhs2, 0);
459
 460 /* Sources are different, cancel bswap if they are not memory locations with
461 the same base (array, structure, ...). */
462 if (rhs1 != rhs2)
463 {
464 uint64_t inc;
08454aa5 465 HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
4aba7bd3 466 struct symbolic_number *toinc_n_ptr, *n_end;
467 basic_block bb1, bb2;
468
469 if (!n1->base_addr || !n2->base_addr
470 || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
471 return NULL;
472
473 if (!n1->offset != !n2->offset
474 || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
475 return NULL;
476
08454aa5 477 start1 = 0;
478 if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
479 return NULL;
480
481 if (start1 < start2)
4aba7bd3 482 {
483 n_start = n1;
08454aa5 484 start_sub = start2 - start1;
4aba7bd3 485 }
486 else
487 {
488 n_start = n2;
08454aa5 489 start_sub = start1 - start2;
4aba7bd3 490 }
491
492 bb1 = gimple_bb (source_stmt1);
493 bb2 = gimple_bb (source_stmt2);
494 if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
495 source_stmt = source_stmt1;
496 else
497 source_stmt = source_stmt2;
498
499 /* Find the highest address at which a load is performed and
500 compute related info. */
08454aa5 501 end1 = start1 + (n1->range - 1);
502 end2 = start2 + (n2->range - 1);
4aba7bd3 503 if (end1 < end2)
504 {
505 end = end2;
506 end_sub = end2 - end1;
507 }
508 else
509 {
510 end = end1;
511 end_sub = end1 - end2;
512 }
513 n_end = (end2 > end1) ? n2 : n1;
514
515 /* Find symbolic number whose lsb is the most significant. */
516 if (BYTES_BIG_ENDIAN)
517 toinc_n_ptr = (n_end == n1) ? n2 : n1;
518 else
519 toinc_n_ptr = (n_start == n1) ? n2 : n1;
520
08454aa5 521 n->range = end - MIN (start1, start2) + 1;
4aba7bd3 522
523 /* Check that the range of memory covered can be represented by
524 a symbolic number. */
525 if (n->range > 64 / BITS_PER_MARKER)
526 return NULL;
527
528 /* Reinterpret byte marks in symbolic number holding the value of
529 bigger weight according to target endianness. */
530 inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
531 size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
532 for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
533 {
534 unsigned marker
535 = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
536 if (marker && marker != MARKER_BYTE_UNKNOWN)
537 toinc_n_ptr->n += inc;
538 }
539 }
540 else
541 {
542 n->range = n1->range;
543 n_start = n1;
544 source_stmt = source_stmt1;
545 }
546
547 if (!n1->alias_set
548 || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
549 n->alias_set = n1->alias_set;
550 else
551 n->alias_set = ptr_type_node;
552 n->vuse = n_start->vuse;
553 n->base_addr = n_start->base_addr;
554 n->offset = n_start->offset;
555 n->src = n_start->src;
556 n->bytepos = n_start->bytepos;
557 n->type = n_start->type;
558 size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
559
560 for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
561 {
562 uint64_t masked1, masked2;
563
564 masked1 = n1->n & mask;
565 masked2 = n2->n & mask;
566 if (masked1 && masked2 && masked1 != masked2)
567 return NULL;
568 }
569 n->n = n1->n | n2->n;
570 n->n_ops = n1->n_ops + n2->n_ops;
571
572 return source_stmt;
573}
574
575/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
576 the operation given by the rhs of STMT on the result. If the operation
577 could successfully be executed the function returns a gimple stmt whose
 578 rhs's first tree is the expression of the source operand, and NULL
579 otherwise. */
580
581gimple *
582find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
583{
584 enum tree_code code;
585 tree rhs1, rhs2 = NULL;
586 gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
587 enum gimple_rhs_class rhs_class;
588
589 if (!limit || !is_gimple_assign (stmt))
590 return NULL;
591
592 rhs1 = gimple_assign_rhs1 (stmt);
593
594 if (find_bswap_or_nop_load (stmt, rhs1, n))
595 return stmt;
596
597 /* Handle BIT_FIELD_REF. */
598 if (TREE_CODE (rhs1) == BIT_FIELD_REF
599 && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
600 {
601 unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
602 unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
603 if (bitpos % BITS_PER_UNIT == 0
604 && bitsize % BITS_PER_UNIT == 0
605 && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
606 {
607 /* Handle big-endian bit numbering in BIT_FIELD_REF. */
608 if (BYTES_BIG_ENDIAN)
609 bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;
610
611 /* Shift. */
612 if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
613 return NULL;
614
615 /* Mask. */
616 uint64_t mask = 0;
617 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
618 for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
619 i++, tmp <<= BITS_PER_UNIT)
620 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
621 n->n &= mask;
622
623 /* Convert. */
624 n->type = TREE_TYPE (rhs1);
625 if (!n->base_addr)
626 n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
627
628 return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
629 }
630
631 return NULL;
632 }
633
634 if (TREE_CODE (rhs1) != SSA_NAME)
635 return NULL;
636
637 code = gimple_assign_rhs_code (stmt);
638 rhs_class = gimple_assign_rhs_class (stmt);
639 rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
640
641 if (rhs_class == GIMPLE_BINARY_RHS)
642 rhs2 = gimple_assign_rhs2 (stmt);
643
644 /* Handle unary rhs and binary rhs with integer constants as second
645 operand. */
646
647 if (rhs_class == GIMPLE_UNARY_RHS
648 || (rhs_class == GIMPLE_BINARY_RHS
649 && TREE_CODE (rhs2) == INTEGER_CST))
650 {
651 if (code != BIT_AND_EXPR
652 && code != LSHIFT_EXPR
653 && code != RSHIFT_EXPR
654 && code != LROTATE_EXPR
655 && code != RROTATE_EXPR
656 && !CONVERT_EXPR_CODE_P (code))
657 return NULL;
658
659 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);
660
661 /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
662 we have to initialize the symbolic number. */
663 if (!source_stmt1)
664 {
665 if (gimple_assign_load_p (stmt)
666 || !init_symbolic_number (n, rhs1))
667 return NULL;
668 source_stmt1 = stmt;
669 }
670
671 switch (code)
672 {
673 case BIT_AND_EXPR:
674 {
675 int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
676 uint64_t val = int_cst_value (rhs2), mask = 0;
677 uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
678
679 /* Only constants masking full bytes are allowed. */
680 for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
681 if ((val & tmp) != 0 && (val & tmp) != tmp)
682 return NULL;
683 else if (val & tmp)
684 mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
685
686 n->n &= mask;
687 }
688 break;
689 case LSHIFT_EXPR:
690 case RSHIFT_EXPR:
691 case LROTATE_EXPR:
692 case RROTATE_EXPR:
693 if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
694 return NULL;
695 break;
696 CASE_CONVERT:
697 {
698 int i, type_size, old_type_size;
699 tree type;
700
701 type = gimple_expr_type (stmt);
702 type_size = TYPE_PRECISION (type);
703 if (type_size % BITS_PER_UNIT != 0)
704 return NULL;
705 type_size /= BITS_PER_UNIT;
706 if (type_size > 64 / BITS_PER_MARKER)
707 return NULL;
708
709 /* Sign extension: result is dependent on the value. */
710 old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
711 if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
712 && HEAD_MARKER (n->n, old_type_size))
713 for (i = 0; i < type_size - old_type_size; i++)
714 n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
715 << ((type_size - 1 - i) * BITS_PER_MARKER);
716
717 if (type_size < 64 / BITS_PER_MARKER)
718 {
719 /* If STMT casts to a smaller type mask out the bits not
720 belonging to the target type. */
721 n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
722 }
723 n->type = type;
724 if (!n->base_addr)
725 n->range = type_size;
726 }
727 break;
728 default:
729 return NULL;
730 };
731 return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
732 }
733
734 /* Handle binary rhs. */
735
736 if (rhs_class == GIMPLE_BINARY_RHS)
737 {
738 struct symbolic_number n1, n2;
739 gimple *source_stmt, *source_stmt2;
740
741 if (code != BIT_IOR_EXPR)
742 return NULL;
743
744 if (TREE_CODE (rhs2) != SSA_NAME)
745 return NULL;
746
747 rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
748
749 switch (code)
750 {
751 case BIT_IOR_EXPR:
752 source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);
753
754 if (!source_stmt1)
755 return NULL;
756
757 source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);
758
759 if (!source_stmt2)
760 return NULL;
761
762 if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
763 return NULL;
764
509ab8cd 765 if (n1.vuse != n2.vuse)
4aba7bd3 766 return NULL;
767
768 source_stmt
769 = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n);
770
771 if (!source_stmt)
772 return NULL;
773
774 if (!verify_symbolic_number_p (n, stmt))
775 return NULL;
776
777 break;
778 default:
779 return NULL;
780 }
781 return source_stmt;
782 }
783 return NULL;
784}
785
509ab8cd 786/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
787 *CMPXCHG, *CMPNOP and adjust *N. */
4aba7bd3 788
509ab8cd 789void
790find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
791 uint64_t *cmpnop)
4aba7bd3 792{
793 unsigned rsize;
794 uint64_t tmpn, mask;
4aba7bd3 795
509ab8cd 796 /* The number which the find_bswap_or_nop_1 result should match in order
797 to have a full byte swap. The number is shifted to the right
798 according to the size of the symbolic number before using it. */
799 *cmpxchg = CMPXCHG;
800 *cmpnop = CMPNOP;
4aba7bd3 801
802 /* Find real size of result (highest non-zero byte). */
803 if (n->base_addr)
804 for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
805 else
806 rsize = n->range;
807
808 /* Zero out the bits corresponding to untouched bytes in original gimple
809 expression. */
810 if (n->range < (int) sizeof (int64_t))
811 {
812 mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
509ab8cd 813 *cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
814 *cmpnop &= mask;
4aba7bd3 815 }
816
817 /* Zero out the bits corresponding to unused bytes in the result of the
818 gimple expression. */
819 if (rsize < n->range)
820 {
821 if (BYTES_BIG_ENDIAN)
822 {
823 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
509ab8cd 824 *cmpxchg &= mask;
825 *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
4aba7bd3 826 }
827 else
828 {
829 mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
509ab8cd 830 *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
831 *cmpnop &= mask;
4aba7bd3 832 }
833 n->range = rsize;
834 }
835
509ab8cd 836 n->range *= BITS_PER_UNIT;
837}
838
839/* Check if STMT completes a bswap implementation or a read in a given
 840 endianness consisting of ORs, SHIFTs and ANDs, and set *BSWAP
841 accordingly. It also sets N to represent the kind of operations
842 performed: size of the resulting expression and whether it works on
 843 a memory source, and if so its alias set and vuse. Finally, the
844 function returns a stmt whose rhs's first tree is the source
845 expression. */
846
847gimple *
848find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap)
849{
850 /* The last parameter determines the depth search limit. It usually
851 correlates directly to the number n of bytes to be touched. We
852 increase that number by log2(n) + 1 here in order to also
853 cover signed -> unsigned conversions of the src operand as can be seen
854 in libgcc, and for initial shift/and operation of the src operand. */
855 int limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt)));
856 limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit);
857 gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);
858
859 if (!ins_stmt)
860 return NULL;
861
862 uint64_t cmpxchg, cmpnop;
863 find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop);
864
4aba7bd3 865 /* A complete byte swap should make the symbolic number start with
 866 the largest digit in the highest order byte. An unchanged symbolic
 867 number indicates a read with the same endianness as the target architecture. */
868 if (n->n == cmpnop)
869 *bswap = false;
870 else if (n->n == cmpxchg)
871 *bswap = true;
872 else
873 return NULL;
874
875 /* Useless bit manipulation performed by code. */
876 if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
877 return NULL;
878
4aba7bd3 879 return ins_stmt;
880}
881
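/* Illustrative example (added for exposition; the function below is
   hypothetical): a hand-written little-endian load such as

     uint32_t
     load32_le (const unsigned char *p)
     {
       return (uint32_t) p[0] | ((uint32_t) p[1] << 8)
              | ((uint32_t) p[2] << 16) | ((uint32_t) p[3] << 24);
     }

   is detected here with *BSWAP set to false on little-endian targets,
   where a plain 32-bit load suffices, and with *BSWAP set to true on
   big-endian targets, where it can become a 32-bit load followed by a
   byte swap.  */
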
882const pass_data pass_data_optimize_bswap =
883{
884 GIMPLE_PASS, /* type */
885 "bswap", /* name */
886 OPTGROUP_NONE, /* optinfo_flags */
887 TV_NONE, /* tv_id */
888 PROP_ssa, /* properties_required */
889 0, /* properties_provided */
890 0, /* properties_destroyed */
891 0, /* todo_flags_start */
892 0, /* todo_flags_finish */
893};
894
895class pass_optimize_bswap : public gimple_opt_pass
896{
897public:
898 pass_optimize_bswap (gcc::context *ctxt)
899 : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
900 {}
901
902 /* opt_pass methods: */
903 virtual bool gate (function *)
904 {
905 return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
906 }
907
908 virtual unsigned int execute (function *);
909
910}; // class pass_optimize_bswap
911
912/* Perform the bswap optimization: replace the expression computed in the rhs
509ab8cd 913 of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
914 bswap, load or load + bswap expression.
4aba7bd3 915 Which of these alternatives replaces the rhs is given by N->base_addr (non
 916 null if a load is needed) and BSWAP. The type, VUSE and alias set of the
 917 load to perform are also given in N while the builtin bswap invocation is
509ab8cd 918 given in FNDECL. Finally, if a load is involved, INS_STMT refers to one of the
919 load statements involved to construct the rhs in gsi_stmt (GSI) and
920 N->range gives the size of the rhs expression for maintaining some
921 statistics.
4aba7bd3 922
509ab8cd 923 Note that if the replacement involves a load and if gsi_stmt (GSI) is
 924 non-NULL, that stmt is moved just after INS_STMT to do the load with the
 925 same VUSE, which can lead to gsi_stmt (GSI) changing basic block. */
4aba7bd3 926
509ab8cd 927tree
928bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
4aba7bd3 929 tree bswap_type, tree load_type, struct symbolic_number *n,
930 bool bswap)
931{
509ab8cd 932 tree src, tmp, tgt = NULL_TREE;
4aba7bd3 933 gimple *bswap_stmt;
934
509ab8cd 935 gimple *cur_stmt = gsi_stmt (gsi);
4aba7bd3 936 src = n->src;
509ab8cd 937 if (cur_stmt)
938 tgt = gimple_assign_lhs (cur_stmt);
4aba7bd3 939
940 /* Need to load the value from memory first. */
941 if (n->base_addr)
942 {
509ab8cd 943 gimple_stmt_iterator gsi_ins = gsi;
944 if (ins_stmt)
945 gsi_ins = gsi_for_stmt (ins_stmt);
4aba7bd3 946 tree addr_expr, addr_tmp, val_expr, val_tmp;
947 tree load_offset_ptr, aligned_load_type;
509ab8cd 948 gimple *load_stmt;
949 unsigned align = get_object_alignment (src);
08454aa5 950 poly_int64 load_offset = 0;
4aba7bd3 951
509ab8cd 952 if (cur_stmt)
953 {
954 basic_block ins_bb = gimple_bb (ins_stmt);
955 basic_block cur_bb = gimple_bb (cur_stmt);
956 if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
957 return NULL_TREE;
958
 959 /* Move cur_stmt just before one of the loads of the original
960 to ensure it has the same VUSE. See PR61517 for what could
961 go wrong. */
962 if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
963 reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
964 gsi_move_before (&gsi, &gsi_ins);
965 gsi = gsi_for_stmt (cur_stmt);
966 }
967 else
968 gsi = gsi_ins;
4aba7bd3 969
970 /* Compute address to load from and cast according to the size
971 of the load. */
509ab8cd 972 addr_expr = build_fold_addr_expr (src);
4aba7bd3 973 if (is_gimple_mem_ref_addr (addr_expr))
509ab8cd 974 addr_tmp = unshare_expr (addr_expr);
4aba7bd3 975 else
976 {
509ab8cd 977 addr_tmp = unshare_expr (n->base_addr);
978 if (!is_gimple_mem_ref_addr (addr_tmp))
979 addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
980 is_gimple_mem_ref_addr,
981 NULL_TREE, true,
982 GSI_SAME_STMT);
983 load_offset = n->bytepos;
984 if (n->offset)
985 {
986 tree off
987 = force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
988 true, NULL_TREE, true,
989 GSI_SAME_STMT);
990 gimple *stmt
991 = gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
992 POINTER_PLUS_EXPR, addr_tmp, off);
993 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
994 addr_tmp = gimple_assign_lhs (stmt);
995 }
4aba7bd3 996 }
997
998 /* Perform the load. */
999 aligned_load_type = load_type;
1000 if (align < TYPE_ALIGN (load_type))
1001 aligned_load_type = build_aligned_type (load_type, align);
1002 load_offset_ptr = build_int_cst (n->alias_set, load_offset);
1003 val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
1004 load_offset_ptr);
1005
1006 if (!bswap)
1007 {
1008 if (n->range == 16)
1009 nop_stats.found_16bit++;
1010 else if (n->range == 32)
1011 nop_stats.found_32bit++;
1012 else
1013 {
1014 gcc_assert (n->range == 64);
1015 nop_stats.found_64bit++;
1016 }
1017
1018 /* Convert the result of load if necessary. */
509ab8cd 1019 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
4aba7bd3 1020 {
1021 val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
1022 "load_dst");
1023 load_stmt = gimple_build_assign (val_tmp, val_expr);
1024 gimple_set_vuse (load_stmt, n->vuse);
1025 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1026 gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp);
509ab8cd 1027 update_stmt (cur_stmt);
4aba7bd3 1028 }
509ab8cd 1029 else if (cur_stmt)
4aba7bd3 1030 {
1031 gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
1032 gimple_set_vuse (cur_stmt, n->vuse);
509ab8cd 1033 update_stmt (cur_stmt);
1034 }
1035 else
1036 {
1037 tgt = make_ssa_name (load_type);
1038 cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
1039 gimple_set_vuse (cur_stmt, n->vuse);
1040 gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
4aba7bd3 1041 }
4aba7bd3 1042
1043 if (dump_file)
1044 {
1045 fprintf (dump_file,
1046 "%d bit load in target endianness found at: ",
1047 (int) n->range);
1048 print_gimple_stmt (dump_file, cur_stmt, 0);
1049 }
509ab8cd 1050 return tgt;
4aba7bd3 1051 }
1052 else
1053 {
1054 val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
1055 load_stmt = gimple_build_assign (val_tmp, val_expr);
1056 gimple_set_vuse (load_stmt, n->vuse);
1057 gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
1058 }
1059 src = val_tmp;
1060 }
1061 else if (!bswap)
1062 {
509ab8cd 1063 gimple *g = NULL;
1064 if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
4aba7bd3 1065 {
1066 if (!is_gimple_val (src))
509ab8cd 1067 return NULL_TREE;
4aba7bd3 1068 g = gimple_build_assign (tgt, NOP_EXPR, src);
1069 }
509ab8cd 1070 else if (cur_stmt)
4aba7bd3 1071 g = gimple_build_assign (tgt, src);
509ab8cd 1072 else
1073 tgt = src;
4aba7bd3 1074 if (n->range == 16)
1075 nop_stats.found_16bit++;
1076 else if (n->range == 32)
1077 nop_stats.found_32bit++;
1078 else
1079 {
1080 gcc_assert (n->range == 64);
1081 nop_stats.found_64bit++;
1082 }
1083 if (dump_file)
1084 {
1085 fprintf (dump_file,
1086 "%d bit reshuffle in target endianness found at: ",
1087 (int) n->range);
509ab8cd 1088 if (cur_stmt)
1089 print_gimple_stmt (dump_file, cur_stmt, 0);
1090 else
1091 {
54e7de93 1092 print_generic_expr (dump_file, tgt, TDF_NONE);
509ab8cd 1093 fprintf (dump_file, "\n");
1094 }
4aba7bd3 1095 }
509ab8cd 1096 if (cur_stmt)
1097 gsi_replace (&gsi, g, true);
1098 return tgt;
4aba7bd3 1099 }
1100 else if (TREE_CODE (src) == BIT_FIELD_REF)
1101 src = TREE_OPERAND (src, 0);
1102
1103 if (n->range == 16)
1104 bswap_stats.found_16bit++;
1105 else if (n->range == 32)
1106 bswap_stats.found_32bit++;
1107 else
1108 {
1109 gcc_assert (n->range == 64);
1110 bswap_stats.found_64bit++;
1111 }
1112
1113 tmp = src;
1114
1115 /* Convert the src expression if necessary. */
1116 if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
1117 {
1118 gimple *convert_stmt;
1119
1120 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
1121 convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
1122 gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
1123 }
1124
 1125 /* Canonical form for 16 bit bswap is a rotate expression. Only 16-bit values
 1126 are considered, as rotation of 2N-bit values by N bits is generally not
1127 equivalent to a bswap. Consider for instance 0x01020304 r>> 16 which
1128 gives 0x03040102 while a bswap for that value is 0x04030201. */
1129 if (bswap && n->range == 16)
1130 {
1131 tree count = build_int_cst (NULL, BITS_PER_UNIT);
1132 src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
1133 bswap_stmt = gimple_build_assign (NULL, src);
1134 }
1135 else
1136 bswap_stmt = gimple_build_call (fndecl, 1, tmp);
1137
509ab8cd 1138 if (tgt == NULL_TREE)
1139 tgt = make_ssa_name (bswap_type);
4aba7bd3 1140 tmp = tgt;
1141
1142 /* Convert the result if necessary. */
1143 if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
1144 {
1145 gimple *convert_stmt;
1146
1147 tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
1148 convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp);
1149 gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT);
1150 }
1151
1152 gimple_set_lhs (bswap_stmt, tmp);
1153
1154 if (dump_file)
1155 {
1156 fprintf (dump_file, "%d bit bswap implementation found at: ",
1157 (int) n->range);
509ab8cd 1158 if (cur_stmt)
1159 print_gimple_stmt (dump_file, cur_stmt, 0);
1160 else
1161 {
54e7de93 1162 print_generic_expr (dump_file, tgt, TDF_NONE);
509ab8cd 1163 fprintf (dump_file, "\n");
1164 }
4aba7bd3 1165 }
1166
509ab8cd 1167 if (cur_stmt)
1168 {
1169 gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
1170 gsi_remove (&gsi, true);
1171 }
1172 else
1173 gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
1174 return tgt;
4aba7bd3 1175}
1176
 1177/* Find manual byte swap implementations as well as loads in a given
 1178 endianness. Byte swaps are turned into a bswap builtin invocation
 1179 while endian loads are converted to a bswap builtin invocation or
 1180 a simple load according to the target endianness. */
1181
1182unsigned int
1183pass_optimize_bswap::execute (function *fun)
1184{
1185 basic_block bb;
1186 bool bswap32_p, bswap64_p;
1187 bool changed = false;
1188 tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;
1189
1190 bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1191 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
1192 bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1193 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1194 || (bswap32_p && word_mode == SImode)));
1195
1196 /* Determine the argument type of the builtins. The code later on
1197 assumes that the return and argument type are the same. */
1198 if (bswap32_p)
1199 {
1200 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1201 bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1202 }
1203
1204 if (bswap64_p)
1205 {
1206 tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1207 bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1208 }
1209
1210 memset (&nop_stats, 0, sizeof (nop_stats));
1211 memset (&bswap_stats, 0, sizeof (bswap_stats));
1212 calculate_dominance_info (CDI_DOMINATORS);
1213
1214 FOR_EACH_BB_FN (bb, fun)
1215 {
1216 gimple_stmt_iterator gsi;
1217
1218 /* We do a reverse scan for bswap patterns to make sure we get the
1219 widest match. As bswap pattern matching doesn't handle previously
1220 inserted smaller bswap replacements as sub-patterns, the wider
1221 variant wouldn't be detected. */
1222 for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
1223 {
1224 gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
1225 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1226 enum tree_code code;
1227 struct symbolic_number n;
1228 bool bswap;
1229
1230 /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
1231 might be moved to a different basic block by bswap_replace and gsi
 1232 must not point to it if that's the case. Moving the gsi_prev
 1233 there makes sure that gsi points to the statement previous to
1234 cur_stmt while still making sure that all statements are
1235 considered in this basic block. */
1236 gsi_prev (&gsi);
1237
1238 if (!is_gimple_assign (cur_stmt))
1239 continue;
1240
1241 code = gimple_assign_rhs_code (cur_stmt);
1242 switch (code)
1243 {
1244 case LROTATE_EXPR:
1245 case RROTATE_EXPR:
1246 if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
1247 || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
1248 % BITS_PER_UNIT)
1249 continue;
1250 /* Fall through. */
1251 case BIT_IOR_EXPR:
1252 break;
1253 default:
1254 continue;
1255 }
1256
1257 ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap);
1258
1259 if (!ins_stmt)
1260 continue;
1261
1262 switch (n.range)
1263 {
1264 case 16:
1265 /* Already in canonical form, nothing to do. */
1266 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
1267 continue;
1268 load_type = bswap_type = uint16_type_node;
1269 break;
1270 case 32:
1271 load_type = uint32_type_node;
1272 if (bswap32_p)
1273 {
1274 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1275 bswap_type = bswap32_type;
1276 }
1277 break;
1278 case 64:
1279 load_type = uint64_type_node;
1280 if (bswap64_p)
1281 {
1282 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1283 bswap_type = bswap64_type;
1284 }
1285 break;
1286 default:
1287 continue;
1288 }
1289
1290 if (bswap && !fndecl && n.range != 16)
1291 continue;
1292
509ab8cd 1293 if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
1294 bswap_type, load_type, &n, bswap))
4aba7bd3 1295 changed = true;
1296 }
1297 }
1298
1299 statistics_counter_event (fun, "16-bit nop implementations found",
1300 nop_stats.found_16bit);
1301 statistics_counter_event (fun, "32-bit nop implementations found",
1302 nop_stats.found_32bit);
1303 statistics_counter_event (fun, "64-bit nop implementations found",
1304 nop_stats.found_64bit);
1305 statistics_counter_event (fun, "16-bit bswap implementations found",
1306 bswap_stats.found_16bit);
1307 statistics_counter_event (fun, "32-bit bswap implementations found",
1308 bswap_stats.found_32bit);
1309 statistics_counter_event (fun, "64-bit bswap implementations found",
1310 bswap_stats.found_64bit);
1311
1312 return (changed ? TODO_update_ssa : 0);
1313}
1314
1315} // anon namespace
1316
1317gimple_opt_pass *
1318make_pass_optimize_bswap (gcc::context *ctxt)
1319{
1320 return new pass_optimize_bswap (ctxt);
1321}
1322
1323namespace {
1324
9991d1d3 1325/* Struct recording one operand for the store, which is either a constant,
10f0d48d 1326 then VAL represents the constant and all the other fields are zero, or
1327 a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
1328 and the other fields also reflect the memory load, or an SSA name, then
 1329 VAL represents the SSA name and all the other fields are zero. */
9991d1d3 1330
251317e4 1331class store_operand_info
9991d1d3 1332{
251317e4 1333public:
9991d1d3 1334 tree val;
1335 tree base_addr;
e61263f2 1336 poly_uint64 bitsize;
1337 poly_uint64 bitpos;
1338 poly_uint64 bitregion_start;
1339 poly_uint64 bitregion_end;
9991d1d3 1340 gimple *stmt;
c35548ce 1341 bool bit_not_p;
9991d1d3 1342 store_operand_info ();
1343};
1344
1345store_operand_info::store_operand_info ()
1346 : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
c35548ce 1347 bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
9991d1d3 1348{
1349}
1350
3d3e04ac 1351/* Struct recording the information about a single store of an immediate
1352 to memory. These are created in the first phase and coalesced into
1353 merged_store_group objects in the second phase. */
1354
251317e4 1355class store_immediate_info
3d3e04ac 1356{
251317e4 1357public:
3d3e04ac 1358 unsigned HOST_WIDE_INT bitsize;
1359 unsigned HOST_WIDE_INT bitpos;
902cb3b7 1360 unsigned HOST_WIDE_INT bitregion_start;
1361 /* This is one past the last bit of the bit region. */
1362 unsigned HOST_WIDE_INT bitregion_end;
3d3e04ac 1363 gimple *stmt;
1364 unsigned int order;
10f0d48d 1365 /* INTEGER_CST for constant stores, MEM_REF for memory copy,
1366 BIT_*_EXPR for logical bitwise operation, BIT_INSERT_EXPR
1367 for bit insertion.
509ab8cd 1368 LROTATE_EXPR if it can be only bswap optimized and
1369 ops are not really meaningful.
1370 NOP_EXPR if bswap optimization detected identity, ops
1371 are not meaningful. */
9991d1d3 1372 enum tree_code rhs_code;
509ab8cd 1373 /* Two fields for bswap optimization purposes. */
1374 struct symbolic_number n;
1375 gimple *ins_stmt;
aa0a1d29 1376 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
832a73b9 1377 bool bit_not_p;
aa0a1d29 1378 /* True if ops have been swapped and thus ops[1] represents
1379 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1380 bool ops_swapped_p;
393122a0 1381 /* The index number of the landing pad, or 0 if there is none. */
1382 int lp_nr;
9991d1d3 1383 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1384 just the first one. */
1385 store_operand_info ops[2];
f85e7cb7 1386 store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
902cb3b7 1387 unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
509ab8cd 1388 gimple *, unsigned int, enum tree_code,
393122a0 1389 struct symbolic_number &, gimple *, bool, int,
9991d1d3 1390 const store_operand_info &,
1391 const store_operand_info &);
3d3e04ac 1392};
1393
1394store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
f85e7cb7 1395 unsigned HOST_WIDE_INT bp,
902cb3b7 1396 unsigned HOST_WIDE_INT brs,
1397 unsigned HOST_WIDE_INT bre,
f85e7cb7 1398 gimple *st,
9991d1d3 1399 unsigned int ord,
1400 enum tree_code rhscode,
509ab8cd 1401 struct symbolic_number &nr,
1402 gimple *ins_stmtp,
832a73b9 1403 bool bitnotp,
393122a0 1404 int nr2,
9991d1d3 1405 const store_operand_info &op0r,
1406 const store_operand_info &op1r)
902cb3b7 1407 : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
509ab8cd 1408 stmt (st), order (ord), rhs_code (rhscode), n (nr),
393122a0 1409 ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
1410 lp_nr (nr2)
9991d1d3 1411#if __cplusplus >= 201103L
1412 , ops { op0r, op1r }
1413{
1414}
1415#else
3d3e04ac 1416{
9991d1d3 1417 ops[0] = op0r;
1418 ops[1] = op1r;
3d3e04ac 1419}
9991d1d3 1420#endif
3d3e04ac 1421
1422/* Struct representing a group of stores to contiguous memory locations.
1423 These are produced by the second phase (coalescing) and consumed in the
1424 third phase that outputs the widened stores. */
1425
251317e4 1426class merged_store_group
3d3e04ac 1427{
251317e4 1428public:
3d3e04ac 1429 unsigned HOST_WIDE_INT start;
1430 unsigned HOST_WIDE_INT width;
902cb3b7 1431 unsigned HOST_WIDE_INT bitregion_start;
1432 unsigned HOST_WIDE_INT bitregion_end;
1433 /* The size of the allocated memory for val and mask. */
3d3e04ac 1434 unsigned HOST_WIDE_INT buf_size;
902cb3b7 1435 unsigned HOST_WIDE_INT align_base;
e61263f2 1436 poly_uint64 load_align_base[2];
3d3e04ac 1437
1438 unsigned int align;
9991d1d3 1439 unsigned int load_align[2];
3d3e04ac 1440 unsigned int first_order;
1441 unsigned int last_order;
f9ceb302 1442 bool bit_insertion;
e580254a 1443 bool only_constants;
1444 unsigned int first_nonmergeable_order;
393122a0 1445 int lp_nr;
3d3e04ac 1446
902cb3b7 1447 auto_vec<store_immediate_info *> stores;
3d3e04ac 1448 /* We record the first and last original statements in the sequence because
1449 we'll need their vuse/vdef and replacement position. It's easier to keep
1450 track of them separately as 'stores' is reordered by apply_stores. */
1451 gimple *last_stmt;
1452 gimple *first_stmt;
1453 unsigned char *val;
902cb3b7 1454 unsigned char *mask;
3d3e04ac 1455
1456 merged_store_group (store_immediate_info *);
1457 ~merged_store_group ();
f9ceb302 1458 bool can_be_merged_into (store_immediate_info *);
3d3e04ac 1459 void merge_into (store_immediate_info *);
1460 void merge_overlapping (store_immediate_info *);
1461 bool apply_stores ();
902cb3b7 1462private:
1463 void do_merge (store_immediate_info *);
3d3e04ac 1464};
1465
1466/* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1467
1468static void
1469dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1470{
1471 if (!fd)
1472 return;
1473
1474 for (unsigned int i = 0; i < len; i++)
10f0d48d 1475 fprintf (fd, "%02x ", ptr[i]);
3d3e04ac 1476 fprintf (fd, "\n");
1477}
1478
3d3e04ac 1479/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
1480 bits between adjacent elements. AMNT should be within
1481 [0, BITS_PER_UNIT).
1482 Example, AMNT = 2:
1483 00011111|11100000 << 2 = 01111111|10000000
1484 PTR[1] | PTR[0] PTR[1] | PTR[0]. */
1485
1486static void
1487shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
1488{
1489 if (amnt == 0)
1490 return;
1491
1492 unsigned char carry_over = 0U;
b1c71535 1493 unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
3d3e04ac 1494 unsigned char clear_mask = (~0U) << amnt;
1495
1496 for (unsigned int i = 0; i < sz; i++)
1497 {
1498 unsigned prev_carry_over = carry_over;
b1c71535 1499 carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
3d3e04ac 1500
1501 ptr[i] <<= amnt;
1502 if (i != 0)
1503 {
1504 ptr[i] &= clear_mask;
1505 ptr[i] |= prev_carry_over;
1506 }
1507 }
1508}
1509
1510/* Like shift_bytes_in_array but for big-endian.
1511 Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
1512 bits between adjacent elements. AMNT should be within
1513 [0, BITS_PER_UNIT).
1514 Example, AMNT = 2:
1515 00011111|11100000 >> 2 = 00000111|11111000
1516 PTR[0] | PTR[1] PTR[0] | PTR[1]. */
1517
1518static void
1519shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
1520 unsigned int amnt)
1521{
1522 if (amnt == 0)
1523 return;
1524
1525 unsigned char carry_over = 0U;
1526 unsigned char carry_mask = ~(~0U << amnt);
1527
1528 for (unsigned int i = 0; i < sz; i++)
1529 {
1530 unsigned prev_carry_over = carry_over;
b1c71535 1531 carry_over = ptr[i] & carry_mask;
3d3e04ac 1532
a425d9af 1533 carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
1534 ptr[i] >>= amnt;
1535 ptr[i] |= prev_carry_over;
3d3e04ac 1536 }
1537}
1538
1539/* Clear out LEN bits starting from bit START in the byte array
 1540 PTR. This clears the bits to the *right* of START.
1541 START must be within [0, BITS_PER_UNIT) and counts starting from
1542 the least significant bit. */
1543
1544static void
1545clear_bit_region_be (unsigned char *ptr, unsigned int start,
1546 unsigned int len)
1547{
1548 if (len == 0)
1549 return;
1550 /* Clear len bits to the right of start. */
1551 else if (len <= start + 1)
1552 {
1553 unsigned char mask = (~(~0U << len));
1554 mask = mask << (start + 1U - len);
1555 ptr[0] &= ~mask;
1556 }
1557 else if (start != BITS_PER_UNIT - 1)
1558 {
1559 clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1560 clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1561 len - (start % BITS_PER_UNIT) - 1);
1562 }
1563 else if (start == BITS_PER_UNIT - 1
1564 && len > BITS_PER_UNIT)
1565 {
1566 unsigned int nbytes = len / BITS_PER_UNIT;
902cb3b7 1567 memset (ptr, 0, nbytes);
3d3e04ac 1568 if (len % BITS_PER_UNIT != 0)
1569 clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1570 len % BITS_PER_UNIT);
1571 }
1572 else
1573 gcc_unreachable ();
1574}
1575
1576/* In the byte array PTR clear the bit region starting at bit
 1577 START that is LEN bits wide.
1578 For regions spanning multiple bytes do this recursively until we reach
1579 zero LEN or a region contained within a single byte. */
1580
1581static void
1582clear_bit_region (unsigned char *ptr, unsigned int start,
1583 unsigned int len)
1584{
1585 /* Degenerate base case. */
1586 if (len == 0)
1587 return;
1588 else if (start >= BITS_PER_UNIT)
1589 clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1590 /* Second base case. */
1591 else if ((start + len) <= BITS_PER_UNIT)
1592 {
b1c71535 1593 unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
3d3e04ac 1594 mask >>= BITS_PER_UNIT - (start + len);
1595
1596 ptr[0] &= ~mask;
1597
1598 return;
1599 }
1600 /* Clear most significant bits in a byte and proceed with the next byte. */
1601 else if (start != 0)
1602 {
1603 clear_bit_region (ptr, start, BITS_PER_UNIT - start);
3d6071e9 1604 clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
3d3e04ac 1605 }
1606 /* Whole bytes need to be cleared. */
1607 else if (start == 0 && len > BITS_PER_UNIT)
1608 {
1609 unsigned int nbytes = len / BITS_PER_UNIT;
7839cdcc 1610 /* We could recurse on each byte but we clear whole bytes, so a simple
1611 memset will do. */
b1c71535 1612 memset (ptr, '\0', nbytes);
3d3e04ac 1613 /* Clear the remaining sub-byte region if there is one. */
1614 if (len % BITS_PER_UNIT != 0)
1615 clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1616 }
1617 else
1618 gcc_unreachable ();
1619}
1620
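/* Worked example (added for illustration): clear_bit_region (ptr, 2, 11)
   clears bits 2..7 of ptr[0] and bits 0..4 of ptr[1], where bit 0 is the
   least significant bit of each byte (11 bits in total, starting at bit 2
   of the first byte).  */
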
1621/* Write BITLEN bits of EXPR to the byte array PTR at
1622 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1623 Return true if the operation succeeded. */
1624
1625static bool
1626encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
b1c71535 1627 unsigned int total_bytes)
3d3e04ac 1628{
1629 unsigned int first_byte = bitpos / BITS_PER_UNIT;
a425d9af 1630 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1631 || (bitpos % BITS_PER_UNIT)
517be012 1632 || !int_mode_for_size (bitlen, 0).exists ());
e1eef457 1633 bool empty_ctor_p
1634 = (TREE_CODE (expr) == CONSTRUCTOR
1635 && CONSTRUCTOR_NELTS (expr) == 0
1636 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1637 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
3d3e04ac 1638
1639 if (!sub_byte_op_p)
e1eef457 1640 {
1641 if (first_byte >= total_bytes)
1642 return false;
1643 total_bytes -= first_byte;
1644 if (empty_ctor_p)
1645 {
1646 unsigned HOST_WIDE_INT rhs_bytes
1647 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1648 if (rhs_bytes > total_bytes)
1649 return false;
1650 memset (ptr + first_byte, '\0', rhs_bytes);
1651 return true;
1652 }
1653 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1654 }
3d3e04ac 1655
1656 /* LITTLE-ENDIAN
1657 We are writing a non byte-sized quantity or at a position that is not
1658 at a byte boundary.
1659 |--------|--------|--------| ptr + first_byte
1660 ^ ^
1661 xxx xxxxxxxx xxx< bp>
1662 |______EXPR____|
1663
b1c71535 1664 First native_encode_expr EXPR into a temporary buffer and shift each
3d3e04ac 1665 byte in the buffer by 'bp' (carrying the bits over as necessary).
1666 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1667 <------bitlen---->< bp>
1668 Then we clear the destination bits:
1669 |---00000|00000000|000-----| ptr + first_byte
1670 <-------bitlen--->< bp>
1671
1672 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1673 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1674
1675 BIG-ENDIAN
1676 We are writing a non byte-sized quantity or at a position that is not
1677 at a byte boundary.
1678 ptr + first_byte |--------|--------|--------|
1679 ^ ^
1680 <bp >xxx xxxxxxxx xxx
1681 |_____EXPR_____|
1682
b1c71535 1683 First native_encode_expr EXPR into a temporary buffer and shift each
3d3e04ac 1684 byte in the buffer to the right by (carrying the bits over as necessary).
1685 We shift by as much as needed to align the most significant bit of EXPR
1686 with bitpos:
1687 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1688 <---bitlen----> <bp ><-----bitlen----->
1689 Then we clear the destination bits:
1690 ptr + first_byte |-----000||00000000||00000---|
1691 <bp ><-------bitlen----->
1692
1693 Finally we OR the bytes of the shifted EXPR into the cleared region:
1694 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1695 The awkwardness comes from the fact that bitpos is counted from the
1696 most significant bit of a byte. */
1697
d2401312 1698 /* We must be dealing with fixed-size data at this point, since the
1699 total size is also fixed. */
e1eef457 1700 unsigned int byte_size;
1701 if (empty_ctor_p)
1702 {
1703 unsigned HOST_WIDE_INT rhs_bytes
1704 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1705 if (rhs_bytes > total_bytes)
1706 return false;
1707 byte_size = rhs_bytes;
1708 }
1709 else
1710 {
1711 fixed_size_mode mode
1712 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1713 byte_size = GET_MODE_SIZE (mode);
1714 }
3d3e04ac 1715 /* Allocate an extra byte so that we have space to shift into. */
e1eef457 1716 byte_size++;
3d3e04ac 1717 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
b1c71535 1718 memset (tmpbuf, '\0', byte_size);
3d3e04ac 1719 /* The store detection code should only have allowed constants that are
e1eef457 1720 accepted by native_encode_expr or empty ctors. */
1721 if (!empty_ctor_p
1722 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
3d3e04ac 1723 gcc_unreachable ();
1724
1725 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1726 bytes to write. This means it can write more than
1727 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1728 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1729 bitlen and zero out the bits that are not relevant as well (that may
1730 contain a sign bit due to sign-extension). */
1731 unsigned int padding
1732 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
a425d9af 1733 /* On big-endian the padding is at the 'front' so just skip the initial
1734 bytes. */
1735 if (BYTES_BIG_ENDIAN)
1736 tmpbuf += padding;
1737
1738 byte_size -= padding;
1739
1740 if (bitlen % BITS_PER_UNIT != 0)
3d3e04ac 1741 {
5e922e43 1742 if (BYTES_BIG_ENDIAN)
a425d9af 1743 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1744 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1745 else
1746 clear_bit_region (tmpbuf, bitlen,
1747 byte_size * BITS_PER_UNIT - bitlen);
3d3e04ac 1748 }
a425d9af 1749 /* Left shifting relies on the last byte being zero if bitlen is
1750 a multiple of BITS_PER_UNIT, which might not be the case if
1751 there are padding bytes. */
1752 else if (!BYTES_BIG_ENDIAN)
1753 tmpbuf[byte_size - 1] = '\0';
3d3e04ac 1754
1755 /* Clear the bit region in PTR where the bits from TMPBUF will be
b1c71535 1756 inserted. */
3d3e04ac 1757 if (BYTES_BIG_ENDIAN)
1758 clear_bit_region_be (ptr + first_byte,
1759 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1760 else
1761 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1762
1763 int shift_amnt;
1764 int bitlen_mod = bitlen % BITS_PER_UNIT;
1765 int bitpos_mod = bitpos % BITS_PER_UNIT;
1766
1767 bool skip_byte = false;
1768 if (BYTES_BIG_ENDIAN)
1769 {
1770 /* BITPOS and BITLEN are exactly aligned and no shifting
1771 is necessary. */
1772 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1773 || (bitpos_mod == 0 && bitlen_mod == 0))
1774 shift_amnt = 0;
1775 /* |. . . . . . . .|
1776 <bp > <blen >.
1777 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
1778 of the value until it aligns with 'bp' in the next byte over. */
1779 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
1780 {
1781 shift_amnt = bitlen_mod + bitpos_mod;
1782 skip_byte = bitlen_mod != 0;
1783 }
1784 /* |. . . . . . . .|
1785 <----bp--->
1786 <---blen---->.
1787 Shift the value right within the same byte so it aligns with 'bp'. */
1788 else
1789 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
1790 }
1791 else
1792 shift_amnt = bitpos % BITS_PER_UNIT;
1793
1794 /* Create the shifted version of EXPR. */
1795 if (!BYTES_BIG_ENDIAN)
b1c71535 1796 {
1797 shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
1798 if (shift_amnt == 0)
1799 byte_size--;
1800 }
3d3e04ac 1801 else
1802 {
1803 gcc_assert (BYTES_BIG_ENDIAN);
1804 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
1805 /* If shifting right forced us to move into the next byte skip the now
1806 empty byte. */
1807 if (skip_byte)
1808 {
1809 tmpbuf++;
1810 byte_size--;
1811 }
1812 }
1813
1814 /* Insert the bits from TMPBUF. */
1815 for (unsigned int i = 0; i < byte_size; i++)
1816 ptr[first_byte + i] |= tmpbuf[i];
1817
1818 return true;
1819}
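/* A simplified stand-alone sketch of the little-endian path above, added for
   illustration only; it assumes BITS_PER_UNIT == 8, that VALBUF already holds
   the native encoding of the value with all bits above BITLEN cleared, and it
   ignores padding, big-endian targets and empty constructors.  insert_bits_le
   and valbuf are hypothetical names, not part of the GCC sources:

     static void
     insert_bits_le (unsigned char *ptr, const unsigned char *valbuf,
                     unsigned int bitpos, unsigned int bitlen)
     {
       unsigned int first_byte = bitpos / 8;
       unsigned int bp = bitpos % 8;
       unsigned int nbytes = (bp + bitlen + 7) / 8;
       unsigned char tmp[9] = { 0 };  /* A 64-bit value plus shift spill.  */

       memcpy (tmp, valbuf, (bitlen + 7) / 8);
       /* Shift the buffer left by BP bits, carrying bits across bytes.  */
       for (unsigned int i = nbytes; i-- > 0; )
         tmp[i] = (tmp[i] << bp) | (i && bp ? tmp[i - 1] >> (8 - bp) : 0);
       /* Clear the destination region and OR in the shifted value.  */
       clear_bit_region (ptr + first_byte, bp, bitlen);
       for (unsigned int i = 0; i < nbytes; i++)
         ptr[first_byte + i] |= tmp[i];
     }

   For instance, inserting the 5-bit value 0x16 at BITPOS 11 gives
   first_byte == 1 and bp == 3; 0x16 is shifted to 0xb0, bits 3..7 of ptr[1]
   are cleared and ptr[1] |= 0xb0 puts the least significant bit of the value
   at bit 11 of the byte array.  */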
1820
1821/* Sorting function for store_immediate_info objects.
1822 Sorts them by bitposition. */
1823
1824static int
1825sort_by_bitpos (const void *x, const void *y)
1826{
1827 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1828 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1829
61d052e5 1830 if ((*tmp)->bitpos < (*tmp2)->bitpos)
3d3e04ac 1831 return -1;
1832 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
1833 return 1;
61d052e5 1834 else
ca4982c2 1835 /* If the bit positions are equal, fall back to the order, which is
1836 guaranteed to be different. */
1837 return (*tmp)->order - (*tmp2)->order;
3d3e04ac 1838}
1839
1840/* Sorting function for store_immediate_info objects.
1841 Sorts them by the order field. */
1842
1843static int
1844sort_by_order (const void *x, const void *y)
1845{
1846 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
1847 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
1848
1849 if ((*tmp)->order < (*tmp2)->order)
1850 return -1;
1851 else if ((*tmp)->order > (*tmp2)->order)
1852 return 1;
1853
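  /* Two different stores in a chain never share an order value (see the
     comment in sort_by_bitpos above), so equality cannot happen here.  */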
1854 gcc_unreachable ();
1855}
1856
1857/* Initialize a merged_store_group object from a store_immediate_info
1858 object. */
1859
1860merged_store_group::merged_store_group (store_immediate_info *info)
1861{
1862 start = info->bitpos;
1863 width = info->bitsize;
902cb3b7 1864 bitregion_start = info->bitregion_start;
1865 bitregion_end = info->bitregion_end;
3d3e04ac 1866 /* VAL has memory allocated for it in apply_stores once the group
1867 width has been finalized. */
1868 val = NULL;
902cb3b7 1869 mask = NULL;
10f0d48d 1870 bit_insertion = false;
e580254a 1871 only_constants = info->rhs_code == INTEGER_CST;
1872 first_nonmergeable_order = ~0U;
393122a0 1873 lp_nr = info->lp_nr;
902cb3b7 1874 unsigned HOST_WIDE_INT align_bitpos = 0;
1875 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1876 &align, &align_bitpos);
1877 align_base = start - align_bitpos;
9991d1d3 1878 for (int i = 0; i < 2; ++i)
1879 {
1880 store_operand_info &op = info->ops[i];
1881 if (op.base_addr == NULL_TREE)
1882 {
1883 load_align[i] = 0;
1884 load_align_base[i] = 0;
1885 }
1886 else
1887 {
1888 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
1889 load_align_base[i] = op.bitpos - align_bitpos;
1890 }
1891 }
3d3e04ac 1892 stores.create (1);
1893 stores.safe_push (info);
1894 last_stmt = info->stmt;
1895 last_order = info->order;
1896 first_stmt = last_stmt;
1897 first_order = last_order;
1898 buf_size = 0;
1899}
1900
1901merged_store_group::~merged_store_group ()
1902{
1903 if (val)
1904 XDELETEVEC (val);
1905}
1906
f9ceb302 1907/* Return true if the store described by INFO can be merged into the group. */
1908
1909bool
1910merged_store_group::can_be_merged_into (store_immediate_info *info)
1911{
1912 /* Do not merge bswap patterns. */
1913 if (info->rhs_code == LROTATE_EXPR)
1914 return false;
1915
393122a0 1916 if (info->lp_nr != lp_nr)
1917 return false;
1918
f9ceb302 1919 /* The canonical case. */
1920 if (info->rhs_code == stores[0]->rhs_code)
1921 return true;
1922
1923 /* BIT_INSERT_EXPR is compatible with INTEGER_CST. */
1924 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
1925 return true;
1926
1927 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
1928 return true;
1929
1930 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
1931 if (info->rhs_code == MEM_REF
1932 && (stores[0]->rhs_code == INTEGER_CST
1933 || stores[0]->rhs_code == BIT_INSERT_EXPR)
1934 && info->bitregion_start == stores[0]->bitregion_start
1935 && info->bitregion_end == stores[0]->bitregion_end)
1936 return true;
1937
1938 if (stores[0]->rhs_code == MEM_REF
1939 && (info->rhs_code == INTEGER_CST
1940 || info->rhs_code == BIT_INSERT_EXPR)
1941 && info->bitregion_start == stores[0]->bitregion_start
1942 && info->bitregion_end == stores[0]->bitregion_end)
1943 return true;
1944
1945 return false;
1946}
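/* For example, a group started by a constant store such as s.a = 1
   (INTEGER_CST) can accept a later bit-field store recorded as
   BIT_INSERT_EXPR, whereas a plain copy like s.b = t.b (MEM_REF) can only
   join a constant or bit-insert group when both cover exactly the same
   bit region.  */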
1947
902cb3b7 1948/* Helper method for merge_into and merge_overlapping to do
1949 the common part. */
f9ceb302 1950
3d3e04ac 1951void
902cb3b7 1952merged_store_group::do_merge (store_immediate_info *info)
3d3e04ac 1953{
902cb3b7 1954 bitregion_start = MIN (bitregion_start, info->bitregion_start);
1955 bitregion_end = MAX (bitregion_end, info->bitregion_end);
1956
1957 unsigned int this_align;
1958 unsigned HOST_WIDE_INT align_bitpos = 0;
1959 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
1960 &this_align, &align_bitpos);
1961 if (this_align > align)
1962 {
1963 align = this_align;
1964 align_base = info->bitpos - align_bitpos;
1965 }
9991d1d3 1966 for (int i = 0; i < 2; ++i)
1967 {
1968 store_operand_info &op = info->ops[i];
1969 if (!op.base_addr)
1970 continue;
1971
1972 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
1973 if (this_align > load_align[i])
1974 {
1975 load_align[i] = this_align;
1976 load_align_base[i] = op.bitpos - align_bitpos;
1977 }
1978 }
3d3e04ac 1979
3d3e04ac 1980 gimple *stmt = info->stmt;
1981 stores.safe_push (info);
1982 if (info->order > last_order)
1983 {
1984 last_order = info->order;
1985 last_stmt = stmt;
1986 }
1987 else if (info->order < first_order)
1988 {
1989 first_order = info->order;
1990 first_stmt = stmt;
1991 }
e580254a 1992 if (info->rhs_code != INTEGER_CST)
1993 only_constants = false;
3d3e04ac 1994}
1995
902cb3b7 1996/* Merge a store recorded by INFO into this merged store.
1997 The store is not overlapping with the existing recorded
1998 stores. */
1999
2000void
2001merged_store_group::merge_into (store_immediate_info *info)
2002{
902cb3b7 2003 /* Make sure we're inserting in the position we think we're inserting. */
2004 gcc_assert (info->bitpos >= start + width
2005 && info->bitregion_start <= bitregion_end);
2006
cf5e422b 2007 width = info->bitpos + info->bitsize - start;
902cb3b7 2008 do_merge (info);
2009}
2010
3d3e04ac 2011/* Merge a store described by INFO into this merged store.
2012 INFO overlaps in some way with the current store (i.e. it's not the
2013 contiguous case, which is handled by merged_store_group::merge_into). */
2014
2015void
2016merged_store_group::merge_overlapping (store_immediate_info *info)
2017{
3d3e04ac 2018 /* If the store extends the size of the group, extend the width. */
902cb3b7 2019 if (info->bitpos + info->bitsize > start + width)
cf5e422b 2020 width = info->bitpos + info->bitsize - start;
3d3e04ac 2021
902cb3b7 2022 do_merge (info);
3d3e04ac 2023}
2024
2025/* Go through all the recorded stores in this group in program order and
2026 apply their values to the VAL byte array to create the final merged
2027 value. Return true if the operation succeeded. */
2028
2029bool
2030merged_store_group::apply_stores ()
2031{
902cb3b7 2032 /* Make sure we have more than one store in the group, otherwise we cannot
2033 merge anything. */
2034 if (bitregion_start % BITS_PER_UNIT != 0
2035 || bitregion_end % BITS_PER_UNIT != 0
3d3e04ac 2036 || stores.length () == 1)
2037 return false;
2038
2039 stores.qsort (sort_by_order);
902cb3b7 2040 store_immediate_info *info;
3d3e04ac 2041 unsigned int i;
10f0d48d 2042 /* Create a power-of-2-sized buffer for native_encode_expr. */
2043 buf_size = 1 << ceil_log2 ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
902cb3b7 2044 val = XNEWVEC (unsigned char, 2 * buf_size);
2045 mask = val + buf_size;
2046 memset (val, 0, buf_size);
2047 memset (mask, ~0U, buf_size);
3d3e04ac 2048
2049 FOR_EACH_VEC_ELT (stores, i, info)
2050 {
902cb3b7 2051 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
10f0d48d 2052 tree cst;
9991d1d3 2053 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2054 cst = info->ops[0].val;
2055 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2056 cst = info->ops[1].val;
10f0d48d 2057 else
2058 cst = NULL_TREE;
9991d1d3 2059 bool ret = true;
2060 if (cst)
10f0d48d 2061 {
2062 if (info->rhs_code == BIT_INSERT_EXPR)
2063 bit_insertion = true;
2064 else
2065 ret = encode_tree_to_bitpos (cst, val, info->bitsize,
2066 pos_in_buffer, buf_size);
2067 }
2068 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2069 if (BYTES_BIG_ENDIAN)
2070 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2071 - (pos_in_buffer % BITS_PER_UNIT)),
2072 info->bitsize);
2073 else
2074 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
9991d1d3 2075 if (cst && dump_file && (dump_flags & TDF_DETAILS))
3d3e04ac 2076 {
2077 if (ret)
2078 {
10f0d48d 2079 fputs ("After writing ", dump_file);
54e7de93 2080 print_generic_expr (dump_file, cst, TDF_NONE);
3d3e04ac 2081 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
10f0d48d 2082 " at position %d\n", info->bitsize, pos_in_buffer);
2083 fputs (" the merged value contains ", dump_file);
3d3e04ac 2084 dump_char_array (dump_file, val, buf_size);
10f0d48d 2085 fputs (" the merged mask contains ", dump_file);
2086 dump_char_array (dump_file, mask, buf_size);
2087 if (bit_insertion)
2088 fputs (" bit insertion is required\n", dump_file);
3d3e04ac 2089 }
2090 else
2091 fprintf (dump_file, "Failed to merge stores\n");
509ab8cd 2092 }
3d3e04ac 2093 if (!ret)
2094 return false;
2095 }
509ab8cd 2096 stores.qsort (sort_by_bitpos);
3d3e04ac 2097 return true;
2098}
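/* As an illustration, for a group whose bit region spans 4 bytes and which
   records just the two constant byte stores p[0] = 0x12 and p[1] = 0x34,
   apply_stores produces
     val  = { 0x12, 0x34, 0x00, 0x00 }
     mask = { 0x00, 0x00, 0xff, 0xff }
   i.e. MASK keeps ~0 exactly in the bytes the group never writes.  */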
2099
2100/* Structure describing the store chain. */
2101
251317e4 2102class imm_store_chain_info
3d3e04ac 2103{
251317e4 2104public:
3a3ba7de 2105 /* Doubly-linked list that imposes an order on chain processing.
2106 PNXP (prev's next pointer) points to the head of a list, or to
2107 the next field in the previous chain in the list.
2108 See pass_store_merging::m_stores_head for more rationale. */
2109 imm_store_chain_info *next, **pnxp;
f85e7cb7 2110 tree base_addr;
902cb3b7 2111 auto_vec<store_immediate_info *> m_store_info;
3d3e04ac 2112 auto_vec<merged_store_group *> m_merged_store_groups;
2113
3a3ba7de 2114 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2115 : next (inspt), pnxp (&inspt), base_addr (b_a)
2116 {
2117 inspt = this;
2118 if (next)
2119 {
2120 gcc_checking_assert (pnxp == next->pnxp);
2121 next->pnxp = &next;
2122 }
2123 }
2124 ~imm_store_chain_info ()
2125 {
2126 *pnxp = next;
2127 if (next)
2128 {
2129 gcc_checking_assert (&next == next->pnxp);
2130 next->pnxp = pnxp;
2131 }
2132 }
f85e7cb7 2133 bool terminate_and_process_chain ();
509ab8cd 2134 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int);
3d3e04ac 2135 bool coalesce_immediate_stores ();
f85e7cb7 2136 bool output_merged_store (merged_store_group *);
2137 bool output_merged_stores ();
3d3e04ac 2138};
2139
2140const pass_data pass_data_tree_store_merging = {
2141 GIMPLE_PASS, /* type */
2142 "store-merging", /* name */
2143 OPTGROUP_NONE, /* optinfo_flags */
2144 TV_GIMPLE_STORE_MERGING, /* tv_id */
2145 PROP_ssa, /* properties_required */
2146 0, /* properties_provided */
2147 0, /* properties_destroyed */
2148 0, /* todo_flags_start */
2149 TODO_update_ssa, /* todo_flags_finish */
2150};
2151
2152class pass_store_merging : public gimple_opt_pass
2153{
2154public:
2155 pass_store_merging (gcc::context *ctxt)
2d27e5c1 2156 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
3d3e04ac 2157 {
2158 }
2159
10f0d48d 2160 /* Pass not supported for PDP-endian, nor for insane hosts or
2161 target character sizes where native_{encode,interpret}_expr
902cb3b7 2162 doesn't work properly. */
3d3e04ac 2163 virtual bool
2164 gate (function *)
2165 {
902cb3b7 2166 return flag_store_merging
10f0d48d 2167 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
902cb3b7 2168 && CHAR_BIT == 8
2169 && BITS_PER_UNIT == 8;
3d3e04ac 2170 }
2171
2172 virtual unsigned int execute (function *);
2173
2174private:
2e966e2a 2175 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
3d3e04ac 2176
3a3ba7de 2177 /* Form a doubly-linked stack of the elements of m_stores, so that
2178 we can iterate over them in a predictable way. Using this order
2179 avoids extraneous differences in the compiler output just because
2180 of tree pointer variations (e.g. different chains end up in
2181 different positions of m_stores, so they are handled in different
2182 orders, so they allocate or release SSA names in different
2183 orders, and when they get reused, subsequent passes end up
2184 getting different SSA names, which may ultimately change
2185 decisions when going out of SSA). */
2186 imm_store_chain_info *m_stores_head;
2187
393122a0 2188 bool process_store (gimple *);
2189 bool terminate_and_process_chain (imm_store_chain_info *);
c35548ce 2190 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
393122a0 2191 bool terminate_and_process_all_chains ();
3d3e04ac 2192}; // class pass_store_merging
2193
2194/* Terminate and process all recorded chains. Return true if any changes
2195 were made. */
2196
2197bool
2198pass_store_merging::terminate_and_process_all_chains ()
2199{
3d3e04ac 2200 bool ret = false;
3a3ba7de 2201 while (m_stores_head)
393122a0 2202 ret |= terminate_and_process_chain (m_stores_head);
9a78b979 2203 gcc_assert (m_stores.is_empty ());
3d3e04ac 2204 return ret;
2205}
2206
c35548ce 2207/* Terminate all chains that are affected by the statement STMT.
2208 CHAIN_INFO, if non-NULL, is a chain that the checks should skip.
393122a0 2209 Return true if any changes were made. */
3d3e04ac 2210
2211bool
4de7f8df 2212pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
f85e7cb7 2213 **chain_info,
3d3e04ac 2214 gimple *stmt)
2215{
2216 bool ret = false;
2217
2218 /* If the statement doesn't touch memory it can't alias. */
2219 if (!gimple_vuse (stmt))
2220 return false;
2221
9fead2ab 2222 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
c35548ce 2223 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
3d3e04ac 2224 {
c35548ce 2225 next = cur->next;
2226
2227 /* We already checked all the stores in chain_info and terminated the
2228 chain if necessary. Skip it here. */
2229 if (chain_info && *chain_info == cur)
2230 continue;
2231
9991d1d3 2232 store_immediate_info *info;
2233 unsigned int i;
c35548ce 2234 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
3d3e04ac 2235 {
9fead2ab 2236 tree lhs = gimple_assign_lhs (info->stmt);
2237 if (ref_maybe_used_by_stmt_p (stmt, lhs)
2238 || stmt_may_clobber_ref_p (stmt, lhs)
2239 || (store_lhs && refs_output_dependent_p (store_lhs, lhs)))
3d3e04ac 2240 {
9991d1d3 2241 if (dump_file && (dump_flags & TDF_DETAILS))
3d3e04ac 2242 {
9991d1d3 2243 fprintf (dump_file, "stmt causes chain termination:\n");
2244 print_gimple_stmt (dump_file, stmt, 0);
3d3e04ac 2245 }
393122a0 2246 ret |= terminate_and_process_chain (cur);
9991d1d3 2247 break;
3d3e04ac 2248 }
2249 }
2250 }
2251
3d3e04ac 2252 return ret;
2253}
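/* For instance, if a chain has recorded the stores t.a = 1; t.b = 2; and the
   next statement is the load _3 = t.a; then ref_maybe_used_by_stmt_p detects
   that the load may read one of the recorded locations, so the whole chain is
   terminated (and possibly emitted as a merged store) before the load is
   processed further.  */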
2254
2255 /* Helper function. Terminate the recorded chain CHAIN_INFO, which stores
2256 to some base object. Return true if the merging and output was successful.
2257 The m_stores entry for that base is removed after the processing in any case. */
2258
2259bool
393122a0 2260pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
3d3e04ac 2261{
f85e7cb7 2262 bool ret = chain_info->terminate_and_process_chain ();
2263 m_stores.remove (chain_info->base_addr);
2264 delete chain_info;
3d3e04ac 2265 return ret;
2266}
2267
9991d1d3 2268/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
393122a0 2269 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2270 be able to sink load of REF across stores between FIRST and LAST, up
2271 to right before LAST. */
9991d1d3 2272
2273bool
2274stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2275{
2276 ao_ref r;
2277 ao_ref_init (&r, ref);
2278 unsigned int count = 0;
2279 tree vop = gimple_vdef (last);
2280 gimple *stmt;
2281
393122a0 2282 /* Return true conservatively if the basic blocks are different. */
2283 if (gimple_bb (first) != gimple_bb (last))
2284 return true;
2285
9991d1d3 2286 do
2287 {
2288 stmt = SSA_NAME_DEF_STMT (vop);
2289 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2290 return true;
509ab8cd 2291 if (gimple_store_p (stmt)
2292 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2293 return true;
9991d1d3 2294 /* Avoid quadratic compile time by bounding the number of checks
2295 we perform. */
2296 if (++count > MAX_STORE_ALIAS_CHECKS)
2297 return true;
2298 vop = gimple_vuse (stmt);
2299 }
2300 while (stmt != first);
393122a0 2301
9991d1d3 2302 return false;
2303}
2304
2305/* Return true if INFO->ops[IDX] is mergeable with the
2306 corresponding loads already in MERGED_STORE group.
2307 BASE_ADDR is the base address of the whole store group. */
2308
2309bool
2310compatible_load_p (merged_store_group *merged_store,
2311 store_immediate_info *info,
2312 tree base_addr, int idx)
2313{
2314 store_immediate_info *infof = merged_store->stores[0];
2315 if (!info->ops[idx].base_addr
e61263f2 2316 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2317 info->bitpos - infof->bitpos)
9991d1d3 2318 || !operand_equal_p (info->ops[idx].base_addr,
2319 infof->ops[idx].base_addr, 0))
2320 return false;
2321
2322 store_immediate_info *infol = merged_store->stores.last ();
2323 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2324 /* In this case all vuses should be the same, e.g.
2325 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2326 or
2327 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2328 and we can emit the coalesced load next to any of those loads. */
2329 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2330 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2331 return true;
2332
2333 /* Otherwise, at least for now require that the load has the same
2334 vuse as the store. See following examples. */
2335 if (gimple_vuse (info->stmt) != load_vuse)
2336 return false;
2337
2338 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2339 || (infof != infol
2340 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2341 return false;
2342
2343 /* If the load is from the same location as the store, already
2344 the construction of the immediate chain info guarantees no intervening
2345 stores, so no further checks are needed. Example:
2346 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
e61263f2 2347 if (known_eq (info->ops[idx].bitpos, info->bitpos)
9991d1d3 2348 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2349 return true;
2350
2351 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2352 of the stores in the group, or any other stores in between those.
2353 Previous calls to compatible_load_p ensured that for all the
2354 merged_store->stores IDX loads, no stmts starting with
2355 merged_store->first_stmt and ending right before merged_store->last_stmt
2356 clobbers those loads. */
2357 gimple *first = merged_store->first_stmt;
2358 gimple *last = merged_store->last_stmt;
2359 unsigned int i;
2360 store_immediate_info *infoc;
2361 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2362 comes before the so far first load, we'll be changing
2363 merged_store->first_stmt. In that case we need to give up if
2364 any of the earlier processed loads are clobbered by the stmts in the
2365 new range. */
2366 if (info->order < merged_store->first_order)
2367 {
2368 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2369 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2370 return false;
2371 first = info->stmt;
2372 }
2373 /* Similarly, we could change merged_store->last_stmt, so ensure
2374 in that case no stmts in the new range clobber any of the earlier
2375 processed loads. */
2376 else if (info->order > merged_store->last_order)
2377 {
2378 FOR_EACH_VEC_ELT (merged_store->stores, i, infoc)
2379 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2380 return false;
2381 last = info->stmt;
2382 }
2383 /* And finally, we'd be adding a new load to the set, ensure it isn't
2384 clobbered in the new range. */
2385 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2386 return false;
2387
2388 /* Otherwise, we are looking for:
2389 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2390 or
2391 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2392 return true;
2393}
2394
509ab8cd 2395/* Add all refs loaded to compute VAL to REFS vector. */
2396
2397void
2398gather_bswap_load_refs (vec<tree> *refs, tree val)
2399{
2400 if (TREE_CODE (val) != SSA_NAME)
2401 return;
2402
2403 gimple *stmt = SSA_NAME_DEF_STMT (val);
2404 if (!is_gimple_assign (stmt))
2405 return;
2406
2407 if (gimple_assign_load_p (stmt))
2408 {
2409 refs->safe_push (gimple_assign_rhs1 (stmt));
2410 return;
2411 }
2412
2413 switch (gimple_assign_rhs_class (stmt))
2414 {
2415 case GIMPLE_BINARY_RHS:
2416 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2417 /* FALLTHRU */
2418 case GIMPLE_UNARY_RHS:
2419 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2420 break;
2421 default:
2422 gcc_unreachable ();
2423 }
2424}
2425
cf5e422b 2426/* Check if there are any stores in M_STORE_INFO after index I
2427 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2428 a potential group ending at END and have their order
2429 smaller than LAST_ORDER. RHS_CODE is the kind of store in the
2430 group. Return true if there are no such stores.
2431 Consider:
2432 MEM[(long long int *)p_28] = 0;
2433 MEM[(long long int *)p_28 + 8B] = 0;
2434 MEM[(long long int *)p_28 + 16B] = 0;
2435 MEM[(long long int *)p_28 + 24B] = 0;
2436 _129 = (int) _130;
2437 MEM[(int *)p_28 + 8B] = _129;
2438 MEM[(int *)p_28].a = -1;
2439 We already have
2440 MEM[(long long int *)p_28] = 0;
2441 MEM[(int *)p_28].a = -1;
2442 stmts in the current group and need to consider if it is safe to
2443 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2444 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2445 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2446 into the group and merging of those 3 stores is successful, merged
2447 stmts will be emitted at the latest store from that group, i.e.
2448 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2449 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2450 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2451 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2452 into the group. That way it will be its own store group and will
2453 not be touched. If RHS_CODE is INTEGER_CST and there are overlapping
2454 INTEGER_CST stores, those are mergeable using merge_overlapping,
2455 so don't return false for those. */
2456
2457static bool
2458check_no_overlap (vec<store_immediate_info *> m_store_info, unsigned int i,
2459 enum tree_code rhs_code, unsigned int last_order,
2460 unsigned HOST_WIDE_INT end)
2461{
2462 unsigned int len = m_store_info.length ();
2463 for (++i; i < len; ++i)
2464 {
2465 store_immediate_info *info = m_store_info[i];
2466 if (info->bitpos >= end)
2467 break;
2468 if (info->order < last_order
2469 && (rhs_code != INTEGER_CST || info->rhs_code != INTEGER_CST))
2470 return false;
2471 }
2472 return true;
2473}
2474
509ab8cd 2475/* Return true if m_store_info[first] and at least one following store
2476 form a group which stores a try_size bitsize value that is byte swapped
2477 from a memory load or some other value, or is the identity of some value.
2478 This uses the bswap pass APIs. */
2479
2480bool
2481imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2482 unsigned int first,
2483 unsigned int try_size)
2484{
2485 unsigned int len = m_store_info.length (), last = first;
2486 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2487 if (width >= try_size)
2488 return false;
2489 for (unsigned int i = first + 1; i < len; ++i)
2490 {
2491 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
2492 || m_store_info[i]->ins_stmt == NULL)
2493 return false;
2494 width += m_store_info[i]->bitsize;
2495 if (width >= try_size)
2496 {
2497 last = i;
2498 break;
2499 }
2500 }
2501 if (width != try_size)
2502 return false;
2503
2504 bool allow_unaligned
2505 = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
2506 /* Punt if the combined store would not be aligned and we need alignment. */
2507 if (!allow_unaligned)
2508 {
2509 unsigned int align = merged_store->align;
2510 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2511 for (unsigned int i = first + 1; i <= last; ++i)
2512 {
2513 unsigned int this_align;
2514 unsigned HOST_WIDE_INT align_bitpos = 0;
2515 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2516 &this_align, &align_bitpos);
2517 if (this_align > align)
2518 {
2519 align = this_align;
2520 align_base = m_store_info[i]->bitpos - align_bitpos;
2521 }
2522 }
2523 unsigned HOST_WIDE_INT align_bitpos
2524 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2525 if (align_bitpos)
2526 align = least_bit_hwi (align_bitpos);
2527 if (align < try_size)
2528 return false;
2529 }
2530
2531 tree type;
2532 switch (try_size)
2533 {
2534 case 16: type = uint16_type_node; break;
2535 case 32: type = uint32_type_node; break;
2536 case 64: type = uint64_type_node; break;
2537 default: gcc_unreachable ();
2538 }
2539 struct symbolic_number n;
2540 gimple *ins_stmt = NULL;
2541 int vuse_store = -1;
2542 unsigned int first_order = merged_store->first_order;
2543 unsigned int last_order = merged_store->last_order;
2544 gimple *first_stmt = merged_store->first_stmt;
2545 gimple *last_stmt = merged_store->last_stmt;
cf5e422b 2546 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
509ab8cd 2547 store_immediate_info *infof = m_store_info[first];
2548
2549 for (unsigned int i = first; i <= last; ++i)
2550 {
2551 store_immediate_info *info = m_store_info[i];
2552 struct symbolic_number this_n = info->n;
2553 this_n.type = type;
2554 if (!this_n.base_addr)
2555 this_n.range = try_size / BITS_PER_UNIT;
58cff6a2 2556 else
2557 /* Update vuse in case it has been changed by output_merged_stores. */
2558 this_n.vuse = gimple_vuse (info->ins_stmt);
509ab8cd 2559 unsigned int bitpos = info->bitpos - infof->bitpos;
2560 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2561 BYTES_BIG_ENDIAN
2562 ? try_size - info->bitsize - bitpos
2563 : bitpos))
2564 return false;
1636105f 2565 if (this_n.base_addr && vuse_store)
509ab8cd 2566 {
2567 unsigned int j;
2568 for (j = first; j <= last; ++j)
2569 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2570 break;
2571 if (j > last)
2572 {
2573 if (vuse_store == 1)
2574 return false;
2575 vuse_store = 0;
2576 }
2577 }
2578 if (i == first)
2579 {
2580 n = this_n;
2581 ins_stmt = info->ins_stmt;
2582 }
2583 else
2584 {
cf5e422b 2585 if (n.base_addr && n.vuse != this_n.vuse)
509ab8cd 2586 {
cf5e422b 2587 if (vuse_store == 0)
2588 return false;
2589 vuse_store = 1;
509ab8cd 2590 }
cf5e422b 2591 if (info->order > last_order)
2592 {
2593 last_order = info->order;
2594 last_stmt = info->stmt;
2595 }
2596 else if (info->order < first_order)
2597 {
2598 first_order = info->order;
2599 first_stmt = info->stmt;
2600 }
2601 end = MAX (end, info->bitpos + info->bitsize);
509ab8cd 2602
2603 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
2604 &this_n, &n);
2605 if (ins_stmt == NULL)
2606 return false;
2607 }
2608 }
2609
2610 uint64_t cmpxchg, cmpnop;
2611 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop);
2612
2613 /* A complete byte swap should make the symbolic number start with
2614 the largest digit in the highest order byte. An unchanged symbolic
2615 number indicates a read with the same endianness as the target. */
2616 if (n.n != cmpnop && n.n != cmpxchg)
2617 return false;
2618
2619 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2620 return false;
2621
cf5e422b 2622 if (!check_no_overlap (m_store_info, last, LROTATE_EXPR, last_order, end))
2623 return false;
2624
509ab8cd 2625 /* Don't handle memory copy this way if normal non-bswap processing
2626 would handle it too. */
2627 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2628 {
2629 unsigned int i;
2630 for (i = first; i <= last; ++i)
2631 if (m_store_info[i]->rhs_code != MEM_REF)
2632 break;
2633 if (i == last + 1)
2634 return false;
2635 }
2636
2637 if (n.n == cmpxchg)
2638 switch (try_size)
2639 {
2640 case 16:
2641 /* Will emit LROTATE_EXPR. */
2642 break;
2643 case 32:
2644 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2645 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2646 break;
2647 return false;
2648 case 64:
2649 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2650 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2651 break;
2652 return false;
2653 default:
2654 gcc_unreachable ();
2655 }
2656
2657 if (!allow_unaligned && n.base_addr)
2658 {
2659 unsigned int align = get_object_alignment (n.src);
2660 if (align < try_size)
2661 return false;
2662 }
2663
2664 /* If each load has the vuse of the corresponding store, we need to verify
2665 the loads can be sunk right before the last store. */
2666 if (vuse_store == 1)
2667 {
2668 auto_vec<tree, 64> refs;
2669 for (unsigned int i = first; i <= last; ++i)
2670 gather_bswap_load_refs (&refs,
2671 gimple_assign_rhs1 (m_store_info[i]->stmt));
2672
2673 unsigned int i;
2674 tree ref;
2675 FOR_EACH_VEC_ELT (refs, i, ref)
2676 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2677 return false;
2678 n.vuse = NULL_TREE;
2679 }
2680
2681 infof->n = n;
2682 infof->ins_stmt = ins_stmt;
2683 for (unsigned int i = first; i <= last; ++i)
2684 {
2685 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
2686 m_store_info[i]->ops[0].base_addr = NULL_TREE;
2687 m_store_info[i]->ops[1].base_addr = NULL_TREE;
2688 if (i != first)
2689 merged_store->merge_into (m_store_info[i]);
2690 }
2691
2692 return true;
2693}
2694
3d3e04ac 2695/* Go through the candidate stores recorded in m_store_info and merge them
2696 into merged_store_group objects recorded into m_merged_store_groups
2697 representing the widened stores. Return true if coalescing was successful
2698 and the number of widened stores is fewer than the original number
2699 of stores. */
2700
2701bool
2702imm_store_chain_info::coalesce_immediate_stores ()
2703{
2704 /* Anything less can't be processed. */
2705 if (m_store_info.length () < 2)
2706 return false;
2707
2708 if (dump_file && (dump_flags & TDF_DETAILS))
10f0d48d 2709 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
3d3e04ac 2710 m_store_info.length ());
2711
2712 store_immediate_info *info;
509ab8cd 2713 unsigned int i, ignore = 0;
3d3e04ac 2714
2715 /* Order the stores by the bitposition they write to. */
2716 m_store_info.qsort (sort_by_bitpos);
2717
2718 info = m_store_info[0];
2719 merged_store_group *merged_store = new merged_store_group (info);
10f0d48d 2720 if (dump_file && (dump_flags & TDF_DETAILS))
2721 fputs ("New store group\n", dump_file);
3d3e04ac 2722
2723 FOR_EACH_VEC_ELT (m_store_info, i, info)
2724 {
e1eef457 2725 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
2726
509ab8cd 2727 if (i <= ignore)
10f0d48d 2728 goto done;
3d3e04ac 2729
509ab8cd 2730 /* First try to handle group of stores like:
2731 p[0] = data >> 24;
2732 p[1] = data >> 16;
2733 p[2] = data >> 8;
2734 p[3] = data;
2735 using the bswap framework. */
2736 if (info->bitpos == merged_store->start + merged_store->width
2737 && merged_store->stores.length () == 1
2738 && merged_store->stores[0]->ins_stmt != NULL
2739 && info->ins_stmt != NULL)
2740 {
2741 unsigned int try_size;
2742 for (try_size = 64; try_size >= 16; try_size >>= 1)
2743 if (try_coalesce_bswap (merged_store, i - 1, try_size))
2744 break;
2745
2746 if (try_size >= 16)
2747 {
2748 ignore = i + merged_store->stores.length () - 1;
2749 m_merged_store_groups.safe_push (merged_store);
2750 if (ignore < m_store_info.length ())
2751 merged_store = new merged_store_group (m_store_info[ignore]);
2752 else
2753 merged_store = NULL;
10f0d48d 2754 goto done;
509ab8cd 2755 }
2756 }
2757
e1eef457 2758 new_bitregion_start
2759 = MIN (merged_store->bitregion_start, info->bitregion_start);
2760 new_bitregion_end
2761 = MAX (merged_store->bitregion_end, info->bitregion_end);
2762
2763 if (info->order >= merged_store->first_nonmergeable_order
2764 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
2765 > (unsigned) PARAM_VALUE (PARAM_STORE_MERGING_MAX_SIZE)))
e580254a 2766 ;
2767
3d3e04ac 2768 /* |---store 1---|
2769 |---store 2---|
509ab8cd 2770 Overlapping stores. */
e580254a 2771 else if (IN_RANGE (info->bitpos, merged_store->start,
2772 merged_store->start + merged_store->width - 1))
3d3e04ac 2773 {
9991d1d3 2774 /* Only allow overlapping stores of constants. */
393122a0 2775 if (info->rhs_code == INTEGER_CST
2776 && merged_store->only_constants
2777 && info->lp_nr == merged_store->lp_nr)
9991d1d3 2778 {
a1cf1dfe 2779 unsigned int last_order
2780 = MAX (merged_store->last_order, info->order);
2781 unsigned HOST_WIDE_INT end
2782 = MAX (merged_store->start + merged_store->width,
2783 info->bitpos + info->bitsize);
e580254a 2784 if (check_no_overlap (m_store_info, i, INTEGER_CST,
2785 last_order, end))
a1cf1dfe 2786 {
2787 /* check_no_overlap call above made sure there are no
2788 overlapping stores with non-INTEGER_CST rhs_code
2789 in between the first and last of the stores we've
2790 just merged. If there are any INTEGER_CST rhs_code
2791 stores in between, we need to merge_overlapping them
2792 even if in the sort_by_bitpos order there are other
2793 overlapping stores in between. Keep those stores as is.
2794 Example:
2795 MEM[(int *)p_28] = 0;
2796 MEM[(char *)p_28 + 3B] = 1;
2797 MEM[(char *)p_28 + 1B] = 2;
2798 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
2799 We can't merge the zero store with the store of two and
2800 not merge anything else, because the store of one is
2801 in the original order in between those two, but in
2802 sort_by_bitpos order it comes after the last store that
2803 we can't merge with them. We can merge the first 3 stores
2804 and keep the last store as is though. */
e580254a 2805 unsigned int len = m_store_info.length ();
2806 unsigned int try_order = last_order;
2807 unsigned int first_nonmergeable_order;
2808 unsigned int k;
2809 bool last_iter = false;
2810 int attempts = 0;
2811 do
a1cf1dfe 2812 {
e580254a 2813 unsigned int max_order = 0;
2814 unsigned first_nonmergeable_int_order = ~0U;
2815 unsigned HOST_WIDE_INT this_end = end;
2816 k = i;
2817 first_nonmergeable_order = ~0U;
2818 for (unsigned int j = i + 1; j < len; ++j)
a1cf1dfe 2819 {
e580254a 2820 store_immediate_info *info2 = m_store_info[j];
2821 if (info2->bitpos >= this_end)
2822 break;
2823 if (info2->order < try_order)
a1cf1dfe 2824 {
e580254a 2825 if (info2->rhs_code != INTEGER_CST)
2826 {
2827 /* Normally check_no_overlap makes sure this
2828 doesn't happen, but if end grows below,
2829 then we need to process more stores than
2830 check_no_overlap verified. Example:
2831 MEM[(int *)p_5] = 0;
2832 MEM[(short *)p_5 + 3B] = 1;
2833 MEM[(char *)p_5 + 4B] = _9;
2834 MEM[(char *)p_5 + 2B] = 2; */
2835 k = 0;
2836 break;
2837 }
2838 k = j;
2839 this_end = MAX (this_end,
2840 info2->bitpos + info2->bitsize);
a1cf1dfe 2841 }
e580254a 2842 else if (info2->rhs_code == INTEGER_CST
2843 && !last_iter)
2844 {
2845 max_order = MAX (max_order, info2->order + 1);
2846 first_nonmergeable_int_order
2847 = MIN (first_nonmergeable_int_order,
2848 info2->order);
2849 }
2850 else
2851 first_nonmergeable_order
2852 = MIN (first_nonmergeable_order, info2->order);
a1cf1dfe 2853 }
e580254a 2854 if (k == 0)
2855 {
2856 if (last_order == try_order)
2857 break;
2858 /* If this failed, but only because we grew
2859 try_order, retry with the last working one,
2860 so that we merge at least something. */
2861 try_order = last_order;
2862 last_iter = true;
2863 continue;
2864 }
2865 last_order = try_order;
2866 /* Retry with a larger try_order to see if we could
2867 merge some further INTEGER_CST stores. */
2868 if (max_order
2869 && (first_nonmergeable_int_order
2870 < first_nonmergeable_order))
2871 {
2872 try_order = MIN (max_order,
2873 first_nonmergeable_order);
2874 try_order
2875 = MIN (try_order,
2876 merged_store->first_nonmergeable_order);
2877 if (try_order > last_order && ++attempts < 16)
2878 continue;
2879 }
2880 first_nonmergeable_order
2881 = MIN (first_nonmergeable_order,
2882 first_nonmergeable_int_order);
2883 end = this_end;
2884 break;
a1cf1dfe 2885 }
e580254a 2886 while (1);
a1cf1dfe 2887
2888 if (k != 0)
2889 {
2890 merged_store->merge_overlapping (info);
2891
e580254a 2892 merged_store->first_nonmergeable_order
2893 = MIN (merged_store->first_nonmergeable_order,
2894 first_nonmergeable_order);
2895
a1cf1dfe 2896 for (unsigned int j = i + 1; j <= k; j++)
2897 {
2898 store_immediate_info *info2 = m_store_info[j];
2899 gcc_assert (info2->bitpos < end);
2900 if (info2->order < last_order)
2901 {
2902 gcc_assert (info2->rhs_code == INTEGER_CST);
e580254a 2903 if (info != info2)
2904 merged_store->merge_overlapping (info2);
a1cf1dfe 2905 }
2906 /* Other stores are kept and not merged in any
2907 way. */
2908 }
2909 ignore = k;
2910 goto done;
2911 }
2912 }
9991d1d3 2913 }
3d3e04ac 2914 }
9991d1d3 2915 /* |---store 1---||---store 2---|
2916 This store is consecutive to the previous one.
2917 Merge it into the current store group. There can be gaps in between
2918 the stores, but there can't be gaps in between bitregions. */
10f0d48d 2919 else if (info->bitregion_start <= merged_store->bitregion_end
f9ceb302 2920 && merged_store->can_be_merged_into (info))
3d3e04ac 2921 {
9991d1d3 2922 store_immediate_info *infof = merged_store->stores[0];
2923
2924 /* All the rhs_code ops that take 2 operands are commutative,
2925 swap the operands if it could make the operands compatible. */
2926 if (infof->ops[0].base_addr
2927 && infof->ops[1].base_addr
2928 && info->ops[0].base_addr
2929 && info->ops[1].base_addr
e61263f2 2930 && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
2931 info->bitpos - infof->bitpos)
9991d1d3 2932 && operand_equal_p (info->ops[1].base_addr,
2933 infof->ops[0].base_addr, 0))
aa0a1d29 2934 {
2935 std::swap (info->ops[0], info->ops[1]);
2936 info->ops_swapped_p = true;
2937 }
f9ceb302 2938 if (check_no_overlap (m_store_info, i, info->rhs_code,
c7b16284 2939 MAX (merged_store->last_order, info->order),
2940 MAX (merged_store->start + merged_store->width,
f9ceb302 2941 info->bitpos + info->bitsize)))
9991d1d3 2942 {
f9ceb302 2943 /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores. */
2944 if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
2945 {
2946 info->rhs_code = BIT_INSERT_EXPR;
2947 info->ops[0].val = gimple_assign_rhs1 (info->stmt);
2948 info->ops[0].base_addr = NULL_TREE;
2949 }
2950 else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
2951 {
2952 store_immediate_info *infoj;
2953 unsigned int j;
2954 FOR_EACH_VEC_ELT (merged_store->stores, j, infoj)
2955 {
2956 infoj->rhs_code = BIT_INSERT_EXPR;
2957 infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
2958 infoj->ops[0].base_addr = NULL_TREE;
2959 }
2960 }
2961 if ((infof->ops[0].base_addr
2962 ? compatible_load_p (merged_store, info, base_addr, 0)
2963 : !info->ops[0].base_addr)
2964 && (infof->ops[1].base_addr
2965 ? compatible_load_p (merged_store, info, base_addr, 1)
2966 : !info->ops[1].base_addr))
2967 {
2968 merged_store->merge_into (info);
2969 goto done;
2970 }
9991d1d3 2971 }
2972 }
3d3e04ac 2973
9991d1d3 2974 /* |---store 1---| <gap> |---store 2---|.
2975 Gap between stores or the rhs not compatible. Start a new group. */
3d3e04ac 2976
9991d1d3 2977 /* Try to apply all the stores recorded for the group to determine
2978 the bitpattern they write and discard it if that fails.
2979 This will also reject single-store groups. */
10f0d48d 2980 if (merged_store->apply_stores ())
9991d1d3 2981 m_merged_store_groups.safe_push (merged_store);
10f0d48d 2982 else
2983 delete merged_store;
3d3e04ac 2984
9991d1d3 2985 merged_store = new merged_store_group (info);
10f0d48d 2986 if (dump_file && (dump_flags & TDF_DETAILS))
2987 fputs ("New store group\n", dump_file);
2988
2989 done:
2990 if (dump_file && (dump_flags & TDF_DETAILS))
2991 {
2992 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
2993 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
2994 i, info->bitsize, info->bitpos);
2995 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
2996 fputc ('\n', dump_file);
2997 }
3d3e04ac 2998 }
2999
902cb3b7 3000 /* Record or discard the last store group. */
509ab8cd 3001 if (merged_store)
3002 {
10f0d48d 3003 if (merged_store->apply_stores ())
509ab8cd 3004 m_merged_store_groups.safe_push (merged_store);
10f0d48d 3005 else
3006 delete merged_store;
509ab8cd 3007 }
3d3e04ac 3008
3009 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
10f0d48d 3010
3d3e04ac 3011 bool success
3012 = !m_merged_store_groups.is_empty ()
3013 && m_merged_store_groups.length () < m_store_info.length ();
3014
3015 if (success && dump_file)
10f0d48d 3016 fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
902cb3b7 3017 m_merged_store_groups.length ());
3d3e04ac 3018
3019 return success;
3020}
3021
9991d1d3 3022/* Return the type to use for the merged stores or loads described by STMTS.
3023 This is needed to get the alias sets right. If IS_LOAD, look for rhs,
3024 otherwise lhs. Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
3025 of the MEM_REFs if any. */
3d3e04ac 3026
3027static tree
9991d1d3 3028get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
3029 unsigned short *cliquep, unsigned short *basep)
3d3e04ac 3030{
3031 gimple *stmt;
3032 unsigned int i;
9991d1d3 3033 tree type = NULL_TREE;
3034 tree ret = NULL_TREE;
3035 *cliquep = 0;
3036 *basep = 0;
3d3e04ac 3037
3038 FOR_EACH_VEC_ELT (stmts, i, stmt)
3039 {
9991d1d3 3040 tree ref = is_load ? gimple_assign_rhs1 (stmt)
3041 : gimple_assign_lhs (stmt);
3042 tree type1 = reference_alias_ptr_type (ref);
3043 tree base = get_base_address (ref);
3d3e04ac 3044
9991d1d3 3045 if (i == 0)
3046 {
3047 if (TREE_CODE (base) == MEM_REF)
3048 {
3049 *cliquep = MR_DEPENDENCE_CLIQUE (base);
3050 *basep = MR_DEPENDENCE_BASE (base);
3051 }
3052 ret = type = type1;
3053 continue;
3054 }
3d3e04ac 3055 if (!alias_ptr_types_compatible_p (type, type1))
9991d1d3 3056 ret = ptr_type_node;
3057 if (TREE_CODE (base) != MEM_REF
3058 || *cliquep != MR_DEPENDENCE_CLIQUE (base)
3059 || *basep != MR_DEPENDENCE_BASE (base))
3060 {
3061 *cliquep = 0;
3062 *basep = 0;
3063 }
3d3e04ac 3064 }
9991d1d3 3065 return ret;
3d3e04ac 3066}
3067
3068/* Return the location_t information we can find among the statements
3069 in STMTS. */
3070
3071static location_t
9991d1d3 3072get_location_for_stmts (vec<gimple *> &stmts)
3d3e04ac 3073{
3074 gimple *stmt;
3075 unsigned int i;
3076
3077 FOR_EACH_VEC_ELT (stmts, i, stmt)
3078 if (gimple_has_location (stmt))
3079 return gimple_location (stmt);
3080
3081 return UNKNOWN_LOCATION;
3082}
3083
3084 /* Used to describe a store resulting from splitting a wide store into
3085 smaller regularly-sized stores in split_group. */
3086
251317e4 3087class split_store
3d3e04ac 3088{
251317e4 3089public:
3d3e04ac 3090 unsigned HOST_WIDE_INT bytepos;
3091 unsigned HOST_WIDE_INT size;
3092 unsigned HOST_WIDE_INT align;
9991d1d3 3093 auto_vec<store_immediate_info *> orig_stores;
902cb3b7 3094 /* True if there is a single orig stmt covering the whole split store. */
3095 bool orig;
3d3e04ac 3096 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
3097 unsigned HOST_WIDE_INT);
3098};
3099
3100/* Simple constructor. */
3101
3102split_store::split_store (unsigned HOST_WIDE_INT bp,
3103 unsigned HOST_WIDE_INT sz,
3104 unsigned HOST_WIDE_INT al)
902cb3b7 3105 : bytepos (bp), size (sz), align (al), orig (false)
3d3e04ac 3106{
9991d1d3 3107 orig_stores.create (0);
3d3e04ac 3108}
3109
9991d1d3 3110/* Record all stores in GROUP that write to the region starting at BITPOS and
3111 is of size BITSIZE. Record infos for such statements in STORES if
3112 non-NULL. The stores in GROUP must be sorted by bitposition. Return INFO
3113 if there is exactly one original store in the range. */
3d3e04ac 3114
902cb3b7 3115static store_immediate_info *
2e966e2a 3116find_constituent_stores (class merged_store_group *group,
9991d1d3 3117 vec<store_immediate_info *> *stores,
3118 unsigned int *first,
3119 unsigned HOST_WIDE_INT bitpos,
3120 unsigned HOST_WIDE_INT bitsize)
3d3e04ac 3121{
902cb3b7 3122 store_immediate_info *info, *ret = NULL;
3d3e04ac 3123 unsigned int i;
902cb3b7 3124 bool second = false;
3125 bool update_first = true;
3d3e04ac 3126 unsigned HOST_WIDE_INT end = bitpos + bitsize;
902cb3b7 3127 for (i = *first; group->stores.iterate (i, &info); ++i)
3d3e04ac 3128 {
3129 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
3130 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
902cb3b7 3131 if (stmt_end <= bitpos)
3132 {
3133 /* BITPOS passed to this function never decreases within the
3134 same split_group call, so optimize and don't scan info records
3135 which are known to end before or at BITPOS next time.
3136 Only do it if all stores before this one also pass this. */
3137 if (update_first)
3138 *first = i + 1;
3139 continue;
3140 }
3141 else
3142 update_first = false;
3143
3d3e04ac 3144 /* The stores in GROUP are ordered by bitposition so if we're past
902cb3b7 3145 the region for this group return early. */
3146 if (stmt_start >= end)
3147 return ret;
3148
9991d1d3 3149 if (stores)
902cb3b7 3150 {
9991d1d3 3151 stores->safe_push (info);
902cb3b7 3152 if (ret)
3153 {
3154 ret = NULL;
3155 second = true;
3156 }
3157 }
3158 else if (ret)
3159 return NULL;
3160 if (!second)
3161 ret = info;
3d3e04ac 3162 }
902cb3b7 3163 return ret;
3d3e04ac 3164}
3165
871a91ec 3166/* Return how many SSA_NAMEs used to compute value to store in the INFO
3167 store have multiple uses. If any SSA_NAME has multiple uses, also
3168 count statements needed to compute it. */
3169
3170static unsigned
3171count_multiple_uses (store_immediate_info *info)
3172{
3173 gimple *stmt = info->stmt;
3174 unsigned ret = 0;
3175 switch (info->rhs_code)
3176 {
3177 case INTEGER_CST:
3178 return 0;
3179 case BIT_AND_EXPR:
3180 case BIT_IOR_EXPR:
3181 case BIT_XOR_EXPR:
832a73b9 3182 if (info->bit_not_p)
3183 {
3184 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3185 ret = 1; /* Fall through below to return
3186 the BIT_NOT_EXPR stmt and then
3187 BIT_{AND,IOR,XOR}_EXPR and anything it
3188 uses. */
3189 else
3190 /* After this, stmt is the BIT_NOT_EXPR. */
3191 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3192 }
871a91ec 3193 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3194 {
3195 ret += 1 + info->ops[0].bit_not_p;
3196 if (info->ops[1].base_addr)
3197 ret += 1 + info->ops[1].bit_not_p;
3198 return ret + 1;
3199 }
3200 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3201 /* stmt is now the BIT_*_EXPR. */
3202 if (!has_single_use (gimple_assign_rhs1 (stmt)))
aa0a1d29 3203 ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
3204 else if (info->ops[info->ops_swapped_p].bit_not_p)
871a91ec 3205 {
3206 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3207 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3208 ++ret;
3209 }
3210 if (info->ops[1].base_addr == NULL_TREE)
aa0a1d29 3211 {
3212 gcc_checking_assert (!info->ops_swapped_p);
3213 return ret;
3214 }
871a91ec 3215 if (!has_single_use (gimple_assign_rhs2 (stmt)))
aa0a1d29 3216 ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
3217 else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
871a91ec 3218 {
3219 gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
3220 if (!has_single_use (gimple_assign_rhs1 (stmt2)))
3221 ++ret;
3222 }
3223 return ret;
3224 case MEM_REF:
3225 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3226 return 1 + info->ops[0].bit_not_p;
3227 else if (info->ops[0].bit_not_p)
3228 {
3229 stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
3230 if (!has_single_use (gimple_assign_rhs1 (stmt)))
3231 return 1;
3232 }
3233 return 0;
10f0d48d 3234 case BIT_INSERT_EXPR:
3235 return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
871a91ec 3236 default:
3237 gcc_unreachable ();
3238 }
3239}
3240
3d3e04ac 3241/* Split a merged store described by GROUP by populating the SPLIT_STORES
902cb3b7 3242 vector (if non-NULL) with split_store structs describing the byte offset
3243 (from the base), the bit size and alignment of each store as well as the
3244 original statements involved in each such split group.
3d3e04ac 3245 This is to separate the splitting strategy from the statement
3246 building/emission/linking done in output_merged_store.
902cb3b7 3247 Return number of new stores.
9991d1d3 3248 If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
3249 If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
e1eef457 3250 BZERO_FIRST may be true only when the first store covers the whole group
3251 and clears it; if BZERO_FIRST is true, keep that first store in the set
3252 unmodified and emit further stores for the overrides only.
902cb3b7 3253 If SPLIT_STORES is NULL, it is just a dry run to count number of
3254 new stores. */
3d3e04ac 3255
902cb3b7 3256static unsigned int
9991d1d3 3257split_group (merged_store_group *group, bool allow_unaligned_store,
e1eef457 3258 bool allow_unaligned_load, bool bzero_first,
2e966e2a 3259 vec<split_store *> *split_stores,
871a91ec 3260 unsigned *total_orig,
3261 unsigned *total_new)
3d3e04ac 3262{
902cb3b7 3263 unsigned HOST_WIDE_INT pos = group->bitregion_start;
3264 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
3d3e04ac 3265 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
902cb3b7 3266 unsigned HOST_WIDE_INT group_align = group->align;
3267 unsigned HOST_WIDE_INT align_base = group->align_base;
9991d1d3 3268 unsigned HOST_WIDE_INT group_load_align = group_align;
871a91ec 3269 bool any_orig = false;
3d3e04ac 3270
3d3e04ac 3271 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
3272
509ab8cd 3273 if (group->stores[0]->rhs_code == LROTATE_EXPR
3274 || group->stores[0]->rhs_code == NOP_EXPR)
3275 {
e1eef457 3276 gcc_assert (!bzero_first);
509ab8cd 3277 /* For bswap framework using sets of stores, all the checking
3278 has been done earlier in try_coalesce_bswap and needs to be
3279 emitted as a single store. */
3280 if (total_orig)
3281 {
3282 /* Avoid the old/new stmt count heuristics. It should be
3283 always beneficial. */
3284 total_new[0] = 1;
3285 total_orig[0] = 2;
3286 }
3287
3288 if (split_stores)
3289 {
3290 unsigned HOST_WIDE_INT align_bitpos
3291 = (group->start - align_base) & (group_align - 1);
3292 unsigned HOST_WIDE_INT align = group_align;
3293 if (align_bitpos)
3294 align = least_bit_hwi (align_bitpos);
3295 bytepos = group->start / BITS_PER_UNIT;
2e966e2a 3296 split_store *store
509ab8cd 3297 = new split_store (bytepos, group->width, align);
3298 unsigned int first = 0;
3299 find_constituent_stores (group, &store->orig_stores,
3300 &first, group->start, group->width);
3301 split_stores->safe_push (store);
3302 }
3303
3304 return 1;
3305 }
3306
902cb3b7 3307 unsigned int ret = 0, first = 0;
3d3e04ac 3308 unsigned HOST_WIDE_INT try_pos = bytepos;
3d3e04ac 3309
871a91ec 3310 if (total_orig)
3311 {
3312 unsigned int i;
3313 store_immediate_info *info = group->stores[0];
3314
3315 total_new[0] = 0;
3316 total_orig[0] = 1; /* The orig store. */
3317 info = group->stores[0];
3318 if (info->ops[0].base_addr)
9deedf62 3319 total_orig[0]++;
871a91ec 3320 if (info->ops[1].base_addr)
9deedf62 3321 total_orig[0]++;
871a91ec 3322 switch (info->rhs_code)
3323 {
3324 case BIT_AND_EXPR:
3325 case BIT_IOR_EXPR:
3326 case BIT_XOR_EXPR:
3327 total_orig[0]++; /* The orig BIT_*_EXPR stmt. */
3328 break;
3329 default:
3330 break;
3331 }
3332 total_orig[0] *= group->stores.length ();
3333
3334 FOR_EACH_VEC_ELT (group->stores, i, info)
9deedf62 3335 {
3336 total_new[0] += count_multiple_uses (info);
3337 total_orig[0] += (info->bit_not_p
3338 + info->ops[0].bit_not_p
3339 + info->ops[1].bit_not_p);
3340 }
871a91ec 3341 }
3342
9991d1d3 3343 if (!allow_unaligned_load)
3344 for (int i = 0; i < 2; ++i)
3345 if (group->load_align[i])
3346 group_load_align = MIN (group_load_align, group->load_align[i]);
3347
e1eef457 3348 if (bzero_first)
3349 {
3350 first = 1;
3351 ret = 1;
3352 if (split_stores)
3353 {
2e966e2a 3354 split_store *store
e1eef457 3355 = new split_store (bytepos, group->stores[0]->bitsize, align_base);
3356 store->orig_stores.safe_push (group->stores[0]);
3357 store->orig = true;
3358 any_orig = true;
3359 split_stores->safe_push (store);
3360 }
3361 }
3362
3d3e04ac 3363 while (size > 0)
3364 {
9991d1d3 3365 if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
e1eef457 3366 && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
3367 || (bzero_first && group->val[try_pos - bytepos] == 0)))
902cb3b7 3368 {
3369 /* Skip padding bytes. */
3370 ++try_pos;
3371 size -= BITS_PER_UNIT;
3372 continue;
3373 }
3374
3d3e04ac 3375 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
902cb3b7 3376 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
3377 unsigned HOST_WIDE_INT align_bitpos
3378 = (try_bitpos - align_base) & (group_align - 1);
3379 unsigned HOST_WIDE_INT align = group_align;
3380 if (align_bitpos)
3381 align = least_bit_hwi (align_bitpos);
9991d1d3 3382 if (!allow_unaligned_store)
902cb3b7 3383 try_size = MIN (try_size, align);
9991d1d3 3384 if (!allow_unaligned_load)
3385 {
3386 /* If we can't do or don't want to do unaligned stores
3387 as well as loads, we need to take the loads into account
3388 as well. */
3389 unsigned HOST_WIDE_INT load_align = group_load_align;
3390 align_bitpos = (try_bitpos - align_base) & (load_align - 1);
3391 if (align_bitpos)
3392 load_align = least_bit_hwi (align_bitpos);
3393 for (int i = 0; i < 2; ++i)
3394 if (group->load_align[i])
3395 {
e61263f2 3396 align_bitpos
3397 = known_alignment (try_bitpos
3398 - group->stores[0]->bitpos
3399 + group->stores[0]->ops[i].bitpos
3400 - group->load_align_base[i]);
3401 if (align_bitpos & (group_load_align - 1))
9991d1d3 3402 {
3403 unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
3404 load_align = MIN (load_align, a);
3405 }
3406 }
3407 try_size = MIN (try_size, load_align);
3408 }
902cb3b7 3409 store_immediate_info *info
9991d1d3 3410 = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
902cb3b7 3411 if (info)
3412 {
3413 /* If there is just one original statement for the range, see if
3414 we can just reuse the original store which could be even larger
3415 than try_size. */
3416 unsigned HOST_WIDE_INT stmt_end
3417 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
9991d1d3 3418 info = find_constituent_stores (group, NULL, &first, try_bitpos,
3419 stmt_end - try_bitpos);
902cb3b7 3420 if (info && info->bitpos >= try_bitpos)
3421 {
3422 try_size = stmt_end - try_bitpos;
3423 goto found;
3424 }
3425 }
3d3e04ac 3426
902cb3b7 3427 /* Approximate store bitsize for the case when there are no padding
3428 bits. */
3429 while (try_size > size)
3430 try_size /= 2;
3431 /* Now look for whole padding bytes at the end of that bitsize. */
3432 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
3433 if (group->mask[try_pos - bytepos + nonmasked - 1]
e1eef457 3434 != (unsigned char) ~0U
3435 && (!bzero_first
3436 || group->val[try_pos - bytepos + nonmasked - 1] != 0))
902cb3b7 3437 break;
3438 if (nonmasked == 0)
3439 {
3440 /* If entire try_size range is padding, skip it. */
3441 try_pos += try_size / BITS_PER_UNIT;
3442 size -= try_size;
3443 continue;
3444 }
 3445      /* Otherwise try to decrease try_size if the second half, the last 3
 3446	 quarters etc. are padding.  */
3447 nonmasked *= BITS_PER_UNIT;
3448 while (nonmasked <= try_size / 2)
3449 try_size /= 2;
9991d1d3 3450 if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
902cb3b7 3451 {
3452 /* Now look for whole padding bytes at the start of that bitsize. */
3453 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
3454 for (masked = 0; masked < try_bytesize; ++masked)
e1eef457 3455 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
3456 && (!bzero_first
3457 || group->val[try_pos - bytepos + masked] != 0))
902cb3b7 3458 break;
3459 masked *= BITS_PER_UNIT;
3460 gcc_assert (masked < try_size);
3461 if (masked >= try_size / 2)
3462 {
3463 while (masked >= try_size / 2)
3464 {
3465 try_size /= 2;
3466 try_pos += try_size / BITS_PER_UNIT;
3467 size -= try_size;
3468 masked -= try_size;
3469 }
3470 /* Need to recompute the alignment, so just retry at the new
3471 position. */
3472 continue;
3473 }
3474 }
3475
3476 found:
3477 ++ret;
3d3e04ac 3478
902cb3b7 3479 if (split_stores)
3480 {
2e966e2a 3481 split_store *store
902cb3b7 3482 = new split_store (try_pos, try_size, align);
9991d1d3 3483 info = find_constituent_stores (group, &store->orig_stores,
3484 &first, try_bitpos, try_size);
902cb3b7 3485 if (info
3486 && info->bitpos >= try_bitpos
3487 && info->bitpos + info->bitsize <= try_bitpos + try_size)
871a91ec 3488 {
3489 store->orig = true;
3490 any_orig = true;
3491 }
902cb3b7 3492 split_stores->safe_push (store);
3493 }
3494
3495 try_pos += try_size / BITS_PER_UNIT;
3d3e04ac 3496 size -= try_size;
3d3e04ac 3497 }
902cb3b7 3498
871a91ec 3499 if (total_orig)
3500 {
9deedf62 3501 unsigned int i;
2e966e2a 3502 split_store *store;
871a91ec 3503 /* If we are reusing some original stores and any of the
3504 original SSA_NAMEs had multiple uses, we need to subtract
3505 those now before we add the new ones. */
3506 if (total_new[0] && any_orig)
3507 {
871a91ec 3508 FOR_EACH_VEC_ELT (*split_stores, i, store)
3509 if (store->orig)
3510 total_new[0] -= count_multiple_uses (store->orig_stores[0]);
3511 }
3512 total_new[0] += ret; /* The new store. */
3513 store_immediate_info *info = group->stores[0];
3514 if (info->ops[0].base_addr)
9deedf62 3515 total_new[0] += ret;
871a91ec 3516 if (info->ops[1].base_addr)
9deedf62 3517 total_new[0] += ret;
871a91ec 3518 switch (info->rhs_code)
3519 {
3520 case BIT_AND_EXPR:
3521 case BIT_IOR_EXPR:
3522 case BIT_XOR_EXPR:
3523 total_new[0] += ret; /* The new BIT_*_EXPR stmt. */
3524 break;
3525 default:
3526 break;
3527 }
9deedf62 3528 FOR_EACH_VEC_ELT (*split_stores, i, store)
3529 {
3530 unsigned int j;
3531 bool bit_not_p[3] = { false, false, false };
3532 /* If all orig_stores have certain bit_not_p set, then
3533 we'd use a BIT_NOT_EXPR stmt and need to account for it.
3534 If some orig_stores have certain bit_not_p set, then
3535 we'd use a BIT_XOR_EXPR with a mask and need to account for
3536 it. */
3537 FOR_EACH_VEC_ELT (store->orig_stores, j, info)
3538 {
3539 if (info->ops[0].bit_not_p)
3540 bit_not_p[0] = true;
3541 if (info->ops[1].bit_not_p)
3542 bit_not_p[1] = true;
3543 if (info->bit_not_p)
3544 bit_not_p[2] = true;
3545 }
3546 total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
3547 }
3548
871a91ec 3549 }
3550
902cb3b7 3551 return ret;
3d3e04ac 3552}
3553
9deedf62 3554/* Return the operation through which the operand IDX (if < 2) or
 3555   result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
 3556   is done; if BIT_NOT_EXPR, all bits are inverted; if BIT_XOR_EXPR,
 3557   the bits should be xored with MASK.  */
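/* Illustrative note (added): when only some of the constituent stores were
   negated, a plain BIT_NOT_EXPR would flip too many bits, so a
   BIT_XOR_EXPR with a mask limited to the negated sub-ranges is used
   instead; e.g. for two adjacent byte stores of which only the first one
   was inverted, the computed mask covers just that byte.  */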
3558
3559static enum tree_code
3560invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
3561{
3562 unsigned int i;
3563 store_immediate_info *info;
3564 unsigned int cnt = 0;
6187b750 3565 bool any_paddings = false;
9deedf62 3566 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3567 {
3568 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3569 if (bit_not_p)
6187b750 3570 {
3571 ++cnt;
3572 tree lhs = gimple_assign_lhs (info->stmt);
3573 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3574 && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
3575 any_paddings = true;
3576 }
9deedf62 3577 }
3578 mask = NULL_TREE;
3579 if (cnt == 0)
3580 return NOP_EXPR;
6187b750 3581 if (cnt == split_store->orig_stores.length () && !any_paddings)
9deedf62 3582 return BIT_NOT_EXPR;
3583
3584 unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
3585 unsigned buf_size = split_store->size / BITS_PER_UNIT;
3586 unsigned char *buf
3587 = XALLOCAVEC (unsigned char, buf_size);
3588 memset (buf, ~0U, buf_size);
3589 FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
3590 {
3591 bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
3592 if (!bit_not_p)
3593 continue;
3594 /* Clear regions with bit_not_p and invert afterwards, rather than
3595 clear regions with !bit_not_p, so that gaps in between stores aren't
3596 set in the mask. */
3597 unsigned HOST_WIDE_INT bitsize = info->bitsize;
6187b750 3598 unsigned HOST_WIDE_INT prec = bitsize;
9deedf62 3599 unsigned int pos_in_buffer = 0;
6187b750 3600 if (any_paddings)
3601 {
3602 tree lhs = gimple_assign_lhs (info->stmt);
3603 if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3604 && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
3605 prec = TYPE_PRECISION (TREE_TYPE (lhs));
3606 }
9deedf62 3607 if (info->bitpos < try_bitpos)
3608 {
3609 gcc_assert (info->bitpos + bitsize > try_bitpos);
6187b750 3610 if (!BYTES_BIG_ENDIAN)
3611 {
3612 if (prec <= try_bitpos - info->bitpos)
3613 continue;
3614 prec -= try_bitpos - info->bitpos;
3615 }
3616 bitsize -= try_bitpos - info->bitpos;
3617 if (BYTES_BIG_ENDIAN && prec > bitsize)
3618 prec = bitsize;
9deedf62 3619 }
3620 else
3621 pos_in_buffer = info->bitpos - try_bitpos;
6187b750 3622 if (prec < bitsize)
3623 {
3624 /* If this is a bool inversion, invert just the least significant
3625 prec bits rather than all bits of it. */
3626 if (BYTES_BIG_ENDIAN)
3627 {
3628 pos_in_buffer += bitsize - prec;
3629 if (pos_in_buffer >= split_store->size)
3630 continue;
3631 }
3632 bitsize = prec;
3633 }
9deedf62 3634 if (pos_in_buffer + bitsize > split_store->size)
3635 bitsize = split_store->size - pos_in_buffer;
3636 unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
3637 if (BYTES_BIG_ENDIAN)
3638 clear_bit_region_be (p, (BITS_PER_UNIT - 1
3639 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
3640 else
3641 clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
3642 }
3643 for (unsigned int i = 0; i < buf_size; ++i)
3644 buf[i] = ~buf[i];
3645 mask = native_interpret_expr (int_type, buf, buf_size);
3646 return BIT_XOR_EXPR;
3647}
3648
3d3e04ac 3649/* Given a merged store group GROUP, output the widened version of it.
3650 The store chain is against the base object BASE.
3651 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
3652 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
3653 Make sure that the number of statements output is less than the number of
 3654   original statements.  If a better sequence is possible, emit it and
 3655   return true.  */
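/* Sketch (added for illustration, not authoritative): when the mask of a
   split store is non-zero, i.e. some bits in the region are not covered by
   the recorded stores, a masked read-modify-write is emitted, roughly:
     _1 = MEM <uintN> [dest];
     _2 = _1 & mask;		(keep the bits that are not being stored)
     _3 = src & ~mask;		(folded when src is a constant)
     _4 = _2 | _3;
     MEM <uintN> [dest] = _4;
   When the mask is zero, the store of src alone is emitted.  */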
3656
3657bool
f85e7cb7 3658imm_store_chain_info::output_merged_store (merged_store_group *group)
3d3e04ac 3659{
79b9cc46 3660 split_store *split_store;
3661 unsigned int i;
902cb3b7 3662 unsigned HOST_WIDE_INT start_byte_pos
3663 = group->bitregion_start / BITS_PER_UNIT;
3d3e04ac 3664
3665 unsigned int orig_num_stmts = group->stores.length ();
3666 if (orig_num_stmts < 2)
3667 return false;
3668
2e966e2a 3669 auto_vec<class split_store *, 32> split_stores;
9991d1d3 3670 bool allow_unaligned_store
902cb3b7 3671 = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
9991d1d3 3672 bool allow_unaligned_load = allow_unaligned_store;
e1eef457 3673 bool bzero_first = false;
3674 if (group->stores[0]->rhs_code == INTEGER_CST
3675 && TREE_CODE (gimple_assign_rhs1 (group->stores[0]->stmt)) == CONSTRUCTOR
3676 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (group->stores[0]->stmt)) == 0
3677 && group->start == group->stores[0]->bitpos
3678 && group->width == group->stores[0]->bitsize
3679 && (group->start % BITS_PER_UNIT) == 0
3680 && (group->width % BITS_PER_UNIT) == 0)
3681 bzero_first = true;
3682 if (allow_unaligned_store || bzero_first)
902cb3b7 3683 {
3684 /* If unaligned stores are allowed, see how many stores we'd emit
3685 for unaligned and how many stores we'd emit for aligned stores.
e1eef457 3686 Only use unaligned stores if it allows fewer stores than aligned.
3687 Similarly, if there is a whole region clear first, prefer expanding
3688 it together compared to expanding clear first followed by merged
3689 further stores. */
3690 unsigned cnt[4] = { ~0, ~0, ~0, ~0 };
3691 int pass_min = 0;
3692 for (int pass = 0; pass < 4; ++pass)
3693 {
3694 if (!allow_unaligned_store && (pass & 1) != 0)
3695 continue;
3696 if (!bzero_first && (pass & 2) != 0)
3697 continue;
3698 cnt[pass] = split_group (group, (pass & 1) != 0,
3699 allow_unaligned_load, (pass & 2) != 0,
3700 NULL, NULL, NULL);
3701 if (cnt[pass] < cnt[pass_min])
3702 pass_min = pass;
3703 }
3704 if ((pass_min & 1) == 0)
9991d1d3 3705 allow_unaligned_store = false;
e1eef457 3706 if ((pass_min & 2) == 0)
3707 bzero_first = false;
902cb3b7 3708 }
871a91ec 3709 unsigned total_orig, total_new;
e1eef457 3710 split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
871a91ec 3711 &split_stores, &total_orig, &total_new);
902cb3b7 3712
3713 if (split_stores.length () >= orig_num_stmts)
3714 {
3715 /* We didn't manage to reduce the number of statements. Bail out. */
3716 if (dump_file && (dump_flags & TDF_DETAILS))
871a91ec 3717 fprintf (dump_file, "Exceeded original number of stmts (%u)."
3718 " Not profitable to emit new sequence.\n",
3719 orig_num_stmts);
79b9cc46 3720 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3721 delete split_store;
902cb3b7 3722 return false;
3723 }
871a91ec 3724 if (total_orig <= total_new)
3725 {
 3726      /* If the estimated number of new statements is not smaller than the
 3727	 estimated number of original statements, bail out too.  */
3728 if (dump_file && (dump_flags & TDF_DETAILS))
3729 fprintf (dump_file, "Estimated number of original stmts (%u)"
3730 " not larger than estimated number of new"
3731 " stmts (%u).\n",
3732 total_orig, total_new);
79b9cc46 3733 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3734 delete split_store;
509ab8cd 3735 return false;
871a91ec 3736 }
3d3e04ac 3737
3738 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
3739 gimple_seq seq = NULL;
3d3e04ac 3740 tree last_vdef, new_vuse;
3741 last_vdef = gimple_vdef (group->last_stmt);
3742 new_vuse = gimple_vuse (group->last_stmt);
509ab8cd 3743 tree bswap_res = NULL_TREE;
3744
3745 if (group->stores[0]->rhs_code == LROTATE_EXPR
3746 || group->stores[0]->rhs_code == NOP_EXPR)
3747 {
3748 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
3749 gimple *ins_stmt = group->stores[0]->ins_stmt;
3750 struct symbolic_number *n = &group->stores[0]->n;
3751 bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;
3752
3753 switch (n->range)
3754 {
3755 case 16:
3756 load_type = bswap_type = uint16_type_node;
3757 break;
3758 case 32:
3759 load_type = uint32_type_node;
3760 if (bswap)
3761 {
3762 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
3763 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3764 }
3765 break;
3766 case 64:
3767 load_type = uint64_type_node;
3768 if (bswap)
3769 {
3770 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
3771 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
3772 }
3773 break;
3774 default:
3775 gcc_unreachable ();
3776 }
3777
 3778	  /* If each load has the vuse of the corresponding store,
 3779	     we've checked the aliasing already in try_coalesce_bswap and
 3780	     we want to sink the needed load into seq.  So we need to use
 3781	     new_vuse on the load.  */
58cff6a2 3782 if (n->base_addr)
509ab8cd 3783 {
58cff6a2 3784 if (n->vuse == NULL)
3785 {
3786 n->vuse = new_vuse;
3787 ins_stmt = NULL;
3788 }
3789 else
3790 /* Update vuse in case it has changed by output_merged_stores. */
3791 n->vuse = gimple_vuse (ins_stmt);
509ab8cd 3792 }
3793 bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
3794 bswap_type, load_type, n, bswap);
3795 gcc_assert (bswap_res);
3796 }
3d3e04ac 3797
3798 gimple *stmt = NULL;
9991d1d3 3799 auto_vec<gimple *, 32> orig_stmts;
509ab8cd 3800 gimple_seq this_seq;
3801 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
427223f1 3802 is_gimple_mem_ref_addr, NULL_TREE);
509ab8cd 3803 gimple_seq_add_seq_without_update (&seq, this_seq);
9991d1d3 3804
3805 tree load_addr[2] = { NULL_TREE, NULL_TREE };
3806 gimple_seq load_seq[2] = { NULL, NULL };
3807 gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
3808 for (int j = 0; j < 2; ++j)
3809 {
3810 store_operand_info &op = group->stores[0]->ops[j];
3811 if (op.base_addr == NULL_TREE)
3812 continue;
3813
3814 store_immediate_info *infol = group->stores.last ();
3815 if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
3816 {
e0099abc 3817 /* We can't pick the location randomly; while we've verified
 3818	     all the loads have the same vuse, they can still be in different
3819 basic blocks and we need to pick the one from the last bb:
3820 int x = q[0];
3821 if (x == N) return;
3822 int y = q[1];
3823 p[0] = x;
3824 p[1] = y;
3825 otherwise if we put the wider load at the q[0] load, we might
3826 segfault if q[1] is not mapped. */
3827 basic_block bb = gimple_bb (op.stmt);
3828 gimple *ostmt = op.stmt;
3829 store_immediate_info *info;
3830 FOR_EACH_VEC_ELT (group->stores, i, info)
3831 {
3832 gimple *tstmt = info->ops[j].stmt;
3833 basic_block tbb = gimple_bb (tstmt);
3834 if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
3835 {
3836 ostmt = tstmt;
3837 bb = tbb;
3838 }
3839 }
3840 load_gsi[j] = gsi_for_stmt (ostmt);
9991d1d3 3841 load_addr[j]
3842 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3843 &load_seq[j], is_gimple_mem_ref_addr,
3844 NULL_TREE);
3845 }
3846 else if (operand_equal_p (base_addr, op.base_addr, 0))
3847 load_addr[j] = addr;
3848 else
ad3e5b2f 3849 {
ad3e5b2f 3850 load_addr[j]
3851 = force_gimple_operand_1 (unshare_expr (op.base_addr),
3852 &this_seq, is_gimple_mem_ref_addr,
3853 NULL_TREE);
3854 gimple_seq_add_seq_without_update (&seq, this_seq);
3855 }
9991d1d3 3856 }
3857
3d3e04ac 3858 FOR_EACH_VEC_ELT (split_stores, i, split_store)
3859 {
3860 unsigned HOST_WIDE_INT try_size = split_store->size;
3861 unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
10f0d48d 3862 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
3d3e04ac 3863 unsigned HOST_WIDE_INT align = split_store->align;
902cb3b7 3864 tree dest, src;
3865 location_t loc;
3866 if (split_store->orig)
3867 {
3868 /* If there is just a single constituent store which covers
3869 the whole area, just reuse the lhs and rhs. */
9991d1d3 3870 gimple *orig_stmt = split_store->orig_stores[0]->stmt;
3871 dest = gimple_assign_lhs (orig_stmt);
3872 src = gimple_assign_rhs1 (orig_stmt);
3873 loc = gimple_location (orig_stmt);
902cb3b7 3874 }
3875 else
3876 {
9991d1d3 3877 store_immediate_info *info;
3878 unsigned short clique, base;
3879 unsigned int k;
3880 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3881 orig_stmts.safe_push (info->stmt);
902cb3b7 3882 tree offset_type
9991d1d3 3883 = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
3884 loc = get_location_for_stmts (orig_stmts);
3885 orig_stmts.truncate (0);
902cb3b7 3886
3887 tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
3888 int_type = build_aligned_type (int_type, align);
3889 dest = fold_build2 (MEM_REF, int_type, addr,
3890 build_int_cst (offset_type, try_pos));
9991d1d3 3891 if (TREE_CODE (dest) == MEM_REF)
3892 {
3893 MR_DEPENDENCE_CLIQUE (dest) = clique;
3894 MR_DEPENDENCE_BASE (dest) = base;
3895 }
3896
10f0d48d 3897 tree mask;
3898 if (bswap_res)
3899 mask = integer_zero_node;
3900 else
509ab8cd 3901 mask = native_interpret_expr (int_type,
3902 group->mask + try_pos
3903 - start_byte_pos,
3904 group->buf_size);
9991d1d3 3905
3906 tree ops[2];
3907 for (int j = 0;
3908 j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
3909 ++j)
3910 {
3911 store_operand_info &op = split_store->orig_stores[0]->ops[j];
509ab8cd 3912 if (bswap_res)
3913 ops[j] = bswap_res;
3914 else if (op.base_addr)
9991d1d3 3915 {
3916 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3917 orig_stmts.safe_push (info->ops[j].stmt);
3918
3919 offset_type = get_alias_type_for_stmts (orig_stmts, true,
3920 &clique, &base);
3921 location_t load_loc = get_location_for_stmts (orig_stmts);
3922 orig_stmts.truncate (0);
3923
3924 unsigned HOST_WIDE_INT load_align = group->load_align[j];
3925 unsigned HOST_WIDE_INT align_bitpos
10f0d48d 3926 = known_alignment (try_bitpos
e61263f2 3927 - split_store->orig_stores[0]->bitpos
3928 + op.bitpos);
3929 if (align_bitpos & (load_align - 1))
9991d1d3 3930 load_align = least_bit_hwi (align_bitpos);
3931
3932 tree load_int_type
3933 = build_nonstandard_integer_type (try_size, UNSIGNED);
3934 load_int_type
3935 = build_aligned_type (load_int_type, load_align);
3936
e61263f2 3937 poly_uint64 load_pos
10f0d48d 3938 = exact_div (try_bitpos
e61263f2 3939 - split_store->orig_stores[0]->bitpos
3940 + op.bitpos,
3941 BITS_PER_UNIT);
9991d1d3 3942 ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
3943 build_int_cst (offset_type, load_pos));
3944 if (TREE_CODE (ops[j]) == MEM_REF)
3945 {
3946 MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
3947 MR_DEPENDENCE_BASE (ops[j]) = base;
3948 }
3949 if (!integer_zerop (mask))
3950 /* The load might load some bits (that will be masked off
3951 later on) uninitialized, avoid -W*uninitialized
3952 warnings in that case. */
3953 TREE_NO_WARNING (ops[j]) = 1;
3954
3955 stmt = gimple_build_assign (make_ssa_name (int_type),
3956 ops[j]);
3957 gimple_set_location (stmt, load_loc);
3958 if (gsi_bb (load_gsi[j]))
3959 {
3960 gimple_set_vuse (stmt, gimple_vuse (op.stmt));
3961 gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
3962 }
3963 else
3964 {
3965 gimple_set_vuse (stmt, new_vuse);
3966 gimple_seq_add_stmt_without_update (&seq, stmt);
3967 }
3968 ops[j] = gimple_assign_lhs (stmt);
9deedf62 3969 tree xor_mask;
3970 enum tree_code inv_op
3971 = invert_op (split_store, j, int_type, xor_mask);
3972 if (inv_op != NOP_EXPR)
c35548ce 3973 {
3974 stmt = gimple_build_assign (make_ssa_name (int_type),
9deedf62 3975 inv_op, ops[j], xor_mask);
c35548ce 3976 gimple_set_location (stmt, load_loc);
3977 ops[j] = gimple_assign_lhs (stmt);
3978
3979 if (gsi_bb (load_gsi[j]))
3980 gimple_seq_add_stmt_without_update (&load_seq[j],
3981 stmt);
3982 else
3983 gimple_seq_add_stmt_without_update (&seq, stmt);
3984 }
9991d1d3 3985 }
3986 else
3987 ops[j] = native_interpret_expr (int_type,
3988 group->val + try_pos
3989 - start_byte_pos,
3990 group->buf_size);
3991 }
3992
3993 switch (split_store->orig_stores[0]->rhs_code)
3994 {
3995 case BIT_AND_EXPR:
3996 case BIT_IOR_EXPR:
3997 case BIT_XOR_EXPR:
3998 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
3999 {
4000 tree rhs1 = gimple_assign_rhs1 (info->stmt);
4001 orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
4002 }
4003 location_t bit_loc;
4004 bit_loc = get_location_for_stmts (orig_stmts);
4005 orig_stmts.truncate (0);
4006
4007 stmt
4008 = gimple_build_assign (make_ssa_name (int_type),
4009 split_store->orig_stores[0]->rhs_code,
4010 ops[0], ops[1]);
4011 gimple_set_location (stmt, bit_loc);
4012 /* If there is just one load and there is a separate
4013 load_seq[0], emit the bitwise op right after it. */
4014 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4015 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4016 /* Otherwise, if at least one load is in seq, we need to
4017 emit the bitwise op right before the store. If there
 4018	       are two loads and they are emitted somewhere else, it would
4019 be better to emit the bitwise op as early as possible;
4020 we don't track where that would be possible right now
4021 though. */
4022 else
4023 gimple_seq_add_stmt_without_update (&seq, stmt);
4024 src = gimple_assign_lhs (stmt);
9deedf62 4025 tree xor_mask;
4026 enum tree_code inv_op;
4027 inv_op = invert_op (split_store, 2, int_type, xor_mask);
4028 if (inv_op != NOP_EXPR)
832a73b9 4029 {
4030 stmt = gimple_build_assign (make_ssa_name (int_type),
9deedf62 4031 inv_op, src, xor_mask);
832a73b9 4032 gimple_set_location (stmt, bit_loc);
4033 if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
4034 gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
4035 else
4036 gimple_seq_add_stmt_without_update (&seq, stmt);
4037 src = gimple_assign_lhs (stmt);
4038 }
9991d1d3 4039 break;
509ab8cd 4040 case LROTATE_EXPR:
4041 case NOP_EXPR:
4042 src = ops[0];
4043 if (!is_gimple_val (src))
4044 {
4045 stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
4046 src);
4047 gimple_seq_add_stmt_without_update (&seq, stmt);
4048 src = gimple_assign_lhs (stmt);
4049 }
4050 if (!useless_type_conversion_p (int_type, TREE_TYPE (src)))
4051 {
4052 stmt = gimple_build_assign (make_ssa_name (int_type),
4053 NOP_EXPR, src);
4054 gimple_seq_add_stmt_without_update (&seq, stmt);
4055 src = gimple_assign_lhs (stmt);
4056 }
665dafda 4057 inv_op = invert_op (split_store, 2, int_type, xor_mask);
4058 if (inv_op != NOP_EXPR)
4059 {
4060 stmt = gimple_build_assign (make_ssa_name (int_type),
4061 inv_op, src, xor_mask);
4062 gimple_set_location (stmt, loc);
4063 gimple_seq_add_stmt_without_update (&seq, stmt);
4064 src = gimple_assign_lhs (stmt);
4065 }
509ab8cd 4066 break;
9991d1d3 4067 default:
4068 src = ops[0];
4069 break;
4070 }
4071
10f0d48d 4072 /* If bit insertion is required, we use the source as an accumulator
4073 into which the successive bit-field values are manually inserted.
4074 FIXME: perhaps use BIT_INSERT_EXPR instead in some cases? */
4075 if (group->bit_insertion)
4076 FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
4077 if (info->rhs_code == BIT_INSERT_EXPR
4078 && info->bitpos < try_bitpos + try_size
4079 && info->bitpos + info->bitsize > try_bitpos)
4080 {
4081 /* Mask, truncate, convert to final type, shift and ior into
4082 the accumulator. Note that every step can be a no-op. */
4083 const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
4084 const HOST_WIDE_INT end_gap
4085 = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
4086 tree tem = info->ops[0].val;
1bcb04d5 4087 if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
4088 {
4089 tree bitfield_type
4090 = build_nonstandard_integer_type (info->bitsize,
4091 UNSIGNED);
4092 tem = gimple_convert (&seq, loc, bitfield_type, tem);
4093 }
4094 else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
10f0d48d 4095 {
4096 const unsigned HOST_WIDE_INT imask
4097 = (HOST_WIDE_INT_1U << info->bitsize) - 1;
4098 tem = gimple_build (&seq, loc,
4099 BIT_AND_EXPR, TREE_TYPE (tem), tem,
4100 build_int_cst (TREE_TYPE (tem),
4101 imask));
4102 }
4103 const HOST_WIDE_INT shift
4104 = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
4105 if (shift < 0)
4106 tem = gimple_build (&seq, loc,
4107 RSHIFT_EXPR, TREE_TYPE (tem), tem,
4108 build_int_cst (NULL_TREE, -shift));
4109 tem = gimple_convert (&seq, loc, int_type, tem);
4110 if (shift > 0)
4111 tem = gimple_build (&seq, loc,
4112 LSHIFT_EXPR, int_type, tem,
4113 build_int_cst (NULL_TREE, shift));
4114 src = gimple_build (&seq, loc,
4115 BIT_IOR_EXPR, int_type, tem, src);
4116 }
4117
902cb3b7 4118 if (!integer_zerop (mask))
4119 {
4120 tree tem = make_ssa_name (int_type);
4121 tree load_src = unshare_expr (dest);
4122 /* The load might load some or all bits uninitialized,
4123 avoid -W*uninitialized warnings in that case.
 4124	     As an optimization, if all the bits were provably
 4125	     uninitialized (no stores at all yet, or the previous
 4126	     store was a CLOBBER), it would be nice to optimize away
 4127	     the load and replace it e.g. with 0.  */
4128 TREE_NO_WARNING (load_src) = 1;
4129 stmt = gimple_build_assign (tem, load_src);
4130 gimple_set_location (stmt, loc);
4131 gimple_set_vuse (stmt, new_vuse);
4132 gimple_seq_add_stmt_without_update (&seq, stmt);
4133
4134 /* FIXME: If there is a single chunk of zero bits in mask,
4135 perhaps use BIT_INSERT_EXPR instead? */
4136 stmt = gimple_build_assign (make_ssa_name (int_type),
4137 BIT_AND_EXPR, tem, mask);
4138 gimple_set_location (stmt, loc);
4139 gimple_seq_add_stmt_without_update (&seq, stmt);
4140 tem = gimple_assign_lhs (stmt);
4141
9991d1d3 4142 if (TREE_CODE (src) == INTEGER_CST)
4143 src = wide_int_to_tree (int_type,
4144 wi::bit_and_not (wi::to_wide (src),
4145 wi::to_wide (mask)));
4146 else
4147 {
4148 tree nmask
4149 = wide_int_to_tree (int_type,
4150 wi::bit_not (wi::to_wide (mask)));
4151 stmt = gimple_build_assign (make_ssa_name (int_type),
4152 BIT_AND_EXPR, src, nmask);
4153 gimple_set_location (stmt, loc);
4154 gimple_seq_add_stmt_without_update (&seq, stmt);
4155 src = gimple_assign_lhs (stmt);
4156 }
902cb3b7 4157 stmt = gimple_build_assign (make_ssa_name (int_type),
4158 BIT_IOR_EXPR, tem, src);
4159 gimple_set_location (stmt, loc);
4160 gimple_seq_add_stmt_without_update (&seq, stmt);
4161 src = gimple_assign_lhs (stmt);
4162 }
4163 }
3d3e04ac 4164
4165 stmt = gimple_build_assign (dest, src);
4166 gimple_set_location (stmt, loc);
4167 gimple_set_vuse (stmt, new_vuse);
4168 gimple_seq_add_stmt_without_update (&seq, stmt);
4169
393122a0 4170 if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
4171 add_stmt_to_eh_lp (stmt, group->lp_nr);
4172
3d3e04ac 4173 tree new_vdef;
4174 if (i < split_stores.length () - 1)
902cb3b7 4175 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
3d3e04ac 4176 else
4177 new_vdef = last_vdef;
4178
4179 gimple_set_vdef (stmt, new_vdef);
4180 SSA_NAME_DEF_STMT (new_vdef) = stmt;
4181 new_vuse = new_vdef;
4182 }
4183
4184 FOR_EACH_VEC_ELT (split_stores, i, split_store)
4185 delete split_store;
4186
3d3e04ac 4187 gcc_assert (seq);
4188 if (dump_file)
4189 {
4190 fprintf (dump_file,
10f0d48d 4191 "New sequence of %u stores to replace old one of %u stores\n",
902cb3b7 4192 split_stores.length (), orig_num_stmts);
3d3e04ac 4193 if (dump_flags & TDF_DETAILS)
4194 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
4195 }
393122a0 4196
4197 if (group->lp_nr > 0)
4198 {
4199 /* We're going to insert a sequence of (potentially) throwing stores
4200 into an active EH region. This means that we're going to create
4201 new basic blocks with EH edges pointing to the post landing pad
4202 and, therefore, to have to update its PHI nodes, if any. For the
4203 virtual PHI node, we're going to use the VDEFs created above, but
4204 for the other nodes, we need to record the original reaching defs. */
4205 eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
4206 basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
4207 basic_block last_bb = gimple_bb (group->last_stmt);
4208 edge last_edge = find_edge (last_bb, lp_bb);
4209 auto_vec<tree, 16> last_defs;
4210 gphi_iterator gpi;
4211 for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
4212 {
4213 gphi *phi = gpi.phi ();
4214 tree last_def;
4215 if (virtual_operand_p (gimple_phi_result (phi)))
4216 last_def = NULL_TREE;
4217 else
4218 last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
4219 last_defs.safe_push (last_def);
4220 }
4221
4222 /* Do the insertion. Then, if new basic blocks have been created in the
 4223	 process, rewind the chain of VDEFs created above to walk the new basic
4224 blocks and update the corresponding arguments of the PHI nodes. */
4225 update_modified_stmts (seq);
4226 if (gimple_find_sub_bbs (seq, &last_gsi))
4227 while (last_vdef != gimple_vuse (group->last_stmt))
4228 {
4229 gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
4230 if (stmt_could_throw_p (cfun, stmt))
4231 {
4232 edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
4233 unsigned int i;
4234 for (gpi = gsi_start_phis (lp_bb), i = 0;
4235 !gsi_end_p (gpi);
4236 gsi_next (&gpi), i++)
4237 {
4238 gphi *phi = gpi.phi ();
4239 tree new_def;
4240 if (virtual_operand_p (gimple_phi_result (phi)))
4241 new_def = last_vdef;
4242 else
4243 new_def = last_defs[i];
4244 add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
4245 }
4246 }
4247 last_vdef = gimple_vuse (stmt);
4248 }
4249 }
4250 else
4251 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
4252
9991d1d3 4253 for (int j = 0; j < 2; ++j)
4254 if (load_seq[j])
4255 gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);
3d3e04ac 4256
4257 return true;
4258}
4259
4260/* Process the merged_store_group objects created in the coalescing phase.
4261 The stores are all against the base object BASE.
4262 Try to output the widened stores and delete the original statements if
4263 successful. Return true iff any changes were made. */
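/* Note (added for clarity): on success the original statements of a group
   are removed below; the virtual definition of the group's last statement
   is not unlinked because the newly emitted sequence reuses it as its
   final VDEF.  */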
4264
4265bool
f85e7cb7 4266imm_store_chain_info::output_merged_stores ()
3d3e04ac 4267{
4268 unsigned int i;
4269 merged_store_group *merged_store;
4270 bool ret = false;
4271 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
4272 {
a8786363 4273 if (dbg_cnt (store_merging)
4274 && output_merged_store (merged_store))
3d3e04ac 4275 {
4276 unsigned int j;
4277 store_immediate_info *store;
4278 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
4279 {
4280 gimple *stmt = store->stmt;
4281 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
4282 gsi_remove (&gsi, true);
393122a0 4283 if (store->lp_nr)
4284 remove_stmt_from_eh_lp (stmt);
3d3e04ac 4285 if (stmt != merged_store->last_stmt)
4286 {
4287 unlink_stmt_vdef (stmt);
4288 release_defs (stmt);
4289 }
4290 }
4291 ret = true;
4292 }
4293 }
4294 if (ret && dump_file)
4295 fprintf (dump_file, "Merging successful!\n");
4296
4297 return ret;
4298}
4299
4300/* Coalesce the store_immediate_info objects recorded against the base object
4301 BASE in the first phase and output them.
4302 Delete the allocated structures.
4303 Return true if any changes were made. */
4304
4305bool
f85e7cb7 4306imm_store_chain_info::terminate_and_process_chain ()
3d3e04ac 4307{
4308 /* Process store chain. */
4309 bool ret = false;
4310 if (m_store_info.length () > 1)
4311 {
4312 ret = coalesce_immediate_stores ();
4313 if (ret)
f85e7cb7 4314 ret = output_merged_stores ();
3d3e04ac 4315 }
4316
4317 /* Delete all the entries we allocated ourselves. */
4318 store_immediate_info *info;
4319 unsigned int i;
4320 FOR_EACH_VEC_ELT (m_store_info, i, info)
4321 delete info;
4322
4323 merged_store_group *merged_info;
4324 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
4325 delete merged_info;
4326
4327 return ret;
4328}
4329
4330/* Return true iff LHS is a destination potentially interesting for
4331 store merging. In practice these are the codes that get_inner_reference
4332 can process. */
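/* For illustration (added): destinations such as a plain DECL, x.field
   (COMPONENT_REF), a[i] (ARRAY_REF or ARRAY_RANGE_REF), *p (MEM_REF) or a
   BIT_FIELD_REF are accepted by the check below.  */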
4333
4334static bool
4335lhs_valid_for_store_merging_p (tree lhs)
4336{
393122a0 4337 if (DECL_P (lhs))
3d3e04ac 4338 return true;
4339
393122a0 4340 switch (TREE_CODE (lhs))
4341 {
4342 case ARRAY_REF:
4343 case ARRAY_RANGE_REF:
4344 case BIT_FIELD_REF:
4345 case COMPONENT_REF:
4346 case MEM_REF:
4347 return true;
4348 default:
4349 return false;
4350 }
4351
4352 gcc_unreachable ();
3d3e04ac 4353}
4354
4355/* Return true if the tree RHS is a constant we want to consider
4356 during store merging. In practice accept all codes that
4357 native_encode_expr accepts. */
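/* For illustration (added): this accepts an empty CONSTRUCTOR such as the
   {} in an aggregate assignment (handled specially by the pass) as well as
   constants of fixed-size modes that native_encode_expr can turn into
   bytes, e.g. INTEGER_CSTs and REAL_CSTs.  */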
4358
4359static bool
4360rhs_valid_for_store_merging_p (tree rhs)
4361{
52acb7ae 4362 unsigned HOST_WIDE_INT size;
e1eef457 4363 if (TREE_CODE (rhs) == CONSTRUCTOR
4364 && !TREE_CLOBBER_P (rhs)
4365 && CONSTRUCTOR_NELTS (rhs) == 0
4366 && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
4367 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
4368 return true;
52acb7ae 4369 return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
4370 && native_encode_expr (rhs, NULL, size) != 0);
3d3e04ac 4371}
4372
393122a0 4373/* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
4374 and return true on success or false on failure. */
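/* Worked example (added): with BYTE_OFF == 4 and 8-bit units the positions
   below are shifted by 32 bits, i.e. *PBITPOS becomes *PBITPOS + 32 and,
   when a bit region is recorded, both of its bounds are adjusted the same
   way; if the adjusted bit position is negative or does not fit, false is
   returned.  */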
4375
4376static bool
4377adjust_bit_pos (poly_offset_int byte_off,
4378 poly_int64 *pbitpos,
4379 poly_uint64 *pbitregion_start,
4380 poly_uint64 *pbitregion_end)
4381{
4382 poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
4383 bit_off += *pbitpos;
4384
4385 if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
4386 {
4387 if (maybe_ne (*pbitregion_end, 0U))
4388 {
4389 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4390 bit_off += *pbitregion_start;
4391 if (bit_off.to_uhwi (pbitregion_start))
4392 {
4393 bit_off = byte_off << LOG2_BITS_PER_UNIT;
4394 bit_off += *pbitregion_end;
4395 if (!bit_off.to_uhwi (pbitregion_end))
4396 *pbitregion_end = 0;
4397 }
4398 else
4399 *pbitregion_end = 0;
4400 }
4401 return true;
4402 }
4403 else
4404 return false;
4405}
4406
9991d1d3 4407/* If MEM is a memory reference usable for store merging (either as
4408 store destination or for loads), return the non-NULL base_addr
4409 and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
 4410   Otherwise return NULL; *PBITSIZE is still valid even in that
 4411   case.  */
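/* Illustration (added, not from the original sources): for a bit-field
   store like ((struct S *) p)->f = x; the returned base is the address p,
   *PBITPOS is the bit offset of f from it, and *PBITREGION_START and
   *PBITREGION_END delimit the surrounding bit region that may legally be
   read and rewritten when updating the bit-field.  */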
4412
4413static tree
e61263f2 4414mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
4415 poly_uint64 *pbitpos,
4416 poly_uint64 *pbitregion_start,
4417 poly_uint64 *pbitregion_end)
9991d1d3 4418{
e61263f2 4419 poly_int64 bitsize, bitpos;
4420 poly_uint64 bitregion_start = 0, bitregion_end = 0;
9991d1d3 4421 machine_mode mode;
4422 int unsignedp = 0, reversep = 0, volatilep = 0;
4423 tree offset;
4424 tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
4425 &unsignedp, &reversep, &volatilep);
4426 *pbitsize = bitsize;
e61263f2 4427 if (known_eq (bitsize, 0))
9991d1d3 4428 return NULL_TREE;
4429
4430 if (TREE_CODE (mem) == COMPONENT_REF
4431 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
4432 {
4433 get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
e61263f2 4434 if (maybe_ne (bitregion_end, 0U))
4435 bitregion_end += 1;
9991d1d3 4436 }
4437
4438 if (reversep)
4439 return NULL_TREE;
4440
4441 /* We do not want to rewrite TARGET_MEM_REFs. */
4442 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
4443 return NULL_TREE;
4444 /* In some cases get_inner_reference may return a
4445 MEM_REF [ptr + byteoffset]. For the purposes of this pass
4446 canonicalize the base_addr to MEM_REF [ptr] and take
4447 byteoffset into account in the bitpos. This occurs in
4448 PR 23684 and this way we can catch more chains. */
4449 else if (TREE_CODE (base_addr) == MEM_REF)
4450 {
393122a0 4451 if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
4452 &bitregion_start, &bitregion_end))
9991d1d3 4453 return NULL_TREE;
4454 base_addr = TREE_OPERAND (base_addr, 0);
4455 }
4456 /* get_inner_reference returns the base object, get at its
4457 address now. */
4458 else
4459 {
e61263f2 4460 if (maybe_lt (bitpos, 0))
9991d1d3 4461 return NULL_TREE;
4462 base_addr = build_fold_addr_expr (base_addr);
4463 }
4464
393122a0 4465 if (offset)
9991d1d3 4466 {
 4467      /* If the access has a variable offset then a base decl has to be
4468 address-taken to be able to emit pointer-based stores to it.
4469 ??? We might be able to get away with re-using the original
4470 base up to the first variable part and then wrapping that inside
4471 a BIT_FIELD_REF. */
4472 tree base = get_base_address (base_addr);
393122a0 4473 if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
9991d1d3 4474 return NULL_TREE;
4475
393122a0 4476 /* Similarly to above for the base, remove constant from the offset. */
4477 if (TREE_CODE (offset) == PLUS_EXPR
4478 && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
4479 && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
4480 &bitpos, &bitregion_start, &bitregion_end))
4481 offset = TREE_OPERAND (offset, 0);
4482
9991d1d3 4483 base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
4484 base_addr, offset);
4485 }
4486
393122a0 4487 if (known_eq (bitregion_end, 0U))
4488 {
4489 bitregion_start = round_down_to_byte_boundary (bitpos);
4490 bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
4491 }
4492
9991d1d3 4493 *pbitsize = bitsize;
4494 *pbitpos = bitpos;
4495 *pbitregion_start = bitregion_start;
4496 *pbitregion_end = bitregion_end;
4497 return base_addr;
4498}
4499
4500/* Return true if STMT is a load that can be used for store merging.
4501 In that case fill in *OP. BITSIZE, BITPOS, BITREGION_START and
4502 BITREGION_END are properties of the corresponding store. */
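/* For illustration (added): a qualifying load is a non-volatile,
   non-throwing GIMPLE load of the same bitsize at a byte-aligned offset
   from the store, whose bit region extends at least as far around the
   accessed bits as the store's does; a single BIT_NOT_EXPR on top of such
   a load is also accepted and recorded in op->bit_not_p.  */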
4503
4504static bool
4505handled_load (gimple *stmt, store_operand_info *op,
e61263f2 4506 poly_uint64 bitsize, poly_uint64 bitpos,
4507 poly_uint64 bitregion_start, poly_uint64 bitregion_end)
9991d1d3 4508{
c35548ce 4509 if (!is_gimple_assign (stmt))
9991d1d3 4510 return false;
c35548ce 4511 if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
4512 {
4513 tree rhs1 = gimple_assign_rhs1 (stmt);
4514 if (TREE_CODE (rhs1) == SSA_NAME
c35548ce 4515 && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
4516 bitregion_start, bitregion_end))
4517 {
832a73b9 4518 /* Don't allow _1 = load; _2 = ~1; _3 = ~_2; which should have
4519 been optimized earlier, but if allowed here, would confuse the
4520 multiple uses counting. */
4521 if (op->bit_not_p)
4522 return false;
c35548ce 4523 op->bit_not_p = !op->bit_not_p;
4524 return true;
4525 }
4526 return false;
4527 }
4528 if (gimple_vuse (stmt)
4529 && gimple_assign_load_p (stmt)
aac19106 4530 && !stmt_can_throw_internal (cfun, stmt)
9991d1d3 4531 && !gimple_has_volatile_ops (stmt))
4532 {
4533 tree mem = gimple_assign_rhs1 (stmt);
4534 op->base_addr
4535 = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
4536 &op->bitregion_start,
4537 &op->bitregion_end);
4538 if (op->base_addr != NULL_TREE
e61263f2 4539 && known_eq (op->bitsize, bitsize)
4540 && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
4541 && known_ge (op->bitpos - op->bitregion_start,
4542 bitpos - bitregion_start)
4543 && known_ge (op->bitregion_end - op->bitpos,
4544 bitregion_end - bitpos))
9991d1d3 4545 {
4546 op->stmt = stmt;
4547 op->val = mem;
c35548ce 4548 op->bit_not_p = false;
9991d1d3 4549 return true;
4550 }
4551 }
4552 return false;
4553}
4554
393122a0 4555/* Return the index number of the landing pad for STMT, if any. */
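/* Note (added for clarity): a return value of 0 means no landing pad
   applies to the store, e.g. because the function cannot throw and catch
   non-call exceptions or because STMT itself cannot throw.  */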
4556
4557static int
4558lp_nr_for_store (gimple *stmt)
4559{
4560 if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
4561 return 0;
4562
4563 if (!stmt_could_throw_p (cfun, stmt))
4564 return 0;
4565
4566 return lookup_stmt_eh_lp (stmt);
4567}
4568
9991d1d3 4569/* Record the store STMT for store merging optimization if it can be
393122a0 4570 optimized. Return true if any changes were made. */
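/* Rough classification done below (added for orientation): the rhs is
   recorded as INTEGER_CST for encodable constants, MEM_REF for plain
   loads, BIT_{AND,IOR,XOR}_EXPR for a bitwise operation of a load with a
   load or constant, LROTATE_EXPR for byte-swap patterns detected by
   find_bswap_or_nop_1, and BIT_INSERT_EXPR for small bit-field stores;
   anything else terminates the affected aliasing chains instead.  */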
9991d1d3 4571
393122a0 4572bool
9991d1d3 4573pass_store_merging::process_store (gimple *stmt)
4574{
4575 tree lhs = gimple_assign_lhs (stmt);
4576 tree rhs = gimple_assign_rhs1 (stmt);
e61263f2 4577 poly_uint64 bitsize, bitpos;
4578 poly_uint64 bitregion_start, bitregion_end;
9991d1d3 4579 tree base_addr
4580 = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
4581 &bitregion_start, &bitregion_end);
e61263f2 4582 if (known_eq (bitsize, 0U))
393122a0 4583 return false;
9991d1d3 4584
4585 bool invalid = (base_addr == NULL_TREE
e61263f2 4586 || (maybe_gt (bitsize,
4587 (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
e1eef457 4588 && TREE_CODE (rhs) != INTEGER_CST
4589 && (TREE_CODE (rhs) != CONSTRUCTOR
4590 || CONSTRUCTOR_NELTS (rhs) != 0)));
9991d1d3 4591 enum tree_code rhs_code = ERROR_MARK;
832a73b9 4592 bool bit_not_p = false;
509ab8cd 4593 struct symbolic_number n;
4594 gimple *ins_stmt = NULL;
9991d1d3 4595 store_operand_info ops[2];
4596 if (invalid)
4597 ;
4598 else if (rhs_valid_for_store_merging_p (rhs))
4599 {
4600 rhs_code = INTEGER_CST;
4601 ops[0].val = rhs;
4602 }
871a91ec 4603 else if (TREE_CODE (rhs) != SSA_NAME)
9991d1d3 4604 invalid = true;
4605 else
4606 {
4607 gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
4608 if (!is_gimple_assign (def_stmt))
4609 invalid = true;
4610 else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
4611 bitregion_start, bitregion_end))
4612 rhs_code = MEM_REF;
832a73b9 4613 else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
4614 {
4615 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4616 if (TREE_CODE (rhs1) == SSA_NAME
4617 && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
4618 {
4619 bit_not_p = true;
4620 def_stmt = SSA_NAME_DEF_STMT (rhs1);
4621 }
4622 }
10f0d48d 4623
832a73b9 4624 if (rhs_code == ERROR_MARK && !invalid)
9991d1d3 4625 switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
4626 {
4627 case BIT_AND_EXPR:
4628 case BIT_IOR_EXPR:
4629 case BIT_XOR_EXPR:
4630 tree rhs1, rhs2;
4631 rhs1 = gimple_assign_rhs1 (def_stmt);
4632 rhs2 = gimple_assign_rhs2 (def_stmt);
4633 invalid = true;
871a91ec 4634 if (TREE_CODE (rhs1) != SSA_NAME)
9991d1d3 4635 break;
4636 def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
4637 if (!is_gimple_assign (def_stmt1)
4638 || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
4639 bitregion_start, bitregion_end))
4640 break;
4641 if (rhs_valid_for_store_merging_p (rhs2))
4642 ops[1].val = rhs2;
871a91ec 4643 else if (TREE_CODE (rhs2) != SSA_NAME)
9991d1d3 4644 break;
4645 else
4646 {
4647 def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
4648 if (!is_gimple_assign (def_stmt2))
4649 break;
4650 else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
4651 bitregion_start, bitregion_end))
4652 break;
4653 }
4654 invalid = false;
4655 break;
4656 default:
4657 invalid = true;
4658 break;
4659 }
10f0d48d 4660
e61263f2 4661 unsigned HOST_WIDE_INT const_bitsize;
4662 if (bitsize.is_constant (&const_bitsize)
10f0d48d 4663 && (const_bitsize % BITS_PER_UNIT) == 0
e61263f2 4664 && const_bitsize <= 64
10f0d48d 4665 && multiple_p (bitpos, BITS_PER_UNIT))
509ab8cd 4666 {
4667 ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
4668 if (ins_stmt)
4669 {
4670 uint64_t nn = n.n;
4671 for (unsigned HOST_WIDE_INT i = 0;
e61263f2 4672 i < const_bitsize;
4673 i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
509ab8cd 4674 if ((nn & MARKER_MASK) == 0
4675 || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
4676 {
4677 ins_stmt = NULL;
4678 break;
4679 }
4680 if (ins_stmt)
4681 {
4682 if (invalid)
4683 {
4684 rhs_code = LROTATE_EXPR;
4685 ops[0].base_addr = NULL_TREE;
4686 ops[1].base_addr = NULL_TREE;
4687 }
4688 invalid = false;
4689 }
4690 }
4691 }
10f0d48d 4692
4693 if (invalid
4694 && bitsize.is_constant (&const_bitsize)
4695 && ((const_bitsize % BITS_PER_UNIT) != 0
4696 || !multiple_p (bitpos, BITS_PER_UNIT))
4697 && const_bitsize <= 64)
4698 {
1bcb04d5 4699 /* Bypass a conversion to the bit-field type. */
4d885f78 4700 if (!bit_not_p
4701 && is_gimple_assign (def_stmt)
4702 && CONVERT_EXPR_CODE_P (rhs_code))
10f0d48d 4703 {
4704 tree rhs1 = gimple_assign_rhs1 (def_stmt);
4705 if (TREE_CODE (rhs1) == SSA_NAME
1bcb04d5 4706 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
10f0d48d 4707 rhs = rhs1;
4708 }
4709 rhs_code = BIT_INSERT_EXPR;
4d885f78 4710 bit_not_p = false;
10f0d48d 4711 ops[0].val = rhs;
4712 ops[0].base_addr = NULL_TREE;
4713 ops[1].base_addr = NULL_TREE;
4714 invalid = false;
4715 }
9991d1d3 4716 }
4717
e61263f2 4718 unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
4719 unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
4720 if (invalid
4721 || !bitsize.is_constant (&const_bitsize)
4722 || !bitpos.is_constant (&const_bitpos)
4723 || !bitregion_start.is_constant (&const_bitregion_start)
4724 || !bitregion_end.is_constant (&const_bitregion_end))
393122a0 4725 return terminate_all_aliasing_chains (NULL, stmt);
9991d1d3 4726
509ab8cd 4727 if (!ins_stmt)
4728 memset (&n, 0, sizeof (n));
4729
2e966e2a 4730 class imm_store_chain_info **chain_info = NULL;
393122a0 4731 bool ret = false;
c35548ce 4732 if (base_addr)
4733 chain_info = m_stores.get (base_addr);
4734
9991d1d3 4735 store_immediate_info *info;
4736 if (chain_info)
4737 {
4738 unsigned int ord = (*chain_info)->m_store_info.length ();
e61263f2 4739 info = new store_immediate_info (const_bitsize, const_bitpos,
4740 const_bitregion_start,
4741 const_bitregion_end,
4742 stmt, ord, rhs_code, n, ins_stmt,
393122a0 4743 bit_not_p, lp_nr_for_store (stmt),
4744 ops[0], ops[1]);
9991d1d3 4745 if (dump_file && (dump_flags & TDF_DETAILS))
4746 {
4747 fprintf (dump_file, "Recording immediate store from stmt:\n");
4748 print_gimple_stmt (dump_file, stmt, 0);
4749 }
4750 (*chain_info)->m_store_info.safe_push (info);
393122a0 4751 ret |= terminate_all_aliasing_chains (chain_info, stmt);
9991d1d3 4752 /* If we reach the limit of stores to merge in a chain terminate and
4753 process the chain now. */
4754 if ((*chain_info)->m_store_info.length ()
4755 == (unsigned int) PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE))
4756 {
4757 if (dump_file && (dump_flags & TDF_DETAILS))
4758 fprintf (dump_file,
4759 "Reached maximum number of statements to merge:\n");
393122a0 4760 ret |= terminate_and_process_chain (*chain_info);
9991d1d3 4761 }
393122a0 4762 return ret;
9991d1d3 4763 }
4764
4765 /* Store aliases any existing chain? */
393122a0 4766 ret |= terminate_all_aliasing_chains (NULL, stmt);
9991d1d3 4767 /* Start a new chain. */
2e966e2a 4768 class imm_store_chain_info *new_chain
9991d1d3 4769 = new imm_store_chain_info (m_stores_head, base_addr);
e61263f2 4770 info = new store_immediate_info (const_bitsize, const_bitpos,
4771 const_bitregion_start,
4772 const_bitregion_end,
4773 stmt, 0, rhs_code, n, ins_stmt,
393122a0 4774 bit_not_p, lp_nr_for_store (stmt),
4775 ops[0], ops[1]);
9991d1d3 4776 new_chain->m_store_info.safe_push (info);
4777 m_stores.put (base_addr, new_chain);
4778 if (dump_file && (dump_flags & TDF_DETAILS))
4779 {
4780 fprintf (dump_file, "Starting new chain with statement:\n");
4781 print_gimple_stmt (dump_file, stmt, 0);
4782 fprintf (dump_file, "The base object is:\n");
4783 print_generic_expr (dump_file, base_addr);
4784 fprintf (dump_file, "\n");
4785 }
393122a0 4786 return ret;
4787}
4788
4789/* Return true if STMT is a store valid for store merging. */
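/* Note (added for clarity): the gimple_vdef check below restricts this to
   real stores to memory; assignments without a virtual definition never
   start or extend a chain.  */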
4790
4791static bool
4792store_valid_for_store_merging_p (gimple *stmt)
4793{
4794 return gimple_assign_single_p (stmt)
4795 && gimple_vdef (stmt)
4796 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
4797 && !gimple_has_volatile_ops (stmt);
4798}
4799
4800enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };
4801
4802/* Return the status of basic block BB wrt store merging. */
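/* Note (added for clarity): BB_EXTENDED_VALID means the block ends in a
   store usable for merging and falls through to the next block, so when
   non-call exceptions are enabled the open chains may be carried across
   the fallthru edge instead of being flushed at the end of the block.  */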
4803
4804static enum basic_block_status
4805get_status_for_store_merging (basic_block bb)
4806{
4807 unsigned int num_statements = 0;
4808 gimple_stmt_iterator gsi;
4809 edge e;
4810
4811 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4812 {
4813 gimple *stmt = gsi_stmt (gsi);
4814
4815 if (is_gimple_debug (stmt))
4816 continue;
4817
4818 if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
4819 break;
4820 }
4821
4822 if (num_statements == 0)
4823 return BB_INVALID;
4824
4825 if (cfun->can_throw_non_call_exceptions && cfun->eh
4826 && store_valid_for_store_merging_p (gimple_seq_last_stmt (bb_seq (bb)))
4827 && (e = find_fallthru_edge (bb->succs))
4828 && e->dest == bb->next_bb)
4829 return BB_EXTENDED_VALID;
4830
4831 return num_statements >= 2 ? BB_VALID : BB_INVALID;
9991d1d3 4832}
4833
3d3e04ac 4834/* Entry point for the pass. Go over each basic block recording chains of
9991d1d3 4835 immediate stores. Upon encountering a terminating statement (as defined
4836 by stmt_terminates_chain_p) process the recorded stores and emit the widened
4837 variants. */
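/* Note (added for clarity): in the loop below, chains are flushed when
   entering a block that is invalid for merging or has multiple
   predecessors, on volatile accesses, and at the end of any block that is
   not BB_EXTENDED_VALID.  */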
3d3e04ac 4838
4839unsigned int
4840pass_store_merging::execute (function *fun)
4841{
4842 basic_block bb;
4843 hash_set<gimple *> orig_stmts;
393122a0 4844 bool changed = false, open_chains = false;
4845
4846 /* If the function can throw and catch non-call exceptions, we'll be trying
4847 to merge stores across different basic blocks so we need to first unsplit
4848 the EH edges in order to streamline the CFG of the function. */
4849 if (cfun->can_throw_non_call_exceptions && cfun->eh)
4850 unsplit_eh_edges ();
3d3e04ac 4851
509ab8cd 4852 calculate_dominance_info (CDI_DOMINATORS);
4853
3d3e04ac 4854 FOR_EACH_BB_FN (bb, fun)
4855 {
393122a0 4856 const basic_block_status bb_status = get_status_for_store_merging (bb);
3d3e04ac 4857 gimple_stmt_iterator gsi;
3d3e04ac 4858
393122a0 4859 if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
4860 {
4861 changed |= terminate_and_process_all_chains ();
4862 open_chains = false;
3d3e04ac 4863 }
4864
393122a0 4865 if (bb_status == BB_INVALID)
3d3e04ac 4866 continue;
4867
4868 if (dump_file && (dump_flags & TDF_DETAILS))
4869 fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
4870
4871 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4872 {
4873 gimple *stmt = gsi_stmt (gsi);
4874
3a3ba7de 4875 if (is_gimple_debug (stmt))
4876 continue;
4877
3d3e04ac 4878 if (gimple_has_volatile_ops (stmt))
4879 {
4880 /* Terminate all chains. */
4881 if (dump_file && (dump_flags & TDF_DETAILS))
4882 fprintf (dump_file, "Volatile access terminates "
4883 "all chains\n");
393122a0 4884 changed |= terminate_and_process_all_chains ();
4885 open_chains = false;
3d3e04ac 4886 continue;
4887 }
4888
393122a0 4889 if (store_valid_for_store_merging_p (stmt))
4890 changed |= process_store (stmt);
9991d1d3 4891 else
393122a0 4892 changed |= terminate_all_aliasing_chains (NULL, stmt);
4893 }
4894
4895 if (bb_status == BB_EXTENDED_VALID)
4896 open_chains = true;
4897 else
4898 {
4899 changed |= terminate_and_process_all_chains ();
4900 open_chains = false;
3d3e04ac 4901 }
3d3e04ac 4902 }
393122a0 4903
4904 if (open_chains)
4905 changed |= terminate_and_process_all_chains ();
4906
4907 /* If the function can throw and catch non-call exceptions and something
4908 changed during the pass, then the CFG has (very likely) changed too. */
4909 if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
4910 {
4911 free_dominance_info (CDI_DOMINATORS);
4912 return TODO_cleanup_cfg;
4913 }
4914
3d3e04ac 4915 return 0;
4916}
4917
4918} // anon namespace
4919
4920/* Construct and return a store merging pass object. */
4921
4922gimple_opt_pass *
4923make_pass_store_merging (gcc::context *ctxt)
4924{
4925 return new pass_store_merging (ctxt);
4926}
3d9a2fb3 4927
4928#if CHECKING_P
4929
4930namespace selftest {
4931
4932/* Selftests for store merging helpers. */
4933
4934/* Assert that all elements of the byte arrays X and Y, both of length N
4935 are equal. */
4936
4937static void
4938verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
4939{
4940 for (unsigned int i = 0; i < n; i++)
4941 {
4942 if (x[i] != y[i])
4943 {
4944 fprintf (stderr, "Arrays do not match. X:\n");
4945 dump_char_array (stderr, x, n);
4946 fprintf (stderr, "Y:\n");
4947 dump_char_array (stderr, y, n);
4948 }
4949 ASSERT_EQ (x[i], y[i]);
4950 }
4951}
4952
 4953/* Test that shift_bytes_in_array carries bits across bytes
 4954   correctly.  */
4955
4956static void
4957verify_shift_bytes_in_array (void)
4958{
4959 /* byte 1 | byte 0
4960 00011111 | 11100000. */
4961 unsigned char orig[2] = { 0xe0, 0x1f };
4962 unsigned char in[2];
4963 memcpy (in, orig, sizeof orig);
4964
4965 unsigned char expected[2] = { 0x80, 0x7f };
4966 shift_bytes_in_array (in, sizeof (in), 2);
4967 verify_array_eq (in, expected, sizeof (in));
4968
4969 memcpy (in, orig, sizeof orig);
4970 memcpy (expected, orig, sizeof orig);
4971 /* Check that shifting by zero doesn't change anything. */
4972 shift_bytes_in_array (in, sizeof (in), 0);
4973 verify_array_eq (in, expected, sizeof (in));
4974
4975}
4976
 4977/* Test that shift_bytes_in_array_right carries bits across bytes
 4978   correctly.  */
4979
4980static void
4981verify_shift_bytes_in_array_right (void)
4982{
4983 /* byte 1 | byte 0
4984 00011111 | 11100000. */
4985 unsigned char orig[2] = { 0x1f, 0xe0};
4986 unsigned char in[2];
4987 memcpy (in, orig, sizeof orig);
4988 unsigned char expected[2] = { 0x07, 0xf8};
4989 shift_bytes_in_array_right (in, sizeof (in), 2);
4990 verify_array_eq (in, expected, sizeof (in));
4991
4992 memcpy (in, orig, sizeof orig);
4993 memcpy (expected, orig, sizeof orig);
4994 /* Check that shifting by zero doesn't change anything. */
4995 shift_bytes_in_array_right (in, sizeof (in), 0);
4996 verify_array_eq (in, expected, sizeof (in));
4997}
4998
 4999/* Test that clear_bit_region clears exactly the bits asked and
 5000   nothing more.  */
5001
5002static void
5003verify_clear_bit_region (void)
5004{
5005 /* Start with all bits set and test clearing various patterns in them. */
5006 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5007 unsigned char in[3];
5008 unsigned char expected[3];
5009 memcpy (in, orig, sizeof in);
5010
5011 /* Check zeroing out all the bits. */
5012 clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
5013 expected[0] = expected[1] = expected[2] = 0;
5014 verify_array_eq (in, expected, sizeof in);
5015
5016 memcpy (in, orig, sizeof in);
5017 /* Leave the first and last bits intact. */
5018 clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
5019 expected[0] = 0x1;
5020 expected[1] = 0;
5021 expected[2] = 0x80;
5022 verify_array_eq (in, expected, sizeof in);
5023}
5024
 5025/* Test that clear_bit_region_be clears exactly the bits asked and
 5026   nothing more.  */
5027
5028static void
5029verify_clear_bit_region_be (void)
5030{
5031 /* Start with all bits set and test clearing various patterns in them. */
5032 unsigned char orig[3] = { 0xff, 0xff, 0xff};
5033 unsigned char in[3];
5034 unsigned char expected[3];
5035 memcpy (in, orig, sizeof in);
5036
5037 /* Check zeroing out all the bits. */
5038 clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
5039 expected[0] = expected[1] = expected[2] = 0;
5040 verify_array_eq (in, expected, sizeof in);
5041
5042 memcpy (in, orig, sizeof in);
5043 /* Leave the first and last bits intact. */
5044 clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
5045 expected[0] = 0x80;
5046 expected[1] = 0;
5047 expected[2] = 0x1;
5048 verify_array_eq (in, expected, sizeof in);
5049}
5050
5051
5052/* Run all of the selftests within this file. */
5053
5054void
5055store_merging_c_tests (void)
5056{
5057 verify_shift_bytes_in_array ();
5058 verify_shift_bytes_in_array_right ();
5059 verify_clear_bit_region ();
5060 verify_clear_bit_region_be ();
5061}
5062
5063} // namespace selftest
5064#endif /* CHECKING_P. */