  1/* GIMPLE store merging pass.
  2   Copyright (C) 2016-2017 Free Software Foundation, Inc.
  3   Contributed by ARM Ltd.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21/* The purpose of this pass is to combine multiple memory stores of
22 constant values to consecutive memory locations into fewer wider stores.
 23   For example, if we have a sequence performing four byte stores to
24 consecutive memory locations:
25 [p ] := imm1;
26 [p + 1B] := imm2;
27 [p + 2B] := imm3;
28 [p + 3B] := imm4;
29 we can transform this into a single 4-byte store if the target supports it:
30 [p] := imm1:imm2:imm3:imm4 //concatenated immediates according to endianness.
31
32 The algorithm is applied to each basic block in three phases:
33
34 1) Scan through the basic block recording constant assignments to
35 destinations that can be expressed as a store to memory of a certain size
36 at a certain bit offset. Record store chains to different bases in a
37 hash_map (m_stores) and make sure to terminate such chains when appropriate
 38   (for example when the stored values get used subsequently).
39 These stores can be a result of structure element initializers, array stores
40 etc. A store_immediate_info object is recorded for every such store.
41 Record as many such assignments to a single base as possible until a
42 statement that interferes with the store sequence is encountered.
43
44 2) Analyze the chain of stores recorded in phase 1) (i.e. the vector of
45 store_immediate_info objects) and coalesce contiguous stores into
46 merged_store_group objects.
47
48 For example, given the stores:
49 [p ] := 0;
50 [p + 1B] := 1;
51 [p + 3B] := 0;
52 [p + 4B] := 1;
53 [p + 5B] := 0;
54 [p + 6B] := 0;
55 This phase would produce two merged_store_group objects, one recording the
56 two bytes stored in the memory region [p : p + 1] and another
57 recording the four bytes stored in the memory region [p + 3 : p + 6].
58
59 3) The merged_store_group objects produced in phase 2) are processed
60 to generate the sequence of wider stores that set the contiguous memory
61 regions to the sequence of bytes that correspond to it. This may emit
62 multiple stores per store group to handle contiguous stores that are not
63 of a size that is a power of 2. For example it can try to emit a 40-bit
64 store as a 32-bit store followed by an 8-bit store.
 65   We try to emit stores as wide as we can while respecting STRICT_ALIGNMENT or
 66   TARGET_SLOW_UNALIGNED_ACCESS rules.
 67
68 Note on endianness and example:
69 Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
70 [p ] := 0x1234;
71 [p + 2B] := 0x5678;
72 [p + 4B] := 0xab;
73 [p + 5B] := 0xcd;
74
75 The memory layout for little-endian (LE) and big-endian (BE) must be:
76 p |LE|BE|
77 ---------
78 0 |34|12|
79 1 |12|34|
80 2 |78|56|
81 3 |56|78|
82 4 |ab|ab|
83 5 |cd|cd|
84
85 To merge these into a single 48-bit merged value 'val' in phase 2)
86 on little-endian we insert stores to higher (consecutive) bitpositions
87 into the most significant bits of the merged value.
88 The final merged value would be: 0xcdab56781234
89
90 For big-endian we insert stores to higher bitpositions into the least
91 significant bits of the merged value.
92 The final merged value would be: 0x12345678abcd
93
94 Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
95 followed by a 16-bit store. Again, we must consider endianness when
96 breaking down the 48-bit value 'val' computed above.
97 For little endian we emit:
98 [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff;
99 [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32;
100
101 Whereas for big-endian we emit:
102 [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
103 [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */
104
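/* Illustrative sketch (not part of the pass): on a little-endian target
   that supports unaligned 32-bit stores, the four byte stores from the
   example above

     *(char *)(p + 0) = 0x11;
     *(char *)(p + 1) = 0x22;
     *(char *)(p + 2) = 0x33;
     *(char *)(p + 3) = 0x44;

   would ideally become the single wider store

     *(unsigned int *)p = 0x44332211;

   while on big-endian the merged immediate would be 0x11223344.  */
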
105#include "config.h"
106#include "system.h"
107#include "coretypes.h"
108#include "backend.h"
109#include "tree.h"
110#include "gimple.h"
111#include "builtins.h"
112#include "fold-const.h"
113#include "tree-pass.h"
114#include "ssa.h"
115#include "gimple-pretty-print.h"
116#include "alias.h"
117#include "fold-const.h"
118#include "params.h"
119#include "print-tree.h"
120#include "tree-hash-traits.h"
121#include "gimple-iterator.h"
122#include "gimplify.h"
123#include "stor-layout.h"
124#include "timevar.h"
125#include "tree-cfg.h"
126#include "tree-eh.h"
127#include "target.h"
 128#include "gimplify-me.h"
129#include "rtl.h"
130#include "expr.h" /* For get_bit_range. */
 131#include "selftest.h"
132
133/* The maximum size (in bits) of the stores this pass should generate. */
134#define MAX_STORE_BITSIZE (BITS_PER_WORD)
135#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)
136
137namespace {
138
139/* Struct recording the information about a single store of an immediate
140 to memory. These are created in the first phase and coalesced into
141 merged_store_group objects in the second phase. */
142
143struct store_immediate_info
144{
145 unsigned HOST_WIDE_INT bitsize;
146 unsigned HOST_WIDE_INT bitpos;
147 unsigned HOST_WIDE_INT bitregion_start;
148 /* This is one past the last bit of the bit region. */
149 unsigned HOST_WIDE_INT bitregion_end;
150 gimple *stmt;
151 unsigned int order;
 152  store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
 153			unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
 154			gimple *, unsigned int);
155};
156
157store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
 158					    unsigned HOST_WIDE_INT bp,
 159					    unsigned HOST_WIDE_INT brs,
 160					    unsigned HOST_WIDE_INT bre,
 161					    gimple *st,
 162					    unsigned int ord)
 163  : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
 164    stmt (st), order (ord)
165{
166}
167
168/* Struct representing a group of stores to contiguous memory locations.
169 These are produced by the second phase (coalescing) and consumed in the
170 third phase that outputs the widened stores. */
171
172struct merged_store_group
173{
174 unsigned HOST_WIDE_INT start;
175 unsigned HOST_WIDE_INT width;
176 unsigned HOST_WIDE_INT bitregion_start;
177 unsigned HOST_WIDE_INT bitregion_end;
178 /* The size of the allocated memory for val and mask. */
f663d9ad 179 unsigned HOST_WIDE_INT buf_size;
a62b3dc5 180 unsigned HOST_WIDE_INT align_base;
181
182 unsigned int align;
183 unsigned int first_order;
184 unsigned int last_order;
185
a62b3dc5 186 auto_vec<store_immediate_info *> stores;
187 /* We record the first and last original statements in the sequence because
188 we'll need their vuse/vdef and replacement position. It's easier to keep
189 track of them separately as 'stores' is reordered by apply_stores. */
190 gimple *last_stmt;
191 gimple *first_stmt;
192 unsigned char *val;
a62b3dc5 193 unsigned char *mask;
194
195 merged_store_group (store_immediate_info *);
196 ~merged_store_group ();
197 void merge_into (store_immediate_info *);
198 void merge_overlapping (store_immediate_info *);
199 bool apply_stores ();
200private:
201 void do_merge (store_immediate_info *);
202};
203
204/* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
205
206static void
207dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
208{
209 if (!fd)
210 return;
211
212 for (unsigned int i = 0; i < len; i++)
213 fprintf (fd, "%x ", ptr[i]);
214 fprintf (fd, "\n");
215}
216
217/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
218 bits between adjacent elements. AMNT should be within
219 [0, BITS_PER_UNIT).
220 Example, AMNT = 2:
221 00011111|11100000 << 2 = 01111111|10000000
222 PTR[1] | PTR[0] PTR[1] | PTR[0]. */
223
224static void
225shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
226{
227 if (amnt == 0)
228 return;
229
230 unsigned char carry_over = 0U;
46a61395 231 unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
232 unsigned char clear_mask = (~0U) << amnt;
233
234 for (unsigned int i = 0; i < sz; i++)
235 {
236 unsigned prev_carry_over = carry_over;
46a61395 237 carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);
238
239 ptr[i] <<= amnt;
240 if (i != 0)
241 {
242 ptr[i] &= clear_mask;
243 ptr[i] |= prev_carry_over;
244 }
245 }
246}
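
/* Worked example for shift_bytes_in_array (an illustrative sketch, not used
   by the pass), assuming BITS_PER_UNIT == 8:

     unsigned char buf[2] = { 0xe0, 0x1f };   // bits: 00011111|11100000
     shift_bytes_in_array (buf, 2, 2);
     // buf is now { 0x80, 0x7f }, i.e. bits: 01111111|10000000

   The top two bits of buf[0] are carried into the low bits of buf[1], as in
   the diagram in the comment above.  */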
247
248/* Like shift_bytes_in_array but for big-endian.
249 Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
250 bits between adjacent elements. AMNT should be within
251 [0, BITS_PER_UNIT).
252 Example, AMNT = 2:
253 00011111|11100000 >> 2 = 00000111|11111000
254 PTR[0] | PTR[1] PTR[0] | PTR[1]. */
255
256static void
257shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
258 unsigned int amnt)
259{
260 if (amnt == 0)
261 return;
262
263 unsigned char carry_over = 0U;
264 unsigned char carry_mask = ~(~0U << amnt);
265
266 for (unsigned int i = 0; i < sz; i++)
267 {
268 unsigned prev_carry_over = carry_over;
46a61395 269 carry_over = ptr[i] & carry_mask;
f663d9ad 270
271 carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
272 ptr[i] >>= amnt;
273 ptr[i] |= prev_carry_over;
274 }
275}
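
/* Worked example for the big-endian variant (illustrative sketch, assuming
   BITS_PER_UNIT == 8):

     unsigned char buf[2] = { 0x1f, 0xe0 };   // bits: 00011111|11100000
     shift_bytes_in_array_right (buf, 2, 2);
     // buf is now { 0x07, 0xf8 }, i.e. bits: 00000111|11111000

   Bits shifted out of the bottom of buf[0] reappear at the top of buf[1].  */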
276
277/* Clear out LEN bits starting from bit START in the byte array
278 PTR. This clears the bits to the *right* from START.
279 START must be within [0, BITS_PER_UNIT) and counts starting from
280 the least significant bit. */
281
282static void
283clear_bit_region_be (unsigned char *ptr, unsigned int start,
284 unsigned int len)
285{
286 if (len == 0)
287 return;
288 /* Clear len bits to the right of start. */
289 else if (len <= start + 1)
290 {
291 unsigned char mask = (~(~0U << len));
292 mask = mask << (start + 1U - len);
293 ptr[0] &= ~mask;
294 }
295 else if (start != BITS_PER_UNIT - 1)
296 {
297 clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
298 clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
299 len - (start % BITS_PER_UNIT) - 1);
300 }
301 else if (start == BITS_PER_UNIT - 1
302 && len > BITS_PER_UNIT)
303 {
304 unsigned int nbytes = len / BITS_PER_UNIT;
a62b3dc5 305 memset (ptr, 0, nbytes);
306 if (len % BITS_PER_UNIT != 0)
307 clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
308 len % BITS_PER_UNIT);
309 }
310 else
311 gcc_unreachable ();
312}
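
/* Worked example for clear_bit_region_be (illustrative sketch): START counts
   from the most significant bit of the byte, so clearing four bits starting
   at bit BITS_PER_UNIT - 1 clears the top half of a byte:

     unsigned char b = 0xff;
     clear_bit_region_be (&b, BITS_PER_UNIT - 1, 4);
     // b is now 0x0f.  */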
313
 314/* In the byte array PTR clear the bit region starting at bit
 315   START that is LEN bits wide.
316 For regions spanning multiple bytes do this recursively until we reach
317 zero LEN or a region contained within a single byte. */
318
319static void
320clear_bit_region (unsigned char *ptr, unsigned int start,
321 unsigned int len)
322{
323 /* Degenerate base case. */
324 if (len == 0)
325 return;
326 else if (start >= BITS_PER_UNIT)
327 clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
328 /* Second base case. */
329 else if ((start + len) <= BITS_PER_UNIT)
330 {
46a61395 331 unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
332 mask >>= BITS_PER_UNIT - (start + len);
333
334 ptr[0] &= ~mask;
335
336 return;
337 }
338 /* Clear most significant bits in a byte and proceed with the next byte. */
339 else if (start != 0)
340 {
341 clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1f069ef5 342 clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
343 }
344 /* Whole bytes need to be cleared. */
345 else if (start == 0 && len > BITS_PER_UNIT)
346 {
347 unsigned int nbytes = len / BITS_PER_UNIT;
348 /* We could recurse on each byte but we clear whole bytes, so a simple
349 memset will do. */
46a61395 350 memset (ptr, '\0', nbytes);
351 /* Clear the remaining sub-byte region if there is one. */
352 if (len % BITS_PER_UNIT != 0)
353 clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
354 }
355 else
356 gcc_unreachable ();
357}
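
/* Worked example for clear_bit_region (illustrative sketch): here START
   counts from the least significant bit of PTR[0] and the region may span
   several bytes:

     unsigned char buf[2] = { 0xff, 0xff };
     clear_bit_region (buf, 4, 8);
     // buf is now { 0x0f, 0xf0 }: bits 4-7 of buf[0] and bits 0-3 of
     // buf[1] have been cleared.  */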
358
359/* Write BITLEN bits of EXPR to the byte array PTR at
360 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
361 Return true if the operation succeeded. */
362
363static bool
364encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
46a61395 365 unsigned int total_bytes)
366{
367 unsigned int first_byte = bitpos / BITS_PER_UNIT;
368 tree tmp_int = expr;
369 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
370 || (bitpos % BITS_PER_UNIT)
f4b31647 371 || !int_mode_for_size (bitlen, 0).exists ());
372
373 if (!sub_byte_op_p)
2f391428 374 return native_encode_expr (tmp_int, ptr + first_byte, total_bytes) != 0;
375
376 /* LITTLE-ENDIAN
377 We are writing a non byte-sized quantity or at a position that is not
378 at a byte boundary.
379 |--------|--------|--------| ptr + first_byte
380 ^ ^
381 xxx xxxxxxxx xxx< bp>
382 |______EXPR____|
383
46a61395 384 First native_encode_expr EXPR into a temporary buffer and shift each
385 byte in the buffer by 'bp' (carrying the bits over as necessary).
386 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
387 <------bitlen---->< bp>
388 Then we clear the destination bits:
389 |---00000|00000000|000-----| ptr + first_byte
390 <-------bitlen--->< bp>
391
392 Finally we ORR the bytes of the shifted EXPR into the cleared region:
393 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
394
395 BIG-ENDIAN
396 We are writing a non byte-sized quantity or at a position that is not
397 at a byte boundary.
398 ptr + first_byte |--------|--------|--------|
399 ^ ^
400 <bp >xxx xxxxxxxx xxx
401 |_____EXPR_____|
402
46a61395 403 First native_encode_expr EXPR into a temporary buffer and shift each
404 byte in the buffer to the right by (carrying the bits over as necessary).
405 We shift by as much as needed to align the most significant bit of EXPR
406 with bitpos:
407 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
408 <---bitlen----> <bp ><-----bitlen----->
409 Then we clear the destination bits:
410 ptr + first_byte |-----000||00000000||00000---|
411 <bp ><-------bitlen----->
412
413 Finally we ORR the bytes of the shifted EXPR into the cleared region:
414 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
415 The awkwardness comes from the fact that bitpos is counted from the
416 most significant bit of a byte. */
417
418 /* Allocate an extra byte so that we have space to shift into. */
419 unsigned int byte_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (expr))) + 1;
420 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
46a61395 421 memset (tmpbuf, '\0', byte_size);
422 /* The store detection code should only have allowed constants that are
423 accepted by native_encode_expr. */
2f391428 424 if (native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
425 gcc_unreachable ();
426
427 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
428 bytes to write. This means it can write more than
429 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
430 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
431 bitlen and zero out the bits that are not relevant as well (that may
432 contain a sign bit due to sign-extension). */
433 unsigned int padding
434 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
435 /* On big-endian the padding is at the 'front' so just skip the initial
436 bytes. */
437 if (BYTES_BIG_ENDIAN)
438 tmpbuf += padding;
439
440 byte_size -= padding;
441
442 if (bitlen % BITS_PER_UNIT != 0)
f663d9ad 443 {
4b2c06f4 444 if (BYTES_BIG_ENDIAN)
445 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
446 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
447 else
448 clear_bit_region (tmpbuf, bitlen,
449 byte_size * BITS_PER_UNIT - bitlen);
f663d9ad 450 }
451 /* Left shifting relies on the last byte being clear if bitlen is
452 a multiple of BITS_PER_UNIT, which might not be clear if
453 there are padding bytes. */
454 else if (!BYTES_BIG_ENDIAN)
455 tmpbuf[byte_size - 1] = '\0';
456
457 /* Clear the bit region in PTR where the bits from TMPBUF will be
46a61395 458 inserted into. */
459 if (BYTES_BIG_ENDIAN)
460 clear_bit_region_be (ptr + first_byte,
461 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
462 else
463 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
464
465 int shift_amnt;
466 int bitlen_mod = bitlen % BITS_PER_UNIT;
467 int bitpos_mod = bitpos % BITS_PER_UNIT;
468
469 bool skip_byte = false;
470 if (BYTES_BIG_ENDIAN)
471 {
472 /* BITPOS and BITLEN are exactly aligned and no shifting
473 is necessary. */
474 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
475 || (bitpos_mod == 0 && bitlen_mod == 0))
476 shift_amnt = 0;
477 /* |. . . . . . . .|
478 <bp > <blen >.
479 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
480 of the value until it aligns with 'bp' in the next byte over. */
481 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
482 {
483 shift_amnt = bitlen_mod + bitpos_mod;
484 skip_byte = bitlen_mod != 0;
485 }
486 /* |. . . . . . . .|
487 <----bp--->
488 <---blen---->.
489 Shift the value right within the same byte so it aligns with 'bp'. */
490 else
491 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
492 }
493 else
494 shift_amnt = bitpos % BITS_PER_UNIT;
495
496 /* Create the shifted version of EXPR. */
497 if (!BYTES_BIG_ENDIAN)
498 {
499 shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
500 if (shift_amnt == 0)
501 byte_size--;
502 }
503 else
504 {
505 gcc_assert (BYTES_BIG_ENDIAN);
506 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
507 /* If shifting right forced us to move into the next byte skip the now
508 empty byte. */
509 if (skip_byte)
510 {
511 tmpbuf++;
512 byte_size--;
513 }
514 }
515
516 /* Insert the bits from TMPBUF. */
517 for (unsigned int i = 0; i < byte_size; i++)
518 ptr[first_byte + i] |= tmpbuf[i];
519
520 return true;
521}
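
/* Worked little-endian example for encode_tree_to_bitpos (illustrative
   sketch, assuming BITS_PER_UNIT == 8): writing a 5-bit constant with value
   0b10101 at BITPOS 3 into an all-zero destination byte.  native_encode_expr
   first yields tmpbuf[0] == 0x15, the bits above BITLEN are cleared, the
   buffer is shifted left by BITPOS % BITS_PER_UNIT == 3 giving 0xa8, and the
   final OR leaves the constant in bits 3 to 7 of the destination byte.  */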
522
523/* Sorting function for store_immediate_info objects.
524 Sorts them by bitposition. */
525
526static int
527sort_by_bitpos (const void *x, const void *y)
528{
529 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
530 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
531
109cca3b 532 if ((*tmp)->bitpos < (*tmp2)->bitpos)
533 return -1;
534 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
535 return 1;
109cca3b 536 else
537 /* If they are the same let's use the order which is guaranteed to
538 be different. */
539 return (*tmp)->order - (*tmp2)->order;
540}
541
542/* Sorting function for store_immediate_info objects.
543 Sorts them by the order field. */
544
545static int
546sort_by_order (const void *x, const void *y)
547{
548 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
549 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
550
551 if ((*tmp)->order < (*tmp2)->order)
552 return -1;
553 else if ((*tmp)->order > (*tmp2)->order)
554 return 1;
555
556 gcc_unreachable ();
557}
558
559/* Initialize a merged_store_group object from a store_immediate_info
560 object. */
561
562merged_store_group::merged_store_group (store_immediate_info *info)
563{
564 start = info->bitpos;
565 width = info->bitsize;
566 bitregion_start = info->bitregion_start;
567 bitregion_end = info->bitregion_end;
568 /* VAL has memory allocated for it in apply_stores once the group
569 width has been finalized. */
570 val = NULL;
571 mask = NULL;
572 unsigned HOST_WIDE_INT align_bitpos = 0;
573 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
574 &align, &align_bitpos);
575 align_base = start - align_bitpos;
576 stores.create (1);
577 stores.safe_push (info);
578 last_stmt = info->stmt;
579 last_order = info->order;
580 first_stmt = last_stmt;
581 first_order = last_order;
582 buf_size = 0;
583}
584
585merged_store_group::~merged_store_group ()
586{
587 if (val)
588 XDELETEVEC (val);
589}
590
591/* Helper method for merge_into and merge_overlapping to do
592 the common part. */
f663d9ad 593void
a62b3dc5 594merged_store_group::do_merge (store_immediate_info *info)
f663d9ad 595{
596 bitregion_start = MIN (bitregion_start, info->bitregion_start);
597 bitregion_end = MAX (bitregion_end, info->bitregion_end);
598
599 unsigned int this_align;
600 unsigned HOST_WIDE_INT align_bitpos = 0;
601 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
602 &this_align, &align_bitpos);
603 if (this_align > align)
604 {
605 align = this_align;
606 align_base = info->bitpos - align_bitpos;
607 }
f663d9ad 608
609 gimple *stmt = info->stmt;
610 stores.safe_push (info);
611 if (info->order > last_order)
612 {
613 last_order = info->order;
614 last_stmt = stmt;
615 }
616 else if (info->order < first_order)
617 {
618 first_order = info->order;
619 first_stmt = stmt;
620 }
621}
622
623/* Merge a store recorded by INFO into this merged store.
624 The store is not overlapping with the existing recorded
625 stores. */
626
627void
628merged_store_group::merge_into (store_immediate_info *info)
629{
630 unsigned HOST_WIDE_INT wid = info->bitsize;
631 /* Make sure we're inserting in the position we think we're inserting. */
632 gcc_assert (info->bitpos >= start + width
633 && info->bitregion_start <= bitregion_end);
634
635 width += wid;
636 do_merge (info);
637}
638
639/* Merge a store described by INFO into this merged store.
 640   INFO overlaps in some way with the current store (i.e. it's not contiguous;
 641   the contiguous case is handled by merged_store_group::merge_into).  */
642
643void
644merged_store_group::merge_overlapping (store_immediate_info *info)
645{
f663d9ad 646 /* If the store extends the size of the group, extend the width. */
a62b3dc5 647 if (info->bitpos + info->bitsize > start + width)
648 width += info->bitpos + info->bitsize - (start + width);
649
a62b3dc5 650 do_merge (info);
651}
652
653/* Go through all the recorded stores in this group in program order and
654 apply their values to the VAL byte array to create the final merged
655 value. Return true if the operation succeeded. */
656
657bool
658merged_store_group::apply_stores ()
659{
660 /* Make sure we have more than one store in the group, otherwise we cannot
661 merge anything. */
662 if (bitregion_start % BITS_PER_UNIT != 0
663 || bitregion_end % BITS_PER_UNIT != 0
664 || stores.length () == 1)
665 return false;
666
667 stores.qsort (sort_by_order);
a62b3dc5 668 store_immediate_info *info;
669 unsigned int i;
670 /* Create a buffer of a size that is 2 times the number of bytes we're
671 storing. That way native_encode_expr can write power-of-2-sized
672 chunks without overrunning. */
673 buf_size = 2 * ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
674 val = XNEWVEC (unsigned char, 2 * buf_size);
675 mask = val + buf_size;
676 memset (val, 0, buf_size);
677 memset (mask, ~0U, buf_size);
678
679 FOR_EACH_VEC_ELT (stores, i, info)
680 {
a62b3dc5 681 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
682 bool ret = encode_tree_to_bitpos (gimple_assign_rhs1 (info->stmt),
683 val, info->bitsize,
684 pos_in_buffer, buf_size);
685 if (dump_file && (dump_flags & TDF_DETAILS))
686 {
687 if (ret)
688 {
689 fprintf (dump_file, "After writing ");
690 print_generic_expr (dump_file,
691 gimple_assign_rhs1 (info->stmt), 0);
692 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
693 " at position %d the merged region contains:\n",
694 info->bitsize, pos_in_buffer);
695 dump_char_array (dump_file, val, buf_size);
696 }
697 else
698 fprintf (dump_file, "Failed to merge stores\n");
699 }
700 if (!ret)
701 return false;
702 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
703 if (BYTES_BIG_ENDIAN)
704 clear_bit_region_be (m, (BITS_PER_UNIT - 1
705 - (pos_in_buffer % BITS_PER_UNIT)),
706 info->bitsize);
707 else
708 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
709 }
710 return true;
711}
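
/* Illustrative sketch of the VAL/MASK buffers built above: for a group whose
   bit region is [0, 16) and which contains the two byte stores

     [p]      := 0x12;
     [p + 1B] := 0x34;

   apply_stores produces val = { 0x12, 0x34, ... } and mask = { 0x00, 0x00,
   ... }.  A zero mask byte means the position is fully covered by one of the
   original stores; mask bytes left at 0xff mark padding.  */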
712
713/* Structure describing the store chain. */
714
715struct imm_store_chain_info
716{
717 /* Doubly-linked list that imposes an order on chain processing.
718 PNXP (prev's next pointer) points to the head of a list, or to
719 the next field in the previous chain in the list.
720 See pass_store_merging::m_stores_head for more rationale. */
721 imm_store_chain_info *next, **pnxp;
b5926e23 722 tree base_addr;
a62b3dc5 723 auto_vec<store_immediate_info *> m_store_info;
724 auto_vec<merged_store_group *> m_merged_store_groups;
725
726 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
727 : next (inspt), pnxp (&inspt), base_addr (b_a)
728 {
729 inspt = this;
730 if (next)
731 {
732 gcc_checking_assert (pnxp == next->pnxp);
733 next->pnxp = &next;
734 }
735 }
736 ~imm_store_chain_info ()
737 {
738 *pnxp = next;
739 if (next)
740 {
741 gcc_checking_assert (&next == next->pnxp);
742 next->pnxp = pnxp;
743 }
744 }
b5926e23 745 bool terminate_and_process_chain ();
f663d9ad 746 bool coalesce_immediate_stores ();
747 bool output_merged_store (merged_store_group *);
748 bool output_merged_stores ();
749};
750
751const pass_data pass_data_tree_store_merging = {
752 GIMPLE_PASS, /* type */
753 "store-merging", /* name */
754 OPTGROUP_NONE, /* optinfo_flags */
755 TV_GIMPLE_STORE_MERGING, /* tv_id */
756 PROP_ssa, /* properties_required */
757 0, /* properties_provided */
758 0, /* properties_destroyed */
759 0, /* todo_flags_start */
760 TODO_update_ssa, /* todo_flags_finish */
761};
762
763class pass_store_merging : public gimple_opt_pass
764{
765public:
766 pass_store_merging (gcc::context *ctxt)
faec5f24 767 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
768 {
769 }
770
771 /* Pass not supported for PDP-endianness, nor for insane hosts
772 or target character sizes where native_{encode,interpret}_expr
773 doesn't work properly. */
774 virtual bool
775 gate (function *)
776 {
777 return flag_store_merging
778 && WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN
779 && CHAR_BIT == 8
780 && BITS_PER_UNIT == 8;
781 }
782
783 virtual unsigned int execute (function *);
784
785private:
786 hash_map<tree_operand_hash, struct imm_store_chain_info *> m_stores;
787
788 /* Form a doubly-linked stack of the elements of m_stores, so that
789 we can iterate over them in a predictable way. Using this order
790 avoids extraneous differences in the compiler output just because
791 of tree pointer variations (e.g. different chains end up in
792 different positions of m_stores, so they are handled in different
793 orders, so they allocate or release SSA names in different
794 orders, and when they get reused, subsequent passes end up
795 getting different SSA names, which may ultimately change
796 decisions when going out of SSA). */
797 imm_store_chain_info *m_stores_head;
798
f663d9ad 799 bool terminate_and_process_all_chains ();
20770eb8 800 bool terminate_all_aliasing_chains (imm_store_chain_info **,
801 bool, gimple *);
802 bool terminate_and_release_chain (imm_store_chain_info *);
803}; // class pass_store_merging
804
805/* Terminate and process all recorded chains. Return true if any changes
806 were made. */
807
808bool
809pass_store_merging::terminate_and_process_all_chains ()
810{
f663d9ad 811 bool ret = false;
812 while (m_stores_head)
813 ret |= terminate_and_release_chain (m_stores_head);
814 gcc_assert (m_stores.elements () == 0);
815 gcc_assert (m_stores_head == NULL);
816
817 return ret;
818}
819
 820/* Terminate all chains that are affected by the assignment to DEST, appearing
 821   in statement STMT and ultimately pointing to the object BASE.  Return true if
822 at least one aliasing chain was terminated. BASE and DEST are allowed to
823 be NULL_TREE. In that case the aliasing checks are performed on the whole
824 statement rather than a particular operand in it. VAR_OFFSET_P signifies
825 whether STMT represents a store to BASE offset by a variable amount.
826 If that is the case we have to terminate any chain anchored at BASE. */
827
828bool
20770eb8 829pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
b5926e23 830 **chain_info,
831 bool var_offset_p,
832 gimple *stmt)
833{
834 bool ret = false;
835
836 /* If the statement doesn't touch memory it can't alias. */
837 if (!gimple_vuse (stmt))
838 return false;
839
840 /* Check if the assignment destination (BASE) is part of a store chain.
841 This is to catch non-constant stores to destinations that may be part
842 of a chain. */
b5926e23 843 if (chain_info)
f663d9ad 844 {
845 /* We have a chain at BASE and we're writing to [BASE + <variable>].
846 This can interfere with any of the stores so terminate
847 the chain. */
848 if (var_offset_p)
f663d9ad 849 {
850 terminate_and_release_chain (*chain_info);
851 ret = true;
852 }
853 /* Otherwise go through every store in the chain to see if it
854 aliases with any of them. */
855 else
856 {
a62b3dc5 857 store_immediate_info *info;
858 unsigned int i;
859 FOR_EACH_VEC_ELT ((*chain_info)->m_store_info, i, info)
f663d9ad 860 {
861 if (ref_maybe_used_by_stmt_p (stmt,
862 gimple_assign_lhs (info->stmt))
863 || stmt_may_clobber_ref_p (stmt,
864 gimple_assign_lhs (info->stmt)))
f663d9ad 865 {
b5926e23 866 if (dump_file && (dump_flags & TDF_DETAILS))
f663d9ad 867 {
868 fprintf (dump_file,
869 "stmt causes chain termination:\n");
ef6cb4c7 870 print_gimple_stmt (dump_file, stmt, 0);
f663d9ad 871 }
872 terminate_and_release_chain (*chain_info);
873 ret = true;
874 break;
875 }
876 }
877 }
878 }
879
f663d9ad 880 /* Check for aliasing with all other store chains. */
50b6d676 881 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
f663d9ad 882 {
883 next = cur->next;
884
885 /* We already checked all the stores in chain_info and terminated the
886 chain if necessary. Skip it here. */
50b6d676 887 if (chain_info && (*chain_info) == cur)
888 continue;
889
890 /* We can't use the base object here as that does not reliably exist.
891 Build a ao_ref from the base object address (if we know the
892 minimum and maximum offset and the maximum size we could improve
893 things here). */
894 ao_ref chain_ref;
50b6d676 895 ao_ref_init_from_ptr_and_size (&chain_ref, cur->base_addr, NULL_TREE);
896 if (ref_maybe_used_by_stmt_p (stmt, &chain_ref)
897 || stmt_may_clobber_ref_p_1 (stmt, &chain_ref))
f663d9ad 898 {
50b6d676 899 terminate_and_release_chain (cur);
900 ret = true;
901 }
902 }
903
904 return ret;
905}
906
907/* Helper function. Terminate the recorded chain storing to base object
908 BASE. Return true if the merging and output was successful. The m_stores
909 entry is removed after the processing in any case. */
910
911bool
b5926e23 912pass_store_merging::terminate_and_release_chain (imm_store_chain_info *chain_info)
f663d9ad 913{
914 bool ret = chain_info->terminate_and_process_chain ();
915 m_stores.remove (chain_info->base_addr);
916 delete chain_info;
917 return ret;
918}
919
920/* Go through the candidate stores recorded in m_store_info and merge them
921 into merged_store_group objects recorded into m_merged_store_groups
922 representing the widened stores. Return true if coalescing was successful
923 and the number of widened stores is fewer than the original number
924 of stores. */
925
926bool
927imm_store_chain_info::coalesce_immediate_stores ()
928{
929 /* Anything less can't be processed. */
930 if (m_store_info.length () < 2)
931 return false;
932
933 if (dump_file && (dump_flags & TDF_DETAILS))
934 fprintf (dump_file, "Attempting to coalesce %u stores in chain.\n",
935 m_store_info.length ());
936
937 store_immediate_info *info;
938 unsigned int i;
939
940 /* Order the stores by the bitposition they write to. */
941 m_store_info.qsort (sort_by_bitpos);
942
943 info = m_store_info[0];
944 merged_store_group *merged_store = new merged_store_group (info);
945
946 FOR_EACH_VEC_ELT (m_store_info, i, info)
947 {
948 if (dump_file && (dump_flags & TDF_DETAILS))
949 {
950 fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
951 " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:\n",
952 i, info->bitsize, info->bitpos);
ef6cb4c7 953 print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
954 fprintf (dump_file, "\n------------\n");
955 }
956
957 if (i == 0)
958 continue;
959
960 /* |---store 1---|
961 |---store 2---|
962 Overlapping stores. */
963 unsigned HOST_WIDE_INT start = info->bitpos;
964 if (IN_RANGE (start, merged_store->start,
965 merged_store->start + merged_store->width - 1))
966 {
967 merged_store->merge_overlapping (info);
968 continue;
969 }
970
971 /* |---store 1---| <gap> |---store 2---|.
972 Gap between stores. Start a new group if there are any gaps
973 between bitregions. */
974 if (info->bitregion_start > merged_store->bitregion_end)
f663d9ad
KT
975 {
976 /* Try to apply all the stores recorded for the group to determine
977 the bitpattern they write and discard it if that fails.
978 This will also reject single-store groups. */
979 if (!merged_store->apply_stores ())
980 delete merged_store;
981 else
982 m_merged_store_groups.safe_push (merged_store);
983
984 merged_store = new merged_store_group (info);
985
986 continue;
987 }
988
989 /* |---store 1---||---store 2---|
990 This store is consecutive to the previous one.
991 Merge it into the current store group. */
992 merged_store->merge_into (info);
993 }
994
995 /* Record or discard the last store group. */
996 if (!merged_store->apply_stores ())
997 delete merged_store;
998 else
999 m_merged_store_groups.safe_push (merged_store);
1000
1001 gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
1002 bool success
1003 = !m_merged_store_groups.is_empty ()
1004 && m_merged_store_groups.length () < m_store_info.length ();
1005
1006 if (success && dump_file)
1007 fprintf (dump_file, "Coalescing successful!\n"
1008 "Merged into %u stores\n",
1009 m_merged_store_groups.length ());
1010
1011 return success;
1012}
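
/* For instance (illustrative), with the six byte stores at offsets 0, 1, 3,
   4, 5 and 6 from the example in the file comment, the loop above builds one
   merged_store_group covering bytes [0, 1] and a second one covering bytes
   [3, 6], because the gap at byte 2 separates the two bit regions.  */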
1013
1014/* Return the type to use for the merged stores described by STMTS.
1015 This is needed to get the alias sets right. */
1016
1017static tree
1018get_alias_type_for_stmts (auto_vec<gimple *> &stmts)
1019{
1020 gimple *stmt;
1021 unsigned int i;
1022 tree lhs = gimple_assign_lhs (stmts[0]);
1023 tree type = reference_alias_ptr_type (lhs);
1024
1025 FOR_EACH_VEC_ELT (stmts, i, stmt)
1026 {
1027 if (i == 0)
1028 continue;
1029
1030 lhs = gimple_assign_lhs (stmt);
1031 tree type1 = reference_alias_ptr_type (lhs);
1032 if (!alias_ptr_types_compatible_p (type, type1))
1033 return ptr_type_node;
1034 }
1035 return type;
1036}
1037
1038/* Return the location_t information we can find among the statements
1039 in STMTS. */
1040
1041static location_t
1042get_location_for_stmts (auto_vec<gimple *> &stmts)
1043{
1044 gimple *stmt;
1045 unsigned int i;
1046
1047 FOR_EACH_VEC_ELT (stmts, i, stmt)
1048 if (gimple_has_location (stmt))
1049 return gimple_location (stmt);
1050
1051 return UNKNOWN_LOCATION;
1052}
1053
 1054/* Used to describe a store resulting from splitting a wide store into smaller
 1055   regularly-sized stores in split_group. */
1056
1057struct split_store
1058{
1059 unsigned HOST_WIDE_INT bytepos;
1060 unsigned HOST_WIDE_INT size;
1061 unsigned HOST_WIDE_INT align;
1062 auto_vec<gimple *> orig_stmts;
1063 /* True if there is a single orig stmt covering the whole split store. */
1064 bool orig;
1065 split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
1066 unsigned HOST_WIDE_INT);
1067};
1068
1069/* Simple constructor. */
1070
1071split_store::split_store (unsigned HOST_WIDE_INT bp,
1072 unsigned HOST_WIDE_INT sz,
1073 unsigned HOST_WIDE_INT al)
a62b3dc5 1074 : bytepos (bp), size (sz), align (al), orig (false)
1075{
1076 orig_stmts.create (0);
1077}
1078
 1079/* Record all statements corresponding to stores in GROUP that write to
 1080   the region starting at BITPOS and of size BITSIZE.  Record such
 1081   statements in STMTS if non-NULL.  The stores in GROUP must be sorted by
 1082   bitposition.  Return INFO if there is exactly one original store
 1083   in the range. */
f663d9ad 1084
a62b3dc5 1085static store_immediate_info *
f663d9ad 1086find_constituent_stmts (struct merged_store_group *group,
1087 vec<gimple *> *stmts,
1088 unsigned int *first,
1089 unsigned HOST_WIDE_INT bitpos,
1090 unsigned HOST_WIDE_INT bitsize)
f663d9ad 1091{
a62b3dc5 1092 store_immediate_info *info, *ret = NULL;
f663d9ad 1093 unsigned int i;
1094 bool second = false;
1095 bool update_first = true;
f663d9ad 1096 unsigned HOST_WIDE_INT end = bitpos + bitsize;
a62b3dc5 1097 for (i = *first; group->stores.iterate (i, &info); ++i)
1098 {
1099 unsigned HOST_WIDE_INT stmt_start = info->bitpos;
1100 unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
1101 if (stmt_end <= bitpos)
1102 {
1103 /* BITPOS passed to this function never decreases from within the
1104 same split_group call, so optimize and don't scan info records
1105 which are known to end before or at BITPOS next time.
1106 Only do it if all stores before this one also pass this. */
1107 if (update_first)
1108 *first = i + 1;
1109 continue;
1110 }
1111 else
1112 update_first = false;
1113
f663d9ad 1114 /* The stores in GROUP are ordered by bitposition so if we're past
1115 the region for this group return early. */
1116 if (stmt_start >= end)
1117 return ret;
1118
1119 if (stmts)
1120 {
1121 stmts->safe_push (info->stmt);
1122 if (ret)
1123 {
1124 ret = NULL;
1125 second = true;
1126 }
1127 }
1128 else if (ret)
1129 return NULL;
1130 if (!second)
1131 ret = info;
f663d9ad 1132 }
a62b3dc5 1133 return ret;
1134}
1135
1136/* Split a merged store described by GROUP by populating the SPLIT_STORES
1137 vector (if non-NULL) with split_store structs describing the byte offset
1138 (from the base), the bit size and alignment of each store as well as the
1139 original statements involved in each such split group.
1140 This is to separate the splitting strategy from the statement
1141 building/emission/linking done in output_merged_store.
1142 Return number of new stores.
1143 If SPLIT_STORES is NULL, it is just a dry run to count number of
1144 new stores. */
f663d9ad 1145
1146static unsigned int
1147split_group (merged_store_group *group, bool allow_unaligned,
1148 vec<struct split_store *> *split_stores)
f663d9ad 1149{
1150 unsigned HOST_WIDE_INT pos = group->bitregion_start;
1151 unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
f663d9ad 1152 unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
1153 unsigned HOST_WIDE_INT group_align = group->align;
1154 unsigned HOST_WIDE_INT align_base = group->align_base;
f663d9ad 1155
1156 gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));
1157
a62b3dc5 1158 unsigned int ret = 0, first = 0;
1159 unsigned HOST_WIDE_INT try_pos = bytepos;
1160 group->stores.qsort (sort_by_bitpos);
1161
1162 while (size > 0)
1163 {
1164 if ((allow_unaligned || group_align <= BITS_PER_UNIT)
1165 && group->mask[try_pos - bytepos] == (unsigned char) ~0U)
1166 {
1167 /* Skip padding bytes. */
1168 ++try_pos;
1169 size -= BITS_PER_UNIT;
1170 continue;
1171 }
1172
f663d9ad 1173 unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
1174 unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
1175 unsigned HOST_WIDE_INT align_bitpos
1176 = (try_bitpos - align_base) & (group_align - 1);
1177 unsigned HOST_WIDE_INT align = group_align;
1178 if (align_bitpos)
1179 align = least_bit_hwi (align_bitpos);
1180 if (!allow_unaligned)
1181 try_size = MIN (try_size, align);
1182 store_immediate_info *info
1183 = find_constituent_stmts (group, NULL, &first, try_bitpos, try_size);
1184 if (info)
1185 {
1186 /* If there is just one original statement for the range, see if
1187 we can just reuse the original store which could be even larger
1188 than try_size. */
1189 unsigned HOST_WIDE_INT stmt_end
1190 = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
1191 info = find_constituent_stmts (group, NULL, &first, try_bitpos,
1192 stmt_end - try_bitpos);
1193 if (info && info->bitpos >= try_bitpos)
1194 {
1195 try_size = stmt_end - try_bitpos;
1196 goto found;
1197 }
1198 }
f663d9ad 1199
1200 /* Approximate store bitsize for the case when there are no padding
1201 bits. */
1202 while (try_size > size)
1203 try_size /= 2;
1204 /* Now look for whole padding bytes at the end of that bitsize. */
1205 for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
1206 if (group->mask[try_pos - bytepos + nonmasked - 1]
1207 != (unsigned char) ~0U)
1208 break;
1209 if (nonmasked == 0)
1210 {
1211 /* If entire try_size range is padding, skip it. */
1212 try_pos += try_size / BITS_PER_UNIT;
1213 size -= try_size;
1214 continue;
1215 }
1216 /* Otherwise try to decrease try_size if second half, last 3 quarters
1217 etc. are padding. */
1218 nonmasked *= BITS_PER_UNIT;
1219 while (nonmasked <= try_size / 2)
1220 try_size /= 2;
1221 if (!allow_unaligned && group_align > BITS_PER_UNIT)
1222 {
1223 /* Now look for whole padding bytes at the start of that bitsize. */
1224 unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
1225 for (masked = 0; masked < try_bytesize; ++masked)
1226 if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U)
1227 break;
1228 masked *= BITS_PER_UNIT;
1229 gcc_assert (masked < try_size);
1230 if (masked >= try_size / 2)
1231 {
1232 while (masked >= try_size / 2)
1233 {
1234 try_size /= 2;
1235 try_pos += try_size / BITS_PER_UNIT;
1236 size -= try_size;
1237 masked -= try_size;
1238 }
1239 /* Need to recompute the alignment, so just retry at the new
1240 position. */
1241 continue;
1242 }
1243 }
1244
1245 found:
1246 ++ret;
f663d9ad 1247
1248 if (split_stores)
1249 {
1250 struct split_store *store
1251 = new split_store (try_pos, try_size, align);
1252 info = find_constituent_stmts (group, &store->orig_stmts,
1253 &first, try_bitpos, try_size);
1254 if (info
1255 && info->bitpos >= try_bitpos
1256 && info->bitpos + info->bitsize <= try_bitpos + try_size)
1257 store->orig = true;
1258 split_stores->safe_push (store);
1259 }
1260
1261 try_pos += try_size / BITS_PER_UNIT;
f663d9ad 1262 size -= try_size;
f663d9ad 1263 }
1264
1265 return ret;
1266}
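
/* Illustrative sketch of the splitting strategy: for a fully covered 6-byte
   (48-bit) region on a target with MAX_STORE_BITSIZE == 64 and sufficient
   alignment, the loop above first halves try_size from 64 to 32 bits and
   records a 4-byte split_store, then reduces try_size to the remaining 16
   bits and records a 2-byte split_store, i.e. the 32-bit plus 16-bit
   sequence described in the file comment.  */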
1267
1268/* Given a merged store group GROUP output the widened version of it.
1269 The store chain is against the base object BASE.
1270 Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
1271 unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
1272 Make sure that the number of statements output is less than the number of
1273 original statements. If a better sequence is possible emit it and
1274 return true. */
1275
1276bool
b5926e23 1277imm_store_chain_info::output_merged_store (merged_store_group *group)
f663d9ad 1278{
1279 unsigned HOST_WIDE_INT start_byte_pos
1280 = group->bitregion_start / BITS_PER_UNIT;
1281
1282 unsigned int orig_num_stmts = group->stores.length ();
1283 if (orig_num_stmts < 2)
1284 return false;
1285
a62b3dc5 1286 auto_vec<struct split_store *, 32> split_stores;
f663d9ad 1287 split_stores.create (0);
1288 bool allow_unaligned
1289 = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);
1290 if (allow_unaligned)
1291 {
1292 /* If unaligned stores are allowed, see how many stores we'd emit
1293 for unaligned and how many stores we'd emit for aligned stores.
1294 Only use unaligned stores if it allows fewer stores than aligned. */
1295 unsigned aligned_cnt = split_group (group, false, NULL);
1296 unsigned unaligned_cnt = split_group (group, true, NULL);
1297 if (aligned_cnt <= unaligned_cnt)
1298 allow_unaligned = false;
1299 }
1300 split_group (group, allow_unaligned, &split_stores);
1301
1302 if (split_stores.length () >= orig_num_stmts)
1303 {
1304 /* We didn't manage to reduce the number of statements. Bail out. */
1305 if (dump_file && (dump_flags & TDF_DETAILS))
1306 {
1307 fprintf (dump_file, "Exceeded original number of stmts (%u)."
1308 " Not profitable to emit new sequence.\n",
1309 orig_num_stmts);
1310 }
1311 return false;
1312 }
1313
1314 gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
1315 gimple_seq seq = NULL;
1316 tree last_vdef, new_vuse;
1317 last_vdef = gimple_vdef (group->last_stmt);
1318 new_vuse = gimple_vuse (group->last_stmt);
1319
1320 gimple *stmt = NULL;
1321 split_store *split_store;
1322 unsigned int i;
f663d9ad 1323
1324 tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &seq,
1325 is_gimple_mem_ref_addr, NULL_TREE);
1326 FOR_EACH_VEC_ELT (split_stores, i, split_store)
1327 {
1328 unsigned HOST_WIDE_INT try_size = split_store->size;
1329 unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
1330 unsigned HOST_WIDE_INT align = split_store->align;
1331 tree dest, src;
1332 location_t loc;
1333 if (split_store->orig)
1334 {
1335 /* If there is just a single constituent store which covers
1336 the whole area, just reuse the lhs and rhs. */
1337 dest = gimple_assign_lhs (split_store->orig_stmts[0]);
1338 src = gimple_assign_rhs1 (split_store->orig_stmts[0]);
1339 loc = gimple_location (split_store->orig_stmts[0]);
1340 }
1341 else
1342 {
1343 tree offset_type
1344 = get_alias_type_for_stmts (split_store->orig_stmts);
1345 loc = get_location_for_stmts (split_store->orig_stmts);
1346
1347 tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
1348 int_type = build_aligned_type (int_type, align);
1349 dest = fold_build2 (MEM_REF, int_type, addr,
1350 build_int_cst (offset_type, try_pos));
1351 src = native_interpret_expr (int_type,
1352 group->val + try_pos - start_byte_pos,
1353 group->buf_size);
1354 tree mask
1355 = native_interpret_expr (int_type,
1356 group->mask + try_pos - start_byte_pos,
1357 group->buf_size);
1358 if (!integer_zerop (mask))
1359 {
1360 tree tem = make_ssa_name (int_type);
1361 tree load_src = unshare_expr (dest);
1362 /* The load might load some or all bits uninitialized,
1363 avoid -W*uninitialized warnings in that case.
1364 As optimization, it would be nice if all the bits are
1365 provably uninitialized (no stores at all yet or previous
1366 store a CLOBBER) we'd optimize away the load and replace
1367 it e.g. with 0. */
1368 TREE_NO_WARNING (load_src) = 1;
1369 stmt = gimple_build_assign (tem, load_src);
1370 gimple_set_location (stmt, loc);
1371 gimple_set_vuse (stmt, new_vuse);
1372 gimple_seq_add_stmt_without_update (&seq, stmt);
1373
1374 /* FIXME: If there is a single chunk of zero bits in mask,
1375 perhaps use BIT_INSERT_EXPR instead? */
1376 stmt = gimple_build_assign (make_ssa_name (int_type),
1377 BIT_AND_EXPR, tem, mask);
1378 gimple_set_location (stmt, loc);
1379 gimple_seq_add_stmt_without_update (&seq, stmt);
1380 tem = gimple_assign_lhs (stmt);
1381
1382 src = wide_int_to_tree (int_type,
1383 wi::bit_and_not (wi::to_wide (src),
1384 wi::to_wide (mask)));
1385 stmt = gimple_build_assign (make_ssa_name (int_type),
1386 BIT_IOR_EXPR, tem, src);
1387 gimple_set_location (stmt, loc);
1388 gimple_seq_add_stmt_without_update (&seq, stmt);
1389 src = gimple_assign_lhs (stmt);
1390 }
1391 }
1392
1393 stmt = gimple_build_assign (dest, src);
1394 gimple_set_location (stmt, loc);
1395 gimple_set_vuse (stmt, new_vuse);
1396 gimple_seq_add_stmt_without_update (&seq, stmt);
1397
1398 tree new_vdef;
1399 if (i < split_stores.length () - 1)
a62b3dc5 1400 new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
1401 else
1402 new_vdef = last_vdef;
1403
1404 gimple_set_vdef (stmt, new_vdef);
1405 SSA_NAME_DEF_STMT (new_vdef) = stmt;
1406 new_vuse = new_vdef;
1407 }
1408
1409 FOR_EACH_VEC_ELT (split_stores, i, split_store)
1410 delete split_store;
1411
1412 gcc_assert (seq);
1413 if (dump_file)
1414 {
1415 fprintf (dump_file,
1416 "New sequence of %u stmts to replace old one of %u stmts\n",
a62b3dc5 1417 split_stores.length (), orig_num_stmts);
1418 if (dump_flags & TDF_DETAILS)
1419 print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
1420 }
1421 gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);
1422
1423 return true;
1424}
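
/* Illustrative sketch (GIMPLE-style pseudocode, not actual pass output) of
   the sequence emitted above when MASK is nonzero, i.e. when part of the
   widened region is not covered by the original stores:

     _1 = MEM[p];          // load the existing bytes, TREE_NO_WARNING set
     _2 = _1 & MASK;       // keep only the bits that must be preserved
     MEM[p] = _2 | VAL;    // VAL already has the MASK bits cleared

   When the region is fully covered, MASK is zero and a plain constant store
   is emitted instead.  */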
1425
1426/* Process the merged_store_group objects created in the coalescing phase.
1427 The stores are all against the base object BASE.
1428 Try to output the widened stores and delete the original statements if
1429 successful. Return true iff any changes were made. */
1430
1431bool
b5926e23 1432imm_store_chain_info::output_merged_stores ()
1433{
1434 unsigned int i;
1435 merged_store_group *merged_store;
1436 bool ret = false;
1437 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
1438 {
b5926e23 1439 if (output_merged_store (merged_store))
1440 {
1441 unsigned int j;
1442 store_immediate_info *store;
1443 FOR_EACH_VEC_ELT (merged_store->stores, j, store)
1444 {
1445 gimple *stmt = store->stmt;
1446 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1447 gsi_remove (&gsi, true);
1448 if (stmt != merged_store->last_stmt)
1449 {
1450 unlink_stmt_vdef (stmt);
1451 release_defs (stmt);
1452 }
1453 }
1454 ret = true;
1455 }
1456 }
1457 if (ret && dump_file)
1458 fprintf (dump_file, "Merging successful!\n");
1459
1460 return ret;
1461}
1462
1463/* Coalesce the store_immediate_info objects recorded against the base object
1464 BASE in the first phase and output them.
1465 Delete the allocated structures.
1466 Return true if any changes were made. */
1467
1468bool
b5926e23 1469imm_store_chain_info::terminate_and_process_chain ()
1470{
1471 /* Process store chain. */
1472 bool ret = false;
1473 if (m_store_info.length () > 1)
1474 {
1475 ret = coalesce_immediate_stores ();
1476 if (ret)
b5926e23 1477 ret = output_merged_stores ();
1478 }
1479
1480 /* Delete all the entries we allocated ourselves. */
1481 store_immediate_info *info;
1482 unsigned int i;
1483 FOR_EACH_VEC_ELT (m_store_info, i, info)
1484 delete info;
1485
1486 merged_store_group *merged_info;
1487 FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
1488 delete merged_info;
1489
1490 return ret;
1491}
1492
1493/* Return true iff LHS is a destination potentially interesting for
1494 store merging. In practice these are the codes that get_inner_reference
1495 can process. */
1496
1497static bool
1498lhs_valid_for_store_merging_p (tree lhs)
1499{
1500 tree_code code = TREE_CODE (lhs);
1501
1502 if (code == ARRAY_REF || code == ARRAY_RANGE_REF || code == MEM_REF
1503 || code == COMPONENT_REF || code == BIT_FIELD_REF)
1504 return true;
1505
1506 return false;
1507}
1508
1509/* Return true if the tree RHS is a constant we want to consider
1510 during store merging. In practice accept all codes that
1511 native_encode_expr accepts. */
1512
1513static bool
1514rhs_valid_for_store_merging_p (tree rhs)
1515{
1516 return native_encode_expr (rhs, NULL,
1517 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs)))) != 0;
1518}
1519
1520/* Entry point for the pass. Go over each basic block recording chains of
1521 immediate stores. Upon encountering a terminating statement (as defined
1522 by stmt_terminates_chain_p) process the recorded stores and emit the widened
1523 variants. */
1524
1525unsigned int
1526pass_store_merging::execute (function *fun)
1527{
1528 basic_block bb;
1529 hash_set<gimple *> orig_stmts;
1530
1531 FOR_EACH_BB_FN (bb, fun)
1532 {
1533 gimple_stmt_iterator gsi;
1534 unsigned HOST_WIDE_INT num_statements = 0;
1535 /* Record the original statements so that we can keep track of
1536 statements emitted in this pass and not re-process new
1537 statements. */
1538 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1539 {
1540 if (is_gimple_debug (gsi_stmt (gsi)))
1541 continue;
1542
2f391428 1543 if (++num_statements >= 2)
1544 break;
1545 }
1546
1547 if (num_statements < 2)
1548 continue;
1549
1550 if (dump_file && (dump_flags & TDF_DETAILS))
1551 fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);
1552
1553 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1554 {
1555 gimple *stmt = gsi_stmt (gsi);
1556
1557 if (is_gimple_debug (stmt))
1558 continue;
1559
1560 if (gimple_has_volatile_ops (stmt))
1561 {
1562 /* Terminate all chains. */
1563 if (dump_file && (dump_flags & TDF_DETAILS))
1564 fprintf (dump_file, "Volatile access terminates "
1565 "all chains\n");
1566 terminate_and_process_all_chains ();
1567 continue;
1568 }
1569
1570 if (gimple_assign_single_p (stmt) && gimple_vdef (stmt)
1571 && !stmt_can_throw_internal (stmt)
1572 && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt)))
1573 {
1574 tree lhs = gimple_assign_lhs (stmt);
1575 tree rhs = gimple_assign_rhs1 (stmt);
1576
1577 HOST_WIDE_INT bitsize, bitpos;
1578 unsigned HOST_WIDE_INT bitregion_start = 0;
1579 unsigned HOST_WIDE_INT bitregion_end = 0;
1580 machine_mode mode;
1581 int unsignedp = 0, reversep = 0, volatilep = 0;
1582 tree offset, base_addr;
1583 base_addr
1584 = get_inner_reference (lhs, &bitsize, &bitpos, &offset, &mode,
1585 &unsignedp, &reversep, &volatilep);
1586 if (TREE_CODE (lhs) == COMPONENT_REF
1587 && DECL_BIT_FIELD_TYPE (TREE_OPERAND (lhs, 1)))
1588 {
1589 get_bit_range (&bitregion_start, &bitregion_end, lhs,
1590 &bitpos, &offset);
1591 if (bitregion_end)
1592 ++bitregion_end;
1593 }
1594 if (bitsize == 0)
1595 continue;
1596
1597 /* As a future enhancement we could handle stores with the same
1598 base and offset. */
aa55dc0c 1599 bool invalid = reversep
1600 || ((bitsize > MAX_BITSIZE_MODE_ANY_INT)
1601 && (TREE_CODE (rhs) != INTEGER_CST))
aa55dc0c 1602 || !rhs_valid_for_store_merging_p (rhs);
f663d9ad 1603
1604 /* We do not want to rewrite TARGET_MEM_REFs. */
1605 if (TREE_CODE (base_addr) == TARGET_MEM_REF)
1606 invalid = true;
1607 /* In some cases get_inner_reference may return a
1608 MEM_REF [ptr + byteoffset]. For the purposes of this pass
1609 canonicalize the base_addr to MEM_REF [ptr] and take
1610 byteoffset into account in the bitpos. This occurs in
1611 PR 23684 and this way we can catch more chains. */
b5926e23 1612 else if (TREE_CODE (base_addr) == MEM_REF)
1613 {
1614 offset_int bit_off, byte_off = mem_ref_offset (base_addr);
1615 bit_off = byte_off << LOG2_BITS_PER_UNIT;
1616 bit_off += bitpos;
1617 if (!wi::neg_p (bit_off) && wi::fits_shwi_p (bit_off))
1618 {
1619 bitpos = bit_off.to_shwi ();
1620 if (bitregion_end)
1621 {
1622 bit_off = byte_off << LOG2_BITS_PER_UNIT;
1623 bit_off += bitregion_start;
1624 if (wi::fits_uhwi_p (bit_off))
1625 {
1626 bitregion_start = bit_off.to_uhwi ();
1627 bit_off = byte_off << LOG2_BITS_PER_UNIT;
1628 bit_off += bitregion_end;
1629 if (wi::fits_uhwi_p (bit_off))
1630 bitregion_end = bit_off.to_uhwi ();
1631 else
1632 bitregion_end = 0;
1633 }
1634 else
1635 bitregion_end = 0;
1636 }
1637 }
1638 else
1639 invalid = true;
b5926e23 1640 base_addr = TREE_OPERAND (base_addr, 0);
f663d9ad 1641 }
b5926e23
RB
1642 /* get_inner_reference returns the base object, get at its
1643 address now. */
1644 else
aa55dc0c
RB
1645 {
1646 if (bitpos < 0)
1647 invalid = true;
1648 base_addr = build_fold_addr_expr (base_addr);
1649 }
1650
a62b3dc5
JJ
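              /* If no bit region was recorded above (the store is not to a
                 bit-field, or its region did not fit), use the smallest
                 byte-aligned region covering the store; e.g. a 3-bit store
                 at bit offset 13 gets the region [8, 16).  */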
              if (!bitregion_end)
                {
                  bitregion_start = ROUND_DOWN (bitpos, BITS_PER_UNIT);
                  bitregion_end = ROUND_UP (bitpos + bitsize, BITS_PER_UNIT);
                }

              if (! invalid
                  && offset != NULL_TREE)
                {
                  /* If the access is variable offset then a base
                     decl has to be address-taken to be able to
                     emit pointer-based stores to it.
                     ???  We might be able to get away with
                     re-using the original base up to the first
                     variable part and then wrapping that inside
                     a BIT_FIELD_REF.  */
                  tree base = get_base_address (base_addr);
                  if (! base
                      || (DECL_P (base)
                          && ! TREE_ADDRESSABLE (base)))
                    invalid = true;
                  else
                    base_addr = build2 (POINTER_PLUS_EXPR,
                                        TREE_TYPE (base_addr),
                                        base_addr, offset);
                }

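              /* m_stores maps a base address to the chain of immediate
                 stores recorded so far for that base.  */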
              struct imm_store_chain_info **chain_info
                = m_stores.get (base_addr);

              if (!invalid)
                {
                  store_immediate_info *info;
                  if (chain_info)
                    {
                      unsigned int ord = (*chain_info)->m_store_info.length ();
                      info = new store_immediate_info (bitsize, bitpos,
                                                       bitregion_start,
                                                       bitregion_end,
                                                       stmt, ord);
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        {
                          fprintf (dump_file,
                                   "Recording immediate store from stmt:\n");
                          print_gimple_stmt (dump_file, stmt, 0);
                        }
                      (*chain_info)->m_store_info.safe_push (info);
                      /* If we reach the limit of stores to merge in a chain,
                         terminate and process the chain now.  */
                      if ((*chain_info)->m_store_info.length ()
                          == (unsigned int)
                             PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE))
                        {
                          if (dump_file && (dump_flags & TDF_DETAILS))
                            fprintf (dump_file,
                                     "Reached maximum number of statements"
                                     " to merge:\n");
                          terminate_and_release_chain (*chain_info);
                        }
                      continue;
                    }

                  /* Does the store alias any existing chain?  */
                  terminate_all_aliasing_chains (chain_info, false, stmt);
                  /* Start a new chain.  */
                  struct imm_store_chain_info *new_chain
                    = new imm_store_chain_info (m_stores_head, base_addr);
                  info = new store_immediate_info (bitsize, bitpos,
                                                   bitregion_start,
                                                   bitregion_end,
                                                   stmt, 0);
                  new_chain->m_store_info.safe_push (info);
                  m_stores.put (base_addr, new_chain);
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file,
                               "Starting new chain with statement:\n");
                      print_gimple_stmt (dump_file, stmt, 0);
                      fprintf (dump_file, "The base object is:\n");
                      print_generic_expr (dump_file, base_addr);
                      fprintf (dump_file, "\n");
                    }
                }
              else
                terminate_all_aliasing_chains (chain_info,
                                               offset != NULL_TREE, stmt);

              continue;
            }

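          /* Statements that are not candidate stores may still touch the
             recorded locations; conservatively terminate any chains the
             statement may alias.  */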
          terminate_all_aliasing_chains (NULL, false, stmt);
        }
      terminate_and_process_all_chains ();
    }
  return 0;
}

} // anon namespace

/* Construct and return a store merging pass object.  */

gimple_opt_pass *
make_pass_store_merging (gcc::context *ctxt)
{
  return new pass_store_merging (ctxt);
}

#if CHECKING_P

namespace selftest {

/* Selftests for store merging helpers.  */

/* Assert that all elements of the byte arrays X and Y, both of length N,
   are equal.  */

static void
verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
{
  for (unsigned int i = 0; i < n; i++)
    {
      if (x[i] != y[i])
        {
          fprintf (stderr, "Arrays do not match.  X:\n");
          dump_char_array (stderr, x, n);
          fprintf (stderr, "Y:\n");
          dump_char_array (stderr, y, n);
        }
      ASSERT_EQ (x[i], y[i]);
    }
}

/* Test that shift_bytes_in_array carries bits across bytes correctly.  */

static void
verify_shift_bytes_in_array (void)
{
  /* byte 1   | byte 0
     00011111 | 11100000.  */
  unsigned char orig[2] = { 0xe0, 0x1f };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);

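  /* The expected bytes below encode 0x1fe0 << 2 == 0x7f80, least
     significant byte first.  */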
  unsigned char expected[2] = { 0x80, 0x7f };
  shift_bytes_in_array (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}

/* Test that shift_bytes_in_array_right carries bits across bytes
   correctly.  */

static void
verify_shift_bytes_in_array_right (void)
{
  /* in[0] = 00011111, in[1] = 11100000.  */
  unsigned char orig[2] = { 0x1f, 0xe0 };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);
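  /* The expected bytes below encode 0x1fe0 >> 2 == 0x07f8, most
     significant byte first.  */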
  unsigned char expected[2] = { 0x07, 0xf8 };
  shift_bytes_in_array_right (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_right (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}

/* Test that clear_bit_region clears exactly the bits asked for and
   nothing more.  */

static void
verify_clear_bit_region (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
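  /* Only the least significant bit of byte 0 and the most significant bit
     of byte 2 should survive, as the expected bytes below show.  */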
  expected[0] = 0x1;
  expected[1] = 0;
  expected[2] = 0x80;
  verify_array_eq (in, expected, sizeof in);
}

/* Test that clear_bit_region_be clears exactly the bits asked for and
   nothing more.  */

static void
verify_clear_bit_region_be (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
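  /* Only the most significant bit of byte 0 and the least significant bit
     of byte 2 should remain set, as the expected bytes below show.  */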
  expected[0] = 0x80;
  expected[1] = 0;
  expected[2] = 0x1;
  verify_array_eq (in, expected, sizeof in);
}


/* Run all of the selftests within this file.  */

void
store_merging_c_tests (void)
{
  verify_shift_bytes_in_array ();
  verify_shift_bytes_in_array_right ();
  verify_clear_bit_region ();
  verify_clear_bit_region_be ();
}

} // namespace selftest
#endif /* CHECKING_P.  */