Commit | Line | Data |
---|---|---|
3d3e04ac | 1 | /* GIMPLE store merging pass. |
aad93da1 | 2 | Copyright (C) 2016-2017 Free Software Foundation, Inc. |
3d3e04ac | 3 | Contributed by ARM Ltd. |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but | |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | /* The purpose of this pass is to combine multiple memory stores of | |
22 | constant values to consecutive memory locations into fewer wider stores. | |
23 | For example, if we have a sequence performing four byte stores to | |
24 | consecutive memory locations: | |
25 | [p ] := imm1; | |
26 | [p + 1B] := imm2; | |
27 | [p + 2B] := imm3; | |
28 | [p + 3B] := imm4; | |
29 | we can transform this into a single 4-byte store if the target supports it: | |
30 | [p] := imm1:imm2:imm3:imm4 //concatenated immediates according to endianness. | |
31 | ||
32 | The algorithm is applied to each basic block in three phases: | |
33 | ||
34 | 1) Scan through the basic block recording constant assignments to | |
35 | destinations that can be expressed as a store to memory of a certain size | |
36 | at a certain bit offset. Record store chains to different bases in a | |
37 | hash_map (m_stores) and make sure to terminate such chains when appropriate | |
38 | (for example when the stored values get used subsequently). | |
39 | These stores can be a result of structure element initializers, array stores | |
40 | etc. A store_immediate_info object is recorded for every such store. | |
41 | Record as many such assignments to a single base as possible until a | |
42 | statement that interferes with the store sequence is encountered. | |
43 | ||
44 | 2) Analyze the chain of stores recorded in phase 1) (i.e. the vector of | |
45 | store_immediate_info objects) and coalesce contiguous stores into | |
46 | merged_store_group objects. | |
47 | ||
48 | For example, given the stores: | |
49 | [p ] := 0; | |
50 | [p + 1B] := 1; | |
51 | [p + 3B] := 0; | |
52 | [p + 4B] := 1; | |
53 | [p + 5B] := 0; | |
54 | [p + 6B] := 0; | |
55 | This phase would produce two merged_store_group objects, one recording the | |
56 | two bytes stored in the memory region [p : p + 1] and another | |
57 | recording the four bytes stored in the memory region [p + 3 : p + 6]. | |
58 | ||
59 | 3) The merged_store_group objects produced in phase 2) are processed | |
60 | to generate the sequence of wider stores that set the contiguous memory | |
61 | regions to the sequence of bytes that corresponds to them. This may emit | |
62 | multiple stores per store group to handle contiguous stores that are not | |
63 | of a size that is a power of 2. For example it can try to emit a 40-bit | |
64 | store as a 32-bit store followed by an 8-bit store. | |
65 | We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT or | |
dfdced85 | 66 | TARGET_SLOW_UNALIGNED_ACCESS rules. |
3d3e04ac | 67 | |
68 | Note on endianness and example: | |
69 | Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores: | |
70 | [p ] := 0x1234; | |
71 | [p + 2B] := 0x5678; | |
72 | [p + 4B] := 0xab; | |
73 | [p + 5B] := 0xcd; | |
74 | ||
75 | The memory layout for little-endian (LE) and big-endian (BE) must be: | |
76 | p |LE|BE| | |
77 | --------- | |
78 | 0 |34|12| | |
79 | 1 |12|34| | |
80 | 2 |78|56| | |
81 | 3 |56|78| | |
82 | 4 |ab|ab| | |
83 | 5 |cd|cd| | |
84 | ||
85 | To merge these into a single 48-bit merged value 'val' in phase 2) | |
86 | on little-endian we insert stores to higher (consecutive) bitpositions | |
87 | into the most significant bits of the merged value. | |
88 | The final merged value would be: 0xcdab56781234 | |
89 | ||
90 | For big-endian we insert stores to higher bitpositions into the least | |
91 | significant bits of the merged value. | |
92 | The final merged value would be: 0x12345678abcd | |
93 | ||
94 | Then, in phase 3), we want to emit this 48-bit value as a 32-bit store | |
95 | followed by a 16-bit store. Again, we must consider endianness when | |
96 | breaking down the 48-bit value 'val' computed above. | |
97 | For little endian we emit: | |
98 | [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff; | |
99 | [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32; | |
100 | ||
101 | Whereas for big-endian we emit: | |
102 | [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16; | |
103 | [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */ | |
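A quick way to sanity-check the arithmetic above is a small standalone program (illustrative only, not part of the pass) that builds the 48-bit merged value for each endianness and splits it into the 32-bit and 16-bit pieces quoted in the comment:

```c
/* Illustrative only: reproduces the merged values and the 32-bit/16-bit
   split from the comment above.  Not part of the pass.  */
#include <stdio.h>
#include <stdint.h>

int main (void)
{
  /* Little-endian: stores at higher bit positions land in the most
     significant bits of the merged value.  */
  uint64_t le = 0x1234ULL
		| ((uint64_t) 0x5678 << 16)
		| ((uint64_t) 0xab << 32)
		| ((uint64_t) 0xcd << 40);
  /* Big-endian: stores at higher bit positions land in the least
     significant bits of the merged value.  */
  uint64_t be = ((uint64_t) 0x1234 << 32)
		| ((uint64_t) 0x5678 << 16)
		| ((uint64_t) 0xab << 8)
		| 0xcd;

  printf ("LE merged: 0x%llx\n", (unsigned long long) le);  /* 0xcdab56781234 */
  printf ("BE merged: 0x%llx\n", (unsigned long long) be);  /* 0x12345678abcd */

  /* Phase 3: split into a 32-bit store at [p] and a 16-bit store at [p+4].  */
  printf ("LE: [p] = 0x%llx, [p+4] = 0x%llx\n",
	  (unsigned long long) (le & 0xffffffffULL),   /* 0x56781234 */
	  (unsigned long long) (le >> 32));            /* 0xcdab */
  printf ("BE: [p] = 0x%llx, [p+4] = 0x%llx\n",
	  (unsigned long long) (be >> 16),             /* 0x12345678 */
	  (unsigned long long) (be & 0xffffULL));      /* 0xabcd */
  return 0;
}
```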
104 | ||
105 | #include "config.h" | |
106 | #include "system.h" | |
107 | #include "coretypes.h" | |
108 | #include "backend.h" | |
109 | #include "tree.h" | |
110 | #include "gimple.h" | |
111 | #include "builtins.h" | |
112 | #include "fold-const.h" | |
113 | #include "tree-pass.h" | |
114 | #include "ssa.h" | |
115 | #include "gimple-pretty-print.h" | |
116 | #include "alias.h" | |
117 | #include "fold-const.h" | |
118 | #include "params.h" | |
119 | #include "print-tree.h" | |
120 | #include "tree-hash-traits.h" | |
121 | #include "gimple-iterator.h" | |
122 | #include "gimplify.h" | |
123 | #include "stor-layout.h" | |
124 | #include "timevar.h" | |
125 | #include "tree-cfg.h" | |
126 | #include "tree-eh.h" | |
127 | #include "target.h" | |
427223f1 | 128 | #include "gimplify-me.h" |
3d9a2fb3 | 129 | #include "selftest.h" |
3d3e04ac | 130 | |
131 | /* The maximum size (in bits) of the stores this pass should generate. */ | |
132 | #define MAX_STORE_BITSIZE (BITS_PER_WORD) | |
133 | #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT) | |
134 | ||
135 | namespace { | |
136 | ||
137 | /* Struct recording the information about a single store of an immediate | |
138 | to memory. These are created in the first phase and coalesced into | |
139 | merged_store_group objects in the second phase. */ | |
140 | ||
141 | struct store_immediate_info | |
142 | { | |
143 | unsigned HOST_WIDE_INT bitsize; | |
144 | unsigned HOST_WIDE_INT bitpos; | |
3d3e04ac | 145 | gimple *stmt; |
146 | unsigned int order; | |
f85e7cb7 | 147 | store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, |
148 | gimple *, unsigned int); | |
3d3e04ac | 149 | }; |
150 | ||
151 | store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs, | |
f85e7cb7 | 152 | unsigned HOST_WIDE_INT bp, |
153 | gimple *st, | |
3d3e04ac | 154 | unsigned int ord) |
f85e7cb7 | 155 | : bitsize (bs), bitpos (bp), stmt (st), order (ord) |
3d3e04ac | 156 | { |
157 | } | |
158 | ||
159 | /* Struct representing a group of stores to contiguous memory locations. | |
160 | These are produced by the second phase (coalescing) and consumed in the | |
161 | third phase that outputs the widened stores. */ | |
162 | ||
163 | struct merged_store_group | |
164 | { | |
165 | unsigned HOST_WIDE_INT start; | |
166 | unsigned HOST_WIDE_INT width; | |
167 | /* The size of the allocated memory for val. */ | |
168 | unsigned HOST_WIDE_INT buf_size; | |
169 | ||
170 | unsigned int align; | |
171 | unsigned int first_order; | |
172 | unsigned int last_order; | |
173 | ||
174 | auto_vec<struct store_immediate_info *> stores; | |
175 | /* We record the first and last original statements in the sequence because | |
176 | we'll need their vuse/vdef and replacement position. It's easier to keep | |
177 | track of them separately as 'stores' is reordered by apply_stores. */ | |
178 | gimple *last_stmt; | |
179 | gimple *first_stmt; | |
180 | unsigned char *val; | |
181 | ||
182 | merged_store_group (store_immediate_info *); | |
183 | ~merged_store_group (); | |
184 | void merge_into (store_immediate_info *); | |
185 | void merge_overlapping (store_immediate_info *); | |
186 | bool apply_stores (); | |
187 | }; | |
188 | ||
189 | /* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */ | |
190 | ||
191 | static void | |
192 | dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len) | |
193 | { | |
194 | if (!fd) | |
195 | return; | |
196 | ||
197 | for (unsigned int i = 0; i < len; i++) | |
198 | fprintf (fd, "%x ", ptr[i]); | |
199 | fprintf (fd, "\n"); | |
200 | } | |
201 | ||
3d3e04ac | 202 | /* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the |
203 | bits between adjacent elements. AMNT should be within | |
204 | [0, BITS_PER_UNIT). | |
205 | Example, AMNT = 2: | |
206 | 00011111|11100000 << 2 = 01111111|10000000 | |
207 | PTR[1] | PTR[0] PTR[1] | PTR[0]. */ | |
208 | ||
209 | static void | |
210 | shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt) | |
211 | { | |
212 | if (amnt == 0) | |
213 | return; | |
214 | ||
215 | unsigned char carry_over = 0U; | |
b1c71535 | 216 | unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt); |
3d3e04ac | 217 | unsigned char clear_mask = (~0U) << amnt; |
218 | ||
219 | for (unsigned int i = 0; i < sz; i++) | |
220 | { | |
221 | unsigned prev_carry_over = carry_over; | |
b1c71535 | 222 | carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt); |
3d3e04ac | 223 | |
224 | ptr[i] <<= amnt; | |
225 | if (i != 0) | |
226 | { | |
227 | ptr[i] &= clear_mask; | |
228 | ptr[i] |= prev_carry_over; | |
229 | } | |
230 | } | |
231 | } | |
232 | ||
233 | /* Like shift_bytes_in_array but for big-endian. | |
234 | Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the | |
235 | bits between adjacent elements. AMNT should be within | |
236 | [0, BITS_PER_UNIT). | |
237 | Example, AMNT = 2: | |
238 | 00011111|11100000 >> 2 = 00000111|11111000 | |
239 | PTR[0] | PTR[1] PTR[0] | PTR[1]. */ | |
240 | ||
241 | static void | |
242 | shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz, | |
243 | unsigned int amnt) | |
244 | { | |
245 | if (amnt == 0) | |
246 | return; | |
247 | ||
248 | unsigned char carry_over = 0U; | |
249 | unsigned char carry_mask = ~(~0U << amnt); | |
250 | ||
251 | for (unsigned int i = 0; i < sz; i++) | |
252 | { | |
253 | unsigned prev_carry_over = carry_over; | |
b1c71535 | 254 | carry_over = ptr[i] & carry_mask; |
3d3e04ac | 255 | |
a425d9af | 256 | carry_over <<= (unsigned char) BITS_PER_UNIT - amnt; |
257 | ptr[i] >>= amnt; | |
258 | ptr[i] |= prev_carry_over; | |
3d3e04ac | 259 | } |
260 | } | |
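Both shifting helpers can be exercised with a minimal standalone sketch (a hypothetical test harness, assuming 8-bit bytes and 0 < AMNT < BITS_PER_UNIT; the function names below are illustrative, not GCC's) that reproduces the AMNT = 2 examples from the comments above:

```c
/* Hypothetical sketch of the two carry-propagating shifts, assuming 8-bit
   bytes and 0 < amnt < 8.  It only reproduces the AMNT = 2 examples.  */
#include <stdio.h>

static void
shift_left_le (unsigned char *ptr, unsigned int sz, unsigned int amnt)
{
  unsigned char carry = 0;
  for (unsigned int i = 0; i < sz; i++)
    {
      unsigned char next_carry = ptr[i] >> (8 - amnt);
      ptr[i] = (unsigned char) ((ptr[i] << amnt) | carry);
      carry = next_carry;
    }
}

static void
shift_right_be (unsigned char *ptr, unsigned int sz, unsigned int amnt)
{
  unsigned char carry = 0;
  for (unsigned int i = 0; i < sz; i++)
    {
      unsigned char next_carry = (unsigned char) (ptr[i] << (8 - amnt));
      ptr[i] = (unsigned char) ((ptr[i] >> amnt) | carry);
      carry = next_carry;
    }
}

int main (void)
{
  /* Little-endian example: PTR[1]|PTR[0] = 00011111|11100000 shifted left
     by 2 becomes 01111111|10000000.  */
  unsigned char le[2] = { 0xe0, 0x1f };
  shift_left_le (le, 2, 2);
  printf ("LE: %02x %02x\n", le[1], le[0]);   /* expect 7f 80 */

  /* Big-endian example: PTR[0]|PTR[1] = 00011111|11100000 shifted right
     by 2 becomes 00000111|11111000.  */
  unsigned char be[2] = { 0x1f, 0xe0 };
  shift_right_be (be, 2, 2);
  printf ("BE: %02x %02x\n", be[0], be[1]);   /* expect 07 f8 */
  return 0;
}
```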
261 | ||
262 | /* Clear out LEN bits starting from bit START in the byte array | |
263 | PTR. This clears the bits to the *right* from START. | |
264 | START must be within [0, BITS_PER_UNIT) and counts starting from | |
265 | the least significant bit. */ | |
266 | ||
267 | static void | |
268 | clear_bit_region_be (unsigned char *ptr, unsigned int start, | |
269 | unsigned int len) | |
270 | { | |
271 | if (len == 0) | |
272 | return; | |
273 | /* Clear len bits to the right of start. */ | |
274 | else if (len <= start + 1) | |
275 | { | |
276 | unsigned char mask = (~(~0U << len)); | |
277 | mask = mask << (start + 1U - len); | |
278 | ptr[0] &= ~mask; | |
279 | } | |
280 | else if (start != BITS_PER_UNIT - 1) | |
281 | { | |
282 | clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1); | |
283 | clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1, | |
284 | len - (start % BITS_PER_UNIT) - 1); | |
285 | } | |
286 | else if (start == BITS_PER_UNIT - 1 | |
287 | && len > BITS_PER_UNIT) | |
288 | { | |
289 | unsigned int nbytes = len / BITS_PER_UNIT; | |
290 | for (unsigned int i = 0; i < nbytes; i++) | |
291 | ptr[i] = 0U; | |
292 | if (len % BITS_PER_UNIT != 0) | |
293 | clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1, | |
294 | len % BITS_PER_UNIT); | |
295 | } | |
296 | else | |
297 | gcc_unreachable (); | |
298 | } | |
299 | ||
300 | /* In the byte array PTR clear the bit region starting at bit | |
301 | START and extending for LEN bits. | |
302 | For regions spanning multiple bytes do this recursively until we reach | |
303 | zero LEN or a region contained within a single byte. */ | |
304 | ||
305 | static void | |
306 | clear_bit_region (unsigned char *ptr, unsigned int start, | |
307 | unsigned int len) | |
308 | { | |
309 | /* Degenerate base case. */ | |
310 | if (len == 0) | |
311 | return; | |
312 | else if (start >= BITS_PER_UNIT) | |
313 | clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len); | |
314 | /* Second base case. */ | |
315 | else if ((start + len) <= BITS_PER_UNIT) | |
316 | { | |
b1c71535 | 317 | unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len); |
3d3e04ac | 318 | mask >>= BITS_PER_UNIT - (start + len); |
319 | ||
320 | ptr[0] &= ~mask; | |
321 | ||
322 | return; | |
323 | } | |
324 | /* Clear most significant bits in a byte and proceed with the next byte. */ | |
325 | else if (start != 0) | |
326 | { | |
327 | clear_bit_region (ptr, start, BITS_PER_UNIT - start); | |
3d6071e9 | 328 | clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start)); |
3d3e04ac | 329 | } |
330 | /* Whole bytes need to be cleared. */ | |
331 | else if (start == 0 && len > BITS_PER_UNIT) | |
332 | { | |
333 | unsigned int nbytes = len / BITS_PER_UNIT; | |
7839cdcc | 334 | /* We could recurse on each byte but we clear whole bytes, so a simple |
335 | memset will do. */ | |
b1c71535 | 336 | memset (ptr, '\0', nbytes); |
3d3e04ac | 337 | /* Clear the remaining sub-byte region if there is one. */ |
338 | if (len % BITS_PER_UNIT != 0) | |
339 | clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT); | |
340 | } | |
341 | else | |
342 | gcc_unreachable (); | |
343 | } | |
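The bit numbering convention used by clear_bit_region is easy to misread, so here is a naive reference version (a hypothetical sketch, not the recursive implementation above): bit 0 is the least significant bit of PTR[0], bit 8 the least significant bit of PTR[1], and so on.

```c
/* Naive reference for the little-endian bit numbering: hypothetical sketch,
   assuming 8-bit bytes; the names are illustrative, not GCC's.  */
#include <stdio.h>

static void
clear_bits (unsigned char *ptr, unsigned int start, unsigned int len)
{
  for (unsigned int bit = start; bit < start + len; bit++)
    ptr[bit / 8] &= (unsigned char) ~(1u << (bit % 8));
}

int main (void)
{
  unsigned char buf[2] = { 0xff, 0xff };
  /* Clear 8 bits starting at bit 4: the high nibble of byte 0 and the
     low nibble of byte 1.  */
  clear_bits (buf, 4, 8);
  printf ("%02x %02x\n", buf[0], buf[1]);  /* expect 0f f0 */
  return 0;
}
```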
344 | ||
345 | /* Write BITLEN bits of EXPR to the byte array PTR at | |
346 | bit position BITPOS. PTR should contain TOTAL_BYTES elements. | |
347 | Return true if the operation succeeded. */ | |
348 | ||
349 | static bool | |
350 | encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos, | |
b1c71535 | 351 | unsigned int total_bytes) |
3d3e04ac | 352 | { |
353 | unsigned int first_byte = bitpos / BITS_PER_UNIT; | |
354 | tree tmp_int = expr; | |
a425d9af | 355 | bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT) |
356 | || (bitpos % BITS_PER_UNIT) | |
517be012 | 357 | || !int_mode_for_size (bitlen, 0).exists ()); |
3d3e04ac | 358 | |
359 | if (!sub_byte_op_p) | |
b1c71535 | 360 | return (native_encode_expr (tmp_int, ptr + first_byte, total_bytes, 0) |
361 | != 0); | |
3d3e04ac | 362 | |
363 | /* LITTLE-ENDIAN | |
364 | We are writing a non byte-sized quantity or at a position that is not | |
365 | at a byte boundary. | |
366 | |--------|--------|--------| ptr + first_byte | |
367 | ^ ^ | |
368 | xxx xxxxxxxx xxx< bp> | |
369 | |______EXPR____| | |
370 | ||
b1c71535 | 371 | First native_encode_expr EXPR into a temporary buffer and shift each |
3d3e04ac | 372 | byte in the buffer by 'bp' (carrying the bits over as necessary). |
373 | |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000| | |
374 | <------bitlen---->< bp> | |
375 | Then we clear the destination bits: | |
376 | |---00000|00000000|000-----| ptr + first_byte | |
377 | <-------bitlen--->< bp> | |
378 | ||
379 | Finally we ORR the bytes of the shifted EXPR into the cleared region: | |
380 | |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte. | |
381 | ||
382 | BIG-ENDIAN | |
383 | We are writing a non byte-sized quantity or at a position that is not | |
384 | at a byte boundary. | |
385 | ptr + first_byte |--------|--------|--------| | |
386 | ^ ^ | |
387 | <bp >xxx xxxxxxxx xxx | |
388 | |_____EXPR_____| | |
389 | ||
b1c71535 | 390 | First native_encode_expr EXPR into a temporary buffer and shift each |
3d3e04ac | 391 | byte in the buffer to the right by (carrying the bits over as necessary). |
392 | We shift by as much as needed to align the most significant bit of EXPR | |
393 | with bitpos: | |
394 | |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000| | |
395 | <---bitlen----> <bp ><-----bitlen-----> | |
396 | Then we clear the destination bits: | |
397 | ptr + first_byte |-----000||00000000||00000---| | |
398 | <bp ><-------bitlen-----> | |
399 | ||
400 | Finally we ORR the bytes of the shifted EXPR into the cleared region: | |
401 | ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|. | |
402 | The awkwardness comes from the fact that bitpos is counted from the | |
403 | most significant bit of a byte. */ | |
404 | ||
405 | /* Allocate an extra byte so that we have space to shift into. */ | |
406 | unsigned int byte_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (expr))) + 1; | |
407 | unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size); | |
b1c71535 | 408 | memset (tmpbuf, '\0', byte_size); |
3d3e04ac | 409 | /* The store detection code should only have allowed constants that are |
410 | accepted by native_encode_expr. */ | |
a425d9af | 411 | if (native_encode_expr (expr, tmpbuf, byte_size - 1, 0) == 0) |
3d3e04ac | 412 | gcc_unreachable (); |
413 | ||
414 | /* The native_encode_expr machinery uses TYPE_MODE to determine how many | |
415 | bytes to write. This means it can write more than | |
416 | ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example | |
417 | write 8 bytes for a bitlen of 40). Skip the bytes that are not within | |
418 | bitlen and zero out the bits that are not relevant as well (that may | |
419 | contain a sign bit due to sign-extension). */ | |
420 | unsigned int padding | |
421 | = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1; | |
a425d9af | 422 | /* On big-endian the padding is at the 'front' so just skip the initial |
423 | bytes. */ | |
424 | if (BYTES_BIG_ENDIAN) | |
425 | tmpbuf += padding; | |
426 | ||
427 | byte_size -= padding; | |
428 | ||
429 | if (bitlen % BITS_PER_UNIT != 0) | |
3d3e04ac | 430 | { |
5e922e43 | 431 | if (BYTES_BIG_ENDIAN) |
a425d9af | 432 | clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1, |
433 | BITS_PER_UNIT - (bitlen % BITS_PER_UNIT)); | |
434 | else | |
435 | clear_bit_region (tmpbuf, bitlen, | |
436 | byte_size * BITS_PER_UNIT - bitlen); | |
3d3e04ac | 437 | } |
a425d9af | 438 | /* Left shifting relies on the last byte being clear if bitlen is |
439 | a multiple of BITS_PER_UNIT, which might not be clear if | |
440 | there are padding bytes. */ | |
441 | else if (!BYTES_BIG_ENDIAN) | |
442 | tmpbuf[byte_size - 1] = '\0'; | |
3d3e04ac | 443 | |
444 | /* Clear the bit region in PTR where the bits from TMPBUF will be | |
b1c71535 | 445 | inserted into. */ |
3d3e04ac | 446 | if (BYTES_BIG_ENDIAN) |
447 | clear_bit_region_be (ptr + first_byte, | |
448 | BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen); | |
449 | else | |
450 | clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen); | |
451 | ||
452 | int shift_amnt; | |
453 | int bitlen_mod = bitlen % BITS_PER_UNIT; | |
454 | int bitpos_mod = bitpos % BITS_PER_UNIT; | |
455 | ||
456 | bool skip_byte = false; | |
457 | if (BYTES_BIG_ENDIAN) | |
458 | { | |
459 | /* BITPOS and BITLEN are exactly aligned and no shifting | |
460 | is necessary. */ | |
461 | if (bitpos_mod + bitlen_mod == BITS_PER_UNIT | |
462 | || (bitpos_mod == 0 && bitlen_mod == 0)) | |
463 | shift_amnt = 0; | |
464 | /* |. . . . . . . .| | |
465 | <bp > <blen >. | |
466 | We always shift right for BYTES_BIG_ENDIAN so shift the beginning | |
467 | of the value until it aligns with 'bp' in the next byte over. */ | |
468 | else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT) | |
469 | { | |
470 | shift_amnt = bitlen_mod + bitpos_mod; | |
471 | skip_byte = bitlen_mod != 0; | |
472 | } | |
473 | /* |. . . . . . . .| | |
474 | <----bp---> | |
475 | <---blen---->. | |
476 | Shift the value right within the same byte so it aligns with 'bp'. */ | |
477 | else | |
478 | shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT; | |
479 | } | |
480 | else | |
481 | shift_amnt = bitpos % BITS_PER_UNIT; | |
482 | ||
483 | /* Create the shifted version of EXPR. */ | |
484 | if (!BYTES_BIG_ENDIAN) | |
b1c71535 | 485 | { |
486 | shift_bytes_in_array (tmpbuf, byte_size, shift_amnt); | |
487 | if (shift_amnt == 0) | |
488 | byte_size--; | |
489 | } | |
3d3e04ac | 490 | else |
491 | { | |
492 | gcc_assert (BYTES_BIG_ENDIAN); | |
493 | shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt); | |
494 | /* If shifting right forced us to move into the next byte skip the now | |
495 | empty byte. */ | |
496 | if (skip_byte) | |
497 | { | |
498 | tmpbuf++; | |
499 | byte_size--; | |
500 | } | |
501 | } | |
502 | ||
503 | /* Insert the bits from TMPBUF. */ | |
504 | for (unsigned int i = 0; i < byte_size; i++) | |
505 | ptr[first_byte + i] |= tmpbuf[i]; | |
506 | ||
507 | return true; | |
508 | } | |
509 | ||
510 | /* Sorting function for store_immediate_info objects. | |
511 | Sorts them by bitposition. */ | |
512 | ||
513 | static int | |
514 | sort_by_bitpos (const void *x, const void *y) | |
515 | { | |
516 | store_immediate_info *const *tmp = (store_immediate_info * const *) x; | |
517 | store_immediate_info *const *tmp2 = (store_immediate_info * const *) y; | |
518 | ||
61d052e5 | 519 | if ((*tmp)->bitpos < (*tmp2)->bitpos) |
3d3e04ac | 520 | return -1; |
521 | else if ((*tmp)->bitpos > (*tmp2)->bitpos) | |
522 | return 1; | |
61d052e5 | 523 | else |
ca4982c2 | 524 | /* If they are the same let's use the order which is guaranteed to |
525 | be different. */ | |
526 | return (*tmp)->order - (*tmp2)->order; | |
3d3e04ac | 527 | } |
528 | ||
529 | /* Sorting function for store_immediate_info objects. | |
530 | Sorts them by the order field. */ | |
531 | ||
532 | static int | |
533 | sort_by_order (const void *x, const void *y) | |
534 | { | |
535 | store_immediate_info *const *tmp = (store_immediate_info * const *) x; | |
536 | store_immediate_info *const *tmp2 = (store_immediate_info * const *) y; | |
537 | ||
538 | if ((*tmp)->order < (*tmp2)->order) | |
539 | return -1; | |
540 | else if ((*tmp)->order > (*tmp2)->order) | |
541 | return 1; | |
542 | ||
543 | gcc_unreachable (); | |
544 | } | |
545 | ||
546 | /* Initialize a merged_store_group object from a store_immediate_info | |
547 | object. */ | |
548 | ||
549 | merged_store_group::merged_store_group (store_immediate_info *info) | |
550 | { | |
551 | start = info->bitpos; | |
552 | width = info->bitsize; | |
553 | /* VAL has memory allocated for it in apply_stores once the group | |
554 | width has been finalized. */ | |
555 | val = NULL; | |
f85e7cb7 | 556 | align = get_object_alignment (gimple_assign_lhs (info->stmt)); |
3d3e04ac | 557 | stores.create (1); |
558 | stores.safe_push (info); | |
559 | last_stmt = info->stmt; | |
560 | last_order = info->order; | |
561 | first_stmt = last_stmt; | |
562 | first_order = last_order; | |
563 | buf_size = 0; | |
564 | } | |
565 | ||
566 | merged_store_group::~merged_store_group () | |
567 | { | |
568 | if (val) | |
569 | XDELETEVEC (val); | |
570 | } | |
571 | ||
572 | /* Merge a store recorded by INFO into this merged store. | |
573 | The store is not overlapping with the existing recorded | |
574 | stores. */ | |
575 | ||
576 | void | |
577 | merged_store_group::merge_into (store_immediate_info *info) | |
578 | { | |
579 | unsigned HOST_WIDE_INT wid = info->bitsize; | |
580 | /* Make sure we're inserting in the position we think we're inserting. */ | |
581 | gcc_assert (info->bitpos == start + width); | |
582 | ||
583 | width += wid; | |
584 | gimple *stmt = info->stmt; | |
585 | stores.safe_push (info); | |
586 | if (info->order > last_order) | |
587 | { | |
588 | last_order = info->order; | |
589 | last_stmt = stmt; | |
590 | } | |
591 | else if (info->order < first_order) | |
592 | { | |
593 | first_order = info->order; | |
594 | first_stmt = stmt; | |
595 | } | |
596 | } | |
597 | ||
598 | /* Merge a store described by INFO into this merged store. | |
599 | INFO overlaps in some way with the current store (i.e. it's not contiguous | |
600 | which is handled by merged_store_group::merge_into). */ | |
601 | ||
602 | void | |
603 | merged_store_group::merge_overlapping (store_immediate_info *info) | |
604 | { | |
605 | gimple *stmt = info->stmt; | |
606 | stores.safe_push (info); | |
607 | ||
608 | /* If the store extends the size of the group, extend the width. */ | |
609 | if ((info->bitpos + info->bitsize) > (start + width)) | |
610 | width += info->bitpos + info->bitsize - (start + width); | |
611 | ||
612 | if (info->order > last_order) | |
613 | { | |
614 | last_order = info->order; | |
615 | last_stmt = stmt; | |
616 | } | |
617 | else if (info->order < first_order) | |
618 | { | |
619 | first_order = info->order; | |
620 | first_stmt = stmt; | |
621 | } | |
622 | } | |
623 | ||
624 | /* Go through all the recorded stores in this group in program order and | |
625 | apply their values to the VAL byte array to create the final merged | |
626 | value. Return true if the operation succeeded. */ | |
627 | ||
628 | bool | |
629 | merged_store_group::apply_stores () | |
630 | { | |
631 | /* The total width of the stores must add up to a whole number of bytes | |
632 | and start at a byte boundary. We don't support emitting bitfield | |
633 | references for now. Also, make sure we have more than one store | |
634 | in the group, otherwise we cannot merge anything. */ | |
635 | if (width % BITS_PER_UNIT != 0 | |
636 | || start % BITS_PER_UNIT != 0 | |
637 | || stores.length () == 1) | |
638 | return false; | |
639 | ||
640 | stores.qsort (sort_by_order); | |
641 | struct store_immediate_info *info; | |
642 | unsigned int i; | |
643 | /* Create a buffer of a size that is 2 times the number of bytes we're | |
644 | storing. That way native_encode_expr can write power-of-2-sized | |
645 | chunks without overrunning. */ | |
b1c71535 | 646 | buf_size = 2 * (ROUND_UP (width, BITS_PER_UNIT) / BITS_PER_UNIT); |
3d3e04ac | 647 | val = XCNEWVEC (unsigned char, buf_size); |
648 | ||
649 | FOR_EACH_VEC_ELT (stores, i, info) | |
650 | { | |
651 | unsigned int pos_in_buffer = info->bitpos - start; | |
f85e7cb7 | 652 | bool ret = encode_tree_to_bitpos (gimple_assign_rhs1 (info->stmt), |
653 | val, info->bitsize, | |
654 | pos_in_buffer, buf_size); | |
3d3e04ac | 655 | if (dump_file && (dump_flags & TDF_DETAILS)) |
656 | { | |
657 | if (ret) | |
658 | { | |
659 | fprintf (dump_file, "After writing "); | |
f85e7cb7 | 660 | print_generic_expr (dump_file, |
661 | gimple_assign_rhs1 (info->stmt), 0); | |
3d3e04ac | 662 | fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC |
663 | " at position %d the merged region contains:\n", | |
664 | info->bitsize, pos_in_buffer); | |
665 | dump_char_array (dump_file, val, buf_size); | |
666 | } | |
667 | else | |
668 | fprintf (dump_file, "Failed to merge stores\n"); | |
669 | } | |
670 | if (!ret) | |
671 | return false; | |
672 | } | |
673 | return true; | |
674 | } | |
675 | ||
676 | /* Structure describing the store chain. */ | |
677 | ||
678 | struct imm_store_chain_info | |
679 | { | |
3a3ba7de | 680 | /* Doubly-linked list that imposes an order on chain processing. |
681 | PNXP (prev's next pointer) points to the head of a list, or to | |
682 | the next field in the previous chain in the list. | |
683 | See pass_store_merging::m_stores_head for more rationale. */ | |
684 | imm_store_chain_info *next, **pnxp; | |
f85e7cb7 | 685 | tree base_addr; |
3d3e04ac | 686 | auto_vec<struct store_immediate_info *> m_store_info; |
687 | auto_vec<merged_store_group *> m_merged_store_groups; | |
688 | ||
3a3ba7de | 689 | imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a) |
690 | : next (inspt), pnxp (&inspt), base_addr (b_a) | |
691 | { | |
692 | inspt = this; | |
693 | if (next) | |
694 | { | |
695 | gcc_checking_assert (pnxp == next->pnxp); | |
696 | next->pnxp = &next; | |
697 | } | |
698 | } | |
699 | ~imm_store_chain_info () | |
700 | { | |
701 | *pnxp = next; | |
702 | if (next) | |
703 | { | |
704 | gcc_checking_assert (&next == next->pnxp); | |
705 | next->pnxp = pnxp; | |
706 | } | |
707 | } | |
f85e7cb7 | 708 | bool terminate_and_process_chain (); |
3d3e04ac | 709 | bool coalesce_immediate_stores (); |
f85e7cb7 | 710 | bool output_merged_store (merged_store_group *); |
711 | bool output_merged_stores (); | |
3d3e04ac | 712 | }; |
713 | ||
714 | const pass_data pass_data_tree_store_merging = { | |
715 | GIMPLE_PASS, /* type */ | |
716 | "store-merging", /* name */ | |
717 | OPTGROUP_NONE, /* optinfo_flags */ | |
718 | TV_GIMPLE_STORE_MERGING, /* tv_id */ | |
719 | PROP_ssa, /* properties_required */ | |
720 | 0, /* properties_provided */ | |
721 | 0, /* properties_destroyed */ | |
722 | 0, /* todo_flags_start */ | |
723 | TODO_update_ssa, /* todo_flags_finish */ | |
724 | }; | |
725 | ||
726 | class pass_store_merging : public gimple_opt_pass | |
727 | { | |
728 | public: | |
729 | pass_store_merging (gcc::context *ctxt) | |
2d27e5c1 | 730 | : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head () |
3d3e04ac | 731 | { |
732 | } | |
733 | ||
734 | /* Pass not supported for PDP-endianness. */ | |
735 | virtual bool | |
736 | gate (function *) | |
737 | { | |
738 | return flag_store_merging && (WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN); | |
739 | } | |
740 | ||
741 | virtual unsigned int execute (function *); | |
742 | ||
743 | private: | |
744 | hash_map<tree_operand_hash, struct imm_store_chain_info *> m_stores; | |
745 | ||
3a3ba7de | 746 | /* Form a doubly-linked stack of the elements of m_stores, so that |
747 | we can iterate over them in a predictable way. Using this order | |
748 | avoids extraneous differences in the compiler output just because | |
749 | of tree pointer variations (e.g. different chains end up in | |
750 | different positions of m_stores, so they are handled in different | |
751 | orders, so they allocate or release SSA names in different | |
752 | orders, and when they get reused, subsequent passes end up | |
753 | getting different SSA names, which may ultimately change | |
754 | decisions when going out of SSA). */ | |
755 | imm_store_chain_info *m_stores_head; | |
756 | ||
3d3e04ac | 757 | bool terminate_and_process_all_chains (); |
4de7f8df | 758 | bool terminate_all_aliasing_chains (imm_store_chain_info **, |
f85e7cb7 | 759 | bool, gimple *); |
760 | bool terminate_and_release_chain (imm_store_chain_info *); | |
3d3e04ac | 761 | }; // class pass_store_merging |
762 | ||
763 | /* Terminate and process all recorded chains. Return true if any changes | |
764 | were made. */ | |
765 | ||
766 | bool | |
767 | pass_store_merging::terminate_and_process_all_chains () | |
768 | { | |
3d3e04ac | 769 | bool ret = false; |
3a3ba7de | 770 | while (m_stores_head) |
771 | ret |= terminate_and_release_chain (m_stores_head); | |
772 | gcc_assert (m_stores.elements () == 0); | |
773 | gcc_assert (m_stores_head == NULL); | |
3d3e04ac | 774 | |
775 | return ret; | |
776 | } | |
777 | ||
778 | /* Terminate all chains that are affected by the assignment to DEST, appearing | |
779 | in statement STMT, where DEST ultimately points to the object BASE. Return true if | |
780 | at least one aliasing chain was terminated. BASE and DEST are allowed to | |
781 | be NULL_TREE. In that case the aliasing checks are performed on the whole | |
782 | statement rather than a particular operand in it. VAR_OFFSET_P signifies | |
783 | whether STMT represents a store to BASE offset by a variable amount. | |
784 | If that is the case we have to terminate any chain anchored at BASE. */ | |
785 | ||
786 | bool | |
4de7f8df | 787 | pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info |
f85e7cb7 | 788 | **chain_info, |
3d3e04ac | 789 | bool var_offset_p, |
790 | gimple *stmt) | |
791 | { | |
792 | bool ret = false; | |
793 | ||
794 | /* If the statement doesn't touch memory it can't alias. */ | |
795 | if (!gimple_vuse (stmt)) | |
796 | return false; | |
797 | ||
3d3e04ac | 798 | /* Check if the assignment destination (BASE) is part of a store chain. |
799 | This is to catch non-constant stores to destinations that may be part | |
800 | of a chain. */ | |
f85e7cb7 | 801 | if (chain_info) |
3d3e04ac | 802 | { |
f85e7cb7 | 803 | /* We have a chain at BASE and we're writing to [BASE + <variable>]. |
804 | This can interfere with any of the stores so terminate | |
805 | the chain. */ | |
806 | if (var_offset_p) | |
3d3e04ac | 807 | { |
f85e7cb7 | 808 | terminate_and_release_chain (*chain_info); |
809 | ret = true; | |
810 | } | |
811 | /* Otherwise go through every store in the chain to see if it | |
812 | aliases with any of them. */ | |
813 | else | |
814 | { | |
815 | struct store_immediate_info *info; | |
816 | unsigned int i; | |
817 | FOR_EACH_VEC_ELT ((*chain_info)->m_store_info, i, info) | |
3d3e04ac | 818 | { |
4de7f8df | 819 | if (ref_maybe_used_by_stmt_p (stmt, |
820 | gimple_assign_lhs (info->stmt)) | |
821 | || stmt_may_clobber_ref_p (stmt, | |
822 | gimple_assign_lhs (info->stmt))) | |
3d3e04ac | 823 | { |
f85e7cb7 | 824 | if (dump_file && (dump_flags & TDF_DETAILS)) |
3d3e04ac | 825 | { |
f85e7cb7 | 826 | fprintf (dump_file, |
827 | "stmt causes chain termination:\n"); | |
1ffa4346 | 828 | print_gimple_stmt (dump_file, stmt, 0); |
3d3e04ac | 829 | } |
f85e7cb7 | 830 | terminate_and_release_chain (*chain_info); |
831 | ret = true; | |
832 | break; | |
3d3e04ac | 833 | } |
834 | } | |
835 | } | |
836 | } | |
837 | ||
3d3e04ac | 838 | /* Check for aliasing with all other store chains. */ |
3a3ba7de | 839 | for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next) |
3d3e04ac | 840 | { |
3a3ba7de | 841 | next = cur->next; |
842 | ||
3d3e04ac | 843 | /* We already checked all the stores in chain_info and terminated the |
844 | chain if necessary. Skip it here. */ | |
3a3ba7de | 845 | if (chain_info && (*chain_info) == cur) |
3d3e04ac | 846 | continue; |
847 | ||
f85e7cb7 | 848 | /* We can't use the base object here as that does not reliably exist. |
849 | Build an ao_ref from the base object address (if we know the | |
850 | minimum and maximum offset and the maximum size we could improve | |
851 | things here). */ | |
852 | ao_ref chain_ref; | |
3a3ba7de | 853 | ao_ref_init_from_ptr_and_size (&chain_ref, cur->base_addr, NULL_TREE); |
f85e7cb7 | 854 | if (ref_maybe_used_by_stmt_p (stmt, &chain_ref) |
855 | || stmt_may_clobber_ref_p_1 (stmt, &chain_ref)) | |
3d3e04ac | 856 | { |
3a3ba7de | 857 | terminate_and_release_chain (cur); |
3d3e04ac | 858 | ret = true; |
859 | } | |
860 | } | |
861 | ||
862 | return ret; | |
863 | } | |
864 | ||
865 | /* Helper function. Terminate the recorded chain storing to base object | |
866 | BASE. Return true if the merging and output was successful. The m_stores | |
867 | entry is removed after the processing in any case. */ | |
868 | ||
869 | bool | |
f85e7cb7 | 870 | pass_store_merging::terminate_and_release_chain (imm_store_chain_info *chain_info) |
3d3e04ac | 871 | { |
f85e7cb7 | 872 | bool ret = chain_info->terminate_and_process_chain (); |
873 | m_stores.remove (chain_info->base_addr); | |
874 | delete chain_info; | |
3d3e04ac | 875 | return ret; |
876 | } | |
877 | ||
878 | /* Go through the candidate stores recorded in m_store_info and merge them | |
879 | into merged_store_group objects recorded into m_merged_store_groups | |
880 | representing the widened stores. Return true if coalescing was successful | |
881 | and the number of widened stores is fewer than the original number | |
882 | of stores. */ | |
883 | ||
884 | bool | |
885 | imm_store_chain_info::coalesce_immediate_stores () | |
886 | { | |
887 | /* Anything less can't be processed. */ | |
888 | if (m_store_info.length () < 2) | |
889 | return false; | |
890 | ||
891 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
892 | fprintf (dump_file, "Attempting to coalesce %u stores in chain.\n", | |
893 | m_store_info.length ()); | |
894 | ||
895 | store_immediate_info *info; | |
896 | unsigned int i; | |
897 | ||
898 | /* Order the stores by the bitposition they write to. */ | |
899 | m_store_info.qsort (sort_by_bitpos); | |
900 | ||
901 | info = m_store_info[0]; | |
902 | merged_store_group *merged_store = new merged_store_group (info); | |
903 | ||
904 | FOR_EACH_VEC_ELT (m_store_info, i, info) | |
905 | { | |
906 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
907 | { | |
908 | fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC | |
909 | " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:\n", | |
910 | i, info->bitsize, info->bitpos); | |
1ffa4346 | 911 | print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt)); |
3d3e04ac | 912 | fprintf (dump_file, "\n------------\n"); |
913 | } | |
914 | ||
915 | if (i == 0) | |
916 | continue; | |
917 | ||
918 | /* |---store 1---| | |
919 | |---store 2---| | |
920 | Overlapping stores. */ | |
921 | unsigned HOST_WIDE_INT start = info->bitpos; | |
922 | if (IN_RANGE (start, merged_store->start, | |
923 | merged_store->start + merged_store->width - 1)) | |
924 | { | |
925 | merged_store->merge_overlapping (info); | |
926 | continue; | |
927 | } | |
928 | ||
929 | /* |---store 1---| <gap> |---store 2---|. | |
930 | Gap between stores. Start a new group. */ | |
931 | if (start != merged_store->start + merged_store->width) | |
932 | { | |
933 | /* Try to apply all the stores recorded for the group to determine | |
934 | the bitpattern they write and discard it if that fails. | |
935 | This will also reject single-store groups. */ | |
936 | if (!merged_store->apply_stores ()) | |
937 | delete merged_store; | |
938 | else | |
939 | m_merged_store_groups.safe_push (merged_store); | |
940 | ||
941 | merged_store = new merged_store_group (info); | |
942 | ||
943 | continue; | |
944 | } | |
945 | ||
946 | /* |---store 1---||---store 2---| | |
947 | This store is consecutive to the previous one. | |
948 | Merge it into the current store group. */ | |
949 | merged_store->merge_into (info); | |
950 | } | |
951 | ||
952 | /* Record or discard the last store group. */ | |
953 | if (!merged_store->apply_stores ()) | |
954 | delete merged_store; | |
955 | else | |
956 | m_merged_store_groups.safe_push (merged_store); | |
957 | ||
958 | gcc_assert (m_merged_store_groups.length () <= m_store_info.length ()); | |
959 | bool success | |
960 | = !m_merged_store_groups.is_empty () | |
961 | && m_merged_store_groups.length () < m_store_info.length (); | |
962 | ||
963 | if (success && dump_file) | |
964 | fprintf (dump_file, "Coalescing successful!\n" | |
965 | "Merged into %u stores\n", | |
966 | m_merged_store_groups.length ()); | |
967 | ||
968 | return success; | |
969 | } | |
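The grouping decision made in the loop above (merge on overlap, merge on exact contiguity, start a new group on a gap) can be replayed with a small standalone sketch using the phase 2 example from the header comment; offsets and sizes are in bits, mirroring store_immediate_info. The struct and function here are illustrative, not GCC types.

```c
/* Hypothetical sketch of the phase 2 grouping decision on the byte stores
   to p, p+1, p+3, p+4, p+5 and p+6 from the header comment.  */
#include <stdio.h>

struct rec { unsigned int bitpos, bitsize; };

int main (void)
{
  struct rec stores[] = { {0,8}, {8,8}, {24,8}, {32,8}, {40,8}, {48,8} };
  unsigned int n = sizeof (stores) / sizeof (stores[0]);

  unsigned int start = stores[0].bitpos, width = stores[0].bitsize;
  for (unsigned int i = 1; i < n; i++)
    {
      unsigned int pos = stores[i].bitpos;
      if (pos >= start && pos < start + width)
	{
	  /* Overlap: extend the current group if needed.  */
	  if (pos + stores[i].bitsize > start + width)
	    width = pos + stores[i].bitsize - start;
	}
      else if (pos == start + width)
	/* Exactly contiguous: merge into the current group.  */
	width += stores[i].bitsize;
      else
	{
	  /* Gap: close the current group and start a new one.  */
	  printf ("group: bytes [%u, %u]\n", start / 8,
		  (start + width) / 8 - 1);
	  start = pos;
	  width = stores[i].bitsize;
	}
    }
  printf ("group: bytes [%u, %u]\n", start / 8, (start + width) / 8 - 1);
  /* Prints the two groups from the comment: [0, 1] and [3, 6].  */
  return 0;
}
```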
970 | ||
971 | /* Return the type to use for the merged stores described by STMTS. | |
972 | This is needed to get the alias sets right. */ | |
973 | ||
974 | static tree | |
975 | get_alias_type_for_stmts (auto_vec<gimple *> &stmts) | |
976 | { | |
977 | gimple *stmt; | |
978 | unsigned int i; | |
979 | tree lhs = gimple_assign_lhs (stmts[0]); | |
980 | tree type = reference_alias_ptr_type (lhs); | |
981 | ||
982 | FOR_EACH_VEC_ELT (stmts, i, stmt) | |
983 | { | |
984 | if (i == 0) | |
985 | continue; | |
986 | ||
987 | lhs = gimple_assign_lhs (stmt); | |
988 | tree type1 = reference_alias_ptr_type (lhs); | |
989 | if (!alias_ptr_types_compatible_p (type, type1)) | |
990 | return ptr_type_node; | |
991 | } | |
992 | return type; | |
993 | } | |
994 | ||
995 | /* Return the location_t information we can find among the statements | |
996 | in STMTS. */ | |
997 | ||
998 | static location_t | |
999 | get_location_for_stmts (auto_vec<gimple *> &stmts) | |
1000 | { | |
1001 | gimple *stmt; | |
1002 | unsigned int i; | |
1003 | ||
1004 | FOR_EACH_VEC_ELT (stmts, i, stmt) | |
1005 | if (gimple_has_location (stmt)) | |
1006 | return gimple_location (stmt); | |
1007 | ||
1008 | return UNKNOWN_LOCATION; | |
1009 | } | |
1010 | ||
1011 | /* Used to describe a store resulting from splitting a wide store into smaller | |
1012 | regularly-sized stores in split_group. */ | |
1013 | ||
1014 | struct split_store | |
1015 | { | |
1016 | unsigned HOST_WIDE_INT bytepos; | |
1017 | unsigned HOST_WIDE_INT size; | |
1018 | unsigned HOST_WIDE_INT align; | |
1019 | auto_vec<gimple *> orig_stmts; | |
1020 | split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, | |
1021 | unsigned HOST_WIDE_INT); | |
1022 | }; | |
1023 | ||
1024 | /* Simple constructor. */ | |
1025 | ||
1026 | split_store::split_store (unsigned HOST_WIDE_INT bp, | |
1027 | unsigned HOST_WIDE_INT sz, | |
1028 | unsigned HOST_WIDE_INT al) | |
1029 | : bytepos (bp), size (sz), align (al) | |
1030 | { | |
1031 | orig_stmts.create (0); | |
1032 | } | |
1033 | ||
1034 | /* Record all statements corresponding to stores in GROUP that write to | |
1035 | the region starting at BITPOS that is of size BITSIZE. Record such | |
1036 | statements in STMTS. The stores in GROUP must be sorted by | |
1037 | bitposition. */ | |
1038 | ||
1039 | static void | |
1040 | find_constituent_stmts (struct merged_store_group *group, | |
1041 | auto_vec<gimple *> &stmts, | |
1042 | unsigned HOST_WIDE_INT bitpos, | |
1043 | unsigned HOST_WIDE_INT bitsize) | |
1044 | { | |
1045 | struct store_immediate_info *info; | |
1046 | unsigned int i; | |
1047 | unsigned HOST_WIDE_INT end = bitpos + bitsize; | |
1048 | FOR_EACH_VEC_ELT (group->stores, i, info) | |
1049 | { | |
1050 | unsigned HOST_WIDE_INT stmt_start = info->bitpos; | |
1051 | unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize; | |
1052 | if (stmt_end < bitpos) | |
1053 | continue; | |
1054 | /* The stores in GROUP are ordered by bitposition so if we're past | |
1055 | the region for this group return early. */ | |
1056 | if (stmt_start > end) | |
1057 | return; | |
1058 | ||
1059 | if (IN_RANGE (stmt_start, bitpos, bitpos + bitsize) | |
1060 | || IN_RANGE (stmt_end, bitpos, end) | |
1061 | /* The statement writes a region that completely encloses the region | |
1062 | that this group writes. Unlikely to occur but let's | |
1063 | handle it. */ | |
1064 | || IN_RANGE (bitpos, stmt_start, stmt_end)) | |
1065 | stmts.safe_push (info->stmt); | |
1066 | } | |
1067 | } | |
1068 | ||
1069 | /* Split a merged store described by GROUP by populating the SPLIT_STORES | |
1070 | vector with split_store structs describing the byte offset (from the base), | |
1071 | the bit size and alignment of each store as well as the original statements | |
1072 | involved in each such split group. | |
1073 | This is to separate the splitting strategy from the statement | |
1074 | building/emission/linking done in output_merged_store. | |
1075 | At the moment just start with the widest possible size and keep emitting | |
1076 | the widest we can until we have emitted all the bytes, halving the size | |
1077 | when appropriate. */ | |
1078 | ||
1079 | static bool | |
1080 | split_group (merged_store_group *group, | |
1081 | auto_vec<struct split_store *> &split_stores) | |
1082 | { | |
1083 | unsigned HOST_WIDE_INT pos = group->start; | |
1084 | unsigned HOST_WIDE_INT size = group->width; | |
1085 | unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT; | |
1086 | unsigned HOST_WIDE_INT align = group->align; | |
1087 | ||
1088 | /* We don't handle partial bitfields for now. We shouldn't have | |
1089 | reached this far. */ | |
1090 | gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0)); | |
1091 | ||
1092 | bool allow_unaligned | |
1093 | = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED); | |
1094 | ||
1095 | unsigned int try_size = MAX_STORE_BITSIZE; | |
1096 | while (try_size > size | |
1097 | || (!allow_unaligned | |
1098 | && try_size > align)) | |
1099 | { | |
1100 | try_size /= 2; | |
1101 | if (try_size < BITS_PER_UNIT) | |
1102 | return false; | |
1103 | } | |
1104 | ||
1105 | unsigned HOST_WIDE_INT try_pos = bytepos; | |
1106 | group->stores.qsort (sort_by_bitpos); | |
1107 | ||
1108 | while (size > 0) | |
1109 | { | |
1110 | struct split_store *store = new split_store (try_pos, try_size, align); | |
1111 | unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT; | |
1112 | find_constituent_stmts (group, store->orig_stmts, try_bitpos, try_size); | |
1113 | split_stores.safe_push (store); | |
1114 | ||
1115 | try_pos += try_size / BITS_PER_UNIT; | |
1116 | ||
1117 | size -= try_size; | |
1118 | align = try_size; | |
1119 | while (size < try_size) | |
1120 | try_size /= 2; | |
1121 | } | |
1122 | return true; | |
1123 | } | |
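A tiny standalone sketch shows how the size-halving strategy above turns the 40-bit example from the header comment into a 32-bit store followed by an 8-bit store. It assumes a 64-bit word and that unaligned stores are allowed, and it ignores the alignment capping that split_group also applies.

```c
/* Hypothetical sketch of the size-halving split: start from the widest
   store the (assumed 64-bit) word allows and keep halving until the whole
   region is covered.  */
#include <stdio.h>

int main (void)
{
  unsigned int max_store_bits = 64;  /* stand-in for MAX_STORE_BITSIZE */
  unsigned int size = 40;            /* a 40-bit merged region */
  unsigned int pos = 0;

  unsigned int try_size = max_store_bits;
  while (try_size > size)
    try_size /= 2;                   /* 64 -> 32 */

  while (size > 0)
    {
      printf ("emit %u-bit store at byte offset %u\n", try_size, pos / 8);
      pos += try_size;
      size -= try_size;
      while (size > 0 && size < try_size)
	try_size /= 2;               /* 8 bits remaining -> 8-bit store */
    }
  /* Prints: emit 32-bit store at byte offset 0
	     emit 8-bit store at byte offset 4  */
  return 0;
}
```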
1124 | ||
1125 | /* Given a merged store group GROUP output the widened version of it. | |
1126 | The store chain is against the base object BASE. | |
1127 | Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output | |
1128 | unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive. | |
1129 | Make sure that the number of statements output is less than the number of | |
1130 | original statements. If a better sequence is possible emit it and | |
1131 | return true. */ | |
1132 | ||
1133 | bool | |
f85e7cb7 | 1134 | imm_store_chain_info::output_merged_store (merged_store_group *group) |
3d3e04ac | 1135 | { |
1136 | unsigned HOST_WIDE_INT start_byte_pos = group->start / BITS_PER_UNIT; | |
1137 | ||
1138 | unsigned int orig_num_stmts = group->stores.length (); | |
1139 | if (orig_num_stmts < 2) | |
1140 | return false; | |
1141 | ||
1142 | auto_vec<struct split_store *> split_stores; | |
1143 | split_stores.create (0); | |
1144 | if (!split_group (group, split_stores)) | |
1145 | return false; | |
1146 | ||
1147 | gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt); | |
1148 | gimple_seq seq = NULL; | |
1149 | unsigned int num_stmts = 0; | |
1150 | tree last_vdef, new_vuse; | |
1151 | last_vdef = gimple_vdef (group->last_stmt); | |
1152 | new_vuse = gimple_vuse (group->last_stmt); | |
1153 | ||
1154 | gimple *stmt = NULL; | |
1155 | /* The new SSA names created. Keep track of them so that we can free them | |
1156 | if we decide to not use the new sequence. */ | |
1157 | auto_vec<tree> new_ssa_names; | |
1158 | split_store *split_store; | |
1159 | unsigned int i; | |
1160 | bool fail = false; | |
1161 | ||
427223f1 | 1162 | tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &seq, |
1163 | is_gimple_mem_ref_addr, NULL_TREE); | |
3d3e04ac | 1164 | FOR_EACH_VEC_ELT (split_stores, i, split_store) |
1165 | { | |
1166 | unsigned HOST_WIDE_INT try_size = split_store->size; | |
1167 | unsigned HOST_WIDE_INT try_pos = split_store->bytepos; | |
1168 | unsigned HOST_WIDE_INT align = split_store->align; | |
1169 | tree offset_type = get_alias_type_for_stmts (split_store->orig_stmts); | |
1170 | location_t loc = get_location_for_stmts (split_store->orig_stmts); | |
1171 | ||
1172 | tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED); | |
f6443ac9 | 1173 | int_type = build_aligned_type (int_type, align); |
427223f1 | 1174 | tree dest = fold_build2 (MEM_REF, int_type, addr, |
3d3e04ac | 1175 | build_int_cst (offset_type, try_pos)); |
1176 | ||
1177 | tree src = native_interpret_expr (int_type, | |
1178 | group->val + try_pos - start_byte_pos, | |
1179 | group->buf_size); | |
1180 | ||
1181 | stmt = gimple_build_assign (dest, src); | |
1182 | gimple_set_location (stmt, loc); | |
1183 | gimple_set_vuse (stmt, new_vuse); | |
1184 | gimple_seq_add_stmt_without_update (&seq, stmt); | |
1185 | ||
1186 | /* We didn't manage to reduce the number of statements. Bail out. */ | |
1187 | if (++num_stmts == orig_num_stmts) | |
1188 | { | |
1189 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1190 | { | |
1191 | fprintf (dump_file, "Exceeded original number of stmts (%u)." | |
1192 | " Not profitable to emit new sequence.\n", | |
1193 | orig_num_stmts); | |
1194 | } | |
1195 | unsigned int ssa_count; | |
1196 | tree ssa_name; | |
1197 | /* Don't forget to cleanup the temporary SSA names. */ | |
1198 | FOR_EACH_VEC_ELT (new_ssa_names, ssa_count, ssa_name) | |
1199 | release_ssa_name (ssa_name); | |
1200 | ||
1201 | fail = true; | |
1202 | break; | |
1203 | } | |
1204 | ||
1205 | tree new_vdef; | |
1206 | if (i < split_stores.length () - 1) | |
1207 | { | |
1208 | new_vdef = make_ssa_name (gimple_vop (cfun), stmt); | |
1209 | new_ssa_names.safe_push (new_vdef); | |
1210 | } | |
1211 | else | |
1212 | new_vdef = last_vdef; | |
1213 | ||
1214 | gimple_set_vdef (stmt, new_vdef); | |
1215 | SSA_NAME_DEF_STMT (new_vdef) = stmt; | |
1216 | new_vuse = new_vdef; | |
1217 | } | |
1218 | ||
1219 | FOR_EACH_VEC_ELT (split_stores, i, split_store) | |
1220 | delete split_store; | |
1221 | ||
1222 | if (fail) | |
1223 | return false; | |
1224 | ||
1225 | gcc_assert (seq); | |
1226 | if (dump_file) | |
1227 | { | |
1228 | fprintf (dump_file, | |
1229 | "New sequence of %u stmts to replace old one of %u stmts\n", | |
1230 | num_stmts, orig_num_stmts); | |
1231 | if (dump_flags & TDF_DETAILS) | |
1232 | print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS); | |
1233 | } | |
1234 | gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT); | |
1235 | ||
1236 | return true; | |
1237 | } | |
1238 | ||
1239 | /* Process the merged_store_group objects created in the coalescing phase. | |
1240 | The stores are all against the base object BASE. | |
1241 | Try to output the widened stores and delete the original statements if | |
1242 | successful. Return true iff any changes were made. */ | |
1243 | ||
1244 | bool | |
f85e7cb7 | 1245 | imm_store_chain_info::output_merged_stores () |
3d3e04ac | 1246 | { |
1247 | unsigned int i; | |
1248 | merged_store_group *merged_store; | |
1249 | bool ret = false; | |
1250 | FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store) | |
1251 | { | |
f85e7cb7 | 1252 | if (output_merged_store (merged_store)) |
3d3e04ac | 1253 | { |
1254 | unsigned int j; | |
1255 | store_immediate_info *store; | |
1256 | FOR_EACH_VEC_ELT (merged_store->stores, j, store) | |
1257 | { | |
1258 | gimple *stmt = store->stmt; | |
1259 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); | |
1260 | gsi_remove (&gsi, true); | |
1261 | if (stmt != merged_store->last_stmt) | |
1262 | { | |
1263 | unlink_stmt_vdef (stmt); | |
1264 | release_defs (stmt); | |
1265 | } | |
1266 | } | |
1267 | ret = true; | |
1268 | } | |
1269 | } | |
1270 | if (ret && dump_file) | |
1271 | fprintf (dump_file, "Merging successful!\n"); | |
1272 | ||
1273 | return ret; | |
1274 | } | |
1275 | ||
1276 | /* Coalesce the store_immediate_info objects recorded against the base object | |
1277 | BASE in the first phase and output them. | |
1278 | Delete the allocated structures. | |
1279 | Return true if any changes were made. */ | |
1280 | ||
1281 | bool | |
f85e7cb7 | 1282 | imm_store_chain_info::terminate_and_process_chain () |
3d3e04ac | 1283 | { |
1284 | /* Process store chain. */ | |
1285 | bool ret = false; | |
1286 | if (m_store_info.length () > 1) | |
1287 | { | |
1288 | ret = coalesce_immediate_stores (); | |
1289 | if (ret) | |
f85e7cb7 | 1290 | ret = output_merged_stores (); |
3d3e04ac | 1291 | } |
1292 | ||
1293 | /* Delete all the entries we allocated ourselves. */ | |
1294 | store_immediate_info *info; | |
1295 | unsigned int i; | |
1296 | FOR_EACH_VEC_ELT (m_store_info, i, info) | |
1297 | delete info; | |
1298 | ||
1299 | merged_store_group *merged_info; | |
1300 | FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info) | |
1301 | delete merged_info; | |
1302 | ||
1303 | return ret; | |
1304 | } | |
1305 | ||
1306 | /* Return true iff LHS is a destination potentially interesting for | |
1307 | store merging. In practice these are the codes that get_inner_reference | |
1308 | can process. */ | |
1309 | ||
1310 | static bool | |
1311 | lhs_valid_for_store_merging_p (tree lhs) | |
1312 | { | |
1313 | tree_code code = TREE_CODE (lhs); | |
1314 | ||
1315 | if (code == ARRAY_REF || code == ARRAY_RANGE_REF || code == MEM_REF | |
1316 | || code == COMPONENT_REF || code == BIT_FIELD_REF) | |
1317 | return true; | |
1318 | ||
1319 | return false; | |
1320 | } | |
1321 | ||
1322 | /* Return true if the tree RHS is a constant we want to consider | |
1323 | during store merging. In practice accept all codes that | |
1324 | native_encode_expr accepts. */ | |
1325 | ||
1326 | static bool | |
1327 | rhs_valid_for_store_merging_p (tree rhs) | |
1328 | { | |
1329 | tree type = TREE_TYPE (rhs); | |
1330 | if (TREE_CODE_CLASS (TREE_CODE (rhs)) != tcc_constant | |
1331 | || !can_native_encode_type_p (type)) | |
1332 | return false; | |
1333 | ||
1334 | return true; | |
1335 | } | |
1336 | ||
1337 | /* Entry point for the pass. Go over each basic block recording chains of | |
1338 | immediate stores. Upon encountering a terminating statement (as defined | |
1339 | by stmt_terminates_chain_p) process the recorded stores and emit the widened | |
1340 | variants. */ | |
1341 | ||
1342 | unsigned int | |
1343 | pass_store_merging::execute (function *fun) | |
1344 | { | |
1345 | basic_block bb; | |
1346 | hash_set<gimple *> orig_stmts; | |
1347 | ||
1348 | FOR_EACH_BB_FN (bb, fun) | |
1349 | { | |
1350 | gimple_stmt_iterator gsi; | |
1351 | unsigned HOST_WIDE_INT num_statements = 0; | |
1352 | /* Record the original statements so that we can keep track of | |
1353 | statements emitted in this pass and not re-process new | |
1354 | statements. */ | |
1355 | for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1356 | { | |
1357 | if (is_gimple_debug (gsi_stmt (gsi))) | |
1358 | continue; | |
1359 | ||
1360 | if (++num_statements > 2) | |
1361 | break; | |
1362 | } | |
1363 | ||
1364 | if (num_statements < 2) | |
1365 | continue; | |
1366 | ||
1367 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1368 | fprintf (dump_file, "Processing basic block <%d>:\n", bb->index); | |
1369 | ||
1370 | for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1371 | { | |
1372 | gimple *stmt = gsi_stmt (gsi); | |
1373 | ||
3a3ba7de | 1374 | if (is_gimple_debug (stmt)) |
1375 | continue; | |
1376 | ||
3d3e04ac | 1377 | if (gimple_has_volatile_ops (stmt)) |
1378 | { | |
1379 | /* Terminate all chains. */ | |
1380 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1381 | fprintf (dump_file, "Volatile access terminates " | |
1382 | "all chains\n"); | |
1383 | terminate_and_process_all_chains (); | |
1384 | continue; | |
1385 | } | |
1386 | ||
3d3e04ac | 1387 | if (gimple_assign_single_p (stmt) && gimple_vdef (stmt) |
1388 | && !stmt_can_throw_internal (stmt) | |
1389 | && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))) | |
1390 | { | |
1391 | tree lhs = gimple_assign_lhs (stmt); | |
1392 | tree rhs = gimple_assign_rhs1 (stmt); | |
1393 | ||
1394 | HOST_WIDE_INT bitsize, bitpos; | |
1395 | machine_mode mode; | |
1396 | int unsignedp = 0, reversep = 0, volatilep = 0; | |
1397 | tree offset, base_addr; | |
1398 | base_addr | |
1399 | = get_inner_reference (lhs, &bitsize, &bitpos, &offset, &mode, | |
1400 | &unsignedp, &reversep, &volatilep); | |
1401 | /* As a future enhancement we could handle stores with the same | |
1402 | base and offset. */ | |
427223f1 | 1403 | bool invalid = reversep |
3d3e04ac | 1404 | || ((bitsize > MAX_BITSIZE_MODE_ANY_INT) |
1405 | && (TREE_CODE (rhs) != INTEGER_CST)) | |
427223f1 | 1406 | || !rhs_valid_for_store_merging_p (rhs); |
3d3e04ac | 1407 | |
f85e7cb7 | 1408 | /* We do not want to rewrite TARGET_MEM_REFs. */ |
1409 | if (TREE_CODE (base_addr) == TARGET_MEM_REF) | |
1410 | invalid = true; | |
3d3e04ac | 1411 | /* In some cases get_inner_reference may return a |
1412 | MEM_REF [ptr + byteoffset]. For the purposes of this pass | |
1413 | canonicalize the base_addr to MEM_REF [ptr] and take | |
1414 | byteoffset into account in the bitpos. This occurs in | |
1415 | PR 23684 and this way we can catch more chains. */ | |
              else if (TREE_CODE (base_addr) == MEM_REF)
                {
                  offset_int bit_off, byte_off = mem_ref_offset (base_addr);
                  bit_off = byte_off << LOG2_BITS_PER_UNIT;
                  bit_off += bitpos;
                  if (!wi::neg_p (bit_off) && wi::fits_shwi_p (bit_off))
                    bitpos = bit_off.to_shwi ();
                  else
                    invalid = true;
                  base_addr = TREE_OPERAND (base_addr, 0);
                }
              /* get_inner_reference returns the base object, get at its
                 address now.  */
              else
                {
                  if (bitpos < 0)
                    invalid = true;
                  base_addr = build_fold_addr_expr (base_addr);
                }

              if (! invalid
                  && offset != NULL_TREE)
                {
                  /* If the access has a variable offset then a base
                     decl has to be address-taken to be able to
                     emit pointer-based stores to it.
                     ???  We might be able to get away with
                     re-using the original base up to the first
                     variable part and then wrapping that inside
                     a BIT_FIELD_REF.  */
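                  /* A hypothetical example: for a store such as a[i].b = 1,
                     where the offset depends on the variable i, the decl 'a'
                     must be addressable for the rewritten, pointer-based
                     stores to be valid.  */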
                  tree base = get_base_address (base_addr);
                  if (! base
                      || (DECL_P (base)
                          && ! TREE_ADDRESSABLE (base)))
                    invalid = true;
                  else
                    base_addr = build2 (POINTER_PLUS_EXPR,
                                        TREE_TYPE (base_addr),
                                        base_addr, offset);
                }

              struct imm_store_chain_info **chain_info
                = m_stores.get (base_addr);

              if (!invalid)
                {
                  store_immediate_info *info;
                  if (chain_info)
                    {
                      info = new store_immediate_info (
                        bitsize, bitpos, stmt,
                        (*chain_info)->m_store_info.length ());
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        {
                          fprintf (dump_file,
                                   "Recording immediate store from stmt:\n");
                          print_gimple_stmt (dump_file, stmt, 0);
                        }
                      (*chain_info)->m_store_info.safe_push (info);
                      /* If we reach the limit of stores to merge in a chain
                         terminate and process the chain now.  */
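                      /* The limit is the value of the max-stores-to-merge
                         --param (PARAM_MAX_STORES_TO_MERGE).  */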
                      if ((*chain_info)->m_store_info.length ()
                          == (unsigned int)
                             PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE))
                        {
                          if (dump_file && (dump_flags & TDF_DETAILS))
                            fprintf (dump_file,
                                     "Reached maximum number of statements"
                                     " to merge:\n");
                          terminate_and_release_chain (*chain_info);
                        }
                      continue;
                    }

                  /* Does this store alias any existing chain?  */
                  terminate_all_aliasing_chains (chain_info, false, stmt);
                  /* Start a new chain.  */
                  struct imm_store_chain_info *new_chain
                    = new imm_store_chain_info (m_stores_head, base_addr);
                  info = new store_immediate_info (bitsize, bitpos,
                                                   stmt, 0);
                  new_chain->m_store_info.safe_push (info);
                  m_stores.put (base_addr, new_chain);
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file,
                               "Starting new chain with statement:\n");
                      print_gimple_stmt (dump_file, stmt, 0);
                      fprintf (dump_file, "The base object is:\n");
                      print_generic_expr (dump_file, base_addr);
                      fprintf (dump_file, "\n");
                    }
                }
              else
                terminate_all_aliasing_chains (chain_info,
                                               offset != NULL_TREE, stmt);

              continue;
            }

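          /* The statement cannot start or extend a chain itself; end any
             chains that it might interfere with.  */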
          terminate_all_aliasing_chains (NULL, false, stmt);
        }
      terminate_and_process_all_chains ();
    }
  return 0;
}

} // anon namespace

/* Construct and return a store merging pass object.  */

gimple_opt_pass *
make_pass_store_merging (gcc::context *ctxt)
{
  return new pass_store_merging (ctxt);
}

#if CHECKING_P

namespace selftest {

/* Selftests for store merging helpers.  */

/* Assert that all elements of the byte arrays X and Y, both of length N,
   are equal.  */

static void
verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
{
  for (unsigned int i = 0; i < n; i++)
    {
      if (x[i] != y[i])
        {
          fprintf (stderr, "Arrays do not match.  X:\n");
          dump_char_array (stderr, x, n);
          fprintf (stderr, "Y:\n");
          dump_char_array (stderr, y, n);
        }
      ASSERT_EQ (x[i], y[i]);
    }
}

/* Test that shift_bytes_in_array carries bits across bytes correctly.  */
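/* For example, shifting the two-byte array { 0xe0, 0x1f } left by 2 bits is
   expected to give { 0x80, 0x7f }: with byte 0 as the least significant byte
   this is the 16-bit value 0x1fe0 << 2 == 0x7f80.  */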

static void
verify_shift_bytes_in_array (void)
{
  /* byte 1   | byte 0
     00011111 | 11100000.  */
  unsigned char orig[2] = { 0xe0, 0x1f };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);

  unsigned char expected[2] = { 0x80, 0x7f };
  shift_bytes_in_array (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}

/* Test that shift_bytes_in_array_right carries bits across bytes
   correctly.  */
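/* For example, shifting { 0x1f, 0xe0 } right by 2 bits is expected to give
   { 0x07, 0xf8 }: the two bits shifted out of the bottom of byte 0 are
   carried into the top of byte 1.  */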

static void
verify_shift_bytes_in_array_right (void)
{
  /* byte 1   | byte 0
     11100000 | 00011111.  */
  unsigned char orig[2] = { 0x1f, 0xe0 };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);
  unsigned char expected[2] = { 0x07, 0xf8 };
  shift_bytes_in_array_right (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_right (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}

/* Test that clear_bit_region clears exactly the bits asked for and
   nothing more.  */
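/* For example, clearing 22 bits starting at bit 1 of an all-ones 3-byte
   array should leave only the least significant bit of byte 0 and the most
   significant bit of byte 2 set, i.e. { 0x01, 0x00, 0x80 }.  */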

static void
verify_clear_bit_region (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x1;
  expected[1] = 0;
  expected[2] = 0x80;
  verify_array_eq (in, expected, sizeof in);
}

/* Test that clear_bit_region_be clears exactly the bits asked for and
   nothing more.  */
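/* For example, in the big-endian bit numbering used here, where bit
   BITS_PER_UNIT - 1 is the most significant bit of byte 0, clearing 22 bits
   starting at bit BITS_PER_UNIT - 2 should leave only the most significant
   bit of byte 0 and the least significant bit of byte 2 set,
   i.e. { 0x80, 0x00, 0x01 }.  */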

static void
verify_clear_bit_region_be (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x80;
  expected[1] = 0;
  expected[2] = 0x1;
  verify_array_eq (in, expected, sizeof in);
}

/* Run all of the selftests within this file.  */

void
store_merging_c_tests (void)
{
  verify_shift_bytes_in_array ();
  verify_shift_bytes_in_array_right ();
  verify_clear_bit_region ();
  verify_clear_bit_region_be ();
}

} // namespace selftest
#endif /* CHECKING_P.  */