/* GIMPLE store merging pass.
   Copyright (C) 2016-2017 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* The purpose of this pass is to combine multiple memory stores of
   constant values to consecutive memory locations into fewer wider stores.
   For example, if we have a sequence performing four byte stores to
   consecutive memory locations:
   [p     ] := imm1;
   [p + 1B] := imm2;
   [p + 2B] := imm3;
   [p + 3B] := imm4;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm1:imm2:imm3:imm4 // concatenated immediates according to endianness.

   The algorithm is applied to each basic block in three phases:

   1) Scan through the basic block recording constant assignments to
   destinations that can be expressed as a store to memory of a certain size
   at a certain bit offset.  Record store chains to different bases in a
   hash_map (m_stores) and make sure to terminate such chains when appropriate
   (for example when the stored values get used subsequently).
   These stores can be a result of structure element initializers, array stores
   etc.  A store_immediate_info object is recorded for every such store.
   Record as many such assignments to a single base as possible until a
   statement that interferes with the store sequence is encountered.

   2) Analyze the chain of stores recorded in phase 1) (i.e. the vector of
   store_immediate_info objects) and coalesce contiguous stores into
   merged_store_group objects.

   For example, given the stores:
   [p     ] := 0;
   [p + 1B] := 1;
   [p + 3B] := 0;
   [p + 4B] := 1;
   [p + 5B] := 0;
   [p + 6B] := 0;
   This phase would produce two merged_store_group objects, one recording the
   two bytes stored in the memory region [p : p + 1] and another
   recording the four bytes stored in the memory region [p + 3 : p + 6].

   3) The merged_store_group objects produced in phase 2) are processed
   to generate the sequence of wider stores that set the contiguous memory
   regions to the sequence of bytes that correspond to it.  This may emit
   multiple stores per store group to handle contiguous stores that are not
   of a size that is a power of 2.  For example it can try to emit a 40-bit
   store as a 32-bit store followed by an 8-bit store.
   We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
   or TARGET_SLOW_UNALIGNED_ACCESS rules.

   Note on endianness and example:
   Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
   [p     ] := 0x1234;
   [p + 2B] := 0x5678;
   [p + 4B] := 0xab;
   [p + 5B] := 0xcd;

   The memory layout for little-endian (LE) and big-endian (BE) must be:
   p |LE|BE|
   ---------
   0 |34|12|
   1 |12|34|
   2 |78|56|
   3 |56|78|
   4 |ab|ab|
   5 |cd|cd|

   To merge these into a single 48-bit merged value 'val' in phase 2)
   on little-endian we insert stores to higher (consecutive) bitpositions
   into the most significant bits of the merged value.
   The final merged value would be: 0xcdab56781234

   For big-endian we insert stores to higher bitpositions into the least
   significant bits of the merged value.
   The final merged value would be: 0x12345678abcd

   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
   followed by a 16-bit store.  Again, we must consider endianness when
   breaking down the 48-bit value 'val' computed above.
   For little endian we emit:
   [p]      (32-bit) := 0x56781234; //  val & 0x0000ffffffff;
   [p + 4B] (16-bit) := 0xcdab;     // (val & 0xffff00000000) >> 32;

   Whereas for big-endian we emit:
   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
   [p + 4B] (16-bit) := 0xabcd;     //  val & 0x00000000ffff;  */
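/* As a concrete source-level sketch (illustrative only, not taken from this
   file), the pass aims to turn code such as:

     struct S { unsigned char a, b, c, d; };

     void
     init (struct S *s)
     {
       s->a = 1;
       s->b = 2;
       s->c = 3;
       s->d = 4;
     }

   into the equivalent of a single 4-byte store of the merged constant:
   0x04030201 on little-endian targets, 0x01020304 on big-endian ones,
   provided the target supports such a store.  */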

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "builtins.h"
#include "fold-const.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "params.h"
#include "print-tree.h"
#include "tree-hash-traits.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "stor-layout.h"
#include "timevar.h"
#include "tree-cfg.h"
#include "tree-eh.h"
#include "target.h"
#include "gimplify-me.h"
#include "rtl.h"
#include "expr.h"	/* For get_bit_range.  */
#include "selftest.h"

/* The maximum size (in bits) of the stores this pass should generate.  */
#define MAX_STORE_BITSIZE (BITS_PER_WORD)
#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)

namespace {

/* Struct recording the information about a single store of an immediate
   to memory.  These are created in the first phase and coalesced into
   merged_store_group objects in the second phase.  */

struct store_immediate_info
{
  unsigned HOST_WIDE_INT bitsize;
  unsigned HOST_WIDE_INT bitpos;
  unsigned HOST_WIDE_INT bitregion_start;
  /* This is one past the last bit of the bit region.  */
  unsigned HOST_WIDE_INT bitregion_end;
  gimple *stmt;
  unsigned int order;
  store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
			unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
			gimple *, unsigned int);
};

store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
					    unsigned HOST_WIDE_INT bp,
					    unsigned HOST_WIDE_INT brs,
					    unsigned HOST_WIDE_INT bre,
					    gimple *st,
					    unsigned int ord)
  : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
    stmt (st), order (ord)
{
}

/* Struct representing a group of stores to contiguous memory locations.
   These are produced by the second phase (coalescing) and consumed in the
   third phase that outputs the widened stores.  */

struct merged_store_group
{
  unsigned HOST_WIDE_INT start;
  unsigned HOST_WIDE_INT width;
  unsigned HOST_WIDE_INT bitregion_start;
  unsigned HOST_WIDE_INT bitregion_end;
  /* The size of the allocated memory for val and mask.  */
  unsigned HOST_WIDE_INT buf_size;
  unsigned HOST_WIDE_INT align_base;

  unsigned int align;
  unsigned int first_order;
  unsigned int last_order;

  auto_vec<store_immediate_info *> stores;
  /* We record the first and last original statements in the sequence because
     we'll need their vuse/vdef and replacement position.  It's easier to keep
     track of them separately as 'stores' is reordered by apply_stores.  */
  gimple *last_stmt;
  gimple *first_stmt;
  unsigned char *val;
  unsigned char *mask;

  merged_store_group (store_immediate_info *);
  ~merged_store_group ();
  void merge_into (store_immediate_info *);
  void merge_overlapping (store_immediate_info *);
  bool apply_stores ();
private:
  void do_merge (store_immediate_info *);
};

/* Debug helper.  Dump LEN elements of byte array PTR to FD in hex.  */

static void
dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
{
  if (!fd)
    return;

  for (unsigned int i = 0; i < len; i++)
    fprintf (fd, "%x ", ptr[i]);
  fprintf (fd, "\n");
}

/* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the
   bits between adjacent elements.  AMNT should be within
   [0, BITS_PER_UNIT).
   Example, AMNT = 2:
   00011111|11100000 << 2 = 01111111|10000000
   PTR[1]  | PTR[0]         PTR[1]  | PTR[0].  */

static void
shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt)
{
  if (amnt == 0)
    return;

  unsigned char carry_over = 0U;
  unsigned char carry_mask = (~0U) << (unsigned char) (BITS_PER_UNIT - amnt);
  unsigned char clear_mask = (~0U) << amnt;

  for (unsigned int i = 0; i < sz; i++)
    {
      unsigned prev_carry_over = carry_over;
      carry_over = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt);

      ptr[i] <<= amnt;
      if (i != 0)
	{
	  ptr[i] &= clear_mask;
	  ptr[i] |= prev_carry_over;
	}
    }
}
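
/* A minimal illustrative use of the above, assuming BITS_PER_UNIT == 8 and
   the buffer from the diagram in the comment:

     unsigned char buf[2] = { 0xe0, 0x1f };  // PTR[0] = 11100000, PTR[1] = 00011111
     shift_bytes_in_array (buf, 2, 2);
     // buf[0] == 0x80, buf[1] == 0x7f, i.e. 10000000|01111111: the two bits
     // shifted out of buf[0] carried into the low bits of buf[1].
*/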

/* Like shift_bytes_in_array but for big-endian.
   Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the
   bits between adjacent elements.  AMNT should be within
   [0, BITS_PER_UNIT).
   Example, AMNT = 2:
   00011111|11100000 >> 2 = 00000111|11111000
   PTR[0]  | PTR[1]         PTR[0]  | PTR[1].  */

static void
shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz,
			    unsigned int amnt)
{
  if (amnt == 0)
    return;

  unsigned char carry_over = 0U;
  unsigned char carry_mask = ~(~0U << amnt);

  for (unsigned int i = 0; i < sz; i++)
    {
      unsigned prev_carry_over = carry_over;
      carry_over = ptr[i] & carry_mask;

      carry_over <<= (unsigned char) BITS_PER_UNIT - amnt;
      ptr[i] >>= amnt;
      ptr[i] |= prev_carry_over;
    }
}
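
/* Mirroring the sketch above for the big-endian variant:

     unsigned char buf[2] = { 0x1f, 0xe0 };  // PTR[0] = 00011111, PTR[1] = 11100000
     shift_bytes_in_array_right (buf, 2, 2);
     // buf[0] == 0x07, buf[1] == 0xf8, i.e. 00000111|11111000: the two low
     // bits of buf[0] carried into the high bits of buf[1].
*/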

/* Clear out LEN bits starting from bit START in the byte array
   PTR.  This clears the bits to the *right* from START.
   START must be within [0, BITS_PER_UNIT) and counts starting from
   the least significant bit.  */

static void
clear_bit_region_be (unsigned char *ptr, unsigned int start,
		     unsigned int len)
{
  if (len == 0)
    return;
  /* Clear len bits to the right of start.  */
  else if (len <= start + 1)
    {
      unsigned char mask = (~(~0U << len));
      mask = mask << (start + 1U - len);
      ptr[0] &= ~mask;
    }
  else if (start != BITS_PER_UNIT - 1)
    {
      clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
      clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
			   len - (start % BITS_PER_UNIT) - 1);
    }
  else if (start == BITS_PER_UNIT - 1
	   && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      memset (ptr, 0, nbytes);
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
			     len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}
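
/* For instance (illustrative, assuming BITS_PER_UNIT == 8):

     unsigned char buf[2] = { 0xff, 0xff };
     clear_bit_region_be (buf, 7, 12);
     // Clears 12 bits rightwards from bit 7 (the MSB) of buf[0]: all of
     // buf[0] and the top 4 bits of buf[1], leaving { 0x00, 0x0f }.
*/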

/* In the byte array PTR clear the bit region that starts at bit
   START and is LEN bits wide.
   For regions spanning multiple bytes do this recursively until we reach
   zero LEN or a region contained within a single byte.  */

static void
clear_bit_region (unsigned char *ptr, unsigned int start,
		  unsigned int len)
{
  /* Degenerate base case.  */
  if (len == 0)
    return;
  else if (start >= BITS_PER_UNIT)
    clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
  /* Second base case.  */
  else if ((start + len) <= BITS_PER_UNIT)
    {
      unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
      mask >>= BITS_PER_UNIT - (start + len);

      ptr[0] &= ~mask;

      return;
    }
  /* Clear most significant bits in a byte and proceed with the next byte.  */
  else if (start != 0)
    {
      clear_bit_region (ptr, start, BITS_PER_UNIT - start);
      clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
    }
  /* Whole bytes need to be cleared.  */
  else if (start == 0 && len > BITS_PER_UNIT)
    {
      unsigned int nbytes = len / BITS_PER_UNIT;
      /* We could recurse on each byte but we clear whole bytes, so a simple
	 memset will do.  */
      memset (ptr, '\0', nbytes);
      /* Clear the remaining sub-byte region if there is one.  */
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}
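
/* The little-endian counterpart of the sketch above:

     unsigned char buf[2] = { 0xff, 0xff };
     clear_bit_region (buf, 4, 8);
     // Clears bits 4..11 counting from the LSB of buf[0]: the top 4 bits
     // of buf[0] and the low 4 bits of buf[1], leaving { 0x0f, 0xf0 }.
*/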

/* Write BITLEN bits of EXPR to the byte array PTR at
   bit position BITPOS.  PTR should contain TOTAL_BYTES elements.
   Return true if the operation succeeded.  */

static bool
encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
		       unsigned int total_bytes)
{
  unsigned int first_byte = bitpos / BITS_PER_UNIT;
  tree tmp_int = expr;
  bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
			|| (bitpos % BITS_PER_UNIT)
			|| !int_mode_for_size (bitlen, 0).exists ());

  if (!sub_byte_op_p)
    return native_encode_expr (tmp_int, ptr + first_byte, total_bytes) != 0;

  /* LITTLE-ENDIAN
     We are writing a non byte-sized quantity or at a position that is not
     at a byte boundary.
       |--------|--------|--------| ptr + first_byte
		       ^              ^
		       xxx xxxxxxxx xxx< bp>
		       |______EXPR____|

     First native_encode_expr EXPR into a temporary buffer and shift each
     byte in the buffer by 'bp' (carrying the bits over as necessary).
     |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
	<------bitlen---->< bp>
     Then we clear the destination bits:
     |---00000|00000000|000-----| ptr + first_byte
	<-------bitlen--->< bp >

     Finally we ORR the bytes of the shifted EXPR into the cleared region:
     |---xxxxx|xxxxxxxx|xxx-----| ptr + first_byte.

     BIG-ENDIAN
     We are writing a non byte-sized quantity or at a position that is not
     at a byte boundary.
     ptr + first_byte |--------|--------|--------|
			    ^              ^
		      <bp >xxx xxxxxxxx xxx
			   |_____EXPR_____|

     First native_encode_expr EXPR into a temporary buffer and shift each
     byte in the buffer to the right (carrying the bits over as necessary).
     We shift by as much as needed to align the most significant bit of EXPR
     with bitpos:
     |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
	<---bitlen---->         <bp ><-----bitlen----->
     Then we clear the destination bits:
     ptr + first_byte |-----000|00000000|00000---|
		      <bp ><-------bitlen----->
     Finally we ORR the bytes of the shifted EXPR into the cleared region:
     ptr + first_byte |---xxxxx|xxxxxxxx|xxx-----|.
     The awkwardness comes from the fact that bitpos is counted from the
     most significant bit of a byte.  */

  /* Allocate an extra byte so that we have space to shift into.  */
  unsigned int byte_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (expr))) + 1;
  unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
  memset (tmpbuf, '\0', byte_size);
  /* The store detection code should only have allowed constants that are
     accepted by native_encode_expr.  */
  if (native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
    gcc_unreachable ();

  /* The native_encode_expr machinery uses TYPE_MODE to determine how many
     bytes to write.  This means it can write more than
     ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
     write 8 bytes for a bitlen of 40).  Skip the bytes that are not within
     bitlen and zero out the bits that are not relevant as well (that may
     contain a sign bit due to sign-extension).  */
  unsigned int padding
    = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
  /* On big-endian the padding is at the 'front' so just skip the initial
     bytes.  */
  if (BYTES_BIG_ENDIAN)
    tmpbuf += padding;

  byte_size -= padding;

  if (bitlen % BITS_PER_UNIT != 0)
    {
      if (BYTES_BIG_ENDIAN)
	clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
			     BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
      else
	clear_bit_region (tmpbuf, bitlen,
			  byte_size * BITS_PER_UNIT - bitlen);
    }
  /* Left shifting relies on the last byte being clear if bitlen is
     a multiple of BITS_PER_UNIT, which might not be clear if
     there are padding bytes.  */
  else if (!BYTES_BIG_ENDIAN)
    tmpbuf[byte_size - 1] = '\0';

  /* Clear the bit region in PTR where the bits from TMPBUF will be
     inserted into.  */
  if (BYTES_BIG_ENDIAN)
    clear_bit_region_be (ptr + first_byte,
			 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
  else
    clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);

  int shift_amnt;
  int bitlen_mod = bitlen % BITS_PER_UNIT;
  int bitpos_mod = bitpos % BITS_PER_UNIT;

  bool skip_byte = false;
  if (BYTES_BIG_ENDIAN)
    {
      /* BITPOS and BITLEN are exactly aligned and no shifting
	 is necessary.  */
      if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
	  || (bitpos_mod == 0 && bitlen_mod == 0))
	shift_amnt = 0;
      /* |. . . . . . . .|
	  <bp >   <blen >.
	 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
	 of the value until it aligns with 'bp' in the next byte over.  */
      else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
	{
	  shift_amnt = bitlen_mod + bitpos_mod;
	  skip_byte = bitlen_mod != 0;
	}
      /* |. . . . . . . .|
	  <----bp--->
	   <---blen---->.
	 Shift the value right within the same byte so it aligns with 'bp'.  */
      else
	shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
    }
  else
    shift_amnt = bitpos % BITS_PER_UNIT;

  /* Create the shifted version of EXPR.  */
  if (!BYTES_BIG_ENDIAN)
    {
      shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
      if (shift_amnt == 0)
	byte_size--;
    }
  else
    {
      gcc_assert (BYTES_BIG_ENDIAN);
      shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
      /* If shifting right forced us to move into the next byte skip the now
	 empty byte.  */
      if (skip_byte)
	{
	  tmpbuf++;
	  byte_size--;
	}
    }

  /* Insert the bits from TMPBUF.  */
  for (unsigned int i = 0; i < byte_size; i++)
    ptr[first_byte + i] |= tmpbuf[i];

  return true;
}

/* Sorting function for store_immediate_info objects.
   Sorts them by bitposition.  */

static int
sort_by_bitpos (const void *x, const void *y)
{
  store_immediate_info *const *tmp = (store_immediate_info * const *) x;
  store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;

  if ((*tmp)->bitpos < (*tmp2)->bitpos)
    return -1;
  else if ((*tmp)->bitpos > (*tmp2)->bitpos)
    return 1;
  else
    /* If they are the same let's use the order which is guaranteed to
       be different.  */
    return (*tmp)->order - (*tmp2)->order;
}

/* Sorting function for store_immediate_info objects.
   Sorts them by the order field.  */

static int
sort_by_order (const void *x, const void *y)
{
  store_immediate_info *const *tmp = (store_immediate_info * const *) x;
  store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;

  if ((*tmp)->order < (*tmp2)->order)
    return -1;
  else if ((*tmp)->order > (*tmp2)->order)
    return 1;

  gcc_unreachable ();
}

/* Initialize a merged_store_group object from a store_immediate_info
   object.  */

merged_store_group::merged_store_group (store_immediate_info *info)
{
  start = info->bitpos;
  width = info->bitsize;
  bitregion_start = info->bitregion_start;
  bitregion_end = info->bitregion_end;
  /* VAL has memory allocated for it in apply_stores once the group
     width has been finalized.  */
  val = NULL;
  mask = NULL;
  unsigned HOST_WIDE_INT align_bitpos = 0;
  get_object_alignment_1 (gimple_assign_lhs (info->stmt),
			  &align, &align_bitpos);
  align_base = start - align_bitpos;
  stores.create (1);
  stores.safe_push (info);
  last_stmt = info->stmt;
  last_order = info->order;
  first_stmt = last_stmt;
  first_order = last_order;
  buf_size = 0;
}

merged_store_group::~merged_store_group ()
{
  if (val)
    XDELETEVEC (val);
}

/* Helper method for merge_into and merge_overlapping to do
   the common part.  */
void
merged_store_group::do_merge (store_immediate_info *info)
{
  bitregion_start = MIN (bitregion_start, info->bitregion_start);
  bitregion_end = MAX (bitregion_end, info->bitregion_end);

  unsigned int this_align;
  unsigned HOST_WIDE_INT align_bitpos = 0;
  get_object_alignment_1 (gimple_assign_lhs (info->stmt),
			  &this_align, &align_bitpos);
  if (this_align > align)
    {
      align = this_align;
      align_base = info->bitpos - align_bitpos;
    }

  gimple *stmt = info->stmt;
  stores.safe_push (info);
  if (info->order > last_order)
    {
      last_order = info->order;
      last_stmt = stmt;
    }
  else if (info->order < first_order)
    {
      first_order = info->order;
      first_stmt = stmt;
    }
}

/* Merge a store recorded by INFO into this merged store.
   The store is not overlapping with the existing recorded
   stores.  */

void
merged_store_group::merge_into (store_immediate_info *info)
{
  unsigned HOST_WIDE_INT wid = info->bitsize;
  /* Make sure we're inserting in the position we think we're inserting.  */
  gcc_assert (info->bitpos >= start + width
	      && info->bitregion_start <= bitregion_end);

  width += wid;
  do_merge (info);
}

/* Merge a store described by INFO into this merged store.
   INFO overlaps in some way with the current store (i.e. it's not contiguous
   which is handled by merged_store_group::merge_into).  */

void
merged_store_group::merge_overlapping (store_immediate_info *info)
{
  /* If the store extends the size of the group, extend the width.  */
  if (info->bitpos + info->bitsize > start + width)
    width += info->bitpos + info->bitsize - (start + width);

  do_merge (info);
}

/* Go through all the recorded stores in this group in program order and
   apply their values to the VAL byte array to create the final merged
   value.  Return true if the operation succeeded.  */

bool
merged_store_group::apply_stores ()
{
  /* Make sure we have more than one store in the group, otherwise we cannot
     merge anything.  */
  if (bitregion_start % BITS_PER_UNIT != 0
      || bitregion_end % BITS_PER_UNIT != 0
      || stores.length () == 1)
    return false;

  stores.qsort (sort_by_order);
  store_immediate_info *info;
  unsigned int i;
  /* Create a buffer of a size that is 2 times the number of bytes we're
     storing.  That way native_encode_expr can write power-of-2-sized
     chunks without overrunning.  */
  buf_size = 2 * ((bitregion_end - bitregion_start) / BITS_PER_UNIT);
  val = XNEWVEC (unsigned char, 2 * buf_size);
  mask = val + buf_size;
  memset (val, 0, buf_size);
  memset (mask, ~0U, buf_size);

  FOR_EACH_VEC_ELT (stores, i, info)
    {
      unsigned int pos_in_buffer = info->bitpos - bitregion_start;
      bool ret = encode_tree_to_bitpos (gimple_assign_rhs1 (info->stmt),
					val, info->bitsize,
					pos_in_buffer, buf_size);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (ret)
	    {
	      fprintf (dump_file, "After writing ");
	      print_generic_expr (dump_file,
				  gimple_assign_rhs1 (info->stmt), 0);
	      fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
		       " at position %d the merged region contains:\n",
		       info->bitsize, pos_in_buffer);
	      dump_char_array (dump_file, val, buf_size);
	    }
	  else
	    fprintf (dump_file, "Failed to merge stores\n");
	}
      if (!ret)
	return false;
      unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
      if (BYTES_BIG_ENDIAN)
	clear_bit_region_be (m, (BITS_PER_UNIT - 1
				 - (pos_in_buffer % BITS_PER_UNIT)),
			     info->bitsize);
      else
	clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
    }
  return true;
}
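
/* To illustrate apply_stores (hypothetical little-endian run): for the
   stores [p] := 0x12 and [p + 1B] := 0x34 over a 2-byte bitregion, VAL
   ends up holding the bytes { 0x12, 0x34, ... } while the corresponding
   MASK bytes drop from 0xff to 0x00, marking every bit in that region as
   written.  Bytes whose mask stays 0xff are padding no store touched,
   which split_group later skips.  */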

/* Structure describing the store chain.  */

struct imm_store_chain_info
{
  /* Doubly-linked list that imposes an order on chain processing.
     PNXP (prev's next pointer) points to the head of a list, or to
     the next field in the previous chain in the list.
     See pass_store_merging::m_stores_head for more rationale.  */
  imm_store_chain_info *next, **pnxp;
  tree base_addr;
  auto_vec<store_immediate_info *> m_store_info;
  auto_vec<merged_store_group *> m_merged_store_groups;

  imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
  : next (inspt), pnxp (&inspt), base_addr (b_a)
  {
    inspt = this;
    if (next)
      {
	gcc_checking_assert (pnxp == next->pnxp);
	next->pnxp = &next;
      }
  }
  ~imm_store_chain_info ()
  {
    *pnxp = next;
    if (next)
      {
	gcc_checking_assert (&next == next->pnxp);
	next->pnxp = pnxp;
      }
  }
  bool terminate_and_process_chain ();
  bool coalesce_immediate_stores ();
  bool output_merged_store (merged_store_group *);
  bool output_merged_stores ();
};

const pass_data pass_data_tree_store_merging = {
  GIMPLE_PASS,     /* type */
  "store-merging", /* name */
  OPTGROUP_NONE,   /* optinfo_flags */
  TV_GIMPLE_STORE_MERGING, /* tv_id */
  PROP_ssa,        /* properties_required */
  0,               /* properties_provided */
  0,               /* properties_destroyed */
  0,               /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

class pass_store_merging : public gimple_opt_pass
{
public:
  pass_store_merging (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head ()
  {
  }

  /* Pass not supported for PDP-endianness, nor for insane hosts
     or target character sizes where native_{encode,interpret}_expr
     doesn't work properly.  */
  virtual bool
  gate (function *)
  {
    return flag_store_merging
	   && WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN
	   && CHAR_BIT == 8
	   && BITS_PER_UNIT == 8;
  }

  virtual unsigned int execute (function *);

private:
  hash_map<tree_operand_hash, struct imm_store_chain_info *> m_stores;

  /* Form a doubly-linked stack of the elements of m_stores, so that
     we can iterate over them in a predictable way.  Using this order
     avoids extraneous differences in the compiler output just because
     of tree pointer variations (e.g. different chains end up in
     different positions of m_stores, so they are handled in different
     orders, so they allocate or release SSA names in different
     orders, and when they get reused, subsequent passes end up
     getting different SSA names, which may ultimately change
     decisions when going out of SSA).  */
  imm_store_chain_info *m_stores_head;

  bool terminate_and_process_all_chains ();
  bool terminate_all_aliasing_chains (imm_store_chain_info **,
				      bool, gimple *);
  bool terminate_and_release_chain (imm_store_chain_info *);
}; // class pass_store_merging

/* Terminate and process all recorded chains.  Return true if any changes
   were made.  */

bool
pass_store_merging::terminate_and_process_all_chains ()
{
  bool ret = false;
  while (m_stores_head)
    ret |= terminate_and_release_chain (m_stores_head);
  gcc_assert (m_stores.elements () == 0);
  gcc_assert (m_stores_head == NULL);

  return ret;
}

/* Terminate all chains that are affected by the assignment to DEST, appearing
   in statement STMT and ultimately pointing to the object BASE.  Return true
   if at least one aliasing chain was terminated.  BASE and DEST are allowed
   to be NULL_TREE.  In that case the aliasing checks are performed on the
   whole statement rather than a particular operand in it.  VAR_OFFSET_P
   signifies whether STMT represents a store to BASE offset by a variable
   amount.  If that is the case we have to terminate any chain anchored at
   BASE.  */

bool
pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
						     **chain_info,
						   bool var_offset_p,
						   gimple *stmt)
{
  bool ret = false;

  /* If the statement doesn't touch memory it can't alias.  */
  if (!gimple_vuse (stmt))
    return false;

  /* Check if the assignment destination (BASE) is part of a store chain.
     This is to catch non-constant stores to destinations that may be part
     of a chain.  */
  if (chain_info)
    {
      /* We have a chain at BASE and we're writing to [BASE + <variable>].
	 This can interfere with any of the stores so terminate
	 the chain.  */
      if (var_offset_p)
	{
	  terminate_and_release_chain (*chain_info);
	  ret = true;
	}
      /* Otherwise go through every store in the chain to see if it
	 aliases with any of them.  */
      else
	{
	  store_immediate_info *info;
	  unsigned int i;
	  FOR_EACH_VEC_ELT ((*chain_info)->m_store_info, i, info)
	    {
	      if (ref_maybe_used_by_stmt_p (stmt,
					    gimple_assign_lhs (info->stmt))
		  || stmt_may_clobber_ref_p (stmt,
					     gimple_assign_lhs (info->stmt)))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file,
			       "stmt causes chain termination:\n");
		      print_gimple_stmt (dump_file, stmt, 0);
		    }
		  terminate_and_release_chain (*chain_info);
		  ret = true;
		  break;
		}
	    }
	}
    }

  /* Check for aliasing with all other store chains.  */
  for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
    {
      next = cur->next;

      /* We already checked all the stores in chain_info and terminated the
	 chain if necessary.  Skip it here.  */
      if (chain_info && (*chain_info) == cur)
	continue;

      /* We can't use the base object here as that does not reliably exist.
	 Build a ao_ref from the base object address (if we know the
	 minimum and maximum offset and the maximum size we could improve
	 things here).  */
      ao_ref chain_ref;
      ao_ref_init_from_ptr_and_size (&chain_ref, cur->base_addr, NULL_TREE);
      if (ref_maybe_used_by_stmt_p (stmt, &chain_ref)
	  || stmt_may_clobber_ref_p_1 (stmt, &chain_ref))
	{
	  terminate_and_release_chain (cur);
	  ret = true;
	}
    }

  return ret;
}

/* Helper function.  Terminate the recorded chain storing to base object
   BASE.  Return true if the merging and output was successful.  The m_stores
   entry is removed after the processing in any case.  */

bool
pass_store_merging::terminate_and_release_chain (imm_store_chain_info *chain_info)
{
  bool ret = chain_info->terminate_and_process_chain ();
  m_stores.remove (chain_info->base_addr);
  delete chain_info;
  return ret;
}

/* Go through the candidate stores recorded in m_store_info and merge them
   into merged_store_group objects recorded into m_merged_store_groups
   representing the widened stores.  Return true if coalescing was successful
   and the number of widened stores is fewer than the original number
   of stores.  */

bool
imm_store_chain_info::coalesce_immediate_stores ()
{
  /* Anything less can't be processed.  */
  if (m_store_info.length () < 2)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Attempting to coalesce %u stores in chain.\n",
	     m_store_info.length ());

  store_immediate_info *info;
  unsigned int i;

  /* Order the stores by the bitposition they write to.  */
  m_store_info.qsort (sort_by_bitpos);

  info = m_store_info[0];
  merged_store_group *merged_store = new merged_store_group (info);

  FOR_EACH_VEC_ELT (m_store_info, i, info)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
		   " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:\n",
		   i, info->bitsize, info->bitpos);
	  print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
	  fprintf (dump_file, "\n------------\n");
	}

      if (i == 0)
	continue;

      /* |---store 1---|
	       |---store 2---|
	 Overlapping stores.  */
      unsigned HOST_WIDE_INT start = info->bitpos;
      if (IN_RANGE (start, merged_store->start,
		    merged_store->start + merged_store->width - 1))
	{
	  merged_store->merge_overlapping (info);
	  continue;
	}

      /* |---store 1---| <gap> |---store 2---|.
	 Gap between stores.  Start a new group if there are any gaps
	 between bitregions.  */
      if (info->bitregion_start > merged_store->bitregion_end)
	{
	  /* Try to apply all the stores recorded for the group to determine
	     the bitpattern they write and discard it if that fails.
	     This will also reject single-store groups.  */
	  if (!merged_store->apply_stores ())
	    delete merged_store;
	  else
	    m_merged_store_groups.safe_push (merged_store);

	  merged_store = new merged_store_group (info);

	  continue;
	}

      /* |---store 1---||---store 2---|
	 This store is consecutive to the previous one.
	 Merge it into the current store group.  */
      merged_store->merge_into (info);
    }

  /* Record or discard the last store group.  */
  if (!merged_store->apply_stores ())
    delete merged_store;
  else
    m_merged_store_groups.safe_push (merged_store);

  gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
  bool success
    = !m_merged_store_groups.is_empty ()
      && m_merged_store_groups.length () < m_store_info.length ();

  if (success && dump_file)
    fprintf (dump_file, "Coalescing successful!\n"
			"Merged into %u stores\n",
	     m_merged_store_groups.length ());

  return success;
}

/* Return the type to use for the merged stores described by STMTS.
   This is needed to get the alias sets right.  */

static tree
get_alias_type_for_stmts (auto_vec<gimple *> &stmts)
{
  gimple *stmt;
  unsigned int i;
  tree lhs = gimple_assign_lhs (stmts[0]);
  tree type = reference_alias_ptr_type (lhs);

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      if (i == 0)
	continue;

      lhs = gimple_assign_lhs (stmt);
      tree type1 = reference_alias_ptr_type (lhs);
      if (!alias_ptr_types_compatible_p (type, type1))
	return ptr_type_node;
    }
  return type;
}

/* Return the location_t information we can find among the statements
   in STMTS.  */

static location_t
get_location_for_stmts (auto_vec<gimple *> &stmts)
{
  gimple *stmt;
  unsigned int i;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    if (gimple_has_location (stmt))
      return gimple_location (stmt);

  return UNKNOWN_LOCATION;
}

/* Used to describe a store resulting from splitting a wide store in smaller
   regularly-sized stores in split_group.  */

struct split_store
{
  unsigned HOST_WIDE_INT bytepos;
  unsigned HOST_WIDE_INT size;
  unsigned HOST_WIDE_INT align;
  auto_vec<gimple *> orig_stmts;
  /* True if there is a single orig stmt covering the whole split store.  */
  bool orig;
  split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
	       unsigned HOST_WIDE_INT);
};

/* Simple constructor.  */

split_store::split_store (unsigned HOST_WIDE_INT bp,
			  unsigned HOST_WIDE_INT sz,
			  unsigned HOST_WIDE_INT al)
  : bytepos (bp), size (sz), align (al), orig (false)
{
  orig_stmts.create (0);
}

/* Record all statements corresponding to stores in GROUP that write to
   the region that starts at BITPOS and is BITSIZE bits wide.  Record such
   statements in STMTS if non-NULL.  The stores in GROUP must be sorted by
   bitposition.  Return INFO if there is exactly one original store
   in the range.  */

static store_immediate_info *
find_constituent_stmts (struct merged_store_group *group,
			vec<gimple *> *stmts,
			unsigned int *first,
			unsigned HOST_WIDE_INT bitpos,
			unsigned HOST_WIDE_INT bitsize)
{
  store_immediate_info *info, *ret = NULL;
  unsigned int i;
  bool second = false;
  bool update_first = true;
  unsigned HOST_WIDE_INT end = bitpos + bitsize;
  for (i = *first; group->stores.iterate (i, &info); ++i)
    {
      unsigned HOST_WIDE_INT stmt_start = info->bitpos;
      unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
      if (stmt_end <= bitpos)
	{
	  /* BITPOS passed to this function never decreases from within the
	     same split_group call, so optimize and don't scan info records
	     which are known to end before or at BITPOS next time.
	     Only do it if all stores before this one also pass this.  */
	  if (update_first)
	    *first = i + 1;
	  continue;
	}
      else
	update_first = false;

      /* The stores in GROUP are ordered by bitposition so if we're past
	 the region for this group return early.  */
      if (stmt_start >= end)
	return ret;

      if (stmts)
	{
	  stmts->safe_push (info->stmt);
	  if (ret)
	    {
	      ret = NULL;
	      second = true;
	    }
	}
      else if (ret)
	return NULL;
      if (!second)
	ret = info;
    }
  return ret;
}

/* Split a merged store described by GROUP by populating the SPLIT_STORES
   vector (if non-NULL) with split_store structs describing the byte offset
   (from the base), the bit size and alignment of each store as well as the
   original statements involved in each such split group.
   This is to separate the splitting strategy from the statement
   building/emission/linking done in output_merged_store.
   Return number of new stores.
   If SPLIT_STORES is NULL, it is just a dry run to count number of
   new stores.  */

static unsigned int
split_group (merged_store_group *group, bool allow_unaligned,
	     vec<struct split_store *> *split_stores)
{
  unsigned HOST_WIDE_INT pos = group->bitregion_start;
  unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
  unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
  unsigned HOST_WIDE_INT group_align = group->align;
  unsigned HOST_WIDE_INT align_base = group->align_base;

  gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));

  unsigned int ret = 0, first = 0;
  unsigned HOST_WIDE_INT try_pos = bytepos;
  group->stores.qsort (sort_by_bitpos);

  while (size > 0)
    {
      if ((allow_unaligned || group_align <= BITS_PER_UNIT)
	  && group->mask[try_pos - bytepos] == (unsigned char) ~0U)
	{
	  /* Skip padding bytes.  */
	  ++try_pos;
	  size -= BITS_PER_UNIT;
	  continue;
	}

      unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
      unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
      unsigned HOST_WIDE_INT align_bitpos
	= (try_bitpos - align_base) & (group_align - 1);
      unsigned HOST_WIDE_INT align = group_align;
      if (align_bitpos)
	align = least_bit_hwi (align_bitpos);
      if (!allow_unaligned)
	try_size = MIN (try_size, align);
      store_immediate_info *info
	= find_constituent_stmts (group, NULL, &first, try_bitpos, try_size);
      if (info)
	{
	  /* If there is just one original statement for the range, see if
	     we can just reuse the original store which could be even larger
	     than try_size.  */
	  unsigned HOST_WIDE_INT stmt_end
	    = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
	  info = find_constituent_stmts (group, NULL, &first, try_bitpos,
					 stmt_end - try_bitpos);
	  if (info && info->bitpos >= try_bitpos)
	    {
	      try_size = stmt_end - try_bitpos;
	      goto found;
	    }
	}

      /* Approximate store bitsize for the case when there are no padding
	 bits.  */
      while (try_size > size)
	try_size /= 2;
      /* Now look for whole padding bytes at the end of that bitsize.  */
      for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
	if (group->mask[try_pos - bytepos + nonmasked - 1]
	    != (unsigned char) ~0U)
	  break;
      if (nonmasked == 0)
	{
	  /* If entire try_size range is padding, skip it.  */
	  try_pos += try_size / BITS_PER_UNIT;
	  size -= try_size;
	  continue;
	}
      /* Otherwise try to decrease try_size if second half, last 3 quarters
	 etc. are padding.  */
      nonmasked *= BITS_PER_UNIT;
      while (nonmasked <= try_size / 2)
	try_size /= 2;
      if (!allow_unaligned && group_align > BITS_PER_UNIT)
	{
	  /* Now look for whole padding bytes at the start of that bitsize.  */
	  unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
	  for (masked = 0; masked < try_bytesize; ++masked)
	    if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U)
	      break;
	  masked *= BITS_PER_UNIT;
	  gcc_assert (masked < try_size);
	  if (masked >= try_size / 2)
	    {
	      while (masked >= try_size / 2)
		{
		  try_size /= 2;
		  try_pos += try_size / BITS_PER_UNIT;
		  size -= try_size;
		  masked -= try_size;
		}
	      /* Need to recompute the alignment, so just retry at the new
		 position.  */
	      continue;
	    }
	}

    found:
      ++ret;

      if (split_stores)
	{
	  struct split_store *store
	    = new split_store (try_pos, try_size, align);
	  info = find_constituent_stmts (group, &store->orig_stmts,
					 &first, try_bitpos, try_size);
	  if (info
	      && info->bitpos >= try_bitpos
	      && info->bitpos + info->bitsize <= try_bitpos + try_size)
	    store->orig = true;
	  split_stores->safe_push (store);
	}

      try_pos += try_size / BITS_PER_UNIT;
      size -= try_size;
    }

  return ret;
}
1267 | ||
1268 | /* Given a merged store group GROUP output the widened version of it. | |
1269 | The store chain is against the base object BASE. | |
1270 | Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output | |
1271 | unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive. | |
1272 | Make sure that the number of statements output is less than the number of | |
1273 | original statements. If a better sequence is possible emit it and | |
1274 | return true. */ | |
1275 | ||
1276 | bool | |
b5926e23 | 1277 | imm_store_chain_info::output_merged_store (merged_store_group *group) |
f663d9ad | 1278 | { |
a62b3dc5 JJ |
1279 | unsigned HOST_WIDE_INT start_byte_pos |
1280 | = group->bitregion_start / BITS_PER_UNIT; | |
f663d9ad KT |
1281 | |
1282 | unsigned int orig_num_stmts = group->stores.length (); | |
1283 | if (orig_num_stmts < 2) | |
1284 | return false; | |
1285 | ||
a62b3dc5 | 1286 | auto_vec<struct split_store *, 32> split_stores; |
f663d9ad | 1287 | split_stores.create (0); |
a62b3dc5 JJ |
1288 | bool allow_unaligned |
1289 | = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED); | |
1290 | if (allow_unaligned) | |
1291 | { | |
1292 | /* If unaligned stores are allowed, see how many stores we'd emit | |
1293 | for unaligned and how many stores we'd emit for aligned stores. | |
1294 | Only use unaligned stores if it allows fewer stores than aligned. */ | |
1295 | unsigned aligned_cnt = split_group (group, false, NULL); | |
1296 | unsigned unaligned_cnt = split_group (group, true, NULL); | |
1297 | if (aligned_cnt <= unaligned_cnt) | |
1298 | allow_unaligned = false; | |
1299 | } | |
1300 | split_group (group, allow_unaligned, &split_stores); | |
1301 | ||
1302 | if (split_stores.length () >= orig_num_stmts) | |
1303 | { | |
1304 | /* We didn't manage to reduce the number of statements. Bail out. */ | |
1305 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1306 | { | |
1307 | fprintf (dump_file, "Exceeded original number of stmts (%u)." | |
1308 | " Not profitable to emit new sequence.\n", | |
1309 | orig_num_stmts); | |
1310 | } | |
1311 | return false; | |
1312 | } | |
f663d9ad KT |
1313 | |
1314 | gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt); | |
1315 | gimple_seq seq = NULL; | |
f663d9ad KT |
1316 | tree last_vdef, new_vuse; |
1317 | last_vdef = gimple_vdef (group->last_stmt); | |
1318 | new_vuse = gimple_vuse (group->last_stmt); | |
1319 | ||
1320 | gimple *stmt = NULL; | |
f663d9ad KT |
1321 | split_store *split_store; |
1322 | unsigned int i; | |
f663d9ad | 1323 | |
aa55dc0c RB |
1324 | tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &seq, |
1325 | is_gimple_mem_ref_addr, NULL_TREE); | |
f663d9ad KT |
1326 | FOR_EACH_VEC_ELT (split_stores, i, split_store) |
1327 | { | |
1328 | unsigned HOST_WIDE_INT try_size = split_store->size; | |
1329 | unsigned HOST_WIDE_INT try_pos = split_store->bytepos; | |
1330 | unsigned HOST_WIDE_INT align = split_store->align; | |
a62b3dc5 JJ |
1331 | tree dest, src; |
1332 | location_t loc; | |
1333 | if (split_store->orig) | |
1334 | { | |
1335 | /* If there is just a single constituent store which covers | |
1336 | the whole area, just reuse the lhs and rhs. */ | |
1337 | dest = gimple_assign_lhs (split_store->orig_stmts[0]); | |
1338 | src = gimple_assign_rhs1 (split_store->orig_stmts[0]); | |
1339 | loc = gimple_location (split_store->orig_stmts[0]); | |
1340 | } | |
1341 | else | |
1342 | { | |
1343 | tree offset_type | |
1344 | = get_alias_type_for_stmts (split_store->orig_stmts); | |
1345 | loc = get_location_for_stmts (split_store->orig_stmts); | |
1346 | ||
1347 | tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED); | |
1348 | int_type = build_aligned_type (int_type, align); | |
1349 | dest = fold_build2 (MEM_REF, int_type, addr, | |
1350 | build_int_cst (offset_type, try_pos)); | |
1351 | src = native_interpret_expr (int_type, | |
1352 | group->val + try_pos - start_byte_pos, | |
1353 | group->buf_size); | |
1354 | tree mask | |
1355 | = native_interpret_expr (int_type, | |
1356 | group->mask + try_pos - start_byte_pos, | |
1357 | group->buf_size); | |
1358 | if (!integer_zerop (mask)) | |
1359 | { | |
1360 | tree tem = make_ssa_name (int_type); | |
1361 | tree load_src = unshare_expr (dest); | |
1362 | /* The load might load some or all bits uninitialized, | |
1363 | avoid -W*uninitialized warnings in that case. | |
1364 | As optimization, it would be nice if all the bits are | |
1365 | provably uninitialized (no stores at all yet or previous | |
1366 | store a CLOBBER) we'd optimize away the load and replace | |
1367 | it e.g. with 0. */ | |
1368 | TREE_NO_WARNING (load_src) = 1; | |
1369 | stmt = gimple_build_assign (tem, load_src); | |
1370 | gimple_set_location (stmt, loc); | |
1371 | gimple_set_vuse (stmt, new_vuse); | |
1372 | gimple_seq_add_stmt_without_update (&seq, stmt); | |

              /* FIXME: If there is a single chunk of zero bits in MASK,
                 perhaps use BIT_INSERT_EXPR instead?  */
              stmt = gimple_build_assign (make_ssa_name (int_type),
                                          BIT_AND_EXPR, tem, mask);
              gimple_set_location (stmt, loc);
              gimple_seq_add_stmt_without_update (&seq, stmt);
              tem = gimple_assign_lhs (stmt);

              src = wide_int_to_tree (int_type,
                                      wi::bit_and_not (wi::to_wide (src),
                                                       wi::to_wide (mask)));
              stmt = gimple_build_assign (make_ssa_name (int_type),
                                          BIT_IOR_EXPR, tem, src);
              gimple_set_location (stmt, loc);
              gimple_seq_add_stmt_without_update (&seq, stmt);
              src = gimple_assign_lhs (stmt);
            }
        }

      stmt = gimple_build_assign (dest, src);
      gimple_set_location (stmt, loc);
      gimple_set_vuse (stmt, new_vuse);
      gimple_seq_add_stmt_without_update (&seq, stmt);

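      /* Chain the virtual operands: each intermediate store defines a fresh
         virtual SSA name that the next store uses, while the final store
         reuses the vdef of the original last statement so downstream
         virtual uses stay valid.  */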
      tree new_vdef;
      if (i < split_stores.length () - 1)
        new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
      else
        new_vdef = last_vdef;

      gimple_set_vdef (stmt, new_vdef);
      SSA_NAME_DEF_STMT (new_vdef) = stmt;
      new_vuse = new_vdef;
    }

  FOR_EACH_VEC_ELT (split_stores, i, split_store)
    delete split_store;

  gcc_assert (seq);
  if (dump_file)
    {
      fprintf (dump_file,
               "New sequence of %u stmts to replace old one of %u stmts\n",
               split_stores.length (), orig_num_stmts);
      if (dump_flags & TDF_DETAILS)
        print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
    }
  gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);

  return true;
}

/* Process the merged_store_group objects created in the coalescing phase.
   The stores are all against the base object BASE.
   Try to output the widened stores and delete the original statements if
   successful.  Return true iff any changes were made.  */

bool
imm_store_chain_info::output_merged_stores ()
{
  unsigned int i;
  merged_store_group *merged_store;
  bool ret = false;
  FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
    {
      if (output_merged_store (merged_store))
        {
          unsigned int j;
          store_immediate_info *store;
          FOR_EACH_VEC_ELT (merged_store->stores, j, store)
            {
              gimple *stmt = store->stmt;
              gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
              gsi_remove (&gsi, true);
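              /* The virtual operands of the group's last statement were
                 taken over by the new widened-store sequence, so only the
                 other original stores have their vdefs unlinked and their
                 defs released here.  */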
              if (stmt != merged_store->last_stmt)
                {
                  unlink_stmt_vdef (stmt);
                  release_defs (stmt);
                }
            }
          ret = true;
        }
    }
  if (ret && dump_file)
    fprintf (dump_file, "Merging successful!\n");

  return ret;
}

/* Coalesce the store_immediate_info objects recorded against the base object
   BASE in the first phase and output them.
   Delete the allocated structures.
   Return true if any changes were made.  */

bool
imm_store_chain_info::terminate_and_process_chain ()
{
  /* Process store chain.  */
  bool ret = false;
  if (m_store_info.length () > 1)
    {
      ret = coalesce_immediate_stores ();
      if (ret)
        ret = output_merged_stores ();
    }

  /* Delete all the entries we allocated ourselves.  */
  store_immediate_info *info;
  unsigned int i;
  FOR_EACH_VEC_ELT (m_store_info, i, info)
    delete info;

  merged_store_group *merged_info;
  FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
    delete merged_info;

  return ret;
}

/* Return true iff LHS is a destination potentially interesting for
   store merging.  In practice these are the codes that get_inner_reference
   can process.  */

static bool
lhs_valid_for_store_merging_p (tree lhs)
{
  tree_code code = TREE_CODE (lhs);

  if (code == ARRAY_REF || code == ARRAY_RANGE_REF || code == MEM_REF
      || code == COMPONENT_REF || code == BIT_FIELD_REF)
    return true;

  return false;
}

/* Return true if the tree RHS is a constant we want to consider
   during store merging.  In practice this accepts all codes that
   native_encode_expr accepts.  */

static bool
rhs_valid_for_store_merging_p (tree rhs)
{
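  /* Passing a NULL buffer makes native_encode_expr do a dry run: it
     returns the number of bytes it would have encoded, or 0 if RHS
     cannot be encoded, which is exactly the check needed here.  */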
  return native_encode_expr (rhs, NULL,
                             GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs)))) != 0;
}

/* Entry point for the pass.  Go over each basic block recording chains of
   immediate stores.  Upon encountering a terminating statement (as defined
   by stmt_terminates_chain_p) process the recorded stores and emit the
   widened variants.  */

unsigned int
pass_store_merging::execute (function *fun)
{
  basic_block bb;
  hash_set<gimple *> orig_stmts;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      unsigned HOST_WIDE_INT num_statements = 0;
      /* Count the non-debug statements: a block with fewer than two real
         statements has nothing that could be merged, so skip it.  */
      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          if (is_gimple_debug (gsi_stmt (gsi)))
            continue;

          if (++num_statements >= 2)
            break;
        }

      if (num_statements < 2)
        continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);

          if (is_gimple_debug (stmt))
            continue;

          if (gimple_has_volatile_ops (stmt))
            {
              /* Terminate all chains.  */
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "Volatile access terminates "
                         "all chains\n");
              terminate_and_process_all_chains ();
              continue;
            }

          if (gimple_assign_single_p (stmt) && gimple_vdef (stmt)
              && !stmt_can_throw_internal (stmt)
              && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt)))
            {
              tree lhs = gimple_assign_lhs (stmt);
              tree rhs = gimple_assign_rhs1 (stmt);

              HOST_WIDE_INT bitsize, bitpos;
              unsigned HOST_WIDE_INT bitregion_start = 0;
              unsigned HOST_WIDE_INT bitregion_end = 0;
              machine_mode mode;
              int unsignedp = 0, reversep = 0, volatilep = 0;
              tree offset, base_addr;
              base_addr
                = get_inner_reference (lhs, &bitsize, &bitpos, &offset, &mode,
                                       &unsignedp, &reversep, &volatilep);
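              /* For bit-field stores the region that may legally be
                 accessed can be wider than the bits actually written;
                 record it so the merged access stays within it
                 (get_bit_range returns an inclusive end position, hence
                 the increment below).  */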
              if (TREE_CODE (lhs) == COMPONENT_REF
                  && DECL_BIT_FIELD_TYPE (TREE_OPERAND (lhs, 1)))
                {
                  get_bit_range (&bitregion_start, &bitregion_end, lhs,
                                 &bitpos, &offset);
                  if (bitregion_end)
                    ++bitregion_end;
                }
              if (bitsize == 0)
                continue;

              /* As a future enhancement we could handle stores with the same
                 base and offset.  */
              bool invalid = reversep
                             || ((bitsize > MAX_BITSIZE_MODE_ANY_INT)
                                 && (TREE_CODE (rhs) != INTEGER_CST))
                             || !rhs_valid_for_store_merging_p (rhs);

              /* We do not want to rewrite TARGET_MEM_REFs.  */
              if (TREE_CODE (base_addr) == TARGET_MEM_REF)
                invalid = true;
              /* In some cases get_inner_reference may return a
                 MEM_REF [ptr + byteoffset].  For the purposes of this pass
                 canonicalize the base_addr to MEM_REF [ptr] and take
                 byteoffset into account in the bitpos.  This occurs in
                 PR 23684 and this way we can catch more chains.  */
              else if (TREE_CODE (base_addr) == MEM_REF)
                {
                  offset_int bit_off, byte_off = mem_ref_offset (base_addr);
                  bit_off = byte_off << LOG2_BITS_PER_UNIT;
                  bit_off += bitpos;
                  if (!wi::neg_p (bit_off) && wi::fits_shwi_p (bit_off))
                    {
                      bitpos = bit_off.to_shwi ();
                      if (bitregion_end)
                        {
                          bit_off = byte_off << LOG2_BITS_PER_UNIT;
                          bit_off += bitregion_start;
                          if (wi::fits_uhwi_p (bit_off))
                            {
                              bitregion_start = bit_off.to_uhwi ();
                              bit_off = byte_off << LOG2_BITS_PER_UNIT;
                              bit_off += bitregion_end;
                              if (wi::fits_uhwi_p (bit_off))
                                bitregion_end = bit_off.to_uhwi ();
                              else
                                bitregion_end = 0;
                            }
                          else
                            bitregion_end = 0;
                        }
                    }
                  else
                    invalid = true;
                  base_addr = TREE_OPERAND (base_addr, 0);
                }
              /* get_inner_reference returned the base object itself;
                 take its address now.  */
              else
                {
                  if (bitpos < 0)
                    invalid = true;
                  base_addr = build_fold_addr_expr (base_addr);
                }

              if (!bitregion_end)
                {
                  bitregion_start = ROUND_DOWN (bitpos, BITS_PER_UNIT);
                  bitregion_end = ROUND_UP (bitpos + bitsize, BITS_PER_UNIT);
                }

              if (!invalid
                  && offset != NULL_TREE)
                {
                  /* If the access has a variable offset then a base decl
                     has to be address-taken to be able to emit
                     pointer-based stores to it.
                     ??? We might be able to get away with re-using the
                     original base up to the first variable part and then
                     wrapping that inside a BIT_FIELD_REF.  */
                  tree base = get_base_address (base_addr);
                  if (!base
                      || (DECL_P (base)
                          && !TREE_ADDRESSABLE (base)))
                    invalid = true;
                  else
                    base_addr = build2 (POINTER_PLUS_EXPR,
                                        TREE_TYPE (base_addr),
                                        base_addr, offset);
                }

              struct imm_store_chain_info **chain_info
                = m_stores.get (base_addr);

              if (!invalid)
                {
                  store_immediate_info *info;
                  if (chain_info)
                    {
                      unsigned int ord = (*chain_info)->m_store_info.length ();
                      info = new store_immediate_info (bitsize, bitpos,
                                                       bitregion_start,
                                                       bitregion_end,
                                                       stmt, ord);
                      if (dump_file && (dump_flags & TDF_DETAILS))
                        {
                          fprintf (dump_file,
                                   "Recording immediate store from stmt:\n");
                          print_gimple_stmt (dump_file, stmt, 0);
                        }
                      (*chain_info)->m_store_info.safe_push (info);
                      /* If we reach the limit of stores to merge in a chain
                         terminate and process the chain now.  */
                      if ((*chain_info)->m_store_info.length ()
                          == (unsigned int)
                             PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE))
                        {
                          if (dump_file && (dump_flags & TDF_DETAILS))
                            fprintf (dump_file,
                                     "Reached maximum number of statements"
                                     " to merge:\n");
                          terminate_and_release_chain (*chain_info);
                        }
                      continue;
                    }

                  /* Does the store alias any existing chain?  */
                  terminate_all_aliasing_chains (chain_info, false, stmt);
                  /* Start a new chain.  */
                  struct imm_store_chain_info *new_chain
                    = new imm_store_chain_info (m_stores_head, base_addr);
                  info = new store_immediate_info (bitsize, bitpos,
                                                   bitregion_start,
                                                   bitregion_end,
                                                   stmt, 0);
                  new_chain->m_store_info.safe_push (info);
                  m_stores.put (base_addr, new_chain);
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file,
                               "Starting new chain with statement:\n");
                      print_gimple_stmt (dump_file, stmt, 0);
                      fprintf (dump_file, "The base object is:\n");
                      print_generic_expr (dump_file, base_addr);
                      fprintf (dump_file, "\n");
                    }
                }
              else
                terminate_all_aliasing_chains (chain_info,
                                               offset != NULL_TREE, stmt);

              continue;
            }

          terminate_all_aliasing_chains (NULL, false, stmt);
        }
      terminate_and_process_all_chains ();
    }
  return 0;
}

} // anon namespace

/* Construct and return a store merging pass object.  */

gimple_opt_pass *
make_pass_store_merging (gcc::context *ctxt)
{
  return new pass_store_merging (ctxt);
}

#if CHECKING_P

namespace selftest {

/* Selftests for store merging helpers.  */

/* Assert that all elements of the byte arrays X and Y, both of length N,
   are equal.  */

static void
verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
{
  for (unsigned int i = 0; i < n; i++)
    {
      if (x[i] != y[i])
        {
          fprintf (stderr, "Arrays do not match.  X:\n");
          dump_char_array (stderr, x, n);
          fprintf (stderr, "Y:\n");
          dump_char_array (stderr, y, n);
        }
      ASSERT_EQ (x[i], y[i]);
    }
}

/* Test that shift_bytes_in_array carries bits across between
   bytes correctly.  */

static void
verify_shift_bytes_in_array (void)
{
  /* byte 1   | byte 0
     00011111 | 11100000.  */
  unsigned char orig[2] = { 0xe0, 0x1f };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);

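  /* Shifting the 16-bit value left by 2 should give
     01111111 | 10000000.  */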
  unsigned char expected[2] = { 0x80, 0x7f };
  shift_bytes_in_array (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}

/* Test that shift_bytes_in_array_right carries bits across between
   bytes correctly.  */

static void
verify_shift_bytes_in_array_right (void)
{
  /* byte 1   | byte 0
     00011111 | 11100000.  */
  unsigned char orig[2] = { 0x1f, 0xe0 };
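  /* Note: the array is reversed relative to verify_shift_bytes_in_array
     because for the big-endian helper the first element holds the most
     significant byte of the value.  */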
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);
  unsigned char expected[2] = { 0x07, 0xf8 };
  shift_bytes_in_array_right (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_right (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}

/* Test that clear_bit_region clears exactly the bits asked and
   nothing more.  */

static void
verify_clear_bit_region (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x1;
  expected[1] = 0;
  expected[2] = 0x80;
  verify_array_eq (in, expected, sizeof in);
}

/* Test that clear_bit_region_be clears exactly the bits asked and
   nothing more.  */

static void
verify_clear_bit_region_be (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

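  /* clear_bit_region_be numbers bits from the most significant end:
     position BITS_PER_UNIT - 1 is the MSB of in[0] and the region extends
     towards less significant bits.  */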
  /* Check zeroing out all the bits.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x80;
  expected[1] = 0;
  expected[2] = 0x1;
  verify_array_eq (in, expected, sizeof in);
}

/* Run all of the selftests within this file.  */

void
store_merging_c_tests (void)
{
  verify_shift_bytes_in_array ();
  verify_shift_bytes_in_array_right ();
  verify_clear_bit_region ();
  verify_clear_bit_region_be ();
}

} // namespace selftest
#endif /* CHECKING_P.  */